Changeset 3911


Ignore:
Timestamp:
Nov 6, 2007, 9:42:09 AM (14 years ago)
Author:
Nicklas Nordborg
Message:

Fixes #811: Wildcard or regular expressions in raw data column mappings

To be consistent with exact name matching the first column found is always returned

Location:
trunk
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/doc/src/docbook/admindoc/plugin_installation.xml

    r3871 r3911  
    13721372              mapping has better performance and we recommend that you use
    13731373              it unless you have to recalculate any of the numerical values.
     1374              In both cases, if no column matching the placeholder exactly is found,
     1375              the placeholder is interpreted as a regular expression that
     1376              is matched against each column header. The first matching column is used.
    13741377              Here are a few mapping examples:
    13751378            </para>
    13761379           
    13771380<informalexample>
    1378 <literallayout>\Name\
    1379 \1\
    1380 [\row\, \column\]
    1381 =2 * col('radius')
     1381<literallayout>\Name\ --&gt; exact match is required
     1382\1\ --&gt; column with index 1 (the second column)
     1383[\row\, \column\] --&gt; combining row and column to a single coordinate
     1384=2 * col('radius') --&gt; calculate the diameter dynamically
     1385\F63(3|5) Median\ --&gt; use regular expression to match either F633 or F635
    13821386</literallayout>
    13831387</informalexample>
  • trunk/src/core/net/sf/basedb/util/parser/ColFunction.java

    r3675 r3911  
    2626import java.text.NumberFormat;
    2727import java.text.ParsePosition;
    28 import java.util.HashMap;
     28import java.util.LinkedHashMap;
    2929import java.util.List;
    3030import java.util.Map;
    3131import java.util.Stack;
     32import java.util.regex.Pattern;
     33import java.util.regex.PatternSyntaxException;
    3234
    3335import org.nfunk.jep.ParseException;
     
    7678    this.numberFormat = numberFormat;
    7779    this.pos = new ParsePosition(0);
    78     this.columnHeaders = new HashMap<String, Integer>(columnHeaders.size());
     80    this.columnHeaders = new LinkedHashMap<String, Integer>(columnHeaders.size());
    7981    int index = 0;
    8082    for (String header : columnHeaders)
     
    133135      String colName = (String)argument;
    134136      Integer column = columnHeaders.get(colName);
    135       if (column != null)
    136       {
    137         value = data.get(column);
    138       }
    139       else
     137      if (column == null) column = findColumn(colName);
     138      if (column == null)
    140139      {
    141140        throw new BaseException("Column '" + colName + "' not found in column headers.");
    142141      }
     142      value = data.get(column);
    143143    }
    144144    else
     
    180180    this.data = data;
    181181  }
     182 
     183  /**
     184    Find column index by checking each column header against a
     185    regular expression. The first match found is returned. The
     186    map is updated with a new regex -> index entry.
     187    @return The column index or null if no column is found or if the
     188      string is not a valid regular expression
     189  */
     190  private Integer findColumn(String regex)
     191  {
     192    try
     193    {
     194      Pattern p = Pattern.compile(regex);
     195      Integer index = null;
     196      for (Map.Entry<String, Integer> entry : columnHeaders.entrySet())
     197      {
     198        String column = entry.getKey();
     199        if (p.matcher(column).matches())
     200        {
     201          index = entry.getValue();
     202          break;
     203        }
     204      }
     205      if (index != null) columnHeaders.put(regex, index);
     206      return index;
     207    }
     208    catch (PatternSyntaxException ex)
     209    {
     210      return null;
     211    }
     212  }
    182213
    183214}
  • trunk/src/core/net/sf/basedb/util/parser/FlatFileParser.java

    r3679 r3911  
    3535import java.util.regex.Pattern;
    3636import java.util.regex.Matcher;
     37import java.util.regex.PatternSyntaxException;
    3738import java.util.Arrays;
    3839import java.util.LinkedList;
     
    765766    @param name The name of the column header
    766767    @return The index, or null if no header with that name exists
     768    @see #findColumnHeaderIndex(String)
    767769  */
    768770  public Integer getColumnHeaderIndex(String name)
     
    770772    int index = columnHeaders.indexOf(name);
    771773    return index >= 0 ? index : null;
     774  }
     775 
     776  /**
     777    Find the index of a column header using a regular expression for pattern
     778    matching. This method should only be called after {@link #parseHeaders()}
     779    has been called. If more than one header matches the regular expression
     780    only the first one found is returned.
     781   
     782    @param regex The regular expression used to match the header names
     783    @return The index, or null if no header matches the regular expression
     784      or if the string is not a valid regular expression
     785    @see #getColumnHeaderIndex(String)
     786    @since 2.5
     787  */
     788  public Integer findColumnHeaderIndex(String regex)
     789  {
     790    try
     791    {
     792      Pattern p = Pattern.compile(regex);
     793      int index = 0;
     794      for (String column : columnHeaders)
     795      {
     796        if (p.matcher(column).matches()) return index;
     797        ++index;
     798      }
     799    }
     800    catch (PatternSyntaxException ex)
     801    {}
     802    return null;
    772803  }
    773804 
     
    852883=2 * col('Radius')
    853884</pre>
    854 
     885    If no column matching the exact name is found, the placeholder
     886    is interpreted as a regular expression which is checked against each of
     887    the column headers. In all cases, the first column header found is used
     888    if there are multiple matches.
     889    <p>
    855890    If the expression is null, a mapper returning an empty string is returned,
    856     unless the {@link #setUseNullIfEmpty(boolean)} has been activeted. In that
     891    unless the {@link #setUseNullIfEmpty(boolean)} has been activated. In that
    857892    case the mapper returns null.
    858893
     
    899934          {
    900935            String name = m.group(1);
    901             int column = columnHeaders.indexOf(name);
    902             if (column >= 0)
    903             {
    904               mappers.add(new ColumnMapper(column, name, numberFormat, nullIfException));
    905             }
    906             else
     936            Integer column = getColumnHeaderIndex(name);
     937            if (column == null) column = findColumnHeaderIndex(name);
     938            if (column == null)
    907939            {
    908940              throw new BaseException("Column '" + name + "' not found in column headers.");
    909941            }
     942            mappers.add(new ColumnMapper(column, name, numberFormat, nullIfException));
    910943          }
    911944        }
Note: See TracChangeset for help on using the changeset viewer.