Changeset 4055


Ignore:
Timestamp:
Dec 12, 2007, 9:20:25 AM (15 years ago)
Author:
Nicklas Nordborg
Message:

Fixes #800: Make the Illumina importer accept tab as data delimiter and comma as numeric decimal separator

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/2.5-stable/src/plugins/core/net/sf/basedb/plugins/IlluminaRawDataImporter.java

    r3820 r4055  
    8686import net.sf.basedb.util.parser.FlatFileParser.Data;
    8787import net.sf.basedb.util.parser.FlatFileParser.Line;
     88import net.sf.basedb.util.parser.FlatFileParser.LineType;
    8889
    8990/**
     
    128129    ));
    129130 
     131 
    130132  private static final PluginParameter<String> invalidColumnsErrorParameter = new PluginParameter<String>(
    131133      "invalidColumnsError",
     
    251253        storeValue(job, request, fileParameter);
    252254        storeValue(job, request, ri.getParameter(CHARSET));
     255        storeValue(job, request, ri.getParameter(DECIMAL_SEPARATOR));
    253256       
    254257        // Associations
     
    300303    Create a FlatFileParser that can parse Illumina data files:
    301304    <ul>
    302     <li>Data splitter: ,
     305    <li>Data splitter: (,|\t)
    303306    <li>Header regexp: (.+)=(.*?),*
    304     <li>Data header: TargetId,.*
     307    <li>Data header: TargetID(,|\t).*
    305308    </ul>
     309    NOTE! Initially the parser accepts both comma and tab as the column splitter.
     310    Later on (in {@link #isImportable(FlatFileParser)}), once we know which one is actually
     311    used, the parser is changed to use only that one. We need to do this since numbers may
     312    use comma as the decimal separator.
    306313  */
    307314  @Override
     
    309316    throws BaseException
    310317  {
     318    String separator = "(,|\\t)";
    311319    FlatFileParser ffp = new FlatFileParser();
    312     ffp.setDataSplitterRegexp(Pattern.compile(","));
    313     ffp.setDataHeaderRegexp(Pattern.compile("TargetID,.*"));
     320    ffp.setDataSplitterRegexp(Pattern.compile(separator));
     321    ffp.setDataHeaderRegexp(Pattern.compile("TargetID"+separator + ".*"));
    314322    ffp.setHeaderRegexp(Pattern.compile("(.+)=(.*?),*"));
    315323    return ffp;
    316324  }
    317325  /**
    318     @return Always "dot"
     326    @return The decimal separator specified by the job parameter, or "dot" if none is specified
    319327  */
    320328  @Override
    321329  protected String getDecimalSeparator()
    322330  {
    323     return "dot";
     331    String separator = super.getDecimalSeparator();
     332    if (separator == null) separator = "dot";
     333    return separator;
    324334  }
    325335
     
    333343  {
    334344    String firstLine = ffp.getLineCount() >= 1 ? ffp.getLine(0).line() : null;
    335     return firstLine != null && firstLine.contains("Illumina") ;
     345    boolean isIllumina = firstLine != null && firstLine.contains("Illumina");
     346    if (isIllumina)
     347    {
     348      String separator = ",";
     349      FlatFileParser.Line lastLine = ffp.getLine(ffp.getLineCount()-1);
     350      if (lastLine.type() == LineType.DATA_HEADER)
     351      {
     352        int firstTab = lastLine.line().indexOf("\t");
     353        if (firstTab > 0 && firstTab < lastLine.line().indexOf(","))
     354        {
     355          separator = "\\t";
     356        }
     357        ffp.setDataSplitterRegexp(Pattern.compile(separator));
     358      }
     359    }
     360    return isIllumina;
    336361  }
    337362 
     
    592617        parameters.add(fileParameter);
    593618        parameters.add(getCharsetParameter(null, null, null));
     619        parameters.add(getDecimalSeparatorParameter(null, null, (String)job.getValue(DECIMAL_SEPARATOR)));
    594620 
    595621        // parameters for scan, protocol and software
Note: See TracChangeset for help on using the changeset viewer.