Changeset 4246


Ignore:
Timestamp:
Apr 24, 2008, 11:39:07 AM (15 years ago)
Author:
Nicklas Nordborg
Message:

Fixes #968: Raw data importer – Enhance feature mismatch error handling

Location:
trunk
Files:
11 edited

Legend:

Unmodified
Added
Removed
  • trunk/doc/test/roles/index.html

    r4025 r4246  
    16061606      <th>Raw bioassay</th>
    16071607      <th>Raw data file</th>
    1608       <th>Spots inserted</th>
     1608      <th>Spots (inserted/with null reporter/skipped)</th>
    16091609      <th>Annotations created</th>
    16101610    </tr>
     
    16121612      <td>Raw bioassay A.00h</td>
    16131613      <td>genepix.mouse.v4.37k.00h.gpr</td>
    1614       <td>36,232</td>
     1614      <td>36,864/632/768</td>
    16151615      <td><i>Dye swap</i>: false</td>
    16161616    </tr>
     
    16181618      <td>Raw bioassay A.24h</td>
    16191619      <td>genepix.mouse.v4.37k.24h.gpr</td>
    1620       <td>36,232</td>
     1620      <td>36,864/632/768</td>
    16211621      <td><i>Dye swap</i>: false</td>
    16221622    </tr>   
     
    16241624      <td>Raw bioassay A.00h (dye-swap)</td>
    16251625      <td>genepix.mouse.v4.37k.00h.dyeswap.gpr</td>
    1626       <td>36,232</td>
     1626      <td>36,864/632/768</td>
    16271627      <td><i>Dye swap</i>: true</td>
    16281628    </tr>
     
    16301630      <td>Raw bioassay A.24h (dye-swap)</td>
    16311631      <td>genepix.mouse.v4.37k.24h.dyeswap.gpr</td>
    1632       <td>36,232</td>
     1632      <td>36,864/632/768</td>
    16331633      <td><i>Dye swap</i>: true</td>
    16341634    </tr>
     
    17511751      <td>Filtered bioassay set</td>
    17521752      <td>All intensities &gt; 0</td>
    1753       <td>ch(1) &gt; 0 &amp;&amp; ch(2) &gt; 0</td>
     1753      <td>ch(1) &gt; 0 &amp;&amp; ch(2) &gt; 0 &amp;&amp; rep('id') != null</td>
    17541754    </tr>
    17551755    </table>
    17561756    Wait for the plug-in to finish. It should report that 136,498 spots remain and
    1757     that 8,430 spots has been removed.
     1757    that 10,958 spots have been removed.
    17581758    <p>
    17591759  </li>
  • trunk/src/core/net/sf/basedb/core/RawDataBatcher.java

    r4243 r4246  
    176176  private boolean useNullIfReporterNotFound = true;
    177177 
     178  /**
     179    The number of reporters that were not found.
     180  */
     181  private int numReportersNotFound = 0;
     182 
     183  /**
     184    If inserts that can't find a matching feature should be skipped
     185    or generate an error.
     186  */
     187  private boolean skipInsertIfMissingFeature = false;
     188
     189  /**
     190    The number of skipped inserts due to missing feature
     191  */
     192  private int numSkippedMissingFeature = 0;
     193 
     194  /**
     195    If we should always accept inserts when the feature has a NULL
     196    reporter.
     197  */
     198  private boolean acceptInsertIfNullReporterOnFeature = false;
     199 
     200  /**
     201    The number of accepted inserts where the feature had a NULL reporter
     202  */
     203  private int numAcceptedWithNullReporterOnFeature = 0;
     204
    178205  RawDataBatcher(DbControl dc, RawBioAssay rawBioAssay, FeatureIdentificationMethod fim)
    179206    throws BaseException
     
    376403    Specify if the insert method should use a null reporter if a reporter
    377404    with the given external ID isn't found. This setting only affects raw bioassays that
    378     are not connected to an array design. The insert always fails if the reporter
    379     doesn't match the array design.
     405    are not connected to an array design.
    380406    @param useNull TRUE if the insert should use a null reporter (default),
    381407      FALSE otherwise
     408    @see #skipInsertIfMissingFeature(boolean)
    382409  */
    383410  public void useNullIfReporterNotFound(boolean useNull)
     
    385412    this.useNullIfReporterNotFound = useNull;
    386413  }
     414 
     415  /**
     416    Get the number of reporters that were not found.
     417    @since 2.7
     418    @see #useNullIfReporterNotFound(boolean)
     419  */
     420  public int getNumReportersNotFound()
     421  {
     422    return numReportersNotFound;
     423  }
     424 
     425  /**
     426    Specify if the insert method should skip the insert if a feature for
     427    the spot can't be found. This setting only affects raw bioassays that
     428    are connected to an array design.
     429   
     430    @param skip TRUE if the insert should be skipped, FALSE to
     431      throw an {@link ItemNotFoundException} (default)
     432    @since 2.7
     433    @see #useNullIfReporterNotFound(boolean)
     434  */
     435  public void skipInsertIfMissingFeature(boolean skip)
     436  {
     437    this.skipInsertIfMissingFeature = skip;
     438  }
     439
     440  /**
     441    Get the number of skipped inserts because of a missing feature.
     442    @since 2.7
     443    @see #skipInsertIfMissingFeature(boolean)
     444  */
     445  public int getNumSkippedMissingFeature()
     446  {
     447    return numSkippedMissingFeature;
     448  }
     449 
     450  /**
     451    Specify if inserts always should be accepted if there is a matching
     452    feature that has a NULL reporter. This setting only affects raw
     453    bioassays that are connected to an array design.
     454    <p>
     455    NOTE! The inserted data will also reference the NULL reporter.
     456   
     457    @param accept TRUE to always accept the insert, FALSE to throw an
     458      {@link ItemNotFoundException} if the insert doesn't specify a NULL
     459      reporter (default)
     460    @since 2.7
     461  */
     462  public void acceptInsertIfNullReporterOnFeature(boolean accept)
     463  {
     464    this.acceptInsertIfNullReporterOnFeature = accept;
     465  }
     466 
     467  /**
     468    Get the number of accepted inserts because of features with
     469    a NULL reporter.
     470    @since 2.7
     471    @see #acceptInsertIfNullReporterOnFeature(boolean)
     472  */
     473  public int getNumAcceptedWithNullReporterOnFeature()
     474  {
     475    return numAcceptedWithNullReporterOnFeature;
     476  }
    387477 
    388478  /**
     
    462552        findReporter.setString("externalId", externalReporterId);
    463553        ReporterData reporter = HibernateUtil.loadData(ReporterData.class, findReporter);
    464         if (reporter == null && !useNullIfReporterNotFound)
     554        if (reporter == null)
    465555        {
    466           throw new ItemNotFoundException("Reporter[externalId=" + externalReporterId + "]");
     556          if (useNullIfReporterNotFound)
     557          {
     558            numReportersNotFound++;
     559          }
     560          else
     561          {
     562            throw new ItemNotFoundException("Reporter[externalId=" + externalReporterId + "]");
     563          }
    467564        }
    468565        data.setReporter(reporter);
     
    477574          "] has already been used by another spot.");
    478575      }
    479       FeatureData f = preloaded.get(featureId);
    480       if (f == null)
    481       {
    482         throw new ItemNotFoundException("Feature["+ fiMethod.toString() + "=" + featureId + "] doesn't exist on array design");
    483       }
    484       ReporterData r = f.getReporter();
    485       if (validateReporterId)
     576      FeatureData feature = preloaded.get(featureId);
     577      if (feature == null)
     578      {
     579        numSkippedMissingFeature++;
     580        if (skipInsertIfMissingFeature) return;
     581        throw new ItemNotFoundException("Feature["+ fiMethod.toString() +
     582            "=" + featureId + "] doesn't exist on array design");
     583      }
     584      ReporterData reporterOnFeature = feature.getReporter();
     585      if (reporterOnFeature == null && acceptInsertIfNullReporterOnFeature)
     586      {
     587        numAcceptedWithNullReporterOnFeature++;
     588      }
     589      else if (validateReporterId)
    486590      {
    487591        // Get the reporter from the feature and verify that it has the same externalId
    488         String rId = r == null ? null : r.getExternalId();
    489         if (!StringUtil.isEqualOrNull(externalReporterId, rId, caseInsensitive))
     592        String reporterIdOnFeature = reporterOnFeature == null ? null : reporterOnFeature.getExternalId();
     593        if (!StringUtil.isEqualOrNull(externalReporterId, reporterIdOnFeature, caseInsensitive))
    490594        {
    491595          throw new ItemNotFoundException("Reporter mismatch: The feature has reporter '" +
    492             rId + "' whereas you have given '" + externalReporterId + "'");
     596            reporterIdOnFeature + "' whereas you have given '" + externalReporterId + "'");
    493597        }
    494598      }
    495       data.setPosition(f.getPosition());
    496       setPropertyValue(data, "feature", f);
    497       data.setReporter(r);
     599      data.setPosition(feature.getPosition());
     600      setPropertyValue(data, "feature", feature);
     601      data.setReporter(reporterOnFeature);
    498602    }
    499603    super.insert(data);
  • trunk/src/plugins/core/net/sf/basedb/plugins/RawDataFlatFileImporter.java

    r4124 r4246  
    237237      "This can for example be caused by invalid block and/or spot coordinates or a different reporter. " +
    238238      "If not specified the default error handling is used.\n\n"+
     239      "smart = SKIP lines that have no feature on the array design, " +
     240      "ACCEPT lines that has a feature with a NULL reporter, " +
     241      "FAIL lines that has a feature with different reporter\n" +
    239242      "skip = Skip the current data line and continue\n"+
    240243      "fail = Stop with an error message",
    241       new StringParameterType(255, null, false, 1, 0, 0,
    242         Arrays.asList( new String[] { "skip", "fail"} ))
     244      new StringParameterType(255, "smart", false, 1, 0, 0,
     245        Arrays.asList( new String[] { "smart", "skip", "fail"} ))
    243246    );
    244247
     
    272275
    273276  private DbControl dc;
    274   private int numInserted;
    275277  private FlatFileParser ffp;
    276278  private NumberFormat numberFormat;
     
    561563   
    562564    batcher = rawBioAssay.getRawDataBatcher(fiMethod);
    563     numInserted = 0;
    564565    this.ffp = ffp;
    565566    this.numberFormat = ffp.getDefaultNumberFormat();
     
    573574      {
    574575        addErrorHandler(ItemNotFoundException.class, new SimpleErrorHandler("skip".equals(method)));
     576        boolean isSmart = "smart".equals(method);
     577        batcher.skipInsertIfMissingFeature(isSmart);
     578        batcher.acceptInsertIfNullReporterOnFeature(isSmart);
    575579      }
    576580    }
     
    680684      raw.setExtended(ep.getName(), ep.parseString(m.getValue(data), numberFormat, nullIfException));
    681685    }
    682 
    683686    batcher.insert(raw, externalId, featureId);
    684     numInserted++;
    685687  }
    686688 
     
    700702  protected String getSuccessMessage(int skippedLines)
    701703  {
    702     String skipped = skippedLines > 0 ? "; " + skippedLines + " spot(s) skipped due to errors": "";
    703     return numInserted + " spot(s) inserted" + skipped;
     704    StringBuilder msg = new StringBuilder();
     705    msg.append(batcher.getTotalInsertCount()).append(" spot(s) inserted");
     706    int numReportersNotFound = batcher.getNumReportersNotFound();
     707    int numSkippedMissingFeature = batcher.getNumSkippedMissingFeature();
     708    int numAcceptedWithNullReporterOnFeature = batcher.getNumAcceptedWithNullReporterOnFeature();
     709    if (numReportersNotFound > 0)
     710    {
     711      msg.append("; ").append(numReportersNotFound).append(" spot(s) could not find a reporter");
     712    }
     713    if (numAcceptedWithNullReporterOnFeature > 0)
     714    {
     715      msg.append("; ").append(numAcceptedWithNullReporterOnFeature);
     716      msg.append(" spot(s) accepted with null reporter on feature");
     717    }
     718    if (numSkippedMissingFeature > 0)
     719    {
     720      msg.append("; ").append(numSkippedMissingFeature);
     721      msg.append(" spot(s) skipped due to missing features");
     722    }
     723    if (skippedLines > 0)
     724    {
     725      msg.append("; ").append(skippedLines).append(" spot(s) skipped due to errors");
     726    }
     727    return msg.toString();
    704728  }
    705729  // -------------------------------------------
  • trunk/src/test/TestArrayDesign.java

    r4097 r4246  
    8383      "\"Block\"\\t\"Column\"\\t\"Row\"\\t\"ID\"\\t\"Name\".*", "\\t", 3, 4);
    8484    test_import_from_file(id2, "data/test.reportermap.import.txt",
    85       "\"Block\"\\t\"Column\"\\t\"Row\"\\t\"ID\"\\t\"Name\".*", 0, 2, 1, 3);
     85      "\"Block\"\\t\"Column\"\\t\"Row\"\\t\"ID\"\\t\"Name\".*", null, 0, 2, 1, 3);
    8686    test_add_features(id, FeatureIdentificationMethod.COORDINATES, 10);
    8787    test_add_features(id6, FeatureIdentificationMethod.FEATURE_ID, 10);
     
    547547  }
    548548
    549   static void test_import_from_file(int arrayDesignId, String filename, String headerRegexp, int blockCol, int rowCol, int colCol, int reporterCol)
     549  static void test_import_from_file(int arrayDesignId, String filename, String headerRegexp,
     550      String ignoreRegexp, int blockCol, int rowCol, int colCol, int reporterCol)
    550551  {
    551552    if (arrayDesignId == 0) return;
     
    564565      parser.setDataHeaderRegexp(Pattern.compile(headerRegexp));
    565566      parser.setDataSplitterRegexp(Pattern.compile("\\t"));
     567      if (ignoreRegexp != null)
     568      {
     569        parser.setIgnoreRegexp(Pattern.compile(ignoreRegexp));
     570      }
    566571      parser.setInputStream(FileUtil.getInputStream(new java.io.File(filename)), "ISO-8859-1");
    567572      parser.parseHeaders();
  • trunk/src/test/TestDirty.java

    r3820 r4246  
    7575    int arrayDesignId = TestArrayDesign.test_create(Platform.GENERIC, false);
    7676    TestArrayDesign.test_import_from_file(arrayDesignId, "data/test.import.dirty.txt",
    77         "\"Block\"\\t\"Column\"\\t\"Row\"\\t\"Name\"\\t\"ID\".*", 0, 2, 1, 4);
     77        "\"Block\"\\t\"Column\"\\t\"Row\"\\t\"Name\"\\t\"ID\".*", null, 0, 2, 1, 4);
    7878   
    7979    int rawBioAssayId = TestRawBioAssay.test_create(Platform.GENERIC, "genepix", 0, 0, 0, arrayDesignId, false);
  • trunk/src/test/TestExperiment.java

    r4232 r4246  
    8585    int arrayDesignId = TestArrayDesign.test_create(Platform.GENERIC, false);
    8686    TestArrayDesign.test_import_from_file(arrayDesignId, "data/test.rawdata.import.txt",
    87       "\"Block\"\\t\"Column\"\\t\"Row\"\\t\"Name\"\\t\"ID\".*", 0, 2, 1, 4);
     87      "\"Block\"\\t\"Column\"\\t\"Row\"\\t\"Name\"\\t\"ID\".*", null, 0, 2, 1, 4);
    8888   
    8989    int rbaId1 = TestRawBioAssay.test_create(Platform.GENERIC, "genepix", 0, 0, 0, arrayDesignId, false);
  • trunk/src/test/TestRawBioAssay.java

    r4232 r4246  
    5858      "\"Block\"\\t\"Column\"\\t\"Row\"\\t\"Name\"\\t\"ID\".*", "\\t", 4, 3);
    5959    TestArrayDesign.test_import_from_file(arrayDesignId, "data/test.rawdata.import.txt",
    60       "\"Block\"\\t\"Column\"\\t\"Row\"\\t\"Name\"\\t\"ID\".*", 0, 2, 1, 4);
     60      "\"Block\"\\t\"Column\"\\t\"Row\"\\t\"Name\"\\t\"ID\".*", null, 0, 2, 1, 4);
    6161    int softwareId = TestSoftware.test_create(SystemItems.getId(SoftwareType.FEATURE_EXTRACTION), false);
    6262    int protocolId = TestProtocol.test_create(SystemItems.getId(ProtocolType.FEATURE_EXTRACTION), false);
  • trunk/src/test/TestRawDataFlatFileImporter.java

    r4097 r4246  
    4646    int fileId = TestFile.test_create("data/test.rawdata.import.txt", false, false);
    4747    int arrayDesignId = TestArrayDesign.test_create(Platform.GENERIC, false);
    48     TestArrayDesign.test_import_from_file(arrayDesignId, "data/test.rawdata.import.txt", "\"Block\"\\t\"Column\"\\t\"Row\"\\t\"Name\"\\t\"ID\".*", 0, 2, 1, 4);
     48    TestArrayDesign.test_import_from_file(arrayDesignId, "data/test.rawdata.import.txt",
     49        "\"Block\"\\t\"Column\"\\t\"Row\"\\t\"Name\"\\t\"ID\".*", ".*(EMPTY_WELL|no clone).*", 0, 2, 1, 4);
    4950    int rawBioAssayId = TestRawBioAssay.test_create(Platform.GENERIC, "genepix", 0, 0, 0, arrayDesignId, true);
    5051   
     
    191192      request.setParameterValue("decimalSeparator", "dot");
    192193      request.setParameterValue("duplicateFeatureError", "skip");
     194      request.setParameterValue("featureMismatchError", "smart");
    193195
    194196      PluginResponse response = request.invoke();
  • trunk/src/test/TestWebservices.java

    r4033 r4246  
    104104    int arrayDesignId1 = TestArrayDesign.test_create(Platform.GENERIC, false);
    105105    TestArrayDesign.test_import_from_file(arrayDesignId1, "data/test.rawdata.import.txt",
    106         "\"Block\"\\t\"Column\"\\t\"Row\"\\t\"Name\"\\t\"ID\".*", 0, 2, 1, 4);   
     106        "\"Block\"\\t\"Column\"\\t\"Row\"\\t\"Name\"\\t\"ID\".*", null, 0, 2, 1, 4);   
    107107    int rawBioAssayId1 = TestRawBioAssay.test_create(Platform.GENERIC, "genepix", 0, 0, 0, arrayDesignId1, false);
    108108    int rawBioAssayId2 = TestRawBioAssay.test_create(Platform.GENERIC, "genepix", 0, 0, 0, arrayDesignId1, false);
  • trunk/src/test/net/sf/basedb/test/roles/AnalysisTest.java

    r4124 r4246  
    9797        "Root bioassay set (" + user + ")", formula);
    9898      BioAssaySet filtered = filterBioAssaySet(experiment, root,
    99         "Filtered bioassay set (" + user + ")", "ch(1) > 0 && ch(2) > 0");
     99        "Filtered bioassay set (" + user + ")", "ch(1) > 0 && ch(2) > 0 && rep('id') != null");
    100100
    101101      BioAssaySet normalized = normalizeBioAssaySet(experiment, filtered,
  • trunk/src/test/net/sf/basedb/test/roles/UserTest.java

    r4124 r4246  
    372372      job.setParameterValue("rawBioAssay",
    373373        new ItemParameterType<RawBioAssay>(RawBioAssay.class, null), rba);
    374       job.setParameterValue("missingReporterError", new StringParameterType(), "skip");
    375       job.setParameterValue("featureMismatchError", new StringParameterType(), "skip");
     374      job.setParameterValue("missingReporterError", new StringParameterType(), "null");
     375      job.setParameterValue("featureMismatchError", new StringParameterType(), "smart");
    376376      dc.saveItem(job);
    377377      dc.commit();
Note: See TracChangeset for help on using the changeset viewer.