Changeset 564


Ignore:
Timestamp:
Jan 31, 2008, 3:01:19 PM (13 years ago)
Author:
Nicklas Nordborg
Message:

References #91: Import Illumina scandata files as raw data

Error handling is now also in place. There should only be minor bugs and cleanup to do.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/net/sf/basedb/illumina/src/net/sf/basedb/illumina/plugins/ScanDataImporter.java

    r562 r564  
    2525package net.sf.basedb.illumina.plugins;
    2626
    27 import java.io.IOException;
    2827import java.io.InputStream;
    2928import java.util.ArrayList;
     
    4342import net.sf.basedb.core.File;
    4443import net.sf.basedb.core.FileParameterType;
     44import net.sf.basedb.core.FileSetMember;
    4545import net.sf.basedb.core.FileStoreEnabled;
    4646import net.sf.basedb.core.FileStoreUtil;
    4747import net.sf.basedb.core.InvalidDataException;
     48import net.sf.basedb.core.InvalidUseOfNullException;
    4849import net.sf.basedb.core.Item;
    4950import net.sf.basedb.core.ItemNotFoundException;
     
    5152import net.sf.basedb.core.ItemQuery;
    5253import net.sf.basedb.core.Job;
     54import net.sf.basedb.core.NumberOutOfRangeException;
    5355import net.sf.basedb.core.Permission;
    5456import net.sf.basedb.core.PermissionDeniedException;
     
    6971import net.sf.basedb.core.plugin.GuiContext;
    7072import net.sf.basedb.core.plugin.InteractivePlugin;
     73import net.sf.basedb.core.plugin.ParameterValues;
    7174import net.sf.basedb.core.plugin.Request;
    7275import net.sf.basedb.core.plugin.Response;
     
    8184import net.sf.basedb.plugins.util.Parameters;
    8285import net.sf.basedb.util.MD5;
     86import net.sf.basedb.util.error.ClassMapErrorHandler;
     87import net.sf.basedb.util.error.ErrorHandler;
     88import net.sf.basedb.util.error.SimpleErrorHandler;
    8389import net.sf.basedb.util.parser.FlatFileParser;
    8490
     
    112118  private static final About about = new AboutImpl(
    113119    "Illumina Scandata importer",
    114     "TODO",
     120    "Raw data importer for Illumina scandata files. This plug-in can import data from one " +
     121    "or more related scandata files that are part of the same array but different stripes.",
    115122    Illumina.VERSION,
    116123    Illumina.COPYRIGHT,
     
    152159  private List<PluginParameter<File>> fileParameters;
    153160 
     161  /**
     162    To make it possible to abort the job.
     163   */
    154164  private ThreadSignalHandler signalHandler;
     165
     166  /**
     167    Error handling.
     168  */
     169  private ClassMapErrorHandler errorHandler;
     170
    155171 
    156172  public ScanDataImporter()
     
    196212      List<PlatformFileType> fileTypes = getPlatformFileTypes(dc, rba);
    197213      List<File> files = new ArrayList<File>(fileTypes.size());
     214      List<FileSetMember> members = new ArrayList<FileSetMember>(fileTypes.size());
    198215      for (PlatformFileType pft : fileTypes)
    199216      {
    200217        DataFileType dft = pft.getDataFileType();
    201218        File f = (File)job.getValue("file." + dft.getExternalId());
    202         if (f != null) files.add(f);
    203       }
    204      
    205       importScandata(rba, files, progress);
     219        if (f != null)
     220        {
     221          files.add(f);
     222          members.add(FileStoreUtil.setDataFile(dc, rba, dft.getExternalId(), f));
     223        }
     224      }
     225
     226      setUpErrorHandling(job);
     227      String msg = importScandata(rba, files, progress);
     228     
     229      for (FileSetMember member : members)
     230      {
     231        member.setValid(true, null);
     232      }     
    206233      dc.commit();
    207234     
    208       if (progress != null) progress.display(100, "");
    209       response.setDone("ok");
     235      if (progress != null) progress.display(100, msg);
     236      response.setDone(msg);
    210237    }
    211238    catch (Throwable t)
     
    412439      parameters.add(featureMismatchErrorParameter);
    413440      parameters.add(Parameters.invalidUseOfNullError(null, null, null));
    414       parameters.add(Parameters.numberFormatError(null, null, null));
     441      parameters.add(Parameters.numberFormatError(null,
     442          "How to handle errors that are caused by strings that can't be converted " +
     443          "to a numeric value. If no value is specified the default error handling " +
     444          "is used.\n\n"+
     445          "skip = Skip the current data line and continue\n"+
     446          "fail = Stop with an error message", null, "skip", "fail"));
    415447      parameters.add(Parameters.numberOutOfRangeError(null, null, null));
    416448     
     
    434466  }
    435467 
    436   public void importScandata(RawBioAssay rba, List<File> files, ProgressReporter progress)
    437     throws IOException
     468  public String importScandata(RawBioAssay rba, List<File> files, ProgressReporter progress)
    438469  {
    439470    // We assume that each file contains roughly the same number of lines
     
    441472    int offset = 0;
    442473    double deltaOffset = 100 / (files.size() + 1);
     474   
     475    // For the final status report
     476    int skipped = 0;
     477    int numInserted = 0;
     478    int numLines = 0;
    443479   
    444480    // Parse data from the files and temporarily store everything
     
    450486      double factor =  deltaOffset / file.getSize();
    451487      long current = 0;
    452       System.out.println("factor = " +factor + "; offset=" + offset + "; deltaOffset=" + deltaOffset);
    453488      InputStream in = file.getDownloadStream(0);
    454489      ffp.setInputStream(in, "ISO-8859-1");
    455490     
    456       FlatFileParser.LineType line = ffp.parseHeaders();
    457       if (line != FlatFileParser.LineType.DATA_HEADER)
    458       {
    459         throw new InvalidDataException("Can't find start of data in file: " + file);
    460       }
    461      
    462       while (ffp.hasMoreData())
    463       {
     491      // Errors that are caught by this are fatal
     492      try
     493      {
     494        FlatFileParser.LineType line = ffp.parseHeaders();
     495        if (line != FlatFileParser.LineType.DATA_HEADER)
     496        {
     497          throw new InvalidDataException("Can't find start of data in file: " + file);
     498        }
     499     
     500        while (ffp.hasMoreData())
     501        {
     502          // Check if user has aborted
     503          if (Thread.interrupted()) throw new SignalException("Aborted by user.");
     504         
     505          // Progress reporting
     506          int lines = ffp.getParsedLines();
     507          if (progress != null && lines % 100 == 0)
     508          {
     509            String message = "Parsing file " + file.getName() + "; " + lines + " lines done.";
     510            current = ffp.getParsedBytes();
     511            int percent = (int)(offset + factor * current);
     512            progress.display(percent, message);
     513          }
     514         
     515          // Error handling enabled section
     516          try
     517          {
     518            // Get next data line
     519            FlatFileParser.Data data = ffp.nextData();
     520            numLines++;
     521            String featureId = MD5.leftPad(data.get(0), '0', 10);
     522           
     523            int n = Integer.parseInt(data.get(1));
     524            int mean = Integer.parseInt(data.get(2));
     525            int dev = Integer.parseInt(data.get(3));
     526           
     527            ScandataSpot spot = rawdata.get(featureId);
     528            if (spot == null)
     529            {
     530              spot = new ScandataSpot(featureId, n, mean, dev);
     531              rawdata.put(featureId, spot);
     532            }
     533            else
     534            {
     535              spot.merge(n, mean, dev);
     536            }
     537          }
     538          catch (Throwable t)
     539          {
     540            if (errorHandler == null)
     541            {
     542              throw t;
     543            }
     544            else
     545            {
     546              errorHandler.handleError(t);
     547            }
     548            skipped++;
     549          }
     550        }
     551       
     552        offset += deltaOffset;
     553      }
     554      catch (Throwable t)
     555      {
     556        throw new BaseException(t.getMessage() +
     557          " on line " + ffp.getParsedLines() + " in file " + file.getName(), t);
     558      }
     559    }
     560   
     561 
     562    // Insert data into db
     563    long current = 0;
     564    RawDataBatcher batcher = rba.getRawDataBatcher(FeatureIdentificationMethod.FEATURE_ID);
     565    double factor =  deltaOffset / rawdata.size();
     566    String currentFeatureId = null;
     567    try
     568    {
     569      for (ScandataSpot spot : rawdata.values())
     570      {
     571        currentFeatureId = spot.getFeatureId();
     572       
    464573        // Check if user has aborted
    465574        if (Thread.interrupted()) throw new SignalException("Aborted by user.");
    466575       
    467576        // Progress reporting
    468         int lines = ffp.getParsedLines();
    469         if (progress != null && lines % 100 == 0)
    470         {
    471           String message = "Parsing file " + file.getName() + "; " + lines + " lines done.";
    472           current = ffp.getParsedBytes();
     577        if (progress != null && current % 100 == 0)
     578        {
     579          String message = "Saving to database; " + current + " spots done.";
    473580          int percent = (int)(offset + factor * current);
    474581          progress.display(percent, message);
    475582        }
    476        
    477         // Get next data line
    478         FlatFileParser.Data data = ffp.nextData();
    479         String featureId = MD5.leftPad(data.get(0), '0', 10);
    480         int n = Integer.parseInt(data.get(1));
    481         int mean = Integer.parseInt(data.get(2));
    482         int dev = Integer.parseInt(data.get(3));
    483        
    484         ScandataSpot spot = rawdata.get(featureId);
    485         if (spot == null)
    486         {
    487           spot = new ScandataSpot(featureId, n, mean, dev);
    488           rawdata.put(featureId, spot);
    489         }
    490         else
    491         {
    492           spot.merge(n, mean, dev);
    493         }
    494       }
    495      
    496       offset += deltaOffset;
    497     }
    498 
    499     // Insert data into db
    500     long current = 0;
    501     RawDataBatcher batcher = rba.getRawDataBatcher(FeatureIdentificationMethod.FEATURE_ID);
    502     double factor =  deltaOffset / rawdata.size();
    503     for (ScandataSpot spot : rawdata.values())
    504     {
    505       // Check if user has aborted
    506       if (Thread.interrupted()) throw new SignalException("Aborted by user.");
    507      
    508       // Progress reporting
    509       if (progress != null && current % 100 == 0)
    510       {
    511         String message = "Saving to database; " + current + " spots done.";
    512         current = ffp.getParsedBytes();
    513         int percent = (int)(offset + factor * current);
    514         progress.display(percent, message);
    515       }
    516 
    517       RawData raw = batcher.newRawData();
    518       raw.setExtended("n", spot.getN());
    519       raw.setExtended("mean", (float)spot.getMean());
    520       raw.setExtended("dev", (float)spot.getDev());
    521  
    522       batcher.insert(raw, null, spot.getFeatureId());
    523       current++;
    524     }
    525     batcher.flush();
    526     batcher.close();
    527   }
    528  
     583 
     584        RawData raw = batcher.newRawData();
     585        raw.setExtended("n", spot.getN());
     586        raw.setExtended("mean", (float)spot.getMean());
     587        raw.setExtended("dev", (float)spot.getDev());
     588   
     589        try
     590        {
     591          batcher.insert(raw, null, currentFeatureId);
     592          numInserted++;
     593        }
     594        catch (Throwable t)
     595        {
     596          if (errorHandler == null)
     597          {
     598            throw t;
     599          }
     600          else
     601          {
     602            errorHandler.handleError(t);
     603          }
     604          skipped++;
     605        }
     606        current++;
     607      }
     608      batcher.flush();
     609      batcher.close();
     610    }
     611    catch (Throwable t)
     612    {
     613      throw new BaseException(t.getMessage() +
     614          " when inserting spot " + currentFeatureId, t);
     615    }
     616
     617    String msg = numInserted + " spot(s) inserted from " + numLines + " values in " +
     618      files.size() + " file(s)";
     619    if (skipped > 0) msg += "; " + skipped + " values skipped due to errors";
     620    return msg;
     621  }
     622 
     623  /**
     624    Get all file types that have been registered with the platform/variant
     625    of the given item.
     626    @param dc The DbControl to use for database access
     627    @param item An item which should have a platform and optionally a variant
     628    @return A list of file types registered with the platform/variant, or null
     629      if the item is null or doesn't have a platform
     630  */
    529631  public List<PlatformFileType> getPlatformFileTypes(DbControl dc, FileStoreEnabled item)
    530632  {
    531    
     633    if (item == null) return null;
    532634    Platform platform = item.getPlatform();
    533635    PlatformVariant variant = item.getVariant();
     636    if (platform == null) return null;
    534637   
    535638    ItemQuery<PlatformFileType> query = platform.getFileTypes(variant, false);
     
    538641    return query.list(dc);
    539642  }
    540 
     643 
     644  /**
     645    Initialise the error handling system. This method must be called before starting the
     646    import with {@link #importScandata(RawBioAssay, List, ProgressReporter)}.
     647    A subclass may override this method to add specific error handlers. If
     648    <code>super.setUpErrorHandling()</code> isn't called, error handling in this class is
     649    disabled and the subclass must do all its error handling in its own code.
     650    @param config Supplies the "defaultError" option and the per-exception error options
     651  */
     652  public void setUpErrorHandling(ParameterValues config)
     653  {
     654    ErrorHandler defaultErrorHandler = new SimpleErrorHandler("skip".equals(config.getValue("defaultError")));
     655    errorHandler = new ClassMapErrorHandler(defaultErrorHandler);
     656    addErrorHandler(ItemNotFoundException.class,
     657      createErrorHandler((String)config.getValue("featureMismatchError"), defaultErrorHandler));
     658    addErrorHandler(InvalidUseOfNullException.class,
     659      createErrorHandler((String)config.getValue("invalidUseOfNullError"), defaultErrorHandler));
     660    addErrorHandler(NumberOutOfRangeException.class,
     661      createErrorHandler((String)config.getValue("numberOutOfRangeError"), defaultErrorHandler));
     662    addErrorHandler(NumberFormatException.class,
     663      createErrorHandler((String)config.getValue("numberFormatError"), defaultErrorHandler));
     664  }
     665
     666  /**
     667    Add an error handler for the specified class of error. The error handler
     668    also handles errors that are subclasses of the specified class.
     669    @see ClassMapErrorHandler#addErrorHandler(Class, ErrorHandler)
     670  */
     671  public void addErrorHandler(Class<? extends Throwable> t, ErrorHandler handler)
     672  {
     673    errorHandler.addErrorHandler(t, handler);
     674  }
     675 
     676  /**
     677    Create a new SimpleErrorHandler instance, or fall back to the supplied default handler.
     678    @param method "skip" or "fail"; for any other value, including null, the default error handler is returned
     679    @param defaultErrorHandler The error handler to return if the method parameter is neither "skip" nor "fail"
     680    @return A new SimpleErrorHandler created with true for "skip" or false for "fail", otherwise the default error handler
     681  */
     682  public ErrorHandler createErrorHandler(String method, ErrorHandler defaultErrorHandler)
     683  {
     684    ErrorHandler handler = defaultErrorHandler;
     685    if ("skip".equals(method))
     686    {
     687      handler = new SimpleErrorHandler(true);
     688    }
     689    else if ("fail".equals(method))
     690    {
     691      handler = new SimpleErrorHandler(false);
     692    }
     693    return handler;
     694  }
     695 
    541696}
    542697
Note: See TracChangeset for help on using the changeset viewer.