Changeset 562


Ignore:
Timestamp:
Jan 30, 2008, 2:46:49 PM (15 years ago)
Author:
Nicklas Nordborg
Message:

References #91: Import Illumina scandata files as raw data

The plug-in can now import data, but error handling is very limited.

Location:
trunk/net/sf/basedb/illumina/src/net/sf/basedb/illumina/plugins
Files:
1 added
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/net/sf/basedb/illumina/src/net/sf/basedb/illumina/plugins/ScanDataImporter.java

    r560 r562  
    2525package net.sf.basedb.illumina.plugins;
    2626
     27import java.io.IOException;
     28import java.io.InputStream;
    2729import java.util.ArrayList;
    2830import java.util.Arrays;
    2931import java.util.Collections;
     32import java.util.HashMap;
    3033import java.util.List;
     34import java.util.Map;
    3135import java.util.Set;
     36import java.util.regex.Pattern;
    3237
    3338import net.sf.basedb.core.ArrayDesign;
     
    3540import net.sf.basedb.core.DataFileType;
    3641import net.sf.basedb.core.DbControl;
     42import net.sf.basedb.core.FeatureIdentificationMethod;
    3743import net.sf.basedb.core.File;
    3844import net.sf.basedb.core.FileParameterType;
     45import net.sf.basedb.core.FileStoreEnabled;
    3946import net.sf.basedb.core.FileStoreUtil;
    4047import net.sf.basedb.core.InvalidDataException;
     
    5259import net.sf.basedb.core.ProgressReporter;
    5360import net.sf.basedb.core.RawBioAssay;
     61import net.sf.basedb.core.RawDataBatcher;
    5462import net.sf.basedb.core.RawDataType;
    5563import net.sf.basedb.core.RequestInformation;
    5664import net.sf.basedb.core.StringParameterType;
     65import net.sf.basedb.core.data.RawData;
    5766import net.sf.basedb.core.plugin.About;
    5867import net.sf.basedb.core.plugin.AboutImpl;
     
    6574import net.sf.basedb.core.query.Hql;
    6675import net.sf.basedb.core.query.Restrictions;
     76import net.sf.basedb.core.signal.SignalException;
     77import net.sf.basedb.core.signal.SignalHandler;
     78import net.sf.basedb.core.signal.SignalTarget;
     79import net.sf.basedb.core.signal.ThreadSignalHandler;
    6780import net.sf.basedb.illumina.Illumina;
    6881import net.sf.basedb.plugins.util.Parameters;
     82import net.sf.basedb.util.MD5;
    6983import net.sf.basedb.util.parser.FlatFileParser;
    7084
    7185/**
    72   Plug-in that import raw scandata file.
     86  Plug-in that imports raw scandata files. This plug-in supports one or more
     87  files, each one containing data from one stripe on the glass. If more
     88  than one file is given, the data for spots with the same Illumicode (=Array_Address_Id)
     89  are merged into a single value. See
     90  http://www.blackwell-synergy.com/doi/pdf/10.1111/j.1467-9639.1994.tb00697.x
     91  for the formula used to merge the variances.
     92  <p>
     93  The format of the scandata files MUST be of this form:
     94  <pre class="code">
     95Illumicode,N,Mean GRN,Dev GRN
     9610008,26,222,47
     9710010,16,57,11
     9810014,16,56,13
     99....
     100</pre>
     101
     102  The Illumicode value will be left-padded with zeroes to a total length of
     103  10 characters to match the values for Array_Address_Id in the BGX files.
     104 
    73105  @author nicklas
    74106*/
    75107public class ScanDataImporter
    76108  extends AbstractPlugin
    77   implements InteractivePlugin
     109  implements InteractivePlugin, SignalTarget
    78110{
    79111
     
    105137    );
    106138 
    107  
    108   /**
    109     The parser we are using.
    110   */
    111   private FlatFileParser ffp;
    112  
    113139  /**
    114140    Configuration parameters
     
    125151  */
    126152  private List<PluginParameter<File>> fileParameters;
     153 
     154  private ThreadSignalHandler signalHandler;
    127155 
    128156  public ScanDataImporter()
     
    147175  public boolean supportsConfigurations()
    148176  {
    149     return false;
    150   }
    151 
     177    return super.supportsConfigurations();
     178    //return false;
     179  }
    152180  @Override
    153181  public MainType getMainType()
    154182  {
    155183    return MainType.IMPORT;
    156   } 
     184  }
    157185  @Override
    158186  public void run(Request request, Response response, ProgressReporter progress)
    159187  {
    160    
     188    if (signalHandler != null) signalHandler.setWorkerThread(null);
     189    DbControl dc = sc.newDbControl();
     190    try
     191    {
     192      if (progress != null) progress.display(0, "Loading items...");
     193      RawBioAssay rba = (RawBioAssay)job.getValue("rawBioAssay");
     194      rba = RawBioAssay.getById(dc, rba.getId());
     195     
     196      List<PlatformFileType> fileTypes = getPlatformFileTypes(dc, rba);
     197      List<File> files = new ArrayList<File>(fileTypes.size());
     198      for (PlatformFileType pft : fileTypes)
     199      {
     200        DataFileType dft = pft.getDataFileType();
     201        File f = (File)job.getValue("file." + dft.getExternalId());
     202        if (f != null) files.add(f);
     203      }
     204     
     205      importScandata(rba, files, progress);
     206      dc.commit();
     207     
     208      if (progress != null) progress.display(100, "");
     209      response.setDone("ok");
     210    }
     211    catch (Throwable t)
     212    {
     213      response.setError(t.getMessage(), Arrays.asList(t));
     214    }
     215    finally
     216    {
     217      if (dc != null) dc.close();
     218    }
    161219  }
    162220  // -------------------------------------------
     
    278336  }
    279337  // -------------------------------------------
     338  /*
     339    From the SignalTarget interface
     340    -------------------------------------------
     341  */
     342  @Override
     343  public SignalHandler getSignalHandler()
     344  {
     345    signalHandler =  new ThreadSignalHandler();
     346    return signalHandler;
     347  } 
     348  // -------------------------------------------
    280349 
    281350 
     
    308377       
    309378        fileParameters = new ArrayList<PluginParameter<File>>();
    310         Platform p = rba.getPlatform();
    311         PlatformVariant v = rba.getVariant();
    312         ItemQuery<PlatformFileType> query = p.getFileTypes(v, true);
    313         query.restrict(Restrictions.eq(Hql.property("dataFileType.itemType"),
    314           Expressions.integer(Item.RAWBIOASSAY.getValue())));
    315         for (PlatformFileType pft : query.list(dc))
     379        List<PlatformFileType> fileTypes = getPlatformFileTypes(dc, rba);
     380       
     381        for (PlatformFileType pft : fileTypes)
    316382        {
    317383          DataFileType dft = pft.getDataFileType();
     
    337403      }
    338404     
    339       /*
    340       // The raw data file to import from - if a file already hase
    341       // been attached to the raw bioassay use it as a default choice
    342       PluginParameter<File> fileParameter = new PluginParameter<File>(
    343         "file",
    344         "Scandata file",
    345         "The file that contains the raw data that you want to import",
    346         new FileParameterType(rawDataFiles == null || rawDataFiles.isEmpty() ?
    347           null : rawDataFiles.get(0), true, 1)
    348       );
    349       parameters.add(fileParameter);
    350       */
    351      
    352405      parameters.add(Parameters.charsetParameter(null, null, null));
    353406      parameters.add(Parameters.decimalSeparatorParameter(null, null, null));
     
    371424    }
    372425    return configureJob;
    373   }   
     426  }
     427 
     428  public FlatFileParser createFlatFileParser()
     429  {
     430    FlatFileParser ffp = new FlatFileParser();
     431    ffp.setDataHeaderRegexp(Pattern.compile("Illumicode,N,Mean GRN,Dev GRN"));
     432    ffp.setDataSplitterRegexp(Pattern.compile(","));
     433    return ffp;
     434  }
     435 
     436  public void importScandata(RawBioAssay rba, List<File> files, ProgressReporter progress)
     437    throws IOException
     438  {
     439    // We assume that each file contains rougly the same number of lines
     440    // Progress reporter should be divided into numFile + 1 steps
     441    int offset = 0;
     442    double deltaOffset = 100 / (files.size() + 1);
     443   
     444    // Parse data from the files and temporarily store everything
     445    // in 'rawdata'
     446    FlatFileParser ffp = createFlatFileParser();
     447    Map<String, ScandataSpot> rawdata = new HashMap<String, ScandataSpot>();
     448    for (File file : files)
     449    {
     450      double factor =  deltaOffset / file.getSize();
     451      long current = 0;
     452      System.out.println("factor = " +factor + "; offset=" + offset + "; deltaOffset=" + deltaOffset);
     453      InputStream in = file.getDownloadStream(0);
     454      ffp.setInputStream(in, "ISO-8859-1");
     455     
     456      FlatFileParser.LineType line = ffp.parseHeaders();
     457      if (line != FlatFileParser.LineType.DATA_HEADER)
     458      {
     459        throw new InvalidDataException("Can't find start of data in file: " + file);
     460      }
     461     
     462      while (ffp.hasMoreData())
     463      {
     464        // Check if user has aborted
     465        if (Thread.interrupted()) throw new SignalException("Aborted by user.");
     466       
     467        // Progress reporting
     468        int lines = ffp.getParsedLines();
     469        if (progress != null && lines % 100 == 0)
     470        {
     471          String message = "Parsing file " + file.getName() + "; " + lines + " lines done.";
     472          current = ffp.getParsedBytes();
     473          int percent = (int)(offset + factor * current);
     474          progress.display(percent, message);
     475        }
     476       
     477        // Get next data line
     478        FlatFileParser.Data data = ffp.nextData();
     479        String featureId = MD5.leftPad(data.get(0), '0', 10);
     480        int n = Integer.parseInt(data.get(1));
     481        int mean = Integer.parseInt(data.get(2));
     482        int dev = Integer.parseInt(data.get(3));
     483       
     484        ScandataSpot spot = rawdata.get(featureId);
     485        if (spot == null)
     486        {
     487          spot = new ScandataSpot(featureId, n, mean, dev);
     488          rawdata.put(featureId, spot);
     489        }
     490        else
     491        {
     492          spot.merge(n, mean, dev);
     493        }
     494      }
     495     
     496      offset += deltaOffset;
     497    }
     498
     499    // Insert data into db
     500    long current = 0;
     501    RawDataBatcher batcher = rba.getRawDataBatcher(FeatureIdentificationMethod.FEATURE_ID);
     502    double factor =  deltaOffset / rawdata.size();
     503    for (ScandataSpot spot : rawdata.values())
     504    {
     505      // Check if user has aborted
     506      if (Thread.interrupted()) throw new SignalException("Aborted by user.");
     507     
     508      // Progress reporting
     509      if (progress != null && current % 100 == 0)
     510      {
     511        String message = "Saving to database; " + current + " spots done.";
     512        current = ffp.getParsedBytes();
     513        int percent = (int)(offset + factor * current);
     514        progress.display(percent, message);
     515      }
     516
     517      RawData raw = batcher.newRawData();
     518      raw.setExtended("n", spot.getN());
     519      raw.setExtended("mean", (float)spot.getMean());
     520      raw.setExtended("dev", (float)spot.getDev());
     521 
     522      batcher.insert(raw, null, spot.getFeatureId());
     523      current++;
     524    }
     525    batcher.flush();
     526    batcher.close();
     527  }
     528 
     529  public List<PlatformFileType> getPlatformFileTypes(DbControl dc, FileStoreEnabled item)
     530  {
     531   
     532    Platform platform = item.getPlatform();
     533    PlatformVariant variant = item.getVariant();
     534   
     535    ItemQuery<PlatformFileType> query = platform.getFileTypes(variant, false);
     536    query.restrict(Restrictions.eq(Hql.property("dataFileType.itemType"),
     537      Expressions.integer(item.getType().getValue())));
     538    return query.list(dc);
     539  }
     540
    374541}
    375542
Note: See TracChangeset for help on using the changeset viewer.