Changeset 618


Ignore:
Timestamp:
Mar 5, 2008, 4:09:19 PM (13 years ago)
Author:
Martin Svensson
Message:

References #101. The import is now done. Code needs to be cleaned up before this ticket is finished.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • plugins/base2/net.sf.basedb.illumina/trunk/src/net/sf/basedb/illumina/plugins/SnpRawDataImporter.java

    r617 r618  
    3838import net.sf.basedb.core.ItemQuery;
    3939import net.sf.basedb.core.Job;
     40import net.sf.basedb.core.Location;
    4041import net.sf.basedb.core.Path;
    4142import net.sf.basedb.core.PathParameterType;
     
    7879import java.io.IOException;
    7980import java.io.OutputStream;
     81import java.io.OutputStreamWriter;
    8082import java.io.PrintWriter;
    8183import java.text.NumberFormat;
     
    125127      "invalidColumnsError",
    126128      "Mismatch of columns",
    127       "What to do if the file doesn't include the same columns for all arrays, for example " +
     129      "What to do if , for example " +
    128130      "if one array has GType and Score but another only has GType\n\n"+
    129131      "ignore = Ignore this and import the data that is there\n"+
     
    167169  private String[] requiredColumnNames = {"Address", "GenTrain Score", ".GType", ".B Allele Freq", ".Log R Ratio"};
    168170 
    169   private Map<String, PrintWriter> outputMapping = new HashMap<String, PrintWriter>();
    170  
     171  private Map<String, List<String>> sampleMappings = new HashMap<String, List<String>>();
     172   
    171173  //Original Illumina SNP raw data file type
    172174  private DataFileType originalDataFileType;
     
    247249        storeValue(job, request, splitFilesDirectoryParameter);
    248250        storeValue(job, request, ri.getParameter(Parameters.CHARSET_PARAMETER));
    249         storeValue(job, request, ri.getParameter(Parameters.DECIMAL_SEPARATOR_PARAMETER));
    250251               
    251252        // Associations
     
    257258       
    258259        // Error handling parameters
    259         storeValue(job, request, defaultErrorParameter);
    260 //        storeValue(job, request, missingReporterErrorParameter);
    261260        storeValue(job, request, invalidColumnsErrorParameter);
    262         storeValue(job, request, numberFormatErrorParameter);
    263261       
    264262        response.setDone("Job configuration complete", Job.ExecutionTime.SHORT);
     
    300298  private Protocol protocol;
    301299  private List<RawBioAssay> rawBioAssays;
    302   private Map<String, List<Mapper>> mapper;
    303   private List<Line> headerLines;
    304   private Mapper reporterMapper;
    305   private int numInserted;
    306   private int numRawBioAssays;
    307   private NumberFormat numberFormat;
    308   private boolean nullIfException;
     300//  private Map<String, List<Mapper>> mapper;
     301//  private List<Line> headerLines;
     302//  private Mapper reporterMapper;
     303//  private int numInserted;
     304//  private int numRawBioAssays;
     305//  private NumberFormat numberFormat;
     306//  private boolean nullIfException;
    309307  private boolean verifyColumns;
    310308//  private boolean nullIfMissingReporter;
     
    383381    String splitFilesPath = (String)job.getValue("splitFileDirectory");
    384382    this.dc = sc.newDbControl();
    385     try
    386     {
    387       this.splitDataFileType = DataFileType.getByExternalId(dc, Illumina.SNP_SPLITDATA_FILE_ID);
    388       this.originalDataFileType = DataFileType.getByExternalId(dc, Illumina.SNP_DATA_FILE_ID);
    389       Map<String, List<String>> sampleMappings = extractSamplesAndColumns(ffp,
    390             Arrays.asList(requiredColumnNames), verifyColumns);
    391       this.rawBioAssays = extractAndCreateRawBioAssays(dc, sampleMappings,
    392           rawDataFile, new Path(splitFilesPath, Path.Type.DIRECTORY));
    393       this.holders = new ArrayList<MapHolder>(rawBioAssays.size());
    394       for (RawBioAssay rba : rawBioAssays)
    395       {
    396         holders.add(new MapHolder(rba, ffp, sampleMappings.get(rba.getName())));
    397       }
    398       dc.commit();
    399     }
    400     finally
    401     {
    402       if (dc != null) dc.close();
     383   
     384    this.splitDataFileType = DataFileType.getByExternalId(dc, Illumina.SNP_SPLITDATA_FILE_ID);
     385    this.originalDataFileType = DataFileType.getByExternalId(dc, Illumina.SNP_DATA_FILE_ID);
     386    sampleMappings = extractSamplesAndColumns(ffp, Arrays.asList(requiredColumnNames), verifyColumns);
     387    this.rawBioAssays = extractAndCreateRawBioAssays(dc, sampleMappings,
     388        rawDataFile, new Path(splitFilesPath, Path.Type.DIRECTORY));
     389    this.holders = new ArrayList<MapHolder>(rawBioAssays.size());     
     390   
     391    for (RawBioAssay rba : rawBioAssays)
     392    {
     393      holders.add(new MapHolder(dc, rba, ffp, sampleMappings.get(rba.getName())));
    403394    }
    404395  }
     
    406397  protected void handleData(Data data)
    407398    throws BaseException
    408   {
    409     dc = sc.newDbControl();
    410     try
    411     {     
    412       if (data.lineNo() < 5)
    413       {
    414         for (MapHolder holder : holders)
     399  {   
     400    String separator = null;
     401    for (MapHolder holder : holders)
     402    {
     403      try
     404      {
     405        if (data.dataLineNo() < 5) System.out.println(data.dataLineNo());
     406        if (data.dataLineNo() == 1)
    415407        {
    416           RawBioAssay rba = holder.rba;
    417 //          dc.refreshItem(rba);
    418 //          File splitFile = rba.getFileSet().getMember(splitDataFileType).getFile();
    419 //          PrintWriter pw = new PrintWriter(splitFile.getUploadStream(false), true);
    420           PrintWriter pw = outputMapping.get(rba.getName());
    421           System.out.println(pw == null);         
    422           String separator = "";
    423           for (Mapper m : holder.mappers)
    424           {
    425             System.out.print(separator+m.getValue(data));
    426             pw.append(separator + m.getValue(data)); 
     408          RawBioAssay rba = holder.rba;
     409          separator = "";
     410          for (String columnName : sampleMappings.get(rba.getName()))
     411          {           
     412            holder.bw.write(separator + getFileSpecificColumnName(columnName));
    427413            separator = "\t";
    428414          }
    429           pw.append("\n");
    430           pw.flush();
    431         }       
    432         dc.commit();
    433       }
    434     }
    435     finally
    436     {
    437       if (dc != null) dc.close();     
    438     }
     415          holder.bw.newLine();
     416          holder.bw.flush();
     417        }
     418               
     419        separator = "";
     420        for (String columnName : sampleMappings.get(holder.rba.getName()))
     421        {
     422          Mapper m = holder.mappers.get(columnName);
     423          holder.bw.write(separator + m.getValue(data)); 
     424          separator = "\t";
     425        }
     426        holder.bw.newLine();
     427        holder.bw.flush();
     428      }
     429      catch(IOException io)
     430      {
     431       
     432      }
     433    }     
    439434  }
    440435 
    441436  protected void end (boolean success)
    442437  {
    443     if (dc != null) dc.close();
    444     for (PrintWriter pw : outputMapping.values())
    445     {
    446       pw.close();
     438    for (MapHolder holder : holders)
     439    {
     440      try
     441      {
     442        holder.bw.flush();
     443        holder.bw.close();
     444      }
     445      catch(IOException io)
     446      {}     
     447    }   
     448    try
     449    {
     450      dc.commit();
     451    }
     452    finally
     453    {
     454      if (dc != null) dc.close();
    447455    }
    448456  }
     
    462470        parameters.add(splitFilesDirectoryParameter);
    463471        parameters.add(Parameters.charsetParameter(null, null, null));
    464         parameters.add(Parameters.decimalSeparatorParameter(null, null,
    465             (String)job.getValue(Parameters.DECIMAL_SEPARATOR_PARAMETER)));
    466 
     472       
    467473        dc = sc.newDbControl();
    468474        List<Scan> scans = getItems(dc, Scan.getQuery());
     
    543549        // Error handling parameters
    544550        parameters.add(errorSection);
    545         parameters.add(defaultErrorParameter);
    546551        parameters.add(invalidColumnsErrorParameter);
    547         parameters.add(numberFormatErrorParameter);
    548552       
    549553        configureJob = new RequestInformation
     
    610614    {
    611615      File splitFile = File.getFile(dc, splitDir, entry.getKey()+".split", true);
     616      splitFile.setLocation(Location.PRIMARY);
    612617      dc.saveItem(splitFile);
    613618     
    614       PrintWriter pw = new PrintWriter(splitFile.getUploadStream(false), true);
    615       outputMapping.put(entry.getKey(), pw);
    616       String separator = "";
    617       for (String column : entry.getValue())
    618       {
    619         String fileSpecificColumnName = getFileSpecificColumnName(column);
    620         pw.print(separator + fileSpecificColumnName);
    621         separator = "\t";
    622       }
    623       pw.println();
    624       pw.flush();
    625            
    626619      RawBioAssay rba = RawBioAssay.getNew(dc, snpVariant, illuminaSNP);         
    627620      rba.setName(entry.getKey());
     
    634627      rba.setDescription("Raw bioassay for sample " + entry.getKey() + " in file " + rawDataFile.getName());
    635628      dc.saveItem(rba);
     629      dc.reattachItem(rba);
    636630      createdRba.add(rba);
    637631    }
     
    705699  {
    706700    private final RawBioAssay rba;
    707     private final List<Mapper> mappers;
    708    
    709     private MapHolder(RawBioAssay rba, FlatFileParser ffp, List<String> colNames)
     701    private final HashMap<String, Mapper> mappers;
     702    private final BufferedWriter bw;
     703   
     704   
     705    private MapHolder(DbControl dc, RawBioAssay rba, FlatFileParser ffp, List<String> colNames)
    710706    {
    711707      this.rba = rba;
    712       mappers = new ArrayList<Mapper>();
     708      mappers = new HashMap<String, Mapper>();
     709      File splitFile = rba.getFileSet().getMember(splitDataFileType).getFile();         
     710      bw = new BufferedWriter(new OutputStreamWriter(splitFile.getUploadStream(true)));
    713711      createMappers(ffp, colNames);
    714712    }
     
    721719        if (colIndex != null)
    722720        {
    723           mappers.add(new ColumnMapper(colIndex, colName));
     721          mappers.put(colName, new ColumnMapper(colIndex, colName));
    724722        }
    725723      }
Note: See TracChangeset for help on using the changeset viewer.