Changeset 617


Timestamp:
Mar 3, 2008, 4:50:11 PM
Author:
Martin Svensson
Message:

References #101. Creating RawBioAssays and splitting up the files. The data also have to be split up. A sketch of the header-splitting approach is shown after the file list below.

Location:
plugins/base2/net.sf.basedb.illumina/trunk
Files:
2 edited
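
The heart of this changeset is SnpRawDataImporter splitting one multi-sample Illumina SNP matrix file into one split file (and one RawBioAssay) per sample, keyed on the "<sample>.<column>" header convention. Below is a minimal, self-contained sketch of that header grouping, assuming invented header names; the actual plugin logic lives in extractSamplesAndColumns() and extractAndCreateRawBioAssays() in the diff further down.

// Sketch only (not the plugin source): group the shared columns with each
// sample's "<sample>.<column>" headers, one group per future split file /
// raw bioassay. Header names below are invented for illustration.
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class HeaderSplitSketch
{
  public static void main(String[] args)
  {
    List<String> headers = Arrays.asList(
      "Address", "GenTrain Score",
      "Sample1.GType", "Sample1.B Allele Freq",
      "Sample2.GType", "Sample2.B Allele Freq");

    // Headers without a '.' are common to every sample
    List<String> common = new ArrayList<String>();
    for (String h : headers)
    {
      if (!h.contains(".")) common.add(h);
    }

    // "<sample>.<column>" headers are collected per sample, seeded with the common ones
    Map<String, List<String>> perSample = new LinkedHashMap<String, List<String>>();
    for (String h : headers)
    {
      String[] parts = h.split("\\.");
      if (parts.length == 2)
      {
        if (!perSample.containsKey(parts[0]))
        {
          perSample.put(parts[0], new ArrayList<String>(common));
        }
        perSample.get(parts[0]).add(h);
      }
    }

    // Each entry corresponds to one split file and one raw bioassay
    for (Map.Entry<String, List<String>> e : perSample.entrySet())
    {
      System.out.println(e.getKey() + " -> " + e.getValue());
    }
  }
}

Each resulting entry is what the plugin turns into a per-sample ".split" file and a RawBioAssay whose file set holds both the original and the split file (see extractAndCreateRawBioAssays in the diff below).
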

  • plugins/base2/net.sf.basedb.illumina/trunk/META-INF/base-plugins.xml

    r572 r617  
    1818    <hasconfigurations>yes</hasconfigurations>
    1919    </pluginclass>
     20    <pluginclass classname="net.sf.basedb.illumina.plugins.SnpRawDataImporter">
     21    <minbaseversion>2.6</minbaseversion>
     22    <hasconfigurations/>
     23    </pluginclass>
    2024    <!-- 
    2125  <pluginclass classname="net.sf.basedb.illumina.plugins.ScanDataImporter">
  • plugins/base2/net.sf.basedb.illumina/trunk/src/net/sf/basedb/illumina/plugins/SnpRawDataImporter.java

    r592 r617  
    2424package net.sf.basedb.illumina.plugins;
    2525
     26import net.sf.basedb.core.ArrayDesign;
    2627import net.sf.basedb.core.BaseException;
     28import net.sf.basedb.core.BasicItem;
     29import net.sf.basedb.core.DataFileType;
     30import net.sf.basedb.core.DbControl;
     31import net.sf.basedb.core.Directory;
    2732import net.sf.basedb.core.Experiment;
     33import net.sf.basedb.core.File;
     34import net.sf.basedb.core.Include;
     35import net.sf.basedb.core.InvalidDataException;
    2836import net.sf.basedb.core.Item;
     37import net.sf.basedb.core.ItemParameterType;
     38import net.sf.basedb.core.ItemQuery;
     39import net.sf.basedb.core.Job;
     40import net.sf.basedb.core.Path;
     41import net.sf.basedb.core.PathParameterType;
     42import net.sf.basedb.core.Permission;
     43import net.sf.basedb.core.PlatformVariant;
     44import net.sf.basedb.core.PluginParameter;
     45import net.sf.basedb.core.Protocol;
     46import net.sf.basedb.core.ProtocolType;
     47import net.sf.basedb.core.RawBioAssay;
     48import net.sf.basedb.core.RawDataType;
     49import net.sf.basedb.core.RawDataTypes;
    2950import net.sf.basedb.core.RequestInformation;
     51import net.sf.basedb.core.Scan;
     52import net.sf.basedb.core.Software;
     53import net.sf.basedb.core.SoftwareType;
     54import net.sf.basedb.core.StringParameterType;
     55import net.sf.basedb.core.SystemItems;
    3056import net.sf.basedb.core.plugin.About;
    3157import net.sf.basedb.core.plugin.AboutImpl;
     
    3460import net.sf.basedb.core.plugin.Request;
    3561import net.sf.basedb.core.plugin.Response;
     62import net.sf.basedb.core.query.Expressions;
     63import net.sf.basedb.core.query.Hql;
     64import net.sf.basedb.core.query.Orders;
     65import net.sf.basedb.core.query.Restriction;
     66import net.sf.basedb.core.query.Restrictions;
    3667import net.sf.basedb.illumina.Illumina;
    3768import net.sf.basedb.plugins.AbstractFlatFileImporter;
     69import net.sf.basedb.plugins.util.Parameters;
     70import net.sf.basedb.util.parser.ColumnMapper;
     71import net.sf.basedb.util.parser.FlatFileParser;
     72import net.sf.basedb.util.parser.Mapper;
    3873import net.sf.basedb.util.parser.FlatFileParser.Data;
    39 
     74import net.sf.basedb.util.parser.FlatFileParser.Line;
     75import net.sf.basedb.util.parser.FlatFileParser.LineType;
     76
     77import java.io.BufferedWriter;
     78import java.io.IOException;
     79import java.io.OutputStream;
     80import java.io.PrintWriter;
     81import java.text.NumberFormat;
     82import java.util.ArrayList;
    4083import java.util.Arrays;
    4184import java.util.Collections;
     85import java.util.HashMap;
    4286import java.util.HashSet;
     87import java.util.LinkedList;
     88import java.util.List;
     89import java.util.Map;
    4390import java.util.Set;
     91import java.util.regex.Pattern;
    4492
    4593/**
     
    73121      )
    74122    ));
    75 
     123 
     124  private static final PluginParameter<String> invalidColumnsErrorParameter = new PluginParameter<String>(
     125      "invalidColumnsError",
     126      "Mismatch of columns",
     127      "What to do if the file doesn't include the same columns for all arrays, for example " +
     128      "if one array has GType and Score but another only has GType\n\n"+
     129      "ignore = Ignore this and import the data that is there\n"+
     130      "fail = Stop with an error message",
     131      new StringParameterType(255, null, false, 1, 0, 0,
     132        Arrays.asList( new String[] { "ignore", "fail"} ))
     133    );
     134
     135//  private static final PluginParameter<String> missingReporterErrorParameter = new PluginParameter<String>(
     136//      "missingReporterError",
     137//      "Reporter not found",
     138//      "How to handle errors that are caused by a reporter not being present in the database. If not specified the " +
     139//        "default error handling is used.\n\n"+
     140//        "null = Import the data but set the reporter to null\n"+
     141//        "skip = Skip the current data line and continue\n"+
     142//        "fail = Stop with an error message",
     143//        new StringParameterType(255, null, false, 1, 0, 0,
     144//            Arrays.asList( new String[] { "null", "skip", "fail"} ))
     145//      );
     146
     147  private static final PluginParameter<String> associationsSection = new PluginParameter<String>(
     148    "associationsSection",
     149    "Associations",
     150    "Select items that you want to associate the imported raw bioassays with",
     151    null
     152  );
     153
     154  private static final PluginParameter<String> splitFilesDirectoryParameter = new PluginParameter<String>(
     155    "splitFileDirectory",
     156    "Directory of split files",
     157    "The directory in which to put the " +
     158    "new files when splitting up the original " +
     159    "raw datafile. Leave this parameter empty to " +
     160    "put the split files in a new sub-directory in " +
     161    "the original file's location",
     162    new PathParameterType(Path.Type.DIRECTORY, null, false)
     163  );
     164 
     165  private RequestInformation configureJob;
     166 
     167  private String[] requiredColumnNames = {"Address", "GenTrain Score", ".GType", ".B Allele Freq", ".Log R Ratio"};
     168 
     169  private Map<String, PrintWriter> outputMapping = new HashMap<String, PrintWriter>();
     170 
     171  //Original Illumina SNP raw data file type
     172  private DataFileType originalDataFileType;
     173  //Split Illumina SNP raw data file type
     174  private DataFileType splitDataFileType;
    76175 
    77176  public SnpRawDataImporter()
     
    99198    From the InteractivePlugin interface
    100199   */
    101   public void configure(GuiContext context, Request request, Response reponse)
    102   {
    103     // TODO Auto-generated method stub
    104  
    105   }
    106  
    107   public Set<GuiContext> getGuiContexts()
    108   {
    109     return guiContexts;
    110   }
    111  
    112   public RequestInformation getRequestInformation(GuiContext context, String command)
    113       throws BaseException
    114   {
    115     RequestInformation requestInformation = null;
    116     if (command.equals(Request.COMMAND_CONFIGURE_JOB))
    117     {
    118       requestInformation = getConfigureJobParameters(context);
    119     }
    120     return requestInformation;
    121   }
    122  
    123200  /**
    124201    When used from an experiment this checks that Illumina SNP
     
    139216        message = "The object is not an Experiment: " + item;
    140217      }
    141     }
    142    
     218      Experiment experiment = (Experiment)item;
     219      RawDataType illuminaSnp = RawDataTypes.getSafeRawDataType(Illumina.SNP_VARIANT_ID);
     220      if (illuminaSnp != experiment.getRawDataType())
     221      {
     222        message = "The experiment can't contain Illumina SNP data: " + item;
     223      }
     224      else
     225      {
     226        experiment.checkPermission(Permission.WRITE);
     227      }
     228    }   
    143229    return message;
    144230  }
     231  public void configure(GuiContext context, Request request, Response response)
     232  {
     233    String command = request.getCommand();
     234    try
     235    {
     236      if (command.equals(Request.COMMAND_CONFIGURE_JOB))
     237      {
     238        RequestInformation ri = getConfigureJobParameters(context);
     239        List<Throwable> errors = validateRequestParameters(ri.getParameters(), request);
     240        if (errors != null)
     241        {
     242          response.setError(errors.size() + " invalid parameter(s) were found in the request", errors);
     243          return;
     244        }
     245       
     246        storeValue(job, request, fileParameter);
     247        storeValue(job, request, splitFilesDirectoryParameter);
     248        storeValue(job, request, ri.getParameter(Parameters.CHARSET_PARAMETER));
     249        storeValue(job, request, ri.getParameter(Parameters.DECIMAL_SEPARATOR_PARAMETER));
     250               
     251        // Associations
     252        storeValue(job, request, ri.getParameter("experiment"));
     253        storeValue(job, request, ri.getParameter("arrayDesign"));
     254        storeValue(job, request, ri.getParameter("scan"));
     255        storeValue(job, request, ri.getParameter("protocol"));
     256        storeValue(job, request, ri.getParameter("software"));
     257       
     258        // Error handling parameters
     259        storeValue(job, request, defaultErrorParameter);
     260//        storeValue(job, request, missingReporterErrorParameter);
     261        storeValue(job, request, invalidColumnsErrorParameter);
     262        storeValue(job, request, numberFormatErrorParameter);
     263       
     264        response.setDone("Job configuration complete", Job.ExecutionTime.SHORT);
     265      }
     266    }
     267    catch (Throwable ex)
     268    {
     269      response.setError(ex.getMessage(), Arrays.asList(ex));
     270    }
     271  }
     272  public Set<GuiContext> getGuiContexts()
     273  {
     274    return guiContexts;
     275  }
     276  public RequestInformation getRequestInformation(GuiContext context, String command)
     277      throws BaseException
     278  {
     279    RequestInformation requestInformation = null;
     280    if (command.equals(Request.COMMAND_CONFIGURE_JOB))
     281    {
     282      requestInformation = getConfigureJobParameters(context);
     283    }
     284    return requestInformation;
     285  }
     286 
    145287  // ------------------------------------------
    146288 
     
    148290    From the AbstractFlatFileImporter
    149291   */
    150   protected void handleData(Data data) throws BaseException
    151   {
    152     // TODO Auto-generated method stub
    153 
     292  private RawDataType illuminaSNP;
     293  private FlatFileParser ffp;
     294  private DbControl dc;
     295  private List<MapHolder> holders;
     296  private Experiment experiment;
     297  private ArrayDesign design;
     298  private Scan scan;
     299  private Software software;
     300  private Protocol protocol;
     301  private List<RawBioAssay> rawBioAssays;
     302  private Map<String, List<Mapper>> mapper;
     303  private List<Line> headerLines;
     304  private Mapper reporterMapper;
     305  private int numInserted;
     306  private int numRawBioAssays;
     307  private NumberFormat numberFormat;
     308  private boolean nullIfException;
     309  private boolean verifyColumns;
     310//  private boolean nullIfMissingReporter;
     311 
     312  /**
     313    Initialize a FlatFileParser so it can parse Illumina SNP data files.
     314    <ul>
     315    <li>Data splitter: (\t)
     316    <li>Header regexp: (.+)=(.*?),*
     317    <li>Data header: Address\tGenTrain Score\t.*
     318    </ul>
     319   */
     320  @Override
     321  protected FlatFileParser getInitializedFlatFileParser()
     322    throws BaseException
     323  {
     324    String separator = "\\t";
     325    FlatFileParser ffp = new FlatFileParser();
     326    ffp.setDataSplitterRegexp(Pattern.compile(separator));
     327    ffp.setDataHeaderRegexp(Pattern.compile("Address"+separator + "GenTrain Score"+separator+".*"));
     328    ffp.setHeaderRegexp(Pattern.compile("(.+)=(.*?),*"));
     329    return ffp;
     330  }
     331 
     332  /**
     333    @return The decimal separator specified by the job parameter, or "dot" if none is given
     334  */
     335  @Override
     336  protected String getDecimalSeparator()
     337  {
     338    String separator = super.getDecimalSeparator();
     339    if (separator == null) separator = "dot";
     340    return separator;
     341  }
     342 
     343  /**
     344    Check that the first line contains the column names and that it begins
     345    with the columns 'Address' and 'GenTrain Score'.
     346    @return TRUE if the first line contains the expected column names,
     347      FALSE otherwise
     348  */
     349  @Override
     350  protected boolean isImportable(FlatFileParser ffp)
     351  {
     352    FlatFileParser.Line firstLine = ffp.getLineCount() >= 1 ? ffp.getLine(0) : null;
     353    boolean isSNPData = firstLine != null && firstLine.type() == LineType.DATA_HEADER;
     354   
     355    return isSNPData;
     356  }
     357 
     358  @Override
     359  protected void begin(FlatFileParser ffp)
     360    throws BaseException
     361  {
     362    super.begin(ffp);
     363    this.ffp = ffp;
     364    this.illuminaSNP = RawDataTypes.getRawDataType(Illumina.SNP_VARIANT_ID);
     365    this.experiment = (Experiment)job.getValue("experiment");
     366    this.design = (ArrayDesign)job.getValue("arrayDesign");
     367    this.scan = (Scan)job.getValue("scan");
     368    this.protocol = (Protocol)job.getValue("protocol");
     369    this.software = (Software)job.getValue("software");
     370   
     371    // Setup error handling
     372    this.verifyColumns = "fail".equals(getErrorOption("invalidColumnsError"));
     373  }
     374 
     375  /**
     376    Check column headers and map them to raw bioassays.
     377    Create raw bioassays. Initialise the column <code>Mapper</code>s.
     378  */
     379  @Override
     380  protected void beginData()
     381  {
     382    File rawDataFile = (File)job.getValue("file");
     383    String splitFilesPath = (String)job.getValue("splitFileDirectory");
     384    this.dc = sc.newDbControl();
     385    try
     386    {
     387      this.splitDataFileType = DataFileType.getByExternalId(dc, Illumina.SNP_SPLITDATA_FILE_ID);
     388      this.originalDataFileType = DataFileType.getByExternalId(dc, Illumina.SNP_DATA_FILE_ID);
     389      Map<String, List<String>> sampleMappings = extractSamplesAndColumns(ffp,
     390            Arrays.asList(requiredColumnNames), verifyColumns);
     391      this.rawBioAssays = extractAndCreateRawBioAssays(dc, sampleMappings,
     392          rawDataFile, new Path(splitFilesPath, Path.Type.DIRECTORY));
     393      this.holders = new ArrayList<MapHolder>(rawBioAssays.size());
     394      for (RawBioAssay rba : rawBioAssays)
     395      {
     396        holders.add(new MapHolder(rba, ffp, sampleMappings.get(rba.getName())));
     397      }
     398      dc.commit();
     399    }
     400    finally
     401    {
     402      if (dc != null) dc.close();
     403    }
     404  }
     405
     406  protected void handleData(Data data)
     407    throws BaseException
     408  {
     409    dc = sc.newDbControl();
     410    try
     411    {     
     412      if (data.lineNo() < 5)
     413      {
     414        for (MapHolder holder : holders)
     415        {
     416          RawBioAssay rba = holder.rba;
     417//          dc.refreshItem(rba);
     418//          File splitFile = rba.getFileSet().getMember(splitDataFileType).getFile();
     419//          PrintWriter pw = new PrintWriter(splitFile.getUploadStream(false), true);
     420          PrintWriter pw = outputMapping.get(rba.getName());
     421          System.out.println(pw == null);         
     422          String separator = "";
     423          for (Mapper m : holder.mappers)
     424          {
     425            System.out.print(separator+m.getValue(data));
     426            pw.append(separator + m.getValue(data)); 
     427            separator = "\t";
     428          }
     429          pw.append("\n");
     430          pw.flush();
     431        }       
     432        dc.commit();
     433      }
     434    }
     435    finally
     436    {
     437      if (dc != null) dc.close();     
     438    }
     439  }
     440 
     441  protected void end (boolean success)
     442  {
     443    if (dc != null) dc.close();
     444    for (PrintWriter pw : outputMapping.values())
     445    {
     446      pw.close();
     447    }
    154448  }
    155449 
     
    158452  private RequestInformation getConfigureJobParameters(GuiContext context)
    159453  {
    160     // TODO Auto-generated method stub
    161     return null;
     454    net.sf.basedb.core.DbControl dc = null;
     455    try
     456    {
     457      if (configureJob == null)
     458      {
     459        List<PluginParameter<?>> parameters = new ArrayList<PluginParameter<?>>();
     460       
     461        parameters.add(fileParameter);
     462        parameters.add(splitFilesDirectoryParameter);
     463        parameters.add(Parameters.charsetParameter(null, null, null));
     464        parameters.add(Parameters.decimalSeparatorParameter(null, null,
     465            (String)job.getValue(Parameters.DECIMAL_SEPARATOR_PARAMETER)));
     466
     467        dc = sc.newDbControl();
     468        List<Scan> scans = getItems(dc, Scan.getQuery());
     469        List<Protocol> protocols = getItems(dc, Protocol.getQuery(),
     470            Restrictions.eq(
     471              Hql.property("protocolType.id"),
     472              Expressions.integer(SystemItems.getId(ProtocolType.FEATURE_EXTRACTION))
     473            )
     474          );
     475        List<Software> software = getItems(dc, Software.getQuery(),
     476          Restrictions.eq(
     477              Hql.property("softwareType.id"),
     478              Expressions.integer(SystemItems.getId(SoftwareType.FEATURE_EXTRACTION))
     479            )
     480          );
     481        List<ArrayDesign> designs = getItems(dc, ArrayDesign.getQuery(),
     482          Restrictions.gt(
     483              Hql.property("numDbFeatures"),
     484              Expressions.integer(0)
     485            )
     486          );
     487       
     488        boolean hasAssociations =
     489          context.getItem() == Item.EXPERIMENT || scans.size() > 0 ||
     490          protocols.size() > 0 || software.size() > 0 || designs.size() > 0;
     491       
     492        if (hasAssociations)
     493        {
     494          parameters.add(associationsSection);
     495          if (context.getItem() == Item.EXPERIMENT)
     496          {
     497            // Include parameter for current experiment
     498            parameters.add(new PluginParameter<Experiment>(
     499                "experiment",
     500                "Experiment",
     501                "The imported raw bioassays will be included in this experiment.",
     502                new ItemParameterType<Experiment>(Experiment.class, null, true, 1, null)
     503              ));
     504          }
     505          if (!designs.isEmpty())
     506          {
     507            parameters.add(new PluginParameter<ArrayDesign>(
     508              "arrayDesign",
     509              "Array design",
     510              "The imported raw bioassays will be linked to the selected array design.",
     511              new ItemParameterType<ArrayDesign>(ArrayDesign.class, null, false, 1, designs)
     512            ));
     513          }
     514          if (!scans.isEmpty())
     515          {
     516            parameters.add(new PluginParameter<Scan>(
     517              "scan",
     518              "Scan",
     519              "The imported raw bioassays will be linked to the selected scan.",
     520              new ItemParameterType<Scan>(Scan.class, null, false, 1, scans)
     521            ));
     522          }
     523          if (!protocols.isEmpty())
     524          {
     525            parameters.add(new PluginParameter<Protocol>(
     526              "protocol",
     527              "Protocol",
     528              "The imported raw bioassays will be linked to the selected protocol.",
     529              new ItemParameterType<Protocol>(Protocol.class, null, false, 1, protocols)
     530            ));
     531          }
     532          if (!software.isEmpty())
     533          {
     534            parameters.add(new PluginParameter<Software>(
     535              "software",
     536              "Software",
     537              "The imported raw bioassays will be linked to the selected software.",
     538              new ItemParameterType<Software>(Software.class, null, false, 1, software)
     539            ));
     540          }
     541        }
     542       
     543        // Error handling parameters
     544        parameters.add(errorSection);
     545        parameters.add(defaultErrorParameter);
     546        parameters.add(invalidColumnsErrorParameter);
     547        parameters.add(numberFormatErrorParameter);
     548       
     549        configureJob = new RequestInformation
     550        (
     551          Request.COMMAND_CONFIGURE_JOB,
     552          "Select the file to import raw data from",
     553          "You must select a file to use for import. Most other options are optional.",
     554          parameters
     555        );
     556      }
     557    }
     558    finally
     559    {
     560      if (dc != null) dc.close();
     561    }
     562   
     563    return configureJob;
     564  }
     565 
     566  /**
     567    Sort the items by name and add a USE permission filter to the query.
     568  */
     569  private <T extends BasicItem> List<T> getItems(DbControl dc, ItemQuery<T> query, Restriction... restrictions)
     570  {
     571    query.order(Orders.asc(Hql.property("name")));
     572    query.include(Include.MINE, Include.SHARED, Include.IN_PROJECT);
     573    query.setItemPermission(Permission.USE);
     574    if (restrictions != null && restrictions.length > 0)
     575    {
     576      query.restrict(Restrictions.and(restrictions));
     577    }
     578    return query.list(dc);
     579  }
     580 
     581  /**
     582    Extracts sample names and property names from the column headers.
     583    Creates a raw bioassay for each array and sets headers, scan, software
     584    and protocol.
     585    @return A List with created raw bioassays.
     586   */
     587  private List<RawBioAssay> extractAndCreateRawBioAssays(DbControl dc,
     588      Map<String, List<String>> sampleMappings, File rawDataFile, Path splitFilesPath)
     589  {
     590    //The created raw bioassays
     591    List<RawBioAssay> createdRba = new LinkedList<RawBioAssay>();
     592   
     593    PlatformVariant snpVariant = PlatformVariant.getByExternalId(dc, Illumina.SNP_VARIANT_ID);
     594   
     595    //The directory to place the split files in
     596    Directory splitDir = null;
     597    if (splitFilesPath != null)
     598    {
     599      splitDir = Directory.getByPath(dc, splitFilesPath);
     600    }
     601    else
     602    {
     603      dc.refreshItem(rawDataFile);
     604      splitDir = rawDataFile.getDirectory().newSubDirectory();
     605      splitDir.setName(rawDataFile.getName());
     606      dc.saveItem(splitDir);
     607    }   
     608    //Create raw bio assays and files with columns set.
     609    for (Map.Entry<String, List<String>> entry : sampleMappings.entrySet())
     610    {
     611      File splitFile = File.getFile(dc, splitDir, entry.getKey()+".split", true);
     612      dc.saveItem(splitFile);
     613     
     614      PrintWriter pw = new PrintWriter(splitFile.getUploadStream(false), true);
     615      outputMapping.put(entry.getKey(), pw);
     616      String separator = "";
     617      for (String column : entry.getValue())
     618      {
     619        String fileSpecificColumnName = getFileSpecificColumnName(column);
     620        pw.print(separator + fileSpecificColumnName);
     621        separator = "\t";
     622      }
     623      pw.println();
     624      pw.flush();
     625           
     626      RawBioAssay rba = RawBioAssay.getNew(dc, snpVariant, illuminaSNP);         
     627      rba.setName(entry.getKey());
     628      if (design != null) rba.setArrayDesign(design);
     629      if (scan != null) rba.setScan(scan);
     630      if (protocol != null) rba.setProtocol(protocol);
     631      if (software != null) rba.setSoftware(software);
     632      if (rawDataFile != null) rba.getFileSet().setMember(rawDataFile, originalDataFileType);
     633      if (splitFile != null) rba.getFileSet().setMember(splitFile, splitDataFileType);
     634      rba.setDescription("Raw bioassay for sample " + entry.getKey() + " in file " + rawDataFile.getName());
     635      dc.saveItem(rba);
     636      createdRba.add(rba);
     637    }
     638   
     639    return createdRba;
     640  }
     641 
     642  private Map<String, List<String>> extractSamplesAndColumns (FlatFileParser ffp, 
     643      List<String> requiredColumns, boolean verifyColumns)
     644  {   
     645    Map<String, List<String>> sampleMappings = new HashMap<String, List<String>>();   
     646//    //Holds column names common for all samples
     647//    Map <String, Mapper> commonCol = new HashMap<String, Mapper>();
     648   
     649    List<String> commonCol = new ArrayList<String>();
     650   
     651    List<String> columnHeaders = ffp.getColumnHeaders();
     652    //Extract column headers common for all samples
     653    for (String header : columnHeaders)
     654    {
     655      String[] splitHeader = header.split("\\.");
     656      if (splitHeader.length == 1)
     657      {
     658        commonCol.add(header);
     659      }
     660    }
     661   
     662    //Extract the sample names and sample specific columns.
     663    for (String header : columnHeaders)
     664    {
     665      String[] splitHeader = header.split("\\.");
     666      //Header with two parts looks like this: sampleName.columnName
     667      if (splitHeader.length == 2)
     668      {
     669        String sampleName = splitHeader[0];
     670        if (!sampleMappings.containsKey(sampleName))
     671        {
     672          sampleMappings.put(sampleName, new ArrayList<String>(commonCol));
     673        }       
     674        sampleMappings.get(sampleName).add(header);
     675      }
     676    }
     677   
     678    //VerifyColumns
     679    for (Map.Entry<String, List<String>> entry : sampleMappings.entrySet())
     680    {
     681      //Verify columns - if selected
     682      if (verifyColumns)
     683      {
     684        for (String required : requiredColumns)
     685        {
     686          String fullColumnName = required.startsWith(".") ? entry.getKey()+required : required;
     687          if (!entry.getValue().contains(fullColumnName))
     688          {
     689            throw new InvalidDataException("Missing data column: '" + fullColumnName + "' for one of the raw bio assays");
     690          }
     691        }
     692      }     
     693    }   
     694    return sampleMappings;
     695  }
     696 
     697
     698  private static String getFileSpecificColumnName(String fullColumnName)
     699  {
     700    int dotIndex = fullColumnName.indexOf(".");
     701    return dotIndex > -1 ? fullColumnName.substring(dotIndex+1) : fullColumnName;
     702  }
     703   
     704  private class MapHolder
     705  {
     706    private final RawBioAssay rba;
     707    private final List<Mapper> mappers;
     708   
     709    private MapHolder(RawBioAssay rba, FlatFileParser ffp, List<String> colNames)
     710    {
     711      this.rba = rba;
     712      mappers = new ArrayList<Mapper>();
     713      createMappers(ffp, colNames);
     714    }
     715   
     716    private void createMappers(FlatFileParser ffp, List<String> colNames)
     717    {
     718      for (String colName : colNames)
     719      {
     720        Integer colIndex = ffp.getColumnHeaderIndex(colName);
     721        if (colIndex != null)
     722        {
     723          mappers.add(new ColumnMapper(colIndex, colName));
     724        }
     725      }
     726    }
    162727  }
    163728}