Changeset 569


Ignore:
Timestamp:
Feb 5, 2008, 4:51:36 PM (15 years ago)
Author:
Martin Svensson
Message:

References #98. The validator and metadata reader are done. The installation routine needs to be updated before this ticket is finished.

Location:
trunk/net/sf/basedb/illumina/src/net/sf/basedb/illumina
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/net/sf/basedb/illumina/src/net/sf/basedb/illumina/Illumina.java

    r566 r569  
    8080  */
    8181  public static final String BGX_FILE_ID = "illumina.bgx";
     82
     83  /**
     84    Class name of the BGX validator and meta data extractor
     85   */
     86  public static final String BGX_VALIDATOR_METADATAREADER = "net.sf.basedb.illumina.filehandler.BgxFileHandler"; 
     87 
    8288 
    8389  /**
  • trunk/net/sf/basedb/illumina/src/net/sf/basedb/illumina/filehandler/BgxFileHandler.java

    r567 r569  
    1 /*
     1/**
    22  $Id$
    33
     
    2424package net.sf.basedb.illumina.filehandler;
    2525
    26 import net.sf.basedb.core.Config;
    27 import net.sf.basedb.core.DataFileType;
     26import net.sf.basedb.core.ArrayDesign;
    2827import net.sf.basedb.core.DbControl;
    2928import net.sf.basedb.core.File;
     
    3332import net.sf.basedb.core.InvalidUseOfNullException;
    3433import net.sf.basedb.core.Location;
    35 import net.sf.basedb.core.RawBioAssay;
    3634import net.sf.basedb.core.filehandler.AbstractDataFileHandler;
    3735import net.sf.basedb.core.filehandler.DataFileMetadataReader;
     
    3937import net.sf.basedb.illumina.Illumina;
    4038import net.sf.basedb.util.FileUtil;
    41 import net.sf.basedb.util.InputStreamTracker;
    4239
    4340import java.io.BufferedReader;
     
    4744import java.io.PushbackInputStream;
    4845import java.util.ArrayList;
    49 import java.util.Collections;
    50 import java.util.HashSet;
    5146import java.util.List;
    5247import java.util.regex.Matcher;
     
    5550
    5651/**
     52  A tool to validate and extract metadata from a BGX file.
     53  The validator parses the headings in a given file and
     54  fails if it does not find the columns 'Probe_Id' and 'Array_Address_Id'
     55  in whichever of the '[Probes]' and '[Controls]' sections comes first,
     56  or if no such section is found within a limited number of lines.
     57  The extracted metadata is the number of controls given by one of the headings.
    5758    @author Martin
    5859    @version 2.6
    59  
    6060 */
    6161public class BgxFileHandler
     
    6363  implements DataFileValidator, DataFileMetadataReader
    6464{
     65  //Max number of lines to parse
     66  private final int linesToParse = 100;
     67  //The start of the heading that holds the number of controls
     68  private String controlsHeading = "Number of Controls\t";
     69 
     70  //List to hold the parsed lines.
     71  private List<String> parsedHeadings = null;
     72 
     73  //Column names
     74  private String[] requiredColumns = {"Probe_Id", "Array_Address_Id"};
    6575 
    6676  public BgxFileHandler()
    6777  {}
    6878 
    69   /**
    70     Checks that it is really a bgx file
    71     that is used.
     79  /*
     80      @see net.sf.basedb.core.filehandler.DataFileValidator#validate(net.sf.basedb.core.DbControl)
    7281   */
    7382  @Override
     
    7584    throws InvalidDataException, InvalidRelationException
    7685  {
     86    ArrayDesign design = (ArrayDesign)getItem();
     87    if (design.getPlatform() == null || !(Illumina.PLATFORM_ID.equals(design.getPlatform().getExternalId())))
     88    {
     89      throw new InvalidDataException("Array design '" + design.getName() + "' is not using Illumina platform;");
     90    }
    7791    FileSetMember bgxMember = getMember(Illumina.BGX_FILE_ID);
    78     InputStream in = null;
    7992    if (bgxMember != null)
    8093    {
    8194      File bgxFile = bgxMember.getFile();
    82       if (bgxFile == null)
    83       {
    84         throw new InvalidUseOfNullException("BGX-file");
    85       }
    86       if (bgxFile.getLocation() != Location.PRIMARY)
    87       {
    88         throw new InvalidUseOfNullException("Data is not online for file " +
    89             bgxFile.getName() + " location = '" + bgxFile.getLocation() + "'");
    90       }
    91       try
    92       {
    93         in = wrapInputStream(bgxFile.getDownloadStream(0));
    94       }
    95       catch(IOException ioex)
    96       {
    97         throw new InvalidDataException("BGX-file could not be read properly.");
    98       }
    99       validateHeaders(in, null);
    100     }
    101   }
    102 
    103   /*  (non-Javadoc)
     95      parsedHeadings = parseHeadings(bgxFile, linesToParse);
     96    }   
     97  }
     98
     99  /* 
    104100      @see net.sf.basedb.core.filehandler.DataFileMetadataReader#extractMetadata(net.sf.basedb.core.DbControl)
    105101   */
     
    107103  public void extractMetadata(DbControl dc)
    108104  {
    109     // TODO Auto-generated method stub
    110 
    111   }
    112 
    113   /*  (non-Javadoc)
     105    if (parsedHeadings == null)
     106    {
     107      FileSetMember bgxMember = getMember(Illumina.BGX_FILE_ID);
     108      if (bgxMember != null)
     109      {
     110        parsedHeadings = parseHeadings(bgxMember.getFile(), linesToParse);
     111      }     
     112    }
     113    if (parsedHeadings != null)
     114    {
     115      ArrayDesign design = (ArrayDesign)getItem();
     116      int numFeatures = getNumFeatures(parsedHeadings);
     117      design.setNumFileFeatures(numFeatures);
     118    }
     119  }
     120
     121  /*
    114122      @see net.sf.basedb.core.filehandler.DataFileMetadataReader#resetMetadata(net.sf.basedb.core.DbControl)
    115123   */
     
    117125  public void resetMetadata(DbControl dc)
    118126  {
    119     // TODO Auto-generated method stub
    120 
    121   }
    122  
     127    ArrayDesign design = (ArrayDesign)getItem();
     128    design.setNumFileFeatures(0);
     129  }
     130 
     131  /*
     132    Returns the right type of input stream depending on whether
     133    it comes from a packed or an unpacked file.
     134   */
    123135  private InputStream wrapInputStream (InputStream in)
    124136    throws IOException
     
    137149  }
    138150 
    139   /**
    140     Looks for defined header lines in an input stream
    141     from a bgx-file. The defined lines must come in the
    142     same order as they are expected from the input stream.
    143     Each line is matched through a regexp.
    144     @param in The input stream to read and validate.
    145     @param headerLines Strings to match with the lines
    146       from the input stream.
    147    */
    148   private void validateHeaders(InputStream in, List<String> headerLines)
    149   {
    150     if (in == null) throw new InvalidUseOfNullException("in");
    151     if (headerLines == null) throw new InvalidUseOfNullException("headerLines");
     151  /*
     152    Takes a file, gzip-packed or not, and parses up to a given number of lines.
     153    The parsed heading lines are stored in a list.
     154    If either the [Probes] or the [Controls] section is found, the next line in the
     155    file is checked.
     156    It must contain the columns required by the feature importer and the reporter importer.
     157   */
     158  private List<String> parseHeadings(File bgxFile, int range)
     159  {
     160    if (bgxFile == null)
     161    {
     162      throw new InvalidUseOfNullException("bgxFile");
     163    }
     164    if (bgxFile.getLocation() != Location.PRIMARY)
     165    {
     166      throw new InvalidUseOfNullException("Data is not online for file " +
     167          bgxFile.getName() + " location = '" + bgxFile.getLocation() + "'");
     168    }
     169    InputStream in = null;
    152170    try
    153171    {
    154       BufferedReader reader = new BufferedReader(new InputStreamReader(in, Config.getCharset()));
    155       String line = null;
    156       for (String header : headerLines)
     172      in = wrapInputStream(bgxFile.getDownloadStream(0));
     173    }
     174    catch(IOException ioex)
     175    {
     176      throw new InvalidDataException("BGX-file could not be read properly.");
     177    }
     178    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
     179    List<String> headings = new ArrayList<String>();
     180    boolean valid = false;
     181    String line = null;
     182    do
     183    {
     184      try
    157185      {
    158186        line = reader.readLine();
    159         Pattern pattern = Pattern.compile(header);
    160         Matcher m = pattern.matcher(line);
    161         if (!m.matches())
     187        headings.add(line);
     188        if (line.matches("\\[Probes\\]") || line.matches("\\[Controls\\]"))
    162189        {
    163           throw new InvalidDataException("Line " + (headerLines.indexOf(header)+1) + " in file does not match with BGX-header:" + header);
     190          line = reader.readLine();
     191          valid = true;
     192          for (String column : requiredColumns)
     193          {
     194            valid = line.contains("\t" + column + "\t") ? valid : false;
     195          }
    164196        }
    165197      }
    166      
    167     }
    168     catch (IOException ioex)
    169     {
    170       throw new InvalidDataException("Headers could not be validated due to error on when reading file");
    171     }
    172    
    173   }
     198      catch(IOException ioex)
     199      {
     200        line = null;
     201      }     
     202    }while(!valid && (headings.size() < range) && line != null);
     203    if (!valid)
     204    {
     205      throw new InvalidDataException("Could not find required parts in the file");
     206    }   
     207    return headings;
     208  }
     209 
     210  private int getNumFeatures(List<String> headings)
     211  {
     212    int numControls = 0;
     213    if (headings != null && !(headings.size()<1))
     214    {   
     215      for (String s : headings)
     216      {
     217        if (s.startsWith(controlsHeading))
     218        {
     219          try
     220          {
     221            numControls = Integer.parseInt(s.substring((controlsHeading.length()-1)).trim());
     222          }
     223          catch (NumberFormatException nex)
     224          {
     225            throw new InvalidDataException(nex);
     226          }
     227        }
     228      }
     229    }
     230    return numControls;
     231  }
    174232}
  • trunk/net/sf/basedb/illumina/src/net/sf/basedb/illumina/install/Install.java

    r566 r569  
    346346        Illumina.BGX_FILE_ID, "BGX file", "Illumina BGX file",
    347347        Item.ARRAYDESIGN, BGX_FILEYTPE_EXTENSION, reporterMapType, ignoreExistingItems);
     348      bgx.setValidatorClass(Illumina.BGX_VALIDATOR_METADATAREADER);
     349      bgx.setMetadataReaderClass(Illumina.BGX_VALIDATOR_METADATAREADER);
    348350
    349351      for (int i = 1; i <= largestNumOfStrips; i++ )
     
    427429 
    428430    DataFileType dft = null;
     431    String jarPath = PluginDefinition.getByClassName(dc, this.getClass().getName()).getJarPath();
    429432    try
    430433    {
     
    437440      dft = DataFileType.getByExternalId(dc, externalId);
    438441      if (!dft.getExtension().equals(extension)) message = dft.getName() + " has invalid extension";
    439       if (!dft.getItemType().equals(itemType)) message = dft.getName() + " has invalid item type";
    440       if (message != null) throw new ItemAlreadyExistsException(message);
     442      if (!dft.getItemType().equals(itemType)) message = dft.getName() + " has invalid item type"; 
     443      if (dft.getMetadataReaderJarPath() == null)
     444      {
     445        dft.setMetadataReaderJarPath(jarPath);         
     446      }
     447      if (dft.getValidatorJarPath() == null)
     448      {
     449        dft.setValidatorJarPath(jarPath);   
     450      }
     451      if (!dft.getValidatorJarPath().equals(jarPath) || !dft.getMetadataReaderJarPath().equals(jarPath))
     452      {
     453        message = dft.getName() + " has invalid jar path.";
     454      }
     455      if (message != null) throw new ItemAlreadyExistsException(message + ". This can not be used.");
    441456     
    442457    }
     
    449464      dft.setDescription(description);
    450465      dft.setGenericType(genericType);
     466      dft.setValidatorJarPath(jarPath);
     467      dft.setMetadataReaderJarPath(jarPath);
    451468    }
    452469    return dft;
Note: See TracChangeset for help on using the changeset viewer.