Changeset 569
- Timestamp: Feb 5, 2008, 4:51:36 PM (15 years ago)
- Location: trunk/net/sf/basedb/illumina/src/net/sf/basedb/illumina
- Files: 3 edited
Legend:
- Unmodified
- Added
- Removed
trunk/net/sf/basedb/illumina/src/net/sf/basedb/illumina/Illumina.java
r566 r569 80 80 */ 81 81 public static final String BGX_FILE_ID = "illumina.bgx"; 82 83 /** 84 Class name of the BGX validator and meta data extractor 85 */ 86 public static final String BGX_VALIDATOR_METADATAREADER = "net.sf.basedb.illumina.filehandler.BgxFileHandler"; 87 82 88 83 89 /** -
trunk/net/sf/basedb/illumina/src/net/sf/basedb/illumina/filehandler/BgxFileHandler.java
r567 r569 1 /* 1 /** 2 2 $Id$ 3 3 … … 24 24 package net.sf.basedb.illumina.filehandler; 25 25 26 import net.sf.basedb.core.Config; 27 import net.sf.basedb.core.DataFileType; 26 import net.sf.basedb.core.ArrayDesign; 28 27 import net.sf.basedb.core.DbControl; 29 28 import net.sf.basedb.core.File; … … 33 32 import net.sf.basedb.core.InvalidUseOfNullException; 34 33 import net.sf.basedb.core.Location; 35 import net.sf.basedb.core.RawBioAssay;36 34 import net.sf.basedb.core.filehandler.AbstractDataFileHandler; 37 35 import net.sf.basedb.core.filehandler.DataFileMetadataReader; … … 39 37 import net.sf.basedb.illumina.Illumina; 40 38 import net.sf.basedb.util.FileUtil; 41 import net.sf.basedb.util.InputStreamTracker;42 39 43 40 import java.io.BufferedReader; … … 47 44 import java.io.PushbackInputStream; 48 45 import java.util.ArrayList; 49 import java.util.Collections;50 import java.util.HashSet;51 46 import java.util.List; 52 47 import java.util.regex.Matcher; … … 55 50 56 51 /** 52 A tool to validate and extract meta data from a BGX-file. 53 The validator parse through the headings in a given file and 54 fails if it does not find the columns 'Probe_Id' and 'Array_Address_Id' 55 in the section that comes first of '[Probes]' and '[Controls]' 56 after a certain number of lines. 57 The extracted meta data is the number of controls given by one of the headings. 57 58 @author Martin 58 59 @version 2.6 59 60 60 */ 61 61 public class BgxFileHandler … … 63 63 implements DataFileValidator, DataFileMetadataReader 64 64 { 65 //Max number of lines to parse 66 private final int linesToParse = 100; 67 //The start of the heading that holds the number of controls 68 private String controlsHeading = "Number of Controls\t"; 69 70 //List to hold the parsed lines. 
71 private List<String> parsedHeadings = null; 72 73 //Column names 74 private String[] requiredColumns = {"Probe_Id", "Array_Address_Id"}; 65 75 66 76 public BgxFileHandler() 67 77 {} 68 78 69 /** 70 Checks that it is really a bgx file 71 that is used. 79 /* 80 @see net.sf.basedb.core.filehandler.DataFileValidator#validate(net.sf.basedb.core.DbControl) 72 81 */ 73 82 @Override … … 75 84 throws InvalidDataException, InvalidRelationException 76 85 { 86 ArrayDesign design = (ArrayDesign)getItem(); 87 if (design.getPlatform() == null || !(Illumina.PLATFORM_ID.equals(design.getPlatform().getExternalId()))) 88 { 89 throw new InvalidDataException("Array design '" + design.getName() + "' is not using Illumina platform;"); 90 } 77 91 FileSetMember bgxMember = getMember(Illumina.BGX_FILE_ID); 78 InputStream in = null;79 92 if (bgxMember != null) 80 93 { 81 94 File bgxFile = bgxMember.getFile(); 82 if (bgxFile == null) 83 { 84 throw new InvalidUseOfNullException("BGX-file"); 85 } 86 if (bgxFile.getLocation() != Location.PRIMARY) 87 { 88 throw new InvalidUseOfNullException("Data is not online for file " + 89 bgxFile.getName() + " location = '" + bgxFile.getLocation() + "'"); 90 } 91 try 92 { 93 in = wrapInputStream(bgxFile.getDownloadStream(0)); 94 } 95 catch(IOException ioex) 96 { 97 throw new InvalidDataException("BGX-file could not be read properly."); 98 } 99 validateHeaders(in, null); 100 } 101 } 102 103 /* (non-Javadoc) 95 parsedHeadings = parseHeadings(bgxFile, linesToParse); 96 } 97 } 98 99 /* 104 100 @see net.sf.basedb.core.filehandler.DataFileMetadataReader#extractMetadata(net.sf.basedb.core.DbControl) 105 101 */ … … 107 103 public void extractMetadata(DbControl dc) 108 104 { 109 // TODO Auto-generated method stub 110 111 } 112 113 /* (non-Javadoc) 105 if (parsedHeadings == null) 106 { 107 FileSetMember bgxMember = getMember(Illumina.BGX_FILE_ID); 108 if (bgxMember != null) 109 { 110 parsedHeadings = parseHeadings(bgxMember.getFile(), linesToParse); 111 } 112 } 113 
if (parsedHeadings != null) 114 { 115 ArrayDesign design = (ArrayDesign)getItem(); 116 int numFeatures = getNumFeatures(parsedHeadings); 117 design.setNumFileFeatures(numFeatures); 118 } 119 } 120 121 /* 114 122 @see net.sf.basedb.core.filehandler.DataFileMetadataReader#resetMetadata(net.sf.basedb.core.DbControl) 115 123 */ … … 117 125 public void resetMetadata(DbControl dc) 118 126 { 119 // TODO Auto-generated method stub 120 121 } 122 127 ArrayDesign design = (ArrayDesign)getItem(); 128 design.setNumFileFeatures(0); 129 } 130 131 /* 132 Gets a right type of input stream depending on if 133 it comes from a packed or unpacked file. 134 */ 123 135 private InputStream wrapInputStream (InputStream in) 124 136 throws IOException … … 137 149 } 138 150 139 /** 140 Looks for defined header lines in an input stream 141 from a bgx-file. The defined lines must come in the 142 same order as they are expected from the input stream. 143 Each line is matched through a regexp. 144 @param in The input stream to read and validate. 145 @param headerLines Strings to match with the lines 146 from the input stream. 147 */ 148 private void validateHeaders(InputStream in, List<String> headerLines) 149 { 150 if (in == null) throw new InvalidUseOfNullException("in"); 151 if (headerLines == null) throw new InvalidUseOfNullException("headerLines"); 151 /* 152 Takes a file, packed(gzip) or not packed and parse certain amount of lines. 153 Parsed heading lines will be stored in a list. 154 If one of [Probes] or [Controls] section is found, the next line in file will 155 be checked. 156 It must contain required (by the feature importer and reporter importer) columns. 
157 */ 158 private List<String> parseHeadings(File bgxFile, int range) 159 { 160 if (bgxFile == null) 161 { 162 throw new InvalidUseOfNullException("bgxFile"); 163 } 164 if (bgxFile.getLocation() != Location.PRIMARY) 165 { 166 throw new InvalidUseOfNullException("Data is not online for file " + 167 bgxFile.getName() + " location = '" + bgxFile.getLocation() + "'"); 168 } 169 InputStream in = null; 152 170 try 153 171 { 154 BufferedReader reader = new BufferedReader(new InputStreamReader(in, Config.getCharset())); 155 String line = null; 156 for (String header : headerLines) 172 in = wrapInputStream(bgxFile.getDownloadStream(0)); 173 } 174 catch(IOException ioex) 175 { 176 throw new InvalidDataException("BGX-file could not be read properly."); 177 } 178 BufferedReader reader = new BufferedReader(new InputStreamReader(in)); 179 List<String> headings = new ArrayList<String>(); 180 boolean valid = false; 181 String line = null; 182 do 183 { 184 try 157 185 { 158 186 line = reader.readLine(); 159 Pattern pattern = Pattern.compile(header); 160 Matcher m = pattern.matcher(line); 161 if (!m.matches()) 187 headings.add(line); 188 if (line.matches("\\[Probes\\]") || line.matches("\\[Controls\\]")) 162 189 { 163 throw new InvalidDataException("Line " + (headerLines.indexOf(header)+1) + " in file does not match with BGX-header:" + header); 190 line = reader.readLine(); 191 valid = true; 192 for (String column : requiredColumns) 193 { 194 valid = line.contains("\t" + column + "\t") ? 
valid : false; 195 } 164 196 } 165 197 } 166 167 } 168 catch (IOException ioex) 169 { 170 throw new InvalidDataException("Headers could not be validated due to error on when reading file"); 171 } 172 173 } 198 catch(IOException ioex) 199 { 200 line = null; 201 } 202 }while(!valid && (headings.size() < range) && line != null); 203 if (!valid) 204 { 205 throw new InvalidDataException("Could not find required parts in the file"); 206 } 207 return headings; 208 } 209 210 private int getNumFeatures(List<String> headings) 211 { 212 int numControls = 0; 213 if (headings != null && !(headings.size()<1)) 214 { 215 for (String s : headings) 216 { 217 if (s.startsWith(controlsHeading)) 218 { 219 try 220 { 221 numControls = Integer.parseInt(s.substring((controlsHeading.length()-1)).trim()); 222 } 223 catch (NumberFormatException nex) 224 { 225 throw new InvalidDataException(nex); 226 } 227 } 228 } 229 } 230 return numControls; 231 } 174 232 } -
trunk/net/sf/basedb/illumina/src/net/sf/basedb/illumina/install/Install.java
r566 r569 346 346 Illumina.BGX_FILE_ID, "BGX file", "Illumina BGX file", 347 347 Item.ARRAYDESIGN, BGX_FILEYTPE_EXTENSION, reporterMapType, ignoreExistingItems); 348 bgx.setValidatorClass(Illumina.BGX_VALIDATOR_METADATAREADER); 349 bgx.setMetadataReaderClass(Illumina.BGX_VALIDATOR_METADATAREADER); 348 350 349 351 for (int i = 1; i <= largestNumOfStrips; i++ ) … … 427 429 428 430 DataFileType dft = null; 431 String jarPath = PluginDefinition.getByClassName(dc, this.getClass().getName()).getJarPath(); 429 432 try 430 433 { … … 437 440 dft = DataFileType.getByExternalId(dc, externalId); 438 441 if (!dft.getExtension().equals(extension)) message = dft.getName() + " has invalid extension"; 439 if (!dft.getItemType().equals(itemType)) message = dft.getName() + " has invalid item type"; 440 if (message != null) throw new ItemAlreadyExistsException(message); 442 if (!dft.getItemType().equals(itemType)) message = dft.getName() + " has invalid item type"; 443 if (dft.getMetadataReaderJarPath() == null) 444 { 445 dft.setMetadataReaderJarPath(jarPath); 446 } 447 if (dft.getValidatorJarPath() == null) 448 { 449 dft.setValidatorJarPath(jarPath); 450 } 451 if (!dft.getValidatorJarPath().equals(jarPath) || !dft.getMetadataReaderJarPath().equals(jarPath)) 452 { 453 message = dft.getName() + " has invalid jar path."; 454 } 455 if (message != null) throw new ItemAlreadyExistsException(message + ". This can not be used."); 441 456 442 457 } … … 449 464 dft.setDescription(description); 450 465 dft.setGenericType(genericType); 466 dft.setValidatorJarPath(jarPath); 467 dft.setMetadataReaderJarPath(jarPath); 451 468 } 452 469 return dft;
Note: See TracChangeset for help on using the changeset viewer.