Changeset 562
- Timestamp: Jan 30, 2008, 2:46:49 PM
- Location: trunk/net/sf/basedb/illumina/src/net/sf/basedb/illumina/plugins
- Files: 1 added, 1 edited
Legend:
- Unmodified (no prefix)
- Added (prefixed with +)
- Removed (prefixed with -)
trunk/net/sf/basedb/illumina/src/net/sf/basedb/illumina/plugins/ScanDataImporter.java
Changes from r560 to r562:

  package net.sf.basedb.illumina.plugins;

+ import java.io.IOException;
+ import java.io.InputStream;
  import java.util.ArrayList;
  import java.util.Arrays;
  import java.util.Collections;
+ import java.util.HashMap;
  import java.util.List;
+ import java.util.Map;
  import java.util.Set;
+ import java.util.regex.Pattern;

  import net.sf.basedb.core.ArrayDesign;
  …
  import net.sf.basedb.core.DataFileType;
  import net.sf.basedb.core.DbControl;
+ import net.sf.basedb.core.FeatureIdentificationMethod;
  import net.sf.basedb.core.File;
  import net.sf.basedb.core.FileParameterType;
+ import net.sf.basedb.core.FileStoreEnabled;
  import net.sf.basedb.core.FileStoreUtil;
  import net.sf.basedb.core.InvalidDataException;
  …
  import net.sf.basedb.core.ProgressReporter;
  import net.sf.basedb.core.RawBioAssay;
+ import net.sf.basedb.core.RawDataBatcher;
  import net.sf.basedb.core.RawDataType;
  import net.sf.basedb.core.RequestInformation;
  import net.sf.basedb.core.StringParameterType;
+ import net.sf.basedb.core.data.RawData;
  import net.sf.basedb.core.plugin.About;
  import net.sf.basedb.core.plugin.AboutImpl;
  …
  import net.sf.basedb.core.query.Hql;
  import net.sf.basedb.core.query.Restrictions;
+ import net.sf.basedb.core.signal.SignalException;
+ import net.sf.basedb.core.signal.SignalHandler;
+ import net.sf.basedb.core.signal.SignalTarget;
+ import net.sf.basedb.core.signal.ThreadSignalHandler;
  import net.sf.basedb.illumina.Illumina;
  import net.sf.basedb.plugins.util.Parameters;
+ import net.sf.basedb.util.MD5;
  import net.sf.basedb.util.parser.FlatFileParser;

  /**
- Plug-in that import raw scandata file.
+ Plug-in that imports raw scandata files. The plug-in supports one or more
+ files, each containing data from one stripe on the glass. If more than one
+ file is given, the data for spots with the same Illumicode (=Array_Address_Id)
+ are merged into a single value. See
+ http://www.blackwell-synergy.com/doi/pdf/10.1111/j.1467-9639.1994.tb00697.x
+ for the formula used to merge the variances.
+ <p>
+ The format of the scandata files MUST be of this form:
+ <pre class="code">
+ Illumicode,N,Mean GRN,Dev GRN
+ 10008,26,222,47
+ 10010,16,57,11
+ 10014,16,56,13
+ ....
+ </pre>
+
+ The Illumicode value will be left-padded with zeroes to a total length of
+ 10 characters to match the values for Array_Address_Id in the BGX files.
+
  @author nicklas
  */
  public class ScanDataImporter
  extends AbstractPlugin
- implements InteractivePlugin
+ implements InteractivePlugin, SignalTarget
  {

  …
  );

-
- /**
- The parser we are using.
- */
- private FlatFileParser ffp;
-
  /**
  Configuration parameters
  …
  */
  private List<PluginParameter<File>> fileParameters;
+
+ private ThreadSignalHandler signalHandler;

  public ScanDataImporter()
  …
  public boolean supportsConfigurations()
  {
- return false;
- }
-
+ return super.supportsConfigurations();
+ //return false;
+ }
  @Override
  public MainType getMainType()
  {
  return MainType.IMPORT;
  }
  @Override
  public void run(Request request, Response response, ProgressReporter progress)
  {
+ if (signalHandler != null) signalHandler.setWorkerThread(null);
+ DbControl dc = sc.newDbControl();
+ try
+ {
+   if (progress != null) progress.display(0, "Loading items...");
+   RawBioAssay rba = (RawBioAssay)job.getValue("rawBioAssay");
+   rba = RawBioAssay.getById(dc, rba.getId());
+
+   List<PlatformFileType> fileTypes = getPlatformFileTypes(dc, rba);
+   List<File> files = new ArrayList<PluginParameter<File>>(fileTypes.size()).isEmpty() ? new ArrayList<File>(fileTypes.size()) : new ArrayList<File>(fileTypes.size());
+   for (PlatformFileType pft : fileTypes)
+   {
+     DataFileType dft = pft.getDataFileType();
+     File f = (File)job.getValue("file." + dft.getExternalId());
+     if (f != null) files.add(f);
+   }
+
+   importScandata(rba, files, progress);
+   dc.commit();
+
+   if (progress != null) progress.display(100, "");
+   response.setDone("ok");
+ }
+ catch (Throwable t)
+ {
+   response.setError(t.getMessage(), Arrays.asList(t));
+ }
+ finally
+ {
+   if (dc != null) dc.close();
+ }
  }
  // -------------------------------------------
  …
  }
  // -------------------------------------------
+ /*
+ From the SignalTarget interface
+ -------------------------------------------
+ */
+ @Override
+ public SignalHandler getSignalHandler()
+ {
+   signalHandler = new ThreadSignalHandler();
+   return signalHandler;
+ }
+ // -------------------------------------------


  …

  fileParameters = new ArrayList<PluginParameter<File>>();
- Platform p = rba.getPlatform();
- PlatformVariant v = rba.getVariant();
- ItemQuery<PlatformFileType> query = p.getFileTypes(v, true);
- query.restrict(Restrictions.eq(Hql.property("dataFileType.itemType"),
-   Expressions.integer(Item.RAWBIOASSAY.getValue())));
- for (PlatformFileType pft : query.list(dc))
+ List<PlatformFileType> fileTypes = getPlatformFileTypes(dc, rba);
+
+ for (PlatformFileType pft : fileTypes)
  {
  DataFileType dft = pft.getDataFileType();
  …
  }

- /*
- // The raw data file to import from - if a file already hase
- // been attached to the raw bioassay use it as a default choice
- PluginParameter<File> fileParameter = new PluginParameter<File>(
-   "file",
-   "Scandata file",
-   "The file that contains the raw data that you want to import",
-   new FileParameterType(rawDataFiles == null || rawDataFiles.isEmpty() ?
-     null : rawDataFiles.get(0), true, 1)
- );
- parameters.add(fileParameter);
- */
-
  parameters.add(Parameters.charsetParameter(null, null, null));
  parameters.add(Parameters.decimalSeparatorParameter(null, null, null));
  …
  }
  return configureJob;
  }
+
+ public FlatFileParser createFlatFileParser()
+ {
+   FlatFileParser ffp = new FlatFileParser();
+   ffp.setDataHeaderRegexp(Pattern.compile("Illumicode,N,Mean GRN,Dev GRN"));
+   ffp.setDataSplitterRegexp(Pattern.compile(","));
+   return ffp;
+ }
+
+ public void importScandata(RawBioAssay rba, List<File> files, ProgressReporter progress)
+   throws IOException
+ {
+   // We assume that each file contains roughly the same number of lines.
+   // The progress reporter is divided into numFiles + 1 steps.
+   int offset = 0;
+   double deltaOffset = 100.0 / (files.size() + 1);
+
+   // Parse data from the files and temporarily store everything in 'rawdata'
+   FlatFileParser ffp = createFlatFileParser();
+   Map<String, ScandataSpot> rawdata = new HashMap<String, ScandataSpot>();
+   for (File file : files)
+   {
+     double factor = deltaOffset / file.getSize();
+     long current = 0;
+     System.out.println("factor = " + factor + "; offset=" + offset + "; deltaOffset=" + deltaOffset);
+     InputStream in = file.getDownloadStream(0);
+     ffp.setInputStream(in, "ISO-8859-1");
+
+     FlatFileParser.LineType line = ffp.parseHeaders();
+     if (line != FlatFileParser.LineType.DATA_HEADER)
+     {
+       throw new InvalidDataException("Can't find start of data in file: " + file);
+     }
+
+     while (ffp.hasMoreData())
+     {
+       // Check if the user has aborted
+       if (Thread.interrupted()) throw new SignalException("Aborted by user.");
+
+       // Progress reporting
+       int lines = ffp.getParsedLines();
+       if (progress != null && lines % 100 == 0)
+       {
+         String message = "Parsing file " + file.getName() + "; " + lines + " lines done.";
+         current = ffp.getParsedBytes();
+         int percent = (int)(offset + factor * current);
+         progress.display(percent, message);
+       }
+
+       // Get next data line
+       FlatFileParser.Data data = ffp.nextData();
+       String featureId = MD5.leftPad(data.get(0), '0', 10);
+       int n = Integer.parseInt(data.get(1));
+       int mean = Integer.parseInt(data.get(2));
+       int dev = Integer.parseInt(data.get(3));
+
+       ScandataSpot spot = rawdata.get(featureId);
+       if (spot == null)
+       {
+         spot = new ScandataSpot(featureId, n, mean, dev);
+         rawdata.put(featureId, spot);
+       }
+       else
+       {
+         spot.merge(n, mean, dev);
+       }
+     }
+
+     offset += deltaOffset;
+   }
+
+   // Insert data into the database
+   long current = 0;
+   RawDataBatcher batcher = rba.getRawDataBatcher(FeatureIdentificationMethod.FEATURE_ID);
+   double factor = deltaOffset / rawdata.size();
+   for (ScandataSpot spot : rawdata.values())
+   {
+     // Check if the user has aborted
+     if (Thread.interrupted()) throw new SignalException("Aborted by user.");
+
+     // Progress reporting
+     if (progress != null && current % 100 == 0)
+     {
+       String message = "Saving to database; " + current + " spots done.";
+       int percent = (int)(offset + factor * current);
+       progress.display(percent, message);
+     }
+
+     RawData raw = batcher.newRawData();
+     raw.setExtended("n", spot.getN());
+     raw.setExtended("mean", (float)spot.getMean());
+     raw.setExtended("dev", (float)spot.getDev());
+
+     batcher.insert(raw, null, spot.getFeatureId());
+     current++;
+   }
+   batcher.flush();
+   batcher.close();
+ }
+
+ public List<PlatformFileType> getPlatformFileTypes(DbControl dc, FileStoreEnabled item)
+ {
+   Platform platform = item.getPlatform();
+   PlatformVariant variant = item.getVariant();
+
+   ItemQuery<PlatformFileType> query = platform.getFileTypes(variant, false);
+   query.restrict(Restrictions.eq(Hql.property("dataFileType.itemType"),
+     Expressions.integer(item.getType().getValue())));
+   return query.list(dc);
+ }
+
  }
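The one added file in this changeset is not shown in the diff above, but the importer clearly expects it to define the ScandataSpot helper: it calls new ScandataSpot(featureId, n, mean, dev), spot.merge(n, mean, dev), and the getFeatureId/getN/getMean/getDev accessors. The following is only a minimal sketch of what such a class could look like, assuming the usual formulas for combining group means and variances that the class javadoc's reference describes; the field types and the internal calculation are assumptions, not the contents of the actual added file.

// Hypothetical sketch only - not the file actually added in changeset 562.
// It matches the calls made by ScanDataImporter above and pools the counts,
// means and standard deviations reported for the same Illumicode.
public class ScandataSpot
{
  private final String featureId;
  private int n;        // total number of observations for this Illumicode
  private double mean;  // combined mean intensity
  private double dev;   // combined standard deviation

  public ScandataSpot(String featureId, int n, int mean, int dev)
  {
    this.featureId = featureId;
    this.n = n;
    this.mean = mean;
    this.dev = dev;
  }

  // Merge another stripe's values for the same Illumicode: the new mean is the
  // count-weighted mean, and the new variance is
  // (within-group sums of squares + between-group sums of squares) / (N - 1).
  public void merge(int otherN, int otherMean, int otherDev)
  {
    int totalN = this.n + otherN;
    double totalMean = (this.n * this.mean + otherN * (double)otherMean) / totalN;
    double ss =
      (this.n - 1) * this.dev * this.dev +
      (otherN - 1) * (double)otherDev * otherDev +
      this.n * (this.mean - totalMean) * (this.mean - totalMean) +
      otherN * ((double)otherMean - totalMean) * (otherMean - totalMean);
    this.n = totalN;
    this.mean = totalMean;
    this.dev = Math.sqrt(ss / (totalN - 1));
  }

  public String getFeatureId() { return featureId; }
  public int getN() { return n; }
  public double getMean() { return mean; }
  public double getDev() { return dev; }
}

The real implementation may keep running sums instead of recomputing from the stored deviation; the only point here is to show the count-weighted mean and the pooled sum of squares that produce the single merged row per feature written by the RawDataBatcher.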