Changeset 231 for trunk/uk/ac/scri/batchimporter/src/sbrn
- Timestamp: Feb 14, 2007, 12:45:29 PM (16 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/uk/ac/scri/batchimporter/src/sbrn/base/BatchDataImport.java
r226 r231 17 17 18 18 See also http://www.gnu.org/licenses/gpl.txt. 19 */19 */ 20 20 package sbrn.base; 21 21 22 import net.sf.basedb.core.Affymetrix; 23 import net.sf.basedb.core.ArrayDesign; 22 24 import net.sf.basedb.core.BaseException; 23 25 import net.sf.basedb.core.DbControl; … … 50 52 import net.sf.basedb.util.zip.FileUnpacker; 51 53 52 import sbrn.commons.TimeStampUtils;53 54 54 import java.io.InputStream; 55 55 import java.net.InetAddress; 56 56 import java.net.UnknownHostException; 57 import java.text.SimpleDateFormat; 57 58 import java.util.ArrayList; 59 import java.util.Date; 58 60 import java.util.List; 61 import java.util.Locale; 59 62 60 63 … … 64 67 and that it is of the same data type as the data that are being imported. Currently only supports the creation of 65 68 RawBioAssay objects as part of the import but could be extended to create other objects as needed. 66 69 67 70 @author Micha Bayer - Plant Bioinformatics Group, Scottish Crop Research Institute 68 71 email: sbrn@scri.ac.uk, web http://www.scri.ac.uk … … 87 90 //the Experiment that this import will be attached to 88 91 private Experiment experiment = null; 89 92 90 93 //the progress reporter for the overall batch import 91 94 private ProgressReporter progress = null; 92 95 93 96 //the percentage complete of the currently executing file import (subjob) 94 97 private double currentJobPercentComplete = 0; 95 98 96 99 //the currently executing file import job 97 100 private Job currentJob = null; 98 101 102 //the cdf file for the array desing for Affy data 103 private File cdfFile = null; 99 104 100 105 // ================================c'tor================================= … … 115 120 (minus the extension). 
116 121 */ 117 public boolean unpackZipFile() 122 public boolean unpackZipFile() throws Exception 118 123 { 119 124 User user = null; … … 135 140 //the zip file should then be unzipped in here 136 141 //give it the same name as that of the zip file plus a timestamp 137 String timeStamp = TimeStampUtils.getTime("ddMMyy_hhmmss");142 String timeStamp = getTime("ddMMyy_HHmmss"); 138 143 Directory dataDir = Directory.getByPath(dc, new Path(homeDir.getPath().toString()+"/raw data",Path.Type.DIRECTORY)); 139 144 Directory subDir = Directory.getNew(dc,dataDir); … … 173 178 catch (Exception e) 174 179 { 175 // TODO Auto-generated catch block176 180 e.printStackTrace(); 181 throw new Exception("zip file unpacking failed: "+ e.getMessage()); 177 182 } 178 183 return unpackSuccess; … … 191 196 boolean importSuccess = false; 192 197 boolean unpackSuccess = unpackZipFile(); 193 if(!unpackSuccess)194 throw new Exception("zip file unpacking failed");195 198 196 199 System.out.println("BatchDataImport importData"); 197 200 System.out.println("dir = " + dir.getName()); 201 202 //get the raw data type 203 RawDataType rawDataType = experiment.getRawDataType(); 204 System.out.println("rawDataType = " + rawDataType.getName()); 198 205 199 206 ItemQuery<File> fileQuery = null; … … 208 215 fileQuery = dir.getFiles(); 209 216 fileList = fileQuery.list(dc); 210 File [] files = new File[fileList.size()]; 217 218 File [] dataFiles = null; 211 219 if(fileQuery!=null) 212 220 { 213 for (File f : fileList) 214 { 215 System.out.println("file = " + f.getName()); 221 //for Affy data the zip file uploaded should contain the cdf file for the chip and all the cel files 222 //need to separate these out 223 if (rawDataType.getName().equalsIgnoreCase("affymetrix")) 224 { 225 226 //iterate over all the files and check their extensions 227 int dataFileCount =0; 228 //initiate a shorter array for the data files since this should not include the cdf file 229 dataFiles = new File[fileList.size()-1]; 230 
System.out.println("instantiating dataFiles array of length " + dataFiles.length); 231 232 System.out.println("affy data -- iterating over file names"); 233 for (int i = 0; i < fileList.size(); i++) 234 { 235 //get the current file and check its extension 236 File file = fileList.get(i); 237 String fileName = file.getName(); 238 String extension = fileName.substring(fileName.lastIndexOf(".")+1); 239 System.out.println("file name = " +fileName + ", extension = " + extension); 240 241 //if it's the cdf file, point the global var for this at it 242 if(extension.equalsIgnoreCase("cdf")) 243 { 244 System.out.println("cdf file found"); 245 cdfFile = file; 246 System.out.println("cdf file = " + cdfFile); 247 } 248 //if it's a cel file, add it to the dataFiles array 249 if(extension.equalsIgnoreCase("cel")) 250 { 251 System.out.println("setting dataFiles["+dataFileCount+"] to file " + file.getName()); 252 dataFiles[dataFileCount] = file; 253 dataFileCount++; 254 } 255 } 256 } 257 else//non-Affy data types 258 { 259 //in this case all the files in the zip file should be data files 260 dataFiles = new File[fileList.size()]; 261 System.out.println("instantiating dataFiles array of length " + dataFiles.length); 262 263 //put the files from the list into the array for homogeneity's sake 264 fileList.toArray(dataFiles); 265 } 266 267 //list all the data files 268 System.out.println("data files: "); 269 for (int i = 0; i< dataFiles.length; i++) 270 { 271 System.out.println(dataFiles[i].getName()); 216 272 } 217 273 } … … 222 278 System.out.println("exampleFile = " + exampleFile.getName()); 223 279 224 //get the appropriate plugin config 225 PluginConfigDetector detector = new PluginConfigDetector(sc); 226 PluginConfiguration importConfig = detector.detectPluginConfig(exampleFile); 227 System.out.println("PluginConfiguration importConfig = " + importConfig); 228 229 //now need to figure out the raw data type 230 //this can be done by looking at the plugin config which should have 
it stored in it 231 //it's needed so we can configure the RawBioAssay and Experiment objects appropriately 232 List rdtVal = importConfig.getParameterValues("rawDataType"); 233 String rawDataTypeStr = (String)rdtVal.get(0); 234 System.out.println("rawDataType = " + rawDataTypeStr); 235 RawDataType rawDataType = RawDataTypes.getRawDataType(rawDataTypeStr); 236 237 // for each file 280 //only do this for non-Affy data types 281 PluginConfiguration importConfig = null; 282 if (!rawDataType.getName().equalsIgnoreCase("affymetrix")) 283 { 284 //get the appropriate plugin config 285 PluginConfigDetector detector = new PluginConfigDetector(sc); 286 importConfig = detector.detectPluginConfig(exampleFile); 287 System.out.println("PluginConfiguration importConfig = " + importConfig); 288 } 289 290 // for each data file 238 291 //create a new RawBioAssay 239 List<RawBioAssay> rbaList = new ArrayList<RawBioAssay>(); 240 for (int i = 0; i < fileList.size(); i++) 292 RawBioAssay [] rawBioAssays = new RawBioAssay [dataFiles.length]; 293 System.out.println("dataFiles.length = " + dataFiles.length); 294 for (int i = 0; i < dataFiles.length; i++) 241 295 { 242 296 if(dc.isClosed()) 243 297 dc = sc.newDbControl(); 244 245 File file = fileList.get(i); 246 files[i]=file; 247 System.out.println("current file is " + file.getName()); 248 RawBioAssay rba = createRawBioAssay(dc, "raw bioassay "+ file.getName(),rawDataType); 249 rbaList.add(rba); 298 System.out.println("current data file is " + dataFiles[i].getName()); 299 RawBioAssay rba = createRawBioAssay(dc, "raw bioassay "+ dataFiles[i].getName(),rawDataType); 300 rawBioAssays[i] = rba; 250 301 } 251 302 … … 256 307 if(dc.isClosed()) 257 308 dc = sc.newDbControl(); 309 310 //monitor the job progress 311 ProgressMonitorThread pm = new ProgressMonitorThread(progress,rawBioAssays.length,this); 312 pm.start(); 313 314 //import the data for each RawBioAssay 315 //only do this for non-Affy data types 316 if 
(!rawDataType.getName().equalsIgnoreCase("affymetrix")) 317 { 318 for (int i = 0; i< rawBioAssays.length; i++) 319 { 320 SimpleProgressReporter pr = new SimpleProgressReporter(null); 321 pm.setPr(pr); 322 importRawData(importConfig, dataFiles[i], rawBioAssays[i], pr); 323 pm.setCount(i); 324 } 325 } 326 else//Affy data 327 { 328 System.out.println("Affymetrix experiment -- attaching cel files to raw bioassays"); 329 330 //make a new ArrayDesign for this affy chip 331 ArrayDesign arrayDesign = ArrayDesign.getNew(dc, true); 332 arrayDesign.setName(cdfFile.getName()); 333 dc.saveItem(arrayDesign); 334 dc.commit(); 335 //ArrayDesign arrayDesign = ArrayDesign.getById(dc, 7); 336 System.out.println("using arrayDesign object " + arrayDesign.getName()); 337 338 //make sure we have a working dbcontrol object 339 if(dc.isClosed()) 340 dc = sc.newDbControl(); 341 342 //check the cdf file exists -- this should have been included in the zip file 343 //else fail the whole import 344 if(cdfFile == null) 345 { 346 throw new Exception("no cdf file found for Affy data -- check that the zip file included this"); 347 } 348 349 //now set the cdf file on the array design 350 System.out.println("setting cdf file on the array design object"); 351 dc.reattachItem(arrayDesign); 352 dc.reattachItem(cdfFile); 353 Affymetrix.setCdfFile(arrayDesign, cdfFile); 354 355 //make sure we have a working dbcontrol object 356 if(dc.isClosed()) 357 dc = sc.newDbControl(); 358 359 System.out.println("adding cel files to raw bioassays"); 360 for (int i = 0; i< rawBioAssays.length; i++) 361 { 362 RawBioAssay rba = RawBioAssay.getById(dc, rawBioAssays[i].getId()); 363 364 System.out.println("iteration " + i); 365 System.out.println("rba = " + rba.getName()); 366 System.out.println("file = " + dataFiles[i].getName()); 367 368 //must set the array design 369 rba.setArrayDesign(arrayDesign); 370 371 //need to attach the cel file to the bioassay object 372 Affymetrix.setCelFile(rba, dataFiles[i]); 373 } 374 } 
258 375 259 int count = 0; 260 double numFiles = rbaList.size(); 261 262 ProgressMonitorThread pm = new ProgressMonitorThread(progress,numFiles,this); 263 pm.start(); 264 265 //import the data for each RawBioAssay 266 for(RawBioAssay rba : rbaList) 267 { 268 SimpleProgressReporter pr = new SimpleProgressReporter(null); 269 pm.setPr(pr); 270 importRawData(importConfig,files[count], rba,pr); 271 count++; 272 pm.setCount(count); 273 } 376 //commit the changes to the database 377 dc.commit(); 274 378 275 379 //now attach all the new RawBioAssay objects to the current Experiment item 276 380 //this is the Experiment the user will be running the import from 277 configureExperiment(dc, rawDataType,r baList);278 381 configureExperiment(dc, rawDataType,rawBioAssays); 382 279 383 //make sure we have a working dbcontrol object 280 384 if(dc.isClosed()) … … 284 388 //if item 0 in the list of RawBioAssay objects can be retrieved, we assume it all worked 285 389 ItemQuery<RawBioAssay> query = RawBioAssay.getQuery(); 286 query.restrict(Restrictions.eq(Hql.property("name"), Expressions.string(r baList.get(0).getName())));390 query.restrict(Restrictions.eq(Hql.property("name"), Expressions.string(rawBioAssays[0].getName()))); 287 391 RawBioAssay testRba = query.list(dc).get(0); 288 392 if(testRba!=null) … … 295 399 { 296 400 e.printStackTrace(); 401 throw new Exception(e); 297 402 } 298 403 finally … … 327 432 job.setParameterValue("missingReporterError", new StringParameterType(), "skip"); 328 433 job.setParameterValue("featureMismatchError", new StringParameterType(), "skip"); 329 434 330 435 currentJob = job; 331 436 332 437 dc.saveItem(job); 333 438 dc.commit(); … … 369 474 System.out.println("Executing job: " + job.getName() ); 370 475 job = Job.getById(dc, job.getId()); 371 476 372 477 PluginExecutionRequest request = job.execute(pr, localhost); 373 478 dc.commit(); … … 375 480 //execute the job request 376 481 PluginResponse response = request.invoke(); 377 482 378 483 
//check the job ended sucessfully 379 484 if (response.getStatus() == Response.Status.ERROR) … … 393 498 Adds the raw bioassays to our experiment. 394 499 */ 395 private void configureExperiment(DbControl dc, RawDataType rawDataType, List<RawBioAssay>rawBioAssays) throws Exception500 private void configureExperiment(DbControl dc, RawDataType rawDataType, RawBioAssay [] rawBioAssays) throws Exception 396 501 { 397 502 try … … 420 525 int expID = experiment.getId(); 421 526 Experiment exp = Experiment.getById(dc, expID); 422 527 423 528 for (RawBioAssay rba : rawBioAssays) 424 529 { … … 459 564 460 565 // ------------------------------------------------------------------------------------------------------------------------------------------- 461 566 567 private String getTime(String pattern) 568 { 569 SimpleDateFormat formatter; 570 Locale currentLocale = Locale.getDefault(); 571 formatter = new SimpleDateFormat(pattern, currentLocale); 572 String timeStamp = formatter.format(new Date()); 573 return timeStamp; 574 } 575 576 // ------------------------------------------------------------------------------------------------------------------------------------------- 577 462 578 public Job getCurrentJob() 463 579 {
Note: See TracChangeset for help on using the changeset viewer.