Changeset 231


Ignore:
Timestamp:
Feb 14, 2007, 12:45:29 PM (15 years ago)
Author:
mbayer
Message:

fixed a number of issues:

  • implemented support for Affy files
  • removed the need for an additional jar by including the time stamp code directly in this class
  • exception messages now show at end of import in popup
  • removed hardcoding of the id of the RawDataImporter definition
File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/uk/ac/scri/batchimporter/src/sbrn/base/BatchDataImport.java

    r226 r231  
    1717
    1818See also http://www.gnu.org/licenses/gpl.txt.
    19 */
     19 */
    2020package sbrn.base;
    2121
     22import net.sf.basedb.core.Affymetrix;
     23import net.sf.basedb.core.ArrayDesign;
    2224import net.sf.basedb.core.BaseException;
    2325import net.sf.basedb.core.DbControl;
     
    5052import net.sf.basedb.util.zip.FileUnpacker;
    5153
    52 import sbrn.commons.TimeStampUtils;
    53 
    5454import java.io.InputStream;
    5555import java.net.InetAddress;
    5656import java.net.UnknownHostException;
     57import java.text.SimpleDateFormat;
    5758import java.util.ArrayList;
     59import java.util.Date;
    5860import java.util.List;
     61import java.util.Locale;
    5962
    6063
     
    6467  and that it is of the same data type as the data that are being imported. Currently only supports the creation of
    6568  RawBioAssay objects as part of the import but could be extended to create other objects as needed.
    66  
     69
    6770  @author Micha Bayer - Plant Bioinformatics Group, Scottish Crop Research Institute
    6871  email: sbrn@scri.ac.uk, web http://www.scri.ac.uk
     
    8790  //the Experiment that this import will be attached to
    8891  private Experiment experiment = null;
    89  
     92
    9093  //the progress reporter for the overall  batch import
    9194  private ProgressReporter progress = null;
    92  
     95
    9396  //the percentage complete of the currently executing file import (subjob)
    9497  private double currentJobPercentComplete = 0;
    95  
     98
    9699  //the currently executing file import job
    97100  private Job currentJob = null;
    98  
     101
     102  //the cdf file for the array design for Affy data
     103  private File cdfFile = null;
    99104
    100105//  ================================c'tor=================================
     
    115120   (minus the extension).
    116121   */
    117   public boolean unpackZipFile()
     122  public boolean unpackZipFile() throws Exception
    118123  {
    119124    User user = null;
     
    135140      //the zip file should then be unzipped in here
    136141      //give it the same name as that of the zip file plus a timestamp
    137       String timeStamp = TimeStampUtils.getTime("ddMMyy_hhmmss");
     142      String timeStamp = getTime("ddMMyy_HHmmss");
    138143      Directory dataDir = Directory.getByPath(dc, new Path(homeDir.getPath().toString()+"/raw data",Path.Type.DIRECTORY));           
    139144      Directory subDir = Directory.getNew(dc,dataDir); 
     
    173178    catch (Exception e)
    174179    {
    175       // TODO Auto-generated catch block
    176180      e.printStackTrace();
     181      throw new Exception("zip file unpacking failed: "+ e.getMessage());
    177182    }
    178183    return unpackSuccess;
     
    191196    boolean importSuccess = false;
    192197    boolean unpackSuccess = unpackZipFile();
    193     if(!unpackSuccess)
    194       throw new Exception("zip file unpacking failed");
    195198
    196199    System.out.println("BatchDataImport importData");
    197200    System.out.println("dir = " + dir.getName());
     201
     202    //get the raw data type
     203    RawDataType rawDataType = experiment.getRawDataType();
     204    System.out.println("rawDataType = " + rawDataType.getName());
    198205
    199206    ItemQuery<File> fileQuery = null;
     
    208215      fileQuery = dir.getFiles();
    209216      fileList = fileQuery.list(dc); 
    210       File [] files = new File[fileList.size()];
     217
     218      File [] dataFiles = null;
    211219      if(fileQuery!=null)
    212220      {
    213         for (File f : fileList)
    214         {     
    215           System.out.println("file = " + f.getName());
     221        //for Affy data the zip file uploaded should contain the cdf file for the chip and all the cel files
     222        //need to separate these out
     223        if (rawDataType.getName().equalsIgnoreCase("affymetrix"))
     224        {
     225
     226          //iterate over all the files and check their extensions
     227          int dataFileCount =0;
     228          //initiate a shorter array for the data files since this should not include the cdf file
     229          dataFiles = new File[fileList.size()-1]; 
     230          System.out.println("instantiating dataFiles array of length " + dataFiles.length);
     231         
     232          System.out.println("affy data -- iterating over file names");
     233          for (int i = 0; i < fileList.size(); i++)
     234          {
     235            //get the current file and check its extension
     236            File file = fileList.get(i);
     237            String fileName = file.getName();
     238            String extension = fileName.substring(fileName.lastIndexOf(".")+1);
     239            System.out.println("file name = " +fileName + ", extension = " + extension);
     240
     241            //if it's the cdf file, point the global var for this at it
     242            if(extension.equalsIgnoreCase("cdf"))
     243            {
     244              System.out.println("cdf file found");
     245              cdfFile = file;
     246              System.out.println("cdf file = " + cdfFile);             
     247            }
     248            //if it's a cel file, add it to the dataFiles array
     249            if(extension.equalsIgnoreCase("cel"))
     250            {
     251                System.out.println("setting dataFiles["+dataFileCount+"] to file " + file.getName());
     252                dataFiles[dataFileCount] = file;
     253                dataFileCount++;
     254            }
     255          }
     256        }
     257        else//non-Affy data types
     258        {
     259          //in this case all the files in the zip file should be data files
     260          dataFiles = new File[fileList.size()]; 
     261          System.out.println("instantiating dataFiles array of length " + dataFiles.length);
     262
     263          //put the files from the list into the array for homogeneity's sake
     264          fileList.toArray(dataFiles);
     265        }
     266
     267        //list all the data files
     268        System.out.println("data files: ");
     269        for (int i = 0; i< dataFiles.length; i++)
     270        {               
     271          System.out.println(dataFiles[i].getName());
    216272        }
    217273      }
     
    222278      System.out.println("exampleFile = " + exampleFile.getName());
    223279
    224       //get the appropriate plugin config
    225       PluginConfigDetector detector = new PluginConfigDetector(sc);
    226       PluginConfiguration importConfig = detector.detectPluginConfig(exampleFile);     
    227       System.out.println("PluginConfiguration importConfig = " + importConfig);
    228 
    229       //now need to figure out the raw data type
    230       //this can be done by looking at the plugin config which should have it stored in it
    231       //it's needed so we can configure the RawBioAssay and Experiment objects appropriately
    232       List rdtVal = importConfig.getParameterValues("rawDataType");
    233       String rawDataTypeStr = (String)rdtVal.get(0);
    234       System.out.println("rawDataType = " + rawDataTypeStr); 
    235       RawDataType rawDataType = RawDataTypes.getRawDataType(rawDataTypeStr);
    236 
    237       // for each file
     280      //only do this for non-Affy data types
     281      PluginConfiguration importConfig = null;
     282      if (!rawDataType.getName().equalsIgnoreCase("affymetrix"))
     283      {
     284        //get the appropriate plugin config
     285        PluginConfigDetector detector = new PluginConfigDetector(sc);
     286        importConfig = detector.detectPluginConfig(exampleFile);     
     287        System.out.println("PluginConfiguration importConfig = " + importConfig);
     288      }
     289
     290      // for each data file
    238291      //create a new RawBioAssay
    239       List<RawBioAssay> rbaList = new ArrayList<RawBioAssay>();
    240       for (int i = 0; i < fileList.size(); i++)
     292      RawBioAssay [] rawBioAssays = new RawBioAssay [dataFiles.length];
     293      System.out.println("dataFiles.length = " + dataFiles.length);
     294      for (int i = 0; i < dataFiles.length; i++)
    241295      {
    242296        if(dc.isClosed())
    243297          dc = sc.newDbControl();
    244 
    245         File file = fileList.get(i);
    246         files[i]=file;
    247         System.out.println("current file is " + file.getName());
    248         RawBioAssay rba = createRawBioAssay(dc, "raw bioassay "+ file.getName(),rawDataType);
    249         rbaList.add(rba);
     298        System.out.println("current data file is " + dataFiles[i].getName());
     299        RawBioAssay rba = createRawBioAssay(dc, "raw bioassay "+ dataFiles[i].getName(),rawDataType);
     300        rawBioAssays[i] = rba;
    250301      }
    251302
     
    256307      if(dc.isClosed())
    257308        dc = sc.newDbControl();
     309
     310      //monitor the job progress
     311      ProgressMonitorThread pm = new ProgressMonitorThread(progress,rawBioAssays.length,this);
     312      pm.start();
     313
     314      //import the data for each RawBioAssay
     315      //only do this for non-Affy data types
     316      if (!rawDataType.getName().equalsIgnoreCase("affymetrix"))
     317      {
     318        for (int i = 0; i< rawBioAssays.length; i++)
     319        {
     320          SimpleProgressReporter pr = new SimpleProgressReporter(null);
     321          pm.setPr(pr);
     322          importRawData(importConfig, dataFiles[i], rawBioAssays[i], pr);
     323          pm.setCount(i);
     324        }
     325      }
     326      else//Affy data
     327      {
     328        System.out.println("Affymetrix experiment -- attaching cel files to raw bioassays");
     329
     330        //make a new ArrayDesign for this affy chip
     331        ArrayDesign arrayDesign = ArrayDesign.getNew(dc, true);
     332        arrayDesign.setName(cdfFile.getName());
     333        dc.saveItem(arrayDesign);
     334        dc.commit();
     335        //ArrayDesign arrayDesign = ArrayDesign.getById(dc, 7);
     336        System.out.println("using arrayDesign object " + arrayDesign.getName());
     337       
     338        //make sure we have a working dbcontrol object
     339        if(dc.isClosed())
     340          dc = sc.newDbControl();         
     341
     342        //check the cdf file exists -- this should have been included in the zip file
     343        //else fail the whole import
     344        if(cdfFile == null)
     345        {
     346          throw new Exception("no cdf file found for Affy data -- check that the zip file included this");
     347        }
     348
     349        //now set the cdf file on the array design
     350        System.out.println("setting cdf file on the array design object");
     351        dc.reattachItem(arrayDesign);
     352        dc.reattachItem(cdfFile);
     353        Affymetrix.setCdfFile(arrayDesign, cdfFile);
     354       
     355        //make sure we have a working dbcontrol object
     356        if(dc.isClosed())
     357          dc = sc.newDbControl();
     358
     359        System.out.println("adding cel files to raw bioassays");
     360        for (int i = 0; i< rawBioAssays.length; i++)
     361        {
     362          RawBioAssay rba = RawBioAssay.getById(dc, rawBioAssays[i].getId());
     363         
     364          System.out.println("iteration " + i);
     365          System.out.println("rba = " + rba.getName());
     366          System.out.println("file = " + dataFiles[i].getName());
     367         
     368          //must set the array design
     369          rba.setArrayDesign(arrayDesign);
     370
     371          //need to attach the cel file to the bioassay object
     372          Affymetrix.setCelFile(rba, dataFiles[i]);
     373        }
     374      }
    258375     
    259       int count = 0; 
    260       double numFiles = rbaList.size();
    261 
    262       ProgressMonitorThread pm = new ProgressMonitorThread(progress,numFiles,this);
    263       pm.start();
    264      
    265       //import the data for each RawBioAssay
    266       for(RawBioAssay rba : rbaList)
    267       {
    268         SimpleProgressReporter pr = new SimpleProgressReporter(null);
    269         pm.setPr(pr);
    270         importRawData(importConfig,files[count], rba,pr);     
    271         count++;
    272         pm.setCount(count);
    273       }
     376      //commit the changes to the database
     377      dc.commit();
    274378
    275379      //now attach all the new RawBioAssay objects to the current Experiment item
    276380      //this is the Experiment the user will be running the import from
    277       configureExperiment(dc, rawDataType,rbaList);
    278      
     381      configureExperiment(dc, rawDataType,rawBioAssays);
     382
    279383      //make sure we have a working dbcontrol object
    280384      if(dc.isClosed())
     
    284388      //if item 0 in the list of RawBioAssay objects can be retrieved, we assume it all worked
    285389      ItemQuery<RawBioAssay> query = RawBioAssay.getQuery();
    286       query.restrict(Restrictions.eq(Hql.property("name"), Expressions.string(rbaList.get(0).getName())));
     390      query.restrict(Restrictions.eq(Hql.property("name"), Expressions.string(rawBioAssays[0].getName())));
    287391      RawBioAssay testRba = query.list(dc).get(0);
    288392      if(testRba!=null)
     
    295399    {
    296400      e.printStackTrace();
     401      throw new Exception(e);
    297402    }
    298403    finally
     
    327432      job.setParameterValue("missingReporterError", new StringParameterType(), "skip");
    328433      job.setParameterValue("featureMismatchError", new StringParameterType(), "skip");
    329      
     434
    330435      currentJob = job;
    331      
     436
    332437      dc.saveItem(job);
    333438      dc.commit();
     
    369474    System.out.println("Executing job: " + job.getName() );
    370475    job = Job.getById(dc, job.getId());
    371    
     476
    372477    PluginExecutionRequest request = job.execute(pr, localhost);
    373478    dc.commit();
     
    375480    //execute the job request
    376481    PluginResponse response = request.invoke();
    377    
     482
    378483    //check the job ended successfully
    379484    if (response.getStatus() == Response.Status.ERROR)
     
    393498   Adds the raw bioassays to our experiment.
    394499   */
    395   private void configureExperiment(DbControl dc, RawDataType rawDataType, List<RawBioAssay> rawBioAssays) throws Exception
     500  private void configureExperiment(DbControl dc, RawDataType rawDataType, RawBioAssay [] rawBioAssays) throws Exception
    396501  {
    397502    try
     
    420525      int expID = experiment.getId();
    421526      Experiment exp = Experiment.getById(dc, expID);
    422      
     527
    423528      for (RawBioAssay rba : rawBioAssays)
    424529      {
     
    459564
    460565//  -------------------------------------------------------------------------------------------------------------------------------------------
    461  
     566
     567  private String getTime(String pattern)
     568  {
     569    SimpleDateFormat formatter;
     570    Locale currentLocale = Locale.getDefault();
     571    formatter = new SimpleDateFormat(pattern, currentLocale);
     572    String timeStamp = formatter.format(new Date()); 
     573    return timeStamp;
     574  }
     575
     576//  -------------------------------------------------------------------------------------------------------------------------------------------
     577
    462578  public Job getCurrentJob()
    463579  {
Note: See TracChangeset for help on using the changeset viewer.