Changeset 5692


Ignore:
Timestamp:
Aug 11, 2011, 4:16:26 PM (10 years ago)
Author:
Nicklas Nordborg
Message:

References #1153: Handling short read transcript sequence data

Fixed the 'Illumina raw data importer' plug-in. As a side-effect it now has support for attaching raw bioassays to more than one scan (derived bioassay). Actually, the attachment to scans was not fully implemented in the old version and didn't work as expected.

Location:
trunk/src
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/core/net/sf/basedb/core/BioMaterialEvent.java

    r5663 r5692  
    2323package net.sf.basedb.core;
    2424
     25import net.sf.basedb.core.query.Expressions;
    2526import net.sf.basedb.core.query.Restrictions;
    2627import net.sf.basedb.core.query.Hql;
     
    3536import net.sf.basedb.core.data.SharedData;
    3637
     38import java.util.List;
    3739import java.util.Map;
    3840import java.util.HashMap;
     
    917919 
    918920  /**
     921    Utility method for finding all sources on a given bioassay
     922    position.
     923    @param position The position number (between 1 and {@link PhysicalBioAssay#getSize()}).
     924    @return A list (empty if no sources are found)
     925  */
     926  public List<BioMaterialEventSource> getEventSources(DbControl dc, int position)
     927  {
     928    SpecialQuery<BioMaterialEventSource> posQuery = getEventSources();
     929    posQuery.restrict(Restrictions.eq(Hql.property("position"), Expressions.integer(position)));
     930    return posQuery.list(dc);
     931  }
     932 
     933  /**
    919934    The maximum length of the comment about this event. Check the length
    920935    against this value before calling the {@link #setComment(String)}
  • trunk/src/plugins/core/net/sf/basedb/plugins/IlluminaRawDataImporter.java

    r5689 r5692  
    3939import net.sf.basedb.core.BaseException;
    4040import net.sf.basedb.core.BasicItem;
     41import net.sf.basedb.core.BioMaterialEventSource;
    4142import net.sf.basedb.core.DataFileType;
    4243import net.sf.basedb.core.DbControl;
     44import net.sf.basedb.core.DerivedBioAssay;
    4345import net.sf.basedb.core.Experiment;
     46import net.sf.basedb.core.Extract;
    4447import net.sf.basedb.core.FeatureIdentificationMethod;
    4548import net.sf.basedb.core.File;
     
    7881import net.sf.basedb.util.Coordinate;
    7982import net.sf.basedb.util.error.SimpleErrorHandler;
     83import net.sf.basedb.util.fuzzy.StringMatcher;
    8084import net.sf.basedb.util.parser.ColumnMapper;
    8185import net.sf.basedb.util.parser.FlatFileParser;
     
    291295        // Associations
    292296        storeValue(job, request, ri.getParameter("experiment"));
     297        storeValues(job, request, ri.getParameter("bioAssays"));
    293298        storeValue(job, request, ri.getParameter("arrayDesign"));
    294299        storeValue(job, request, featureIdentificationParameter);
    295         storeValue(job, request, ri.getParameter("scan"));
    296300        storeValue(job, request, ri.getParameter("protocol"));
    297301        storeValue(job, request, ri.getParameter("software"));
     
    335339  private DbControl dc;
    336340  private Experiment experiment;
     341  private List<DerivedBioAssay> bioAssays;
    337342  private ArrayDesign design;
    338343  private Software software;
     
    412417  }
    413418 
     419  @SuppressWarnings("unchecked")
    414420  @Override
    415421  protected void begin(FlatFileParser ffp)
     
    422428    this.headerLines = new LinkedList<Line>();
    423429    this.experiment = (Experiment)job.getValue("experiment");
     430    this.bioAssays = (List<DerivedBioAssay>)job.getValues("bioAssays");
    424431    this.design = (ArrayDesign)job.getValue("arrayDesign");
    425432    this.protocol = (Protocol)job.getValue("protocol");
     
    651658        String arrayName = m.group(2);
    652659        String lastInName = arrayName.substring(arrayName.length()-1);
    653         int arrayNum = Coordinate.alphaToNumeric(lastInName);
    654         if (arrayNum <= 0) arrayNum = 1;
     660        int arrayPosition = Coordinate.alphaToNumeric(lastInName);
     661        if (arrayPosition <= 0) arrayPosition = 1;
    655662        // Check if raw data property exists
    656663        if (illumina.getProperty(propertyName) == null)
     
    671678            RawBioAssay rba = RawBioAssay.getNew(dc, generic, illumina);
    672679            rba.setName(arrayName);
    673 //            TODO (#1153)
    674             //rba.setArrayNum(arrayNum);
     680           
     681            DerivedBioAssay bioAssay = null;
     682            Extract extract = null;
     683            if (bioAssays != null)
     684            {
     685              // Find derived bioassay and physical bioassay based on the array name
     686              bioAssay = findBioAssay(bioAssays, arrayName);
     687              if (bioAssay != null)
     688              {
     689                bioAssay = DerivedBioAssay.getById(dc, bioAssay.getId());
     690                // Find extract on the given array position.
     691                List<BioMaterialEventSource> sources = bioAssay.getPhysicalBioAssay().getCreationEvent().getEventSources(dc, arrayPosition);
     692                // There must be exactly one source
     693                if (sources.size() == 1) extract = (Extract)sources.get(0).getBioMaterial();
     694              }
     695            }
     696            if (bioAssay != null) rba.setParentBioAssay(bioAssay);
     697            if (extract != null) rba.setParentExtract(extract);
    675698            if (design != null) rba.setArrayDesign(design);
    676699            if (protocol != null) rba.setProtocol(protocol);
     
    711734    }
    712735    return list;
     736  }
     737 
     738  private DerivedBioAssay findBioAssay(List<DerivedBioAssay> bioAssays, String name)
     739  {
     740    if (bioAssays.size() == 1) return bioAssays.get(0);
     741    DerivedBioAssay bestMatch = null;
     742    StringMatcher matcher = new StringMatcher();
     743    double bestScore = 0;
     744    for (DerivedBioAssay dba : bioAssays)
     745    {
     746      double score = matcher.getScore(name, dba.getName());
     747      if (score > bestScore)
     748      {
     749        bestScore = score;
     750        bestMatch = dba;
     751      }
     752    }
     753    return bestMatch;
    713754  }
    714755 
     
    747788            )
    748789          );
     790        List<DerivedBioAssay> bioAssays = getItems(dc, DerivedBioAssay.getQuery(),
     791          Restrictions.eq(
     792              Hql.property("itemSubtype.id"),
     793              Expressions.integer(SystemItems.getId(DerivedBioAssay.SCAN))
     794            )
     795          );
     796
    749797       
    750798        boolean hasAssociations =
    751799          context.getItem() == Item.EXPERIMENT || 
    752           protocols.size() > 0 || software.size() > 0 || designs.size() > 0;
     800          protocols.size() > 0 || software.size() > 0 || designs.size() > 0 || bioAssays.size() > 0;
    753801       
    754802        if (hasAssociations)
     
    765813              ));
    766814          }
     815          if (!bioAssays.isEmpty())
     816          {
     817            parameters.add(new PluginParameter<DerivedBioAssay>(
     818              "bioAssays",
     819              "Scans",
     820              "The scans that the data in the file is coming from. " +
     821              "Select each scan that is used in the file. The information " +
     822              "will be used to link each raw bioassay to the extract the " +
     823              "data is coming from. If more than one scan is used, the names of the " +
     824              "scans should be similar to the data headers in the file.",
     825              new ItemParameterType<DerivedBioAssay>(DerivedBioAssay.class, null, false, 0, bioAssays)
     826            ));
     827          }
     828         
    767829          if (!designs.isEmpty())
    768830          {
  • trunk/src/test/TestIlluminaImporter.java

    r5689 r5692  
    2020  along with BASE. If not, see <http://www.gnu.org/licenses/>.
    2121*/
     22import java.util.ArrayList;
     23import java.util.Arrays;
    2224import java.util.List;
    2325
     
    4345    TestReporter.test_import_from_file("data/test.illumina.reporters.txt", "Search_key,Target,ProbeId.*", ",(?=(?:[^\"]*\"[^\"]*\")*(?![^\"]*\"))", 1, 1);
    4446    int fileId = TestFile.test_create("data/test.illumina.rawdata.txt", false, false);
    45     int hybId = TestPhysicalBioAssay.test_create(0, "Test hyb", 0, 1);
    46 //    TODO (#1153)
    47     //int scanId = TestScan.test_create(hybId, 0, 0, false);
    48     int scanId = 0;
     47    int hyb1Id = TestPhysicalBioAssay.test_create(0, "Hyb #1677718123", 0, 1);
     48    int scan1Id = TestDerivedBioAssay.test_create_root("1677718123", hyb1Id, SystemItems.getId(DerivedBioAssay.SCAN), 0, 0, 0);
     49    int hyb2Id = TestPhysicalBioAssay.test_create(0, "Hyb #1677718142", 0, 1);
     50    int scan2Id = TestDerivedBioAssay.test_create_root("1677718142", hyb2Id, SystemItems.getId(DerivedBioAssay.SCAN), 0, 0, 0);
    4951    int protocolId = TestProtocol.test_create(SystemItems.getId(Protocol.FEATURE_EXTRACTION), null, false);
    5052    int softwareId = TestSoftware.test_create(SystemItems.getId(Software.FEATURE_EXTRACTION), null, false);
     
    5355    // Create plugin configuration and job
    5456    int pluginDefinitionId  = TestPluginDefinition.test_get("net.sf.basedb.plugins.IlluminaRawDataImporter");
    55     int jobId = test_create_job(pluginDefinitionId, fileId, scanId, protocolId, softwareId, experimentId);
     57    int jobId = test_create_job(pluginDefinitionId, fileId, protocolId, softwareId, experimentId, scan1Id, scan2Id);
    5658   
    5759    // Execute job
     
    7476    TestJob.test_delete(jobId);
    7577   
    76 //    TODO (#1153)
    77     //TestScan.test_delete(scanId);
     78    TestDerivedBioAssay.test_delete(scan1Id);
     79    TestDerivedBioAssay.test_delete(scan2Id);
    7880    TestSoftware.test_delete(softwareId);
    7981    TestProtocol.test_delete(protocolId);
    80     TestPhysicalBioAssay.test_delete(hybId);
     82    TestPhysicalBioAssay.test_delete(hyb1Id);
     83    TestPhysicalBioAssay.test_delete(hyb2Id);
    8184    TestFile.test_delete(fileId);
    8285    TestReporter.test_delete();
     
    8689  }
    8790
    88   static int test_create_job(int pluginId, int fileId, int scanId, int protocolId, int softwareId, int experimentId)
     91  static int test_create_job(int pluginId, int fileId, int protocolId, int softwareId, int experimentId, int... scanIds)
    8992  {
    9093    if (pluginId == 0 || fileId == 0 || experimentId == 0 && !TestUtil.hasPermission(Permission.CREATE, Item.JOB)) return 0;
     
    109112        request.setParameterValue("experiment", Experiment.getById(dc, experimentId));
    110113      }
    111       /*
    112       TODO (#1153)
    113       if (scanId != 0)
    114       {
    115         request.setParameterValue("scan", Scan.getById(dc, scanId));
    116       }
    117       */
     114      if (scanIds != null && scanIds.length > 0)
     115      {
     116        List<DerivedBioAssay> bioAssays = new ArrayList<DerivedBioAssay>(scanIds.length);
     117        for (int scanId : scanIds)
     118        {
     119          bioAssays.add(DerivedBioAssay.getById(dc, scanId));
     120        }
     121        request.setParameterValues("bioAssays", bioAssays);
     122      }
    118123      if (protocolId != 0)
    119124      {
Note: See TracChangeset for help on using the changeset viewer.