Changeset 4289


Ignore:
Timestamp:
Feb 16, 2012, 2:49:23 PM (12 years ago)
Author:
marianne
Message:

Refs #774. Distribution tolerance update. ClusterID set only if features are aligned. Alignment if no common sequences.

Location:
trunk
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/api/core/conf/common-queries.xml

    r4281 r4289  
    264264    </description>
    265265  </query>
    266  
    267   <query id="GET_UNIQUE_SEQUENCES_IN_MSFILE_FOR_PROJECT" type="HQL">
    268     <sql>
    269       SELECT DISTINCT f.peptideSequence
    270       FROM FeatureData f
    271       WHERE f.project = :project
    272       AND f.msFile = :msFile
    273       AND NOT f.peptideSequence IS NULL
    274       AND f.chargeState IS 2
    275       ORDER by f.apexRetentionTimeInMinutes
    276     </sql>
    277     <description>
    278       Load all distinct (unique) sequences for a ms file in the Feature table for a project
    279     </description>
    280   </query>
    281  
    282266
    283267  <query id="GET_IDS_FOR_SEPARATION_METHODS_WITH_SEPARATION_EVENTS" type="HQL">
  • trunk/api/core/src/org/proteios/core/Feature.java

    r4280 r4289  
    2121  public static final Item TYPE = Item.PROTEIOS_FEATURE;
    2222
    23  
    24   /*@SuppressWarnings("unchecked")
    25   public static List<String> getAllClusterIDs(DbControl dc)
    26   {
    27     org.hibernate.Query query = HibernateUtil.getPredefinedQuery(dc
    28       .getHibernateSession(),
    29       "GET_ALL_FEATURES_WITH_SPECIFIC_CLUSTERID");
    30     List<String> retval = query.list();
    31     return retval;
    32   }*/
    33  
    3423 
    3524  @SuppressWarnings("unchecked")
     
    5039 
    5140 
    52 
    53 
    54   @SuppressWarnings("unchecked")
    55   public static List<String> getUniqueFeatureSequences(Project project, DbControl dc, File msFile)
    56   {
    57     org.hibernate.Query query = HibernateUtil.getPredefinedQuery(dc
    58       .getHibernateSession(),
    59       "GET_UNIQUE_SEQUENCES_IN_MSFILE_FOR_PROJECT");
    60     query.setEntity("project", project.getData());
    61     query.setEntity("msFile", msFile.getData());
    62     List<String> retval = query.list();
    63     return retval;
    64   }
    65 
    66   @SuppressWarnings("unchecked")
    67   public static List<String> getUniqueFeatureSequnces(Project project, DbControl dc, File featureFile)
    68   {
    69     org.hibernate.Query query = HibernateUtil.getPredefinedQuery(dc
    70       .getHibernateSession(),
    71       "GET_UNIQUE_SEQUNCES_IN_FEATURE_FILE_FOR_PROJECT");
    72     query.setEntity("project", project.getData());
    73     query.setEntity("featureFile", featureFile.getData());
    74     List<String> retval = query.list();
    75     return retval;
    76   }
    77 
    78 
    7941  /*
    8042   * From the Identifiable interface
  • trunk/plugin/src/org/proteios/plugins/FeatureSequencePropagator.java

    r4288 r4289  
    123123      Timestamp tStmp = new Timestamp(new Date().getTime());
    124124      long initialClusterID = tStmp.getTime();
    125 
    126       double tolerance = 0.01;
    127       double timeTol = 0.5;
     125     
     126      //DEFAULT VALUES! Used if no sequences in the project at all!
     127      double defaultTolerance = 0.01;
     128      double defaultTimeTol = 0.5;
     129     
     130      double tolerance = 0;
     131      double timeTol = 0;
    128132      int count = 0;
    129133      // dbControl = object used for communicating with database
     
    133137      DbControl dcFile = sc.newDbControl();
    134138      // Retrieve the job parameters
    135       tolerance = ((Float) job.getValue("tolerance")).doubleValue();
    136       timeTol = ((Float) job.getValue("timeTol")).doubleValue();
     139      //tolerance = ((Float) job.getValue("tolerance")).doubleValue();
     140      //timeTol = ((Float) job.getValue("timeTol")).doubleValue();
    137141      boolean matchBetweenFractions = (Boolean) job
    138142          .getValue("matchBetweenFractions");
     
    147151        //Annotator annotator = new Annotator(factory);
    148152        //AnnotationType fractionAnnotationType = annotator
    149           //  .getAnnotationType("fraction", Type.STRING, File.TYPE);
     153        //  .getAnnotationType("fraction", Type.STRING, File.TYPE);
    150154        //creates an instance of the template class
    151155        File outCoreFile = factory.create(File.class);
     
    172176        writer.println("Propagation report for project:" + project
    173177            .getName());
    174         writer.println("m/z tolerance:" + tolerance);
    175         writer.println("RT (minutes) tolerance:" + timeTol);
     178        writer.println("Default m/z tolerance if no common pair-wise sequences in project: " + defaultTolerance);
     179        writer.println("Default RT (minutes) tolerance if no common pair-wise sequences in project: " + defaultTimeTol);
    176180        writer
    177181        .println("Matching between fraction:" + matchBetweenFractions);
     182
    178183        //get unique ms files for the project
    179184        List<File> uniqueMsFiles = Feature.getUniqueMsFiles(project, dc);
    180         writer.println("Number of files with MS features:" + uniqueMsFiles.size() + "\n");
    181         log.debug("Number of files with MS features:" + uniqueMsFiles.size() + "\n");
     185        int nbrOfFiles = uniqueMsFiles.size();
     186        writer.println("Number of files with MS features:" + nbrOfFiles + "\n");
     187        log.debug("Number of files with MS features:" + nbrOfFiles + "\n");
    182188
    183189        //query for the class
    184190        ItemQuery<Feature> featureQuery = qf.select(Feature.class);
     191
    185192        // Only features in project should be retrieved
    186193        // OBS! has hql object as input
     
    188195            .property("project"), Hql.entity(project)));
    189196
    190         int nbrOfFiles = uniqueMsFiles.size();
    191         //int totNbrFiles = 2*nbrOfFiles-2;
    192         //int totMatchEvents = nbrOfFiles - 1;
    193         //int currMatchEvents = 0;
     197
    194198
    195199        FilePoint[] simList = new FilePoint[nbrOfFiles*(nbrOfFiles-1)/2];
     
    197201
    198202        ArrayList<TreeSet<Feature>> featureList = new ArrayList<TreeSet<Feature>>(nbrOfFiles);
    199         ArrayList<ArrayList<Feature>> totFeatureList = new ArrayList<ArrayList<Feature>>(nbrOfFiles);
    200203
    201204        //  double[][] mzDist = new double[nbrOfFiles][];
     
    206209        Arrays.fill(hasSequence,true);
    207210
    208         //TODO:What to do if hasSequence is false? Filterfunction needed for features when file has no sequences.
    209 
    210211        //Random randGen = new Random();
    211        
     212
    212213        double[] originalFeatWithSeq = new double[nbrOfFiles];
    213214
     
    216217          File currentMsFile = uniqueMsFiles.get(fileNbr);
    217218
    218           List<String> currSeq = Feature.getUniqueFeatureSequences(project, dc, currentMsFile);
    219           writer.println("Number of unique sequences in file:" + fileNbr + " " + currSeq.size());
    220          
    221           if (currSeq==null || currSeq.isEmpty())
    222           {
    223             hasSequence[fileNbr]=false;
     219
     220          //Clearing clusterID for current project
     221          featureQuery.restrict(Restrictions.eq(Hql
     222              .property("msFile"), Hql.entity(currentMsFile)));
     223          featureQuery.restrict(Restrictions.neq(Hql
     224              .property("clusterId"), null));
     225
     226
     227          List<Feature> allFeaturesClusterID = featureQuery.list(dc);
     228          featureQuery.reset();
     229
     230          //Clearing the clusterID:s in current project
     231          for (Feature clusterF: allFeaturesClusterID){
     232            if (progress != null)
     233            {
     234              progress.display((fileNbr+1)/nbrOfFiles,
     235                  "Clearing clusterID in project. Processing file nr " + (fileNbr+1));
     236            }
     237
     238
     239            clusterF.setClusterId(null); 
     240            dc.reattachItem(clusterF);
     241
    224242          }
    225           //seqList.add(currSeq);
     243          dc.commit();
     244          dc = sc.newDbControl();
    226245
    227246          // Retrieve features for current ms file
     
    234253
    235254          List<Feature> allFeatures = featureQuery.list(dc);
    236 
    237           //TODO: Setting clusterID, should be done together with distribution calculation
    238           for (Feature clusterF: allFeatures){
    239             //if (clusterF.getClusterId()==null){
    240 
    241             clusterF.setClusterId(initialClusterID++); 
    242             dc.reattachItem(clusterF);
    243             //}
    244           }
    245           dc.commit();
    246 
    247           dc = sc.newDbControl();
    248255          allFeatures = featureQuery.list(dc);
    249 
    250           totFeatureList.add(new ArrayList<Feature>());
    251           totFeatureList.get(fileNbr).addAll(allFeatures);
    252 
    253           writer.println("Number of all features in file:" + fileNbr + " " + allFeatures.size());
    254256
    255257
     
    277279          featureList.add(new TreeSet<Feature>(new SeqComparator()));
    278280          featureList.get(fileNbr).addAll(features);
    279          
    280 
    281           /*for (Feature f:featureList.get(fileNbr))
    282           {
    283             log.debug(f.getPeptideSequence()+ " "+f.getApexRetentionTimeInMinutes()+ " " +f.getChargeState());
    284           }*/
    285 
    286          
     281
     282
    287283          originalFeatWithSeq[fileNbr]=features.size();
    288          
     284
    289285          writer.println("File number " +fileNbr + " is " + currentMsFile);
    290286          writer.println("Number of all features in file " + fileNbr + ": " + allFeatures.size());
     
    299295          log.debug("Number of unique features with sequences in file " + fileNbr + ": " + featureList.get(fileNbr).size());
    300296
    301 
    302 
    303297          //String frac1 = getFirstAnnotationValue(annotator,
    304298          //fractionAnnotationType, currentMsFile);
    305299        }
     300
     301
    306302
    307303        writer.println("===============================");
     
    353349            int indexKey = (secFileNbr+1)*(secFileNbr-2)/2+fileNbr+1;
    354350
    355 
    356 
    357             //  writer.println("The index key :" + indexKey);
    358             // 
    359             //if (simValues[indexKey]<0)
    360             //{
    361 
    362351            ArrayList<Point> points = getPointMatchBySeq(features, features2);
     352            ArrayList<Point> evalPoints = new ArrayList<Point>();
     353            double simValue = 0;
     354            PolynomialSplineFunction func = null;
    363355
    364356            //double percentageForRegression = 10;
     
    370362            if(totNbrMatches==0){
    371363
    372               writer.println("There are no common sequences for file: " + (fileNbr+1) + " and " +(secFileNbr+1));
    373               writer.println("These files will not be aligned.");
    374               continue;
     364              writer.println("There are no common sequences for file: " + (fileNbr+1) + " and " +(secFileNbr+1) +": " +totNbrMatches);
     365              writer.println("Alignment will be performed without retention time correction.");
     366
     367            }else{
     368
     369
     370              Collections.sort(points,new DiffComparator());
     371
     372              double lowerQuartile = points.get((int) Math.round(0.25*points.size())).getDiff();
     373              double upperQuartile = points.get((int) Math.round(0.75*points.size())).getDiff();
     374              double medianQuartile = points.get((int) Math.round(0.5*points.size())).getDiff();
     375              double interQuartileRange = upperQuartile-lowerQuartile;
     376              double upperFence = upperQuartile + 3*interQuartileRange;
     377
     378              writer.println("Median RT difference before alignment: " +medianQuartile);
     379              writer.println("Largest RT difference before alignment: " +points.get(points.size()-1).getDiff());
     380
     381              int maxCutOff = points.size()-1;
     382              while(points.get(maxCutOff).getDiff()>upperFence)
     383              {
     384                points.remove(maxCutOff);
     385                maxCutOff--;
     386              }
     387
     388
     389              writer.println("Removing " +(totNbrMatches-points.size()) + " outliers. Total number of sequence matches is now: " +points.size());
     390
     391              medianQuartile = points.get((int) Math.round(0.5*points.size())).getDiff();
     392
     393              writer.println("Median RT difference is now: " +medianQuartile);
     394              writer.println("Largest RT difference is now: " +points.get(points.size()-1).getDiff());
     395
     396              Collections.sort(points,new MzDiffComparator());
     397              double medianQuartileMz = points.get((int) Math.round(0.5*points.size())).getMzDiff();
     398              writer.println("Median mz difference before alignment: " +medianQuartileMz);
     399              writer.println("Largest mz difference before alignment: " +points.get(points.size()-1).getMzDiff());
     400
     401              tolerance=points.get(points.size()-1).getMzDiff();
     402              timeTol=medianQuartile;
     403
     404
     405
     406
     407
     408              //no duplicates in x-values
     409              Collections.sort(points, new RTComparator());
     410
     411              log.debug("Points size: " +points.size());
     412
     413
     414              //TODO: bortkommentera när mzDist ska användas som likhetsvärde
     415              simValue = points.size()*2.0/(features2.size()+features.size());
     416              writer.println("Similarity for file number" +fileNbr + " and " + secFileNbr +": " +simValue);
     417
     418              /*
     419               * Weight order
     420              Collections.sort(points, new WeightComparator());
     421
     422              int halfLength = (int) Math.ceil(points.size()/2);
     423
     424              evalPoints.addAll(points.subList(0,halfLength-1));
     425
     426              points.subList(0,halfLength).clear();
     427
     428              Collections.sort(points, new RTComparator());
     429              Collections.sort(evalPoints, new RTComparator());*/
     430
     431
     432              //Partitioning matching sequences in alignment set and evaluation set
     433              int ndx = 0;
     434              while(ndx<points.size())
     435              {
     436                //TODO: avkommentera när random ska användas
     437                //features.remove(randGen.nextInt(features.size()));
     438
     439                evalPoints.add(points.remove(ndx));
     440                ndx++;
     441
     442              }
     443
     444              log.debug("Points size: " +points.size());
     445              log.debug("Eval size: " +evalPoints.size());
     446
     447              ArrayList<Point> tempPoints = new ArrayList<Point>();
     448              tempPoints = evalPoints;
     449              evalPoints = points;
     450              points = tempPoints;
     451
     452              func = estimateSplineFunc(points, writer);
     453              if (func==null){
     454                writer.println("Adding evaluation points to spline function estimation. No precision and recall will be computed for this pair-wise alignment.");
     455                points.addAll(evalPoints);
     456                Collections.sort(points, new RTComparator());
     457                evalPoints.clear();
     458                func = estimateSplineFunc(points, writer);
     459                if (func==null){
     460                  writer.println("Too few points for alignment.");
     461                  continue;
     462                }
     463
     464              }
    375465            }
    376 
    377 
    378             Collections.sort(points,new DiffComparator());
    379 
    380             double lowerQuartile = points.get((int) Math.round(0.25*points.size())).getDiff();
    381             double upperQuartile = points.get((int) Math.round(0.75*points.size())).getDiff();
    382             double medianQuartile = points.get((int) Math.round(0.5*points.size())).getDiff();
    383             double interQuartileRange = upperQuartile-lowerQuartile;
    384             double upperFence = upperQuartile + 3*interQuartileRange;
    385 
    386             writer.println("Median RT difference before alignment: " +medianQuartile);
    387             writer.println("Largest RT difference before alignment: " +points.get(points.size()-1).getDiff());
    388 
    389             int maxCutOff = points.size()-1;
    390             while(points.get(maxCutOff).getDiff()>upperFence && points.get(maxCutOff).getDiff()>timeTol)
    391             {
    392               points.remove(maxCutOff);
    393               maxCutOff--;
    394             }
    395 
    396 
    397             writer.println("Removing " +(totNbrMatches-points.size()) + " outliers. Total number of sequence matches is now: " +points.size());
    398 
    399             medianQuartile = points.get((int) Math.round(0.5*points.size())).getDiff();
    400 
    401             writer.println("Median RT difference is now: " +medianQuartile);
    402             writer.println("Largest RT difference is now: " +points.get(points.size()-1).getDiff());
    403 
    404             Collections.sort(points,new MzDiffComparator());
    405             double medianQuartileMz = points.get((int) Math.round(0.5*points.size())).getMzDiff();
    406             writer.println("Median mz difference before alignment: " +medianQuartileMz);
    407             writer.println("Largest mz difference before alignment: " +points.get(points.size()-1).getMzDiff());
    408 
    409             tolerance=points.get(points.size()-1).getMzDiff();
    410             timeTol=medianQuartile;
    411 
    412 
    413             ArrayList<Point> evalPoints = new ArrayList<Point>();
    414 
    415 
    416             //no duplicates in x-values
    417             Collections.sort(points, new RTComparator());
    418 
    419             log.debug("Points size: " +points.size());
    420 
    421 
    422             //TODO: bortkommentera när mzDist ska användas som likhetsvärde
    423             double simValue = points.size()*2.0/(features2.size()+features.size());
    424             writer.println("Similarity for file number" +fileNbr + " and " + secFileNbr +": " +simValue);
    425 
    426             /*
    427             Collections.sort(points, new WeightComparator());
    428 
    429             int halfLength = (int) Math.ceil(points.size()/2);
    430 
    431             evalPoints.addAll(points.subList(0,halfLength-1));
    432 
    433             points.subList(0,halfLength).clear();
    434 
    435             Collections.sort(points, new RTComparator());
    436             Collections.sort(evalPoints, new RTComparator());*/
    437 
    438             int ndx = 0;
    439 
    440             while(ndx<points.size())
    441             {
    442               //TODO: avkommentera när random ska användas
    443               //features.remove(randGen.nextInt(features.size()));
    444 
    445               evalPoints.add(points.remove(ndx));
    446               ndx++;
    447 
    448             }
    449 
    450             log.debug("Points size: " +points.size());
    451             log.debug("Eval size: " +evalPoints.size());
    452 
    453             ArrayList<Point> tempPoints = new ArrayList<Point>();
    454             tempPoints = evalPoints;
    455             evalPoints = points;
    456             points = tempPoints;
    457 
    458 
    459             //}
    460 
    461 
    462             //  writer.println("Similarity:" + simValues[indexKey]);
    463 
    464 
    465             PolynomialSplineFunction func = estimateSplineFunc(points, writer);
    466             simList[indexKey] = new FilePoint(fileNbr,secFileNbr,simValue,func,evalPoints);
     466            simList[indexKey] = new FilePoint(fileNbr,secFileNbr,simValue, tolerance, timeTol, func,evalPoints);
    467467
    468468            writer.println();
     
    471471        }
    472472        //writer.println("Best match is :" + (firstMatch+1) +" and " +(secMatch+1)+ "\n");
     473
     474
     475        Arrays.sort(simList,Collections.reverseOrder(new SimComparator()));
     476
     477        // Propagate and get count
    473478       
     479        double noSeqMzTol = 0;
     480        double noSeqRtTol = 0;
    474481       
    475         Arrays.sort(simList,Collections.reverseOrder(new SimComparator()));
    476 
    477         // Propagate and get count
    478      
    479         countNbrMatches=0;
    480         for(FilePoint f:simList)
     482        for(int i=0; i<simList.length; i++)
    481483        {
    482           if (f.getSimValue()>0){
    483           countNbrMatches++;
    484 
     484
     485          FilePoint f = simList[i];
     486         
    485487          if (progress != null)
    486488          {
    487489            progress.display((100 *countNbrMatches/simList.length),
    488                 "Aligning files " +(f.getFirstFile()+1) + " and " + (f.getSecondFile()+1) +". File-pair " +countNbrMatches +" of " +simList.length);
     490                "Aligning files " +(f.getFirstFile()+1) + " and " + (f.getSecondFile()+1) +". File-pair " +(i+1) +" of " +simList.length +". Similarity score: " +f.getSimValue());
    489491          }
    490492
    491 
     493          //there are no common features at all, setting the mzTol and rtTol to the one with the least similar files
     494          if (f.getMzTol()==0 && f.getRtTol()==0){
     495           
     496         
     497            if (noSeqMzTol == 0 && noSeqRtTol==0){
     498
     499              if (i-1>=0){
     500               
     501                noSeqMzTol = simList[i-1].getMzTol();
     502                noSeqRtTol = simList[i-1].getRtTol();
     503               
     504                writer.println("===============================");
     505                writer.println("There are no common sequences for these files. Alignment will be performed with mz and rt tolerance for least similar files.");
     506               
     507              }else{
     508               
     509                writer.println("===============================");
     510                writer.println("There are no sequences in this project. Alignment will be performed with default values for mz and rt tolerance.");
     511               
     512                noSeqMzTol = defaultTolerance;
     513                noSeqRtTol = defaultTimeTol;
     514               
     515              }
     516
     517            }else{
     518              f.setMzTol(noSeqMzTol);
     519              f.setRtTol(noSeqRtTol);
     520             
     521            }
     522
     523          }
     524         
    492525
    493526          //TODO: här ska allFeatures skickas in för ytterligare utvärdering
    494           count += matchFeatureLists(f,factory, project,writer,featureQuery,uniqueMsFiles,tolerance,timeTol);
    495 
    496           //TODO: lägg in hur många unika sekvenser filen har EFTER matchning
    497           }
    498         }
    499        
    500        
     527          count += matchFeatureLists(f,factory, project,writer,featureQuery,uniqueMsFiles);
     528
     529
     530
     531        }
     532
     533
    501534        //writerDat.close();
    502535        // Commit to database
    503536        dc.commit();
    504        
    505        
     537
     538
    506539        writer.println("===============================");
    507        
     540
    508541        featureQuery.reset();
    509542        dc = sc.newDbControl();
     
    517550          featureQuery.restrict(Restrictions.gteq(Hql
    518551              .property("chargeState"),Expressions.integer(2)));
    519          
     552
    520553          writer.println("Number of features with sequences in file " +currentMsFile +" after alignment: " +featureQuery.list(dc).size());
    521554          writer.println("Total increase in sequence coverage for file " +currentMsFile + ": " +(featureQuery.list(dc).size()-originalFeatWithSeq[fileNbr])/originalFeatWithSeq[fileNbr]);
     
    523556        }
    524557        dc.close();
    525        
     558
    526559        writer.println("Total number of features matched for " + uniqueMsFiles.size() + " MS files: " + count);
    527560        response.setDone("Total number of features matched for " + uniqueMsFiles.size() + " MS files: " + count);
    528        
     561
    529562        // Close log file
    530563        writer.close();
     
    624657                log.debug("Point replaced.");
    625658              }
    626               //TODO: lägg in hits sorteringen..????
     659
    627660            }
    628661
     
    746779   */
    747780  private int matchFeatureLists(FilePoint fp, ItemFactory factory,
    748       Project project, PrintWriter writer, ItemQuery<Feature> featureQuery,List<File> uniqueMsFiles, double tolerance, double timeTol)
     781      Project project, PrintWriter writer, ItemQuery<Feature> featureQuery,List<File> uniqueMsFiles)
    749782  {
    750783
     784    int hitsCounter = 0;
     785    //timestamp for clusterID
     786    Timestamp tStmp = new Timestamp(new Date().getTime());
     787    long initialClusterID = tStmp.getTime();
    751788
    752789    DbControl dc = sc.newDbControl();
    753790
    754     ArrayList<Point> features = fp.getEvalList();
     791    ArrayList<Point> evalList = fp.getEvalList();
    755792    PolynomialSplineFunction func = fp.getSplineFunction();
     793    double tolerance = fp.getMzTol();
     794    double timeTol = fp.getRtTol();
    756795
    757796    featureQuery.restrict(Restrictions.eq(Hql
     
    782821    ArrayList<Feature> allFeatures2 = new ArrayList<Feature>(tempAllFeatures2);
    783822    featureQuery.reset();
    784    
     823
    785824    featureQuery.restrict(Restrictions.eq(Hql
    786825        .property("msFile"), Hql.entity(uniqueMsFiles.get(fp.getSecondFile()))));
     
    790829    featureQuery.restrict(Restrictions.neq(Hql.property("peptideSequence"), null));
    791830    List<Feature> tempAllFeaturesSeq2 = featureQuery.list(dc);
    792     ArrayList<Feature> allFeaturesSeq2 = new ArrayList<Feature>(tempAllFeaturesSeq);
     831    ArrayList<Feature> allFeaturesSeq2 = new ArrayList<Feature>(tempAllFeaturesSeq2);
    793832    featureQuery.reset();
     833
    794834
    795835    ArrayList<Feature> bestMatchList = new ArrayList<Feature>();
     
    803843
    804844    writer.println("===============================");
    805     writer.println("Evaluating match with " +features.size() +" features with sequences for file " +fp.getFirstFile() +" and " +fp.getSecondFile() +" with similarity score: " +fp.getSimValue());
     845    writer.println("Evaluating match with " +evalList.size() +" features with sequences for file " +fp.getFirstFile() +" and " +fp.getSecondFile() +" with similarity score: " +fp.getSimValue());
    806846    writer.println();
    807847
    808848
    809     log.debug("Evaluating match with " +features.size() +" features with sequences for file " +fp.getFirstFile() +" and " +fp.getSecondFile() +" with similarity score: " +fp.getSimValue());
    810    
    811    
     849    log.debug("Evaluating match with " +evalList.size() +" features with sequences for file " +fp.getFirstFile() +" and " +fp.getSecondFile() +" with similarity score: " +fp.getSimValue());
     850
     851
    812852    String localSampleId = null;
    813853    String fractionId = null;
     
    854894    double fineTolSeq = 0;
    855895
    856     for (Point p : features)
    857     {     
    858       Feature f = p.getXFeature();
    859       String fseq = f.getPeptideSequence();
     896    if (!evalList.isEmpty()){
     897
     898      for (Point p : evalList)
     899      {     
     900        Feature f = p.getXFeature();
     901        String fseq = f.getPeptideSequence();
     902
     903        float fRT = f.getApexRetentionTimeInMinutes();
     904
     905        if (func != null)
     906        {
     907          try
     908          {
     909            fRT = (float) func.value(f
     910                .getApexRetentionTimeInMinutes().doubleValue());
     911          }
     912          catch (ArgumentOutsideDomainException e)
     913          {}
     914        }
     915
     916        for(Point p2: evalList)
     917        {
     918          Feature f2 = p2.getYFeature();
     919          String f2seq = f2.getPeptideSequence();
     920
     921          if ((Math.abs(f.getMassToChargeRatio() - f2.getMassToChargeRatio()) < tolerance) && f2.getChargeState().equals(f.getChargeState()) && (Math.abs(f2.getApexRetentionTimeInMinutes() - fRT) < timeTol))
     922          {
     923            if (f2seq.equals(fseq))
     924            {
     925              fineSeq++;
     926
     927            }else{
     928
     929              fineTolSeq++;
     930
     931              writer.println("Current timetol: " +timeTol);
     932              writer.println("Feature with sequence " + f.getPeptideSequence() + " matching " + f2.getPeptideSequence() + " time diff: " +(Math.abs(f2.getApexRetentionTimeInMinutes() - fRT) + " mz diff: " +(Math.abs(f.getMassToChargeRatio() - f2.getMassToChargeRatio()))));
     933
     934            }
     935
     936            //sequences that match but are not within tolerance limits => conflict 
     937          }else if (f2seq.equals(fseq) && f2.getChargeState().equals(f.getChargeState())){
     938
     939            log.debug("Current timetol: " +timeTol +" and current mz tol: " +tolerance);
     940            log.debug("Feature with sequence " + f.getPeptideSequence() + " not matching " + f2.getPeptideSequence() + " time diff: " +(Math.abs(f2.getApexRetentionTimeInMinutes() - fRT) + " mz diff: " +(Math.abs(f.getMassToChargeRatio() - f2.getMassToChargeRatio()))));
     941
     942          }
     943        }
     944      }
     945    }
     946
     947
     948
     949    int count = 0;
     950    int minNdx = 0;
     951
     952    //double timeTolTemp = 0;
     953    Feature bestMatchPrev = null;
     954    for(Feature f:allFeatures){
     955
    860956
    861957      float fRT = f.getApexRetentionTimeInMinutes();
     
    872968      }
    873969
    874       for(Point p2: features)
    875       {
    876         Feature f2 = p2.getYFeature();
    877         String f2seq = f2.getPeptideSequence();
    878         //  log.debug("Feature to match RT:" + f.getApexRetentionTimeInMinutes() + "after shift:" + fRT + " diff:" + (fRT - f.getApexRetentionTimeInMinutes()));
    879         //sequence evaluation for recall and tol evaluation for precision
    880         if ((Math.abs(f.getMassToChargeRatio() - f2.getMassToChargeRatio()) < tolerance) && f2.getChargeState().equals(f.getChargeState()) && (Math.abs(f2.getApexRetentionTimeInMinutes() - fRT) < timeTol))
    881         {
    882          
    883           //fseq och f2seq ska inte kunna vara null
    884           if (f2seq.equals(fseq))
    885           {
    886             fineSeq++;
    887 
    888           }else{
    889 
    890             fineTolSeq++;
    891 
    892             writer.println("Current timetol: " +timeTol);
    893             writer.println("Feature with sequence " + f.getPeptideSequence() + " matching " + f2.getPeptideSequence() + " time diff: " +(Math.abs(f2.getApexRetentionTimeInMinutes() - fRT) + " mz diff: " +(Math.abs(f.getMassToChargeRatio() - f2.getMassToChargeRatio()))));
    894 
    895           }
    896        
    897         //sequences that should be matching
    898         }else if (f2seq.equals(fseq) && f2.getChargeState().equals(f.getChargeState())){
    899          
    900           log.debug("Current timetol: " +timeTol +" and current mz tol: " +tolerance);
    901           log.debug("Feature with sequence " + f.getPeptideSequence() + " not matching " + f2.getPeptideSequence() + " time diff: " +(Math.abs(f2.getApexRetentionTimeInMinutes() - fRT) + " mz diff: " +(Math.abs(f.getMassToChargeRatio() - f2.getMassToChargeRatio()))));
    902      
    903         }
    904       }
    905     }
    906 
    907 
    908 
    909     int count = 0;
    910     int minNdx = 0;
    911 
    912     //double timeTolTemp = 0;
    913 
    914     for(Feature f:allFeatures){
    915 
    916 
    917       float fRT = f.getApexRetentionTimeInMinutes();
    918 
    919       if (func != null)
    920       {
    921         try
    922         {
    923           fRT = (float) func.value(f
    924               .getApexRetentionTimeInMinutes().doubleValue());
    925         }
    926         catch (ArgumentOutsideDomainException e)
    927         {}
    928       }
    929 
    930970      while( minNdx<(allFeatures2.size()-1) && fRT-allFeatures2.get(minNdx).getApexRetentionTimeInMinutes()>timeTol){
    931971        minNdx++;
     
    935975
    936976      boolean hit = false;
    937    
     977
    938978
    939979      //  log.debug("For RT: " +fRT +" Starting at: " +allFeatures2.get(minNdx).getApexRetentionTimeInMinutes());
     
    942982      {
    943983
    944         //slut på sökning
     984        //end of search
    945985        if(f2.getApexRetentionTimeInMinutes()-fRT>timeTol){
    946986          break;
     
    9601000              if(Math.abs(bestMatch.getApexRetentionTimeInMinutes() - fRT)>Math.abs(f2.getApexRetentionTimeInMinutes() - fRT))
    9611001              {
    962                 log.debug("Best match is updated.");
    963                 log.debug("Old rt diff is: " +Math.abs(bestMatch.getApexRetentionTimeInMinutes() - fRT));
    9641002                bestMatch = f2;
    965                 log.debug("New rt diff is: " +Math.abs(bestMatch.getApexRetentionTimeInMinutes() - fRT));
    966                
     1003                //log.debug("Best match is updated.");
     1004                //log.debug("Old rt diff is: " +Math.abs(bestMatch.getApexRetentionTimeInMinutes() - fRT));
     1005
     1006                //log.debug("New rt diff is: " +Math.abs(bestMatch.getApexRetentionTimeInMinutes() - fRT));
     1007
    9671008              }
    9681009
     
    9751016
    9761017
    977       if(bestMatch!=null)
    978       {
    979 
    980 
    981         //log.debug("Matched features with RT " + f.getApexRetentionTimeInMinutes() + " and RT:" + bestMatch.getApexRetentionTimeInMinutes());
    982        
    983 
    984         //log.debug("First feature: " +f.getClusterId() + " bestMatch feature: " +bestMatch.getClusterId());
    985         //All features have clusterID:s, they are set at start of alignment -> all features that have a clusterID have undergone alignment
    986         if(!f.getClusterId().equals(bestMatch.getClusterId())){
    987           //log.debug("Setting new clusterID");
    988 
    989           bestMatchList.add(bestMatch);
    990           fList.add(f);
    991           bestMatchClusterIDList.add(bestMatch.getClusterId());
    992 
    993         }
    994 
    995       }
    996 
    997 
    998     }
    999    
    1000     int hitsCounter = 0;
    1001    
     1018      if(bestMatch!=null){ 
     1019
     1020        if(f.getClusterId()!=null && bestMatch.getClusterId()!=null || f.getClusterId()==null && bestMatch.getClusterId()!=null){
     1021          if (!bestMatch.getClusterId().equals(f.getClusterId())){
     1022            //Feature pair have undergone matching before and are not in same cluster => add to bestMatchClusterIDList
     1023            bestMatchList.add(bestMatch);
     1024            fList.add(f);
     1025            bestMatchClusterIDList.add(bestMatch.getClusterId());
     1026          }
     1027        }else{
     1028
     1029          //single match
     1030
     1031          count++;
     1032
     1033          if(f.getClusterId()==null && bestMatch.getClusterId()==null){
     1034            //Feature-pair has not undergone alignment before, no need to add to bestMatchClusterIDList
     1035            long clusterMatchID=initialClusterID++;
     1036            bestMatch.setClusterId(clusterMatchID);
     1037            f.setClusterId(clusterMatchID);
     1038
     1039          }else if (f.getClusterId()!=null && bestMatch.getClusterId()==null){
     1040            //only f has undergone alignment before but not bestMatch, no need to add to bestMatchClusterIDList
     1041            bestMatch.setClusterId(f.getClusterId());
     1042          }
     1043
     1044          dc.reattachItem(f);
     1045          dc.reattachItem(bestMatch);
     1046
     1047          //Updating hits list for single match
     1048          if (f.getPeptideSequence()!= null && bestMatch.getPeptideSequence()==null){
     1049            hitsCounter+=updateHitsList(f,bestMatch,localSampleId2,fractionId2,allFeaturesSeq,writer,dc,factory,project);
     1050          }else if(f.getPeptideSequence()== null && bestMatch.getPeptideSequence()!=null){
     1051            hitsCounter+=updateHitsList(bestMatch,f,localSampleId,fractionId,allFeaturesSeq2,writer,dc,factory,project);
     1052          }
     1053
     1054
     1055        }
     1056
     1057        if (bestMatchPrev!=null && bestMatchPrev.getMassToChargeRatio()==bestMatch.getMassToChargeRatio() && Math.abs(bestMatchPrev.getApexRetentionTimeInMinutes()-bestMatch.getApexRetentionTimeInMinutes())<0.00001){
     1058          log.debug("Previous bestMatch equals new bestMatch.");
     1059
     1060        }
     1061        bestMatchPrev=bestMatch;
     1062
     1063      }
     1064
     1065
     1066
     1067
     1068
     1069    }
     1070
     1071
    10021072    if (!bestMatchClusterIDList.isEmpty()){
    10031073
     
    10121082      log.debug("Size of fList: " +fList.size());
    10131083
    1014       count=bestMatchList.size();
     1084      count+=bestMatchList.size();
    10151085
    10161086
     
    10251095        Feature f = fList.get(featureNbr);
    10261096
     1097        //log.debug("First feature: " +f.getClusterId() + " bestMatch feature: " +bestMatch.getClusterId());
     1098        if (f.getClusterId()!=null){
     1099          bestMatch.setClusterId(f.getClusterId());
     1100        }else{
     1101          f.setClusterId(bestMatch.getClusterId());
     1102        }
     1103
    10271104        dc.reattachItem(f);
    10281105        dc.reattachItem(bestMatch);
    1029 
    1030         //log.debug("First feature: " +f.getClusterId() + " bestMatch feature: " +bestMatch.getClusterId());
    1031 
    1032         bestMatch.setClusterId(f.getClusterId());
    10331106
    10341107        //updating hits
     
    10391112          hitsCounter+=updateHitsList(bestMatch,f,localSampleId,fractionId,allFeaturesSeq2,writer,dc,factory,project);
    10401113        }
    1041         //TODO: what to do if both have sequences?
    1042 
    1043         //log.debug("First feature after set: " +f.getClusterId() + " bestMatch feature: " +bestMatch.getClusterId());
     1114        //TODO: what to do if both have sequences??? CONFILCT resolution needed!
    10441115
    10451116      }
     
    10541125    writer.println("Number of features matched:" + count);
    10551126    writer.println("Number of hits created:" + hitsCounter);
    1056     writer.println("Pairwsie alignment recall based on sequences with mzTol " +tolerance +" and  timeTol " +timeTol +": " +fineSeq/features.size());
    1057     writer.println("Pairwsie alignment precision based on sequences with mzTol " +tolerance +" and  timeTol " +timeTol +": " +(1-fineTolSeq/features.size()));
     1127    writer.println("Pairwsie alignment recall based on sequences with mzTol " +tolerance +" and  timeTol " +timeTol +": " +fineSeq/evalList.size());
     1128    writer.println("Pairwsie alignment precision based on sequences with mzTol " +tolerance +" and  timeTol " +timeTol +": " +(1-fineTolSeq/evalList.size()));
    10581129    writer.println();
    10591130
     
    10691140    dc.reattachItem(f);
    10701141    dc.reattachItem(f2);
    1071    
     1142
    10721143    int newHitCounter = 0;
    1073    
     1144
    10741145    Hit h = null;
    10751146    for (Hit h1 : f.getHits())
     
    10951166            .lastIndexOf(' '));
    10961167      for (Feature f3 : allFeaturesSeq)
    1097       {
     1168      {
     1169
    10981170        if ((fseq.equals(f3.getPeptideSequence()) || (fseq
    10991171            .contains(" ") && f3
     
    11571229      f2.setHits(f.getHits());
    11581230    }
    1159    
     1231
    11601232    return newHitCounter;
    11611233
     
    11961268      catch (MathException e1)
    11971269      {
    1198         throw new BaseException(e1.getMessage());
     1270        writer.println("Too few points to perform spline interpolation: " +points.size());
    11991271      }
    12001272    }
     
    14781550  private class FilePoint{
    14791551
    1480     private double simValue = 0.0;
     1552    private double simValue = 0;
     1553    private double mzTol = 0;
     1554    private double rtTol = 0;
    14811555    private PolynomialSplineFunction func = null;
    14821556    private int file1 = 0;
     
    14841558    private ArrayList<Point> evalPoint = new ArrayList<Point>();
    14851559
     1560
    14861561    public FilePoint()
    14871562    {}
    14881563
    1489     public FilePoint(int fileNbr, int fileNbr2, double simValue, PolynomialSplineFunction func, ArrayList<Point> evalPoint)
     1564    public FilePoint(int fileNbr, int fileNbr2, double simValue, double mzTol, double rtTol, PolynomialSplineFunction func, ArrayList<Point> evalPoint)
    14901565    {
    14911566      this.file1 = fileNbr;
    14921567      this.file2 = fileNbr2;
    14931568      this.simValue = simValue;
     1569      this.mzTol = mzTol;
     1570      this.rtTol = rtTol;
    14941571      this.func = func;
    14951572      this.evalPoint.addAll(evalPoint);
     
    15121589    }
    15131590
     1591    public double getMzTol(){
     1592
     1593      return this.mzTol;
     1594    }
     1595   
     1596    public void setMzTol(double mzTol){
     1597
     1598      this.mzTol = mzTol;
     1599    }
     1600
     1601    public double getRtTol(){
     1602
     1603      return this.rtTol;
     1604    }
     1605   
     1606    public void setRtTol(double rtTol){
     1607
     1608      this.rtTol = rtTol;
     1609    }
     1610
    15141611    public PolynomialSplineFunction getSplineFunction() {
    15151612
Note: See TracChangeset for help on using the changeset viewer.