Changeset 4288


Ignore:
Timestamp:
Feb 13, 2012, 9:16:52 PM (12 years ago)
Author:
marianne
Message:

Refs #774. Bug fix: Added two-way alignment. Skipping alignment if there are no matching sequences.
Added: Recall and precision evaluation for features with sequences. Feature number coverage increase computation.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/plugin/src/org/proteios/plugins/FeatureSequencePropagator.java

    r4285 r4288  
    8686   */
    8787  private static final org.apache.log4j.Logger log = org.apache.log4j.LogManager
    88   .getLogger("org.proteios.plugins.featuresequencepropagator");
     88      .getLogger("org.proteios.plugins.featuresequencepropagator");
    8989
    9090
     
    9898  {
    9999    return new AboutImpl("Propagate identifications",
    100       "Match features and propagate identifications", "0.2",
    101       "2011, Marianne Sandin, Fredrik Levander", null, null,
    102     "http://www.proteios.org");
     100        "Match features and propagate identifications", "0.2",
     101        "2011, Marianne Sandin, Fredrik Levander", null, null,
     102        "http://www.proteios.org");
    103103  }
    104104
     
    107107  public void init(SessionControl sc, ParameterValues configuration,
    108108      ParameterValues job)
    109   throws BaseException
    110   {
     109          throws BaseException
     110          {
    111111    super.init(sc, configuration, job);
    112   }
     112          }
    113113
    114114
     
    123123      Timestamp tStmp = new Timestamp(new Date().getTime());
    124124      long initialClusterID = tStmp.getTime();
    125      
     125
    126126      double tolerance = 0.01;
    127127      double timeTol = 0.5;
     
    129129      // dbControl = object used for communicating with database
    130130      DbControl dc = sc.newDbControl();
    131      
     131
    132132      //used for alignment file
    133133      DbControl dcFile = sc.newDbControl();
     
    136136      timeTol = ((Float) job.getValue("timeTol")).doubleValue();
    137137      boolean matchBetweenFractions = (Boolean) job
    138       .getValue("matchBetweenFractions");
     138          .getValue("matchBetweenFractions");
    139139      Project project = (Project) job.getValue("project");
    140140      // qf is used to query the database
     
    145145        ItemFactory factory = new ItemFactory(dcFile);
    146146
    147         Annotator annotator = new Annotator(factory);
    148         AnnotationType fractionAnnotationType = annotator
    149         .getAnnotationType("fraction", Type.STRING, File.TYPE);
     147        //Annotator annotator = new Annotator(factory);
     148        //AnnotationType fractionAnnotationType = annotator
     149          //  .getAnnotationType("fraction", Type.STRING, File.TYPE);
    150150        //creates an instance of the template class
    151151        File outCoreFile = factory.create(File.class);
     
    157157        outCoreFile.setDirectory(project.getProjectDirectory());
    158158        outCoreFile.setName("AlignLog-" + job.getId() + ".txt");
    159        
     159
    160160        //datFile.setDirectory(project.getProjectDirectory());
    161161        //datFile.setName("AlignDat" + job.getId() + ".txt");
    162        
     162
    163163        dcFile.saveItem(outCoreFile);
    164164        //dc.saveItem(datFile);
     
    166166        OutputStream outstream = outCoreFile.getUploadStream(false);
    167167        //OutputStream outstream2 = datFile.getUploadStream(false);
    168        
     168
    169169        PrintWriter writer = new PrintWriter(outstream);
    170170        //PrintWriter writerDat = new PrintWriter(outstream2);
    171        
     171
    172172        writer.println("Propagation report for project:" + project
    173           .getName());
     173            .getName());
    174174        writer.println("m/z tolerance:" + tolerance);
    175175        writer.println("RT (minutes) tolerance:" + timeTol);
     
    186186        // OBS! has hql object as input
    187187        featureQuery.restrictPermanent(Restrictions.eq(Hql
    188           .property("project"), Hql.entity(project)));
     188            .property("project"), Hql.entity(project)));
    189189
    190190        int nbrOfFiles = uniqueMsFiles.size();
     
    192192        //int totMatchEvents = nbrOfFiles - 1;
    193193        //int currMatchEvents = 0;
    194        
     194
    195195        FilePoint[] simList = new FilePoint[nbrOfFiles*(nbrOfFiles-1)/2];
    196196        Arrays.fill(simList, new FilePoint());
    197        
     197
    198198        ArrayList<TreeSet<Feature>> featureList = new ArrayList<TreeSet<Feature>>(nbrOfFiles);
    199199        ArrayList<ArrayList<Feature>> totFeatureList = new ArrayList<ArrayList<Feature>>(nbrOfFiles);
    200        
    201       //  double[][] mzDist = new double[nbrOfFiles][];
    202         PearsonsCorrelation pearCorr = new PearsonsCorrelation();
    203        
     200
     201        //  double[][] mzDist = new double[nbrOfFiles][];
     202        //PearsonsCorrelation pearCorr = new PearsonsCorrelation();
     203
    204204        double[] meanFeatureLength = new double[nbrOfFiles];
    205205        boolean[] hasSequence = new boolean[nbrOfFiles];
    206206        Arrays.fill(hasSequence,true);
     207
     208        //TODO:What to do if hasSequence is false? Filterfunction needed for features when file has no sequences.
     209
     210        //Random randGen = new Random();
    207211       
    208         //TODO:What to do if hasSequence is false? Filterfunction needed for features when file has no sequences.
    209        
    210         Random randGen = new Random();
     212        double[] originalFeatWithSeq = new double[nbrOfFiles];
    211213
    212214        for (int fileNbr = 0; fileNbr<nbrOfFiles; fileNbr++)
    213215        {
    214216          File currentMsFile = uniqueMsFiles.get(fileNbr);
    215  
     217
    216218          List<String> currSeq = Feature.getUniqueFeatureSequences(project, dc, currentMsFile);
     219          writer.println("Number of unique sequences in file:" + fileNbr + " " + currSeq.size());
     220         
    217221          if (currSeq==null || currSeq.isEmpty())
    218222          {
     
    224228          // Only retrieve features with peptide sequences assigned
    225229          featureQuery.restrict(Restrictions.eq(Hql
    226             .property("msFile"), Hql.entity(currentMsFile)));
     230              .property("msFile"), Hql.entity(currentMsFile)));
    227231          featureQuery.restrict(Restrictions.gteq(Hql
    228             .property("chargeState"),Expressions.integer(2)));
     232              .property("chargeState"),Expressions.integer(2)));
    229233          featureQuery.order(Orders.asc(Hql.property("apexRetentionTimeInMinutes")));
    230          
    231           dc.reconnect();
     234
    232235          List<Feature> allFeatures = featureQuery.list(dc);
    233          
     236
    234237          //TODO: Setting clusterID, should be done together with distribution calculation
    235238          for (Feature clusterF: allFeatures){
    236239            //if (clusterF.getClusterId()==null){
    237              
    238               clusterF.setClusterId(initialClusterID++); 
    239               dc.reattachItem(clusterF);
     240
     241            clusterF.setClusterId(initialClusterID++); 
     242            dc.reattachItem(clusterF);
    240243            //}
    241244          }
    242245          dc.commit();
    243          
     246
    244247          dc = sc.newDbControl();
    245248          allFeatures = featureQuery.list(dc);
    246          
     249
    247250          totFeatureList.add(new ArrayList<Feature>());
    248251          totFeatureList.get(fileNbr).addAll(allFeatures);
    249          
     252
    250253          writer.println("Number of all features in file:" + fileNbr + " " + allFeatures.size());
    251          
    252          
     254
     255
    253256          //setMzDistribution(allFeatures,mzDist,meanFeatureLength,fileNbr);
    254257          featureQuery.reset();
    255          
     258
    256259          log.debug("Mean length: " +meanFeatureLength[fileNbr]);
    257260          //log.debug("Length of mzDist: " +mzDist[fileNbr].length);
    258        
    259          
     261
     262
    260263          //ordered according to RT for the treeset so that the first one (least RT) will be picked
    261264          featureQuery.restrict(Restrictions.eq(Hql
    262             .property("msFile"), Hql.entity(currentMsFile)));
     265              .property("msFile"), Hql.entity(currentMsFile)));
    263266          featureQuery.restrict(Restrictions.neq(Hql
    264             .property("peptideSequence"), null));
     267              .property("peptideSequence"), null));
    265268          featureQuery.restrict(Restrictions.gteq(Hql
    266             .property("chargeState"),Expressions.integer(2)));
     269              .property("chargeState"),Expressions.integer(2)));
    267270          featureQuery.order(Orders.asc(Hql.property("apexRetentionTimeInMinutes")));
    268271
     
    271274          List<Feature> features =  featureQuery.list(dc);
    272275          featureQuery.reset();
    273          
    274           featureList.add(new TreeSet(new SeqComparator()));
     276
     277          featureList.add(new TreeSet<Feature>(new SeqComparator()));
    275278          featureList.get(fileNbr).addAll(features);
    276279         
     280
    277281          /*for (Feature f:featureList.get(fileNbr))
    278282          {
    279283            log.debug(f.getPeptideSequence()+ " "+f.getApexRetentionTimeInMinutes()+ " " +f.getChargeState());
    280284          }*/
     285
    281286         
     287          originalFeatWithSeq[fileNbr]=features.size();
    282288         
    283289          writer.println("File number " +fileNbr + " is " + currentMsFile);
    284290          writer.println("Number of all features in file " + fileNbr + ": " + allFeatures.size());
    285291          writer.println("Number of features with sequences in file " + fileNbr + ": " + features.size());
    286           writer.println("Number of unique features with sequences in file " + fileNbr + ": " + featureList.get(fileNbr).size());
    287        
     292          writer.println("Number of unique features with sequences in file " + fileNbr + ": " +featureList.get(fileNbr).size());
     293
    288294          writer.println();
    289          
     295
    290296          log.debug("File number " +fileNbr + " is " + currentMsFile);
    291297          log.debug("Number of all features in file " + fileNbr + ": " + allFeatures.size());
    292298          log.debug("Number of features with sequences in file " + fileNbr + ": " + features.size());
    293299          log.debug("Number of unique features with sequences in file " + fileNbr + ": " + featureList.get(fileNbr).size());
    294          
    295          
    296          
     300
     301
     302
    297303          //String frac1 = getFirstAnnotationValue(annotator,
    298304          //fractionAnnotationType, currentMsFile);
     
    300306
    301307        writer.println("===============================");
    302        
    303        
    304        
     308
     309
     310
    305311        int countNbrMatches=0;
    306        
     312
    307313        for(int fileNbr=0; fileNbr<nbrOfFiles-1; fileNbr++ )
    308314        {
    309          
    310          
     315
     316
    311317          for (int secFileNbr=fileNbr+1; secFileNbr<nbrOfFiles; secFileNbr++)
    312318          {
    313            
     319
    314320            countNbrMatches++;
    315            
     321
    316322            if (progress != null)
    317323            {
    318324              progress.display((100 *countNbrMatches)/((nbrOfFiles*(nbrOfFiles-1)/2)),
    319                 "Calculating similarities. Processing file nr " + (fileNbr+1) + " and " +(secFileNbr+1));
     325                  "Calculating similarities. Processing file nr " + (fileNbr+1) + " and " +(secFileNbr+1));
    320326            }
    321            
     327
    322328            writer.println("Calculating similarities. Processing file nr " + (fileNbr+1) + " and " +(secFileNbr+1) +". Progress is: " +(100 *countNbrMatches)/((nbrOfFiles*(nbrOfFiles-1)/2)));
    323329            //double simValue = pearCorr.correlation(mzDist[fileNbr], mzDist[secFileNbr]);
    324330            //writer.println("Similarity for file number" +fileNbr + " and " + secFileNbr +": " +simValue);
    325            
    326             ArrayList<Feature> features = new ArrayList(featureList.get(fileNbr));
    327             ArrayList<Feature> features2 = new ArrayList(featureList.get(secFileNbr));
    328            
    329             //ArrayList<Feature> features = new ArrayList(totFeatureList.get(fileNbr));
    330             //ArrayList<Feature> features2 = new ArrayList(totFeatureList.get(secFileNbr));
    331            
    332             double meanWeight1=meanFeatureLength[fileNbr];
    333             double meanWeight2=meanFeatureLength[secFileNbr];
     331
     332            ArrayList<Feature> features = new ArrayList<Feature>(featureList.get(fileNbr));
     333            ArrayList<Feature> features2 = new ArrayList<Feature>(featureList.get(secFileNbr));
     334
     335            //double meanWeight1=meanFeatureLength[fileNbr];
     336            //double meanWeight2=meanFeatureLength[secFileNbr];
    334337            /* 
    335338
     
    349352
    350353            int indexKey = (secFileNbr+1)*(secFileNbr-2)/2+fileNbr+1;
    351            
    352            
    353            
     354
     355
     356
    354357            //  writer.println("The index key :" + indexKey);
    355358            // 
     
    358361
    359362            ArrayList<Point> points = getPointMatchBySeq(features, features2);
    360            
    361             double percentageForRegression = 10;
     363
     364            //double percentageForRegression = 10;
    362365            //ArrayList<Point> points = getPointMatchByTol(features,features2,percentageForRegression);
    363            
     366
    364367            int totNbrMatches = points.size();
    365368            writer.println("Total number of matches: " +totNbrMatches);
     369
     370            if(totNbrMatches==0){
     371
     372              writer.println("There are no common sequences for file: " + (fileNbr+1) + " and " +(secFileNbr+1));
     373              writer.println("These files will not be aligned.");
     374              continue;
     375            }
     376
    366377
    367378            Collections.sort(points,new DiffComparator());
     
    372383            double interQuartileRange = upperQuartile-lowerQuartile;
    373384            double upperFence = upperQuartile + 3*interQuartileRange;
    374            
     385
    375386            writer.println("Median RT difference before alignment: " +medianQuartile);
    376387            writer.println("Largest RT difference before alignment: " +points.get(points.size()-1).getDiff());
    377            
     388
    378389            int maxCutOff = points.size()-1;
    379390            while(points.get(maxCutOff).getDiff()>upperFence && points.get(maxCutOff).getDiff()>timeTol)
     
    382393              maxCutOff--;
    383394            }
    384            
    385            
     395
     396
    386397            writer.println("Removing " +(totNbrMatches-points.size()) + " outliers. Total number of sequence matches is now: " +points.size());
    387            
     398
    388399            medianQuartile = points.get((int) Math.round(0.5*points.size())).getDiff();
    389            
     400
    390401            writer.println("Median RT difference is now: " +medianQuartile);
    391402            writer.println("Largest RT difference is now: " +points.get(points.size()-1).getDiff());
    392            
     403
    393404            Collections.sort(points,new MzDiffComparator());
    394405            double medianQuartileMz = points.get((int) Math.round(0.5*points.size())).getMzDiff();
    395406            writer.println("Median mz difference before alignment: " +medianQuartileMz);
    396407            writer.println("Largest mz difference before alignment: " +points.get(points.size()-1).getMzDiff());
    397            
     408
    398409            tolerance=points.get(points.size()-1).getMzDiff();
    399410            timeTol=medianQuartile;
    400            
    401            
     411
     412
    402413            ArrayList<Point> evalPoints = new ArrayList<Point>();
    403          
    404            
     414
     415
    405416            //no duplicates in x-values
    406417            Collections.sort(points, new RTComparator());
    407            
     418
    408419            log.debug("Points size: " +points.size());
    409            
    410            
     420
     421
    411422            //TODO: bortkommentera när mzDist ska användas som likhetsvärde
    412423            double simValue = points.size()*2.0/(features2.size()+features.size());
    413424            writer.println("Similarity for file number" +fileNbr + " and " + secFileNbr +": " +simValue);
    414            
     425
    415426            /*
    416427            Collections.sort(points, new WeightComparator());
    417            
     428
    418429            int halfLength = (int) Math.ceil(points.size()/2);
    419            
     430
    420431            evalPoints.addAll(points.subList(0,halfLength-1));
    421            
     432
    422433            points.subList(0,halfLength).clear();
    423            
     434
    424435            Collections.sort(points, new RTComparator());
    425436            Collections.sort(evalPoints, new RTComparator());*/
    426            
     437
    427438            int ndx = 0;
    428            
     439
    429440            while(ndx<points.size())
    430441            {
    431442              //TODO: avkommentera när random ska användas
    432443              //features.remove(randGen.nextInt(features.size()));
    433              
     444
    434445              evalPoints.add(points.remove(ndx));
    435446              ndx++;
    436    
     447
    437448            }
    438            
     449
    439450            log.debug("Points size: " +points.size());
    440451            log.debug("Eval size: " +evalPoints.size());
    441          
     452
    442453            ArrayList<Point> tempPoints = new ArrayList<Point>();
    443454            tempPoints = evalPoints;
     
    448459            //}
    449460
    450            
     461
    451462            //  writer.println("Similarity:" + simValues[indexKey]);
    452            
     463
    453464
    454465            PolynomialSplineFunction func = estimateSplineFunc(points, writer);
    455466            simList[indexKey] = new FilePoint(fileNbr,secFileNbr,simValue,func,evalPoints);
    456            
     467
    457468            writer.println();
    458469          }
     
    460471        }
    461472        //writer.println("Best match is :" + (firstMatch+1) +" and " +(secMatch+1)+ "\n");
    462 
    463         //TODO: What if func is null?
     473       
     474       
    464475        Arrays.sort(simList,Collections.reverseOrder(new SimComparator()));
    465        
     476
    466477        // Propagate and get count
    467        
     478     
    468479        countNbrMatches=0;
    469480        for(FilePoint f:simList)
    470481        {
    471          
     482          if (f.getSimValue()>0){
    472483          countNbrMatches++;
    473          
     484
    474485          if (progress != null)
    475486          {
    476487            progress.display((100 *countNbrMatches/simList.length),
    477               "Aligning files " +(f.getFirstFile()+1) + " and " + (f.getSecondFile()+1) +". File-pair " +countNbrMatches +" of " +simList.length);
     488                "Aligning files " +(f.getFirstFile()+1) + " and " + (f.getSecondFile()+1) +". File-pair " +countNbrMatches +" of " +simList.length);
    478489          }
    479          
    480          
    481          
     490
     491
     492
    482493          //TODO: här ska allFeatures skickas in för ytterligare utvärdering
    483494          count += matchFeatureLists(f,factory, project,writer,featureQuery,uniqueMsFiles,tolerance,timeTol);
    484          
    485         }
    486 
    487         /*seqList.add(ListUtils.sum(seqList.get(firstMatch),seqList.get(secMatch)));
    488           writer.println("Size of union list is :" + seqList.get(seqList.size()-1).size() + "\n");
    489           seqList.set(firstMatch,null);
    490           seqList.set(secMatch,null);
    491 
    492           featureSet.add(bestSet);
    493           writer.println("Size of union set is :" + bestSet.size() + "\n");
    494           featureSet.set(firstMatch,null);
    495           featureSet.set(secMatch,null);
    496 
    497           currMatchEvents++;*/
    498 
    499 
     495
     496          //TODO: lägg in hur många unika sekvenser filen har EFTER matchning
     497          }
     498        }
     499       
    500500       
    501501        //writerDat.close();
    502502        // Commit to database
    503503        dc.commit();
    504         response.setDone("Features matched for " + uniqueMsFiles.size() + " MS files. " + count + " features sequences propagated.");
    505         writer.println("Features matched for " + uniqueMsFiles.size() + " MS files. " + count + " features sequences propagated.");
     504       
     505       
     506        writer.println("===============================");
     507       
     508        featureQuery.reset();
     509        dc = sc.newDbControl();
     510        for(int fileNbr=0; fileNbr<nbrOfFiles; fileNbr++ )
     511        {
     512          File currentMsFile = uniqueMsFiles.get(fileNbr);
     513          featureQuery.restrict(Restrictions.eq(Hql
     514              .property("msFile"), Hql.entity(currentMsFile)));
     515          featureQuery.restrict(Restrictions.neq(Hql
     516              .property("peptideSequence"), null));
     517          featureQuery.restrict(Restrictions.gteq(Hql
     518              .property("chargeState"),Expressions.integer(2)));
     519         
     520          writer.println("Number of features with sequences in file " +currentMsFile +" after alignment: " +featureQuery.list(dc).size());
     521          writer.println("Total increase in sequence coverage for file " +currentMsFile + ": " +(featureQuery.list(dc).size()-originalFeatWithSeq[fileNbr])/originalFeatWithSeq[fileNbr]);
     522          featureQuery.reset();
     523        }
     524        dc.close();
     525       
     526        writer.println("Total number of features matched for " + uniqueMsFiles.size() + " MS files: " + count);
     527        response.setDone("Total number of features matched for " + uniqueMsFiles.size() + " MS files: " + count);
     528       
    506529        // Close log file
    507530        writer.close();
     
    536559    String retval = null;
    537560    Annotation anno2 = annotator.getAnnotation(currentMsFile,
    538       annotationType);
     561        annotationType);
    539562    List<String> fracList2 = extracted(anno2);
    540563    if (fracList2 != null && !fracList2.isEmpty())
     
    545568  }
    546569
    547  
     570
    548571  private ArrayList<Point> getPointMatchBySeq(ArrayList<Feature> features, ArrayList<Feature> features2)
    549572  {
    550    
     573
    551574    Iterator<Feature> itr = features.iterator();
    552575    Iterator<Feature> itr2;
    553576
    554577    ArrayList<Point> points = new ArrayList<Point>();
    555    
     578
    556579    while(itr.hasNext())
    557580    {
    558581      Feature f = itr.next();
    559      
     582
    560583      itr2 = features2.iterator();
    561      
     584
    562585      int nbrHits = 0;
    563586
     
    565588
    566589        Feature f2 = itr2.next();
    567        
    568        
     590
     591
    569592        if(f.getPeptideSequence().equals(f2.getPeptideSequence()) && f.getChargeState().equals(f2.getChargeState()))
    570593        {
     
    572595
    573596          Point tempPoint = new Point(f,f2);
    574          
     597
    575598          //contains is based on the RT x-value, no duplicate x is allowed for the regression
    576599          if(!points.contains(tempPoint))
     
    579602              if(!(Math.abs(f.getMassToChargeRatio()-f2.getMassToChargeRatio())>0.01) )
    580603              {
    581                
    582                
     604
     605
    583606                //double weight = Math.max(Math.abs(tempPoint.getXLength()-meanWeight1)/meanWeight1,Math.abs(tempPoint.getYLength()-meanWeight2)/meanWeight2);
    584607                //weight = 1/weight;
    585608                //tempPoint.setWeight(weight);
    586                
     609
    587610                points.add(tempPoint);
    588611              }
     
    607630          }
    608631
    609          
     632
    610633        }
    611634
     
    621644
    622645    }
    623    
     646
    624647    return points;
    625    
    626   }
    627  
     648
     649  }
     650
    628651  private ArrayList<Point> getPointMatchByTol(ArrayList<Feature> featuresIn, ArrayList<Feature> featuresIn2, double percentage )
    629652  {
    630    
     653
    631654    Comparator<Feature> apexC = new ApexIntensityComparator();
    632655    int cutOff1 = (int) Math.floor(featuresIn.size()*(100-percentage)/100);
    633656    int cutOff2 = (int) Math.floor(featuresIn2.size()*(100-percentage)/100);
    634    
     657
    635658    Collections.sort(featuresIn,apexC);
    636659    Collections.sort(featuresIn2,apexC);
    637    
     660
    638661    //Collections.sort(featuresIn,Collections.reverseOrder(apexC));
    639662    //Collections.sort(featuresIn2,Collections.reverseOrder(apexC));
    640    
     663
    641664    List<Feature> features = featuresIn.subList(cutOff1,featuresIn.size()-1);
    642665    List<Feature> features2 = featuresIn2.subList(cutOff2,featuresIn2.size()-1);
    643    
     666
    644667    log.debug("Tol matching size: " +features.size() +" and " +features2.size());
    645    
    646    
     668
     669
    647670    Iterator<Feature> itr = features.iterator();
    648671    Iterator<Feature> itr2;
    649672
    650673    ArrayList<Point> points = new ArrayList<Point>();
    651    
     674
    652675    while(itr.hasNext())
    653676    {
    654677      Feature f = itr.next();
    655      
     678
    656679      itr2 = features2.iterator();
    657      
     680
    658681      int nbrHits = 0;
    659682
     
    685708              //multiple hit for the tolerance
    686709
    687              
     710
    688711              log.debug("Duplicate tol hit. RT diff p1: " +points.get(points.size()-1).getDiff() + " p2: " +tempPoint.getDiff());
    689712              //use the one with the "BEST" match, ie the one with the smallest difference
     
    700723          }
    701724
    702          
    703         }
    704 
    705       }
    706 
    707     }
    708    
     725
     726        }
     727
     728      }
     729
     730    }
     731
    709732    Comparator<Feature> rtC = new RTComparatorF();
    710733    Collections.sort(featuresIn,rtC);
    711734    Collections.sort(featuresIn2,rtC);
    712    
     735
    713736    log.debug("Tol matching points size: " +points.size());
    714737
    715    
     738
    716739    return points;
    717    
    718   }
    719  
    720  
     740
     741  }
     742
     743
    721744  /**
    722745   * Match feature lists and propagate sequences from first to second list.
    723746   */
    724747  private int matchFeatureLists(FilePoint fp, ItemFactory factory,
    725       Project project, PrintWriter writer, ItemQuery<Feature> featureQuery,List<File> uniqueMsFiles, double tolerance, double timeTolin)
     748      Project project, PrintWriter writer, ItemQuery<Feature> featureQuery,List<File> uniqueMsFiles, double tolerance, double timeTol)
    726749  {
    727    
    728    
     750
     751
    729752    DbControl dc = sc.newDbControl();
    730    
     753
    731754    ArrayList<Point> features = fp.getEvalList();
    732755    PolynomialSplineFunction func = fp.getSplineFunction();
    733    
     756
    734757    featureQuery.restrict(Restrictions.eq(Hql
    735       .property("msFile"), Hql.entity(uniqueMsFiles.get(fp.getFirstFile()))));
     758        .property("msFile"), Hql.entity(uniqueMsFiles.get(fp.getFirstFile()))));
    736759    featureQuery.restrict(Restrictions.gteq(Hql
    737       .property("chargeState"),Expressions.integer(2)));
     760        .property("chargeState"),Expressions.integer(2)));
    738761    featureQuery.order(Orders.asc(Hql.property("apexRetentionTimeInMinutes")));
    739762    List<Feature> tempAllFeatures = featureQuery.list(dc);
    740763    ArrayList<Feature> allFeatures = new ArrayList<Feature>(tempAllFeatures);
    741764    featureQuery.reset();
    742    
     765
    743766    featureQuery.restrict(Restrictions.eq(Hql
    744767        .property("msFile"), Hql.entity(uniqueMsFiles.get(fp.getFirstFile()))));
    745       featureQuery.restrict(Restrictions.gteq(Hql
     768    featureQuery.restrict(Restrictions.gteq(Hql
    746769        .property("chargeState"),Expressions.integer(2)));
    747       featureQuery.order(Orders.asc(Hql.property("apexRetentionTimeInMinutes")));
     770    featureQuery.order(Orders.asc(Hql.property("apexRetentionTimeInMinutes")));
    748771    featureQuery.restrict(Restrictions.neq(Hql.property("peptideSequence"), null));
    749772    List<Feature> tempAllFeaturesSeq = featureQuery.list(dc);
    750773    ArrayList<Feature> allFeaturesSeq = new ArrayList<Feature>(tempAllFeaturesSeq);
    751774    featureQuery.reset();
    752    
     775
    753776    featureQuery.restrict(Restrictions.eq(Hql
    754       .property("msFile"), Hql.entity(uniqueMsFiles.get(fp.getSecondFile()))));
     777        .property("msFile"), Hql.entity(uniqueMsFiles.get(fp.getSecondFile()))));
    755778    featureQuery.restrict(Restrictions.gteq(Hql
    756       .property("chargeState"),Expressions.integer(2)));
     779        .property("chargeState"),Expressions.integer(2)));
    757780    featureQuery.order(Orders.asc(Hql.property("apexRetentionTimeInMinutes")));
    758781    List<Feature> tempAllFeatures2 = featureQuery.list(dc);
     
    760783    featureQuery.reset();
    761784   
     785    featureQuery.restrict(Restrictions.eq(Hql
     786        .property("msFile"), Hql.entity(uniqueMsFiles.get(fp.getSecondFile()))));
     787    featureQuery.restrict(Restrictions.gteq(Hql
     788        .property("chargeState"),Expressions.integer(2)));
     789    featureQuery.order(Orders.asc(Hql.property("apexRetentionTimeInMinutes")));
     790    featureQuery.restrict(Restrictions.neq(Hql.property("peptideSequence"), null));
     791    List<Feature> tempAllFeaturesSeq2 = featureQuery.list(dc);
     792    ArrayList<Feature> allFeaturesSeq2 = new ArrayList<Feature>(tempAllFeaturesSeq);
     793    featureQuery.reset();
     794
    762795    ArrayList<Feature> bestMatchList = new ArrayList<Feature>();
    763796    ArrayList<Feature> fList = new ArrayList<Feature>();
     
    767800    //double timeTol = 0.5;
    768801    //double[] timeTol = {0.25,0.5,0.75,1,1.5,2,3,4,5};
    769     double[] timeTol = {0, timeTolin};
    770    
     802
     803
    771804    writer.println("===============================");
    772805    writer.println("Evaluating match with " +features.size() +" features with sequences for file " +fp.getFirstFile() +" and " +fp.getSecondFile() +" with similarity score: " +fp.getSimValue());
    773806    writer.println();
     807
     808
     809    log.debug("Evaluating match with " +features.size() +" features with sequences for file " +fp.getFirstFile() +" and " +fp.getSecondFile() +" with similarity score: " +fp.getSimValue());
    774810   
    775811   
    776     log.debug("Evaluating match with " +features.size() +" features with sequences for file " +fp.getFirstFile() +" and " +fp.getSecondFile() +" with similarity score: " +fp.getSimValue());
    777 
     812    String localSampleId = null;
     813    String fractionId = null;
     814
     815    for (Feature f : allFeatures)
     816    {
     817      if (f.getPeptideSequence() != null)
     818      {
     819        Iterator<Hit> i = f.getHits().iterator();
     820        if (i.hasNext())
     821        {
     822          Hit h = i.next();
     823          localSampleId = h.getLocalSampleId();
     824          fractionId = h.getFractionId();
     825          log
     826          .debug("Found local sample id : " + localSampleId + " fraction: " + fractionId);
     827          break;
     828        }
     829      }
     830    }
    778831
    779832    String localSampleId2 = null;
     
    791844          fractionId2 = h.getFractionId();
    792845          log
    793             .debug("Found local sample id2:" + localSampleId2 + " fraction:" + fractionId2);
     846          .debug("Found local sample id2: " + localSampleId2 + " fraction for set 2: " + fractionId2);
    794847          break;
    795848        }
     
    797850    }
    798851
    799    
    800     double[] fineSeq = new double[timeTol.length];
    801     double[] notFineSeq = new double[timeTol.length];
    802     double[] fineTolSeq = new double[timeTol.length];
    803     /*
     852
     853    double fineSeq = 0;
     854    double fineTolSeq = 0;
     855
    804856    for (Point p : features)
    805857    {     
     
    814866        {
    815867          fRT = (float) func.value(f
    816             .getApexRetentionTimeInMinutes().doubleValue());
     868              .getApexRetentionTimeInMinutes().doubleValue());
    817869        }
    818870        catch (ArgumentOutsideDomainException e)
     
    824876        Feature f2 = p2.getYFeature();
    825877        String f2seq = f2.getPeptideSequence();
    826 
    827 
    828 
    829878        //  log.debug("Feature to match RT:" + f.getApexRetentionTimeInMinutes() + "after shift:" + fRT + " diff:" + (fRT - f.getApexRetentionTimeInMinutes()));
    830 
    831879        //sequence evaluation for recall and tol evaluation for precision
    832         //TODO: skriv detta som funktion så att både allFeatures och Points kan skickas in för utvärdering
    833         for (int k=0;k<timeTol.length;k++){
    834 
    835 
    836 
    837           if ((Math.abs(f.getMassToChargeRatio() - f2.getMassToChargeRatio()) < tolerance) && f2.getChargeState().equals(f.getChargeState()) && (Math.abs(f2.getApexRetentionTimeInMinutes() - fRT) < timeTol[k]))
     880        if ((Math.abs(f.getMassToChargeRatio() - f2.getMassToChargeRatio()) < tolerance) && f2.getChargeState().equals(f.getChargeState()) && (Math.abs(f2.getApexRetentionTimeInMinutes() - fRT) < timeTol))
     881        {
     882         
     883          //fseq och f2seq ska inte kunna vara null
     884          if (f2seq.equals(fseq))
    838885          {
    839 
    840             if (f2seq!=null && fseq!=null && f2seq.equals(fseq))
    841             {
    842               fineSeq[k]++;
    843 
    844             }else{
    845 
    846               fineTolSeq[k]++;
    847 
    848               writer.println("Current timetol: " +timeTol[k]);
    849               writer.println("Feature with sequence " + f.getPeptideSequence() + " matching " + f2.getPeptideSequence() + " time diff: " +(Math.abs(f2.getApexRetentionTimeInMinutes() - fRT) + " mz diff: " +(Math.abs(f.getMassToChargeRatio() - f2.getMassToChargeRatio()))));
    850 
    851             }
    852 
    853           }else if (f2seq!=null && fseq!=null && f2seq.equals(fseq) && f2.getChargeState().equals(f.getChargeState()))
    854           {
    855             notFineSeq[k]++;
    856             log.debug("Current timetol: " +timeTol[k]);
    857             log.debug("Feature with sequence " + f.getPeptideSequence() + " not matching " + f2.getPeptideSequence() + " time diff: " +(Math.abs(f2.getApexRetentionTimeInMinutes() - fRT) + " mz diff: " +(Math.abs(f.getMassToChargeRatio() - f2.getMassToChargeRatio()))));
     886            fineSeq++;
     887
     888          }else{
     889
     890            fineTolSeq++;
     891
     892            writer.println("Current timetol: " +timeTol);
     893            writer.println("Feature with sequence " + f.getPeptideSequence() + " matching " + f2.getPeptideSequence() + " time diff: " +(Math.abs(f2.getApexRetentionTimeInMinutes() - fRT) + " mz diff: " +(Math.abs(f.getMassToChargeRatio() - f2.getMassToChargeRatio()))));
     894
    858895          }
    859 
    860         }
    861 
    862       }
    863     }
    864 */
     896       
     897        //sequences that should be matching
     898        }else if (f2seq.equals(fseq) && f2.getChargeState().equals(f.getChargeState())){
     899         
     900          log.debug("Current timetol: " +timeTol +" and current mz tol: " +tolerance);
     901          log.debug("Feature with sequence " + f.getPeptideSequence() + " not matching " + f2.getPeptideSequence() + " time diff: " +(Math.abs(f2.getApexRetentionTimeInMinutes() - fRT) + " mz diff: " +(Math.abs(f.getMassToChargeRatio() - f2.getMassToChargeRatio()))));
     902     
     903        }
     904      }
     905    }
     906
    865907
    866908
    867909    int count = 0;
    868     double[] fine = new double[timeTol.length];
    869     double[] fineTol = new double[timeTol.length];
    870     double[] fineTest = new double[timeTol.length];
    871    
    872910    int minNdx = 0;
    873    
    874    
    875  
     911
     912    //double timeTolTemp = 0;
     913
    876914    for(Feature f:allFeatures){
    877      
    878      
     915
     916
    879917      float fRT = f.getApexRetentionTimeInMinutes();
    880918
     
    884922        {
    885923          fRT = (float) func.value(f
    886             .getApexRetentionTimeInMinutes().doubleValue());
     924              .getApexRetentionTimeInMinutes().doubleValue());
    887925        }
    888926        catch (ArgumentOutsideDomainException e)
     
    890928      }
    891929
    892       while( minNdx<(allFeatures2.size()-1) && fRT-allFeatures2.get(minNdx).getApexRetentionTimeInMinutes()>timeTol[timeTol.length-1]){
     930      while( minNdx<(allFeatures2.size()-1) && fRT-allFeatures2.get(minNdx).getApexRetentionTimeInMinutes()>timeTol){
    893931        minNdx++;
    894932      }
    895      
     933
    896934      Feature bestMatch = null;
    897      
    898      
    899       boolean[] hit = new boolean[timeTol.length];
    900       boolean[] moreHits = new boolean[timeTol.length];
    901      
    902     //  log.debug("For RT: " +fRT +" Starting at: " +allFeatures2.get(minNdx).getApexRetentionTimeInMinutes());
    903      
     935
     936      boolean hit = false;
     937   
     938
     939      //  log.debug("For RT: " +fRT +" Starting at: " +allFeatures2.get(minNdx).getApexRetentionTimeInMinutes());
     940
    904941      for(Feature f2: allFeatures2.subList(minNdx,allFeatures2.size()-1))
    905942      {
    906943
    907 
    908         /*if(fRT-f2.getApexRetentionTimeInMinutes()>timeTol[timeTol.length-1]){
    909           continue;
    910         }*/
    911 
    912944        //slut på sökning
    913         if(f2.getApexRetentionTimeInMinutes()-fRT>timeTol[timeTol.length-1]){
    914       //    log.debug("For RT: " +fRT +"Ending at: " +allFeatures2.get(minNdx).getApexRetentionTimeInMinutes());
     945        if(f2.getApexRetentionTimeInMinutes()-fRT>timeTol){
    915946          break;
    916947        }
    917948
    918 
    919 
    920949        if ((Math.abs(f.getMassToChargeRatio() - f2.getMassToChargeRatio()) < tolerance) && f2.getChargeState().equals(f.getChargeState()))
    921950
    922951        {
    923 
    924           for (int k=0;k<timeTol.length;k++){
    925 
    926            
    927 
    928             if(Math.abs(f2.getApexRetentionTimeInMinutes() - fRT) < timeTol[k])
    929             {
    930              
    931               fineTest[k]++;
    932              
    933               if(!hit[k]){
    934                 //there is a single hit
    935                 fine[k]++;
    936                 hit[k] = true;
    937                 bestMatch = f2;
    938                 continue;
    939               }else if(!moreHits[k]){
    940 
    941                 //there is more than one hit for this peptide sequence
    942                
    943                 fineTol[k]++;
    944               //  log.debug("There is a second hit: " +fineTol[k]);
    945                 //moreHits[k] = true;
    946               }
    947 
     952          if(Math.abs(f2.getApexRetentionTimeInMinutes() - fRT) < timeTol)
     953          {
     954            if(!hit){
     955              //there is a single hit             
     956              hit = true;
     957              bestMatch = f2;
     958            }else{
    948959              //check if the second and further hits are better than the current bestMatch, will always contain the best match of all features, even split peaks
    949960              if(Math.abs(bestMatch.getApexRetentionTimeInMinutes() - fRT)>Math.abs(f2.getApexRetentionTimeInMinutes() - fRT))
    950961              {
    951962                log.debug("Best match is updated.");
    952                 log.debug("Old rt diff is: " +Math.abs(bestMatch.getApexRetentionTimeInMinutes() - fRT) + "new is: " +Math.abs(f2.getApexRetentionTimeInMinutes() - fRT));
     963                log.debug("Old rt diff is: " +Math.abs(bestMatch.getApexRetentionTimeInMinutes() - fRT));
    953964                bestMatch = f2;
     965                log.debug("New rt diff is: " +Math.abs(bestMatch.getApexRetentionTimeInMinutes() - fRT));
     966               
    954967              }
    955968
    956969            }
    957            
    958970
    959971          }
    960 
    961         }
    962        
    963       }
    964 
    965         //TODO: Same with sequence. Write back to feature table as well as hit table. May not be restricted at all to project in future.
    966       /*QueryFactory qf2 = new QueryFactory();
    967       ItemQuery<Feature> featureQuery2 = qf2.select(Feature.class);
    968       featureQuery2.restrict(Restrictions.eq(Hql.property("project"), Hql.entity(project)));*/
    969      
    970       /*ItemQuery query = ...
    971       List<Integer> selected = ...
    972 
    973       query.restrict(
    974         Restrictions.in(
    975           Hql.property("id"),
    976           Expressions.parameter("selectedItems", selected, Type.INT)
    977       ));
    978 */
    979      
    980      
     972        }
     973
     974      }
     975
     976
    981977      if(bestMatch!=null)
    982978      {
     979
     980
     981        //log.debug("Matched features with RT " + f.getApexRetentionTimeInMinutes() + " and RT:" + bestMatch.getApexRetentionTimeInMinutes());
    983982       
    984        
    985         log.debug("Matched features with RT " + f.getApexRetentionTimeInMinutes() + " and RT:" + bestMatch.getApexRetentionTimeInMinutes());
    986         count++;
    987        
    988         log.debug("First feature: " +f.getClusterId() + " bestMatch feature: " +bestMatch.getClusterId());
     983
     984        //log.debug("First feature: " +f.getClusterId() + " bestMatch feature: " +bestMatch.getClusterId());
    989985        //All features have clusterID:s, they are set at start of alignment -> all features that have a clusterID have undergone alignment
    990986        if(!f.getClusterId().equals(bestMatch.getClusterId())){
    991987          //log.debug("Setting new clusterID");
    992          
     988
    993989          bestMatchList.add(bestMatch);
    994990          fList.add(f);
    995991          bestMatchClusterIDList.add(bestMatch.getClusterId());
    996          
    997           //All features that have the same clusterID as bestMatch will have their clusterID changed to that of f
    998           //TODO: Restrict to project to start with - LATER REMOVE SO ALL FEATURES IN DATABASE WILL BE CHANGED
    999          
    1000           //long tempID = bestMatch.getClusterId();
    1001          
    1002           /*dc.reattachItem(bestMatch);
    1003           bestMatch.setClusterId(f.getClusterId());
    1004          
    1005           dc.commit(); 
    1006           dc = sc.newDbControl();
    1007           dc.reattachItem(f);*/
    1008          
    1009           /*
    1010           log.debug("First feature: " +f.getClusterId() + " bestMatch feature: " +bestMatch.getClusterId());
    1011          
    1012          
    1013           //feature should find itself in the database
    1014          
    1015           featureQuery2.restrict(Restrictions.eq(Hql.property("clusterId"), Expressions.aLong(f.getClusterId())));
    1016           List<Feature> clusterIDFeatures =  featureQuery2.list(dc);
    1017          
    1018           //size of clusterF is larger than 0 only if there exists features that have the same clusterID as f
    1019           log.debug("Size of clusterIdFeatures: " +clusterIDFeatures.size());
    1020           featureQuery2.reset();
    1021          
    1022           for (Feature clusterF: clusterIDFeatures){
    1023             dc.reattachItem(clusterF);
    1024             clusterF.setClusterId(f.getClusterId());
    1025            
    1026           }*/
    1027         }
    1028        
    1029        
    1030         //TODO: Propagate the sequence (within the project for now)-> if both have a sequence or both are null, do nothing (CHANGE LATER)
    1031         //Update hits table
    1032        
    1033         /*if (f.getPeptideSequence()!=null && bestMatch.getPeptideSequence()==null){ 
    1034           updateHitsList(f,bestMatch,localSampleId2, fractionId2,allFeatures,writer,dc,factory,project);
    1035         }else if(f.getPeptideSequence()==null && bestMatch.getPeptideSequence()!=null){
    1036           updateHitsList(bestMatch,f,localSampleId2,fractionId2,allFeatures2,writer,dc,factory,project);
    1037         }*/
    1038        
    1039        
    1040       }
    1041      
    1042 
    1043     }
     992
     993        }
     994
     995      }
     996
     997
     998    }
     999   
     1000    int hitsCounter = 0;
    10441001   
    10451002    if (!bestMatchClusterIDList.isEmpty()){
     
    10551012      log.debug("Size of fList: " +fList.size());
    10561013
    1057 
     1014      count=bestMatchList.size();
    10581015
    10591016
     
    10641021
    10651022        int featureNbr = bestMatchClusterIDList.indexOf(bestMatchClusterID);
    1066         log.debug("Featurenr: " +featureNbr);
     1023        //log.debug("Featurenr: " +featureNbr);
    10671024
    10681025        Feature f = fList.get(featureNbr);
     
    10711028        dc.reattachItem(bestMatch);
    10721029
    1073         log.debug("First feature: " +f.getClusterId() + " bestMatch feature: " +bestMatch.getClusterId());
     1030        //log.debug("First feature: " +f.getClusterId() + " bestMatch feature: " +bestMatch.getClusterId());
    10741031
    10751032        bestMatch.setClusterId(f.getClusterId());
    1076        
     1033
    10771034        //updating hits
    10781035        if (f.getPeptideSequence()!= null && bestMatch.getPeptideSequence()==null){
    1079           updateHitsList(f,bestMatch,localSampleId2,fractionId2,allFeaturesSeq,writer,dc,factory,project);
    1080         }
    1081 
    1082         log.debug("First feature after set: " +f.getClusterId() + " bestMatch feature: " +bestMatch.getClusterId());
     1036          //log.debug("Updating hit for sequence: " +f.getPeptideSequence());
     1037          hitsCounter+=updateHitsList(f,bestMatch,localSampleId2,fractionId2,allFeaturesSeq,writer,dc,factory,project);
     1038        }else if(f.getPeptideSequence()== null && bestMatch.getPeptideSequence()!=null){
     1039          hitsCounter+=updateHitsList(bestMatch,f,localSampleId,fractionId,allFeaturesSeq2,writer,dc,factory,project);
     1040        }
     1041        //TODO: what to do if both have sequences?
     1042
     1043        //log.debug("First feature after set: " +f.getClusterId() + " bestMatch feature: " +bestMatch.getClusterId());
    10831044
    10841045      }
     
    10871048    }
    10881049    dc.commit();
     1050
     1051
     1052    writer.println();
     1053
     1054    writer.println("Number of features matched:" + count);
     1055    writer.println("Number of hits created:" + hitsCounter);
     1056    writer.println("Pairwsie alignment recall based on sequences with mzTol " +tolerance +" and  timeTol " +timeTol +": " +fineSeq/features.size());
     1057    writer.println("Pairwsie alignment precision based on sequences with mzTol " +tolerance +" and  timeTol " +timeTol +": " +(1-fineTolSeq/features.size()));
     1058    writer.println();
     1059
     1060
     1061    return count;
     1062  }
     1063
     1064
     1065  private int updateHitsList(Feature f, Feature f2, String localSampleId2, String fractionId2, ArrayList<Feature> allFeaturesSeq, PrintWriter writer, DbControl dc, ItemFactory factory, Project project){
     1066    //create a hit in database for f2 (the feature whose sequence has been updated)
     1067
     1068    f2.setPeptideSequence(f.getPeptideSequence());
     1069    dc.reattachItem(f);
     1070    dc.reattachItem(f2);
    10891071   
     1072    int newHitCounter = 0;
    10901073   
    1091     writer.println();
    1092     //writer.println("Peptide sequences propagated:" + count);
    1093     writer.println("Alignment recall and precision based on sequences with mzTol " +tolerance +": ");
    1094     for (int k=0;k<timeTol.length;k++){
    1095       writer.println("For timeTol " +timeTol[k] +": " +fineSeq[k]/features.size() +" " +(1-fineTolSeq[k]/features.size()));
    1096     }
    1097     writer.println("Precision for all features with mzTol " +tolerance +": ");
    1098     for (int k=0;k<timeTol.length;k++){
    1099       writer.println("For timeTol " +timeTol[k] +": ");
    1100       writer.println("Number of total matched features: " +fineTest[k]);
    1101       writer.println("Number of matched features: " +fine[k] +" and recall value:  " +(fine[k]*2.0/(allFeatures.size()+allFeatures2.size())));
    1102       writer.println("Number of features matching more than one: " +fineTol[k] +" Precision vaule: " +(1-(fineTol[k]/(fine[k]+fineTol[k]))));
    1103     }
    1104     writer.println();
    1105    
    1106    
    1107     return count;
    1108   }
    1109 
    1110  
    1111   private void updateHitsList(Feature f, Feature f2, String localSampleId2, String fractionId2, ArrayList<Feature> allFeaturesSeq, PrintWriter writer, DbControl dc, ItemFactory factory, Project project){
    1112         //create a hit in database for f2 (the feature whose sequence has been updated)
    1113        
    1114           f2.setPeptideSequence(f.getPeptideSequence());
    1115           dc.reattachItem(f);
    1116           dc.reattachItem(f2);
    1117          
    1118             Hit h = null;
    1119             for (Hit h1 : f.getHits())
    1120             {
    1121               // Take the best hit matching the feature
    1122               if (h == null || h1.getCombinedFDR() != null && (h
    1123                   .getCombinedFDR() == null || (h1
    1124                       .getCombinedFDR() < h.getCombinedFDR())))
    1125               {
    1126                 h = h1;
    1127               }
    1128             }
     1074    Hit h = null;
     1075    for (Hit h1 : f.getHits())
     1076    {
     1077      // Take the best hit matching the feature
     1078      if (h == null || h1.getCombinedFDR() != null && (h
     1079          .getCombinedFDR() == null || (h1
     1080              .getCombinedFDR() < h.getCombinedFDR())))
     1081      {
     1082        h = h1;
     1083      }
     1084    }
    11291085
    11301086    /* * If there was no hit for the feature, then take
    11311087     * from another feature with the same peptide
    11321088     * sequence (ignoring delta of modifications).
    1133 */
    1134             if (h == null)
     1089     */
     1090    if (h == null)
     1091    {
     1092      String fseq = f.getPeptideSequence();
     1093      if (fseq.contains(" "))
     1094        fseq = fseq.substring(0, fseq
     1095            .lastIndexOf(' '));
     1096      for (Feature f3 : allFeaturesSeq)
     1097      {
     1098        if ((fseq.equals(f3.getPeptideSequence()) || (fseq
     1099            .contains(" ") && f3
     1100            .getPeptideSequence().startsWith(fseq))) && !f3
     1101            .getHits().isEmpty() && f.getChargeState().equals(f3.getChargeState()))
     1102        {
     1103          for (Hit h1 : f3.getHits())
     1104          {
     1105            if (h == null || h1
     1106                .getCombinedFDR() != null && (h
     1107                    .getCombinedFDR() == null || (h1
     1108                        .getCombinedFDR() < h
     1109                        .getCombinedFDR())))
    11351110            {
    1136               String fseq = f.getPeptideSequence();
    1137               if (fseq.contains(" "))
    1138                 fseq = fseq.substring(0, fseq
    1139                   .lastIndexOf(' '));
    1140               for (Feature f3 : allFeaturesSeq)
    1141               {
    1142                   if ((fseq.equals(f3.getPeptideSequence()) || (fseq
    1143                       .contains(" ") && f3
    1144                       .getPeptideSequence().startsWith(fseq))) && !f3
    1145                       .getHits().isEmpty() && f.getChargeState().equals(f3.getChargeState()))
    1146                   {
    1147                     for (Hit h1 : f3.getHits())
    1148                     {
    1149                       if (h == null || h1
    1150                           .getCombinedFDR() != null && (h
    1151                               .getCombinedFDR() == null || (h1
    1152                                   .getCombinedFDR() < h
    1153                                   .getCombinedFDR())))
    1154                       {
    1155                         h = h1;
    1156                         log
    1157                         .debug("Assigning hit from feature at RT " + f3
    1158                           .getApexRetentionTimeInMinutes() + " with sequence:" + f3
    1159                           .getPeptideSequence());
    1160                       }
    1161                     }
    1162                   }
    1163                
    1164               }
     1111              h = h1;
     1112              log
     1113              .debug("Assigning hit from feature at RT " + f3
     1114                  .getApexRetentionTimeInMinutes() + " with sequence:" + f3
     1115                  .getPeptideSequence());
    11651116            }
    1166             if (h != null && localSampleId2 != null)
    1167             {
    1168               Hit newHit = factory.create(Hit.class);
    1169               dc.saveItem(newHit);
    1170               // To allow the user to trace the identification:
    1171               newHit.setPeakListFile(h.getPeakListFile());
    1172               newHit.setIdentificationResultFile(h.getIdentificationResultFile());
    1173               newHit.setSpectrumStringId(h.getSpectrumStringId());
    1174               newHit.setSpectrumId(h.getSpectrumId());
    1175               newHit.setCombinedFDR(h.getCombinedFDR());
    1176               newHit.setDescription(h.getDescription());
    1177               newHit.setExternalId(h.getExternalId());
    1178               newHit.setCharge(h.getCharge());
    1179               // From the sample we're matching with:
    1180               newHit.setFractionId(fractionId2);
    1181               newHit.setLocalSampleId(localSampleId2);
    1182               newHit.setScoreType("Proteios aligned");
    1183               newHit.setFeature(f2);
    1184               newHit.setProject(project);
    1185               newHit.setPrecursorQuantity(f2
    1186                 .getIntegratedIntensity());
    1187               newHit.setRetentionTimeInMinutes(f2
    1188                 .getApexRetentionTimeInMinutes());
    1189               newHit.setPrimaryCombined(true);
    1190               newHit.setProtein(false);
    1191               newHit.setFirstInCombination(h);
    1192               writer.println("New hit generated:" + newHit
    1193                 .getDescription());
    1194             }
    1195             else
    1196             {
    1197               writer
    1198               .println("Feature: " + f2
    1199                 .getMassToChargeRatio() + " sequence " + f2
    1200                 .getPeptideSequence() + ". Local sample id: " + localSampleId2);
    1201               f2.setHits(f.getHits());
    1202             }
    1203            
    1204   }
    1205  
     1117          }
     1118        }
     1119
     1120      }
     1121    }
     1122    if (h != null && localSampleId2 != null)
     1123    {
     1124      Hit newHit = factory.create(Hit.class);
     1125      dc.saveItem(newHit);
     1126      // To allow the user to trace the identification:
     1127      newHit.setPeakListFile(h.getPeakListFile());
     1128      newHit.setIdentificationResultFile(h.getIdentificationResultFile());
     1129      newHit.setSpectrumStringId(h.getSpectrumStringId());
     1130      newHit.setSpectrumId(h.getSpectrumId());
     1131      newHit.setCombinedFDR(h.getCombinedFDR());
     1132      newHit.setDescription(h.getDescription());
     1133      newHit.setExternalId(h.getExternalId());
     1134      newHit.setCharge(h.getCharge());
     1135      // From the sample we're matching with:
     1136      newHit.setFractionId(fractionId2);
     1137      newHit.setLocalSampleId(localSampleId2);
     1138      newHit.setScoreType("Proteios aligned");
     1139      newHit.setFeature(f2);
     1140      newHit.setProject(project);
     1141      newHit.setPrecursorQuantity(f2
     1142          .getIntegratedIntensity());
     1143      newHit.setRetentionTimeInMinutes(f2
     1144          .getApexRetentionTimeInMinutes());
     1145      newHit.setPrimaryCombined(true);
     1146      newHit.setProtein(false);
     1147      newHit.setFirstInCombination(h);
     1148      log.debug("New hit generated:" + newHit
     1149          .getDescription());
     1150      newHitCounter++;
     1151    }
     1152    else
     1153    {
     1154      log.debug("Feature: " + f2
     1155          .getMassToChargeRatio() + " sequence " + f2
     1156          .getPeptideSequence() + ". Local sample id: " + localSampleId2);
     1157      f2.setHits(f.getHits());
     1158    }
     1159   
     1160    return newHitCounter;
     1161
     1162  }
     1163
    12061164  private PolynomialSplineFunction estimateSplineFunc(ArrayList<Point> points, PrintWriter writer)
    12071165  {
     
    12161174      double[] y = new double[points.size()];
    12171175      double[] weights = new double[points.size()];
    1218      
     1176
    12191177
    12201178      int index = 0;
     
    12271185        y[index]=tempPoint.getY();
    12281186        weights[index]=tempPoint.getWeight();
    1229        
     1187
    12301188        index++;
    12311189      }
    1232      
     1190
    12331191      try
    12341192      {
     
    12451203  }
    12461204
    1247  
     1205
    12481206  private void setMzDistribution(List<Feature> features, double[][] mzDist, double[] meanLength, int fileNbr){
    1249    
     1207
    12501208    //TODO: get the minMz, maxMz, rtMax values automatically
    1251    
     1209
    12521210    int minMz=400;
    12531211    int maxMz=2000;
    1254    
     1212
    12551213    int mult = 100;
    1256    
     1214
    12571215    mzDist[fileNbr] = new double[(mult)*(maxMz-minMz)];
    1258    
     1216
    12591217    double sum = 0;
    12601218    double nbrSeq = 0;
    1261    
     1219
    12621220    //Double min = Double.POSITIVE_INFINITY;
    12631221    //Double max = Double.NEGATIVE_INFINITY;
    1264    
     1222
    12651223    //int minRT = Integer.MAX_VALUE;
    12661224    //int maxRT = Integer.MIN_VALUE;
    1267    
     1225
    12681226    for(Feature f: features)
    12691227    {
    12701228      int mz = (int) (Math.round((f.getMassToChargeRatio()-minMz)*mult));
    1271      
    1272      
     1229
     1230
    12731231      mzDist[fileNbr][mz]++;
    1274      
     1232
    12751233      if(f.getPeptideSequence()!=null)
    12761234      {
    12771235        nbrSeq++;
    12781236        sum+=f.getEndRetentionTimeInMinutes().doubleValue()-f.getStartRetentionTimeInMinutes().doubleValue();
    1279        
    1280       }
    1281    
    1282     }
    1283    
     1237
     1238      }
     1239
     1240    }
     1241
    12841242    meanLength[fileNbr] = sum/nbrSeq;
    1285    
    1286   }
    1287  
     1243
     1244  }
     1245
    12881246  private class SeqComparator implements Comparator<Feature>{
    12891247
     
    12951253        return f.getChargeState().compareTo(f2.getChargeState());
    12961254      }else{
    1297        
     1255
    12981256        return f.getPeptideSequence().compareTo(f2.getPeptideSequence());
    12991257      }
    1300      
    1301     }
    1302    
    1303   }
    1304  
     1258
     1259    }
     1260
     1261  }
     1262
    13051263  private class FeatureSorter implements Comparator<Feature>{
    13061264
     
    13101268    {
    13111269      if (f.getPeptideSequence().equals(f2.getPeptideSequence()) && f.getChargeState().equals(f2.getChargeState())){
    1312        
     1270
    13131271        return f.getApexRetentionTimeInMinutes().compareTo(f2.getApexRetentionTimeInMinutes());
    1314        
     1272
    13151273      }else if(f.getPeptideSequence().equals(f2.getPeptideSequence())){
    1316        
     1274
    13171275        return f.getChargeState().compareTo(f2.getChargeState());
    1318        
     1276
    13191277      }else{
    1320        
     1278
    13211279        return f.getPeptideSequence().compareTo(f2.getPeptideSequence());
    1322        
    1323       }
    1324    
    1325      
    1326      
     1280
     1281      }
     1282
     1283
     1284
    13271285
    13281286    }
     
    13411299
    13421300  }
    1343  
     1301
    13441302  private class MzDiffComparator implements Comparator<Point>{
    13451303
     
    13531311
    13541312  }
    1355  
     1313
    13561314  private class ApexIntensityComparator implements Comparator<Feature>{
    13571315
     
    13651323
    13661324  }
    1367  
     1325
    13681326  private class IntensityComparator implements Comparator<Feature>{
    13691327
     
    13771335
    13781336  }
    1379  
    1380  
     1337
     1338
    13811339  private class RTComparator implements Comparator<Point>{
    13821340
     
    13901348
    13911349  }
    1392  
     1350
    13931351  private class RTComparatorF implements Comparator<Feature>{
    13941352
     
    14291387
    14301388    }
    1431    
    1432    
     1389
     1390
    14331391    public Feature getXFeature()
    14341392    {
    14351393      return this.x;
    14361394    }
    1437    
     1395
    14381396    public Feature getYFeature()
    14391397    {
    14401398      return this.y;
    14411399    }
    1442    
     1400
    14431401    public double getXLength()
    14441402    {
     
    14601418      return this.y.getApexRetentionTimeInMinutes().doubleValue();
    14611419    }
    1462    
     1420
    14631421    public double getMzX()
    14641422    {
     
    14861444      return Math.abs(this.getX()-this.getY());
    14871445    }
    1488    
     1446
    14891447    public double getMzDiff()
    14901448    {
     
    15251483    private int file2 = 0;
    15261484    private ArrayList<Point> evalPoint = new ArrayList<Point>();
    1527    
     1485
    15281486    public FilePoint()
    15291487    {}
    1530    
     1488
    15311489    public FilePoint(int fileNbr, int fileNbr2, double simValue, PolynomialSplineFunction func, ArrayList<Point> evalPoint)
    15321490    {
     
    15581516      return this.func;
    15591517    }
    1560    
     1518
    15611519    public ArrayList<Point> getEvalList() {
    1562      
     1520
    15631521      return this.evalPoint;
    15641522    }
Note: See TracChangeset for help on using the changeset viewer.