Changeset 4289
- Timestamp: Feb 16, 2012, 2:49:23 PM
- Location: trunk
- Files: 3 edited
trunk/api/core/conf/common-queries.xml
r4281 → r4289

      </description>
    </query>
-
-   <query id="GET_UNIQUE_SEQUENCES_IN_MSFILE_FOR_PROJECT" type="HQL">
-     <sql>
-       SELECT DISTINCT f.peptideSequence
-       FROM FeatureData f
-       WHERE f.project = :project
-       AND f.msFile = :msFile
-       AND NOT f.peptideSequence IS NULL
-       AND f.chargeState IS 2
-       ORDER by f.apexRetentionTimeInMinutes
-     </sql>
-     <description>
-       Load all distinct (unique) sequences for a ms file in the Feature table for a project
-     </description>
-   </query>
-
    <query id="GET_IDS_FOR_SEPARATION_METHODS_WITH_SEPARATION_EVENTS" type="HQL">
trunk/api/core/src/org/proteios/core/Feature.java
r4280 → r4289

    public static final Item TYPE = Item.PROTEIOS_FEATURE;

-   /*@SuppressWarnings("unchecked")
-   public static List<String> getAllClusterIDs(DbControl dc)
-   {
-       org.hibernate.Query query = HibernateUtil.getPredefinedQuery(dc
-           .getHibernateSession(),
-           "GET_ALL_FEATURES_WITH_SPECIFIC_CLUSTERID");
-       List<String> retval = query.list();
-       return retval;
-   }*/
-
    @SuppressWarnings("unchecked")
    …
-
-   @SuppressWarnings("unchecked")
-   public static List<String> getUniqueFeatureSequences(Project project, DbControl dc, File msFile)
-   {
-       org.hibernate.Query query = HibernateUtil.getPredefinedQuery(dc
-           .getHibernateSession(),
-           "GET_UNIQUE_SEQUENCES_IN_MSFILE_FOR_PROJECT");
-       query.setEntity("project", project.getData());
-       query.setEntity("msFile", msFile.getData());
-       List<String> retval = query.list();
-       return retval;
-   }
-
-   @SuppressWarnings("unchecked")
-   public static List<String> getUniqueFeatureSequnces(Project project, DbControl dc, File featureFile)
-   {
-       org.hibernate.Query query = HibernateUtil.getPredefinedQuery(dc
-           .getHibernateSession(),
-           "GET_UNIQUE_SEQUNCES_IN_FEATURE_FILE_FOR_PROJECT");
-       query.setEntity("project", project.getData());
-       query.setEntity("featureFile", featureFile.getData());
-       List<String> retval = query.list();
-       return retval;
-   }
-
    /*
     * From the Identifiable interface
trunk/plugin/src/org/proteios/plugins/FeatureSequencePropagator.java
r4288 r4289 123 123 Timestamp tStmp = new Timestamp(new Date().getTime()); 124 124 long initialClusterID = tStmp.getTime(); 125 126 double tolerance = 0.01; 127 double timeTol = 0.5; 125 126 //DEFAULT VALUES! Used if no sequences in the project at all! 127 double defaultTolerance = 0.01; 128 double defaultTimeTol = 0.5; 129 130 double tolerance = 0; 131 double timeTol = 0; 128 132 int count = 0; 129 133 // dbControl = object used for communicating with database … … 133 137 DbControl dcFile = sc.newDbControl(); 134 138 // Retrieve the job parameters 135 tolerance = ((Float) job.getValue("tolerance")).doubleValue();136 timeTol = ((Float) job.getValue("timeTol")).doubleValue();139 //tolerance = ((Float) job.getValue("tolerance")).doubleValue(); 140 //timeTol = ((Float) job.getValue("timeTol")).doubleValue(); 137 141 boolean matchBetweenFractions = (Boolean) job 138 142 .getValue("matchBetweenFractions"); … … 147 151 //Annotator annotator = new Annotator(factory); 148 152 //AnnotationType fractionAnnotationType = annotator 149 153 // .getAnnotationType("fraction", Type.STRING, File.TYPE); 150 154 //creates an instance of the template class 151 155 File outCoreFile = factory.create(File.class); … … 172 176 writer.println("Propagation report for project:" + project 173 177 .getName()); 174 writer.println(" m/z tolerance:" + tolerance);175 writer.println(" RT (minutes) tolerance:" + timeTol);178 writer.println("Default m/z tolerance if no common pair-wise sequences in project: " + defaultTolerance); 179 writer.println("Default RT (minutes) tolerance if no common pair-wise sequences in project: " + defaultTimeTol); 176 180 writer 177 181 .println("Matching between fraction:" + matchBetweenFractions); 182 178 183 //get unique ms files for the project 179 184 List<File> uniqueMsFiles = Feature.getUniqueMsFiles(project, dc); 180 writer.println("Number of files with MS features:" + uniqueMsFiles.size() + "\n"); 181 log.debug("Number of files with MS features:" + uniqueMsFiles.size() + "\n"); 185 int nbrOfFiles = uniqueMsFiles.size(); 186 writer.println("Number of files with MS features:" + nbrOfFiles + "\n"); 187 log.debug("Number of files with MS features:" + nbrOfFiles + "\n"); 182 188 183 189 //query for the class 184 190 ItemQuery<Feature> featureQuery = qf.select(Feature.class); 191 185 192 // Only features in project should be retrieved 186 193 // OBS! has hql object as input … … 188 195 .property("project"), Hql.entity(project))); 189 196 190 int nbrOfFiles = uniqueMsFiles.size(); 191 //int totNbrFiles = 2*nbrOfFiles-2; 192 //int totMatchEvents = nbrOfFiles - 1; 193 //int currMatchEvents = 0; 197 194 198 195 199 FilePoint[] simList = new FilePoint[nbrOfFiles*(nbrOfFiles-1)/2]; … … 197 201 198 202 ArrayList<TreeSet<Feature>> featureList = new ArrayList<TreeSet<Feature>>(nbrOfFiles); 199 ArrayList<ArrayList<Feature>> totFeatureList = new ArrayList<ArrayList<Feature>>(nbrOfFiles);200 203 201 204 // double[][] mzDist = new double[nbrOfFiles][]; … … 206 209 Arrays.fill(hasSequence,true); 207 210 208 //TODO:What to do if hasSequence is false? 
Filterfunction needed for features when file has no sequences.209 210 211 //Random randGen = new Random(); 211 212 212 213 double[] originalFeatWithSeq = new double[nbrOfFiles]; 213 214 … … 216 217 File currentMsFile = uniqueMsFiles.get(fileNbr); 217 218 218 List<String> currSeq = Feature.getUniqueFeatureSequences(project, dc, currentMsFile); 219 writer.println("Number of unique sequences in file:" + fileNbr + " " + currSeq.size()); 220 221 if (currSeq==null || currSeq.isEmpty()) 222 { 223 hasSequence[fileNbr]=false; 219 220 //Clearing clusterID for current project 221 featureQuery.restrict(Restrictions.eq(Hql 222 .property("msFile"), Hql.entity(currentMsFile))); 223 featureQuery.restrict(Restrictions.neq(Hql 224 .property("clusterId"), null)); 225 226 227 List<Feature> allFeaturesClusterID = featureQuery.list(dc); 228 featureQuery.reset(); 229 230 //Clearing the clusterID:s in current project 231 for (Feature clusterF: allFeaturesClusterID){ 232 if (progress != null) 233 { 234 progress.display((fileNbr+1)/nbrOfFiles, 235 "Clearing clusterID in project. Processing file nr " + (fileNbr+1)); 236 } 237 238 239 clusterF.setClusterId(null); 240 dc.reattachItem(clusterF); 241 224 242 } 225 //seqList.add(currSeq); 243 dc.commit(); 244 dc = sc.newDbControl(); 226 245 227 246 // Retrieve features for current ms file … … 234 253 235 254 List<Feature> allFeatures = featureQuery.list(dc); 236 237 //TODO: Setting clusterID, should be done together with distribution calculation238 for (Feature clusterF: allFeatures){239 //if (clusterF.getClusterId()==null){240 241 clusterF.setClusterId(initialClusterID++);242 dc.reattachItem(clusterF);243 //}244 }245 dc.commit();246 247 dc = sc.newDbControl();248 255 allFeatures = featureQuery.list(dc); 249 250 totFeatureList.add(new ArrayList<Feature>());251 totFeatureList.get(fileNbr).addAll(allFeatures);252 253 writer.println("Number of all features in file:" + fileNbr + " " + allFeatures.size());254 256 255 257 … … 277 279 featureList.add(new TreeSet<Feature>(new SeqComparator())); 278 280 featureList.get(fileNbr).addAll(features); 279 280 281 /*for (Feature f:featureList.get(fileNbr)) 282 { 283 log.debug(f.getPeptideSequence()+ " "+f.getApexRetentionTimeInMinutes()+ " " +f.getChargeState()); 284 }*/ 285 286 281 282 287 283 originalFeatWithSeq[fileNbr]=features.size(); 288 284 289 285 writer.println("File number " +fileNbr + " is " + currentMsFile); 290 286 writer.println("Number of all features in file " + fileNbr + ": " + allFeatures.size()); … … 299 295 log.debug("Number of unique features with sequences in file " + fileNbr + ": " + featureList.get(fileNbr).size()); 300 296 301 302 303 297 //String frac1 = getFirstAnnotationValue(annotator, 304 298 //fractionAnnotationType, currentMsFile); 305 299 } 300 301 306 302 307 303 writer.println("==============================="); … … 353 349 int indexKey = (secFileNbr+1)*(secFileNbr-2)/2+fileNbr+1; 354 350 355 356 357 // writer.println("The index key :" + indexKey);358 //359 //if (simValues[indexKey]<0)360 //{361 362 351 ArrayList<Point> points = getPointMatchBySeq(features, features2); 352 ArrayList<Point> evalPoints = new ArrayList<Point>(); 353 double simValue = 0; 354 PolynomialSplineFunction func = null; 363 355 364 356 //double percentageForRegression = 10; … … 370 362 if(totNbrMatches==0){ 371 363 372 writer.println("There are no common sequences for file: " + (fileNbr+1) + " and " +(secFileNbr+1)); 373 writer.println("These files will not be aligned."); 374 continue; 364 writer.println("There are no common 
sequences for file: " + (fileNbr+1) + " and " +(secFileNbr+1) +": " +totNbrMatches); 365 writer.println("Alignment will be performed without retention time correction."); 366 367 }else{ 368 369 370 Collections.sort(points,new DiffComparator()); 371 372 double lowerQuartile = points.get((int) Math.round(0.25*points.size())).getDiff(); 373 double upperQuartile = points.get((int) Math.round(0.75*points.size())).getDiff(); 374 double medianQuartile = points.get((int) Math.round(0.5*points.size())).getDiff(); 375 double interQuartileRange = upperQuartile-lowerQuartile; 376 double upperFence = upperQuartile + 3*interQuartileRange; 377 378 writer.println("Median RT difference before alignment: " +medianQuartile); 379 writer.println("Largest RT difference before alignment: " +points.get(points.size()-1).getDiff()); 380 381 int maxCutOff = points.size()-1; 382 while(points.get(maxCutOff).getDiff()>upperFence) 383 { 384 points.remove(maxCutOff); 385 maxCutOff--; 386 } 387 388 389 writer.println("Removing " +(totNbrMatches-points.size()) + " outliers. Total number of sequence matches is now: " +points.size()); 390 391 medianQuartile = points.get((int) Math.round(0.5*points.size())).getDiff(); 392 393 writer.println("Median RT difference is now: " +medianQuartile); 394 writer.println("Largest RT difference is now: " +points.get(points.size()-1).getDiff()); 395 396 Collections.sort(points,new MzDiffComparator()); 397 double medianQuartileMz = points.get((int) Math.round(0.5*points.size())).getMzDiff(); 398 writer.println("Median mz difference before alignment: " +medianQuartileMz); 399 writer.println("Largest mz difference before alignment: " +points.get(points.size()-1).getMzDiff()); 400 401 tolerance=points.get(points.size()-1).getMzDiff(); 402 timeTol=medianQuartile; 403 404 405 406 407 408 //no duplicates in x-values 409 Collections.sort(points, new RTComparator()); 410 411 log.debug("Points size: " +points.size()); 412 413 414 //TODO: bortkommentera när mzDist ska användas som likhetsvärde 415 simValue = points.size()*2.0/(features2.size()+features.size()); 416 writer.println("Similarity for file number" +fileNbr + " and " + secFileNbr +": " +simValue); 417 418 /* 419 * Weight order 420 Collections.sort(points, new WeightComparator()); 421 422 int halfLength = (int) Math.ceil(points.size()/2); 423 424 evalPoints.addAll(points.subList(0,halfLength-1)); 425 426 points.subList(0,halfLength).clear(); 427 428 Collections.sort(points, new RTComparator()); 429 Collections.sort(evalPoints, new RTComparator());*/ 430 431 432 //Partitioning matching sequences in alignment set and evaluation set 433 int ndx = 0; 434 while(ndx<points.size()) 435 { 436 //TODO: avkommentera när random ska användas 437 //features.remove(randGen.nextInt(features.size())); 438 439 evalPoints.add(points.remove(ndx)); 440 ndx++; 441 442 } 443 444 log.debug("Points size: " +points.size()); 445 log.debug("Eval size: " +evalPoints.size()); 446 447 ArrayList<Point> tempPoints = new ArrayList<Point>(); 448 tempPoints = evalPoints; 449 evalPoints = points; 450 points = tempPoints; 451 452 func = estimateSplineFunc(points, writer); 453 if (func==null){ 454 writer.println("Adding evaluation points to spline function estimation. 
No precision and recall will be computed for this pair-wise alignment."); 455 points.addAll(evalPoints); 456 Collections.sort(points, new RTComparator()); 457 evalPoints.clear(); 458 func = estimateSplineFunc(points, writer); 459 if (func==null){ 460 writer.println("Too few points for alignment."); 461 continue; 462 } 463 464 } 375 465 } 376 377 378 Collections.sort(points,new DiffComparator()); 379 380 double lowerQuartile = points.get((int) Math.round(0.25*points.size())).getDiff(); 381 double upperQuartile = points.get((int) Math.round(0.75*points.size())).getDiff(); 382 double medianQuartile = points.get((int) Math.round(0.5*points.size())).getDiff(); 383 double interQuartileRange = upperQuartile-lowerQuartile; 384 double upperFence = upperQuartile + 3*interQuartileRange; 385 386 writer.println("Median RT difference before alignment: " +medianQuartile); 387 writer.println("Largest RT difference before alignment: " +points.get(points.size()-1).getDiff()); 388 389 int maxCutOff = points.size()-1; 390 while(points.get(maxCutOff).getDiff()>upperFence && points.get(maxCutOff).getDiff()>timeTol) 391 { 392 points.remove(maxCutOff); 393 maxCutOff--; 394 } 395 396 397 writer.println("Removing " +(totNbrMatches-points.size()) + " outliers. Total number of sequence matches is now: " +points.size()); 398 399 medianQuartile = points.get((int) Math.round(0.5*points.size())).getDiff(); 400 401 writer.println("Median RT difference is now: " +medianQuartile); 402 writer.println("Largest RT difference is now: " +points.get(points.size()-1).getDiff()); 403 404 Collections.sort(points,new MzDiffComparator()); 405 double medianQuartileMz = points.get((int) Math.round(0.5*points.size())).getMzDiff(); 406 writer.println("Median mz difference before alignment: " +medianQuartileMz); 407 writer.println("Largest mz difference before alignment: " +points.get(points.size()-1).getMzDiff()); 408 409 tolerance=points.get(points.size()-1).getMzDiff(); 410 timeTol=medianQuartile; 411 412 413 ArrayList<Point> evalPoints = new ArrayList<Point>(); 414 415 416 //no duplicates in x-values 417 Collections.sort(points, new RTComparator()); 418 419 log.debug("Points size: " +points.size()); 420 421 422 //TODO: bortkommentera när mzDist ska användas som likhetsvärde 423 double simValue = points.size()*2.0/(features2.size()+features.size()); 424 writer.println("Similarity for file number" +fileNbr + " and " + secFileNbr +": " +simValue); 425 426 /* 427 Collections.sort(points, new WeightComparator()); 428 429 int halfLength = (int) Math.ceil(points.size()/2); 430 431 evalPoints.addAll(points.subList(0,halfLength-1)); 432 433 points.subList(0,halfLength).clear(); 434 435 Collections.sort(points, new RTComparator()); 436 Collections.sort(evalPoints, new RTComparator());*/ 437 438 int ndx = 0; 439 440 while(ndx<points.size()) 441 { 442 //TODO: avkommentera när random ska användas 443 //features.remove(randGen.nextInt(features.size())); 444 445 evalPoints.add(points.remove(ndx)); 446 ndx++; 447 448 } 449 450 log.debug("Points size: " +points.size()); 451 log.debug("Eval size: " +evalPoints.size()); 452 453 ArrayList<Point> tempPoints = new ArrayList<Point>(); 454 tempPoints = evalPoints; 455 evalPoints = points; 456 points = tempPoints; 457 458 459 //} 460 461 462 // writer.println("Similarity:" + simValues[indexKey]); 463 464 465 PolynomialSplineFunction func = estimateSplineFunc(points, writer); 466 simList[indexKey] = new FilePoint(fileNbr,secFileNbr,simValue,func,evalPoints); 466 simList[indexKey] = new 
FilePoint(fileNbr,secFileNbr,simValue, tolerance, timeTol, func,evalPoints); 467 467 468 468 writer.println(); … … 471 471 } 472 472 //writer.println("Best match is :" + (firstMatch+1) +" and " +(secMatch+1)+ "\n"); 473 474 475 Arrays.sort(simList,Collections.reverseOrder(new SimComparator())); 476 477 // Propagate and get count 473 478 479 double noSeqMzTol = 0; 480 double noSeqRtTol = 0; 474 481 475 Arrays.sort(simList,Collections.reverseOrder(new SimComparator())); 476 477 // Propagate and get count 478 479 countNbrMatches=0; 480 for(FilePoint f:simList) 482 for(int i=0; i<simList.length; i++) 481 483 { 482 if (f.getSimValue()>0){ 483 countNbrMatches++;484 484 485 FilePoint f = simList[i]; 486 485 487 if (progress != null) 486 488 { 487 489 progress.display((100 *countNbrMatches/simList.length), 488 "Aligning files " +(f.getFirstFile()+1) + " and " + (f.getSecondFile()+1) +". File-pair " + countNbrMatches +" of " +simList.length);490 "Aligning files " +(f.getFirstFile()+1) + " and " + (f.getSecondFile()+1) +". File-pair " +(i+1) +" of " +simList.length +". Similarity score: " +f.getSimValue()); 489 491 } 490 492 491 493 //there are no common features at all, setting the mzTol and rtTol to the one with the least similar files 494 if (f.getMzTol()==0 && f.getRtTol()==0){ 495 496 497 if (noSeqMzTol == 0 && noSeqRtTol==0){ 498 499 if (i-1>=0){ 500 501 noSeqMzTol = simList[i-1].getMzTol(); 502 noSeqRtTol = simList[i-1].getRtTol(); 503 504 writer.println("==============================="); 505 writer.println("There are no common sequences for these files. Alignment will be performed with mz and rt tolerance for least similar files."); 506 507 }else{ 508 509 writer.println("==============================="); 510 writer.println("There are no sequences in this project. Alignment will be performed with default values for mz and rt tolerance."); 511 512 noSeqMzTol = defaultTolerance; 513 noSeqRtTol = defaultTimeTol; 514 515 } 516 517 }else{ 518 f.setMzTol(noSeqMzTol); 519 f.setRtTol(noSeqRtTol); 520 521 } 522 523 } 524 492 525 493 526 //TODO: här ska allFeatures skickas in för ytterligare utvärdering 494 count += matchFeatureLists(f,factory, project,writer,featureQuery,uniqueMsFiles ,tolerance,timeTol);495 496 //TODO: lägg in hur många unika sekvenser filen har EFTER matchning 497 } 498 } 499 500 527 count += matchFeatureLists(f,factory, project,writer,featureQuery,uniqueMsFiles); 528 529 530 531 } 532 533 501 534 //writerDat.close(); 502 535 // Commit to database 503 536 dc.commit(); 504 505 537 538 506 539 writer.println("==============================="); 507 540 508 541 featureQuery.reset(); 509 542 dc = sc.newDbControl(); … … 517 550 featureQuery.restrict(Restrictions.gteq(Hql 518 551 .property("chargeState"),Expressions.integer(2))); 519 552 520 553 writer.println("Number of features with sequences in file " +currentMsFile +" after alignment: " +featureQuery.list(dc).size()); 521 554 writer.println("Total increase in sequence coverage for file " +currentMsFile + ": " +(featureQuery.list(dc).size()-originalFeatWithSeq[fileNbr])/originalFeatWithSeq[fileNbr]); … … 523 556 } 524 557 dc.close(); 525 558 526 559 writer.println("Total number of features matched for " + uniqueMsFiles.size() + " MS files: " + count); 527 560 response.setDone("Total number of features matched for " + uniqueMsFiles.size() + " MS files: " + count); 528 561 529 562 // Close log file 530 563 writer.close(); … … 624 657 log.debug("Point replaced."); 625 658 } 626 //TODO: lägg in hits sorteringen..???? 
659 627 660 } 628 661 … … 746 779 */ 747 780 private int matchFeatureLists(FilePoint fp, ItemFactory factory, 748 Project project, PrintWriter writer, ItemQuery<Feature> featureQuery,List<File> uniqueMsFiles , double tolerance, double timeTol)781 Project project, PrintWriter writer, ItemQuery<Feature> featureQuery,List<File> uniqueMsFiles) 749 782 { 750 783 784 int hitsCounter = 0; 785 //timestamp for clusterID 786 Timestamp tStmp = new Timestamp(new Date().getTime()); 787 long initialClusterID = tStmp.getTime(); 751 788 752 789 DbControl dc = sc.newDbControl(); 753 790 754 ArrayList<Point> features= fp.getEvalList();791 ArrayList<Point> evalList = fp.getEvalList(); 755 792 PolynomialSplineFunction func = fp.getSplineFunction(); 793 double tolerance = fp.getMzTol(); 794 double timeTol = fp.getRtTol(); 756 795 757 796 featureQuery.restrict(Restrictions.eq(Hql … … 782 821 ArrayList<Feature> allFeatures2 = new ArrayList<Feature>(tempAllFeatures2); 783 822 featureQuery.reset(); 784 823 785 824 featureQuery.restrict(Restrictions.eq(Hql 786 825 .property("msFile"), Hql.entity(uniqueMsFiles.get(fp.getSecondFile())))); … … 790 829 featureQuery.restrict(Restrictions.neq(Hql.property("peptideSequence"), null)); 791 830 List<Feature> tempAllFeaturesSeq2 = featureQuery.list(dc); 792 ArrayList<Feature> allFeaturesSeq2 = new ArrayList<Feature>(tempAllFeaturesSeq );831 ArrayList<Feature> allFeaturesSeq2 = new ArrayList<Feature>(tempAllFeaturesSeq2); 793 832 featureQuery.reset(); 833 794 834 795 835 ArrayList<Feature> bestMatchList = new ArrayList<Feature>(); … … 803 843 804 844 writer.println("==============================="); 805 writer.println("Evaluating match with " + features.size() +" features with sequences for file " +fp.getFirstFile() +" and " +fp.getSecondFile() +" with similarity score: " +fp.getSimValue());845 writer.println("Evaluating match with " +evalList.size() +" features with sequences for file " +fp.getFirstFile() +" and " +fp.getSecondFile() +" with similarity score: " +fp.getSimValue()); 806 846 writer.println(); 807 847 808 848 809 log.debug("Evaluating match with " + features.size() +" features with sequences for file " +fp.getFirstFile() +" and " +fp.getSecondFile() +" with similarity score: " +fp.getSimValue());810 811 849 log.debug("Evaluating match with " +evalList.size() +" features with sequences for file " +fp.getFirstFile() +" and " +fp.getSecondFile() +" with similarity score: " +fp.getSimValue()); 850 851 812 852 String localSampleId = null; 813 853 String fractionId = null; … … 854 894 double fineTolSeq = 0; 855 895 856 for (Point p : features) 857 { 858 Feature f = p.getXFeature(); 859 String fseq = f.getPeptideSequence(); 896 if (!evalList.isEmpty()){ 897 898 for (Point p : evalList) 899 { 900 Feature f = p.getXFeature(); 901 String fseq = f.getPeptideSequence(); 902 903 float fRT = f.getApexRetentionTimeInMinutes(); 904 905 if (func != null) 906 { 907 try 908 { 909 fRT = (float) func.value(f 910 .getApexRetentionTimeInMinutes().doubleValue()); 911 } 912 catch (ArgumentOutsideDomainException e) 913 {} 914 } 915 916 for(Point p2: evalList) 917 { 918 Feature f2 = p2.getYFeature(); 919 String f2seq = f2.getPeptideSequence(); 920 921 if ((Math.abs(f.getMassToChargeRatio() - f2.getMassToChargeRatio()) < tolerance) && f2.getChargeState().equals(f.getChargeState()) && (Math.abs(f2.getApexRetentionTimeInMinutes() - fRT) < timeTol)) 922 { 923 if (f2seq.equals(fseq)) 924 { 925 fineSeq++; 926 927 }else{ 928 929 fineTolSeq++; 930 931 writer.println("Current timetol: " +timeTol); 
932 writer.println("Feature with sequence " + f.getPeptideSequence() + " matching " + f2.getPeptideSequence() + " time diff: " +(Math.abs(f2.getApexRetentionTimeInMinutes() - fRT) + " mz diff: " +(Math.abs(f.getMassToChargeRatio() - f2.getMassToChargeRatio())))); 933 934 } 935 936 //sequences that match but are not within tolerance limits => conflict 937 }else if (f2seq.equals(fseq) && f2.getChargeState().equals(f.getChargeState())){ 938 939 log.debug("Current timetol: " +timeTol +" and current mz tol: " +tolerance); 940 log.debug("Feature with sequence " + f.getPeptideSequence() + " not matching " + f2.getPeptideSequence() + " time diff: " +(Math.abs(f2.getApexRetentionTimeInMinutes() - fRT) + " mz diff: " +(Math.abs(f.getMassToChargeRatio() - f2.getMassToChargeRatio())))); 941 942 } 943 } 944 } 945 } 946 947 948 949 int count = 0; 950 int minNdx = 0; 951 952 //double timeTolTemp = 0; 953 Feature bestMatchPrev = null; 954 for(Feature f:allFeatures){ 955 860 956 861 957 float fRT = f.getApexRetentionTimeInMinutes(); … … 872 968 } 873 969 874 for(Point p2: features)875 {876 Feature f2 = p2.getYFeature();877 String f2seq = f2.getPeptideSequence();878 // log.debug("Feature to match RT:" + f.getApexRetentionTimeInMinutes() + "after shift:" + fRT + " diff:" + (fRT - f.getApexRetentionTimeInMinutes()));879 //sequence evaluation for recall and tol evaluation for precision880 if ((Math.abs(f.getMassToChargeRatio() - f2.getMassToChargeRatio()) < tolerance) && f2.getChargeState().equals(f.getChargeState()) && (Math.abs(f2.getApexRetentionTimeInMinutes() - fRT) < timeTol))881 {882 883 //fseq och f2seq ska inte kunna vara null884 if (f2seq.equals(fseq))885 {886 fineSeq++;887 888 }else{889 890 fineTolSeq++;891 892 writer.println("Current timetol: " +timeTol);893 writer.println("Feature with sequence " + f.getPeptideSequence() + " matching " + f2.getPeptideSequence() + " time diff: " +(Math.abs(f2.getApexRetentionTimeInMinutes() - fRT) + " mz diff: " +(Math.abs(f.getMassToChargeRatio() - f2.getMassToChargeRatio()))));894 895 }896 897 //sequences that should be matching898 }else if (f2seq.equals(fseq) && f2.getChargeState().equals(f.getChargeState())){899 900 log.debug("Current timetol: " +timeTol +" and current mz tol: " +tolerance);901 log.debug("Feature with sequence " + f.getPeptideSequence() + " not matching " + f2.getPeptideSequence() + " time diff: " +(Math.abs(f2.getApexRetentionTimeInMinutes() - fRT) + " mz diff: " +(Math.abs(f.getMassToChargeRatio() - f2.getMassToChargeRatio()))));902 903 }904 }905 }906 907 908 909 int count = 0;910 int minNdx = 0;911 912 //double timeTolTemp = 0;913 914 for(Feature f:allFeatures){915 916 917 float fRT = f.getApexRetentionTimeInMinutes();918 919 if (func != null)920 {921 try922 {923 fRT = (float) func.value(f924 .getApexRetentionTimeInMinutes().doubleValue());925 }926 catch (ArgumentOutsideDomainException e)927 {}928 }929 930 970 while( minNdx<(allFeatures2.size()-1) && fRT-allFeatures2.get(minNdx).getApexRetentionTimeInMinutes()>timeTol){ 931 971 minNdx++; … … 935 975 936 976 boolean hit = false; 937 977 938 978 939 979 // log.debug("For RT: " +fRT +" Starting at: " +allFeatures2.get(minNdx).getApexRetentionTimeInMinutes()); … … 942 982 { 943 983 944 // slut på sökning984 //end of search 945 985 if(f2.getApexRetentionTimeInMinutes()-fRT>timeTol){ 946 986 break; … … 960 1000 if(Math.abs(bestMatch.getApexRetentionTimeInMinutes() - fRT)>Math.abs(f2.getApexRetentionTimeInMinutes() - fRT)) 961 1001 { 962 log.debug("Best match is updated.");963 log.debug("Old rt 
diff is: " +Math.abs(bestMatch.getApexRetentionTimeInMinutes() - fRT));964 1002 bestMatch = f2; 965 log.debug("New rt diff is: " +Math.abs(bestMatch.getApexRetentionTimeInMinutes() - fRT)); 966 1003 //log.debug("Best match is updated."); 1004 //log.debug("Old rt diff is: " +Math.abs(bestMatch.getApexRetentionTimeInMinutes() - fRT)); 1005 1006 //log.debug("New rt diff is: " +Math.abs(bestMatch.getApexRetentionTimeInMinutes() - fRT)); 1007 967 1008 } 968 1009 … … 975 1016 976 1017 977 if(bestMatch!=null) 978 { 979 980 981 //log.debug("Matched features with RT " + f.getApexRetentionTimeInMinutes() + " and RT:" + bestMatch.getApexRetentionTimeInMinutes()); 982 983 984 //log.debug("First feature: " +f.getClusterId() + " bestMatch feature: " +bestMatch.getClusterId()); 985 //All features have clusterID:s, they are set at start of alignment -> all features that have a clusterID have undergone alignment 986 if(!f.getClusterId().equals(bestMatch.getClusterId())){ 987 //log.debug("Setting new clusterID"); 988 989 bestMatchList.add(bestMatch); 990 fList.add(f); 991 bestMatchClusterIDList.add(bestMatch.getClusterId()); 992 993 } 994 995 } 996 997 998 } 999 1000 int hitsCounter = 0; 1001 1018 if(bestMatch!=null){ 1019 1020 if(f.getClusterId()!=null && bestMatch.getClusterId()!=null || f.getClusterId()==null && bestMatch.getClusterId()!=null){ 1021 if (!bestMatch.getClusterId().equals(f.getClusterId())){ 1022 //Feature pair have undergone matching before and are not in same cluster => add to bestMatchClusterIDList 1023 bestMatchList.add(bestMatch); 1024 fList.add(f); 1025 bestMatchClusterIDList.add(bestMatch.getClusterId()); 1026 } 1027 }else{ 1028 1029 //single match 1030 1031 count++; 1032 1033 if(f.getClusterId()==null && bestMatch.getClusterId()==null){ 1034 //Feature-pair has not undergone alignment before, no need to add to bestMatchClusterIDList 1035 long clusterMatchID=initialClusterID++; 1036 bestMatch.setClusterId(clusterMatchID); 1037 f.setClusterId(clusterMatchID); 1038 1039 }else if (f.getClusterId()!=null && bestMatch.getClusterId()==null){ 1040 //only f has undergone alignment before but not bestMatch, no need to add to bestMatchClusterIDList 1041 bestMatch.setClusterId(f.getClusterId()); 1042 } 1043 1044 dc.reattachItem(f); 1045 dc.reattachItem(bestMatch); 1046 1047 //Updating hits list for single match 1048 if (f.getPeptideSequence()!= null && bestMatch.getPeptideSequence()==null){ 1049 hitsCounter+=updateHitsList(f,bestMatch,localSampleId2,fractionId2,allFeaturesSeq,writer,dc,factory,project); 1050 }else if(f.getPeptideSequence()== null && bestMatch.getPeptideSequence()!=null){ 1051 hitsCounter+=updateHitsList(bestMatch,f,localSampleId,fractionId,allFeaturesSeq2,writer,dc,factory,project); 1052 } 1053 1054 1055 } 1056 1057 if (bestMatchPrev!=null && bestMatchPrev.getMassToChargeRatio()==bestMatch.getMassToChargeRatio() && Math.abs(bestMatchPrev.getApexRetentionTimeInMinutes()-bestMatch.getApexRetentionTimeInMinutes())<0.00001){ 1058 log.debug("Previous bestMatch equals new bestMatch."); 1059 1060 } 1061 bestMatchPrev=bestMatch; 1062 1063 } 1064 1065 1066 1067 1068 1069 } 1070 1071 1002 1072 if (!bestMatchClusterIDList.isEmpty()){ 1003 1073 … … 1012 1082 log.debug("Size of fList: " +fList.size()); 1013 1083 1014 count =bestMatchList.size();1084 count+=bestMatchList.size(); 1015 1085 1016 1086 … … 1025 1095 Feature f = fList.get(featureNbr); 1026 1096 1097 //log.debug("First feature: " +f.getClusterId() + " bestMatch feature: " +bestMatch.getClusterId()); 1098 if 
(f.getClusterId()!=null){ 1099 bestMatch.setClusterId(f.getClusterId()); 1100 }else{ 1101 f.setClusterId(bestMatch.getClusterId()); 1102 } 1103 1027 1104 dc.reattachItem(f); 1028 1105 dc.reattachItem(bestMatch); 1029 1030 //log.debug("First feature: " +f.getClusterId() + " bestMatch feature: " +bestMatch.getClusterId());1031 1032 bestMatch.setClusterId(f.getClusterId());1033 1106 1034 1107 //updating hits … … 1039 1112 hitsCounter+=updateHitsList(bestMatch,f,localSampleId,fractionId,allFeaturesSeq2,writer,dc,factory,project); 1040 1113 } 1041 //TODO: what to do if both have sequences? 1042 1043 //log.debug("First feature after set: " +f.getClusterId() + " bestMatch feature: " +bestMatch.getClusterId()); 1114 //TODO: what to do if both have sequences??? CONFILCT resolution needed! 1044 1115 1045 1116 } … … 1054 1125 writer.println("Number of features matched:" + count); 1055 1126 writer.println("Number of hits created:" + hitsCounter); 1056 writer.println("Pairwsie alignment recall based on sequences with mzTol " +tolerance +" and timeTol " +timeTol +": " +fineSeq/ features.size());1057 writer.println("Pairwsie alignment precision based on sequences with mzTol " +tolerance +" and timeTol " +timeTol +": " +(1-fineTolSeq/ features.size()));1127 writer.println("Pairwsie alignment recall based on sequences with mzTol " +tolerance +" and timeTol " +timeTol +": " +fineSeq/evalList.size()); 1128 writer.println("Pairwsie alignment precision based on sequences with mzTol " +tolerance +" and timeTol " +timeTol +": " +(1-fineTolSeq/evalList.size())); 1058 1129 writer.println(); 1059 1130 … … 1069 1140 dc.reattachItem(f); 1070 1141 dc.reattachItem(f2); 1071 1142 1072 1143 int newHitCounter = 0; 1073 1144 1074 1145 Hit h = null; 1075 1146 for (Hit h1 : f.getHits()) … … 1095 1166 .lastIndexOf(' ')); 1096 1167 for (Feature f3 : allFeaturesSeq) 1097 { 1168 { 1169 1098 1170 if ((fseq.equals(f3.getPeptideSequence()) || (fseq 1099 1171 .contains(" ") && f3 … … 1157 1229 f2.setHits(f.getHits()); 1158 1230 } 1159 1231 1160 1232 return newHitCounter; 1161 1233 … … 1196 1268 catch (MathException e1) 1197 1269 { 1198 throw new BaseException(e1.getMessage());1270 writer.println("Too few points to perform spline interpolation: " +points.size()); 1199 1271 } 1200 1272 } … … 1478 1550 private class FilePoint{ 1479 1551 1480 private double simValue = 0.0; 1552 private double simValue = 0; 1553 private double mzTol = 0; 1554 private double rtTol = 0; 1481 1555 private PolynomialSplineFunction func = null; 1482 1556 private int file1 = 0; … … 1484 1558 private ArrayList<Point> evalPoint = new ArrayList<Point>(); 1485 1559 1560 1486 1561 public FilePoint() 1487 1562 {} 1488 1563 1489 public FilePoint(int fileNbr, int fileNbr2, double simValue, PolynomialSplineFunction func, ArrayList<Point> evalPoint)1564 public FilePoint(int fileNbr, int fileNbr2, double simValue, double mzTol, double rtTol, PolynomialSplineFunction func, ArrayList<Point> evalPoint) 1490 1565 { 1491 1566 this.file1 = fileNbr; 1492 1567 this.file2 = fileNbr2; 1493 1568 this.simValue = simValue; 1569 this.mzTol = mzTol; 1570 this.rtTol = rtTol; 1494 1571 this.func = func; 1495 1572 this.evalPoint.addAll(evalPoint); … … 1512 1589 } 1513 1590 1591 public double getMzTol(){ 1592 1593 return this.mzTol; 1594 } 1595 1596 public void setMzTol(double mzTol){ 1597 1598 this.mzTol = mzTol; 1599 } 1600 1601 public double getRtTol(){ 1602 1603 return this.rtTol; 1604 } 1605 1606 public void setRtTol(double rtTol){ 1607 1608 this.rtTol = rtTol; 1609 } 1610 1514 1611 
public PolynomialSplineFunction getSplineFunction() { 1515 1612
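The FeatureSequencePropagator changes above replace the user-supplied m/z and RT tolerances with values derived per file pair from the matched sequence pairs: RT-difference outliers above an upper fence of Q3 + 3*IQR are discarded, the median remaining RT difference becomes the RT tolerance, and the largest remaining m/z difference becomes the m/z tolerance. The standalone sketch below (assuming a non-empty match list) illustrates that derivation outside the Proteios API; the Diff and Tolerances records, the class name, and the exact quartile indexing are illustrative assumptions, not code from this changeset.

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

public class ToleranceSketch
{
    /** Absolute RT and m/z differences for one matched sequence pair (hypothetical holder). */
    public record Diff(double rtDiff, double mzDiff) {}

    /** Derived pair-wise tolerances (hypothetical holder). */
    public record Tolerances(double rtTol, double mzTol) {}

    /** Assumes matches is non-empty. */
    public static Tolerances estimate(List<Diff> matches)
    {
        // Sort matched pairs by absolute RT difference
        List<Diff> points = new ArrayList<>(matches);
        points.sort(Comparator.comparingDouble(Diff::rtDiff));

        // Approximate quartiles of the RT differences and the upper fence Q3 + 3*IQR
        double q1 = points.get((int) Math.round(0.25 * (points.size() - 1))).rtDiff();
        double q3 = points.get((int) Math.round(0.75 * (points.size() - 1))).rtDiff();
        double upperFence = q3 + 3 * (q3 - q1);

        // Drop RT outliers from the top of the sorted list
        while (!points.isEmpty() && points.get(points.size() - 1).rtDiff() > upperFence)
        {
            points.remove(points.size() - 1);
        }

        // RT tolerance: median RT difference of the remaining pairs
        double rtTol = points.get((int) Math.round(0.5 * (points.size() - 1))).rtDiff();
        // m/z tolerance: largest m/z difference among the remaining pairs
        double mzTol = points.stream().mapToDouble(Diff::mzDiff).max().orElse(0.0);

        return new Tolerances(rtTol, mzTol);
    }

    public static void main(String[] args)
    {
        List<Diff> matches = List.of(
            new Diff(0.10, 0.002), new Diff(0.15, 0.004),
            new Diff(0.20, 0.003), new Diff(0.25, 0.005),
            new Diff(5.00, 0.009)); // clear RT outlier, removed by the fence
        Tolerances t = estimate(matches);
        System.out.println("rtTol=" + t.rtTol() + " mzTol=" + t.mzTol());
    }
}

Deriving the tolerances from the data keeps the matching adaptive per file pair, which is also why the changeset stores mzTol and rtTol inside FilePoint instead of passing them as extra arguments to matchFeatureLists.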