Changeset 4288
- Timestamp:
- Feb 13, 2012, 9:16:52 PM (12 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/plugin/src/org/proteios/plugins/FeatureSequencePropagator.java
r4285 r4288 86 86 */ 87 87 private static final org.apache.log4j.Logger log = org.apache.log4j.LogManager 88 .getLogger("org.proteios.plugins.featuresequencepropagator");88 .getLogger("org.proteios.plugins.featuresequencepropagator"); 89 89 90 90 … … 98 98 { 99 99 return new AboutImpl("Propagate identifications", 100 "Match features and propagate identifications", "0.2",101 "2011, Marianne Sandin, Fredrik Levander", null, null,102 "http://www.proteios.org");100 "Match features and propagate identifications", "0.2", 101 "2011, Marianne Sandin, Fredrik Levander", null, null, 102 "http://www.proteios.org"); 103 103 } 104 104 … … 107 107 public void init(SessionControl sc, ParameterValues configuration, 108 108 ParameterValues job) 109 throws BaseException110 {109 throws BaseException 110 { 111 111 super.init(sc, configuration, job); 112 }112 } 113 113 114 114 … … 123 123 Timestamp tStmp = new Timestamp(new Date().getTime()); 124 124 long initialClusterID = tStmp.getTime(); 125 125 126 126 double tolerance = 0.01; 127 127 double timeTol = 0.5; … … 129 129 // dbControl = object used for communicating with database 130 130 DbControl dc = sc.newDbControl(); 131 131 132 132 //used for alignment file 133 133 DbControl dcFile = sc.newDbControl(); … … 136 136 timeTol = ((Float) job.getValue("timeTol")).doubleValue(); 137 137 boolean matchBetweenFractions = (Boolean) job 138 .getValue("matchBetweenFractions");138 .getValue("matchBetweenFractions"); 139 139 Project project = (Project) job.getValue("project"); 140 140 // qf is used to query the database … … 145 145 ItemFactory factory = new ItemFactory(dcFile); 146 146 147 Annotator annotator = new Annotator(factory);148 AnnotationType fractionAnnotationType = annotator149 .getAnnotationType("fraction", Type.STRING, File.TYPE);147 //Annotator annotator = new Annotator(factory); 148 //AnnotationType fractionAnnotationType = annotator 149 // .getAnnotationType("fraction", Type.STRING, File.TYPE); 150 150 //creates an instance of the template class 151 151 File outCoreFile = factory.create(File.class); … … 157 157 outCoreFile.setDirectory(project.getProjectDirectory()); 158 158 outCoreFile.setName("AlignLog-" + job.getId() + ".txt"); 159 159 160 160 //datFile.setDirectory(project.getProjectDirectory()); 161 161 //datFile.setName("AlignDat" + job.getId() + ".txt"); 162 162 163 163 dcFile.saveItem(outCoreFile); 164 164 //dc.saveItem(datFile); … … 166 166 OutputStream outstream = outCoreFile.getUploadStream(false); 167 167 //OutputStream outstream2 = datFile.getUploadStream(false); 168 168 169 169 PrintWriter writer = new PrintWriter(outstream); 170 170 //PrintWriter writerDat = new PrintWriter(outstream2); 171 171 172 172 writer.println("Propagation report for project:" + project 173 .getName());173 .getName()); 174 174 writer.println("m/z tolerance:" + tolerance); 175 175 writer.println("RT (minutes) tolerance:" + timeTol); … … 186 186 // OBS! has hql object as input 187 187 featureQuery.restrictPermanent(Restrictions.eq(Hql 188 .property("project"), Hql.entity(project)));188 .property("project"), Hql.entity(project))); 189 189 190 190 int nbrOfFiles = uniqueMsFiles.size(); … … 192 192 //int totMatchEvents = nbrOfFiles - 1; 193 193 //int currMatchEvents = 0; 194 194 195 195 FilePoint[] simList = new FilePoint[nbrOfFiles*(nbrOfFiles-1)/2]; 196 196 Arrays.fill(simList, new FilePoint()); 197 197 198 198 ArrayList<TreeSet<Feature>> featureList = new ArrayList<TreeSet<Feature>>(nbrOfFiles); 199 199 ArrayList<ArrayList<Feature>> totFeatureList = new ArrayList<ArrayList<Feature>>(nbrOfFiles); 200 201 // double[][] mzDist = new double[nbrOfFiles][];202 PearsonsCorrelation pearCorr = new PearsonsCorrelation();203 200 201 // double[][] mzDist = new double[nbrOfFiles][]; 202 //PearsonsCorrelation pearCorr = new PearsonsCorrelation(); 203 204 204 double[] meanFeatureLength = new double[nbrOfFiles]; 205 205 boolean[] hasSequence = new boolean[nbrOfFiles]; 206 206 Arrays.fill(hasSequence,true); 207 208 //TODO:What to do if hasSequence is false? Filterfunction needed for features when file has no sequences. 209 210 //Random randGen = new Random(); 207 211 208 //TODO:What to do if hasSequence is false? Filterfunction needed for features when file has no sequences. 209 210 Random randGen = new Random(); 212 double[] originalFeatWithSeq = new double[nbrOfFiles]; 211 213 212 214 for (int fileNbr = 0; fileNbr<nbrOfFiles; fileNbr++) 213 215 { 214 216 File currentMsFile = uniqueMsFiles.get(fileNbr); 215 217 216 218 List<String> currSeq = Feature.getUniqueFeatureSequences(project, dc, currentMsFile); 219 writer.println("Number of unique sequences in file:" + fileNbr + " " + currSeq.size()); 220 217 221 if (currSeq==null || currSeq.isEmpty()) 218 222 { … … 224 228 // Only retrieve features with peptide sequences assigned 225 229 featureQuery.restrict(Restrictions.eq(Hql 226 .property("msFile"), Hql.entity(currentMsFile)));230 .property("msFile"), Hql.entity(currentMsFile))); 227 231 featureQuery.restrict(Restrictions.gteq(Hql 228 .property("chargeState"),Expressions.integer(2)));232 .property("chargeState"),Expressions.integer(2))); 229 233 featureQuery.order(Orders.asc(Hql.property("apexRetentionTimeInMinutes"))); 230 231 dc.reconnect(); 234 232 235 List<Feature> allFeatures = featureQuery.list(dc); 233 236 234 237 //TODO: Setting clusterID, should be done together with distribution calculation 235 238 for (Feature clusterF: allFeatures){ 236 239 //if (clusterF.getClusterId()==null){ 237 238 239 240 241 clusterF.setClusterId(initialClusterID++); 242 dc.reattachItem(clusterF); 240 243 //} 241 244 } 242 245 dc.commit(); 243 246 244 247 dc = sc.newDbControl(); 245 248 allFeatures = featureQuery.list(dc); 246 249 247 250 totFeatureList.add(new ArrayList<Feature>()); 248 251 totFeatureList.get(fileNbr).addAll(allFeatures); 249 252 250 253 writer.println("Number of all features in file:" + fileNbr + " " + allFeatures.size()); 251 252 254 255 253 256 //setMzDistribution(allFeatures,mzDist,meanFeatureLength,fileNbr); 254 257 featureQuery.reset(); 255 258 256 259 log.debug("Mean length: " +meanFeatureLength[fileNbr]); 257 260 //log.debug("Length of mzDist: " +mzDist[fileNbr].length); 258 259 261 262 260 263 //ordered according to RT for the treeset so that the first one (least RT) will be picked 261 264 featureQuery.restrict(Restrictions.eq(Hql 262 .property("msFile"), Hql.entity(currentMsFile)));265 .property("msFile"), Hql.entity(currentMsFile))); 263 266 featureQuery.restrict(Restrictions.neq(Hql 264 .property("peptideSequence"), null));267 .property("peptideSequence"), null)); 265 268 featureQuery.restrict(Restrictions.gteq(Hql 266 .property("chargeState"),Expressions.integer(2)));269 .property("chargeState"),Expressions.integer(2))); 267 270 featureQuery.order(Orders.asc(Hql.property("apexRetentionTimeInMinutes"))); 268 271 … … 271 274 List<Feature> features = featureQuery.list(dc); 272 275 featureQuery.reset(); 273 274 featureList.add(new TreeSet (new SeqComparator()));276 277 featureList.add(new TreeSet<Feature>(new SeqComparator())); 275 278 featureList.get(fileNbr).addAll(features); 276 279 280 277 281 /*for (Feature f:featureList.get(fileNbr)) 278 282 { 279 283 log.debug(f.getPeptideSequence()+ " "+f.getApexRetentionTimeInMinutes()+ " " +f.getChargeState()); 280 284 }*/ 285 281 286 287 originalFeatWithSeq[fileNbr]=features.size(); 282 288 283 289 writer.println("File number " +fileNbr + " is " + currentMsFile); 284 290 writer.println("Number of all features in file " + fileNbr + ": " + allFeatures.size()); 285 291 writer.println("Number of features with sequences in file " + fileNbr + ": " + features.size()); 286 writer.println("Number of unique features with sequences in file " + fileNbr + ": " + 287 292 writer.println("Number of unique features with sequences in file " + fileNbr + ": " +featureList.get(fileNbr).size()); 293 288 294 writer.println(); 289 295 290 296 log.debug("File number " +fileNbr + " is " + currentMsFile); 291 297 log.debug("Number of all features in file " + fileNbr + ": " + allFeatures.size()); 292 298 log.debug("Number of features with sequences in file " + fileNbr + ": " + features.size()); 293 299 log.debug("Number of unique features with sequences in file " + fileNbr + ": " + featureList.get(fileNbr).size()); 294 295 296 300 301 302 297 303 //String frac1 = getFirstAnnotationValue(annotator, 298 304 //fractionAnnotationType, currentMsFile); … … 300 306 301 307 writer.println("==============================="); 302 303 304 308 309 310 305 311 int countNbrMatches=0; 306 312 307 313 for(int fileNbr=0; fileNbr<nbrOfFiles-1; fileNbr++ ) 308 314 { 309 310 315 316 311 317 for (int secFileNbr=fileNbr+1; secFileNbr<nbrOfFiles; secFileNbr++) 312 318 { 313 319 314 320 countNbrMatches++; 315 321 316 322 if (progress != null) 317 323 { 318 324 progress.display((100 *countNbrMatches)/((nbrOfFiles*(nbrOfFiles-1)/2)), 319 "Calculating similarities. Processing file nr " + (fileNbr+1) + " and " +(secFileNbr+1));325 "Calculating similarities. Processing file nr " + (fileNbr+1) + " and " +(secFileNbr+1)); 320 326 } 321 327 322 328 writer.println("Calculating similarities. Processing file nr " + (fileNbr+1) + " and " +(secFileNbr+1) +". Progress is: " +(100 *countNbrMatches)/((nbrOfFiles*(nbrOfFiles-1)/2))); 323 329 //double simValue = pearCorr.correlation(mzDist[fileNbr], mzDist[secFileNbr]); 324 330 //writer.println("Similarity for file number" +fileNbr + " and " + secFileNbr +": " +simValue); 325 326 ArrayList<Feature> features = new ArrayList(featureList.get(fileNbr)); 327 ArrayList<Feature> features2 = new ArrayList(featureList.get(secFileNbr)); 328 329 //ArrayList<Feature> features = new ArrayList(totFeatureList.get(fileNbr)); 330 //ArrayList<Feature> features2 = new ArrayList(totFeatureList.get(secFileNbr)); 331 332 double meanWeight1=meanFeatureLength[fileNbr]; 333 double meanWeight2=meanFeatureLength[secFileNbr]; 331 332 ArrayList<Feature> features = new ArrayList<Feature>(featureList.get(fileNbr)); 333 ArrayList<Feature> features2 = new ArrayList<Feature>(featureList.get(secFileNbr)); 334 335 //double meanWeight1=meanFeatureLength[fileNbr]; 336 //double meanWeight2=meanFeatureLength[secFileNbr]; 334 337 /* 335 338 … … 349 352 350 353 int indexKey = (secFileNbr+1)*(secFileNbr-2)/2+fileNbr+1; 351 352 353 354 355 356 354 357 // writer.println("The index key :" + indexKey); 355 358 // … … 358 361 359 362 ArrayList<Point> points = getPointMatchBySeq(features, features2); 360 361 double percentageForRegression = 10;363 364 //double percentageForRegression = 10; 362 365 //ArrayList<Point> points = getPointMatchByTol(features,features2,percentageForRegression); 363 366 364 367 int totNbrMatches = points.size(); 365 368 writer.println("Total number of matches: " +totNbrMatches); 369 370 if(totNbrMatches==0){ 371 372 writer.println("There are no common sequences for file: " + (fileNbr+1) + " and " +(secFileNbr+1)); 373 writer.println("These files will not be aligned."); 374 continue; 375 } 376 366 377 367 378 Collections.sort(points,new DiffComparator()); … … 372 383 double interQuartileRange = upperQuartile-lowerQuartile; 373 384 double upperFence = upperQuartile + 3*interQuartileRange; 374 385 375 386 writer.println("Median RT difference before alignment: " +medianQuartile); 376 387 writer.println("Largest RT difference before alignment: " +points.get(points.size()-1).getDiff()); 377 388 378 389 int maxCutOff = points.size()-1; 379 390 while(points.get(maxCutOff).getDiff()>upperFence && points.get(maxCutOff).getDiff()>timeTol) … … 382 393 maxCutOff--; 383 394 } 384 385 395 396 386 397 writer.println("Removing " +(totNbrMatches-points.size()) + " outliers. Total number of sequence matches is now: " +points.size()); 387 398 388 399 medianQuartile = points.get((int) Math.round(0.5*points.size())).getDiff(); 389 400 390 401 writer.println("Median RT difference is now: " +medianQuartile); 391 402 writer.println("Largest RT difference is now: " +points.get(points.size()-1).getDiff()); 392 403 393 404 Collections.sort(points,new MzDiffComparator()); 394 405 double medianQuartileMz = points.get((int) Math.round(0.5*points.size())).getMzDiff(); 395 406 writer.println("Median mz difference before alignment: " +medianQuartileMz); 396 407 writer.println("Largest mz difference before alignment: " +points.get(points.size()-1).getMzDiff()); 397 408 398 409 tolerance=points.get(points.size()-1).getMzDiff(); 399 410 timeTol=medianQuartile; 400 401 411 412 402 413 ArrayList<Point> evalPoints = new ArrayList<Point>(); 403 404 414 415 405 416 //no duplicates in x-values 406 417 Collections.sort(points, new RTComparator()); 407 418 408 419 log.debug("Points size: " +points.size()); 409 410 420 421 411 422 //TODO: bortkommentera när mzDist ska användas som likhetsvärde 412 423 double simValue = points.size()*2.0/(features2.size()+features.size()); 413 424 writer.println("Similarity for file number" +fileNbr + " and " + secFileNbr +": " +simValue); 414 425 415 426 /* 416 427 Collections.sort(points, new WeightComparator()); 417 428 418 429 int halfLength = (int) Math.ceil(points.size()/2); 419 430 420 431 evalPoints.addAll(points.subList(0,halfLength-1)); 421 432 422 433 points.subList(0,halfLength).clear(); 423 434 424 435 Collections.sort(points, new RTComparator()); 425 436 Collections.sort(evalPoints, new RTComparator());*/ 426 437 427 438 int ndx = 0; 428 439 429 440 while(ndx<points.size()) 430 441 { 431 442 //TODO: avkommentera när random ska användas 432 443 //features.remove(randGen.nextInt(features.size())); 433 444 434 445 evalPoints.add(points.remove(ndx)); 435 446 ndx++; 436 447 437 448 } 438 449 439 450 log.debug("Points size: " +points.size()); 440 451 log.debug("Eval size: " +evalPoints.size()); 441 452 442 453 ArrayList<Point> tempPoints = new ArrayList<Point>(); 443 454 tempPoints = evalPoints; … … 448 459 //} 449 460 450 461 451 462 // writer.println("Similarity:" + simValues[indexKey]); 452 463 453 464 454 465 PolynomialSplineFunction func = estimateSplineFunc(points, writer); 455 466 simList[indexKey] = new FilePoint(fileNbr,secFileNbr,simValue,func,evalPoints); 456 467 457 468 writer.println(); 458 469 } … … 460 471 } 461 472 //writer.println("Best match is :" + (firstMatch+1) +" and " +(secMatch+1)+ "\n"); 462 463 //TODO: What if func is null?473 474 464 475 Arrays.sort(simList,Collections.reverseOrder(new SimComparator())); 465 476 466 477 // Propagate and get count 467 478 468 479 countNbrMatches=0; 469 480 for(FilePoint f:simList) 470 481 { 471 482 if (f.getSimValue()>0){ 472 483 countNbrMatches++; 473 484 474 485 if (progress != null) 475 486 { 476 487 progress.display((100 *countNbrMatches/simList.length), 477 "Aligning files " +(f.getFirstFile()+1) + " and " + (f.getSecondFile()+1) +". File-pair " +countNbrMatches +" of " +simList.length);488 "Aligning files " +(f.getFirstFile()+1) + " and " + (f.getSecondFile()+1) +". File-pair " +countNbrMatches +" of " +simList.length); 478 489 } 479 480 481 490 491 492 482 493 //TODO: här ska allFeatures skickas in för ytterligare utvärdering 483 494 count += matchFeatureLists(f,factory, project,writer,featureQuery,uniqueMsFiles,tolerance,timeTol); 484 485 } 486 487 /*seqList.add(ListUtils.sum(seqList.get(firstMatch),seqList.get(secMatch))); 488 writer.println("Size of union list is :" + seqList.get(seqList.size()-1).size() + "\n"); 489 seqList.set(firstMatch,null); 490 seqList.set(secMatch,null); 491 492 featureSet.add(bestSet); 493 writer.println("Size of union set is :" + bestSet.size() + "\n"); 494 featureSet.set(firstMatch,null); 495 featureSet.set(secMatch,null); 496 497 currMatchEvents++;*/ 498 499 495 496 //TODO: lägg in hur många unika sekvenser filen har EFTER matchning 497 } 498 } 499 500 500 501 501 //writerDat.close(); 502 502 // Commit to database 503 503 dc.commit(); 504 response.setDone("Features matched for " + uniqueMsFiles.size() + " MS files. " + count + " features sequences propagated."); 505 writer.println("Features matched for " + uniqueMsFiles.size() + " MS files. " + count + " features sequences propagated."); 504 505 506 writer.println("==============================="); 507 508 featureQuery.reset(); 509 dc = sc.newDbControl(); 510 for(int fileNbr=0; fileNbr<nbrOfFiles; fileNbr++ ) 511 { 512 File currentMsFile = uniqueMsFiles.get(fileNbr); 513 featureQuery.restrict(Restrictions.eq(Hql 514 .property("msFile"), Hql.entity(currentMsFile))); 515 featureQuery.restrict(Restrictions.neq(Hql 516 .property("peptideSequence"), null)); 517 featureQuery.restrict(Restrictions.gteq(Hql 518 .property("chargeState"),Expressions.integer(2))); 519 520 writer.println("Number of features with sequences in file " +currentMsFile +" after alignment: " +featureQuery.list(dc).size()); 521 writer.println("Total increase in sequence coverage for file " +currentMsFile + ": " +(featureQuery.list(dc).size()-originalFeatWithSeq[fileNbr])/originalFeatWithSeq[fileNbr]); 522 featureQuery.reset(); 523 } 524 dc.close(); 525 526 writer.println("Total number of features matched for " + uniqueMsFiles.size() + " MS files: " + count); 527 response.setDone("Total number of features matched for " + uniqueMsFiles.size() + " MS files: " + count); 528 506 529 // Close log file 507 530 writer.close(); … … 536 559 String retval = null; 537 560 Annotation anno2 = annotator.getAnnotation(currentMsFile, 538 annotationType);561 annotationType); 539 562 List<String> fracList2 = extracted(anno2); 540 563 if (fracList2 != null && !fracList2.isEmpty()) … … 545 568 } 546 569 547 570 548 571 private ArrayList<Point> getPointMatchBySeq(ArrayList<Feature> features, ArrayList<Feature> features2) 549 572 { 550 573 551 574 Iterator<Feature> itr = features.iterator(); 552 575 Iterator<Feature> itr2; 553 576 554 577 ArrayList<Point> points = new ArrayList<Point>(); 555 578 556 579 while(itr.hasNext()) 557 580 { 558 581 Feature f = itr.next(); 559 582 560 583 itr2 = features2.iterator(); 561 584 562 585 int nbrHits = 0; 563 586 … … 565 588 566 589 Feature f2 = itr2.next(); 567 568 590 591 569 592 if(f.getPeptideSequence().equals(f2.getPeptideSequence()) && f.getChargeState().equals(f2.getChargeState())) 570 593 { … … 572 595 573 596 Point tempPoint = new Point(f,f2); 574 597 575 598 //contains is based on the RT x-value, no duplicate x is allowed for the regression 576 599 if(!points.contains(tempPoint)) … … 579 602 if(!(Math.abs(f.getMassToChargeRatio()-f2.getMassToChargeRatio())>0.01) ) 580 603 { 581 582 604 605 583 606 //double weight = Math.max(Math.abs(tempPoint.getXLength()-meanWeight1)/meanWeight1,Math.abs(tempPoint.getYLength()-meanWeight2)/meanWeight2); 584 607 //weight = 1/weight; 585 608 //tempPoint.setWeight(weight); 586 609 587 610 points.add(tempPoint); 588 611 } … … 607 630 } 608 631 609 632 610 633 } 611 634 … … 621 644 622 645 } 623 646 624 647 return points; 625 626 } 627 648 649 } 650 628 651 private ArrayList<Point> getPointMatchByTol(ArrayList<Feature> featuresIn, ArrayList<Feature> featuresIn2, double percentage ) 629 652 { 630 653 631 654 Comparator<Feature> apexC = new ApexIntensityComparator(); 632 655 int cutOff1 = (int) Math.floor(featuresIn.size()*(100-percentage)/100); 633 656 int cutOff2 = (int) Math.floor(featuresIn2.size()*(100-percentage)/100); 634 657 635 658 Collections.sort(featuresIn,apexC); 636 659 Collections.sort(featuresIn2,apexC); 637 660 638 661 //Collections.sort(featuresIn,Collections.reverseOrder(apexC)); 639 662 //Collections.sort(featuresIn2,Collections.reverseOrder(apexC)); 640 663 641 664 List<Feature> features = featuresIn.subList(cutOff1,featuresIn.size()-1); 642 665 List<Feature> features2 = featuresIn2.subList(cutOff2,featuresIn2.size()-1); 643 666 644 667 log.debug("Tol matching size: " +features.size() +" and " +features2.size()); 645 646 668 669 647 670 Iterator<Feature> itr = features.iterator(); 648 671 Iterator<Feature> itr2; 649 672 650 673 ArrayList<Point> points = new ArrayList<Point>(); 651 674 652 675 while(itr.hasNext()) 653 676 { 654 677 Feature f = itr.next(); 655 678 656 679 itr2 = features2.iterator(); 657 680 658 681 int nbrHits = 0; 659 682 … … 685 708 //multiple hit for the tolerance 686 709 687 710 688 711 log.debug("Duplicate tol hit. RT diff p1: " +points.get(points.size()-1).getDiff() + " p2: " +tempPoint.getDiff()); 689 712 //use the one with the "BEST" match, ie the one with the smallest difference … … 700 723 } 701 724 702 703 } 704 705 } 706 707 } 708 725 726 } 727 728 } 729 730 } 731 709 732 Comparator<Feature> rtC = new RTComparatorF(); 710 733 Collections.sort(featuresIn,rtC); 711 734 Collections.sort(featuresIn2,rtC); 712 735 713 736 log.debug("Tol matching points size: " +points.size()); 714 737 715 738 716 739 return points; 717 718 } 719 720 740 741 } 742 743 721 744 /** 722 745 * Match feature lists and propagate sequences from first to second list. 723 746 */ 724 747 private int matchFeatureLists(FilePoint fp, ItemFactory factory, 725 Project project, PrintWriter writer, ItemQuery<Feature> featureQuery,List<File> uniqueMsFiles, double tolerance, double timeTol in)748 Project project, PrintWriter writer, ItemQuery<Feature> featureQuery,List<File> uniqueMsFiles, double tolerance, double timeTol) 726 749 { 727 728 750 751 729 752 DbControl dc = sc.newDbControl(); 730 753 731 754 ArrayList<Point> features = fp.getEvalList(); 732 755 PolynomialSplineFunction func = fp.getSplineFunction(); 733 756 734 757 featureQuery.restrict(Restrictions.eq(Hql 735 .property("msFile"), Hql.entity(uniqueMsFiles.get(fp.getFirstFile()))));758 .property("msFile"), Hql.entity(uniqueMsFiles.get(fp.getFirstFile())))); 736 759 featureQuery.restrict(Restrictions.gteq(Hql 737 .property("chargeState"),Expressions.integer(2)));760 .property("chargeState"),Expressions.integer(2))); 738 761 featureQuery.order(Orders.asc(Hql.property("apexRetentionTimeInMinutes"))); 739 762 List<Feature> tempAllFeatures = featureQuery.list(dc); 740 763 ArrayList<Feature> allFeatures = new ArrayList<Feature>(tempAllFeatures); 741 764 featureQuery.reset(); 742 765 743 766 featureQuery.restrict(Restrictions.eq(Hql 744 767 .property("msFile"), Hql.entity(uniqueMsFiles.get(fp.getFirstFile())))); 745 768 featureQuery.restrict(Restrictions.gteq(Hql 746 769 .property("chargeState"),Expressions.integer(2))); 747 770 featureQuery.order(Orders.asc(Hql.property("apexRetentionTimeInMinutes"))); 748 771 featureQuery.restrict(Restrictions.neq(Hql.property("peptideSequence"), null)); 749 772 List<Feature> tempAllFeaturesSeq = featureQuery.list(dc); 750 773 ArrayList<Feature> allFeaturesSeq = new ArrayList<Feature>(tempAllFeaturesSeq); 751 774 featureQuery.reset(); 752 775 753 776 featureQuery.restrict(Restrictions.eq(Hql 754 .property("msFile"), Hql.entity(uniqueMsFiles.get(fp.getSecondFile()))));777 .property("msFile"), Hql.entity(uniqueMsFiles.get(fp.getSecondFile())))); 755 778 featureQuery.restrict(Restrictions.gteq(Hql 756 .property("chargeState"),Expressions.integer(2)));779 .property("chargeState"),Expressions.integer(2))); 757 780 featureQuery.order(Orders.asc(Hql.property("apexRetentionTimeInMinutes"))); 758 781 List<Feature> tempAllFeatures2 = featureQuery.list(dc); … … 760 783 featureQuery.reset(); 761 784 785 featureQuery.restrict(Restrictions.eq(Hql 786 .property("msFile"), Hql.entity(uniqueMsFiles.get(fp.getSecondFile())))); 787 featureQuery.restrict(Restrictions.gteq(Hql 788 .property("chargeState"),Expressions.integer(2))); 789 featureQuery.order(Orders.asc(Hql.property("apexRetentionTimeInMinutes"))); 790 featureQuery.restrict(Restrictions.neq(Hql.property("peptideSequence"), null)); 791 List<Feature> tempAllFeaturesSeq2 = featureQuery.list(dc); 792 ArrayList<Feature> allFeaturesSeq2 = new ArrayList<Feature>(tempAllFeaturesSeq); 793 featureQuery.reset(); 794 762 795 ArrayList<Feature> bestMatchList = new ArrayList<Feature>(); 763 796 ArrayList<Feature> fList = new ArrayList<Feature>(); … … 767 800 //double timeTol = 0.5; 768 801 //double[] timeTol = {0.25,0.5,0.75,1,1.5,2,3,4,5}; 769 double[] timeTol = {0, timeTolin}; 770 802 803 771 804 writer.println("==============================="); 772 805 writer.println("Evaluating match with " +features.size() +" features with sequences for file " +fp.getFirstFile() +" and " +fp.getSecondFile() +" with similarity score: " +fp.getSimValue()); 773 806 writer.println(); 807 808 809 log.debug("Evaluating match with " +features.size() +" features with sequences for file " +fp.getFirstFile() +" and " +fp.getSecondFile() +" with similarity score: " +fp.getSimValue()); 774 810 775 811 776 log.debug("Evaluating match with " +features.size() +" features with sequences for file " +fp.getFirstFile() +" and " +fp.getSecondFile() +" with similarity score: " +fp.getSimValue()); 777 812 String localSampleId = null; 813 String fractionId = null; 814 815 for (Feature f : allFeatures) 816 { 817 if (f.getPeptideSequence() != null) 818 { 819 Iterator<Hit> i = f.getHits().iterator(); 820 if (i.hasNext()) 821 { 822 Hit h = i.next(); 823 localSampleId = h.getLocalSampleId(); 824 fractionId = h.getFractionId(); 825 log 826 .debug("Found local sample id : " + localSampleId + " fraction: " + fractionId); 827 break; 828 } 829 } 830 } 778 831 779 832 String localSampleId2 = null; … … 791 844 fractionId2 = h.getFractionId(); 792 845 log 793 .debug("Found local sample id2:" + localSampleId2 + " fraction:" + fractionId2);846 .debug("Found local sample id2: " + localSampleId2 + " fraction for set 2: " + fractionId2); 794 847 break; 795 848 } … … 797 850 } 798 851 799 800 double[] fineSeq = new double[timeTol.length]; 801 double[] notFineSeq = new double[timeTol.length]; 802 double[] fineTolSeq = new double[timeTol.length]; 803 /* 852 853 double fineSeq = 0; 854 double fineTolSeq = 0; 855 804 856 for (Point p : features) 805 857 { … … 814 866 { 815 867 fRT = (float) func.value(f 816 .getApexRetentionTimeInMinutes().doubleValue());868 .getApexRetentionTimeInMinutes().doubleValue()); 817 869 } 818 870 catch (ArgumentOutsideDomainException e) … … 824 876 Feature f2 = p2.getYFeature(); 825 877 String f2seq = f2.getPeptideSequence(); 826 827 828 829 878 // log.debug("Feature to match RT:" + f.getApexRetentionTimeInMinutes() + "after shift:" + fRT + " diff:" + (fRT - f.getApexRetentionTimeInMinutes())); 830 831 879 //sequence evaluation for recall and tol evaluation for precision 832 //TODO: skriv detta som funktion så att både allFeatures och Points kan skickas in för utvärdering 833 for (int k=0;k<timeTol.length;k++){ 834 835 836 837 if ((Math.abs(f.getMassToChargeRatio() - f2.getMassToChargeRatio()) < tolerance) && f2.getChargeState().equals(f.getChargeState()) && (Math.abs(f2.getApexRetentionTimeInMinutes() - fRT) < timeTol[k])) 880 if ((Math.abs(f.getMassToChargeRatio() - f2.getMassToChargeRatio()) < tolerance) && f2.getChargeState().equals(f.getChargeState()) && (Math.abs(f2.getApexRetentionTimeInMinutes() - fRT) < timeTol)) 881 { 882 883 //fseq och f2seq ska inte kunna vara null 884 if (f2seq.equals(fseq)) 838 885 { 839 840 if (f2seq!=null && fseq!=null && f2seq.equals(fseq)) 841 { 842 fineSeq[k]++; 843 844 }else{ 845 846 fineTolSeq[k]++; 847 848 writer.println("Current timetol: " +timeTol[k]); 849 writer.println("Feature with sequence " + f.getPeptideSequence() + " matching " + f2.getPeptideSequence() + " time diff: " +(Math.abs(f2.getApexRetentionTimeInMinutes() - fRT) + " mz diff: " +(Math.abs(f.getMassToChargeRatio() - f2.getMassToChargeRatio())))); 850 851 } 852 853 }else if (f2seq!=null && fseq!=null && f2seq.equals(fseq) && f2.getChargeState().equals(f.getChargeState())) 854 { 855 notFineSeq[k]++; 856 log.debug("Current timetol: " +timeTol[k]); 857 log.debug("Feature with sequence " + f.getPeptideSequence() + " not matching " + f2.getPeptideSequence() + " time diff: " +(Math.abs(f2.getApexRetentionTimeInMinutes() - fRT) + " mz diff: " +(Math.abs(f.getMassToChargeRatio() - f2.getMassToChargeRatio())))); 886 fineSeq++; 887 888 }else{ 889 890 fineTolSeq++; 891 892 writer.println("Current timetol: " +timeTol); 893 writer.println("Feature with sequence " + f.getPeptideSequence() + " matching " + f2.getPeptideSequence() + " time diff: " +(Math.abs(f2.getApexRetentionTimeInMinutes() - fRT) + " mz diff: " +(Math.abs(f.getMassToChargeRatio() - f2.getMassToChargeRatio())))); 894 858 895 } 859 860 } 861 862 } 863 } 864 */ 896 897 //sequences that should be matching 898 }else if (f2seq.equals(fseq) && f2.getChargeState().equals(f.getChargeState())){ 899 900 log.debug("Current timetol: " +timeTol +" and current mz tol: " +tolerance); 901 log.debug("Feature with sequence " + f.getPeptideSequence() + " not matching " + f2.getPeptideSequence() + " time diff: " +(Math.abs(f2.getApexRetentionTimeInMinutes() - fRT) + " mz diff: " +(Math.abs(f.getMassToChargeRatio() - f2.getMassToChargeRatio())))); 902 903 } 904 } 905 } 906 865 907 866 908 867 909 int count = 0; 868 double[] fine = new double[timeTol.length];869 double[] fineTol = new double[timeTol.length];870 double[] fineTest = new double[timeTol.length];871 872 910 int minNdx = 0; 873 874 875 911 912 //double timeTolTemp = 0; 913 876 914 for(Feature f:allFeatures){ 877 878 915 916 879 917 float fRT = f.getApexRetentionTimeInMinutes(); 880 918 … … 884 922 { 885 923 fRT = (float) func.value(f 886 .getApexRetentionTimeInMinutes().doubleValue());924 .getApexRetentionTimeInMinutes().doubleValue()); 887 925 } 888 926 catch (ArgumentOutsideDomainException e) … … 890 928 } 891 929 892 while( minNdx<(allFeatures2.size()-1) && fRT-allFeatures2.get(minNdx).getApexRetentionTimeInMinutes()>timeTol [timeTol.length-1]){930 while( minNdx<(allFeatures2.size()-1) && fRT-allFeatures2.get(minNdx).getApexRetentionTimeInMinutes()>timeTol){ 893 931 minNdx++; 894 932 } 895 933 896 934 Feature bestMatch = null; 897 898 899 boolean[] hit = new boolean[timeTol.length]; 900 boolean[] moreHits = new boolean[timeTol.length]; 901 902 // log.debug("For RT: " +fRT +" Starting at: " +allFeatures2.get(minNdx).getApexRetentionTimeInMinutes()); 903 935 936 boolean hit = false; 937 938 939 // log.debug("For RT: " +fRT +" Starting at: " +allFeatures2.get(minNdx).getApexRetentionTimeInMinutes()); 940 904 941 for(Feature f2: allFeatures2.subList(minNdx,allFeatures2.size()-1)) 905 942 { 906 943 907 908 /*if(fRT-f2.getApexRetentionTimeInMinutes()>timeTol[timeTol.length-1]){909 continue;910 }*/911 912 944 //slut på sökning 913 if(f2.getApexRetentionTimeInMinutes()-fRT>timeTol[timeTol.length-1]){ 914 // log.debug("For RT: " +fRT +"Ending at: " +allFeatures2.get(minNdx).getApexRetentionTimeInMinutes()); 945 if(f2.getApexRetentionTimeInMinutes()-fRT>timeTol){ 915 946 break; 916 947 } 917 948 918 919 920 949 if ((Math.abs(f.getMassToChargeRatio() - f2.getMassToChargeRatio()) < tolerance) && f2.getChargeState().equals(f.getChargeState())) 921 950 922 951 { 923 924 for (int k=0;k<timeTol.length;k++){ 925 926 927 928 if(Math.abs(f2.getApexRetentionTimeInMinutes() - fRT) < timeTol[k]) 929 { 930 931 fineTest[k]++; 932 933 if(!hit[k]){ 934 //there is a single hit 935 fine[k]++; 936 hit[k] = true; 937 bestMatch = f2; 938 continue; 939 }else if(!moreHits[k]){ 940 941 //there is more than one hit for this peptide sequence 942 943 fineTol[k]++; 944 // log.debug("There is a second hit: " +fineTol[k]); 945 //moreHits[k] = true; 946 } 947 952 if(Math.abs(f2.getApexRetentionTimeInMinutes() - fRT) < timeTol) 953 { 954 if(!hit){ 955 //there is a single hit 956 hit = true; 957 bestMatch = f2; 958 }else{ 948 959 //check if the second and further hits are better than the current bestMatch, will always contain the best match of all features, even split peaks 949 960 if(Math.abs(bestMatch.getApexRetentionTimeInMinutes() - fRT)>Math.abs(f2.getApexRetentionTimeInMinutes() - fRT)) 950 961 { 951 962 log.debug("Best match is updated."); 952 log.debug("Old rt diff is: " +Math.abs(bestMatch.getApexRetentionTimeInMinutes() - fRT) + "new is: " +Math.abs(f2.getApexRetentionTimeInMinutes() - fRT));963 log.debug("Old rt diff is: " +Math.abs(bestMatch.getApexRetentionTimeInMinutes() - fRT)); 953 964 bestMatch = f2; 965 log.debug("New rt diff is: " +Math.abs(bestMatch.getApexRetentionTimeInMinutes() - fRT)); 966 954 967 } 955 968 956 969 } 957 958 970 959 971 } 960 961 } 962 963 } 964 965 //TODO: Same with sequence. Write back to feature table as well as hit table. May not be restricted at all to project in future. 966 /*QueryFactory qf2 = new QueryFactory(); 967 ItemQuery<Feature> featureQuery2 = qf2.select(Feature.class); 968 featureQuery2.restrict(Restrictions.eq(Hql.property("project"), Hql.entity(project)));*/ 969 970 /*ItemQuery query = ... 971 List<Integer> selected = ... 972 973 query.restrict( 974 Restrictions.in( 975 Hql.property("id"), 976 Expressions.parameter("selectedItems", selected, Type.INT) 977 )); 978 */ 979 980 972 } 973 974 } 975 976 981 977 if(bestMatch!=null) 982 978 { 979 980 981 //log.debug("Matched features with RT " + f.getApexRetentionTimeInMinutes() + " and RT:" + bestMatch.getApexRetentionTimeInMinutes()); 983 982 984 985 log.debug("Matched features with RT " + f.getApexRetentionTimeInMinutes() + " and RT:" + bestMatch.getApexRetentionTimeInMinutes()); 986 count++; 987 988 log.debug("First feature: " +f.getClusterId() + " bestMatch feature: " +bestMatch.getClusterId()); 983 984 //log.debug("First feature: " +f.getClusterId() + " bestMatch feature: " +bestMatch.getClusterId()); 989 985 //All features have clusterID:s, they are set at start of alignment -> all features that have a clusterID have undergone alignment 990 986 if(!f.getClusterId().equals(bestMatch.getClusterId())){ 991 987 //log.debug("Setting new clusterID"); 992 988 993 989 bestMatchList.add(bestMatch); 994 990 fList.add(f); 995 991 bestMatchClusterIDList.add(bestMatch.getClusterId()); 996 997 //All features that have the same clusterID as bestMatch will have their clusterID changed to that of f 998 //TODO: Restrict to project to start with - LATER REMOVE SO ALL FEATURES IN DATABASE WILL BE CHANGED 999 1000 //long tempID = bestMatch.getClusterId(); 1001 1002 /*dc.reattachItem(bestMatch); 1003 bestMatch.setClusterId(f.getClusterId()); 1004 1005 dc.commit(); 1006 dc = sc.newDbControl(); 1007 dc.reattachItem(f);*/ 1008 1009 /* 1010 log.debug("First feature: " +f.getClusterId() + " bestMatch feature: " +bestMatch.getClusterId()); 1011 1012 1013 //feature should find itself in the database 1014 1015 featureQuery2.restrict(Restrictions.eq(Hql.property("clusterId"), Expressions.aLong(f.getClusterId()))); 1016 List<Feature> clusterIDFeatures = featureQuery2.list(dc); 1017 1018 //size of clusterF is larger than 0 only if there exists features that have the same clusterID as f 1019 log.debug("Size of clusterIdFeatures: " +clusterIDFeatures.size()); 1020 featureQuery2.reset(); 1021 1022 for (Feature clusterF: clusterIDFeatures){ 1023 dc.reattachItem(clusterF); 1024 clusterF.setClusterId(f.getClusterId()); 1025 1026 }*/ 1027 } 1028 1029 1030 //TODO: Propagate the sequence (within the project for now)-> if both have a sequence or both are null, do nothing (CHANGE LATER) 1031 //Update hits table 1032 1033 /*if (f.getPeptideSequence()!=null && bestMatch.getPeptideSequence()==null){ 1034 updateHitsList(f,bestMatch,localSampleId2, fractionId2,allFeatures,writer,dc,factory,project); 1035 }else if(f.getPeptideSequence()==null && bestMatch.getPeptideSequence()!=null){ 1036 updateHitsList(bestMatch,f,localSampleId2,fractionId2,allFeatures2,writer,dc,factory,project); 1037 }*/ 1038 1039 1040 } 1041 1042 1043 } 992 993 } 994 995 } 996 997 998 } 999 1000 int hitsCounter = 0; 1044 1001 1045 1002 if (!bestMatchClusterIDList.isEmpty()){ … … 1055 1012 log.debug("Size of fList: " +fList.size()); 1056 1013 1057 1014 count=bestMatchList.size(); 1058 1015 1059 1016 … … 1064 1021 1065 1022 int featureNbr = bestMatchClusterIDList.indexOf(bestMatchClusterID); 1066 log.debug("Featurenr: " +featureNbr);1023 //log.debug("Featurenr: " +featureNbr); 1067 1024 1068 1025 Feature f = fList.get(featureNbr); … … 1071 1028 dc.reattachItem(bestMatch); 1072 1029 1073 log.debug("First feature: " +f.getClusterId() + " bestMatch feature: " +bestMatch.getClusterId());1030 //log.debug("First feature: " +f.getClusterId() + " bestMatch feature: " +bestMatch.getClusterId()); 1074 1031 1075 1032 bestMatch.setClusterId(f.getClusterId()); 1076 1033 1077 1034 //updating hits 1078 1035 if (f.getPeptideSequence()!= null && bestMatch.getPeptideSequence()==null){ 1079 updateHitsList(f,bestMatch,localSampleId2,fractionId2,allFeaturesSeq,writer,dc,factory,project); 1080 } 1081 1082 log.debug("First feature after set: " +f.getClusterId() + " bestMatch feature: " +bestMatch.getClusterId()); 1036 //log.debug("Updating hit for sequence: " +f.getPeptideSequence()); 1037 hitsCounter+=updateHitsList(f,bestMatch,localSampleId2,fractionId2,allFeaturesSeq,writer,dc,factory,project); 1038 }else if(f.getPeptideSequence()== null && bestMatch.getPeptideSequence()!=null){ 1039 hitsCounter+=updateHitsList(bestMatch,f,localSampleId,fractionId,allFeaturesSeq2,writer,dc,factory,project); 1040 } 1041 //TODO: what to do if both have sequences? 1042 1043 //log.debug("First feature after set: " +f.getClusterId() + " bestMatch feature: " +bestMatch.getClusterId()); 1083 1044 1084 1045 } … … 1087 1048 } 1088 1049 dc.commit(); 1050 1051 1052 writer.println(); 1053 1054 writer.println("Number of features matched:" + count); 1055 writer.println("Number of hits created:" + hitsCounter); 1056 writer.println("Pairwsie alignment recall based on sequences with mzTol " +tolerance +" and timeTol " +timeTol +": " +fineSeq/features.size()); 1057 writer.println("Pairwsie alignment precision based on sequences with mzTol " +tolerance +" and timeTol " +timeTol +": " +(1-fineTolSeq/features.size())); 1058 writer.println(); 1059 1060 1061 return count; 1062 } 1063 1064 1065 private int updateHitsList(Feature f, Feature f2, String localSampleId2, String fractionId2, ArrayList<Feature> allFeaturesSeq, PrintWriter writer, DbControl dc, ItemFactory factory, Project project){ 1066 //create a hit in database for f2 (the feature whose sequence has been updated) 1067 1068 f2.setPeptideSequence(f.getPeptideSequence()); 1069 dc.reattachItem(f); 1070 dc.reattachItem(f2); 1089 1071 1072 int newHitCounter = 0; 1090 1073 1091 writer.println(); 1092 //writer.println("Peptide sequences propagated:" + count); 1093 writer.println("Alignment recall and precision based on sequences with mzTol " +tolerance +": "); 1094 for (int k=0;k<timeTol.length;k++){ 1095 writer.println("For timeTol " +timeTol[k] +": " +fineSeq[k]/features.size() +" " +(1-fineTolSeq[k]/features.size())); 1096 } 1097 writer.println("Precision for all features with mzTol " +tolerance +": "); 1098 for (int k=0;k<timeTol.length;k++){ 1099 writer.println("For timeTol " +timeTol[k] +": "); 1100 writer.println("Number of total matched features: " +fineTest[k]); 1101 writer.println("Number of matched features: " +fine[k] +" and recall value: " +(fine[k]*2.0/(allFeatures.size()+allFeatures2.size()))); 1102 writer.println("Number of features matching more than one: " +fineTol[k] +" Precision vaule: " +(1-(fineTol[k]/(fine[k]+fineTol[k])))); 1103 } 1104 writer.println(); 1105 1106 1107 return count; 1108 } 1109 1110 1111 private void updateHitsList(Feature f, Feature f2, String localSampleId2, String fractionId2, ArrayList<Feature> allFeaturesSeq, PrintWriter writer, DbControl dc, ItemFactory factory, Project project){ 1112 //create a hit in database for f2 (the feature whose sequence has been updated) 1113 1114 f2.setPeptideSequence(f.getPeptideSequence()); 1115 dc.reattachItem(f); 1116 dc.reattachItem(f2); 1117 1118 Hit h = null; 1119 for (Hit h1 : f.getHits()) 1120 { 1121 // Take the best hit matching the feature 1122 if (h == null || h1.getCombinedFDR() != null && (h 1123 .getCombinedFDR() == null || (h1 1124 .getCombinedFDR() < h.getCombinedFDR()))) 1125 { 1126 h = h1; 1127 } 1128 } 1074 Hit h = null; 1075 for (Hit h1 : f.getHits()) 1076 { 1077 // Take the best hit matching the feature 1078 if (h == null || h1.getCombinedFDR() != null && (h 1079 .getCombinedFDR() == null || (h1 1080 .getCombinedFDR() < h.getCombinedFDR()))) 1081 { 1082 h = h1; 1083 } 1084 } 1129 1085 1130 1086 /* * If there was no hit for the feature, then take 1131 1087 * from another feature with the same peptide 1132 1088 * sequence (ignoring delta of modifications). 1133 */ 1134 if (h == null) 1089 */ 1090 if (h == null) 1091 { 1092 String fseq = f.getPeptideSequence(); 1093 if (fseq.contains(" ")) 1094 fseq = fseq.substring(0, fseq 1095 .lastIndexOf(' ')); 1096 for (Feature f3 : allFeaturesSeq) 1097 { 1098 if ((fseq.equals(f3.getPeptideSequence()) || (fseq 1099 .contains(" ") && f3 1100 .getPeptideSequence().startsWith(fseq))) && !f3 1101 .getHits().isEmpty() && f.getChargeState().equals(f3.getChargeState())) 1102 { 1103 for (Hit h1 : f3.getHits()) 1104 { 1105 if (h == null || h1 1106 .getCombinedFDR() != null && (h 1107 .getCombinedFDR() == null || (h1 1108 .getCombinedFDR() < h 1109 .getCombinedFDR()))) 1135 1110 { 1136 String fseq = f.getPeptideSequence(); 1137 if (fseq.contains(" ")) 1138 fseq = fseq.substring(0, fseq 1139 .lastIndexOf(' ')); 1140 for (Feature f3 : allFeaturesSeq) 1141 { 1142 if ((fseq.equals(f3.getPeptideSequence()) || (fseq 1143 .contains(" ") && f3 1144 .getPeptideSequence().startsWith(fseq))) && !f3 1145 .getHits().isEmpty() && f.getChargeState().equals(f3.getChargeState())) 1146 { 1147 for (Hit h1 : f3.getHits()) 1148 { 1149 if (h == null || h1 1150 .getCombinedFDR() != null && (h 1151 .getCombinedFDR() == null || (h1 1152 .getCombinedFDR() < h 1153 .getCombinedFDR()))) 1154 { 1155 h = h1; 1156 log 1157 .debug("Assigning hit from feature at RT " + f3 1158 .getApexRetentionTimeInMinutes() + " with sequence:" + f3 1159 .getPeptideSequence()); 1160 } 1161 } 1162 } 1163 1164 } 1111 h = h1; 1112 log 1113 .debug("Assigning hit from feature at RT " + f3 1114 .getApexRetentionTimeInMinutes() + " with sequence:" + f3 1115 .getPeptideSequence()); 1165 1116 } 1166 if (h != null && localSampleId2 != null) 1167 { 1168 Hit newHit = factory.create(Hit.class); 1169 dc.saveItem(newHit); 1170 // To allow the user to trace the identification: 1171 newHit.setPeakListFile(h.getPeakListFile()); 1172 newHit.setIdentificationResultFile(h.getIdentificationResultFile()); 1173 newHit.setSpectrumStringId(h.getSpectrumStringId()); 1174 newHit.setSpectrumId(h.getSpectrumId()); 1175 newHit.setCombinedFDR(h.getCombinedFDR()); 1176 newHit.setDescription(h.getDescription()); 1177 newHit.setExternalId(h.getExternalId()); 1178 newHit.setCharge(h.getCharge()); 1179 // From the sample we're matching with: 1180 newHit.setFractionId(fractionId2); 1181 newHit.setLocalSampleId(localSampleId2); 1182 newHit.setScoreType("Proteios aligned"); 1183 newHit.setFeature(f2); 1184 newHit.setProject(project); 1185 newHit.setPrecursorQuantity(f2 1186 .getIntegratedIntensity()); 1187 newHit.setRetentionTimeInMinutes(f2 1188 .getApexRetentionTimeInMinutes()); 1189 newHit.setPrimaryCombined(true); 1190 newHit.setProtein(false); 1191 newHit.setFirstInCombination(h); 1192 writer.println("New hit generated:" + newHit 1193 .getDescription()); 1194 } 1195 else 1196 { 1197 writer 1198 .println("Feature: " + f2 1199 .getMassToChargeRatio() + " sequence " + f2 1200 .getPeptideSequence() + ". Local sample id: " + localSampleId2); 1201 f2.setHits(f.getHits()); 1202 } 1203 1204 } 1205 1117 } 1118 } 1119 1120 } 1121 } 1122 if (h != null && localSampleId2 != null) 1123 { 1124 Hit newHit = factory.create(Hit.class); 1125 dc.saveItem(newHit); 1126 // To allow the user to trace the identification: 1127 newHit.setPeakListFile(h.getPeakListFile()); 1128 newHit.setIdentificationResultFile(h.getIdentificationResultFile()); 1129 newHit.setSpectrumStringId(h.getSpectrumStringId()); 1130 newHit.setSpectrumId(h.getSpectrumId()); 1131 newHit.setCombinedFDR(h.getCombinedFDR()); 1132 newHit.setDescription(h.getDescription()); 1133 newHit.setExternalId(h.getExternalId()); 1134 newHit.setCharge(h.getCharge()); 1135 // From the sample we're matching with: 1136 newHit.setFractionId(fractionId2); 1137 newHit.setLocalSampleId(localSampleId2); 1138 newHit.setScoreType("Proteios aligned"); 1139 newHit.setFeature(f2); 1140 newHit.setProject(project); 1141 newHit.setPrecursorQuantity(f2 1142 .getIntegratedIntensity()); 1143 newHit.setRetentionTimeInMinutes(f2 1144 .getApexRetentionTimeInMinutes()); 1145 newHit.setPrimaryCombined(true); 1146 newHit.setProtein(false); 1147 newHit.setFirstInCombination(h); 1148 log.debug("New hit generated:" + newHit 1149 .getDescription()); 1150 newHitCounter++; 1151 } 1152 else 1153 { 1154 log.debug("Feature: " + f2 1155 .getMassToChargeRatio() + " sequence " + f2 1156 .getPeptideSequence() + ". Local sample id: " + localSampleId2); 1157 f2.setHits(f.getHits()); 1158 } 1159 1160 return newHitCounter; 1161 1162 } 1163 1206 1164 private PolynomialSplineFunction estimateSplineFunc(ArrayList<Point> points, PrintWriter writer) 1207 1165 { … … 1216 1174 double[] y = new double[points.size()]; 1217 1175 double[] weights = new double[points.size()]; 1218 1176 1219 1177 1220 1178 int index = 0; … … 1227 1185 y[index]=tempPoint.getY(); 1228 1186 weights[index]=tempPoint.getWeight(); 1229 1187 1230 1188 index++; 1231 1189 } 1232 1190 1233 1191 try 1234 1192 { … … 1245 1203 } 1246 1204 1247 1205 1248 1206 private void setMzDistribution(List<Feature> features, double[][] mzDist, double[] meanLength, int fileNbr){ 1249 1207 1250 1208 //TODO: get the minMz, maxMz, rtMax values automatically 1251 1209 1252 1210 int minMz=400; 1253 1211 int maxMz=2000; 1254 1212 1255 1213 int mult = 100; 1256 1214 1257 1215 mzDist[fileNbr] = new double[(mult)*(maxMz-minMz)]; 1258 1216 1259 1217 double sum = 0; 1260 1218 double nbrSeq = 0; 1261 1219 1262 1220 //Double min = Double.POSITIVE_INFINITY; 1263 1221 //Double max = Double.NEGATIVE_INFINITY; 1264 1222 1265 1223 //int minRT = Integer.MAX_VALUE; 1266 1224 //int maxRT = Integer.MIN_VALUE; 1267 1225 1268 1226 for(Feature f: features) 1269 1227 { 1270 1228 int mz = (int) (Math.round((f.getMassToChargeRatio()-minMz)*mult)); 1271 1272 1229 1230 1273 1231 mzDist[fileNbr][mz]++; 1274 1232 1275 1233 if(f.getPeptideSequence()!=null) 1276 1234 { 1277 1235 nbrSeq++; 1278 1236 sum+=f.getEndRetentionTimeInMinutes().doubleValue()-f.getStartRetentionTimeInMinutes().doubleValue(); 1279 1280 } 1281 1282 } 1283 1237 1238 } 1239 1240 } 1241 1284 1242 meanLength[fileNbr] = sum/nbrSeq; 1285 1286 } 1287 1243 1244 } 1245 1288 1246 private class SeqComparator implements Comparator<Feature>{ 1289 1247 … … 1295 1253 return f.getChargeState().compareTo(f2.getChargeState()); 1296 1254 }else{ 1297 1255 1298 1256 return f.getPeptideSequence().compareTo(f2.getPeptideSequence()); 1299 1257 } 1300 1301 } 1302 1303 } 1304 1258 1259 } 1260 1261 } 1262 1305 1263 private class FeatureSorter implements Comparator<Feature>{ 1306 1264 … … 1310 1268 { 1311 1269 if (f.getPeptideSequence().equals(f2.getPeptideSequence()) && f.getChargeState().equals(f2.getChargeState())){ 1312 1270 1313 1271 return f.getApexRetentionTimeInMinutes().compareTo(f2.getApexRetentionTimeInMinutes()); 1314 1272 1315 1273 }else if(f.getPeptideSequence().equals(f2.getPeptideSequence())){ 1316 1274 1317 1275 return f.getChargeState().compareTo(f2.getChargeState()); 1318 1276 1319 1277 }else{ 1320 1278 1321 1279 return f.getPeptideSequence().compareTo(f2.getPeptideSequence()); 1322 1323 } 1324 1325 1326 1280 1281 } 1282 1283 1284 1327 1285 1328 1286 } … … 1341 1299 1342 1300 } 1343 1301 1344 1302 private class MzDiffComparator implements Comparator<Point>{ 1345 1303 … … 1353 1311 1354 1312 } 1355 1313 1356 1314 private class ApexIntensityComparator implements Comparator<Feature>{ 1357 1315 … … 1365 1323 1366 1324 } 1367 1325 1368 1326 private class IntensityComparator implements Comparator<Feature>{ 1369 1327 … … 1377 1335 1378 1336 } 1379 1380 1337 1338 1381 1339 private class RTComparator implements Comparator<Point>{ 1382 1340 … … 1390 1348 1391 1349 } 1392 1350 1393 1351 private class RTComparatorF implements Comparator<Feature>{ 1394 1352 … … 1429 1387 1430 1388 } 1431 1432 1389 1390 1433 1391 public Feature getXFeature() 1434 1392 { 1435 1393 return this.x; 1436 1394 } 1437 1395 1438 1396 public Feature getYFeature() 1439 1397 { 1440 1398 return this.y; 1441 1399 } 1442 1400 1443 1401 public double getXLength() 1444 1402 { … … 1460 1418 return this.y.getApexRetentionTimeInMinutes().doubleValue(); 1461 1419 } 1462 1420 1463 1421 public double getMzX() 1464 1422 { … … 1486 1444 return Math.abs(this.getX()-this.getY()); 1487 1445 } 1488 1446 1489 1447 public double getMzDiff() 1490 1448 { … … 1525 1483 private int file2 = 0; 1526 1484 private ArrayList<Point> evalPoint = new ArrayList<Point>(); 1527 1485 1528 1486 public FilePoint() 1529 1487 {} 1530 1488 1531 1489 public FilePoint(int fileNbr, int fileNbr2, double simValue, PolynomialSplineFunction func, ArrayList<Point> evalPoint) 1532 1490 { … … 1558 1516 return this.func; 1559 1517 } 1560 1518 1561 1519 public ArrayList<Point> getEvalList() { 1562 1520 1563 1521 return this.evalPoint; 1564 1522 }
Note: See TracChangeset
for help on using the changeset viewer.