Changeset 4446
- Timestamp: Mar 13, 2013, 3:45:40 PM (10 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/plugin/src/org/proteios/plugins/FeatureSequencePropagator.java
r4442 r4446 36 36 import java.util.Comparator; 37 37 import java.util.Date; 38 import java.util.Iterator;39 38 import java.util.List; 40 39 import java.util.ListIterator; … … 47 46 import org.apache.commons.collections.ListUtils; 48 47 import org.apache.commons.collections.functors.NotNullPredicate; 49 import org.apache.commons.lang.ArrayUtils;50 48 import org.apache.commons.math.ArgumentOutsideDomainException; 51 49 import org.apache.commons.math.MathException; … … 545 543 546 544 if (totNbrMatches < MIN_NBR_MATCHING_SEQ) { 547 548 double mzForPointMatchByTol = 0.01; 549 double rtForPointMatchByTol = 5; 550 551 //TODO:fill up with tolerances matches, check for duplicates when finding tolerance matches instead of clearing the points list 552 points.clear(); 553 545 554 546 writer.println("There are too few common sequences for file " 555 547 + (fileNbr + 1) 556 548 + " and " 557 + (secFileNbr + 1) + "."); 558 writer.println("Alignment will be performed by using tolerances: " +mzForPointMatchByTol +" Da and " +rtForPointMatchByTol +" min."); 549 + (secFileNbr + 1) + ". Adding landmarks by tolerances."); 559 550 log.debug("There are too few common sequences for file " 560 551 + (fileNbr + 1) 561 552 + " and " 562 + (secFileNbr + 1) + "."); 563 log.debug("Alignment will be performed by using tolerances: " +mzForPointMatchByTol +" Da and " +rtForPointMatchByTol +" min."); 553 + (secFileNbr + 1) + ". 
Adding landmarks by tolerances."); 554 555 556 double mzForPointMatchByTol = 0.01; 557 double rtForPointMatchByTol = 10; 558 559 560 if (totNbrMatches >= 20){ 561 562 writer.println("Estimating tolerances from sequences."); 563 log.debug("Estimating tolerances from sequences."); 564 565 int quartileFactor = 10; 566 double[] initialMzRTVec = getInitialMzAndRT(points, quartileFactor, writer); 567 568 mzForPointMatchByTol = initialMzRTVec[0]; 569 rtForPointMatchByTol = initialMzRTVec[1]; 570 571 } 572 573 points.clear(); 574 575 576 writer.println("Feature pairs will be extracted by using tolerances: " +mzForPointMatchByTol +" Da and " +rtForPointMatchByTol +" min."); 577 578 log.debug("Feature pairs will be extracted by using tolerances: " +mzForPointMatchByTol +" Da and " +rtForPointMatchByTol +" min."); 564 579 565 580 simValue = getPointMatchByTol(points, … … 573 588 simTemp = simValue; 574 589 575 //TODO: integrate simValue for using both sequences and tolerances576 590 simValue = getPointMatchByTol(points, 577 591 uniqueMsFiles.get … … 596 610 if (totNbrMatches < MIN_NBR_MATCHING_SEQ) { 597 611 598 writer.println("There are still too few common sequences for file "612 writer.println("There are still too few common features for file " 599 613 + (fileNbr + 1) 600 614 + " and " 601 615 + (secFileNbr + 1) + "."); 602 616 writer.println("No alignment will be performed for this file-pair."); 603 log.debug("There are still too few common sequences for file "617 log.debug("There are still too few common features for file " 604 618 + (fileNbr + 1) 605 619 + " and " … … 614 628 // considered an outlier 615 629 int quartileFactor = 10; 616 int nbrOfOutliers = 0; 617 618 Collections.sort(points, new DiffComparator()); 619 620 double lowerQuartile = points.get( 621 (int) Math.round(0.25 * points.size())) 622 .getDiff(); 623 double upperQuartile = points.get( 624 (int) Math.round(0.75 * points.size())) 625 .getDiff(); 626 double medianQuartile = points.get( 627 (int) 
Math.round(0.5 * points.size())) 628 .getDiff(); 629 double interQuartileRange = upperQuartile 630 - lowerQuartile; 631 double upperFence = upperQuartile + quartileFactor 632 * interQuartileRange; 633 634 writer.println("Median RT difference before alignment: " 635 + medianQuartile); 636 writer.println("Largest RT difference before alignment: " 637 + points.get(points.size() - 1).getDiff()); 638 639 int maxCutOff = points.size() - 1; 640 while (points.get(maxCutOff).getDiff() > upperFence) { 641 points.remove(maxCutOff); 642 maxCutOff--; 643 nbrOfOutliers++; 644 } 645 646 writer.println("Removing " 647 + nbrOfOutliers 648 + " outliers. Total number of sequence matches is now: " 649 + points.size()); 650 651 medianQuartile = points.get( 652 (int) Math.round(0.5 * points.size())) 653 .getDiff(); 654 655 writer.println("Median RT difference is now: " 656 + medianQuartile); 657 writer.println("Largest RT difference is now: " 658 + points.get(points.size() - 1).getDiff()); 659 660 Collections.sort(points, new MzDiffComparator()); 661 double medianQuartileMz = points.get( 662 (int) Math.round(0.5 * points.size())) 663 .getMzDiff(); 664 writer.println("Median mz difference before alignment: " 665 + medianQuartileMz); 666 writer.println("Largest mz difference before alignment: " 667 + points.get(points.size() - 1).getMzDiff()); 668 669 // the mz tolrance is set as the largest (rounded) mz 670 double mzTol = points.get(points.size() - 1) 671 .getMzDiff(); 672 mzTol = Math.round(100000 * mzTol + 1) 673 / (double) 100000; 674 675 log.debug("Mz tol is: " + mzTol); 676 677 Collections.sort(points, new RTComparator()); 678 630 631 double[] initialMzRTVec = getInitialMzAndRT(points, quartileFactor, writer); 632 double mzTol = initialMzRTVec[0]; 633 634 679 635 if (simValue == 0) { 680 636 simValue = points.size() * 2.0 … … 1471 1427 } 1472 1428 1429 } 1430 1431 private double[] getInitialMzAndRT(ArrayList<Point> points, int quartileFactor, PrintWriter writer){ 1432 1433 double[] 
initialMzRTVec = new double[2]; 1434 1435 int nbrOfOutliers = 0; 1436 1437 Collections.sort(points, new DiffComparator()); 1438 1439 double lowerQuartile = points.get( 1440 (int) Math.round(0.25 * points.size())) 1441 .getDiff(); 1442 double upperQuartile = points.get( 1443 (int) Math.round(0.75 * points.size())) 1444 .getDiff(); 1445 double medianQuartile = points.get( 1446 (int) Math.round(0.5 * points.size())) 1447 .getDiff(); 1448 double interQuartileRange = upperQuartile 1449 - lowerQuartile; 1450 double upperFence = upperQuartile + quartileFactor 1451 * interQuartileRange; 1452 1453 writer.println("Median RT difference before alignment: " 1454 + medianQuartile); 1455 writer.println("Largest RT difference before alignment: " 1456 + points.get(points.size() - 1).getDiff()); 1457 1458 int maxCutOff = points.size() - 1; 1459 while (points.get(maxCutOff).getDiff() > upperFence) { 1460 points.remove(maxCutOff); 1461 maxCutOff--; 1462 nbrOfOutliers++; 1463 } 1464 1465 writer.println("Removing " 1466 + nbrOfOutliers 1467 + " outliers. 
Total number of sequence matches is now: " 1468 + points.size()); 1469 1470 medianQuartile = points.get( 1471 (int) Math.round(0.5 * points.size())) 1472 .getDiff(); 1473 1474 writer.println("Median RT difference is now: " 1475 + medianQuartile); 1476 1477 double largestRTDiff = points.get(points.size() - 1).getDiff(); 1478 1479 1480 writer.println("Largest RT difference is now: " 1481 +largestRTDiff ); 1482 1483 largestRTDiff = Math.round(100000 * largestRTDiff + 1) 1484 / (double) 100000; 1485 1486 Collections.sort(points, new MzDiffComparator()); 1487 double medianQuartileMz = points.get( 1488 (int) Math.round(0.5 * points.size())) 1489 .getMzDiff(); 1490 writer.println("Median mz difference before alignment: " 1491 + medianQuartileMz); 1492 writer.println("Largest mz difference before alignment: " 1493 + points.get(points.size() - 1).getMzDiff()); 1494 1495 // the mz tolrance is set as the largest (rounded) mz 1496 double mzTol = points.get(points.size() - 1) 1497 .getMzDiff(); 1498 mzTol = Math.round(100000 * mzTol + 1) 1499 / (double) 100000; 1500 1501 log.debug("Mz tol is: " + mzTol); 1502 1503 initialMzRTVec[0]=mzTol; 1504 initialMzRTVec[1]=largestRTDiff; 1505 1506 Collections.sort(points, new RTComparator()); 1507 1508 return initialMzRTVec; 1473 1509 } 1474 1510
Note: See TracChangeset for help on using the changeset viewer.