Changeset 5843
- Timestamp:
- Feb 25, 2020, 2:21:49 PM (3 years ago)
- Location:
- extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/Reggie.java
r5822 r5843 300 300 } 301 301 302 public static String formatCount(long count) 303 { 304 if (count > 1000000) 305 { 306 int numDecimals = count < 100000000 ? 1 : 0; 307 return Values.formatNumber(count / 1000000f, numDecimals, "M"); 308 } 309 else if (count > 1000) 310 { 311 int numDecimals = count < 100000 ? 1 : 0; 312 return Values.formatNumber(count / 1000f, numDecimals, "k"); 313 } 314 return Long.toString(count); 302 public static String formatCount(Number count) 303 { 304 if (count == null) return ""; 305 long c = count.longValue(); 306 if (c > 1000000) 307 { 308 int numDecimals = c < 100000000 ? 1 : 0; 309 return Values.formatNumber(c / 1000000f, numDecimals, "M"); 310 } 311 else if (c > 1000) 312 { 313 int numDecimals = c < 100000 ? 1 : 0; 314 return Values.formatNumber(c / 1000f, numDecimals, "k"); 315 } 316 return Long.toString(c); 315 317 } 316 318 -
extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/dao/Annotationtype.java
r5799 r5843 1552 1552 public static final Annotationtype ALIGNED_PAIRS = 1553 1553 new Annotationtype("ALIGNED_PAIRS", Type.LONG, false, Item.DERIVEDBIOASSAY); 1554 1555 /** 1556 The "ALIGNED_BASES" annotation, used for derived bioassays (AlignedSequences). 1557 The annotation is the number of bases that have been aligned. 1558 @since 4.26 1559 */ 1560 public static final Annotationtype ALIGNED_BASES = 1561 new Annotationtype("ALIGNED_BASES", Type.LONG, false, Item.DERIVEDBIOASSAY); 1562 1554 1563 1555 1564 /** -
extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/grid/MipsAlignJobCreator.java
r5841 r5843 2 2 3 3 import java.util.ArrayList; 4 import java.util.Arrays; 4 5 import java.util.List; 5 6 import java.util.Set; … … 409 410 script.cmd("rm -rf ${AlignFolder}/*"); 410 411 script.cmd("cp out/* ${AlignFolder}"); 411 // TODO - copy some files to $WD so that we can update some annotations like ALIGNED_PAIRS, etc.412 script.cmd("cp out/concordant.*_metrics.txt ${WD}"); 412 413 if (externalGroup != null) 413 414 { … … 446 447 String jobName = status.getName(); 447 448 String files = session.getJobFileAsString(jobName, "files.out", "UTF-8"); 448 449 Metrics metrics = parseAlignedOut(sc, job, files); 450 return null; 449 String alignmentMetrics = session.getJobFileAsString(jobName, "concordant.alignment_summary_metrics.txt", "UTF-8"); 450 String duplicateMetrics = session.getJobFileAsString(jobName, "concordant.dedup_metrics.txt", "UTF-8"); 451 452 Metrics metrics = parseAlignedOut(sc, job, files, alignmentMetrics, duplicateMetrics); 453 String msg = Reggie.formatCount(metrics.alignedReadPairs) + " reads after alignment; "; 454 msg += Values.formatNumber(metrics.fractionDuplication * 100, 1) + "% duplicates; "; 455 msg += Reggie.formatCount(metrics.alignedBases) + " aligned bases"; 456 return msg; 451 457 } 452 458 453 private Metrics parseAlignedOut(SessionControl sc, Job job, String filesOut )459 private Metrics parseAlignedOut(SessionControl sc, Job job, String filesOut, String alignmentMetrics, String duplicateMetrics) 454 460 { 455 461 Metrics metrics = new Metrics(); 462 463 int categoryIndex = -1; 464 int alignedPairsIndex = -1; 465 int alignedBasesIndex = -1; 466 for (String line : alignmentMetrics.split("\n")) 467 { 468 String[] cols = line.split("\t"); 469 if (cols.length >= 20) 470 { 471 if (categoryIndex == -1) 472 { 473 List<String> colsA = Arrays.asList(cols); 474 categoryIndex = colsA.indexOf("CATEGORY"); 475 alignedPairsIndex = colsA.indexOf("READS_ALIGNED_IN_PAIRS"); 476 alignedBasesIndex = colsA.indexOf("PF_ALIGNED_BASES"); 477 } 478 else 
if ("PAIR".equals(cols[categoryIndex])) 479 { 480 Long tmp = Values.getLong(cols[alignedPairsIndex], null); 481 if (tmp != null) metrics.alignedReadPairs = tmp / 2; 482 metrics.alignedBases = Values.getLong(cols[alignedBasesIndex], null); 483 break; 484 } 485 } 486 } 487 488 489 int readPairsExaminedIndex = -1; 490 int readPairDuplicatesIndex = -1; 491 int percentDuplicationIndex = -1; 492 for (String line : duplicateMetrics.split("\n")) 493 { 494 String[] cols = line.split("\t"); 495 if (cols.length >= 9) 496 { 497 if (readPairsExaminedIndex == -1) 498 { 499 List<String> colsA = Arrays.asList(cols); 500 readPairsExaminedIndex = colsA.indexOf("READ_PAIRS_EXAMINED"); 501 readPairDuplicatesIndex = colsA.indexOf("READ_PAIR_DUPLICATES"); 502 percentDuplicationIndex = colsA.indexOf("PERCENT_DUPLICATION"); 503 } 504 else 505 { 506 metrics.readPairsExamined = Values.getLong(cols[readPairsExaminedIndex], null); 507 metrics.readPairDuplicates = Values.getLong(cols[readPairDuplicatesIndex], null); 508 metrics.fractionDuplication = Values.getFloat(cols[percentDuplicationIndex], null); 509 break; 510 } 511 } 512 } 456 513 457 514 DbControl dc = null; … … 462 519 AlignedSequences alignedSequences = AlignedSequences.getByJob(dc, job); 463 520 DerivedBioAssay aligned = alignedSequences.getItem(); 521 522 Annotationtype.ALIGNED_PAIRS.setAnnotationValue(dc, aligned, metrics.alignedReadPairs); 523 Annotationtype.ALIGNED_BASES.setAnnotationValue(dc, aligned, metrics.alignedBases); 524 Annotationtype.READ_PAIRS_EXAMINED.setAnnotationValue(dc, aligned, metrics.readPairsExamined); 525 Annotationtype.READ_PAIR_DUPLICATES.setAnnotationValue(dc, aligned, metrics.readPairDuplicates); 526 Annotationtype.FRACTION_DUPLICATION.setAnnotationValue(dc, aligned, metrics.fractionDuplication); 464 527 465 528 // Create file links … … 497 560 if (f.getName().equals("concordant.bam")) 498 561 { 499 //f.setDescription(metrics.numReadsAfterAlign + " ALIGNED PAIRS");562 
f.setDescription(metrics.alignedReadPairs + " ALIGNED PAIRS; " + metrics.alignedBases + " ALIGNED BASES"); 500 563 f.setItemSubtype(bamType); 501 564 FileSetMember member = aligned.getFileSet().addMember(f, bamData); … … 522 585 static class Metrics 523 586 { 587 Long alignedReadPairs = null; 588 Long alignedBases = null; 589 Long readPairsExamined = null; 590 Long readPairDuplicates = null; 591 Float fractionDuplication = null; 524 592 } 525 593 -
extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/servlet/InstallServlet.java
r5826 r5843 628 628 jsonChecks.add(checkAnnotationType(dc, Annotationtype.PM_READS, 1, null, createIfMissing, effectivePermissionsUse)); 629 629 jsonChecks.add(checkAnnotationType(dc, Annotationtype.ALIGNED_PAIRS, 1, null, createIfMissing, effectivePermissionsUse)); 630 jsonChecks.add(checkAnnotationType(dc, Annotationtype.ALIGNED_BASES, 1, null, createIfMissing, effectivePermissionsUse)); 630 631 jsonChecks.add(checkAnnotationType(dc, Annotationtype.READ_PAIRS_EXAMINED, 1, null, createIfMissing, effectivePermissionsUse)); 631 632 jsonChecks.add(checkAnnotationType(dc, Annotationtype.READ_PAIR_DUPLICATES, 1, null, createIfMissing, effectivePermissionsUse)); … … 965 966 jsonChecks.add(checkAnnotationTypeCategory(dc, Subtype.ALIGNED_SEQUENCES, createIfMissing, 966 967 Annotationtype.DATA_FILES_FOLDER, Annotationtype.ALIGNED_PAIRS, 968 Annotationtype.ALIGNED_BASES, 967 969 Annotationtype.READ_PAIRS_EXAMINED, Annotationtype.READ_PAIR_DUPLICATES, 968 970 Annotationtype.FRACTION_DUPLICATION, Annotationtype.FRAGMENT_SIZE_AVG,
Note: See TracChangeset
for help on using the changeset viewer.