Changeset 5843


Ignore:
Timestamp:
Feb 25, 2020, 2:21:49 PM (3 years ago)
Author:
Nicklas Nordborg
Message:

References #1218: Implement MIPs alignment

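Imports selected values from the alignment summary and duplicate metrics files (`concordant.alignment_summary_metrics.txt` and `concordant.dedup_metrics.txt`) into annotations on the aligned sequences item. Existing annotation types that are now populated: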

  • ALIGNED_PAIRS
  • READ_PAIRS_EXAMINED
  • READ_PAIR_DUPLICATES
  • FRACTION_DUPLICATION

New annotation type added by this changeset:

  • ALIGNED_BASES
Location:
extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/Reggie.java

    r5822 r5843  
    300300  }
    301301   
    302   public static String formatCount(long count)
    303   {
    304     if (count > 1000000)
    305     {
    306       int numDecimals = count < 100000000 ? 1 : 0;
    307       return Values.formatNumber(count / 1000000f, numDecimals, "M");
    308     }
    309     else if (count > 1000)
    310     {
    311       int numDecimals = count < 100000 ? 1 : 0;
    312       return Values.formatNumber(count / 1000f, numDecimals, "k");
    313     }
    314     return Long.toString(count);
     302  public static String formatCount(Number count)
     303  {
     304    if (count == null) return "";
     305    long c = count.longValue();
     306    if (c > 1000000)
     307    {
     308      int numDecimals = c < 100000000 ? 1 : 0;
     309      return Values.formatNumber(c / 1000000f, numDecimals, "M");
     310    }
     311    else if (c > 1000)
     312    {
     313      int numDecimals = c < 100000 ? 1 : 0;
     314      return Values.formatNumber(c / 1000f, numDecimals, "k");
     315    }
     316    return Long.toString(c);
    315317  }
    316318
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/dao/Annotationtype.java

    r5799 r5843  
    15521552  public static final Annotationtype ALIGNED_PAIRS =
    15531553    new Annotationtype("ALIGNED_PAIRS", Type.LONG, false, Item.DERIVEDBIOASSAY);
     1554
     1555  /**
     1556    The "ALIGNED_BASES" annotation, used for derived bioassays (AlignedSequences).
     1557    The annotation is the number of bases that has been aligned.
     1558    @since 4.26
     1559  */
     1560  public static final Annotationtype ALIGNED_BASES =
     1561    new Annotationtype("ALIGNED_BASES", Type.LONG, false, Item.DERIVEDBIOASSAY);
     1562
    15541563 
    15551564  /**
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/grid/MipsAlignJobCreator.java

    r5841 r5843  
    22
    33import java.util.ArrayList;
     4import java.util.Arrays;
    45import java.util.List;
    56import java.util.Set;
     
    409410      script.cmd("rm -rf ${AlignFolder}/*");
    410411      script.cmd("cp out/* ${AlignFolder}");
    411       // TODO - copy some files to $WD so that we can update some annotations like ALIGNED_PAIRS, etc.
     412      script.cmd("cp out/concordant.*_metrics.txt ${WD}");
    412413      if (externalGroup != null)
    413414      {
     
    446447      String jobName = status.getName();
    447448      String files = session.getJobFileAsString(jobName, "files.out", "UTF-8");
    448 
    449       Metrics metrics = parseAlignedOut(sc, job, files);
    450       return null;
     449      String alignmentMetrics = session.getJobFileAsString(jobName, "concordant.alignment_summary_metrics.txt", "UTF-8");
     450      String duplicateMetrics = session.getJobFileAsString(jobName, "concordant.dedup_metrics.txt", "UTF-8");
     451
     452      Metrics metrics = parseAlignedOut(sc, job, files, alignmentMetrics, duplicateMetrics);
     453      String msg = Reggie.formatCount(metrics.alignedReadPairs) + " reads after alignment; ";
     454      msg += Values.formatNumber(metrics.fractionDuplication * 100,  1) + "% duplicates; ";
     455      msg += Reggie.formatCount(metrics.alignedBases) + " aligned bases";
     456      return msg;
    451457    }
    452458   
    453     private Metrics parseAlignedOut(SessionControl sc, Job job, String filesOut)
     459    private Metrics parseAlignedOut(SessionControl sc, Job job, String filesOut, String alignmentMetrics, String duplicateMetrics)
    454460    {
    455461      Metrics metrics = new Metrics();
     462
     463      int categoryIndex = -1;
     464      int alignedPairsIndex = -1;
     465      int alignedBasesIndex = -1;
     466      for (String line : alignmentMetrics.split("\n"))
     467      {
     468        String[] cols = line.split("\t");
     469        if (cols.length >= 20)
     470        {
     471          if (categoryIndex == -1)
     472          {
     473            List<String> colsA = Arrays.asList(cols);
     474            categoryIndex = colsA.indexOf("CATEGORY");
     475            alignedPairsIndex = colsA.indexOf("READS_ALIGNED_IN_PAIRS");
     476            alignedBasesIndex = colsA.indexOf("PF_ALIGNED_BASES");
     477          }
     478          else if ("PAIR".equals(cols[categoryIndex]))
     479          {
     480            Long tmp = Values.getLong(cols[alignedPairsIndex], null);
     481            if (tmp != null) metrics.alignedReadPairs = tmp / 2;
     482            metrics.alignedBases = Values.getLong(cols[alignedBasesIndex], null);
     483            break;
     484          }
     485        }
     486      }
     487
     488     
     489      int readPairsExaminedIndex = -1;
     490      int readPairDuplicatesIndex = -1;
     491      int percentDuplicationIndex = -1;
     492      for (String line : duplicateMetrics.split("\n"))
     493      {
     494        String[] cols = line.split("\t");
     495        if (cols.length >= 9)
     496        {
     497          if (readPairsExaminedIndex == -1)
     498          {
     499            List<String> colsA = Arrays.asList(cols);
     500            readPairsExaminedIndex = colsA.indexOf("READ_PAIRS_EXAMINED");
     501            readPairDuplicatesIndex = colsA.indexOf("READ_PAIR_DUPLICATES");
     502            percentDuplicationIndex = colsA.indexOf("PERCENT_DUPLICATION");
     503          }
     504          else
     505          {
     506            metrics.readPairsExamined = Values.getLong(cols[readPairsExaminedIndex], null);
     507            metrics.readPairDuplicates = Values.getLong(cols[readPairDuplicatesIndex], null);
     508            metrics.fractionDuplication = Values.getFloat(cols[percentDuplicationIndex], null);
     509            break;
     510          }
     511        }
     512      }
    456513     
    457514      DbControl dc = null;
     
    462519        AlignedSequences alignedSequences = AlignedSequences.getByJob(dc, job);
    463520        DerivedBioAssay aligned = alignedSequences.getItem();
     521
     522        Annotationtype.ALIGNED_PAIRS.setAnnotationValue(dc, aligned, metrics.alignedReadPairs);
     523        Annotationtype.ALIGNED_BASES.setAnnotationValue(dc, aligned, metrics.alignedBases);
     524        Annotationtype.READ_PAIRS_EXAMINED.setAnnotationValue(dc, aligned, metrics.readPairsExamined);
     525        Annotationtype.READ_PAIR_DUPLICATES.setAnnotationValue(dc, aligned, metrics.readPairDuplicates);
     526        Annotationtype.FRACTION_DUPLICATION.setAnnotationValue(dc, aligned, metrics.fractionDuplication);
    464527
    465528        // Create file links
     
    497560          if (f.getName().equals("concordant.bam"))
    498561          {
    499             //f.setDescription(metrics.numReadsAfterAlign + " ALIGNED PAIRS");
     562            f.setDescription(metrics.alignedReadPairs + " ALIGNED PAIRS; " + metrics.alignedBases + " ALIGNED BASES");
    500563            f.setItemSubtype(bamType);
    501564            FileSetMember member = aligned.getFileSet().addMember(f, bamData);
     
    522585  static class Metrics
    523586  {
     587    Long alignedReadPairs = null;
     588    Long alignedBases = null;
     589    Long readPairsExamined = null;
     590    Long readPairDuplicates = null;
     591    Float fractionDuplication = null;
    524592  }
    525593
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/servlet/InstallServlet.java

    r5826 r5843  
    628628        jsonChecks.add(checkAnnotationType(dc, Annotationtype.PM_READS, 1, null, createIfMissing, effectivePermissionsUse));
    629629        jsonChecks.add(checkAnnotationType(dc, Annotationtype.ALIGNED_PAIRS, 1, null, createIfMissing, effectivePermissionsUse));
     630        jsonChecks.add(checkAnnotationType(dc, Annotationtype.ALIGNED_BASES, 1, null, createIfMissing, effectivePermissionsUse));
    630631        jsonChecks.add(checkAnnotationType(dc, Annotationtype.READ_PAIRS_EXAMINED, 1, null, createIfMissing, effectivePermissionsUse));
    631632        jsonChecks.add(checkAnnotationType(dc, Annotationtype.READ_PAIR_DUPLICATES, 1, null, createIfMissing, effectivePermissionsUse));
     
    965966        jsonChecks.add(checkAnnotationTypeCategory(dc, Subtype.ALIGNED_SEQUENCES, createIfMissing,
    966967            Annotationtype.DATA_FILES_FOLDER, Annotationtype.ALIGNED_PAIRS,
     968            Annotationtype.ALIGNED_BASES,
    967969            Annotationtype.READ_PAIRS_EXAMINED, Annotationtype.READ_PAIR_DUPLICATES,
    968970            Annotationtype.FRACTION_DUPLICATION, Annotationtype.FRAGMENT_SIZE_AVG,
Note: See TracChangeset for help on using the changeset viewer.