Changeset 5794


Ignore:
Timestamp:
Dec 16, 2019, 12:44:42 PM (3 years ago)
Author:
Nicklas Nordborg
Message:

References #1215: Include mutation signature analysis in the variant calling pipeline

Implemented support for parsing the output file with scores for the mutation signatures.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/grid/VariantCallingJobCreator.java

    r5790 r5794  
    11package net.sf.basedb.reggie.grid;
    22
     3import java.io.BufferedReader;
    34import java.io.IOException;
    45import java.io.InputStream;
     6import java.io.InputStreamReader;
    57import java.util.ArrayList;
    68import java.util.List;
    79import java.util.Set;
     10import java.util.regex.Matcher;
     11import java.util.regex.Pattern;
    812import java.util.zip.GZIPInputStream;
    913
     
    573577        {
    574578          FileOwner alignedOwner = FileOwner.create(dc, aligned, analysisDir);
    575           pf.parseFiles(dc, alignedOwner, filesOut, Set.of("variants-callable.bed", "variants-raw.vcf.gz"));
     579          msg = pf.parseFiles(dc, alignedOwner, filesOut, Set.of("variants-callable.bed", "variants-raw.vcf.gz"));
    576580          Annotationtype.CALLABLE_BASES.setAnnotationValue(dc, aligned, pf.stat.numCallableBases);
    577581          Annotationtype.VARIANTS_RAW.setAnnotationValue(dc, aligned, pf.stat.numRawVariants);
    578582        }
     583        Annotationtype maxMutationSignature = null;
     584        Float maxScore = null;
    579585        if (!filterSkipped)
    580586        {
    581587          FileOwner vCallOwner = FileOwner.create(dc, raw, analysisDir);
    582           msg = pf.parseFiles(dc, vCallOwner, filesOut, Set.of("variants-annotated.vcf.gz", "variants-filtered.vcf"));
     588          msg = pf.parseFiles(dc, vCallOwner, filesOut, Set.of("variants-annotated.vcf.gz", "variants-filtered.vcf", "mutation_signature.txt", "mutation_signature.pdf"));
    583589          Annotationtype.VARIANTS_PASSED_FILTER.setAnnotationValue(dc, raw, pf.stat.numFiltered);
     590          for (int index = 0; index < Annotationtype.NUM_MUTATION_SIGNATURES; index++)
     591          {
     592            Float score = pf.mutationSignatureScore[index];
     593            if (score != null)
     594            {
     595              Annotationtype.mutationSignature(index+1).setAnnotationValue(dc, raw, score);
     596              if (maxScore == null || maxScore < score)
     597              {
     598                maxScore = score;
     599                maxMutationSignature = Annotationtype.mutationSignature(index+1);
     600              }
     601            }
     602          }
    584603        }
    585604
     
    588607          if (rawCallingSkipped)
    589608          {
    590             msg = "Used existing raw variants; annotated " + pf.stat.numAnnotated + " variants; " + pf.stat.numFiltered + " passed filter.";
     609            msg = "Used existing raw variants; annotated " + pf.stat.numAnnotated + " variants; " + pf.stat.numFiltered + " passed filter";
    591610          }
    592611          else if (filterSkipped)
    593612          {
    594613            msg = "Found " + pf.stat.numRawVariants + " variants; skipped filtering; ";
    595             msg += Values.formatNumber(pf.stat.numCallableBases/1000000f, 1) + "M callable bases.";
     614            msg += Values.formatNumber(pf.stat.numCallableBases/1000000f, 1) + "M callable bases";
    596615          }
    597616          else
    598617          {
    599             msg = "Found " + pf.stat.numRawVariants + " variants; " + pf.stat.numFiltered + " passed filter. ";
    600             msg += Values.formatNumber(pf.stat.numCallableBases/1000000f, 1) + "M callable bases.";
    601           }
     618            msg = "Found " + pf.stat.numRawVariants + " variants; " + pf.stat.numFiltered + " passed filter ";
     619            msg += Values.formatNumber(pf.stat.numCallableBases/1000000f, 1) + "M callable bases";
     620          }
     621          if (maxMutationSignature != null)
     622          {
     623            msg += "; " + maxMutationSignature.getName() + "=" + Values.formatNumber(maxScore, 2);
     624          }
     625          msg += ".";
    602626        }
    603627        dc.commit();
     
    690714    Stats stat;
    691715    ItemSubtype vcfType;
    692     DataFileType vcfData;
     716    DataFileType vcfData;   
     717    Float[] mutationSignatureScore = new Float[Annotationtype.NUM_MUTATION_SIGNATURES];
    693718   
    694719    String parseFiles(DbControl dc, FileOwner owner, String filesOut, Set<String> filenames)
     
    742767          {
    743768            link.setDescription("Created with " + software.getName());
     769          }
     770        }
     771        if (filename.equals("mutation_signature.txt"))
     772        {
     773          try
     774          {
     775            parseMutationSignatureScores(f);
     776          }
     777          catch (IOException | RuntimeException ex)
     778          {
     779            msg = "Could not parse " + filename + " (" + ex.getMessage() + ")";
     780            logger.warn("Could not parse file: " + f, ex);
    744781          }
    745782        }
     
    794831      return vcfData;
    795832    }
     833   
     834    /**
     835      Parse the mutation signature file. Lines should have pattern:
     836     
     837      Signature.<NN><tab><score>
     838     
     839      where <NN> is the signature index and <score> is a positive
     840      floating point number. Lines with '0' score are parsed as null.
     841    */
     842    private void parseMutationSignatureScores(File f)
     843      throws IOException
     844    {
     845      InputStream in = null;
     846      try
     847      {
     848        Pattern p = Pattern.compile("Signature\\.(\\d+)\\t([0-9.]+)");
     849        in = f.getDownloadStream(0);
     850        BufferedReader r = new BufferedReader(new InputStreamReader(in, "UTF-8"));
     851       
     852        String line = r.readLine();
     853        while (line != null)
     854        {
     855          Matcher m = p.matcher(line);
     856          if (m.matches())
     857          {
     858            int index = Values.getInt(m.group(1));
     859            float score = Values.getFloat(m.group(2));
     860            if (score > 0 && index > 0 && index < mutationSignatureScore.length)
     861            {
     862              mutationSignatureScore[index] = score;
     863            }
     864          }
     865          line = r.readLine();
     866        }
     867      }
     868      finally
     869      {
     870        FileUtil.close(in);
     871      }
     872     
     873    }
    796874  }
    797875 
Note: See TracChangeset for help on using the changeset viewer.