Changeset 5794
- Timestamp: Dec 16, 2019, 12:44:42 PM (3 years ago)
- Files: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/grid/VariantCallingJobCreator.java
r5790 r5794 1 1 package net.sf.basedb.reggie.grid; 2 2 3 import java.io.BufferedReader; 3 4 import java.io.IOException; 4 5 import java.io.InputStream; 6 import java.io.InputStreamReader; 5 7 import java.util.ArrayList; 6 8 import java.util.List; 7 9 import java.util.Set; 10 import java.util.regex.Matcher; 11 import java.util.regex.Pattern; 8 12 import java.util.zip.GZIPInputStream; 9 13 … … 573 577 { 574 578 FileOwner alignedOwner = FileOwner.create(dc, aligned, analysisDir); 575 pf.parseFiles(dc, alignedOwner, filesOut, Set.of("variants-callable.bed", "variants-raw.vcf.gz"));579 msg = pf.parseFiles(dc, alignedOwner, filesOut, Set.of("variants-callable.bed", "variants-raw.vcf.gz")); 576 580 Annotationtype.CALLABLE_BASES.setAnnotationValue(dc, aligned, pf.stat.numCallableBases); 577 581 Annotationtype.VARIANTS_RAW.setAnnotationValue(dc, aligned, pf.stat.numRawVariants); 578 582 } 583 Annotationtype maxMutationSignature = null; 584 Float maxScore = null; 579 585 if (!filterSkipped) 580 586 { 581 587 FileOwner vCallOwner = FileOwner.create(dc, raw, analysisDir); 582 msg = pf.parseFiles(dc, vCallOwner, filesOut, Set.of("variants-annotated.vcf.gz", "variants-filtered.vcf" ));588 msg = pf.parseFiles(dc, vCallOwner, filesOut, Set.of("variants-annotated.vcf.gz", "variants-filtered.vcf", "mutation_signature.txt", "mutation_signature.pdf")); 583 589 Annotationtype.VARIANTS_PASSED_FILTER.setAnnotationValue(dc, raw, pf.stat.numFiltered); 590 for (int index = 0; index < Annotationtype.NUM_MUTATION_SIGNATURES; index++) 591 { 592 Float score = pf.mutationSignatureScore[index]; 593 if (score != null) 594 { 595 Annotationtype.mutationSignature(index+1).setAnnotationValue(dc, raw, score); 596 if (maxScore == null || maxScore < score) 597 { 598 maxScore = score; 599 maxMutationSignature = Annotationtype.mutationSignature(index+1); 600 } 601 } 602 } 584 603 } 585 604 … … 588 607 if (rawCallingSkipped) 589 608 { 590 msg = "Used existing raw variants; annotated " + pf.stat.numAnnotated 
+ " variants; " + pf.stat.numFiltered + " passed filter .";609 msg = "Used existing raw variants; annotated " + pf.stat.numAnnotated + " variants; " + pf.stat.numFiltered + " passed filter"; 591 610 } 592 611 else if (filterSkipped) 593 612 { 594 613 msg = "Found " + pf.stat.numRawVariants + " variants; skipped filtering; "; 595 msg += Values.formatNumber(pf.stat.numCallableBases/1000000f, 1) + "M callable bases .";614 msg += Values.formatNumber(pf.stat.numCallableBases/1000000f, 1) + "M callable bases"; 596 615 } 597 616 else 598 617 { 599 msg = "Found " + pf.stat.numRawVariants + " variants; " + pf.stat.numFiltered + " passed filter. "; 600 msg += Values.formatNumber(pf.stat.numCallableBases/1000000f, 1) + "M callable bases."; 601 } 618 msg = "Found " + pf.stat.numRawVariants + " variants; " + pf.stat.numFiltered + " passed filter "; 619 msg += Values.formatNumber(pf.stat.numCallableBases/1000000f, 1) + "M callable bases"; 620 } 621 if (maxMutationSignature != null) 622 { 623 msg += "; " + maxMutationSignature.getName() + "=" + Values.formatNumber(maxScore, 2); 624 } 625 msg += "."; 602 626 } 603 627 dc.commit(); … … 690 714 Stats stat; 691 715 ItemSubtype vcfType; 692 DataFileType vcfData; 716 DataFileType vcfData; 717 Float[] mutationSignatureScore = new Float[Annotationtype.NUM_MUTATION_SIGNATURES]; 693 718 694 719 String parseFiles(DbControl dc, FileOwner owner, String filesOut, Set<String> filenames) … … 742 767 { 743 768 link.setDescription("Created with " + software.getName()); 769 } 770 } 771 if (filename.equals("mutation_signature.txt")) 772 { 773 try 774 { 775 parseMutationSignatureScores(f); 776 } 777 catch (IOException | RuntimeException ex) 778 { 779 msg = "Could not parse " + filename + " (" + ex.getMessage() + ")"; 780 logger.warn("Could not parse file: " + f, ex); 744 781 } 745 782 } … … 794 831 return vcfData; 795 832 } 833 834 /** 835 Parse the mutation signature file. 
Lines should have pattern: 836 837 Signature.<NN><tab><score> 838 839 where <NN> is the signature index and <score> is a positive 840 floating point number. Lines with '0' score are parsed as null. 841 */ 842 private void parseMutationSignatureScores(File f) 843 throws IOException 844 { 845 InputStream in = null; 846 try 847 { 848 Pattern p = Pattern.compile("Signature\\.(\\d+)\\t([0-9.]+)"); 849 in = f.getDownloadStream(0); 850 BufferedReader r = new BufferedReader(new InputStreamReader(in, "UTF-8")); 851 852 String line = r.readLine(); 853 while (line != null) 854 { 855 Matcher m = p.matcher(line); 856 if (m.matches()) 857 { 858 int index = Values.getInt(m.group(1)); 859 float score = Values.getFloat(m.group(2)); 860 if (score > 0 && index > 0 && index < mutationSignatureScore.length) 861 { 862 mutationSignatureScore[index] = score; 863 } 864 } 865 line = r.readLine(); 866 } 867 } 868 finally 869 { 870 FileUtil.close(in); 871 } 872 873 } 796 874 } 797 875
Note: See TracChangeset for help on using the changeset viewer.