Changeset 2497
- Timestamp:
- Jun 5, 2014, 3:41:07 PM (9 years ago)
- Location:
- extensions/net.sf.basedb.reggie/trunk
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
extensions/net.sf.basedb.reggie/trunk/reggie-ogs-hosts.xml
r2464 r2497 59 59 </demux> 60 60 61 <!-- Settings for the Trimmomatic step --> 62 <trimmomatic> 63 <!-- full path to the JAR file with the Trimmomatic program --> 64 <jar-path>/usr/local/packages/trimmomatic/0.32/trimmomatic-0.32.jar</jar-path> 65 <!-- full path to the file with Illumina adapter information --> 66 <adapter-file>/usr/local/packages/trimmomatic/0.32/adapters/TruSeq3-PE-2.fa</adapter-file> 67 <!-- static options for Trimmomatic --> 68 <options>ILLUMINACLIP:${AdapterFile}:2:30:12:1:true LEADING:3 TRAILING:3 MAXINFO:40:0.9 MINLEN:20</options> 69 </trimmomatic> 70 61 71 <pe-filter> 62 72 <!-- parallel environment option to the queue system --> -
extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/dao/Annotationtype.java
r2454 r2497 867 867 868 868 /** 869 The "PT_READS" annotation, used for derived bioassays (MergedSequences). 870 The annotation is the number of sequences that remains for a library after 871 running trimmomatic. 872 @since 2.16 873 */ 874 public static final Annotationtype PT_READS = 875 new Annotationtype("PT_READS", Type.LONG, false, Item.DERIVEDBIOASSAY); 876 877 /** 869 878 The "PF_NNNN_PCT" annotation, used for derived bioassays (DemuxedSequences). 870 879 The annotation is the percentage of sequences that has could not be assigned -
extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/servlet/DemuxMergeServlet.java
r2464 r2497 13 13 import java.util.Set; 14 14 import java.util.TreeMap; 15 import java.util.regex.Matcher; 15 16 import java.util.regex.Pattern; 16 17 … … 285 286 mg.loadAnnotations(dc, "READS", Annotationtype.READS, null); 286 287 mg.loadAnnotations(dc, "PF_READS", Annotationtype.PF_READS, null); 288 mg.loadAnnotations(dc, "PT_READS", Annotationtype.PT_READS, null); 287 289 jsonMerged.add(mg.asJSONObject()); 288 290 } … … 376 378 String debug_tileLimit = cluster.getConfig("demux/debug-tile-limit", "16"); 377 379 380 String trim_jarPath = cluster.getRequiredConfig("trimmomatic/jar-path"); 381 String trim_adapterFile = cluster.getRequiredConfig("trimmomatic/adapter-file"); 382 String trim_options = cluster.getRequiredConfig("trimmomatic/options"); 383 378 384 String projectRoot = cluster.getRequiredConfig("project-archive"); 379 385 String projectFolder = projectName+(debug ? ".debug" : ""); … … 430 436 script.cmd("export PicardDir="+demux_picardDir); 431 437 script.cmd("export PicardMemory="+demux_picardMemory); 438 script.cmd("export TrimmomaticJAR="+trim_jarPath); 439 script.cmd("export AdapterFile="+trim_adapterFile); 432 440 script.newLine(); 433 441 script.cmd("mkdir -p " + tmpFolder); 434 442 script.cmd("cd " + tmpFolder); 435 443 script.cmd("mkdir fastq"); 444 script.cmd("mkdir fastq.merged"); 445 script.cmd("mkdir fastq.trimmomatic"); 436 446 script.cmd("cp ${ScriptDir}/picard ."); 447 script.cmd("cp ${ScriptDir}/trimmomatic ."); 437 448 script.newLine(); 438 449 } … … 555 566 List<Tag> allTags = barcodeQuery.list(dc); 556 567 String totalMetricsFile = jobFolder + "/demultiplex_metrics.txt"; 568 String trimmomaticOut = jobFolder + "/trimmomatic.out"; 557 569 558 570 int currentLane = 0; … … 612 624 613 625 script.comment(flowCellBarcode + "; lane " + lane); 614 // Progress between 5- 75%615 int percent = 5 + ( currentLane * 70/ totalLanes);626 // Progress between 5-50% 627 int percent = 5 + ((currentLane * 45) / totalLanes); 616 628 currentLane++; 617 629 script.progress(percent, "Extracting barcodes: " + flowCellBarcode + "; lane "+ lane); … … 632 644 if (debug) 633 645 { 634 //extractCmd += " TILE_LIMIT="+debug_tileLimit;646 extractCmd += " TILE_LIMIT="+debug_tileLimit; 635 647 } 636 648 extractCmd += " >> " + demuxName + "_" + lane + ".out"; … … 654 666 fastqCmd += " MACHINE_NAME="+sequencerName; 655 667 fastqCmd += " NUM_PROCESSORS=0"; 656 fastqCmd += " COMPRESS_OUTPUTS=true";668 //fastqCmd += " COMPRESS_OUTPUTS=true"; 657 669 if (demux_fastqOptions != null) 658 670 { … … 681 693 String fastqFolder = (String)Annotationtype.DATA_FILES_FOLDER.getAnnotationValue(dc, merged); 682 694 script.comment("Consolidating FASTQ: " + merged.getName()); 683 // Progress between 75-95%684 int percent = 75 + (currentToMove * 20/ totalToMove);695 // Progress between 50-95% 696 int percent = 50 + ((currentToMove * 45) / totalToMove); 685 697 currentToMove++; 698 699 String R1_name = merged.getName()+"_R1.fastq.gz"; 700 String R2_name = merged.getName()+"_R2.fastq.gz"; 701 686 702 script.progress(percent, "Consolidating FASTQ: " + merged.getName()); 703 script.bkgr("cat fastq/" + merged.getName() + "_*.1.fastq | gzip > fastq.merged/"+R1_name); 704 script.cmd("cat fastq/" + merged.getName() + "_*.2.fastq | gzip > fastq.merged/"+R2_name); 705 script.newLine(); 706 707 script.progress(percent, "Trimmomatic: " + merged.getName()); 708 script.cmd("echo [" + merged.getName() + "] >> " + trimmomaticOut); 709 String trimCmd = "./trimmomatic PE"; 710 //trimCmd += " -trimlog fastq.trimmomatic/" + merged.getName()+".log"; 711 trimCmd += " fastq.merged/"+R1_name; 712 trimCmd += " fastq.merged/"+R2_name; 713 trimCmd += " fastq.trimmomatic/"+R1_name; 714 trimCmd += " fastq.trimmomatic/un_"+R1_name; 715 trimCmd += " fastq.trimmomatic/"+R2_name; 716 trimCmd += " fastq.trimmomatic/un_"+R2_name; 717 718 if (trim_options != null) 719 { 720 trimCmd += " " + trim_options; 721 } 722 trimCmd += " >> "+trimmomaticOut+" 2>&1"; 723 script.cmd(trimCmd); 724 script.newLine(); 725 726 script.progress(percent, "Archiving FASTQ: " + merged.getName()); 687 727 script.cmd("mkdir -p " + projectRoot + "/" + fastqFolder); 688 script. cmd("cat fastq/" + merged.getName() + "_*.1.fastq.gz > " + projectRoot + "/" + fastqFolder + "/"+merged.getName()+"_R1.fastq.gz");689 script. cmd("cat fastq/" + merged.getName() + "_*.2.fastq.gz > " + projectRoot + "/" + fastqFolder + "/"+merged.getName()+"_R2.fastq.gz");728 script.bkgr("cat fastq.trimmomatic/" + R1_name + " > " + projectRoot + "/" + fastqFolder + "/"+R1_name); 729 script.bkgr("cat fastq.trimmomatic/" + R2_name + " > " + projectRoot + "/" + fastqFolder + "/"+R2_name); 690 730 script.newLine(); 691 731 } … … 977 1017 { 978 1018 CmdResult metrics = cluster.executeCmd(ssh, "cat " + cluster.getJobFolder() + "/" + jobStatus.getJobName() + "/demultiplex_metrics.txt", 2); 979 if (metrics.getExitStatus() == 0) 980 { 981 Reads total = parseDemultiplexMetrics(sc, metrics.getStdout()); 982 String msg = Values.formatNumber(total.reads/1000000f, 1) + "M reads; " + Values.formatNumber(total.passedFilter/1000000f, 1) + "M passed filter"; 1019 CmdResult trimmomatic = cluster.executeCmd(ssh, "cat " + cluster.getJobFolder() + "/" + jobStatus.getJobName() + "/trimmomatic.out", 2); 1020 if (metrics.getExitStatus() != 0) 1021 { 1022 job.setDescription(metrics.getStderr()); 1023 } 1024 else if (trimmomatic.getExitStatus() != 0) 1025 { 1026 job.setDescription(trimmomatic.getStderr()); 1027 } 1028 else 1029 { 1030 Reads total = parseDemultiplexMetrics(sc, metrics.getStdout(), trimmomatic.getStdout()); 1031 String msg = Values.formatNumber(total.reads/1000000f, 1) + "M reads; "; 1032 msg += Values.formatNumber(total.passedFilter/1000000f, 1) + "M passed filter; "; 1033 msg += Values.formatNumber(total.passedTrimmomatic/1000000f, 1) + "M passed trimmomatic; "; 983 1034 if (total.warnings.size() > 0) 984 1035 { … … 987 1038 return msg; 988 1039 } 989 else990 {991 job.setDescription(metrics.getStderr());992 }993 1040 return null; 994 1041 } 995 1042 996 private Reads parseDemultiplexMetrics(SessionControl sc, String metrics) 997 { 1043 private Reads parseDemultiplexMetrics(SessionControl sc, String metrics, String trimmomatic) 1044 { 1045 Map<String, Reads> sumReads = new HashMap<String, Reads>(); 1046 1047 // Parse the demultiplex_metrics file 998 1048 FlatFileParser ffp = new FlatFileParser(); 999 1049 ffp.setIgnoreRegexp(Pattern.compile("#.*")); … … 1005 1055 ffp.setMinDataColumns(10); 1006 1056 1007 Map<String, Reads> sumReads = new HashMap<String, Reads>();1008 1057 Set<String> demuxNames = new HashSet<String>(); 1009 1058 try … … 1094 1143 throw new RuntimeException(ex); 1095 1144 } 1096 1145 1146 // Parse the trimmomatic.out file 1147 Pattern libPattern = Pattern.compile("\\[(.*)\\]"); 1148 Pattern dataPattern = Pattern.compile(".*Both Surviving:\\s+(\\d+).*"); 1149 Reads currentLib = null; 1150 int lineNo = 0; 1151 for (String line : trimmomatic.split("\n")) 1152 { 1153 lineNo++; 1154 Matcher m = libPattern.matcher(line); 1155 if (m.matches()) 1156 { 1157 String libName = m.group(1); 1158 currentLib = sumReads.get(libName); 1159 if (currentLib == null) 1160 { 1161 logger.error("At line " + lineNo + ": Found trimmomatic section for lib '" + libName + "' but not demultiplex metrics"); 1162 } 1163 continue; 1164 } 1165 m = dataPattern.matcher(line); 1166 if (m.matches()) 1167 { 1168 if (currentLib == null) 1169 { 1170 logger.error("At line " + lineNo + ": Found trimmomatic data but has not found a library name"); 1171 } 1172 else 1173 { 1174 currentLib.passedTrimmomatic = Values.getLong(m.group(1), -1); 1175 if (logger.isDebugEnabled()) 1176 { 1177 logger.debug("Trimmomatic: " + currentLib.libName + "; " + currentLib.passedTrimmomatic); 1178 } 1179 currentLib = null; 1180 } 1181 } 1182 } 1183 1097 1184 DbControl dc = null; 1098 1185 Reads total = new Reads(null); … … 1144 1231 if (logger.isDebugEnabled()) 1145 1232 { 1146 logger.debug(r.libName + "; " + r.reads + "; " + r.passedFilter );1233 logger.debug(r.libName + "; " + r.reads + "; " + r.passedFilter + "; " + r.passedTrimmomatic); 1147 1234 } 1148 1235 … … 1150 1237 if (merged != null) 1151 1238 { 1152 Annotationtype.READS.setAnnotationValue(dc, merged.getItem(), r.reads); 1153 Annotationtype.PF_READS.setAnnotationValue(dc, merged.getItem(), r.passedFilter); 1239 DerivedBioAssay m = merged.getItem(); 1240 Annotationtype.READS.setAnnotationValue(dc, m, r.reads); 1241 Annotationtype.PF_READS.setAnnotationValue(dc, m, r.passedFilter); 1242 Annotationtype.PT_READS.setAnnotationValue(dc, m, r.passedTrimmomatic); 1154 1243 total.reads += r.reads; 1155 1244 total.passedFilter += r.passedFilter; 1245 total.passedTrimmomatic += r.passedTrimmomatic; 1156 1246 } 1157 1247 } … … 1174 1264 long reads = 0; 1175 1265 long passedFilter = 0; 1266 long passedTrimmomatic = 0; 1176 1267 1177 1268 Reads(String libName) -
extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/servlet/InstallServlet.java
r2454 r2497 388 388 jsonChecks.add(checkAnnotationType(dc, Annotationtype.READS, 1, null, effectivePermissionsUse, createIfMissing)); 389 389 jsonChecks.add(checkAnnotationType(dc, Annotationtype.PF_READS, 1, null, effectivePermissionsUse, createIfMissing)); 390 jsonChecks.add(checkAnnotationType(dc, Annotationtype.PT_READS, 1, null, effectivePermissionsUse, createIfMissing)); 390 391 jsonChecks.add(checkAnnotationType(dc, Annotationtype.PF_NNNN_PCT, 1, null, effectivePermissionsUse, createIfMissing)); 391 392 jsonChecks.add(checkAnnotationType(dc, Annotationtype.PF_UNUSED_PCT, 1, null, effectivePermissionsUse, createIfMissing)); … … 505 506 506 507 jsonChecks.add(checkAnnotationTypeCategory(dc, Subtype.MERGED_SEQUENCES, createIfMissing, 507 Annotationtype.ANALYSIS_RESULT, Annotationtype.READS, Annotationtype.PF_READS, 508 Annotationtype.ANALYSIS_RESULT, 509 Annotationtype.READS, Annotationtype.PF_READS, Annotationtype.PT_READS, 508 510 Annotationtype.DATA_FILES_FOLDER, 509 511 Annotationtype.AUTO_PROCESSING
Note: See TracChangeset
for help on using the changeset viewer.