Changeset 5830
- Timestamp:
- Feb 21, 2020, 11:35:42 AM (3 years ago)
- Location:
- extensions/net.sf.basedb.reggie/trunk
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
extensions/net.sf.basedb.reggie/trunk/config/reggie-config.xml
r5829 r5830 202 202 <path>/usr/local/packages/fgbio/0.8.1/fgbio.jar</path> 203 203 </fgbio> 204 <novoalign> 205 <!-- full path to the novoalign file (required) --> 206 <path>/usr/local/packages/novocraft/V4.00.Pre-20190904/novoalign</path> 207 </novoalign> 208 <novosort> 209 <!-- full path to the novosort file (required) --> 210 <path>/usr/local/packages/novocraft/V4.00.Pre-20190904/novosort</path> 211 </novosort> 204 212 </programs> 205 213 … … 339 347 </trimmomatic> 340 348 349 <amplicons> 350 <main-dir>${ReferenceDir}/project/brcalab/b37/mipcombo_v0</main-dir> 351 <bed panel="MI.B1B2.1">MIPCOMBO_B1B2_CHEK2_snpid55_snpid9plus_v01.bed</bed> 352 <bed panel="MI.ID.1">MIPCOMBO_CHEK2_snpid55_snpid9plus_v01.bed</bed> 353 <bed panel="MI.PALB2.1">MIPCOMBO_PALB2_CHEK2_snpid55_snpid9plus_v01.bed</bed> 354 <bed panel="MI.CDKN2A.1">MIPCOMBO_CDKN2A_CDK4_ex2_snpid55_snpid9plus_v01.bed</bed> 355 <bed panel="MI.TP53.1">MIPCOMBO_TP53_CHEK2_snpid55_snpid9plus_v01.bed</bed> 356 </amplicons> 357 358 <novoalign> 359 <index>${ReferenceDir}/novoalign/human_g1k_v37_decoy_dbSNP137_12M_k14s2.novoindex</index> 360 <options>-o BAM 5 -o Sync -g 40 -x 1 --matchReward 4 --softclip 50,30 --trim3hp AG -H 22 -t 0,2.0 --hlimit 8 -v 150 -r R --Q2Off --pechimera off -F BAMPE RX</options> 361 </novoalign> 341 362 342 363 </align-mips> -
extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/grid/MipsAlignJobCreator.java
r5829 r5830 12 12 import net.sf.basedb.core.DerivedBioAssay; 13 13 import net.sf.basedb.core.File; 14 import net.sf.basedb.core.InvalidDataException; 14 15 import net.sf.basedb.core.ItemList; 15 16 import net.sf.basedb.core.ItemNotFoundException; … … 34 35 import net.sf.basedb.reggie.dao.BiomaterialList; 35 36 import net.sf.basedb.reggie.dao.Datafiletype; 37 import net.sf.basedb.reggie.dao.Dna; 36 38 import net.sf.basedb.reggie.dao.DoNotUse; 37 39 import net.sf.basedb.reggie.dao.Library; … … 144 146 String picard_path = cfg.getRequiredConfig("programs/picard/path", alignParameterSet); 145 147 String fgbio_path = cfg.getRequiredConfig("programs/fgbio/path", alignParameterSet); 148 String novoalign_path = cfg.getRequiredConfig("programs/novoalign/path", alignParameterSet); 149 String novosort_path = cfg.getRequiredConfig("programs/novosort/path", alignParameterSet); 146 150 147 151 // Options for the programs … … 149 153 String align_trimmomaticOptions1 = cfg.getRequiredConfig("align-mips/trimmomatic/step-1", alignParameterSet); 150 154 String align_trimmomaticOptions2 = cfg.getRequiredConfig("align-mips/trimmomatic/step-2", alignParameterSet); 155 String ampliconsRoot = cfg.getRequiredConfig("align-mips/amplicons/main-dir", alignParameterSet); 156 String novoalign_index = cfg.getRequiredConfig("align-mips/novoalign/index", alignParameterSet); 157 String novoalign_options = cfg.getRequiredConfig("align-mips/novoalign/options", alignParameterSet); 151 158 152 159 // Load common items … … 180 187 Sample specimen = (Sample)lib.findSingleParent(dc, Subtype.SPECIMEN); 181 188 String fastQFolder = (String)Annotationtype.DATA_FILES_FOLDER.getAnnotationValue(dc, merged); 189 190 Dna dna = lib.getDna(dc, true); 191 String mipsPanel = (String)Annotationtype.MIPS_PANEL.getAnnotationValue(dc, dna.getItem()); 192 if (mipsPanel == null) 193 { 194 throw new InvalidDataException("No value for '"+Annotationtype.MIPS_PANEL.getName() + "' on DNA: " + dna.getName()); 195 } 196 197 String amplicons_bed = cfg.getRequiredConfig("align-mips/amplicons/bed[@panel='"+mipsPanel+"']", alignParameterSet); 182 198 183 199 // Get all FASTQ files … … 260 276 script.cmd("TrimmomaticJAR="+trimmomatic_path); 261 277 script.cmd("FgBioJAR="+fgbio_path); 278 script.cmd("NovoAlign="+novoalign_path); 279 script.cmd("NovoSort="+novosort_path); 280 script.cmd("ReferenceDir=" + referenceRoot); 281 script.cmd("AmpliconsDir=" + ampliconsRoot); 282 script.cmd("AmpliconsBed=${AmpliconsDir}/"+amplicons_bed); 283 script.cmd("NovoIndex="+novoalign_index); 262 284 script.cmd("ArchiveRoot="+archiveRoot); 263 285 script.cmd("FastqFolder=${ArchiveRoot}"+fastQFolder); … … 280 302 script.cmd("cd ${TMPDIR}"); 281 303 script.cmd("mkdir fastq"); 282 script.cmd("mkdir trimmomatic.1"); 283 script.cmd("mkdir trimmomatic.2"); 284 script.cmd("mkdir bam.1"); 285 script.cmd("mkdir bam.2"); 304 script.cmd("mkdir trim.adapter"); 305 script.cmd("mkdir trim.quality"); 306 script.cmd("mkdir bam"); 307 script.cmd("mkdir bam.umi"); 308 script.cmd("mkdir bam.aligned"); 309 script.cmd("mkdir bam.sorted"); 310 script.cmd("mkdir bam.merged"); 286 311 script.cmd("cp ${ScriptDir}/stdwrap.sh ."); 312 script.cmd("cp ${ScriptDir}/stderrwrap.sh ."); 287 313 script.cmd("cp ${ScriptDir}/mips_adapters.sh ."); 288 314 script.cmd("cp ${ScriptDir}/picard2 ."); … … 309 335 String trimCmd1 = "./stdwrap.sh ${JAVA} -jar ${TrimmomaticJAR} PE"; 310 336 trimCmd1 += " -threads ${NumThreads}"; 311 trimCmd1 += " -phred33 -trimlog trim momatic.1/${prefix}.log";337 trimCmd1 += " -phred33 -trimlog trim.adapter/${prefix}.log"; 312 338 trimCmd1 += " fastq/${prefix}_R1.fastq.gz"; 313 339 trimCmd1 += " fastq/${prefix}_R2.fastq.gz"; 314 trimCmd1 += " trim momatic.1/${prefix}_R1.fastq";315 trimCmd1 += " trim momatic.1/un_${prefix}_R1.fastq";316 trimCmd1 += " trim momatic.1/${prefix}_R2.fastq";317 trimCmd1 += " trim momatic.1/un_${prefix}_R2.fastq";340 trimCmd1 += " trim.adapter/${prefix}_R1.fastq"; 341 trimCmd1 += " trim.adapter/un_${prefix}_R1.fastq"; 342 trimCmd1 += " trim.adapter/${prefix}_R2.fastq"; 343 trimCmd1 += " trim.adapter/un_${prefix}_R2.fastq"; 318 344 if (align_trimmomaticOptions1 != null) 319 345 { 320 346 trimCmd1 += " " + align_trimmomaticOptions1; 321 347 } 322 trimCmd1 += " >> trim momatic.1/trimmomatic.out";348 trimCmd1 += " >> trim.adapter/trimmomatic.out"; 323 349 324 350 String trimCmd2 = "./stdwrap.sh ${JAVA} -jar ${TrimmomaticJAR} PE"; 325 351 trimCmd2 += " -threads ${NumThreads}"; 326 trimCmd2 += " -phred33 -trimlog trim momatic.2/${prefix}.log";327 trimCmd2 += " trim momatic.1/${prefix}_R1.fastq";328 trimCmd2 += " trim momatic.1/${prefix}_R2.fastq";329 trimCmd2 += " trim momatic.2/${prefix}_R1.fastq";330 trimCmd2 += " trim momatic.2/un_${prefix}_R1.fastq";331 trimCmd2 += " trim momatic.2/${prefix}_R2.fastq";332 trimCmd2 += " trim momatic.2/un_${prefix}_R2.fastq";352 trimCmd2 += " -phred33 -trimlog trim.quality/${prefix}.log"; 353 trimCmd2 += " trim.adapter/${prefix}_R1.fastq"; 354 trimCmd2 += " trim.adapter/${prefix}_R2.fastq"; 355 trimCmd2 += " trim.quality/${prefix}_R1.fastq"; 356 trimCmd2 += " trim.quality/un_${prefix}_R1.fastq"; 357 trimCmd2 += " trim.quality/${prefix}_R2.fastq"; 358 trimCmd2 += " trim.quality/un_${prefix}_R2.fastq"; 333 359 if (align_trimmomaticOptions2 != null) 334 360 { 335 361 trimCmd2 += " " + align_trimmomaticOptions2; 336 362 } 337 trimCmd2 += " >> trim momatic.2/trimmomatic.out";363 trimCmd2 += " >> trim.quality/trimmomatic.out"; 338 364 script.cmd(" " + trimCmd1); 339 365 script.cmd(" " + trimCmd2); 340 366 script.cmd("done"); 367 script.newLine(); 341 368 342 369 script.comment("Run Picard FastqToSam and annotat the BAM files with UMI"); … … 349 376 toBamCmd += " -QUALITY_FORMAT Standard"; 350 377 toBamCmd += " ${rg_info}"; 351 toBamCmd += " -FASTQ trim momatic.2/${prefix}_R1.fastq";352 toBamCmd += " -FASTQ2 trim momatic.2/${prefix}_R2.fastq";353 toBamCmd += " -OUTPUT bam .1/${prefix}.bam";378 toBamCmd += " -FASTQ trim.quality/${prefix}_R1.fastq"; 379 toBamCmd += " -FASTQ2 trim.quality/${prefix}_R2.fastq"; 380 toBamCmd += " -OUTPUT bam/${prefix}.bam"; 354 381 toBamCmd += " >> fastqtosam.out"; 355 382 356 383 String umiCmd = "./stdwrap.sh ${JAVA} -jar ${FgBioJAR} AnnotateBamWithUmis"; 357 384 umiCmd += " --fail-fast true -t RX"; 358 umiCmd += " -i bam .1/${prefix}.bam";385 umiCmd += " -i bam/${prefix}.bam"; 359 386 umiCmd += " -f fastq/${prefix}_UMI.fastq.gz"; 360 umiCmd += " -o bam. 2/${prefix}.bam";387 umiCmd += " -o bam.umi/${prefix}.bam"; 361 388 umiCmd += " >> annotatebam.out"; 362 389 … … 364 391 script.cmd(" " + umiCmd); 365 392 script.cmd("done"); 393 script.newLine(); 394 395 script.cmd("min_insert=$(awk 'NR == 1 || $3 - $2 < min {min = $3 - $2}END{print min - 1}' \"${AmpliconsBed}\")"); 396 script.cmd("max_insert=$(awk 'NR == 1 || $3 - $2 > max {max = $3 - $2}END{print max + 1}' \"${AmpliconsBed}\")"); 397 script.comment("Run novoalign"); 398 script.cmd("AlignedBams=''"); 399 script.cmd("for prefix in ${FastqPrefix[@]} ; do"); 400 401 String alignCmd = "./stderrwrap.sh ${NovoAlign}"; 402 alignCmd += " -c ${NumThreads}"; 403 alignCmd += " " + novoalign_options; 404 alignCmd += " -d ${NovoIndex} --tags MC,ZP -i PE ${min_insert}-${max_insert}"; 405 alignCmd += " --amplicons ${AmpliconsBed}"; 406 alignCmd += " 1 bam.aligned/${prefix}.novo_coverage.bed"; 407 alignCmd += " -f bam.umi/${prefix}.bam"; 408 alignCmd += " > bam.aligned/${prefix}.bam"; 409 alignCmd += " 3> bam.aligned/${prefix}.novo.log"; 410 411 String sortCmd = "./stderrwrap.sh ${NovoSort}"; 412 sortCmd += " -c ${NumThreads} -t . -i"; 413 sortCmd += " -o bam.sorted/${prefix}.bam"; 414 sortCmd += " bam.aligned/${prefix}.bam"; 415 sortCmd += " 3> bam.sorted/${prefix}.novo.log"; 416 417 script.cmd(" " + alignCmd); 418 script.cmd(" " + sortCmd); 419 script.cmd(" AlignedBams=\"${AlignedBams} -INPUT bam.sorted/${prefix}.bam\""); 420 script.cmd("done"); 421 script.newLine(); 422 423 script.comment("Merge BAM files"); 424 String mergeCmd = "./stdwrap.sh ./picard2 MergeSamFiles"; 425 mergeCmd += " -SORT_ORDER coordinate -ASSUME_SORTED true"; 426 mergeCmd += " ${AlignedBams}"; 427 mergeCmd += " -OUTPUT bam.merged/novo.bam"; 428 mergeCmd += " > mergesam.out"; 429 script.cmd(mergeCmd); 430 366 431 /* 367 432 script.progress(95, "Copying result files to project archive");
Note: See TracChangeset
for help on using the changeset viewer.