Changeset 5775
- Timestamp:
- Dec 6, 2019, 7:56:14 AM (3 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/grid/VariantCallingJobCreator.java
r5760 r5775 357 357 script.comment("Skipping raw variant calling since we can use existing file"); 358 358 script.cmd("zcat ${BamFolder}/" + vcfName + " > tmp/variants-raw-2.vcf"); 359 // 'grep' to find number of variants The ' || true' is critical since 'grep' will exit with code 1 if there are no matches 360 script.cmd("NUMVARIANTS=`grep -c -v '^#' tmp/variants-raw-2.vcf || true`"); 359 361 script.newLine(); 360 362 } … … 395 397 script.progress(30, "Running VarDict (${NumThreads} threads)"); 396 398 script.cmd(vardictCmd); 397 script.newLine(); 398 399 // 'grep' to find number of variants The ' || true' is critical since 'grep' will exit with code 1 if there are no matches 400 script.cmd("NUMVARIANTS=`grep -c -v '^#' tmp/variants-raw-1.vcf || true`"); 401 script.newLine(); 402 403 script.cmd("if [ $NUMVARIANTS -gt 0 ]; then"); 399 404 script.comment("Calculate GC content and DNA strength using custom script"); 400 405 script.comment("We need to get the reference sequence within 50 base-pairs of each entry in the VCF"); … … 406 411 script.cmd("tabix tmp/gc-50.vcf.gz"); 407 412 script.cmd("./stderrwrap.sh " + vcfanno_path + " gc_stat.toml tmp/variants-raw-1.vcf > tmp/variants-raw-2.vcf 3>> vcfanno.out"); 413 script.cmd("else"); 414 script.progress(33, "Before copy"); 415 script.comment("No variants found, simply copy the raw file"); 416 script.cmd("cp tmp/variants-raw-1.vcf tmp/variants-raw-2.vcf"); 417 script.progress(34, "After copy"); 418 script.cmd("fi"); 408 419 script.cmd("cat tmp/variants-raw-2.vcf | bgzip -c > resultsraw/variants-raw.vcf.gz"); 409 420 script.newLine(); … … 412 423 if (!rawOnly) 413 424 { 425 script.cmd("if [ $NUMVARIANTS -gt 0 ]; then"); 414 426 script.comment("Calculate distance to closest G5 SNP and add back"); 415 427 script.comment("annotations DIST_SNP and DIST_DIV to the variants VCF"); … … 456 468 script.cmd(snpSiftCmd); 457 469 script.newLine(); 470 script.cmd("else"); 471 script.cmd(" cat tmp/variants-raw-2.vcf | bgzip -c > resultsfilter/variants-annotated.vcf.gz"); 472 script.cmd(" cp tmp/variants-raw-2.vcf resultsfilter/variants-filtered.vcf"); 473 script.cmd("fi"); 458 474 } 459 475 … … 463 479 script.cmd("\\cp resultsraw/variants-* ${BamFolder}"); // "\cp" will enable overwriting existing files 464 480 script.cmd("ls -1 resultsraw/variants-* >> ${WD}/files.out"); 465 script.cmd("echo \"Callable bases: `awk -F'\ t' 'BEGIN{SUM=0}{ SUM+=$3-$2 } END{print SUM}' resultsraw/variants-callable.bed`\" >> ${WD}/stats.out");466 script.cmd("echo \"Raw variants: `zcat resultsraw/variants-raw.vcf.gz | grep - v '^#\' | wc -l`\" >> ${WD}/stats.out");481 script.cmd("echo \"Callable bases: `awk -F'\\t' 'BEGIN{SUM=0}{ SUM+=$3-$2 } END{print SUM}' resultsraw/variants-callable.bed`\" >> ${WD}/stats.out"); 482 script.cmd("echo \"Raw variants: `zcat resultsraw/variants-raw.vcf.gz | grep -c -v '^#'`\" >> ${WD}/stats.out"); 467 483 if (externalGroup != null) 468 484 { … … 476 492 script.cmd("cp resultsfilter/* ${FilteredFolder}"); 477 493 script.cmd("ls -1 ${FilteredFolder}/* >> ${WD}/files.out"); 478 script.cmd("echo \"Annotated variants: `zcat resultsfilter/variants-annotated.vcf.gz | grep - v '^#\' | wc -l`\" >> ${WD}/stats.out");479 script.cmd("echo \"Filtered variants: `cat resultsfilter/variants-filtered.vcf | grep - v '^#\' | wc -l`\" >> ${WD}/stats.out");494 script.cmd("echo \"Annotated variants: `zcat resultsfilter/variants-annotated.vcf.gz | grep -c -v '^#'`\" >> ${WD}/stats.out"); 495 script.cmd("echo \"Filtered variants: `cat resultsfilter/variants-filtered.vcf | grep -c -v '^#'`\" >> ${WD}/stats.out"); 480 496 if (externalGroup != null) 481 497 {
Note: See TracChangeset
for help on using the changeset viewer.