Changeset 5833


Ignore:
Timestamp:
Feb 24, 2020, 11:05:56 AM (22 months ago)
Author:
Nicklas Nordborg
Message:

References #1218: Implement MIPs alignment

Started to re-design the MIPs alignment script. Moved a lot of functionality into an external script (mips_functions.sh). The Trimmomatic script has been fixed including logging and error handling.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/grid/MipsAlignJobCreator.java

    r5831 r5833  
    291291      script.cmd("ArchiveRoot="+archiveRoot);
    292292      script.cmd("FastqFolder=${ArchiveRoot}"+fastQFolder);
    293       script.cmd("FastqPrefix=( \\");
    294       script.cmd("  " + Values.getString(fastqPrefix, " \\\n  ", true) + " \\");
    295       script.cmd("  )");
     293      script.cmd("FastqPrefix=( ");
     294      script.cmd("  " + Values.getString(fastqPrefix, "\n  ", true));
     295      script.cmd(")");
     296      script.cmd("TrimOptionsAdapter=\""+align_trimmomaticOptions1+"\"");
     297      script.cmd("TrimOptionsQual=\""+align_trimmomaticOptions2+"\"");
    296298     
    297299      script.newLine();
     
    309311      script.cmd("cd ${TMPDIR}");
    310312      script.cmd("mkdir fastq");
    311       script.cmd("mkdir trim.adapter");
    312       script.cmd("mkdir trim.quality");
    313       script.cmd("mkdir bam");
    314       script.cmd("mkdir bam.umi");
    315       script.cmd("mkdir bam.aligned");
    316       script.cmd("mkdir bam.sorted");
    317       script.cmd("mkdir bam.merged");
    318       script.cmd("mkdir bam.final");
     313      script.cmd("mkdir tmp");
     314      script.cmd("mkdir out");
    319315      script.cmd("cp ${ScriptDir}/stdwrap.sh .");
    320316      script.cmd("cp ${ScriptDir}/stderrwrap.sh .");
    321       script.cmd("cp ${ScriptDir}/mips_adapters.sh .");
    322317      script.cmd("cp ${ScriptDir}/picard2 .");
    323       script.cmd("cp ${ScriptDir}/convert_picard_parameters.sh .");
    324       script.newLine();
    325 
    326       script.comment("Setup adapters in a separate script");
    327       script.cmd(". ./mips_adapters.sh");
     318      script.cmd("cp ${ScriptDir}/mips_functions.sh .");
     319      script.newLine();
     320
     321      script.comment("Load script with helper functions");
     322      script.cmd(". ./mips_functions.sh");
    328323      script.newLine();
    329324     
    330325      script.comment("Copy FASTQ and .rg files to tmp folder");
    331326      script.progress(10, "Copying FASTQ files");
    332       script.cmd("if [ ! -d \"${FastqFolder}\" ]; then");
    333       script.cmd("echo \"Can't find FASTQ data folder ${FastqFolder}\" 1>&2");
     327      script.cmd("copy_fastq_and_rg \"${FastqFolder}\" \"fastq\"");
     328      script.newLine();
     329     
     330      script.comment("Run Trimmomatic");
     331      script.progress(20, "Running Trimmomatic");
     332      script.comment("Create adapter.fa for use with trimmomatic");
     333      script.cmd("create_adapter_fa \"adapter.fa\"");
     334      script.newLine();
     335     
     336      script.cmd("n=0");
     337      script.cmd("nAfterTrim=0");
     338      script.cmd("for prefix in ${FastqPrefix[@]} ; do");
     339      script.cmd("   n=$((n + 1))");
     340      script.cmd("   run_trimmomatic \"${prefix}\" $n");
     341      script.cmd("done");
     342      script.cmd("trimlog_stats \"tmp/*.adaptrim.log\" \"out/adaptrim.hist.txt\"");
     343      script.cmd("trimlog_stats \"tmp/*.qualtrim.log\" \"out/qualtrim.hist.txt\"");
     344      script.cmd("if [ ${nAfterTrim} -eq 0 ]; then");
     345      script.cmd("  echo \"No paired end reads surviving adapter and quality trimming for any fastq prefix\" 1>&2");
    334346      script.cmd("exit 1");
    335347      script.cmd("fi");
    336       script.cmd("cp ${FastqFolder}/*.fastq.gz fastq");
    337       script.cmd("cp ${FastqFolder}/*.rg fastq");
    338       script.newLine();
    339 
    340       script.comment("Run Trimmomatic");
    341       script.cmd("for prefix in ${FastqPrefix[@]} ; do");
    342      
    343       String trimCmd1 = "./stdwrap.sh ${JAVA} -jar ${TrimmomaticJAR} PE";
    344       trimCmd1 += " -threads ${NumThreads}";
    345       trimCmd1 += " -phred33 -trimlog trim.adapter/${prefix}.log";
    346       trimCmd1 += " fastq/${prefix}_R1.fastq.gz";
    347       trimCmd1 += " fastq/${prefix}_R2.fastq.gz";
    348       trimCmd1 += " trim.adapter/${prefix}_R1.fastq";
    349       trimCmd1 += " trim.adapter/un_${prefix}_R1.fastq";
    350       trimCmd1 += " trim.adapter/${prefix}_R2.fastq";
    351       trimCmd1 += " trim.adapter/un_${prefix}_R2.fastq";
    352       if (align_trimmomaticOptions1 != null)
    353       {
    354         trimCmd1 += " " + align_trimmomaticOptions1;
    355       }
    356       trimCmd1 += " >> trim.adapter/trimmomatic.out";
    357      
    358       String trimCmd2 = "./stdwrap.sh ${JAVA} -jar ${TrimmomaticJAR} PE";
    359       trimCmd2 += " -threads ${NumThreads}";
    360       trimCmd2 += " -phred33 -trimlog trim.quality/${prefix}.log";
    361       trimCmd2 += " trim.adapter/${prefix}_R1.fastq";
    362       trimCmd2 += " trim.adapter/${prefix}_R2.fastq";
    363       trimCmd2 += " trim.quality/${prefix}_R1.fastq";
    364       trimCmd2 += " trim.quality/un_${prefix}_R1.fastq";
    365       trimCmd2 += " trim.quality/${prefix}_R2.fastq";
    366       trimCmd2 += " trim.quality/un_${prefix}_R2.fastq";
    367       if (align_trimmomaticOptions2 != null)
    368       {
    369         trimCmd2 += " " + align_trimmomaticOptions2;
    370       }
    371       trimCmd2 += " >> trim.quality/trimmomatic.out";
    372       script.cmd("   " + trimCmd1);
    373       script.cmd("   " + trimCmd2);
    374       script.cmd("done");
    375       script.newLine();
    376      
     348     
     349      /*
    377350      script.comment("Run Picard FastqToSam and annotat the BAM files with UMI");
    378351      script.cmd("for prefix in ${FastqPrefix[@]} ; do");
     
    467440      markDupCmd += " >> markduplicates.out";
    468441      script.cmd(markDupCmd);
    469      
     442      */
    470443      /*
    471444      script.progress(95, "Copying result files to project archive");
Note: See TracChangeset for help on using the changeset viewer.