Changeset 5830


Ignore:
Timestamp:
Feb 21, 2020, 11:35:42 AM (22 months ago)
Author:
Nicklas Nordborg
Message:

References #1218: Implement MIPs alignment

Added alignement with novoalign and merging of BAM files to a single BAM.

Location:
extensions/net.sf.basedb.reggie/trunk
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • extensions/net.sf.basedb.reggie/trunk/config/reggie-config.xml

    r5829 r5830  
    202202          <path>/usr/local/packages/fgbio/0.8.1/fgbio.jar</path>
    203203        </fgbio>
     204        <novoalign>
     205          <!-- full path to the novoalign file (required) -->
     206          <path>/usr/local/packages/novocraft/V4.00.Pre-20190904/novoalign</path>
     207        </novoalign>
     208        <novosort>
     209          <!-- full path to the novosort file (required) -->
     210          <path>/usr/local/packages/novocraft/V4.00.Pre-20190904/novosort</path>
     211        </novosort>
    204212      </programs>
    205213     
     
    339347        </trimmomatic>
    340348       
     349        <amplicons>
     350          <main-dir>${ReferenceDir}/project/brcalab/b37/mipcombo_v0</main-dir>
     351          <bed panel="MI.B1B2.1">MIPCOMBO_B1B2_CHEK2_snpid55_snpid9plus_v01.bed</bed>
     352          <bed panel="MI.ID.1">MIPCOMBO_CHEK2_snpid55_snpid9plus_v01.bed</bed>
     353          <bed panel="MI.PALB2.1">MIPCOMBO_PALB2_CHEK2_snpid55_snpid9plus_v01.bed</bed>
     354          <bed panel="MI.CDKN2A.1">MIPCOMBO_CDKN2A_CDK4_ex2_snpid55_snpid9plus_v01.bed</bed>
     355          <bed panel="MI.TP53.1">MIPCOMBO_TP53_CHEK2_snpid55_snpid9plus_v01.bed</bed>
     356        </amplicons>
     357       
     358        <novoalign>
     359          <index>${ReferenceDir}/novoalign/human_g1k_v37_decoy_dbSNP137_12M_k14s2.novoindex</index>
     360          <options>-o BAM 5 -o Sync -g 40 -x 1 --matchReward 4 --softclip 50,30 --trim3hp AG -H 22 -t 0,2.0 --hlimit 8 -v 150 -r R --Q2Off --pechimera off -F BAMPE RX</options>
     361        </novoalign>
    341362       
    342363      </align-mips>
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/grid/MipsAlignJobCreator.java

    r5829 r5830  
    1212import net.sf.basedb.core.DerivedBioAssay;
    1313import net.sf.basedb.core.File;
     14import net.sf.basedb.core.InvalidDataException;
    1415import net.sf.basedb.core.ItemList;
    1516import net.sf.basedb.core.ItemNotFoundException;
     
    3435import net.sf.basedb.reggie.dao.BiomaterialList;
    3536import net.sf.basedb.reggie.dao.Datafiletype;
     37import net.sf.basedb.reggie.dao.Dna;
    3638import net.sf.basedb.reggie.dao.DoNotUse;
    3739import net.sf.basedb.reggie.dao.Library;
     
    144146    String picard_path = cfg.getRequiredConfig("programs/picard/path", alignParameterSet);
    145147    String fgbio_path = cfg.getRequiredConfig("programs/fgbio/path", alignParameterSet);
     148    String novoalign_path = cfg.getRequiredConfig("programs/novoalign/path", alignParameterSet);
     149    String novosort_path = cfg.getRequiredConfig("programs/novosort/path", alignParameterSet);
    146150
    147151    // Options for the programs
     
    149153    String align_trimmomaticOptions1 = cfg.getRequiredConfig("align-mips/trimmomatic/step-1", alignParameterSet);
    150154    String align_trimmomaticOptions2 = cfg.getRequiredConfig("align-mips/trimmomatic/step-2", alignParameterSet);
     155    String ampliconsRoot = cfg.getRequiredConfig("align-mips/amplicons/main-dir", alignParameterSet);
     156    String novoalign_index = cfg.getRequiredConfig("align-mips/novoalign/index", alignParameterSet);
     157    String novoalign_options = cfg.getRequiredConfig("align-mips/novoalign/options", alignParameterSet);
    151158
    152159    // Load common items
     
    180187      Sample specimen =  (Sample)lib.findSingleParent(dc, Subtype.SPECIMEN);
    181188      String fastQFolder = (String)Annotationtype.DATA_FILES_FOLDER.getAnnotationValue(dc, merged);
     189     
     190      Dna dna = lib.getDna(dc, true);
     191      String mipsPanel = (String)Annotationtype.MIPS_PANEL.getAnnotationValue(dc, dna.getItem());
     192      if (mipsPanel == null)
     193      {
     194        throw new InvalidDataException("No value for '"+Annotationtype.MIPS_PANEL.getName() + "' on DNA: " + dna.getName());
     195      }
     196     
     197      String amplicons_bed = cfg.getRequiredConfig("align-mips/amplicons/bed[@panel='"+mipsPanel+"']", alignParameterSet);
    182198     
    183199      // Get all FASTQ files
     
    260276      script.cmd("TrimmomaticJAR="+trimmomatic_path);
    261277      script.cmd("FgBioJAR="+fgbio_path);
     278      script.cmd("NovoAlign="+novoalign_path);
     279      script.cmd("NovoSort="+novosort_path);
     280      script.cmd("ReferenceDir=" + referenceRoot);
     281      script.cmd("AmpliconsDir=" + ampliconsRoot);
     282      script.cmd("AmpliconsBed=${AmpliconsDir}/"+amplicons_bed);
     283      script.cmd("NovoIndex="+novoalign_index);
    262284      script.cmd("ArchiveRoot="+archiveRoot);
    263285      script.cmd("FastqFolder=${ArchiveRoot}"+fastQFolder);
     
    280302      script.cmd("cd ${TMPDIR}");
    281303      script.cmd("mkdir fastq");
    282       script.cmd("mkdir trimmomatic.1");
    283       script.cmd("mkdir trimmomatic.2");
    284       script.cmd("mkdir bam.1");
    285       script.cmd("mkdir bam.2");
     304      script.cmd("mkdir trim.adapter");
     305      script.cmd("mkdir trim.quality");
     306      script.cmd("mkdir bam");
     307      script.cmd("mkdir bam.umi");
     308      script.cmd("mkdir bam.aligned");
     309      script.cmd("mkdir bam.sorted");
     310      script.cmd("mkdir bam.merged");
    286311      script.cmd("cp ${ScriptDir}/stdwrap.sh .");
     312      script.cmd("cp ${ScriptDir}/stderrwrap.sh .");
    287313      script.cmd("cp ${ScriptDir}/mips_adapters.sh .");
    288314      script.cmd("cp ${ScriptDir}/picard2 .");
     
    309335      String trimCmd1 = "./stdwrap.sh ${JAVA} -jar ${TrimmomaticJAR} PE";
    310336      trimCmd1 += " -threads ${NumThreads}";
    311       trimCmd1 += " -phred33 -trimlog trimmomatic.1/${prefix}.log";
     337      trimCmd1 += " -phred33 -trimlog trim.adapter/${prefix}.log";
    312338      trimCmd1 += " fastq/${prefix}_R1.fastq.gz";
    313339      trimCmd1 += " fastq/${prefix}_R2.fastq.gz";
    314       trimCmd1 += " trimmomatic.1/${prefix}_R1.fastq";
    315       trimCmd1 += " trimmomatic.1/un_${prefix}_R1.fastq";
    316       trimCmd1 += " trimmomatic.1/${prefix}_R2.fastq";
    317       trimCmd1 += " trimmomatic.1/un_${prefix}_R2.fastq";
     340      trimCmd1 += " trim.adapter/${prefix}_R1.fastq";
     341      trimCmd1 += " trim.adapter/un_${prefix}_R1.fastq";
     342      trimCmd1 += " trim.adapter/${prefix}_R2.fastq";
     343      trimCmd1 += " trim.adapter/un_${prefix}_R2.fastq";
    318344      if (align_trimmomaticOptions1 != null)
    319345      {
    320346        trimCmd1 += " " + align_trimmomaticOptions1;
    321347      }
    322       trimCmd1 += " >> trimmomatic.1/trimmomatic.out";
     348      trimCmd1 += " >> trim.adapter/trimmomatic.out";
    323349     
    324350      String trimCmd2 = "./stdwrap.sh ${JAVA} -jar ${TrimmomaticJAR} PE";
    325351      trimCmd2 += " -threads ${NumThreads}";
    326       trimCmd2 += " -phred33 -trimlog trimmomatic.2/${prefix}.log";
    327       trimCmd2 += " trimmomatic.1/${prefix}_R1.fastq";
    328       trimCmd2 += " trimmomatic.1/${prefix}_R2.fastq";
    329       trimCmd2 += " trimmomatic.2/${prefix}_R1.fastq";
    330       trimCmd2 += " trimmomatic.2/un_${prefix}_R1.fastq";
    331       trimCmd2 += " trimmomatic.2/${prefix}_R2.fastq";
    332       trimCmd2 += " trimmomatic.2/un_${prefix}_R2.fastq";
     352      trimCmd2 += " -phred33 -trimlog trim.quality/${prefix}.log";
     353      trimCmd2 += " trim.adapter/${prefix}_R1.fastq";
     354      trimCmd2 += " trim.adapter/${prefix}_R2.fastq";
     355      trimCmd2 += " trim.quality/${prefix}_R1.fastq";
     356      trimCmd2 += " trim.quality/un_${prefix}_R1.fastq";
     357      trimCmd2 += " trim.quality/${prefix}_R2.fastq";
     358      trimCmd2 += " trim.quality/un_${prefix}_R2.fastq";
    333359      if (align_trimmomaticOptions2 != null)
    334360      {
    335361        trimCmd2 += " " + align_trimmomaticOptions2;
    336362      }
    337       trimCmd2 += " >> trimmomatic.2/trimmomatic.out";
     363      trimCmd2 += " >> trim.quality/trimmomatic.out";
    338364      script.cmd("   " + trimCmd1);
    339365      script.cmd("   " + trimCmd2);
    340366      script.cmd("done");
     367      script.newLine();
    341368     
    342369      script.comment("Run Picard FastqToSam and annotat the BAM files with UMI");
     
    349376      toBamCmd += " -QUALITY_FORMAT Standard";
    350377      toBamCmd += " ${rg_info}";
    351       toBamCmd += " -FASTQ trimmomatic.2/${prefix}_R1.fastq";
    352       toBamCmd += " -FASTQ2 trimmomatic.2/${prefix}_R2.fastq";
    353       toBamCmd += " -OUTPUT bam.1/${prefix}.bam";
     378      toBamCmd += " -FASTQ trim.quality/${prefix}_R1.fastq";
     379      toBamCmd += " -FASTQ2 trim.quality/${prefix}_R2.fastq";
     380      toBamCmd += " -OUTPUT bam/${prefix}.bam";
    354381      toBamCmd += " >> fastqtosam.out";
    355382     
    356383      String umiCmd = "./stdwrap.sh ${JAVA} -jar ${FgBioJAR} AnnotateBamWithUmis";
    357384      umiCmd += " --fail-fast true -t RX";
    358       umiCmd += " -i bam.1/${prefix}.bam";
     385      umiCmd += " -i bam/${prefix}.bam";
    359386      umiCmd += " -f fastq/${prefix}_UMI.fastq.gz";
    360       umiCmd += " -o bam.2/${prefix}.bam";
     387      umiCmd += " -o bam.umi/${prefix}.bam";
    361388      umiCmd += " >> annotatebam.out";
    362389     
     
    364391      script.cmd("   " + umiCmd);
    365392      script.cmd("done");
     393      script.newLine();
     394   
     395      script.cmd("min_insert=$(awk 'NR == 1 || $3 - $2 < min {min = $3 - $2}END{print min - 1}' \"${AmpliconsBed}\")");
     396      script.cmd("max_insert=$(awk 'NR == 1 || $3 - $2 > max {max = $3 - $2}END{print max + 1}' \"${AmpliconsBed}\")");
     397      script.comment("Run novoalign");
     398      script.cmd("AlignedBams=''");
     399      script.cmd("for prefix in ${FastqPrefix[@]} ; do");
     400     
     401      String alignCmd = "./stderrwrap.sh ${NovoAlign}";
     402      alignCmd += " -c ${NumThreads}";
     403      alignCmd += " " + novoalign_options;
     404      alignCmd += " -d ${NovoIndex} --tags MC,ZP -i PE ${min_insert}-${max_insert}";
     405      alignCmd += " --amplicons ${AmpliconsBed}";
     406      alignCmd += " 1 bam.aligned/${prefix}.novo_coverage.bed";
     407      alignCmd += " -f bam.umi/${prefix}.bam";
     408      alignCmd += " > bam.aligned/${prefix}.bam";
     409      alignCmd += " 3> bam.aligned/${prefix}.novo.log";
     410     
     411      String sortCmd = "./stderrwrap.sh ${NovoSort}";
     412      sortCmd += " -c ${NumThreads} -t . -i";
     413      sortCmd += " -o bam.sorted/${prefix}.bam";
     414      sortCmd += " bam.aligned/${prefix}.bam";
     415      sortCmd += " 3> bam.sorted/${prefix}.novo.log";
     416     
     417      script.cmd("   " + alignCmd);
     418      script.cmd("   " + sortCmd);
     419      script.cmd("   AlignedBams=\"${AlignedBams} -INPUT bam.sorted/${prefix}.bam\"");
     420      script.cmd("done");
     421      script.newLine();
     422     
     423      script.comment("Merge BAM files");
     424      String mergeCmd = "./stdwrap.sh ./picard2 MergeSamFiles";
     425      mergeCmd += " -SORT_ORDER coordinate -ASSUME_SORTED true";
     426      mergeCmd += " ${AlignedBams}";
     427      mergeCmd += " -OUTPUT bam.merged/novo.bam";
     428      mergeCmd += " > mergesam.out";
     429      script.cmd(mergeCmd);
     430
    366431      /*
    367432      script.progress(95, "Copying result files to project archive");
Note: See TracChangeset for help on using the changeset viewer.