Changeset 2555


Ignore:
Timestamp:
Aug 5, 2014, 2:39:32 PM (7 years ago)
Author:
Nicklas Nordborg
Message:

References #614: Improve error handling when executing jobs on the cluster

Re-organized the configuration file, which should make it easier to re-use configuration settings and to understand when each setting is used.

Location:
extensions/net.sf.basedb.reggie/trunk
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • extensions/net.sf.basedb.reggie/trunk/reggie-ogs-hosts.xml

    r2553 r2555  
    1616    <!-- job-folder: must be a path that is common to the master and all nodes (required) -->
    1717    <job-folder>/path/to/job/folder</job-folder>
     18   
    1819    <!-- tmp-folder: can be a local path on the node but must exist on all nodes (required) -->
    1920    <tmp-folder>/tmp</tmp-folder>
    20     <!-- script-folder: folder where the pipeline scripts are located (required).
    21       Must be accessible from all nodes.
    22      -->
    23     <script-folder>/path/to/script/folder</script-folder>
     21   
     22    <!-- full path to the location where HiSeq data is stored (required) -->
     23    <run-archive>/path/to/run_archive</run-archive>
    2424   
    2525    <!-- Full path to the location where data files should be archived (required) -->
     
    3030    <!-- Do not include name of project -->
    3131    <reference-folder>/disk/reference</reference-folder>
     32   
     33    <!-- Information about programs used by reggie -->
     34    <!-- Unless otherwise noted, all paths must be the same on all nodes -->
     35    <programs>
     36      <pipeline-scripts>
     37        <!-- folder where the pipeline scripts are located (required). -->
     38        <path>/path/to/pipelinescripts</path>
     39      </pipeline-scripts>
     40      <picard>
     41        <!-- full path to the directory with Picard jar files (required) -->
     42        <path>/path/to/picard</path>
     43      </picard>
     44      <trimmomatic>
     45        <!-- full path to the JAR file with the Trimmomatic program (required) -->
     46        <path>/usr/local/packages/trimmomatic/0.32/trimmomatic-0.32.jar</path>
     47        <!-- full path to the file with Illumina adapter information -->
     48        <adapter-file>/usr/local/packages/trimmomatic/0.32/adapters/TruSeq3-PE-2.fa</adapter-file>
     49      </trimmomatic>
     50      <bowtie2>
     51        <!-- full or partial path to bowtie2 (required) -->
     52        <path>bowtie2</path>
     53      </bowtie2>
     54      <tophat>
     55        <!-- full or partial path to tophat (required) -->
     56        <path>tophat</path>
     57      </tophat>
     58    </programs>
    3259   
    3360    <!-- priority values that are selectable in the web interface -->
     
    4269    <!-- settings for the demuxing step -->
    4370    <demux>
    44       <!-- full path to the directory with Picard jar files (required) -->
    45       <picard-dir></picard-dir>
    46       <!-- amount of memory to give to Picard (default is 50g)-->
    47       <picard-memory>50g</picard-memory>
    4871      <!-- parallel environment option to the queue system -->
    4972      <!-- the default setting use all slots on hosts with at least 8 slots available -->
    5073      <parallel-environment>smp 8-</parallel-environment>
    51       <!-- full path to the location where HiSeq data is stored (required) -->
    52       <run-archive>/path/to/run_archive</run-archive>
     74      <!-- amount of memory to give to Picard (default is 50g)-->
     75      <picard-memory>50g</picard-memory>
    5376      <!-- static options for the picard ExtractIlluminaBarcodes step -->
    5477      <extract-options>QUIET=true VERBOSITY=WARNING</extract-options>
     
    5780      <!-- number of tiles to process when debugging (default=16) -->
    5881      <debug-tile-limit>16</debug-tile-limit>
     82      <!-- static options for Trimmomatic -->
     83      <trimmomatic-options>ILLUMINACLIP:${AdapterFile}:2:30:12:1:true LEADING:3 TRAILING:3 MAXINFO:40:0.9 MINLEN:20</trimmomatic-options>
    5984    </demux>
    6085
    61     <!-- Settings for the Trimmomatic step -->
    62     <trimmomatic>
    63       <!-- full path to the JAR file with the Trimmomatic program -->
    64       <jar-path>/usr/local/packages/trimmomatic/0.32/trimmomatic-0.32.jar</jar-path>
    65       <!-- full path to the file with Illumina adapter information -->
    66       <adapter-file>/usr/local/packages/trimmomatic/0.32/adapters/TruSeq3-PE-2.fa</adapter-file>
    67       <!-- static options for Trimmomatic -->
    68       <options>ILLUMINACLIP:${AdapterFile}:2:30:12:1:true LEADING:3 TRAILING:3 MAXINFO:40:0.9 MINLEN:20</options>
    69     </trimmomatic>
    70 
    71     <pe-filter>
    72       <!-- static options for bowtie -->
    73       <bowtie-options>-q --fr -k 1 --phred33 -t --local</bowtie-options>
    74 
     86    <mask>
    7587      <!-- parallel environment option to the queue system -->
    7688      <!-- the default setting use all slots on hosts with at least 8 slots available -->
    7789      <parallel-environment>smp 8-</parallel-environment>
    7890
    79       <!-- relative path from <reference-folder> to the reference genome used for filtering -->
    80       <!-- TODO selectable in GUI? saved as annotation? -->
     91      <!-- relative path from <reference-folder> to the reference genome used for masking -->
     92      <!-- This is the -x option used for bowtie -->
    8193      <reference-name>scanb/ribo_phix_repeats_filter.bt2/ribo_phix_repeats_filter</reference-name>
    82     </pe-filter>
     94     
     95      <!-- static options for bowtie -->
     96      <bowtie-options>-q --fr -k 1 --phred33 -t --local</bowtie-options>
     97    </mask>
    8398
    84     <tophat>
     99    <align>
    85100      <!-- relative path from <reference-folder> to the reference genome used for alignment -->
    86101      <!-- TODO selectable in GUI? saved as annotation? -->
    87102      <reference-gidx>scanb/hg19_1-22_XM_extra_b37_Y_decoy.bt2/hg19_1-22_XM_extra_b37_Y_decoy</reference-gidx>
    88103      <reference-tidx>scanb/UCSC_Human_hg19_knownGenes_GTF_appended_10sep2012v209.bt2/knownGenes</reference-tidx>
     104     
    89105      <!-- static options for tophat -->
    90       <options>--library-type fr-firststrand --keep-fasta-order --no-coverage-search</options>
    91     </tophat>
     106      <tophat-options>--library-type fr-firststrand --keep-fasta-order --no-coverage-search</tophat-options>
     107     
     108      <!-- static options for the picard MarkDuplicates step -->
     109      <mark-duplicates-options>REMOVE_DUPLICATES=false ASSUME_SORTED=true MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=2000 QUIET=true VERBOSITY=WARNING</mark-duplicates-options>
     110    </align>
    92111  </host>
    93112
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/grid/OpenGridService.java

    r2534 r2555  
    192192        cluster.setJobFolder(cluster.getRequiredConfig("job-folder"));
    193193        cluster.setTmpFolder(cluster.getRequiredConfig("tmp-folder"));
    194         cluster.setScriptFolder(cluster.getRequiredConfig("script-folder"));
     194        cluster.setScriptFolder(cluster.getRequiredConfig("programs/pipeline-scripts/path"));
    195195        logger.debug("Loaded cluster " + cluster.getId());
    196196        clusters.put(cluster.getId(), cluster);
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/servlet/DemuxMergeServlet.java

    r2538 r2555  
    368368        // Connect to cluster and load configuration for illumina2bam software
    369369        ssh = cluster.connect(2);
    370         String demux_picardDir = cluster.getRequiredConfig("demux/picard-dir");
    371         String demux_picardMemory = cluster.getConfig("demux/picard-memory", "50g");
    372         String demux_parallelEnvironment = cluster.getConfig("demux/parallel-environment", "smp 8-");
    373         String demux_runArchive = cluster.getRequiredConfig("demux/run-archive");
    374        
    375         String demux_extractOptions = cluster.getConfig("demux/extract-options", null);
    376         String demux_fastqOptions = cluster.getConfig("demux/fastq-options", null);
    377        
    378         String debug_tileLimit = cluster.getConfig("demux/debug-tile-limit", "16");
    379        
    380         String trim_jarPath = cluster.getRequiredConfig("trimmomatic/jar-path");
    381         String trim_adapterFile = cluster.getRequiredConfig("trimmomatic/adapter-file");
    382         String trim_options = cluster.getRequiredConfig("trimmomatic/options");
    383        
     370        // Get global options
     371        String runArchive = cluster.getRequiredConfig("run-archive");
    384372        String projectRoot = cluster.getRequiredConfig("project-archive");
    385373        String projectFolder = projectName+(debug ? ".debug" : "");
    386374        String fastqArchive = projectRoot + "/" + projectFolder;
     375
     376        // Paths to programs used (picard and trimmomatic)
     377        String picard_path = cluster.getRequiredConfig("programs/picard/path");
     378        String trimmomatic_path = cluster.getRequiredConfig("programs/trimmomatic/path");
     379        String trimmomatic_adapterFile = cluster.getRequiredConfig("programs/trimmomatic/adapter-file");
     380       
     381        // Options for the programs when demuxing
     382        String demux_parallelEnvironment = cluster.getConfig("demux/parallel-environment", "smp 8-");
     383        String demux_picardMemory = cluster.getConfig("demux/picard-memory", "50g");
     384        String demux_extractOptions = cluster.getConfig("demux/extract-options", null);
     385        String demux_fastqOptions = cluster.getConfig("demux/fastq-options", null);
     386        String debug_tileLimit = cluster.getConfig("demux/debug-tile-limit", "16");
     387        String demux_trimmomaticOptions = cluster.getRequiredConfig("demux/trimmomatic-options");
    387388       
    388389        // Load common items
     
    434435            script.comment("Setting up scripting environment and copying script to tmp folder");
    435436            script.cmd("export ScriptDir=" + cluster.getScriptFolder());
    436             script.cmd("export PicardDir="+demux_picardDir);
     437            script.cmd("export PicardDir="+picard_path);
    437438            script.cmd("export PicardMemory="+demux_picardMemory);
    438             script.cmd("export TrimmomaticJAR="+trim_jarPath);
    439             script.cmd("export AdapterFile="+trim_adapterFile);
     439            script.cmd("export TrimmomaticJAR="+trimmomatic_path);
     440            script.cmd("export AdapterFile="+trimmomatic_adapterFile);
    440441            script.newLine();
    441442            script.cmd("mkdir -p " + tmpFolder);
     
    600601            String flowCellBarcode = (String)Annotationtype.FLOWCELL_ID.getAnnotationValue(dc, flowCell);
    601602            Integer runNumber = (Integer)Annotationtype.SEQUENCING_RUN_NUMBER.getAnnotationValue(dc, seqRun);
    602             script.cmd("RUN_ARCHIVE=`find " + demux_runArchive + " -maxdepth 2 -name " + dataFolder + " -type d -print -quit 2> /dev/null || true`");
     603            script.cmd("RUN_ARCHIVE=`find " + runArchive + " -maxdepth 2 -name " + dataFolder + " -type d -print -quit 2> /dev/null || true`");
    603604            script.cmd("if [ -z \"$RUN_ARCHIVE\" ]; then");
    604             script.cmd("echo \"Can't find data folder "+dataFolder+" in " + demux_runArchive + "\" 1>&2");
     605            script.cmd("echo \"Can't find data folder "+dataFolder+" in " + runArchive + "\" 1>&2");
    605606            script.cmd("exit 1");
    606607            script.cmd("fi");
     
    717718            trimCmd += " fastq.trimmomatic/un_"+R2_name;
    718719
    719             if (trim_options != null)
     720            if (demux_trimmomaticOptions != null)
    720721            {
    721               trimCmd += " " + trim_options;
     722              trimCmd += " " + demux_trimmomaticOptions;
    722723            }
    723724            trimCmd += " >> "+trimmomaticOut;
     
    10351036        if (total.warnings.size() > 0)
    10361037        {
    1037           msg += "; " + total.warnings.size() + " warnings!";
     1038          msg += total.warnings.size() + " warnings!";
    10381039        }
    10391040        return msg;
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/servlet/MaskAlignServlet.java

    r2554 r2555  
    283283        ssh = cluster.connect(2);
    284284       
     285        // Get global options
    285286        String projectRoot = cluster.getRequiredConfig("project-archive");
    286287        String referenceRoot = cluster.getRequiredConfig("reference-folder");
    287288       
    288         String pefilter_referenceName = cluster.getRequiredConfig("pe-filter/reference-name");
    289         String pefilter_parallel_environment = cluster.getConfig("pe-filter/parallel-environment", "smp 8-");
    290         String pefilter_bowtie_options = cluster.getConfig("pe-filter/bowtie-options", "");
    291 
    292         String tophat_tidx = cluster.getRequiredConfig("tophat/reference-tidx");
    293         String tophat_gidx = cluster.getRequiredConfig("tophat/reference-gidx");
    294         String tophat_options = cluster.getConfig("tophat/options", "");
     289        // Paths to programs used (bowtie , tophat, picards, and more...)
     290        String bowtie_path = cluster.getRequiredConfig("programs/bowtie2/path");
     291        String picard_path = cluster.getRequiredConfig("programs/picard/path");
     292        String tophat_path = cluster.getRequiredConfig("programs/tophat/path");
     293       
     294        // Options for the programs
     295        String mask_parallelEnvironment = cluster.getConfig("mask/parallel-environment", "smp 8-");
     296        String mask_referenceName = cluster.getRequiredConfig("mask/reference-name");
     297        String mask_bowtieOptions = cluster.getConfig("mask/bowtie-options", "");
     298
     299        String align_tidx = cluster.getRequiredConfig("align/reference-tidx");
     300        String align_gidx = cluster.getRequiredConfig("align/reference-gidx");
     301        String align_tophatOptions = cluster.getConfig("align/tophat-options", "");
     302        String align_markDuplicatesOptions = cluster.getConfig("align/mark-duplicates-options", "");
    295303     
    296304        // Load common items
     
    358366          script.comment("Setting up scripting environment and copying script to tmp folder");
    359367          script.cmd("ScriptDir=" + cluster.getScriptFolder());
     368          script.cmd("export PicardDir="+picard_path);
    360369          script.cmd("ReferenceDir=" + referenceRoot);
    361           script.cmd("RMidx=${ReferenceDir}/" + pefilter_referenceName);
    362           script.cmd("export Gidx=${ReferenceDir}/" + tophat_gidx);
    363           script.cmd("export Tidx=${ReferenceDir}/" + tophat_tidx);
     370          script.cmd("RMidx=${ReferenceDir}/" + mask_referenceName);
     371          script.cmd("Gidx=${ReferenceDir}/" + align_gidx);
     372          script.cmd("Tidx=${ReferenceDir}/" + align_tidx);
    364373          script.cmd("export TransferDir="+projectRoot+"/" + projectName);
    365374          script.cmd("export TophatDir="+tophatFolder);
     
    393402          script.progress(20, "Running Bowtie2");
    394403          script.cmd("mkdir fastq.masked");
    395           String maskCmd = "./stdwrap.sh bowtie2";
     404          String maskCmd = "./stdwrap.sh " + bowtie_path;
    396405          maskCmd += " -p ${NumThreads}";
    397           maskCmd += " " + pefilter_bowtie_options;
     406          maskCmd += " " + mask_bowtieOptions;
    398407            maskCmd += " --un-conc-gz fastq.masked/R%.fastq.gz";
    399408          maskCmd += " -x ${RMidx}";
     
    409418          script.cmd("mkdir fastq.masked.tophat");
    410419         
    411           String tophatCmd = "./stdwrap.sh tophat";
     420          String tophatCmd = "./stdwrap.sh " + tophat_path;
    412421          tophatCmd += " -p ${NumThreads}";
    413422          tophatCmd += " -o fastq.masked.tophat";
     
    415424          tophatCmd += " --mate-std-dev " + FlowCellSampleSheetExporter.DEFAULT_LIBRARY_WIDTH;
    416425          tophatCmd += " --transcriptome-index ${Tidx}";
    417           tophatCmd += " " + tophat_options;
     426          tophatCmd += " " + align_tophatOptions;
    418427          tophatCmd += " ${Gidx}";
    419428          tophatCmd += " fastq.masked/R1.fastq.gz";
     
    428437          picardCmd += " OUTPUT=fastq.masked.tophat/accepted_hits.bam.tmp_picard";
    429438          picardCmd += " METRICS_FILE=fastq.masked.tophat/accepted_hits_picardmetrics.csv";
    430           picardCmd += " REMOVE_DUPLICATES=false";
    431           picardCmd += " ASSUME_SORTED=true";
     439          picardCmd += " " + align_markDuplicatesOptions;
    432440          picardCmd += " > " + jobFolder + "/picard_MarkDuplicates.out";
    433441          script.cmd(picardCmd);
     
    472480         
    473481          JobDefinition jobDef = new JobDefinition(jobName, script.toString());
    474           jobDef.setParallelEnvironment(pefilter_parallel_environment);
     482          jobDef.setParallelEnvironment(mask_parallelEnvironment);
    475483          if (priority != null)
    476484          {
Note: See TracChangeset for help on using the changeset viewer.