source: extensions/net.sf.basedb.reggie/trunk/config/reggie-config.xml @ 5831

Last change on this file since 5831 was 5831, checked in by Nicklas Nordborg, 2 years ago

References #1218: Implement MIPs alignment

Final steps in the alignment pipeline. Re-headers, splitting and marking duplicates. All critical work has been done. There are still some collections of metrics to implement, as well as error handling and storing result files back to the project archive.

File size: 23.4 KB
Line 
1<?xml version="1.0" encoding="UTF-8"?>
2<reggie>
3
4  <!-- Section for enabling/disabling experimental features -->
5  <!-- The list of features that are considered experimental may change over time -->
6  <!-- 0=The feature is disabled, 1=The feature is enabled -->
7  <experimental-features>
8  </experimental-features>
9 
10  <!-- Configuration options related to how external samples (RNA or DNA) are handled -->
11  <external-samples>
12    <!-- Files generated in the secondary analysis can be shared with read permission to -->
13    <!-- a group if this is specified here. The prefix attribute is the sample name prefix -->
14    <!-- and the value is the group name. This translates to a 'chgrp' command in the secondary -->
15    <!-- analysis. Samples with a prefix that is not mapped here are not shared to other groups. -->
16    <!--  <groupname prefix="BR">brcalab</groupname> -->
17  </external-samples>
18
19  <!-- Settings for the Activity log that is displayed on the Reggie start page -->
20  <activity-log>
21    <!-- Max number of entries to display in the log (exception: all events within the last two days are always displayed) -->
22    <max-entries>35</max-entries>
23    <!-- Max age (in days) of entries to display (even if the max number hasn't been reached) -->
24    <max-age-in-days>14</max-age-in-days>
25    <quote-of-the-day>
26      <!-- URL to quote-of-the-day endpoint (optional, set an empty URL to disable this feature) -->
27      <url>https://quotes.rest/qod.json</url>
28      <!-- Default is 12 hours; do not set to less than 3600 since the external API has a limit -->
29      <max-age-in-seconds>43200</max-age-in-seconds>
30    </quote-of-the-day>
31  </activity-log>
32
33  <!-- Options related to R that is executed on the local server -->
34  <rscript>
35    <!-- Full or partial path to 'Rscript' executable -->
36    <path>Rscript</path>
37    <!-- Set the locale to use when running R -->
38    <!-- If not set, use whatever locale the operating system provides -->
39    <locale>en_US.UTF-8</locale>
40   
41    <!-- options for the 'geneReport' script -->
42    <gene-report>
43      <!-- full path to the R script -->
44      <path>/path/to/R_RNAseq_scanb_geneReport.R</path>
45      <!-- full path to directory with SCAN-B reference data -->
46      <!-- default is same directory as the R script -->
47      <ref-dir-scanb></ref-dir-scanb>
48      <!-- full path to directory with validation reference data -->
49      <!-- default is same directory as the R script -->
50      <ref-dir-validation></ref-dir-validation>
51      <!-- full path to the PDF template -->
52      <!-- default is 'template.pdf' in the same directory as the R script -->
53      <template></template>
54      <!-- file name in BASE for storing the generated report  -->
55      <pdf-name>genereport.pdf</pdf-name>
56    </gene-report>
57   
58    <!-- options for the 'pilot report' script -->
59    <pilot-report>
60      <!-- full path to the R script -->
61      <path>/path/to/pilot-report.R</path>
62      <!-- full path to directory with reference data -->
63      <!-- default is 'referenceData' directory inside -->
64      <!-- the same directory as the R script -->
65      <ref-dir></ref-dir>
66      <!-- full path to directory with source code -->
67      <!-- default is 'source' directory inside -->
68      <!-- the same directory as the R script -->
69      <source-dir></source-dir>
70      <!-- full path to the PDF template -->
71      <!-- default is 'template.pdf' in the same directory as the R script -->
72      <template></template>
73      <!-- file name in BASE for storing the generated report  -->
74      <pdf-name>pilotreport.pdf</pdf-name>
75    </pilot-report>
76   
77  </rscript>
78
79  <!-- Logotype information for the different sites -->
80  <!-- Uncomment as needed and set full path to image file -->
81  <!-- Supported file formats: WMF, PNG, JPG (and possibly more) -->
82  <logos>
83    <!-- <region-skåne></region-skåne>  -->
84    <!-- <landstinget-kronoberg></landstinget-kronoberg>  -->
85    <!-- <uppsala-landsting></uppsala-landsting>  -->
86    <!-- <region-halland></region-halland>  -->
87    <!-- <landstinget-blekinge></landstinget-blekinge>  -->
88    <!-- <jönköpings-län></jönköpings-län>  -->
89  </logos>
90
91  <remote-hosts>
92    <!-- One or more host entries. Each entry should match an -->
93    <!-- entry in the opengrid-config.xml. The 'ID' of an Open Grid cluster -->
94    <!-- is a combination of the username, address and port: user@host:port -->
95    <!-- A comma-separated list is allowed -->
96    <!-- Note that the default port number (22) must be included in the ID  -->
97    <!-- even if it is not specified in the opengrid-config.xml file. -->
98 
99    <host 
100      id="user@address:port in opengrid-config.xml (one or more separated by comma)"
101      >
102     
103      <!-- full path to the location where HiSeq/NextSeq data is stored (required) -->
104      <run-archive>/casa2/run_archive</run-archive>
105      <!-- Alternate paths in search order in case data is not found in the primary -->
106      <!-- run archive. Add more entries as needed, but it is important that they -->
107      <!-- are numbered in strictly increasing order from '2' and up. -->
108      <run-archive-2></run-archive-2>
109     
110      <!-- Full path to the location where data files should be archived (required) -->
111      <!-- The path should include the name of the project -->
112      <project-archive>/casa4/project_archive/scanb</project-archive>
113      <!-- Full path to the location where external data files should be archived (optional) -->
114      <!-- If not specified, the 'project-archive' path is used -->
115      <external-archive></external-archive>
116     
117      <!-- Full path to the root location where reference genomes are located -->
118      <!-- Do not include name of project -->
119      <reference-folder>/reference</reference-folder>
120     
121      <!-- Information about programs used by reggie -->
122      <!-- Unless otherwise noted, all paths must be the same on all nodes -->
123      <programs>
124        <java>
125          <!-- full path to java binary to use (1.8 is required by GATK!) -->
126          <path>/usr/local/packages/jre/8.0_144/bin/java</path>
127        </java>
128        <pipeline-scripts>
129          <!-- folder where the pipeline scripts are located (required). -->
130          <path>/home/scanb/lorry-pipeline/pipeline-2.16</path>
131        </pipeline-scripts>
132        <picard>
133          <!-- full path to the directory with Picard jar files (required) -->
134          <path>/usr/local/packages/picard-tools/2.20.8</path>
135        </picard>
136        <genseq>
137          <!-- full path to the genseq_check_illumina_dir.pl script (required) -->
138          <path>/usr/local/packages/genseq_tools/v0.01/genseq_check_illumina_dir.pl</path>
139        </genseq>
140        <trimmomatic>
141          <!-- full path to the JAR file with the Trimmomatic program (required) -->
142          <path>/usr/local/packages/trimmomatic/0.32/trimmomatic-0.32.jar</path>
143          <!-- full path to the file with Illumina adapter information -->
144          <adapter-file>/usr/local/packages/trimmomatic/0.32/adapters/TruSeq3-PE-2.fa</adapter-file>
145        </trimmomatic>
146        <bowtie2>
147          <!-- full or partial path to bowtie2 (required) -->
148          <path>/usr/local/packages/bowtie/2.2.4/bin/bowtie2</path>
149        </bowtie2>
150        <tophat>
151          <!-- full or partial path to tophat (required) -->
152          <path>/usr/local/packages/tophat/2.0.12/bin/tophat</path>
153        </tophat>
154        <hisat>
155          <!-- full or partial path to hisat (required) -->
156          <path>/usr/local/packages/hisat/2.1.0/bin/hisat2</path>
157        </hisat>
158        <samtools>
159          <!-- full or partial path to samtools (required) -->
160          <path>/usr/local/packages/samtools/1.4/samtools</path>
161        </samtools>
162        <bedtools>
163          <!-- full or partial path to bedtools (required) -->
164          <path>/usr/local/packages/bedtools/2.26.0/bin/bedtools</path>
165        </bedtools>
166        <cufflinks>
167          <!-- full or partial path to cufflinks (required) -->
168          <path>/usr/local/packages/cufflinks/2.2.1/bin/cufflinks</path>
169        </cufflinks>
170        <stringtie>
171          <!-- full or partial path to stringtie (required) -->
172          <path>/usr/local/packages/stringtie/1.3.3b/bin/stringtie</path>
173        </stringtie>
174        <gatk>
175          <!-- full path to GenomeAnalysisToolkit JAR file (required) -->
176          <path>/usr/local/packages/GenomeAnalysisTK/3.8/GenomeAnalysisTK.jar</path>
177        </gatk>
178        <mosdepth>
179          <!-- full or partial path to mosdepth (required) -->
180          <path>/usr/local/packages/mosdepth/0.2.6/bin/mosdepth</path>
181        </mosdepth>
182        <vardict>
183          <!-- path to the directory where VarDict is installed -->
184          <!-- NOTE! not including the 'bin/VarDict' part since -->
185          <!-- that will be added automatically -->
186          <path>/usr/local/packages/vardict/1.6.0</path>
187        </vardict>
188        <vcfanno>
189          <!-- full or partial path to vcfanno (required) -->
190          <path>/usr/local/packages/vcfanno/0.3.2/bin/vcfanno</path>
191        </vcfanno>
192        <snpeff>
193          <!-- full path to the snpEff.jar file (required) -->
194          <path>/usr/local/packages/snpeff/4.3s/snpEff.jar</path>
195        </snpeff>
196        <snpsift>
197          <!-- full path to the SnpSift.jar file (required) -->
198          <path>/usr/local/packages/snpeff/4.3s/SnpSift.jar</path>
199        </snpsift>
200        <fgbio>
201          <!-- full path to the fgbio.jar file (required) -->
202          <path>/usr/local/packages/fgbio/0.8.1/fgbio.jar</path>
203        </fgbio>
204        <novoalign>
205          <!-- full path to the novoalign file (required) -->
206          <path>/usr/local/packages/novocraft/V4.00.Pre-20190904/novoalign</path>
207        </novoalign>
208        <novosort>
209          <!-- full path to the novosort file (required) -->
210          <path>/usr/local/packages/novocraft/V4.00.Pre-20190904/novosort</path>
211        </novosort>
212      </programs>
213     
214      <!-- priority values that are selectable in the web interface -->
215      <!-- allowed range is -1023 to 1024 -->
216      <!-- NOTE! positive values require special permissions on the cluster -->
217      <priorities>
218        <!-- <priority name="high" value="500" /> -->
219        <priority name="normal" value="0" default="true" />
220        <priority name="low" value="-500" />
221      </priorities>
222     
223      <!-- settings for the demuxing step (RNAseq) -->
224      <demux>
225        <!-- parallel environment option to the queue system -->
226        <!-- the default setting requests 4 slots -->
227        <parallel-environment>smp 4-4</parallel-environment>
228        <!-- Number of open files to set with 'ulimit -n' command -->
229        <!-- if not specified, the default on the server is used -->
230        <ulimit></ulimit>
231        <!-- amount of memory to give to Picard (default is 50g)-->
232        <picard-memory>50g</picard-memory>
233        <!-- static options for the picard ExtractIlluminaBarcodes step -->
234        <extract-options>-QUIET true -VERBOSITY WARNING</extract-options>
235        <!-- static options for the picard IlluminaBasecallsToFastq step -->
236        <fastq-options>-INCLUDE_NON_PF_READS false -MAX_READS_IN_RAM_PER_TILE 5000000 -QUIET true -VERBOSITY WARNING</fastq-options>
237        <!-- number of tiles to process when debugging (default=2 (HiSeq), 16 (NextSeq)) -->
238        <debug-tile-limit-hiseq>2</debug-tile-limit-hiseq>
239        <debug-tile-limit-nextseq>16</debug-tile-limit-nextseq>
240        <!-- static options for Bowtie when used for estimating fragment size -->
241        <bowtie-options>-q --fr -k 1 --phred33 --local --no-hd --no-unal -t -u 100000</bowtie-options>
242        <!-- the smallest number of fragments that must have been used in the fragment -->
243        <!-- size estimation, or we will set FragmentSizeAvg and FragmentSizeStdev to -1 -->
244        <bowtie-fragment-count-limit>20000</bowtie-fragment-count-limit>
245        <!-- static options for Trimmomatic -->
246        <trimmomatic-options>
247          <!-- The first step should ONLY filter Illumina adapters-->
248          <step-1>ILLUMINACLIP:${AdapterFile}:2:30:12:1:true MINLEN:20</step-1>
249          <!-- The second step is for all other filters -->
250          <step-2>MAXINFO:40:0.9 MINLEN:20</step-2>
251        </trimmomatic-options>
252        <!-- static options for gzip compression with pigz (default=-5) -->
253        <!-- NOTE! Number of threads (-p) is set automatically and should not be included here -->
254        <pigz-options>-5</pigz-options>
255      </demux>
256 
257      <!-- settings for the demuxing step (MIPs) -->
258      <demux-mips>
259        <!-- parallel environment option to the queue system -->
260        <!-- the default setting requests 8-16 slots -->
261        <parallel-environment>smp 8-16</parallel-environment>
262        <!-- amount of memory to give to Picard (default is 50g)-->
263        <picard-memory>50g</picard-memory>
264        <!-- static options for the picard ExtractIlluminaBarcodes step -->
265        <extract-options>-MINIMUM_BASE_QUALITY 0 -MINIMUM_QUALITY 2 -MAX_MISMATCHES 2 -MIN_MISMATCH_DELTA 2 -MAX_NO_CALLS 2 -QUIET true -VERBOSITY WARNING</extract-options>
266        <!-- static options for the picard IlluminaBasecallsToFastq step -->
267        <fastq-options>-INCLUDE_NON_PF_READS false -APPLY_EAMSS_FILTER false -MINIMUM_QUALITY 2 -MAX_READS_IN_RAM_PER_TILE 5000000 -QUIET true -VERBOSITY WARNING</fastq-options>
268        <!-- static options to put into the "Read group" files -->
269        <readgroup-options>PL=ILLUMINA CN=BRCAlab</readgroup-options>
270        <!-- number of tiles to process when debugging (default=2 (HiSeq), 16 (NextSeq)) -->
271        <debug-tile-limit-hiseq>2</debug-tile-limit-hiseq>
272        <debug-tile-limit-nextseq>16</debug-tile-limit-nextseq>
273        <!-- static options for gzip compression with pigz (default=-5) -->
274        <!-- NOTE! Number of threads (-p) is set automatically and should not be included here -->
275        <pigz-options>-5</pigz-options>
276      </demux-mips>
277 
278      <mask>
279        <!-- relative path from <reference-folder> to the reference genome used for masking -->
280        <!-- This is the -x option used for bowtie -->
281        <reference-name>scanb/ribo_phix_repeats_filter/ribo_phix_repeats_filter</reference-name>
282       
283        <!-- static options for bowtie -->
284        <bowtie-options>-q --fr -k 1 --phred33 -t --local</bowtie-options>
285       
286        <!-- max number of sequences to align when running in debug mode (default=2 million) -->
287        <debug-max-align>2000000</debug-max-align>
288      </mask>
289 
290      <align>
291        <!-- relative path from <reference-folder> to the reference genome used for alignment -->
292        <!-- TODO selectable in GUI? saved as annotation? -->
293        <reference-gidx>hg38/hg38.analysisSet/hg38.analysisSet</reference-gidx>
294        <reference-tidx>hg38/UCSC_hg38_knownGenes_22sep2014/knownGenes.vs.hg38.analysisSet</reference-tidx>
295       
296        <!-- static options for tophat -->
297        <tophat-options>--library-type fr-firststrand --keep-fasta-order --no-coverage-search --max-insertion-length 20 --max-deletion-length 20 --read-gap-length 20 --read-edit-dist 22</tophat-options>
298        <!-- adjustment values for the 'mate-inner-dist' and 'mate-std-dev' -->
299        <!-- parameters to tophat. The specified values are added to those -->
300        <!-- calculated by bowtie -->
301        <adjust-mate-inner-dist>13</adjust-mate-inner-dist>
302        <adjust-mate-std-dev>10</adjust-mate-std-dev>
303       
304        <!-- static options for the picard MarkDuplicates step -->
305        <mark-duplicates-options>-REMOVE_DUPLICATES false -ASSUME_SORTED true -MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 2000 -QUIET true -VERBOSITY WARNING</mark-duplicates-options>
306      </align>
307     
308      <!-- settings for aligning with Hisat -->
309      <align-hisat>
310        <!-- parallel environment option to the queue system -->
311        <!-- the default setting uses up to 16 slots on hosts with at least 8 slots available -->
312        <parallel-environment>smp 8-16</parallel-environment>
313       
314        <!-- relative path from <reference-folder> to the reference genome used for alignment -->
315        <reference-tidx>hg38/hg38.analysisSet_gencode27_snp150/genome_snp_tran</reference-tidx>
316       
317        <!-- static options for hisat -->
318        <hisat-options>-q --fr --phred33 -t --dta --dta-cufflink --new-summary --no-unal --non-deterministic --novel-splicesite-outfile aligned/splicesites.tsv --rna-strandness RF --summary-file aligned/summary.txt --rg PL:Illumina --rg CN:SCANB-prim</hisat-options>
319       
320        <!-- static options for the picard MarkDuplicates step -->
321        <mark-duplicates-options>-REMOVE_DUPLICATES false -ASSUME_SORTED true -MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 2000 -QUIET true -VERBOSITY WARNING</mark-duplicates-options>
322       
323        <!-- relative path from <reference-folder> to FASTA file used as reference for the alignment  -->
324        <haplotypecaller-ref>hg38/hg38.analysisSet_gencode27_snp150/hg38.analysisSet_gencodeid.fa</haplotypecaller-ref>
325       
326        <!-- relative path from <reference-folder> to VCF file with SNPs that we should look for -->
327        <haplotypecaller-dbsnp>scanb/genotyping-213-snp_feb2018.vcf</haplotypecaller-dbsnp>
328       
329        <!-- static options for the HaplotypeCaller step -->
330        <haplotypecaller-options>-stand_call_conf 20 --filter_reads_with_N_cigar --annotation AlleleBalance --no_cmdline_in_header</haplotypecaller-options>
331      </align-hisat>
332     
333      <!-- settings for aligning MIPs sequencing -->
334      <align-mips>
335        <!-- parallel environment option to the queue system -->
336        <!-- the default setting uses up to 16 slots on hosts with at least 8 slots available -->
337        <parallel-environment>smp 8-16</parallel-environment>
338       
339        <!-- Options for Trimmomatic -->
340        <trimmomatic>
341          <!-- Optional path to Trimmomatic, if not specified the default in the 'programs' section is used -->
342          <path>/usr/local/packages/trimmomatic/0.39/trimmomatic.jar</path>
343          <!-- The first step should filter Illumina adapters-->
344          <step-1>ILLUMINACLIP:adapter.fa:3:12:7:1:true MINLEN:30</step-1>
345          <!-- The second step is for all other filters -->
346          <step-2>MAXINFO:30:0.25 MINLEN:30</step-2>
347        </trimmomatic>
348       
349        <amplicons>
350          <main-dir>${ReferenceDir}/project/brcalab/b37/mipcombo_v0</main-dir>
351          <bed panel="MI.B1B2.1">MIPCOMBO_B1B2_CHEK2_snpid55_snpid9plus_v01.bed</bed>
352          <bed panel="MI.ID.1">MIPCOMBO_CHEK2_snpid55_snpid9plus_v01.bed</bed>
353          <bed panel="MI.PALB2.1">MIPCOMBO_PALB2_CHEK2_snpid55_snpid9plus_v01.bed</bed>
354          <bed panel="MI.CDKN2A.1">MIPCOMBO_CDKN2A_CDK4_ex2_snpid55_snpid9plus_v01.bed</bed>
355          <bed panel="MI.TP53.1">MIPCOMBO_TP53_CHEK2_snpid55_snpid9plus_v01.bed</bed>
356        </amplicons>
357       
358        <novoalign>
359          <index>${ReferenceDir}/novoalign/human_g1k_v37_decoy_dbSNP137_12M_k14s2.novoindex</index>
360          <options>-o BAM 5 -o Sync -g 40 -x 1 --matchReward 4 --softclip 50,30 --trim3hp AG -H 22 -t 0,2.0 --hlimit 8 -v 150 -r R --Q2Off --pechimera off -F BAMPE RX</options>
361        </novoalign>
362       
363        <genome-dict>${ReferenceDir}/gatk_bundle/2.8/b37/human_g1k_v37_decoy.dict</genome-dict>
364        <genome-fasta>${ReferenceDir}/gatk_bundle/2.8/b37/human_g1k_v37_decoy.fasta</genome-fasta>
365       
366        <mark-duplicate-options>-CREATE_INDEX true -CREATE_MD5_FILE true -MAX_FILE_HANDLES 20000 -ALLOW_MISSING_UMIS false -DUPLICATE_SCORING_STRATEGY SUM_OF_BASE_QUALITIES -MAX_EDIT_DISTANCE_TO_JOIN 1 -REMOVE_DUPLICATES true</mark-duplicate-options>
367       
368      </align-mips>
369     
370      <mbaf>
371        <!-- parallel environment option to the queue system -->
372        <!-- the default setting uses up to 16 slots on hosts with at least 8 slots available -->
373        <parallel-environment>smp 8-16</parallel-environment>
374       
375        <!-- relative path from <reference-folder> to FASTA file used as reference for the alignment  -->
376        <!-- this should probably be the same as in <align-hisat>/<haplotypecaller-ref> -->
377        <haplotypecaller-ref>hg38/hg38.analysisSet_gencode27_snp150/hg38.analysisSet_gencodeid.fa</haplotypecaller-ref>
378       
379        <!-- relative path from <reference-folder> to VCF file with SNPs that we should look for -->
380        <haplotypecaller-dbsnp>scanb/genotyping-mbaf-snp_oct2018.vcf</haplotypecaller-dbsnp>
381       
382        <!-- static options for the HaplotypeCaller step -->
383        <haplotypecaller-options>-stand_call_conf 20 --filter_reads_with_N_cigar --no_cmdline_in_header</haplotypecaller-options>
384      </mbaf>
385     
386      <!-- settings for variant calling -->
387      <variant-call>
388        <!-- parallel environment option to the queue system -->
389        <!-- the default setting uses up to 16 slots on hosts with at least 8 slots available -->
390        <parallel-environment>smp 8-16</parallel-environment>
391
392        <!-- relative path from <reference-folder> to FASTA file used as reference for the alignment  -->
393        <!-- this should probably be the same as in <align-hisat>/<haplotypecaller-ref> -->
394        <genome-fasta>hg38/hg38.analysisSet_gencode27_snp150/hg38.analysisSet_gencodeid.fa</genome-fasta>
395       
396        <!-- Full path to base directory with databases and other stuff needed by the pipeline -->
397        <!-- This value can be used in other options as ${BaseDir} -->
398        <base-dir>${ReferenceDir}/scanb/rnaseqvarcall-feb2020</base-dir>
399       
400        <!-- static options for 'mosdepth' for regular and debug modes (optional) -->
401        <mosdepth-options></mosdepth-options>
402        <mosdepth-options-debug>-c chr6</mosdepth-options-debug>
403       
404        <!-- the required depth for a base to be callable for variants (optional, default=5) -->
405        <min-depth>5</min-depth>
406       
407        <!-- static options for VarDict (required) -->
408        <vardict-options>-f 0.02 -c 1 -S 2 -E 3 -g 4 -Q 20 -r 2 -q 20 --nosv</vardict-options>
409       
410        <!-- static options for var2vcf_valid.pl (required) -->
411        <var2vcf-options>-A -f 0.02</var2vcf-options>
412       
413        <!-- static options for vcfanno command line (required) -->
414        <!-- See https://github.com/brentp/vcfanno for more information -->
415        <vcfanno-options>-p 8 -lua ${BaseDir}/vcfanno.lua -base-path ${BaseDir} ${BaseDir}/allDbs.toml</vcfanno-options>
416       
417        <!-- static options for the snpEff command (required) -->
418        <snpeff-options>-configOption data.dir=${BaseDir}/snpEff_v4_3_hg38/data -noLog -noStats -canon hg38</snpeff-options>
419
420        <!-- static options for the SnpSift command (required) -->
421        <snpsift-options>-s ${BaseDir}/rna_chr_set.txt -s ${BaseDir}/intogen-BRCA-genes-list_patch.txt -e ${BaseDir}/filter_expression.txt</snpsift-options>
422       
423        <!-- path to the COSMIC mutation signature data -->
424        <mutation-signature>${BaseDir}/COSMIC_Cancer_signatures_probabilities.RData</mutation-signature>
425      </variant-call>
426     
427      <cufflinks>
428        <!-- parallel environment option to the queue system -->
429        <!-- the default setting uses between 8 and 16 slots on hosts with at least 8 slots available -->
430        <parallel-environment>smp 8-16</parallel-environment>
431 
432        <!-- relative path from <reference-folder> to the reference genome used by cufflinks -->
433        <reference-gidx>hg38/hg38.analysisSet/hg38.analysisSet.fa</reference-gidx>
434        <reference-gtf>hg38/UCSC_hg38_knownGenes_22sep2014.gtf</reference-gtf>
435       
436        <!-- static options for cufflinks -->
437        <options>--multi-read-correct --library-type fr-firststrand --total-hits-norm --max-bundle-frags 10000000 --no-update-check --quiet</options>
438       
439        <!-- if the aligned sequences item has more reads than this limit (when running in debug mode), -->
440        <!-- the accepted_hits.bam will be limited to chr1 before running cufflinks -->
441        <debug-max-aligned>2000000</debug-max-aligned>
442       
443        <!-- path to a file containing pairs of tracking_id values -->
444        <!-- *.fpkm_tracking files are searched and values from the -->
445        <!-- second column are replaced with values in the first column -->
446        <!-- If no mapping file is specified, no replacement is done -->
447        <tracking-id-map>hg38/UCSC_hg38_knownGenes_22sep2014_duplicate_transcript_id.txt</tracking-id-map>
448      </cufflinks>
449     
450      <stringtie>
451        <!-- parallel environment option to the queue system -->
452        <!-- the default setting uses between 8 and 16 slots on hosts with at least 8 slots available -->
453        <parallel-environment>smp 8-16</parallel-environment>
454       
455        <!-- relative path from <reference-folder> to the reference annotation (GTF file) used by stringtie -->
456        <reference-gtf>hg38/hg38.analysisSet_gencode27_snp150/gencode.v27.primary_assembly.annotation_subset_transcripttype_proteincoding.gtf</reference-gtf>
457
458        <!-- static options for stringtie -->
459        <options>--rf -B -e</options>
460
461      </stringtie>
462    </host>
463 
464   
465  </remote-hosts>
466
467</reggie>
Note: See TracBrowser for help on using the repository browser.