source: extensions/net.sf.basedb.reggie/trunk/config/reggie-config.xml @ 5865

Last change on this file since 5865 was 5865, checked in by Nicklas Nordborg, 23 months ago

References #1231: Add support for sequencing with NovaSeq

Demuxing NovaSeq data should now work.

Introduced the BarcodeSet annotation to be used on barcodes for grouping possible barcodes that belong together. The main reason for this is so that we can output a logical set of barcodes for the UNUSED tag when demuxing to help us catch errors with incorrectly barcoded libraries.

There are currently two possible values for the BarcodeSet annotation on the RNAseq pipeline:

  • TruSeqSingle: Used by the regular RNA-seq pipeline that is sequenced on a NextSeq
  • TruSeqUniqueDual: Used by the "external" pipeline that is sequenced on a NovaSeq


The MIPs pipeline currently doesn't need this annotation.

File size: 24.4 KB
Line 
1<?xml version="1.0" encoding="UTF-8"?>
2<reggie>
3
4  <!-- Section for enabling/disabling experimental features -->
5  <!-- The list of features that are considered experimental may change over time -->
6  <!-- 0=The feature is disabled, 1=The feature is enabled -->
7  <experimental-features>
8  </experimental-features>
9 
10  <!-- Configuration options related to how external samples (RNA or DNA) are handled -->
11  <external-samples>
12    <!-- Files generated in the secondary analysis can be shared with read permission to -->
13    <!-- a group if this is specified here. The prefix attribute is the sample name prefix -->
14    <!-- and the value is the group name. This translates to a 'chgrp' command in the secondary -->
15    <!-- analysis. Samples with a prefix that is not mapped here are not shared to other groups. -->
16    <!--  <groupname prefix="BR">brcalab</groupname> -->
17  </external-samples>
18
19  <!-- Settings for the Activity log that is displayed on the Reggie start page -->
20  <activity-log>
21    <!-- Max number of entries to display in the log (exception: all events within the last two days are always displayed) -->
22    <max-entries>35</max-entries>
23    <!-- Max age (in days) of entries to display (even if the max number hasn't been reached) -->
24    <max-age-in-days>14</max-age-in-days>
25    <quote-of-the-day>
26      <!-- URL to quote-of-the-day endpoint (optional, set an empty URL to disable this feature) -->
27      <url>https://quotes.rest/qod.json</url>
28      <!-- Default is 12 hours; do not set to less than 3600 since the external API has a limit -->
29      <max-age-in-seconds>43200</max-age-in-seconds>
30    </quote-of-the-day>
31  </activity-log>
32
33  <!-- Options related to R that is executed on the local server -->
34  <rscript>
35    <!-- Full or partial path to 'Rscript' executable -->
36    <path>Rscript</path>
37    <!-- Set the locale to use when running R -->
38    <!-- If not set, use whatever locale the operating system provides -->
39    <locale>en_US.UTF-8</locale>
40   
41    <!-- options for the 'geneReport' script -->
42    <gene-report>
43      <!-- full path to the R script -->
44      <path>/path/to/R_RNAseq_scanb_geneReport.R</path>
45      <!-- full path to directory with SCAN-B reference data -->
46      <!-- default is same directory as the R script -->
47      <ref-dir-scanb></ref-dir-scanb>
48      <!-- full path to directory with validation reference data -->
49      <!-- default is same directory as the R script -->
50      <ref-dir-validation></ref-dir-validation>
51      <!-- full path to the PDF template -->
52      <!-- default is 'template.pdf' in the same directory as the R script -->
53      <template></template>
54      <!-- file name in BASE for storing the generated report  -->
55      <pdf-name>genereport.pdf</pdf-name>
56    </gene-report>
57   
58    <!-- options for the 'pilot report' script -->
59    <pilot-report>
60      <!-- full path to the R script -->
61      <path>/path/to/pilot-report.R</path>
62      <!-- full path to directory with reference data -->
63      <!-- default is 'referenceData' directory inside -->
64      <!-- the same directory as the R script -->
65      <ref-dir></ref-dir>
66      <!-- full path to directory with source code -->
67      <!-- default is 'source' directory inside -->
68      <!-- the same directory as the R script -->
69      <source-dir></source-dir>
70      <!-- full path to the PDF template -->
71      <!-- default is 'template.pdf' in the same directory as the R script -->
72      <template></template>
73      <!-- file name in BASE for storing the generated report  -->
74      <pdf-name>pilotreport.pdf</pdf-name>
75    </pilot-report>
76   
77  </rscript>
78
79  <!-- Logotype information for the different sites -->
80  <!-- Uncomment as needed and set full path to image file -->
81  <!-- Supported file formats: WMF, PNG, JPG (and possibly more) -->
82  <logos>
83    <!-- <region-skåne></region-skåne>  -->
84    <!-- <landstinget-kronoberg></landstinget-kronoberg>  -->
85    <!-- <uppsala-landsting></uppsala-landsting>  -->
86    <!-- <region-halland></region-halland>  -->
87    <!-- <landstinget-blekinge></landstinget-blekinge>  -->
88    <!-- <jönköpings-län></jönköpings-län>  -->
89  </logos>
90
91  <remote-hosts>
92    <!-- one or more host entries. Each entry should match an -->
93    <!-- entry in the opengrid-config.xml. The 'ID' of an Open Grid cluster -->
94    <!-- is a combination of the username, address and port: user@host:port -->
95    <!-- A comma-separated list is allowed -->
96    <!-- Note that the default port number (22) must be included in the ID  -->
97    <!-- even if it is not specified in the opengrid-config.xml file. -->
98 
99    <host 
100      id="user@address:port in opengrid-config.xml (one or more separated by comma)"
101      >
102     
103      <!-- full path to the location where HiSeq/NextSeq data is stored (required) -->
104      <run-archive>/casa2/run_archive</run-archive>
105      <!-- Alternate paths in search order in case data is not found in the primary -->
106      <!-- run archive. Add more entries as needed, but it is important that they -->
107      <!-- are numbered in strictly increasing order from '2' and up. -->
108      <run-archive-2></run-archive-2>
109     
110      <!-- Full path to the location where data files should be archived (required) -->
111      <!-- The path should include the name of the project -->
112      <project-archive>/casa4/project_archive/scanb</project-archive>
113      <!-- Full path to the location where external data files should be archived (optional) -->
114      <!-- If not specified, the 'project-archive' path is used -->
115      <external-archive></external-archive>
116     
117      <!-- Full path to the root location where reference genomes are located -->
118      <!-- Do not include name of project -->
119      <reference-folder>/reference</reference-folder>
120     
121      <!-- Information about programs used by reggie -->
122      <!-- Unless otherwise noted, all paths must be the same on all nodes -->
123      <programs>
124        <java>
125          <!-- full path to java binary to use (1.8 is required by GATK!) -->
126          <path>/usr/local/packages/jre/8.0_144/bin/java</path>
127        </java>
128        <pipeline-scripts>
129          <!-- folder where the pipeline scripts are located (required). -->
130          <path>/home/scanb/lorry-pipeline/pipeline-2.16</path>
131        </pipeline-scripts>
132        <picard>
133          <!-- full path to the directory with Picard jar files (required) -->
134          <path>/usr/local/packages/picard-tools/2.20.8</path>
135        </picard>
136        <genseq>
137          <!-- full path to the genseq_check_illumina_dir.pl script (required) -->
138          <path>/usr/local/packages/genseq_tools/v0.01/genseq_check_illumina_dir.pl</path>
139        </genseq>
140        <trimmomatic>
141          <!-- full path to the JAR file with the Trimmomatic program (required) -->
142          <path>/usr/local/packages/trimmomatic/0.32/trimmomatic-0.32.jar</path>
143          <!-- full path to the file with Illumina adapter information -->
144          <adapter-file>/usr/local/packages/trimmomatic/0.32/adapters/TruSeq3-PE-2.fa</adapter-file>
145        </trimmomatic>
146        <bowtie2>
147          <!-- full or partial path to bowtie2 (required) -->
148          <path>/usr/local/packages/bowtie/2.2.4/bin/bowtie2</path>
149        </bowtie2>
150        <tophat>
151          <!-- full or partial path to tophat (required) -->
152          <path>/usr/local/packages/tophat/2.0.12/bin/tophat</path>
153        </tophat>
154        <hisat>
155          <!-- full or partial path to hisat (required) -->
156          <path>/usr/local/packages/hisat/2.1.0/bin/hisat2</path>
157        </hisat>
158        <samtools>
159          <!-- full or partial path to samtools (required) -->
160          <path>/usr/local/packages/samtools/1.4/samtools</path>
161        </samtools>
162        <bedtools>
163          <!-- full or partial path to bedtools (required) -->
164          <path>/usr/local/packages/bedtools/2.26.0/bin/bedtools</path>
165        </bedtools>
166        <cufflinks>
167          <!-- full or partial path to cufflinks (required) -->
168          <path>/usr/local/packages/cufflinks/2.2.1/bin/cufflinks</path>
169        </cufflinks>
170        <stringtie>
171          <!-- full or partial path to stringtie (required) -->
172          <path>/usr/local/packages/stringtie/1.3.3b/bin/stringtie</path>
173        </stringtie>
174        <gatk>
175          <!-- full path to GenomeAnalysisToolkit JAR file (required) -->
176          <path>/usr/local/packages/GenomeAnalysisTK/3.8/GenomeAnalysisTK.jar</path>
177        </gatk>
178        <mosdepth>
179          <!-- full or partial path to mosdepth (required) -->
180          <path>/usr/local/packages/mosdepth/0.2.6/bin/mosdepth</path>
181        </mosdepth>
182        <vardict>
183          <!-- path to the directory where VarDict is installed -->
184          <!-- NOTE! not including the 'bin/VarDict' part since -->
185          <!-- that will be added automatically -->
186          <path>/usr/local/packages/vardict/1.6.0</path>
187        </vardict>
188        <vcfanno>
189          <!-- full or partial path to vcfanno (required) -->
190          <path>/usr/local/packages/vcfanno/0.3.2/bin/vcfanno</path>
191        </vcfanno>
192        <snpeff>
193          <!-- full path to the snpEff.jar file (required) -->
194          <path>/usr/local/packages/snpeff/4.3s/snpEff.jar</path>
195        </snpeff>
196        <snpsift>
197          <!-- full path to the SnpSift.jar file (required) -->
198          <path>/usr/local/packages/snpeff/4.3s/SnpSift.jar</path>
199        </snpsift>
200        <fgbio>
201          <!-- full path to the fgbio.jar file (required) -->
202          <path>/usr/local/packages/fgbio/0.8.1/fgbio.jar</path>
203        </fgbio>
204        <novoalign>
205          <!-- full path to the novoalign file (required) -->
206          <path>/usr/local/packages/novocraft/V4.00.Pre-20190904/novoalign</path>
207        </novoalign>
208        <novosort>
209          <!-- full path to the novosort file (required) -->
210          <path>/usr/local/packages/novocraft/V4.00.Pre-20190904/novosort</path>
211        </novosort>
212      </programs>
213     
214      <!-- priority values that are selectable in the web interface -->
215      <!-- allowed range is -1023 to 1024 -->
216      <!-- NOTE! positive values require special permissions on the cluster -->
217      <priorities>
218        <!-- <priority name="high" value="500" /> -->
219        <priority name="normal" value="0" default="true" />
220        <priority name="low" value="-500" />
221      </priorities>
222     
223      <!-- settings for the demuxing step (RNAseq) -->
224      <demux>
225        <!-- parallel environment option to the queue system -->
226        <!-- the default setting requests 4 slots -->
227        <parallel-environment>smp 4-4</parallel-environment>
228        <!-- Number of open files to set with 'ulimit -n' command -->
229        <!-- if not specified, the default on the server is used -->
230        <ulimit></ulimit>
231        <!-- amount of memory to give to Picard (default is 50g)-->
232        <picard-memory>50g</picard-memory>
233        <!-- static options for the picard ExtractIlluminaBarcodes step -->
234        <extract-options>-QUIET true -VERBOSITY WARNING</extract-options>
235        <!-- static options for the picard IlluminaBasecallsToFastq step -->
236        <fastq-options>-INCLUDE_NON_PF_READS false -MAX_READS_IN_RAM_PER_TILE 5000000 -QUIET true -VERBOSITY WARNING</fastq-options>
237        <!-- number of tiles to process when debugging (default=2 (HiSeq, NovaSeq), 16 (NextSeq)) -->
238        <debug-tile-limit-hiseq>2</debug-tile-limit-hiseq>
239        <debug-tile-limit-novaseq>2</debug-tile-limit-novaseq>
240        <debug-tile-limit-nextseq>16</debug-tile-limit-nextseq>
241        <!-- static options for Bowtie when used for estimating fragment size -->
242        <bowtie-options>-q --fr -k 1 --phred33 --local --no-hd --no-unal -t -u 100000</bowtie-options>
243        <!-- the smallest number of fragments that must have been used in the fragment -->
244        <!-- size estimation, or we will set FragmentSizeAvg and FragmentSizeStdev to -1 -->
245        <bowtie-fragment-count-limit>20000</bowtie-fragment-count-limit>
246        <!-- static options for Trimmomatic -->
247        <trimmomatic-options>
248          <!-- The first step should ONLY filter Illumina adapters-->
249          <step-1>ILLUMINACLIP:${AdapterFile}:2:30:12:1:true MINLEN:20</step-1>
250          <!-- The second step is for all other filters -->
251          <step-2>MAXINFO:40:0.9 MINLEN:20</step-2>
252        </trimmomatic-options>
253        <!-- static options for gzip compression with pigz (default=-5) -->
254        <!-- NOTE! Number of threads (-p) is set automatically and should not be included here -->
255        <pigz-options>-5</pigz-options>
256      </demux>
257 
258      <!-- settings for the demuxing step (MIPs) -->
259      <demux-mips>
260        <!-- parallel environment option to the queue system -->
261        <!-- the default setting requests 8-16 slots -->
262        <parallel-environment>smp 8-16</parallel-environment>
263        <!-- amount of memory to give to Picard (default is 50g)-->
264        <picard-memory>50g</picard-memory>
265        <!-- static options for the picard ExtractIlluminaBarcodes step -->
266        <extract-options>-MINIMUM_BASE_QUALITY 0 -MINIMUM_QUALITY 2 -MAX_MISMATCHES 2 -MIN_MISMATCH_DELTA 2 -MAX_NO_CALLS 2 -QUIET true -VERBOSITY WARNING</extract-options>
267        <!-- static options for the picard IlluminaBasecallsToFastq step -->
268        <fastq-options>-INCLUDE_NON_PF_READS false -APPLY_EAMSS_FILTER false -MINIMUM_QUALITY 2 -MAX_READS_IN_RAM_PER_TILE 5000000 -QUIET true -VERBOSITY WARNING</fastq-options>
269        <!-- static options to put into the "Read group" files -->
270        <readgroup-options>PL=ILLUMINA CN=BRCAlab</readgroup-options>
271        <!-- number of tiles to process when debugging (default=2 (HiSeq), 16 (NextSeq)) -->
272        <debug-tile-limit-hiseq>2</debug-tile-limit-hiseq>
273        <debug-tile-limit-nextseq>16</debug-tile-limit-nextseq>
274        <!-- static options for gzip compression with pigz (default=-5) -->
275        <!-- NOTE! Number of threads (-p) is set automatically and should not be included here -->
276        <pigz-options>-5</pigz-options>
277      </demux-mips>
278 
279      <mask>
280        <!-- relative path from <reference-folder> to the reference genome used for masking -->
281        <!-- This is the -x option used for bowtie -->
282        <reference-name>scanb/ribo_phix_repeats_filter/ribo_phix_repeats_filter</reference-name>
283       
284        <!-- static options for bowtie -->
285        <bowtie-options>-q --fr -k 1 --phred33 -t --local</bowtie-options>
286       
287        <!-- max number of sequences to align when running in debug mode (default=2 million) -->
288        <debug-max-align>2000000</debug-max-align>
289      </mask>
290 
291      <align>
292        <!-- relative path from <reference-folder> to the reference genome used for alignment -->
293        <!-- TODO selectable in GUI? saved as annotation? -->
294        <reference-gidx>hg38/hg38.analysisSet/hg38.analysisSet</reference-gidx>
295        <reference-tidx>hg38/UCSC_hg38_knownGenes_22sep2014/knownGenes.vs.hg38.analysisSet</reference-tidx>
296       
297        <!-- static options for tophat -->
298        <tophat-options>--library-type fr-firststrand --keep-fasta-order --no-coverage-search --max-insertion-length 20 --max-deletion-length 20 --read-gap-length 20 --read-edit-dist 22</tophat-options>
299        <!-- adjustment values for the 'mate-inner-dist' and 'mate-std-dev' -->
300        <!-- parameters to tophat. The specified values are added to those -->
301        <!-- calculated by bowtie -->
302        <adjust-mate-inner-dist>13</adjust-mate-inner-dist>
303        <adjust-mate-std-dev>10</adjust-mate-std-dev>
304       
305        <!-- static options for the picard MarkDuplicates step -->
306        <mark-duplicates-options>-REMOVE_DUPLICATES false -ASSUME_SORTED true -MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 2000 -QUIET true -VERBOSITY WARNING</mark-duplicates-options>
307      </align>
308     
309      <!-- settings for aligning with Hisat -->
310      <align-hisat>
311        <!-- parallel environment option to the queue system -->
312        <!-- the default setting uses up to 16 slots on hosts with at least 8 slots available -->
313        <parallel-environment>smp 8-16</parallel-environment>
314       
315        <!-- relative path from <reference-folder> to the reference genome used for alignment -->
316        <reference-tidx>hg38/hg38.analysisSet_gencode27_snp150/genome_snp_tran</reference-tidx>
317       
318        <!-- static options for hisat -->
319        <hisat-options>-q --fr --phred33 -t --dta --dta-cufflink --new-summary --no-unal --non-deterministic --novel-splicesite-outfile aligned/splicesites.tsv --rna-strandness RF --summary-file aligned/summary.txt --rg PL:Illumina --rg CN:SCANB-prim</hisat-options>
320       
321        <!-- static options for the picard MarkDuplicates step -->
322        <mark-duplicates-options>-REMOVE_DUPLICATES false -ASSUME_SORTED true -MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 2000 -QUIET true -VERBOSITY WARNING</mark-duplicates-options>
323       
324        <!-- relative path from <reference-folder> to FASTA file used as reference for the alignment  -->
325        <haplotypecaller-ref>hg38/hg38.analysisSet_gencode27_snp150/hg38.analysisSet_gencodeid.fa</haplotypecaller-ref>
326       
327        <!-- relative path from <reference-folder> to VCF file with SNPs that we should look for -->
328        <haplotypecaller-dbsnp>scanb/genotyping-213-snp_feb2018.vcf</haplotypecaller-dbsnp>
329       
330        <!-- static options for the HaplotypeCaller step -->
331        <haplotypecaller-options>-stand_call_conf 20 --filter_reads_with_N_cigar --annotation AlleleBalance --no_cmdline_in_header</haplotypecaller-options>
332      </align-hisat>
333     
334      <!-- settings for aligning MIPs sequencing -->
335      <align-mips>
336        <!-- parallel environment option to the queue system -->
337        <!-- the default setting uses up to 16 slots on hosts with at least 8 slots available -->
338        <parallel-environment>smp 8-16</parallel-environment>
339       
340        <!-- Options for Trimmomatic -->
341        <trimmomatic>
342          <!-- Optional path to Trimmomatic, if not specified the default in the 'programs' section is used -->
343          <path>/usr/local/packages/trimmomatic/0.39/trimmomatic.jar</path>
344          <!-- The first step should filter Illumina adapters-->
345          <step-1>ILLUMINACLIP:adapter.fa:3:12:7:1:true MINLEN:30</step-1>
346          <!-- The second step is for all other filters -->
347          <step-2>MAXINFO:30:0.25 MINLEN:30</step-2>
348        </trimmomatic>
349       
350        <!-- The amplicons BED files are used by novoalign -->
351        <!-- The main-dir setting should point to a directory with the BED files -->
352        <!-- Which BED file to use is determined by matching the 'panel' attribute with -->
353        <!-- the MIPS_Panel annotation from the DNA item. If no match is found the -->
354        <!-- alignment will fail. -->
355        <amplicons>
356          <main-dir>${ReferenceDir}/project/brcalab/b37/mipcombo_v0</main-dir>
357          <bed panel="MI.B1B2.1">MIPCOMBO_B1B2_CHEK2_snpid55_snpid9plus_v01.bed</bed>
358          <bed panel="MI.ID.1">MIPCOMBO_CHEK2_snpid55_snpid9plus_v01.bed</bed>
359          <bed panel="MI.PALB2.1">MIPCOMBO_PALB2_CHEK2_snpid55_snpid9plus_v01.bed</bed>
360          <bed panel="MI.CDKN2A.1">MIPCOMBO_CDKN2A_CDK4_ex2_snpid55_snpid9plus_v01.bed</bed>
361          <bed panel="MI.TP53.1">MIPCOMBO_TP53_CHEK2_snpid55_snpid9plus_v01.bed</bed>
362        </amplicons>
363       
364        <!-- Parameters that are needed by novoalign -->
365        <novoalign>
366          <index>${ReferenceDir}/novoalign/human_g1k_v37_decoy_dbSNP137_12M_k14s2.novoindex</index>
367          <options>-o BAM 5 -o Sync -g 40 -x 1 --matchReward 4 --softclip 50,30 --trim3hp AG -H 22 -t 0,2.0 --hlimit 8 -v 150 -r R --Q2Off --pechimera off -F BAMPE RX</options>
368        </novoalign>
369       
370        <!-- The following genome references are needed to extract some metrics after the alignment -->
371        <genome-dict>${ReferenceDir}/gatk_bundle/2.8/b37/human_g1k_v37_decoy.dict</genome-dict>
372        <genome-fasta>${ReferenceDir}/gatk_bundle/2.8/b37/human_g1k_v37_decoy.fasta</genome-fasta>
373       
374        <!-- Options for the picard UmiAwareMarkDuplicatesWithMateCigar step -->
375        <mark-duplicates-options>-CREATE_INDEX true -CREATE_MD5_FILE true -MAX_FILE_HANDLES 20000 -ALLOW_MISSING_UMIS false -DUPLICATE_SCORING_STRATEGY SUM_OF_BASE_QUALITIES -MAX_EDIT_DISTANCE_TO_JOIN 1 -REMOVE_DUPLICATES true</mark-duplicates-options>
376
377        <!-- Options for the picard CollectTargetedPcrMetrics step -->
378        <pcr-metrics-options>-METRIC_ACCUMULATION_LEVEL null -METRIC_ACCUMULATION_LEVEL ALL_READS -MINIMUM_MAPPING_QUALITY 1 -MINIMUM_BASE_QUALITY 20 -CLIP_OVERLAPPING_READS true -COVERAGE_CAP 500 -NEAR_DISTANCE 5</pcr-metrics-options>
379      </align-mips>
380     
381      <mbaf>
382        <!-- parallel environment option to the queue system -->
383        <!-- the default setting uses up to 16 slots on hosts with at least 8 slots available -->
384        <parallel-environment>smp 8-16</parallel-environment>
385       
386        <!-- relative path from <reference-folder> to FASTA file used as reference for the alignment  -->
387        <!-- this should probably be the same as in <align-hisat>/<haplotypecaller-ref> -->
388        <haplotypecaller-ref>hg38/hg38.analysisSet_gencode27_snp150/hg38.analysisSet_gencodeid.fa</haplotypecaller-ref>
389       
390        <!-- relative path from <reference-folder> to VCF file with SNPs that we should look for -->
391        <haplotypecaller-dbsnp>scanb/genotyping-mbaf-snp_oct2018.vcf</haplotypecaller-dbsnp>
392       
393        <!-- static options for the HaplotypeCaller step -->
394        <haplotypecaller-options>-stand_call_conf 20 --filter_reads_with_N_cigar --no_cmdline_in_header</haplotypecaller-options>
395      </mbaf>
396     
397      <!-- settings for variant calling -->
398      <variant-call>
399        <!-- parallel environment option to the queue system -->
400        <!-- the default setting uses up to 16 slots on hosts with at least 8 slots available -->
401        <parallel-environment>smp 8-16</parallel-environment>
402
403        <!-- relative path from <reference-folder> to FASTA file used as reference for the alignment  -->
404        <!-- this should probably be the same as in <align-hisat>/<haplotypecaller-ref> -->
405        <genome-fasta>hg38/hg38.analysisSet_gencode27_snp150/hg38.analysisSet_gencodeid.fa</genome-fasta>
406       
407        <!-- Full path to base directory with databases and other stuff needed by the pipeline -->
408        <!-- This value can be used in other options as ${BaseDir} -->
409        <base-dir>${ReferenceDir}/scanb/rnaseqvarcall-feb2020</base-dir>
410       
411        <!-- static options for 'mosdepth' for regular and debug modes (optional) -->
412        <mosdepth-options></mosdepth-options>
413        <mosdepth-options-debug>-c chr6</mosdepth-options-debug>
414       
415        <!-- the required depth for a base to be callable for variants (optional, default=5) -->
416        <min-depth>5</min-depth>
417       
418        <!-- static options for VarDict (required) -->
419        <vardict-options>-f 0.02 -c 1 -S 2 -E 3 -g 4 -Q 20 -r 2 -q 20 --nosv</vardict-options>
420       
421        <!-- static options for var2vcf_valid.pl (required) -->
422        <var2vcf-options>-A -f 0.02</var2vcf-options>
423       
424        <!-- static options for vcfanno command line (required) -->
425        <!-- See https://github.com/brentp/vcfanno for more information -->
426        <vcfanno-options>-p 8 -lua ${BaseDir}/vcfanno.lua -base-path ${BaseDir} ${BaseDir}/allDbs.toml</vcfanno-options>
427       
428        <!-- static options for the snpEff command (required) -->
429        <snpeff-options>-configOption data.dir=${BaseDir}/snpEff_v4_3_hg38/data -noLog -noStats -canon hg38</snpeff-options>
430
431        <!-- static options for the SnpSift command (required) -->
432        <snpsift-options>-s ${BaseDir}/rna_chr_set.txt -s ${BaseDir}/intogen-BRCA-genes-list_patch.txt -e ${BaseDir}/filter_expression.txt</snpsift-options>
433       
434        <!-- path to the COSMIC mutation signature data -->
435        <mutation-signature>${BaseDir}/COSMIC_Cancer_signatures_probabilities.RData</mutation-signature>
436      </variant-call>
437     
438      <cufflinks>
439        <!-- parallel environment option to the queue system -->
440        <!-- the default setting uses between 8 and 16 slots on hosts with at least 8 slots available -->
441        <parallel-environment>smp 8-16</parallel-environment>
442 
443        <!-- relative path from <reference-folder> to the reference genome used by cufflinks -->
444        <reference-gidx>hg38/hg38.analysisSet/hg38.analysisSet.fa</reference-gidx>
445        <reference-gtf>hg38/UCSC_hg38_knownGenes_22sep2014.gtf</reference-gtf>
446       
447        <!-- static options for cufflinks -->
448        <options>--multi-read-correct --library-type fr-firststrand --total-hits-norm --max-bundle-frags 10000000 --no-update-check --quiet</options>
449       
450        <!-- if the aligned sequences item has more reads than this limit (when running in debug mode) -->
451        <!-- the accepted_hits.bam will be limited to chr1 before running cufflinks -->
452        <debug-max-aligned>2000000</debug-max-aligned>
453       
454        <!-- path to a file containing pairs of tracking_id values -->
455        <!-- *.fpkm_tracking files are searched and values from the -->
456        <!-- second column are replaced with values in the first column -->
457        <!-- If no mapping file is specified, no replacement is done -->
458        <tracking-id-map>hg38/UCSC_hg38_knownGenes_22sep2014_duplicate_transcript_id.txt</tracking-id-map>
459      </cufflinks>
460     
461      <stringtie>
462        <!-- parallel environment option to the queue system -->
463        <!-- the default setting uses between 8 and 16 slots on hosts with at least 8 slots available -->
464        <parallel-environment>smp 8-16</parallel-environment>
465       
466        <!-- relative path from <reference-folder> to the reference genome used by stringtie -->
467        <reference-gtf>hg38/hg38.analysisSet_gencode27_snp150/gencode.v27.primary_assembly.annotation_subset_transcripttype_proteincoding.gtf</reference-gtf>
468
469        <!-- static options for stringtie -->
470        <options>--rf -B -e</options>
471
472      </stringtie>
473    </host>
474 
475   
476  </remote-hosts>
477
478</reggie>
Note: See TracBrowser for help on using the repository browser.