source: extensions/net.sf.basedb.reggie/branches/4.27-stable/config/reggie-config.xml @ 5953

Last change on this file since 5953 was 5953, checked in by Nicklas Nordborg, 3 years ago

Fixes #1248: Translations for SSP plug-in should be configured in reggie-config.xml

File size: 25.8 KB
Line 
1<?xml version="1.0" encoding="UTF-8"?>
2<reggie>
3
4  <!-- Section for enabling/disabling experimental features -->
5  <!-- The list of features that are considered experimental may change over time -->
6  <!-- 0=The feature is disabled, 1=The feature is enabled -->
7  <experimental-features>
8  </experimental-features>
9 
10  <!-- Configuration options related to how external samples (RNA or DNA) are handled -->
11  <external-samples>
12    <!-- Files generated in the secondary analysis can be shared with read permission to -->
13    <!-- a group if this is specified here. The prefix attribute is the sample name prefix -->
14    <!-- and the value is the group name. This translates to a 'chgrp' command in the secondary -->
15    <!-- analysis. Samples with a prefix that is not mapped here are not shared to other groups. -->
16    <!--  <groupname prefix="BR">brcalab</groupname> -->
17  </external-samples>
18
19  <!-- Settings for the Activity log that is displayed on the Reggie start page -->
20  <activity-log>
21    <!-- Max number of entries to display in the log (exception: all events within the last two days are always displayed) -->
22    <max-entries>35</max-entries>
23    <!-- Max age (in days) of entries to display (even if the max number hasn't been reached) -->
24    <max-age-in-days>14</max-age-in-days>
25    <quote-of-the-day>
26      <!-- URL to quote-of-the-day endpoint (optional, set an empty URL to disable this feature) -->
27      <url>https://quotes.rest/qod.json</url>
28      <!-- Default is 12 hours; do not set to less than 3600 since the external API has a limit -->
29      <max-age-in-seconds>43200</max-age-in-seconds>
30    </quote-of-the-day>
31  </activity-log>
32
33  <!-- Options related to R that is executed on the local server -->
34  <rscript>
35    <!-- Full or partial path to 'Rscript' executable -->
36    <path>Rscript</path>
37    <!-- Set the locale to use when running R -->
38    <!-- If not set, use whatever locale the operating system provides -->
39    <locale>en_US.UTF-8</locale>
40   
41    <!-- options for the 'geneReport' script -->
42    <gene-report>
43      <!-- full path to the R script -->
44      <path>/path/to/R_RNAseq_scanb_geneReport.R</path>
45      <!-- full path to directory with SCAN-B reference data -->
46      <!-- default is same directory as the R script -->
47      <ref-dir-scanb></ref-dir-scanb>
48      <!-- full path to directory with validation reference data -->
49      <!-- default is same directory as the R script -->
50      <ref-dir-validation></ref-dir-validation>
51      <!-- full path to the PDF template -->
52      <!-- default is 'template.pdf' in the same directory as the R script -->
53      <template></template>
54      <!-- file name in BASE for storing the generated report  -->
55      <pdf-name>genereport.pdf</pdf-name>
56    </gene-report>
57   
58    <!-- options for the 'pilot report' script -->
59    <pilot-report>
60      <!-- full path to the R script -->
61      <path>/path/to/pilot-report.R</path>
62      <!-- full path to directory with reference data -->
63      <!-- default is 'referenceData' directory inside -->
64      <!-- the same directory as the R script -->
65      <ref-dir></ref-dir>
66      <!-- full path to directory with source code -->
67      <!-- default is 'source' directory inside -->
68      <!-- the same directory as the R script -->
69      <source-dir></source-dir>
70      <!-- full path to the PDF template -->
71      <!-- default is 'template.pdf' in the same directory as the R script -->
72      <template></template>
73      <!-- file name in BASE for storing the generated report  -->
74      <pdf-name>pilotreport.pdf</pdf-name>
75    </pilot-report>
76   
77    <ssp>
78      <!-- full path to the directory with SSP scripts (SSP_functions.R, and more...) -->
79      <path>/path/to/ssp-dir</path>
80      <!-- full path to directory with models -->
81      <!-- default is 'models' directory inside -->
82      <!-- the same directory as the R script -->
83      <models-dir></models-dir>
84     
85      <!-- List all models that should be used -->
86      <models>
87        <!-- Each entry should be a filename of the *.RData object representing the model. -->
88        <!-- Each entry should have a 'name' and an associated 'annotation-type'. -->
89        <!-- The 'annotation-type-scores' is optional. If provided, it is used to store a list with all classes and scores. -->
90        <!-- The annotation types need to be created manually. -->
91        <model name="Subtype" annotation-type="SSP_Subtype" annotation-type-scores="SSP_Subtype_Scores">
92          Training_Run19081Genes_noNorm_SSP.subtypeMost.Fcc15_5x5foldCV.num.rules.50_24.selRules.AIMS.GS.RData
93        </model>
94      </models>
95     
96      <!-- Translations that should be applied to results from SSP models before they are stored as annotations in BASE -->
97      <translations>
98        <text from="Positiv" to="Positive" />
99        <text from="Negativ" to="Negative" />
100        <text from="Hög" to="High" />
101        <text from="Hog" to="High" />
102        <text from="Låg" to="Low" />
103        <text from="Lag" to="Low" />
104        <text from="Grad 1" to="Grade1" />
105        <text from="Grad 2" to="Grade2" />
106        <text from="Grad 3" to="Grade3" />
107      </translations>
108    </ssp>
109   
110  </rscript>
111
112  <!-- Logotype information for the different sites -->
113  <!-- Uncomment as needed and set full path to image file -->
114  <!-- Supported file formats: WMF, PNG, JPG (and possibly more) -->
115  <logos>
116    <!-- <region-skåne></region-skåne>  -->
117    <!-- <landstinget-kronoberg></landstinget-kronoberg>  -->
118    <!-- <uppsala-landsting></uppsala-landsting>  -->
119    <!-- <region-halland></region-halland>  -->
120    <!-- <landstinget-blekinge></landstinget-blekinge>  -->
121    <!-- <jönköpings-län></jönköpings-län>  -->
122  </logos>
123
124  <remote-hosts>
125    <!-- One or more host entries. Each entry should match an -->
126    <!-- entry in the opengrid-config.xml. The 'ID' of an Open Grid cluster -->
127    <!-- is a combination of the username, address and port: user@host:port -->
128    <!-- A comma-separated list is allowed -->
129    <!-- Note that the default port number (22) must be included in the ID  -->
130    <!-- even if it is not specified in the opengrid-config.xml file. -->
131 
132    <host 
133      id="user@address:port in opengrid-config.xml (one or more separated by comma)"
134      >
135     
136      <!-- full path to the location where HiSeq/NextSeq data is stored (required) -->
137      <run-archive>/casa2/run_archive</run-archive>
138      <!-- Alternate paths in search order in case data is not found in the primary -->
139      <!-- run archive. Add more entries as needed, but it is important that they -->
140      <!-- are numbered in strictly increasing order from '2' and up. -->
141      <run-archive-2></run-archive-2>
142     
143      <!-- Full path to the location where data files should be archived (required) -->
144      <!-- The path should include the name of the project -->
145      <project-archive>/casa4/project_archive/scanb</project-archive>
146      <!-- Full path to the location where external data files should be archived (optional) -->
147      <!-- If not specified, the 'project-archive' path is used -->
148      <external-archive></external-archive>
149     
150      <!-- Full path to the root location where reference genomes are located -->
151      <!-- Do not include name of project -->
152      <reference-folder>/reference</reference-folder>
153     
154      <!-- Information about programs used by reggie -->
155      <!-- Unless otherwise noted, all paths must be the same on all nodes -->
156      <programs>
157        <java>
158          <!-- full path to java binary to use (1.8 is required by GATK!) -->
159          <path>/usr/local/packages/jre/8.0_144/bin/java</path>
160        </java>
161        <pipeline-scripts>
162          <!-- folder where the pipeline scripts are located (required). -->
163          <path>/home/scanb/lorry-pipeline/pipeline-2.16</path>
164        </pipeline-scripts>
165        <picard>
166          <!-- full path to the directory with Picard jar files (required) -->
167          <path>/usr/local/packages/picard-tools/2.22.3</path>
168        </picard>
169        <genseq>
170          <!-- full path to the genseq_check_illumina_dir.pl script (required) -->
171          <path>/usr/local/packages/genseq_tools/v0.01/genseq_check_illumina_dir.pl</path>
172        </genseq>
173        <trimmomatic>
174          <!-- full path to the JAR file with the Trimmomatic program (required) -->
175          <path>/usr/local/packages/trimmomatic/0.32/trimmomatic-0.32.jar</path>
176          <!-- full path to the file with Illumina adapter information -->
177          <adapter-file>/usr/local/packages/trimmomatic/0.32/adapters/TruSeq3-PE-2.fa</adapter-file>
178        </trimmomatic>
179        <bowtie2>
180          <!-- full or partial path to bowtie2 (required) -->
181          <path>/usr/local/packages/bowtie/2.2.4/bin/bowtie2</path>
182        </bowtie2>
183        <tophat>
184          <!-- full or partial path to tophat (required) -->
185          <path>/usr/local/packages/tophat/2.0.12/bin/tophat</path>
186        </tophat>
187        <hisat>
188          <!-- full or partial path to hisat (required) -->
189          <path>/usr/local/packages/hisat/2.1.0/bin/hisat2</path>
190        </hisat>
191        <samtools>
192          <!-- full or partial path to samtools (required) -->
193          <path>/usr/local/packages/samtools/1.4/samtools</path>
194        </samtools>
195        <bedtools>
196          <!-- full or partial path to bedtools (required) -->
197          <path>/usr/local/packages/bedtools/2.26.0/bin/bedtools</path>
198        </bedtools>
199        <cufflinks>
200          <!-- full or partial path to cufflinks (required) -->
201          <path>/usr/local/packages/cufflinks/2.2.1/bin/cufflinks</path>
202        </cufflinks>
203        <stringtie>
204          <!-- full or partial path to stringtie (required) -->
205          <path>/usr/local/packages/stringtie/1.3.3b/bin/stringtie</path>
206        </stringtie>
207        <gatk>
208          <!-- full path to GenomeAnalysisToolkit JAR file (required) -->
209          <path>/usr/local/packages/GenomeAnalysisTK/3.8/GenomeAnalysisTK.jar</path>
210        </gatk>
211        <mosdepth>
212          <!-- full or partial path to mosdepth (required) -->
213          <path>/usr/local/packages/mosdepth/0.2.6/bin/mosdepth</path>
214        </mosdepth>
215        <vardict>
216          <!-- path to the directory where VarDict is installed -->
217          <!-- NOTE! not including the 'bin/VarDict' part since -->
218          <!-- that will be added automatically -->
219          <path>/usr/local/packages/vardict/1.6.0</path>
220        </vardict>
221        <vcfanno>
222          <!-- full or partial path to vcfanno (required) -->
223          <path>/usr/local/packages/vcfanno/0.3.2/bin/vcfanno</path>
224        </vcfanno>
225        <snpeff>
226          <!-- full path to the snpEff.jar file (required) -->
227          <path>/usr/local/packages/snpeff/4.3s/snpEff.jar</path>
228        </snpeff>
229        <snpsift>
230          <!-- full path to the SnpSift.jar file (required) -->
231          <path>/usr/local/packages/snpeff/4.3s/SnpSift.jar</path>
232        </snpsift>
233        <fgbio>
234          <!-- full path to the fgbio.jar file (required) -->
235          <path>/usr/local/packages/fgbio/0.8.1/fgbio.jar</path>
236        </fgbio>
237        <novoalign>
238          <!-- full path to the novoalign file (required) -->
239          <path>/usr/local/packages/novocraft/V4.00.Pre-20190904/novoalign</path>
240        </novoalign>
241        <novosort>
242          <!-- full path to the novosort file (required) -->
243          <path>/usr/local/packages/novocraft/V4.00.Pre-20190904/novosort</path>
244        </novosort>
245      </programs>
246     
247      <!-- priority values that are selectable in the web interface -->
248      <!-- allowed range is -1023 to 1024 -->
249      <!-- NOTE! positive values require special permissions on the cluster -->
250      <priorities>
251        <!-- <priority name="high" value="500" /> -->
252        <priority name="normal" value="0" default="true" />
253        <priority name="low" value="-500" />
254      </priorities>
255     
256      <!-- settings for the demuxing step (RNAseq) -->
257      <demux>
258        <!-- parallel environment option to the queue system -->
259        <!-- the default setting requests 4 slots -->
260        <parallel-environment>smp 4-4</parallel-environment>
261        <!-- Number of open files to set with 'ulimit -n' command -->
262        <!-- if not specified, the default on the server is used -->
263        <ulimit></ulimit>
264        <!-- amount of memory to give to Picard (default is 50g)-->
265        <picard-memory>50g</picard-memory>
266        <!-- static options for the picard ExtractIlluminaBarcodes step -->
267        <extract-options>-QUIET true -VERBOSITY WARNING</extract-options>
268        <!-- static options for the picard IlluminaBasecallsToFastq step -->
269        <fastq-options>-INCLUDE_NON_PF_READS false -MAX_READS_IN_RAM_PER_TILE 5000000 -QUIET true -VERBOSITY WARNING</fastq-options>
270        <!-- number of tiles to process when debugging (default=2 (HiSeq, NovaSeq), 16 (NextSeq)) -->
271        <debug-tile-limit-hiseq>2</debug-tile-limit-hiseq>
272        <debug-tile-limit-novaseq>2</debug-tile-limit-novaseq>
273        <debug-tile-limit-nextseq>16</debug-tile-limit-nextseq>
274        <!-- static options for Bowtie when used for estimating fragment size -->
275        <bowtie-options>-q --fr -k 1 --phred33 --local --no-hd --no-unal -t -u 100000</bowtie-options>
276        <!-- the smallest number of fragments that must have been used in the fragment -->
277        <!-- size estimation, or we will set FragmentSizeAvg and FragmentSizeStdev to -1 -->
278        <bowtie-fragment-count-limit>20000</bowtie-fragment-count-limit>
279        <!-- static options for Trimmomatic -->
280        <trimmomatic-options>
281          <!-- The first step should ONLY filter Illumina adapters-->
282          <step-1>ILLUMINACLIP:${AdapterFile}:2:30:12:1:true MINLEN:20</step-1>
283          <!-- The second step is for all other filters -->
284          <step-2>MAXINFO:40:0.9 MINLEN:20</step-2>
285        </trimmomatic-options>
286        <!-- static options for gzip compression with pigz (default=-5) -->
287        <!-- NOTE! Number of threads (-p) is set automatically and should not be included here -->
288        <pigz-options>-5</pigz-options>
289      </demux>
290 
291      <!-- settings for the demuxing step (MIPs) -->
292      <demux-mips>
293        <!-- parallel environment option to the queue system -->
294        <!-- the default setting requests 8-16 slots -->
295        <parallel-environment>smp 8-16</parallel-environment>
296        <!-- amount of memory to give to Picard (default is 50g)-->
297        <picard-memory>50g</picard-memory>
298        <!-- static options for the picard ExtractIlluminaBarcodes step -->
299        <extract-options>-MINIMUM_BASE_QUALITY 0 -MINIMUM_QUALITY 2 -MAX_MISMATCHES 2 -MIN_MISMATCH_DELTA 2 -MAX_NO_CALLS 2 -QUIET true -VERBOSITY WARNING</extract-options>
300        <!-- static options for the picard IlluminaBasecallsToFastq step -->
301        <fastq-options>-INCLUDE_NON_PF_READS false -APPLY_EAMSS_FILTER false -MINIMUM_QUALITY 2 -MAX_READS_IN_RAM_PER_TILE 5000000 -QUIET true -VERBOSITY WARNING</fastq-options>
302        <!-- static options to put into the "Read group" files -->
303        <readgroup-options>PL=ILLUMINA CN=BRCAlab</readgroup-options>
304        <!-- number of tiles to process when debugging (default=2 (HiSeq), 16 (NextSeq)) -->
305        <debug-tile-limit-hiseq>2</debug-tile-limit-hiseq>
306        <debug-tile-limit-nextseq>16</debug-tile-limit-nextseq>
307        <!-- static options for gzip compression with pigz (default=-5) -->
308        <!-- NOTE! Number of threads (-p) is set automatically and should not be included here -->
309        <pigz-options>-5</pigz-options>
310      </demux-mips>
311 
312      <mask>
313        <!-- relative path from <reference-folder> to the reference genome used for masking -->
314        <!-- This is the -x option used for bowtie -->
315        <reference-name>scanb/ribo_phix_repeats_filter/ribo_phix_repeats_filter</reference-name>
316       
317        <!-- static options for bowtie -->
318        <bowtie-options>-q --fr -k 1 --phred33 -t --local</bowtie-options>
319       
320        <!-- max number of sequences to align when running in debug mode (default=2 million) -->
321        <debug-max-align>2000000</debug-max-align>
322      </mask>
323 
324      <align>
325        <!-- relative path from <reference-folder> to the reference genome used for alignment -->
326        <!-- TODO selectable in GUI? saved as annotation? -->
327        <reference-gidx>hg38/hg38.analysisSet/hg38.analysisSet</reference-gidx>
328        <reference-tidx>hg38/UCSC_hg38_knownGenes_22sep2014/knownGenes.vs.hg38.analysisSet</reference-tidx>
329       
330        <!-- static options for tophat -->
331        <tophat-options>--library-type fr-firststrand --keep-fasta-order --no-coverage-search --max-insertion-length 20 --max-deletion-length 20 --read-gap-length 20 --read-edit-dist 22</tophat-options>
332        <!-- adjustment values for the 'mate-inner-dist' and 'mate-std-dev' -->
333        <!-- parameters to tophat. The specified values are added to those -->
334        <!-- calculated by bowtie -->
335        <adjust-mate-inner-dist>13</adjust-mate-inner-dist>
336        <adjust-mate-std-dev>10</adjust-mate-std-dev>
337       
338        <!-- static options for the picard MarkDuplicates step -->
339        <mark-duplicates-options>-REMOVE_DUPLICATES false -ASSUME_SORTED true -MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 2000 -QUIET true -VERBOSITY WARNING</mark-duplicates-options>
340      </align>
341     
342      <!-- settings for aligning with Hisat -->
343      <align-hisat>
344        <!-- parallel environment option to the queue system -->
345        <!-- the default setting uses up to 16 slots on hosts with at least 8 slots available -->
346        <parallel-environment>smp 8-16</parallel-environment>
347       
348        <!-- relative path from <reference-folder> to the reference genome used for alignment -->
349        <reference-tidx>hg38/hg38.analysisSet_gencode27_snp150/genome_snp_tran</reference-tidx>
350       
351        <!-- static options for hisat -->
352        <hisat-options>-q --fr --phred33 -t --dta --dta-cufflink --new-summary --no-unal --non-deterministic --novel-splicesite-outfile aligned/splicesites.tsv --rna-strandness RF --summary-file aligned/summary.txt --rg PL:Illumina --rg CN:SCANB-prim</hisat-options>
353       
354        <!-- static options for the picard MarkDuplicates step -->
355        <mark-duplicates-options>-REMOVE_DUPLICATES false -ASSUME_SORTED true -MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 2000 -QUIET true -VERBOSITY WARNING</mark-duplicates-options>
356       
357        <!-- relative path from <reference-folder> to FASTA file used as reference for the alignment  -->
358        <haplotypecaller-ref>hg38/hg38.analysisSet_gencode27_snp150/hg38.analysisSet_gencodeid.fa</haplotypecaller-ref>
359       
360        <!-- relative path from <reference-folder> to VCF file with SNPs that we should look for -->
361        <haplotypecaller-dbsnp>scanb/genotyping-213-snp_feb2018.vcf</haplotypecaller-dbsnp>
362       
363        <!-- static options for the HaplotypeCaller step -->
364        <haplotypecaller-options>-stand_call_conf 20 --filter_reads_with_N_cigar --annotation AlleleBalance --no_cmdline_in_header</haplotypecaller-options>
365      </align-hisat>
366     
367      <!-- settings for aligning MIPs sequencing -->
368      <align-mips>
369        <!-- parallel environment option to the queue system -->
370        <!-- the default setting uses up to 16 slots on hosts with at least 8 slots available -->
371        <parallel-environment>smp 8-16</parallel-environment>
372       
373        <!-- Options for Trimmomatic -->
374        <trimmomatic>
375          <!-- Optional path to Trimmomatic, if not specified the default in the 'programs' section is used -->
376          <path>/usr/local/packages/trimmomatic/0.39/trimmomatic.jar</path>
377          <!-- The first step should filter Illumina adapters-->
378          <step-1>ILLUMINACLIP:adapter.fa:3:12:7:1:true MINLEN:30</step-1>
379          <!-- The second step is for all other filters -->
380          <step-2>MAXINFO:30:0.25 MINLEN:30</step-2>
381        </trimmomatic>
382       
383        <!-- The amplicons BED files are used by novoalign -->
384        <!-- The main-dir setting should point to a directory with the BED files -->
385        <!-- Which BED file to use is determined by matching the 'panel' attribute with -->
386        <!-- the MIPS_Panel annotation from the DNA item. If no match is found the -->
387        <!-- alignment will fail. -->
388        <amplicons>
389          <main-dir>${ReferenceDir}/project/brcalab/b37/mipcombo_v0</main-dir>
390          <bed panel="MI.B1B2.1">MIPCOMBO_B1B2_CHEK2_snpid55_snpid9plus_v01.bed</bed>
391          <bed panel="MI.ID.1">MIPCOMBO_CHEK2_snpid55_snpid9plus_v01.bed</bed>
392          <bed panel="MI.PALB2.1">MIPCOMBO_PALB2_CHEK2_snpid55_snpid9plus_v01.bed</bed>
393          <bed panel="MI.CDKN2A.1">MIPCOMBO_CDKN2A_CDK4_ex2_snpid55_snpid9plus_v01.bed</bed>
394          <bed panel="MI.TP53.1">MIPCOMBO_TP53_CHEK2_snpid55_snpid9plus_v01.bed</bed>
395        </amplicons>
396       
397        <!-- Parameters that are needed by novoalign -->
398        <novoalign>
399          <index>${ReferenceDir}/novoalign/human_g1k_v37_decoy_dbSNP137_12M_k14s2.novoindex</index>
400          <options>-o BAM 5 -o Sync -g 40 -x 1 --matchReward 4 --softclip 50,30 --trim3hp AG -H 22 -t 0,2.0 --hlimit 8 -v 150 -r R --Q2Off --pechimera off -F BAMPE RX</options>
401        </novoalign>
402       
403        <!-- The following genome references are needed to extract some metrics after the alignment -->
404        <genome-dict>${ReferenceDir}/gatk_bundle/2.8/b37/human_g1k_v37_decoy.dict</genome-dict>
405        <genome-fasta>${ReferenceDir}/gatk_bundle/2.8/b37/human_g1k_v37_decoy.fasta</genome-fasta>
406       
407        <!-- Options for the picard UmiAwareMarkDuplicatesWithMateCigar step -->
408        <mark-duplicates-options>-CREATE_INDEX true -CREATE_MD5_FILE true -MAX_FILE_HANDLES 20000 -ALLOW_MISSING_UMIS false -DUPLICATE_SCORING_STRATEGY SUM_OF_BASE_QUALITIES -MAX_EDIT_DISTANCE_TO_JOIN 1 -REMOVE_DUPLICATES true</mark-duplicates-options>
409
410        <!-- Options for the picard CollectTargetedPcrMetrics step -->
411        <pcr-metrics-options>-METRIC_ACCUMULATION_LEVEL null -METRIC_ACCUMULATION_LEVEL ALL_READS -MINIMUM_MAPPING_QUALITY 1 -MINIMUM_BASE_QUALITY 20 -CLIP_OVERLAPPING_READS true -COVERAGE_CAP 500 -NEAR_DISTANCE 5</pcr-metrics-options>
412      </align-mips>
413     
414      <mbaf>
415        <!-- parallel environment option to the queue system -->
416        <!-- the default setting uses up to 16 slots on hosts with at least 8 slots available -->
417        <parallel-environment>smp 8-16</parallel-environment>
418       
419        <!-- relative path from <reference-folder> to FASTA file used as reference for the alignment  -->
420        <!-- this should probably be the same as in <align-hisat>/<haplotypecaller-ref> -->
421        <haplotypecaller-ref>hg38/hg38.analysisSet_gencode27_snp150/hg38.analysisSet_gencodeid.fa</haplotypecaller-ref>
422       
423        <!-- relative path from <reference-folder> to VCF file with SNPs that we should look for -->
424        <haplotypecaller-dbsnp>scanb/genotyping-mbaf-snp_oct2018.vcf</haplotypecaller-dbsnp>
425       
426        <!-- static options for the HaplotypeCaller step -->
427        <haplotypecaller-options>-stand_call_conf 20 --filter_reads_with_N_cigar --no_cmdline_in_header</haplotypecaller-options>
428      </mbaf>
429     
430      <!-- settings for variant calling -->
431      <variant-call>
432        <!-- parallel environment option to the queue system -->
433        <!-- the default setting uses up to 16 slots on hosts with at least 8 slots available -->
434        <parallel-environment>smp 8-16</parallel-environment>
435
436        <!-- relative path from <reference-folder> to FASTA file used as reference for the alignment  -->
437        <!-- this should probably be the same as in <align-hisat>/<haplotypecaller-ref> -->
438        <genome-fasta>hg38/hg38.analysisSet_gencode27_snp150/hg38.analysisSet_gencodeid.fa</genome-fasta>
439       
440        <!-- Full path to base directory with databases and other stuff needed by the pipeline -->
441        <!-- This value can be used in other options as ${BaseDir} -->
442        <base-dir>${ReferenceDir}/scanb/rnaseqvarcall-feb2020</base-dir>
443       
444        <!-- static options for 'mosdepth' for regular and debug modes (optional) -->
445        <mosdepth-options></mosdepth-options>
446        <mosdepth-options-debug>-c chr6</mosdepth-options-debug>
447       
448        <!-- the required depth for a base to be callable for variants (optional, default=5) -->
449        <min-depth>5</min-depth>
450       
451        <!-- static options for VarDict (required) -->
452        <vardict-options>-f 0.02 -c 1 -S 2 -E 3 -g 4 -Q 20 -r 2 -q 20 --nosv</vardict-options>
453       
454        <!-- static options for var2vcf_valid.pl (required) -->
455        <var2vcf-options>-A -f 0.02</var2vcf-options>
456       
457        <!--static options for vcfanno command line (required) -->
458        <!-- See https://github.com/brentp/vcfanno for more information -->
459        <vcfanno-options>-p 8 -lua ${BaseDir}/vcfanno.lua -base-path ${BaseDir} ${BaseDir}/allDbs.toml</vcfanno-options>
460       
461        <!-- static options for the snpEff command (required) -->
462        <snpeff-options>-configOption data.dir=${BaseDir}/snpEff_v4_3_hg38/data -noLog -noStats -canon hg38</snpeff-options>
463
464        <!-- static options for the SnpSift command (required) -->
465        <snpsift-options>-s ${BaseDir}/rna_chr_set.txt -s ${BaseDir}/intogen-BRCA-genes-list_patch.txt -e ${BaseDir}/filter_expression.txt</snpsift-options>
466       
467        <!-- path to the COSMIC mutation signature data -->
468        <mutation-signature>${BaseDir}/COSMIC_Cancer_signatures_probabilities.RData</mutation-signature>
469      </variant-call>
470     
471      <cufflinks>
472        <!-- parallel environment option to the queue system -->
473        <!-- the default setting uses between 8 and 16 slots on hosts with at least 8 slots available -->
474        <parallel-environment>smp 8-16</parallel-environment>
475 
476        <!-- relative path from <reference-folder> to the reference genome used by cufflinks -->
477        <reference-gidx>hg38/hg38.analysisSet/hg38.analysisSet.fa</reference-gidx>
478        <reference-gtf>hg38/UCSC_hg38_knownGenes_22sep2014.gtf</reference-gtf>
479       
480        <!-- static options for cufflinks -->
481        <options>--multi-read-correct --library-type fr-firststrand --total-hits-norm --max-bundle-frags 10000000 --no-update-check --quiet</options>
482       
483        <!-- if the aligned sequences item has more reads than this limit (when running in debug mode) -->
484        <!-- the accepted_hits.bam will be limited to chr1 before running cufflinks -->
485        <debug-max-aligned>2000000</debug-max-aligned>
486       
487        <!-- path to a file containing pairs of tracking_id values -->
488        <!-- *.fpkm_tracking files are searched and values from the -->
489        <!-- second column are replaced with values in the first column -->
490        <!-- If no mapping file is specified, no replacement is done -->
491        <tracking-id-map>hg38/UCSC_hg38_knownGenes_22sep2014_duplicate_transcript_id.txt</tracking-id-map>
492      </cufflinks>
493     
494      <stringtie>
495        <!-- parallel environment option to the queue system -->
496        <!-- the default setting uses between 8 and 16 slots on hosts with at least 8 slots available -->
497        <parallel-environment>smp 8-16</parallel-environment>
498       
499        <!-- relative path from <reference-folder> to the reference genome used by stringtie -->
500        <reference-gtf>hg38/hg38.analysisSet_gencode27_snp150/gencode.v27.primary_assembly.annotation_subset_transcripttype_proteincoding.gtf</reference-gtf>
501
502        <!-- static options for stringtie -->
503        <options>--rf -B -e</options>
504
505      </stringtie>
506    </host>
507 
508   
509  </remote-hosts>
510
511</reggie>
Note: See TracBrowser for help on using the repository browser.