source: extensions/net.sf.basedb.reggie/trunk/config/reggie-config.xml @ 5941

Last change on this file since 5941 was 5941, checked in by Nicklas Nordborg, 3 years ago

References #1239: Include Single Sample Predictor models in the StringTie pipeline

Added some checks for configured models before they are used:

  • That an annotation type exists
  • That the .Rdata file exists


If a check fails, a warning message is displayed and the model can't be used.

File size: 25.3 KB
Line 
1<?xml version="1.0" encoding="UTF-8"?>
2<reggie>
3
4  <!-- Section for enabling/disabling experimental features -->
5  <!-- The list of features that are considered experimental may change over time -->
6  <!-- 0=The feature is disabled, 1=The feature is enabled -->
7  <experimental-features>
8  </experimental-features>
9 
10  <!-- Configuration options related to how external samples (RNA or DNA) are handled -->
11  <external-samples>
12    <!-- Files generated in the secondary analysis can be shared with read permission to -->
13    <!-- a group if this is specified here. The prefix attribute is the sample name prefix -->
14    <!-- and the value is the group name. This translates to a 'chgrp' command in the secondary -->
15    <!-- analysis. Samples with a prefix that is not mapped here are not shared to other groups. -->
16    <!--  <groupname prefix="BR">brcalab</groupname> -->
17  </external-samples>
18
19  <!-- Settings for the Activity log that is displayed on the Reggie start page -->
20  <activity-log>
21    <!-- Max number of entries to display in the log (exception: all events within the last two days are always displayed) -->
22    <max-entries>35</max-entries>
23    <!-- Max age (in days) of entries to display (even if the max number hasn't been reached) -->
24    <max-age-in-days>14</max-age-in-days>
25    <quote-of-the-day>
26      <!-- URL to quote-of-the-day endpoint (optional, set an empty URL to disable this feature) -->
27      <url>https://quotes.rest/qod.json</url>
28      <!-- Default is 12 hours; do not set to less than 3600 since the external API has a limit -->
29      <max-age-in-seconds>43200</max-age-in-seconds>
30    </quote-of-the-day>
31  </activity-log>
32
33  <!-- Options related to R that is executed on the local server -->
34  <rscript>
35    <!-- Full or partial path to 'Rscript' executable -->
36    <path>Rscript</path>
37    <!-- Set the locale to use when running R -->
38    <!-- If not set, use whatever locale the operating system provides -->
39    <locale>en_US.UTF-8</locale>
40   
41    <!-- options for the 'geneReport' script -->
42    <gene-report>
43      <!-- full path to the R script -->
44      <path>/path/to/R_RNAseq_scanb_geneReport.R</path>
45      <!-- full path to directory with SCAN-B reference data -->
46      <!-- default is same directory as the R script -->
47      <ref-dir-scanb></ref-dir-scanb>
48      <!-- full path to directory with validation reference data -->
49      <!-- default is same directory as the R script -->
50      <ref-dir-validation></ref-dir-validation>
51      <!-- full path to the PDF template -->
52      <!-- default is 'template.pdf' in the same directory as the R script -->
53      <template></template>
54      <!-- file name in BASE for storing the generated report  -->
55      <pdf-name>genereport.pdf</pdf-name>
56    </gene-report>
57   
58    <!-- options for the 'pilot report' script -->
59    <pilot-report>
60      <!-- full path to the R script -->
61      <path>/path/to/pilot-report.R</path>
62      <!-- full path to directory with reference data -->
63      <!-- default is 'referenceData' directory inside -->
64      <!-- the same directory as the R script -->
65      <ref-dir></ref-dir>
66      <!-- full path to directory with source code -->
67      <!-- default is 'source' directory inside -->
68      <!-- the same directory as the R script -->
69      <source-dir></source-dir>
70      <!-- full path to the PDF template -->
71      <!-- default is 'template.pdf' in the same directory as the R script -->
72      <template></template>
73      <!-- file name in BASE for storing the generated report  -->
74      <pdf-name>pilotreport.pdf</pdf-name>
75    </pilot-report>
76   
77    <ssp>
78      <!-- full path to the directory with SSP scripts (SSP_functions.R, and more...) -->
79      <path>/path/to/ssp-dir</path>
80      <!-- full path to directory with models -->
81      <!-- default is 'models' directory inside -->
82      <!-- the same directory as the R script -->
83      <models-dir></models-dir>
84     
85      <!-- List all models that should be used -->
86      <models>
87        <!-- Each entry should be a filename of the *.RData object representing the model. -->
88        <!-- Each entry should have a 'name' and an associated 'annotation-type'. -->
89        <!-- The 'annotation-type-scores' is optional. If provided, it is used to store a list with all classes and scores -->
90        <!-- The annotation types need to be created manually. -->
91        <model name="Subtype" annotation-type="SSP_Subtype" annotation-type-scores="SSP_Subtype_Scores">
92          Training_Run19081Genes_noNorm_SSP.subtypeMost.Fcc15_5x5foldCV.num.rules.50_24.selRules.AIMS.GS.RData
93        </model>
94      </models>
95    </ssp>
96   
97  </rscript>
98
99  <!-- Logotype information for the different sites -->
100  <!-- Uncomment as needed and set full path to image file -->
101  <!-- Supported file formats: WMF, PNG, JPG (and possibly more) -->
102  <logos>
103    <!-- <region-skåne></region-skåne>  -->
104    <!-- <landstinget-kronoberg></landstinget-kronoberg>  -->
105    <!-- <uppsala-landsting></uppsala-landsting>  -->
106    <!-- <region-halland></region-halland>  -->
107    <!-- <landstinget-blekinge></landstinget-blekinge>  -->
108    <!-- <jönköpings-län></jönköpings-län>  -->
109  </logos>
110
111  <remote-hosts>
112    <!-- one or more hosts entries. Each entry should match an -->
113    <!-- entry in the opengrid-config.xml. The 'ID' of an Open Grid cluster -->
114    <!-- is a combination of the username, address and port: user@host:port -->
115    <!-- A comma-separated list is allowed -->
116    <!-- Note that the default port number (22) must be included in the ID  -->
117    <!-- even if it is not specified in the opengrid-config.xml file. -->
118 
119    <host 
120      id="user@address:port in opengrid-config.xml (one or more separated by comma)"
121      >
122     
123      <!-- full path to the location where HiSeq/NextSeq data is stored (required) -->
124      <run-archive>/casa2/run_archive</run-archive>
125      <!-- Alternate paths in search order in case data is not found in the primary -->
126      <!-- run archive. Add more entries as needed, but it is important that they -->
127      <!-- are numbered in strictly increasing order from '2' and up. -->
128      <run-archive-2></run-archive-2>
129     
130      <!-- Full path to the location where data files should be archived (required) -->
131      <!-- The path should include the name of the project -->
132      <project-archive>/casa4/project_archive/scanb</project-archive>
133      <!-- Full path to the location where external data files should be archived (optional) -->
134      <!-- If not specified, the 'project-archive' path is used -->
135      <external-archive></external-archive>
136     
137      <!-- Full path to the root location where reference genomes are located -->
138      <!-- Do not include name of project -->
139      <reference-folder>/reference</reference-folder>
140     
141      <!-- Information about programs used by reggie -->
142      <!-- Unless otherwise noted, all paths must be the same on all nodes -->
143      <programs>
144        <java>
145          <!-- full path to java binary to use (1.8 is required by GATK!) -->
146          <path>/usr/local/packages/jre/8.0_144/bin/java</path>
147        </java>
148        <pipeline-scripts>
149          <!-- folder where the pipeline scripts are located (required). -->
150          <path>/home/scanb/lorry-pipeline/pipeline-2.16</path>
151        </pipeline-scripts>
152        <picard>
153          <!-- full path to the directory with Picard jar files (required) -->
154          <path>/usr/local/packages/picard-tools/2.22.3</path>
155        </picard>
156        <genseq>
157          <!-- full path to the genseq_check_illumina_dir.pl script (required) -->
158          <path>/usr/local/packages/genseq_tools/v0.01/genseq_check_illumina_dir.pl</path>
159        </genseq>
160        <trimmomatic>
161          <!-- full path to the JAR file with the Trimmomatic program (required) -->
162          <path>/usr/local/packages/trimmomatic/0.32/trimmomatic-0.32.jar</path>
163          <!-- full path to the file with Illumina adapter information -->
164          <adapter-file>/usr/local/packages/trimmomatic/0.32/adapters/TruSeq3-PE-2.fa</adapter-file>
165        </trimmomatic>
166        <bowtie2>
167          <!-- full or partial path to bowtie2 (required) -->
168          <path>/usr/local/packages/bowtie/2.2.4/bin/bowtie2</path>
169        </bowtie2>
170        <tophat>
171          <!-- full or partial path to tophat (required) -->
172          <path>/usr/local/packages/tophat/2.0.12/bin/tophat</path>
173        </tophat>
174        <hisat>
175          <!-- full or partial path to hisat (required) -->
176          <path>/usr/local/packages/hisat/2.1.0/bin/hisat2</path>
177        </hisat>
178        <samtools>
179          <!-- full or partial path to samtools (required) -->
180          <path>/usr/local/packages/samtools/1.4/samtools</path>
181        </samtools>
182        <bedtools>
183          <!-- full or partial path to bedtools (required) -->
184          <path>/usr/local/packages/bedtools/2.26.0/bin/bedtools</path>
185        </bedtools>
186        <cufflinks>
187          <!-- full or partial path to cufflinks (required) -->
188          <path>/usr/local/packages/cufflinks/2.2.1/bin/cufflinks</path>
189        </cufflinks>
190        <stringtie>
191          <!-- full or partial path to stringtie (required) -->
192          <path>/usr/local/packages/stringtie/1.3.3b/bin/stringtie</path>
193        </stringtie>
194        <gatk>
195          <!-- full path to GenomeAnalysisToolkit JAR file (required) -->
196          <path>/usr/local/packages/GenomeAnalysisTK/3.8/GenomeAnalysisTK.jar</path>
197        </gatk>
198        <mosdepth>
199          <!-- full or partial path to mosdepth (required) -->
200          <path>/usr/local/packages/mosdepth/0.2.6/bin/mosdepth</path>
201        </mosdepth>
202        <vardict>
203          <!-- path to the directory where VarDict is installed -->
204          <!-- NOTE! not including the 'bin/VarDict' part since -->
205          <!-- that will be added automatically -->
206          <path>/usr/local/packages/vardict/1.6.0</path>
207        </vardict>
208        <vcfanno>
209          <!-- full or partial path to vcfanno (required) -->
210          <path>/usr/local/packages/vcfanno/0.3.2/bin/vcfanno</path>
211        </vcfanno>
212        <snpeff>
213          <!-- full path to the snpEff.jar file (required) -->
214          <path>/usr/local/packages/snpeff/4.3s/snpEff.jar</path>
215        </snpeff>
216        <snpsift>
217          <!-- full path to the SnpSift.jar file (required) -->
218          <path>/usr/local/packages/snpeff/4.3s/SnpSift.jar</path>
219        </snpsift>
220        <fgbio>
221          <!-- full path to the fgbio.jar file (required) -->
222          <path>/usr/local/packages/fgbio/0.8.1/fgbio.jar</path>
223        </fgbio>
224        <novoalign>
225          <!-- full path to the novoalign file (required) -->
226          <path>/usr/local/packages/novocraft/V4.00.Pre-20190904/novoalign</path>
227        </novoalign>
228        <novosort>
229          <!-- full path to the novosort file (required) -->
230          <path>/usr/local/packages/novocraft/V4.00.Pre-20190904/novosort</path>
231        </novosort>
232      </programs>
233     
234      <!-- priority values that are selectable in the web interface -->
235      <!-- allowed range is -1023 to 1024 -->
236      <!-- NOTE! positive values require special permissions on the cluster -->
237      <priorities>
238        <!-- <priority name="high" value="500" /> -->
239        <priority name="normal" value="0" default="true" />
240        <priority name="low" value="-500" />
241      </priorities>
242     
243      <!-- settings for the demuxing step (RNAseq) -->
244      <demux>
245        <!-- parallel environment option to the queue system -->
246        <!-- the default setting requests 4 slots -->
247        <parallel-environment>smp 4-4</parallel-environment>
248        <!-- Number of open files to set with 'ulimit -n' command -->
249        <!-- if not specified, the default on the server is used -->
250        <ulimit></ulimit>
251        <!-- amount of memory to give to Picard (default is 50g)-->
252        <picard-memory>50g</picard-memory>
253        <!-- static options for the picard ExtractIlluminaBarcodes step -->
254        <extract-options>-QUIET true -VERBOSITY WARNING</extract-options>
255        <!-- static options for the picard IlluminaBasecallsToFastq step -->
256        <fastq-options>-INCLUDE_NON_PF_READS false -MAX_READS_IN_RAM_PER_TILE 5000000 -QUIET true -VERBOSITY WARNING</fastq-options>
257        <!-- number of tiles to process when debugging (default=2 (HiSeq, NovaSeq), 16 (NextSeq)) -->
258        <debug-tile-limit-hiseq>2</debug-tile-limit-hiseq>
259        <debug-tile-limit-novaseq>2</debug-tile-limit-novaseq>
260        <debug-tile-limit-nextseq>16</debug-tile-limit-nextseq>
261        <!-- static options for Bowtie when used for estimating fragment size -->
262        <bowtie-options>-q --fr -k 1 --phred33 --local --no-hd --no-unal -t -u 100000</bowtie-options>
263        <!-- the smallest number of fragments that must have been used in the fragment -->
264        <!-- size estimation, or we will set FragmentSizeAvg and FragmentSizeStdev to -1 -->
265        <bowtie-fragment-count-limit>20000</bowtie-fragment-count-limit>
266        <!-- static options for Trimmomatic -->
267        <trimmomatic-options>
268          <!-- The first step should ONLY filter Illumina adapters-->
269          <step-1>ILLUMINACLIP:${AdapterFile}:2:30:12:1:true MINLEN:20</step-1>
270          <!-- The second step is for all other filters -->
271          <step-2>MAXINFO:40:0.9 MINLEN:20</step-2>
272        </trimmomatic-options>
273        <!-- static options for gzip compression with pigz (default=-5) -->
274        <!-- NOTE! Number of threads (-p) is set automatically and should not be included here -->
275        <pigz-options>-5</pigz-options>
276      </demux>
277 
278      <!-- settings for the demuxing step (MIPs) -->
279      <demux-mips>
280        <!-- parallel environment option to the queue system -->
281        <!-- the default setting requests 8-16 slots -->
282        <parallel-environment>smp 8-16</parallel-environment>
283        <!-- amount of memory to give to Picard (default is 50g)-->
284        <picard-memory>50g</picard-memory>
285        <!-- static options for the picard ExtractIlluminaBarcodes step -->
286        <extract-options>-MINIMUM_BASE_QUALITY 0 -MINIMUM_QUALITY 2 -MAX_MISMATCHES 2 -MIN_MISMATCH_DELTA 2 -MAX_NO_CALLS 2 -QUIET true -VERBOSITY WARNING</extract-options>
287        <!-- static options for the picard IlluminaBasecallsToFastq step -->
288        <fastq-options>-INCLUDE_NON_PF_READS false -APPLY_EAMSS_FILTER false -MINIMUM_QUALITY 2 -MAX_READS_IN_RAM_PER_TILE 5000000 -QUIET true -VERBOSITY WARNING</fastq-options>
289        <!-- static options to put into the "Read group" files -->
290        <readgroup-options>PL=ILLUMINA CN=BRCAlab</readgroup-options>
291        <!-- number of tiles to process when debugging (default=2 (HiSeq), 16 (NextSeq)) -->
292        <debug-tile-limit-hiseq>2</debug-tile-limit-hiseq>
293        <debug-tile-limit-nextseq>16</debug-tile-limit-nextseq>
294        <!-- static options for gzip compression with pigz (default=-5) -->
295        <!-- NOTE! Number of threads (-p) is set automatically and should not be included here -->
296        <pigz-options>-5</pigz-options>
297      </demux-mips>
298 
299      <mask>
300        <!-- relative path from <reference-folder> to the reference genome used for masking -->
301        <!-- This is the -x option used for bowtie -->
302        <reference-name>scanb/ribo_phix_repeats_filter/ribo_phix_repeats_filter</reference-name>
303       
304        <!-- static options for bowtie -->
305        <bowtie-options>-q --fr -k 1 --phred33 -t --local</bowtie-options>
306       
307        <!-- max number of sequences to align when running in debug mode (default=2 million) -->
308        <debug-max-align>2000000</debug-max-align>
309      </mask>
310 
311      <align>
312        <!-- relative path from <reference-folder> to the reference genome used for alignment -->
313        <!-- TODO selectable in GUI? saved as annotation? -->
314        <reference-gidx>hg38/hg38.analysisSet/hg38.analysisSet</reference-gidx>
315        <reference-tidx>hg38/UCSC_hg38_knownGenes_22sep2014/knownGenes.vs.hg38.analysisSet</reference-tidx>
316       
317        <!-- static options for tophat -->
318        <tophat-options>--library-type fr-firststrand --keep-fasta-order --no-coverage-search --max-insertion-length 20 --max-deletion-length 20 --read-gap-length 20 --read-edit-dist 22</tophat-options>
319        <!-- adjustment values for the 'mate-inner-dist' and 'mate-std-dev' -->
320        <!-- parameters to tophat. The specified values are added to those -->
321        <!-- calculated by bowtie -->
322        <adjust-mate-inner-dist>13</adjust-mate-inner-dist>
323        <adjust-mate-std-dev>10</adjust-mate-std-dev>
324       
325        <!-- static options for the picard MarkDuplicates step -->
326        <mark-duplicates-options>-REMOVE_DUPLICATES false -ASSUME_SORTED true -MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 2000 -QUIET true -VERBOSITY WARNING</mark-duplicates-options>
327      </align>
328     
329      <!-- settings for aligning with Hisat -->
330      <align-hisat>
331        <!-- parallel environment option to the queue system -->
332        <!-- the default setting uses up to 16 slots on hosts with at least 8 slots available -->
333        <parallel-environment>smp 8-16</parallel-environment>
334       
335        <!-- relative path from <reference-folder> to the reference genome used for alignment -->
336        <reference-tidx>hg38/hg38.analysisSet_gencode27_snp150/genome_snp_tran</reference-tidx>
337       
338        <!-- static options for hisat -->
339        <hisat-options>-q --fr --phred33 -t --dta --dta-cufflink --new-summary --no-unal --non-deterministic --novel-splicesite-outfile aligned/splicesites.tsv --rna-strandness RF --summary-file aligned/summary.txt --rg PL:Illumina --rg CN:SCANB-prim</hisat-options>
340       
341        <!-- static options for the picard MarkDuplicates step -->
342        <mark-duplicates-options>-REMOVE_DUPLICATES false -ASSUME_SORTED true -MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 2000 -QUIET true -VERBOSITY WARNING</mark-duplicates-options>
343       
344        <!-- relative path from <reference-folder> to FASTA file used as reference for the alignment  -->
345        <haplotypecaller-ref>hg38/hg38.analysisSet_gencode27_snp150/hg38.analysisSet_gencodeid.fa</haplotypecaller-ref>
346       
347        <!-- relative path from <reference-folder> to VCF file with SNPs that we should look for -->
348        <haplotypecaller-dbsnp>scanb/genotyping-213-snp_feb2018.vcf</haplotypecaller-dbsnp>
349       
350        <!-- static options for the HaplotypeCaller step -->
351        <haplotypecaller-options>-stand_call_conf 20 --filter_reads_with_N_cigar --annotation AlleleBalance --no_cmdline_in_header</haplotypecaller-options>
352      </align-hisat>
353     
354      <!-- settings for aligning MIPs sequencing -->
355      <align-mips>
356        <!-- parallel environment option to the queue system -->
357        <!-- the default setting uses up to 16 slots on hosts with at least 8 slots available -->
358        <parallel-environment>smp 8-16</parallel-environment>
359       
360        <!-- Options for Trimmomatic -->
361        <trimmomatic>
362          <!-- Optional path to Trimmomatic, if not specified the default in the 'programs' section is used -->
363          <path>/usr/local/packages/trimmomatic/0.39/trimmomatic.jar</path>
364          <!-- The first step should filter Illumina adapters-->
365          <step-1>ILLUMINACLIP:adapter.fa:3:12:7:1:true MINLEN:30</step-1>
366          <!-- The second step is for all other filters -->
367          <step-2>MAXINFO:30:0.25 MINLEN:30</step-2>
368        </trimmomatic>
369       
370        <!-- The amplicons BED files are used by novoalign -->
371        <!-- The main-dir setting should point to a directory with the BED files -->
372        <!-- Which BED file to use is determined by matching the 'panel' attribute with -->
373        <!-- the MIPS_Panel annotation from the DNA item. If no match is found the -->
374        <!-- alignment will fail. -->
375        <amplicons>
376          <main-dir>${ReferenceDir}/project/brcalab/b37/mipcombo_v0</main-dir>
377          <bed panel="MI.B1B2.1">MIPCOMBO_B1B2_CHEK2_snpid55_snpid9plus_v01.bed</bed>
378          <bed panel="MI.ID.1">MIPCOMBO_CHEK2_snpid55_snpid9plus_v01.bed</bed>
379          <bed panel="MI.PALB2.1">MIPCOMBO_PALB2_CHEK2_snpid55_snpid9plus_v01.bed</bed>
380          <bed panel="MI.CDKN2A.1">MIPCOMBO_CDKN2A_CDK4_ex2_snpid55_snpid9plus_v01.bed</bed>
381          <bed panel="MI.TP53.1">MIPCOMBO_TP53_CHEK2_snpid55_snpid9plus_v01.bed</bed>
382        </amplicons>
383       
384        <!-- Parameters that are needed by novoalign -->
385        <novoalign>
386          <index>${ReferenceDir}/novoalign/human_g1k_v37_decoy_dbSNP137_12M_k14s2.novoindex</index>
387          <options>-o BAM 5 -o Sync -g 40 -x 1 --matchReward 4 --softclip 50,30 --trim3hp AG -H 22 -t 0,2.0 --hlimit 8 -v 150 -r R --Q2Off --pechimera off -F BAMPE RX</options>
388        </novoalign>
389       
390        <!-- The following genome references are needed to extract some metrics after the alignment -->
391        <genome-dict>${ReferenceDir}/gatk_bundle/2.8/b37/human_g1k_v37_decoy.dict</genome-dict>
392        <genome-fasta>${ReferenceDir}/gatk_bundle/2.8/b37/human_g1k_v37_decoy.fasta</genome-fasta>
393       
394        <!-- Options for the picard UmiAwareMarkDuplicatesWithMateCigar step -->
395        <mark-duplicates-options>-CREATE_INDEX true -CREATE_MD5_FILE true -MAX_FILE_HANDLES 20000 -ALLOW_MISSING_UMIS false -DUPLICATE_SCORING_STRATEGY SUM_OF_BASE_QUALITIES -MAX_EDIT_DISTANCE_TO_JOIN 1 -REMOVE_DUPLICATES true</mark-duplicates-options>
396
397        <!-- Options for the picard CollectTargetedPcrMetrics step -->
398        <pcr-metrics-options>-METRIC_ACCUMULATION_LEVEL null -METRIC_ACCUMULATION_LEVEL ALL_READS -MINIMUM_MAPPING_QUALITY 1 -MINIMUM_BASE_QUALITY 20 -CLIP_OVERLAPPING_READS true -COVERAGE_CAP 500 -NEAR_DISTANCE 5</pcr-metrics-options>
399      </align-mips>
400     
401      <mbaf>
402        <!-- parallel environment option to the queue system -->
403        <!-- the default setting uses up to 16 slots on hosts with at least 8 slots available -->
404        <parallel-environment>smp 8-16</parallel-environment>
405       
406        <!-- relative path from <reference-folder> to FASTA file used as reference for the alignment  -->
407        <!-- this should probably be the same as in <align-hisat>/<haplotypecaller-ref> -->
408        <haplotypecaller-ref>hg38/hg38.analysisSet_gencode27_snp150/hg38.analysisSet_gencodeid.fa</haplotypecaller-ref>
409       
410        <!-- relative path from <reference-folder> to VCF file with SNPs that we should look for -->
411        <haplotypecaller-dbsnp>scanb/genotyping-mbaf-snp_oct2018.vcf</haplotypecaller-dbsnp>
412       
413        <!-- static options for the HaplotypeCaller step -->
414        <haplotypecaller-options>-stand_call_conf 20 --filter_reads_with_N_cigar --no_cmdline_in_header</haplotypecaller-options>
415      </mbaf>
416     
417      <!-- settings for variant calling -->
418      <variant-call>
419        <!-- parallel environment option to the queue system -->
420        <!-- the default setting uses up to 16 slots on hosts with at least 8 slots available -->
421        <parallel-environment>smp 8-16</parallel-environment>
422
423        <!-- relative path from <reference-folder> to FASTA file used as reference for the alignment  -->
424        <!-- this should probably be the same as in <align-hisat>/<haplotypecaller-ref> -->
425        <genome-fasta>hg38/hg38.analysisSet_gencode27_snp150/hg38.analysisSet_gencodeid.fa</genome-fasta>
426       
427        <!-- Full path to base directory with databases and other stuff needed by the pipeline -->
428        <!-- This value can be used in other options as ${BaseDir} -->
429        <base-dir>${ReferenceDir}/scanb/rnaseqvarcall-feb2020</base-dir>
430       
431        <!-- static options for 'mosdepth' for regular and debug modes (optional) -->
432        <mosdepth-options></mosdepth-options>
433        <mosdepth-options-debug>-c chr6</mosdepth-options-debug>
434       
435        <!-- the required depth for a base to be callable for variants (optional, default=5) -->
436        <min-depth>5</min-depth>
437       
438        <!-- static options for VarDict (required) -->
439        <vardict-options>-f 0.02 -c 1 -S 2 -E 3 -g 4 -Q 20 -r 2 -q 20 --nosv</vardict-options>
440       
441        <!-- static options for var2vcf_valid.pl (required) -->
442        <var2vcf-options>-A -f 0.02</var2vcf-options>
443       
444        <!-- static options for vcfanno command line (required) -->
445        <!-- See https://github.com/brentp/vcfanno for more information -->
446        <vcfanno-options>-p 8 -lua ${BaseDir}/vcfanno.lua -base-path ${BaseDir} ${BaseDir}/allDbs.toml</vcfanno-options>
447       
448        <!-- static options for the snpEff command (required) -->
449        <snpeff-options>-configOption data.dir=${BaseDir}/snpEff_v4_3_hg38/data -noLog -noStats -canon hg38</snpeff-options>
450
451        <!-- static options for the SnpSift command (required) -->
452        <snpsift-options>-s ${BaseDir}/rna_chr_set.txt -s ${BaseDir}/intogen-BRCA-genes-list_patch.txt -e ${BaseDir}/filter_expression.txt</snpsift-options>
453       
454        <!-- path to the COSMIC mutation signature data -->
455        <mutation-signature>${BaseDir}/COSMIC_Cancer_signatures_probabilities.RData</mutation-signature>
456      </variant-call>
457     
458      <cufflinks>
459        <!-- parallel environment option to the queue system -->
460        <!-- the default setting uses between 8 and 16 slots on hosts with at least 8 slots available -->
461        <parallel-environment>smp 8-16</parallel-environment>
462 
463        <!-- relative path from <reference-folder> to the reference genome used by cufflinks -->
464        <reference-gidx>hg38/hg38.analysisSet/hg38.analysisSet.fa</reference-gidx>
465        <reference-gtf>hg38/UCSC_hg38_knownGenes_22sep2014.gtf</reference-gtf>
466       
467        <!-- static options for cufflinks -->
468        <options>--multi-read-correct --library-type fr-firststrand --total-hits-norm --max-bundle-frags 10000000 --no-update-check --quiet</options>
469       
470        <!-- if the aligned sequences item has more reads than this limit (when running in debug mode) -->
471        <!-- the accepted_hits.bam will be limited to chr1 before running cufflinks -->
472        <debug-max-aligned>2000000</debug-max-aligned>
473       
474        <!-- path to a file containing pairs of tracking_id values -->
475        <!-- *.fpkm_tracking files are searched and values from the -->
476        <!-- second column are replaced with values in the first column -->
477        <!-- If no mapping file is specified, no replacement is done -->
478        <tracking-id-map>hg38/UCSC_hg38_knownGenes_22sep2014_duplicate_transcript_id.txt</tracking-id-map>
479      </cufflinks>
480     
481      <stringtie>
482        <!-- parallel environment option to the queue system -->
483        <!-- the default setting uses between 8 and 16 slots on hosts with at least 8 slots available -->
484        <parallel-environment>smp 8-16</parallel-environment>
485       
486        <!-- relative path from <reference-folder> to the reference genome used by stringtie -->
487        <reference-gtf>hg38/hg38.analysisSet_gencode27_snp150/gencode.v27.primary_assembly.annotation_subset_transcripttype_proteincoding.gtf</reference-gtf>
488
489        <!-- static options for stringtie -->
490        <options>--rf -B -e</options>
491
492      </stringtie>
493    </host>
494 
495   
496  </remote-hosts>
497
498</reggie>
Note: See TracBrowser for help on using the repository browser.