Changeset 5842
- Timestamp:
- Feb 25, 2020, 11:47:57 AM (3 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
other/pipeline/trunk/mips_functions.sh
r5840 r5842 192 192 } 193 193 194 # Get min_insert and max_insert from the Amplicons BED file 195 # The values are needed by novoalign and collecting alignment metrics 196 # Global parameters: 197 # $AmpliconsBed: Path to amplicons BED file 198 # $min_insert: Updated with value from the BED file 199 # $max_insert: Updated with value from the BED file 200 function get_min_max_insert { 201 min_insert=$(awk 'NR == 1 || $3 - $2 < min {min = $3 - $2}END{print min - 1}' "${AmpliconsBed}") 202 max_insert=$(awk 'NR == 1 || $3 - $2 > max {max = $3 - $2}END{print max + 1}' "${AmpliconsBed}") 203 } 204 205 194 206 # Align with novoalign 195 207 # Parameters: … … 203 215 # $NovoSort: Path to novosort executable 204 216 # $NumThreads: Number of threads to use 217 # $min_insert: Calculated by get_min_max_insert 218 # $max_insert: Calculated by get_min_max_insert 205 219 # $AlignedBams: For returning paths to aligned BAM files (used for input in the merge step) 206 220 function novoalign { … … 219 233 -i PE ${min_insert}-${max_insert} \ 220 234 --amplicons ${AmpliconsBed} \ 221 1 out/${ prefix}.novo_coverage.bed \235 1 out/${n}.novo_coverage.bed \ 222 236 -f tmp/${n}.umi.bam \ 223 237 > tmp/${n}.nosort.bam \ 224 3> out/${ prefix}.novo.log238 3> out/${n}.novo.log 225 239 226 240 ./stderrwrap.sh ${NovoSort} \ … … 287 301 } 288 302 303 # Get PCR metrics for the concordant.bam file 304 # Global parameters: 305 # $AmpliconsBed: Path to amplicons BED file 306 # $GenomeDict: Path to genome .dict file 307 # $GenomeFasta: Path to the genome fasta file 308 # $PcrMetricsOptions: Options for Picard 309 function collect_pcr_metrics { 310 311 local amplicon_set_name=$(basename ${AmpliconsBed} ".bed") 312 awk -F "\t" -v OFS="\t" '{print $1,$7,$8,$4,$5,$6}' ${AmpliconsBed} > amplicon_inserts.bed 313 314 ./stdwrap.sh ./picard2 BedToIntervalList \ 315 -SEQUENCE_DICTIONARY ${GenomeDict} \ 316 -INPUT amplicon_inserts.bed \ 317 -OUTPUT amplicon_inserts.intervals \ 318 >> pcr_metrics.out 319 320 ./stdwrap.sh ./picard2 CollectTargetedPcrMetrics \ 321 -INPUT out/concordant.bam \ 322 -CUSTOM_AMPLICON_SET_NAME ${amplicon_set_name} \ 323 -AMPLICON_INTERVALS amplicon_inserts.intervals \ 324 -TARGET_INTERVALS amplicon_inserts.intervals \ 325 -R ${GenomeFasta} \ 326 ${PcrMetricsOptions} \ 327 -PER_TARGET_COVERAGE out/concordant.per_amplicon_cov.txt \ 328 -PER_BASE_COVERAGE out/concordant.per_base_cov.txt.gz \ 329 -OUTPUT out/concordant.pcr_metrics.txt \ 330 >> pcr_metrics.out 331 } 332 333 # Get alignment metrics for the concordant.bam file 334 # Global parameters: 335 # $GenomeFasta: Path to the genome fasta file 336 # $max_insert: Calculated by get_min_max_insert 337 # $adapterN: Multiple adapter sequences defined earlier in this file 338 function collect_alignment_metrics { 339 340 ./stdwrap.sh ./picard2 CollectAlignmentSummaryMetrics \ 341 -INPUT out/concordant.bam \ 342 -R ${GenomeFasta} \ 343 -METRIC_ACCUMULATION_LEVEL null -METRIC_ACCUMULATION_LEVEL LIBRARY -METRIC_ACCUMULATION_LEVEL READ_GROUP \ 344 -MAX_INSERT_SIZE ${max_insert} \ 345 -ADAPTER_SEQUENCE null -ADAPTER_SEQUENCE ${adapter1} -ADAPTER_SEQUENCE ${adapter2} \ 346 -ADAPTER_SEQUENCE ${adapter1rc} -ADAPTER_SEQUENCE ${adapter2rc} \ 347 -OUTPUT out/concordant.alignment_summary_metrics.txt \ 348 >> alignment_metrics.out 349 350 }
Note: See TracChangeset
for help on using the changeset viewer.