Changeset 5842


Ignore:
Timestamp:
Feb 25, 2020, 11:47:57 AM (3 years ago)
Author:
Nicklas Nordborg
Message:

References #1218: Implement MIPs alignment

Added functions for collecting metrics.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • other/pipeline/trunk/mips_functions.sh

    r5840 r5842  
    192192}
    193193
     194# Get min_insert and max_insert from the Amplicons BED file
     195# The values are needed by novoalign and collecting alignment metrics
     196# Global parameters:
     197#   $AmpliconsBed: Path to amplicons BED file
     198#   $min_insert: Updated with value from the BED file
     199#   $max_insert: Updated with value from the BED file
     200function get_min_max_insert {
     201  min_insert=$(awk 'NR == 1 || $3 - $2 < min {min = $3 - $2}END{print min - 1}' "${AmpliconsBed}")
     202  max_insert=$(awk 'NR == 1 || $3 - $2 > max {max = $3 - $2}END{print max + 1}' "${AmpliconsBed}")
     203}
     204
     205
    194206# Align with novoalign
    195207# Parameters:
     
    203215#   $NovoSort: Path to novosort executable
    204216#   $NumThreads: Number of threads to use
     217#   $min_insert: Calculated by get_min_max_insert
     218#   $max_insert: Calculated by get_min_max_insert
    205219#   $AlignedBams: For returning paths to aligned BAM files (used for input in the merge step)
    206220function novoalign {
     
    219233    -i PE ${min_insert}-${max_insert} \
    220234    --amplicons ${AmpliconsBed} \
    221     1 out/${prefix}.novo_coverage.bed \
     235    1 out/${n}.novo_coverage.bed \
    222236    -f tmp/${n}.umi.bam \
    223237    > tmp/${n}.nosort.bam \
    224     3> out/${prefix}.novo.log
     238    3> out/${n}.novo.log
    225239   
    226240  ./stderrwrap.sh ${NovoSort} \
     
    287301}
    288302
     303# Get PCR metrics for the concordant.bam file
     304# Global parameters:
     305#   $AmpliconsBed: Path to amplicons BED file
     306#   $GenomeDict: Path to genome .dict file
     307#   $GenomeFasta: Path to the genome fasta file
     308#   $PcrMetricsOptions: Options for Picard
     309function collect_pcr_metrics {
     310 
     311  local amplicon_set_name=$(basename ${AmpliconsBed} ".bed")
     312  awk -F "\t" -v OFS="\t" '{print $1,$7,$8,$4,$5,$6}' ${AmpliconsBed} > amplicon_inserts.bed
     313
     314  ./stdwrap.sh ./picard2 BedToIntervalList \
     315    -SEQUENCE_DICTIONARY ${GenomeDict} \
     316    -INPUT amplicon_inserts.bed \
     317    -OUTPUT amplicon_inserts.intervals \
     318    >> pcr_metrics.out
     319 
     320  ./stdwrap.sh ./picard2 CollectTargetedPcrMetrics \
     321    -INPUT out/concordant.bam \
     322    -CUSTOM_AMPLICON_SET_NAME ${amplicon_set_name} \
     323    -AMPLICON_INTERVALS amplicon_inserts.intervals \
     324    -TARGET_INTERVALS amplicon_inserts.intervals \
     325    -R ${GenomeFasta} \
     326    ${PcrMetricsOptions} \
     327    -PER_TARGET_COVERAGE out/concordant.per_amplicon_cov.txt \
     328    -PER_BASE_COVERAGE out/concordant.per_base_cov.txt.gz \
     329    -OUTPUT out/concordant.pcr_metrics.txt \
     330    >> pcr_metrics.out
     331}
     332
     333# Get alignment metrics for the concordant.bam file
     334# Global parameters:
     335#   $GenomeFasta: Path to the genome fasta file
     336#   $max_insert: Calculated by get_min_max_insert
     337#   $adapterN: Multiple adapter sequences defined earlier in this file
     338function collect_alignment_metrics {
     339
     340  ./stdwrap.sh ./picard2 CollectAlignmentSummaryMetrics \
     341    -INPUT out/concordant.bam \
     342    -R ${GenomeFasta} \
     343    -METRIC_ACCUMULATION_LEVEL null -METRIC_ACCUMULATION_LEVEL LIBRARY -METRIC_ACCUMULATION_LEVEL READ_GROUP \
     344    -MAX_INSERT_SIZE ${max_insert} \
     345    -ADAPTER_SEQUENCE null -ADAPTER_SEQUENCE ${adapter1} -ADAPTER_SEQUENCE ${adapter2} \
     346    -ADAPTER_SEQUENCE ${adapter1rc} -ADAPTER_SEQUENCE ${adapter2rc} \
     347    -OUTPUT out/concordant.alignment_summary_metrics.txt \
     348    >> alignment_metrics.out
     349
     350}
Note: See TracChangeset for help on using the changeset viewer.