Changeset 5840


Ignore:
Timestamp:
Feb 25, 2020, 9:35:02 AM (3 years ago)
Author:
Nicklas Nordborg
Message:

References #1218: Implement MIPs alignment

Added functions for re-header, split and mark duplicates.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • other/pipeline/trunk/mips_functions.sh

    r5838 r5840  
    245245}
    246246
     247# Re-header the BAM file
     248# Replace SQ entries in the novo.bam header with reference genome dict
     249# - The novoalign genome index was built from a genome with ambiguity codes
     250#   and therefore the md5 checksums don't match the standard reference
     251#   genome.
     252# - Mismatching md5 values in the SQ entries will cause both picard and
     253#   GATK tools to crash downstream
     254# Global parameters:
     255#   $samtools: Path to samtools executable
     256#   $GenomeDict: Path to genome .dict file
     257function reheader_bam {
     258  ${samtools} view -H tmp/novo.bam | grep -v "^@SQ" > tmp/header.sam
     259  grep "^@SQ" ${GenomeDict} >> tmp/header.sam
     260  ${samtools} reheader -P tmp/header.sam tmp/novo.bam > tmp/novo_reheaded.bam
     261}
     262
     263# Split novo_reheaded.bam into separate files for concordant, discordant and unmapped read pairs
     264# Global parameters:
     265#   $samtools: Path to samtools executable
     266function split_bam {
     267  ${samtools} view -b -h -@ 2 -f 3 tmp/novo_reheaded.bam > tmp/concordant.bam
     268  ${samtools} view -b -h -@ 2 -G 12 -F 2 tmp/novo_reheaded.bam > tmp/discordant.bam
     269  ${samtools} view -b -h -@ 2 -f 12 tmp/novo_reheaded.bam > out/unmapped.bam
     270}
     271
     272# Mark duplicates with Picard's UMI-aware tool (UmiAwareMarkDuplicatesWithMateCigar)
     273# Parameters:
     274#   $1: Prefix of the .bam file to process (reads tmp/<prefix>.bam, writes out/<prefix>.bam)
     275# Global parameters:
     276#   $MarkDuplicatesOptions: Options to Picard
     277function mark_duplicates {
     278  local prefix=$1
     279 
     280  ./stdwrap.sh ./picard2 UmiAwareMarkDuplicatesWithMateCigar \
     281    ${MarkDuplicatesOptions} \
     282    -UMI_METRICS out/${prefix}.umi_metrics.txt \
     283    -METRICS_FILE out/${prefix}.dedup_metrics.txt \
     284    -INPUT tmp/${prefix}.bam \
     285    -OUTPUT out/${prefix}.bam \
     286    >> markduplicates.out
     287}
     288
Note: See TracChangeset for help on using the changeset viewer.