Changeset 5840
- Timestamp:
- Feb 25, 2020, 9:35:02 AM (3 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
other/pipeline/trunk/mips_functions.sh
r5838 r5840 245 245 } 246 246 247 # Re-header the BAM file 248 # Replace SQ entries in the novo.bam header with reference genome dict 249 # - The novoalign genome index was built from a genome with ambiguity codes 250 # and therefore the md5 checksums don't match the standard reference 251 # genome. 252 # - Mismatching md5 values in the SQ entries will cause both picard and 253 # GATK tools to crash downstream 254 # Global parameters: 255 # $samtools: Path to samtools executable 256 # $GenomeDict: Path to genome .dict file 257 function reheader_bam { 258 ${samtools} view -H tmp/novo.bam | grep -v "^@SQ" > tmp/header.sam 259 grep "^@SQ" ${GenomeDict} >> tmp/header.sam 260 ${samtools} reheader -P tmp/header.sam tmp/novo.bam > tmp/novo_reheaded.bam 261 } 262 263 # Split novo_reheaded.bam into separate files for concordant, discordant and unmapped read pairs 264 # Global parameters: 265 # $samtools: Path to samtools executable 266 function split_bam { 267 ${samtools} view -b -h -@ 2 -f 3 tmp/novo_reheaded.bam > tmp/concordant.bam 268 ${samtools} view -b -h -@ 2 -G 12 -F 2 tmp/novo_reheaded.bam > tmp/discordant.bam 269 ${samtools} view -b -h -@ 2 -f 12 tmp/novo_reheaded.bam > out/unmapped.bam 270 } 271 272 # Mark duplicates with Picard that is aware of UMIs 273 # Parameters: 274 # $1: Prefix to .bam file to process 275 # Global parameters: 276 # $MarkDuplicatesOptions: Options to Picard 277 function mark_duplicates { 278 local prefix=$1 279 280 ./stdwrap.sh ./picard2 UmiAwareMarkDuplicatesWithMateCigar \ 281 ${MarkDuplicatesOptions} \ 282 -UMI_METRICS out/${prefix}.umi_metrics.txt \ 283 -METRICS_FILE out/${prefix}.dedup_metrics.txt \ 284 -INPUT tmp/${prefix}.bam \ 285 -OUTPUT out/${prefix}.bam \ 286 >> markduplicates.out 287 } 288
Note: See TracChangeset
for help on using the changeset viewer.