source: other/pipeline/trunk/novaseq_status.sh @ 5856

Last change on this file since 5856 was 5856, checked in by Nicklas Nordborg, 4 years ago

References #1231: Add support for sequencing with NovaSeq?

Added novaseq_status.sh script.

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Id
File size: 4.2 KB
Line 
#!/bin/sh
# $Id $

# Nicklas Nordborg, 2020
#
# Finds information about a sequencing run given the barcode of a flow cell.
# It is expected that the sequencing is done with a NovaSeq sequencer.
#
# run ./novaseq_status.sh <barcode> <run-archive-root-1> [<run-archive-root-2> ...]
#
# The output is a number of key-value pairs. All values may not be present.
#
# RunArchive: The path to the data folder for the flow cell
#             (empty if no folder has been created yet)
# Config: Date and time the 'Config' folder was last modified
# RunParameters: Date and time the 'RunParameters.xml' file was last modified
# Read1: Value from <Read1NumberOfCycles> tag in RunParameters.xml
# Read2: Value from <Read2NumberOfCycles> tag in RunParameters.xml
# Index1Read: Value from <IndexRead1NumberOfCycles> tag in RunParameters.xml
# Index2Read: Value from <IndexRead2NumberOfCycles> tag in RunParameters.xml
# NovaSeqSerial: Value from <InstrumentName> tag in RunParameters.xml
# CbclCount: Number of files ending with '.cbcl'
# LaneCount: Value from the LaneCount attribute in FlowcellLayout tag in RunInfo.xml
# SurfaceCount: Value from the SurfaceCount attribute in FlowcellLayout tag in RunInfo.xml
# SwathCount: Value from the SwathCount attribute in FlowcellLayout tag in RunInfo.xml
# TileCount: Value from the TileCount attribute in FlowcellLayout tag in RunInfo.xml
# RTAComplete: Date and time the 'RTAComplete.txt' was last modified
#
# Exit status:
#   0  on success, including when no data folder exists yet
#   1  on a usage error or when more than one data folder matches the barcode
#
# Only POSIX shell syntax is used so the script works when /bin/sh is not
# bash. It still relies on 'find -maxdepth/-iname' and 'date -r <file>' as
# provided by GNU findutils/coreutils.

# An empty barcode would turn the search pattern into '**' and match every
# folder, and without a root 'find' would silently search the current
# directory, so both are treated as usage errors.
if [ $# -lt 2 ] || [ -z "$1" ]; then
  echo "Usage: $0 <barcode> <run-archive-root-1> [<run-archive-root-2> ...]" 1>&2
  exit 1
fi

BARCODE=$1
shift
# From here on "$@" holds the run archive roots. They are passed to 'find'
# as "$@" so that roots containing whitespace are kept as single arguments.

# Format string for file dates/times
DATE_FORMAT="%Y%m%d %H%M%S"

# Prints the last modification time of file/folder $1 using DATE_FORMAT
file_date() {
  date -r "$1" +"${DATE_FORMAT}"
}

# Prints the text between <$2> and the next '<' for each matching line in file $1
xml_tag_value() {
  grep "<$2>" "$1" | cut -d '>' -f 2 | cut -d '<' -f 1
}

# Prints the value of each $2="..." attribute found in file $1
xml_attribute_value() {
  grep -o "$2=\"[^\"]*\"" "$1" | cut -d '"' -f 2
}

# Try to find a folder inside run-archive that has the barcode in the name
# The folder may not yet exist so a missing folder is not an error
# (errors from 'find', e.g. a root that does not exist, are ignored on purpose)
DATA_FOLDER=$(find "$@" -maxdepth 2 -iname "*${BARCODE}*" -type d -print 2> /dev/null || true)

# No folder yet: report an empty RunArchive and stop here. Continuing would
# test paths like '/Config' and '/RunInfo.xml' in the file system root.
if [ -z "${DATA_FOLDER}" ]; then
  echo "RunArchive:"
  exit 0
fi

# Fail if more than one folder is found ('find' prints one folder per line)
# The arithmetic expansion strips any padding that 'wc' may add
FOLDER_COUNT=$(($(printf '%s\n' "${DATA_FOLDER}" | wc -l)))
if [ "${FOLDER_COUNT}" -ne 1 ]; then
  echo "Found ${FOLDER_COUNT} data folders for flow cell ${BARCODE}" 1>&2
  printf '%s\n' "${DATA_FOLDER}" 1>&2
  exit 1
fi

echo "RunArchive: ${DATA_FOLDER}"

# Config folder is created immediately when starting the NovaSeq
# We use the date of this folder to set the start date of the job
if [ -d "${DATA_FOLDER}/Config" ]; then
  echo "Config: $(file_date "${DATA_FOLDER}/Config")"
fi

# RunParameters.xml is created after clustering
# We extract information about number of reads and lanes
# and compare that to the number of *.cbcl files we can find
# This gives an estimate of the current sequencing cycle and we can
# use this for progress reporting
RUN_PARAMETERS=${DATA_FOLDER}/RunParameters.xml
if [ -f "${RUN_PARAMETERS}" ]; then
  echo "RunParameters: $(file_date "${RUN_PARAMETERS}")"
  echo "Read1: $(xml_tag_value "${RUN_PARAMETERS}" Read1NumberOfCycles)"
  echo "Read2: $(xml_tag_value "${RUN_PARAMETERS}" Read2NumberOfCycles)"
  echo "Index1Read: $(xml_tag_value "${RUN_PARAMETERS}" IndexRead1NumberOfCycles)"
  echo "Index2Read: $(xml_tag_value "${RUN_PARAMETERS}" IndexRead2NumberOfCycles)"
  echo "NovaSeqSerial: $(xml_tag_value "${RUN_PARAMETERS}" InstrumentName)"
fi

# Count number of BCL files which gives us information about
# the progress of the sequencing
CBCL_FOLDER=${DATA_FOLDER}/Data/Intensities/BaseCalls
if [ -d "${CBCL_FOLDER}" ]; then
  # The pattern must be quoted, otherwise the shell expands it against the
  # current directory before 'find' gets to see it
  echo "CbclCount: $(find "${CBCL_FOLDER}" -type f -name '*.cbcl' | wc -l)"
fi


# RunInfo.xml contains information about the layout of the flowcell
# which we need to be able to compare the number of *.cbcl files
RUN_INFO=${DATA_FOLDER}/RunInfo.xml
if [ -f "${RUN_INFO}" ]; then
  echo "LaneCount: $(xml_attribute_value "${RUN_INFO}" LaneCount)"
  echo "SurfaceCount: $(xml_attribute_value "${RUN_INFO}" SurfaceCount)"
  echo "SwathCount: $(xml_attribute_value "${RUN_INFO}" SwathCount)"
  echo "TileCount: $(xml_attribute_value "${RUN_INFO}" TileCount)"
fi


# RTAComplete.txt is created when everything is complete
# This becomes the end date of the job and should trigger
# Reggie to start file checks and secondary analysis
if [ -f "${DATA_FOLDER}/RTAComplete.txt" ]; then
  echo "RTAComplete: $(file_date "${DATA_FOLDER}/RTAComplete.txt")"
fi
Note: See TracBrowser for help on using the repository browser.