source: other/pipeline/trunk/hiseq_status.sh @ 5474

Last change on this file since 5474 was 5474, checked in by Nicklas Nordborg, 3 years ago

References #1142: Create MIPs flow cell and start sequencing

Added a pipeline script hiseq_status.sh. It is a script that is similar to the nextseq_status.sh script and is used to get information about an ongoing HiSeq sequencing run. It is believed to give us all the information that is needed to monitor the progress and to enable us to detect when the sequencing has ended.

We need some more counters to get values that we can compare to the number of BCL files:

LaneCount * SurfaceCount * SwathCount * TileCount * (total reads) == BclCount

The nextseq_status.sh script has also been updated to give us the Index2Read value.

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Id
File size: 3.7 KB
Line 
#!/bin/bash
# $Id $

# Nicklas Nordborg, 2019
#
# Finds information about a sequencing run given the barcode of a flow cell
# It is expected that the sequencing is done with a HiSeq sequencer
#
# run ./hiseq_status.sh <barcode> <run-archive-root-1> [<run-archive-root-2> ...]
#
# The script requires bash (arrays, readarray and process substitution).
#
# The output is a number of key-value pairs. All values may not be present.
#
# RunArchive: The path to the data folder for the flow cell
#             (empty if no data folder exists yet)
# Config: Date and time the 'Config' folder was last modified
# RunParameters: Date and time the 'runParameters.xml' file was last modified
#
# BclCount: Number of files ending with '.bcl'
# LaneCount: Value from <FlowcellLayout LaneCount> tag in RunInfo.xml
# SurfaceCount: Value from <FlowcellLayout SurfaceCount> tag in RunInfo.xml
# SwathCount: Value from <FlowcellLayout SwathCount> tag in RunInfo.xml
# TileCount: Value from <FlowcellLayout TileCount> tag in RunInfo.xml
# Read1: Value from <Read1> tag in runParameters.xml
# Read2: Value from <Read2> tag in runParameters.xml
# IndexRead1: Value from <IndexRead1> tag in runParameters.xml
# IndexRead2: Value from <IndexRead2> tag in runParameters.xml
# RTAComplete: Date and time the 'RTAComplete.txt' was last modified
# HiSeqSerial: Value from <ScannerID> tag in runParameters.xml
#
# Exit status:
#   0 - success (also when no data folder exists yet)
#   1 - more than one data folder matches the barcode
#   2 - wrong usage (missing barcode or run archive root)
#
# NOTE: 'set -e' is intentionally not used. Missing folders and files are
# normal while a sequencing run is in progress.

# Format string for file dates/times
DATE_FORMAT="%Y%m%d %H%M%S"

# Prints the last modification time of a file or folder using DATE_FORMAT
# Arguments: $1 - path to the file or folder
file_date() {
  date +"${DATE_FORMAT}" -r "$1"
}

# Prints the value of an attribute (name="value") found in a file
# Prints nothing if the file doesn't exist
# Arguments: $1 - attribute name, $2 - path to the xml file
xml_attribute() {
  [ -f "$2" ] || return 0
  grep -o "$1=\"[^\"]*\"" "$2" | cut -d '"' -f 2
}

# Prints the text inside a <name>text</name> tag found in a file
# The opening tag and the text must be on the same line
# Prints nothing if the file doesn't exist
# Arguments: $1 - tag name, $2 - path to the xml file
xml_element() {
  [ -f "$2" ] || return 0
  grep "<$1>" "$2" | cut -d '>' -f 2 | cut -d '<' -f 1
}

# Prints (one per line) all folders that have the barcode in the name
# and are located at most two levels below any of the run archive roots
# Arguments: $1 - barcode, $2... - run archive roots
find_data_folders() {
  local barcode=$1
  shift
  # Errors are ignored on purpose since a run archive root
  # or the data folder may not exist yet
  find "$@" -maxdepth 2 -iname "*${barcode}*" -type d -print 2> /dev/null
}

# Entry point
# Arguments: $1 - barcode, $2... - run archive roots
# Outputs: key-value pairs to stdout, error messages to stderr
# Returns: 0, 1 or 2 (see 'Exit status' at the top of the file)
main() {
  if [ "$#" -lt 2 ] || [ -z "$1" ]; then
    echo "Usage: ${0##*/} <barcode> <run-archive-root-1> [<run-archive-root-2> ...]" 1>&2
    return 2
  fi
  local barcode=$1
  shift

  # Try to find a folder inside run-archive that has the barcode in the name
  # Reading from a process substitution gives an empty array when nothing
  # is found (a here-string would always give at least one empty element)
  local -a folders=()
  readarray -t folders < <(find_data_folders "${barcode}" "$@")

  # Fail if more than one folder is found
  if [ "${#folders[@]}" -gt 1 ]; then
    echo "Found ${#folders[@]} data folders for flow cell ${barcode}" 1>&2
    printf '%s\n' "${folders[@]}" 1>&2
    return 1
  fi

  # The folder may not yet exist so a missing folder is not an error
  # We report an empty RunArchive and stop here so that we never
  # look for files relative to the file system root
  if [ "${#folders[@]}" -eq 0 ]; then
    echo "RunArchive:"
    return 0
  fi

  local data_folder=${folders[0]}
  printf 'RunArchive: %s\n' "${data_folder}"

  # Config folder is created immediately when starting the HiSeq
  # We use the date of this folder to set the start date of the job
  if [ -d "${data_folder}/Config" ]; then
    echo "Config: $(file_date "${data_folder}/Config")"
  fi

  # runParameters.xml is created after clustering
  # We extract information about number of reads and lanes
  # and compare that to the number of *.bcl files we can find
  #   LaneCount * SurfaceCount * SwathCount * TileCount * (total reads) == BclCount
  # This gives an estimate of the current sequencing cycle and we can
  # use this for progress reporting
  local run_parameters="${data_folder}/runParameters.xml"
  local run_info="${data_folder}/RunInfo.xml"
  if [ -f "${run_parameters}" ]; then
    echo "RunParameters: $(file_date "${run_parameters}")"
    # The pattern must be quoted or the shell may expand it against
    # files in the current working directory
    echo "BclCount: $(find "${data_folder}" -type f -name '*.bcl' | wc -l)"
    echo "LaneCount: $(xml_attribute LaneCount "${run_info}")"
    echo "SurfaceCount: $(xml_attribute SurfaceCount "${run_info}")"
    echo "SwathCount: $(xml_attribute SwathCount "${run_info}")"
    echo "TileCount: $(xml_attribute TileCount "${run_info}")"
    echo "Read1: $(xml_element Read1 "${run_parameters}")"
    echo "Read2: $(xml_element Read2 "${run_parameters}")"
    echo "IndexRead1: $(xml_element IndexRead1 "${run_parameters}")"
    echo "IndexRead2: $(xml_element IndexRead2 "${run_parameters}")"
    echo "HiSeqSerial: $(xml_element ScannerID "${run_parameters}")"
  fi

  # RTAComplete.txt is created when everything is complete
  # This becomes the end date of the job and should trigger
  # Reggie to start file checks and secondary analysis
  if [ -f "${data_folder}/RTAComplete.txt" ]; then
    echo "RTAComplete: $(file_date "${data_folder}/RTAComplete.txt")"
  fi
  return 0
}

# Must be the last command so that the exit status of the script
# is the return value from main
main "$@"
Note: See TracBrowser for help on using the repository browser.