Changeset 6007


Ignore:
Timestamp:
Sep 18, 2020, 8:24:09 AM (12 months ago)
Author:
Nicklas Nordborg
Message:

References #1266: Run prepDE.py in the StringTie pipeline

Added -l parameter to the prepDE command line. The length is calculated from the read string used in the demux by adding all T values. If the data comes from more than one sequencing run and has been demuxed with different settings, the average value is used.

Location:
extensions/net.sf.basedb.reggie/branches/4.27-stable/src/net/sf/basedb/reggie/grid
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • extensions/net.sf.basedb.reggie/branches/4.27-stable/src/net/sf/basedb/reggie/grid/PrepDEJobCreator.java

    r6006 r6007  
    143143        stringTieName = stringTieName.replace(specimen.getName(), specimen.getExternalId());
    144144      }
     145     
     146      int readLength = StringTieJobCreator.getAverageReadSize(dc, rba.getAlignedSequences(dc));
    145147
    146148      // Temporary link to the job so that it is possible to handle failed jobs in the RawOnlyVariantCallingAutoConfirmer
     
    161163      String prepDECmd = prepde_path;
    162164      prepDECmd += " -i input.lst";
    163 //      prepDECmd += " -l " + readLength; TODO!!
     165      prepDECmd += " -l " + readLength;
    164166      prepDECmd += " -g ${StringTieFolder}/gene_count.csv";
    165167      prepDECmd += " -t ${StringTieFolder}/transcript_count.csv";
  • extensions/net.sf.basedb.reggie/branches/4.27-stable/src/net/sf/basedb/reggie/grid/StringTieJobCreator.java

    r6003 r6007  
    77import java.util.List;
    88import java.util.Set;
     9import java.util.regex.Matcher;
    910import java.util.regex.Pattern;
    1011
     
    4647import net.sf.basedb.reggie.dao.BiomaterialList;
    4748import net.sf.basedb.reggie.dao.Datafiletype;
     49import net.sf.basedb.reggie.dao.DemuxedSequences;
    4850import net.sf.basedb.reggie.dao.DoNotUse;
    4951import net.sf.basedb.reggie.dao.Fileserver;
    5052import net.sf.basedb.reggie.dao.Library;
     53import net.sf.basedb.reggie.dao.MaskedSequences;
     54import net.sf.basedb.reggie.dao.MergedSequences;
    5155import net.sf.basedb.reggie.dao.Pipeline;
    5256import net.sf.basedb.reggie.dao.Rawbioassay;
     
    7175    LoggerFactory.getLogger(StringTieJobCreator.class);
    7276
     77  /**
     78    Sum all parts of the read string that generate
     79    an ouput read (eg. all T)
     80    @since 4.27.4
     81  */
     82  public static int getTotalReadSize(String readString)
     83  {
     84    Pattern p = Pattern.compile("(\\d+)T");
     85    Matcher m = p.matcher(readString);
     86    int totalReadSize = 0;
     87    while (m.find())
     88    {
     89      totalReadSize += Values.getInt(m.group(1));
     90    }
     91    return totalReadSize;
     92  }
     93 
     94  /**
     95    Helper method for getting the read string from all demuxed sequences and
     96    calculating the average length.
     97    @since 4.27.4
     98  */
     99  public static int getAverageReadSize(DbControl dc, AlignedSequences aligned)
     100  {
     101    MaskedSequences masked = aligned.getMaskedSequences(dc);
     102    MergedSequences merged = masked.getMergedSequences(dc);
     103    List<DemuxedSequences> dxList = merged.getDemuxedSequences(dc);
     104   
     105    int totalReadSize = 0;
     106    for (DemuxedSequences dx : dxList)
     107    {
     108      String readString = (String)Annotationtype.READ_STRING.getAnnotationValue(dc, dx.getItem());
     109      totalReadSize += getTotalReadSize(readString);
     110    }
     111   
     112    return totalReadSize / dxList.size();
     113  }
     114 
     115
    73116  private Software software;
    74117  private Protocol protocol;
     
    216259        alignedName = alignedName.replace(specimen.getName(), specimen.getExternalId());
    217260      }
     261     
     262      int readLength = getAverageReadSize(dc, as);
    218263     
    219264      // Create job
     
    305350      String prepDECmd = prepde_path;
    306351      prepDECmd += " -i input.lst";
    307 //      prepDECmd += " -l " + readLength; TODO!!
     352      prepDECmd += " -l " + readLength;
    308353      prepDECmd += " -g stringtie/gene_count.csv";
    309354      prepDECmd += " -t stringtie/transcript_count.csv";
Note: See TracChangeset for help on using the changeset viewer.