Changeset 2570


Ignore:
Timestamp:
Aug 11, 2014, 2:43:00 PM (7 years ago)
Author:
Nicklas Nordborg
Message:

References #548: Register Demux and Merge as ended

Register FASTQ file items pointing to the project archive (via the SFTP protocol). The FASTQ files are linked with their corresponding bioassays.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/servlet/DemuxMergeServlet.java

    r2566 r2570  
    3333import net.sf.basedb.core.BioMaterialList;
    3434import net.sf.basedb.core.BooleanParameterType;
     35import net.sf.basedb.core.DataFileType;
    3536import net.sf.basedb.core.DbControl;
    3637import net.sf.basedb.core.DerivedBioAssay;
     
    3839import net.sf.basedb.core.Extract;
    3940import net.sf.basedb.core.File;
     41import net.sf.basedb.core.FileServer;
     42import net.sf.basedb.core.FileSetMember;
    4043import net.sf.basedb.core.Hardware;
    4144import net.sf.basedb.core.ItemNotFoundException;
     
    6265import net.sf.basedb.reggie.dao.BiomaterialList;
    6366import net.sf.basedb.reggie.dao.BioplateType;
     67import net.sf.basedb.reggie.dao.Datafiletype;
    6468import net.sf.basedb.reggie.dao.DemuxedSequences;
     69import net.sf.basedb.reggie.dao.Fileserver;
    6570import net.sf.basedb.reggie.dao.FlowCell;
    6671import net.sf.basedb.reggie.dao.Library;
     
    117122  */
    118123  public static final String DEMULTIPLEX_METRICS_DIR = "/home/SCANB/DemultiplexMetrics";
     124 
     125  /**
     126    Path to the directory on the BASE file system where secondary analysis files
     127    are saved. This directory maps some files to the "project-archive" directory
     128    on the file server.
     129  */
     130  public static final String SECONDARY_ANALYSIS_DIR = "/home/SCANB/SecondaryAnalysis";
    119131 
    120132  private static final Logger logger =
     
    549561          String totalMetricsFile = jobFolder + "/demultiplex_metrics.txt";
    550562          String trimmomaticOut = jobFolder + "/trimmomatic.out";
     563          String filesOut = jobFolder + "/files.out";
    551564
    552565          int currentLane = 0;
     
    689702            script.progress(percent, "Trimmomatic: " + merged.getName() + " (${NumThreads} threads)");
    690703            script.cmd("echo [" + merged.getName() + "] >> " + trimmomaticOut);
     704            script.cmd("echo [" + merged.getName() + "] >> " + filesOut);
    691705            String trimCmd = "./stdwrap.sh ./trimmomatic PE";
    692706            trimCmd += " -threads ${NumThreads}";
     
    708722            script.progress(percent, "Archiving FASTQ: " + merged.getName());
    709723            script.cmd("mkdir -p " + fastqFolder);
    710             script.bkgr("cat fastq.trimmomatic/" + R1_name + " > " + fastqFolder + "/"+R1_name);
    711             script.bkgr("cat fastq.trimmomatic/" + R2_name + " > " + fastqFolder + "/"+R2_name);
     724            script.bkgr("cp fastq.trimmomatic/" + R1_name + " " + fastqFolder + "/"+R1_name);
     725            script.cmd("cp fastq.trimmomatic/" + R2_name + " " + fastqFolder + "/"+R2_name);
     726            script.cmd("ls -1 "+fastqFolder+"/*.fastq.gz >> " + filesOut);
    712727            script.newLine();
    713728          }
     
    10001015      CmdResult metrics = cluster.executeCmd(ssh, "cat " + cluster.getJobFolder() + "/" + jobStatus.getJobName() + "/demultiplex_metrics.txt", 2);
    10011016      CmdResult trimmomatic = cluster.executeCmd(ssh, "cat " + cluster.getJobFolder() + "/" + jobStatus.getJobName() + "/trimmomatic.out", 2);
     1017      CmdResult files = cluster.executeCmd(ssh, "cat " + cluster.getJobFolder() + "/" + jobStatus.getJobName() + "/files.out", 2);
    10021018      if (metrics.getExitStatus() != 0)
    10031019      {
     
    10101026      else
    10111027      {
    1012         Reads total = parseDemultiplexMetrics(sc, metrics.getStdout(), trimmomatic.getStdout());
     1028        Reads total = parseDemultiplexMetrics(sc, metrics.getStdout(), trimmomatic.getStdout(), files.getStdout());
    10131029        String msg = Values.formatNumber(total.reads/1000000f, 1) + "M reads; ";
    10141030        msg += Values.formatNumber(total.passedFilter/1000000f, 1) + "M passed filter; ";
     
    10231039    }
    10241040   
    1025     private Reads parseDemultiplexMetrics(SessionControl sc, String metrics, String trimmomatic)
     1041    private Reads parseDemultiplexMetrics(SessionControl sc, String metrics, String trimmomatic, String files)
    10261042    {
    10271043      Map<String, Reads> sumReads = new HashMap<String, Reads>();
     
    11641180      }
    11651181     
     1182      // Parse the files.out file
     1183      currentLib = null;
     1184      lineNo = 0;
     1185      for (String line : files.split("\n"))
     1186      {
     1187        lineNo++;
     1188        Matcher m = libPattern.matcher(line);
     1189        if (m.matches())
     1190        {
     1191          String libName = m.group(1);
     1192          currentLib = sumReads.get(libName);
     1193          if (currentLib == null)
     1194          {
     1195            logger.error("At line " + lineNo + ": Found files section for lib '" + libName + "' but not demultiplex metrics");
     1196          }
     1197          continue;
     1198        }
     1199        else
     1200        {
     1201          if (currentLib == null)
     1202          {
     1203            logger.error("At line " + lineNo + ": Found file data but has not found a library name");
     1204          }
     1205          else
     1206          {
     1207            currentLib.addFile(line);
     1208            if (logger.isDebugEnabled())
     1209            {
     1210              logger.debug("File: " + currentLib.libName + "; " + line);
     1211            }
     1212          }
     1213        }
     1214      }
     1215     
    11661216      DbControl dc = null;
    11671217      Reads total = new Reads(null);
     
    12091259        }
    12101260       
     1261        DataFileType fastqData = Datafiletype.FASTQ.load(dc);
     1262        ItemSubtype fastqType = fastqData.getGenericType();
     1263        FileServer projectArchive = Fileserver.PROJECT_ARCHIVE.load(dc);
    12111264        for (Reads r : sumReads.values())
    12121265        {
     
    12231276            Annotationtype.PF_READS.setAnnotationValue(dc, m, r.passedFilter);
    12241277            Annotationtype.PT_READS.setAnnotationValue(dc, m, r.passedTrimmomatic);
     1278           
     1279            // Create FASTQ file links
     1280            String dataFilesFolder = (String)Annotationtype.DATA_FILES_FOLDER.getAnnotationValue(dc, m);
     1281            Directory localDataDir = Directory.getNew(dc, new Path(SECONDARY_ANALYSIS_DIR+dataFilesFolder, Path.Type.DIRECTORY));
     1282            for (String fileName : r.files)
     1283            {
     1284              File f = File.getNew(dc, localDataDir);
     1285              f.setName(fileName.substring(fileName.lastIndexOf("/")+1));
     1286              f.setItemSubtype(fastqType);
     1287              f.setFileServer(projectArchive);
     1288              f.setDescription(r.reads + " READS; " + r.passedFilter + " PF_READS; " + r.passedTrimmomatic + " PT_READS");
     1289              String fileUrl = "sftp://" + projectArchive.getHost() + dataFilesFolder + "/" + f.getName();
     1290              try
     1291              {
     1292                f.setUrl(fileUrl, true);
     1293              }
     1294              catch (RuntimeException ex)
     1295              {
     1296                f.setUrl(fileUrl, false);
     1297              }
     1298              dc.saveItem(f);
     1299              FileSetMember member = m.getFileSet().addMember(f, fastqData);
     1300            }
     1301           
    12251302            total.reads += r.reads;
    12261303            total.passedFilter += r.passedFilter;
     
    12441321    final String libName;
    12451322    final List<String> warnings;
     1323    final List<String> files;
    12461324    long reads = 0;
    12471325    long passedFilter = 0;
     
    12521330      this.libName = libName;
    12531331      this.warnings = new ArrayList<String>();
     1332      this.files = new ArrayList<String>();
    12541333    }
    12551334   
     
    12631342    {
    12641343      this.warnings.add(warning);
     1344    }
     1345   
     1346    void addFile(String file)
     1347    {
     1348      this.files.add(file);
    12651349    }
    12661350   
Note: See TracChangeset for help on using the changeset viewer.