Changeset 3486


Ignore:
Timestamp:
Sep 15, 2015, 1:38:05 PM (8 years ago)
Author:
Nicklas Nordborg
Message:

References #809: Improve performance of demux+merge step

Run gzip with compression level 1 and start it in the background.

Location:
extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/servlet/DemuxMergeServlet.java

    r3485 r3486  
    872872            script.cmd("cat fastq.aligned/" + fragments_name + " >> " + fragmentsOut);
    873873            script.cmd("cp fastq.aligned/"+fragments_name + " " + fastqFolder + "/" + fragments_name);
    874             script.cmd("gzip -c fastq.trimmomatic.2/" + R1_name + " > " + fastqFolder + "/"+R1_name + ".gz");
    875             script.time("DONE: gzip " + R1_name);
    876             script.cmd("gzip -c fastq.trimmomatic.2/" + R2_name + " > " + fastqFolder + "/"+R2_name + ".gz");
    877             script.time("DONE: gzip " + R2_name);
     874            String pid1 = script.bkgr("gzip -1 -c fastq.trimmomatic.2/" + R1_name + " > " + fastqFolder + "/"+R1_name + ".gz");
     875            String pid2 = script.bkgr("gzip -1 -c fastq.trimmomatic.2/" + R2_name + " > " + fastqFolder + "/"+R2_name + ".gz");
     876            script.waitForProcess(pid1);
     877            script.waitForProcess(pid2);
     878            script.time("DONE: gzip " + mergeName);
    878879
    879880            script.cmd("ls -1 "+fastqFolder+"/*.fastq.gz >> " + filesOut);
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/ssh/ScriptBuilder.java

    r3485 r3486  
    11package net.sf.basedb.reggie.ssh;
    22
     3import java.util.HashMap;
     4import java.util.Map;
     5
     6import net.sf.basedb.core.InvalidDataException;
    37import net.sf.basedb.reggie.Reggie;
    48
     
    1216  private final StringBuilder script;
    1317  private final String jobFolder;
     18 
     19  private int nextBgPid;
     20  private Map<String, String> bgProcesses;
    1421 
    1522  /**
     
    95102 
    96103  /**
    97     Adds a command that is executed in the background
     104    Adds a command that is executed in the background. The return value is the
     105    name of a variable containing the process id of the background task. Use this
     106    in {@link #waitForProcess(String)} to add a wait statement that also catches the
     107    exit code of the background task.
     108    @since 3.6
    98109  */
    99   @Deprecated
    100   public void bkgr(String cmd)
     110  public String bkgr(String cmd)
    101111  {
     112    if (bgProcesses == null) bgProcesses = new HashMap<String, String>();
     113    String pid = "BG"+nextBgPid;
     114    nextBgPid++;
     115    bgProcesses.put(pid, cmd);
    102116    script.append(cmd).append(" &\n");
     117    script.append(pid).append("=$!\n");
     118    return pid;
    103119  }
    104120 
     
    107123    Note that it is only the batch as a whole that is executed in
    108124    the background, the individual commands are executed in serial.
     125    The return value is the name of a variable containing the process
     126    id of the background task. Use this in {@link #waitForProcess(String)} to add
     127    a wait statement that also catches the exit code of the background task.
     128    @since 3.6
    109129  */
    110   @Deprecated
    111   public void bkgr(String... cmds)
     130  public String bkgr(String... cmds)
    112131  {
    113     script.append("{ ");
     132    StringBuilder cmd = new StringBuilder();
     133    cmd.append("{ ");
    114134    for (String c : cmds)
    115135    {
    116       script.append(c).append("; ");
     136      cmd.append(c).append("; ");
    117137    }
    118     script.append(" } &\n");
     138    cmd.append(" }");
     139    return bkgr(cmd.toString());
    119140  }
    120141
     142  /**
     143    Wait for the given process to end.
     144    @param pid The process id variable name returned by {@link #bkgr(String)}.
     145    @since 3.6
     146  */
     147  public void waitForProcess(String pid)
     148  {
     149    if (bgProcesses == null || !bgProcesses.containsKey(pid))
     150    {
     151      throw new InvalidDataException("Unknown background process id: " + pid);
     152    }
     153    bgProcesses.remove(pid);
     154    script.append("wait $").append(pid).append("\n");
     155  }
     156
     157 
    121158  /**
    122159    Get the current length of the script.
     
    128165 
    129166  /**
    130     Get the generated script.
     167    Get the generated script. Throws an exception if {@link #waitForProcess(String)} has not
     168    been called for every command started in the background (via {@link #bkgr(String)} or {@link #bkgr(String...)}).
    131169  */
    132170  @Override
    133171  public String toString()
    134172  {
     173    if (bgProcesses != null && bgProcesses.size() > 0)
     174    {
     175      throw new InvalidDataException("The script start background tasks, but does not wait for them: " + bgProcesses);
     176    }
    135177    return script.toString();
    136178  }
Note: See TracChangeset for help on using the changeset viewer.