Changeset 6010


Ignore:
Timestamp:
Sep 18, 2020, 8:37:24 AM (12 months ago)
Author:
Nicklas Nordborg
Message:

Merged Reggie 4.27.4 to the trunk.

Location:
extensions/net.sf.basedb.reggie/trunk
Files:
14 edited
4 copied

Legend:

Unmodified
Added
Removed
  • extensions/net.sf.basedb.reggie/trunk

  • extensions/net.sf.basedb.reggie/trunk/resources/index.js

    r5922 r6010  
    211211      Doc.show('more-boxes-are-needed', 'inline-block');
    212212    }
     213   
     214    if (counts['stringtie-without-prepde']) Doc.show('prepde-wizard', 'list-item');
    213215   
    214216    index.displayCounterAge();
  • extensions/net.sf.basedb.reggie/trunk/resources/index.jsp

    r5922 r6010  
    838838                  title="Number of rawbioassays without a SCAN-B report PDF">∙</span>
    839839             
     840              <li id="prepde-wizard" style="display: none;"><span class="require-permission" data-role="Administrator" data-link="analysis/prepde.jsp?ID=<%=ID%>"
     841                >Run prepDE.py</span>
     842                <span class="counter" data-counter="stringtie-without-prepde"
     843                  title="Number of StringTie rawbioassays that is missing prepDE data">∙</span>
     844             
    840845              </ul>
    841846            </dd>
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/autoconfirm/AutoConfirmService.java

    r5845 r6010  
    306306      numItems += confirmRawOnlyVariantCalling(dc, manager, "variants-raw.vcf.gz");
    307307     
     308      // Temporary prepDE for existing StringTie items
     309      numItems += confirmPrepDE(dc, manager, "gene_count.csv");
     310     
    308311      // Commit now so that all items are reverted to manual flow just in case
    309312      // Actual auto-confirm and starting of next step is done later
     
    418421  }
    419422
    420  
     423  /**
     424    This will check if there are any '*.csv' AnyToAny links from a RawBioAssay (StringTie)
     425    to a JOB with ERROR status. If so, the link is removed to allowed it to be picked up for
     426    re-processing.
     427    @since 4.27.4
     428  */
     429  private int confirmPrepDE(DbControl dc, AutoConfirmManager manager, String csvName)
     430  {
     431    ItemQuery<RawBioAssay> query = RawBioAssay.getQuery();
     432    query.setIncludes(dc.getSessionControl().getActiveProjectId() != 0 ? Reggie.INCLUDE_IN_CURRENT_PROJECT : Include.ALL);
     433 
     434    // StringTie only
     435    Rawdatatype.STRINGTIE.addFilter(dc, query);
     436   
     437    // Must have '*.csv' linking to a JOB item
     438    query.restrict(AnyToAnyRestriction.exists(csvName, Item.JOB));
     439 
     440    int numItems = 0;
     441    Iterator<RawBioAssay> it = query.iterate(dc);
     442    while (it.hasNext())
     443    {
     444      numItems++;
     445      RawBioAssay stringTie = it.next();
     446      AutoConfirmer<?> ac = new PrepDEAutoConfirmer(stringTie, csvName);
     447      manager.checkRulesAndAdd(dc, ac);
     448    }
     449    return numItems;
     450  }
     451
    421452  /**
    422453    Timer for auto-confirmation
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/autoconfirm/SequencingRunAutoConfirmer.java

    r5803 r6010  
    194194      jobCreator.setPriority(priority);
    195195     
    196       DemuxDefinition demuxDef = new DemuxDefinition(sr, readString, null, true);
     196      DemuxDefinition demuxDef = new DemuxDefinition(sr, readString, null, false); 
    197197      Job demuxJob = jobCreator.createDemuxJob(dc, cluster, Arrays.asList(demuxDef), null);     
    198198      dc.commit();
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/counter/CounterService.java

    r5922 r6010  
    13851385    // ---
    13861386   
     1387    // StringTie raw bioassays without prepDE results
     1388    query = RawBioAssay.getQuery();
     1389    query.setIncludes(Reggie.INCLUDE_IN_CURRENT_PROJECT);
     1390    Rawdatatype.STRINGTIE.addFilter(dc, query);
     1391    Pipeline.RNASEQ_HISAT_STRINGTIE.addFilter(dc, query);
     1392    // Must have a ANALYSIS_RESULT=Successful annotation
     1393    query.join(Annotations.leftJoin(null, Annotationtype.ANALYSIS_RESULT.load(dc), "ar"));
     1394    query.restrict(Restrictions.eq(Hql.alias("ar"), Expressions.string(Rawbioassay.FEATURE_EXTRACTION_SUCCESSFUL)));
     1395    // Must not have 'gene_count.csv' already
     1396    query.restrict(AnyToAnyRestriction.missing("gene_count.csv", null));
     1397    query.setCacheResult(true);
     1398    json.put("stringtie-without-prepde", query.count(dc));
     1399    // ---
     1400   
    13871401    // All Cufflinks raw bioassays with a gene report pdf
    13881402    query = RawBioAssay.getQuery();
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/dao/Annotationtype.java

    r5998 r6010  
    17181718
    17191719  /**
     1720    The "Model" annotation, used for hardware items.
     1721    It is a string annotation containing the generic name of
     1722    the model of the hardware.
     1723    @since 4.27.4
     1724  */
     1725  public static final Annotationtype HARDWARE_MODEL =
     1726    new Annotationtype("Model", Type.STRING, false, Item.HARDWARE);
     1727
     1728 
     1729  /**
    17201730    The "ParameterSet" annotation, used for software items.
    17211731    It is a string annotation containing a named set of alternate
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/dao/Subtype.java

    r5922 r6010  
    346346  */
    347347  public static final Subtype SSP_JOB = new Subtype("Single Sample Predictor", null, null, null, Item.JOB, false);
     348
     349  /**
     350    The definition of the prepDE job.
     351    @since 4.27.4
     352  */
     353  public static final Subtype PREPDE_JOB = new Subtype("prepDE.py", null, null, null, Item.JOB, false);
     354
    348355 
    349356  /**
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/grid/JobCompletionHandlerFactory.java

    r5826 r6010  
    9999          action = new StringTieJobCreator.StringTieJobCompletionHandler();
    100100        }
     101        else if (jobType.equals(Subtype.PREPDE_JOB.get(dc)))
     102        {
     103          action = new PrepDEJobCreator.PrepDEJobCompletionHandler();
     104        }
    101105        else if (jobType.equals(Subtype.MBAF_JOB.get(dc)))
    102106        {
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/grid/StringTieJobCreator.java

    r5930 r6010  
    77import java.util.List;
    88import java.util.Set;
     9import java.util.regex.Matcher;
    910import java.util.regex.Pattern;
    1011
     
    2829import net.sf.basedb.core.Protocol;
    2930import net.sf.basedb.core.RawBioAssay;
     31import net.sf.basedb.core.Sample;
    3032import net.sf.basedb.core.SessionControl;
    3133import net.sf.basedb.core.Software;
     
    4547import net.sf.basedb.reggie.dao.BiomaterialList;
    4648import net.sf.basedb.reggie.dao.Datafiletype;
     49import net.sf.basedb.reggie.dao.DemuxedSequences;
    4750import net.sf.basedb.reggie.dao.DoNotUse;
    4851import net.sf.basedb.reggie.dao.Fileserver;
    4952import net.sf.basedb.reggie.dao.Library;
     53import net.sf.basedb.reggie.dao.MaskedSequences;
     54import net.sf.basedb.reggie.dao.MergedSequences;
    5055import net.sf.basedb.reggie.dao.Pipeline;
    5156import net.sf.basedb.reggie.dao.Rawbioassay;
     
    7075    LoggerFactory.getLogger(StringTieJobCreator.class);
    7176
     77  /**
     78    Sum all parts of the read string that generate
     79    an ouput read (eg. all T)
     80    @since 4.27.4
     81  */
     82  public static int getTotalReadSize(String readString)
     83  {
     84    Pattern p = Pattern.compile("(\\d+)T");
     85    Matcher m = p.matcher(readString);
     86    int totalReadSize = 0;
     87    while (m.find())
     88    {
     89      totalReadSize += Values.getInt(m.group(1));
     90    }
     91    return totalReadSize;
     92  }
     93 
     94  /**
     95    Helper method for getting the read string from all demuxed sequences and
     96    calculating the average length.
     97    @since 4.27.4
     98  */
     99  public static int getAverageReadSize(DbControl dc, AlignedSequences aligned)
     100  {
     101    MaskedSequences masked = aligned.getMaskedSequences(dc);
     102    MergedSequences merged = masked.getMergedSequences(dc);
     103    List<DemuxedSequences> dxList = merged.getDemuxedSequences(dc);
     104   
     105    int totalReadSize = 0;
     106    for (DemuxedSequences dx : dxList)
     107    {
     108      String readString = (String)Annotationtype.READ_STRING.getAnnotationValue(dc, dx.getItem());
     109      totalReadSize += getTotalReadSize(readString);
     110    }
     111   
     112    return totalReadSize / dxList.size();
     113  }
     114 
     115
    72116  private Software software;
    73117  private Protocol protocol;
     
    171215    String pipeline_scripts_path = cfg.getRequiredConfig("programs/pipeline-scripts/path", null);
    172216    String stringtie_path = cfg.getRequiredConfig("programs/stringtie/path", parameterSet);
     217    String prepde_path = stringtie_path.substring(0, stringtie_path.lastIndexOf('/'))+"/prepDE.py";
    173218
    174219    // Options for the programs
     
    202247     
    203248      Library lib = Library.get(aligned.getExtract());
     249      Sample specimen =  (Sample)lib.findSingleParent(dc, Subtype.SPECIMEN);
    204250      boolean isExternal = Reggie.isExternalItem(aligned.getName());
    205251      String archiveRoot = isExternal ? externalRoot : projectRoot;
    206252      String bamFolder = (String)Annotationtype.DATA_FILES_FOLDER.getAnnotationValue(dc, aligned);
    207253      File bamFile = Datafiletype.BAM.getFile(dc, aligned);
     254     
     255      String alignedName = ScriptUtil.checkValidScriptParameter(aligned.getName());
     256      if (specimen != null && specimen.getExternalId() != null)
     257      {
     258        // Replace SCANB-ID in alignedName with Sample.externalId
     259        alignedName = alignedName.replace(specimen.getName(), specimen.getExternalId());
     260      }
     261     
     262      int readLength = getAverageReadSize(dc, as);
    208263     
    209264      // Create job
     
    218273
    219274      // Create raw bioassay
    220       String rawName = as.getNextRawBioAssayName(dc, stringTieType);
     275      String stringTieName = as.getNextRawBioAssayName(dc, stringTieType);
    221276      RawBioAssay raw = stringTieType.createRawBioAssay(dc);
    222277      Pipeline.RNASEQ_HISAT_STRINGTIE.setAnnotation(dc, raw);
    223278      raw.setArrayDesign(arrayDesign);
    224279      raw.setJob(stringTieJob);
    225       raw.setName(rawName);
     280      raw.setName(stringTieName);
    226281      raw.setParentExtract(lib.getExtract());
    227282      raw.setSoftware(software);
     
    290345      script.cmd(stringTieCmd + " > stringtie/stringtie.out");
    291346      script.newLine();
     347     
     348      script.comment("Run prepDE");
     349      script.progress(90, "Running prepDE");
     350      String prepDECmd = prepde_path;
     351      prepDECmd += " -i input.lst";
     352      prepDECmd += " -l " + readLength;
     353      prepDECmd += " -g stringtie/gene_count.csv";
     354      prepDECmd += " -t stringtie/transcript_count.csv";
     355      script.cmd("echo " + stringTieName + " ./stringtie/transcript.gtf > input.lst");
     356      script.cmd(prepDECmd + " > stringtie/prepde.out");
     357      script.newLine();
    292358
    293359      script.progress(95, "Copying result files to project archive");
     
    297363      if (externalGroup != null)
    298364      {
    299         ScriptUtil.addChgrp(externalGroup, "${StringTieFolder}", rawName, null, script);
     365        ScriptUtil.addChgrp(externalGroup, "${StringTieFolder}", stringTieName, null, script);
    300366      }
    301367      script.cmd("ls -1 ${StringTieFolder}/* >> ${WD}/files.out");
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/plugins/release/MergedWriter.java

    r5861 r6010  
    8383      List<PooledLibrary> poolsWithLib = item.getPools(lib);
    8484     
     85      Set<String> demuxSoftware = getDemuxSoftwareNames(item, demuxedSequences);
    8586      Set<String> readStrings = getReadStrings(item, demuxedSequences);
    8687      Set<String> poolNames = getSequencedPoolNames(item, demuxedSequences, poolsWithLib);
    87       Set<String> sequencers = getSequencerSerials(item, sequencingRuns);
    88      
     88      Set<String> sequencerSerials = getSequencerSerials(item, sequencingRuns);
     89      Set<String> sequencerModels = getSequencerModels(item, sequencingRuns);
     90     
     91      jsonAnnotations.add(item.createAnnotationJSON("DemuxSoftware", Values.getString(demuxSoftware, ";", true)));
    8992      jsonAnnotations.add(item.createAnnotationJSON("ReadString", Values.getString(readStrings, ";", true)));
    90       jsonAnnotations.add(item.createAnnotationJSON("SequencerSerial", Values.getString(sequencers, ";", true)));
     93      jsonAnnotations.add(item.createAnnotationJSON("SequencerSerial", Values.getString(sequencerSerials, ";", true)));
     94      jsonAnnotations.add(item.createAnnotationJSON("SequencerModel", Values.getString(sequencerModels, ";", true)));
    9195      jsonAnnotations.add(item.createAnnotationJSON("PoolName", Values.getString(poolNames, ";", true)));
    9296     
     
    139143     
    140144      Set<String> readStrings = getReadStrings(item, demuxedSequences);
     145      Set<String> demuxSoftware = getDemuxSoftwareNames(item, demuxedSequences);
    141146      Set<String> poolNames = getSequencedPoolNames(item, demuxedSequences, poolsWithLib);
    142       Set<String> sequencers = getSequencerSerials(item, sequencingRuns);
    143      
     147      Set<String> sequencerSerials = getSequencerSerials(item, sequencingRuns);
     148      Set<String> sequencerModels = getSequencerModels(item, sequencingRuns);
     149     
     150      jsonAnnotations.add(item.createAnnotationJSON("DemuxSoftware", Values.getString(demuxSoftware, ";", true)));
    144151      jsonAnnotations.add(item.createAnnotationJSON("ReadString", Values.getString(readStrings, ";", true)));
    145       jsonAnnotations.add(item.createAnnotationJSON("SequencerSerial", Values.getString(sequencers, ";", true)));
     152      jsonAnnotations.add(item.createAnnotationJSON("SequencerSerial", Values.getString(sequencerSerials, ";", true)));
     153      jsonAnnotations.add(item.createAnnotationJSON("SequencerModel", Values.getString(sequencerModels, ";", true)));
    146154      jsonAnnotations.add(item.createAnnotationJSON("PoolName", Values.getString(poolNames, ";", true)));
    147155     
     
    183191 
    184192  /**
     193    Get the model of the sequencers that have been involved in
     194    creating the merged sequences. Typically it is a single sequencer,
     195    but there are cases where data comes from multiple sequencing
     196    runs on different sequencers.
     197    @since 4.27.4
     198  */
     199  private Set<String> getSequencerModels(CohortItem item, List<SequencingRun> sequencingRuns)
     200  {
     201    Set<String> models = new TreeSet<>();
     202    for (SequencingRun sr : sequencingRuns)
     203    {
     204      String m = (String)item.getAnnotationValue(Annotationtype.HARDWARE_MODEL, sr.getItem().getHardware());
     205      if (m != null) models.add(m);
     206    }
     207    return models;
     208  }
     209
     210 
     211  /**
    185212    Get the read strings used to demux the data in the given demuxed sequences.
    186213    Typically the same read string is used for all data, but there are cases
     
    196223    }
    197224    return readStrings;
     225  }
     226 
     227  /**
     228    Get the name of the software used in demuxing. A merged item **should**
     229    have the same software for all DemuxedSequences. We do not check this
     230    but return all names that we find.
     231    @since 4.27.4
     232  */
     233  private Set<String> getDemuxSoftwareNames(CohortItem item, List<DemuxedSequences> demuxedSequences)
     234  {
     235    Set<String> softwareNames = new TreeSet<>();
     236    for (DemuxedSequences dx : demuxedSequences)
     237    {
     238      String software = getName(dx.getItem().getSoftware());
     239      if (software != null) softwareNames.add(software);
     240    }
     241    return softwareNames;
    198242  }
    199243 
     
    239283    mergedFactory.createFileType(Datafiletype.FASTQ);
    240284   
     285    mergedFactory.createAnnotationType("DemuxSoftware", Type.STRING);
    241286    mergedFactory.createAnnotationType("PoolName", Type.STRING);
     287    mergedFactory.createAnnotationType("SequencerModel", Type.STRING); // Annotationtype.HARDWARE_MODEL
    242288    mergedFactory.createAnnotationType("SequencerSerial", Type.STRING);
    243289    mergedFactory.createAnnotationType("ReadString", Type.STRING);
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/plugins/release/StringTieWriter.java

    r5928 r6010  
    5353    this.fileFilterStringTie = new FileListFilter(new HashSet<String>(Arrays.asList(
    5454      "gene.tsv", "transcript.gtf", "e2t.ctab", "e_data.ctab", "i2t.ctab", "i_data.ctab",
    55       "t_data.ctab", "transcript_covered.gtf"
     55      "t_data.ctab", "transcript_covered.gtf", "gene_count.csv", "transcript_count.csv"
    5656        )), true, new FileLocationFilter(Location.EXTERNAL, true));
    5757   
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/servlet/InstallServlet.java

    r5973 r6010  
    342342        jsonChecks.add(checkSubtype(dc, Subtype.VARIANT_STATISTICS_JOB, null, createIfMissing));
    343343        jsonChecks.add(checkSubtype(dc, Subtype.SSP_JOB, null, createIfMissing));
     344        jsonChecks.add(checkSubtype(dc, Subtype.PREPDE_JOB, null, createIfMissing));
    344345        jsonChecks.add(checkSubtype(dc, Subtype.REPORT_SOFTWARE, null, createIfMissing));
    345346        jsonChecks.add(checkSubtype(dc, Subtype.REPORT_JOB, null, createIfMissing));
     
    676677        jsonChecks.add(checkAnnotationType(dc, Annotationtype.BARCODE_SEQUENCE_2, 1, null, createIfMissing, effectivePermissionsUse));
    677678        jsonChecks.add(checkAnnotationType(dc, Annotationtype.SERIAL_NUMBER, 1, null, createIfMissing, effectivePermissionsUse));
     679        jsonChecks.add(checkAnnotationType(dc, Annotationtype.HARDWARE_MODEL, 1, null, createIfMissing, effectivePermissionsUse));
    678680       
    679681        jsonChecks.add(checkAnnotationType(dc, Annotationtype.PARAMETER_SET, 1, null, createIfMissing, secondaryAnalysisUse));
     
    10501052       
    10511053        jsonChecks.add(checkAnnotationTypeCategory(dc, Subtype.SEQUENCER, createIfMissing,
    1052           Annotationtype.SERIAL_NUMBER, Annotationtype.FLOWCELL_TYPE
     1054          Annotationtype.SERIAL_NUMBER, Annotationtype.HARDWARE_MODEL, Annotationtype.FLOWCELL_TYPE
    10531055        ));
    10541056       
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/servlet/StringTieServlet.java

    r5923 r6010  
    33import java.io.IOException;
    44import java.util.ArrayList;
     5import java.util.Collections;
    56import java.util.List;
    67
     
    2728import net.sf.basedb.core.RawBioAssay;
    2829import net.sf.basedb.core.SessionControl;
     30import net.sf.basedb.core.SimpleProgressReporter;
    2931import net.sf.basedb.core.Software;
    3032import net.sf.basedb.core.Trashcan;
     
    4850import net.sf.basedb.reggie.dao.Datafiletype;
    4951import net.sf.basedb.reggie.dao.Library;
     52import net.sf.basedb.reggie.dao.Pipeline;
    5053import net.sf.basedb.reggie.dao.Rawbioassay;
    5154import net.sf.basedb.reggie.dao.Rawdatatype;
     
    5356import net.sf.basedb.reggie.dao.Rna;
    5457import net.sf.basedb.reggie.dao.Subtype;
     58import net.sf.basedb.reggie.grid.PrepDEJobCreator;
    5559import net.sf.basedb.reggie.grid.ScriptUtil;
    5660import net.sf.basedb.reggie.grid.StringTieJobCreator;
     61import net.sf.basedb.reggie.query.AnyToAnyRestriction;
    5762import net.sf.basedb.util.Values;
    5863import net.sf.basedb.util.error.ThrowableUtil;
     
    221226        json.put("rawBioAssays", jsonRawBioAssays);
    222227      }
     228      else if ("CountStringTieWithoutPrepDE".equals(cmd))
     229      {
     230        dc = sc.newDbControl();
     231       
     232        ItemQuery<RawBioAssay> query = RawBioAssay.getQuery();
     233        query.setIncludes(Reggie.INCLUDE_IN_CURRENT_PROJECT);
     234        Rawdatatype.STRINGTIE.addFilter(dc, query);
     235        Pipeline.RNASEQ_HISAT_STRINGTIE.addFilter(dc, query);
     236        // Must have a ANALYSIS_RESULT=Successful annotation
     237        query.join(Annotations.leftJoin(null, Annotationtype.ANALYSIS_RESULT.load(dc), "ar"));
     238        query.restrict(Restrictions.eq(Hql.alias("ar"), Expressions.string(Rawbioassay.FEATURE_EXTRACTION_SUCCESSFUL)));
     239        // Must not have 'gene_count.csv' already
     240        query.restrict(AnyToAnyRestriction.missing("gene_count.csv", null));
     241        json.put("countRawBioAssays", query.count(dc));
     242      }
    223243     
    224244    }
     
    461481          }
    462482        }
     483      }
     484      else if ("RunPrepDE".equals(cmd))
     485      {
     486        dc = sc.newDbControl();
     487
     488        ReggieRole.checkPermission(dc, "'" + cmd + "' wizard", ReggieRole.SECONDARY_ANALYSIS, ReggieRole.ADMINISTRATOR);
     489
     490        SimpleProgressReporter progress = new SimpleProgressReporter(null);
     491        sc.setSessionSetting("prepde-progress", progress);
     492        progress.display(1, "Loading StringTie raw bioassays...");
     493
     494        JSONObject jsonReq = JsonUtil.parseRequest(req);
     495        String clusterId = (String)jsonReq.get("cluster");
     496        boolean debug = Boolean.TRUE.equals(jsonReq.get("debug"));
     497        Number priority = (Number)jsonReq.get("priority");
     498        OpenGridCluster cluster = OpenGridService.getInstance().getClusterById(dc, clusterId);
     499        if (cluster == null)
     500        {
     501          throw new ItemNotFoundException("OpenGridCluster[" + clusterId + "]");
     502        }
     503       
     504        PrepDEJobCreator prepDE = new PrepDEJobCreator();
     505        prepDE.setDebug(debug);
     506        prepDE.setPriority(priority == null ? null : priority.intValue());
     507
     508        ItemQuery<RawBioAssay> query = RawBioAssay.getQuery();
     509        query.setIncludes(Reggie.INCLUDE_IN_CURRENT_PROJECT);
     510        Rawdatatype.STRINGTIE.addFilter(dc, query);
     511        Pipeline.RNASEQ_HISAT_STRINGTIE.addFilter(dc, query);
     512        // Must have a ANALYSIS_RESULT=Successful annotation
     513        query.join(Annotations.leftJoin(null, Annotationtype.ANALYSIS_RESULT.load(dc), "ar"));
     514        query.restrict(Restrictions.eq(Hql.alias("ar"), Expressions.string(Rawbioassay.FEATURE_EXTRACTION_SUCCESSFUL)));
     515        // Must not have 'gene_count.csv' already
     516        query.restrict(AnyToAnyRestriction.missing("gene_count.csv", null));
     517        query.order(Orders.asc(Hql.property("name"))); // Predictable sort order is important since we are loading 500 at a time
     518        query.order(Orders.asc(Hql.property("id")));
     519        query.setMaxResults(500);
     520       
     521        int totalCount = (int)query.count(dc);
     522        dc.close();
     523       
     524        // We create jobs with max 500 raw bioassays in each job
     525        // Each batch uses a separate transaction to minimize problems in case something goes wrong
     526        int currentCount = 0;
     527        try
     528        {
     529          while (true)
     530          {
     531            dc = sc.newDbControl();
     532            List<Rawbioassay> stringTie = Rawbioassay.toList(query.list(dc));
     533            if (stringTie.size() == 0) break;
     534           
     535            currentCount += stringTie.size();
     536            progress.display(5+(90*currentCount) / totalCount, "Submitting " + currentCount + " of " + totalCount + " items...");
     537           
     538            // For debugging
     539            // stringTie = stringTie.subList(0, 2);
     540           
     541            JobDefinition jobDef = prepDE.createPrepDEJob(dc, cluster, stringTie);
     542            Job job = ScriptUtil.submitJobs(dc, cluster, Collections.singletonList(jobDef)).get(0);
     543            dc.commit();
     544           
     545            if (job.getStatus() == Job.Status.ERROR)
     546            {
     547              jsonMessages.add("[Error]Job submission failed: " + job.getStatusMessage());
     548              break;
     549            }
     550            else
     551            {
     552              jsonMessages.add("Submitted prepDE.py (" + stringTie.size() + " rawbioassays) job to " + cluster.getConnectionInfo().getName() + " with id " + job.getExternalId());
     553            }
     554            // NOTE! query.setFirstResult() is not needed since the previous 500 will get a temporary "gene_count.csv" link
     555            // query.setFirstResult(query.getMaxResults()+query.getFirstResult());
     556          }
     557
     558        }
     559        catch (RuntimeException ex)
     560        {
     561          jsonMessages.add("[Error]Job submission failed: " + ex.getMessage());
     562        }
     563        finally
     564        {
     565          dc.close();
     566        }
     567
    463568      }
    464569
Note: See TracChangeset for help on using the changeset viewer.