Changeset 6005


Ignore:
Timestamp:
Sep 17, 2020, 1:25:48 PM (13 months ago)
Author:
Nicklas Nordborg
Message:

References #1266: Run prepDE.py in the StringTie pipeline

Added PrepDEJobCreator, which generates jobs for running prepDE.py on existing StringTie raw bioassays. The wizard simply submits jobs in batches of 500 for all existing raw bioassays that don't already have count data.

Location:
extensions/net.sf.basedb.reggie/branches/4.27-stable
Files:
1 added
5 edited

Legend:

Unmodified
Added
Removed
  • extensions/net.sf.basedb.reggie/branches/4.27-stable/resources/analysis/prepde.js

    r6004 r6005  
    122122    url += '&cmd=RunPrepDE';
    123123
    124     Wizard.showLoadingAnimation('Performing registration...');
     124    Wizard.showLoadingAnimation('Performing registration...', 'prepde-progress');
    125125    Wizard.asyncJsonRequest(url, prepde.submissionResults, 'POST', JSON.stringify(submitInfo));
    126126  }
  • extensions/net.sf.basedb.reggie/branches/4.27-stable/src/net/sf/basedb/reggie/dao/Subtype.java

    r5922 r6005  
    346346  */
    347347  public static final Subtype SSP_JOB = new Subtype("Single Sample Predictor", null, null, null, Item.JOB, false);
     348
     349  /**
     350    The definition of the prepDE job.
     351    @since 4.27.4
     352  */
     353  public static final Subtype PREPDE_JOB = new Subtype("prepDE.py", null, null, null, Item.JOB, false);
     354
    348355 
    349356  /**
  • extensions/net.sf.basedb.reggie/branches/4.27-stable/src/net/sf/basedb/reggie/grid/JobCompletionHandlerFactory.java

    r5826 r6005  
    9999          action = new StringTieJobCreator.StringTieJobCompletionHandler();
    100100        }
     101        else if (jobType.equals(Subtype.PREPDE_JOB.get(dc)))
     102        {
     103          action = new PrepDEJobCreator.PrepDEJobCompletionHandler();
     104        }
    101105        else if (jobType.equals(Subtype.MBAF_JOB.get(dc)))
    102106        {
  • extensions/net.sf.basedb.reggie/branches/4.27-stable/src/net/sf/basedb/reggie/servlet/InstallServlet.java

    r6000 r6005  
    342342        jsonChecks.add(checkSubtype(dc, Subtype.VARIANT_STATISTICS_JOB, null, createIfMissing));
    343343        jsonChecks.add(checkSubtype(dc, Subtype.SSP_JOB, null, createIfMissing));
     344        jsonChecks.add(checkSubtype(dc, Subtype.PREPDE_JOB, null, createIfMissing));
    344345        jsonChecks.add(checkSubtype(dc, Subtype.REPORT_SOFTWARE, null, createIfMissing));
    345346        jsonChecks.add(checkSubtype(dc, Subtype.REPORT_JOB, null, createIfMissing));
  • extensions/net.sf.basedb.reggie/branches/4.27-stable/src/net/sf/basedb/reggie/servlet/StringTieServlet.java

    r5923 r6005  
    33import java.io.IOException;
    44import java.util.ArrayList;
     5import java.util.Collections;
    56import java.util.List;
    67
     
    2728import net.sf.basedb.core.RawBioAssay;
    2829import net.sf.basedb.core.SessionControl;
     30import net.sf.basedb.core.SimpleProgressReporter;
    2931import net.sf.basedb.core.Software;
    3032import net.sf.basedb.core.Trashcan;
     
    4850import net.sf.basedb.reggie.dao.Datafiletype;
    4951import net.sf.basedb.reggie.dao.Library;
     52import net.sf.basedb.reggie.dao.Pipeline;
    5053import net.sf.basedb.reggie.dao.Rawbioassay;
    5154import net.sf.basedb.reggie.dao.Rawdatatype;
     
    5356import net.sf.basedb.reggie.dao.Rna;
    5457import net.sf.basedb.reggie.dao.Subtype;
     58import net.sf.basedb.reggie.grid.PrepDEJobCreator;
    5559import net.sf.basedb.reggie.grid.ScriptUtil;
    5660import net.sf.basedb.reggie.grid.StringTieJobCreator;
     61import net.sf.basedb.reggie.query.AnyToAnyRestriction;
    5762import net.sf.basedb.util.Values;
    5863import net.sf.basedb.util.error.ThrowableUtil;
     
    221226        json.put("rawBioAssays", jsonRawBioAssays);
    222227      }
     228      else if ("CountStringTieWithoutPrepDE".equals(cmd))
     229      {
     230        dc = sc.newDbControl();
     231       
     232        ItemQuery<RawBioAssay> query = RawBioAssay.getQuery();
     233        query.setIncludes(Reggie.INCLUDE_IN_CURRENT_PROJECT);
     234        Rawdatatype.STRINGTIE.addFilter(dc, query);
     235        Pipeline.RNASEQ_HISAT_STRINGTIE.addFilter(dc, query);
     236        // Must have a ANALYSIS_RESULT=Successful annotation
     237        query.join(Annotations.leftJoin(null, Annotationtype.ANALYSIS_RESULT.load(dc), "ar"));
     238        query.restrict(Restrictions.eq(Hql.alias("ar"), Expressions.string(Rawbioassay.FEATURE_EXTRACTION_SUCCESSFUL)));
     239        // Must not have 'gene_count.csv' already
     240        query.restrict(AnyToAnyRestriction.missing("gene_count.csv", null));
     241        json.put("countRawBioAssays", query.count(dc));
     242      }
    223243     
    224244    }
     
    461481          }
    462482        }
     483      }
     484      else if ("RunPrepDE".equals(cmd))
     485      {
     486        dc = sc.newDbControl();
     487
     488        ReggieRole.checkPermission(dc, "'" + cmd + "' wizard", ReggieRole.SECONDARY_ANALYSIS, ReggieRole.ADMINISTRATOR);
     489
     490        SimpleProgressReporter progress = new SimpleProgressReporter(null);
     491        sc.setSessionSetting("prepde-progress", progress);
     492        progress.display(1, "Loading StringTie raw bioassays...");
     493
     494        JSONObject jsonReq = JsonUtil.parseRequest(req);
     495        String clusterId = (String)jsonReq.get("cluster");
     496        boolean debug = Boolean.TRUE.equals(jsonReq.get("debug"));
     497        Number priority = (Number)jsonReq.get("priority");
     498        OpenGridCluster cluster = OpenGridService.getInstance().getClusterById(dc, clusterId);
     499        if (cluster == null)
     500        {
     501          throw new ItemNotFoundException("OpenGridCluster[" + clusterId + "]");
     502        }
     503       
     504        PrepDEJobCreator prepDE = new PrepDEJobCreator();
     505        prepDE.setDebug(debug);
     506        prepDE.setPriority(priority == null ? null : priority.intValue());
     507
     508        ItemQuery<RawBioAssay> query = RawBioAssay.getQuery();
     509        query.setIncludes(Reggie.INCLUDE_IN_CURRENT_PROJECT);
     510        Rawdatatype.STRINGTIE.addFilter(dc, query);
     511        Pipeline.RNASEQ_HISAT_STRINGTIE.addFilter(dc, query);
     512        // Must have a ANALYSIS_RESULT=Successful annotation
     513        query.join(Annotations.leftJoin(null, Annotationtype.ANALYSIS_RESULT.load(dc), "ar"));
     514        query.restrict(Restrictions.eq(Hql.alias("ar"), Expressions.string(Rawbioassay.FEATURE_EXTRACTION_SUCCESSFUL)));
     515        // Must not have 'gene_count.csv' already
     516        query.restrict(AnyToAnyRestriction.missing("gene_count.csv", null));
     517        query.order(Orders.asc(Hql.property("name"))); // Predicatable sort order is important since we loading 500 at a time
     518        query.order(Orders.asc(Hql.property("id")));
     519        query.setMaxResults(500);
     520       
     521        int totalCount = (int)query.count(dc);
     522        dc.close();
     523       
     524        // We create jobs with max 500 raw bioassays in each job
     525        // Each batch uses a separate transaction to minimize problems in case something goes wrong
     526        int currentCount = 0;
     527        try
     528        {
     529          while (true)
     530          {
     531            dc = sc.newDbControl();
     532            List<Rawbioassay> stringTie = Rawbioassay.toList(query.list(dc));
     533            if (stringTie.size() == 0) break;
     534           
     535            currentCount += stringTie.size();
     536            progress.display(5+(90*currentCount) / totalCount, "Submitting " + currentCount + " of " + totalCount + " items...");
     537           
     538            // For debugging
     539            //stringTie = stringTie.subList(0, 2);
     540           
     541            JobDefinition jobDef = prepDE.createPrepDEJob(dc, cluster, stringTie);
     542            Job job = ScriptUtil.submitJobs(dc, cluster, Collections.singletonList(jobDef)).get(0);
     543            dc.commit();
     544           
     545            if (job.getStatus() == Job.Status.ERROR)
     546            {
     547              jsonMessages.add("[Error]Job submission failed: " + job.getStatusMessage());
     548              break;
     549            }
     550            else
     551            {
     552              jsonMessages.add("Submitted prepDE.py (" + stringTie.size() + " rawbioassays) job to " + cluster.getConnectionInfo().getName() + " with id " + job.getExternalId());
     553            }
     554            query.setFirstResult(query.getMaxResults()+query.getFirstResult());
     555          }
     556
     557        }
     558        catch (RuntimeException ex)
     559        {
     560          jsonMessages.add("[Error]Job submission failed: " + ex.getMessage());
     561        }
     562        finally
     563        {
     564          dc.close();
     565        }
     566
    463567      }
    464568
Note: See TracChangeset for help on using the changeset viewer.