Changeset 5766


Ignore:
Timestamp:
Dec 2, 2019, 10:52:46 AM (3 years ago)
Author:
Nicklas Nordborg
Message:

References #1208: Implement wizard for building database of variant frequencies in SCAN-B samples

Implemented an export step that creates a file with patient and VCF file information. This file is intended to be used by the statistics script.

Location:
extensions/net.sf.basedb.reggie/trunk
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • extensions/net.sf.basedb.reggie/trunk/resources/analysis/vcall_build.js

    r5765 r5766  
    189189    url += '&cmd=BuildVariantStatistics';
    190190
    191     Wizard.showLoadingAnimation('Performing registration...');
     191    Wizard.showLoadingAnimation('Performing registration...', 'variant-statistics-progress');
    192192    Wizard.asyncJsonRequest(url, vcall.submissionResults, 'POST', JSON.stringify(submitInfo));
    193193  }
  • extensions/net.sf.basedb.reggie/trunk/resources/analysis/vcall_build.jsp

    r5763 r5766  
    121121   
    122122    <div id="wizard-status"></div>
     123    <div id="wizard-progress"></div>
    123124 
    124125    <table class="navigation" id="navigation">
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/grid/VariantStatisticsJobCreator.java

    r5765 r5766  
    11package net.sf.basedb.reggie.grid;
    22
     3import java.io.StringWriter;
    34import java.util.Arrays;
     5import java.util.List;
     6import java.util.regex.Matcher;
     7import java.util.regex.Pattern;
    48
    59import org.slf4j.Logger;
    610import org.slf4j.LoggerFactory;
    711
     12import net.sf.basedb.core.BioSource;
    813import net.sf.basedb.core.DbControl;
     14import net.sf.basedb.core.DerivedBioAssay;
     15import net.sf.basedb.core.InvalidDataException;
    916import net.sf.basedb.core.ItemList;
    1017import net.sf.basedb.core.ItemNotFoundException;
    1118import net.sf.basedb.core.ItemParameterType;
     19import net.sf.basedb.core.ItemQuery;
    1220import net.sf.basedb.core.Job;
     21import net.sf.basedb.core.ProgressReporter;
    1322import net.sf.basedb.core.SessionControl;
     23import net.sf.basedb.core.Type;
     24import net.sf.basedb.core.query.Expressions;
     25import net.sf.basedb.core.query.Hql;
     26import net.sf.basedb.core.query.Restrictions;
     27import net.sf.basedb.core.snapshot.SnapshotManager;
    1428import net.sf.basedb.opengrid.JobDefinition;
    1529import net.sf.basedb.opengrid.OpenGridCluster;
     
    1731import net.sf.basedb.opengrid.config.ClusterConfig;
    1832import net.sf.basedb.opengrid.config.JobConfig;
     33import net.sf.basedb.opengrid.filetransfer.StringUploadSource;
    1934import net.sf.basedb.reggie.Reggie;
    2035import net.sf.basedb.reggie.XmlConfig;
     36import net.sf.basedb.reggie.dao.Annotationtype;
     37import net.sf.basedb.reggie.dao.Subtype;
    2138import net.sf.basedb.util.Values;
     39import net.sf.basedb.util.export.TableWriter;
    2240
    2341/**
     
    6179    @return The corresponding job in BASE
    6280  */
    63   public Job createVariantStatisticsJob(DbControl dc, OpenGridCluster cluster, ItemList list)
     81  @SuppressWarnings("unchecked")
     82  public Job createVariantStatisticsJob(DbControl dc, OpenGridCluster cluster, ItemList list, ProgressReporter progress)
    6483  {
    6584    SessionControl sc = dc.getSessionControl();
     
    94113    // TODO -- Create script here
    95114   
    96     // TODO -- we need to export a file with paths to raw VCF:s and patient information
    97    
    98    
    99115    script.progress(99, "Cleaning up temporary folders");
    100116
     117    // Export a list with PATIENT name and path to RAW VCF file
     118    StringWriter vcfList = new StringWriter(list.getSize()*80);
     119    TableWriter tw = new TableWriter(vcfList);
     120    // For each alignment we need to find the patient. We design a query
     121    // to load it from the specimen name, which we get by extracting the
     122    // leading part of the alignment name
     123    ItemQuery<BioSource> findPatBySpecimen = BioSource.getQuery();
     124    Subtype.PATIENT.addFilter(dc, findPatBySpecimen);
     125    findPatBySpecimen.setIncludes(Reggie.INCLUDE_IN_CURRENT_PROJECT);
     126    findPatBySpecimen.join(Hql.innerJoin("childCreationEvents", "cce"));
     127    findPatBySpecimen.join(Hql.innerJoin("cce", "event", "evt"));
     128    findPatBySpecimen.join(Hql.innerJoin("evt", "bioMaterial", "cse")); // 'cse' should now reference a case
     129    findPatBySpecimen.join(Hql.innerJoin("cse", "childCreationEvents", "cce2"));
     130    findPatBySpecimen.join(Hql.innerJoin("cce2", "event", "evt2"));
     131    findPatBySpecimen.join(Hql.innerJoin("evt2", "bioMaterial", "spm")); // 'spm' should now reference a specimen/nospecimen
     132    findPatBySpecimen.restrict(Restrictions.eq(Hql.property("spm", "name"), Expressions.parameter("specimen")));
     133   
     134    ItemQuery<DerivedBioAssay> query = (ItemQuery<DerivedBioAssay>)list.getMembers();
     135    query.setIncludes(Reggie.INCLUDE_IN_CURRENT_PROJECT);
     136    List<DerivedBioAssay> alignments = query.list(dc);
     137   
     138    SnapshotManager manager = new SnapshotManager();
     139    int count = 0;
     140    int total = alignments.size();
     141    Pattern specimenName = Pattern.compile("(\\d+\\.\\d+)\\..*"); // Extract specimen name by taking first part of alignment name
     142    for (DerivedBioAssay alignment : alignments)
     143    {
     144      count++;
     145      if (progress != null && count % 100 == 0)
     146      {
     147        progress.display(count * 90 / total, "Exporting Patient and VCF list (" + count + " of " + total + ")...");
     148      }
     149     
     150      Matcher m = specimenName.matcher(alignment.getName());
     151      if (!m.matches())
     152      {
     153        throw new InvalidDataException("Could not get specimen name for alignment: "+ alignment.getName());
     154      }
     155     
     156      String specimen = m.group(1);
     157      findPatBySpecimen.setParameter("specimen", specimen, Type.STRING);
     158      List<BioSource> patients = findPatBySpecimen.list(dc);
     159      if (patients.size() != 1)
     160      {
     161        if (patients.size() == 0) throw new ItemNotFoundException("Could not find a patient item for alignment: " +alignment.getName());
     162        throw new InvalidDataException("Found "+ patients.size() + " patient items for alignment: " + alignment.getName());
     163      }
     164      String patient = patients.get(0).getName();
     165      String dataFolder = (String)Annotationtype.DATA_FILES_FOLDER.getAnnotationValue(dc, manager, alignment);
     166      tw.tablePrintData(patient, dataFolder+"/variants-raw.vcf.gz");
     167    }
     168    tw.flush();
     169    tw.close();
     170   
    101171    JobDefinition jobDef = new JobDefinition("VariantStatistics", jobConfig, statJob);
    102172    jobDef.setDebug(debug);
     173    jobDef.addFile(new StringUploadSource("vcflist.txt", vcfList.toString()));
    103174    jobDef.setCmd(script.toString());
    104175   
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/servlet/VariantCallingServlet.java

    r5765 r5766  
    2929import net.sf.basedb.core.RawBioAssay;
    3030import net.sf.basedb.core.SessionControl;
     31import net.sf.basedb.core.SimpleProgressReporter;
    3132import net.sf.basedb.core.Software;
    3233import net.sf.basedb.core.Trashcan;
     
    530531        }
    531532       
     533        SimpleProgressReporter progress = new SimpleProgressReporter(null);
     534        sc.setSessionSetting("variant-statistics-progress", progress);
     535       
    532536        ItemList list = ItemList.getById(dc, listId.intValue());
    533537       
     
    536540        jobCreator.setPriority(priority == null ? null : priority.intValue());
    537541
    538         Job statJob = jobCreator.createVariantStatisticsJob(dc, cluster, list);
     542        Job statJob = jobCreator.createVariantStatisticsJob(dc, cluster, list, progress);
    539543        if (statJob.getStatus() == Job.Status.ERROR)
    540544        {
     
    545549          jsonMessages.add("Submitted variant statistics job to " + cluster.getConnectionInfo().getName() + " with id " + statJob.getExternalId());
    546550        }
     551        progress.display(100, "Done");
    547552        dc.commit();
    548 
    549553      }
    550554
     
    564568      if (dc != null) dc.close();
    565569      json.writeJSONString(resp.getWriter());
     570      sc.setSessionSetting("variant-statistics-progress", null);
    566571    }
    567572   
Note: See TracChangeset for help on using the changeset viewer.