Changeset 5721


Ignore:
Timestamp:
Nov 13, 2019, 8:53:49 AM (3 years ago)
Author:
Nicklas Nordborg
Message:

References #1199: Implement Variant calling pipeline

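Renamed the raw variant file from variants-all.vcf.gz to variants-raw.vcf.gz, and the VariantsAll annotation type to VariantsRaw. Fixed the annotations so that they are attached to the correct item and their values are set from the statistics parsed from the job output: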

  • VariantsRaw and CallableBases: belong to the Hisat alignment (a derived bioassay)
  • VariantsFiltered: belongs to the VariantCall raw bioassay
Location:
extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie
Files:
7 edited

Legend:

Unmodified
Added
Removed
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/autoconfirm/AutoConfirmService.java

    r5692 r5721  
    291291      // Check if there are any failed mBAF analysis or variant calling jobs
    292292      numItems += confirmMBafAnalysis(dc, manager, "mbaf_genotype.vcf");
    293       numItems += confirmVariantCalling(dc, manager, "variants-all.vcf.gz");
     293      numItems += confirmVariantCalling(dc, manager, "variants-raw.vcf.gz");
    294294     
    295295      // Commit now so that all items are reverted to manual flow just in case
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/autoconfirm/VariantCallAutoConfirmer.java

    r5692 r5721  
    1212  Auto-confirm implementation for alignments after running variant calling.
    1313  This auto-confirmer is needed to handle the case when the analysis fails.
    14   In this case the 'variants-all.vcf.gz' will be linked to the failed JOB instead
     14  In this case the 'variants-raw.vcf.gz' will be linked to the failed JOB instead
    1515  of the VCF.
    1616 
    17   If the auto-confirmer finds a failed job, it will delete the 'variants-all.vcf.gz'
     17  If the auto-confirmer finds a failed job, it will delete the 'variants-raw.vcf.gz'
    1818  link, add the alignment to the {@link BiomaterialList#VARIANT_CALLING_PIPELINE} list
    1919  and set  the {@link Annotationtype#AUTO_PROCESSING} to ReProcess.
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/dao/Annotationtype.java

    r5706 r5721  
    18371837
    18381838  /**
    1839     Number of callable bases. Integer.
     1839    Number of callable bases. Integer. Belongs to the alignment.
    18401840    @since 4.24
    18411841  */
    18421842  public static final Annotationtype CALLABLE_BASES =
    1843     new Annotationtype("CallableBases", Type.INT, false, Item.RAWBIOASSAY);
    1844  
    1845   /**
    1846     Number of raw variants found. Integer.
     1843    new Annotationtype("CallableBases", Type.INT, false, Item.DERIVEDBIOASSAY);
     1844 
     1845  /**
     1846    Number of raw variants found. Integer. Belongs to the alignment.
    18471847    @since 4.24
    18481848  */
    1849   public static final Annotationtype VARIANTS_ALL =
    1850     new Annotationtype("VariantsAll", Type.INT, false, Item.RAWBIOASSAY);
    1851 
    1852   /**
    1853     Number of variants passed filter. Integer.
     1849  public static final Annotationtype VARIANTS_RAW =
     1850    new Annotationtype("VariantsRaw", Type.INT, false, Item.DERIVEDBIOASSAY);
     1851
     1852  /**
     1853    Number of variants passed filter. Integer. Belongs to the child raw bioassay.
    18541854    @since 4.24
    18551855  */
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/grid/VariantCallingJobCreator.java

    r5720 r5721  
    222222      vcallPipeline.removeItem(aligned);
    223223     
    224       File rawVariants = as.getLinkedFile(dc, "variants-all.vcf.gz");
     224      File rawVariants = as.getLinkedFile(dc, "variants-raw.vcf.gz");
    225225      if (rawVariants != null && skipRaw && rawOnly)
    226226      {
     
    300300      {
    301301        // TODO -- what about filtering mode??
    302         AnyToAny link = AnyToAny.getNewOrExisting(dc, aligned, "variants-all.vcf.gz", vCallJob, false);
     302        AnyToAny link = AnyToAny.getNewOrExisting(dc, aligned, "variants-raw.vcf.gz", vCallJob, false);
    303303        if (!link.isInDatabase()) dc.saveItem(link);
    304304      }
     
    403403        script.cmd("tabix tmp/gc-50.vcf.gz");
    404404        script.cmd("./stderrwrap.sh " + vcfanno_path + " gc_stat.toml tmp/variants-raw-1.vcf > tmp/variants-raw-2.vcf 3>> vcfanno.out");
    405         script.cmd("cat tmp/variants-raw-2.vcf | bgzip -c > resultsraw/variants-all.vcf.gz");
     405        script.cmd("cat tmp/variants-raw-2.vcf | bgzip -c > resultsraw/variants-raw.vcf.gz");
    406406        script.newLine();
    407407      }
     
    461461        script.cmd("ls -1 resultsraw/variants-* >> ${WD}/files.out");
    462462        script.cmd("echo \"Callable bases: `awk -F'\t' 'BEGIN{SUM=0}{ SUM+=$3-$2 } END{print SUM}' resultsraw/variants-callable.bed`\" >> ${WD}/stats.out");
    463         script.cmd("echo \"All variants: `zcat resultsraw/variants-all.vcf.gz | grep -v '^#\' | wc -l`\" >> ${WD}/stats.out");
     463        script.cmd("echo \"Raw variants: `zcat resultsraw/variants-raw.vcf.gz | grep -v '^#\' | wc -l`\" >> ${WD}/stats.out");
    464464        if (externalGroup != null)
    465465        {
     
    471471        script.cmd("mkdir -p ${FilteredFolder}");
    472472        script.cmd("rm -rf ${FilteredFolder}/*");
    473         script.cmd("cp resultsfilter/variants-* ${FilteredFolder}");
    474         script.cmd("ls -1 resultsfilter/variants-* >> ${WD}/files.out");
     473        script.cmd("cp resultsfilter/* ${FilteredFolder}");
     474        script.cmd("ls -1 ${FilteredFolder}/* >> ${WD}/files.out");
    475475        script.cmd("echo \"Annotated variants: `zcat resultsfilter/variants-annotated.vcf.gz | grep -v '^#\' | wc -l`\" >> ${WD}/stats.out");
    476476        script.cmd("echo \"Filtered variants: `cat resultsfilter/variants-filtered.vcf | grep -v '^#\' | wc -l`\" >> ${WD}/stats.out");
    477477        if (externalGroup != null)
    478478        {
    479           script.cmd("chgrp -R " + externalGroup + " ${FilteredFolder}/variants-* 2>> ${WD}/chgrp.out || echo [" + aligned.getName() +"] >> ${WD}/chgrp.out" );
     479          script.cmd("chgrp -R " + externalGroup + " ${FilteredFolder}/* 2>> ${WD}/chgrp.out || echo [" + aligned.getName() +"] >> ${WD}/chgrp.out" );
    480480        }
    481481      }
     
    551551        {
    552552          FileOwner alignedOwner = FileOwner.create(dc, aligned, analysisDir);
    553           pf.parseFiles(dc, alignedOwner, filesOut, Set.of("variants-callable.bed", "variants-all.vcf.gz"));
     553          pf.parseFiles(dc, alignedOwner, filesOut, Set.of("variants-callable.bed", "variants-raw.vcf.gz"));
     554          Annotationtype.CALLABLE_BASES.setAnnotationValue(dc, aligned, pf.stat.numCallableBases);
     555          Annotationtype.VARIANTS_RAW.setAnnotationValue(dc, aligned, pf.stat.numRawVariants);
    554556        }
    555557        if (!filterSkipped)
     
    557559          FileOwner vCallOwner = FileOwner.create(dc, raw, analysisDir);
    558560          msg = pf.parseFiles(dc, vCallOwner, filesOut, Set.of("variants-annotated.vcf.gz", "variants-filtered.vcf"));
     561          Annotationtype.VARIANTS_PASSED_FILTER.setAnnotationValue(dc, raw, pf.stat.numFiltered);
    559562        }
    560563
     
    567570          else if (filterSkipped)
    568571          {
    569             msg = "Found " + pf.stat.numAllVariants + " variants; skipped filtering; ";
     572            msg = "Found " + pf.stat.numRawVariants + " variants; skipped filtering; ";
    570573            msg += Values.formatNumber(pf.stat.numCallableBases/1000000f, 1) + "M callable bases.";
    571574          }
    572575          else
    573576          {
    574             msg = "Found " + pf.stat.numAllVariants + " variants; " + pf.stat.numFiltered + " passed filter. ";
     577            msg = "Found " + pf.stat.numRawVariants + " variants; " + pf.stat.numFiltered + " passed filter. ";
    575578            msg += Values.formatNumber(pf.stat.numCallableBases/1000000f, 1) + "M callable bases.";
    576579          }
     
    597600  {
    598601    int numCallableBases = 0;
    599     int numAllVariants = 0;
     602    int numRawVariants = 0;
    600603    int numAnnotated = 0;
    601604    int numFiltered = 0;
     
    613616          s.numCallableBases = val;
    614617        }
    615         else if ("All variants".equals(key))
    616         {
    617           s.numAllVariants = val;
     618        else if ("Raw variants".equals(key))
     619        {
     620          s.numRawVariants = val;
    618621        }
    619622        else if ("Annotated variants".equals(key))
     
    723726        if (filename.endsWith(".vcf")) f.setMimeTypeAuto("text/plain", vcfType);
    724727       
    725         if (filename.equals("variants-all.vcf.gz"))
    726         {
    727           f.setDescription(stat.numAllVariants + " variants.");
     728        if (filename.equals("variants-raw.vcf.gz"))
     729        {
     730          f.setDescription(stat.numRawVariants + " variants.");
    728731        }
    729732        else if (filename.equals("variants-annotated.vcf.gz"))
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/pdf/ScanBReportWorker.java

    r5694 r5721  
    242242    throws IOException
    243243  {
    244     File vcfFile = aligned.getLinkedFile(dc, "variants-all.vcf.gz");
     244    File vcfFile = aligned.getLinkedFile(dc, "variants-raw.vcf.gz");
    245245    if (vcfFile == null) return Collections.singleton("No data");
    246246   
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/servlet/InstallServlet.java

    r5706 r5721  
    643643       
    644644        jsonChecks.add(checkAnnotationType(dc, Annotationtype.CALLABLE_BASES, 1, null, createIfMissing, effectivePermissionsUse));
    645         jsonChecks.add(checkAnnotationType(dc, Annotationtype.VARIANTS_ALL, 1, null, createIfMissing, effectivePermissionsUse));
     645        jsonChecks.add(checkAnnotationType(dc, Annotationtype.VARIANTS_RAW, 1, null, createIfMissing, effectivePermissionsUse));
    646646        jsonChecks.add(checkAnnotationType(dc, Annotationtype.VARIANTS_PASSED_FILTER, 1, null, createIfMissing, effectivePermissionsUse));
    647647       
     
    949949            Annotationtype.QC_GENOTYPE_STATUS, Annotationtype.QC_GENOTYPE_VERIFIED,
    950950            Annotationtype.QC_GENOTYPE_COMMENT, Annotationtype.PIPELINE,
     951            Annotationtype.CALLABLE_BASES, Annotationtype.VARIANTS_RAW,
    951952            Annotationtype.DO_NOT_USE, Annotationtype.DO_NOT_USE_COMMENT,
    952953            Annotationtype.MBAF_SNP_COUNT, Annotationtype.MBAF_SNP_COUNT_FILTERED,
     
    10061007        jsonChecks.add(checkAnnotationTypeCategory(dc, Rawdatatype.VARIANT_CALL, createIfMissing,
    10071008            Annotationtype.DATA_FILES_FOLDER, Annotationtype.PIPELINE,
    1008             Annotationtype.CALLABLE_BASES, Annotationtype.VARIANTS_ALL,
    10091009            Annotationtype.VARIANTS_PASSED_FILTER,
    10101010            Annotationtype.DO_NOT_USE, Annotationtype.DO_NOT_USE_COMMENT,
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/servlet/VariantCallingServlet.java

    r5714 r5721  
    103103        {
    104104          as.loadAnnotations(dc, "pipeline", Annotationtype.PIPELINE, null);
    105           as.setAnnotation("rawVariants", JsonUtil.loadLinkedItem(dc, as.getItem(), "variants-all.vcf.gz", Item.FILE, null));
     105          as.setAnnotation("rawVariants", JsonUtil.loadLinkedItem(dc, as.getItem(), "variants-raw.vcf.gz", Item.FILE, null));
    106106          Library lib = as.getLibrary(dc);
    107107          lib.loadBioPlateLocation();
Note: See TracChangeset for help on using the changeset viewer.