Changeset 6545


Ignore:
Timestamp:
Jan 21, 2022, 7:48:38 AM (7 months ago)
Author:
Nicklas Nordborg
Message:

References #1354: Search functionality for the OncoArray-500K SNP chip

Implemented first version of indexing the reference VCF file.

Location:
extensions/net.sf.basedb.varsearch/trunk/src/net/sf/basedb/varsearch/index
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • extensions/net.sf.basedb.varsearch/trunk/src/net/sf/basedb/varsearch/index/DocumentCreator.java

    r6540 r6545  
    1111import org.apache.lucene.index.IndexableField;
    1212
     13import net.sf.basedb.core.ArrayDesign;
    1314import net.sf.basedb.core.File;
    1415import net.sf.basedb.core.RawBioAssay;
     
    5051    doc.add(new IntPoint(prefix+"Id", rba.getId()));
    5152    doc.add(new NumericDocValuesField(prefix+"Id", rba.getId()));
     53  }
     54 
     55  /**
     56    Add the array design ID to the index.
     57  */
     58  void addArrayDesignFields(ArrayDesign design)
     59  {
     60    doc.add(new IntPoint("designId", design.getId()));
     61    doc.add(new StoredField("designId", design.getId()));
    5262  }
    5363 
  • extensions/net.sf.basedb.varsearch/trunk/src/net/sf/basedb/varsearch/index/Indexer.java

    r6540 r6545  
    33import java.util.concurrent.Callable;
    44
     5import net.sf.basedb.core.ArrayDesign;
    56import net.sf.basedb.core.RawBioAssay;
    67
     
    3233 
    3334  /**
     35    Get the array design that was indexed.
     36  */
     37  public ArrayDesign getArrayDesign();
     38 
     39  /**
    3440    Return TRUE if the indexing was aborted before it was completed.
    3541  */
  • extensions/net.sf.basedb.varsearch/trunk/src/net/sf/basedb/varsearch/index/LuceneIndex.java

    r6544 r6545  
    287287 
    288288  /**
     289    Re-open the current index with new instances of IndexReader, IndexSearcher, etc.
     290    This is required after the index has been updated with new documents or if
     291    documents have been deleted.
     292  */
     293  protected void reOpen()
     294    throws IOException
     295  {
     296    if (isClosing()) return;
     297    rwLock.writeLock().lock();
     298    try
     299    {
     300      cache = createQueryCache();
     301      reader = createIndexReader(directory);
     302      searcher = createIndexSearcher(reader);
     303    }
     304    finally
     305    {
     306      rwLock.writeLock().unlock();
     307    }
     308  }
     309 
     310  /**
    289311    Create a reader for reading documents and information from
    290312    the given index directory.
     
    304326  }
    305327
     328  /**
     329    Create a writer for adding documents to the index.
     330  */
     331  protected IndexWriter createIndexWriter(IndexWriterConfig config)
     332    throws IOException
     333  {
     334    return new IndexWriter(directory, config);
     335  }
     336 
    306337  /**
    307338    Create an Analyzer implementation that is used for analyzing
     
    384415    added to the prefix.
    385416  */
    386   private java.io.File getExistingOrNewPath(java.io.File pathPrefix)
     417  protected java.io.File getExistingOrNewPath(java.io.File pathPrefix)
    387418    throws IOException
    388419  {
     
    400431    }
    401432    java.io.File subDir = new java.io.File(dir, prefix+".1");
    402     if (subDir.mkdir() && !subDir.exists())
     433    if (subDir.mkdirs() && !subDir.exists())
    403434    {
    404435      throw new IOException("Could not create: "+ subDir.getAbsolutePath());
     
    412443    is found, in which case the directory is created.
    413444  */
    414   private java.io.File getNewPath(java.io.File pathPrefix)
     445  protected java.io.File getNewPath(java.io.File pathPrefix)
    415446    throws IOException
    416447  {
     
    439470    index exists already.
    440471  */
    441   private Directory createIndexIfNeeded(java.io.File indexDir)
     472  protected Directory createIndexIfNeeded(java.io.File indexDir)
    442473    throws IOException
    443474  {
     
    486517      config.setMergePolicy(mergePolicy);
    487518     
    488       writer = new IndexWriter(directory, config);     
     519      writer = createIndexWriter(config);
    489520      processed = addToIndex(dc, rawBioAssays, writer);
    490521
     
    497528      progress.display(99, "Commit complete. Cleaning up...");
    498529     
    499       rwLock.writeLock().lock();
    500       try
    501       {
    502         cache = createQueryCache();
    503         reader = createIndexReader(directory);
    504         searcher = createIndexSearcher(reader);
    505       }
    506       finally
    507       {
    508         rwLock.writeLock().unlock();
    509       }
    510 
     530      reOpen();
    511531    }
    512532    catch (Exception ex)
     
    549569    and not less than minThreads.
    550570  */
    551   private int getNumThreads(int minThreads, int maxThreads)
     571  protected int getNumThreads(int minThreads, int maxThreads)
    552572  {
    553573    int numProc = Runtime.getRuntime().availableProcessors();
     
    670690      IndexWriterConfig config = new IndexWriterConfig(analyzer);
    671691      config.setOpenMode(OpenMode.CREATE_OR_APPEND);
    672       writer = new IndexWriter(directory, config);
     692      writer = createIndexWriter(config);
    673693 
    674694      int numRba = rawBioAssays.size();
     
    694714      logger.debug("Index complete, " +numRemoved + " rawbioassays removed");
    695715     
    696       if (!isClosing())
    697       {
    698         rwLock.writeLock().lock();
    699         try
    700         {
    701           reader = createIndexReader(directory);
    702           searcher = createIndexSearcher(reader);
    703           cache = createQueryCache();
    704         }
    705         finally
    706         {
    707           rwLock.writeLock().unlock();
    708         }
    709       }
     716      reOpen();
    710717    }
    711718    catch (Exception ex)
     
    11491156  {
    11501157    if (path == null) return -1;
     1158    return getSizeOfDir(path);
     1159  }
     1160 
     1161  protected long getSizeOfDir(java.io.File dir)
     1162  {
    11511163    long size = 0;
    1152     for (java.io.File f : path.listFiles())
     1164    for (java.io.File f : dir.listFiles())
    11531165    {
    11541166      if (f.isFile()) size += f.length();
     
    11561168    return size;
    11571169  }
     1170
    11581171 
    11591172  /**
  • extensions/net.sf.basedb.varsearch/trunk/src/net/sf/basedb/varsearch/index/OncoArrayIndex.java

    r6544 r6545  
    11package net.sf.basedb.varsearch.index;
    22
    3 import java.io.File;
    43import java.io.IOException;
    54import java.util.ArrayList;
     
    1514import org.apache.lucene.document.IntPoint;
    1615import org.apache.lucene.document.Field.Store;
     16import org.apache.lucene.index.ConcurrentMergeScheduler;
     17import org.apache.lucene.index.IndexReader;
    1718import org.apache.lucene.index.IndexWriter;
     19import org.apache.lucene.index.IndexWriterConfig;
     20import org.apache.lucene.index.TieredMergePolicy;
     21import org.apache.lucene.index.IndexWriterConfig.OpenMode;
    1822import org.apache.lucene.queryparser.classic.QueryParser;
     23import org.apache.lucene.search.IndexSearcher;
    1924import org.apache.lucene.search.Query;
     25import org.apache.lucene.store.Directory;
    2026import org.slf4j.Logger;
    2127import org.slf4j.LoggerFactory;
    2228
     29import net.sf.basedb.core.ArrayDesign;
     30import net.sf.basedb.core.DbControl;
     31import net.sf.basedb.core.File;
     32import net.sf.basedb.core.Include;
     33import net.sf.basedb.core.ItemQuery;
    2334import net.sf.basedb.core.RawBioAssay;
    2435import net.sf.basedb.core.SimpleProgressReporter;
     36import net.sf.basedb.core.query.Expressions;
     37import net.sf.basedb.core.query.Hql;
     38import net.sf.basedb.core.query.Restrictions;
    2539import net.sf.basedb.util.FileUtil;
    2640import net.sf.basedb.varsearch.analyze.KeywordListAnalyzer;
     41import net.sf.basedb.varsearch.dao.Datafiletype;
    2742import net.sf.basedb.varsearch.fields.ListField;
     43import net.sf.basedb.varsearch.index.VariantCallIndex.VariantCallIndexer;
    2844import net.sf.basedb.varsearch.query.FieldAwareQueryParser;
    2945import net.sf.basedb.varsearch.query.SumFieldCollector;
     46import net.sf.basedb.varsearch.service.VarSearchService;
    3047import net.sf.basedb.varsearch.vcf.VcfParser;
    3148import net.sf.basedb.varsearch.vcf.VcfParser.VcfHeader;
     
    3754  of them as a separate document. On the other hand there is "only" about 500K
    3855  genomic locations so all annotations that are related to gene and location
    39   can be indexed in a static database (TODO!) and then we only need to have a
     56  can be indexed in a static database and then we only need to have a
    4057  single document for each rawbioassay that specify a list of SNP-ID:s that
    4158  are 0/0, 0/1 and 1/1 genotypes respectively. This complicates the query results
     
    5067  static final Logger logger = LoggerFactory.getLogger(OncoArrayIndex.class);
    5168 
     69  private java.io.File rootPathPrefix;
     70  private java.io.File rbaPathPrefix;
     71  private java.io.File refPathPrefix;
     72  private java.io.File refPath;
     73  private Directory refDirectory;
     74  private IndexReader refReader;
     75  private IndexSearcher refSearcher;
     76 
    5277  public OncoArrayIndex(String id)
    5378  {
     
    6085  */
    6186  @Override
    62   public void open(File pathPrefix, ExecutorService executor)
     87  public void open(java.io.File pathPrefix, ExecutorService executor)
    6388    throws IOException
    6489  {
    65     super.open(pathPrefix, executor);
    66     setQueryStatus(getNumIndexedSNPs() == 0 ? Status.DISABLED : Status.ENABLED);
    67   }
    68 
     90    this.rootPathPrefix = pathPrefix;
     91    this.rbaPathPrefix = new java.io.File(pathPrefix, "rba");
     92    this.refPathPrefix = new java.io.File(pathPrefix, "ref");
     93    super.open(rbaPathPrefix, executor);
     94   
     95    this.refPath = getExistingOrNewPath(refPathPrefix);
     96    this.refDirectory = createIndexIfNeeded(refPath);
     97    this.refReader = createIndexReader(refDirectory);
     98    this.refSearcher = createIndexSearcher(refReader);
     99   
     100    setQueryStatus(getNumIndexedSNPs() > 0 ? Status.ENABLED : Status.DISABLED);
     101  }
     102
     103  @Override
     104  public void close()
     105  {
     106    super.close();
     107    FileUtil.close(refReader);
     108    FileUtil.close(refDirectory);
     109    refReader = null;
     110    refSearcher = null;
     111    refDirectory = null;
     112  }
     113
     114  /**
     115    Get the path to the index directory.
     116  */
     117  @Override
     118  public java.io.File getPath()
     119  {
     120    return rootPathPrefix;
     121  }
     122 
     123  /**
     124    Get the size of the index database on disk.
     125  */
     126  @Override
     127  public long getSizeOnDisk()
     128  {
     129    if (refPath == null) return -1;
     130    return super.getSizeOnDisk()+getSizeOfDir(refPath);
     131  }
     132 
    69133  /**
    70134    The analyzer is a keyword analyzer that also support lists of comma-
     
    112176  */
    113177  public int getNumIndexedSNPs()
    114     throws IOException
    115   {
    116     if (getWorkingStatus() == Status.DISABLED) return -1;
    117     Query query = IntPoint.newRangeQuery("snpId", 0, Integer.MAX_VALUE);
    118     return getIndexSearcher().count(query);
     178  {
     179    return refReader.numDocs();
    119180  }
    120181 
     
    126187  public void doCustomAction(String customAction)
    127188  {
    128     SimpleProgressReporter progress = setProgressReporter(new SimpleProgressReporter(null));
     189    if (!"INDEX_SNP".equals(customAction)) return;
     190   
     191    // TODO -- we need a lot of error handling etc. in this method
     192
     193    SimpleProgressReporter progress = null;
     194    DbControl dc = null;
     195    IndexWriter writer = null;
    129196    try
    130197    {
    131       // TODO --
     198      dc = VarSearchService.getInstance().getRootSessionControl().newDbControl();
     199      progress = setProgressReporter(new SimpleProgressReporter(null));
    132200      setWorkingStatus(Status.INDEXING);
    133       progress.display(10, "Doing custom work...");
    134       Thread.sleep(5000);
    135       progress.display(50, "Doing some more work...");
    136       Thread.sleep(5000);
    137       progress.display(90, "Almost done...");
    138       Thread.sleep(5000);
    139     }
    140     catch (InterruptedException ex)
    141     {}
     201     
     202      ItemQuery<ArrayDesign> query = ArrayDesign.getQuery();
     203      query.setIncludes(Include.ALL);
     204      query.restrict(Restrictions.eq(Hql.property("name"), Expressions.string("OncoArray500K")));
     205      List<ArrayDesign> list = query.list(dc);
     206      ArrayDesign oaDesign = list.get(0);
     207     
     208      File vcf = Datafiletype.VCF_DESIGN.getFile(dc, oaDesign);
     209     
     210      IndexWriterConfig config = new IndexWriterConfig(getAnalyzer());
     211      config.setOpenMode(OpenMode.CREATE_OR_APPEND);
     212     
     213      // Create a multi-threader MergeScheduler with 1-8 threads
     214      int numMergeThreads = getNumThreads(1, 8);
     215      ConcurrentMergeScheduler mergeSceduler = new ConcurrentMergeScheduler();
     216      mergeSceduler.setMaxMergesAndThreads(numMergeThreads*2, numMergeThreads);
     217      config.setMergeScheduler(mergeSceduler);
     218     
     219      // Create a MergePolicy for 1GB segments
     220      TieredMergePolicy mergePolicy = new TieredMergePolicy();
     221      mergePolicy.setMaxMergedSegmentMB(5000); // The default is 5000
     222      config.setMergePolicy(mergePolicy);
     223     
     224      writer = new IndexWriter(refDirectory, config);
     225     
     226      VariantCallIndexer indexer = new VariantCallIndexer(this, writer, oaDesign, vcf);
     227      indexer.call();
     228      writer.commit();
     229     
     230      refReader = createIndexReader(refDirectory);
     231      refSearcher = createIndexSearcher(refReader);
     232      oaDesign.setNumFileFeatures(indexer.getNumGenotypes());
     233     
     234      dc.commit();
     235    }
     236    catch (Exception ex)
     237    {
     238      logger.error("Custom action '"+customAction+"' failed ("+getName()+")", ex);
     239    }
    142240    finally
    143241    {
    144242      setWorkingStatus(Status.IDLE);
     243      setQueryStatus(getNumIndexedSNPs() > 0 ? Status.ENABLED : Status.DISABLED);
    145244      setProgressReporter(null);
     245      if (dc != null) dc.close();
    146246    }
    147247  }
     
    202302    {
    203303      return rba;
     304    }
     305   
     306    @Override
     307    public ArrayDesign getArrayDesign()
     308    {
     309      return null;
    204310    }
    205311   
  • extensions/net.sf.basedb.varsearch/trunk/src/net/sf/basedb/varsearch/index/VariantCallIndex.java

    r6540 r6545  
    1919
    2020import net.sf.basedb.core.AnyToAny;
     21import net.sf.basedb.core.ArrayDesign;
     22import net.sf.basedb.core.File;
    2123import net.sf.basedb.core.RawBioAssay;
    2224import net.sf.basedb.util.FileUtil;
     25import net.sf.basedb.util.Values;
    2326import net.sf.basedb.varsearch.VarSearch;
    2427import net.sf.basedb.varsearch.analyze.AlphaNumericIgnoreCaseAnalyzer;
     
    2730import net.sf.basedb.varsearch.analyze.HgvsCdnaAnalyzer;
    2831import net.sf.basedb.varsearch.analyze.HgvsProtAnalyzer;
     32import net.sf.basedb.varsearch.dao.Datafiletype;
    2933import net.sf.basedb.varsearch.fields.ListField;
    3034import net.sf.basedb.varsearch.query.FieldAwareQueryParser;
     
    121125  }
    122126
    123  
    124   static class VariantCallIndexer
     127  /**
     128    Indexer implementation for VCF files with information and annotations
     129    that are compatible with the standard Variant calling pipeline in Reggie.
     130    We support two versions:
     131     * VCF files attached to raw bioassays with full genotype and annotation information per SNP
     132     * VCF files attached to array designs with annotation information per SNP (no genotype information)
     133  */
     134  public static class VariantCallIndexer
    125135    implements Indexer
    126136  {
    127137   
    128  
    129138    private final LuceneIndex idx;
    130139    private final IndexWriter writer;
    131140    private final int num;
    132141    private final RawBioAssay rba;
     142    private final ArrayDesign design;
    133143    private final List<VcfFile> vcfFiles;
    134144    private int numVariants;
    135145    private int numGenotypes;
    136146    private boolean aborted;
    137  
    138    
     147   
     148    /**
     149      Create an indexer for indexing VCF files linked to a raw bioassay.
     150    */
    139151    public VariantCallIndexer(LuceneIndex idx, IndexWriter writer, int num, RawBioAssay rba, List<VcfFile> vcfFiles)
    140152    {
     
    143155      this.writer = writer;
    144156      this.rba = rba;
     157      this.design = null;
    145158      this.vcfFiles = vcfFiles;
     159    }
     160   
     161    /**
     162      Create an indexer for indexing VCF files linked to an array design.
     163    */
     164    public VariantCallIndexer(LuceneIndex idx, IndexWriter writer, ArrayDesign design, File vcfFile)
     165    {
     166      this.idx = idx;
     167      this.num = 1;
     168      this.writer = writer;
     169      this.rba = null;
     170      this.design = design;
     171      this.vcfFiles = Arrays.asList(new VcfFile(vcfFile, Datafiletype.VCF_DESIGN.get(design.getDbControl())));
    146172    }
    147173   
     
    172198    {
    173199      return rba;
     200    }
     201   
     202    @Override
     203    public ArrayDesign getArrayDesign()
     204    {
     205      return design;
    174206    }
    175207   
     
    198230      boolean indexAllGenotypes = idx.getIndexAllGenotypes();
    199231      boolean updateLinkDescriptions = !VarSearch.isReggieInstalled();
     232      String itemName = rba != null ? rba.getName() : design.getName();
    200233      try
    201234      {
    202         logger.debug("Indexing #" + num +": " +rba.getName() + ": " + vcfFiles.size() + " VCF files");
     235        logger.debug("Indexing #" + num +": " +itemName + ": " + vcfFiles.size() + " VCF files");
    203236        long time = -System.currentTimeMillis();
    204237       
    205         // Re-indexing: delete existing information about this raw bioassay id
    206         writer.deleteDocuments(IntPoint.newExactQuery("rbaId", rba.getId()));
    207         writer.deleteDocuments(IntPoint.newExactQuery("mainId", rba.getId()));
     238        if (rba != null)
     239        {
     240          // Re-indexing: delete existing information about this raw bioassay id
     241          writer.deleteDocuments(IntPoint.newExactQuery("rbaId", rba.getId()));
     242          writer.deleteDocuments(IntPoint.newExactQuery("mainId", rba.getId()));
     243        }
     244        if (design != null)
     245        {
     246          // Re-indexing: delete existing information about this array design
     247          writer.deleteDocuments(IntPoint.newExactQuery("designId", design.getId()));
     248        }
    208249       
    209250        for (VcfFile vcfFile : vcfFiles)
     
    212253          int numFileGenotypes = 0;
    213254         
    214           logger.debug("Indexing #" + num +": " +rba.getName() + ": " + vcfFile.getName());
     255          logger.debug("Indexing #" + num +": " +itemName + ": " + vcfFile.getName());
    215256          parser = new VcfParser(vcfFile.getFile());
    216257          VcfHeader header = parser.parseHeaders();
     
    218259          if (header == null)
    219260          {
    220             logger.warn("Unable to index (no header found): " +rba.getName() + "/" + vcfFile.getName());
     261            logger.warn("Unable to index (no header found): " +itemName + "/" + vcfFile.getName());
    221262            continue; // With the next VCF
    222263          }
     
    224265          int chrCol = header.indexOf("#CHROM");
    225266          int posCol = header.indexOf("POS");
     267          int idCol = header.indexOf("ID");
    226268          int refCol = header.indexOf("REF");
    227269          int altCol = header.indexOf("ALT");
    228270          int infoCol = header.indexOf("INFO");
    229271         
    230           if (isMissingColumn(chrCol, posCol, refCol, altCol, infoCol))
     272          if (isMissingColumn(chrCol, posCol, idCol, refCol, altCol, infoCol))
    231273          {
    232             logger.warn("Unable to index (missing header column): " +rba.getName() + "/" + vcfFile.getName());
     274            logger.warn("Unable to index (missing header column): " +itemName + "/" + vcfFile.getName());
    233275            logger.info("CHROM: "+chrCol);
    234276            logger.info("POS: "+posCol);
     277            logger.info("ID: "+idCol);
    235278            logger.info("REF: "+refCol);
    236279            logger.info("ALT: "+altCol);
     
    259302            DocumentCreator variant = new DocumentCreator();
    260303           
    261             // Add information about the raw bioassay
    262             variant.addRawBioAssayFields(rba, "rba"); // Variant document
     304            // Add information about the raw bioassay / array design
     305            if (rba != null)
     306            {
     307              variant.addRawBioAssayFields(rba, "rba");
     308            }
     309            if (design != null)
     310            {
     311              variant.addArrayDesignFields(design);
     312            }
    263313            variant.addFileFields(vcfFile.getFile(), line);
    264314           
     
    272322              variant.addLongField(chr, pos, Store.NO);
    273323            }
     324            String snpId = Values.getString(line.col(idCol), ".");
     325            if (!snpId.equals("."))
     326            {
     327              variant.addStringField("snpId", snpId, Store.YES);
     328            }
     329           
    274330            variant.addStringField("ref", line.col(refCol), Store.YES);
    275331            variant.addStringField("alt", line.col(altCol), Store.YES);
     
    339395            boolean isVariant = false;
    340396            boolean isGenotype = false;
    341            
    342             for (Info info : line.format())
    343             {
    344               String key = info.key;
    345               String val = info.value;
    346              
    347               if ("GT".equals(key))
    348               {
    349                 variant.addStringField("gt", val, Store.YES);
    350                 isGenotype = !"./.".equals(val);
    351                 isVariant = isGenotype && !"0/0".equals(val);
    352               }
    353               else if ("DP".equals(key))
    354               {
    355                 variant.addIntField("dp", info.intValue(), Store.YES);
    356               }
    357               else if ("VD".equals(key))
    358               {
    359                 variant.addIntField("vd", info.intValue(), Store.YES);
    360               }
    361               else if ("AF".equals(key))
    362               {
    363                 variant.addFloatField("af", info.floatValue(), val);
    364               }
    365             }         
     397            if (rba != null)
     398            {
     399              for (Info info : line.format())
     400              {
     401                String key = info.key;
     402                String val = info.value;
     403               
     404                if ("GT".equals(key))
     405                {
     406                  variant.addStringField("gt", val, Store.YES);
     407                  isGenotype = !"./.".equals(val);
     408                  isVariant = isGenotype && !"0/0".equals(val);
     409                }
     410                else if ("DP".equals(key))
     411                {
     412                  variant.addIntField("dp", info.intValue(), Store.YES);
     413                }
     414                else if ("VD".equals(key))
     415                {
     416                  variant.addIntField("vd", info.intValue(), Store.YES);
     417                }
     418                else if ("AF".equals(key))
     419                {
     420                  variant.addFloatField("af", info.floatValue(), val);
     421                }
     422              }
     423            }
     424            else if (design != null)
     425            {
     426              isGenotype = true;
     427              isVariant = true;
     428            }
    366429            if (isGenotype)
    367430            {
     
    382445          FileUtil.close(parser);
    383446         
    384           AnyToAny link = vcfFile.getLink();
    385           if (link != null && updateLinkDescriptions)
     447          if (rba != null)
    386448          {
    387             if (indexAllGenotypes)
    388             {
    389               link.setDescription("Found "+VarSearch.formatCount(numFileVariants, " variant", " variants")+
    390                 " in "+VarSearch.formatCount(numFileGenotypes, " genotyped target.", " genotyped targets."));
    391             }
    392             else
    393             {
    394               link.setDescription("Found "+VarSearch.formatCount(numFileVariants, " variant.", " variants."));
    395             }
    396             vcfFile.getFile().setDescription(link.getDescription());
     449            AnyToAny link = vcfFile.getLink();
     450            if (link != null && updateLinkDescriptions)
     451            {
     452              if (indexAllGenotypes)
     453              {
     454                link.setDescription("Found "+VarSearch.formatCount(numFileVariants, " variant", " variants")+
     455                  " in "+VarSearch.formatCount(numFileGenotypes, " genotyped target.", " genotyped targets."));
     456              }
     457              else
     458              {
     459                link.setDescription("Found "+VarSearch.formatCount(numFileVariants, " variant.", " variants."));
     460              }
     461              vcfFile.getFile().setDescription(link.getDescription());
     462            }
    397463          }
    398464        }
    399465       
    400         // Add summary fields for the complete rba
    401         DocumentCreator main = new DocumentCreator();
    402         main.addRawBioAssayFields(rba, "main");
    403         main.addIntField("numVariants", numVariants, Store.YES);
    404         main.addIntField("numGenotypes", numGenotypes, Store.YES);
    405         writer.addDocument(main.doc());       
     466        if (rba != null)
     467        {
     468          // Add summary fields for the complete rba
     469          DocumentCreator main = new DocumentCreator();
     470          main.addRawBioAssayFields(rba, "main");
     471          main.addIntField("numVariants", numVariants, Store.YES);
     472          main.addIntField("numGenotypes", numGenotypes, Store.YES);
     473          writer.addDocument(main.doc());
     474        }
    406475       
    407476        time += System.currentTimeMillis();
    408         logger.debug("Index complete #"+num+": " +rba.getName() + ": " + numVariants + " variants; " + numGenotypes + " genotypes; " + time + " ms");
     477        logger.debug("Index complete #"+num+": " +itemName + ": " + numVariants + " variants; " + numGenotypes + " genotypes; " + time + " ms");
    409478        if (LuceneIndex.SLOW_MODE) Thread.sleep(10);
    410479      }
    411480      catch (Exception ex)
    412481      {
    413         logger.warn("Indexing failed #"+num+": " + rba.getName(), ex);
     482        logger.warn("Indexing failed #"+num+": " + itemName, ex);
    414483        throw ex;
    415484      }
Note: See TracChangeset for help on using the changeset viewer.