Changeset 6541
- Timestamp:
- Jan 17, 2022, 8:47:47 AM (21 months ago)
- Location:
- extensions/net.sf.basedb.varsearch/trunk
- Files:
- 5 edited
Legend:
- Unmodified
- Added
- Removed
-
extensions/net.sf.basedb.varsearch/trunk/resources/admin/manager.jsp
r6167 r6541 26 26 { 27 27 display: grid; 28 grid-template-columns: 2fr 1fr 1fr;28 grid-template-columns: 2fr 2fr 1fr; 29 29 width: 60em; 30 30 border-width: 1px; -
extensions/net.sf.basedb.varsearch/trunk/src/net/sf/basedb/varsearch/LuceneColumnFactory.java
r6530 r6541 233 233 } 234 234 235 AllDocsCollector hits = getHits(item.getId(), maxHitsPerRba, new SortByChromAndPos()); 235 // TODO -- sorting doesn't work with the OncoArray index 236 AllDocsCollector hits = getHits(item.getId(), maxHitsPerRba, null); //new SortByChromAndPos()); 236 237 237 238 NullSafeStringBuilder sb = new NullSafeStringBuilder(); … … 251 252 if (indexAllGenotypes) 252 253 { 253 if (gt == null &&"./.".equals(gt))254 if (gt == null || "./.".equals(gt)) 254 255 { 255 256 cls += " no-data"; -
extensions/net.sf.basedb.varsearch/trunk/src/net/sf/basedb/varsearch/index/LuceneIndex.java
r6531 r6541 6 6 import java.util.Collection; 7 7 import java.util.Collections; 8 import java.util.HashMap;9 8 import java.util.List; 10 import java.util.Map;11 9 import java.util.Set; 12 10 import java.util.concurrent.CountDownLatch; … … 20 18 21 19 import org.apache.lucene.analysis.Analyzer; 22 import org.apache.lucene.analysis.core.KeywordAnalyzer; 23 import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper; 20 import org.apache.lucene.document.Document; 24 21 import org.apache.lucene.document.IntPoint; 25 22 import org.apache.lucene.document.LongPoint; … … 53 50 import net.sf.basedb.core.query.IdListRestriction; 54 51 import net.sf.basedb.util.FileUtil; 55 import net.sf.basedb.varsearch.analyze.AlphaNumericIgnoreCaseAnalyzer; 56 import net.sf.basedb.varsearch.analyze.EffectAnalyzer; 57 import net.sf.basedb.varsearch.analyze.HgvsCdnaAnalyzer; 58 import net.sf.basedb.varsearch.analyze.HgvsProtAnalyzer; 52 import net.sf.basedb.util.Values; 59 53 import net.sf.basedb.varsearch.dao.Itemlist; 60 import net.sf.basedb.varsearch.query.FieldAwareQueryParser; 61 import net.sf.basedb.varsearch.query.FloatQueryField; 62 import net.sf.basedb.varsearch.query.IntQueryField; 63 import net.sf.basedb.varsearch.query.LongQueryField; 54 import net.sf.basedb.varsearch.query.AllDocsCollector; 64 55 import net.sf.basedb.varsearch.query.QueryCache; 65 import net.sf.basedb.varsearch.query.QueryField;66 56 import net.sf.basedb.varsearch.query.RawBioAssayIdCollector; 67 import net.sf.basedb.varsearch.query.StripWildcardQueryField;68 57 import net.sf.basedb.varsearch.service.VarSearchService; 69 58 70 59 /** 71 Represents a Lucene index database. 60 Represents a Lucene index database. Subclasses are required to implement 61 some methods. 
62 72 63 @author nicklas 73 64 */ 74 public class LuceneIndex65 public abstract class LuceneIndex 75 66 implements Closeable 76 67 { … … 101 92 private ExecutorService executor; 102 93 private Analyzer analyzer; 103 private Map<String, QueryField> queryFields;104 94 private QueryCache cache; 105 95 … … 118 108 } 119 109 110 /** 111 Get the ID of the index. 112 */ 120 113 public String getId() 121 114 { … … 247 240 this.path = getExistingOrNewPath(pathPrefix); 248 241 this.directory = createIndexIfNeeded(path); 249 250 // Create the Analyzer. Most fields are indexed literally with the 251 // default KeywordAnalyzer. 252 Map<String, Analyzer> fieldAnalyzers = new HashMap<>(); 253 fieldAnalyzers.put("gene", new AlphaNumericIgnoreCaseAnalyzer()); // Gene names can be a list and we ignore case 254 fieldAnalyzers.put("c", new HgvsCdnaAnalyzer()); // HGVS.c analyzer 255 fieldAnalyzers.put("p", new HgvsProtAnalyzer()); // HGVS.p analyzer 256 fieldAnalyzers.put("effect", new EffectAnalyzer()); // ANN.Annotation (=Effect) analyzer 257 this.analyzer = new PerFieldAnalyzerWrapper(new KeywordAnalyzer(), fieldAnalyzers); 258 259 queryFields = new HashMap<>(); 260 queryFields.put("pos", LongQueryField.INSTANCE); 261 for (int i = 1; i < 23; i++) 262 { 263 queryFields.put("chr"+i, LongQueryField.INSTANCE); 264 } 265 queryFields.put("chrX", LongQueryField.INSTANCE); 266 queryFields.put("chrY", LongQueryField.INSTANCE); 267 queryFields.put("c", StripWildcardQueryField.INSTANCE); 268 queryFields.put("p", StripWildcardQueryField.INSTANCE); 269 queryFields.put("dp", IntQueryField.INSTANCE); 270 queryFields.put("vd", IntQueryField.INSTANCE); 271 queryFields.put("af", FloatQueryField.INSTANCE); 272 queryFields.put("rbaId", IntQueryField.INSTANCE); 273 queryFields.put("file", IntQueryField.INSTANCE); 274 275 this.cache = new QueryCache(60); // 1 hour 276 this.reader = DirectoryReader.open(directory); 277 this.searcher = new IndexSearcher(reader, executor); 242 this.reader = 
createIndexReader(directory); 243 this.searcher = createIndexSearcher(reader); 244 this.analyzer = createAnalyzer(); 245 this.cache = createQueryCache(); 278 246 this.status = Status.IDLE; 279 247 this.autoUpdateAction = AutoUpdate.DEFAULT; 280 248 } 281 249 250 /** 251 Create a reader for reading documents and information from 252 the given index directory. 253 */ 254 protected IndexReader createIndexReader(Directory directory) 255 throws IOException 256 { 257 return DirectoryReader.open(directory); 258 } 259 260 /** 261 Create a searcher for executing queries against the given index. 262 */ 263 protected IndexSearcher createIndexSearcher(IndexReader reader) 264 { 265 return new IndexSearcher(reader, executor); 266 } 267 268 /** 269 Create an Analyzer implementation that is used for analyzing 270 text that is going into the index. The analyzer is also used for 271 parsing query strings. 272 */ 273 protected abstract Analyzer createAnalyzer(); 274 275 /** 276 Create a cache for storing results from queries that takes 277 a long time to execute. 278 */ 279 protected QueryCache createQueryCache() 280 { 281 // TODO -- add support for implementations that return null 282 // and implement a setting for what is considered a long time 283 return new QueryCache(60); // 1 hour 284 } 285 282 286 @Override 283 287 public void close() … … 443 447 config.setMergePolicy(mergePolicy); 444 448 445 writer = new IndexWriter(directory, config); 446 449 writer = new IndexWriter(directory, config); 447 450 processed = addToIndex(dc, rawBioAssays, writer); 448 451 … … 454 457 logger.debug("Commit completed ("+name+")"); 455 458 progress.display(99, "Commit complete. 
Cleaning up..."); 459 456 460 rwLock.writeLock().lock(); 457 461 try 458 462 { 459 cache = new QueryCache(60);460 reader = DirectoryReader.open(directory);461 searcher = newIndexSearcher(reader);463 cache = createQueryCache(); 464 reader = createIndexReader(directory); 465 searcher = createIndexSearcher(reader); 462 466 } 463 467 finally … … 535 539 ThreadFactory threadFactory = new IndexThreadFactory(id); 536 540 threadPool = Executors.newFixedThreadPool(numThreads, threadFactory); 537 ExecutorCompletionService< RawBioAssayIndexer> executor = new ExecutorCompletionService<>(threadPool);541 ExecutorCompletionService<Indexer> executor = new ExecutorCompletionService<>(threadPool); 538 542 539 543 numRba = 0; 540 int allRba = rawBioAssays.size(); 544 int allRba = rawBioAssays.size(); 541 545 for (RawBioAssay rba : rawBioAssays) 542 546 { … … 545 549 { 546 550 numRba++; 547 executor.submit( new RawBioAssayIndexer(this,writer, numRba, rba, vcfFiles));551 executor.submit(createIndexer(writer, numRba, rba, vcfFiles)); 548 552 if (numRba % 100 == 0) 549 553 { … … 564 568 int numFailed = 0; 565 569 int numAborted = 0; 566 570 567 571 for (int i = 0; i < numRba; i++) 568 572 { 569 Future< RawBioAssayIndexer> result = executor.take();573 Future<Indexer> result = executor.take(); 570 574 dc.getSessionControl().updateLastAccess(); // To avoid session timeout 571 575 if (result != null && !result.isCancelled()) … … 573 577 try 574 578 { 575 RawBioAssayIndexer indexer = result.get();579 Indexer indexer = result.get(); 576 580 if (!indexer.wasAborted()) 577 581 { … … 607 611 608 612 /** 613 Create an Indexer implementation that knows how to index the information 614 in the given raw bioassay. 615 */ 616 protected abstract Indexer createIndexer(IndexWriter writer, int num, RawBioAssay rba, List<VcfFile> vcfFiles); 617 618 /** 609 619 Remove RawBioassays from the index. 
610 620 */ … … 629 639 { 630 640 // delete existing information about this raw bioassay id 641 // TODO -- maybe this should also be in the subclasses 631 642 writer.deleteDocuments(IntPoint.newExactQuery("rbaId", rbaId)); 632 643 writer.deleteDocuments(IntPoint.newExactQuery("mainId", rbaId)); … … 649 660 try 650 661 { 651 cache = new QueryCache(60);652 reader = DirectoryReader.open(directory);653 searcher = new IndexSearcher(reader);662 reader = createIndexReader(directory); 663 searcher = createIndexSearcher(reader); 664 cache = createQueryCache(); 654 665 } 655 666 finally … … 714 725 path = newPath; 715 726 directory = new NIOFSDirectory(path.toPath()); 716 cache = new QueryCache(60);717 reader = DirectoryReader.open(directory);718 searcher = new IndexSearcher(reader);727 reader = createIndexReader(directory); 728 searcher = createIndexSearcher(reader); 729 cache = createQueryCache(); 719 730 } 720 731 finally … … 831 842 path = rebuildPath; 832 843 directory = new NIOFSDirectory(path.toPath()); 833 cache = new QueryCache(60);834 reader = DirectoryReader.open(directory);835 searcher = new IndexSearcher(reader);844 reader = createIndexReader(directory); 845 searcher = createIndexSearcher(reader); 846 cache = createQueryCache(); 836 847 } 837 848 finally … … 912 923 } 913 924 914 pr ivateInteger countGenotypes(RawBioAssay rba, boolean allGenotypes)925 protected Integer countGenotypes(RawBioAssay rba, boolean allGenotypes) 915 926 throws IOException 916 927 { 917 928 if (status == Status.DISABLED) return null; 918 929 919 Query query = IntPoint.newExactQuery("mainId", rba.getId()); 920 if (getIndexSearcher().count(query) == 0) return null; 921 922 query = IntPoint.newExactQuery("rbaId", rba.getId()); 923 if (indexAllGenotypes && !allGenotypes) 924 { 925 // We need to exclude 0/0 genotype from the result 926 BooleanQuery.Builder b = new BooleanQuery.Builder(); 927 b.add(query, Occur.MUST); 928 b.add(new TermQuery(new Term("gt", "0/0")), Occur.MUST_NOT); 929 query = 
b.build(); 930 } 931 return getIndexSearcher().count(query); 932 } 930 Query query = IntPoint.newExactQuery("mainId", rba.getId()); 931 932 AllDocsCollector hits = new AllDocsCollector(getIndexReader(), 1, null); 933 getIndexSearcher().search(query, hits); 934 if (hits.getTotalHits() == 0) return null; 935 936 Document doc = hits.getDocuments().iterator().next(); 937 return Values.getInt(doc.get(allGenotypes ? "numGenotypes" : "numVariants")); 938 } 939 933 940 934 941 /** … … 990 997 Create a new parser for creating queries from strings. 991 998 */ 992 public QueryParser createQueryParser() 993 { 994 return new FieldAwareQueryParser("gene", analyzer, queryFields); 995 } 999 public abstract QueryParser createQueryParser(); 996 1000 997 1001 /** … … 1007 1011 @return The number of variants or -1 if the index is not open 1008 1012 */ 1009 public int getNumVariants() 1010 { 1011 if (status == Status.DISABLED) return -1; 1012 return reader.numDocs(); 1013 } 1013 public abstract long getNumVariants() 1014 throws IOException; 1014 1015 1015 1016 /** … … 1040 1041 /** 1041 1042 Get the id of all raw bioassays where the specified variant has been found. 1043 TODO -- this need to be subclassed 1042 1044 */ 1043 1045 public Set<Integer> getRawBioAssaysWithVariant(String chrom, long pos, String ref, String alt) -
extensions/net.sf.basedb.varsearch/trunk/src/net/sf/basedb/varsearch/query/LuceneQueryFactory.java
r6524 r6541 244 244 queries.add(query); 245 245 } 246 246 247 247 logger.debug("Index: " + idx.getName()); 248 248 logger.debug("QueryString: " + queryString); -
extensions/net.sf.basedb.varsearch/trunk/src/net/sf/basedb/varsearch/service/VarSearchService.java
r6525 r6541 35 35 import net.sf.basedb.varsearch.index.DatafileVcfLocator; 36 36 import net.sf.basedb.varsearch.index.LuceneIndex; 37 import net.sf.basedb.varsearch.index.VariantCallIndex; 37 38 import net.sf.basedb.varsearch.index.LuceneIndex.AutoUpdate; 38 39 import net.sf.basedb.varsearch.index.LuceneIndex.FullRebuildRunnable; 39 40 import net.sf.basedb.varsearch.index.LuceneIndex.QueryThreadFactory; 40 41 import net.sf.basedb.varsearch.index.LuceneIndex.Status; 42 import net.sf.basedb.varsearch.index.OncoArrayIndex; 41 43 42 44 public class VarSearchService … … 157 159 try 158 160 { 159 LuceneIndex filtered = new LuceneIndex("filtered");161 LuceneIndex filtered = new VariantCallIndex("filtered"); 160 162 filtered.setName("Variants (filtered)"); 161 163 filtered.setItemList(Itemlist.VARIANT_INDEX_FILTERED); … … 165 167 indexes.put(filtered.getId(), filtered); 166 168 167 LuceneIndex raw = new LuceneIndex("raw");169 LuceneIndex raw = new VariantCallIndex("raw"); 168 170 raw.setName("Variants (all)"); 169 171 raw.setItemList(Itemlist.VARIANT_INDEX_ALL); … … 174 176 indexes.put(raw.getId(), raw); 175 177 176 LuceneIndex targeted = new LuceneIndex("targeted");178 LuceneIndex targeted = new VariantCallIndex("targeted"); 177 179 targeted.setName("Variants (targeted)"); 178 180 targeted.setItemList(Itemlist.VARIANT_INDEX_TARGETED); … … 184 186 targeted.open(new File(dbDir, "targeted"), null); 185 187 indexes.put(targeted.getId(), targeted); 186 188 189 LuceneIndex oncoArray500K = new OncoArrayIndex("oncoarray-500K"); 190 oncoArray500K.setName("Genotyping (OncoArray500K)"); 191 oncoArray500K.setItemList(Itemlist.GENOTYPING_ONCOARRAY_500K); 192 oncoArray500K.setIndexAllGenotypes(true); 193 oncoArray500K.setViewAllVariantsEnabled(false); 194 oncoArray500K.setVcfFileLocator(new DatafileVcfLocator(Datafiletype.VCF, false)); 195 oncoArray500K.open(new File(dbDir, "oncoarray-500k"), null); 196 indexes.put(oncoArray500K.getId(), oncoArray500K); 187 197 } 188 198 catch 
(IOException ex)
Note: See TracChangeset
for help on using the changeset viewer.