/// <summary>
/// Builds the Lucene query for a query structure and search type.
/// </summary>
/// <param name="queryStructure">The chemical structure to search for.</param>
/// <param name="searchType">The desired search type (Exact, Similarity, or Substructure).</param>
/// <returns>
/// A boolean query: one MUST clause on the exact key for Exact searches, one SHOULD clause per
/// similarity fingerprint position for Similarity searches, or one MUST clause per substructure
/// fingerprint position for Substructure searches.
/// </returns>
/// <exception cref="System.ArgumentNullException"><paramref name="queryStructure"/> is null.</exception>
/// <exception cref="System.ArgumentOutOfRangeException"><paramref name="searchType"/> is not a handled search type.</exception>
private Query CreateLuceneQuery(ChemicalStructure queryStructure, SearchType searchType)
{
    if (queryStructure == null)
    {
        throw new System.ArgumentNullException("queryStructure");
    }

    BooleanQuery query = new BooleanQuery();

    switch (searchType)
    {
        // For exact searches, the search results MUST match the exact key.
        case SearchType.Exact:
            query.Add(
                new TermQuery(new Term(ChemicalStructureIndexer.FIELD_EXACTKEY, queryStructure.GetUniqueKey())),
                Occur.MUST);
            break;

        // For similarity searches, the search results SHOULD contain the query similarity fingerprint positions.
        case SearchType.Similarity:
            foreach (int fingerprintPosition in queryStructure.GetSimilarityFingerprintPositions())
            {
                // Terms are machine-readable keys, so format them independently of the current culture.
                string term = fingerprintPosition.ToString(System.Globalization.CultureInfo.InvariantCulture);
                query.Add(
                    new TermQuery(new Term(ChemicalStructureIndexer.FIELD_FINGERPRINT_POSITION_SIMILARITY, term)),
                    Occur.SHOULD);
            }
            break;

        // For substructure searches, the search results MUST contain ALL of the query substructure fingerprint positions.
        // NOTE(review): if the query structure yields no fingerprint positions the query has no clauses;
        // confirm whether that case should instead treat every indexed structure as a candidate.
        case SearchType.Substructure:
            foreach (int fingerprintPosition in queryStructure.GetSubstructureFingerprintPositions())
            {
                string term = fingerprintPosition.ToString(System.Globalization.CultureInfo.InvariantCulture);
                query.Add(
                    new TermQuery(new Term(ChemicalStructureIndexer.FIELD_FINGERPRINT_POSITION_SUBSTRUCTURE, term)),
                    Occur.MUST);
            }
            break;

        // Fail loudly rather than silently returning a query with no clauses.
        default:
            throw new System.ArgumentOutOfRangeException("searchType", searchType, "Unsupported search type.");
    }

    return query;
}
/// <summary>
/// Searches for a specified query structure.
/// </summary>
/// <param name="queryStructure">The chemical structure to search for.</param>
/// <param name="searchType">The desired search type (Exact, Similarity, or Substructure).</param>
/// <returns>
/// The matching structures with their Lucene scores, in the order Lucene returned them. At most
/// 100 index hits are examined; for Substructure searches that cap is applied before the real
/// substructure check, so fewer results may be returned than hits were examined.
/// </returns>
/// <exception cref="System.ArgumentNullException"><paramref name="queryStructure"/> is null.</exception>
public List<ChemicalStructureSearchResult> Search(ChemicalStructure queryStructure, SearchType searchType)
{
    if (queryStructure == null)
    {
        throw new System.ArgumentNullException("queryStructure");
    }

    // Artificial cap on the number of Lucene hits that are retrieved.
    const int maxHits = 100;

    var results = new List<ChemicalStructureSearchResult>();

    // Form the Lucene query and execute it to obtain hit pointers.
    Query query = CreateLuceneQuery(queryStructure, searchType);
    TopDocs hits = this.IndexSearcher.Search(query, maxHits);

    // For Substructure searches the index only identifies structures that MIGHT be matches,
    // so each candidate needs an actual substructure determination.
    bool verifySubstructure = (searchType == SearchType.Substructure);

    foreach (ScoreDoc scoreDoc in hits.ScoreDocs)
    {
        // Retrieve the Lucene document and rebuild the chemical structure from its stored fields.
        Document doc = this.IndexSearcher.Doc(scoreDoc.Doc);
        var result = new ChemicalStructureSearchResult()
        {
            ChemicalStructure = new ChemicalStructure(
                doc.Get(ChemicalStructureIndexer.FIELD_NAME),
                doc.Get(ChemicalStructureIndexer.FIELD_MOLFILE)),
            Score = scoreDoc.Score,
        };

        if (verifySubstructure && !result.ChemicalStructure.HasSubstructure(queryStructure))
        {
            continue;
        }

        results.Add(result);
    }

    return results;
}
/// <summary>
/// Returns whether the chemical structure contains a specified substructure.
/// </summary>
/// <param name="substructureQuery">The specified substructure to search for.</param>
/// <returns>True if this chemical structure contains the specified substructure.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="substructureQuery"/> is null.</exception>
public bool HasSubstructure(ChemicalStructure substructureQuery)
{
    if (substructureQuery == null)
    {
        throw new System.ArgumentNullException("substructureQuery");
    }

    // Stacked using statements guarantee that every Indigo object is disposed (in reverse order of
    // creation) even if loading or matching throws. The match result is disposed as well.
    using (Indigo indigo = new Indigo())
    using (IndigoObject structure = CreateIndigoStructure(indigo))
    using (IndigoObject substructure = indigo.loadQueryMolecule(substructureQuery.MolfileContents))
    using (IndigoObject substructureMatcher = indigo.substructureMatcher(structure))
    using (IndigoObject match = substructureMatcher.match(substructure))
    {
        // A non-null match object means the substructure was found.
        return match != null;
    }
}
/// <summary>
/// Adds a chemical structure to the index.
/// </summary>
/// <param name="chemicalStructure">The chemical structure to add to the index.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="chemicalStructure"/> is null.</exception>
public void AddChemicalStructure(ChemicalStructure chemicalStructure)
{
    if (chemicalStructure == null)
    {
        throw new System.ArgumentNullException("chemicalStructure");
    }

    // Create a new Lucene document for the chemical structure.
    Document doc = new Document();

    // Stored (but not indexed) fields for the name and molfile, so they can be retrieved later.
    doc.Add(new Field(FIELD_NAME, chemicalStructure.Name, Field.Store.YES, Field.Index.NO));
    doc.Add(new Field(FIELD_MOLFILE, chemicalStructure.MolfileContents, Field.Store.YES, Field.Index.NO));

    // Searchable (indexed, not stored, not analyzed) field for the exact key.
    doc.Add(new Field(FIELD_EXACTKEY, chemicalStructure.GetUniqueKey(), Field.Store.NO, Field.Index.NOT_ANALYZED));

    // One searchable term per fingerprint position. Positions are formatted with the invariant
    // culture so that index terms do not depend on the culture of the indexing process.
    foreach (int fingerprintPosition in chemicalStructure.GetSimilarityFingerprintPositions())
    {
        string term = fingerprintPosition.ToString(System.Globalization.CultureInfo.InvariantCulture);
        doc.Add(new Field(FIELD_FINGERPRINT_POSITION_SIMILARITY, term, Field.Store.NO, Field.Index.NOT_ANALYZED));
    }

    foreach (int fingerprintPosition in chemicalStructure.GetSubstructureFingerprintPositions())
    {
        string term = fingerprintPosition.ToString(System.Globalization.CultureInfo.InvariantCulture);
        doc.Add(new Field(FIELD_FINGERPRINT_POSITION_SUBSTRUCTURE, term, Field.Store.NO, Field.Index.NOT_ANALYZED));
    }

    // Add the document to the index.
    this.IndexWriter.AddDocument(doc);
}