/// <summary>
/// Runs a query against the "content" field of the index and groups the hits by document fingerprint.
/// </summary>
/// <param name="query">Query text that is handed to the Lucene <c>QueryParser</c>.</param>
/// <returns>
/// One <c>IndexPageResult</c> per distinct fingerprint, in the order each fingerprint was first
/// encountered. Each entry carries the score of the first hit seen for that fingerprint and one
/// <c>PageResult</c> (page + score) for every hit. If anything throws, the exception is logged as a
/// warning and whatever was collected up to that point is returned.
/// </returns>
public List<IndexPageResult> GetDocumentPagesWithQuery(string query)
{
    List<IndexPageResult> documents = new List<IndexPageResult>();
    Dictionary<string, IndexPageResult> documents_by_fingerprint = new Dictionary<string, IndexPageResult>();

    try
    {
        using (IndexReader reader = IndexReader.Open(LIBRARY_INDEX_BASE_PATH, true))
        {
            using (IndexSearcher searcher = new IndexSearcher(reader))
            {
                QueryParser parser = new QueryParser(Version.LUCENE_29, "content", analyzer);
                Query parsed_query = parser.Parse(query);
                Lucene.Net.Search.Hits hits = searcher.Search(parsed_query);

                for (var cursor = hits.Iterator(); cursor.MoveNext(); )
                {
                    Hit hit = (Hit)cursor.Current;
                    string fingerprint = hit.Get("fingerprint");
                    int page = Convert.ToInt32(hit.Get("page"));
                    double score = hit.GetScore();

                    // The first hit for a fingerprint creates the top-level record for that document
                    IndexPageResult document;
                    if (!documents_by_fingerprint.TryGetValue(fingerprint, out document))
                    {
                        document = new IndexPageResult { fingerprint = fingerprint, score = score };
                        documents.Add(document);
                        documents_by_fingerprint[fingerprint] = document;
                    }

                    // Every hit (including the first) contributes a page record
                    document.page_results.Add(new PageResult { page = page, score = score });
                }

                // Close the searcher explicitly before leaving its using block
                searcher.Close();
            }

            reader.Close();
        }
    }
    catch (Exception ex)
    {
        Logging.Warn(ex, "GetDocumentPagesWithQuery: There was a problem opening the index file for searching.");
    }

    return documents;
}
/// <summary>
/// Builds a "more like this" query from the text of a file and returns the fingerprints of the
/// matching index entries.
/// </summary>
/// <param name="document_filename">Path of the file whose text seeds the similarity query.</param>
/// <returns>The "fingerprint" field of every hit, in hit order (duplicates are not removed).</returns>
/// <remarks>
/// Exceptions are not caught here; they propagate to the caller. The index reader, the searcher and
/// the file reader are released even when an exception is thrown.
/// </remarks>
public List<string> GetDocumentsSimilarToDocument(string document_filename)
{
    List<string> fingerprints = new List<string>();

    using (IndexReader index_reader = IndexReader.Open(LIBRARY_INDEX_BASE_PATH, true))
    {
        using (IndexSearcher index_searcher = new IndexSearcher(index_reader))
        {
            LuceneMoreLikeThis mlt = new LuceneMoreLikeThis(index_reader);
            mlt.SetFieldNames(new string[] { "content" });
            mlt.SetMinTermFreq(0);

            // Keep the file open until the search has been fully consumed, so that disposing it
            // cannot interfere with however the query reads from it, then release the handle.
            using (StreamReader document_reader = new StreamReader(document_filename))
            {
                Query query = mlt.Like(document_reader);
                Hits hits = index_searcher.Search(query);

                var i = hits.Iterator();
                while (i.MoveNext())
                {
                    Hit hit = (Hit)i.Current;
                    fingerprints.Add(hit.Get("fingerprint"));
                }
            }

            // Close the index
            index_searcher.Close();
        }

        index_reader.Close();
    }

    return fingerprints;
}
/// <summary>
/// Searches the "content" field of the index and returns one result per distinct document
/// fingerprint. Understands the lucene query syntax.
/// </summary>
/// <param name="query">Query text that is handed to the Lucene <c>QueryParser</c>.</param>
/// <returns>
/// One <c>IndexResult</c> per distinct fingerprint, in the order first encountered, carrying the
/// score of the first hit seen for that fingerprint. If anything throws (for example while parsing
/// the query), the exception is logged as a warning and the results collected so far are returned.
/// </returns>
public List<IndexResult> GetDocumentsWithQuery(string query)
{
    List<IndexResult> fingerprints = new List<IndexResult>();
    HashSet<string> fingerprints_already_seen = new HashSet<string>();

    try
    {
        // The using blocks release the reader and searcher even if parsing or searching throws
        using (IndexReader index_reader = IndexReader.Open(LIBRARY_INDEX_BASE_PATH, true))
        {
            using (IndexSearcher index_searcher = new IndexSearcher(index_reader))
            {
                QueryParser query_parser = new QueryParser(Version.LUCENE_29, "content", analyzer);
                Query query_object = query_parser.Parse(query);
                Hits hits = index_searcher.Search(query_object);

                var i = hits.Iterator();
                while (i.MoveNext())
                {
                    Hit hit = (Hit)i.Current;
                    string fingerprint = hit.Get("fingerprint");

                    // HashSet.Add returns false for a fingerprint we have already recorded
                    if (fingerprints_already_seen.Add(fingerprint))
                    {
                        fingerprints.Add(new IndexResult { fingerprint = fingerprint, score = hit.GetScore() });
                    }
                }

                // Close the index
                index_searcher.Close();
            }

            index_reader.Close();
        }
    }
    catch (Exception ex)
    {
        Logging.Warn(ex, "There was a problem opening the index file for searching.");
    }

    return fingerprints;
}
/// <summary>
/// Looks up a single keyword in the "content" field of the index using a <c>TermQuery</c>.
/// </summary>
/// <param name="keyword">
/// The word to look for. It is first passed through <c>ReasonableWord.MakeReasonableWord</c>;
/// if that yields null, no search is performed.
/// </param>
/// <returns>
/// The distinct "fingerprint" values of all hits. The set is empty when the keyword is rejected,
/// and holds whatever was collected so far when an exception occurs (the exception is logged as
/// a warning rather than rethrown).
/// </returns>
public HashSet<string> GetDocumentsWithWord(string keyword)
{
    HashSet<string> matches = new HashSet<string>();

    try
    {
        string reasonable_word = ReasonableWord.MakeReasonableWord(keyword);
        if (reasonable_word == null)
        {
            return matches;
        }

        // NOTE: a pre-check for "segments*" files under LIBRARY_INDEX_BASE_PATH was once sketched
        // here (to avoid many exceptions from IndexReader.Open in a tight loop when no index
        // exists) but it was left disabled pending diagnosis of the underlying problem.

        using (IndexReader reader = IndexReader.Open(LIBRARY_INDEX_BASE_PATH, true))
        {
            using (IndexSearcher searcher = new IndexSearcher(reader))
            {
                Term content_term = new Term("content", reasonable_word);
                TermQuery term_query = new TermQuery(content_term);
                Hits hits = searcher.Search(term_query);

                for (var cursor = hits.Iterator(); cursor.MoveNext(); )
                {
                    Hit hit = (Hit)cursor.Current;
                    matches.Add(hit.Get("fingerprint"));
                }

                // Close the searcher explicitly before leaving its using block
                searcher.Close();
            }

            reader.Close();
        }
    }
    catch (Exception ex)
    {
        Logging.Warn(ex, "GetDocumentsWithWord: There was a problem opening the index file for searching.");
    }

    return matches;
}
/// <summary>
/// Builds a "more like this" query from the text of a file and returns the fingerprints of the
/// matching index entries.
/// </summary>
/// <param name="document_filename">Path of the file whose text seeds the similarity query.</param>
/// <returns>
/// The "fingerprint" field of every hit, in hit order (duplicates are not removed). If anything
/// throws, the exception is logged as a warning and the fingerprints collected so far are returned.
/// </returns>
public List<string> GetDocumentsSimilarToDocument(string document_filename)
{
    List<string> fingerprints = new List<string>();

    try
    {
        using (IndexReader index_reader = IndexReader.Open(LIBRARY_INDEX_BASE_PATH, true))
        {
            using (IndexSearcher index_searcher = new IndexSearcher(index_reader))
            {
                LuceneMoreLikeThis mlt = new LuceneMoreLikeThis(index_reader);
                mlt.SetFieldNames(new string[] { "content" });
                mlt.SetMinTermFreq(0);

                // Keep the file open until the search has been fully consumed, so that disposing it
                // cannot interfere with however the query reads from it, then release the handle.
                using (StreamReader document_reader = new StreamReader(document_filename))
                {
                    Query query = mlt.Like(document_reader);
                    Hits hits = index_searcher.Search(query);

                    var i = hits.Iterator();
                    while (i.MoveNext())
                    {
                        Hit hit = (Hit)i.Current;
                        fingerprints.Add(hit.Get("fingerprint"));
                    }
                }

                // Close the index
                index_searcher.Close();
            }

            index_reader.Close();
        }
    }
    catch (Exception ex)
    {
        Logging.Warn(ex, "GetDocumentsSimilarToDocument: There was a problem opening the index file for searching.");
    }

    return fingerprints;
}