/// <summary>
/// Searches the index at LIBRARY_INDEX_BASE_PATH for documents whose "content" field
/// resembles the text of the given file, using a more-like-this query.
/// </summary>
/// <param name="document_filename">Path of the file whose text seeds the similarity query.</param>
/// <returns>The "fingerprint" field of every hit, in the order the hits are iterated.</returns>
public List<string> GetDocumentsSimilarToDocument(string document_filename)
{
    List<string> fingerprints = new List<string>();

    IndexReader index_reader = IndexReader.Open(LIBRARY_INDEX_BASE_PATH, true);
    try
    {
        Searcher index_searcher = new IndexSearcher(index_reader);
        try
        {
            LuceneMoreLikeThis mlt = new LuceneMoreLikeThis(index_reader);
            mlt.SetFieldNames(new string[] { "content" });
            mlt.SetMinTermFreq(0);

            // Keep the source file open until the search has finished, then release it.
            using (StreamReader document_reader = new StreamReader(document_filename))
            {
                Query query = mlt.Like(document_reader);
                Hits hits = index_searcher.Search(query);

                var i = hits.Iterator();
                while (i.MoveNext())
                {
                    Hit hit = (Hit)i.Current;
                    string fingerprint = hit.Get("fingerprint");
                    fingerprints.Add(fingerprint);
                }
            }
        }
        finally
        {
            // Close the searcher even when building or running the query throws.
            index_searcher.Close();
        }
    }
    finally
    {
        // Close the index even when anything above throws.
        index_reader.Close();
    }

    return fingerprints;
}
/// <summary>
/// Searches the index at LIBRARY_INDEX_BASE_PATH for documents whose "content" field
/// resembles the text of the given file, using a more-like-this query.
/// </summary>
/// <param name="document_filename">Path of the file whose text seeds the similarity query.</param>
/// <returns>
/// The "fingerprint" field of every hit, in the order the hits are iterated. If an exception
/// occurs it is logged as a warning and whatever was collected so far (possibly nothing) is returned.
/// </returns>
public List<string> GetDocumentsSimilarToDocument(string document_filename)
{
    List<string> fingerprints = new List<string>();

    try
    {
        // The using blocks release the reader, the searcher and the source file on every
        // exit path, so no explicit Close() calls are needed.
        // NOTE(review): assumes Dispose() closes the reader/searcher in this Lucene.Net version - confirm.
        using (IndexReader index_reader = IndexReader.Open(LIBRARY_INDEX_BASE_PATH, true))
        using (IndexSearcher index_searcher = new IndexSearcher(index_reader))
        using (StreamReader document_reader = new StreamReader(document_filename))
        {
            LuceneMoreLikeThis mlt = new LuceneMoreLikeThis(index_reader);
            mlt.SetFieldNames(new string[] { "content" });
            mlt.SetMinTermFreq(0);

            Query query = mlt.Like(document_reader);
            Hits hits = index_searcher.Search(query);

            var i = hits.Iterator();
            while (i.MoveNext())
            {
                Hit hit = (Hit)i.Current;
                string fingerprint = hit.Get("fingerprint");
                fingerprints.Add(fingerprint);
            }
        }
    }
    catch (Exception ex)
    {
        // Best effort: a failed similarity search is reported but does not propagate to the caller.
        Logging.Warn(ex, "GetDocumentsSimilarToDocument: There was a problem opening the index file for searching.");
    }

    return fingerprints;
}
/// <summary>
/// Command-line driver: builds a more-like-this query from a URL or a file, runs it against
/// an index and prints up to 25 matching documents to standard output.
/// </summary>
/// <param name="a">
/// Optional arguments: <c>-i &lt;index&gt;</c> selects the index, <c>-f &lt;file&gt;</c> selects the
/// source file, <c>-url &lt;url&gt;</c> selects a source URL (takes precedence over the file).
/// </param>
/// <exception cref="ArgumentException">An option is given without its value.</exception>
public static void Main(String[] a)
{
    String indexName = "localhost_index";
    String fn = "c:/Program Files/Apache Group/Apache/htdocs/manual/vhosts/index.html.en";
    Uri url = null;

    for (int i = 0; i < a.Length; i++)
    {
        String option = a[i];
        bool takesValue = option.Equals("-i") || option.Equals("-f") || option.Equals("-url");
        if (!takesValue)
        {
            continue;
        }

        // Report a trailing option clearly instead of indexing past the end of the array.
        if (i + 1 >= a.Length)
        {
            throw new ArgumentException("Missing value for option " + option);
        }

        String value = a[++i];
        if (option.Equals("-i"))
        {
            indexName = value;
        }
        else if (option.Equals("-f"))
        {
            fn = value;
        }
        else
        {
            url = new Uri(value);
        }
    }

    // Not disposed on purpose: disposing the writer would close the standard output stream.
    StreamWriter o = new StreamWriter(Console.OpenStandardOutput(), Console.Out.Encoding);
    o.AutoFlush = true;

    IndexReader r = IndexReader.Open(indexName);
    try
    {
        o.WriteLine("Open index " + indexName + " which has " + r.NumDocs() + " docs");

        LuceneMoreLikeThis mlt = new LuceneMoreLikeThis(r);

        o.WriteLine("Query generation parameters:");
        o.WriteLine(mlt.DescribeParams());
        o.WriteLine();

        Query query = null;
        if (url != null)
        {
            o.WriteLine("Parsing URL: " + url);
            query = mlt.Like(url);
        }
        else if (fn != null)
        {
            o.WriteLine("Parsing file: " + fn);
            query = mlt.Like(new FileInfo(fn));
        }

        o.WriteLine("q: " + query);
        o.WriteLine();

        // Search through the reader that is already open rather than opening the index a second time.
        Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(r);
        try
        {
            Lucene.Net.Search.Hits hits = searcher.Search(query);
            int len = hits.Length();
            o.WriteLine("found: " + len + " documents matching");
            o.WriteLine();

            int shown = Math.Min(25, len);
            for (int i = 0; i < shown; i++)
            {
                Lucene.Net.Documents.Document d = hits.Doc(i);
                String summary = d.Get("summary");
                o.WriteLine("score : " + hits.Score(i));
                o.WriteLine("url : " + d.Get("url"));
                o.WriteLine("\ttitle : " + d.Get("title"));
                if (summary != null)
                {
                    o.WriteLine("\tsummary: " + summary);
                }
                o.WriteLine();
            }
        }
        finally
        {
            searcher.Close();
        }
    }
    finally
    {
        // Close the index even when query generation or the search throws.
        r.Close();
    }
}