/// <summary>
/// Finds documents that are similar to the current document, comparing the
/// configured fields (<c>fieldsToSearch</c>) with a Lucene "more like this" query.
/// Assumes the index is up to date.
/// </summary>
/// <remarks>
/// The current document is removed from the results by comparing Lucene document
/// numbers instead of assuming it is always the first hit; Lucene does not guarantee
/// that the source document ranks first. At most <c>maxNo</c> hits are requested
/// from the searcher, so the returned list holds at most <c>maxNo</c> items.
/// </remarks>
/// <returns>
/// The similar documents found; an empty sequence when the instance is not
/// initialised or the current document could not be resolved.
/// </returns>
public IEnumerable<SearchResultItem> FindMoreLikeThis()
{
    var results = new List<SearchResultItem>();
    if (!IsInit())
    {
        return results;
    }

    var moreLikeThis = new MoreLikeThis(reader);
    moreLikeThis.SetFieldNames(fieldsToSearch.ToArray());
    // Consider every term, even ones that occur once in the document or in the index.
    moreLikeThis.SetMinTermFreq(1);
    moreLikeThis.SetMinDocFreq(1);

    int currentLuceneDocId = GetLuceneDocNo(docId);
    // NOTE(review): 0 is treated here as "document not found". Lucene document numbers
    // are zero-based, so confirm GetLuceneDocNo never returns 0 for a real document.
    if (currentLuceneDocId == 0)
    {
        return results;
    }

    var query = moreLikeThis.Like(currentLuceneDocId);
    var docs = searcher.Search(query, maxNo);

    foreach (var hit in docs.ScoreDocs)
    {
        // Skip the document we are comparing against, wherever it ranks.
        if (hit.doc == currentLuceneDocId)
        {
            continue;
        }

        var d = reader.Document(hit.doc);
        results.Add(new SearchResultItem
        {
            PageName = d.GetField("nodeName").StringValue(),
            NodeId = int.Parse(d.GetField("__NodeId").StringValue())
        });
    }

    return results;
}
/// <summary>
/// Command-line demo: builds a "more like this" query from a URL or a file,
/// runs it against an index and prints up to 25 matching documents.
/// </summary>
/// <remarks>
/// Recognised options: <c>-i &lt;index&gt;</c>, <c>-f &lt;file&gt;</c>, <c>-url &lt;url&gt;</c>.
/// When a URL is supplied it takes precedence over the file.
/// The index reader and searcher are closed before the method returns, and the
/// searcher reuses the already opened reader instead of opening the index a second time.
/// </remarks>
/// <param name="a">Command-line arguments.</param>
public static void Main(System.String[] a)
{
    System.String indexName = "localhost_index";
    System.String fn = "c:/Program Files/Apache Group/Apache/htdocs/manual/vhosts/index.html.en";
    System.Uri url = null;

    for (int i = 0; i < a.Length; i++)
    {
        if (a[i].Equals("-i"))
        {
            indexName = a[++i];
        }
        else if (a[i].Equals("-f"))
        {
            fn = a[++i];
        }
        else if (a[i].Equals("-url"))
        {
            url = new System.Uri(a[++i]);
        }
    }

    // Writer over standard output; intentionally not disposed so the console stream
    // stays open. AutoFlush makes every line visible immediately.
    System.IO.StreamWriter o = new System.IO.StreamWriter(System.Console.OpenStandardOutput(), System.Console.Out.Encoding);
    o.AutoFlush = true;

    IndexReader r = IndexReader.Open(indexName);
    try
    {
        o.WriteLine("Open index " + indexName + " which has " + r.NumDocs() + " docs");

        MoreLikeThis mlt = new MoreLikeThis(r);
        o.WriteLine("Query generation parameters:");
        o.WriteLine(mlt.DescribeParams());
        o.WriteLine();

        Query query = null;
        if (url != null)
        {
            o.WriteLine("Parsing URL: " + url);
            query = mlt.Like(url);
        }
        else if (fn != null)
        {
            o.WriteLine("Parsing file: " + fn);
            query = mlt.Like(new System.IO.FileInfo(fn));
        }

        o.WriteLine("q: " + query);
        o.WriteLine();

        // Search through the reader that is already open rather than reopening the index by path.
        IndexSearcher searcher = new IndexSearcher(r);
        try
        {
            Hits hits = searcher.Search(query);
            int len = hits.Length();
            o.WriteLine("found: " + len + " documents matching");
            o.WriteLine();

            int shown = System.Math.Min(25, len);
            for (int i = 0; i < shown; i++)
            {
                Document d = hits.Doc(i);
                System.String summary = d.Get("summary");
                o.WriteLine("score : " + hits.Score(i));
                o.WriteLine("url : " + d.Get("url"));
                o.WriteLine("\ttitle : " + d.Get("title"));
                if (summary != null)
                {
                    o.WriteLine("\tsummary: " + summary);
                }
                o.WriteLine();
            }
        }
        finally
        {
            searcher.Close();
        }
    }
    finally
    {
        // A searcher built on an existing reader does not own it, so close the reader here.
        r.Close();
    }
}
/// <summary>
/// Command-line demo: builds a "more like this" query from a URL or a file,
/// runs it against an index and prints up to 25 matching documents.
/// </summary>
/// <remarks>
/// Recognised options: <c>-i &lt;index&gt;</c>, <c>-f &lt;file&gt;</c>, <c>-url &lt;url&gt;</c>.
/// When a URL is supplied it takes precedence over the file.
/// The index reader and searcher are closed before the method returns, and the
/// searcher reuses the already opened reader instead of opening the index a second time.
/// </remarks>
/// <param name="a">Command-line arguments.</param>
public static void Main(System.String[] a)
{
    System.String indexName = "localhost_index";
    System.String fn = "c:/Program Files/Apache Group/Apache/htdocs/manual/vhosts/index.html.en";
    System.Uri url = null;

    for (int i = 0; i < a.Length; i++)
    {
        if (a[i].Equals("-i"))
        {
            indexName = a[++i];
        }
        else if (a[i].Equals("-f"))
        {
            fn = a[++i];
        }
        else if (a[i].Equals("-url"))
        {
            url = new System.Uri(a[++i]);
        }
    }

    // Writer over standard output; intentionally not disposed so the console stream
    // stays open. AutoFlush makes every line visible immediately.
    System.IO.StreamWriter o = new System.IO.StreamWriter(System.Console.OpenStandardOutput(), System.Console.Out.Encoding);
    o.AutoFlush = true;

    IndexReader r = IndexReader.Open(indexName);
    try
    {
        o.WriteLine("Open index " + indexName + " which has " + r.NumDocs() + " docs");

        MoreLikeThis mlt = new MoreLikeThis(r);
        o.WriteLine("Query generation parameters:");
        o.WriteLine(mlt.DescribeParams());
        o.WriteLine();

        Query query = null;
        if (url != null)
        {
            o.WriteLine("Parsing URL: " + url);
            query = mlt.Like(url);
        }
        else if (fn != null)
        {
            o.WriteLine("Parsing file: " + fn);
            query = mlt.Like(new System.IO.FileInfo(fn));
        }

        o.WriteLine("q: " + query);
        o.WriteLine();

        // Search through the reader that is already open rather than reopening the index by path.
        IndexSearcher searcher = new IndexSearcher(r);
        try
        {
            Hits hits = searcher.Search(query);
            int len = hits.Length();
            o.WriteLine("found: " + len + " documents matching");
            o.WriteLine();

            int shown = System.Math.Min(25, len);
            for (int i = 0; i < shown; i++)
            {
                Document d = hits.Doc(i);
                System.String summary = d.Get("summary");
                o.WriteLine("score : " + hits.Score(i));
                o.WriteLine("url : " + d.Get("url"));
                o.WriteLine("\ttitle : " + d.Get("title"));
                if (summary != null)
                {
                    o.WriteLine("\tsummary: " + summary);
                }
                o.WriteLine();
            }
        }
        finally
        {
            searcher.Close();
        }
    }
    finally
    {
        // A searcher built on an existing reader does not own it, so close the reader here.
        r.Close();
    }
}