public StoryCollection Find(int hostId, int storyId) { int? docId = ConvertStoryIdtoDocId(hostId, storyId); if (docId.HasValue) { IndexSearcher indexSearch = SearchQuery.GetSearcher(hostId); IndexReader indexReader = indexSearch.GetIndexReader(); MoreLikeThis mlt = new MoreLikeThis(indexReader); mlt.SetAnalyzer(new DnkAnalyzer()); //mlt.SetFieldNames(new string[] { "title", "description" }); //these values control the query used to find related/similar stories // //-we are only using the title and tags fields, //-the term must appear 1 or more times, //-the query will only have 3 terms //-a word less than 3 char in len with be ignored //-the term must appear at in at least 4 doc mlt.SetFieldNames(new string[] { "title", "tags" }); mlt.SetMinTermFreq(1); mlt.SetMaxQueryTerms(5); mlt.SetMinWordLen(3); mlt.SetMinDocFreq(4); mlt.SetStopWords(StopWords()); mlt.SetBoost(true); Query mltQuery = mlt.Like(docId.Value); Hits hits = indexSearch.Search(mltQuery); List<int> results = new List<int>(); for (int i = 0; i < hits.Length(); i++) { Document d = hits.Doc(i); int hitStoryId = int.Parse(d.GetField("id").StringValue()); if (hitStoryId != storyId) { results.Add(hitStoryId); if (results.Count == NUMBER_OF_RELATED_STORIES_TO_RETURN) break; } } return SearchQuery.LoadStorySearchResults(results); } else return null; }
public static void Main(System.String[] a) { System.String indexName = "localhost_index"; System.String fn = "c:/Program Files/Apache Group/Apache/htdocs/manual/vhosts/index.html.en"; System.Uri url = null; for (int i = 0; i < a.Length; i++) { if (a[i].Equals("-i")) { indexName = a[++i]; } else if (a[i].Equals("-f")) { fn = a[++i]; } else if (a[i].Equals("-url")) { url = new System.Uri(a[++i]); } } System.IO.StreamWriter temp_writer; temp_writer = new System.IO.StreamWriter(System.Console.OpenStandardOutput(), System.Console.Out.Encoding); temp_writer.AutoFlush = true; System.IO.StreamWriter o = temp_writer; IndexReader r = IndexReader.Open(indexName); o.WriteLine("Open index " + indexName + " which has " + r.NumDocs() + " docs"); MoreLikeThis mlt = new MoreLikeThis(r); o.WriteLine("Query generation parameters:"); o.WriteLine(mlt.DescribeParams()); o.WriteLine(); Query query = null; if (url != null) { o.WriteLine("Parsing URL: " + url); query = mlt.Like(url); } else if (fn != null) { o.WriteLine("Parsing file: " + fn); query = mlt.Like(new System.IO.FileInfo(fn)); } o.WriteLine("q: " + query); o.WriteLine(); IndexSearcher searcher = new IndexSearcher(indexName); Hits hits = searcher.Search(query); int len = hits.Length(); o.WriteLine("found: " + len + " documents matching"); o.WriteLine(); for (int i = 0; i < System.Math.Min(25, len); i++) { Document d = hits.Doc(i); System.String summary = d.Get("summary"); o.WriteLine("score : " + hits.Score(i)); o.WriteLine("url : " + d.Get("url")); o.WriteLine("\ttitle : " + d.Get("title")); if (summary != null) o.WriteLine("\tsummary: " + d.Get("summary")); o.WriteLine(); } }
public static void Main(System.String[] a) { System.String indexName = "localhost_index"; System.String fn = "c:/Program Files/Apache Group/Apache/htdocs/manual/vhosts/index.html.en"; System.Uri url = null; for (int i = 0; i < a.Length; i++) { if (a[i].Equals("-i")) { indexName = a[++i]; } else if (a[i].Equals("-f")) { fn = a[++i]; } else if (a[i].Equals("-url")) { url = new System.Uri(a[++i]); } } System.IO.StreamWriter temp_writer; temp_writer = new System.IO.StreamWriter(System.Console.OpenStandardOutput(), System.Console.Out.Encoding); temp_writer.AutoFlush = true; System.IO.StreamWriter o = temp_writer; IndexReader r = IndexReader.Open(indexName); o.WriteLine("Open index " + indexName + " which has " + r.NumDocs() + " docs"); MoreLikeThis mlt = new MoreLikeThis(r); o.WriteLine("Query generation parameters:"); o.WriteLine(mlt.DescribeParams()); o.WriteLine(); Query query = null; if (url != null) { o.WriteLine("Parsing URL: " + url); query = mlt.Like(url); } else if (fn != null) { o.WriteLine("Parsing file: " + fn); query = mlt.Like(new System.IO.FileInfo(fn)); } o.WriteLine("q: " + query); o.WriteLine(); IndexSearcher searcher = new IndexSearcher(indexName); Hits hits = searcher.Search(query); int len = hits.Length(); o.WriteLine("found: " + len + " documents matching"); o.WriteLine(); for (int i = 0; i < System.Math.Min(25, len); i++) { Document d = hits.Doc(i); System.String summary = d.Get("summary"); o.WriteLine("score : " + hits.Score(i)); o.WriteLine("url : " + d.Get("url")); o.WriteLine("\ttitle : " + d.Get("title")); if (summary != null) { o.WriteLine("\tsummary: " + d.Get("summary")); } o.WriteLine(); } }
public IList<CorpusDocument> GetMoreLikeThis(string indexName, int indexDocumentId, int maxDocs) { // See: http://lucene.apache.org/java/2_2_0/api/org/apache/lucene/search/similar/MoreLikeThis.html var searcher = GetSearcher(indexName); var mlt = new MoreLikeThis(searcher.GetIndexReader()); mlt.SetAnalyzer(GetAnalyzer(SearchType.Morphologic)); mlt.SetFieldNames(new string[] {"Title", "Content"}); mlt.SetMinWordLen(4); // to avoid most Hebrew ambigous stop-words var query = mlt.Like(indexDocumentId); var tsdc = TopScoreDocCollector.create(maxDocs, true); searcher.Search(query, tsdc); var hits = tsdc.TopDocs().scoreDocs; var ret = new List<CorpusDocument>(maxDocs); foreach (var hit in hits) { var d = searcher.Doc(hit.doc); ret.Add(new CorpusDocument { Id = d.Get("Id"), Title = d.Get("Title"), }); } return ret; }
static void Main(string[] args) { var stopWords = new Hashtable(StringComparer.InvariantCultureIgnoreCase); foreach (var s in StopWords) { stopWords[s] = s; } var connectionString = "Data Source=(local);Initial Catalog=AllgressDB;Integrated Security=true"; Repo = new Repository(connectionString); var directory = PopulateIndex(); //SearchForTerm(directory); var policy = Repo.GetPolicySection(35); string text = Convert(policy.Text); using (var reader = DirectoryReader.Open(directory, true)) using (var indexSearcher = new IndexSearcher(reader)) { var moreLikeThis = new MoreLikeThis(reader); moreLikeThis.SetStopWords(stopWords); moreLikeThis.SetFieldNames(new[] { "description", "procedures", "objectives", "references" }); moreLikeThis.SetBoost(true); moreLikeThis.SetMinDocFreq(2); moreLikeThis.SetMinTermFreq(1); var query = moreLikeThis.Like(text); (query as BooleanQuery).Add(new TermQuery(new Term("type", "policy")), Occur.MUST_NOT); moreLikeThis.SetAnalyzer(new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29)); var docs = from scoreDoc in indexSearcher.Search(query, null, 1000).ScoreDocs select new ScoredDocument { Score = scoreDoc.Score, Document = indexSearcher.Doc(scoreDoc.Doc) }; var results = docs.ToArray(); } Console.ReadKey(); }