/// <summary>
/// Finds stories related to <paramref name="storyId"/> by running a Lucene
/// MoreLikeThis query, seeded with that story's indexed document, against
/// the searcher obtained for <paramref name="hostId"/>.
/// </summary>
/// <param name="hostId">Host used to resolve the document id and to obtain the index searcher.</param>
/// <param name="storyId">Story to find related stories for; it is never included in the results.</param>
/// <returns>
/// The result of <c>SearchQuery.LoadStorySearchResults</c> for the collected story ids
/// (at most <c>NUMBER_OF_RELATED_STORIES_TO_RETURN</c> of them), or <c>null</c> when the
/// story id could not be converted to a document id.
/// </returns>
public StoryCollection Find(int hostId, int storyId)
{
    int? sourceDocId = ConvertStoryIdtoDocId(hostId, storyId);
    if (!sourceDocId.HasValue)
    {
        return null;
    }

    IndexSearcher searcher = SearchQuery.GetSearcher(hostId);
    MoreLikeThis similarity = new MoreLikeThis(searcher.GetIndexReader());
    similarity.SetAnalyzer(new DnkAnalyzer());

    // Tuning for the generated "related stories" query:
    //  - only the title and tags fields contribute terms
    //  - a term must occur at least once in the source document
    //  - the generated query contains at most 5 terms
    //  - words shorter than 3 characters are ignored
    //  - a term must occur in at least 4 documents of the index
    similarity.SetFieldNames(new[] { "title", "tags" });
    similarity.SetMinTermFreq(1);
    similarity.SetMaxQueryTerms(5);
    similarity.SetMinWordLen(3);
    similarity.SetMinDocFreq(4);
    similarity.SetStopWords(StopWords());
    similarity.SetBoost(true);

    Query relatedQuery = similarity.Like(sourceDocId.Value);
    Hits matches = searcher.Search(relatedQuery);

    // Walk the hits in order, collecting story ids until enough have been gathered.
    List<int> relatedIds = new List<int>();
    int index = 0;
    while (index < matches.Length())
    {
        Document match = matches.Doc(index);
        index++;

        int candidateId = int.Parse(match.GetField("id").StringValue());
        if (candidateId == storyId)
        {
            // The source story matches itself; skip it.
            continue;
        }

        relatedIds.Add(candidateId);
        if (relatedIds.Count == NUMBER_OF_RELATED_STORIES_TO_RETURN)
        {
            break;
        }
    }

    return SearchQuery.LoadStorySearchResults(relatedIds);
}
/// <summary>
/// Console entry point: populates the index, loads policy section 35 from the
/// repository, and runs a MoreLikeThis query built from its converted text,
/// excluding documents whose "type" field is "policy". Waits for a key press
/// before exiting.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    // Stop words are handed to MoreLikeThis as a Hashtable; the comparer makes
    // key lookups case-insensitive.
    var stopWords = new Hashtable(StringComparer.InvariantCultureIgnoreCase);
    foreach (var s in StopWords)
    {
        stopWords[s] = s;
    }

    var connectionString = "Data Source=(local);Initial Catalog=AllgressDB;Integrated Security=true";
    Repo = new Repository(connectionString);

    var directory = PopulateIndex();

    var policy = Repo.GetPolicySection(35);
    string text = Convert(policy.Text);

    using (var reader = DirectoryReader.Open(directory, true))
    using (var indexSearcher = new IndexSearcher(reader))
    {
        // Configure MoreLikeThis completely BEFORE asking it for a query:
        // settings applied after Like() cannot affect the query already built.
        var moreLikeThis = new MoreLikeThis(reader);
        moreLikeThis.SetAnalyzer(new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29));
        moreLikeThis.SetStopWords(stopWords);
        moreLikeThis.SetFieldNames(new[] { "description", "procedures", "objectives", "references" });
        moreLikeThis.SetBoost(true);
        moreLikeThis.SetMinDocFreq(2);
        moreLikeThis.SetMinTermFreq(1);

        var query = moreLikeThis.Like(text);

        // Direct cast instead of "as": if the query is ever not a BooleanQuery this
        // fails with a descriptive InvalidCastException rather than a NullReferenceException.
        var booleanQuery = (BooleanQuery)query;
        booleanQuery.Add(new TermQuery(new Term("type", "policy")), Occur.MUST_NOT);

        var docs = from scoreDoc in indexSearcher.Search(query, null, 1000).ScoreDocs
                   select new ScoredDocument
                   {
                       Score = scoreDoc.Score,
                       Document = indexSearcher.Doc(scoreDoc.Doc)
                   };

        // Materialize inside the using block: the LINQ query is deferred and reads
        // from indexSearcher, which is disposed when the block ends.
        var results = docs.ToArray();
    }

    Console.ReadKey();
}