public static IList <CorpusDocument> GetMoreLikeThis(string indexName, int indexDocumentId, int maxDocs) { // See: http://lucene.apache.org/java/2_2_0/api/org/apache/lucene/search/similar/MoreLikeThis.html var mlt = new MoreLikeThis(Searcher.GetIndexReader()); mlt.SetAnalyzer(new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29)); mlt.SetFieldNames(new[] { "Title", "Content" }); mlt.SetMinWordLen(4); // improve relevancy var query = mlt.Like(indexDocumentId); var tsdc = TopScoreDocCollector.create(maxDocs, true); Searcher.Search(query, tsdc); var hits = tsdc.TopDocs().ScoreDocs; var ret = new List <CorpusDocument>(maxDocs); foreach (var hit in hits) { var d = Searcher.Doc(hit.doc); ret.Add(new CorpusDocument { Id = d.Get("Id"), Title = d.Get("Title"), }); } return(ret); }
public StoryCollection Find(int hostId, int storyId) { int?docId = ConvertStoryIdtoDocId(hostId, storyId); if (docId.HasValue) { IndexSearcher indexSearch = SearchQuery.GetSearcher(hostId); IndexReader indexReader = indexSearch.GetIndexReader(); MoreLikeThis mlt = new MoreLikeThis(indexReader); mlt.SetAnalyzer(new DnkAnalyzer()); //mlt.SetFieldNames(new string[] { "title", "description" }); //these values control the query used to find related/similar stories // //-we are only using the title and tags fields, //-the term must appear 1 or more times, //-the query will only have 3 terms //-a word less than 3 char in len with be ignored //-the term must appear at in at least 4 doc mlt.SetFieldNames(new string[] { "title", "tags" }); mlt.SetMinTermFreq(1); mlt.SetMaxQueryTerms(5); mlt.SetMinWordLen(3); mlt.SetMinDocFreq(4); mlt.SetStopWords(StopWords()); mlt.SetBoost(true); Query mltQuery = mlt.Like(docId.Value); Hits hits = indexSearch.Search(mltQuery); List <int> results = new List <int>(); for (int i = 0; i < hits.Length(); i++) { Document d = hits.Doc(i); int hitStoryId = int.Parse(d.GetField("id").StringValue()); if (hitStoryId != storyId) { results.Add(hitStoryId); if (results.Count == NUMBER_OF_RELATED_STORIES_TO_RETURN) { break; } } } return(SearchQuery.LoadStorySearchResults(results)); } else { return(null); } }