/// <summary>
/// Builds a Lucene MoreLikeThis query from an already-indexed document and
/// returns the top-scoring documents for that query.
/// </summary>
/// <remarks>
/// See: http://lucene.apache.org/java/2_2_0/api/org/apache/lucene/search/similar/MoreLikeThis.html
/// </remarks>
/// <param name="indexName">Not used by this method.</param>
/// <param name="indexDocumentId">Lucene document number handed to <c>MoreLikeThis.Like</c>.</param>
/// <param name="maxDocs">Number of hits requested from the collector; also the initial capacity of the returned list.</param>
/// <returns>One <c>CorpusDocument</c> (Id and Title only) per collected hit, in collector order.</returns>
public static IList<CorpusDocument> GetMoreLikeThis(string indexName, int indexDocumentId, int maxDocs)
{
    var moreLikeThis = new MoreLikeThis(Searcher.GetIndexReader());
    moreLikeThis.SetAnalyzer(new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29));
    moreLikeThis.SetFieldNames(new[] { "Title", "Content" });
    moreLikeThis.SetMinWordLen(4); // improve relevancy

    var similarityQuery = moreLikeThis.Like(indexDocumentId);

    var collector = TopScoreDocCollector.create(maxDocs, true);
    Searcher.Search(similarityQuery, collector);
    var scoreDocs = collector.TopDocs().ScoreDocs;

    var documents = new List<CorpusDocument>(maxDocs);
    for (int i = 0; i < scoreDocs.Length; i++)
    {
        // Load the stored fields for this hit and project the two we expose.
        var stored = Searcher.Doc(scoreDocs[i].doc);
        var corpusDocument = new CorpusDocument
        {
            Id = stored.Get("Id"),
            Title = stored.Get("Title"),
        };
        documents.Add(corpusDocument);
    }

    return documents;
}
/// <summary>
/// Finds content related to an indexed entry by locating the entry's Lucene
/// document and running a MoreLikeThis query built from it.
/// </summary>
/// <param name="entryId">Id of the entry whose related content is wanted; also forwarded to <c>PerformQuery</c>.</param>
/// <param name="max">Hit limit for the lookup of the original document; the related-content query is given <c>max + 1</c>.</param>
/// <param name="blogId">Forwarded unchanged to <c>PerformQuery</c>.</param>
/// <returns>
/// Whatever <c>PerformQuery</c> returns, or an empty list when the entry is
/// not found in the index.
/// </returns>
public IEnumerable<SearchEngineResult> RelatedContents(int entryId, int max, int blogId)
{
    var list = new List<SearchEngineResult>();

    // First look for the original doc.
    Query query = GetIdSearchQuery(entryId);
    TopDocs hits = Searcher.Search(query, max);
    if (hits.scoreDocs.Length == 0)
    {
        return list;
    }

    int docNum = hits.scoreDocs[0].doc;

    // Set up the MoreLikeThis searcher. The reader obtained from the writer
    // must be closed by the caller; it is only needed while Like() extracts
    // terms, so release it as soon as the query has been built.
    Query moreResultsQuery;
    var reader = DoWriterAction(w => w.GetReader());
    try
    {
        var mlt = new MoreLikeThis(reader);
        mlt.SetAnalyzer(_analyzer);
        mlt.SetFieldNames(new[] { Title, Body, Tags });
        mlt.SetMinDocFreq(_settings.Parameters.MinimumDocumentFrequency);
        mlt.SetMinTermFreq(_settings.Parameters.MinimumTermFrequency);
        mlt.SetBoost(_settings.Parameters.MoreLikeThisBoost);

        moreResultsQuery = mlt.Like(docNum);
    }
    finally
    {
        reader.Close();
    }

    // NOTE(review): max + 1 presumably leaves room for the source entry itself,
    // which PerformQuery receives as entryId — confirm against PerformQuery.
    return PerformQuery(list, moreResultsQuery, max + 1, blogId, entryId);
}
/// <summary>
/// Finds content related to an indexed entry by locating the entry's Lucene
/// document and running a MoreLikeThis query built from it.
/// </summary>
/// <param name="entryId">Id of the entry whose related content is wanted; also forwarded to <c>PerformQuery</c>.</param>
/// <param name="max">Hit limit for the lookup of the original document; the related-content query is given <c>max + 1</c>.</param>
/// <param name="blogId">Forwarded unchanged to <c>PerformQuery</c>.</param>
/// <returns>
/// Whatever <c>PerformQuery</c> returns, or a response with an empty result
/// list and the lookup's total hit count when the entry is not found in the index.
/// </returns>
public SearchEngineResponse RelatedContents(int entryId, int max, int blogId)
{
    var list = new List<SearchEngineResult>();

    // First look for the original doc.
    Query query = GetIdSearchQuery(entryId);
    TopDocs hits = Searcher.Search(query, max);
    if (hits.scoreDocs.Length == 0)
    {
        return new SearchEngineResponse { TotalCount = hits.totalHits, Results = list };
    }

    int docNum = hits.scoreDocs[0].doc;

    // Set up the MoreLikeThis searcher. The reader obtained from the writer
    // must be closed by the caller; it is only needed while Like() extracts
    // terms, so release it as soon as the query has been built.
    Query moreResultsQuery;
    var reader = DoWriterAction(w => w.GetReader());
    try
    {
        var mlt = new MoreLikeThis(reader);
        mlt.SetAnalyzer(_analyzer);
        mlt.SetFieldNames(new[] { Categories, Silouhettes, Tags });

        moreResultsQuery = mlt.Like(docNum);
    }
    finally
    {
        reader.Close();
    }

    // NOTE(review): max + 1 presumably leaves room for the source entry itself,
    // which PerformQuery receives as entryId — confirm against PerformQuery.
    return PerformQuery(list, moreResultsQuery, 0, max + 1, blogId, entryId);
}
/// <summary>
/// Finds stories related to the given story by running a Lucene MoreLikeThis
/// query built from the story's indexed document.
/// </summary>
/// <param name="hostId">Host whose searcher is used and whose story-to-document mapping is consulted.</param>
/// <param name="storyId">Story to find relatives for; it is excluded from the results.</param>
/// <returns>
/// The stories loaded via <c>SearchQuery.LoadStorySearchResults</c> for the
/// related story ids, or <c>null</c> when the story has no document id.
/// </returns>
public StoryCollection Find(int hostId, int storyId)
{
    int? sourceDocId = ConvertStoryIdtoDocId(hostId, storyId);
    if (!sourceDocId.HasValue)
    {
        return null;
    }

    IndexSearcher searcher = SearchQuery.GetSearcher(hostId);
    IndexReader reader = searcher.GetIndexReader();

    MoreLikeThis moreLikeThis = new MoreLikeThis(reader);
    moreLikeThis.SetAnalyzer(new DnkAnalyzer());

    // These values control the query used to find related/similar stories:
    //  - only the title and tags fields are used,
    //  - a term must appear 1 or more times in the source document,
    //  - the query will have at most 5 terms,
    //  - words shorter than 3 characters are ignored,
    //  - a term must appear in at least 4 documents.
    moreLikeThis.SetFieldNames(new string[] { "title", "tags" });
    moreLikeThis.SetMinTermFreq(1);
    moreLikeThis.SetMaxQueryTerms(5);
    moreLikeThis.SetMinWordLen(3);
    moreLikeThis.SetMinDocFreq(4);
    moreLikeThis.SetStopWords(StopWords());
    moreLikeThis.SetBoost(true);

    Query relatedQuery = moreLikeThis.Like(sourceDocId.Value);
    Hits relatedHits = searcher.Search(relatedQuery);

    List<int> relatedStoryIds = new List<int>();
    for (int index = 0; index < relatedHits.Length(); index++)
    {
        Document hitDocument = relatedHits.Doc(index);
        int hitStoryId = int.Parse(hitDocument.GetField("id").StringValue());

        // Skip the story we started from.
        if (hitStoryId == storyId)
        {
            continue;
        }

        relatedStoryIds.Add(hitStoryId);

        // Stop as soon as the configured number of related stories is collected.
        if (relatedStoryIds.Count == NUMBER_OF_RELATED_STORIES_TO_RETURN)
        {
            break;
        }
    }

    return SearchQuery.LoadStorySearchResults(relatedStoryIds);
}