/// <summary>
/// Finds indexed content similar to the entry identified by <paramref name="entryId"/>
/// by building a Lucene MoreLikeThis query from that entry's indexed document.
/// </summary>
/// <param name="entryId">Id of the entry whose related content is wanted.</param>
/// <param name="max">
/// Hit limit used when looking up the source document; the similarity query itself
/// is executed with <c>max + 1</c>.
/// </param>
/// <param name="blogId">Blog id forwarded unchanged to <c>PerformQuery</c>.</param>
/// <returns>
/// An empty list when the source entry is not found in the index; otherwise whatever
/// <c>PerformQuery</c> returns for the similarity query.
/// </returns>
public IEnumerable<SearchEngineResult> RelatedContents(int entryId, int max, int blogId)
{
    var list = new List<SearchEngineResult>();

    // First look for the original doc.
    Query query = GetIdSearchQuery(entryId);
    TopDocs hits = Searcher.Search(query, max);
    if (hits.scoreDocs.Length <= 0)
    {
        return list;
    }

    int docNum = hits.scoreDocs[0].doc;

    // Set up the MoreLikeThis query. The reader obtained from the writer is only
    // needed while the query is being built (Like() extracts the interesting terms
    // eagerly), so it is released as soon as the query exists instead of being leaked.
    // NOTE(review): assumes DoWriterAction hands back a reader that this method owns
    // (as IndexWriter.GetReader() does) - confirm it is not a shared instance.
    Query moreResultsQuery;
    var reader = DoWriterAction(w => w.GetReader());
    try
    {
        var mlt = new MoreLikeThis(reader);
        mlt.SetAnalyzer(_analyzer);
        mlt.SetFieldNames(new[] { Title, Body, Tags });
        mlt.SetMinDocFreq(_settings.Parameters.MinimumDocumentFrequency);
        mlt.SetMinTermFreq(_settings.Parameters.MinimumTermFrequency);
        mlt.SetBoost(_settings.Parameters.MoreLikeThisBoost);

        moreResultsQuery = mlt.Like(docNum);
    }
    finally
    {
        reader.Close();
    }

    // One extra hit is requested; presumably PerformQuery drops the source entry
    // (it receives entryId) - verify against PerformQuery.
    return PerformQuery(list, moreResultsQuery, max + 1, blogId, entryId);
}
/// <summary>
/// For the configured document and search fields, gets up to a fixed number of
/// documents that are similar to it. Assumes the index is up to date.
/// </summary>
/// <returns>
/// List of similar documents found, never containing the source document itself;
/// empty when the instance is not initialised or the source document number is 0.
/// </returns>
public IEnumerable<SearchResultItem> FindMoreLikeThis()
{
    var results = new List<SearchResultItem>();
    if (!IsInit())
    {
        return results;
    }

    var moreLikeThis = new MoreLikeThis(reader);
    moreLikeThis.SetFieldNames(fieldsToSearch.ToArray());
    moreLikeThis.SetMinTermFreq(1);
    moreLikeThis.SetMinDocFreq(1);

    int currentLuceneDocId = GetLuceneDocNo(docId);

    // NOTE(review): 0 is treated as "not found" here, yet 0 is also a valid Lucene
    // document number - confirm what GetLuceneDocNo returns for a missing document.
    if (currentLuceneDocId == 0)
    {
        return results;
    }

    var query = moreLikeThis.Like(currentLuceneDocId);
    var docs = searcher.Search(query, maxNo);

    foreach (var hit in docs.ScoreDocs)
    {
        // Exclude the source document by its id rather than by assuming it is the
        // top-ranked hit: scoring does not guarantee it comes first, and skipping
        // index 0 blindly could drop a real match and return the source instead.
        if (hit.doc == currentLuceneDocId)
        {
            continue;
        }

        var d = reader.Document(hit.doc);
        var item = new SearchResultItem
        {
            PageName = d.GetField("nodeName").StringValue(),
            NodeId = int.Parse(d.GetField("__NodeId").StringValue())
        };
        results.Add(item);
    }

    return results;
}
/// <summary>
/// Finds stories related to the given story by running a Lucene MoreLikeThis
/// query built from the story's indexed document.
/// </summary>
/// <param name="hostId">Host whose index is searched.</param>
/// <param name="storyId">Story to find related stories for; never included in the result.</param>
/// <returns>
/// The stories loaded via <c>SearchQuery.LoadStorySearchResults</c>, or <c>null</c>
/// when the story cannot be mapped to an index document.
/// </returns>
public StoryCollection Find(int hostId, int storyId)
{
    int? sourceDocId = ConvertStoryIdtoDocId(hostId, storyId);
    if (!sourceDocId.HasValue)
    {
        return null;
    }

    IndexSearcher searcher = SearchQuery.GetSearcher(hostId);
    IndexReader reader = searcher.GetIndexReader();

    MoreLikeThis similarity = new MoreLikeThis(reader);
    similarity.SetAnalyzer(new DnkAnalyzer());

    // These values control the query used to find related/similar stories:
    //  - only the title and tags fields are considered
    //  - a term must appear at least once in the source story
    //  - the generated query holds at most 5 terms
    //  - words shorter than 3 characters are ignored
    //  - a term must appear in at least 4 documents
    similarity.SetFieldNames(new string[] { "title", "tags" });
    similarity.SetMinTermFreq(1);
    similarity.SetMaxQueryTerms(5);
    similarity.SetMinWordLen(3);
    similarity.SetMinDocFreq(4);
    similarity.SetStopWords(StopWords());
    similarity.SetBoost(true);

    Query relatedQuery = similarity.Like(sourceDocId.Value);
    Hits matches = searcher.Search(relatedQuery);

    List<int> relatedStoryIds = new List<int>();
    for (int index = 0; index < matches.Length(); index++)
    {
        Document doc = matches.Doc(index);
        int candidateId = int.Parse(doc.GetField("id").StringValue());

        // The source story matches itself; leave it out.
        if (candidateId == storyId)
        {
            continue;
        }

        relatedStoryIds.Add(candidateId);
        if (relatedStoryIds.Count == NUMBER_OF_RELATED_STORIES_TO_RETURN)
        {
            break;
        }
    }

    return SearchQuery.LoadStorySearchResults(relatedStoryIds);
}