Example #1
0
        /// <summary>
        /// Finds content related to the given entry by building a MoreLikeThis query
        /// from the entry's indexed document and running it through <c>PerformQuery</c>.
        /// </summary>
        /// <param name="entryId">Id of the entry whose related content is wanted.</param>
        /// <param name="max">
        /// Hit limit used when locating the entry itself; the related-content query
        /// is issued with <c>max + 1</c>.
        /// </param>
        /// <param name="blogId">Blog id, passed through to <c>PerformQuery</c>.</param>
        /// <returns>
        /// An empty list when the entry cannot be found in the index; otherwise the
        /// result of <c>PerformQuery</c> for the generated query.
        /// </returns>
        public IEnumerable<SearchEngineResult> RelatedContents(int entryId, int max, int blogId)
        {
            var list = new List<SearchEngineResult>();

            // First look for the original doc.
            Query query = GetIdSearchQuery(entryId);
            TopDocs hits = Searcher.Search(query, max);

            if (hits.scoreDocs.Length <= 0)
            {
                // The entry is not indexed, so there is nothing to compare against.
                return list;
            }

            int docNum = hits.scoreDocs[0].doc;

            // Set up the MoreLikeThis searcher. The reader handed out by the writer
            // belongs to the caller, so it is closed as soon as the query is built;
            // the generated query no longer needs it.
            Query moreResultsQuery;
            var reader = DoWriterAction(w => w.GetReader());
            try
            {
                var mlt = new MoreLikeThis(reader);

                mlt.SetAnalyzer(_analyzer);
                mlt.SetFieldNames(new[] { Title, Body, Tags });
                mlt.SetMinDocFreq(_settings.Parameters.MinimumDocumentFrequency);
                mlt.SetMinTermFreq(_settings.Parameters.MinimumTermFrequency);
                mlt.SetBoost(_settings.Parameters.MoreLikeThisBoost);

                moreResultsQuery = mlt.Like(docNum);
            }
            finally
            {
                if (reader != null)
                {
                    reader.Close();
                }
            }

            // NOTE(review): max + 1 presumably leaves room for the entry itself,
            // which PerformQuery is given via entryId - confirm against PerformQuery.
            return PerformQuery(list, moreResultsQuery, max + 1, blogId, entryId);
        }
Example #2
0
        /// <summary>
        /// Finds stories similar to the given story by running a MoreLikeThis query
        /// against the host's index.
        /// </summary>
        /// <param name="hostId">Host whose index is searched.</param>
        /// <param name="storyId">Story to find related stories for; it is excluded from the results.</param>
        /// <returns>
        /// <c>null</c> when the story has no document id in the index; otherwise the
        /// result of <c>SearchQuery.LoadStorySearchResults</c> for the collected story ids.
        /// </returns>
        public StoryCollection Find(int hostId, int storyId)
        {
            int? sourceDocId = ConvertStoryIdtoDocId(hostId, storyId);

            // Without an indexed document there is nothing to compare against.
            if (!sourceDocId.HasValue)
            {
                return null;
            }

            IndexSearcher searcher = SearchQuery.GetSearcher(hostId);
            MoreLikeThis similarity = new MoreLikeThis(searcher.GetIndexReader());

            similarity.SetAnalyzer(new DnkAnalyzer());

            // These settings shape the query used to find related/similar stories:
            //  - only the title and tags fields are considered,
            //  - a term must occur at least once in the source document,
            //  - the generated query is capped at 5 terms,
            //  - words shorter than 3 characters are ignored,
            //  - a term must appear in at least 4 documents.
            similarity.SetFieldNames(new string[] { "title", "tags" });
            similarity.SetMinTermFreq(1);
            similarity.SetMaxQueryTerms(5);
            similarity.SetMinWordLen(3);
            similarity.SetMinDocFreq(4);
            similarity.SetStopWords(StopWords());
            similarity.SetBoost(true);

            Query relatedQuery = similarity.Like(sourceDocId.Value);
            Hits matches = searcher.Search(relatedQuery);

            List<int> relatedStoryIds = new List<int>();

            for (int index = 0; index < matches.Length(); index++)
            {
                Document doc = matches.Doc(index);
                int candidateId = int.Parse(doc.GetField("id").StringValue());

                // Skip the story we started from.
                if (candidateId == storyId)
                {
                    continue;
                }

                relatedStoryIds.Add(candidateId);

                // Stop once the configured number of related stories is reached.
                if (relatedStoryIds.Count == NUMBER_OF_RELATED_STORIES_TO_RETURN)
                {
                    break;
                }
            }

            return SearchQuery.LoadStorySearchResults(relatedStoryIds);
        }