Example #1
0
        /// <summary>
        /// Finds content related to an entry by locating the entry's indexed document
        /// and running a MoreLikeThis query built from it.
        /// </summary>
        /// <param name="entryId">Id of the entry whose related content is requested.</param>
        /// <param name="max">Hit limit for the lookup; the similarity search is run with <c>max + 1</c>.</param>
        /// <param name="blogId">Blog id passed through to <c>PerformQuery</c>.</param>
        /// <returns>
        /// Whatever <c>PerformQuery</c> produces for the similarity query, or an empty
        /// list when the entry cannot be found in the index.
        /// </returns>
        public IEnumerable <SearchEngineResult> RelatedContents(int entryId, int max, int blogId)
        {
            var list = new List <SearchEngineResult>();

            //First look for the original doc
            Query   query = GetIdSearchQuery(entryId);
            TopDocs hits  = Searcher.Search(query, max);

            if (hits.scoreDocs.Length <= 0)
            {
                return(list);
            }

            int docNum = hits.scoreDocs[0].doc;

            //Setup MoreLikeThis searcher.
            //The reader obtained from the writer is owned by this method, so it must be
            //closed here; it is only needed while the similarity query is being built.
            Query moreResultsQuery;
            var   reader = DoWriterAction(w => w.GetReader());

            try
            {
                var mlt = new MoreLikeThis(reader);

                mlt.SetAnalyzer(_analyzer);
                mlt.SetFieldNames(new[] { Title, Body, Tags });
                mlt.SetMinDocFreq(_settings.Parameters.MinimumDocumentFrequency);
                mlt.SetMinTermFreq(_settings.Parameters.MinimumTermFrequency);
                mlt.SetBoost(_settings.Parameters.MoreLikeThisBoost);

                moreResultsQuery = mlt.Like(docNum);
            }
            finally
            {
                reader.Close();
            }

            //max + 1: presumably leaves room for the source entry itself, which
            //PerformQuery receives as entryId -- confirm against PerformQuery.
            return(PerformQuery(list, moreResultsQuery, max + 1, blogId, entryId));
        }
Example #2
0
        /// <summary>
        /// For the current document and the configured fields, finds up to a fixed number
        /// of similar documents using a MoreLikeThis query.
        /// Assumes the index is up to date.
        /// </summary>
        /// <returns>
        /// List of similar documents found (never the current document itself); empty when
        /// the component is not initialised or the current document cannot be resolved.
        /// </returns>
        public IEnumerable <SearchResultItem> FindMoreLikeThis()
        {
            var results = new List <SearchResultItem>();

            if (!IsInit())
            {
                return(results);
            }

            var moreLikeThis = new MoreLikeThis(reader);
            moreLikeThis.SetFieldNames(fieldsToSearch.ToArray());
            moreLikeThis.SetMinTermFreq(1);
            moreLikeThis.SetMinDocFreq(1);

            int currentLuceneDocId = GetLuceneDocNo(docId);

            // NOTE(review): 0 is treated as "not found" here, but 0 is also a valid Lucene
            // document number -- confirm what GetLuceneDocNo returns for a missing document.
            if (currentLuceneDocId == 0)
            {
                return(results);
            }

            var query = moreLikeThis.Like(currentLuceneDocId);
            var docs  = searcher.Search(query, maxNo);

            foreach (var hit in docs.ScoreDocs)
            {
                // Exclude the source document by its doc number rather than by position:
                // ranking does not guarantee that it is the first hit.
                if (hit.doc == currentLuceneDocId)
                {
                    continue;
                }

                var d    = reader.Document(hit.doc);
                var item = new SearchResultItem
                {
                    PageName = d.GetField("nodeName").StringValue(),
                    NodeId   = int.Parse(d.GetField("__NodeId").StringValue())
                };
                results.Add(item);
            }

            return(results);
        }
Example #3
0
        /// <summary>
        /// Finds stories related to the given story using a MoreLikeThis query over the
        /// host's index.
        /// </summary>
        /// <param name="hostId">Host whose searcher is used.</param>
        /// <param name="storyId">Story to find related stories for; it is excluded from the results.</param>
        /// <returns>
        /// The stories loaded for the related ids, or <c>null</c> when the story id cannot
        /// be converted to an index document id.
        /// </returns>
        public StoryCollection Find(int hostId, int storyId)
        {
            int?docId = ConvertStoryIdtoDocId(hostId, storyId);

            if (!docId.HasValue)
            {
                return(null);
            }

            IndexSearcher searcher = SearchQuery.GetSearcher(hostId);
            MoreLikeThis  similar  = new MoreLikeThis(searcher.GetIndexReader());

            similar.SetAnalyzer(new DnkAnalyzer());

            //Settings that shape the query used to find related/similar stories:
            // - only the title and tags fields are considered
            // - minimum term frequency of 1
            // - at most 5 terms in the generated query
            // - minimum word length of 3
            // - minimum document frequency of 4
            similar.SetFieldNames(new string[] { "title", "tags" });
            similar.SetMinTermFreq(1);
            similar.SetMaxQueryTerms(5);
            similar.SetMinWordLen(3);
            similar.SetMinDocFreq(4);
            similar.SetStopWords(StopWords());
            similar.SetBoost(true);

            Query relatedQuery = similar.Like(docId.Value);
            Hits  matches      = searcher.Search(relatedQuery);

            List <int> relatedIds = new List <int>();

            for (int index = 0; index < matches.Length(); index++)
            {
                Document doc       = matches.Doc(index);
                int      candidate = int.Parse(doc.GetField("id").StringValue());

                //never report the story as related to itself
                if (candidate == storyId)
                {
                    continue;
                }

                relatedIds.Add(candidate);

                //stop as soon as the configured number of related stories is collected
                if (relatedIds.Count == NUMBER_OF_RELATED_STORIES_TO_RETURN)
                {
                    break;
                }
            }

            return(SearchQuery.LoadStorySearchResults(relatedIds));
        }