Пример #1
0
        /// <summary>
        /// Finds stories similar to the given story by running a Lucene MoreLikeThis
        /// query against the host's index.
        /// </summary>
        /// <param name="hostId">Host whose searcher (and index reader) is used.</param>
        /// <param name="storyId">Story to find related stories for; it is excluded from the results.</param>
        /// <returns>
        /// The value produced by <c>SearchQuery.LoadStorySearchResults</c> for the related
        /// story ids, or <c>null</c> when the story id cannot be mapped to a document id.
        /// </returns>
        public StoryCollection Find(int hostId, int storyId)
        {
            int? docId = ConvertStoryIdtoDocId(hostId, storyId);

            // Nothing to compare against when the story has no document id.
            if (!docId.HasValue)
            {
                return null;
            }

            IndexSearcher searcher = SearchQuery.GetSearcher(hostId);
            MoreLikeThis moreLikeThis = new MoreLikeThis(searcher.GetIndexReader());

            moreLikeThis.SetAnalyzer(new DnkAnalyzer());

            // Tuning of the generated "related stories" query:
            //  - only the title and tags fields are examined,
            //  - a term must occur at least once in the source document,
            //  - the query is built from at most 5 terms,
            //  - words shorter than 3 characters are ignored,
            //  - a term must occur in at least 4 documents,
            //  - stop words are skipped and term boosting is enabled.
            moreLikeThis.SetFieldNames(new string[] { "title", "tags" });
            moreLikeThis.SetMinTermFreq(1);
            moreLikeThis.SetMaxQueryTerms(5);
            moreLikeThis.SetMinWordLen(3);
            moreLikeThis.SetMinDocFreq(4);
            moreLikeThis.SetStopWords(StopWords());
            moreLikeThis.SetBoost(true);

            Query relatedQuery = moreLikeThis.Like(docId.Value);
            Hits relatedHits = searcher.Search(relatedQuery);

            List<int> relatedStoryIds = new List<int>();

            for (int index = 0; index < relatedHits.Length(); index++)
            {
                Document hitDocument = relatedHits.Doc(index);
                string rawId = hitDocument.GetField("id").StringValue();
                int candidateId = int.Parse(rawId);

                // The source story matches itself; leave it out of the results.
                if (candidateId == storyId)
                {
                    continue;
                }

                relatedStoryIds.Add(candidateId);

                // Stop as soon as the configured number of related stories is collected.
                if (relatedStoryIds.Count == NUMBER_OF_RELATED_STORIES_TO_RETURN)
                {
                    break;
                }
            }

            return SearchQuery.LoadStorySearchResults(relatedStoryIds);
        }
Пример #2
0
		/// <summary>
		/// Returns up to <paramref name="maxDocs"/> documents that a Lucene MoreLikeThis
		/// query considers similar to the given indexed document.
		/// </summary>
		/// <param name="indexName">Name used to obtain the index searcher.</param>
		/// <param name="indexDocumentId">Document id passed to <c>MoreLikeThis.Like</c>.</param>
		/// <param name="maxDocs">Maximum number of top-scoring hits to collect.</param>
		/// <returns>A list of documents populated with the stored "Id" and "Title" fields of each hit.</returns>
		public IList<CorpusDocument> GetMoreLikeThis(string indexName, int indexDocumentId, int maxDocs)
		{
			// See: http://lucene.apache.org/java/2_2_0/api/org/apache/lucene/search/similar/MoreLikeThis.html

			var indexSearcher = GetSearcher(indexName);

			var moreLikeThis = new MoreLikeThis(indexSearcher.GetIndexReader());
			moreLikeThis.SetAnalyzer(GetAnalyzer(SearchType.Morphologic));
			moreLikeThis.SetFieldNames(new string[] {"Title", "Content"});
			moreLikeThis.SetMinWordLen(4); // to avoid most Hebrew ambiguous stop-words

			var similarityQuery = moreLikeThis.Like(indexDocumentId);

			// Collect only the top-scoring hits, capped at maxDocs.
			var collector = TopScoreDocCollector.create(maxDocs, true);
			indexSearcher.Search(similarityQuery, collector);
			var scoreDocs = collector.TopDocs().scoreDocs;

			var similarDocuments = new List<CorpusDocument>(maxDocs);

			foreach (var scoreDoc in scoreDocs)
			{
				// Load the stored fields of the hit and copy the ones we expose.
				var storedDocument = indexSearcher.Doc(scoreDoc.doc);

				var corpusDocument = new CorpusDocument();
				corpusDocument.Id = storedDocument.Get("Id");
				corpusDocument.Title = storedDocument.Get("Title");

				similarDocuments.Add(corpusDocument);
			}

			return similarDocuments;
		}