Beispiel #1
0
        /// <summary>
        /// Runs a Lucene "more like this" search seeded from one indexed document and
        /// returns the best-scoring matches as <see cref="CorpusDocument"/> instances
        /// carrying only their stored "Id" and "Title" fields.
        /// </summary>
        /// <param name="indexName">
        /// Not read by this method; the search always goes through the static
        /// <c>Searcher</c>. Kept so existing callers continue to compile.
        /// </param>
        /// <param name="indexDocumentId">
        /// Document identifier handed to <c>MoreLikeThis.Like</c> as the seed document
        /// (presumably Lucene's internal document number - confirm against callers).
        /// </param>
        /// <param name="maxDocs">
        /// Upper bound on the number of results. A value of zero or less yields an
        /// empty list without touching the index.
        /// </param>
        /// <returns>At most <paramref name="maxDocs"/> documents; never <c>null</c>.</returns>
        public static IList<CorpusDocument> GetMoreLikeThis(string indexName, int indexDocumentId, int maxDocs)
        {
            // See: http://lucene.apache.org/java/2_2_0/api/org/apache/lucene/search/similar/MoreLikeThis.html

            // A non-positive limit can never produce results; return early instead of
            // passing it on to the collector and to the List<T> capacity constructor
            // (which throws for negative values).
            if (maxDocs <= 0)
            {
                return new List<CorpusDocument>();
            }

            var mlt = new MoreLikeThis(Searcher.GetIndexReader());
            mlt.SetAnalyzer(new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29));
            mlt.SetFieldNames(new[] { "Title", "Content" });
            mlt.SetMinWordLen(4); // improve relevancy

            var query = mlt.Like(indexDocumentId);

            var tsdc = TopScoreDocCollector.create(maxDocs, true);
            Searcher.Search(query, tsdc);
            var hits = tsdc.TopDocs().ScoreDocs;

            // Size the list by the hits actually found rather than by the requested
            // maximum, so a generous maxDocs does not allocate a large unused buffer.
            var ret = new List<CorpusDocument>(hits.Length);

            foreach (var hit in hits)
            {
                // Load the stored document so its stored fields can be copied out.
                var d = Searcher.Doc(hit.doc);
                ret.Add(new CorpusDocument
                {
                    Id = d.Get("Id"),
                    Title = d.Get("Title"),
                });
            }
            return ret;
        }
        /// <summary>
        /// Expands this query into a concrete <see cref="BooleanQuery"/>: a
        /// <see cref="MoreLikeThis"/> helper is configured from this instance's
        /// settings, asked for a query resembling <c>likeText</c>, and the result is
        /// required to match a configurable fraction of its clauses.
        /// </summary>
        /// <param name="reader">Index reader passed to the <see cref="MoreLikeThis"/> constructor.</param>
        /// <returns>
        /// The boolean query produced by <see cref="MoreLikeThis"/>, with its
        /// minimum-should-match count set.
        /// </returns>
        public override Query Rewrite(IndexReader reader)
        {
            MoreLikeThis mlt = new MoreLikeThis(reader);

            mlt.SetFieldNames(moreLikeFields);
            mlt.SetAnalyzer(analyzer);
            mlt.SetMinTermFreq(minTermFrequency);
            // A negative minDocFreq means "not configured": the setter is skipped and
            // whatever value MoreLikeThis already holds stays in effect.
            if (minDocFreq >= 0)
            {
                mlt.SetMinDocFreq(minDocFreq);
            }
            mlt.SetMaxQueryTerms(maxQueryTerms);
            mlt.SetStopWords(stopWords);

            BooleanQuery bq;
            // Dispose the reader once the query has been built; Like is expected to
            // consume the text before it returns.
            using (System.IO.StringReader likeReader = new System.IO.StringReader(likeText))
            {
                bq = (BooleanQuery)mlt.Like(likeReader);
            }

            BooleanClause[] clauses = bq.GetClauses();
            // Require percentTermsToMatch of the generated clauses to match; the
            // (int) cast truncates any fractional clause count toward zero.
            bq.SetMinimumNumberShouldMatch((int)(clauses.Length * percentTermsToMatch));
            return bq;
        }
Beispiel #3
0
        /// <summary>
        /// Rewrites this query into a <see cref="BooleanQuery"/> generated by
        /// <see cref="MoreLikeThis"/> from <c>likeText</c>, using the field names,
        /// analyzer, frequency limits and stop words held by this instance.
        /// </summary>
        /// <param name="reader">Index reader passed to the <see cref="MoreLikeThis"/> constructor.</param>
        /// <returns>
        /// The generated boolean query, with its minimum-should-match count derived
        /// from <c>percentTermsToMatch</c>.
        /// </returns>
        public override Query Rewrite(IndexReader reader)
        {
            MoreLikeThis mlt = new MoreLikeThis(reader);

            mlt.SetFieldNames(moreLikeFields);
            mlt.SetAnalyzer(analyzer);
            mlt.SetMinTermFreq(minTermFrequency);
            // Only override the minimum document frequency when a non-negative value
            // was supplied; otherwise MoreLikeThis keeps its current setting.
            if (minDocFreq >= 0)
            {
                mlt.SetMinDocFreq(minDocFreq);
            }
            mlt.SetMaxQueryTerms(maxQueryTerms);
            mlt.SetStopWords(stopWords);

            BooleanQuery bq;
            // Scope the reader so it is disposed as soon as the query is built; Like
            // is expected to read the text before returning.
            using (System.IO.StringReader likeReader = new System.IO.StringReader(likeText))
            {
                bq = (BooleanQuery)mlt.Like(likeReader);
            }

            BooleanClause[] clauses = bq.GetClauses();
            // The required match count is the clause count scaled by
            // percentTermsToMatch, truncated toward zero by the (int) cast.
            bq.SetMinimumNumberShouldMatch((int)(clauses.Length * percentTermsToMatch));
            return bq;
        }