/// <summary>
/// Applies this class's fixed "more like this" settings to the supplied
/// <see cref="MoreLikeThis"/> instance.
/// </summary>
/// <param name="query">The MoreLikeThis object to configure; it is mutated in place.</param>
private void CreateMLTQuery(MoreLikeThis query)
 {
     // Tokenizer used when extracting candidate terms.
     var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
     query.Analyzer = analyzer;

     // Thresholds: a term needs only one occurrence in the source and in
     // the index to be considered, and at most 15 terms are kept.
     query.MinTermFreq   = 1;
     query.MinDocFreq    = 1;
     query.MaxQueryTerms = 15;

     // Only these two fields are compared, and English stop words are skipped.
     string[] comparedFields = { "title", "description" };
     query.SetFieldNames(comparedFields);
     query.SetStopWords(StopAnalyzer.ENGLISH_STOP_WORDS_SET);
 }
コード例 #2
0
        /// <summary>
        /// Looks up stories that are similar to the given story by running a
        /// MoreLikeThis query against the host's index.
        /// </summary>
        /// <param name="hostId">Host whose index is searched.</param>
        /// <param name="storyId">Story to find related stories for; it is excluded from the result.</param>
        /// <returns>
        /// The related stories loaded through <c>SearchQuery.LoadStorySearchResults</c>,
        /// or <c>null</c> when the story cannot be mapped to an index document.
        /// </returns>
        public StoryCollection Find(int hostId, int storyId)
        {
            int? sourceDocId = ConvertStoryIdtoDocId(hostId, storyId);

            // Nothing to compare against when the story is not in the index.
            if (!sourceDocId.HasValue)
            {
                return null;
            }

            IndexSearcher searcher = SearchQuery.GetSearcher(hostId);
            IndexReader   reader   = searcher.GetIndexReader();

            MoreLikeThis similarity = new MoreLikeThis(reader);
            similarity.SetAnalyzer(new DnkAnalyzer());

            // Tuning of the generated "related stories" query:
            //  - only the title and tags fields are compared
            //  - a term needs to occur at least once in the source story
            //  - at most 5 terms are placed in the query
            //  - words shorter than 3 characters are ignored
            //  - a term must occur in at least 4 documents
            //  - stop words are dropped and terms are boosted
            string[] comparedFields = { "title", "tags" };
            similarity.SetFieldNames(comparedFields);
            similarity.SetMinTermFreq(1);
            similarity.SetMaxQueryTerms(5);
            similarity.SetMinWordLen(3);
            similarity.SetMinDocFreq(4);
            similarity.SetStopWords(StopWords());
            similarity.SetBoost(true);

            Query relatedQuery = similarity.Like(sourceDocId.Value);
            Hits  matches      = searcher.Search(relatedQuery);

            List<int> relatedIds = new List<int>();

            for (int index = 0; index < matches.Length(); index++)
            {
                string rawId       = matches.Doc(index).GetField("id").StringValue();
                int    candidateId = int.Parse(rawId);

                // The source story itself is not reported as a related story.
                if (candidateId == storyId)
                {
                    continue;
                }

                relatedIds.Add(candidateId);

                // Stop as soon as the configured number of results is collected.
                if (relatedIds.Count == NUMBER_OF_RELATED_STORIES_TO_RETURN)
                {
                    break;
                }
            }

            return SearchQuery.LoadStorySearchResults(relatedIds);
        }
コード例 #3
0
        /// <summary>
        /// Searches the indexed documents for those most similar to the text
        /// <paramref name="original"/>, using a MoreLikeThis (TF-IDF based) query.
        /// </summary>
        /// <param name="original">Free text to compare against the index.</param>
        /// <param name="stopWords">Words excluded from the generated query.</param>
        /// <param name="analyzer">Analyzer used to tokenize <paramref name="original"/>.</param>
        /// <returns>
        /// The stored documents of the best hits, best first; at most 25 entries
        /// (fewer if the search returned fewer hits).
        /// </returns>
        public static List <Document> moreLikeThisAnalyzer(String original, ISet <string> stopWords, Lucene.Net.Analysis.Analyzer analyzer)
        {
            Trace.WriteLine("Realizando la Búsqueda");
            List <Document> DocumenResult = new List <Document>();

            // The reader and searcher are opened per call, so they must be released
            // per call as well; previously both were left open on every invocation.
            using (IndexReader indexReader = IndexReader.Open(_directory, true))
            using (IndexSearcher indexSearcher = new IndexSearcher(indexReader))
            {
                MoreLikeThis mlt = new MoreLikeThis(indexReader);

                mlt.SetFieldNames(DEFAULT_FIELD_NAMES);
                mlt.MinDocFreq    = DEFALT_MIN_DOC_FREQ;
                mlt.MinTermFreq   = DEFAULT_MIN_TERM_FREQ;
                mlt.MaxQueryTerms = MAX_QUERY_TERMS;
                mlt.MinWordLen    = DEFAULT_MIN_WORD_LENGTH;
                mlt.Analyzer      = analyzer;
                mlt.SetStopWords(stopWords);

                Query query = mlt.Like(new System.IO.StringReader(original));

                int topCount = DEFAULT_DOCUMENT_TO_SEARCH;

                TopScoreDocCollector collector = TopScoreDocCollector.Create(topCount, true);

                indexSearcher.Search(query, collector);
                ScoreDoc[] hits = collector.TopDocs().ScoreDocs;

                int len = hits.Length;

                Trace.WriteLine("Entering");
                Trace.WriteLine("-------------------------------------------");
                Trace.WriteLine("original :" + original);
                Trace.WriteLine("query: " + query);
                Trace.WriteLine("found: " + len + " documents");
                for (int i = 0; i < Math.Min(25, len); i++)
                {
                    int d = hits[i].Doc;
                    Trace.WriteLine("score   : " + hits[i].Score);
                    Trace.WriteLine("name    : " + d.ToString());
                    // Load the stored document while the searcher is still open
                    // and append it to the result list.
                    Document doc = indexSearcher.Doc(d);
                    DocumenResult.Add(doc);
                }
                Trace.WriteLine("-------------------------------------------");
                Trace.WriteLine("Exiting");
            }

            return(DocumenResult);
        }
コード例 #4
0
        /// <summary>
        /// Finds pages similar to the page identified by <paramref name="key"/>.
        /// </summary>
        /// <remarks>
        /// The source page is located by its "key" term, a MoreLikeThis query is
        /// built from it, and the result is restricted to pages whose
        /// publishStart/publishStop range contains the current UTC time. The
        /// source page itself is excluded.
        /// </remarks>
        /// <param name="key">Value of the "key" field of the source page.</param>
        /// <param name="resultOffset">Index of the first hit to return.</param>
        /// <param name="resultLength">Maximum number of hits to return.</param>
        /// <param name="matchCategory">
        /// When true and the source page has a non-empty "category" field, only
        /// pages with the same (lower-cased) category are returned.
        /// </param>
        /// <returns>
        /// A <see cref="SearchResult"/> holding the requested window of hits, the
        /// total number of hits and the elapsed search time in seconds; an empty
        /// result when no page matches <paramref name="key"/>.
        /// </returns>
        public SearchResult FindSimular(string key, int resultOffset, int resultLength, bool matchCategory) {
            var pageQuery = new TermQuery(new Term("key", key));
            var topDocs = _searcher.Search(pageQuery, 1);
            if (topDocs.TotalHits == 0) {
                return new SearchResult();
            }

            var doc = topDocs.ScoreDocs[0].Doc;

            var moreLikeThis = new MoreLikeThis(_reader) {
                Analyzer = _analyzer, 
                MinWordLen = 3
            };
            moreLikeThis.SetFieldNames(new[] { "title", "summary", "content", "tags" });
            moreLikeThis.SetStopWords(StopWords.DefaultEnglish);
            moreLikeThis.MinDocFreq = 2;
            
            var query = moreLikeThis.Like(doc);

            // Read the clock once, in UTC: the same instant is used for the
            // publish-window filter and as the start of the timing. Measuring
            // elapsed time with local time (DateTime.Now) gives wrong results
            // across daylight-saving or time-zone changes.
            var startTime = DateTime.UtcNow;
            var ticks = startTime.Ticks;

            // publishStart < now (open lower bound) and publishStop > now (open upper bound).
            Query publishStartQuery = NumericRangeQuery.NewLongRange("publishStart", null, ticks, true, false);
            Query publishStopQuery = NumericRangeQuery.NewLongRange("publishStop", ticks, null, false, true);

            var booleanQuery = new BooleanQuery {
                {query, Occur.MUST},
                {pageQuery, Occur.MUST_NOT},
                {publishStartQuery, Occur.MUST},
                {publishStopQuery, Occur.MUST}
            };

            if (matchCategory) {
                var document = _searcher.Doc(doc);
                var field = document.GetField("category");

                if (field != null && !string.IsNullOrEmpty(field.StringValue)) {
                    var categoryQuery = new TermQuery(new Term("category", field.StringValue.ToLowerInvariant()));
                    booleanQuery.Add(categoryQuery, Occur.MUST);
                }
            }

            var scoreDocs = _searcher.Search(booleanQuery, null, MaxHits, Sort.RELEVANCE).ScoreDocs;

            var result = new SearchResult { NumberOfHits = scoreDocs.Length };

            if (resultOffset < scoreDocs.Length) {
                // Clamp the requested window to the hits that are available.
                var resultUpperOffset = resultOffset + resultLength;
                if (resultUpperOffset > scoreDocs.Length) {
                    resultUpperOffset = scoreDocs.Length;
                }

                for (int i = resultOffset; i < resultUpperOffset; i++) {
                    Document document = _searcher.Doc(scoreDocs[i].Doc);

                    // A missing "pageId" field is parsed as an empty string.
                    Guid pageId;
                    (document.Get("pageId") ?? string.Empty).TryParseGuid(out pageId);

                    var hit = new SearchHit {
                        PageId = pageId,
                        Path = document.Get("path"),
                        Title = document.Get("title"),
                        Excerpt = document.Get("summary")
                    };

                    //foreach (string key in metaData) {
                    //    hit.MetaData.Add(key, document.Get(key));
                    //}

                    result.Hits.Add(hit);
                }
            }

            var timeTaken = DateTime.UtcNow - startTime;
            result.SecondsTaken = timeTaken.TotalSeconds;

            return result;
        }