Esempio n. 1
0
        /// <summary>
        /// Runs <paramref name="query"/> against <c>this.Searcher</c>, keeping the top
        /// <paramref name="count"/> hits ordered by <paramref name="sort"/>, and projects every hit
        /// through <paramref name="func"/>.
        /// </summary>
        /// <typeparam name="T">Type produced by <paramref name="func"/> for each hit.</typeparam>
        /// <param name="query">Query to execute.</param>
        /// <param name="count">Maximum number of hits to collect.</param>
        /// <param name="sort">Sort order handed to the collector.</param>
        /// <param name="func">Projection invoked once per hit with the loaded document and its <see cref="ScoreDoc"/>.</param>
        /// <returns>
        /// The projected hits, fully materialized; an empty sequence when <c>IsSearcherReady</c> is not 1.
        /// </returns>
        public IEnumerable <T> Search <T>(Query query, int count, Sort sort, Func <Document, ScoreDoc, T> func)
        {
            if (1 != Interlocked.Read(ref this.IsSearcherReady))
            {
                return Array.Empty<T>();
            }

            Interlocked.Increment(ref this.ActiveSearchCount);

            try
            {
                // Flags, in order: fillFields, trackDocScores, trackMaxScore, docsScoredInOrder.
                TopFieldCollector collector = TopFieldCollector.Create(sort, count, true, true, false, false);
                this.Searcher.Search(query, collector);

                ScoreDoc[] hits = collector.GetTopDocs().ScoreDocs;

                // Load and project every document NOW, while ActiveSearchCount still accounts for
                // this call. Returning a deferred LINQ query would postpone the Searcher.Doc(...)
                // calls until the caller enumerates, i.e. after the finally block below has
                // already decremented the counter.
                T[] results = new T[hits.Length];
                for (int i = 0; i < hits.Length; i++)
                {
                    ScoreDoc scoreDoc = hits[i];
                    Document document = this.Searcher.Doc(scoreDoc.Doc);
                    results[i] = func(document, scoreDoc);
                }

                return results;
            }
            finally
            {
                Interlocked.Decrement(ref this.ActiveSearchCount);
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Parses <paramref name="pattern"/> with a <see cref="SimpleAnalyzer"/>-backed query parser,
        /// searches the index opened from <paramref name="dataFolder"/> in index order, and returns
        /// one page of hits.
        /// </summary>
        /// <param name="pattern">Query text handed to the query parser.</param>
        /// <param name="dataFolder">Folder passed to <c>PagesReader</c> to open the index.</param>
        /// <param name="page">Page index; the returned window starts at <c>page * PageSize</c>.</param>
        /// <returns>One <c>SearchItem</c> per hit in the requested window, built from the hit's doc id.</returns>
        public static IEnumerable <ISearchItem> Search(string pattern, DirectoryInfo dataFolder, int page)
        {
            using (Analyzer textAnalyzer = new SimpleAnalyzer(LuceneVersion.LUCENE_48))
            {
                using (IndexReader pagesReader = new PagesReader(dataFolder))
                {
                    var parser = new QueryParser(LuceneVersion.LUCENE_48, string.Empty, textAnalyzer);
                    Query parsedQuery = parser.Parse(pattern);

                    var indexSearcher = new IndexSearcher(pagesReader);
                    TopFieldCollector hitCollector = TopFieldCollector.Create(
                        Sort.INDEXORDER, NumHits, false, false, false, false);
                    indexSearcher.Search(parsedQuery, hitCollector);

                    // Disabled highlighting sketch, kept for reference:
                    //
                    //   IFormatter formatter = new SimpleHTMLFormatter();
                    //   IScorer scorer = new QueryScorer(query);
                    //   Highlighter highlighter = new Highlighter(formatter, scorer)
                    //   {
                    //       TextFragmenter = new SimpleFragmenter(3)
                    //   };
                    //
                    //   Document document = searcher.Doc(doc.Doc);
                    //   string body = document.Get("body");
                    //   TokenStream stream = TokenSources.GetAnyTokenStream(reader, doc.Doc, "body", analyzer);
                    //   //TokenStream stream = analyzer.GetTokenStream("test123", new StringReader("test456"));
                    //   string best = highlighter.GetBestFragments(stream, body, 1, " ");

                    // The ScoreDoc array is fully populated here; the deferred projection below
                    // touches only doc ids, never the reader.
                    int firstHit = page * PageSize;
                    ScoreDoc[] pageHits = hitCollector.GetTopDocs(firstHit, PageSize).ScoreDocs;

                    return pageHits.Select(hit => new SearchItem(hit.Doc.ToString(), hit.Doc));
                }
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Writes ten test objects, runs a range query on the "Number" field through the generic
        /// <c>Search&lt;TestObject&gt;</c> overload with an explicit <see cref="TopFieldCollector"/>,
        /// and verifies the collected top docs.
        /// </summary>
        public void SearchGenericWithCollector()
        {
            const int TotalObjects   = 10;
            const int LowerInclusive = 0;
            const int UpperExclusive = 8;

            WriteTestObjects(TotalObjects, obj => obj.ToDocument());
            Assert.AreEqual(TotalObjects, writer.NumDocs);

            using (DirectoryReader reader = DirectoryReader.Open(dir))
            {
                var searcher = new IndexSearcher(reader);

                // Sort on "Number" with the SortField reverse flag set.
                var numberSort = new Sort(new SortField("Number", SortFieldType.INT64, true));
                TopFieldCollector collector =
                    TopFieldCollector.Create(numberSort, TotalObjects, false, false, false, false);

                // Range is [LowerInclusive, UpperExclusive): lower bound included, upper bound excluded.
                var numberRange = NumericRangeQuery.NewInt64Range(
                    "Number", LowerInclusive, UpperExclusive, true, false);
                searcher.Search <TestObject>(numberRange, collector);

                TopDocs collected = collector.GetTopDocs();
                VerifyTopDocsTestObjects(searcher, collected, LowerInclusive, UpperExclusive, true);
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Re-collects the first-pass hits into a <see cref="TopFieldCollector"/> built from this
        /// instance's <c>sort</c>, returning the top <paramref name="topN"/> of them.
        /// </summary>
        /// <param name="searcher">Searcher whose reader leaves are walked to resolve each hit's segment.</param>
        /// <param name="firstPassTopDocs">Hits to re-sort; its <c>ScoreDocs</c> array is cloned, not modified.</param>
        /// <param name="topN">Number of hits the collector keeps.</param>
        /// <returns>The collector's top docs after every first-pass hit has been fed to it.</returns>
        public override TopDocs Rescore(IndexSearcher searcher, TopDocs firstPassTopDocs, int topN)
        {
            // Work on a private copy, ordered by the helper comparer (intended: ascending docID)
            // so that the segments below can be walked strictly forward.
            var orderedHits = (ScoreDoc[])firstPassTopDocs.ScoreDocs.Clone();
            Array.Sort(orderedHits, new ComparerAnonymousInnerClassHelper(this));

            IList <AtomicReaderContext> segments = searcher.IndexReader.Leaves;
            TopFieldCollector resorter = TopFieldCollector.Create(sort, topN, true, true, true, false);
            var scorerStub = new FakeScorer();

            int segmentIndex = -1; // index of the segment currently being fed to the collector
            int segmentEnd   = 0;  // first global docID past the current segment
            int segmentBase  = 0;  // DocBase of the current segment

            foreach (ScoreDoc hit in orderedHits)
            {
                int globalDoc = hit.Doc;

                // Advance through the leaves until one contains this docID.
                AtomicReaderContext entered = null;
                while (globalDoc >= segmentEnd)
                {
                    entered    = segments[++segmentIndex];
                    segmentEnd = entered.DocBase + entered.Reader.MaxDoc;
                }

                if (entered != null)
                {
                    // Crossed into a new segment: re-point the collector at it.
                    resorter.SetNextReader(entered);
                    resorter.SetScorer(scorerStub);
                    segmentBase = entered.DocBase;
                }

                // Expose the first-pass score and the segment-relative doc id through the stub scorer.
                scorerStub.score = hit.Score;
                scorerStub.doc   = globalDoc - segmentBase;

                resorter.Collect(scorerStub.doc);
            }

            return resorter.GetTopDocs();
        }
Esempio n. 5
0
        /// <summary>
        /// Sorted search over the given <paramref name="leaves"/> on the calling thread.
        /// Works like <see cref="Search(Weight, int, Sort, bool, bool)"/>, except that
        /// <paramref name="fillFields"/> decides whether the returned <see cref="FieldDoc"/>
        /// instances have their fields populated.
        /// </summary>
        /// <remarks>
        /// <paramref name="nDocs"/> is capped at the reader's <c>MaxDoc</c>; a <c>MaxDoc</c> of 0 is
        /// treated as 1 before capping.
        /// </remarks>
        protected virtual TopFieldDocs Search(IList <AtomicReaderContext> leaves, Weight weight, FieldDoc after, int nDocs, Sort sort, bool fillFields, bool doDocScores, bool doMaxScore)
        {
            // single thread
            int maxDoc  = reader.MaxDoc;
            int cap     = maxDoc == 0 ? 1 : maxDoc;
            int numHits = Math.Min(nDocs, cap);

            // The collector may assume in-order docs only when the weight does not score out of order.
            bool docsInOrder = !weight.ScoresDocsOutOfOrder;
            TopFieldCollector sortedCollector =
                TopFieldCollector.Create(sort, numHits, after, fillFields, doDocScores, doMaxScore, docsInOrder);

            Search(leaves, weight, sortedCollector);

            return (TopFieldDocs)sortedCollector.GetTopDocs();
        }
Esempio n. 6
0
        /// <summary>
        /// Just like <see cref="Search(Weight, int, Sort, bool, bool)"/>, but you choose
        /// whether or not the fields in the returned <see cref="FieldDoc"/> instances should
        /// be set by specifying <paramref name="fillFields"/>.
        /// </summary>
        /// <remarks>
        /// <paramref name="nDocs"/> is capped at the reader's <c>MaxDoc</c> (a <c>MaxDoc</c> of 0 is
        /// treated as 1). Without an executor the search runs over all leaf contexts on the calling
        /// thread; with one, each leaf slice is submitted as a separate task that shares a single
        /// collector and lock.
        /// </remarks>
        /// <exception cref="System.ArgumentNullException"><paramref name="sort"/> is <c>null</c>.</exception>
        protected virtual TopFieldDocs Search(Weight weight, FieldDoc after, int nDocs, Sort sort, bool fillFields, bool doDocScores, bool doMaxScore)
        {
            if (sort == null)
            {
                // The single-string constructor takes the PARAMETER NAME, not the message; use the
                // (paramName, message) overload so both end up in the right place.
                throw new System.ArgumentNullException("sort", "Sort must not be null");
            }

            int limit = reader.MaxDoc;

            if (limit == 0)
            {
                limit = 1;
            }
            nDocs = Math.Min(nDocs, limit);

            if (executor == null)
            {
                // use all leaves here!
                return Search(m_leafContexts, weight, after, nDocs, sort, fillFields, doDocScores, doMaxScore);
            }
            else
            {
                // One collector shared by every slice task; the tasks receive the same lock.
                TopFieldCollector topCollector = TopFieldCollector.Create(sort, nDocs, after, fillFields, doDocScores, doMaxScore, false);

                ReentrantLock @lock = new ReentrantLock();
                ExecutionHelper <TopFieldDocs> runner = new ExecutionHelper <TopFieldDocs>(executor);
                for (int i = 0; i < m_leafSlices.Length; i++) // search each leaf slice
                {
                    runner.Submit(new SearcherCallableWithSort(@lock, this, m_leafSlices[i], weight, after, nDocs, topCollector, sort, doDocScores, doMaxScore));
                }

                // Enumerating the runner yields each slice's result.
                // NOTE(review): maxScore is accumulated here but the returned TopFieldDocs uses the
                // shared collector's MaxScore instead - confirm this is intended.
                int   totalHits = 0;
                float maxScore  = float.NegativeInfinity;
                foreach (TopFieldDocs topFieldDocs in runner)
                {
                    if (topFieldDocs.TotalHits != 0)
                    {
                        totalHits += topFieldDocs.TotalHits;
                        maxScore   = Math.Max(maxScore, topFieldDocs.MaxScore);
                    }
                }

                TopFieldDocs topDocs = (TopFieldDocs)topCollector.GetTopDocs();

                return new TopFieldDocs(totalHits, topDocs.ScoreDocs, topDocs.Fields, topDocs.MaxScore);
            }
        }
Esempio n. 7
0
        /// <summary>
        /// Checks that the early-terminating collector still produces the same top docs as a plain
        /// <see cref="TopFieldCollector"/> when the index was sorted by a different sorter than the
        /// one handed to the collector's constructor.
        /// </summary>
        public void TestEarlyTerminationDifferentSorter()
        {
            CreateRandomIndexes(5);

            // Random draws are kept in this exact order.
            int  hitLimit       = TestUtil.NextInt32(Random, 1, numDocs / 10);
            var  collectorSort  = new Sort(new SortField("ndv2", SortFieldType.INT64, false));
            bool fillFields     = Random.nextBoolean();
            bool trackDocScores = Random.nextBoolean();
            bool trackMaxScore  = Random.nextBoolean();
            bool inOrder        = Random.nextBoolean();

            IndexSearcher searcher = NewSearcher(reader);
            int           rounds   = AtLeast(5);

            for (int round = 0; round < rounds; round++)
            {
                // LUCENENET specific: the collectors are created fresh on every iteration (instead
                // of once, before the loop) to get Lucene.Net 4.9-like behavior rather than chasing
                // a hard-to-find null reference exception.
                // https://github.com/apache/lucene-solr/commit/c59f13f9918faeeb4e69acd41731e674ce88f912
                TopFieldCollector plainCollector = TopFieldCollector.Create(
                    collectorSort, hitLimit, fillFields, trackDocScores, trackMaxScore, inOrder);
                TopFieldCollector wrappedCollector = TopFieldCollector.Create(
                    collectorSort, hitLimit, fillFields, trackDocScores, trackMaxScore, inOrder);

                var termQuery = new TermQuery(new Term("s", RandomPicks.RandomFrom(Random, terms)));
                searcher.Search(termQuery, plainCollector);

                var otherSort = new Sort(new SortField("ndv2", SortFieldType.INT64));
                var terminating = new EarlyTerminatingSortingCollectorHelper(wrappedCollector, otherSort, hitLimit);
                searcher.Search(termQuery, terminating);

                // The wrapped collector must not report more hits than the plain one,
                // and both must agree on the top docs.
                assertTrue(plainCollector.TotalHits >= wrappedCollector.TotalHits);
                AssertTopDocsEquals(plainCollector.GetTopDocs().ScoreDocs, wrappedCollector.GetTopDocs().ScoreDocs);
            }
        }
Esempio n. 8
0
 /// <summary>
 /// Searches with an optional <paramref name="filter"/> and <paramref name="sort"/>, computing
 /// drill-down and drill-sideways counts alongside the hits.
 /// </summary>
 /// <remarks>
 /// When <paramref name="sort"/> is <c>null</c> the call is forwarded to the unsorted
 /// <c>Search(after, query, topN)</c> overload. Otherwise <paramref name="topN"/> is capped at the
 /// reader's <c>MaxDoc</c> (0 is treated as 1) before the sorted collector is created.
 /// </remarks>
 public virtual DrillSidewaysResult Search(DrillDownQuery query, Filter filter, FieldDoc after, int topN, Sort sort, bool doDocScores, bool doMaxScore)
 {
     if (filter != null)
     {
         query = new DrillDownQuery(m_config, filter, query);
     }

     if (sort == null)
     {
         return Search(after, query, topN);
     }

     // The collector does not allow numHits = 0, so an empty index is treated as size 1.
     int maxDoc  = m_searcher.IndexReader.MaxDoc;
     int cap     = maxDoc == 0 ? 1 : maxDoc;
     int numHits = Math.Min(topN, cap);

     TopFieldCollector sortedHits =
         TopFieldCollector.Create(sort, numHits, after, true, doDocScores, doMaxScore, true);
     DrillSidewaysResult sideways = Search(query, sortedHits);

     return new DrillSidewaysResult(sideways.Facets, sortedHits.GetTopDocs());
 }
Esempio n. 9
0
        /// <summary>
        /// Runs the same random term queries through a plain <see cref="TopFieldCollector"/> and
        /// through one wrapped in an <see cref="EarlyTerminatingSortingCollector"/> using the same
        /// sort, then checks that the wrapped collector saw no more hits and that both report the
        /// same top docs.
        /// </summary>
        public void TestEarlyTermination_()
        {
            CreateRandomIndexes(5);

            // Random draws are kept in this exact order.
            int  hitLimit       = TestUtil.NextInt32(Random, 1, numDocs / 10);
            var  sortByNdv1     = new Sort(new SortField("ndv1", SortFieldType.INT64, false));
            bool fillFields     = Random.nextBoolean();
            bool trackDocScores = Random.nextBoolean();
            bool trackMaxScore  = Random.nextBoolean();
            bool inOrder        = Random.nextBoolean();

            // Both collectors are created once and accumulate across all iterations.
            TopFieldCollector plainCollector = Search.TopFieldCollector.Create(
                sortByNdv1, hitLimit, fillFields, trackDocScores, trackMaxScore, inOrder);
            TopFieldCollector wrappedCollector = Search.TopFieldCollector.Create(
                sortByNdv1, hitLimit, fillFields, trackDocScores, trackMaxScore, inOrder);

            IndexSearcher searcher = NewSearcher(reader);
            int           rounds   = AtLeast(5);

            for (int round = 0; round < rounds; round++)
            {
                var termQuery = new TermQuery(new Term("s", RandomPicks.RandomFrom(Random, terms)));
                searcher.Search(termQuery, plainCollector);

                var terminating = new EarlyTerminatingSortingCollector(wrappedCollector, sortByNdv1, hitLimit);
                searcher.Search(termQuery, terminating);
            }

            assertTrue(plainCollector.TotalHits >= wrappedCollector.TotalHits);
            AssertTopDocsEquals(plainCollector.GetTopDocs().ScoreDocs, wrappedCollector.GetTopDocs().ScoreDocs);
        }
Esempio n. 10
0
        /// <summary>
        /// Retrieve suggestions, specifying whether all terms
        ///  must match (<paramref name="allTermsRequired"/>) and whether the hits
        ///  should be highlighted (<paramref name="doHighlight"/>).
        /// </summary>
        /// <param name="key">Text to look up; it is tokenized with the query analyzer.</param>
        /// <param name="contexts">Optional contexts; when non-null, a required sub-query matching any of them is added.</param>
        /// <param name="num">Number of hits to collect.</param>
        /// <param name="allTermsRequired"><c>true</c> to add term clauses as MUST, <c>false</c> to add them as SHOULD.</param>
        /// <param name="doHighlight">Passed through to <c>CreateResults</c>.</param>
        /// <returns>The list produced by <c>CreateResults</c> for the collected hits.</returns>
        /// <exception cref="InvalidOperationException">The searcher manager is <c>null</c> (suggester was not built).</exception>
        public virtual IList <LookupResult> DoLookup(string key, IEnumerable <BytesRef> contexts, int num, bool allTermsRequired, bool doHighlight)
        {
            if (m_searcherMgr == null)
            {
                throw new InvalidOperationException("suggester was not built");
            }

            // Occurrence used for every term clause added below.
            Occur occur;

            if (allTermsRequired)
            {
                occur = Occur.MUST;
            }
            else
            {
                occur = Occur.SHOULD;
            }

            TokenStream  ts = null;
            BooleanQuery query;
            var          matchedTokens = new HashSet <string>();
            string       prefixToken   = null;

            try
            {
                ts = m_queryAnalyzer.GetTokenStream("", new StringReader(key));

                //long t0 = System.currentTimeMillis();
                ts.Reset();
                var    termAtt   = ts.AddAttribute <ICharTermAttribute>();
                var    offsetAtt = ts.AddAttribute <IOffsetAttribute>();
                string lastToken = null;
                query = new BooleanQuery();
                int maxEndOffset = -1;
                matchedTokens = new HashSet <string>();

                // Each token is added to the query one iteration late, so that after the loop
                // lastToken still holds the final token, which gets special handling below.
                while (ts.IncrementToken())
                {
                    if (lastToken != null)
                    {
                        matchedTokens.Add(lastToken);
                        query.Add(new TermQuery(new Term(TEXT_FIELD_NAME, lastToken)), occur);
                    }
                    lastToken = termAtt.ToString();
                    if (lastToken != null)
                    {
                        // Track the largest end offset seen across all tokens.
                        maxEndOffset = Math.Max(maxEndOffset, offsetAtt.EndOffset);
                    }
                }
                ts.End();

                if (lastToken != null)
                {
                    Query lastQuery;

                    // offsetAtt.EndOffset is read after End(); it is compared against the largest
                    // token end offset to detect trailing discarded characters.
                    if (maxEndOffset == offsetAtt.EndOffset)
                    {
                        // Use PrefixQuery (or the ngram equivalent) when
                        // there was no trailing discarded chars in the
                        // string (e.g. whitespace), so that if query does
                        // not end with a space we show prefix matches for
                        // that token:
                        lastQuery   = GetLastTokenQuery(lastToken);
                        prefixToken = lastToken;
                    }
                    else
                    {
                        // Use TermQuery for an exact match if there were
                        // trailing discarded chars (e.g. whitespace), so
                        // that if query ends with a space we only show
                        // exact matches for that term:
                        matchedTokens.Add(lastToken);
                        lastQuery = new TermQuery(new Term(TEXT_FIELD_NAME, lastToken));
                    }
                    if (lastQuery != null)
                    {
                        query.Add(lastQuery, occur);
                    }
                }

                if (contexts != null)
                {
                    // Required sub-query: at least one of the given contexts has to match.
                    BooleanQuery sub = new BooleanQuery();
                    query.Add(sub, Occur.MUST);
                    foreach (BytesRef context in contexts)
                    {
                        // NOTE: we "should" wrap this in
                        // ConstantScoreQuery, or maybe send this as a
                        // Filter instead to search, but since all of
                        // these are MUST'd, the change to the score won't
                        // affect the overall ranking.  Since we indexed
                        // as DOCS_ONLY, the perf should be the same
                        // either way (no freq int[] blocks to decode):

                        // TODO: if we had a BinaryTermField we could fix
                        // this "must be valid UTF-8" limitation:
                        sub.Add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, context.Utf8ToString())), Occur.SHOULD);
                    }
                }
            }
            finally
            {
                // Close the token stream on every path, including when tokenization throws.
                IOUtils.CloseWhileHandlingException(ts);
            }

            // TODO: we could allow blended sort here, combining
            // weight w/ score.  Now we ignore score and sort only
            // by weight:

            Query finalQuery = FinishQuery(query, allTermsRequired);

            //System.out.println("finalQuery=" + query);

            // Sort by weight, descending:
            TopFieldCollector c = TopFieldCollector.Create(SORT, num, true, false, false, false);

            // We sorted postings by weight during indexing, so we
            // only retrieve the first num hits now:
            ICollector           c2       = new EarlyTerminatingSortingCollector(c, SORT, num);
            IndexSearcher        searcher = m_searcherMgr.Acquire();
            IList <LookupResult> results  = null;

            try
            {
                //System.out.println("got searcher=" + searcher);
                searcher.Search(finalQuery, c2);

                // Hits are read from the inner collector c, not from the wrapper c2.
                TopFieldDocs hits = (TopFieldDocs)c.GetTopDocs();

                // Slower way if postings are not pre-sorted by weight:
                // hits = searcher.search(query, null, num, SORT);
                results = CreateResults(searcher, hits, num, key, doHighlight, matchedTokens, prefixToken);
            }
            finally
            {
                // Always hand the acquired searcher back to the manager.
                m_searcherMgr.Release(searcher);
            }

            //System.out.println((System.currentTimeMillis() - t0) + " msec for infix suggest");
            //System.out.println(results);

            return(results);
        }
Esempio n. 11
0
        /// <summary>
        /// Runs the configured read workload - optional warm-up, optional search, optional
        /// traversal / retrieval / highlighting of the hits - against either the shared searcher
        /// from <c>RunData</c> or a reader opened just for this call.
        /// </summary>
        /// <returns>
        /// A running count of the work done: warmed documents, one for the search itself, one per
        /// traversed hit, one per retrieved document, plus whatever the highlighter reports.
        /// </returns>
        public override int DoLogic()
        {
            int res = 0;

            // open reader or use existing one
            IndexSearcher searcher = RunData.GetIndexSearcher();

            IndexReader reader;

            bool closeSearcher;

            if (searcher == null)
            {
                // open our own reader
                Directory dir = RunData.Directory;
                reader        = DirectoryReader.Open(dir);
                searcher      = new IndexSearcher(reader);
                closeSearcher = true;
            }
            else
            {
                // use existing one; this passes +1 ref to us
                reader        = searcher.IndexReader;
                closeSearcher = false;
            }

            // From here on we own either the reader itself or a +1 ref on it; the finally block
            // guarantees it is released even when warming, searching or highlighting throws.
            try
            {
                // optionally warm and add num docs traversed to count
                if (WithWarm)
                {
                    Document doc      = null;
                    IBits    liveDocs = MultiFields.GetLiveDocs(reader);
                    for (int m = 0; m < reader.MaxDoc; m++)
                    {
                        // A null liveDocs is treated as "every document is live".
                        if (null == liveDocs || liveDocs.Get(m))
                        {
                            doc  = reader.Document(m);
                            res += (doc == null ? 0 : 1);
                        }
                    }
                }

                if (WithSearch)
                {
                    res++;
                    Query   q       = queryMaker.MakeQuery();
                    Sort    sort    = Sort;
                    TopDocs hits    = null;
                    int     numHits = NumHits;
                    if (numHits > 0)
                    {
                        if (WithCollector == false)
                        {
                            if (sort != null)
                            {
                                // TODO: instead of always passing false we
                                // should detect based on the query; if we make
                                // the IndexSearcher search methods that take
                                // Weight public again, we can go back to
                                // pulling the Weight ourselves:
                                TopFieldCollector collector = TopFieldCollector.Create(sort, numHits,
                                                                                       true, WithScore,
                                                                                       WithMaxScore,
                                                                                       false);
                                searcher.Search(q, null, collector);
                                hits = collector.GetTopDocs();
                            }
                            else
                            {
                                hits = searcher.Search(q, numHits);
                            }
                        }
                        else
                        {
                            // Custom collector path: no TopDocs is produced, so hits stays null.
                            ICollector collector = CreateCollector();
                            searcher.Search(q, null, collector);
                            //hits = collector.topDocs();
                        }

                        string printHitsField = RunData.Config.Get("print.hits.field", null);
                        if (hits != null && printHitsField != null && printHitsField.Length > 0)
                        {
                            Console.WriteLine("totalHits = " + hits.TotalHits);
                            Console.WriteLine("maxDoc()  = " + reader.MaxDoc);
                            Console.WriteLine("numDocs() = " + reader.NumDocs);
                            for (int i = 0; i < hits.ScoreDocs.Length; i++)
                            {
                                int      docID = hits.ScoreDocs[i].Doc;
                                Document doc   = reader.Document(docID);
                                Console.WriteLine("  " + i + ": doc=" + docID + " score=" + hits.ScoreDocs[i].Score + " " + printHitsField + " =" + doc.Get(printHitsField));
                            }
                        }

                        // hits is null on the custom-collector path above; there is nothing to
                        // traverse then, so skip instead of dereferencing null.
                        if (WithTraverse && hits != null)
                        {
                            ScoreDoc[] scoreDocs     = hits.ScoreDocs;
                            int        traversalSize = Math.Min(scoreDocs.Length, TraversalSize);

                            if (traversalSize > 0)
                            {
                                bool                 retrieve     = WithRetrieve;
                                int                  numHighlight = Math.Min(NumToHighlight, scoreDocs.Length);
                                Analyzer             analyzer     = RunData.Analyzer;
                                BenchmarkHighlighter highlighter  = null;
                                if (numHighlight > 0)
                                {
                                    highlighter = GetBenchmarkHighlighter(q);
                                }
                                for (int m = 0; m < traversalSize; m++)
                                {
                                    int id = scoreDocs[m].Doc;
                                    res++;
                                    if (retrieve)
                                    {
                                        Document document = RetrieveDoc(reader, id);
                                        res += document != null ? 1 : 0;

                                        // Only the first numHighlight hits are highlighted.
                                        if (numHighlight > 0 && m < numHighlight)
                                        {
                                            ICollection <string> fieldsToHighlight = GetFieldsToHighlight(document);
                                            foreach (string field in fieldsToHighlight)
                                            {
                                                string text = document.Get(field);
                                                res += highlighter.DoHighlight(reader, id, field, document, analyzer, text);
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
            finally
            {
                if (closeSearcher)
                {
                    reader.Dispose();
                }
                else
                {
                    // Release our +1 ref from above
                    reader.DecRef();
                }
            }

            return res;
        }