Пример #1
        /// <summary>
        /// Runs the same random <see cref="TermQuery"/> through a plain
        /// <see cref="TopFieldCollector"/> and through one wrapped in
        /// <c>EarlyTerminatingSortingCollectorHelper</c>, then asserts that the wrapped
        /// collector counted no more hits than the plain one and that both report the
        /// same top documents.
        /// </summary>
        public void TestEarlyTerminationDifferentSorter()
            // test that the collector works correctly when the index was sorted by a
            // different sorter than the one specified in the ctor.
            int  numHits        = TestUtil.NextInt32(Random, 1, numDocs / 10);
            Sort sort           = new Sort(new SortField("ndv2", SortFieldType.INT64, false));
            bool fillFields     = Random.nextBoolean();
            bool trackDocScores = Random.nextBoolean();
            bool trackMaxScore  = Random.nextBoolean();
            bool inOrder        = Random.nextBoolean();
            // LUCENENET specific:
            // we are changing this test to use Lucene.Net 4.9-like behavior rather than going through all of the effort to
            // fix a hard-to-find null reference exception problem.
            // https://github.com/apache/lucene-solr/commit/c59f13f9918faeeb4e69acd41731e674ce88f912
            //TopFieldCollector collector1 = TopFieldCollector.Create(sort, numHits, fillFields, trackDocScores, trackMaxScore, inOrder);
            //TopFieldCollector collector2 = TopFieldCollector.Create(sort, numHits, fillFields, trackDocScores, trackMaxScore, inOrder);

            IndexSearcher searcher = NewSearcher(reader);
            int           iters    = AtLeast(5);

            for (int i = 0; i < iters; ++i)
                // LUCENENET specific:
                // we are changing this test to use Lucene.Net 4.9-like behavior rather than going through all of the effort to
                // fix a hard-to-find null reference exception problem.
                // https://github.com/apache/lucene-solr/commit/c59f13f9918faeeb4e69acd41731e674ce88f912
                // Both collectors are created here, next to the searches that use them, with identical settings.
                TopFieldCollector collector1 = TopFieldCollector.Create(sort, numHits, fillFields, trackDocScores, trackMaxScore, inOrder);
                TopFieldCollector collector2 = TopFieldCollector.Create(sort, numHits, fillFields, trackDocScores, trackMaxScore, inOrder);

                TermQuery query = new TermQuery(new Term("s", RandomPicks.RandomFrom(Random, terms)));
                searcher.Search(query, collector1);
                // NOTE(review): 'different' is built from the same field ("ndv2") and type as 'sort' above;
                // presumably "different" is meant relative to the sort order of the index itself - confirm.
                Sort different = new Sort(new SortField("ndv2", SortFieldType.INT64));
                searcher.Search(query, new EarlyTerminatingSortingCollectorHelper(collector2, different, numHits));

                assertTrue(collector1.TotalHits >= collector2.TotalHits);
                AssertTopDocsEquals(collector1.GetTopDocs().ScoreDocs, collector2.GetTopDocs().ScoreDocs);
Пример #2
        /// <summary>
        /// Builds the query with <c>CreateQuery</c>, opens an <see cref="IndexSearcher"/> over
        /// <c>_directory</c> and collects hits sorted by <c>_sort</c>, or by relevance when
        /// <c>_sort</c> is null or empty. Returns an empty sequence when the searcher cannot be opened.
        /// </summary>
        public IEnumerable <ISearchHit> Search()
            var query = CreateQuery();

            IndexSearcher searcher;

            try {
                searcher = new IndexSearcher(_directory, true);
            // NOTE(review): this bare catch treats every failure while opening the searcher as
            // "index missing"; consider narrowing it to the exception types a missing index raises.
            catch {
                // index might not exist if it has been rebuilt
                Logger.Information("Attempt to read a none existing index");
                return(Enumerable.Empty <ISearchHit>());

            using (searcher) {
                // Fall back to relevance ordering when no sort field is configured.
                var sort = String.IsNullOrEmpty(_sort)
                               ? Sort.RELEVANCE
                               : new Sort(new SortField(_sort, _comparer, _sortDescending));
                var collector = TopFieldCollector.Create(
                    _count + _skip,

                Logger.Debug("Searching: {0}", query.ToString());
                searcher.Search(query, collector);

                // Each hit is loaded through the searcher and wrapped together with its score.
                var results = collector.TopDocs().ScoreDocs
                              .Select(scoreDoc => new LuceneSearchHit(searcher.Doc(scoreDoc.Doc), scoreDoc.Score))

                Logger.Debug("Search results: {0}", results.Count);

Пример #3
        /// <summary>
        /// Runs the search and retrieves the number of items in the result.
        /// </summary>
        /// <param name="query">Query to execute; the search below only runs when it is not null.</param>
        /// <param name="filter">Filter handed to the searcher together with the query's weight.</param>
        /// <returns>The number of items in the result (the return statement is not visible in this excerpt).</returns>
        private int SearchCount(Query query, Filter filter)
            var reader   = CreateReader();
            var searcher = new IndexSearcher(reader);

                if (query != null)
                    var weight    = query.Weight(searcher);
                    // Collector over a default Sort, created with a size argument of 1;
                    // presumably only the hit count is needed, not the documents - TODO confirm.
                    var collector = TopFieldCollector.create(new global::Lucene.Net.Search.Sort(), 1, true, false, false, !weight.ScoresDocsOutOfOrder());
                    searcher.Search(weight, filter, collector);
Пример #4
 /// <summary>
 /// Search, sorting by <see cref="Sort"/>, and computing
 /// drill down and sideways counts.
 /// </summary>
 public virtual DrillSidewaysResult Search(DrillDownQuery query, Filter filter, FieldDoc after, int topN, Sort sort, bool doDocScores, bool doMaxScore)
     // Fold the filter into the drill-down query when one is supplied.
     if (filter != null)
         query = new DrillDownQuery(m_config, filter, query);
     if (sort != null)
         // Clamp topN to the reader's MaxDoc, using 1 when MaxDoc is 0.
         int limit = m_searcher.IndexReader.MaxDoc;
         if (limit == 0)
             limit = 1; // the collector does not allow numHits = 0
         topN = Math.Min(topN, limit);
         TopFieldCollector   hitCollector = TopFieldCollector.Create(sort, topN, after, true, doDocScores, doMaxScore, true);
         DrillSidewaysResult r            = Search(query, hitCollector);
         return(new DrillSidewaysResult(r.Facets, hitCollector.GetTopDocs()));
         // NOTE(review): presumably the fallback for sort == null; the branch keyword is not visible in this excerpt.
         return(Search(after, query, topN));
Пример #5
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in C#:
//ORIGINAL LINE: private org.apache.lucene.search.TopDocs toTopDocs(org.apache.lucene.search.Query query, org.neo4j.index.lucene.QueryContext context, org.apache.lucene.search.IndexSearcher searcher) throws java.io.IOException
        /// <summary>
        /// Runs <paramref name="query"/> on <paramref name="searcher"/> and returns the top documents,
        /// choosing the search overload from the sorting and options carried by <paramref name="context"/>.
        /// </summary>
        private TopDocs ToTopDocs(Query query, QueryContext context, IndexSearcher searcher)
            // A null context yields a null sorting.
            Sort    sorting = context != null ? context.Sorting : null;
            TopDocs topDocs;

            if (sorting == null && context != null)
                topDocs = searcher.search(query, context.Top);
                // NOTE(review): this condition admits context == null, yet the next statement reads
                // context.Top; if that statement is this condition's body (the braces are not visible
                // in this excerpt) a null context would throw here - confirm.
                if (context == null || !context.TradeCorrectnessForSpeed)
                    TopFieldCollector collector = LuceneDataSource.ScoringCollector(sorting, context.Top);
                    searcher.search(query, collector);
                    topDocs = collector.topDocs();
                    topDocs = searcher.search(query, null, context.Top, sorting);
Пример #6
        /// <summary>
        /// Feeds the same random term queries to a plain <see cref="TopFieldCollector"/> and to one
        /// wrapped in <c>EarlyTerminatingSortingCollector</c>, then asserts that the wrapped collector
        /// counted no more hits than the plain one and that both report the same top documents.
        /// </summary>
        public void TestEarlyTermination_()
            int  numHits                 = TestUtil.NextInt32(Random, 1, numDocs / 10);
            Sort sort                    = new Sort(new SortField("ndv1", SortFieldType.INT64, false));
            bool fillFields              = Random.nextBoolean();
            bool trackDocScores          = Random.nextBoolean();
            bool trackMaxScore           = Random.nextBoolean();
            bool inOrder                 = Random.nextBoolean();
            // Both collectors are created once, before the loop, and receive the hits of every iteration.
            TopFieldCollector collector1 = Search.TopFieldCollector.Create(sort, numHits, fillFields, trackDocScores, trackMaxScore, inOrder);
            TopFieldCollector collector2 = Search.TopFieldCollector.Create(sort, numHits, fillFields, trackDocScores, trackMaxScore, inOrder);

            IndexSearcher searcher = NewSearcher(reader);
            int           iters    = AtLeast(5);

            for (int i = 0; i < iters; ++i)
                TermQuery query = new TermQuery(new Term("s", RandomPicks.RandomFrom(Random, terms)));
                searcher.Search(query, collector1);
                searcher.Search(query, new EarlyTerminatingSortingCollector(collector2, sort, numHits));
            // The comparison runs after the loop (per the indentation), on the accumulated results.
            assertTrue(collector1.TotalHits >= collector2.TotalHits);
            AssertTopDocsEquals(collector1.GetTopDocs().ScoreDocs, collector2.GetTopDocs().ScoreDocs);
Пример #7
        /// <summary>
        /// Returns the grouped results.  Returns null if the
        /// number of groups collected is &lt;= groupOffset.
        /// <para>
        /// <b>NOTE</b>: This collector is unable to compute
        /// the groupValue per group so it will always be null.
        /// This is normally not a problem, as you can obtain the
        /// value just like you obtain other values for each
        /// matching document (eg, via stored fields, via
        /// FieldCache, etc.)
        /// </para>
        /// </summary>
        /// <typeparam name="TGroupValue">The expected return type for group value</typeparam>
        /// <param name="withinGroupSort">
        /// The <see cref="Sort"/> used to sort
        /// documents within each group.  Passing null is
        /// allowed, to sort by relevance.
        /// </param>
        /// <param name="groupOffset">Which group to start from</param>
        /// <param name="withinGroupOffset">
        /// Which document to start from within each group
        /// </param>
        /// <param name="maxDocsPerGroup">
        /// How many top documents to keep within each group.
        /// </param>
        /// <param name="fillSortFields">
        /// If true then the Comparable values for the sort fields will be set
        /// </param>
        /// <exception cref="ArgumentException">
        /// Thrown when <paramref name="withinGroupSort"/> is null but scores were not collected
        /// (<c>needsScores</c> is false).
        /// </exception>
        public virtual ITopGroups <TGroupValue> GetTopGroups <TGroupValue>(Sort withinGroupSort, int groupOffset, int withinGroupOffset, int maxDocsPerGroup, bool fillSortFields)
            //if (queueFull) {
            //System.out.println("getTopGroups groupOffset=" + groupOffset + " topNGroups=" + topNGroups);
            if (subDocUpto != 0)
            if (groupOffset >= groupQueue.Count)
            int totalGroupedHitCount = 0;

            FakeScorer fakeScorer = new FakeScorer();

            float maxScore = float.Epsilon; // LUCENENET: Epsilon in .NET is the same as MIN_VALUE in Java

            // One output slot per queued group at or beyond groupOffset. The loop fills the slots
            // from the last index down to 0, popping one group from groupQueue per iteration.
            GroupDocs <TGroupValue>[] groups = new GroupDocs <TGroupValue> [groupQueue.Count - groupOffset];
            for (int downTo = groupQueue.Count - groupOffset - 1; downTo >= 0; downTo--)
                OneGroup og = groupQueue.Pop();

                // At this point we hold all docs w/ in each group,
                // unsorted; we now sort them:
                ITopDocsCollector collector;
                if (withinGroupSort == null)
                    // Sort by score
                    if (!needsScores)
                        throw new ArgumentException("cannot sort by relevance within group: needsScores=false");
                    collector = TopScoreDocCollector.Create(maxDocsPerGroup, true);
                    // Sort by fields
                    collector = TopFieldCollector.Create(withinGroupSort, maxDocsPerGroup, fillSortFields, needsScores, needsScores, true);

                // Copy each buffered doc id (and its score, when needsScores) of this group into fakeScorer.
                for (int docIDX = 0; docIDX < og.count; docIDX++)
                    int doc = og.docs[docIDX];
                    fakeScorer.doc = doc;
                    if (needsScores)
                        fakeScorer.score = og.scores[docIDX];
                totalGroupedHitCount += og.count;

                object[] groupSortValues;

                // When requested, read this group's sort values from the comparers at the group's slot.
                if (fillSortFields)
                    groupSortValues = new IComparable[comparers.Length];
                    for (int sortFieldIDX = 0; sortFieldIDX < comparers.Length; sortFieldIDX++)
                        groupSortValues[sortFieldIDX] = comparers[sortFieldIDX][og.comparerSlot];
                    groupSortValues = null;

                TopDocs topDocs = collector.GetTopDocs(withinGroupOffset, maxDocsPerGroup);

                // TODO: we could aggregate scores across children
                // by Sum/Avg instead of passing NaN:
                groups[downTo] = new GroupDocs <TGroupValue>(float.NaN,
Пример #8
        /// <summary>
        /// Runs one read task against the benchmark index. Depending on the <c>WithWarm</c>,
        /// <c>WithSearch</c>, <c>WithTraverse</c> and <c>WithRetrieve</c> switches it loads every live
        /// document, executes a query, prints the hits and retrieves/highlights the top results.
        /// The counter <c>res</c> is increased for each non-null document loaded and by each
        /// highlighter result.
        /// </summary>
        public override int DoLogic()
            int res = 0;

            // open reader or use existing one
            IndexSearcher searcher = RunData.GetIndexSearcher();

            IndexReader reader;

            // Records whether this method created the searcher itself.
            bool closeSearcher;

            if (searcher == null)
                // open our own reader
                Directory dir = RunData.Directory;
                reader        = DirectoryReader.Open(dir);
                searcher      = new IndexSearcher(reader);
                closeSearcher = true;
                // use existing one; this passes +1 ref to us
                reader        = searcher.IndexReader;
                closeSearcher = false;

            // optionally warm and add num docs traversed to count
            if (WithWarm)
                Document doc      = null;
                IBits    liveDocs = MultiFields.GetLiveDocs(reader);
                for (int m = 0; m < reader.MaxDoc; m++)
                    // A null liveDocs is treated as "every document is live".
                    if (null == liveDocs || liveDocs.Get(m))
                        doc  = reader.Document(m);
                        res += (doc == null ? 0 : 1);

            if (WithSearch)
                Query   q       = queryMaker.MakeQuery();
                Sort    sort    = Sort;
                TopDocs hits    = null;
                int     numHits = NumHits;
                if (numHits > 0)
                    if (WithCollector == false)
                        if (sort != null)
                            // TODO: instead of always passing false we
                            // should detect based on the query; if we make
                            // the IndexSearcher search methods that take
                            // Weight public again, we can go back to
                            // pulling the Weight ourselves:
                            TopFieldCollector collector = TopFieldCollector.Create(sort, numHits,
                                                                                   true, WithScore,
                            searcher.Search(q, null, collector);
                            hits = collector.GetTopDocs();
                            hits = searcher.Search(q, numHits);
                        // Custom collector path: 'hits' is not assigned here (see the commented-out line below).
                        ICollector collector = CreateCollector();
                        searcher.Search(q, null, collector);
                        //hits = collector.topDocs();

                    // Hits are printed only when the "print.hits.field" config value is a non-empty string.
                    string printHitsField = RunData.Config.Get("print.hits.field", null);
                    if (hits != null && printHitsField != null && printHitsField.Length > 0)
                        Console.WriteLine("totalHits = " + hits.TotalHits);
                        Console.WriteLine("maxDoc()  = " + reader.MaxDoc);
                        Console.WriteLine("numDocs() = " + reader.NumDocs);
                        for (int i = 0; i < hits.ScoreDocs.Length; i++)
                            int      docID = hits.ScoreDocs[i].Doc;
                            Document doc   = reader.Document(docID);
                            Console.WriteLine("  " + i + ": doc=" + docID + " score=" + hits.ScoreDocs[i].Score + " " + printHitsField + " =" + doc.Get(printHitsField));

                    // NOTE(review): 'hits' is read without a null check here, while the custom collector
                    // path above leaves it null - confirm that WithTraverse cannot be combined with that path.
                    if (WithTraverse)
                        ScoreDoc[] scoreDocs     = hits.ScoreDocs;
                        int        traversalSize = Math.Min(scoreDocs.Length, TraversalSize);

                        if (traversalSize > 0)
                            bool                 retrieve     = WithRetrieve;
                            int                  numHighlight = Math.Min(NumToHighlight, scoreDocs.Length);
                            Analyzer             analyzer     = RunData.Analyzer;
                            BenchmarkHighlighter highlighter  = null;
                            if (numHighlight > 0)
                                highlighter = GetBenchmarkHighlighter(q);
                            for (int m = 0; m < traversalSize; m++)
                                int id = scoreDocs[m].Doc;
                                if (retrieve)
                                    Document document = RetrieveDoc(reader, id);
                                    res += document != null ? 1 : 0;
                                    // Only the first numHighlight retrieved documents are highlighted.
                                    if (numHighlight > 0 && m < numHighlight)
                                        ICollection <string> fieldsToHighlight = GetFieldsToHighlight(document);
                                        foreach (string field in fieldsToHighlight)
                                            string text = document.Get(field);
                                            res += highlighter.DoHighlight(reader, id, field, document, analyzer, text);

            if (closeSearcher)
                // Release our +1 ref from above
Пример #9
 /// <summary>
 /// Initializes the callable by storing every constructor argument in the
 /// field of the same name; no other work is performed here.
 /// </summary>
 /// <param name="lock">Stored in <c>this.@lock</c>.</param>
 /// <param name="searcher">Stored in <c>this.searcher</c>.</param>
 /// <param name="slice">Stored in <c>this.slice</c>.</param>
 /// <param name="weight">Stored in <c>this.weight</c>.</param>
 /// <param name="after">Stored in <c>this.after</c>.</param>
 /// <param name="nDocs">Stored in <c>this.nDocs</c>.</param>
 /// <param name="hq">Stored in <c>this.hq</c>.</param>
 /// <param name="sort">Stored in <c>this.sort</c>.</param>
 /// <param name="doDocScores">Stored in <c>this.doDocScores</c>.</param>
 /// <param name="doMaxScore">Stored in <c>this.doMaxScore</c>.</param>
 public SearcherCallableWithSort(ReentrantLock @lock, IndexSearcher searcher, LeafSlice slice, Weight weight, FieldDoc after, int nDocs, TopFieldCollector hq, Sort sort, bool doDocScores, bool doMaxScore)
 {
     // The body braces were missing from this excerpt; restored so the constructor parses.
     this.@lock       = @lock;
     this.searcher    = searcher;
     this.weight      = weight;
     this.nDocs       = nDocs;
     this.hq          = hq;
     this.sort        = sort;
     this.slice       = slice;
     this.after       = after;
     this.doDocScores = doDocScores;
     this.doMaxScore  = doMaxScore;
 }
Пример #10
        /// <summary>
        /// Retrieve suggestions, specifying whether all terms
        ///  must match (<paramref name="allTermsRequired"/>) and whether the hits
        ///  should be highlighted (<paramref name="doHighlight"/>).
        /// </summary>
        /// <param name="key">Text to look up; it is tokenized with <c>m_queryAnalyzer</c>.</param>
        /// <param name="contexts">
        /// Optional contexts. When not null, a required sub-query is added that holds one optional
        /// term clause on <c>CONTEXTS_FIELD_NAME</c> per context.
        /// </param>
        /// <param name="num">Hit count passed to the collectors and to <c>CreateResults</c>.</param>
        /// <param name="allTermsRequired">
        /// Selects the occurrence used for the token clauses (<c>Occur.MUST</c> or <c>Occur.SHOULD</c>)
        /// and is forwarded to <c>FinishQuery</c>.
        /// </param>
        /// <param name="doHighlight">Forwarded to <c>CreateResults</c>.</param>
        /// <exception cref="InvalidOperationException">Thrown when <c>m_searcherMgr</c> is null (suggester not built).</exception>
        public virtual IList <LookupResult> DoLookup(string key, IEnumerable <BytesRef> contexts, int num, bool allTermsRequired, bool doHighlight)
            if (m_searcherMgr == null)
                throw new InvalidOperationException("suggester was not built");

            Occur occur;

            if (allTermsRequired)
                occur = Occur.MUST;
                occur = Occur.SHOULD;

            TokenStream  ts = null;
            BooleanQuery query;
            var          matchedTokens = new HashSet <string>();
            string       prefixToken   = null;

                ts = m_queryAnalyzer.GetTokenStream("", new StringReader(key));

                //long t0 = System.currentTimeMillis();
                var    termAtt   = ts.AddAttribute <ICharTermAttribute>();
                var    offsetAtt = ts.AddAttribute <IOffsetAttribute>();
                string lastToken = null;
                query = new BooleanQuery();
                int maxEndOffset = -1;
                matchedTokens = new HashSet <string>();
                // Every token except the last becomes a TermQuery clause inside the loop;
                // the last token is handled separately after the loop.
                while (ts.IncrementToken())
                    if (lastToken != null)
                        query.Add(new TermQuery(new Term(TEXT_FIELD_NAME, lastToken)), occur);
                    lastToken = termAtt.ToString();
                    if (lastToken != null)
                        maxEndOffset = Math.Max(maxEndOffset, offsetAtt.EndOffset);

                if (lastToken != null)
                    Query lastQuery;
                    if (maxEndOffset == offsetAtt.EndOffset)
                        // Use PrefixQuery (or the ngram equivalent) when
                        // there was no trailing discarded chars in the
                        // string (e.g. whitespace), so that if query does
                        // not end with a space we show prefix matches for
                        // that token:
                        lastQuery   = GetLastTokenQuery(lastToken);
                        prefixToken = lastToken;
                        // Use TermQuery for an exact match if there were
                        // trailing discarded chars (e.g. whitespace), so
                        // that if query ends with a space we only show
                        // exact matches for that term:
                        lastQuery = new TermQuery(new Term(TEXT_FIELD_NAME, lastToken));
                    if (lastQuery != null)
                        query.Add(lastQuery, occur);

                if (contexts != null)
                    BooleanQuery sub = new BooleanQuery();
                    query.Add(sub, Occur.MUST);
                    foreach (BytesRef context in contexts)
                        // NOTE: we "should" wrap this in
                        // ConstantScoreQuery, or maybe send this as a
                        // Filter instead to search, but since all of
                        // these are MUST'd, the change to the score won't
                        // affect the overall ranking.  Since we indexed
                        // as DOCS_ONLY, the perf should be the same
                        // either way (no freq int[] blocks to decode):

                        // TODO: if we had a BinaryTermField we could fix
                        // this "must be valid UTF-8" limitation:
                        sub.Add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, context.Utf8ToString())), Occur.SHOULD);

            // TODO: we could allow blended sort here, combining
            // weight w/ score.  Now we ignore score and sort only
            // by weight:

            Query finalQuery = FinishQuery(query, allTermsRequired);

            //System.out.println("finalQuery=" + query);

            // Sort by weight, descending:
            TopFieldCollector c = TopFieldCollector.Create(SORT, num, true, false, false, false);

            // We sorted postings by weight during indexing, so we
            // only retrieve the first num hits now:
            ICollector           c2       = new EarlyTerminatingSortingCollector(c, SORT, num);
            // NOTE(review): the searcher is acquired from m_searcherMgr here; the matching release
            // is not visible in this excerpt - confirm it happens on every path.
            IndexSearcher        searcher = m_searcherMgr.Acquire();
            IList <LookupResult> results  = null;

                //System.out.println("got searcher=" + searcher);
                searcher.Search(finalQuery, c2);

                TopFieldDocs hits = (TopFieldDocs)c.GetTopDocs();

                // Slower way if postings are not pre-sorted by weight:
                // hits = searcher.search(query, null, num, SORT);
                results = CreateResults(searcher, hits, num, key, doHighlight, matchedTokens, prefixToken);

            //System.out.println((System.currentTimeMillis() - t0) + " msec for infix suggest");

Пример #11
        /// <summary>
        /// Returns the grouped results.  Returns null if the
        /// number of groups collected is &lt;= groupOffset.
        /// <para>
        /// <b>NOTE</b>: This collector is unable to compute
        /// the groupValue per group so it will always be null.
        /// This is normally not a problem, as you can obtain the
        /// value just like you obtain other values for each
        /// matching document (eg, via stored fields, via
        /// FieldCache, etc.)
        /// </para>
        /// </summary>
        /// <typeparam name="TGroupValue">The expected return type for group value</typeparam>
        /// <param name="withinGroupSort">
        /// The <see cref="Sort"/> used to sort
        /// documents within each group.  Passing null is
        /// allowed, to sort by relevance.
        /// </param>
        /// <param name="groupOffset">Which group to start from</param>
        /// <param name="withinGroupOffset">
        /// Which document to start from within each group
        /// </param>
        /// <param name="maxDocsPerGroup">
        /// How many top documents to keep within each group.
        /// </param>
        /// <param name="fillSortFields">
        /// If true then the Comparable values for the sort fields will be set
        /// </param>
        /// <exception cref="ArgumentException">
        /// Thrown when <paramref name="withinGroupSort"/> is null but scores were not collected
        /// (<c>needsScores</c> is false).
        /// </exception>
        public virtual ITopGroups <TGroupValue> GetTopGroups <TGroupValue>(Sort withinGroupSort, int groupOffset, int withinGroupOffset, int maxDocsPerGroup, bool fillSortFields)
            //if (queueFull) {
            //System.out.println("getTopGroups groupOffset=" + groupOffset + " topNGroups=" + topNGroups);
            if (subDocUpto != 0)
            if (groupOffset >= groupQueue.Size())
            int totalGroupedHitCount = 0;

            FakeScorer fakeScorer = new FakeScorer();

            // NOTE(review): float.MinValue is the most negative finite float in .NET; if this was ported
            // from Java's Float.MIN_VALUE (the smallest positive value) the starting point differs - confirm.
            float maxScore = float.MinValue;

            // One output slot per queued group at or beyond groupOffset. The loop fills the slots
            // from the last index down to 0, popping one group from groupQueue per iteration.
            GroupDocs <TGroupValue>[] groups = new GroupDocs <TGroupValue> [groupQueue.Size() - groupOffset];
            for (int downTo = groupQueue.Size() - groupOffset - 1; downTo >= 0; downTo--)
                OneGroup og = groupQueue.Pop();

                // At this point we hold all docs w/ in each group,
                // unsorted; we now sort them:
                ITopDocsCollector collector;
                if (withinGroupSort == null)
                    // Sort by score
                    if (!needsScores)
                        throw new ArgumentException("cannot sort by relevance within group: needsScores=false");
                    collector = TopScoreDocCollector.Create(maxDocsPerGroup, true);
                    // Sort by fields
                    collector = TopFieldCollector.Create(withinGroupSort, maxDocsPerGroup, fillSortFields, needsScores, needsScores, true);

                // Point the collector at the fake scorer and at the reader context stored with the group.
                collector.Scorer     = fakeScorer;
                collector.NextReader = og.readerContext;
                for (int docIDX = 0; docIDX < og.count; docIDX++)
                    int doc = og.docs[docIDX];
                    fakeScorer.doc = doc;
                    if (needsScores)
                        fakeScorer.score = og.scores[docIDX];
                totalGroupedHitCount += og.count;

                object[] groupSortValues;

                // When requested, read this group's sort values from the comparators at the group's slot.
                if (fillSortFields)
                    groupSortValues = new IComparable[comparators.Length];
                    for (int sortFieldIDX = 0; sortFieldIDX < comparators.Length; sortFieldIDX++)
                        groupSortValues[sortFieldIDX] = comparators[sortFieldIDX].Value(og.comparatorSlot);
                    groupSortValues = null;

                TopDocs topDocs = collector.TopDocs(withinGroupOffset, maxDocsPerGroup);

                // TODO: we could aggregate scores across children
                // by Sum/Avg instead of passing NaN:
                groups[downTo] = new GroupDocs <TGroupValue>(float.NaN,
                maxScore = Math.Max(maxScore, topDocs.MaxScore);

             * while (groupQueue.size() != 0) {
             * final OneGroup og = groupQueue.pop();
             * //System.out.println("  leftover: og ord=" + og.groupOrd + " count=" + og.count);
             * totalGroupedHitCount += og.count;
             * }

            return(new TopGroups <TGroupValue>(new TopGroups <TGroupValue>(groupSort.GetSort(),
                                                                           withinGroupSort == null ? null : withinGroupSort.GetSort(),
                                                                           totalHitCount, totalGroupedHitCount, groups, maxScore),
Пример #12
        /// <summary>
        /// Field grouping statistics (with paging support).
        /// </summary>
        /// <param name="indexSearcher">Searcher used to run the query and to load the hit documents.</param>
        /// <param name="pageSize">Number of records per page.</param>
        /// <param name="pageIndex">Page index; multiplied by <paramref name="pageSize"/> to size the collector.</param>
        /// <param name="query">Query to execute; its string form is part of the cache key.</param>
        /// <param name="recordCount">Receives <c>TotalHits</c> of the result, or 0 when there is no result.</param>
        /// <param name="groupKeyValueList">Grouping result</param>
        /// <param name="filter">Optional filter passed to the search; its string form is part of the cache key.</param>
        /// <param name="sortFields">Sort fields; they select the collector type and are part of the cache key.</param>
        /// <returns>The hit documents of the requested page, each mapped to its <see cref="ScoreDoc"/>.</returns>
        public static Dictionary <Document, ScoreDoc> SelectGroup(IndexSearcher indexSearcher, int pageSize, int pageIndex, Query query, out int recordCount, out GroupKeyValueList groupKeyValueList, Filter filter = null, params SortField[] sortFields)
            recordCount       = 0;
            groupKeyValueList = null;
            Dictionary <Document, ScoreDoc> dictPager = new Dictionary <Document, ScoreDoc>();
            int maxDoc = indexSearcher.IndexReader.MaxDoc;

            if (maxDoc == 0)
            TopDocs docs     = null;
            // Cache keys: one for the page of hits (includes page index and size) and one for the group list.
            string  key      = string.Format(CACHE_KEY, query.ToString(), string.Join("_", sortFields.Select(item => item.ToString())), filter == null ? string.Empty : filter.ToString());
            string  listKey  = key + string.Format(",PAGE_INDEX:{0},PAGE_SIZE:{1}", pageIndex, pageSize);
            string  groupKey = "GROUP:::" + key;

            docs = MemCache.MemoryCacheBus.Get(listKey) as TopDocs;
            groupKeyValueList = MemCache.MemoryCacheBus.Get(groupKey) as GroupKeyValueList;
            // Search again when either cached value is missing.
            if (docs == null || groupKeyValueList == null)
                int start = pageIndex * pageSize;
                start = Math.Min(start, maxDoc);

                using (GroupCollectorField groupCollectorField = new GroupCollectorField("NameValue"))
                    if (sortFields.Length > 0)
                        // NOTE(review): no visible line applies sortFields to this Sort; a statement may be
                        // missing from this excerpt - confirm the requested sort fields are actually used.
                        Sort sort = new Sort();
                        TopFieldCollector     topFieldCollector     = TopFieldCollector.Create(sort, start, true, false, false, !query.CreateWeight(indexSearcher).GetScoresDocsOutOfOrder());
                        GroupCollectorWrapper groupCollectorWrapper = new GroupCollectorWrapper(start, topFieldCollector, groupCollectorField);
                        indexSearcher.Search(query, filter, groupCollectorWrapper);
                        start = start - pageSize;
                        if (start < 0)
                            start = 0;
                        docs = topFieldCollector.TopDocs(start, pageSize);//return only the first 'start' records
                        TopScoreDocCollector  topScoreDocCollector  = TopScoreDocCollector.Create(start + 1, !query.CreateWeight(indexSearcher).GetScoresDocsOutOfOrder());
                        GroupCollectorWrapper groupCollectorWrapper = new GroupCollectorWrapper(start, topScoreDocCollector, groupCollectorField);
                        indexSearcher.Search(query, filter, groupCollectorWrapper);
                        start = start - pageSize;
                        if (start < 0)
                            start = 0;
                        docs = topScoreDocCollector.TopDocs(start, pageSize); //return only the first 'start' records
                    groupCollectorField.GroupKeyValueDocCountList.Sort();     //sort
                    groupKeyValueList = ObjectExtensions.Clone(groupCollectorField.GroupKeyValueDocCountList);
                    // Cache both values for CACHE_TIME minutes.
                    if (docs != null && groupKeyValueList != null)
                        TimeSpan timeSpan = TimeSpan.FromMinutes(CACHE_TIME);
                        MemCache.MemoryCacheBus.Insert(groupKey, groupKeyValueList, timeSpan);
                        MemCache.MemoryCacheBus.Insert(listKey, docs, timeSpan);
            // The region name below reads: "Return the search result set".
            #region 返回搜索的结果集合
            if (docs != null)
                recordCount = docs.TotalHits;          //total number of search results
                ScoreDoc[] scoreDocs = docs.ScoreDocs; //search result set
                if (scoreDocs != null)
                    foreach (ScoreDoc scoreDoc in scoreDocs)
                        // Entries with Doc == int.MaxValue or a negative-infinity score are skipped.
                        if (scoreDoc.Doc != int.MaxValue && scoreDoc.Score != System.Single.NegativeInfinity)
                            dictPager.Add(indexSearcher.Doc(scoreDoc.Doc), scoreDoc);
            if (dictPager.Count == 0)
            // NOTE(review): groupKeyValueList.Count is read here, before the null-coalescing
            // assignment on the next line - confirm it cannot be null at this point.
            if (groupKeyValueList.Count == 0)
            groupKeyValueList = groupKeyValueList ?? new GroupKeyValueList(0);
Пример #13
        /// <summary>
        /// Lazily yields one row per document. When reading from input the entity's filters, if any,
        /// are turned into a parsed query; when reading from output the documents are selected through
        /// the <c>TflDeleted</c> field and values are looked up by field alias instead of field name.
        /// </summary>
        public IEnumerable <IRow> Read()
            var reader   = _readerFactory.Create();
            var numDocs  = reader.NumDocs();
            // Only the configured fields are loaded from each document.
            var selector = new MapFieldSelector(_fields.Select(f => f.Name).ToArray());

            using (var searcher = _searcherFactory.Create()) {
                // read from input?  consider filters, and field names
                if (_readFrom == ReadFrom.Input)
                    if (_context.Entity.Filter.Any())
                        var queryFields = _context.Entity.Filter.Select(f => f.Field).ToArray();
                        // Each filter contributes "(expression or field:value) CONTINUATION"; the last
                        // three characters of the joined text are then removed - presumably the trailing
                        // continuation keyword, TODO confirm for continuations that are not three characters.
                        var query       = string.Join(" ", _context.Entity.Filter.Select(f => "(" + (string.IsNullOrEmpty(f.Expression) ? f.Field + ":" + f.Value : f.Expression) + ") " + f.Continuation.ToUpper()));
                        query = query.Remove(query.Length - 3);
                        var topFieldCollector = TopFieldCollector.Create(Sort.INDEXORDER, numDocs, false, false, false, false);

                        searcher.Search(new MultiFieldQueryParser(V, queryFields, _analyzer).Parse(query), topFieldCollector);

                        var topDocs = topFieldCollector.TopDocs();

                        if (topDocs == null)
                            yield break;

                        // NOTE(review): the loop index i is passed to searcher.Doc as the document id;
                        // topDocs.ScoreDocs[i].Doc is never consulted. When the query matches only a subset
                        // of the index this would load documents 0..TotalHits-1 rather than the hits - verify.
                        for (var i = 0; i < topDocs.TotalHits; i++)
                            var row = _rowFactory.Create();
                            var doc = searcher.Doc(i, selector);
                            foreach (var field in _fields)
                                row[field] = field.Convert(doc.Get(field.Name));
                            yield return(row);
                        // No filters: walk the documents by id.
                        // NOTE(review): the loop bound is NumDocs(); if deleted documents exist, ids at or
                        // above that value may never be visited - confirm whether MaxDoc was intended.
                        for (var i = 0; i < numDocs; i++)
                            if (reader.IsDeleted(i))
                            var doc = reader.Document(i, selector);
                            var row = _rowFactory.Create();
                            foreach (var field in _fields)
                                row[field] = field.Convert(doc.Get(field.Name));
                            yield return(row);
                else      // read from output? consider tfldeleted and field aliases

                    var tflDeleted = _context.Entity.TflDeleted();
                    var collector  = TopFieldCollector.Create(Sort.INDEXORDER, numDocs, false, false, false, false);
                    searcher.Search(LuceneConversion.TypeSearch(tflDeleted, tflDeleted.Alias, false), collector);

                    var topDocs = collector.TopDocs();

                    if (topDocs == null)
                        yield break;

                    // NOTE(review): same pattern as above - the loop index i is used as the document id
                    // instead of topDocs.ScoreDocs[i].Doc - verify.
                    for (var i = 0; i < topDocs.TotalHits; i++)
                        var row = _rowFactory.Create();
                        var doc = searcher.Doc(i, selector);
                        foreach (var field in _fields)
                            row[field] = field.Convert(doc.Get(field.Alias));
                        yield return(row);
Пример #14
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in C#:
//ORIGINAL LINE: static org.apache.lucene.search.TopFieldCollector scoringCollector(org.apache.lucene.search.Sort sorting, int n) throws java.io.IOException
        /// <summary>
        /// Builds a TopFieldCollector by forwarding to
        /// <c>TopFieldCollector.create(sorting, n, false, true, false)</c>.
        /// </summary>
        /// <param name="sorting">Sort forwarded as the first argument of the factory call.</param>
        /// <param name="n">Forwarded as the second argument of the factory call (presumably the number of hits to keep - confirm).</param>
        /// <returns>The collector produced by the factory call.</returns>
        /// <remarks>
        /// NOTE(review): the three literals presumably correspond to (fillFields, trackDocScores, trackMaxScore)
        /// of the 5-argument Java overload, i.e. only per-document scores are tracked - confirm against the
        /// library version in use. The lower-case <c>create</c> looks like a leftover of the Java conversion.
        /// </remarks>
        internal static TopFieldCollector ScoringCollector(Sort sorting, int n)
            return(TopFieldCollector.create(sorting, n, false, true, false));
Пример #15
        /// <summary>
        /// Executes <paramref name="query"/> against <c>LuceneSearcher</c>, stores the resulting page in
        /// <c>TopDocs</c> and the hit count in <c>TotalItemCount</c>.
        /// </summary>
        /// <param name="query">Query handed to <c>LuceneSearcher.Search</c>.</param>
        /// <param name="sortField">Sort fields; when at least one is supplied a field-sorted collector is used.</param>
        /// <param name="maxResults">Requested collector size; values below 1 fall back to <c>LuceneSearcher.MaxDoc</c>.</param>
        /// <param name="skip">Optional start offset passed to the collector's <c>TopDocs</c> overloads.</param>
        /// <param name="take">Optional page length; only used when it is non-negative.</param>
        /// <remarks>
        /// NOTE(review): this excerpt shows <c>catch</c> clauses without a visible <c>try</c> and alternative
        /// assignments without a visible <c>else</c>; brace/keyword-only lines appear to be missing here.
        /// </remarks>
        private void DoSearch(Query query, IEnumerable <SortField> sortField, int maxResults, int?skip = null, int?take = null)
            var extractTermsSupported = CheckQueryForExtractTerms(query);

            if (extractTermsSupported)
                //This try/catch exists because analyzers strip out stop words and sometimes leave the query
                //with null values. It simply tries to extract terms; if that fails with a null
                //reference then it is an invalid null query. NotSupportedException occurs when the query is
                //valid but the type of query can't extract terms.
                //This IS a work-around; theoretically Lucene itself should check for null query parameters
                //before throwing exceptions.
                    var set = new HashSet <Term>();
                catch (NullReferenceException)
                    //this means that an analyzer has stripped out stop words and now there are
                    //no words left to search on

                    //it could also mean that potentially an IIndexFieldValueType is throwing a null ref
                    TotalItemCount = 0;
                catch (NotSupportedException)
                    //swallow this exception, we should continue if this occurs.

            // Clamp the requested size to the searcher's MaxDoc; anything below 1 means "use MaxDoc".
            maxResults = maxResults >= 1 ? Math.Min(maxResults, LuceneSearcher.MaxDoc) : LuceneSearcher.MaxDoc;

            Collector topDocsCollector;
            // Reuse the caller's array when one was passed, otherwise materialize the sequence once.
            var       sortFields = sortField as SortField[] ?? sortField.ToArray();

            // Sorted search uses a TopFieldCollector; the TopScoreDocCollector assignment below is
            // presumably the else-branch (no sort fields) - confirm.
            if (sortFields.Length > 0)
                topDocsCollector = TopFieldCollector.Create(
                    new Sort(sortFields), maxResults, false, false, false, false);
                topDocsCollector = TopScoreDocCollector.Create(maxResults, true);

            LuceneSearcher.Search(query, topDocsCollector);

            // Without a skip value the whole collected set is read; the skip/take overloads that follow
            // are presumably the else-branch - confirm.
            if (!skip.HasValue)
                TopDocs = sortFields.Length > 0
                    ? ((TopFieldCollector)topDocsCollector).TopDocs()
                    : ((TopScoreDocCollector)topDocsCollector).TopDocs();
                // The cast target follows the same sortFields.Length test used when the collector was created.
                if (sortFields.Length > 0 && take != null && take.Value >= 0)
                    TopDocs = ((TopFieldCollector)topDocsCollector).TopDocs(skip.Value, take.Value);
                else if (sortFields.Length > 0 && (take == null || take.Value < 0))
                    TopDocs = ((TopFieldCollector)topDocsCollector).TopDocs(skip.Value);
                else if (take != null && take.Value >= 0)
                    TopDocs = ((TopScoreDocCollector)topDocsCollector).TopDocs(skip.Value, take.Value);
                    TopDocs = ((TopScoreDocCollector)topDocsCollector).TopDocs(skip.Value);

            TotalItemCount = TopDocs.TotalHits;

Пример #16
 /// <summary>
 /// Stores the search parameters and creates the wrapped TopFieldCollector.
 /// </summary>
 /// <param name="size">Forwarded as the second argument of <c>TopFieldCollector.Create</c> (presumably the number of hits to keep - confirm).</param>
 /// <param name="searchParams">Saved in <c>_searchParams</c>.</param>
 /// <param name="sort">Sort forwarded to <c>TopFieldCollector.Create</c>.</param>
 public SnTopFieldCollector(int size, SearchParams searchParams, Sort sort)
     : base()
     _searchParams = searchParams;
     // Only the fourth argument is true; presumably that is the trackDocScores flag - confirm against the Lucene.NET version in use.
     _wrapped      = TopFieldCollector.Create(sort, size, false, true, false, false);
Пример #17
        /// <summary>
        /// Combined search across multiple index directories (with paging).
        /// </summary>
        /// <typeparam name="T">Type of the returned result items.</typeparam>
        /// <param name="indexPaths">Collection of index directory paths.</param>
        /// <param name="query">Query to execute.</param>
        /// <param name="sort">Sort order.</param>
        /// <param name="pageIndex">Current (1-based) result page; values below 1 are treated as 1.</param>
        /// <param name="pageSize">Number of items per page; values below 1 are treated as 1.</param>
        /// <param name="count">Total number of search hits.</param>
        /// <returns></returns>
        /// <remarks>
        /// NOTE(review): several guard conditions below have no visible body and no return statement is
        /// visible; those lines appear to be missing from this excerpt.
        /// </remarks>
        public static List <T> Search <T>(string[] indexPaths, Query query, Sort sort, int pageIndex, int pageSize, out int count) where T : BaseIndexModel
            count = 0;

            if (null == query)

            // Normalize paging arguments so the start offset below is never negative.
            if (pageIndex < 1)
                pageIndex = 1;

            if (pageSize < 1)
                pageSize = 1;

            // Zero-based offset of the first hit on the requested page.
            int start = (pageIndex - 1) * pageSize;

            if (null == indexPaths || indexPaths.Length < 1)

            List <IndexSearcher> searchers = new List <IndexSearcher>();

            // Open one searcher per index path.
            // NOTE(review): no searchers.Add(...) call is visible in this excerpt.
            foreach (var indexPath in indexPaths)
                if (string.IsNullOrWhiteSpace(indexPath))

                FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());

                if (null == directory)

                bool isExist = IndexReader.IndexExists(directory);

                if (!isExist)

                // Second argument is presumably the readOnly flag - confirm.
                IndexReader reader = IndexReader.Open(directory, true);

                IndexSearcher searcher = new IndexSearcher(reader);


            if (searchers.Count < 1)

            MultiSearcher multiSearcher = new MultiSearcher(searchers.ToArray());

            // Collect enough hits to cover every page up to and including the requested one.
            TopFieldCollector results = TopFieldCollector.Create(sort, start + pageSize, false, false, false, false);

            multiSearcher.Search(query, results);

            count = results.TotalHits;

            // Slice out just the requested page.
            var docs = results.TopDocs(start, pageSize).ScoreDocs;

            List <T> list = new List <T>();

            foreach (var scoreDoc in docs)
                Document doc = multiSearcher.Doc(scoreDoc.Doc);

                // 'as T' yields null when the factory result is not a T; the null check below guards that.
                var data = new IndexFactory(doc).Result as T;

                if (null != data)

Пример #18
        /// <summary>
        /// Searches the index for <paramref name="keyword"/> and maps one page of matching documents onto
        /// new <typeparamref name="T"/> instances, using the attributes on T's properties to pick the
        /// searched fields and the sort fields.
        /// </summary>
        /// <typeparam name="T">Result type; its public properties are read via reflection.</typeparam>
        /// <param name="keyword">Search keyword; also passed to <c>Preview</c> for attributed properties.</param>
        /// <param name="totalCount">Receives <c>TotalHits</c> of the executed search.</param>
        /// <param name="pageIndex">1-based page number.</param>
        /// <param name="pageSize">Number of results per page.</param>
        /// <remarks>
        /// NOTE(review): the statements that fill <c>queries</c>/<c>fields</c>/<c>flags</c>, the <c>else</c>
        /// lines and the return statement are not visible in this excerpt.
        /// </remarks>
        public IList <T> Retrieve <T>(string keyword, out int totalCount, int pageIndex = 1, int pageSize = 10) where T : class, new()
            using (IndexSearcher searcher = new IndexSearcher(directory, true))
                List <string>    queries    = new List <string>();
                List <string>    fields     = new List <string>();
                List <Occur>     flags      = new List <Occur>();
                List <SortField> sortFields = new List <SortField>();
                PropertyInfo[]   properties = typeof(T).GetProperties();

                // Inspect each property's attributes: OccurAttribute marks searchable fields,
                // SortAttribute contributes a sort field.
                foreach (var property in properties)
                    if (property.IsDefined(typeof(OccurAttribute), false))
                        OccurAttribute attribute = property.GetCustomAttribute(typeof(OccurAttribute)) as OccurAttribute;
                        Occur          occur     = attribute.Occur;
                        if (!occur.Equals(Occur.MUST_NOT))
                    if (property.IsDefined(typeof(SortAttribute), false))
                        SortAttribute attribute = property.GetCustomAttribute(typeof(SortAttribute)) as SortAttribute;
                        int           sortField = attribute.Type;
                        bool          reverse   = attribute.Reverse;
                        sortFields.Add(new SortField(property.Name, sortField, reverse));

                Query query = MultiFieldQueryParser.Parse(Lucene.Net.Util.Version.LUCENE_30, queries?.ToArray(), fields?.ToArray(), flags?.ToArray(), analyzer);

                //Query queryR= new TermRangeQuery()

                TopDocs tds;
                int     startRowIndex = (pageIndex - 1) * pageSize;              //paging: zero-based offset of the requested page
                // With sort fields a TopFieldCollector is used; the TopScoreDocCollector lines below are
                // presumably the else-branch - confirm.
                if (sortFields.Count > 0)
                    Sort sort = new Sort(sortFields?.ToArray());
                    TopFieldCollector collector = TopFieldCollector.Create(sort, pageIndex * pageSize, false, false, false, false);
                    searcher.Search(query, collector);                     //run the search and collect the results
                    tds = collector.TopDocs(startRowIndex, pageSize);
                    TopScoreDocCollector collector = TopScoreDocCollector.Create(pageIndex * pageSize, false);
                    searcher.Search(query, collector);
                    tds = collector.TopDocs(startRowIndex, pageSize);
                totalCount = tds.TotalHits;

                IList <T> list = new List <T>();
                // Copy the stored field values of each hit onto a fresh T.
                foreach (ScoreDoc sd in tds.ScoreDocs)
                    Document doc          = searcher.Doc(sd.Doc);
                    T        searchResult = new T();
                    foreach (var property in properties)
                        string value = doc.Get(property.Name);
                        if (!string.IsNullOrEmpty(value))
                            Action <object, object> setValue = ReappearMember.CreatePropertySetter(property);
                            // Properties carrying OccurAttribute get the Preview(value, keyword) result;
                            // the plain assignment is presumably the else-branch - confirm.
                            if (property.IsDefined(typeof(OccurAttribute), false))
                                setValue(searchResult, Preview(value, keyword));
                                setValue(searchResult, value);
Пример #19
        /// <summary>
        /// Executes the search.
        /// </summary>
        /// <param name="pageIndex">Page index used to size the collector: (pageIndex + 1) pages of hits are requested.</param>
        /// <returns></returns>
        /// <remarks>
        /// NOTE(review): <c>else</c> lines and the return statement are not visible in this excerpt, so the
        /// branch structure described in the comments below is inferred from the statement order - confirm.
        /// </remarks>
        private Collector ExecuteSearch(int pageIndex)
            Collector        collector = null;
            TopDocsCollector result    = null;
            // One hit for random results; otherwise enough hits for pages 0..pageIndex, with 50 per page
            // when HitsPerPage is not positive.
            var totalHits = _options.RandomResult ? 1 : (pageIndex + 1) * (_options.HitsPerPage > 0 ? _options.HitsPerPage : 50);

            if (totalHits == 0)
                totalHits = 1;
            global::Lucene.Net.Search.Query query = _query;
            // A sort without a query: build a placeholder query ("test:1") so the sorted search can run.
            if (_sort != null && query == null)
                var parser = new global::Lucene.Net.QueryParsers.QueryParser(global::Lucene.Net.Util.Version.LUCENE_29, "test", new global::Lucene.Net.Analysis.Standard.StandardAnalyzer(global::Lucene.Net.Util.Version.LUCENE_29, Stopwords.PORTUGUESE_SET));
                query  = parser.Parse("test:1");
                result = TopFieldCollector.create(_sort, totalHits, true, false, false, true);
            if (query != null)
                var weight = query.Weight(Searcher);
                // The last argument is derived from whether the weight scores documents out of order.
                collector = result = TopFieldCollector.create(_sort, totalHits, true, false, false, !weight.ScoresDocsOutOfOrder());
                // Optionally wrap the collector: random wrapper for random results, summary wrapper when
                // no channel fields are known yet.
                if (_options.RandomResult)
                    collector = new RandomCollectorWrapper(this, this.Searcher, result);
                else if (_channelFields == null || _channelFields.Length == 0)
                    collector = new SummaryCollectorWrapper(this, this.Searcher, result);
                this.Searcher.Search(weight, _filter, collector);
                // Pull the extra data gathered by whichever wrapper was used.
                if (collector is SummaryCollectorWrapper)
                    var sCollector = ((SummaryCollectorWrapper)collector);
                    _summaries     = sCollector.GetSummaries().ToList();
                    _channelFields = sCollector.GetChannelsFields().ToArray();
                if (collector is RandomCollectorWrapper)
                    var wrapper       = (RandomCollectorWrapper)collector;
                    var wrapperResult = wrapper.GetResult();
                    _result = wrapperResult is Element[] ? (Element[])wrapperResult : wrapperResult.ToArray();
                    _count  = _result.Length;
                _count = result.GetTotalHits();
                // Presumably the branch taken when there is no query at all: the wrappers are built
                // without an inner collector - confirm.
                SummaryCollectorWrapper wrapper = null;
                if (_options.RandomResult)
                    wrapper = new RandomCollectorWrapper(this);
                    _result = ((RandomCollectorWrapper)wrapper).GetResult().ToArray();
                    collector      = wrapper = new SummaryCollectorWrapper(this);
                    _channelFields = wrapper.GetChannelsFields().ToArray();
                _count     = wrapper.GetTotalHits();
                _summaries = wrapper.GetSummaries().ToList();
        /// <summary>
        /// Accumulates groups for the BlockJoinQuery specified by its slot.
        /// </summary>
        /// <param name="slot"> Search query's slot </param>
        /// <param name="offset"> Parent docs offset </param>
        /// <param name="maxDocsPerGroup"> Upper bound of documents per group number </param>
        /// <param name="withinGroupOffset"> Offset within each group of child docs </param>
        /// <param name="withinGroupSort"> Sort criteria within groups </param>
        /// <param name="fillSortFields"> Specifies whether to add sort fields or not </param>
        /// <returns> <see cref="ITopGroups{T}"/> for the query specified by slot </returns>
        /// <exception cref="IOException"> if there is a low-level I/O error </exception>
        /// <exception cref="ArgumentException"> if <paramref name="withinGroupSort"/> is null while trackScores is false </exception>
        /// <remarks>
        /// NOTE(review): <c>else</c> lines and the per-document collect call are not visible in this excerpt;
        /// the branch structure noted in the comments below is inferred from statement order - confirm.
        /// </remarks>
        private ITopGroups <int> AccumulateGroups(int slot, int offset, int maxDocsPerGroup, int withinGroupOffset, Sort withinGroupSort, bool fillSortFields)
            // One result entry per parent group from 'offset' onwards.
            var groups     = new GroupDocs <int> [sortedGroups.Length - offset];
            var fakeScorer = new FakeScorer();

            int totalGroupedHitCount = 0;

            //System.out.println("slot=" + slot);

            for (int groupIdx = offset; groupIdx < sortedGroups.Length; groupIdx++)
                OneGroup og = sortedGroups[groupIdx];
                int      numChildDocs;
                // No child docs when the slot is unset (-1) or outside this group's counts array;
                // the og.counts[slot] assignment is presumably the else-branch.
                if (slot == -1 || slot >= og.counts.Length)
                    numChildDocs = 0;
                    numChildDocs = og.counts[slot];

                // Number of documents in group should be bounded to prevent redundant memory allocation
                // (Math.Max keeps the size at 1 or more even when the group has no child docs).
                int numDocsInGroup = Math.Max(1, Math.Min(numChildDocs, maxDocsPerGroup));
                //System.out.println("parent doc=" + og.doc + " numChildDocs=" + numChildDocs + " maxDocsPG=" + maxDocsPerGroup);

                // At this point we hold all docs w/ in each group, unsorted; we now sort them:
                ICollector collector;
                if (withinGroupSort == null)
                    //System.out.println("sort by score");
                    // Sort by score
                    if (!trackScores)
                        throw new ArgumentException("cannot sort by relevance within group: trackScores=false");
                    collector = TopScoreDocCollector.Create(numDocsInGroup, true);
                    // Sort by fields
                    collector = TopFieldCollector.Create(withinGroupSort, numDocsInGroup, fillSortFields, trackScores, trackMaxScore, true);

                // Replay the group's child docs (and scores, when tracked) through the fake scorer.
                for (int docIdx = 0; docIdx < numChildDocs; docIdx++)
                    //System.out.println("docIDX=" + docIDX + " vs " + og.docs[slot].length);
                    int doc = og.docs[slot][docIdx];
                    fakeScorer.doc = doc;
                    if (trackScores)
                        fakeScorer._score = og.scores[slot][docIdx];
                totalGroupedHitCount += numChildDocs;

                object[] groupSortValues;

                // Copy the parent's sort values from each comparer when requested; the null assignment
                // is presumably the else-branch.
                if (fillSortFields)
                    groupSortValues = new object[comparers.Length];
                    for (int sortFieldIdx = 0; sortFieldIdx < comparers.Length; sortFieldIdx++)
                        groupSortValues[sortFieldIdx] = comparers[sortFieldIdx][og.Slot];
                    groupSortValues = null;

                TopDocs topDocs;
                // Cast back to the concrete collector type chosen above to read the requested window.
                if (withinGroupSort == null)
                    var tempCollector = (TopScoreDocCollector)collector;
                    topDocs = tempCollector.GetTopDocs(withinGroupOffset, numDocsInGroup);
                    var tempCollector = (TopFieldCollector)collector;
                    topDocs = tempCollector.GetTopDocs(withinGroupOffset, numDocsInGroup);

                groups[groupIdx - offset] = new GroupDocs <int>(og.Score, topDocs.MaxScore, numChildDocs, topDocs.ScoreDocs, og.Doc, groupSortValues);

            // The inner TopGroups is built with a total hit count of 0; the outer one supplies totalHitCount.
            return(new TopGroups <int>(new TopGroups <int>(sort.GetSort(), withinGroupSort == null ? null : withinGroupSort.GetSort(), 0, totalGroupedHitCount, groups, maxScore), totalHitCount));
Пример #21
        /// <summary>
        /// Paged query (high efficiency). wyp
        /// http://blog.csdn.net/smallearth/article/details/7980226
        /// </summary>
        /// <param name="indexSearcher">Searcher used to run the query and load the documents.</param>
        /// <param name="pageSize">Number of records per page.</param>
        /// <param name="pageIndex">Page number; multiplied by <paramref name="pageSize"/> to size the collectors.</param>
        /// <param name="query">Query to execute; its string form is part of the cache key.</param>
        /// <param name="recordCount">Receives <c>TotalHits</c> of the result, or 0.</param>
        /// <param name="culture">Upper-cased and included in the cache key.</param>
        /// <param name="filter">Optional filter passed to the searcher; its string form is part of the cache key.</param>
        /// <param name="sortFields">Sort fields; their string forms are part of the cache key.</param>
        /// <returns></returns>
        /// <remarks>
        /// NOTE(review): <c>else</c>/<c>return</c>/<c>#endregion</c> lines are not visible in this excerpt, so
        /// the branch structure noted in the comments below is inferred from statement order - confirm.
        /// </remarks>
        public static Dictionary <Document, ScoreDoc> SelectAfter(IndexSearcher indexSearcher, int pageSize, int pageIndex, Query query, out int recordCount, string culture = "", Filter filter = null, params SortField[] sortFields)
            recordCount = 0;
            Dictionary <Document, ScoreDoc> dictPager = new Dictionary <Document, ScoreDoc>(pageSize);
            int maxDoc = indexSearcher.IndexReader.MaxDoc;

            if (maxDoc == 0)
            TopDocs docs = null;
            // Base cache key: query text, sort fields, filter text and upper-cased culture.
            string  key  = string.Format(CACHE_KEY, query.ToString(), string.Join("_", sortFields.Select(item => item.ToString())), filter == null ? string.Empty : filter.ToString(), culture.ToUpper());

            // Region: first fetch the last document of the given (PageIndex) page of results, then continue
            // from that document to fetch the next PageSize documents.
            #region 先取出某(PageIndex)页文档结果中的最后一个文档,然后在从这个文档开始继续往下取出PageSize大小的文档记录
            ScoreDoc lastScoreDoc = GetLastScoreDoc(indexSearcher, pageSize, pageIndex, query, filter, culture, sortFields);
            // No previous-page marker: fetch (or read from cache) only the first pageSize hits.
            if (lastScoreDoc == null)
                key += string.Format(",PAGE_SIZE:{0}", pageSize);
                docs = MemCache.MemoryCacheBus.Get(key) as TopDocs;
                if (docs == null)
                    if (sortFields.Length > 0)
                        // NOTE(review): the Sort is created without sortFields, so the supplied sort fields
                        // do not reach the search here - confirm whether new Sort(sortFields) was intended.
                        Sort sort = new Sort();
                        docs = indexSearcher.Search(query, filter, pageSize, sort);//return only the first pageSize records
                    {                                                         //no sorting
                        docs = indexSearcher.Search(query, filter, pageSize); //return only the first pageSize records
                    if (docs != null)
                        MemCache.MemoryCacheBus.Insert(key, docs, TimeSpan.FromMinutes(CACHE_TIME));
                // Presumably the else-branch (a previous-page marker exists) - confirm.
                if (lastScoreDoc.Doc < maxDoc)
                    key += string.Format(",DOC:{0},PAGE_INDEX:{1},PAGE_SIZE:{2}", lastScoreDoc.Doc, pageIndex, pageSize);
                    docs = MemCache.MemoryCacheBus.Get(key) as TopDocs;
                    if (docs == null)
                        if (sortFields.Length > 0)
                            // Collect up to pageIndex * pageSize hits (capped at maxDoc), then slice the last page.
                            int start = pageIndex * pageSize;
                            start = Math.Min(start, maxDoc);
                            // NOTE(review): Sort created without sortFields, as above.
                            Sort sort = new Sort();
                            TopFieldCollector topFieldCollector = TopFieldCollector.Create(sort, start, true, false, false, !query.CreateWeight(indexSearcher).GetScoresDocsOutOfOrder());
                            indexSearcher.Search(query, filter, topFieldCollector);
                            start = start - pageSize;
                            if (start < 0)
                                start = 0;
                            docs = topFieldCollector.TopDocs(start, pageSize);//return pageSize records beginning at offset start
                            //http://search-lucene.com/c/Lucene:core/src/java/org/apache/lucene/search/IndexSearcher.java||IndexSearcher line 482
                            TopScoreDocCollectorEx topScoreDocCollectorEx = TopScoreDocCollectorEx.Create(pageSize, lastScoreDoc, !query.CreateWeight(indexSearcher).GetScoresDocsOutOfOrder());
                            indexSearcher.Search(query, filter, topScoreDocCollectorEx);
                            docs = topScoreDocCollectorEx.TopDocs();
                        if (docs != null)
                            MemCache.MemoryCacheBus.Insert(key, docs, TimeSpan.FromMinutes(CACHE_TIME));
            // Region: first fetch the documents of the first (PageIndex+1) pages, then take the last page
            // of records from that result.
            #region 先取出前(PageIndex+1)页的文档数,然后在从文档结果中取出最后一页的文档记录。
            key += string.Format(",PAGE_INDEX:{0},PAGE_SIZE:{1}", pageIndex, pageSize);
            // NOTE(review): this path reads and writes WebCache.DataCacheBus while the code above uses
            // MemCache.MemoryCacheBus - confirm that the two caches are meant to differ.
            docs = WebCache.DataCacheBus.Get(key) as TopDocs;
            if (docs == null)
                int start = pageIndex * pageSize;
                start = Math.Min(start, maxDoc);
                if (sortFields.Length > 0)
                    // NOTE(review): Sort created without sortFields, as above.
                    Sort sort = new Sort();
                    TopFieldCollector topFieldCollector = TopFieldCollector.Create(sort, start, true, false, false, !query.CreateWeight(indexSearcher).GetScoresDocsOutOfOrder());
                    indexSearcher.Search(query, filter, topFieldCollector);
                    start = start - pageSize;
                    if (start < 0)
                        start = 0;
                    docs = topFieldCollector.TopDocs(start, pageSize);//return pageSize records beginning at offset start
                    TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.Create(start + 1, !query.CreateWeight(indexSearcher).GetScoresDocsOutOfOrder());
                    indexSearcher.Search(query, filter, topScoreDocCollector);
                    start = start - pageSize;
                    if (start < 0)
                        start = 0;
                    docs = topScoreDocCollector.TopDocs(start, pageSize);//return pageSize records beginning at offset start
                if (docs != null)
                    WebCache.DataCacheBus.Insert(key, docs, TimeSpan.FromMinutes(CACHE_TIME));
            // Region: return the collection of search results.
            #region 返回搜索的结果集合
            if (docs != null)
                recordCount = docs.TotalHits;          //total number of search hits
                ScoreDoc[] scoreDocs = docs.ScoreDocs; //the collection of search results
                if (scoreDocs != null)
                    foreach (ScoreDoc scoreDoc in scoreDocs)
                        // Skip entries whose doc id is int.MaxValue or whose score is negative infinity.
                        if (scoreDoc.Doc != int.MaxValue && scoreDoc.Score != System.Single.NegativeInfinity)
                            lastScoreDoc = scoreDoc;//remember the last ScoreDoc of the current page of results
                            dictPager.Add(indexSearcher.Doc(scoreDoc.Doc), scoreDoc);
            if (dictPager.Count == 0)
            else if (lastScoreDoc != null)
            {                                                                                                                                                                                                                                                                   //Prepare in advance: when the user searches the next page, the last ScoreDoc of the current page is needed for that search. wyp
                key = string.Format(CACHE_KEY, query.ToString(), string.Join("_", sortFields.Select(item => item.ToString())), filter == null ? string.Empty : filter.ToString(), culture.ToUpper()) + string.Format(",PAGE_INDEX:{0},PAGE_SIZE:{1}", pageIndex + 1, pageSize); //Put the LastScoreDoc needed by the next page into the cache ahead of time. wyp
                MemCache.MemoryCacheBus.Insert(key, lastScoreDoc, TimeSpan.FromMinutes(CACHE_TIME));
