/// <summary>
        /// Copies every document id produced by <paramref name="iterator"/> into a new
        /// <see cref="FixedBitSet"/> created with <paramref name="numBits"/> bits.
        /// </summary>
        /// <param name="iterator">Iterator that is read until it reports <c>NO_MORE_DOCS</c>.</param>
        /// <param name="numBits">Size passed to the <see cref="FixedBitSet"/> constructor.</param>
        /// <returns>The populated bit set.</returns>
        private static FixedBitSet ToFixedBitSet(DocIdSetIterator iterator, int numBits)
        {
            FixedBitSet bits = new FixedBitSet(numBits);

            for (int docId = iterator.NextDoc();
                 docId != DocIdSetIterator.NO_MORE_DOCS;
                 docId = iterator.NextDoc())
            {
                bits.Set(docId);
            }

            return bits;
        }
Ejemplo n.º 2
0
        /// <summary> Builds a SortedVIntList from the documents produced by an iterator.</summary>
        /// <param name="docIdSetIterator"> Source of document numbers as a set of integers.
        /// The constructor reads this iterator to exhaustion, and the iterator
        /// must deliver its integers in non-decreasing order.
        /// </param>
        public SortedVIntList(DocIdSetIterator docIdSetIterator)
        {
            SortedVIntListBuilder listBuilder = new SortedVIntListBuilder(this);

            for (int docId = docIdSetIterator.NextDoc();
                 docId != DocIdSetIterator.NO_MORE_DOCS;
                 docId = docIdSetIterator.NextDoc())
            {
                listBuilder.AddInt(docId);
            }

            // Finish the builder once every document number has been added.
            listBuilder.Done();
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Pulls document ids from a DocIdSetIterator and feeds them to the encoder until the
 /// encoder has received <c>numValues</c> entries. </summary>
 /// <param name="disi"> This DocIdSetIterator should provide document ids that are consistent
 ///              with <c>numValues</c> and <c>upperBound</c> as provided to the constructor.   </param>
 /// <exception cref="ArgumentException"> The iterator ran out of documents before
 ///              <c>numValues</c> ids had been encoded. </exception>
 public virtual void EncodeFromDisi(DocIdSetIterator disi)
 {
     while (efEncoder.numEncoded < efEncoder.numValues)
     {
         int docId = disi.NextDoc();
         if (docId != DocIdSetIterator.NO_MORE_DOCS)
         {
             efEncoder.EncodeNext(docId);
             continue;
         }

         // The iterator is exhausted but the encoder still expects more values.
         throw new ArgumentException("disi: " + disi.ToString() + "\nhas " + efEncoder.numEncoded + " docs, but at least " + efEncoder.numValues + " are required.");
     }
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Walks <paramref name="a"/> and the iterator of <paramref name="b"/> in lock step and
        /// asserts that both report the same sequence of set bits. Each step randomly uses either
        /// <c>NextDoc()</c> or <c>Advance()</c> on the iterator.
        /// </summary>
        internal virtual void DoIterate2(BitSet a, FixedBitSet b)
        {
            DocIdSetIterator fixedIter = b.GetIterator();
            int expectedBit = -1;
            int actualDoc = -1;

            do
            {
                expectedBit = a.NextSetBit(expectedBit + 1);

                if (Random.NextBoolean())
                {
                    actualDoc = fixedIter.NextDoc();
                }
                else
                {
                    actualDoc = fixedIter.Advance(actualDoc + 1);
                }

                // The BitSet signals exhaustion with -1, the iterator with NO_MORE_DOCS.
                int wanted = expectedBit == -1 ? DocIdSetIterator.NO_MORE_DOCS : expectedBit;
                Assert.AreEqual(wanted, actualDoc);
            } while (expectedBit >= 0);
        }
Ejemplo n.º 5
0
            /// <summary>
            /// Wraps the reader of <paramref name="context"/> and computes its live-docs bit set:
            /// the documents accepted by <paramref name="preserveFilter"/> (or the complement of
            /// that set when <paramref name="negateFilter"/> is true), minus any document the
            /// wrapped reader already reports as deleted.
            /// </summary>
            public DocumentFilteredAtomicIndexReader(AtomicReaderContext context, Filter preserveFilter, bool negateFilter)
                : base(context.AtomicReader)
            {
                int docLimit = m_input.MaxDoc;
                FixedBitSet kept = new FixedBitSet(docLimit);

                // Live docs are not handed to the filter here; deletions are applied further down.
                DocIdSet accepted = preserveFilter.GetDocIdSet(context, null);
                DocIdSetIterator acceptedIter = accepted != null ? accepted.GetIterator() : null;
                if (acceptedIter != null)
                {
                    kept.Or(acceptedIter);
                }

                if (negateFilter)
                {
                    kept.Flip(0, docLimit);
                }

                if (m_input.HasDeletions)
                {
                    IBits previousLiveDocs = m_input.LiveDocs;
                    Debug.Assert(previousLiveDocs != null);

                    DocIdSetIterator keptIter = kept.GetIterator();
                    int docId = keptIter.NextDoc();
                    while (docId < docLimit)
                    {
                        if (!previousLiveDocs.Get(docId))
                        {
                            // Clearing the current bit is safe: the iterator has already moved past it.
                            kept.Clear(docId);
                        }
                        docId = keptIter.NextDoc();
                    }
                }

                this.liveDocs = kept;
                this.numDocs  = kept.Cardinality();
            }
Ejemplo n.º 6
0
        /// <summary>
        /// Indexes a single document with <c>field1=value1</c> and checks that a term filter
        /// yields exactly that document for the indexed term, and a null DocIdSet for a
        /// missing term or a missing field.
        /// </summary>
        public void TestMissingTermAndField()
        {
            string indexedField = @"field1";
            Directory directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, directory);

            Document document = new Document();
            document.Add(NewStringField(indexedField, @"value1", Field.Store.NO));
            writer.AddDocument(document);

            IndexReader reader = SlowCompositeReaderWrapper.Wrap(writer.GetReader());
            assertTrue(reader.Context is AtomicReaderContext);
            var atomicContext = (AtomicReaderContext)reader.Context;

            writer.Dispose();

            // Term that exists: exactly one hit, document 0.
            DocIdSet matches = TermFilter(indexedField, @"value1").GetDocIdSet(atomicContext, atomicContext.AtomicReader.LiveDocs);
            assertNotNull(@"must not be null", matches);

            DocIdSetIterator matchIter = matches.GetIterator();
            assertEquals(matchIter.NextDoc(), 0);
            assertEquals(matchIter.NextDoc(), DocIdSetIterator.NO_MORE_DOCS);

            // Existing field, unknown term.
            matches = TermFilter(indexedField, @"value2").GetDocIdSet(atomicContext, atomicContext.AtomicReader.LiveDocs);
            assertNull(@"must be null", matches);

            // Unknown field.
            matches = TermFilter(@"field2", @"value1").GetDocIdSet(atomicContext, atomicContext.AtomicReader.LiveDocs);
            assertNull(@"must be null", matches);

            reader.Dispose();
            directory.Dispose();
        }
Ejemplo n.º 7
0
            /// <summary>
            /// Obtains the inner set's iterator and positions it on its first document.
            /// When the inner set has no documents the iterator reference is dropped.
            /// </summary>
            private void Initialize()
            {
                it1 = parent.innerSet.Iterator();

                try
                {
                    innerDocid = it1.NextDoc();
                    if (innerDocid == DocIdSetIterator.NO_MORE_DOCS)
                    {
                        // Nothing to iterate: release the inner iterator.
                        it1 = null;
                    }
                }
                catch
                {
                    // NOTE(review): any failure from NextDoc() is silently ignored, leaving
                    // it1 and innerDocid as they were before the call - confirm this is intended.
                }
            }
Ejemplo n.º 8
0
        /// <summary>
        /// Counts the documents accepted by <paramref name="filt"/> on the test reader and
        /// asserts that the count equals <paramref name="expected"/>.
        /// </summary>
        /// <param name="mes">Message passed to the assertion.</param>
        /// <param name="expected">Expected number of matching documents.</param>
        /// <param name="filt">Filter under test.</param>
        private void TstFilterCard(string mes, int expected, Filter filt)
        {
            DocIdSet docIdSet = filt.GetDocIdSet(reader.AtomicContext, reader.LiveDocs);
            int      actual   = 0;

            // A null set and a null iterator are both treated as "no documents matched",
            // so an empty filter result is reported as a count of 0 rather than a
            // NullReferenceException.
            DocIdSetIterator disi = docIdSet != null ? docIdSet.GetIterator() : null;
            if (disi != null)
            {
                while (disi.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                {
                    actual++;
                }
            }

            assertEquals(mes, expected, actual);
        }
Ejemplo n.º 9
0
            /// <summary>
            /// Obtains the inner set's iterator and positions it on its first document.
            /// When the inner set has no documents the iterator reference is dropped.
            /// </summary>
            private void Initialize()
            {
                it1 = innerSet.GetIterator();

                try
                {
                    int firstDoc = it1.NextDoc();
                    innerDocid = firstDoc;

                    if (firstDoc == DocIdSetIterator.NO_MORE_DOCS)
                    {
                        // Nothing to iterate: release the inner iterator.
                        it1 = null;
                    }
                }
                catch (Exception)
                {
                    // NOTE(review): a failure from NextDoc() is deliberately swallowed here and
                    // the fields keep their previous values - confirm this is intended.
                }
            }
Ejemplo n.º 10
0
        /// <summary>
        /// Increments <c>m_values</c> for every ordinal stored in the binary doc values of each
        /// matching document, then calls <c>Rollup()</c>.
        /// </summary>
        /// <param name="matchingDocs">Per-segment hits whose ordinals are counted.</param>
        private void Count(IList <FacetsCollector.MatchingDocs> matchingDocs)
        {
            // LUCENENET specific - performance is significantly better if we instantiate
            // this outside of the outer loop.
            BytesRef scratch = new BytesRef();

            foreach (FacetsCollector.MatchingDocs segmentHits in matchingDocs)
            {
                BinaryDocValues docValues = segmentHits.Context.AtomicReader.GetBinaryDocValues(m_indexFieldName);
                if (docValues is null)
                {
                    // This reader does not have DocValues for the requested category list.
                    continue;
                }

                DocIdSetIterator hitIter = segmentHits.Bits.GetIterator();

                for (int docId = hitIter.NextDoc();
                     docId != DocIdSetIterator.NO_MORE_DOCS;
                     docId = hitIter.NextDoc())
                {
                    docValues.Get(docId, scratch);

                    var buffer   = scratch.Bytes;
                    int limit    = scratch.Offset + scratch.Length;
                    int position = scratch.Offset;
                    int pending  = 0; // bits accumulated for the value currently being decoded
                    int previous = 0; // last decoded ordinal; each value is stored as a delta from it

                    while (position < limit)
                    {
                        byte current = buffer[position++];

                        if (current > sbyte.MaxValue)
                        {
                            // High bit set: more bytes follow, keep the low 7 bits.
                            pending = (pending << 7) | (current & 0x7F);
                            continue;
                        }

                        // High bit clear: last byte of this value.
                        int ordinal = ((pending << 7) | current) + previous;
                        previous = ordinal;
                        ++m_values[ordinal];
                        pending = 0;
                    }
                }
            }

            Rollup();
        }
Ejemplo n.º 11
0
            /// <summary>
            /// Moves the first iterator forward one document and then advances the remaining
            /// iterators to that candidate, restarting with a larger candidate whenever one of
            /// them overshoots. The final candidate is stored in <c>lastReturn</c> and returned.
            /// </summary>
            /// <returns>The candidate reached, or <c>NO_MORE_DOCS</c> once that was returned before.</returns>
            public override int NextDoc()
            {
                if (lastReturn == DocIdSetIterator.NO_MORE_DOCS)
                {
                    return DocIdSetIterator.NO_MORE_DOCS;
                }

                int candidate = iterators[0].NextDoc();
                int count     = iterators.Length;
                int settled   = 0; // index of the iterator that produced the current candidate
                int index     = 1;

                while (index < count)
                {
                    if (index == settled)
                    {
                        // This iterator already sits on the candidate.
                        index++;
                        continue;
                    }

                    int reached = iterators[index].Advance(candidate);
                    if (reached <= candidate)
                    {
                        index++;
                        continue;
                    }

                    // Overshoot: adopt the larger document as the new candidate.
                    candidate = reached;
                    if (index != 0)
                    {
                        // Re-check all iterators from the start, skipping this one.
                        settled = index;
                        index   = 0;
                        continue;
                    }

                    settled = 0;
                    index++;
                }

                lastReturn = candidate;
                return candidate;
            }
Ejemplo n.º 12
0
        // Delete by query
        /// <summary>
        /// For each query/limit pair, deletes the documents of <paramref name="reader"/> that
        /// match the query and whose id is below the pair's limit.
        /// </summary>
        /// <returns>The number of <c>rld.Delete</c> calls that returned true.</returns>
        private static long ApplyQueryDeletes(IEnumerable <QueryAndLimit> queriesIter, ReadersAndUpdates rld, SegmentReader reader)
        {
            AtomicReaderContext segmentContext = reader.AtomicContext;
            long deleted          = 0;
            bool liveDocsWritable = false;

            foreach (QueryAndLimit entry in queriesIter)
            {
                Query query    = entry.Query;
                int   docLimit = entry.Limit;

                DocIdSet matches = (new QueryWrapperFilter(query)).GetDocIdSet(segmentContext, reader.LiveDocs);
                if (matches == null)
                {
                    continue;
                }

                DocIdSetIterator matchIter = matches.GetIterator();
                if (matchIter == null)
                {
                    continue;
                }

                // Stops at the first id at or above the limit; NO_MORE_DOCS ends the loop
                // through the same comparison.
                for (int docId = matchIter.NextDoc(); docId < docLimit; docId = matchIter.NextDoc())
                {
                    if (!liveDocsWritable)
                    {
                        // Make the live docs writable only once, on the first candidate delete.
                        rld.InitWritableLiveDocs();
                        liveDocsWritable = true;
                    }

                    if (rld.Delete(docId))
                    {
                        deleted++;
                    }
                }
            }

            return deleted;
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Increments <c>values</c> for every ordinal stored in the binary doc values of each
        /// matching document, then calls <c>Rollup()</c>.
        /// </summary>
        /// <param name="matchingDocs">Per-segment hits whose ordinals are counted.</param>
        private void Count(IList <FacetsCollector.MatchingDocs> matchingDocs)
        {
            foreach (FacetsCollector.MatchingDocs segmentHits in matchingDocs)
            {
                BinaryDocValues docValues = segmentHits.Context.AtomicReader.GetBinaryDocValues(indexFieldName);
                if (docValues == null)
                {
                    // This reader does not have DocValues for the requested category list.
                    continue;
                }

                DocIdSetIterator hitIter = segmentHits.Bits.GetIterator();
                BytesRef         scratch = new BytesRef();

                for (int docId = hitIter.NextDoc();
                     docId != DocIdSetIterator.NO_MORE_DOCS;
                     docId = hitIter.NextDoc())
                {
                    docValues.Get(docId, scratch);

                    var buffer   = scratch.Bytes;
                    int limit    = scratch.Offset + scratch.Length;
                    int position = scratch.Offset;
                    int pending  = 0; // bits accumulated for the value currently being decoded
                    int previous = 0; // last decoded ordinal; each value is stored as a delta from it

                    while (position < limit)
                    {
                        byte current = buffer[position++];

                        if ((sbyte)current < 0)
                        {
                            // High bit set: more bytes follow, keep the low 7 bits.
                            pending = (pending << 7) | (current & 0x7F);
                            continue;
                        }

                        // High bit clear: last byte of this value.
                        int ordinal = ((pending << 7) | current) + previous;
                        previous = ordinal;
                        ++values[ordinal];
                        pending = 0;
                    }
                }
            }

            Rollup();
        }
Ejemplo n.º 14
0
 /// <summary>
 /// Returns the number of documents in this set. The count is computed by iterating the
 /// set the first time it is needed and is cached in <c>size</c> afterwards.
 /// </summary>
 /// <returns>The document count, or -1 when iterating the set failed.</returns>
 public override int Size()
 {
     // A non-negative value means the count was already computed.
     if (size >= 0)
     {
         return size;
     }

     DocIdSetIterator dcit = Iterator();
     int counted = 0;
     try
     {
         while (dcit.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
         {
             counted++;
         }
     }
     catch
     {
         // Leave the cache untouched so that a later call recounts instead of
         // returning a partial total from the failed attempt.
         return -1;
     }

     // Publish the count only after the iteration completed successfully.
     size = counted;
     return size;
 }
Ejemplo n.º 15
0
        /// <summary>
        /// Returns the number of documents in this set, counting them by iteration when the
        /// cached <c>_size</c> is still <c>INVALID</c>.
        /// </summary>
        /// <returns>The document count, or <c>INVALID</c> when the iteration failed.</returns>
        public override int Size()
        {
            if (_size != INVALID)
            {
                return _size;
            }

            _size = 0;
            DocIdSetIterator docIter = this.Iterator();

            try
            {
                for (int docId = docIter.NextDoc();
                     docId != DocIdSetIterator.NO_MORE_DOCS;
                     docId = docIter.NextDoc())
                {
                    _size++;
                }
            }
            catch
            {
                // Discard the partial count so that the next call counts again.
                _size = INVALID;
            }

            return _size;
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Adds, for every matching document, the value produced by <paramref name="valueSource"/>
        /// to <c>m_values</c> at each of the document's ordinals, then calls <c>Rollup()</c>.
        /// </summary>
        /// <param name="matchingDocs">Per-segment hits to aggregate.</param>
        /// <param name="keepScores">When true, a fake scorer carrying each hit's doc id and score
        /// is placed in the value-source context under the key "scorer".</param>
        /// <param name="valueSource">Source of the per-document value that is summed.</param>
        private void SumValues(IList <MatchingDocs> matchingDocs, bool keepScores, ValueSource valueSource)
        {
            FakeScorer  scorer  = new FakeScorer();
            IDictionary context = new Dictionary <string, Scorer>();

            if (keepScores)
            {
                context["scorer"] = scorer;
            }
            Int32sRef scratch = new Int32sRef();

            foreach (MatchingDocs hits in matchingDocs)
            {
                OrdinalsReader.OrdinalsSegmentReader ords = ordinalsReader.GetReader(hits.Context);

                int     scoresIdx = 0;
                float[] scores    = hits.Scores;

                FunctionValues   functionValues = valueSource.GetValues(context, hits.Context);
                DocIdSetIterator docs           = hits.Bits.GetIterator();

                int doc;
                while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                {
                    ords.Get(doc, scratch);
                    if (keepScores)
                    {
                        // Expose the current hit to value sources that read the scorer.
                        scorer.docID = doc;
                        scorer.score = scores[scoresIdx++];
                    }
                    float value = (float)functionValues.DoubleVal(doc);

                    // The valid ordinals start at scratch.Offset, not at index 0; honoring the
                    // offset keeps this correct for readers that fill the ref at a non-zero offset.
                    int first = scratch.Offset;
                    int limit = first + scratch.Length;
                    for (int i = first; i < limit; i++)
                    {
                        m_values[scratch.Int32s[i]] += value;
                    }
                }
            }

            Rollup();
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Increments <c>m_values</c> once for every ordinal of every matching document,
        /// then calls <c>Rollup()</c>.
        /// </summary>
        /// <param name="matchingDocs">Per-segment hits whose ordinals are counted.</param>
        private void Count(IList <FacetsCollector.MatchingDocs> matchingDocs)
        {
            Int32sRef ordinals = new Int32sRef();

            foreach (FacetsCollector.MatchingDocs segmentHits in matchingDocs)
            {
                OrdinalsReader.OrdinalsSegmentReader segmentOrds = ordinalsReader.GetReader(segmentHits.Context);
                DocIdSetIterator hitIter = segmentHits.Bits.GetIterator();

                for (int docId = hitIter.NextDoc();
                     docId != DocIdSetIterator.NO_MORE_DOCS;
                     docId = hitIter.NextDoc())
                {
                    segmentOrds.Get(docId, ordinals);

                    // The ordinals of this document occupy [Offset, Offset + Length).
                    int first = ordinals.Offset;
                    int limit = first + ordinals.Length;
                    for (int position = first; position < limit; position++)
                    {
                        m_values[ordinals.Int32s[position]]++;
                    }
                }
            }

            Rollup();
        }
Ejemplo n.º 18
0
        /// <summary>
        /// Renders the document ids of <paramref name="docIdSet"/> as a bracketed,
        /// comma-separated list, e.g. <c>[1,5,9]</c>; an empty set renders as <c>[]</c>.
        /// </summary>
        public static string AsString(this DocIdSet docIdSet)
        {
            DocIdSetIterator docIter   = docIdSet.Iterator();
            StringBuilder    text      = new StringBuilder();
            string           separator = "";

            text.Append("[");
            while (docIter.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
            {
                // Empty before the first id, a comma before every later one.
                text.Append(separator);
                text.Append(docIter.DocID());
                separator = ",";
            }
            text.Append("]");

            return text.ToString();
        }
Ejemplo n.º 19
0
        /// <summary>
        /// Reads the binary doc values of every matching document as a sequence of 8-byte
        /// records (a big-endian 32-bit ordinal followed by the big-endian 32-bit pattern of a
        /// float) and adds each float to <c>m_values</c> at its ordinal.
        /// </summary>
        /// <param name="matchingDocs">Per-segment hits to aggregate.</param>
        private void SumValues(IList <FacetsCollector.MatchingDocs> matchingDocs)
        {
            foreach (FacetsCollector.MatchingDocs segmentHits in matchingDocs)
            {
                BinaryDocValues docValues = segmentHits.Context.AtomicReader.GetBinaryDocValues(m_indexFieldName);
                if (docValues == null)
                {
                    // This reader does not have DocValues for the requested category list.
                    continue;
                }

                BytesRef         record  = new BytesRef();
                DocIdSetIterator hitIter = segmentHits.Bits.GetIterator();

                for (int docId = hitIter.NextDoc();
                     docId != DocIdSetIterator.NO_MORE_DOCS;
                     docId = hitIter.NextDoc())
                {
                    // TODO: use OrdinalsReader?  we'd need to add a
                    // BytesRef getAssociation()?
                    docValues.Get(docId, record);

                    byte[] buffer   = record.Bytes;
                    int    limit    = record.Offset + record.Length;
                    int    position = record.Offset;

                    while (position < limit)
                    {
                        int ordinal = ((buffer[position] & 0xFF) << 24)
                                      | ((buffer[position + 1] & 0xFF) << 16)
                                      | ((buffer[position + 2] & 0xFF) << 8)
                                      | (buffer[position + 3] & 0xFF);

                        int valueBits = ((buffer[position + 4] & 0xFF) << 24)
                                        | ((buffer[position + 5] & 0xFF) << 16)
                                        | ((buffer[position + 6] & 0xFF) << 8)
                                        | (buffer[position + 7] & 0xFF);

                        position += 8;
                        m_values[ordinal] += J2N.BitConversion.Int32BitsToSingle(valueBits);
                    }
                }
            }
        }
Ejemplo n.º 20
0
        /// <summary>
        /// Runs <paramref name="weight"/> against every sub-reader and passes each hit that the
        /// facet validator accepts to <paramref name="collector"/>. When a
        /// <paramref name="filter"/> is given, only documents present in both the scorer and the
        /// filter are considered.
        /// </summary>
        /// <param name="weight">Produces the scorer for each sub-reader.</param>
        /// <param name="filter">Optional filter; null means every scored document is a candidate.</param>
        /// <param name="collector">Receives the accepted documents.</param>
        /// <param name="start">Offset added to each sub-reader's doc start.</param>
        /// <param name="mapReduceWrapper">Optional map/reduce hook; may be null.</param>
        public virtual void Search(Weight weight, Filter filter, Collector collector, int start, IBoboMapFunctionWrapper mapReduceWrapper)
        {
            FacetValidator validator = CreateFacetValidator();
            int            target    = 0;

            if (filter == null)
            {
                for (int i = 0; i < _subReaders.Length; i++)
                { // search each subreader
                    int docStart = start + _docStarts[i];
                    collector.SetNextReader(_subReaders[i], docStart);
                    validator.SetNextReader(_subReaders[i], docStart);

                    Scorer scorer = weight.Scorer(_subReaders[i], true, true);
                    if (scorer != null)
                    {
                        collector.SetScorer(scorer);
                        target = scorer.NextDoc();
                        while (target != DocIdSetIterator.NO_MORE_DOCS)
                        {
                            if (validator.Validate(target))
                            {
                                collector.Collect(target);
                                target = scorer.NextDoc();
                            }
                            else
                            {
                                // Rejected: jump straight to the validator's next candidate.
                                target = scorer.Advance(validator._nextTarget);
                            }
                        }
                    }
                    if (mapReduceWrapper != null)
                    {
                        mapReduceWrapper.MapFullIndexReader(_subReaders[i], validator.GetCountCollectors());
                    }
                }
                return;
            }

            for (int i = 0; i < _subReaders.Length; i++)
            {
                DocIdSet filterDocIdSet = filter.GetDocIdSet(_subReaders[i]);
                if (filterDocIdSet == null)
                {
                    // A null set only means the filter accepts nothing in THIS sub-reader;
                    // the remaining sub-readers must still be searched.
                    continue;
                }
                int docStart = start + _docStarts[i];
                collector.SetNextReader(_subReaders[i], docStart);
                validator.SetNextReader(_subReaders[i], docStart);
                Scorer scorer = weight.Scorer(_subReaders[i], true, false);
                if (scorer == null)
                {
                    continue;
                }

                collector.SetScorer(scorer);
                DocIdSetIterator filterDocIdIterator = filterDocIdSet.Iterator(); // CHECKME: use ConjunctionScorer here?

                if (filterDocIdIterator == null)
                {
                    continue;
                }

                // Leapfrog between the filter iterator (target) and the scorer (doc).
                int doc = -1;
                target = filterDocIdIterator.NextDoc();
                while (target < DocIdSetIterator.NO_MORE_DOCS)
                {
                    if (doc < target)
                    {
                        doc = scorer.Advance(target);
                    }

                    if (doc == target) // permitted by filter
                    {
                        if (validator.Validate(doc))
                        {
                            if (mapReduceWrapper != null)
                            {
                                mapReduceWrapper.MapSingleDocument(doc, _subReaders[i]);
                            }
                            collector.Collect(doc);

                            target = filterDocIdIterator.NextDoc();
                        }
                        else
                        {
                            // skip to the next possible docid
                            target = filterDocIdIterator.Advance(validator._nextTarget);
                        }
                    }
                    else // doc > target
                    {
                        if (doc == DocIdSetIterator.NO_MORE_DOCS)
                        {
                            break;
                        }
                        target = filterDocIdIterator.Advance(doc);
                    }
                }

                if (mapReduceWrapper != null)
                {
                    mapReduceWrapper.FinalizeSegment(_subReaders[i], validator.GetCountCollectors());
                }
            }
        }
Ejemplo n.º 21
0
        /// <summary>
        /// Used when drill downs are highly constraining vs
        /// baseQuery.
        /// </summary>
        private void DoDrillDownAdvanceScoring(ICollector collector, DocIdSetIterator[] disis, ICollector[] sidewaysCollectors)
        {
            int maxDoc  = context.Reader.MaxDoc;
            int numDims = dims.Length;

            //if (DEBUG) {
            //  System.out.println("  doDrillDownAdvanceScoring");
            //}

            // TODO: maybe a class like BS, instead of parallel arrays
            int[]   filledSlots = new int[CHUNK];
            int[]   docIDs      = new int[CHUNK];
            float[] scores      = new float[CHUNK];
            int[]   missingDims = new int[CHUNK];
            int[]   counts      = new int[CHUNK];

            docIDs[0] = -1;
            int nextChunkStart = CHUNK;

            FixedBitSet seen = new FixedBitSet(CHUNK);

            while (true)
            {
                //if (DEBUG) {
                //  System.out.println("\ncycle nextChunkStart=" + nextChunkStart + " docIds[0]=" + docIDs[0]);
                //}

                // First dim:
                //if (DEBUG) {
                //  System.out.println("  dim0");
                //}
                DocIdSetIterator disi = disis[0];
                if (disi != null)
                {
                    int docID = disi.DocID;
                    while (docID < nextChunkStart)
                    {
                        int slot = docID & MASK;

                        if (docIDs[slot] != docID)
                        {
                            seen.Set(slot);
                            // Mark slot as valid:
                            //if (DEBUG) {
                            //  System.out.println("    set docID=" + docID + " id=" + context.reader().document(docID).get("id"));
                            //}
                            docIDs[slot]      = docID;
                            missingDims[slot] = 1;
                            counts[slot]      = 1;
                        }

                        docID = disi.NextDoc();
                    }
                }

                // Second dim:
                //if (DEBUG) {
                //  System.out.println("  dim1");
                //}
                disi = disis[1];
                if (disi != null)
                {
                    int docID = disi.DocID;
                    while (docID < nextChunkStart)
                    {
                        int slot = docID & MASK;

                        if (docIDs[slot] != docID)
                        {
                            // Mark slot as valid:
                            seen.Set(slot);
                            //if (DEBUG) {
                            //  System.out.println("    set docID=" + docID + " missingDim=0 id=" + context.reader().document(docID).get("id"));
                            //}
                            docIDs[slot]      = docID;
                            missingDims[slot] = 0;
                            counts[slot]      = 1;
                        }
                        else
                        {
                            // TODO: single-valued dims will always be true
                            // below; we could somehow specialize
                            if (missingDims[slot] >= 1)
                            {
                                missingDims[slot] = 2;
                                counts[slot]      = 2;
                                //if (DEBUG) {
                                //  System.out.println("    set docID=" + docID + " missingDim=2 id=" + context.reader().document(docID).get("id"));
                                //}
                            }
                            else
                            {
                                counts[slot] = 1;
                                //if (DEBUG) {
                                //  System.out.println("    set docID=" + docID + " missingDim=" + missingDims[slot] + " id=" + context.reader().document(docID).get("id"));
                                //}
                            }
                        }

                        docID = disi.NextDoc();
                    }
                }

                // After this we can "upgrade" to conjunction, because
                // any doc not seen by either dim 0 or dim 1 cannot be
                // a hit or a near miss:

                //if (DEBUG) {
                //  System.out.println("  baseScorer");
                //}

                // Fold in baseScorer, using advance:
                int filledCount = 0;
                int slot0       = 0;
                while (slot0 < CHUNK && (slot0 = seen.NextSetBit(slot0)) != -1)
                {
                    int ddDocID = docIDs[slot0];
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(ddDocID != -1);
                    }

                    int baseDocID = baseScorer.DocID;
                    if (baseDocID < ddDocID)
                    {
                        baseDocID = baseScorer.Advance(ddDocID);
                    }
                    if (baseDocID == ddDocID)
                    {
                        //if (DEBUG) {
                        //  System.out.println("    keep docID=" + ddDocID + " id=" + context.reader().document(ddDocID).get("id"));
                        //}
                        scores[slot0] = baseScorer.GetScore();
                        filledSlots[filledCount++] = slot0;
                        counts[slot0]++;
                    }
                    else
                    {
                        //if (DEBUG) {
                        //  System.out.println("    no docID=" + ddDocID + " id=" + context.reader().document(ddDocID).get("id"));
                        //}
                        docIDs[slot0] = -1;

                        // TODO: we could jump slot0 forward to the
                        // baseDocID ... but we'd need to set docIDs for
                        // intervening slots to -1
                    }
                    slot0++;
                }
                seen.Clear(0, CHUNK);

                if (filledCount == 0)
                {
                    if (nextChunkStart >= maxDoc)
                    {
                        break;
                    }
                    nextChunkStart += CHUNK;
                    continue;
                }

                // TODO: factor this out & share w/ union scorer,
                // except we start from dim=2 instead:
                for (int dim = 2; dim < numDims; dim++)
                {
                    //if (DEBUG) {
                    //  System.out.println("  dim=" + dim + " [" + dims[dim].dim + "]");
                    //}
                    disi = disis[dim];
                    if (disi != null)
                    {
                        int docID = disi.DocID;
                        while (docID < nextChunkStart)
                        {
                            int slot = docID & MASK;
                            if (docIDs[slot] == docID && counts[slot] >= dim)
                            {
                                // TODO: single-valued dims will always be true
                                // below; we could somehow specialize
                                if (missingDims[slot] >= dim)
                                {
                                    //if (DEBUG) {
                                    //  System.out.println("    set docID=" + docID + " count=" + (dim+2));
                                    //}
                                    missingDims[slot] = dim + 1;
                                    counts[slot]      = dim + 2;
                                }
                                else
                                {
                                    //if (DEBUG) {
                                    //  System.out.println("    set docID=" + docID + " missing count=" + (dim+1));
                                    //}
                                    counts[slot] = dim + 1;
                                }
                            }

                            // TODO: sometimes use advance?
                            docID = disi.NextDoc();
                        }
                    }
                }

                // Collect:
                //if (DEBUG) {
                //  System.out.println("  now collect: " + filledCount + " hits");
                //}
                for (int i = 0; i < filledCount; i++)
                {
                    int slot = filledSlots[i];
                    collectDocID = docIDs[slot];
                    collectScore = scores[slot];
                    //if (DEBUG) {
                    //  System.out.println("    docID=" + docIDs[slot] + " count=" + counts[slot]);
                    //}
                    if (counts[slot] == 1 + numDims)
                    {
                        CollectHit(collector, sidewaysCollectors);
                    }
                    else if (counts[slot] == numDims)
                    {
                        CollectNearMiss(sidewaysCollectors[missingDims[slot]]);
                    }
                }

                if (nextChunkStart >= maxDoc)
                {
                    break;
                }

                nextChunkStart += CHUNK;
            }
        }
Ejemplo n.º 22
0
        /// <summary>
        /// Randomized join test driver. For each index iteration a fresh index is built via
        /// <c>CreateContext</c>; for each search iteration a random value is picked, a join
        /// query is created in the direction recorded for that value, and both the set of
        /// matching documents and the top docs are compared against the expected results.
        /// </summary>
        /// <param name="multipleValuesPerDocument">Passed through to <c>CreateContext</c> and <c>JoinUtil.CreateJoinQuery</c>.</param>
        /// <param name="maxIndexIter">Number of index build iterations to run.</param>
        /// <param name="maxSearchIter">Number of search iterations to run against each index.</param>
        /// <param name="numberOfDocumentsToIndex">Passed through to <c>CreateContext</c>.</param>
        private void ExecuteRandomJoin(bool multipleValuesPerDocument, int maxIndexIter, int maxSearchIter,
                                       int numberOfDocumentsToIndex)
        {
            for (int indexIter = 1; indexIter <= maxIndexIter; indexIter++)
            {
                if (Verbose)
                {
                    Console.WriteLine("indexIter=" + indexIter);
                }
                Directory         dir = NewDirectory();
                RandomIndexWriter w   = new RandomIndexWriter(Random, dir,
                                                              NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.KEYWORD, false))
                                                              .SetMergePolicy(NewLogMergePolicy()));
                bool scoreDocsInOrder         = TestJoinUtil.Random.NextBoolean();
                IndexIterationContext context = CreateContext(numberOfDocumentsToIndex, w, multipleValuesPerDocument,
                                                              scoreDocsInOrder);

                IndexReader topLevelReader = w.GetReader();
                w.Dispose();
                for (int searchIter = 1; searchIter <= maxSearchIter; searchIter++)
                {
                    if (Verbose)
                    {
                        Console.WriteLine("searchIter=" + searchIter);
                    }
                    IndexSearcher indexSearcher = NewSearcher(topLevelReader);

                    int         r              = Random.Next(context.RandomUniqueValues.Length);
                    bool        from           = context.RandomFrom[r];
                    string      randomValue    = context.RandomUniqueValues[r];
                    FixedBitSet expectedResult = CreateExpectedResult(randomValue, from, indexSearcher.IndexReader,
                                                                      context);

                    Query actualQuery = new TermQuery(new Term("value", randomValue));
                    if (Verbose)
                    {
                        Console.WriteLine("actualQuery=" + actualQuery);
                    }

                    var       scoreModeLength = Enum.GetNames(typeof(ScoreMode)).Length;
                    ScoreMode scoreMode       = (ScoreMode)Random.Next(scoreModeLength);
                    if (Verbose)
                    {
                        Console.WriteLine("scoreMode=" + scoreMode);
                    }

                    // The join direction follows the side the random value was recorded for.
                    string fromField = from ? "from" : "to";
                    string toField   = from ? "to" : "from";
                    Query  joinQuery = JoinUtil.CreateJoinQuery(fromField, multipleValuesPerDocument, toField, actualQuery,
                                                                indexSearcher, scoreMode);
                    if (Verbose)
                    {
                        Console.WriteLine("joinQuery=" + joinQuery);
                    }

                    // Need to know all documents that have matches. TopDocs doesn't give me that and then I'd be also testing TopDocsCollector...
                    FixedBitSet          actualResult         = new FixedBitSet(indexSearcher.IndexReader.MaxDoc);
                    TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.Create(10, false);
                    indexSearcher.Search(joinQuery,
                                         new CollectorAnonymousClass2(scoreDocsInOrder, actualResult,
                                                                      topScoreDocCollector));
                    // Asserting bit set...
                    if (Verbose)
                    {
                        Console.WriteLine("expected cardinality:" + expectedResult.Cardinality);
                        DocIdSetIterator iterator = expectedResult.GetIterator();
                        for (int doc = iterator.NextDoc();
                             doc != DocIdSetIterator.NO_MORE_DOCS;
                             doc = iterator.NextDoc())
                        {
                            // Invariant culture keeps the diagnostic output identical on every machine,
                            // matching the score/doc logging further down.
                            Console.WriteLine(string.Format(CultureInfo.InvariantCulture, "Expected doc[{0}] with id value {1}", doc, indexSearcher.Doc(doc).Get("id")));
                        }
                        Console.WriteLine("actual cardinality:" + actualResult.Cardinality);
                        iterator = actualResult.GetIterator();
                        for (int doc = iterator.NextDoc();
                             doc != DocIdSetIterator.NO_MORE_DOCS;
                             doc = iterator.NextDoc())
                        {
                            Console.WriteLine(string.Format(CultureInfo.InvariantCulture, "Actual doc[{0}] with id value {1}", doc, indexSearcher.Doc(doc).Get("id")));
                        }
                    }
                    assertEquals(expectedResult, actualResult);

                    // Asserting TopDocs...
                    TopDocs expectedTopDocs = CreateExpectedTopDocs(randomValue, from, scoreMode, context);
                    TopDocs actualTopDocs   = topScoreDocCollector.GetTopDocs();
                    assertEquals(expectedTopDocs.TotalHits, actualTopDocs.TotalHits);
                    assertEquals(expectedTopDocs.ScoreDocs.Length, actualTopDocs.ScoreDocs.Length);
                    if (scoreMode == ScoreMode.None)
                    {
                        // Scores are not compared for ScoreMode.None; only hit counts are.
                        continue;
                    }

                    assertEquals(expectedTopDocs.MaxScore, actualTopDocs.MaxScore, 0.0f);
                    for (int i = 0; i < expectedTopDocs.ScoreDocs.Length; i++)
                    {
                        if (Verbose)
                        {
                            Console.WriteLine(string.Format(CultureInfo.InvariantCulture, "Expected doc: {0} | Actual doc: {1}\n", expectedTopDocs.ScoreDocs[i].Doc, actualTopDocs.ScoreDocs[i].Doc));
                            Console.WriteLine(string.Format(CultureInfo.InvariantCulture, "Expected score: {0} | Actual score: {1}\n", expectedTopDocs.ScoreDocs[i].Score, actualTopDocs.ScoreDocs[i].Score));
                        }
                        assertEquals(expectedTopDocs.ScoreDocs[i].Doc, actualTopDocs.ScoreDocs[i].Doc);
                        assertEquals(expectedTopDocs.ScoreDocs[i].Score, actualTopDocs.ScoreDocs[i].Score, 0.0f);
                        Explanation explanation = indexSearcher.Explain(joinQuery, expectedTopDocs.ScoreDocs[i].Doc);
                        assertEquals(expectedTopDocs.ScoreDocs[i].Score, explanation.Value, 0.0f);
                    }
                }
                topLevelReader.Dispose();
                dir.Dispose();
            }
        }
Ejemplo n.º 23
0
 /// <summary>
 /// Clears, in place, every bit of this set whose index is produced by
 /// <paramref name="iter"/> (i.e. <c>this = this AND NOT iter</c>).
 /// </summary>
 /// <param name="iter">
 /// Iterator supplying the bit indexes to clear. When it is a bit-set backed
 /// iterator that has not been started yet (its doc ID is still -1), the
 /// backing words are combined directly instead of iterating doc by doc.
 /// </param>
 public void AndNot(DocIdSetIterator iter)
 {
     OpenBitSetIterator obs = iter as OpenBitSetIterator;
     if (obs != null && iter.DocID() == -1)
     {
         AndNot(obs.Arr, obs.Words);
         // Move the iterator past the last doc that plain iteration would
         // have accepted, so it ends up exhausted just like in the slow path.
         obs.Advance(NumBits);
         return;
     }

     FixedBitSetIterator fbs = iter as FixedBitSetIterator;
     if (fbs != null && iter.DocID() == -1)
     {
         AndNot(fbs.bits, fbs.NumWords);
         // Same as above: leave the iterator exhausted.
         fbs.Advance(NumBits);
         return;
     }

     // Generic path: clear one bit per returned doc until the iterator
     // yields a value that is outside this set.
     for (int doc = iter.NextDoc(); doc < NumBits; doc = iter.NextDoc())
     {
         Clear(doc);
     }
 }
Ejemplo n.º 24
0
 /// <summary>
 /// Flips, in place, every bit of this set whose index is produced by
 /// <paramref name="iter"/> (in-place XOR with the iterator's docs).
 /// </summary>
 /// <param name="iter">Iterator supplying the bit indexes to toggle; consumed until it returns a value that is not below <c>NumBits</c>.</param>
 public void Xor(DocIdSetIterator iter)
 {
     for (int doc = iter.NextDoc(); doc < NumBits; doc = iter.NextDoc())
     {
         // Flip takes a half-open range, so [doc, doc + 1) toggles exactly one bit.
         Flip(doc, doc + 1);
     }
 }
Ejemplo n.º 25
0
        /// <summary>
        /// Feeds the <see cref="long"/> value of every matching document into an
        /// <c>Int64RangeCounter</c> built over <c>m_ranges</c>, then writes the per-range
        /// totals into <c>m_counts</c> and adjusts <c>m_totCount</c>.
        /// </summary>
        /// <param name="valueSource">Source of the per-document values to bucket.</param>
        /// <param name="matchingDocs">Per-segment hits to count.</param>
        /// <exception cref="ArgumentException">
        /// Thrown when <c>m_fastMatchFilter</c> returns a <c>DocIdSet</c> whose <c>Bits</c> is <c>null</c>.
        /// </exception>
        private void Count(ValueSource valueSource, IList<MatchingDocs> matchingDocs)
        {
            Int64Range[] ranges = (Int64Range[])this.m_ranges;

            Int64RangeCounter counter = new Int64RangeCounter(ranges);

            int missingCount = 0;

            foreach (MatchingDocs hits in matchingDocs)
            {
                FunctionValues fv = valueSource.GetValues(Collections.EmptyMap<string, object>(), hits.Context);

                // All hits of the segment are added up front; documents without a value
                // are subtracted again at the end through missingCount.
                m_totCount += hits.TotalHits;

                IBits bits = null;
                if (m_fastMatchFilter != null)
                {
                    DocIdSet dis = m_fastMatchFilter.GetDocIdSet(hits.Context, null);
                    if (dis is null)
                    {
                        // No documents match
                        continue;
                    }
                    bits = dis.Bits;
                    if (bits is null)
                    {
                        throw new ArgumentException("fastMatchFilter does not implement DocIdSet.Bits");
                    }
                }

                DocIdSetIterator docs = hits.Bits.GetIterator();
                int doc;
                while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                {
                    // Rejected by the fast-match filter. The loop condition fetches the next
                    // doc itself, so no manual increment is needed here.
                    if (bits != null && !bits.Get(doc))
                    {
                        continue;
                    }

                    // Skip missing docs:
                    if (fv.Exists(doc))
                    {
                        counter.Add(fv.Int64Val(doc));
                    }
                    else
                    {
                        missingCount++;
                    }
                }
            }

            // NOTE(review): FillCounts' return value is treated as additional "missing"
            // documents, presumably values that landed in no range; confirm against
            // Int64RangeCounter.
            missingCount += counter.FillCounts(m_counts);

            m_totCount -= missingCount;
        }
Ejemplo n.º 26
0
        /// <summary>
        /// Scores the base query together with all drill-down dimensions one window of
        /// <c>CHUNK</c> doc IDs at a time. For every document returned by <c>baseScorer</c>
        /// it tracks, in parallel arrays, how many dimensions matched (<c>counts</c>) and
        /// which dimension was missed (<c>missingDims</c>), then reports full hits through
        /// <c>CollectHit</c> and documents that missed exactly one dimension through
        /// <c>CollectNearMiss</c>.
        /// </summary>
        /// <param name="collector">Collector passed to <c>CollectHit</c> for documents matching every dimension.</param>
        /// <param name="disis">One iterator per dimension, indexed by dimension; a <c>null</c> entry is skipped.</param>
        /// <param name="sidewaysCollectors">Per-dimension collectors; the entry at the missed dimension's index receives a near miss.</param>
        private void DoUnionScoring(ICollector collector, DocIdSetIterator[] disis, ICollector[] sidewaysCollectors)
        {
            //if (DEBUG) {
            //  System.out.println("  doUnionScoring");
            //}

            int maxDoc  = context.Reader.MaxDoc;
            int numDims = dims.Length;

            // TODO: maybe a class like BS, instead of parallel arrays
            int[]   filledSlots = new int[CHUNK];
            int[]   docIDs      = new int[CHUNK];
            float[] scores      = new float[CHUNK];
            int[]   missingDims = new int[CHUNK];
            int[]   counts      = new int[CHUNK];

            // int arrays start out zeroed, so without this slot 0 would look as if it
            // already held docID 0 and trip the "slot not yet filled" assertion below.
            docIDs[0] = -1;

            // NOTE: this is basically a specialized version of
            // BooleanScorer, to the minShouldMatch=N-1 case, but
            // carefully tracking which dimension failed to match

            // Exclusive upper bound of the doc ID window handled by the current pass.
            int nextChunkStart = CHUNK;

            while (true)
            {
                //if (DEBUG) {
                //  System.out.println("\ncycle nextChunkStart=" + nextChunkStart + " docIds[0]=" + docIDs[0]);
                //}
                int filledCount = 0;
                int docID       = baseScorer.DocID;
                //if (DEBUG) {
                //  System.out.println("  base docID=" + docID);
                //}

                // Phase 1: record every base-query hit that falls inside the current window.
                while (docID < nextChunkStart)
                {
                    // NOTE(review): presumably MASK == CHUNK - 1 so this maps a docID to its
                    // slot within the window; MASK is defined outside this method, so confirm.
                    int slot = docID & MASK;
                    //if (DEBUG) {
                    //  System.out.println("    docIDs[slot=" + slot + "]=" + docID + " id=" + context.reader().document(docID).get("id"));
                    //}

                    // Mark slot as valid:
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(docIDs[slot] != docID, () => "slot=" + slot + " docID=" + docID);
                    }
                    docIDs[slot] = docID;
                    scores[slot] = baseScorer.GetScore();
                    filledSlots[filledCount++] = slot;
                    missingDims[slot]          = 0;
                    counts[slot] = 1;

                    docID = baseScorer.NextDoc();
                }

                // Nothing from the base query in this window: move on (or stop at the end).
                if (filledCount == 0)
                {
                    if (nextChunkStart >= maxDoc)
                    {
                        break;
                    }
                    nextChunkStart += CHUNK;
                    continue;
                }

                // First drill-down dim, basically adds SHOULD onto
                // the baseQuery:
                //if (DEBUG) {
                //  System.out.println("  dim=0 [" + dims[0].dim + "]");
                //}
                DocIdSetIterator disi = disis[0];
                if (disi != null)
                {
                    docID = disi.DocID;
                    //if (DEBUG) {
                    //  System.out.println("    start docID=" + docID);
                    //}
                    while (docID < nextChunkStart)
                    {
                        int slot = docID & MASK;
                        // Only documents that the base query put into this slot are updated.
                        if (docIDs[slot] == docID)
                        {
                            //if (DEBUG) {
                            //  System.out.println("      set docID=" + docID + " count=2");
                            //}
                            missingDims[slot] = 1;
                            counts[slot]      = 2;
                        }
                        docID = disi.NextDoc();
                    }
                }

                // Remaining dims: a slot is only updated while its count shows it can
                // still end up as a hit or a single-dimension near miss.
                for (int dim = 1; dim < numDims; dim++)
                {
                    //if (DEBUG) {
                    //  System.out.println("  dim=" + dim + " [" + dims[dim].dim + "]");
                    //}

                    disi = disis[dim];
                    if (disi != null)
                    {
                        docID = disi.DocID;
                        //if (DEBUG) {
                        //  System.out.println("    start docID=" + docID);
                        //}
                        while (docID < nextChunkStart)
                        {
                            int slot = docID & MASK;
                            if (docIDs[slot] == docID && counts[slot] >= dim)
                            {
                                // This doc is still in the running...
                                // TODO: single-valued dims will always be true
                                // below; we could somehow specialize
                                if (missingDims[slot] >= dim)
                                {
                                    // No dimension missed so far: push the first possible
                                    // missing dimension forward past this one.
                                    //if (DEBUG) {
                                    //  System.out.println("      set docID=" + docID + " count=" + (dim+2));
                                    //}
                                    missingDims[slot] = dim + 1;
                                    counts[slot]      = dim + 2;
                                }
                                else
                                {
                                    // An earlier dimension was already missed; missingDims
                                    // keeps pointing at it.
                                    //if (DEBUG) {
                                    //  System.out.println("      set docID=" + docID + " missing count=" + (dim+1));
                                    //}
                                    counts[slot] = dim + 1;
                                }
                            }
                            docID = disi.NextDoc();
                        }
                    }
                }

                // Collect:
                //System.out.println("  now collect: " + filledCount + " hits");
                for (int i = 0; i < filledCount; i++)
                {
                    // NOTE: This is actually in-order collection,
                    // because we only accept docs originally returned by
                    // the baseScorer (ie that Scorer is AND'd)
                    int slot = filledSlots[i];
                    // NOTE(review): collectDocID / collectScore are fields, presumably read
                    // by CollectHit / CollectNearMiss; confirm in those methods.
                    collectDocID = docIDs[slot];
                    collectScore = scores[slot];
                    //if (DEBUG) {
                    //  System.out.println("    docID=" + docIDs[slot] + " count=" + counts[slot]);
                    //}
                    //System.out.println("  collect doc=" + collectDocID + " main.freq=" + (counts[slot]-1) + " main.doc=" + collectDocID + " exactCount=" + numDims);
                    if (counts[slot] == 1 + numDims)
                    {
                        // Base query plus every dimension matched.
                        //System.out.println("    hit");
                        CollectHit(collector, sidewaysCollectors);
                    }
                    else if (counts[slot] == numDims)
                    {
                        // Exactly one dimension missed: report to that dimension's collector.
                        //System.out.println("    sw");
                        CollectNearMiss(sidewaysCollectors[missingDims[slot]]);
                    }
                }

                if (nextChunkStart >= maxDoc)
                {
                    break;
                }

                nextChunkStart += CHUNK;
            }
        }
Ejemplo n.º 27
0
 /// <summary>
 /// Adds every document returned by a <see cref="DocIdSetIterator"/> to this builder.
 /// </summary>
 /// <param name="it">Iterator to drain; it is consumed until it reports <c>NO_MORE_DOCS</c>.</param>
 /// <returns>This builder, to allow call chaining.</returns>
 public virtual Builder Add(DocIdSetIterator it)
 {
     int doc;
     while ((doc = it.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
     {
         Add(doc);
     }
     return this;
 }
Ejemplo n.º 28
0
        /// <summary>
        /// Tallies, for every matching document, the sorted-set ordinals it carries into
        /// <c>counts</c>. When an ordinal map is in use, segment ordinals are translated to
        /// global ordinals, either while iterating or in one pass after counting per segment.
        /// </summary>
        /// <param name="matchingDocs">Per-segment hits to count.</param>
        /// <exception cref="ThreadStateException">
        /// Thrown when a segment's top-level reader is not the reader held by <c>state</c>.
        /// </exception>
        private void Count(IList<FacetsCollector.MatchingDocs> matchingDocs)
        {
            // TODO: is this right?  really, we need a way to
            // verify that this ordinalMap "matches" the leaves in
            // matchingDocs...
            MultiDocValues.OrdinalMap ordinalMap = null;
            if (dv is MultiDocValues.MultiSortedSetDocValues && matchingDocs.Count > 1)
            {
                ordinalMap = ((MultiDocValues.MultiSortedSetDocValues)dv).Mapping;
            }

            IndexReader origReader = state.OrigReader;

            foreach (FacetsCollector.MatchingDocs hits in matchingDocs)
            {
                var reader = hits.context.AtomicReader;

                // LUCENE-5090: make sure the provided reader context "matches"
                // the top-level reader passed to the
                // SortedSetDocValuesReaderState, else cryptic
                // AIOOBE can happen:
                if (!Equals(ReaderUtil.GetTopLevelContext(hits.context).Reader, origReader))
                {
                    throw new ThreadStateException("the SortedSetDocValuesReaderState provided to this class does not match the reader being searched; you must create a new SortedSetDocValuesReaderState every time you open a new IndexReader");
                }

                SortedSetDocValues segValues = reader.GetSortedSetDocValues(field);
                if (segValues == null)
                {
                    // Segment has no values for the field: nothing to count.
                    continue;
                }

                DocIdSetIterator docs = hits.bits.GetIterator();

                if (ordinalMap == null)
                {
                    // No ord mapping (e.g., single segment index):
                    // segment ordinals are used as indexes into counts directly.
                    for (int doc = docs.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docs.NextDoc())
                    {
                        segValues.Document = doc;
                        for (int term = (int)segValues.NextOrd();
                             term != SortedSetDocValues.NO_MORE_ORDS;
                             term = (int)segValues.NextOrd())
                        {
                            counts[term]++;
                        }
                    }
                    continue;
                }

                // TODO: yet another option is to count all segs
                // first, only in seg-ord space, and then do a
                // merge-sort-PQ in the end to only "resolve to
                // global" those seg ords that can compete, if we know
                // we just want top K?  ie, this is the same algo
                // that'd be used for merging facets across shards
                // (distributed faceting).  but this has much higher
                // temp ram req'ts (sum of number of ords across all
                // segs)
                int segOrd     = hits.context.Ord;
                int numSegOrds = (int)segValues.ValueCount;

                if (hits.totalHits < numSegOrds / 10)
                {
                    // Few hits relative to the number of ordinals:
                    // remap every ord to its global ord as we iterate.
                    for (int doc = docs.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docs.NextDoc())
                    {
                        segValues.Document = doc;
                        for (int term = (int)segValues.NextOrd();
                             term != SortedSetDocValues.NO_MORE_ORDS;
                             term = (int)segValues.NextOrd())
                        {
                            counts[(int)ordinalMap.GetGlobalOrd(segOrd, term)]++;
                        }
                    }
                }
                else
                {
                    // First count in seg-ord space:
                    int[] segCounts = new int[numSegOrds];
                    for (int doc = docs.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docs.NextDoc())
                    {
                        segValues.Document = doc;
                        for (int term = (int)segValues.NextOrd();
                             term != SortedSetDocValues.NO_MORE_ORDS;
                             term = (int)segValues.NextOrd())
                        {
                            segCounts[term]++;
                        }
                    }

                    // Then, migrate to global ords, touching only ordinals that were seen:
                    for (int ord = 0; ord < numSegOrds; ord++)
                    {
                        int count = segCounts[ord];
                        if (count == 0)
                        {
                            continue;
                        }
                        counts[(int)ordinalMap.GetGlobalOrd(segOrd, ord)] += count;
                    }
                }
            }
        }
Ejemplo n.º 29
0
        /// <summary>
        /// Verifies that <paramref name="ds2"/> exposes exactly the documents whose bits are set in
        /// <paramref name="ds1"/>. Three passes are made: sequential iteration with <c>NextDoc()</c>,
        /// a random mix of <c>NextDoc()</c> and <c>Advance()</c>, and (when available) a consistency
        /// check between the set's <see cref="IBits"/> view and its iterator.
        /// </summary>
        /// <param name="numBits"> Used to scale the random jump distance passed to <c>Advance()</c>. </param>
        /// <param name="ds1"> The reference <see cref="BitSet"/>. </param>
        /// <param name="ds2"> The <see cref="DocIdSet"/> under test. </param>
#pragma warning disable xUnit1013
        public virtual void AssertEquals(int numBits, BitSet ds1, T ds2)
#pragma warning restore xUnit1013
        {
            // Pass 1: walk both sets in lock-step using NextDoc() only.
            DocIdSetIterator sequential = ds2.GetIterator();
            if (sequential != null)
            {
                // A fresh iterator must be positioned before the first document.
                Assert.AreEqual(-1, sequential.DocID);

                int expected = ds1.NextSetBit(0);
                while (expected != -1)
                {
                    Assert.AreEqual(expected, sequential.NextDoc());
                    Assert.AreEqual(expected, sequential.DocID);
                    expected = ds1.NextSetBit(expected + 1);
                }

                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, sequential.NextDoc());
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, sequential.DocID);
            }
            else
            {
                // A null iterator is only acceptable for an empty reference set.
                Assert.AreEqual(-1, ds1.NextSetBit(0));
            }

            // Pass 2: randomly interleave NextDoc() and Advance().
            DocIdSetIterator mixed = ds2.GetIterator();
            if (mixed != null)
            {
                int current = -1;
                while (current != DocIdSetIterator.NO_MORE_DOCS)
                {
                    if (Random.NextBoolean())
                    {
                        int next = ds1.NextSetBit(current + 1);
                        current = next == -1 ? DocIdSetIterator.NO_MORE_DOCS : next;
                        Assert.AreEqual(current, mixed.NextDoc());
                        Assert.AreEqual(current, mixed.DocID);
                    }
                    else
                    {
                        // Jump ahead by either a short hop (< 64) or a hop proportional to numBits.
                        int target = current + 1 + Random.Next(Random.NextBoolean() ? 64 : Math.Max(numBits / 8, 1));
                        int next = ds1.NextSetBit(target);
                        current = next == -1 ? DocIdSetIterator.NO_MORE_DOCS : next;
                        Assert.AreEqual(current, mixed.Advance(target));
                        Assert.AreEqual(current, mixed.DocID);
                    }
                }
            }
            else
            {
                Assert.AreEqual(-1, ds1.NextSetBit(0));
            }

            // Pass 3: the random-access view, if any, must agree with the iterator.
            IBits bits = ds2.Bits;
            if (bits == null)
            {
                return;
            }

            DocIdSetIterator reference = ds2.GetIterator();
            int previousDoc = -1;
            int doc = reference.NextDoc();
            while (true)
            {
                bool exhausted = doc == DocIdSetIterator.NO_MORE_DOCS;

                // Every index strictly between two consecutive documents must be unset.
                int gapEnd = exhausted ? bits.Length : doc;
                for (int i = previousDoc + 1; i < gapEnd; ++i)
                {
                    Assert.AreEqual(false, bits.Get(i));
                }

                if (exhausted)
                {
                    break;
                }

                Assert.AreEqual(true, bits.Get(doc));
                previousDoc = doc;
                doc = reference.NextDoc();
            }
        }
Ejemplo n.º 30
0
 /// <summary>
 /// Moves the wrapped matching-docs iterator forward, stores the resulting
 /// document id in <c>currentDoc</c> and returns it.
 /// </summary>
 public override int NextDoc()
 {
     int next = matchingDocsIterator.NextDoc();
     currentDoc = next;
     return next;
 }
Ejemplo n.º 31
0
        /// <summary>
        /// Creates a sample of the given hits.
        /// <para/>
        /// The matching documents are consumed in bins of <c>(int)(1.0 / samplingRate)</c> documents,
        /// and one randomly chosen position per bin is copied into the sample. A partially consumed
        /// bin is remembered in <c>leftoverBin</c> / <c>leftoverIndex</c> so that the next call
        /// continues it.
        /// </summary>
        /// <param name="docs"> The matching documents of one segment. </param>
        /// <returns>
        /// A new <see cref="MatchingDocs"/> for the same context holding only the sampled documents.
        /// Note that it is created with the <c>TotalHits</c> of <paramref name="docs"/>, not the
        /// number of sampled documents, and with <c>null</c> as its last constructor argument.
        /// </returns>
        /// <exception cref="Exception"> Wraps any <see cref="IOException"/> raised while sampling. </exception>
        private MatchingDocs CreateSample(MatchingDocs docs)
        {
            int maxdoc = docs.Context.Reader.MaxDoc;

            // TODO: we could try the WAH8DocIdSet here as well, as the results will be sparse
            FixedBitSet sampleDocs = new FixedBitSet(maxdoc);

            int binSize = (int)(1.0 / samplingRate);

            try
            {
                int counter = 0;
                int limit, randomIndex;
                if (leftoverBin != NOT_CALCULATED)
                {
                    // Finish the bin that the previous call left partially consumed.
                    limit = leftoverBin;
                    // either NOT_CALCULATED, which means we already sampled from that bin,
                    // or the next document to sample
                    randomIndex = leftoverIndex;
                }
                else
                {
                    limit = binSize;
                    randomIndex = random.NextInt32(binSize);
                }

                DocIdSetIterator it = docs.Bits.GetIterator();
                for (int doc = it.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.NextDoc())
                {
                    if (counter == randomIndex)
                    {
                        sampleDocs.Set(doc);
                    }
                    counter++;
                    if (counter >= limit)
                    {
                        // Bin exhausted: start a fresh full-size bin with a new random pick.
                        counter = 0;
                        limit = binSize;
                        randomIndex = random.NextInt32(binSize);
                    }
                }

                if (counter == 0)
                {
                    // we either exhausted the bin and the iterator at the same time, or
                    // this segment had no results. in the latter case we might want to
                    // carry leftover to the next segment as is, but that complicates the
                    // code and doesn't seem so important.
                    leftoverBin = leftoverIndex = NOT_CALCULATED;
                }
                else
                {
                    leftoverBin = limit - counter;
                    if (randomIndex > counter)
                    {
                        // the document to sample is in the next bin
                        leftoverIndex = randomIndex - counter;
                    }
                    else if (randomIndex < counter)
                    {
                        // we sampled a document from the bin, so just skip over remaining
                        // documents in the bin in the next segment.
                        leftoverIndex = NOT_CALCULATED;
                    }
                    // NOTE(review): when randomIndex == counter neither branch runs and
                    // leftoverIndex keeps its previous value - confirm this is intended.
                }

                return new MatchingDocs(docs.Context, sampleDocs, docs.TotalHits, null);
            }
            catch (IOException e)
            {
                // Preserve the underlying I/O failure (message and stack trace) as the
                // inner exception instead of discarding it.
                throw new Exception(e.Message, e);
            }
        }
Ejemplo n.º 32
0
 /// <summary>
 /// Delegates to the wrapped <c>m_docSetIter</c> and returns the document id it reports.
 /// </summary>
 public override int NextDoc()
 {
     int next = m_docSetIter.NextDoc();
     return next;
 }
Ejemplo n.º 33
0
        /// <summary>
        /// Counts the matching documents into the configured double ranges.
        /// <para/>
        /// Each <see cref="DoubleRange"/> in <c>m_ranges</c> is converted to an <see cref="Int64Range"/>
        /// over sortable-long encoded bounds, every accepted document's value is fed to an
        /// <see cref="Int64RangeCounter"/>, and the result is written to <c>m_counts</c>.
        /// <c>m_totCount</c> is increased by each segment's total hits and finally reduced by the
        /// number of missing documents.
        /// </summary>
        /// <param name="valueSource"> Supplies the per-document double values. </param>
        /// <param name="matchingDocs"> The per-segment hits to count. </param>
        /// <exception cref="System.ArgumentException">
        /// If <c>m_fastMatchFilter</c> returns a <see cref="DocIdSet"/> whose <c>Bits</c> is <c>null</c>.
        /// </exception>
        private void Count(ValueSource valueSource, IEnumerable<MatchingDocs> matchingDocs)
        {
            DoubleRange[] ranges = (DoubleRange[])this.m_ranges;

            // Re-express every double range as a long range over the sortable encoding;
            // both converted bounds are passed as inclusive.
            Int64Range[] longRanges = new Int64Range[ranges.Length];
            for (int i = 0; i < ranges.Length; i++)
            {
                DoubleRange range = ranges[i];
                longRanges[i] = new Int64Range(range.Label, NumericUtils.DoubleToSortableInt64(range.minIncl), true, NumericUtils.DoubleToSortableInt64(range.maxIncl), true);
            }

            Int64RangeCounter counter = new Int64RangeCounter(longRanges);

            int missingCount = 0;

            foreach (MatchingDocs hits in matchingDocs)
            {
                FunctionValues fv = valueSource.GetValues(new Dictionary<string, object>(), hits.Context);

                m_totCount += hits.TotalHits;
                IBits bits;
                if (m_fastMatchFilter != null)
                {
                    DocIdSet dis = m_fastMatchFilter.GetDocIdSet(hits.Context, null);
                    if (dis == null)
                    {
                        // No documents match
                        continue;
                    }
                    bits = dis.Bits;
                    if (bits == null)
                    {
                        throw new System.ArgumentException("fastMatchFilter does not implement DocIdSet.bits");
                    }
                }
                else
                {
                    bits = null;
                }

                DocIdSetIterator docs = hits.Bits.GetIterator();

                int doc;
                while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                {
                    // Documents rejected by the fast-match filter are skipped; the iterator
                    // itself advances in the loop condition, so no manual increment is needed.
                    if (bits != null && !bits.Get(doc))
                    {
                        continue;
                    }
                    // Skip missing docs:
                    if (fv.Exists(doc))
                    {
                        counter.Add(NumericUtils.DoubleToSortableInt64(fv.DoubleVal(doc)));
                    }
                    else
                    {
                        missingCount++;
                    }
                }
            }

            // FillCounts' return value is added to the missing total - presumably the number
            // of values that fell into no range; verify against Int64RangeCounter.
            missingCount += counter.FillCounts(m_counts);
            m_totCount -= missingCount;
        }