/// <summary>
/// Creates a filtering iterator on top of <paramref name="innerIter"/>.
/// The current document starts out as <c>-1</c>.
/// </summary>
/// <param name="innerIter">The iterator being wrapped; must not be <c>null</c>.</param>
/// <exception cref="System.ArgumentException">Thrown when <paramref name="innerIter"/> is <c>null</c>.</exception>
public FilteredDocIdSetIterator(DocIdSetIterator innerIter)
{
    // Reject a missing delegate up front so later calls never dereference null.
    if (innerIter == null)
    {
        throw new System.ArgumentException("null iterator");
    }

    this._innerIter = innerIter;
    this.Doc = -1;
}
/// <summary>
/// Creates a filtering iterator on top of <paramref name="innerIter"/>.
/// The current document starts out as <c>-1</c>.
/// </summary>
/// <param name="innerIter">The iterator being wrapped; must not be <c>null</c>.</param>
/// <exception cref="System.ArgumentException">Thrown when <paramref name="innerIter"/> is <c>null</c>.</exception>
protected FilteredDocIdSetIterator(DocIdSetIterator innerIter)
{
    // Reject a missing delegate up front so later calls never dereference null.
    if (innerIter == null)
    {
        throw new System.ArgumentException("null iterator");
    }

    this.internalInnerIter = innerIter;
    this.doc = -1;
}
/// <summary>
/// Feeds the <c>long</c> value of every matching document into a range counter
/// and stores the per-range totals in <c>m_counts</c>.
/// </summary>
/// <param name="valueSource">Supplies the per-document values for each segment.</param>
/// <param name="matchingDocs">Per-segment hits to count.</param>
/// <exception cref="System.ArgumentException">
/// Thrown when the fast-match filter returns a <c>DocIdSet</c> whose <c>Bits</c> is <c>null</c>.
/// </exception>
private void Count(ValueSource valueSource, IList<MatchingDocs> matchingDocs)
{
    Int64Range[] ranges = (Int64Range[])this.m_ranges;
    Int64RangeCounter counter = new Int64RangeCounter(ranges);

    int missingCount = 0;
    foreach (MatchingDocs hits in matchingDocs)
    {
        FunctionValues fv = valueSource.GetValues(new Dictionary<string, object>(), hits.Context);

        // Every hit is counted up front; documents without a value are subtracted at the end.
        m_totCount += hits.TotalHits;

        IBits bits;
        if (m_fastMatchFilter != null)
        {
            DocIdSet dis = m_fastMatchFilter.GetDocIdSet(hits.Context, null);
            if (dis == null)
            {
                // No documents match
                continue;
            }

            bits = dis.Bits;
            if (bits == null)
            {
                throw new System.ArgumentException("fastMatchFilter does not implement DocIdSet.bits");
            }
        }
        else
        {
            bits = null;
        }

        DocIdSetIterator docs = hits.Bits.GetIterator();
        int doc;
        while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
        {
            // Rejected by the fast-match filter. The iterator supplies the next
            // document itself, so no manual increment of 'doc' is needed here.
            if (bits != null && bits.Get(doc) == false)
            {
                continue;
            }

            // Skip missing docs:
            if (fv.Exists(doc))
            {
                counter.Add(fv.Int64Val(doc));
            }
            else
            {
                missingCount++;
            }
        }
    }

    // The value returned by FillCounts is folded into the missing total.
    missingCount += counter.FillCounts(m_counts);

    m_totCount -= missingCount;
}
/// <summary>
/// Builds an <see cref="OpenBitSetDISI"/> sized by <paramref name="maxSize"/> and
/// immediately ORs in every doc id produced by <paramref name="disi"/>.
/// </summary>
/// <param name="disi">Iterator whose doc ids become the initial set bits.</param>
/// <param name="maxSize">
/// Maximum size, one larger than the largest doc id for which a bit may ever be set
/// on this <see cref="OpenBitSetDISI"/>.
/// </param>
public OpenBitSetDISI(DocIdSetIterator disi, int maxSize)
    : base(maxSize)
{
    this.InPlaceOr(disi);
}
/// <summary>
/// Creates a sample of the given hits: the matching documents are walked in
/// iterator order, grouped into bins, and at most one document per bin (the one at
/// a randomly chosen index) is copied into the sample.
/// </summary>
/// <remarks>
/// The fields <c>leftoverBin</c> and <c>leftoverIndex</c> are read at the start and
/// rewritten at the end, so a bin left unfinished by one call is continued by the next call.
/// </remarks>
/// <param name="docs">The hits of one segment to sample from.</param>
/// <returns>
/// A new <c>MatchingDocs</c> with the same context and <c>TotalHits</c> as
/// <paramref name="docs"/>, the sampled bit set, and <c>null</c> as its last argument.
/// </returns>
private MatchingDocs CreateSample(MatchingDocs docs)
{
    int maxdoc = docs.Context.Reader.MaxDoc;

    // TODO: we could try the WAH8DocIdSet here as well, as the results will be sparse
    FixedBitSet sampleDocs = new FixedBitSet(maxdoc);

    // Number of matching documents per bin; one document is picked from each bin.
    int binSize = (int)(1.0 / samplingRate);

    try
    {
        int counter = 0;
        int limit, randomIndex;
        if (leftoverBin != NOT_CALCULATED)
        {
            // Continue the bin that the previous call left unfinished.
            limit = leftoverBin;
            // either NOT_CALCULATED, which means we already sampled from that bin,
            // or the next document to sample
            randomIndex = leftoverIndex;
        }
        else
        {
            limit = binSize;
            randomIndex = random.NextInt32(binSize);
        }
        DocIdSetIterator it = docs.Bits.GetIterator();
        for (int doc = it.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.NextDoc())
        {
            if (counter == randomIndex)
            {
                sampleDocs.Set(doc);
            }
            counter++;
            if (counter >= limit)
            {
                // Bin exhausted: start a fresh full-size bin with a new random pick.
                counter = 0;
                limit = binSize;
                randomIndex = random.NextInt32(binSize);
            }
        }

        if (counter == 0)
        {
            // we either exhausted the bin and the iterator at the same time, or
            // this segment had no results. in the latter case we might want to
            // carry leftover to the next segment as is, but that complicates the
            // code and doesn't seem so important.
            leftoverBin = leftoverIndex = NOT_CALCULATED;
        }
        else
        {
            // Remember how many documents of the current bin are still outstanding.
            leftoverBin = limit - counter;
            // NOTE(review): when randomIndex == counter neither branch below runs and
            // leftoverIndex keeps its previous value - confirm this is intended.
            if (randomIndex > counter)
            {
                // the document to sample is in the next bin
                leftoverIndex = randomIndex - counter;
            }
            else if (randomIndex < counter)
            {
                // we sampled a document from the bin, so just skip over remaining
                // documents in the bin in the next segment.
                leftoverIndex = NOT_CALCULATED;
            }
        }

        return(new MatchingDocs(docs.Context, sampleDocs, docs.TotalHits, null));
    }
    catch (Exception e) when(e.IsIOException())
    {
        // I/O failures are rethrown wrapped via RuntimeException.Create.
        throw RuntimeException.Create(e);
    }
}
/// <summary>
/// Intersects this bit set, in place, with the documents produced by
/// <paramref name="iter"/>.
/// </summary>
/// <param name="iter">
/// Iterator supplying the documents to keep. Bit-set backed iterators that have not
/// been advanced yet (<c>DocID() == -1</c>) take a word-level path and are then exhausted.
/// </param>
public void And(DocIdSetIterator iter)
{
    OpenBitSetIterator openBits = iter as OpenBitSetIterator;
    if (openBits != null && iter.DocID() == -1)
    {
        And(openBits.Arr, openBits.Words);
        // advance after last doc that would be accepted if standard
        // iteration is used (to exhaust it):
        openBits.Advance(NumBits);
        return;
    }

    FixedBitSetIterator fixedBits = iter as FixedBitSetIterator;
    if (fixedBits != null && iter.DocID() == -1)
    {
        And(fixedBits.bits, fixedBits.NumWords);
        // advance after last doc that would be accepted if standard
        // iteration is used (to exhaust it):
        fixedBits.Advance(NumBits);
        return;
    }

    if (NumBits == 0)
    {
        return;
    }

    // Generic path: leap-frog between our next set bit and the iterator, clearing
    // every bit of ours that the iterator skipped over.
    int nextSet = NextSetBit(0);
    while (nextSet != -1)
    {
        int matched = iter.Advance(nextSet);
        if (matched >= NumBits)
        {
            break;
        }

        Clear(nextSet, matched);
        int resume = matched + 1;
        nextSet = (resume < NumBits) ? NextSetBit(resume) : -1;
    }

    // The iterator ran past our range first: everything still set from here on goes.
    if (nextSet != -1)
    {
        Clear(nextSet, NumBits);
    }
}
/// <summary>
/// Convenience overload that drains a <see cref="DocIdSetIterator"/> and adds every
/// document it yields to this builder.
/// </summary>
/// <param name="it">The iterator to consume until it reports <c>NO_MORE_DOCS</c>.</param>
/// <returns>This builder, to allow call chaining.</returns>
public virtual Builder Add(DocIdSetIterator it)
{
    int current = it.NextDoc();
    while (current != DocIdSetIterator.NO_MORE_DOCS)
    {
        Add(current);
        current = it.NextDoc();
    }

    return this;
}
/// <summary>
/// Creates a <c>ReqExclScorer</c> from a required scorer and an exclusion iterator.
/// </summary>
/// <param name="reqScorer">The scorer that must match, except where <paramref name="exclDisi"/> indicates exclusion.</param>
/// <param name="exclDisi">Iterator over the documents to exclude.</param>
public ReqExclScorer(Scorer reqScorer, DocIdSetIterator exclDisi)
    : base(null) // No similarity used.
{
    this.exclDisi = exclDisi;
    this.reqScorer = reqScorer;
}
/// <summary>
/// Scores in windows of <c>CHUNK</c> doc ids: the base scorer fills per-slot state for
/// the window, each dimension iterator then updates that state, and finally every
/// filled slot is collected either as a hit or as a near miss.
/// </summary>
/// <param name="collector">Passed to <c>CollectHit</c> for documents matching every dimension.</param>
/// <param name="disis">One iterator per dimension; a <c>null</c> entry is skipped.</param>
/// <param name="sidewaysCollectors">
/// Per-dimension collectors; the one indexed by the slot's missing dimension receives a near miss.
/// </param>
private void DoUnionScoring(ICollector collector, DocIdSetIterator[] disis, ICollector[] sidewaysCollectors)
{
    int maxDoc = context.Reader.MaxDoc;
    int numDims = dims.Length;

    // TODO: maybe a class like BS, instead of parallel arrays
    // Parallel per-slot state, indexed by (docID & MASK):
    int[] filledSlots = new int[CHUNK];
    int[] docIDs = new int[CHUNK];
    float[] scores = new float[CHUNK];
    int[] missingDims = new int[CHUNK];
    int[] counts = new int[CHUNK];

    // Arrays are zero-initialised, so slot 0 would otherwise look like it already holds doc 0.
    docIDs[0] = -1;

    // NOTE: this is basically a specialized version of
    // BooleanScorer, to the minShouldMatch=N-1 case, but
    // carefully tracking which dimension failed to match

    int nextChunkStart = CHUNK;

    while (true)
    {
        int filledCount = 0;
        int docID = baseScorer.DocID;

        // Base scorer: claim a slot for each of its docs inside the current window.
        while (docID < nextChunkStart)
        {
            int slot = docID & MASK;

            // Mark slot as valid:
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(docIDs[slot] != docID, "slot={0} docID={1}", slot, docID);
            }
            docIDs[slot] = docID;
            scores[slot] = baseScorer.GetScore();
            filledSlots[filledCount++] = slot;
            missingDims[slot] = 0;
            counts[slot] = 1;

            docID = baseScorer.NextDoc();
        }

        if (filledCount == 0)
        {
            // Nothing from the base scorer in this window: stop or move to the next window.
            if (nextChunkStart >= maxDoc)
            {
                break;
            }
            nextChunkStart += CHUNK;
            continue;
        }

        // First drill-down dim, basically adds SHOULD onto
        // the baseQuery:
        DocIdSetIterator disi = disis[0];
        if (disi != null)
        {
            docID = disi.DocID;
            while (docID < nextChunkStart)
            {
                int slot = docID & MASK;
                if (docIDs[slot] == docID)
                {
                    // Dim 0 matched: the first candidate for "missing" becomes dim 1.
                    missingDims[slot] = 1;
                    counts[slot] = 2;
                }
                docID = disi.NextDoc();
            }
        }

        for (int dim = 1; dim < numDims; dim++)
        {
            disi = disis[dim];
            if (disi != null)
            {
                docID = disi.DocID;
                while (docID < nextChunkStart)
                {
                    int slot = docID & MASK;
                    if (docIDs[slot] == docID && counts[slot] >= dim)
                    {
                        // This doc is still in the running...
                        // TODO: single-valued dims will always be true
                        // below; we could somehow specialize
                        if (missingDims[slot] >= dim)
                        {
                            // No earlier dim was missed.
                            missingDims[slot] = dim + 1;
                            counts[slot] = dim + 2;
                        }
                        else
                        {
                            // An earlier dim was missed; missingDims keeps pointing at it.
                            counts[slot] = dim + 1;
                        }
                    }
                    docID = disi.NextDoc();
                }
            }
        }

        // Collect:
        for (int i = 0; i < filledCount; i++)
        {
            // NOTE: This is actually in-order collection,
            // because we only accept docs originally returned by
            // the baseScorer (ie that Scorer is AND'd)
            int slot = filledSlots[i];
            collectDocID = docIDs[slot];
            collectScore = scores[slot];
            if (counts[slot] == 1 + numDims)
            {
                // Base query plus every dimension matched.
                CollectHit(collector, sidewaysCollectors);
            }
            else if (counts[slot] == numDims)
            {
                // One short of a full match: near miss for the recorded dimension.
                CollectNearMiss(sidewaysCollectors[missingDims[slot]]);
            }
        }

        if (nextChunkStart >= maxDoc)
        {
            break;
        }

        nextChunkStart += CHUNK;
    }
}
/// <summary>
/// Used when drill downs are highly constraining vs
/// baseQuery. Works in windows of <c>CHUNK</c> doc ids: the first two dimension
/// iterators populate the per-slot state, the base scorer is then advanced only to
/// those candidates, and the remaining dimensions are folded in before collecting.
/// </summary>
/// <param name="collector">Passed to <c>CollectHit</c> for documents matching every dimension.</param>
/// <param name="disis">
/// One iterator per dimension; a <c>null</c> entry is skipped. Index 1 is read
/// unconditionally, so the array needs at least two elements.
/// </param>
/// <param name="sidewaysCollectors">
/// Per-dimension collectors; the one indexed by the slot's missing dimension receives a near miss.
/// </param>
private void DoDrillDownAdvanceScoring(ICollector collector, DocIdSetIterator[] disis, ICollector[] sidewaysCollectors)
{
    int maxDoc = context.Reader.MaxDoc;
    int numDims = dims.Length;

    // TODO: maybe a class like BS, instead of parallel arrays
    // Parallel per-slot state, indexed by (docID & MASK):
    int[] filledSlots = new int[CHUNK];
    int[] docIDs = new int[CHUNK];
    float[] scores = new float[CHUNK];
    int[] missingDims = new int[CHUNK];
    int[] counts = new int[CHUNK];

    // Arrays are zero-initialised, so slot 0 would otherwise look like it already holds doc 0.
    docIDs[0] = -1;
    int nextChunkStart = CHUNK;

    // Slots touched by dim 0 or dim 1 in the current window.
    FixedBitSet seen = new FixedBitSet(CHUNK);

    while (true)
    {
        // First dim:
        DocIdSetIterator disi = disis[0];
        if (disi != null)
        {
            int docID = disi.DocID;
            while (docID < nextChunkStart)
            {
                int slot = docID & MASK;

                if (docIDs[slot] != docID)
                {
                    seen.Set(slot);
                    // Mark slot as valid:
                    docIDs[slot] = docID;
                    missingDims[slot] = 1;
                    counts[slot] = 1;
                }

                docID = disi.NextDoc();
            }
        }

        // Second dim:
        disi = disis[1];
        if (disi != null)
        {
            int docID = disi.DocID;
            while (docID < nextChunkStart)
            {
                int slot = docID & MASK;

                if (docIDs[slot] != docID)
                {
                    // Mark slot as valid:
                    // Only dim 1 saw this doc, so dim 0 is the missing one.
                    seen.Set(slot);
                    docIDs[slot] = docID;
                    missingDims[slot] = 0;
                    counts[slot] = 1;
                }
                else
                {
                    // TODO: single-valued dims will always be true
                    // below; we could somehow specialize
                    if (missingDims[slot] >= 1)
                    {
                        missingDims[slot] = 2;
                        counts[slot] = 2;
                    }
                    else
                    {
                        counts[slot] = 1;
                    }
                }

                docID = disi.NextDoc();
            }
        }

        // After this we can "upgrade" to conjunction, because
        // any doc not seen by either dim 0 or dim 1 cannot be
        // a hit or a near miss:

        // Fold in baseScorer, using advance:
        int filledCount = 0;
        int slot0 = 0;
        while (slot0 < CHUNK && (slot0 = seen.NextSetBit(slot0)) != -1)
        {
            int ddDocID = docIDs[slot0];
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(ddDocID != -1);
            }

            int baseDocID = baseScorer.DocID;
            if (baseDocID < ddDocID)
            {
                baseDocID = baseScorer.Advance(ddDocID);
            }
            if (baseDocID == ddDocID)
            {
                // The base query matches this candidate too: keep it and count the base match.
                scores[slot0] = baseScorer.GetScore();
                filledSlots[filledCount++] = slot0;
                counts[slot0]++;
            }
            else
            {
                // The base query does not match: invalidate the slot.
                docIDs[slot0] = -1;

                // TODO: we could jump slot0 forward to the
                // baseDocID ... but we'd need to set docIDs for
                // intervening slots to -1
            }
            slot0++;
        }
        seen.Clear(0, CHUNK);

        if (filledCount == 0)
        {
            // No surviving candidates in this window: stop or move to the next window.
            if (nextChunkStart >= maxDoc)
            {
                break;
            }
            nextChunkStart += CHUNK;
            continue;
        }

        // TODO: factor this out & share w/ union scorer,
        // except we start from dim=2 instead:
        for (int dim = 2; dim < numDims; dim++)
        {
            disi = disis[dim];
            if (disi != null)
            {
                int docID = disi.DocID;
                while (docID < nextChunkStart)
                {
                    int slot = docID & MASK;
                    if (docIDs[slot] == docID && counts[slot] >= dim)
                    {
                        // TODO: single-valued dims will always be true
                        // below; we could somehow specialize
                        if (missingDims[slot] >= dim)
                        {
                            // No earlier dim was missed.
                            missingDims[slot] = dim + 1;
                            counts[slot] = dim + 2;
                        }
                        else
                        {
                            // An earlier dim was missed; missingDims keeps pointing at it.
                            counts[slot] = dim + 1;
                        }
                    }

                    // TODO: sometimes use advance?
                    docID = disi.NextDoc();
                }
            }
        }

        // Collect:
        for (int i = 0; i < filledCount; i++)
        {
            int slot = filledSlots[i];
            collectDocID = docIDs[slot];
            collectScore = scores[slot];
            if (counts[slot] == 1 + numDims)
            {
                // Base query plus every dimension matched.
                CollectHit(collector, sidewaysCollectors);
            }
            else if (counts[slot] == numDims)
            {
                // One short of a full match: near miss for the recorded dimension.
                CollectNearMiss(sidewaysCollectors[missingDims[slot]]);
            }
        }

        if (nextChunkStart >= maxDoc)
        {
            break;
        }

        nextChunkStart += CHUNK;
    }
}
/// <summary>
/// Used when base query is highly constraining vs the
/// drilldowns, or when the docs must be scored at once
/// (i.e., like <see cref="Search.BooleanScorer2"/>, not <see cref="Search.BooleanScorer"/>).
/// The base scorer is stepped one document at a time; each dimension iterator is
/// advanced to that document and each <c>IBits</c> filter is probed for it.
/// </summary>
/// <param name="collector">Receives documents that pass every dimension.</param>
/// <param name="disis">Iterator-backed dimensions; a <c>null</c> entry counts as a failed dimension.</param>
/// <param name="sidewaysCollectors">Collectors parallel to <paramref name="disis"/>.</param>
/// <param name="bits">Bits-backed dimensions, checked after the iterators.</param>
/// <param name="bitsSidewaysCollectors">Collectors parallel to <paramref name="bits"/>.</param>
private void DoQueryFirstScoring(ICollector collector, DocIdSetIterator[] disis, ICollector[] sidewaysCollectors, IBits[] bits, ICollector[] bitsSidewaysCollectors)
{
    for (int docID = baseScorer.DocID; docID != DocsEnum.NO_MORE_DOCS; docID = baseScorer.NextDoc())
    {
        // Collector of the single dimension that failed so far, if any.
        ICollector failedCollector = null;
        // Set once a second dimension fails: the doc is neither a hit nor a near miss.
        bool multipleFailures = false;

        for (int i = 0; i < disis.Length; i++)
        {
            // TODO: should we sort this 2nd dimension of
            // docsEnums from most frequent to least?
            DocIdSetIterator disi = disis[i];
            if (disi != null && disi.DocID < docID)
            {
                disi.Advance(docID);
            }

            if (disi != null && disi.DocID <= docID)
            {
                // This dimension matches the current document.
                continue;
            }

            if (failedCollector != null)
            {
                multipleFailures = true;
                break;
            }

            failedCollector = sidewaysCollectors[i];
        }

        // TODO: for the "non-costly Bits" we really should
        // have passed them down as acceptDocs, but
        // unfortunately we cannot distinguish today betwen
        // "bits() is so costly that you should apply it last"
        // from "bits() is so cheap that you should apply it
        // everywhere down low"

        // Fold in Filter Bits last, since they may be costly:
        for (int i = 0; !multipleFailures && i < bits.Length; i++)
        {
            if (bits[i].Get(docID) == false)
            {
                if (failedCollector != null)
                {
                    multipleFailures = true;
                }
                else
                {
                    failedCollector = bitsSidewaysCollectors[i];
                }
            }
        }

        if (multipleFailures)
        {
            // More than one dim fails on this document; move to next doc.
            continue;
        }

        collectDocID = docID;

        // TODO: we could score on demand instead since we are
        // daat here:
        collectScore = baseScorer.GetScore();

        if (failedCollector != null)
        {
            // Hit missed exactly one filter:
            CollectNearMiss(failedCollector);
        }
        else
        {
            // Hit passed all filters, so it's "real":
            CollectHit(collector, sidewaysCollectors, bitsSidewaysCollectors);
        }
    }
}
/// <summary>
/// Does the actual tallying: walks every matching document of every segment and
/// increments <c>counts</c> for each ordinal the document carries.
/// </summary>
/// <param name="matchingDocs">Per-segment hits to count.</param>
/// <exception cref="InvalidOperationException">
/// Thrown when a segment's top-level reader is not the reader held by <c>state</c>.
/// </exception>
private void Count(IList<FacetsCollector.MatchingDocs> matchingDocs)
{
    // TODO: is this right?  really, we need a way to
    // verify that this ordinalMap "matches" the leaves in
    // matchingDocs...
    MultiDocValues.OrdinalMap ordinalMap = null;
    if (dv is MultiDocValues.MultiSortedSetDocValues && matchingDocs.Count > 1)
    {
        ordinalMap = ((MultiDocValues.MultiSortedSetDocValues)dv).Mapping;
    }

    IndexReader origReader = state.OrigReader;

    foreach (FacetsCollector.MatchingDocs hits in matchingDocs)
    {
        var reader = hits.Context.AtomicReader;

        // LUCENE-5090: make sure the provided reader context "matches"
        // the top-level reader passed to the
        // SortedSetDocValuesReaderState, else cryptic
        // AIOOBE can happen:
        if (!Equals(ReaderUtil.GetTopLevelContext(hits.Context).Reader, origReader))
        {
            throw new InvalidOperationException("the SortedSetDocValuesReaderState provided to this class does not match the reader being searched; you must create a new SortedSetDocValuesReaderState every time you open a new IndexReader");
        }

        SortedSetDocValues segValues = reader.GetSortedSetDocValues(field);
        if (segValues == null)
        {
            continue;
        }

        DocIdSetIterator docs = hits.Bits.GetIterator();

        if (ordinalMap == null)
        {
            // No ord mapping (e.g., single segment index):
            // just aggregate directly into counts:
            for (int doc = docs.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docs.NextDoc())
            {
                segValues.SetDocument(doc);
                for (int term = (int)segValues.NextOrd(); term != SortedSetDocValues.NO_MORE_ORDS; term = (int)segValues.NextOrd())
                {
                    counts[term]++;
                }
            }

            continue;
        }

        // TODO: yet another option is to count all segs
        // first, only in seg-ord space, and then do a
        // merge-sort-PQ in the end to only "resolve to
        // global" those seg ords that can compete, if we know
        // we just want top K?  ie, this is the same algo
        // that'd be used for merging facets across shards
        // (distributed faceting).  but this has much higher
        // temp ram req'ts (sum of number of ords across all
        // segs)
        int segOrd = hits.Context.Ord;
        int numSegOrds = (int)segValues.ValueCount;

        if (hits.TotalHits < numSegOrds / 10)
        {
            // Few hits relative to the ordinal count:
            // remap every ord to global ord as we iterate.
            for (int doc = docs.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docs.NextDoc())
            {
                segValues.SetDocument(doc);
                for (int term = (int)segValues.NextOrd(); term != SortedSetDocValues.NO_MORE_ORDS; term = (int)segValues.NextOrd())
                {
                    counts[(int)ordinalMap.GetGlobalOrd(segOrd, term)]++;
                }
            }
        }
        else
        {
            // First count in seg-ord space:
            int[] segCounts = new int[numSegOrds];
            for (int doc = docs.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docs.NextDoc())
            {
                segValues.SetDocument(doc);
                for (int term = (int)segValues.NextOrd(); term != SortedSetDocValues.NO_MORE_ORDS; term = (int)segValues.NextOrd())
                {
                    segCounts[term]++;
                }
            }

            // Then, migrate to global ords:
            for (int ord = 0; ord < numSegOrds; ord++)
            {
                int segCount = segCounts[ord];
                if (segCount != 0)
                {
                    counts[(int)ordinalMap.GetGlobalOrd(segOrd, ord)] += segCount;
                }
            }
        }
    }
}
/// <summary>
/// Creates a <c>ReqExclScorer</c> from a required scorer and an exclusion iterator.
/// </summary>
/// <param name="reqScorer">The scorer that must match, except where <paramref name="exclDisi"/> indicates exclusion.</param>
/// <param name="exclDisi">Iterator over the documents to exclude.</param>
public ReqExclScorer(Scorer reqScorer, DocIdSetIterator exclDisi)
    : base(null) // No similarity used.
{
    this.exclDisi = exclDisi;
    this.reqScorer = reqScorer;
}
/// <summary>
/// Runs <paramref name="weight"/> over every sub-reader, passing each document that
/// the facet validator accepts to <paramref name="collector"/>. When
/// <paramref name="filter"/> is given, only documents it permits are considered.
/// </summary>
/// <param name="weight">Source of the per-segment scorer.</param>
/// <param name="filter">Optional filter; <c>null</c> means every scored document is a candidate.</param>
/// <param name="collector">Receives the accepted documents.</param>
/// <param name="start">Base doc offset handed to the validator (and to the rebuilt reader context).</param>
/// <param name="mapReduceWrapper">Optional map/reduce hook; may be <c>null</c>.</param>
public virtual void Search(Weight weight, Filter filter, ICollector collector, int start, IBoboMapFunctionWrapper mapReduceWrapper)
{
    FacetValidator validator = CreateFacetValidator();
    int target = 0;

    IndexReader reader = this.IndexReader;
    IndexReaderContext indexReaderContext = reader.Context;

    if (filter == null)
    {
        // Resolved on first use and reused for the remaining segments, so the
        // reflection lookup is not repeated for every sub-reader.
        System.Reflection.ConstructorInfo contextConstructor = null;

        for (int i = 0; i < m_subReaders.Length; i++)
        {
            AtomicReaderContext atomicContext = indexReaderContext.Children == null
                ? (AtomicReaderContext)indexReaderContext
                : (AtomicReaderContext)(indexReaderContext.Children.Get(i));
            int docStart = start;

            // NOTE: This code calls an internal constructor. Apparently, this was in the same namespace as Lucene,
            // but was added to this project, which presumably allows you to call internal constructors in Java.
            // In .NET, we can just use Activator.CreateInstance. Not great, but this code will be removed
            // when applying commit https://github.com/senseidb/bobo/commit/924c8579d90dbb5d56103976d39b47daa2242ef3
            // which includes several major changes after the 4.0.2 release.

            // atomicContext = AtomicReaderContextUtil.UpdateDocBase(atomicContext, docStart);
            object[] args = new object[] { (CompositeReaderContext)null, atomicContext.AtomicReader, 0, 0, 0, docStart };
            if (contextConstructor == null)
            {
                Type[] constructorSignature = { typeof(CompositeReaderContext), typeof(AtomicReader), typeof(int), typeof(int), typeof(int), typeof(int) };
                contextConstructor = typeof(AtomicReaderContext).GetTypeInfo().DeclaredConstructors
                    .Single(constructor =>
                        constructor.GetParameters()
                            .Select(parameter => parameter.ParameterType)
                            .SequenceEqual(constructorSignature));
            }
            atomicContext = (AtomicReaderContext)contextConstructor.Invoke(args);

            if (reader is BoboMultiReader)
            {
                docStart = start + ((BoboMultiReader)reader).SubReaderBase(i);
            }
            collector.SetNextReader(atomicContext);
            validator.SetNextReader(m_subReaders[i], docStart);

            // NOTE: The Weight.Scorer method lost the scoreDocsInOrder and topScorer parameters between
            // Lucene 4.3.0 and 4.8.0. They are not used by BoboBrowse anyway, so the code here diverges
            // from the original Java source to remove these two parameters.

            // Scorer scorer = weight.Scorer(atomicContext, true, true, _subReaders[i].LiveDocs);
            Scorer scorer = weight.GetScorer(atomicContext, m_subReaders[i].LiveDocs);
            if (scorer != null)
            {
                collector.SetScorer(scorer);
                target = scorer.NextDoc();
                while (target != DocIdSetIterator.NO_MORE_DOCS)
                {
                    if (validator.Validate(target))
                    {
                        collector.Collect(target);
                        target = scorer.NextDoc();
                    }
                    else
                    {
                        // Jump straight to the next doc the validator could accept.
                        target = validator.m_nextTarget;
                        target = scorer.Advance(target);
                    }
                }
            }
            if (mapReduceWrapper != null)
            {
                mapReduceWrapper.MapFullIndexReader(m_subReaders[i], validator.GetCountCollectors());
            }
        }
        return;
    }

    for (int i = 0; i < m_subReaders.Length; i++)
    {
        AtomicReaderContext atomicContext = indexReaderContext.Children == null
            ? (AtomicReaderContext)indexReaderContext
            : (AtomicReaderContext)(indexReaderContext.Children.Get(i));

        DocIdSet filterDocIdSet = filter.GetDocIdSet(atomicContext, m_subReaders[i].LiveDocs);
        if (filterDocIdSet == null)
        {
            // A null DocIdSet only says the filter accepts nothing in THIS segment.
            // Later segments may still match, so skip this one instead of
            // abandoning the whole search.
            continue;
        }

        int docStart = start;
        if (reader is BoboMultiReader)
        {
            docStart = start + ((BoboMultiReader)reader).SubReaderBase(i);
        }
        collector.SetNextReader(atomicContext);
        validator.SetNextReader(m_subReaders[i], docStart);

        // NOTE: The Weight.Scorer method lost the scoreDocsInOrder and topScorer parameters between
        // Lucene 4.3.0 and 4.8.0. They are not used by BoboBrowse anyway, so the code here diverges
        // from the original Java source to remove these two parameters.

        // Scorer scorer = weight.Scorer(atomicContext, true, false, _subReaders[i].LiveDocs);
        Scorer scorer = weight.GetScorer(atomicContext, m_subReaders[i].LiveDocs);
        if (scorer == null)
        {
            continue;
        }

        collector.SetScorer(scorer);
        DocIdSetIterator filterDocIdIterator = filterDocIdSet.GetIterator(); // CHECKME: use ConjunctionScorer here?

        if (filterDocIdIterator == null)
        {
            continue;
        }

        // Leap-frog the scorer and the filter iterator until one of them runs out.
        int doc = -1;
        target = filterDocIdIterator.NextDoc();
        while (target < DocIdSetIterator.NO_MORE_DOCS)
        {
            if (doc < target)
            {
                doc = scorer.Advance(target);
            }

            if (doc == target) // permitted by filter
            {
                if (validator.Validate(doc))
                {
                    if (mapReduceWrapper != null)
                    {
                        mapReduceWrapper.MapSingleDocument(doc, m_subReaders[i]);
                    }
                    collector.Collect(doc);
                    target = filterDocIdIterator.NextDoc();
                }
                else
                {
                    // skip to the next possible docid
                    target = filterDocIdIterator.Advance(validator.m_nextTarget);
                }
            }
            else // doc > target
            {
                if (doc == DocIdSetIterator.NO_MORE_DOCS)
                {
                    break;
                }
                target = filterDocIdIterator.Advance(doc);
            }
        }

        if (mapReduceWrapper != null)
        {
            mapReduceWrapper.FinalizeSegment(m_subReaders[i], validator.GetCountCollectors());
        }
    }
}
/// <summary>
/// Returns a <see cref="DocIdSet"/> representing the Boolean composition
/// of the filters that have been added. SHOULD clauses are OR-ed first, MUST_NOT
/// clauses are then removed, and MUST clauses are AND-ed last.
/// </summary>
/// <param name="context">The segment to build the set for.</param>
/// <param name="acceptDocs">Bits handed to <c>BitsFilteredDocIdSet.Wrap</c> together with the result.</param>
/// <returns>
/// The wrapped result, or <c>null</c> when SHOULD clauses exist but none produced an
/// iterator, or when a MUST clause produced no iterator.
/// </returns>
public override DocIdSet GetDocIdSet(AtomicReaderContext context, IBits acceptDocs)
{
    FixedBitSet res = null;
    AtomicReader reader = context.AtomicReader;

    // Pass 1: union of all SHOULD clauses.
    bool hasShouldClauses = false;
    foreach (FilterClause fc in clauses)
    {
        if (fc.Occur != Occur.SHOULD)
        {
            continue;
        }

        hasShouldClauses = true;
        DocIdSetIterator disi = GetDISI(fc.Filter, context);
        if (disi == null)
        {
            continue;
        }

        if (res == null)
        {
            res = new FixedBitSet(reader.MaxDoc);
        }
        res.Or(disi);
    }

    if (hasShouldClauses && res == null)
    {
        return(null);
    }

    // Pass 2: subtract every MUST_NOT clause.
    foreach (FilterClause fc in clauses)
    {
        if (fc.Occur != Occur.MUST_NOT)
        {
            continue;
        }

        if (res == null)
        {
            // Nothing to subtract from yet: start from "all documents".
            Debug.Assert(!hasShouldClauses);
            res = new FixedBitSet(reader.MaxDoc);
            res.Set(0, reader.MaxDoc); // NOTE: may set bits on deleted docs
        }

        DocIdSetIterator disi = GetDISI(fc.Filter, context);
        if (disi != null)
        {
            res.AndNot(disi);
        }
    }

    // Pass 3: intersect with every MUST clause.
    foreach (FilterClause fc in clauses)
    {
        if (fc.Occur != Occur.MUST)
        {
            continue;
        }

        DocIdSetIterator disi = GetDISI(fc.Filter, context);
        if (disi == null)
        {
            return(null); // no documents can match
        }

        if (res != null)
        {
            res.And(disi);
        }
        else
        {
            // First contribution: seed the result with this clause's documents.
            res = new FixedBitSet(reader.MaxDoc);
            res.Or(disi);
        }
    }

    return(BitsFilteredDocIdSet.Wrap(res, acceptDocs));
}
/// <summary>
/// Wraps <paramref name="iter"/> and marks the current document as <c>-1</c>.
/// </summary>
/// <param name="iter">The iterator held by this item.</param>
public Item(DocIdSetIterator iter)
{
    this.Iter = iter;
    this.Doc = -1;
}
/// <summary>
/// Scores the whole segment: wires up the collectors, reads the cost of the base
/// query and of the drill-down dimensions, positions every iterator on its first
/// document, and then dispatches to one of the three scoring strategies.
/// </summary>
/// <param name="collector">Receives the real hits.</param>
/// <param name="maxDoc">Must be <see cref="int.MaxValue"/>.</param>
/// <returns>Always <c>false</c>.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown when <paramref name="maxDoc"/> is not <see cref="int.MaxValue"/>.
/// </exception>
public override bool Score(ICollector collector, int maxDoc)
{
    if (maxDoc != int.MaxValue)
    {
        throw new ArgumentOutOfRangeException(nameof(maxDoc), "maxDoc must be System.Int32.MaxValue"); // LUCENENET specific - changed from IllegalArgumentException to ArgumentOutOfRangeException (.NET convention)
    }

    FakeScorer fakeScorer = new FakeScorer(this);
    collector.SetScorer(fakeScorer);
    if (drillDownCollector != null)
    {
        drillDownCollector.SetScorer(fakeScorer);
        drillDownCollector.SetNextReader(context);
    }
    foreach (DocsAndCost dim in dims)
    {
        dim.sidewaysCollector.SetScorer(fakeScorer);
        dim.sidewaysCollector.SetNextReader(context);
    }

    // TODO: if we ever allow null baseScorer ... it will
    // mean we DO score docs out of order ... hmm, or if we
    // change up the order of the conjuntions below
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(baseScorer != null);
    }

    // some scorers, eg ReqExlScorer, can hit NPE if cost is called after nextDoc
    long baseQueryCost = baseScorer.GetCost();

    int numDims = dims.Length;

    // Total cost of the iterator-backed dimensions.
    long drillDownCost = 0;
    foreach (DocsAndCost dim in dims)
    {
        if (dim.bits == null && dim.disi != null)
        {
            drillDownCost += dim.disi.GetCost();
        }
    }

    long drillDownAdvancedCost = (numDims > 1 && dims[1].disi != null)
        ? dims[1].disi.GetCost()
        : 0;

    // Position all scorers to their first matching doc:
    baseScorer.NextDoc();
    int numBits = 0;
    foreach (DocsAndCost dim in dims)
    {
        if (dim.disi != null)
        {
            dim.disi.NextDoc();
        }
        else if (dim.bits != null)
        {
            numBits++;
        }
    }

    // Split the dimensions into the bits-backed and the iterator-backed group,
    // keeping each dimension's sideways collector at the same index.
    IBits[] bits = new IBits[numBits];
    ICollector[] bitsSidewaysCollectors = new ICollector[numBits];

    DocIdSetIterator[] disis = new DocIdSetIterator[numDims - numBits];
    ICollector[] sidewaysCollectors = new ICollector[numDims - numBits];

    int disiUpto = 0;
    int bitsUpto = 0;
    for (int dim = 0; dim < numDims; dim++)
    {
        DocIdSetIterator disi = dims[dim].disi;
        if (dims[dim].bits != null)
        {
            bits[bitsUpto] = dims[dim].bits;
            bitsSidewaysCollectors[bitsUpto] = dims[dim].sidewaysCollector;
            bitsUpto++;
        }
        else
        {
            disis[disiUpto] = disi;
            sidewaysCollectors[disiUpto] = dims[dim].sidewaysCollector;
            disiUpto++;
        }
    }

    bool queryFirst = bitsUpto > 0 || scoreSubDocsAtOnce || baseQueryCost < drillDownCost / 10;
    if (queryFirst)
    {
        DoQueryFirstScoring(collector, disis, sidewaysCollectors, bits, bitsSidewaysCollectors);
    }
    else if (numDims > 1 && (dims[1].disi == null || drillDownAdvancedCost < baseQueryCost / 10))
    {
        DoDrillDownAdvanceScoring(collector, disis, sidewaysCollectors);
    }
    else
    {
        DoUnionScoring(collector, disis, sidewaysCollectors);
    }

    return(false);
}
/// <summary>
/// Converts the configured double ranges to sortable-long ranges, feeds the
/// sortable-long form of every matching document's value into a range counter,
/// and stores the per-range totals in <c>counts</c>.
/// </summary>
/// <param name="valueSource">Supplies the per-document values for each segment.</param>
/// <param name="matchingDocs">Per-segment hits to count.</param>
/// <exception cref="System.ArgumentException">
/// Thrown when the fast-match filter returns a <c>DocIdSet</c> whose <c>GetBits()</c> is <c>null</c>.
/// </exception>
private void Count(ValueSource valueSource, IEnumerable<MatchingDocs> matchingDocs)
{
    DoubleRange[] ranges = (DoubleRange[])this.ranges;

    // Both bounds are passed as inclusive to the long range.
    LongRange[] longRanges = new LongRange[ranges.Length];
    for (int i = 0; i < ranges.Length; i++)
    {
        DoubleRange range = ranges[i];
        longRanges[i] = new LongRange(range.Label, NumericUtils.DoubleToSortableLong(range.minIncl), true, NumericUtils.DoubleToSortableLong(range.maxIncl), true);
    }

    LongRangeCounter counter = new LongRangeCounter(longRanges);

    int missingCount = 0;
    foreach (MatchingDocs hits in matchingDocs)
    {
        FunctionValues fv = valueSource.GetValues(new Dictionary<string, object>(), hits.Context);

        // Every hit is counted up front; documents without a value are subtracted at the end.
        totCount += hits.TotalHits;

        Bits bits;
        if (fastMatchFilter != null)
        {
            DocIdSet dis = fastMatchFilter.GetDocIdSet(hits.Context, null);
            if (dis == null)
            {
                // No documents match
                continue;
            }

            bits = dis.GetBits();
            if (bits == null)
            {
                throw new System.ArgumentException("fastMatchFilter does not implement DocIdSet.bits");
            }
        }
        else
        {
            bits = null;
        }

        DocIdSetIterator docs = hits.Bits.GetIterator();
        int doc;
        while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
        {
            // Rejected by the fast-match filter. The iterator supplies the next
            // document itself, so no manual increment of 'doc' is needed here.
            if (bits != null && bits.Get(doc) == false)
            {
                continue;
            }

            // Skip missing docs:
            if (fv.Exists(doc))
            {
                counter.Add(NumericUtils.DoubleToSortableLong(fv.DoubleVal(doc)));
            }
            else
            {
                missingCount++;
            }
        }
    }

    // The value returned by FillCounts is folded into the missing total.
    missingCount += counter.FillCounts(counts);
    totCount -= missingCount;
}
/// <summary>
/// Asserts that the doc id set <paramref name="ds2"/> holds exactly the bits set in
/// <paramref name="ds1"/>. Three views are checked: plain <c>NextDoc</c> iteration,
/// a random mix of <c>NextDoc</c> and <c>Advance</c>, and (when available) the
/// random-access bits of <paramref name="ds2"/>.
/// </summary>
/// <param name="numBits">Bit count used to size the random advance distance.</param>
/// <param name="ds1">The reference bits.</param>
/// <param name="ds2">The set under test.</param>
public virtual void AssertEquals(int numBits, BitArray ds1, T ds2)
{
    // nextDoc
    DocIdSetIterator it2 = ds2.GetIterator();
    if (it2 == null)
    {
        // A null iterator stands for the empty set.
        Assert.AreEqual(-1, ds1.NextSetBit(0));
    }
    else
    {
        Assert.AreEqual(-1, it2.DocID());
        int expected = ds1.NextSetBit(0);
        while (expected != -1)
        {
            Assert.AreEqual(expected, it2.NextDoc());
            Assert.AreEqual(expected, it2.DocID());
            expected = ds1.NextSetBit(expected + 1);
        }
        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, it2.NextDoc());
        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, it2.DocID());
    }

    // nextDoc / advance
    it2 = ds2.GetIterator();
    if (it2 == null)
    {
        Assert.AreEqual(-1, ds1.NextSetBit(0));
    }
    else
    {
        int doc = -1;
        while (doc != DocIdSetIterator.NO_MORE_DOCS)
        {
            if (Random().NextBoolean())
            {
                doc = ds1.NextSetBit(doc + 1);
                if (doc == -1)
                {
                    doc = DocIdSetIterator.NO_MORE_DOCS;
                }
                Assert.AreEqual(doc, it2.NextDoc());
                Assert.AreEqual(doc, it2.DocID());
            }
            else
            {
                // Jump ahead by a random distance, either short or proportional to numBits.
                int target = doc + 1 + Random().Next(Random().NextBoolean() ? 64 : Math.Max(numBits / 8, 1));
                doc = ds1.NextSetBit(target);
                if (doc == -1)
                {
                    doc = DocIdSetIterator.NO_MORE_DOCS;
                }
                Assert.AreEqual(doc, it2.Advance(target));
                Assert.AreEqual(doc, it2.DocID());
            }
        }
    }

    // bits()
    Bits bits = ds2.GetBits();
    if (bits != null)
    {
        // test consistency between bits and iterator
        it2 = ds2.GetIterator();
        int previousDoc = -1;
        int doc = it2.NextDoc();
        while (true)
        {
            // Every position between two consecutive iterator docs must be unset.
            int max = doc == DocIdSetIterator.NO_MORE_DOCS ? bits.Length() : doc;
            for (int i = previousDoc + 1; i < max; ++i)
            {
                Assert.AreEqual(false, bits.Get(i));
            }
            if (doc == DocIdSetIterator.NO_MORE_DOCS)
            {
                break;
            }
            Assert.AreEqual(true, bits.Get(doc));

            previousDoc = doc;
            doc = it2.NextDoc();
        }
    }
}
/// <summary>
/// Moves <c>reqScorer</c> forward until it sits on a document that
/// <c>exclDisi</c> does not match.
/// <para/>
/// On entry both <c>reqScorer</c> and <c>exclDisi</c> must be non-null (they are
/// dereferenced immediately), and <c>reqScorer</c> must already be positioned on
/// a document, which may itself be excluded.
/// <para/>
/// Side effects: <c>exclDisi</c> is set to null once it is exhausted, and
/// <c>reqScorer</c> is set to null once it runs out of documents.
/// </summary>
/// <returns>
/// The doc id of the first non-excluded required document at or after the current
/// position, or <c>NO_MORE_DOCS</c> when there is none.
/// </returns>
private int ToNonExcluded()
{
    int excludedDoc = exclDisi.DocID();
    int candidate = reqScorer.DocID(); // may be excluded

    while (true)
    {
        if (candidate < excludedDoc)
        {
            // The required doc lies before the next exclusion: it is a hit.
            return candidate;
        }

        if (candidate > excludedDoc)
        {
            // Let the exclusion iterator catch up with the candidate.
            excludedDoc = exclDisi.Advance(candidate);
            if (excludedDoc == NO_MORE_DOCS)
            {
                exclDisi = null; // exhausted, no more exclusions
                return candidate;
            }

            if (excludedDoc > candidate)
            {
                return candidate; // not excluded
            }
        }

        // Here candidate == excludedDoc, i.e. the candidate is excluded: try the next one.
        candidate = reqScorer.NextDoc();
        if (candidate == NO_MORE_DOCS)
        {
            break;
        }
    }

    reqScorer = null; // exhausted, nothing left
    return NO_MORE_DOCS;
}
/// <summary>
/// Combines <paramref name="dis"/> into <paramref name="result"/> in place using
/// the operation selected by <paramref name="logic"/> (<c>OR</c>, <c>AND</c>,
/// <c>ANDNOT</c> or <c>XOR</c>). Any other value re-enters this method with
/// <c>DEFAULT</c>.
/// </summary>
/// <param name="result">Bit set that is modified in place.</param>
/// <param name="logic">Operation code.</param>
/// <param name="dis">
/// Set to combine; a null set, or one whose iterator is null, is treated as empty.
/// </param>
/// <exception cref="System.IO.IOException"/>
private void DoChain(FixedBitSet result, int logic, DocIdSet dis)
{
    FixedBitSet fixedBits = dis as FixedBitSet;
    if (fixedBits != null)
    {
        // optimized case for FixedBitSets
        if (logic == OR)
        {
            result.Or(fixedBits);
        }
        else if (logic == AND)
        {
            result.And(fixedBits);
        }
        else if (logic == ANDNOT)
        {
            result.AndNot(fixedBits);
        }
        else if (logic == XOR)
        {
            result.Xor(fixedBits);
        }
        else
        {
            DoChain(result, DEFAULT, dis);
        }

        return;
    }

    // General case: work through an iterator, substituting the empty one when
    // there is nothing to iterate.
    DocIdSetIterator disi = dis != null ? dis.GetIterator() : null;
    if (disi == null)
    {
        disi = DocIdSetIterator.GetEmpty();
    }

    if (logic == OR)
    {
        result.Or(disi);
    }
    else if (logic == AND)
    {
        result.And(disi);
    }
    else if (logic == ANDNOT)
    {
        result.AndNot(disi);
    }
    else if (logic == XOR)
    {
        result.Xor(disi);
    }
    else
    {
        DoChain(result, DEFAULT, dis);
    }
}
/// <summary>
/// In-place XOR with the documents produced by <paramref name="iter"/>: the bit
/// of every returned doc id is flipped. Iteration stops at the first doc id that
/// is not below <c>NumBits</c>.
/// </summary>
/// <param name="iter">Iterator supplying the doc ids to flip.</param>
public void Xor(DocIdSetIterator iter)
{
    for (int docId = iter.NextDoc(); docId < NumBits; docId = iter.NextDoc())
    {
        // Flip takes a half-open range, so [docId, docId + 1) is the single bit.
        Flip(docId, docId + 1);
    }
}
/// <summary>
/// Creates the <c>DrillSidewaysScorer</c> for one segment. One <c>DocsAndCost</c>
/// entry is prepared per drill-down dimension, holding either random-access bits
/// or an iterator, depending on what the dimension's filter or weight provides.
/// </summary>
/// <param name="context">Segment to score.</param>
/// <param name="scoreDocsInOrder">Not used by this implementation.</param>
/// <param name="acceptDocs">Passed to the base weight only.</param>
/// <returns>
/// The bulk scorer, or null when more than one dimension yielded a null iterator
/// or when the base weight has no scorer for this segment.
/// </returns>
public override BulkScorer GetBulkScorer(AtomicReaderContext context, bool scoreDocsInOrder, IBits acceptDocs)
{
    // TODO: it could be better if we take acceptDocs
    // into account instead of baseScorer?
    Scorer baseScorer = baseWeight.GetScorer(context, acceptDocs);

    DrillSidewaysScorer.DocsAndCost[] dims = new DrillSidewaysScorer.DocsAndCost[drillDowns.Length];
    int nullIterators = 0;
    for (int i = 0; i < dims.Length; i++)
    {
        dims[i] = new DrillSidewaysScorer.DocsAndCost();
        dims[i].sidewaysCollector = outerInstance.drillSidewaysCollectors[i];

        DocIdSetIterator iterator;
        Filter filter = drillDowns[i] as Filter;
        if (filter == null)
        {
            iterator = ((Weight)drillDowns[i]).GetScorer(context, null);
        }
        else
        {
            // Pass null for acceptDocs because we already
            // passed it to baseScorer and baseScorer is
            // MUST'd here
            DocIdSet docIdSet = filter.GetDocIdSet(context, null);
            if (docIdSet == null)
            {
                // Left without bits or iterator, and not counted as a null iterator.
                continue;
            }

            IBits randomAccess = docIdSet.Bits;
            if (randomAccess != null)
            {
                // TODO: this logic is too naive: the
                // existence of bits() in DIS today means
                // either "I'm a cheap FixedBitSet so apply me down
                // low as you decode the postings" or "I'm so
                // horribly expensive so apply me after all
                // other Query/Filter clauses pass"

                // Filter supports random access; use that to
                // prevent .advance() on costly filters:
                dims[i].bits = randomAccess;

                // TODO: Filter needs to express its expected
                // cost somehow, before pulling the iterator;
                // we should use that here to set the order to
                // check the filters:
                continue;
            }

            iterator = docIdSet.GetIterator();
        }

        if (iterator == null)
        {
            nullIterators++;
        }
        else
        {
            dims[i].disi = iterator;
        }
    }

    // If more than one dim has no matches, then there
    // are no hits nor drill-sideways counts.
    if (nullIterators > 1)
    {
        return null;
    }

    // Sort drill-downs by most restrictive first:
    Array.Sort(dims);

    if (baseScorer == null)
    {
        return null;
    }

    return new DrillSidewaysScorer(context, baseScorer, outerInstance.drillDownCollector, dims, outerInstance.scoreSubDocsAtOnce);
}
/// <summary>
/// In-place AND NOT with the documents produced by <paramref name="iter"/>: the
/// bit of every returned doc id is cleared.
/// <para/>
/// An <c>OpenBitSetIterator</c> or <c>FixedBitSetIterator</c> that has not been
/// advanced yet (<c>DocID() == -1</c>) is handled by the array-based
/// <c>AndNot</c> overload on its backing data; any other iterator is consumed
/// doc by doc until it returns an id that is not below <c>NumBits</c>.
/// </summary>
/// <param name="iter">Iterator supplying the doc ids to clear.</param>
public void AndNot(DocIdSetIterator iter)
{
    OpenBitSetIterator openIter = iter as OpenBitSetIterator;
    if (openIter != null && iter.DocID() == -1)
    {
        AndNot(openIter.Arr, openIter.Words);
        // advance after last doc that would be accepted if standard
        // iteration is used (to exhaust it):
        openIter.Advance(NumBits);
        return;
    }

    FixedBitSetIterator fixedIter = iter as FixedBitSetIterator;
    if (fixedIter != null && iter.DocID() == -1)
    {
        AndNot(fixedIter.bits, fixedIter.NumWords);
        // advance after last doc that would be accepted if standard
        // iteration is used (to exhaust it):
        fixedIter.Advance(NumBits);
        return;
    }

    for (int docId = iter.NextDoc(); docId < NumBits; docId = iter.NextDoc())
    {
        Clear(docId);
    }
}
/// <summary>
/// Randomized join test driver. For each index iteration a fresh directory is
/// filled through <c>CreateContext</c>; for each search iteration a random value
/// is picked, a join query is built with <c>JoinUtil.CreateJoinQuery</c>, and the
/// collected documents and top docs are compared with the expectations computed
/// by <c>CreateExpectedResult</c> and <c>CreateExpectedTopDocs</c>.
/// </summary>
/// <param name="multipleValuesPerDocument">Passed to context creation and to the join query.</param>
/// <param name="maxIndexIter">Number of indexes to build.</param>
/// <param name="maxSearchIter">Number of searches to run against each index.</param>
/// <param name="numberOfDocumentsToIndex">Passed to <c>CreateContext</c>.</param>
private void ExecuteRandomJoin(bool multipleValuesPerDocument, int maxIndexIter, int maxSearchIter, int numberOfDocumentsToIndex)
{
    for (int indexIter = 1; indexIter <= maxIndexIter; indexIter++)
    {
        if (Verbose)
        {
            Console.WriteLine("indexIter=" + indexIter);
        }

        Directory dir = NewDirectory();
        RandomIndexWriter w = new RandomIndexWriter(Random, dir,
            NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.KEYWORD, false))
                .SetMergePolicy(NewLogMergePolicy()));
        bool scoreDocsInOrder = TestJoinUtil.Random.NextBoolean();
        IndexIterationContext context = CreateContext(numberOfDocumentsToIndex, w, multipleValuesPerDocument, scoreDocsInOrder);

        IndexReader topLevelReader = w.GetReader();
        w.Dispose();

        for (int searchIter = 1; searchIter <= maxSearchIter; searchIter++)
        {
            if (Verbose)
            {
                Console.WriteLine("searchIter=" + searchIter);
            }

            IndexSearcher indexSearcher = NewSearcher(topLevelReader);

            int r = Random.Next(context.RandomUniqueValues.Length);
            bool from = context.RandomFrom[r];
            string randomValue = context.RandomUniqueValues[r];
            FixedBitSet expectedResult = CreateExpectedResult(randomValue, from, indexSearcher.IndexReader, context);

            Query actualQuery = new TermQuery(new Term("value", randomValue));
            if (Verbose)
            {
                Console.WriteLine("actualQuery=" + actualQuery);
            }

            // Picks a random enum member by index.
            var scoreModeLength = Enum.GetNames(typeof(ScoreMode)).Length;
            ScoreMode scoreMode = (ScoreMode)Random.Next(scoreModeLength);
            if (Verbose)
            {
                Console.WriteLine("scoreMode=" + scoreMode);
            }

            Query joinQuery;
            if (from)
            {
                joinQuery = JoinUtil.CreateJoinQuery("from", multipleValuesPerDocument, "to", actualQuery, indexSearcher, scoreMode);
            }
            else
            {
                joinQuery = JoinUtil.CreateJoinQuery("to", multipleValuesPerDocument, "from", actualQuery, indexSearcher, scoreMode);
            }

            if (Verbose)
            {
                Console.WriteLine("joinQuery=" + joinQuery);
            }

            // Need to know all documents that have matches. TopDocs doesn't give me that and then I'd be also testing TopDocsCollector...
            FixedBitSet actualResult = new FixedBitSet(indexSearcher.IndexReader.MaxDoc);
            TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.Create(10, false);
            indexSearcher.Search(joinQuery, new CollectorAnonymousInnerClassHelper2(this, scoreDocsInOrder, context, actualResult, topScoreDocCollector));

            // Asserting bit set...
            if (Verbose)
            {
                Console.WriteLine("expected cardinality:" + expectedResult.Cardinality());
                DocIdSetIterator iterator = expectedResult.GetIterator();
                for (int doc = iterator.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.NextDoc())
                {
                    Console.WriteLine(string.Format("Expected doc[{0}] with id value {1}", doc, indexSearcher.Doc(doc).Get("id")));
                }

                Console.WriteLine("actual cardinality:" + actualResult.Cardinality());
                iterator = actualResult.GetIterator();
                for (int doc = iterator.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.NextDoc())
                {
                    Console.WriteLine(string.Format("Actual doc[{0}] with id value {1}", doc, indexSearcher.Doc(doc).Get("id")));
                }
            }

            assertEquals(expectedResult, actualResult);

            // Asserting TopDocs...
            TopDocs expectedTopDocs = CreateExpectedTopDocs(randomValue, from, scoreMode, context);
            TopDocs actualTopDocs = topScoreDocCollector.GetTopDocs();
            assertEquals(expectedTopDocs.TotalHits, actualTopDocs.TotalHits);
            assertEquals(expectedTopDocs.ScoreDocs.Length, actualTopDocs.ScoreDocs.Length);
            if (scoreMode == ScoreMode.None)
            {
                // No scores to compare in this mode.
                continue;
            }

            assertEquals(expectedTopDocs.MaxScore, actualTopDocs.MaxScore, 0.0f);
            for (int i = 0; i < expectedTopDocs.ScoreDocs.Length; i++)
            {
                if (Verbose)
                {
                    // Write the diagnostics out; formatting the string alone prints nothing.
                    Console.Write("Expected doc: {0} | Actual doc: {1}\n", expectedTopDocs.ScoreDocs[i].Doc, actualTopDocs.ScoreDocs[i].Doc);
                    Console.Write("Expected score: {0} | Actual score: {1}\n", expectedTopDocs.ScoreDocs[i].Score, actualTopDocs.ScoreDocs[i].Score);
                }

                assertEquals(expectedTopDocs.ScoreDocs[i].Doc, actualTopDocs.ScoreDocs[i].Doc);
                assertEquals(expectedTopDocs.ScoreDocs[i].Score, actualTopDocs.ScoreDocs[i].Score, 0.0f);
                Explanation explanation = indexSearcher.Explain(joinQuery, expectedTopDocs.ScoreDocs[i].Doc);
                assertEquals(expectedTopDocs.ScoreDocs[i].Score, explanation.Value, 0.0f);
            }
        }

        topLevelReader.Dispose();
        dir.Dispose();
    }
}