/// <summary>
/// Does in-place AND NOT of the bits provided by the iterator:
/// every doc the iterator produces is cleared from this set.
/// The iterator is left exhausted.
/// </summary>
public void AndNot(DocIdSetIterator iter)
{
    if (iter is OpenBitSetIterator && iter.DocID() == -1)
    {
        // Fast path: operate word-wise on the iterator's backing array.
        OpenBitSetIterator openIter = (OpenBitSetIterator)iter;
        AndNot(openIter.Arr, openIter.Words);
        // Advance past the last doc standard iteration would have
        // accepted, so the iterator ends up exhausted either way.
        openIter.Advance(NumBits);
    }
    else if (iter is FixedBitSetIterator && iter.DocID() == -1)
    {
        // Same fast path for the fixed-width bit set iterator.
        FixedBitSetIterator fixedIter = (FixedBitSetIterator)iter;
        AndNot(fixedIter.bits, fixedIter.NumWords);
        fixedIter.Advance(NumBits);
    }
    else
    {
        // Generic path: clear each doc the iterator yields; docs at or
        // beyond NumBits cannot be set here, so stop once we pass them.
        for (int doc = iter.NextDoc(); doc < NumBits; doc = iter.NextDoc())
        {
            Clear(doc);
        }
    }
}
/// <summary>Advance to a non-excluded doc.
/// <br/>On entry:
/// <list type="bullet">
/// <item>reqScorer != null,</item>
/// <item>exclScorer != null,</item>
/// <item>reqScorer was advanced once via next() or skipTo()
/// and reqScorer.doc() may still be excluded.</item>
/// </list>
/// Advances reqScorer to a non-excluded required doc, if any.
/// As a side effect, sets exclDisi to null once the exclusions are
/// exhausted, and reqScorer to null once the required docs are exhausted.
/// </summary>
/// <returns>the doc id of the first non-excluded required doc,
/// or NO_MORE_DOCS if there is none.</returns>
private int ToNonExcluded()
{
    int exclDoc = exclDisi.DocID();
    int reqDoc = reqScorer.DocID(); // may be excluded
    do
    {
        if (reqDoc < exclDoc)
        {
            // reqScorer advanced to before exclScorer, ie. not excluded
            return (reqDoc);
        }
        else if (reqDoc > exclDoc)
        {
            exclDoc = exclDisi.Advance(reqDoc);
            if (exclDoc == NO_MORE_DOCS)
            {
                exclDisi = null; // exhausted, no more exclusions
                return (reqDoc);
            }
            if (exclDoc > reqDoc)
            {
                return (reqDoc); // not excluded
            }
        }
        // reqDoc == exclDoc here: the current required doc is excluded,
        // so fall through and try the next required doc.
    } while ((reqDoc = reqScorer.NextDoc()) != NO_MORE_DOCS);
    reqScorer = null; // exhausted, nothing left
    return (NO_MORE_DOCS);
}
/// <summary>
/// Does in-place AND of the bits provided by the iterator: only bits
/// that are set both in this set and produced by the iterator survive.
/// The iterator is left exhausted.
/// </summary>
public void And(DocIdSetIterator iter)
{
    if (iter is OpenBitSetIterator && iter.DocID() == -1)
    {
        // Fast path: intersect word-wise with the iterator's backing array.
        OpenBitSetIterator openIter = (OpenBitSetIterator)iter;
        And(openIter.Arr, openIter.Words);
        // Advance past the last doc standard iteration would have
        // accepted, so the iterator ends up exhausted either way.
        openIter.Advance(NumBits);
    }
    else if (iter is FixedBitSetIterator && iter.DocID() == -1)
    {
        // Same fast path for the fixed-width bit set iterator.
        FixedBitSetIterator fixedIter = (FixedBitSetIterator)iter;
        And(fixedIter.bits, fixedIter.NumWords);
        fixedIter.Advance(NumBits);
    }
    else
    {
        if (NumBits == 0)
        {
            return;
        }
        // Generic path: walk our set bits, let the iterator advance to
        // each one, and clear every gap the iterator skipped over.
        int cursor = NextSetBit(0);
        int matched;
        while (cursor != -1 && (matched = iter.Advance(cursor)) < NumBits)
        {
            // Clear [cursor, matched): those bits are set here but
            // absent from the iterator.
            Clear(cursor, matched);
            matched++;
            cursor = matched < NumBits ? NextSetBit(matched) : -1;
        }
        // Iterator is exhausted (or past NumBits): clear the tail.
        if (cursor != -1)
        {
            Clear(cursor, NumBits);
        }
    }
}
/// <summary>Adds a DocIdSetIterator to the DisiDocQueue in log(size) time
/// if either the DisiDocQueue is not full, or not lessThan(disi, top()).</summary>
/// <param name="disi">the iterator to add</param>
/// <returns>true if the DocIdSetIterator was added, false otherwise.</returns>
public bool Insert(DocIdSetIterator disi)
{
    // Room left in the queue: a plain put always succeeds.
    if (size < maxSize)
    {
        Put(disi);
        return true;
    }

    // Queue is full: replace the top only if the new entry does not
    // order before it (heap[1] is top()).
    int docNr = disi.DocID();
    if (size > 0 && docNr >= topHDD.Doc)
    {
        heap[1] = new HeapedDisiDoc(disi, docNr);
        DownHeap();
        return true;
    }
    return false;
}
/// <summary>
/// Renders the doc ids of the given <see cref="DocIdSet"/> as a
/// bracketed, comma-separated list, e.g. "[1,5,42]".
/// </summary>
/// <param name="docIdSet">the set to render; must not be null.</param>
/// <returns>the string form; "[]" when the set is empty.</returns>
public static string AsString(this DocIdSet docIdSet)
{
    DocIdSetIterator iter = docIdSet.Iterator();
    StringBuilder buf = new StringBuilder();
    buf.Append("[");
    // Iterator() may return null for an empty set (see the null checks in
    // AssertEquals elsewhere in this file) — render as "[]" in that case
    // instead of throwing a NullReferenceException.
    if (iter != null)
    {
        bool firstTime = true;
        while (iter.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
        {
            if (firstTime)
            {
                firstTime = false;
            }
            else
            {
                buf.Append(",");
            }
            buf.Append(iter.DocID());
        }
    }
    buf.Append("]");
    return buf.ToString();
}
/// <summary>
/// Returns the score for the current doc: the wrapped scorer's score at
/// the doc-set iterator's current doc, scaled by the parent's boost.
/// Returns 1.0 when no scorer is set.
/// </summary>
public override float Score()
{
    if (_scorer == null)
    {
        return 1.0f;
    }
    return _scorer.Score(_docSetIter.DocID()) * _parent.Boost;
}
/// <summary>
/// Assert that the content of the <seealso cref="DocIdSet"/> is the same as the
/// content of the <seealso cref="BitSet"/>. Checks three access paths against
/// the expected bits in <paramref name="ds1"/>: pure NextDoc iteration, a random
/// mix of NextDoc and Advance, and (when present) the random-access Bits view.
/// </summary>
/// <param name="numBits">size of the doc id space; used to scale random advance targets.</param>
/// <param name="ds1">the expected bits.</param>
/// <param name="ds2">the doc id set under test.</param>
public virtual void AssertEquals(int numBits, BitArray ds1, T ds2)
{
    // nextDoc: full sequential iteration must visit exactly the set bits.
    DocIdSetIterator it2 = ds2.GetIterator();
    if (it2 == null)
    {
        // A null iterator is only legal for an empty set.
        Assert.AreEqual(-1, ds1.NextSetBit(0));
    }
    else
    {
        // A fresh iterator must be positioned before the first doc.
        Assert.AreEqual(-1, it2.DocID());
        for (int doc = ds1.NextSetBit(0); doc != -1; doc = ds1.NextSetBit(doc + 1))
        {
            Assert.AreEqual(doc, it2.NextDoc());
            Assert.AreEqual(doc, it2.DocID());
        }
        // Once exhausted, both NextDoc and DocID must report NO_MORE_DOCS.
        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, it2.NextDoc());
        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, it2.DocID());
    }

    // nextDoc / advance: a random interleaving of the two must agree with
    // the expected bits at every step.
    it2 = ds2.GetIterator();
    if (it2 == null)
    {
        Assert.AreEqual(-1, ds1.NextSetBit(0));
    }
    else
    {
        for (int doc = -1; doc != DocIdSetIterator.NO_MORE_DOCS;)
        {
            if (Random().NextBoolean())
            {
                // Step to the next set bit via NextDoc.
                doc = ds1.NextSetBit(doc + 1);
                if (doc == -1)
                {
                    doc = DocIdSetIterator.NO_MORE_DOCS;
                }
                Assert.AreEqual(doc, it2.NextDoc());
                Assert.AreEqual(doc, it2.DocID());
            }
            else
            {
                // Jump ahead by a random amount via Advance; targets are
                // either small (<= 64) or scaled to numBits / 8.
                int target = doc + 1 + Random().Next(Random().NextBoolean() ? 64 : Math.Max(numBits / 8, 1));
                doc = ds1.NextSetBit(target);
                if (doc == -1)
                {
                    doc = DocIdSetIterator.NO_MORE_DOCS;
                }
                Assert.AreEqual(doc, it2.Advance(target));
                Assert.AreEqual(doc, it2.DocID());
            }
        }
    }

    // bits(): when a random-access view exists, it must agree with the
    // iterator — false on every gap, true on every returned doc.
    Bits bits = ds2.GetBits();
    if (bits != null)
    {
        // test consistency between bits and iterator
        it2 = ds2.GetIterator();
        for (int previousDoc = -1, doc = it2.NextDoc(); ; previousDoc = doc, doc = it2.NextDoc())
        {
            // All positions strictly between consecutive docs must be clear.
            int max = doc == DocIdSetIterator.NO_MORE_DOCS ? bits.Length() : doc;
            for (int i = previousDoc + 1; i < max; ++i)
            {
                Assert.AreEqual(false, bits.Get(i));
            }
            if (doc == DocIdSetIterator.NO_MORE_DOCS)
            {
                break;
            }
            Assert.AreEqual(true, bits.Get(doc));
        }
    }
}
/// <summary>
/// Scores docs chunk by chunk: fills a chunk from the base scorer, then
/// overlays each drill-down dim's iterator, tracking per-slot how many
/// dims matched and which single dim (if any) missed. A doc matching all
/// dims is a hit; a doc missing exactly one dim is a near-miss routed to
/// that dim's sideways collector. This is basically a specialized
/// BooleanScorer for the minShouldMatch = N-1 case.
/// </summary>
private void DoUnionScoring(Collector collector, DocIdSetIterator[] disis, Collector[] sidewaysCollectors)
{
    int maxDoc = context.Reader.MaxDoc;
    int numDims = dims.Length;

    // TODO: maybe a class like BS, instead of parallel arrays
    // Per-slot state for the current chunk; slot = docID & MASK.
    int[] filledSlots = new int[CHUNK];
    int[] docIDs = new int[CHUNK];
    float[] scores = new float[CHUNK];
    int[] missingDims = new int[CHUNK];
    int[] counts = new int[CHUNK];

    // Sentinel so slot 0 is not mistaken for doc 0 on the first chunk.
    docIDs[0] = -1;

    int nextChunkStart = CHUNK;
    while (true)
    {
        int filledCount = 0;
        int docID = baseScorer.DocID();

        // Fill the chunk from the base scorer; every base hit gets a slot.
        while (docID < nextChunkStart)
        {
            int slot = docID & MASK;
            // Mark slot as valid:
            Debug.Assert(docIDs[slot] != docID, "slot=" + slot + " docID=" + docID);
            docIDs[slot] = docID;
            scores[slot] = baseScorer.Score();
            filledSlots[filledCount++] = slot;
            missingDims[slot] = 0;
            counts[slot] = 1; // base query counts as the first match
            docID = baseScorer.NextDoc();
        }

        if (filledCount == 0)
        {
            if (nextChunkStart >= maxDoc)
            {
                break;
            }
            nextChunkStart += CHUNK;
            continue;
        }

        // First drill-down dim, basically adds SHOULD onto the baseQuery:
        DocIdSetIterator disi = disis[0];
        if (disi != null)
        {
            docID = disi.DocID();
            while (docID < nextChunkStart)
            {
                int slot = docID & MASK;
                if (docIDs[slot] == docID)
                {
                    // Dim 0 matched: next dim that could miss is dim 1.
                    missingDims[slot] = 1;
                    counts[slot] = 2;
                }
                docID = disi.NextDoc();
            }
        }

        // Remaining dims: a doc stays in the running only while it has
        // missed at most one dim so far (counts[slot] >= dim).
        for (int dim = 1; dim < numDims; dim++)
        {
            disi = disis[dim];
            if (disi != null)
            {
                docID = disi.DocID();
                while (docID < nextChunkStart)
                {
                    int slot = docID & MASK;
                    if (docIDs[slot] == docID && counts[slot] >= dim)
                    {
                        // This doc is still in the running...
                        // TODO: single-valued dims will always be true
                        // below; we could somehow specialize
                        if (missingDims[slot] >= dim)
                        {
                            // No dim missed yet up to here.
                            missingDims[slot] = dim + 1;
                            counts[slot] = dim + 2;
                        }
                        else
                        {
                            // Exactly one earlier dim missed; keep counting.
                            counts[slot] = dim + 1;
                        }
                    }
                    docID = disi.NextDoc();
                }
            }
        }

        // Collect:
        for (int i = 0; i < filledCount; i++)
        {
            // NOTE: This is actually in-order collection, because we only
            // accept docs originally returned by the baseScorer (ie that
            // Scorer is AND'd).
            int slot = filledSlots[i];
            collectDocID = docIDs[slot];
            collectScore = scores[slot];
            if (counts[slot] == 1 + numDims)
            {
                // Matched base + every dim: a full hit.
                CollectHit(collector, sidewaysCollectors);
            }
            else if (counts[slot] == numDims)
            {
                // Missed exactly one dim: sideways-collect for that dim.
                CollectNearMiss(sidewaysCollectors[missingDims[slot]]);
            }
        }

        if (nextChunkStart >= maxDoc)
        {
            break;
        }
        nextChunkStart += CHUNK;
    }
}
/// <summary>
/// Used when drill downs are highly constraining vs baseQuery.
/// Seeds each chunk from dims 0 and 1 (any hit or near-miss must match at
/// least one of them), then folds in the base scorer via Advance, and
/// finally overlays dims 2+ as in the union scorer.
/// </summary>
private void DoDrillDownAdvanceScoring(Collector collector, DocIdSetIterator[] disis, Collector[] sidewaysCollectors)
{
    int maxDoc = context.Reader.MaxDoc;
    int numDims = dims.Length;

    // TODO: maybe a class like BS, instead of parallel arrays
    // Per-slot state for the current chunk; slot = docID & MASK.
    int[] filledSlots = new int[CHUNK];
    int[] docIDs = new int[CHUNK];
    float[] scores = new float[CHUNK];
    int[] missingDims = new int[CHUNK];
    int[] counts = new int[CHUNK];

    // Sentinel so slot 0 is not mistaken for doc 0 on the first chunk.
    docIDs[0] = -1;
    int nextChunkStart = CHUNK;

    // Slots touched by dim 0 or dim 1 in the current chunk.
    FixedBitSet seen = new FixedBitSet(CHUNK);

    while (true)
    {
        // First dim: seed slots for every doc it matches.
        DocIdSetIterator disi = disis[0];
        if (disi != null)
        {
            int docID = disi.DocID();
            while (docID < nextChunkStart)
            {
                int slot = docID & MASK;
                if (docIDs[slot] != docID)
                {
                    seen.Set(slot);
                    // Mark slot as valid:
                    docIDs[slot] = docID;
                    missingDims[slot] = 1; // dim 0 matched; dim 1 may miss
                    counts[slot] = 1;
                }
                docID = disi.NextDoc();
            }
        }

        // Second dim: either seed a new slot (dim 0 missed) or upgrade an
        // existing one (both dims matched).
        disi = disis[1];
        if (disi != null)
        {
            int docID = disi.DocID();
            while (docID < nextChunkStart)
            {
                int slot = docID & MASK;
                if (docIDs[slot] != docID)
                {
                    // Mark slot as valid:
                    seen.Set(slot);
                    docIDs[slot] = docID;
                    missingDims[slot] = 0; // dim 0 already missed
                    counts[slot] = 1;
                }
                else
                {
                    // TODO: single-valued dims will always be true
                    // below; we could somehow specialize
                    if (missingDims[slot] >= 1)
                    {
                        // Dim 0 matched too: no miss so far.
                        missingDims[slot] = 2;
                        counts[slot] = 2;
                    }
                    else
                    {
                        counts[slot] = 1;
                    }
                }
                docID = disi.NextDoc();
            }
        }

        // After this we can "upgrade" to conjunction, because any doc not
        // seen by either dim 0 or dim 1 cannot be a hit or a near miss.

        // Fold in baseScorer, using advance:
        int filledCount = 0;
        int slot0 = 0;
        while (slot0 < CHUNK && (slot0 = seen.NextSetBit(slot0)) != -1)
        {
            int ddDocID = docIDs[slot0];
            Debug.Assert(ddDocID != -1);
            int baseDocID = baseScorer.DocID();
            if (baseDocID < ddDocID)
            {
                baseDocID = baseScorer.Advance(ddDocID);
            }
            if (baseDocID == ddDocID)
            {
                // Base query matched too: keep this slot.
                scores[slot0] = baseScorer.Score();
                filledSlots[filledCount++] = slot0;
                counts[slot0]++;
            }
            else
            {
                // Base query missed: drop the slot entirely.
                docIDs[slot0] = -1;
                // TODO: we could jump slot0 forward to the baseDocID, but
                // we'd need to set docIDs for intervening slots to -1
            }
            slot0++;
        }
        seen.Clear(0, CHUNK);

        if (filledCount == 0)
        {
            if (nextChunkStart >= maxDoc)
            {
                break;
            }
            nextChunkStart += CHUNK;
            continue;
        }

        // TODO: factor this out & share w/ union scorer, except we start
        // from dim=2 instead:
        for (int dim = 2; dim < numDims; dim++)
        {
            disi = disis[dim];
            if (disi != null)
            {
                int docID = disi.DocID();
                while (docID < nextChunkStart)
                {
                    int slot = docID & MASK;
                    if (docIDs[slot] == docID && counts[slot] >= dim)
                    {
                        // TODO: single-valued dims will always be true
                        // below; we could somehow specialize
                        if (missingDims[slot] >= dim)
                        {
                            // No dim missed yet up to here.
                            missingDims[slot] = dim + 1;
                            counts[slot] = dim + 2;
                        }
                        else
                        {
                            // Exactly one earlier dim missed; keep counting.
                            counts[slot] = dim + 1;
                        }
                    }
                    // TODO: sometimes use advance?
                    docID = disi.NextDoc();
                }
            }
        }

        // Collect:
        for (int i = 0; i < filledCount; i++)
        {
            int slot = filledSlots[i];
            collectDocID = docIDs[slot];
            collectScore = scores[slot];
            if (counts[slot] == 1 + numDims)
            {
                // Matched base + every dim: a full hit.
                CollectHit(collector, sidewaysCollectors);
            }
            else if (counts[slot] == numDims)
            {
                // Missed exactly one dim: sideways-collect for that dim.
                CollectNearMiss(sidewaysCollectors[missingDims[slot]]);
            }
        }

        if (nextChunkStart >= maxDoc)
        {
            break;
        }
        nextChunkStart += CHUNK;
    }
}
/// <summary>
/// Used when base query is highly constraining vs the drilldowns, or when
/// the docs must be scored at once (i.e., like BooleanScorer2, not
/// BooleanScorer). In this case we just .next() on base and .advance() on
/// the dim filters. A doc passing all filters is collected as a hit; a doc
/// failing exactly one filter is routed to that filter's sideways
/// collector; failing two or more filters skips the doc.
/// </summary>
private void DoQueryFirstScoring(Collector collector, DocIdSetIterator[] disis, Collector[] sidewaysCollectors, Bits[] bits, Collector[] bitsSidewaysCollectors)
{
    int docID = baseScorer.DocID();
    while (docID != DocsEnum.NO_MORE_DOCS)
    {
        // The single sideways collector for the one filter that failed,
        // or null while no filter has failed yet.
        Collector failedCollector = null;

        // Check the iterator-backed dims first.
        for (int i = 0; i < disis.Length; i++)
        {
            // TODO: should we sort this 2nd dimension of
            // docsEnums from most frequent to least?
            DocIdSetIterator disi = disis[i];
            if (disi != null && disi.DocID() < docID)
            {
                disi.Advance(docID);
            }
            if (disi == null || disi.DocID() > docID)
            {
                if (failedCollector != null)
                {
                    // More than one dim fails on this document, so
                    // it's neither a hit nor a near-miss; move to
                    // next doc:
                    docID = baseScorer.NextDoc();
                    goto nextDocContinue;
                }
                else
                {
                    failedCollector = sidewaysCollectors[i];
                }
            }
        }

        // TODO: for the "non-costly Bits" we really should have passed
        // them down as acceptDocs, but unfortunately we cannot
        // distinguish today betwen "bits() is so costly that you should
        // apply it last" from "bits() is so cheap that you should apply
        // it everywhere down low"

        // Fold in Filter Bits last, since they may be costly:
        for (int i = 0; i < bits.Length; i++)
        {
            if (bits[i].Get(docID) == false)
            {
                if (failedCollector != null)
                {
                    // More than one dim fails on this document, so
                    // it's neither a hit nor a near-miss; move to
                    // next doc:
                    docID = baseScorer.NextDoc();
                    goto nextDocContinue;
                }
                else
                {
                    failedCollector = bitsSidewaysCollectors[i];
                }
            }
        }

        collectDocID = docID;
        // TODO: we could score on demand instead since we are daat here:
        collectScore = baseScorer.Score();
        if (failedCollector == null)
        {
            // Hit passed all filters, so it's "real":
            CollectHit(collector, sidewaysCollectors, bitsSidewaysCollectors);
        }
        else
        {
            // Hit missed exactly one filter:
            CollectNearMiss(failedCollector);
        }
        docID = baseScorer.NextDoc();
        // Target for the "skip this doc" gotos above (translated Java
        // labeled continue).
        nextDocContinue :;
    }
    // Unused label left over from the Java labeled-break translation.
    nextDocBreak :;
}
// Refreshes the cached doc id from the iterator's current position.
internal void Adjust()
{
    Doc = Disi.DocID();
}
// Creates a heap entry for the given iterator, caching its current doc id
// (delegates to the two-argument constructor).
internal HeapedDisiDoc(DocIdSetIterator disi)
    : this(disi, disi.DocID())
{
}