/// <summary>
/// pp was just advanced. If that caused a repeater collision, resolve by advancing the lesser
/// of the two colliding pps. Note that there can only be one collision, as by the initialization
/// there were no collisions before pp was advanced.
/// </summary>
private bool AdvanceRpts(PhrasePositions pp)
{
    if (pp.rptGroup < 0)
    {
        return true; // not a repeater
    }
    PhrasePositions[] rg = rptGroups[pp.rptGroup];
    FixedBitSet bits = new FixedBitSet(rg.Length); // for re-queuing after collisions are resolved
    int k0 = pp.rptInd;
    int k;
    while ((k = Collide(pp)) >= 0)
    {
        pp = Lesser(pp, rg[k]); // always advance the lesser of the (only) two colliding pps
        if (!AdvancePP(pp))
        {
            return false; // exhausted
        }
        if (k != k0) // careful: mark only those currently in the queue
        {
            bits = FixedBitSet.EnsureCapacity(bits, k);
            bits.Set(k); // mark that pp2 needs to be re-queued
        }
    }
    // collisions resolved, now re-queue
    // empty (partially) the queue until seeing all pps advanced for resolving collisions
    int n = 0;
    // TODO: would be good if we could avoid calling Cardinality() in each iteration!
    int numBits = bits.Length; // largest bit we set
    while (bits.Cardinality() > 0)
    {
        PhrasePositions pp2 = pq.Pop();
        rptStack[n++] = pp2;
        if (pp2.rptGroup >= 0 && pp2.rptInd < numBits && bits.Get(pp2.rptInd)) // this bit may not have been set
        {
            bits.Clear(pp2.rptInd);
        }
    }
    // add back to queue
    for (int i = n - 1; i >= 0; i--)
    {
        pq.Add(rptStack[i]);
    }
    return true;
}
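For context, the Lesser helper used above is assumed to behave like upstream Lucene's SloppyPhraseScorer.lesser: of the two colliding repeater positions it returns the one that lags behind, so that is the one advanced to break the collision. A minimal sketch under that assumption (the position/offset field names come from the Java original, not from this snippet):

// Sketch only: assumes PhrasePositions exposes 'position' and 'offset' fields
// as in upstream Lucene; returns whichever of the two colliding pps is behind.
private PhrasePositions Lesser(PhrasePositions pp, PhrasePositions pp2)
{
    if (pp.position < pp2.position
        || (pp.position == pp2.position && pp.offset < pp2.offset))
    {
        return pp;
    }
    return pp2;
}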
public DocumentFilteredAtomicIndexReader(AtomicReaderContext context, Filter preserveFilter, bool negateFilter)
    : base(context.AtomicReader)
{
    int maxDoc = m_input.MaxDoc;
    FixedBitSet bits = new FixedBitSet(maxDoc);
    // ignore livedocs here, as we filter them later:
    DocIdSet docs = preserveFilter.GetDocIdSet(context, null);
    if (docs != null)
    {
        DocIdSetIterator it = docs.GetIterator();
        if (it != null)
        {
            bits.Or(it);
        }
    }
    if (negateFilter)
    {
        bits.Flip(0, maxDoc);
    }

    if (m_input.HasDeletions)
    {
        IBits oldLiveDocs = m_input.LiveDocs;
        if (Debugging.AssertsEnabled) Debugging.Assert(oldLiveDocs != null);
        DocIdSetIterator it = bits.GetIterator();
        for (int i = it.NextDoc(); i < maxDoc; i = it.NextDoc())
        {
            if (!oldLiveDocs.Get(i))
            {
                // we can safely modify the current bit, as the iterator already stepped over it:
                bits.Clear(i);
            }
        }
    }

    this.liveDocs = bits;
    this.numDocs = bits.Cardinality();
}
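A hypothetical usage sketch (the setup and field names are illustrative, not from the original source): the constructor takes a leaf reader context, a Filter selecting the documents to preserve, and a flag to negate that filter, so wrapping a reader could look like the following, assuming DocumentFilteredAtomicIndexReader is accessible from the calling code.

// Hypothetical usage; 'directoryReader' is assumed to be an open DirectoryReader.
// QueryWrapperFilter, TermQuery and Term are standard Lucene.NET types.
AtomicReaderContext leaf = directoryReader.Leaves[0];
Filter keepFilter = new QueryWrapperFilter(new TermQuery(new Term("category", "books")));
AtomicReader filtered = new DocumentFilteredAtomicIndexReader(leaf, keepFilter, negateFilter: false);
// filtered.NumDocs now reflects only the preserved documents that were also live.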
protected internal virtual void AssertEquals(RandomTokenStream tk, FieldType ft, Terms terms)
{
    Assert.AreEqual(1, terms.DocCount);
    int termCount = (new HashSet<string>(Arrays.AsList(tk.Terms))).Count;
    Assert.AreEqual(termCount, terms.Size());
    Assert.AreEqual(termCount, terms.SumDocFreq);
    Assert.AreEqual(ft.StoreTermVectorPositions, terms.HasPositions());
    Assert.AreEqual(ft.StoreTermVectorOffsets, terms.HasOffsets());
    Assert.AreEqual(ft.StoreTermVectorPayloads && tk.HasPayloads(), terms.HasPayloads());
    HashSet<BytesRef> uniqueTerms = new HashSet<BytesRef>();
    foreach (string term in tk.Freqs.Keys)
    {
        uniqueTerms.Add(new BytesRef(term));
    }
    BytesRef[] sortedTerms = uniqueTerms.ToArray(/*new BytesRef[0]*/);
    Array.Sort(sortedTerms, terms.Comparator);
    TermsEnum termsEnum = terms.Iterator(Random().NextBoolean() ? null : this.termsEnum.Value);
    this.termsEnum.Value = termsEnum;
    for (int i = 0; i < sortedTerms.Length; ++i)
    {
        BytesRef nextTerm = termsEnum.Next();
        Assert.AreEqual(sortedTerms[i], nextTerm);
        Assert.AreEqual(sortedTerms[i], termsEnum.Term());
        Assert.AreEqual(1, termsEnum.DocFreq());

        FixedBitSet bits = new FixedBitSet(1);
        DocsEnum docsEnum = termsEnum.Docs(bits, Random().NextBoolean() ? null : this.docsEnum.Value);
        Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.NextDoc());
        bits.Set(0);

        docsEnum = termsEnum.Docs(Random().NextBoolean() ? bits : null, Random().NextBoolean() ? null : docsEnum);
        Assert.IsNotNull(docsEnum);
        Assert.AreEqual(0, docsEnum.NextDoc());
        Assert.AreEqual(0, docsEnum.DocID());
        Assert.AreEqual(tk.Freqs[termsEnum.Term().Utf8ToString()], (int?)docsEnum.Freq());
        Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.NextDoc());
        this.docsEnum.Value = docsEnum;

        bits.Clear(0);
        DocsAndPositionsEnum docsAndPositionsEnum = termsEnum.DocsAndPositions(bits, Random().NextBoolean() ? null : this.docsAndPositionsEnum.Value);
        Assert.AreEqual(ft.StoreTermVectorOffsets || ft.StoreTermVectorPositions, docsAndPositionsEnum != null);
        if (docsAndPositionsEnum != null)
        {
            Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.NextDoc());
        }
        bits.Set(0);

        docsAndPositionsEnum = termsEnum.DocsAndPositions(Random().NextBoolean() ? bits : null, Random().NextBoolean() ? null : docsAndPositionsEnum);
        Assert.AreEqual(ft.StoreTermVectorOffsets || ft.StoreTermVectorPositions, docsAndPositionsEnum != null);
        if (terms.HasPositions() || terms.HasOffsets())
        {
            Assert.AreEqual(0, docsAndPositionsEnum.NextDoc());
            int freq = docsAndPositionsEnum.Freq();
            Assert.AreEqual(tk.Freqs[termsEnum.Term().Utf8ToString()], (int?)freq);
            if (docsAndPositionsEnum != null)
            {
                for (int k = 0; k < freq; ++k)
                {
                    int position = docsAndPositionsEnum.NextPosition();
                    ISet<int?> indexes;
                    if (terms.HasPositions())
                    {
                        indexes = tk.PositionToTerms[position];
                        Assert.IsNotNull(indexes);
                    }
                    else
                    {
                        indexes = tk.StartOffsetToTerms[docsAndPositionsEnum.StartOffset()];
                        Assert.IsNotNull(indexes);
                    }
                    if (terms.HasPositions())
                    {
                        bool foundPosition = false;
                        foreach (int index in indexes)
                        {
                            if (tk.TermBytes[index].Equals(termsEnum.Term()) && tk.Positions[index] == position)
                            {
                                foundPosition = true;
                                break;
                            }
                        }
                        Assert.IsTrue(foundPosition);
                    }
                    if (terms.HasOffsets())
                    {
                        bool foundOffset = false;
                        foreach (int index in indexes)
                        {
                            if (tk.TermBytes[index].Equals(termsEnum.Term())
                                && tk.StartOffsets[index] == docsAndPositionsEnum.StartOffset()
                                && tk.EndOffsets[index] == docsAndPositionsEnum.EndOffset())
                            {
                                foundOffset = true;
                                break;
                            }
                        }
                        Assert.IsTrue(foundOffset);
                    }
                    if (terms.HasPayloads())
                    {
                        bool foundPayload = false;
                        foreach (int index in indexes)
                        {
                            if (tk.TermBytes[index].Equals(termsEnum.Term()) && Equals(tk.Payloads[index], docsAndPositionsEnum.Payload))
                            {
                                foundPayload = true;
                                break;
                            }
                        }
                        Assert.IsTrue(foundPayload);
                    }
                }
                try
                {
                    docsAndPositionsEnum.NextPosition();
                    Assert.Fail();
                }
                catch (Exception e)
                {
                    // ok
                }
            }
            Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.NextDoc());
        }
        this.docsAndPositionsEnum.Value = docsAndPositionsEnum;
    }
    Assert.IsNull(termsEnum.Next());
    for (int i = 0; i < 5; ++i)
    {
        if (Random().NextBoolean())
        {
            Assert.IsTrue(termsEnum.SeekExact(RandomInts.RandomFrom(Random(), tk.TermBytes)));
        }
        else
        {
            Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(RandomInts.RandomFrom(Random(), tk.TermBytes)));
        }
    }
}
public void DeleteDocument(int n)
{
    liveDocs.Clear(n);
}
/// <summary>
/// Used when drill downs are highly constraining vs baseQuery.
/// </summary>
private void DoDrillDownAdvanceScoring(ICollector collector, DocIdSetIterator[] disis, ICollector[] sidewaysCollectors)
{
    int maxDoc = context.Reader.MaxDoc;
    int numDims = dims.Length;

    //if (DEBUG) {
    //  System.out.println("  doDrillDownAdvanceScoring");
    //}

    // TODO: maybe a class like BS, instead of parallel arrays
    int[] filledSlots = new int[CHUNK];
    int[] docIDs = new int[CHUNK];
    float[] scores = new float[CHUNK];
    int[] missingDims = new int[CHUNK];
    int[] counts = new int[CHUNK];

    docIDs[0] = -1;
    int nextChunkStart = CHUNK;

    FixedBitSet seen = new FixedBitSet(CHUNK);

    while (true)
    {
        //if (DEBUG) {
        //  System.out.println("\ncycle nextChunkStart=" + nextChunkStart + " docIds[0]=" + docIDs[0]);
        //}

        // First dim:
        //if (DEBUG) {
        //  System.out.println("  dim0");
        //}
        DocIdSetIterator disi = disis[0];
        if (disi != null)
        {
            int docID = disi.DocID;
            while (docID < nextChunkStart)
            {
                int slot = docID & MASK;

                if (docIDs[slot] != docID)
                {
                    seen.Set(slot);
                    // Mark slot as valid:
                    //if (DEBUG) {
                    //  System.out.println("    set docID=" + docID + " id=" + context.reader().document(docID).get("id"));
                    //}
                    docIDs[slot] = docID;
                    missingDims[slot] = 1;
                    counts[slot] = 1;
                }

                docID = disi.NextDoc();
            }
        }

        // Second dim:
        //if (DEBUG) {
        //  System.out.println("  dim1");
        //}
        disi = disis[1];
        if (disi != null)
        {
            int docID = disi.DocID;
            while (docID < nextChunkStart)
            {
                int slot = docID & MASK;

                if (docIDs[slot] != docID)
                {
                    // Mark slot as valid:
                    seen.Set(slot);
                    //if (DEBUG) {
                    //  System.out.println("    set docID=" + docID + " missingDim=0 id=" + context.reader().document(docID).get("id"));
                    //}
                    docIDs[slot] = docID;
                    missingDims[slot] = 0;
                    counts[slot] = 1;
                }
                else
                {
                    // TODO: single-valued dims will always be true
                    // below; we could somehow specialize
                    if (missingDims[slot] >= 1)
                    {
                        missingDims[slot] = 2;
                        counts[slot] = 2;
                        //if (DEBUG) {
                        //  System.out.println("    set docID=" + docID + " missingDim=2 id=" + context.reader().document(docID).get("id"));
                        //}
                    }
                    else
                    {
                        counts[slot] = 1;
                        //if (DEBUG) {
                        //  System.out.println("    set docID=" + docID + " missingDim=" + missingDims[slot] + " id=" + context.reader().document(docID).get("id"));
                        //}
                    }
                }

                docID = disi.NextDoc();
            }
        }

        // After this we can "upgrade" to conjunction, because
        // any doc not seen by either dim 0 or dim 1 cannot be
        // a hit or a near miss:

        //if (DEBUG) {
        //  System.out.println("  baseScorer");
        //}

        // Fold in baseScorer, using advance:
        int filledCount = 0;
        int slot0 = 0;
        while (slot0 < CHUNK && (slot0 = seen.NextSetBit(slot0)) != -1)
        {
            int ddDocID = docIDs[slot0];
            if (Debugging.AssertsEnabled) Debugging.Assert(ddDocID != -1);

            int baseDocID = baseScorer.DocID;
            if (baseDocID < ddDocID)
            {
                baseDocID = baseScorer.Advance(ddDocID);
            }
            if (baseDocID == ddDocID)
            {
                //if (DEBUG) {
                //  System.out.println("    keep docID=" + ddDocID + " id=" + context.reader().document(ddDocID).get("id"));
                //}
                scores[slot0] = baseScorer.GetScore();
                filledSlots[filledCount++] = slot0;
                counts[slot0]++;
            }
            else
            {
                //if (DEBUG) {
                //  System.out.println("    no docID=" + ddDocID + " id=" + context.reader().document(ddDocID).get("id"));
                //}
                docIDs[slot0] = -1;

                // TODO: we could jump slot0 forward to the
                // baseDocID ... but we'd need to set docIDs for
                // intervening slots to -1
            }
            slot0++;
        }
        seen.Clear(0, CHUNK);

        if (filledCount == 0)
        {
            if (nextChunkStart >= maxDoc)
            {
                break;
            }
            nextChunkStart += CHUNK;
            continue;
        }

        // TODO: factor this out & share w/ union scorer,
        // except we start from dim=2 instead:
        for (int dim = 2; dim < numDims; dim++)
        {
            //if (DEBUG) {
            //  System.out.println("  dim=" + dim + " [" + dims[dim].dim + "]");
            //}
            disi = disis[dim];
            if (disi != null)
            {
                int docID = disi.DocID;
                while (docID < nextChunkStart)
                {
                    int slot = docID & MASK;
                    if (docIDs[slot] == docID && counts[slot] >= dim)
                    {
                        // TODO: single-valued dims will always be true
                        // below; we could somehow specialize
                        if (missingDims[slot] >= dim)
                        {
                            //if (DEBUG) {
                            //  System.out.println("    set docID=" + docID + " count=" + (dim+2));
                            //}
                            missingDims[slot] = dim + 1;
                            counts[slot] = dim + 2;
                        }
                        else
                        {
                            //if (DEBUG) {
                            //  System.out.println("    set docID=" + docID + " missing count=" + (dim+1));
                            //}
                            counts[slot] = dim + 1;
                        }
                    }

                    // TODO: sometimes use advance?
                    docID = disi.NextDoc();
                }
            }
        }

        // Collect:
        //if (DEBUG) {
        //  System.out.println("  now collect: " + filledCount + " hits");
        //}
        for (int i = 0; i < filledCount; i++)
        {
            int slot = filledSlots[i];
            collectDocID = docIDs[slot];
            collectScore = scores[slot];
            //if (DEBUG) {
            //  System.out.println("    docID=" + docIDs[slot] + " count=" + counts[slot]);
            //}
            if (counts[slot] == 1 + numDims)
            {
                CollectHit(collector, sidewaysCollectors);
            }
            else if (counts[slot] == numDims)
            {
                CollectNearMiss(sidewaysCollectors[missingDims[slot]]);
            }
        }

        if (nextChunkStart >= maxDoc)
        {
            break;
        }

        nextChunkStart += CHUNK;
    }
}
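The chunked scoring above only works because CHUNK is a power of two, so docID & MASK (with MASK = CHUNK - 1) gives every document in the current chunk its own slot in the parallel arrays, and a slot is only trusted when docIDs[slot] matches the incoming docID. A small standalone sketch of that mapping; the concrete value CHUNK = 2048 is an assumption mirroring the upstream DrillSidewaysScorer, not taken from this snippet:

using System;

// Assumed constants: CHUNK must be a power of two; 2048 mirrors upstream Lucene.
const int CHUNK = 2048;
const int MASK = CHUNK - 1; // low-bit mask selecting a slot within the current chunk

int Slot(int docID) => docID & MASK;

Console.WriteLine(Slot(5));    // 5    -> slot 5 of the first chunk
Console.WriteLine(Slot(2048)); // 0    -> wraps to slot 0 of the second chunk
Console.WriteLine(Slot(4095)); // 2047 -> last slot of the second chunk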
protected virtual void AssertEquals(RandomTokenStream tk, FieldType ft, Terms terms)
{
    Assert.AreEqual(1, terms.DocCount);
    int termCount = new JCG.HashSet<string>(tk.terms).Count;
    Assert.AreEqual((long)termCount, terms.Count); // LUCENENET specific - cast required because types don't match (xUnit checks this)
    Assert.AreEqual((long)termCount, terms.SumDocFreq); // LUCENENET specific - cast required because types don't match (xUnit checks this)
    Assert.AreEqual(ft.StoreTermVectorPositions, terms.HasPositions);
    Assert.AreEqual(ft.StoreTermVectorOffsets, terms.HasOffsets);
    Assert.AreEqual(ft.StoreTermVectorPayloads && tk.HasPayloads(), terms.HasPayloads);
    ISet<BytesRef> uniqueTerms = new JCG.HashSet<BytesRef>();
    foreach (string term in tk.freqs.Keys)
    {
        uniqueTerms.Add(new BytesRef(term));
    }
    BytesRef[] sortedTerms = uniqueTerms.ToArray(/*new BytesRef[0]*/);
    Array.Sort(sortedTerms, terms.Comparer);
    TermsEnum termsEnum = terms.GetEnumerator(Random.NextBoolean() ? null : this.termsEnum.Value);
    this.termsEnum.Value = termsEnum;
    for (int i = 0; i < sortedTerms.Length; ++i)
    {
        Assert.IsTrue(termsEnum.MoveNext());
        Assert.AreEqual(sortedTerms[i], termsEnum.Term);
        Assert.AreEqual(1, termsEnum.DocFreq);

        FixedBitSet bits = new FixedBitSet(1);
        DocsEnum docsEnum = termsEnum.Docs(bits, Random.NextBoolean() ? null : this.docsEnum.Value);
        Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.NextDoc());
        bits.Set(0);

        docsEnum = termsEnum.Docs(Random.NextBoolean() ? bits : null, Random.NextBoolean() ? null : docsEnum);
        Assert.IsNotNull(docsEnum);
        Assert.AreEqual(0, docsEnum.NextDoc());
        Assert.AreEqual(0, docsEnum.DocID);
        Assert.AreEqual(tk.freqs[termsEnum.Term.Utf8ToString()], docsEnum.Freq);
        Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.NextDoc());
        this.docsEnum.Value = docsEnum;

        bits.Clear(0);
        DocsAndPositionsEnum docsAndPositionsEnum = termsEnum.DocsAndPositions(bits, Random.NextBoolean() ? null : this.docsAndPositionsEnum.Value);
        Assert.AreEqual(ft.StoreTermVectorOffsets || ft.StoreTermVectorPositions, docsAndPositionsEnum != null);
        if (docsAndPositionsEnum != null)
        {
            Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.NextDoc());
        }
        bits.Set(0);

        docsAndPositionsEnum = termsEnum.DocsAndPositions(Random.NextBoolean() ? bits : null, Random.NextBoolean() ? null : docsAndPositionsEnum);
        Assert.AreEqual(ft.StoreTermVectorOffsets || ft.StoreTermVectorPositions, docsAndPositionsEnum != null);
        if (terms.HasPositions || terms.HasOffsets)
        {
            Assert.AreEqual(0, docsAndPositionsEnum.NextDoc());
            int freq = docsAndPositionsEnum.Freq;
            Assert.AreEqual(tk.freqs[termsEnum.Term.Utf8ToString()], freq);
            if (docsAndPositionsEnum != null)
            {
                for (int k = 0; k < freq; ++k)
                {
                    int position = docsAndPositionsEnum.NextPosition();
                    ISet<int> indexes;
                    if (terms.HasPositions)
                    {
                        indexes = tk.positionToTerms[position];
                        Assert.IsNotNull(indexes);
                    }
                    else
                    {
                        indexes = tk.startOffsetToTerms[docsAndPositionsEnum.StartOffset];
                        Assert.IsNotNull(indexes);
                    }
                    if (terms.HasPositions)
                    {
                        bool foundPosition = false;
                        foreach (int index in indexes)
                        {
                            if (tk.termBytes[index].Equals(termsEnum.Term) && tk.positions[index] == position)
                            {
                                foundPosition = true;
                                break;
                            }
                        }
                        Assert.IsTrue(foundPosition);
                    }
                    if (terms.HasOffsets)
                    {
                        bool foundOffset = false;
                        foreach (int index in indexes)
                        {
                            if (tk.termBytes[index].Equals(termsEnum.Term)
                                && tk.startOffsets[index] == docsAndPositionsEnum.StartOffset
                                && tk.endOffsets[index] == docsAndPositionsEnum.EndOffset)
                            {
                                foundOffset = true;
                                break;
                            }
                        }
                        Assert.IsTrue(foundOffset);
                    }
                    if (terms.HasPayloads)
                    {
                        bool foundPayload = false;
                        foreach (int index in indexes)
                        {
                            if (tk.termBytes[index].Equals(termsEnum.Term) && Equals(tk.payloads[index], docsAndPositionsEnum.GetPayload()))
                            {
                                foundPayload = true;
                                break;
                            }
                        }
                        Assert.IsTrue(foundPayload);
                    }
                }
                try
                {
                    docsAndPositionsEnum.NextPosition();
                    Assert.Fail();
                }
                catch (Exception e) when (e.IsException())
                {
                    // ok
                }
                catch (Exception e) when (e.IsAssertionError())
                {
                    // ok
                }
            }
            Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.NextDoc());
        }
        this.docsAndPositionsEnum.Value = docsAndPositionsEnum;
    }
    Assert.IsFalse(termsEnum.MoveNext());
    for (int i = 0; i < 5; ++i)
    {
        if (Random.NextBoolean())
        {
            Assert.IsTrue(termsEnum.SeekExact(RandomPicks.RandomFrom(Random, tk.termBytes)));
        }
        else
        {
            Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(RandomPicks.RandomFrom(Random, tk.termBytes)));
        }
    }
}