/// <summary>
/// Drains <paramref name="iterator"/> into a newly allocated <see cref="FixedBitSet"/>.
/// </summary>
/// <param name="iterator">Source of document ids; it is consumed until it reports <c>NO_MORE_DOCS</c>.</param>
/// <param name="numBits">Size handed to the <see cref="FixedBitSet"/> constructor.</param>
/// <returns>A bit set with one bit set for every document id the iterator returned.</returns>
private static FixedBitSet ToFixedBitSet(DocIdSetIterator iterator, int numBits)
{
    FixedBitSet result = new FixedBitSet(numBits);
    for (int docId = iterator.NextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = iterator.NextDoc())
    {
        result.Set(docId);
    }
    return result;
}
/// <summary>
/// Creates a SortedVIntList from the document numbers produced by an iterator.
/// </summary>
/// <param name="docIdSetIterator">
/// An iterator providing document numbers as a set of integers.
/// It is iterated completely by this constructor and must provide the
/// integers in non-decreasing order.
/// </param>
public SortedVIntList(DocIdSetIterator docIdSetIterator)
{
    SortedVIntListBuilder listBuilder = new SortedVIntListBuilder(this);
    for (int docId = docIdSetIterator.NextDoc();
         docId != DocIdSetIterator.NO_MORE_DOCS;
         docId = docIdSetIterator.NextDoc())
    {
        listBuilder.AddInt(docId);
    }
    listBuilder.Done();
}
/// <summary>
/// Encodes document ids pulled from a <see cref="DocIdSetIterator"/> until the
/// encoder has received <c>numValues</c> values.
/// </summary>
/// <param name="disi">
/// This DocIdSetIterator should provide document ids that are consistent
/// with <c>numValues</c> and <c>upperBound</c> as provided to the constructor.
/// </param>
/// <exception cref="ArgumentException">
/// The iterator is exhausted before <c>numValues</c> ids have been encoded.
/// </exception>
public virtual void EncodeFromDisi(DocIdSetIterator disi)
{
    while (efEncoder.numEncoded < efEncoder.numValues)
    {
        int docId = disi.NextDoc();
        if (docId != DocIdSetIterator.NO_MORE_DOCS)
        {
            efEncoder.EncodeNext(docId);
            continue;
        }

        // The iterator ran dry before the encoder was full.
        string message = "disi: " + disi.ToString() + "\nhas " + efEncoder.numEncoded + " docs, but at least " + efEncoder.numValues + " are required.";
        throw new ArgumentException(message);
    }
}
/// <summary>
/// Walks <paramref name="a"/> and the iterator of <paramref name="b"/> in lock-step and
/// asserts that both report the same sequence of set bits. Each step randomly uses
/// either <c>NextDoc()</c> or <c>Advance(previous + 1)</c> on the iterator.
/// </summary>
internal virtual void DoIterate2(BitSet a, FixedBitSet b)
{
    DocIdSetIterator fixedIterator = b.GetIterator();
    int expectedBit = -1;
    int actualDoc = -1;
    do
    {
        expectedBit = a.NextSetBit(expectedBit + 1);

        if (Random.NextBoolean())
        {
            actualDoc = fixedIterator.NextDoc();
        }
        else
        {
            actualDoc = fixedIterator.Advance(actualDoc + 1);
        }

        // BitSet signals exhaustion with -1, the iterator with NO_MORE_DOCS.
        int expectedDoc = expectedBit == -1 ? DocIdSetIterator.NO_MORE_DOCS : expectedBit;
        Assert.AreEqual(expectedDoc, actualDoc);
    }
    while (expectedBit >= 0);
}
/// <summary>
/// Wraps the atomic reader of <paramref name="context"/> and computes a live-docs bit set
/// from <paramref name="preserveFilter"/>, optionally inverted, with the documents that are
/// already deleted in the wrapped reader removed.
/// </summary>
/// <param name="context">Context whose <c>AtomicReader</c> is wrapped.</param>
/// <param name="preserveFilter">Filter whose matching documents are kept (or dropped when negated).</param>
/// <param name="negateFilter">When <c>true</c>, the filter result is flipped over <c>[0, MaxDoc)</c>.</param>
public DocumentFilteredAtomicIndexReader(AtomicReaderContext context, Filter preserveFilter, bool negateFilter)
    : base(context.AtomicReader)
{
    int docLimit = m_input.MaxDoc;
    FixedBitSet kept = new FixedBitSet(docLimit);

    // ignore livedocs here, as we filter them later:
    DocIdSet filtered = preserveFilter.GetDocIdSet(context, null);
    DocIdSetIterator filteredIterator = filtered != null ? filtered.GetIterator() : null;
    if (filteredIterator != null)
    {
        kept.Or(filteredIterator);
    }

    if (negateFilter)
    {
        kept.Flip(0, docLimit);
    }

    if (m_input.HasDeletions)
    {
        IBits oldLiveDocs = m_input.LiveDocs;
        Debug.Assert(oldLiveDocs != null);

        DocIdSetIterator keptIterator = kept.GetIterator();
        int docId = keptIterator.NextDoc();
        while (docId < docLimit)
        {
            if (!oldLiveDocs.Get(docId))
            {
                // we can safely modify the current bit, as the iterator already stepped over it:
                kept.Clear(docId);
            }
            docId = keptIterator.NextDoc();
        }
    }

    this.liveDocs = kept;
    this.numDocs = kept.Cardinality();
}
/// <summary>
/// Indexes a single document and checks that a term filter returns exactly that
/// document for the indexed field/term pair, and a <c>null</c> <see cref="DocIdSet"/>
/// when either the term or the field does not exist.
/// </summary>
public void TestMissingTermAndField()
{
    string fieldName = @"field1";
    Directory directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, directory);

    Document document = new Document();
    document.Add(NewStringField(fieldName, @"value1", Field.Store.NO));
    writer.AddDocument(document);

    IndexReader reader = SlowCompositeReaderWrapper.Wrap(writer.GetReader());
    assertTrue(reader.Context is AtomicReaderContext);
    AtomicReaderContext context = (AtomicReaderContext)reader.Context;
    writer.Dispose();

    // Existing field and term: exactly one hit, document 0.
    DocIdSet matches = TermFilter(fieldName, @"value1").GetDocIdSet(context, context.AtomicReader.LiveDocs);
    assertNotNull(@"must not be null", matches);
    DocIdSetIterator matchIterator = matches.GetIterator();
    assertEquals(matchIterator.NextDoc(), 0);
    assertEquals(matchIterator.NextDoc(), DocIdSetIterator.NO_MORE_DOCS);

    // Existing field, unknown term.
    matches = TermFilter(fieldName, @"value2").GetDocIdSet(context, context.AtomicReader.LiveDocs);
    assertNull(@"must be null", matches);

    // Unknown field.
    matches = TermFilter(@"field2", @"value1").GetDocIdSet(context, context.AtomicReader.LiveDocs);
    assertNull(@"must be null", matches);

    reader.Dispose();
    directory.Dispose();
}
/// <summary>
/// Obtains the iterator of the parent's inner set and positions it on its first
/// document. If the inner set is empty, <c>it1</c> is reset to <c>null</c>.
/// </summary>
private void Initialize()
{
    it1 = parent.innerSet.Iterator();
    try
    {
        innerDocid = it1.NextDoc();
        if (innerDocid == DocIdSetIterator.NO_MORE_DOCS)
        {
            it1 = null;
        }
    }
    catch
    {
        // NOTE(review): any failure of NextDoc() is swallowed here, leaving it1
        // non-null and innerDocid unchanged. Kept as-is to preserve behavior;
        // confirm whether this best-effort handling is intentional.
    }
}
/// <summary>
/// Asserts that <paramref name="filt"/> matches exactly <paramref name="expected"/>
/// documents of the test reader.
/// </summary>
/// <param name="mes">Message reported when the assertion fails.</param>
/// <param name="expected">Expected number of matching documents.</param>
/// <param name="filt">Filter under test.</param>
private void TstFilterCard(string mes, int expected, Filter filt)
{
    int actual = 0;
    DocIdSet docIdSet = filt.GetDocIdSet(reader.AtomicContext, reader.LiveDocs);
    if (docIdSet != null)
    {
        // A DocIdSet may hand back a null iterator to signal "no matching docs";
        // treat that the same as a null set instead of dereferencing it.
        DocIdSetIterator disi = docIdSet.GetIterator();
        if (disi != null)
        {
            while (disi.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
            {
                actual++;
            }
        }
    }
    assertEquals(mes, expected, actual);
}
/// <summary>
/// Obtains the iterator of the inner set and positions it on its first document.
/// If the inner set is empty, <c>it1</c> is reset to <c>null</c>.
/// </summary>
private void Initialize()
{
    it1 = innerSet.GetIterator();
    try
    {
        innerDocid = it1.NextDoc();
        if (innerDocid == DocIdSetIterator.NO_MORE_DOCS)
        {
            it1 = null;
        }
    }
    catch (Exception)
    {
        // NOTE(review): exceptions from NextDoc() are ignored here, leaving it1
        // non-null and innerDocid unchanged. Kept as-is to preserve behavior;
        // confirm whether this best-effort handling is intentional.
    }
}
/// <summary>
/// Tallies category ordinals for all matching documents. Each document's binary
/// doc value is decoded as a sequence of 7-bit-per-byte variable-length integers,
/// each one a delta against the previously decoded ordinal, and the counter of
/// every decoded ordinal is incremented. <c>Rollup()</c> is invoked at the end.
/// </summary>
/// <param name="matchingDocs">Per-segment hits to aggregate.</param>
private void Count(IList<FacetsCollector.MatchingDocs> matchingDocs)
{
    // LUCENENET specific - performance is significantly better if we instantiate
    // this outside of the outer loop.
    BytesRef scratch = new BytesRef();

    foreach (FacetsCollector.MatchingDocs segmentHits in matchingDocs)
    {
        BinaryDocValues ordinalValues = segmentHits.Context.AtomicReader.GetBinaryDocValues(m_indexFieldName);
        if (ordinalValues is null)
        {
            // this reader does not have DocValues for the requested category list
            continue;
        }

        DocIdSetIterator docIterator = segmentHits.Bits.GetIterator();
        for (int docId = docIterator.NextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = docIterator.NextDoc())
        {
            ordinalValues.Get(docId, scratch);
            var buffer = scratch.Bytes;
            int limit = scratch.Offset + scratch.Length;

            int pending = 0;   // bits of the value currently being decoded
            int previous = 0;  // last fully decoded ordinal (deltas are relative to it)
            for (int pos = scratch.Offset; pos < limit; pos++)
            {
                byte b = buffer[pos];
                if (b > sbyte.MaxValue)
                {
                    // High bit set: more bytes of this value follow.
                    pending = (pending << 7) | (b & 0x7F);
                    continue;
                }

                // LUCENENET: "b <= sbyte.MaxValue" is the optimized equivalent of "(sbyte)b >= 0"
                previous = ((pending << 7) | b) + previous;
                ++m_values[previous];
                pending = 0;
            }
        }
    }

    Rollup();
}
/// <summary>
/// Moves to the next document id that every iterator in <c>iterators</c> agrees on.
/// The first iterator proposes a candidate; every other iterator is advanced to it.
/// Whenever one of them overshoots, its position becomes the new candidate and the
/// scan restarts from the first iterator (skipping the one that set the candidate).
/// </summary>
/// <returns>
/// The agreed document id, or <c>NO_MORE_DOCS</c>; the value is also cached in <c>lastReturn</c>.
/// </returns>
public override int NextDoc()
{
    if (lastReturn == DocIdSetIterator.NO_MORE_DOCS)
    {
        return DocIdSetIterator.NO_MORE_DOCS;
    }

    int candidate = iterators[0].NextDoc();
    int count = iterators.Length;
    int leader = 0; // index of the iterator that produced the current candidate
    int index = 1;

    while (index < count)
    {
        if (index == leader)
        {
            index++;
            continue;
        }

        int reached = iterators[index].Advance(candidate);
        if (reached <= candidate)
        {
            index++;
            continue;
        }

        // This iterator overshot: adopt its position as the new candidate.
        candidate = reached;
        if (index != 0)
        {
            // Restart the scan from the first iterator without advancing index.
            leader = index;
            index = 0;
        }
        else
        {
            leader = 0;
            index++;
        }
    }

    lastReturn = candidate;
    return candidate;
}
/// <summary>
/// Delete by query: for every query/limit pair, deletes the matching documents of
/// <paramref name="reader"/> whose id is below the pair's limit.
/// </summary>
/// <param name="queriesIter">Queries to apply, each with its exclusive doc id limit.</param>
/// <param name="rld">Receives the deletes; its writable live docs are initialized before the first delete.</param>
/// <param name="reader">Segment reader the queries are evaluated against.</param>
/// <returns>The number of documents for which <c>rld.Delete</c> returned <c>true</c>.</returns>
private static long ApplyQueryDeletes(IEnumerable<QueryAndLimit> queriesIter, ReadersAndUpdates rld, SegmentReader reader)
{
    AtomicReaderContext readerContext = reader.AtomicContext;
    long deleted = 0;
    bool liveDocsInitialized = false;

    foreach (QueryAndLimit entry in queriesIter)
    {
        Query query = entry.Query;
        int limit = entry.Limit;

        DocIdSet matches = new QueryWrapperFilter(query).GetDocIdSet(readerContext, reader.LiveDocs);
        if (matches == null)
        {
            continue;
        }

        DocIdSetIterator matchIterator = matches.GetIterator();
        if (matchIterator == null)
        {
            continue;
        }

        // Stops at the first doc id at or beyond the limit (this also covers exhaustion
        // provided NO_MORE_DOCS is the largest int, as in Lucene).
        for (int docId = matchIterator.NextDoc(); docId < limit; docId = matchIterator.NextDoc())
        {
            if (!liveDocsInitialized)
            {
                rld.InitWritableLiveDocs();
                liveDocsInitialized = true;
            }

            if (rld.Delete(docId))
            {
                deleted++;
            }
        }
    }

    return deleted;
}
/// <summary>
/// Tallies category ordinals for all matching documents. The binary doc value of each
/// document holds delta-encoded ordinals as variable-length integers (7 payload bits
/// per byte, high bit marks continuation); the counter of every decoded ordinal is
/// incremented. <c>Rollup()</c> is invoked once all segments are processed.
/// </summary>
/// <param name="matchingDocs">Per-segment hits to aggregate.</param>
private void Count(IList<FacetsCollector.MatchingDocs> matchingDocs)
{
    foreach (FacetsCollector.MatchingDocs segmentHits in matchingDocs)
    {
        BinaryDocValues ordinalValues = segmentHits.Context.AtomicReader.GetBinaryDocValues(indexFieldName);
        if (ordinalValues == null)
        {
            // this reader does not have DocValues for the requested category list
            continue;
        }

        DocIdSetIterator docIterator = segmentHits.Bits.GetIterator();
        BytesRef scratch = new BytesRef();

        int docId = docIterator.NextDoc();
        while (docId != DocIdSetIterator.NO_MORE_DOCS)
        {
            ordinalValues.Get(docId, scratch);
            var buffer = scratch.Bytes;
            int limit = scratch.Offset + scratch.Length;
            int position = scratch.Offset;

            int accumulator = 0; // partially decoded value
            int lastOrdinal = 0; // deltas are relative to the previous ordinal
            while (position < limit)
            {
                byte current = buffer[position];
                position++;

                if ((sbyte)current >= 0)
                {
                    // Final byte of this value: finish decoding and count it.
                    accumulator = ((accumulator << 7) | current) + lastOrdinal;
                    lastOrdinal = accumulator;
                    ++values[accumulator];
                    accumulator = 0;
                }
                else
                {
                    accumulator = (accumulator << 7) | (current & 0x7F);
                }
            }

            docId = docIterator.NextDoc();
        }
    }

    Rollup();
}
/// <summary>
/// Returns the number of documents in this set, counting them by full iteration
/// on first use and caching the result in <c>size</c>.
/// </summary>
/// <returns>
/// The document count, or <c>-1</c> if iteration failed. A failed attempt leaves the
/// cache untouched so that the next call retries instead of returning a partial count.
/// </returns>
public override int Size()
{
    // Do the size if we haven't done it so far.
    if (size < 0)
    {
        DocIdSetIterator dcit = Iterator();

        // Count into a local: publishing a half-finished count in the field would make
        // subsequent calls return it as if it were valid after an exception.
        int counted = 0;
        try
        {
            while (dcit.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
            {
                counted++;
            }
        }
        catch
        {
            return -1;
        }

        size = counted;
    }

    return size;
}
/// <summary>
/// Returns the number of documents in this set, counting them by full iteration
/// on first use and caching the result in <c>_size</c>.
/// </summary>
/// <returns>
/// The document count, or <c>INVALID</c> if iterating failed (in which case nothing
/// is cached and the next call retries).
/// </returns>
public override int Size()
{
    if (_size == INVALID)
    {
        // Obtained outside the try block, as before: failures to create the iterator propagate.
        DocIdSetIterator it = this.Iterator();

        // Count into a local so the cache never holds 0 or a partial count
        // if Iterator() or NextDoc() throws.
        int counted = 0;
        try
        {
            while (it.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
            {
                counted++;
            }
        }
        catch
        {
            return INVALID;
        }

        _size = counted;
    }

    return _size;
}
/// <summary>
/// For every matching document, evaluates <paramref name="valueSource"/> and adds the
/// resulting value to the accumulator of each ordinal attached to the document.
/// <c>Rollup()</c> is invoked once all segments are processed.
/// </summary>
/// <param name="matchingDocs">Per-segment hits to aggregate.</param>
/// <param name="keepScores">
/// When <c>true</c>, a fake scorer carrying each hit's doc id and score is exposed to the
/// value source under the context key <c>"scorer"</c>.
/// </param>
/// <param name="valueSource">Source of the per-document value to sum.</param>
private void SumValues(IList<MatchingDocs> matchingDocs, bool keepScores, ValueSource valueSource)
{
    FakeScorer scorer = new FakeScorer();
    IDictionary context = new Dictionary<string, Scorer>();
    if (keepScores)
    {
        context["scorer"] = scorer;
    }

    Int32sRef scratch = new Int32sRef();
    foreach (MatchingDocs hits in matchingDocs)
    {
        OrdinalsReader.OrdinalsSegmentReader ords = ordinalsReader.GetReader(hits.Context);

        int scoresIdx = 0;
        float[] scores = hits.Scores;

        FunctionValues functionValues = valueSource.GetValues(context, hits.Context);
        DocIdSetIterator docs = hits.Bits.GetIterator();

        int doc;
        while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
        {
            ords.Get(doc, scratch);
            if (keepScores)
            {
                scorer.docID = doc;
                scorer.score = scores[scoresIdx++];
            }

            float value = (float)functionValues.DoubleVal(doc);

            // The valid ordinals live in [Offset, Offset + Length); indexing from 0
            // would read the wrong entries whenever the reader fills the ref at a
            // non-zero offset.
            int end = scratch.Offset + scratch.Length;
            for (int i = scratch.Offset; i < end; i++)
            {
                m_values[scratch.Int32s[i]] += value;
            }
        }
    }

    Rollup();
}
/// <summary>
/// Increments the counter of every ordinal attached to every matching document,
/// then calls <c>Rollup()</c>.
/// </summary>
/// <param name="matchingDocs">Per-segment hits to aggregate.</param>
private void Count(IList<FacetsCollector.MatchingDocs> matchingDocs)
{
    Int32sRef ordinals = new Int32sRef();

    foreach (FacetsCollector.MatchingDocs segmentHits in matchingDocs)
    {
        OrdinalsReader.OrdinalsSegmentReader segmentOrds = ordinalsReader.GetReader(segmentHits.Context);
        DocIdSetIterator docIterator = segmentHits.Bits.GetIterator();

        for (int docId = docIterator.NextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = docIterator.NextDoc())
        {
            segmentOrds.Get(docId, ordinals);

            // Valid ordinals occupy [Offset, Offset + Length) of the backing array.
            int limit = ordinals.Offset + ordinals.Length;
            for (int pos = ordinals.Offset; pos < limit; pos++)
            {
                m_values[ordinals.Int32s[pos]]++;
            }
        }
    }

    Rollup();
}
/// <summary>
/// Renders the document ids of a <see cref="DocIdSet"/> as a bracketed,
/// comma-separated list, e.g. <c>[1,5,9]</c>.
/// </summary>
/// <param name="docIdSet">The set to render; its iterator is consumed completely.</param>
/// <returns>
/// The textual form of the set; <c>[]</c> when the set is empty or hands back a
/// <c>null</c> iterator.
/// </returns>
public static string AsString(this DocIdSet docIdSet)
{
    StringBuilder buf = new StringBuilder();
    buf.Append("[");

    // A DocIdSet is allowed to return a null iterator when it matches nothing;
    // render that as an empty list instead of failing with a NullReferenceException.
    DocIdSetIterator iter = docIdSet.Iterator();
    if (iter != null)
    {
        bool firstTime = true;
        while (iter.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
        {
            if (firstTime)
            {
                firstTime = false;
            }
            else
            {
                buf.Append(",");
            }
            buf.Append(iter.DocID());
        }
    }

    buf.Append("]");
    return buf.ToString();
}
/// <summary>
/// Sums float associations per ordinal. The binary doc value of every matching document
/// is read as a sequence of 8-byte records: a big-endian 32-bit ordinal followed by the
/// big-endian 32-bit bit pattern of a float, which is added to that ordinal's accumulator.
/// </summary>
/// <param name="matchingDocs">Per-segment hits to aggregate.</param>
private void SumValues(IList<FacetsCollector.MatchingDocs> matchingDocs)
{
    foreach (FacetsCollector.MatchingDocs segmentHits in matchingDocs)
    {
        BinaryDocValues associations = segmentHits.Context.AtomicReader.GetBinaryDocValues(m_indexFieldName);
        if (associations == null)
        {
            // this reader does not have DocValues for the requested category list
            continue;
        }

        BytesRef record = new BytesRef();
        DocIdSetIterator docIterator = segmentHits.Bits.GetIterator();

        for (int docId = docIterator.NextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = docIterator.NextDoc())
        {
            // TODO: use OrdinalsReader? we'd need to add a
            // BytesRef getAssociation()?
            associations.Get(docId, record);
            byte[] buffer = record.Bytes;
            int limit = record.Offset + record.Length;

            for (int pos = record.Offset; pos < limit; pos += 8)
            {
                int ordinal = ((buffer[pos] & 0xFF) << 24)
                    | ((buffer[pos + 1] & 0xFF) << 16)
                    | ((buffer[pos + 2] & 0xFF) << 8)
                    | (buffer[pos + 3] & 0xFF);

                int floatBits = ((buffer[pos + 4] & 0xFF) << 24)
                    | ((buffer[pos + 5] & 0xFF) << 16)
                    | ((buffer[pos + 6] & 0xFF) << 8)
                    | (buffer[pos + 7] & 0xFF);

                m_values[ordinal] += J2N.BitConversion.Int32BitsToSingle(floatBits);
            }
        }
    }
}
/// <summary>
/// Runs <paramref name="weight"/> over every sub-reader, passing each hit through the
/// facet validator before handing it to <paramref name="collector"/>, optionally
/// restricted by <paramref name="filter"/>.
/// </summary>
/// <param name="weight">Produces one scorer per sub-reader.</param>
/// <param name="filter">Optional filter; <c>null</c> means all scored documents are candidates.</param>
/// <param name="collector">Receives the validated hits.</param>
/// <param name="start">Offset added to each sub-reader's doc start.</param>
/// <param name="mapReduceWrapper">
/// Optional map/reduce hook. Without a filter it is invoked once per sub-reader; with a
/// filter it is invoked per collected document and finalized per scored segment.
/// </param>
public virtual void Search(Weight weight, Filter filter, Collector collector, int start, IBoboMapFunctionWrapper mapReduceWrapper)
{
    FacetValidator validator = CreateFacetValidator();
    int target = 0;

    if (filter == null)
    {
        for (int i = 0; i < _subReaders.Length; i++)
        {
            // search each subreader
            int docStart = start + _docStarts[i];
            collector.SetNextReader(_subReaders[i], docStart);
            validator.SetNextReader(_subReaders[i], docStart);

            Scorer scorer = weight.Scorer(_subReaders[i], true, true);
            if (scorer != null)
            {
                collector.SetScorer(scorer);
                target = scorer.NextDoc();
                while (target != DocIdSetIterator.NO_MORE_DOCS)
                {
                    if (validator.Validate(target))
                    {
                        collector.Collect(target);
                        target = scorer.NextDoc();
                    }
                    else
                    {
                        // skip to the next doc the validator could accept
                        target = validator._nextTarget;
                        target = scorer.Advance(target);
                    }
                }
            }

            if (mapReduceWrapper != null)
            {
                mapReduceWrapper.MapFullIndexReader(_subReaders[i], validator.GetCountCollectors());
            }
        }
        return;
    }

    for (int i = 0; i < _subReaders.Length; i++)
    {
        DocIdSet filterDocIdSet = filter.GetDocIdSet(_subReaders[i]);
        if (filterDocIdSet == null)
        {
            // A null DocIdSet only means the filter accepts nothing in THIS sub-reader.
            // Returning here would silently drop the hits of all remaining sub-readers.
            continue;
        }

        int docStart = start + _docStarts[i];
        collector.SetNextReader(_subReaders[i], docStart);
        validator.SetNextReader(_subReaders[i], docStart);

        Scorer scorer = weight.Scorer(_subReaders[i], true, false);
        if (scorer == null)
        {
            continue;
        }

        collector.SetScorer(scorer);
        DocIdSetIterator filterDocIdIterator = filterDocIdSet.Iterator(); // CHECKME: use ConjunctionScorer here?
        if (filterDocIdIterator == null)
        {
            continue;
        }

        // Leapfrog between the filter iterator (target) and the scorer (doc).
        int doc = -1;
        target = filterDocIdIterator.NextDoc();
        while (target < DocIdSetIterator.NO_MORE_DOCS)
        {
            if (doc < target)
            {
                doc = scorer.Advance(target);
            }

            if (doc == target) // permitted by filter
            {
                if (validator.Validate(doc))
                {
                    if (mapReduceWrapper != null)
                    {
                        mapReduceWrapper.MapSingleDocument(doc, _subReaders[i]);
                    }
                    collector.Collect(doc);
                    target = filterDocIdIterator.NextDoc();
                }
                else
                {
                    // skip to the next possible docid
                    target = filterDocIdIterator.Advance(validator._nextTarget);
                }
            }
            else // doc > target
            {
                if (doc == DocIdSetIterator.NO_MORE_DOCS)
                {
                    break;
                }
                target = filterDocIdIterator.Advance(doc);
            }
        }

        if (mapReduceWrapper != null)
        {
            mapReduceWrapper.FinalizeSegment(_subReaders[i], validator.GetCountCollectors());
        }
    }
}
/// <summary>
/// Used when drill downs are highly constraining vs
/// baseQuery.
/// <para/>
/// Processes the doc id space in windows of <c>CHUNK</c> documents. Within a window the
/// first two dimensions are iterated exhaustively to seed candidate slots, the base scorer
/// is then advanced to each candidate, and the remaining dimensions update per-slot match
/// counts. A slot whose count reaches <c>1 + numDims</c> is collected as a hit; a slot
/// whose count equals <c>numDims</c> is collected as a near miss for the dimension
/// recorded in <c>missingDims</c>.
/// </summary>
/// <param name="collector">Passed to <c>CollectHit</c> for full hits.</param>
/// <param name="disis">One iterator per dimension; entries may be <c>null</c>. Indexes 0 and 1 are read unconditionally.</param>
/// <param name="sidewaysCollectors">Indexed by the missing dimension for near misses; also passed to <c>CollectHit</c>.</param>
private void DoDrillDownAdvanceScoring(ICollector collector, DocIdSetIterator[] disis, ICollector[] sidewaysCollectors)
{
    int maxDoc = context.Reader.MaxDoc;
    int numDims = dims.Length;

    // TODO: maybe a class like BS, instead of parallel arrays
    // All arrays below are indexed by slot (docID & MASK) within the current window.
    int[] filledSlots = new int[CHUNK];
    int[] docIDs = new int[CHUNK];
    float[] scores = new float[CHUNK];
    int[] missingDims = new int[CHUNK];
    int[] counts = new int[CHUNK];

    // Slot 0 would otherwise look like it already holds doc 0 (arrays are zero-initialized).
    docIDs[0] = -1;
    int nextChunkStart = CHUNK;

    // Slots touched by dim 0 or dim 1 in the current window.
    FixedBitSet seen = new FixedBitSet(CHUNK);

    while (true)
    {
        // First dim:
        DocIdSetIterator disi = disis[0];
        if (disi != null)
        {
            int docID = disi.DocID;
            while (docID < nextChunkStart)
            {
                int slot = docID & MASK;

                if (docIDs[slot] != docID)
                {
                    seen.Set(slot);
                    // Mark slot as valid:
                    docIDs[slot] = docID;
                    missingDims[slot] = 1;
                    counts[slot] = 1;
                }

                docID = disi.NextDoc();
            }
        }

        // Second dim:
        disi = disis[1];
        if (disi != null)
        {
            int docID = disi.DocID;
            while (docID < nextChunkStart)
            {
                int slot = docID & MASK;

                if (docIDs[slot] != docID)
                {
                    // Mark slot as valid:
                    seen.Set(slot);
                    docIDs[slot] = docID;
                    missingDims[slot] = 0;
                    counts[slot] = 1;
                }
                else
                {
                    // TODO: single-valued dims will always be true
                    // below; we could somehow specialize
                    if (missingDims[slot] >= 1)
                    {
                        missingDims[slot] = 2;
                        counts[slot] = 2;
                    }
                    else
                    {
                        counts[slot] = 1;
                    }
                }

                docID = disi.NextDoc();
            }
        }

        // After this we can "upgrade" to conjunction, because
        // any doc not seen by either dim 0 or dim 1 cannot be
        // a hit or a near miss:

        // Fold in baseScorer, using advance:
        int filledCount = 0;
        int slot0 = 0;
        while (slot0 < CHUNK && (slot0 = seen.NextSetBit(slot0)) != -1)
        {
            int ddDocID = docIDs[slot0];
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(ddDocID != -1);
            }

            int baseDocID = baseScorer.DocID;
            if (baseDocID < ddDocID)
            {
                baseDocID = baseScorer.Advance(ddDocID);
            }
            if (baseDocID == ddDocID)
            {
                // The base query matches this candidate: keep it and record its score.
                scores[slot0] = baseScorer.GetScore();
                filledSlots[filledCount++] = slot0;
                counts[slot0]++;
            }
            else
            {
                // Base query does not match: invalidate the slot.
                docIDs[slot0] = -1;

                // TODO: we could jump slot0 forward to the
                // baseDocID ... but we'd need to set docIDs for
                // intervening slots to -1
            }
            slot0++;
        }
        seen.Clear(0, CHUNK);

        if (filledCount == 0)
        {
            // Nothing survived in this window; move on (or stop past the last doc).
            if (nextChunkStart >= maxDoc)
            {
                break;
            }
            nextChunkStart += CHUNK;
            continue;
        }

        // TODO: factor this out & share w/ union scorer,
        // except we start from dim=2 instead:
        for (int dim = 2; dim < numDims; dim++)
        {
            disi = disis[dim];
            if (disi != null)
            {
                int docID = disi.DocID;
                while (docID < nextChunkStart)
                {
                    int slot = docID & MASK;
                    if (docIDs[slot] == docID && counts[slot] >= dim)
                    {
                        // TODO: single-valued dims will always be true
                        // below; we could somehow specialize
                        if (missingDims[slot] >= dim)
                        {
                            missingDims[slot] = dim + 1;
                            counts[slot] = dim + 2;
                        }
                        else
                        {
                            counts[slot] = dim + 1;
                        }
                    }

                    // TODO: sometimes use advance?
                    docID = disi.NextDoc();
                }
            }
        }

        // Collect:
        for (int i = 0; i < filledCount; i++)
        {
            int slot = filledSlots[i];
            collectDocID = docIDs[slot];
            collectScore = scores[slot];
            if (counts[slot] == 1 + numDims)
            {
                CollectHit(collector, sidewaysCollectors);
            }
            else if (counts[slot] == numDims)
            {
                CollectNearMiss(sidewaysCollectors[missingDims[slot]]);
            }
        }

        if (nextChunkStart >= maxDoc)
        {
            break;
        }

        nextChunkStart += CHUNK;
    }
}
/// <summary>
/// Randomized join test. For each index iteration a fresh random index is built; for each
/// search iteration a random value, join direction and score mode are chosen, a join query
/// is executed, and both the set of matching documents and the top docs (ids, scores and
/// explanations) are compared with independently computed expectations.
/// </summary>
/// <param name="multipleValuesPerDocument">Forwarded to context creation and to <c>JoinUtil.CreateJoinQuery</c>.</param>
/// <param name="maxIndexIter">Number of indexes to build.</param>
/// <param name="maxSearchIter">Number of random searches per index.</param>
/// <param name="numberOfDocumentsToIndex">Forwarded to <c>CreateContext</c>.</param>
private void ExecuteRandomJoin(bool multipleValuesPerDocument, int maxIndexIter, int maxSearchIter, int numberOfDocumentsToIndex)
{
    for (int indexIter = 1; indexIter <= maxIndexIter; indexIter++)
    {
        if (Verbose)
        {
            Console.WriteLine("indexIter=" + indexIter);
        }
        Directory dir = NewDirectory();
        RandomIndexWriter w = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.KEYWORD, false)) .SetMergePolicy(NewLogMergePolicy()));
        bool scoreDocsInOrder = TestJoinUtil.Random.NextBoolean();
        IndexIterationContext context = CreateContext(numberOfDocumentsToIndex, w, multipleValuesPerDocument, scoreDocsInOrder);

        IndexReader topLevelReader = w.GetReader();
        w.Dispose();
        for (int searchIter = 1; searchIter <= maxSearchIter; searchIter++)
        {
            if (Verbose)
            {
                Console.WriteLine("searchIter=" + searchIter);
            }
            IndexSearcher indexSearcher = NewSearcher(topLevelReader);

            // Pick one of the indexed values together with the side ("from"/"to") it was indexed on.
            int r = Random.Next(context.RandomUniqueValues.Length);
            bool from = context.RandomFrom[r];
            string randomValue = context.RandomUniqueValues[r];
            FixedBitSet expectedResult = CreateExpectedResult(randomValue, from, indexSearcher.IndexReader, context);

            Query actualQuery = new TermQuery(new Term("value", randomValue));
            if (Verbose)
            {
                Console.WriteLine("actualQuery=" + actualQuery);
            }

            // Random score mode, drawn by index over the enum's names.
            var scoreModeLength = Enum.GetNames(typeof(ScoreMode)).Length;
            ScoreMode scoreMode = (ScoreMode)Random.Next(scoreModeLength);
            if (Verbose)
            {
                Console.WriteLine("scoreMode=" + scoreMode);
            }

            Query joinQuery;
            if (from)
            {
                joinQuery = JoinUtil.CreateJoinQuery("from", multipleValuesPerDocument, "to", actualQuery, indexSearcher, scoreMode);
            }
            else
            {
                joinQuery = JoinUtil.CreateJoinQuery("to", multipleValuesPerDocument, "from", actualQuery, indexSearcher, scoreMode);
            }
            if (Verbose)
            {
                Console.WriteLine("joinQuery=" + joinQuery);
            }

            // Need to know all documents that have matches. TopDocs doesn't give me that and then I'd be also testing TopDocsCollector...
            FixedBitSet actualResult = new FixedBitSet(indexSearcher.IndexReader.MaxDoc);
            TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.Create(10, false);
            indexSearcher.Search(joinQuery, new CollectorAnonymousClass2(scoreDocsInOrder, actualResult, topScoreDocCollector));

            // Asserting bit set...
            if (Verbose)
            {
                Console.WriteLine("expected cardinality:" + expectedResult.Cardinality);
                DocIdSetIterator iterator = expectedResult.GetIterator();
                for (int doc = iterator.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.NextDoc())
                {
                    Console.WriteLine(string.Format("Expected doc[{0}] with id value {1}", doc, indexSearcher.Doc(doc).Get("id")));
                }
                Console.WriteLine("actual cardinality:" + actualResult.Cardinality);
                iterator = actualResult.GetIterator();
                for (int doc = iterator.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.NextDoc())
                {
                    Console.WriteLine(string.Format("Actual doc[{0}] with id value {1}", doc, indexSearcher.Doc(doc).Get("id")));
                }
            }
            assertEquals(expectedResult, actualResult);

            // Asserting TopDocs...
            TopDocs expectedTopDocs = CreateExpectedTopDocs(randomValue, from, scoreMode, context);
            TopDocs actualTopDocs = topScoreDocCollector.GetTopDocs();
            assertEquals(expectedTopDocs.TotalHits, actualTopDocs.TotalHits);
            assertEquals(expectedTopDocs.ScoreDocs.Length, actualTopDocs.ScoreDocs.Length);
            if (scoreMode == ScoreMode.None)
            {
                // Without scoring, per-hit order and scores are not compared.
                continue;
            }

            assertEquals(expectedTopDocs.MaxScore, actualTopDocs.MaxScore, 0.0f);
            for (int i = 0; i < expectedTopDocs.ScoreDocs.Length; i++)
            {
                if (Verbose)
                {
                    Console.WriteLine(string.Format(CultureInfo.InvariantCulture, "Expected doc: {0} | Actual doc: {1}\n", expectedTopDocs.ScoreDocs[i].Doc, actualTopDocs.ScoreDocs[i].Doc));
                    Console.WriteLine(string.Format(CultureInfo.InvariantCulture, "Expected score: {0} | Actual score: {1}\n", expectedTopDocs.ScoreDocs[i].Score, actualTopDocs.ScoreDocs[i].Score));
                }
                assertEquals(expectedTopDocs.ScoreDocs[i].Doc, actualTopDocs.ScoreDocs[i].Doc);
                assertEquals(expectedTopDocs.ScoreDocs[i].Score, actualTopDocs.ScoreDocs[i].Score, 0.0f);
                // The explanation's value must agree with the expected score as well.
                Explanation explanation = indexSearcher.Explain(joinQuery, expectedTopDocs.ScoreDocs[i].Doc);
                assertEquals(expectedTopDocs.ScoreDocs[i].Score, explanation.Value, 0.0f);
            }
        }
        topLevelReader.Dispose();
        dir.Dispose();
    }
}
/// <summary>
/// Does in-place AND NOT of the bits provided by the iterator.
/// </summary>
/// <param name="iter">
/// Iterator whose documents are cleared from this set. Unpositioned bit-set iterators
/// (<c>DocID() == -1</c>) are handled word-wise and then exhausted; any other iterator
/// is consumed doc by doc until it returns an id at or beyond <c>NumBits</c>.
/// </param>
public void AndNot(DocIdSetIterator iter)
{
    OpenBitSetIterator openIterator = iter as OpenBitSetIterator;
    if (openIterator != null && iter.DocID() == -1)
    {
        AndNot(openIterator.Arr, openIterator.Words);
        // advance after last doc that would be accepted if standard
        // iteration is used (to exhaust it):
        openIterator.Advance(NumBits);
        return;
    }

    FixedBitSetIterator fixedIterator = iter as FixedBitSetIterator;
    if (fixedIterator != null && iter.DocID() == -1)
    {
        AndNot(fixedIterator.bits, fixedIterator.NumWords);
        // advance after last doc that would be accepted if standard
        // iteration is used (to exhaust it):
        fixedIterator.Advance(NumBits);
        return;
    }

    // Generic fallback: clear one bit per document.
    for (int docId = iter.NextDoc(); docId < NumBits; docId = iter.NextDoc())
    {
        Clear(docId);
    }
}
/// <summary>
/// Does in-place XOR of the bits provided by the iterator.
/// </summary>
/// <param name="iter">
/// Iterator whose documents are toggled in this set; it is consumed until it
/// returns an id at or beyond <c>NumBits</c>.
/// </param>
public void Xor(DocIdSetIterator iter)
{
    for (int docId = iter.NextDoc(); docId < NumBits; docId = iter.NextDoc())
    {
        // Flip the single bit [docId, docId + 1).
        Flip(docId, docId + 1);
    }
}
/// <summary>
/// Counts, per configured range, the values that <paramref name="valueSource"/> yields for
/// the matching documents, and maintains <c>m_totCount</c> as total hits minus the
/// documents that carried no value or fell into no range.
/// </summary>
/// <param name="valueSource">Source of the per-document <c>long</c> value.</param>
/// <param name="matchingDocs">Per-segment hits to aggregate.</param>
/// <exception cref="ArgumentException">
/// The fast-match filter produced a <see cref="DocIdSet"/> that does not expose <c>Bits</c>.
/// </exception>
private void Count(ValueSource valueSource, IList<MatchingDocs> matchingDocs)
{
    Int64Range[] ranges = (Int64Range[])this.m_ranges;

    Int64RangeCounter counter = new Int64RangeCounter(ranges);

    int missingCount = 0;
    foreach (MatchingDocs hits in matchingDocs)
    {
        FunctionValues fv = valueSource.GetValues(Collections.EmptyMap<string, object>(), hits.Context);

        m_totCount += hits.TotalHits;

        // Optional random-access pre-filter for this segment.
        IBits bits;
        if (m_fastMatchFilter != null)
        {
            DocIdSet dis = m_fastMatchFilter.GetDocIdSet(hits.Context, null);
            if (dis is null)
            {
                // No documents match
                continue;
            }
            bits = dis.Bits;
            if (bits is null)
            {
                throw new ArgumentException("fastMatchFilter does not implement DocIdSet.Bits");
            }
        }
        else
        {
            bits = null;
        }

        DocIdSetIterator docs = hits.Bits.GetIterator();
        int doc;
        while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
        {
            if (bits != null && bits.Get(doc) == false)
            {
                // Rejected by the fast-match filter. (The former "doc++" here was a dead
                // store: doc is reassigned by NextDoc() before it is read again.)
                continue;
            }

            // Skip missing docs:
            if (fv.Exists(doc))
            {
                counter.Add(fv.Int64Val(doc));
            }
            else
            {
                missingCount++;
            }
        }
    }

    // FillCounts' return value is added to the missing tally before adjusting the total.
    int x = counter.FillCounts(m_counts);

    missingCount += x;

    m_totCount -= missingCount;
}
/// <summary>
/// Scores by iterating the base scorer exhaustively and checking each of its documents
/// against every drill-down dimension. Works in windows of <c>CHUNK</c> documents: base
/// hits seed the slots, each dimension then updates the per-slot match count, and finally
/// slots with <c>1 + numDims</c> matches are collected as hits while slots with
/// <c>numDims</c> matches are collected as near misses for the recorded missing dimension.
/// </summary>
/// <param name="collector">Passed to <c>CollectHit</c> for full hits.</param>
/// <param name="disis">One iterator per dimension; entries may be <c>null</c>. Index 0 is read unconditionally.</param>
/// <param name="sidewaysCollectors">Indexed by the missing dimension for near misses; also passed to <c>CollectHit</c>.</param>
private void DoUnionScoring(ICollector collector, DocIdSetIterator[] disis, ICollector[] sidewaysCollectors)
{
    int maxDoc = context.Reader.MaxDoc;
    int numDims = dims.Length;

    // TODO: maybe a class like BS, instead of parallel arrays
    // All arrays below are indexed by slot (docID & MASK) within the current window.
    int[] filledSlots = new int[CHUNK];
    int[] docIDs = new int[CHUNK];
    float[] scores = new float[CHUNK];
    int[] missingDims = new int[CHUNK];
    int[] counts = new int[CHUNK];

    // Slot 0 would otherwise look like it already holds doc 0 (arrays are zero-initialized).
    docIDs[0] = -1;

    // NOTE: this is basically a specialized version of
    // BooleanScorer, to the minShouldMatch=N-1 case, but
    // carefully tracking which dimension failed to match

    int nextChunkStart = CHUNK;

    while (true)
    {
        // Seed the window with every base-scorer hit below nextChunkStart.
        int filledCount = 0;
        int docID = baseScorer.DocID;
        while (docID < nextChunkStart)
        {
            int slot = docID & MASK;

            // Mark slot as valid:
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(docIDs[slot] != docID, () => "slot=" + slot + " docID=" + docID);
            }
            docIDs[slot] = docID;
            scores[slot] = baseScorer.GetScore();
            filledSlots[filledCount++] = slot;
            missingDims[slot] = 0;
            counts[slot] = 1;

            docID = baseScorer.NextDoc();
        }

        if (filledCount == 0)
        {
            // No base hits in this window; move on (or stop past the last doc).
            if (nextChunkStart >= maxDoc)
            {
                break;
            }
            nextChunkStart += CHUNK;
            continue;
        }

        // First drill-down dim, basically adds SHOULD onto
        // the baseQuery:
        DocIdSetIterator disi = disis[0];
        if (disi != null)
        {
            docID = disi.DocID;
            while (docID < nextChunkStart)
            {
                int slot = docID & MASK;
                if (docIDs[slot] == docID)
                {
                    missingDims[slot] = 1;
                    counts[slot] = 2;
                }
                docID = disi.NextDoc();
            }
        }

        for (int dim = 1; dim < numDims; dim++)
        {
            disi = disis[dim];
            if (disi != null)
            {
                docID = disi.DocID;
                while (docID < nextChunkStart)
                {
                    int slot = docID & MASK;
                    if (docIDs[slot] == docID && counts[slot] >= dim)
                    {
                        // This doc is still in the running...
                        // TODO: single-valued dims will always be true
                        // below; we could somehow specialize
                        if (missingDims[slot] >= dim)
                        {
                            missingDims[slot] = dim + 1;
                            counts[slot] = dim + 2;
                        }
                        else
                        {
                            counts[slot] = dim + 1;
                        }
                    }
                    docID = disi.NextDoc();
                }
            }
        }

        // Collect:
        for (int i = 0; i < filledCount; i++)
        {
            // NOTE: This is actually in-order collection,
            // because we only accept docs originally returned by
            // the baseScorer (ie that Scorer is AND'd)
            int slot = filledSlots[i];
            collectDocID = docIDs[slot];
            collectScore = scores[slot];
            if (counts[slot] == 1 + numDims)
            {
                CollectHit(collector, sidewaysCollectors);
            }
            else if (counts[slot] == numDims)
            {
                CollectNearMiss(sidewaysCollectors[missingDims[slot]]);
            }
        }

        if (nextChunkStart >= maxDoc)
        {
            break;
        }

        nextChunkStart += CHUNK;
    }
}
/// <summary>
/// Convenience method to add the content of a <see cref="DocIdSetIterator"/> to this builder.
/// </summary>
/// <param name="it">Iterator to drain; every document id it returns is passed to <c>Add(int)</c>.</param>
/// <returns>This builder, to allow call chaining.</returns>
public virtual Builder Add(DocIdSetIterator it)
{
    int docId;
    while ((docId = it.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
    {
        Add(docId);
    }
    return this;
}
/// <summary>
/// Does all the "real work" of tallying up the counts.
/// <para/>
/// For every segment with sorted-set doc values, walks the matching documents and
/// increments <c>counts</c> per ordinal. When an ordinal map is in use, segment ordinals
/// are translated to global ordinals either per occurrence (few hits) or once per
/// distinct segment ordinal after counting in segment space (many hits).
/// </summary>
/// <param name="matchingDocs">Per-segment hits to aggregate.</param>
/// <exception cref="ThreadStateException">
/// The top-level reader of a hit's context is not the reader the state was created from.
/// </exception>
private void Count(IList<FacetsCollector.MatchingDocs> matchingDocs)
{
    MultiDocValues.OrdinalMap ordinalMap;

    // TODO: is this right?  really, we need a way to
    // verify that this ordinalMap "matches" the leaves in
    // matchingDocs...
    if (dv is MultiDocValues.MultiSortedSetDocValues && matchingDocs.Count > 1)
    {
        ordinalMap = ((MultiDocValues.MultiSortedSetDocValues)dv).Mapping;
    }
    else
    {
        ordinalMap = null;
    }

    IndexReader origReader = state.OrigReader;

    foreach (FacetsCollector.MatchingDocs hits in matchingDocs)
    {
        var reader = hits.context.AtomicReader;

        // LUCENE-5090: make sure the provided reader context "matches"
        // the top-level reader passed to the
        // SortedSetDocValuesReaderState, else cryptic
        // AIOOBE can happen:
        // NOTE(review): ThreadStateException looks like a mistranslation of Java's
        // IllegalStateException; left unchanged because callers may catch this type.
        if (!Equals(ReaderUtil.GetTopLevelContext(hits.context).Reader, origReader))
        {
            throw new ThreadStateException("the SortedSetDocValuesReaderState provided to this class does not match the reader being searched; you must create a new SortedSetDocValuesReaderState every time you open a new IndexReader");
        }

        SortedSetDocValues segValues = reader.GetSortedSetDocValues(field);
        if (segValues == null)
        {
            // This segment has no values for the field.
            continue;
        }

        DocIdSetIterator docs = hits.bits.GetIterator();

        // TODO: yet another option is to count all segs
        // first, only in seg-ord space, and then do a
        // merge-sort-PQ in the end to only "resolve to
        // global" those seg ords that can compete, if we know
        // we just want top K?  ie, this is the same algo
        // that'd be used for merging facets across shards
        // (distributed faceting).  but this has much higher
        // temp ram req'ts (sum of number of ords across all
        // segs)
        if (ordinalMap != null)
        {
            int segOrd = hits.context.Ord;

            int numSegOrds = (int)segValues.ValueCount;

            // Heuristic: with fewer hits than a tenth of the segment's ordinals,
            // remapping per occurrence is used instead of a dense per-segment array.
            if (hits.totalHits < numSegOrds / 10)
            {
                // Remap every ord to global ord as we iterate:
                int doc;
                while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                {
                    segValues.Document = doc;
                    int term = (int)segValues.NextOrd();
                    while (term != SortedSetDocValues.NO_MORE_ORDS)
                    {
                        counts[(int)ordinalMap.GetGlobalOrd(segOrd, term)]++;
                        term = (int)segValues.NextOrd();
                    }
                }
            }
            else
            {
                // First count in seg-ord space:
                int[] segCounts = new int[numSegOrds];
                int doc;
                while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                {
                    segValues.Document = doc;
                    int term = (int)segValues.NextOrd();
                    while (term != SortedSetDocValues.NO_MORE_ORDS)
                    {
                        segCounts[term]++;
                        term = (int)segValues.NextOrd();
                    }
                }

                // Then, migrate to global ords:
                for (int ord = 0; ord < numSegOrds; ord++)
                {
                    int count = segCounts[ord];
                    if (count != 0)
                    {
                        counts[(int)ordinalMap.GetGlobalOrd(segOrd, ord)] += count;
                    }
                }
            }
        }
        else
        {
            // No ord mapping (e.g., single segment index):
            // just aggregate directly into counts:
            int doc;
            while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
            {
                segValues.Document = doc;
                int term = (int)segValues.NextOrd();
                while (term != SortedSetDocValues.NO_MORE_ORDS)
                {
                    counts[term]++;
                    term = (int)segValues.NextOrd();
                }
            }
        }
    }
}
/// <summary>
/// Checks that <paramref name="ds2"/> exposes exactly the bits set in <paramref name="ds1"/>,
/// first through <c>NextDoc</c> alone, then through a random mix of <c>NextDoc</c> and
/// <c>Advance</c>, and finally through <c>Bits</c> when the set provides them.
/// </summary>
/// <param name="numBits">Used to size the random jump length passed to <c>Advance</c>.</param>
/// <param name="ds1">The reference bit set.</param>
/// <param name="ds2">The <see cref="DocIdSet"/> under test.</param>
#pragma warning disable xUnit1013
public virtual void AssertEquals(int numBits, BitSet ds1, T ds2)
#pragma warning restore xUnit1013
{
    // Pass 1: sequential iteration with NextDoc only.
    DocIdSetIterator iter = ds2.GetIterator();
    if (iter != null)
    {
        // A fresh iterator reports DocID -1 before the first NextDoc.
        Assert.AreEqual(-1, iter.DocID);

        int expected = ds1.NextSetBit(0);
        while (expected != -1)
        {
            Assert.AreEqual(expected, iter.NextDoc());
            Assert.AreEqual(expected, iter.DocID);
            expected = ds1.NextSetBit(expected + 1);
        }

        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, iter.NextDoc());
        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, iter.DocID);
    }
    else
    {
        // A null iterator is only acceptable for an empty reference set.
        Assert.AreEqual(-1, ds1.NextSetBit(0));
    }

    // Pass 2: random interleaving of NextDoc and Advance.
    iter = ds2.GetIterator();
    if (iter != null)
    {
        int current = -1;
        while (current != DocIdSetIterator.NO_MORE_DOCS)
        {
            if (Random.NextBoolean())
            {
                current = ds1.NextSetBit(current + 1);
                if (current == -1)
                {
                    current = DocIdSetIterator.NO_MORE_DOCS;
                }

                Assert.AreEqual(current, iter.NextDoc());
                Assert.AreEqual(current, iter.DocID);
            }
            else
            {
                // Jump ahead by either a short hop (< 64) or a hop scaled to numBits.
                int firstCandidate = current + 1;
                int hopBound = Random.NextBoolean() ? 64 : Math.Max(numBits / 8, 1);
                int target = firstCandidate + Random.Next(hopBound);

                current = ds1.NextSetBit(target);
                if (current == -1)
                {
                    current = DocIdSetIterator.NO_MORE_DOCS;
                }

                Assert.AreEqual(current, iter.Advance(target));
                Assert.AreEqual(current, iter.DocID);
            }
        }
    }
    else
    {
        Assert.AreEqual(-1, ds1.NextSetBit(0));
    }

    // Pass 3: random-access Bits, when available, must agree with the iterator.
    IBits bits = ds2.Bits;
    if (bits == null)
    {
        return;
    }

    iter = ds2.GetIterator();
    int previous = -1;
    while (true)
    {
        int doc = iter.NextDoc();
        bool exhausted = doc == DocIdSetIterator.NO_MORE_DOCS;

        // Every index strictly between two consecutive docs (or up to Length at the end) is unset.
        int gapEnd = exhausted ? bits.Length : doc;
        for (int i = previous + 1; i < gapEnd; ++i)
        {
            Assert.AreEqual(false, bits.Get(i));
        }

        if (exhausted)
        {
            break;
        }

        Assert.AreEqual(true, bits.Get(doc));
        previous = doc;
    }
}
/// <summary>
/// Moves <c>matchingDocsIterator</c> to its next document, stores that document
/// in <c>currentDoc</c>, and returns it.
/// </summary>
/// <returns>The value returned by <c>matchingDocsIterator.NextDoc()</c>.</returns>
public override int NextDoc()
{
    int next = matchingDocsIterator.NextDoc();
    currentDoc = next;
    return next;
}
/// <summary>
/// Creates a sample of the given hits by keeping one randomly chosen document out of
/// every bin of <c>(int)(1.0 / samplingRate)</c> consecutive hits.
/// </summary>
/// <remarks>
/// A bin that is only partially consumed when the iterator runs out is carried over to
/// the next call through the <c>leftoverBin</c> and <c>leftoverIndex</c> fields.
/// </remarks>
/// <param name="docs">The hits of one segment to sample from.</param>
/// <returns>
/// A new <see cref="MatchingDocs"/> with the same context and total hit count as
/// <paramref name="docs"/>, whose bits contain only the sampled documents.
/// </returns>
/// <exception cref="Exception">
/// An <see cref="IOException"/> was raised while sampling; it is attached as the inner exception.
/// </exception>
private MatchingDocs CreateSample(MatchingDocs docs)
{
    int maxdoc = docs.Context.Reader.MaxDoc;

    // TODO: we could try the WAH8DocIdSet here as well, as the results will be sparse
    FixedBitSet sampleDocs = new FixedBitSet(maxdoc);

    int binSize = (int)(1.0 / samplingRate);

    try
    {
        int counter = 0;
        int limit, randomIndex;
        if (leftoverBin != NOT_CALCULATED)
        {
            // Resume the bin that the previous call left unfinished.
            limit = leftoverBin;
            // either NOT_CALCULATED, which means we already sampled from that bin,
            // or the next document to sample
            randomIndex = leftoverIndex;
        }
        else
        {
            limit = binSize;
            randomIndex = random.NextInt32(binSize);
        }

        DocIdSetIterator it = docs.Bits.GetIterator();
        for (int doc = it.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.NextDoc())
        {
            if (counter == randomIndex)
            {
                sampleDocs.Set(doc);
            }
            counter++;
            if (counter >= limit)
            {
                // Bin exhausted: start a fresh full-size bin with a new random pick.
                counter = 0;
                limit = binSize;
                randomIndex = random.NextInt32(binSize);
            }
        }

        if (counter == 0)
        {
            // we either exhausted the bin and the iterator at the same time, or
            // this segment had no results. in the latter case we might want to
            // carry leftover to the next segment as is, but that complicates the
            // code and doesn't seem so important.
            leftoverBin = leftoverIndex = NOT_CALCULATED;
        }
        else
        {
            leftoverBin = limit - counter;
            if (randomIndex > counter)
            {
                // the document to sample is in the next bin
                leftoverIndex = randomIndex - counter;
            }
            else if (randomIndex < counter)
            {
                // we sampled a document from the bin, so just skip over remaining
                // documents in the bin in the next segment.
                leftoverIndex = NOT_CALCULATED;
            }
            // NOTE(review): when randomIndex == counter neither branch runs, so
            // leftoverIndex keeps whatever value it had before - confirm this is intended.
        }

        return new MatchingDocs(docs.Context, sampleDocs, docs.TotalHits, null);
    }
    catch (IOException e)
    {
        // Keep the thrown type callers already see, but preserve the cause and its
        // stack trace instead of discarding them.
        throw new Exception(e.Message, e);
    }
}
/// <summary>
/// Delegates to <c>m_docSetIter</c> and returns the document it moves to.
/// </summary>
/// <returns>The value returned by <c>m_docSetIter.NextDoc()</c>.</returns>
public override int NextDoc()
{
    int next = m_docSetIter.NextDoc();
    return next;
}
/// <summary>
/// Counts, for every hit in <paramref name="matchingDocs"/>, which of the configured
/// double ranges the hit's value falls into, and stores the result in <c>m_counts</c>.
/// </summary>
/// <remarks>
/// Each <see cref="DoubleRange"/> is converted to an <see cref="Int64Range"/> over the
/// sortable-long encoding of its bounds, so counting is done by an
/// <see cref="Int64RangeCounter"/>. Hits without a value, plus whatever
/// <c>FillCounts</c> reports, are subtracted from <c>m_totCount</c> at the end.
/// </remarks>
/// <param name="valueSource">Supplies the per-document double value.</param>
/// <param name="matchingDocs">Per-segment hits to count.</param>
/// <exception cref="System.ArgumentException">
/// <c>m_fastMatchFilter</c> returned a <see cref="DocIdSet"/> whose <c>Bits</c> is <c>null</c>.
/// </exception>
private void Count(ValueSource valueSource, IEnumerable<MatchingDocs> matchingDocs)
{
    DoubleRange[] ranges = (DoubleRange[])this.m_ranges;

    // Re-express every double range over the sortable-long encoding; both ends inclusive.
    Int64Range[] longRanges = new Int64Range[ranges.Length];
    for (int i = 0; i < ranges.Length; i++)
    {
        DoubleRange range = ranges[i];
        longRanges[i] = new Int64Range(range.Label,
            NumericUtils.DoubleToSortableInt64(range.minIncl), true,
            NumericUtils.DoubleToSortableInt64(range.maxIncl), true);
    }

    Int64RangeCounter counter = new Int64RangeCounter(longRanges);

    int missingCount = 0;
    foreach (MatchingDocs hits in matchingDocs)
    {
        FunctionValues fv = valueSource.GetValues(new Dictionary<string, object>(), hits.Context);

        // Added before the fast-match check, so segments skipped below still contribute here.
        m_totCount += hits.TotalHits;

        IBits bits;
        if (m_fastMatchFilter != null)
        {
            DocIdSet dis = m_fastMatchFilter.GetDocIdSet(hits.Context, null);
            if (dis == null)
            {
                // No documents match
                continue;
            }
            bits = dis.Bits;
            if (bits == null)
            {
                throw new System.ArgumentException("fastMatchFilter does not implement DocIdSet.bits");
            }
        }
        else
        {
            bits = null;
        }

        DocIdSetIterator docs = hits.Bits.GetIterator();

        int doc;
        while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
        {
            if (bits != null && bits.Get(doc) == false)
            {
                // Rejected by the fast-match filter. The loop condition fetches the next
                // doc from the iterator, so nothing needs to be advanced by hand here.
                continue;
            }

            // Skip missing docs:
            if (fv.Exists(doc))
            {
                counter.Add(NumericUtils.DoubleToSortableInt64(fv.DoubleVal(doc)));
            }
            else
            {
                missingCount++;
            }
        }
    }

    missingCount += counter.FillCounts(m_counts);
    m_totCount -= missingCount;
}