/// <summary>
/// Looks up the postings (<see cref="DocsEnum"/>) of a single term, filtered by
/// the reader's <c>LiveDocs</c>.
/// </summary>
/// <param name="term">The term to look up; its field and bytes must be non-null.</param>
/// <returns>
/// The <see cref="DocsEnum"/> for <paramref name="term"/>, or <c>null</c> when there are
/// no fields, the term's field has no terms, or the term itself is not found.
/// </returns>
/// <seealso cref="TermsEnum.Docs(IBits, DocsEnum)"/>
public DocsEnum GetTermDocsEnum(Term term) // LUCENENET specific: Renamed from TermDocsEnum()
{
    Debug.Assert(term.Field != null);
    Debug.Assert(term.Bytes != null);

    Fields allFields = Fields;
    if (allFields == null)
    {
        return null;
    }

    Terms fieldTerms = allFields.GetTerms(term.Field);
    if (fieldTerms == null)
    {
        return null;
    }

    TermsEnum iterator = fieldTerms.GetIterator(null);
    if (!iterator.SeekExact(term.Bytes))
    {
        return null;
    }

    return iterator.Docs(LiveDocs, null);
}
/// <summary>
/// Steps through both terms enums in lock-step and asserts that they produce the same
/// terms and the same term statistics.
/// <para/>
/// When <paramref name="deep"/> is <c>false</c> only this 'shallow' comparison is made;
/// when it is <c>true</c> the docs enums and docs-and-positions enums of every term are
/// compared too, each once without live docs and once with a random bit set.
/// </summary>
/// <param name="info">Message passed along to every assertion.</param>
/// <param name="leftReader">Reader whose <c>MaxDoc</c> sizes the random bit set; also forwarded to the skipping checks.</param>
/// <param name="leftTermsEnum">Enum that drives the iteration.</param>
/// <param name="rightTermsEnum">Enum that must match <paramref name="leftTermsEnum"/> term for term.</param>
/// <param name="deep">Whether to descend into the per-term docs / positions enums.</param>
public void AssertTermsEnumEquals(string info, IndexReader leftReader, TermsEnum leftTermsEnum, TermsEnum rightTermsEnum, bool deep)
{
    Bits randomBits = new RandomBits(leftReader.MaxDoc, Random().NextDouble(), Random());

    // Each deep check is executed for these two live-docs settings, in this order.
    Bits[] liveDocsVariants = { null, randomBits };

    DocsAndPositionsEnum leftPositions = null;
    DocsAndPositionsEnum rightPositions = null;
    DocsEnum leftDocs = null;
    DocsEnum rightDocs = null;

    for (BytesRef term = leftTermsEnum.Next(); term != null; term = leftTermsEnum.Next())
    {
        Assert.AreEqual(term, rightTermsEnum.Next(), info);
        AssertTermStatsEquals(info, leftTermsEnum, rightTermsEnum);

        if (!deep)
        {
            continue;
        }

        // positions: sequential iteration
        foreach (Bits liveDocs in liveDocsVariants)
        {
            leftPositions = leftTermsEnum.DocsAndPositions(liveDocs, leftPositions);
            rightPositions = rightTermsEnum.DocsAndPositions(liveDocs, rightPositions);
            AssertDocsAndPositionsEnumEquals(info, leftPositions, rightPositions);
        }

        // positions: skipping
        foreach (Bits liveDocs in liveDocsVariants)
        {
            int docFreq = leftTermsEnum.DocFreq();
            leftPositions = leftTermsEnum.DocsAndPositions(liveDocs, leftPositions);
            rightPositions = rightTermsEnum.DocsAndPositions(liveDocs, rightPositions);
            AssertPositionsSkippingEquals(info, leftReader, docFreq, leftPositions, rightPositions);
        }

        // docs, with freqs:
        foreach (Bits liveDocs in liveDocsVariants)
        {
            leftDocs = leftTermsEnum.Docs(liveDocs, leftDocs);
            rightDocs = rightTermsEnum.Docs(liveDocs, rightDocs);
            AssertDocsEnumEquals(info, leftDocs, rightDocs, true);
        }

        // docs, w/o freqs:
        foreach (Bits liveDocs in liveDocsVariants)
        {
            leftDocs = leftTermsEnum.Docs(liveDocs, leftDocs, DocsEnum.FLAG_NONE);
            rightDocs = rightTermsEnum.Docs(liveDocs, rightDocs, DocsEnum.FLAG_NONE);
            AssertDocsEnumEquals(info, leftDocs, rightDocs, false);
        }

        // docs skipping, with freqs:
        foreach (Bits liveDocs in liveDocsVariants)
        {
            int docFreq = leftTermsEnum.DocFreq();
            leftDocs = leftTermsEnum.Docs(liveDocs, leftDocs);
            rightDocs = rightTermsEnum.Docs(liveDocs, rightDocs);
            AssertDocsSkippingEquals(info, leftReader, docFreq, leftDocs, rightDocs, true);
        }

        // docs skipping, w/o freqs:
        foreach (Bits liveDocs in liveDocsVariants)
        {
            int docFreq = leftTermsEnum.DocFreq();
            leftDocs = leftTermsEnum.Docs(liveDocs, leftDocs, DocsEnum.FLAG_NONE);
            rightDocs = rightTermsEnum.Docs(liveDocs, rightDocs, DocsEnum.FLAG_NONE);
            AssertDocsSkippingEquals(info, leftReader, docFreq, leftDocs, rightDocs, false);
        }
    }

    // The right-hand enum must be exhausted at the same point as the left-hand one.
    Assert.IsNull(rightTermsEnum.Next(), info);
}
/// <summary>
/// Steps through both terms enums in lock-step and asserts that they produce the same
/// terms and the same term statistics.
/// <para/>
/// When <paramref name="deep"/> is <c>false</c> only this 'shallow' comparison is made;
/// when it is <c>true</c> the docs enums and docs-and-positions enums of every term are
/// compared too, for several flag combinations, each once without live docs and once
/// with a random bit set.
/// </summary>
/// <param name="leftTermsEnum">Enum that drives the iteration.</param>
/// <param name="rightTermsEnum">Enum that must match <paramref name="leftTermsEnum"/> term for term.</param>
/// <param name="deep">Whether to descend into the per-term docs / positions enums.</param>
public virtual void AssertTermsEnum(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum, bool deep)
{
    Bits randomBits = new RandomBits(MAXDOC, Random().NextDouble(), Random());

    // Each deep check is executed for these two live-docs settings, in this order.
    Bits[] liveDocsVariants = { null, randomBits };

    // Explicit flag settings exercised after the default (two-argument) overload:
    // payloads only, offsets only, positions only.
    var positionFlagVariants = new[]
    {
        DocsAndPositionsEnum.FLAG_PAYLOADS,
        DocsAndPositionsEnum.FLAG_OFFSETS,
        DocsEnum.FLAG_NONE
    };

    DocsAndPositionsEnum leftPositions = null;
    DocsAndPositionsEnum rightPositions = null;
    DocsEnum leftDocs = null;
    DocsEnum rightDocs = null;

    for (BytesRef term = leftTermsEnum.Next(); term != null; term = leftTermsEnum.Next())
    {
        Assert.AreEqual(term, rightTermsEnum.Next());
        AssertTermStats(leftTermsEnum, rightTermsEnum);

        if (!deep)
        {
            continue;
        }

        // with payloads + off
        foreach (Bits liveDocs in liveDocsVariants)
        {
            leftPositions = leftTermsEnum.DocsAndPositions(liveDocs, leftPositions);
            rightPositions = rightTermsEnum.DocsAndPositions(liveDocs, rightPositions);
            AssertDocsAndPositionsEnum(leftPositions, rightPositions);
        }
        foreach (Bits liveDocs in liveDocsVariants)
        {
            int docFreq = leftTermsEnum.DocFreq();
            leftPositions = leftTermsEnum.DocsAndPositions(liveDocs, leftPositions);
            rightPositions = rightTermsEnum.DocsAndPositions(liveDocs, rightPositions);
            AssertPositionsSkipping(docFreq, leftPositions, rightPositions);
        }

        // with payloads only, then offsets only, then positions only
        foreach (var flags in positionFlagVariants)
        {
            foreach (Bits liveDocs in liveDocsVariants)
            {
                leftPositions = leftTermsEnum.DocsAndPositions(liveDocs, leftPositions, flags);
                rightPositions = rightTermsEnum.DocsAndPositions(liveDocs, rightPositions, flags);
                AssertDocsAndPositionsEnum(leftPositions, rightPositions);
            }
            foreach (Bits liveDocs in liveDocsVariants)
            {
                int docFreq = leftTermsEnum.DocFreq();
                leftPositions = leftTermsEnum.DocsAndPositions(liveDocs, leftPositions, flags);
                rightPositions = rightTermsEnum.DocsAndPositions(liveDocs, rightPositions, flags);
                AssertPositionsSkipping(docFreq, leftPositions, rightPositions);
            }
        }

        // docs, with freqs:
        foreach (Bits liveDocs in liveDocsVariants)
        {
            leftDocs = leftTermsEnum.Docs(liveDocs, leftDocs);
            rightDocs = rightTermsEnum.Docs(liveDocs, rightDocs);
            AssertDocsEnum(leftDocs, rightDocs);
        }

        // docs, w/o freqs:
        foreach (Bits liveDocs in liveDocsVariants)
        {
            leftDocs = leftTermsEnum.Docs(liveDocs, leftDocs, DocsEnum.FLAG_NONE);
            rightDocs = rightTermsEnum.Docs(liveDocs, rightDocs, DocsEnum.FLAG_NONE);
            AssertDocsEnum(leftDocs, rightDocs);
        }

        // docs skipping, with freqs:
        foreach (Bits liveDocs in liveDocsVariants)
        {
            int docFreq = leftTermsEnum.DocFreq();
            leftDocs = leftTermsEnum.Docs(liveDocs, leftDocs);
            rightDocs = rightTermsEnum.Docs(liveDocs, rightDocs);
            AssertDocsSkipping(docFreq, leftDocs, rightDocs);
        }

        // docs skipping, w/o freqs:
        foreach (Bits liveDocs in liveDocsVariants)
        {
            int docFreq = leftTermsEnum.DocFreq();
            leftDocs = leftTermsEnum.Docs(liveDocs, leftDocs, DocsEnum.FLAG_NONE);
            rightDocs = rightTermsEnum.Docs(liveDocs, rightDocs, DocsEnum.FLAG_NONE);
            AssertDocsSkipping(docFreq, leftDocs, rightDocs);
        }
    }

    // The right-hand enum must be exhausted at the same point as the left-hand one.
    Assert.IsNull(rightTermsEnum.Next());
}
/// <summary>
/// Opens a term vectors reader through the default codec and checks document 0:
/// <list type="bullet">
/// <item><description>for <c>TestFields[0]</c>, every term in <c>TestTerms</c> is returned in order and its
/// docs-and-positions enum yields the expected positions and offsets;</description></item>
/// <item><description>for <c>TestFields[1]</c>, every term has a docs enum but no docs-and-positions enum.</description></item>
/// </list>
/// </summary>
public virtual void TestPositionReader()
{
    TermVectorsReader reader = Codec.Default.TermVectorsFormat().VectorsReader(Dir, Seg.Info, FieldInfos, NewIOContext(Random()));
    try
    {
        Terms vector = reader.Get(0).Terms(TestFields[0]);
        Assert.IsNotNull(vector);
        Assert.AreEqual(TestTerms.Length, vector.Size());
        TermsEnum termsEnum = vector.Iterator(null);
        DocsAndPositionsEnum dpEnum = null;
        for (int i = 0; i < TestTerms.Length; i++)
        {
            BytesRef text = termsEnum.Next();
            Assert.IsNotNull(text);
            string term = text.Utf8ToString();
            //System.out.println("Term: " + term);
            Assert.AreEqual(TestTerms[i], term);

            // First pass: positions only.
            dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
            Assert.IsNotNull(dpEnum);
            int doc = dpEnum.DocID();
            Assert.AreEqual(-1, doc); // a fresh enum is unpositioned
            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(Positions[i].Length, dpEnum.Freq());
            for (int j = 0; j < Positions[i].Length; j++)
            {
                Assert.AreEqual(Positions[i][j], dpEnum.NextPosition());
            }
            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());

            // Second pass (reusing the enum): positions plus offsets.
            dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
            Assert.IsNotNull(dpEnum); // must be checked before the enum is dereferenced
            doc = dpEnum.DocID();
            Assert.AreEqual(-1, doc);
            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(Positions[i].Length, dpEnum.Freq());
            for (int j = 0; j < Positions[i].Length; j++)
            {
                Assert.AreEqual(Positions[i][j], dpEnum.NextPosition());
                // Expected offsets: occurrence j starts at j * 10 and spans the term's length.
                Assert.AreEqual(j * 10, dpEnum.StartOffset());
                Assert.AreEqual(j * 10 + TestTerms[i].Length, dpEnum.EndOffset());
            }
            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());
        }

        Terms freqVector = reader.Get(0).Terms(TestFields[1]); //no pos, no offset
        Assert.IsNotNull(freqVector);
        Assert.AreEqual(TestTerms.Length, freqVector.Size());
        termsEnum = freqVector.Iterator(null);
        Assert.IsNotNull(termsEnum);
        for (int i = 0; i < TestTerms.Length; i++)
        {
            BytesRef text = termsEnum.Next();
            Assert.IsNotNull(text);
            string term = text.Utf8ToString();
            //System.out.println("Term: " + term);
            Assert.AreEqual(TestTerms[i], term);
            Assert.IsNotNull(termsEnum.Docs(null, null));
            Assert.IsNull(termsEnum.DocsAndPositions(null, null)); // no pos
        }
    }
    finally
    {
        // Release the reader even when one of the assertions above throws.
        reader.Dispose();
    }
}
/// <summary>
/// Pulls a docs enum (or docs-and-positions enum) from <paramref name="termsEnum"/> for its
/// current term and compares what it returns against the expected postings obtained from
/// <c>GetSeedPostings</c>: doc IDs, and — depending on the index options and random choices —
/// freqs, positions, payloads and offsets. Iteration randomly mixes <c>NextDoc</c> and
/// <c>Advance</c>, and may stop early or consume only some positions.
/// <para/>
/// The order of the <c>Random()</c> calls below determines which paths a given run takes,
/// so statements should not be reordered casually.
/// </summary>
/// <param name="threadState">Holds the enums offered for reuse; updated with the enums obtained here.</param>
/// <param name="field">Field the term belongs to.</param>
/// <param name="term">Term that <paramref name="termsEnum"/> is expected to be positioned on.</param>
/// <param name="termsEnum">Enum under test.</param>
/// <param name="maxTestOptions">Maximum options (docs/freqs/positions/offsets) to test.</param>
/// <param name="maxIndexOptions">Passed through to <c>GetSeedPostings</c> when building the expected postings.</param>
/// <param name="options">Enabled test behaviours (live docs, payloads, enum reuse, skipping, partial consumption).</param>
/// <param name="alwaysTestMax">When <c>true</c>, the random opt-outs are bypassed: all allowed checks run, all flags are requested and all docs/positions are consumed.</param>
private void VerifyEnum(ThreadState threadState, string field, BytesRef term, TermsEnum termsEnum, FieldInfo.IndexOptions maxTestOptions, FieldInfo.IndexOptions maxIndexOptions, ISet<Option> options, bool alwaysTestMax)
{
    if (VERBOSE)
    {
        Console.WriteLine(" verifyEnum: options=" + options + " maxTestOptions=" + maxTestOptions);
    }

    // Make sure TermsEnum really is positioned on the
    // expected term:
    Assert.AreEqual(term, termsEnum.Term());

    // 50% of the time pass liveDocs (only when the LIVE_DOCS option is enabled):
    bool useLiveDocs = options.Contains(Option.LIVE_DOCS) && Random().NextBoolean();
    Bits liveDocs;
    if (useLiveDocs)
    {
        liveDocs = GlobalLiveDocs;
        if (VERBOSE)
        {
            Console.WriteLine(" use liveDocs");
        }
    }
    else
    {
        liveDocs = null;
        if (VERBOSE)
        {
            Console.WriteLine(" no liveDocs");
        }
    }

    FieldInfo fieldInfo = CurrentFieldInfos.FieldInfo(field);

    // NOTE: can be empty list if we are using liveDocs:
    SeedPostings expected = GetSeedPostings(term.Utf8ToString(), Fields[field][term], useLiveDocs, maxIndexOptions);
    Assert.AreEqual(expected.DocFreq, termsEnum.DocFreq());

    // "allowX": both the field's index options and maxTestOptions include X.
    // "doCheckX": X is allowed and was selected for checking on this call.
    // NOTE(review): if Random() returns a System.Random, Next(3) yields 0..2, so "<= 2" is
    // always true and these checks are never randomly skipped — confirm this is intended.
    bool allowFreqs = fieldInfo.FieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS && maxTestOptions.CompareTo(FieldInfo.IndexOptions.DOCS_AND_FREQS) >= 0;
    bool doCheckFreqs = allowFreqs && (alwaysTestMax || Random().Next(3) <= 2);

    bool allowPositions = fieldInfo.FieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS && maxTestOptions.CompareTo(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
    bool doCheckPositions = allowPositions && (alwaysTestMax || Random().Next(3) <= 2);

    bool allowOffsets = fieldInfo.FieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS && maxTestOptions.CompareTo(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
    bool doCheckOffsets = allowOffsets && (alwaysTestMax || Random().Next(3) <= 2);

    bool doCheckPayloads = options.Contains(Option.PAYLOADS) && allowPositions && fieldInfo.HasPayloads() && (alwaysTestMax || Random().Next(3) <= 2);

    // prevDocsEnum is the enum (if any) handed in for reuse; docsEnum is what we iterate;
    // docsAndPositionsEnum is the same object when a positions enum was pulled, else null.
    DocsEnum prevDocsEnum = null;

    DocsEnum docsEnum;
    DocsAndPositionsEnum docsAndPositionsEnum;

    if (!doCheckPositions)
    {
        if (allowPositions && Random().Next(10) == 7)
        {
            // 10% of the time, even though we will not check positions, pull a DocsAndPositions enum

            // When REUSE_ENUMS is enabled, offer the previous enum for reuse 90% of the time.
            if (options.Contains(Option.REUSE_ENUMS) && Random().Next(10) < 9)
            {
                prevDocsEnum = threadState.ReuseDocsAndPositionsEnum;
            }

            int flags = 0;
            if (alwaysTestMax || Random().NextBoolean())
            {
                flags |= DocsAndPositionsEnum.FLAG_OFFSETS;
            }
            if (alwaysTestMax || Random().NextBoolean())
            {
                flags |= DocsAndPositionsEnum.FLAG_PAYLOADS;
            }

            if (VERBOSE)
            {
                Console.WriteLine(" get DocsAndPositionsEnum (but we won't check positions) flags=" + flags);
            }

            threadState.ReuseDocsAndPositionsEnum = termsEnum.DocsAndPositions(liveDocs, (DocsAndPositionsEnum)prevDocsEnum, flags);
            docsEnum = threadState.ReuseDocsAndPositionsEnum;
            docsAndPositionsEnum = threadState.ReuseDocsAndPositionsEnum;
        }
        else
        {
            if (VERBOSE)
            {
                Console.WriteLine(" get DocsEnum");
            }
            if (options.Contains(Option.REUSE_ENUMS) && Random().Next(10) < 9)
            {
                prevDocsEnum = threadState.ReuseDocsEnum;
            }
            // Only request freqs when they will actually be checked.
            threadState.ReuseDocsEnum = termsEnum.Docs(liveDocs, prevDocsEnum, doCheckFreqs ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
            docsEnum = threadState.ReuseDocsEnum;
            docsAndPositionsEnum = null;
        }
    }
    else
    {
        if (options.Contains(Option.REUSE_ENUMS) && Random().Next(10) < 9)
        {
            prevDocsEnum = threadState.ReuseDocsAndPositionsEnum;
        }

        // Always request what will be checked; otherwise request it one time in three.
        int flags = 0;
        if (alwaysTestMax || doCheckOffsets || Random().Next(3) == 1)
        {
            flags |= DocsAndPositionsEnum.FLAG_OFFSETS;
        }
        if (alwaysTestMax || doCheckPayloads || Random().Next(3) == 1)
        {
            flags |= DocsAndPositionsEnum.FLAG_PAYLOADS;
        }

        if (VERBOSE)
        {
            Console.WriteLine(" get DocsAndPositionsEnum flags=" + flags);
        }

        threadState.ReuseDocsAndPositionsEnum = termsEnum.DocsAndPositions(liveDocs, (DocsAndPositionsEnum)prevDocsEnum, flags);
        docsEnum = threadState.ReuseDocsAndPositionsEnum;
        docsAndPositionsEnum = threadState.ReuseDocsAndPositionsEnum;
    }

    Assert.IsNotNull(docsEnum, "null DocsEnum");
    int initialDocID = docsEnum.DocID();
    Assert.AreEqual(-1, initialDocID, "inital docID should be -1" + docsEnum);

    if (VERBOSE)
    {
        if (prevDocsEnum == null)
        {
            Console.WriteLine(" got enum=" + docsEnum);
        }
        else if (prevDocsEnum == docsEnum)
        {
            Console.WriteLine(" got reuse enum=" + docsEnum);
        }
        else
        {
            Console.WriteLine(" got enum=" + docsEnum + " (reuse of " + prevDocsEnum + " failed)");
        }
    }

    // 10% of the time don't consume all docs:
    int stopAt;
    if (!alwaysTestMax && options.Contains(Option.PARTIAL_DOC_CONSUME) && expected.DocFreq > 1 && Random().Next(10) == 7)
    {
        stopAt = Random().Next(expected.DocFreq - 1);
        if (VERBOSE)
        {
            Console.WriteLine(" will not consume all docs (" + stopAt + " vs " + expected.DocFreq + ")");
        }
    }
    else
    {
        stopAt = expected.DocFreq;
        if (VERBOSE)
        {
            Console.WriteLine(" consume all docs");
        }
    }

    // Skipping parameters: how likely each step is an Advance instead of a NextDoc, and
    // upper bounds for how far a single skip may jump (in postings and in doc IDs).
    double skipChance = alwaysTestMax ? 0.5 : Random().NextDouble();
    int numSkips = expected.DocFreq < 3 ? 1 : TestUtil.NextInt(Random(), 1, Math.Min(20, expected.DocFreq / 3));
    int skipInc = expected.DocFreq / numSkips;
    int skipDocInc = MaxDoc / numSkips;

    // Sometimes do 100% skipping:
    bool doAllSkipping = options.Contains(Option.SKIPPING) && Random().Next(7) == 1;

    // Per-doc / per-position probabilities of actually performing each optional check.
    double freqAskChance = alwaysTestMax ? 1.0 : Random().NextDouble();
    double payloadCheckChance = alwaysTestMax ? 1.0 : Random().NextDouble();
    double offsetCheckChance = alwaysTestMax ? 1.0 : Random().NextDouble();

    if (VERBOSE)
    {
        if (options.Contains(Option.SKIPPING))
        {
            Console.WriteLine(" skipChance=" + skipChance + " numSkips=" + numSkips);
        }
        else
        {
            Console.WriteLine(" no skipping");
        }
        if (doCheckFreqs)
        {
            Console.WriteLine(" freqAskChance=" + freqAskChance);
        }
        if (doCheckPayloads)
        {
            Console.WriteLine(" payloadCheckChance=" + payloadCheckChance);
        }
        if (doCheckOffsets)
        {
            Console.WriteLine(" offsetCheckChance=" + offsetCheckChance);
        }
    }

    // Main loop: advance the expected postings and the enum under test in step.
    while (expected.Upto <= stopAt)
    {
        if (expected.Upto == stopAt)
        {
            // Only verify exhaustion when we intended to consume every doc.
            if (stopAt == expected.DocFreq)
            {
                Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.NextDoc(), "DocsEnum should have ended but didn't");

                // Common bug is to forget to set this.Doc=NO_MORE_DOCS in the enum!:
                Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.DocID(), "DocsEnum should have ended but didn't");
            }
            break;
        }

        if (options.Contains(Option.SKIPPING) && (doAllSkipping || Random().NextDouble() <= skipChance))
        {
            // targetDocID stays -1 when the target is taken from the expected postings.
            int targetDocID = -1;
            if (expected.Upto < stopAt && Random().NextBoolean())
            {
                // Pick target we know exists:
                int skipCount = TestUtil.NextInt(Random(), 1, skipInc);
                for (int skip = 0; skip < skipCount; skip++)
                {
                    if (expected.NextDoc() == DocsEnum.NO_MORE_DOCS)
                    {
                        break;
                    }
                }
            }
            else
            {
                // Pick random target (might not exist):
                int skipDocIDs = TestUtil.NextInt(Random(), 1, skipDocInc);
                // NOTE(review): presumably always true since the lower bound passed to NextInt is 1 — confirm.
                if (skipDocIDs > 0)
                {
                    targetDocID = expected.DocID() + skipDocIDs;
                    expected.Advance(targetDocID);
                }
            }

            if (expected.Upto >= stopAt)
            {
                // The expected postings ran out: the enum must report exhaustion as well.
                int target = Random().NextBoolean() ? MaxDoc : DocsEnum.NO_MORE_DOCS;
                if (VERBOSE)
                {
                    Console.WriteLine(" now advance to end (target=" + target + ")");
                }
                Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.Advance(target), "DocsEnum should have ended but didn't");
                break;
            }
            else
            {
                if (VERBOSE)
                {
                    if (targetDocID != -1)
                    {
                        Console.WriteLine(" now advance to random target=" + targetDocID + " (" + expected.Upto + " of " + stopAt + ") current=" + docsEnum.DocID());
                    }
                    else
                    {
                        Console.WriteLine(" now advance to known-exists target=" + expected.DocID() + " (" + expected.Upto + " of " + stopAt + ") current=" + docsEnum.DocID());
                    }
                }
                int docID = docsEnum.Advance(targetDocID != -1 ? targetDocID : expected.DocID());
                Assert.AreEqual(expected.DocID(), docID, "docID is wrong");
            }
        }
        else
        {
            expected.NextDoc();
            if (VERBOSE)
            {
                Console.WriteLine(" now nextDoc to " + expected.DocID() + " (" + expected.Upto + " of " + stopAt + ")");
            }
            int docID = docsEnum.NextDoc();
            Assert.AreEqual(expected.DocID(), docID, "docID is wrong");
            if (docID == DocsEnum.NO_MORE_DOCS)
            {
                break;
            }
        }

        if (doCheckFreqs && Random().NextDouble() <= freqAskChance)
        {
            if (VERBOSE)
            {
                Console.WriteLine(" now freq()=" + expected.Freq());
            }
            int freq = docsEnum.Freq();
            Assert.AreEqual(expected.Freq(), freq, "freq is wrong");
        }

        if (doCheckPositions)
        {
            int freq = docsEnum.Freq();
            // Occasionally consume only a random prefix of this doc's positions.
            int numPosToConsume;
            if (!alwaysTestMax && options.Contains(Option.PARTIAL_POS_CONSUME) && Random().Next(5) == 1)
            {
                numPosToConsume = Random().Next(freq);
            }
            else
            {
                numPosToConsume = freq;
            }

            for (int i = 0; i < numPosToConsume; i++)
            {
                int pos = expected.NextPosition();
                if (VERBOSE)
                {
                    Console.WriteLine(" now nextPosition to " + pos);
                }
                Assert.AreEqual(pos, docsAndPositionsEnum.NextPosition(), "position is wrong");

                if (doCheckPayloads)
                {
                    BytesRef expectedPayload = expected.Payload;
                    if (Random().NextDouble() <= payloadCheckChance)
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine(" now check expectedPayload length=" + (expectedPayload == null ? 0 : expectedPayload.Length));
                        }
                        if (expectedPayload == null || expectedPayload.Length == 0)
                        {
                            Assert.IsNull(docsAndPositionsEnum.Payload, "should not have payload");
                        }
                        else
                        {
                            BytesRef payload = docsAndPositionsEnum.Payload;
                            Assert.IsNotNull(payload, "should have payload but doesn't");

                            Assert.AreEqual(expectedPayload.Length, payload.Length, "payload length is wrong");
                            // Compare byte by byte, honouring each BytesRef's own offset.
                            for (int byteUpto = 0; byteUpto < expectedPayload.Length; byteUpto++)
                            {
                                Assert.AreEqual(expectedPayload.Bytes[expectedPayload.Offset + byteUpto], payload.Bytes[payload.Offset + byteUpto], "payload bytes are wrong");
                            }

                            // make a deep copy, then read the payload again and check it still matches
                            payload = BytesRef.DeepCopyOf(payload);
                            Assert.AreEqual(payload, docsAndPositionsEnum.Payload, "2nd call to getPayload returns something different!");
                        }
                    }
                    else
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine(" skip check payload length=" + (expectedPayload == null ? 0 : expectedPayload.Length));
                        }
                    }
                }

                if (doCheckOffsets)
                {
                    if (Random().NextDouble() <= offsetCheckChance)
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine(" now check offsets: startOff=" + expected.StartOffset() + " endOffset=" + expected.EndOffset());
                        }
                        Assert.AreEqual(expected.StartOffset(), docsAndPositionsEnum.StartOffset(), "startOffset is wrong");
                        Assert.AreEqual(expected.EndOffset(), docsAndPositionsEnum.EndOffset(), "endOffset is wrong");
                    }
                    else
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine(" skip check offsets");
                        }
                    }
                }
                else if (fieldInfo.FieldIndexOptions < FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
                {
                    // The field's index options do not include offsets, so the enum must report -1.
                    if (VERBOSE)
                    {
                        Console.WriteLine(" now check offsets are -1");
                    }
                    Assert.AreEqual(-1, docsAndPositionsEnum.StartOffset(), "startOffset isn't -1");
                    Assert.AreEqual(-1, docsAndPositionsEnum.EndOffset(), "endOffset isn't -1");
                }
            }
        }
    }
}
/// <summary>
/// Delegates to <c>Tenum</c>, passing every argument through unchanged.
/// </summary>
/// <param name="bits">Forwarded as-is to <c>Tenum.Docs</c>.</param>
/// <param name="reuse">Forwarded as-is to <c>Tenum.Docs</c>.</param>
/// <param name="flags">Forwarded as-is to <c>Tenum.Docs</c>.</param>
/// <returns>Whatever <c>Tenum.Docs</c> returns.</returns>
public override DocsEnum Docs(Bits bits, DocsEnum reuse, int flags)
{
    DocsEnum delegated = Tenum.Docs(bits, reuse, flags);
    return delegated;
}
/// <summary>
/// Delegates to <c>tenum</c>, passing every argument through unchanged.
/// </summary>
/// <param name="bits">Forwarded as-is to <c>tenum.Docs</c>.</param>
/// <param name="reuse">Forwarded as-is to <c>tenum.Docs</c>.</param>
/// <param name="flags">Forwarded as-is to <c>tenum.Docs</c>.</param>
/// <returns>Whatever <c>tenum.Docs</c> returns.</returns>
public override DocsEnum Docs(IBits bits, DocsEnum reuse, DocsFlags flags)
    => tenum.Docs(bits, reuse, flags);
/// <summary>
/// Opens a term vectors reader through the default codec and checks document 0:
/// <list type="bullet">
/// <item><description>for <c>testFields[0]</c>, every term in <c>testTerms</c> is returned in order and its
/// docs-and-positions enum yields the expected positions and offsets;</description></item>
/// <item><description>for <c>testFields[1]</c>, every term has a docs enum but no docs-and-positions enum.</description></item>
/// </list>
/// </summary>
public virtual void TestPositionReader()
{
    TermVectorsReader reader = Codec.Default.TermVectorsFormat.VectorsReader(dir, seg.Info, fieldInfos, NewIOContext(Random));
    try
    {
        Terms vector = reader.Get(0).GetTerms(testFields[0]);
        Assert.IsNotNull(vector);
        Assert.AreEqual(testTerms.Length, vector.Count);
        TermsEnum termsEnum = vector.GetEnumerator();
        DocsAndPositionsEnum dpEnum = null;
        for (int i = 0; i < testTerms.Length; i++)
        {
            Assert.IsTrue(termsEnum.MoveNext());
            BytesRef text = termsEnum.Term;
            string term = text.Utf8ToString();
            //System.out.println("Term: " + term);
            Assert.AreEqual(testTerms[i], term);

            // First pass: positions only.
            dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
            Assert.IsNotNull(dpEnum);
            int doc = dpEnum.DocID;
            Assert.AreEqual(-1, doc); // a fresh enum is unpositioned
            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(positions[i].Length, dpEnum.Freq);
            for (int j = 0; j < positions[i].Length; j++)
            {
                Assert.AreEqual(positions[i][j], dpEnum.NextPosition());
            }
            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());

            // Second pass (reusing the enum): positions plus offsets.
            dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
            Assert.IsNotNull(dpEnum); // must be checked before the enum is dereferenced
            doc = dpEnum.DocID;
            Assert.AreEqual(-1, doc);
            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(positions[i].Length, dpEnum.Freq);
            for (int j = 0; j < positions[i].Length; j++)
            {
                Assert.AreEqual(positions[i][j], dpEnum.NextPosition());
                // Expected offsets: occurrence j starts at j * 10 and spans the term's length.
                Assert.AreEqual(j * 10, dpEnum.StartOffset);
                Assert.AreEqual(j * 10 + testTerms[i].Length, dpEnum.EndOffset);
            }
            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());
        }

        Terms freqVector = reader.Get(0).GetTerms(testFields[1]); //no pos, no offset
        Assert.IsNotNull(freqVector);
        Assert.AreEqual(testTerms.Length, freqVector.Count);
        termsEnum = freqVector.GetEnumerator();
        Assert.IsNotNull(termsEnum);
        for (int i = 0; i < testTerms.Length; i++)
        {
            Assert.IsTrue(termsEnum.MoveNext());
            BytesRef text = termsEnum.Term;
            string term = text.Utf8ToString();
            //System.out.println("Term: " + term);
            Assert.AreEqual(testTerms[i], term);
            Assert.IsNotNull(termsEnum.Docs(null, null));
            Assert.IsNull(termsEnum.DocsAndPositions(null, null)); // no pos
        }
    }
    finally
    {
        // Release the reader even when one of the assertions above throws.
        reader.Dispose();
    }
}
/// <summary>
/// For every collected term that <paramref name="termsEnum"/> can seek to, marks each of
/// the term's documents in <paramref name="matchingDocs"/> and stores the term's score in
/// <c>scores</c> for that document.
/// </summary>
/// <param name="matchingDocs">Bit set that receives one bit per matching document.</param>
/// <param name="acceptDocs">Passed to <c>TermsEnum.Docs</c> when pulling each term's docs.</param>
/// <param name="termsEnum">Enum used to look up each collected term.</param>
protected virtual void FillDocsAndScores(FixedBitSet matchingDocs, Bits acceptDocs, TermsEnum termsEnum)
{
    BytesRef scratch = new BytesRef();
    DocsEnum postings = null;

    for (int i = 0; i < outerInstance._terms.Size(); i++)
    {
        var ord = outerInstance._ords[i];
        if (!termsEnum.SeekExact(outerInstance._terms.Get(ord, scratch)))
        {
            continue;
        }

        postings = termsEnum.Docs(acceptDocs, postings, FLAG_NONE);
        float score = outerInstance._scores[ord];

        int doc;
        while ((doc = postings.NextDoc()) != NO_MORE_DOCS)
        {
            matchingDocs.Set(doc);
            // A document reached through more than one term keeps the score written last,
            // i.e. an earlier score may be overwritten (presumably only possible in a
            // many-to-many relation).
            scores[doc] = score;
        }
    }
}
/// <summary>
/// For every collected term that <paramref name="termsEnum"/> can seek to, visits the
/// term's documents and records a score only for documents not yet marked in
/// <paramref name="matchingDocs"/>; the first score written for a document is kept.
/// </summary>
/// <param name="matchingDocs">Bit set that receives one bit per matching document.</param>
/// <param name="acceptDocs">Passed to <c>TermsEnum.Docs</c> when pulling each term's docs.</param>
/// <param name="termsEnum">Enum used to look up each collected term.</param>
protected override void FillDocsAndScores(FixedBitSet matchingDocs, Bits acceptDocs, TermsEnum termsEnum)
{
    BytesRef scratch = new BytesRef();
    DocsEnum postings = null;

    for (int i = 0; i < outerInstance._terms.Size(); i++)
    {
        var ord = outerInstance._ords[i];
        if (!termsEnum.SeekExact(outerInstance._terms.Get(ord, scratch)))
        {
            continue;
        }

        postings = termsEnum.Docs(acceptDocs, postings, FLAG_NONE);
        float score = outerInstance._scores[ord];

        int doc;
        while ((doc = postings.NextDoc()) != NO_MORE_DOCS)
        {
            // Keeping the larger score per document would arguably be preferable, but
            // first-score-wins behaves the same as MVInnerScorer, and only then do the
            // tests pass.
            if (matchingDocs.Get(doc))
            {
                continue;
            }

            scores[doc] = score;
            matchingDocs.Set(doc);
        }
    }
}
/// <summary>
/// Verifies that <paramref name="terms"/> (the term vector read back for a single document)
/// matches the token stream it was built from: term count and statistics, the
/// positions/offsets/payloads flags, the sorted term order, and for every term its docs enum
/// and docs-and-positions enum contents. Finishes with a few random seeks to known terms.
/// <para/>
/// The order of the <c>Random</c> draws determines which reuse / live-docs variants are
/// exercised, so statements should not be reordered casually.
/// </summary>
/// <param name="tk">Token stream holding the expected terms, freqs, positions, offsets and payloads.</param>
/// <param name="ft">Field type whose term-vector settings the flags on <paramref name="terms"/> must mirror.</param>
/// <param name="terms">The term vector under test.</param>
protected virtual void AssertEquals(RandomTokenStream tk, FieldType ft, Terms terms)
{
    Assert.AreEqual(1, terms.DocCount);
    int termCount = new JCG.HashSet<string>(tk.terms).Count;
    Assert.AreEqual((long)termCount, terms.Count); // LUCENENET specific - cast required because types don't match (xUnit checks this)
    Assert.AreEqual((long)termCount, terms.SumDocFreq); // LUCENENET specific - cast required because types don't match (xUnit checks this)
    Assert.AreEqual(ft.StoreTermVectorPositions, terms.HasPositions);
    Assert.AreEqual(ft.StoreTermVectorOffsets, terms.HasOffsets);
    Assert.AreEqual(ft.StoreTermVectorPayloads && tk.HasPayloads(), terms.HasPayloads);

    // Build the expected term order using the same comparer the term vector uses.
    ISet<BytesRef> uniqueTerms = new JCG.HashSet<BytesRef>();
    foreach (string term in tk.freqs.Keys)
    {
        uniqueTerms.Add(new BytesRef(term));
    }
    BytesRef[] sortedTerms = uniqueTerms.ToArray(/*new BytesRef[0]*/);
    Array.Sort(sortedTerms, terms.Comparer);

    // Randomly either start fresh or offer the previously stored enum for reuse.
    TermsEnum termsEnum = terms.GetEnumerator(Random.NextBoolean() ? null : this.termsEnum.Value);
    this.termsEnum.Value = termsEnum;
    for (int i = 0; i < sortedTerms.Length; ++i)
    {
        Assert.IsTrue(termsEnum.MoveNext());
        Assert.AreEqual(sortedTerms[i], termsEnum.Term);
        Assert.AreEqual(1, termsEnum.DocFreq);

        // With doc 0 cleared in the live-docs bit set, no document may be returned.
        FixedBitSet bits = new FixedBitSet(1);
        DocsEnum docsEnum = termsEnum.Docs(bits, Random.NextBoolean() ? null : this.docsEnum.Value);
        Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.NextDoc());

        // With doc 0 set (or with no live docs at all), exactly doc 0 is returned.
        bits.Set(0);
        docsEnum = termsEnum.Docs(Random.NextBoolean() ? bits : null, Random.NextBoolean() ? null : docsEnum);
        Assert.IsNotNull(docsEnum);
        Assert.AreEqual(0, docsEnum.NextDoc());
        Assert.AreEqual(0, docsEnum.DocID);
        Assert.AreEqual(tk.freqs[termsEnum.Term.Utf8ToString()], docsEnum.Freq);
        Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.NextDoc());
        this.docsEnum.Value = docsEnum;

        // Same two scenarios for the docs-and-positions enum, which must exist exactly
        // when the field type stores positions or offsets.
        bits.Clear(0);
        DocsAndPositionsEnum docsAndPositionsEnum = termsEnum.DocsAndPositions(bits, Random.NextBoolean() ? null : this.docsAndPositionsEnum.Value);
        Assert.AreEqual(ft.StoreTermVectorOffsets || ft.StoreTermVectorPositions, docsAndPositionsEnum != null);
        if (docsAndPositionsEnum != null)
        {
            Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.NextDoc());
        }
        bits.Set(0);

        docsAndPositionsEnum = termsEnum.DocsAndPositions(Random.NextBoolean() ? bits : null, Random.NextBoolean() ? null : docsAndPositionsEnum);
        Assert.AreEqual(ft.StoreTermVectorOffsets || ft.StoreTermVectorPositions, docsAndPositionsEnum != null);
        if (terms.HasPositions || terms.HasOffsets)
        {
            // No null check is needed in this branch: the assertions above tie the enum's
            // presence to the field-type flags, and those flags were asserted equal to
            // terms.HasPositions / terms.HasOffsets at the top of the method.
            Assert.AreEqual(0, docsAndPositionsEnum.NextDoc());
            int freq = docsAndPositionsEnum.Freq;
            Assert.AreEqual(tk.freqs[termsEnum.Term.Utf8ToString()], freq);

            for (int k = 0; k < freq; ++k)
            {
                int position = docsAndPositionsEnum.NextPosition();

                // Candidate token indexes: looked up by position when positions are
                // stored, otherwise by start offset.
                ISet<int> indexes;
                if (terms.HasPositions)
                {
                    indexes = tk.positionToTerms[position];
                    Assert.IsNotNull(indexes);
                }
                else
                {
                    indexes = tk.startOffsetToTerms[docsAndPositionsEnum.StartOffset];
                    Assert.IsNotNull(indexes);
                }
                if (terms.HasPositions)
                {
                    bool foundPosition = false;
                    foreach (int index in indexes)
                    {
                        if (tk.termBytes[index].Equals(termsEnum.Term) && tk.positions[index] == position)
                        {
                            foundPosition = true;
                            break;
                        }
                    }
                    Assert.IsTrue(foundPosition);
                }
                if (terms.HasOffsets)
                {
                    bool foundOffset = false;
                    foreach (int index in indexes)
                    {
                        if (tk.termBytes[index].Equals(termsEnum.Term) && tk.startOffsets[index] == docsAndPositionsEnum.StartOffset && tk.endOffsets[index] == docsAndPositionsEnum.EndOffset)
                        {
                            foundOffset = true;
                            break;
                        }
                    }
                    Assert.IsTrue(foundOffset);
                }
                if (terms.HasPayloads)
                {
                    bool foundPayload = false;
                    foreach (int index in indexes)
                    {
                        if (tk.termBytes[index].Equals(termsEnum.Term) && Equals(tk.payloads[index], docsAndPositionsEnum.GetPayload()))
                        {
                            foundPayload = true;
                            break;
                        }
                    }
                    Assert.IsTrue(foundPayload);
                }
            }

            // Reading one position past freq is expected to throw.
            // NOTE(review): if Assert.Fail() raises something matched by IsAssertionError(),
            // the second filter would swallow it and this check could never fail — confirm.
            try
            {
                docsAndPositionsEnum.NextPosition();
                Assert.Fail();
            }
            catch (Exception e) when (e.IsException())
            {
                // ok
            }
            catch (Exception e) when (e.IsAssertionError())
            {
                // ok
            }

            Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.NextDoc());
        }
        this.docsAndPositionsEnum.Value = docsAndPositionsEnum;
    }
    Assert.IsFalse(termsEnum.MoveNext());

    // Random seeks to terms known to exist must succeed with either seek method.
    for (int i = 0; i < 5; ++i)
    {
        if (Random.NextBoolean())
        {
            Assert.IsTrue(termsEnum.SeekExact(RandomPicks.RandomFrom(Random, tk.termBytes)));
        }
        else
        {
            Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(RandomPicks.RandomFrom(Random, tk.termBytes)));
        }
    }
}