Ejemplo n.º 1
0
        /// <summary>
        /// Returns <see cref="DocsEnum"/> for the specified term.
        /// This will return <c>null</c> if either the field or
        /// term does not exist.
        /// </summary>
        /// <seealso cref="TermsEnum.Docs(IBits, DocsEnum)"/>
        public DocsEnum GetTermDocsEnum(Term term) // LUCENENET specific: Renamed from TermDocsEnum()
        {
            Debug.Assert(term.Field != null);
            Debug.Assert(term.Bytes != null);
            Fields fields = Fields;

            if (fields != null)
            {
                Terms terms = fields.GetTerms(term.Field);
                if (terms != null)
                {
                    TermsEnum termsEnum = terms.GetIterator(null);
                    if (termsEnum.SeekExact(term.Bytes))
                    {
                        return(termsEnum.Docs(LiveDocs, null));
                    }
                }
            }
            return(null);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// checks the terms enum sequentially
        /// if deep is false, it does a 'shallow' test that doesnt go down to the docsenums
        /// </summary>
        public void AssertTermsEnumEquals(string info, IndexReader leftReader, TermsEnum leftTermsEnum, TermsEnum rightTermsEnum, bool deep)
        {
            BytesRef term;
            Bits randomBits = new RandomBits(leftReader.MaxDoc, Random().NextDouble(), Random());
            DocsAndPositionsEnum leftPositions = null;
            DocsAndPositionsEnum rightPositions = null;
            DocsEnum leftDocs = null;
            DocsEnum rightDocs = null;

            while ((term = leftTermsEnum.Next()) != null)
            {
                Assert.AreEqual(term, rightTermsEnum.Next(), info);
                AssertTermStatsEquals(info, leftTermsEnum, rightTermsEnum);
                if (deep)
                {
                    AssertDocsAndPositionsEnumEquals(info, leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions));
                    AssertDocsAndPositionsEnumEquals(info, leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions));

                    AssertPositionsSkippingEquals(info, leftReader, leftTermsEnum.DocFreq(), leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions));
                    AssertPositionsSkippingEquals(info, leftReader, leftTermsEnum.DocFreq(), leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions));

                    // with freqs:
                    AssertDocsEnumEquals(info, leftDocs = leftTermsEnum.Docs(null, leftDocs), rightDocs = rightTermsEnum.Docs(null, rightDocs), true);
                    AssertDocsEnumEquals(info, leftDocs = leftTermsEnum.Docs(randomBits, leftDocs), rightDocs = rightTermsEnum.Docs(randomBits, rightDocs), true);

                    // w/o freqs:
                    AssertDocsEnumEquals(info, leftDocs = leftTermsEnum.Docs(null, leftDocs, DocsEnum.FLAG_NONE), rightDocs = rightTermsEnum.Docs(null, rightDocs, DocsEnum.FLAG_NONE), false);
                    AssertDocsEnumEquals(info, leftDocs = leftTermsEnum.Docs(randomBits, leftDocs, DocsEnum.FLAG_NONE), rightDocs = rightTermsEnum.Docs(randomBits, rightDocs, DocsEnum.FLAG_NONE), false);

                    // with freqs:
                    AssertDocsSkippingEquals(info, leftReader, leftTermsEnum.DocFreq(), leftDocs = leftTermsEnum.Docs(null, leftDocs), rightDocs = rightTermsEnum.Docs(null, rightDocs), true);
                    AssertDocsSkippingEquals(info, leftReader, leftTermsEnum.DocFreq(), leftDocs = leftTermsEnum.Docs(randomBits, leftDocs), rightDocs = rightTermsEnum.Docs(randomBits, rightDocs), true);

                    // w/o freqs:
                    AssertDocsSkippingEquals(info, leftReader, leftTermsEnum.DocFreq(), leftDocs = leftTermsEnum.Docs(null, leftDocs, DocsEnum.FLAG_NONE), rightDocs = rightTermsEnum.Docs(null, rightDocs, DocsEnum.FLAG_NONE), false);
                    AssertDocsSkippingEquals(info, leftReader, leftTermsEnum.DocFreq(), leftDocs = leftTermsEnum.Docs(randomBits, leftDocs, DocsEnum.FLAG_NONE), rightDocs = rightTermsEnum.Docs(randomBits, rightDocs, DocsEnum.FLAG_NONE), false);
                }
            }
            Assert.IsNull(rightTermsEnum.Next(), info);
        }
        /// <summary>
        /// checks the terms enum sequentially
        /// if deep is false, it does a 'shallow' test that doesnt go down to the docsenums
        /// </summary>
        public virtual void AssertTermsEnum(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum, bool deep)
        {
            BytesRef term;
            Bits randomBits = new RandomBits(MAXDOC, Random().NextDouble(), Random());
            DocsAndPositionsEnum leftPositions = null;
            DocsAndPositionsEnum rightPositions = null;
            DocsEnum leftDocs = null;
            DocsEnum rightDocs = null;

            while ((term = leftTermsEnum.Next()) != null)
            {
                Assert.AreEqual(term, rightTermsEnum.Next());
                AssertTermStats(leftTermsEnum, rightTermsEnum);
                if (deep)
                {
                    // with payloads + off
                    AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions));
                    AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions));

                    AssertPositionsSkipping(leftTermsEnum.DocFreq(), leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions));
                    AssertPositionsSkipping(leftTermsEnum.DocFreq(), leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions));
                    // with payloads only
                    AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsAndPositionsEnum.FLAG_PAYLOADS), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsAndPositionsEnum.FLAG_PAYLOADS));
                    AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsAndPositionsEnum.FLAG_PAYLOADS), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsAndPositionsEnum.FLAG_PAYLOADS));

                    AssertPositionsSkipping(leftTermsEnum.DocFreq(), leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsAndPositionsEnum.FLAG_PAYLOADS), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsAndPositionsEnum.FLAG_PAYLOADS));
                    AssertPositionsSkipping(leftTermsEnum.DocFreq(), leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsAndPositionsEnum.FLAG_PAYLOADS), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsAndPositionsEnum.FLAG_PAYLOADS));

                    // with offsets only
                    AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsAndPositionsEnum.FLAG_OFFSETS), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsAndPositionsEnum.FLAG_OFFSETS));
                    AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsAndPositionsEnum.FLAG_OFFSETS), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsAndPositionsEnum.FLAG_OFFSETS));

                    AssertPositionsSkipping(leftTermsEnum.DocFreq(), leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsAndPositionsEnum.FLAG_OFFSETS), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsAndPositionsEnum.FLAG_OFFSETS));
                    AssertPositionsSkipping(leftTermsEnum.DocFreq(), leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsAndPositionsEnum.FLAG_OFFSETS), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsAndPositionsEnum.FLAG_OFFSETS));

                    // with positions only
                    AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsEnum.FLAG_NONE), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsEnum.FLAG_NONE));
                    AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsEnum.FLAG_NONE), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsEnum.FLAG_NONE));

                    AssertPositionsSkipping(leftTermsEnum.DocFreq(), leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsEnum.FLAG_NONE), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsEnum.FLAG_NONE));
                    AssertPositionsSkipping(leftTermsEnum.DocFreq(), leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsEnum.FLAG_NONE), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsEnum.FLAG_NONE));

                    // with freqs:
                    AssertDocsEnum(leftDocs = leftTermsEnum.Docs(null, leftDocs), rightDocs = rightTermsEnum.Docs(null, rightDocs));
                    AssertDocsEnum(leftDocs = leftTermsEnum.Docs(randomBits, leftDocs), rightDocs = rightTermsEnum.Docs(randomBits, rightDocs));

                    // w/o freqs:
                    AssertDocsEnum(leftDocs = leftTermsEnum.Docs(null, leftDocs, DocsEnum.FLAG_NONE), rightDocs = rightTermsEnum.Docs(null, rightDocs, DocsEnum.FLAG_NONE));
                    AssertDocsEnum(leftDocs = leftTermsEnum.Docs(randomBits, leftDocs, DocsEnum.FLAG_NONE), rightDocs = rightTermsEnum.Docs(randomBits, rightDocs, DocsEnum.FLAG_NONE));

                    // with freqs:
                    AssertDocsSkipping(leftTermsEnum.DocFreq(), leftDocs = leftTermsEnum.Docs(null, leftDocs), rightDocs = rightTermsEnum.Docs(null, rightDocs));
                    AssertDocsSkipping(leftTermsEnum.DocFreq(), leftDocs = leftTermsEnum.Docs(randomBits, leftDocs), rightDocs = rightTermsEnum.Docs(randomBits, rightDocs));

                    // w/o freqs:
                    AssertDocsSkipping(leftTermsEnum.DocFreq(), leftDocs = leftTermsEnum.Docs(null, leftDocs, DocsEnum.FLAG_NONE), rightDocs = rightTermsEnum.Docs(null, rightDocs, DocsEnum.FLAG_NONE));
                    AssertDocsSkipping(leftTermsEnum.DocFreq(), leftDocs = leftTermsEnum.Docs(randomBits, leftDocs, DocsEnum.FLAG_NONE), rightDocs = rightTermsEnum.Docs(randomBits, rightDocs, DocsEnum.FLAG_NONE));
                }
            }
            Assert.IsNull(rightTermsEnum.Next());
        }
Ejemplo n.º 4
0
        public virtual void TestPositionReader()
        {
            TermVectorsReader reader = Codec.Default.TermVectorsFormat().VectorsReader(Dir, Seg.Info, FieldInfos, NewIOContext(Random()));

            BytesRef[] terms;
            Terms      vector = reader.Get(0).Terms(TestFields[0]);

            Assert.IsNotNull(vector);
            Assert.AreEqual(TestTerms.Length, vector.Size());
            TermsEnum            termsEnum = vector.Iterator(null);
            DocsAndPositionsEnum dpEnum    = null;

            for (int i = 0; i < TestTerms.Length; i++)
            {
                BytesRef text = termsEnum.Next();
                Assert.IsNotNull(text);
                string term = text.Utf8ToString();
                //System.out.println("Term: " + term);
                Assert.AreEqual(TestTerms[i], term);

                dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
                Assert.IsNotNull(dpEnum);
                int doc = dpEnum.DocID();
                Assert.AreEqual(-1, doc);
                Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                Assert.AreEqual(dpEnum.Freq(), Positions[i].Length);
                for (int j = 0; j < Positions[i].Length; j++)
                {
                    Assert.AreEqual(Positions[i][j], dpEnum.NextPosition());
                }
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());

                dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
                doc    = dpEnum.DocID();
                Assert.AreEqual(-1, doc);
                Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                Assert.IsNotNull(dpEnum);
                Assert.AreEqual(dpEnum.Freq(), Positions[i].Length);
                for (int j = 0; j < Positions[i].Length; j++)
                {
                    Assert.AreEqual(Positions[i][j], dpEnum.NextPosition());
                    Assert.AreEqual(j * 10, dpEnum.StartOffset());
                    Assert.AreEqual(j * 10 + TestTerms[i].Length, dpEnum.EndOffset());
                }
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());
            }

            Terms freqVector = reader.Get(0).Terms(TestFields[1]); //no pos, no offset

            Assert.IsNotNull(freqVector);
            Assert.AreEqual(TestTerms.Length, freqVector.Size());
            termsEnum = freqVector.Iterator(null);
            Assert.IsNotNull(termsEnum);
            for (int i = 0; i < TestTerms.Length; i++)
            {
                BytesRef text = termsEnum.Next();
                Assert.IsNotNull(text);
                string term = text.Utf8ToString();
                //System.out.println("Term: " + term);
                Assert.AreEqual(TestTerms[i], term);
                Assert.IsNotNull(termsEnum.Docs(null, null));
                Assert.IsNull(termsEnum.DocsAndPositions(null, null)); // no pos
            }
            reader.Dispose();
        }
        private void VerifyEnum(ThreadState threadState, string field, BytesRef term, TermsEnum termsEnum, FieldInfo.IndexOptions maxTestOptions, FieldInfo.IndexOptions maxIndexOptions, ISet<Option> options, bool alwaysTestMax)
        // Maximum options (docs/freqs/positions/offsets) to test:
        {
            if (VERBOSE)
            {
                Console.WriteLine("  verifyEnum: options=" + options + " maxTestOptions=" + maxTestOptions);
            }

            // Make sure TermsEnum really is positioned on the
            // expected term:
            Assert.AreEqual(term, termsEnum.Term());

            // 50% of the time time pass liveDocs:
            bool useLiveDocs = options.Contains(Option.LIVE_DOCS) && Random().NextBoolean();
            Bits liveDocs;
            if (useLiveDocs)
            {
                liveDocs = GlobalLiveDocs;
                if (VERBOSE)
                {
                    Console.WriteLine("  use liveDocs");
                }
            }
            else
            {
                liveDocs = null;
                if (VERBOSE)
                {
                    Console.WriteLine("  no liveDocs");
                }
            }

            FieldInfo fieldInfo = CurrentFieldInfos.FieldInfo(field);

            // NOTE: can be empty list if we are using liveDocs:
            SeedPostings expected = GetSeedPostings(term.Utf8ToString(), Fields[field][term], useLiveDocs, maxIndexOptions);
            Assert.AreEqual(expected.DocFreq, termsEnum.DocFreq());

            bool allowFreqs = fieldInfo.FieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS && maxTestOptions.CompareTo(FieldInfo.IndexOptions.DOCS_AND_FREQS) >= 0;
            bool doCheckFreqs = allowFreqs && (alwaysTestMax || Random().Next(3) <= 2);

            bool allowPositions = fieldInfo.FieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS && maxTestOptions.CompareTo(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
            bool doCheckPositions = allowPositions && (alwaysTestMax || Random().Next(3) <= 2);

            bool allowOffsets = fieldInfo.FieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS && maxTestOptions.CompareTo(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
            bool doCheckOffsets = allowOffsets && (alwaysTestMax || Random().Next(3) <= 2);

            bool doCheckPayloads = options.Contains(Option.PAYLOADS) && allowPositions && fieldInfo.HasPayloads() && (alwaysTestMax || Random().Next(3) <= 2);

            DocsEnum prevDocsEnum = null;

            DocsEnum docsEnum;
            DocsAndPositionsEnum docsAndPositionsEnum;

            if (!doCheckPositions)
            {
                if (allowPositions && Random().Next(10) == 7)
                {
                    // 10% of the time, even though we will not check positions, pull a DocsAndPositions enum

                    if (options.Contains(Option.REUSE_ENUMS) && Random().Next(10) < 9)
                    {
                        prevDocsEnum = threadState.ReuseDocsAndPositionsEnum;
                    }

                    int flags = 0;
                    if (alwaysTestMax || Random().NextBoolean())
                    {
                        flags |= DocsAndPositionsEnum.FLAG_OFFSETS;
                    }
                    if (alwaysTestMax || Random().NextBoolean())
                    {
                        flags |= DocsAndPositionsEnum.FLAG_PAYLOADS;
                    }

                    if (VERBOSE)
                    {
                        Console.WriteLine("  get DocsAndPositionsEnum (but we won't check positions) flags=" + flags);
                    }

                    threadState.ReuseDocsAndPositionsEnum = termsEnum.DocsAndPositions(liveDocs, (DocsAndPositionsEnum)prevDocsEnum, flags);
                    docsEnum = threadState.ReuseDocsAndPositionsEnum;
                    docsAndPositionsEnum = threadState.ReuseDocsAndPositionsEnum;
                }
                else
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("  get DocsEnum");
                    }
                    if (options.Contains(Option.REUSE_ENUMS) && Random().Next(10) < 9)
                    {
                        prevDocsEnum = threadState.ReuseDocsEnum;
                    }
                    threadState.ReuseDocsEnum = termsEnum.Docs(liveDocs, prevDocsEnum, doCheckFreqs ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
                    docsEnum = threadState.ReuseDocsEnum;
                    docsAndPositionsEnum = null;
                }
            }
            else
            {
                if (options.Contains(Option.REUSE_ENUMS) && Random().Next(10) < 9)
                {
                    prevDocsEnum = threadState.ReuseDocsAndPositionsEnum;
                }

                int flags = 0;
                if (alwaysTestMax || doCheckOffsets || Random().Next(3) == 1)
                {
                    flags |= DocsAndPositionsEnum.FLAG_OFFSETS;
                }
                if (alwaysTestMax || doCheckPayloads || Random().Next(3) == 1)
                {
                    flags |= DocsAndPositionsEnum.FLAG_PAYLOADS;
                }

                if (VERBOSE)
                {
                    Console.WriteLine("  get DocsAndPositionsEnum flags=" + flags);
                }

                threadState.ReuseDocsAndPositionsEnum = termsEnum.DocsAndPositions(liveDocs, (DocsAndPositionsEnum)prevDocsEnum, flags);
                docsEnum = threadState.ReuseDocsAndPositionsEnum;
                docsAndPositionsEnum = threadState.ReuseDocsAndPositionsEnum;
            }

            Assert.IsNotNull(docsEnum, "null DocsEnum");
            int initialDocID = docsEnum.DocID();
            Assert.AreEqual(-1, initialDocID, "inital docID should be -1" + docsEnum);

            if (VERBOSE)
            {
                if (prevDocsEnum == null)
                {
                    Console.WriteLine("  got enum=" + docsEnum);
                }
                else if (prevDocsEnum == docsEnum)
                {
                    Console.WriteLine("  got reuse enum=" + docsEnum);
                }
                else
                {
                    Console.WriteLine("  got enum=" + docsEnum + " (reuse of " + prevDocsEnum + " failed)");
                }
            }

            // 10% of the time don't consume all docs:
            int stopAt;
            if (!alwaysTestMax && options.Contains(Option.PARTIAL_DOC_CONSUME) && expected.DocFreq > 1 && Random().Next(10) == 7)
            {
                stopAt = Random().Next(expected.DocFreq - 1);
                if (VERBOSE)
                {
                    Console.WriteLine("  will not consume all docs (" + stopAt + " vs " + expected.DocFreq + ")");
                }
            }
            else
            {
                stopAt = expected.DocFreq;
                if (VERBOSE)
                {
                    Console.WriteLine("  consume all docs");
                }
            }

            double skipChance = alwaysTestMax ? 0.5 : Random().NextDouble();
            int numSkips = expected.DocFreq < 3 ? 1 : TestUtil.NextInt(Random(), 1, Math.Min(20, expected.DocFreq / 3));
            int skipInc = expected.DocFreq / numSkips;
            int skipDocInc = MaxDoc / numSkips;

            // Sometimes do 100% skipping:
            bool doAllSkipping = options.Contains(Option.SKIPPING) && Random().Next(7) == 1;

            double freqAskChance = alwaysTestMax ? 1.0 : Random().NextDouble();
            double payloadCheckChance = alwaysTestMax ? 1.0 : Random().NextDouble();
            double offsetCheckChance = alwaysTestMax ? 1.0 : Random().NextDouble();

            if (VERBOSE)
            {
                if (options.Contains(Option.SKIPPING))
                {
                    Console.WriteLine("  skipChance=" + skipChance + " numSkips=" + numSkips);
                }
                else
                {
                    Console.WriteLine("  no skipping");
                }
                if (doCheckFreqs)
                {
                    Console.WriteLine("  freqAskChance=" + freqAskChance);
                }
                if (doCheckPayloads)
                {
                    Console.WriteLine("  payloadCheckChance=" + payloadCheckChance);
                }
                if (doCheckOffsets)
                {
                    Console.WriteLine("  offsetCheckChance=" + offsetCheckChance);
                }
            }

            while (expected.Upto <= stopAt)
            {
                if (expected.Upto == stopAt)
                {
                    if (stopAt == expected.DocFreq)
                    {
                        Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.NextDoc(), "DocsEnum should have ended but didn't");

                        // Common bug is to forget to set this.Doc=NO_MORE_DOCS in the enum!:
                        Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.DocID(), "DocsEnum should have ended but didn't");
                    }
                    break;
                }

                if (options.Contains(Option.SKIPPING) && (doAllSkipping || Random().NextDouble() <= skipChance))
                {
                    int targetDocID = -1;
                    if (expected.Upto < stopAt && Random().NextBoolean())
                    {
                        // Pick target we know exists:
                        int skipCount = TestUtil.NextInt(Random(), 1, skipInc);
                        for (int skip = 0; skip < skipCount; skip++)
                        {
                            if (expected.NextDoc() == DocsEnum.NO_MORE_DOCS)
                            {
                                break;
                            }
                        }
                    }
                    else
                    {
                        // Pick random target (might not exist):
                        int skipDocIDs = TestUtil.NextInt(Random(), 1, skipDocInc);
                        if (skipDocIDs > 0)
                        {
                            targetDocID = expected.DocID() + skipDocIDs;
                            expected.Advance(targetDocID);
                        }
                    }

                    if (expected.Upto >= stopAt)
                    {
                        int target = Random().NextBoolean() ? MaxDoc : DocsEnum.NO_MORE_DOCS;
                        if (VERBOSE)
                        {
                            Console.WriteLine("  now advance to end (target=" + target + ")");
                        }
                        Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.Advance(target), "DocsEnum should have ended but didn't");
                        break;
                    }
                    else
                    {
                        if (VERBOSE)
                        {
                            if (targetDocID != -1)
                            {
                                Console.WriteLine("  now advance to random target=" + targetDocID + " (" + expected.Upto + " of " + stopAt + ") current=" + docsEnum.DocID());
                            }
                            else
                            {
                                Console.WriteLine("  now advance to known-exists target=" + expected.DocID() + " (" + expected.Upto + " of " + stopAt + ") current=" + docsEnum.DocID());
                            }
                        }
                        int docID = docsEnum.Advance(targetDocID != -1 ? targetDocID : expected.DocID());
                        Assert.AreEqual(expected.DocID(), docID, "docID is wrong");
                    }
                }
                else
                {
                    expected.NextDoc();
                    if (VERBOSE)
                    {
                        Console.WriteLine("  now nextDoc to " + expected.DocID() + " (" + expected.Upto + " of " + stopAt + ")");
                    }
                    int docID = docsEnum.NextDoc();
                    Assert.AreEqual(expected.DocID(), docID, "docID is wrong");
                    if (docID == DocsEnum.NO_MORE_DOCS)
                    {
                        break;
                    }
                }

                if (doCheckFreqs && Random().NextDouble() <= freqAskChance)
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("    now freq()=" + expected.Freq());
                    }
                    int freq = docsEnum.Freq();
                    Assert.AreEqual(expected.Freq(), freq, "freq is wrong");
                }

                if (doCheckPositions)
                {
                    int freq = docsEnum.Freq();
                    int numPosToConsume;
                    if (!alwaysTestMax && options.Contains(Option.PARTIAL_POS_CONSUME) && Random().Next(5) == 1)
                    {
                        numPosToConsume = Random().Next(freq);
                    }
                    else
                    {
                        numPosToConsume = freq;
                    }

                    for (int i = 0; i < numPosToConsume; i++)
                    {
                        int pos = expected.NextPosition();
                        if (VERBOSE)
                        {
                            Console.WriteLine("    now nextPosition to " + pos);
                        }
                        Assert.AreEqual(pos, docsAndPositionsEnum.NextPosition(), "position is wrong");

                        if (doCheckPayloads)
                        {
                            BytesRef expectedPayload = expected.Payload;
                            if (Random().NextDouble() <= payloadCheckChance)
                            {
                                if (VERBOSE)
                                {
                                    Console.WriteLine("      now check expectedPayload length=" + (expectedPayload == null ? 0 : expectedPayload.Length));
                                }
                                if (expectedPayload == null || expectedPayload.Length == 0)
                                {
                                    Assert.IsNull(docsAndPositionsEnum.Payload, "should not have payload");
                                }
                                else
                                {
                                    BytesRef payload = docsAndPositionsEnum.Payload;
                                    Assert.IsNotNull(payload, "should have payload but doesn't");

                                    Assert.AreEqual(expectedPayload.Length, payload.Length, "payload length is wrong");
                                    for (int byteUpto = 0; byteUpto < expectedPayload.Length; byteUpto++)
                                    {
                                        Assert.AreEqual(expectedPayload.Bytes[expectedPayload.Offset + byteUpto], payload.Bytes[payload.Offset + byteUpto], "payload bytes are wrong");
                                    }

                                    // make a deep copy
                                    payload = BytesRef.DeepCopyOf(payload);
                                    Assert.AreEqual(payload, docsAndPositionsEnum.Payload, "2nd call to getPayload returns something different!");
                                }
                            }
                            else
                            {
                                if (VERBOSE)
                                {
                                    Console.WriteLine("      skip check payload length=" + (expectedPayload == null ? 0 : expectedPayload.Length));
                                }
                            }
                        }

                        if (doCheckOffsets)
                        {
                            if (Random().NextDouble() <= offsetCheckChance)
                            {
                                if (VERBOSE)
                                {
                                    Console.WriteLine("      now check offsets: startOff=" + expected.StartOffset() + " endOffset=" + expected.EndOffset());
                                }
                                Assert.AreEqual(expected.StartOffset(), docsAndPositionsEnum.StartOffset(), "startOffset is wrong");
                                Assert.AreEqual(expected.EndOffset(), docsAndPositionsEnum.EndOffset(), "endOffset is wrong");
                            }
                            else
                            {
                                if (VERBOSE)
                                {
                                    Console.WriteLine("      skip check offsets");
                                }
                            }
                        }
                        else if (fieldInfo.FieldIndexOptions < FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
                        {
                            if (VERBOSE)
                            {
                                Console.WriteLine("      now check offsets are -1");
                            }
                            Assert.AreEqual(-1, docsAndPositionsEnum.StartOffset(), "startOffset isn't -1");
                            Assert.AreEqual(-1, docsAndPositionsEnum.EndOffset(), "endOffset isn't -1");
                        }
                    }
                }
            }
        }
Ejemplo n.º 6
0
 public override DocsEnum Docs(Bits bits, DocsEnum reuse, int flags)
 {
     return(Tenum.Docs(bits, reuse, flags));
 }
Ejemplo n.º 7
0
 public override DocsEnum Docs(IBits bits, DocsEnum reuse, DocsFlags flags)
 {
     return(tenum.Docs(bits, reuse, flags));
 }
Ejemplo n.º 8
0
        public virtual void TestPositionReader()
        {
            TermVectorsReader reader = Codec.Default.TermVectorsFormat.VectorsReader(dir, seg.Info, fieldInfos, NewIOContext(Random));
            //BytesRef[] terms; // LUCENENET NOTE: Not used in Lucene
            Terms vector = reader.Get(0).GetTerms(testFields[0]);

            Assert.IsNotNull(vector);
            Assert.AreEqual(testTerms.Length, vector.Count);
            TermsEnum            termsEnum = vector.GetEnumerator();
            DocsAndPositionsEnum dpEnum    = null;

            for (int i = 0; i < testTerms.Length; i++)
            {
                Assert.IsTrue(termsEnum.MoveNext());
                BytesRef text = termsEnum.Term;
                string   term = text.Utf8ToString();
                //System.out.println("Term: " + term);
                Assert.AreEqual(testTerms[i], term);

                dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
                Assert.IsNotNull(dpEnum);
                int doc = dpEnum.DocID;
                Assert.AreEqual(-1, doc);
                Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                Assert.AreEqual(dpEnum.Freq, positions[i].Length);
                for (int j = 0; j < positions[i].Length; j++)
                {
                    Assert.AreEqual(positions[i][j], dpEnum.NextPosition());
                }
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());

                dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
                doc    = dpEnum.DocID;
                Assert.AreEqual(-1, doc);
                Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                Assert.IsNotNull(dpEnum);
                Assert.AreEqual(dpEnum.Freq, positions[i].Length);
                for (int j = 0; j < positions[i].Length; j++)
                {
                    Assert.AreEqual(positions[i][j], dpEnum.NextPosition());
                    Assert.AreEqual(j * 10, dpEnum.StartOffset);
                    Assert.AreEqual(j * 10 + testTerms[i].Length, dpEnum.EndOffset);
                }
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());
            }

            Terms freqVector = reader.Get(0).GetTerms(testFields[1]); //no pos, no offset

            Assert.IsNotNull(freqVector);
            Assert.AreEqual(testTerms.Length, freqVector.Count);
            termsEnum = freqVector.GetEnumerator();
            Assert.IsNotNull(termsEnum);
            for (int i = 0; i < testTerms.Length; i++)
            {
                Assert.IsTrue(termsEnum.MoveNext());
                BytesRef text = termsEnum.Term;
                string   term = text.Utf8ToString();
                //System.out.println("Term: " + term);
                Assert.AreEqual(testTerms[i], term);
                Assert.IsNotNull(termsEnum.Docs(null, null));
                Assert.IsNull(termsEnum.DocsAndPositions(null, null)); // no pos
            }
            reader.Dispose();
        }
 protected virtual void FillDocsAndScores(FixedBitSet matchingDocs, Bits acceptDocs,
     TermsEnum termsEnum)
 {
     BytesRef spare = new BytesRef();
     DocsEnum docsEnum = null;
     for (int i = 0; i < outerInstance._terms.Size(); i++)
     {
         if (termsEnum.SeekExact(outerInstance._terms.Get(outerInstance._ords[i], spare)))
         {
             docsEnum = termsEnum.Docs(acceptDocs, docsEnum, FLAG_NONE);
             float score = outerInstance._scores[outerInstance._ords[i]];
             for (int doc = docsEnum.NextDoc();
                 doc != NO_MORE_DOCS;
                 doc = docsEnum.NextDoc())
             {
                 matchingDocs.Set(doc);
                 // In the case the same doc is also related to a another doc, a score might be overwritten. I think this
                 // can only happen in a many-to-many relation
                 scores[doc] = score;
             }
         }
     }
 }
Ejemplo n.º 10
0
 protected override void FillDocsAndScores(FixedBitSet matchingDocs, Bits acceptDocs,
     TermsEnum termsEnum)
 {
     BytesRef spare = new BytesRef();
     DocsEnum docsEnum = null;
     for (int i = 0; i < outerInstance._terms.Size(); i++)
     {
         if (termsEnum.SeekExact(outerInstance._terms.Get(outerInstance._ords[i], spare)))
         {
             docsEnum = termsEnum.Docs(acceptDocs, docsEnum, FLAG_NONE);
             float score = outerInstance._scores[outerInstance._ords[i]];
             for (int doc = docsEnum.NextDoc();
                 doc != NO_MORE_DOCS;
                 doc = docsEnum.NextDoc())
             {
                 // I prefer this:
                 /*if (scores[doc] < score) {
                   scores[doc] = score;
                   matchingDocs.set(doc);
                 }*/
                 // But this behaves the same as MVInnerScorer and only then the tests will pass:
                 if (!matchingDocs.Get(doc))
                 {
                     scores[doc] = score;
                     matchingDocs.Set(doc);
                 }
             }
         }
     }
 }
Ejemplo n.º 11
0
        protected virtual void AssertEquals(RandomTokenStream tk, FieldType ft, Terms terms)
        {
            Assert.AreEqual(1, terms.DocCount);
            int termCount = new JCG.HashSet <string>(tk.terms).Count;

            Assert.AreEqual((long)termCount, terms.Count);      // LUCENENET specific - cast required because types don't match (xUnit checks this)
            Assert.AreEqual((long)termCount, terms.SumDocFreq); // LUCENENET specific - cast required because types don't match (xUnit checks this)
            Assert.AreEqual(ft.StoreTermVectorPositions, terms.HasPositions);
            Assert.AreEqual(ft.StoreTermVectorOffsets, terms.HasOffsets);
            Assert.AreEqual(ft.StoreTermVectorPayloads && tk.HasPayloads(), terms.HasPayloads);
            ISet <BytesRef> uniqueTerms = new JCG.HashSet <BytesRef>();

            foreach (string term in tk.freqs.Keys)
            {
                uniqueTerms.Add(new BytesRef(term));
            }
            BytesRef[] sortedTerms = uniqueTerms.ToArray(/*new BytesRef[0]*/);
            Array.Sort(sortedTerms, terms.Comparer);
            TermsEnum termsEnum = terms.GetEnumerator(Random.NextBoolean() ? null : this.termsEnum.Value);

            this.termsEnum.Value = termsEnum;
            for (int i = 0; i < sortedTerms.Length; ++i)
            {
                Assert.IsTrue(termsEnum.MoveNext());
                Assert.AreEqual(sortedTerms[i], termsEnum.Term);
                Assert.AreEqual(1, termsEnum.DocFreq);

                FixedBitSet bits     = new FixedBitSet(1);
                DocsEnum    docsEnum = termsEnum.Docs(bits, Random.NextBoolean() ? null : this.docsEnum.Value);
                Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.NextDoc());
                bits.Set(0);

                docsEnum = termsEnum.Docs(Random.NextBoolean() ? bits : null, Random.NextBoolean() ? null : docsEnum);
                Assert.IsNotNull(docsEnum);
                Assert.AreEqual(0, docsEnum.NextDoc());
                Assert.AreEqual(0, docsEnum.DocID);
                Assert.AreEqual(tk.freqs[termsEnum.Term.Utf8ToString()], docsEnum.Freq);
                Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.NextDoc());
                this.docsEnum.Value = docsEnum;

                bits.Clear(0);
                DocsAndPositionsEnum docsAndPositionsEnum = termsEnum.DocsAndPositions(bits, Random.NextBoolean() ? null : this.docsAndPositionsEnum.Value);
                Assert.AreEqual(ft.StoreTermVectorOffsets || ft.StoreTermVectorPositions, docsAndPositionsEnum != null);
                if (docsAndPositionsEnum != null)
                {
                    Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.NextDoc());
                }
                bits.Set(0);

                docsAndPositionsEnum = termsEnum.DocsAndPositions(Random.NextBoolean() ? bits : null, Random.NextBoolean() ? null : docsAndPositionsEnum);
                Assert.AreEqual(ft.StoreTermVectorOffsets || ft.StoreTermVectorPositions, docsAndPositionsEnum != null);
                if (terms.HasPositions || terms.HasOffsets)
                {
                    Assert.AreEqual(0, docsAndPositionsEnum.NextDoc());
                    int freq = docsAndPositionsEnum.Freq;
                    Assert.AreEqual(tk.freqs[termsEnum.Term.Utf8ToString()], freq);
                    if (docsAndPositionsEnum != null)
                    {
                        for (int k = 0; k < freq; ++k)
                        {
                            int        position = docsAndPositionsEnum.NextPosition();
                            ISet <int> indexes;
                            if (terms.HasPositions)
                            {
                                indexes = tk.positionToTerms[position];
                                Assert.IsNotNull(indexes);
                            }
                            else
                            {
                                indexes = tk.startOffsetToTerms[docsAndPositionsEnum.StartOffset];
                                Assert.IsNotNull(indexes);
                            }
                            if (terms.HasPositions)
                            {
                                bool foundPosition = false;
                                foreach (int index in indexes)
                                {
                                    if (tk.termBytes[index].Equals(termsEnum.Term) && tk.positions[index] == position)
                                    {
                                        foundPosition = true;
                                        break;
                                    }
                                }
                                Assert.IsTrue(foundPosition);
                            }
                            if (terms.HasOffsets)
                            {
                                bool foundOffset = false;
                                foreach (int index in indexes)
                                {
                                    if (tk.termBytes[index].Equals(termsEnum.Term) && tk.startOffsets[index] == docsAndPositionsEnum.StartOffset && tk.endOffsets[index] == docsAndPositionsEnum.EndOffset)
                                    {
                                        foundOffset = true;
                                        break;
                                    }
                                }
                                Assert.IsTrue(foundOffset);
                            }
                            if (terms.HasPayloads)
                            {
                                bool foundPayload = false;
                                foreach (int index in indexes)
                                {
                                    if (tk.termBytes[index].Equals(termsEnum.Term) && Equals(tk.payloads[index], docsAndPositionsEnum.GetPayload()))
                                    {
                                        foundPayload = true;
                                        break;
                                    }
                                }
                                Assert.IsTrue(foundPayload);
                            }
                        }
                        try
                        {
                            docsAndPositionsEnum.NextPosition();
                            Assert.Fail();
                        }
                        catch (Exception e) when(e.IsException())
                        {
                            // ok
                        }
                        catch (Exception e) when(e.IsAssertionError())
                        {
                            // ok
                        }
                    }
                    Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.NextDoc());
                }
                this.docsAndPositionsEnum.Value = docsAndPositionsEnum;
            }
            Assert.IsFalse(termsEnum.MoveNext());
            for (int i = 0; i < 5; ++i)
            {
                if (Random.NextBoolean())
                {
                    Assert.IsTrue(termsEnum.SeekExact(RandomPicks.RandomFrom(Random, tk.termBytes)));
                }
                else
                {
                    Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(RandomPicks.RandomFrom(Random, tk.termBytes)));
                }
            }
        }