internal HighFrequencyIterator(HighFrequencyDictionary outerInstance)
 {
     this.outerInstance = outerInstance;
     Terms terms = MultiFields.GetTerms(outerInstance.reader, outerInstance.field);
     if (terms != null)
     {
         termsEnum = terms.Iterator(null);
     }
     else
     {
         termsEnum = null;
     }
     minNumDocs = (int)(outerInstance.thresh * (float)outerInstance.reader.NumDocs);
 }
Пример #2
0
        public static void VerifyEquals(Fields d1, Fields d2)
        {
            if (d1 == null)
            {
                Assert.IsTrue(d2 == null || d2.Count == 0);
                return;
            }
            Assert.IsTrue(d2 != null);

            IEnumerator <string> fieldsEnum2 = d2.GetEnumerator();

            foreach (string field1 in d1)
            {
                fieldsEnum2.MoveNext();
                string field2 = fieldsEnum2.Current;
                Assert.AreEqual(field1, field2);

                Terms terms1 = d1.GetTerms(field1);
                Assert.IsNotNull(terms1);
                TermsEnum termsEnum1 = terms1.GetEnumerator();

                Terms terms2 = d2.GetTerms(field2);
                Assert.IsNotNull(terms2);
                TermsEnum termsEnum2 = terms2.GetEnumerator();

                DocsAndPositionsEnum dpEnum1 = null;
                DocsAndPositionsEnum dpEnum2 = null;
                DocsEnum             dEnum1  = null;
                DocsEnum             dEnum2  = null;

                BytesRef term1;
                while (termsEnum1.MoveNext())
                {
                    term1 = termsEnum1.Term;
                    termsEnum2.MoveNext();
                    BytesRef term2 = termsEnum2.Term;
                    Assert.AreEqual(term1, term2);
                    Assert.AreEqual(termsEnum1.TotalTermFreq, termsEnum2.TotalTermFreq);

                    dpEnum1 = termsEnum1.DocsAndPositions(null, dpEnum1);
                    dpEnum2 = termsEnum2.DocsAndPositions(null, dpEnum2);
                    if (dpEnum1 != null)
                    {
                        Assert.IsNotNull(dpEnum2);
                        int docID1 = dpEnum1.NextDoc();
                        dpEnum2.NextDoc();
                        // docIDs are not supposed to be equal
                        //int docID2 = dpEnum2.NextDoc();
                        //Assert.AreEqual(docID1, docID2);
                        Assert.IsTrue(docID1 != DocIdSetIterator.NO_MORE_DOCS);

                        int freq1 = dpEnum1.Freq;
                        int freq2 = dpEnum2.Freq;
                        Assert.AreEqual(freq1, freq2);
                        IOffsetAttribute offsetAtt1 = dpEnum1.Attributes.HasAttribute <IOffsetAttribute>() ? dpEnum1.Attributes.GetAttribute <IOffsetAttribute>() : null;
                        IOffsetAttribute offsetAtt2 = dpEnum2.Attributes.HasAttribute <IOffsetAttribute>() ? dpEnum2.Attributes.GetAttribute <IOffsetAttribute>() : null;

                        if (offsetAtt1 != null)
                        {
                            Assert.IsNotNull(offsetAtt2);
                        }
                        else
                        {
                            Assert.IsNull(offsetAtt2);
                        }

                        for (int posUpto = 0; posUpto < freq1; posUpto++)
                        {
                            int pos1 = dpEnum1.NextPosition();
                            int pos2 = dpEnum2.NextPosition();
                            Assert.AreEqual(pos1, pos2);
                            if (offsetAtt1 != null)
                            {
                                Assert.AreEqual(offsetAtt1.StartOffset, offsetAtt2.StartOffset);
                                Assert.AreEqual(offsetAtt1.EndOffset, offsetAtt2.EndOffset);
                            }
                        }
                        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum1.NextDoc());
                        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum2.NextDoc());
                    }
                    else
                    {
                        dEnum1 = TestUtil.Docs(Random, termsEnum1, null, dEnum1, DocsFlags.FREQS);
                        dEnum2 = TestUtil.Docs(Random, termsEnum2, null, dEnum2, DocsFlags.FREQS);
                        Assert.IsNotNull(dEnum1);
                        Assert.IsNotNull(dEnum2);
                        int docID1 = dEnum1.NextDoc();
                        dEnum2.NextDoc();
                        // docIDs are not supposed to be equal
                        //int docID2 = dEnum2.NextDoc();
                        //Assert.AreEqual(docID1, docID2);
                        Assert.IsTrue(docID1 != DocIdSetIterator.NO_MORE_DOCS);
                        int freq1 = dEnum1.Freq;
                        int freq2 = dEnum2.Freq;
                        Assert.AreEqual(freq1, freq2);
                        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dEnum1.NextDoc());
                        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dEnum2.NextDoc());
                    }
                }

                Assert.IsFalse(termsEnum2.MoveNext());
            }
            Assert.IsFalse(fieldsEnum2.MoveNext());
        }
        /// <summary>
        /// checks the terms enum sequentially
        /// if deep is false, it does a 'shallow' test that doesnt go down to the docsenums
        /// </summary>
        public virtual void AssertTermsEnum(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum, bool deep)
        {
            BytesRef term;
            Bits randomBits = new RandomBits(MAXDOC, Random().NextDouble(), Random());
            DocsAndPositionsEnum leftPositions = null;
            DocsAndPositionsEnum rightPositions = null;
            DocsEnum leftDocs = null;
            DocsEnum rightDocs = null;

            while ((term = leftTermsEnum.Next()) != null)
            {
                Assert.AreEqual(term, rightTermsEnum.Next());
                AssertTermStats(leftTermsEnum, rightTermsEnum);
                if (deep)
                {
                    // with payloads + off
                    AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions));
                    AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions));

                    AssertPositionsSkipping(leftTermsEnum.DocFreq(), leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions));
                    AssertPositionsSkipping(leftTermsEnum.DocFreq(), leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions));
                    // with payloads only
                    AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsAndPositionsEnum.FLAG_PAYLOADS), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsAndPositionsEnum.FLAG_PAYLOADS));
                    AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsAndPositionsEnum.FLAG_PAYLOADS), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsAndPositionsEnum.FLAG_PAYLOADS));

                    AssertPositionsSkipping(leftTermsEnum.DocFreq(), leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsAndPositionsEnum.FLAG_PAYLOADS), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsAndPositionsEnum.FLAG_PAYLOADS));
                    AssertPositionsSkipping(leftTermsEnum.DocFreq(), leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsAndPositionsEnum.FLAG_PAYLOADS), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsAndPositionsEnum.FLAG_PAYLOADS));

                    // with offsets only
                    AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsAndPositionsEnum.FLAG_OFFSETS), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsAndPositionsEnum.FLAG_OFFSETS));
                    AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsAndPositionsEnum.FLAG_OFFSETS), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsAndPositionsEnum.FLAG_OFFSETS));

                    AssertPositionsSkipping(leftTermsEnum.DocFreq(), leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsAndPositionsEnum.FLAG_OFFSETS), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsAndPositionsEnum.FLAG_OFFSETS));
                    AssertPositionsSkipping(leftTermsEnum.DocFreq(), leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsAndPositionsEnum.FLAG_OFFSETS), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsAndPositionsEnum.FLAG_OFFSETS));

                    // with positions only
                    AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsEnum.FLAG_NONE), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsEnum.FLAG_NONE));
                    AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsEnum.FLAG_NONE), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsEnum.FLAG_NONE));

                    AssertPositionsSkipping(leftTermsEnum.DocFreq(), leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsEnum.FLAG_NONE), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsEnum.FLAG_NONE));
                    AssertPositionsSkipping(leftTermsEnum.DocFreq(), leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsEnum.FLAG_NONE), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsEnum.FLAG_NONE));

                    // with freqs:
                    AssertDocsEnum(leftDocs = leftTermsEnum.Docs(null, leftDocs), rightDocs = rightTermsEnum.Docs(null, rightDocs));
                    AssertDocsEnum(leftDocs = leftTermsEnum.Docs(randomBits, leftDocs), rightDocs = rightTermsEnum.Docs(randomBits, rightDocs));

                    // w/o freqs:
                    AssertDocsEnum(leftDocs = leftTermsEnum.Docs(null, leftDocs, DocsEnum.FLAG_NONE), rightDocs = rightTermsEnum.Docs(null, rightDocs, DocsEnum.FLAG_NONE));
                    AssertDocsEnum(leftDocs = leftTermsEnum.Docs(randomBits, leftDocs, DocsEnum.FLAG_NONE), rightDocs = rightTermsEnum.Docs(randomBits, rightDocs, DocsEnum.FLAG_NONE));

                    // with freqs:
                    AssertDocsSkipping(leftTermsEnum.DocFreq(), leftDocs = leftTermsEnum.Docs(null, leftDocs), rightDocs = rightTermsEnum.Docs(null, rightDocs));
                    AssertDocsSkipping(leftTermsEnum.DocFreq(), leftDocs = leftTermsEnum.Docs(randomBits, leftDocs), rightDocs = rightTermsEnum.Docs(randomBits, rightDocs));

                    // w/o freqs:
                    AssertDocsSkipping(leftTermsEnum.DocFreq(), leftDocs = leftTermsEnum.Docs(null, leftDocs, DocsEnum.FLAG_NONE), rightDocs = rightTermsEnum.Docs(null, rightDocs, DocsEnum.FLAG_NONE));
                    AssertDocsSkipping(leftTermsEnum.DocFreq(), leftDocs = leftTermsEnum.Docs(randomBits, leftDocs, DocsEnum.FLAG_NONE), rightDocs = rightTermsEnum.Docs(randomBits, rightDocs, DocsEnum.FLAG_NONE));
                }
            }
            Assert.IsNull(rightTermsEnum.Next());
        }
        private void VerifyEnum(ThreadState threadState, string field, BytesRef term, TermsEnum termsEnum, FieldInfo.IndexOptions maxTestOptions, FieldInfo.IndexOptions maxIndexOptions, ISet<Option> options, bool alwaysTestMax)
        // Maximum options (docs/freqs/positions/offsets) to test:
        {
            if (VERBOSE)
            {
                Console.WriteLine("  verifyEnum: options=" + options + " maxTestOptions=" + maxTestOptions);
            }

            // Make sure TermsEnum really is positioned on the
            // expected term:
            Assert.AreEqual(term, termsEnum.Term());

            // 50% of the time time pass liveDocs:
            bool useLiveDocs = options.Contains(Option.LIVE_DOCS) && Random().NextBoolean();
            Bits liveDocs;
            if (useLiveDocs)
            {
                liveDocs = GlobalLiveDocs;
                if (VERBOSE)
                {
                    Console.WriteLine("  use liveDocs");
                }
            }
            else
            {
                liveDocs = null;
                if (VERBOSE)
                {
                    Console.WriteLine("  no liveDocs");
                }
            }

            FieldInfo fieldInfo = CurrentFieldInfos.FieldInfo(field);

            // NOTE: can be empty list if we are using liveDocs:
            SeedPostings expected = GetSeedPostings(term.Utf8ToString(), Fields[field][term], useLiveDocs, maxIndexOptions);
            Assert.AreEqual(expected.DocFreq, termsEnum.DocFreq());

            bool allowFreqs = fieldInfo.FieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS && maxTestOptions.CompareTo(FieldInfo.IndexOptions.DOCS_AND_FREQS) >= 0;
            bool doCheckFreqs = allowFreqs && (alwaysTestMax || Random().Next(3) <= 2);

            bool allowPositions = fieldInfo.FieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS && maxTestOptions.CompareTo(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
            bool doCheckPositions = allowPositions && (alwaysTestMax || Random().Next(3) <= 2);

            bool allowOffsets = fieldInfo.FieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS && maxTestOptions.CompareTo(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
            bool doCheckOffsets = allowOffsets && (alwaysTestMax || Random().Next(3) <= 2);

            bool doCheckPayloads = options.Contains(Option.PAYLOADS) && allowPositions && fieldInfo.HasPayloads() && (alwaysTestMax || Random().Next(3) <= 2);

            DocsEnum prevDocsEnum = null;

            DocsEnum docsEnum;
            DocsAndPositionsEnum docsAndPositionsEnum;

            if (!doCheckPositions)
            {
                if (allowPositions && Random().Next(10) == 7)
                {
                    // 10% of the time, even though we will not check positions, pull a DocsAndPositions enum

                    if (options.Contains(Option.REUSE_ENUMS) && Random().Next(10) < 9)
                    {
                        prevDocsEnum = threadState.ReuseDocsAndPositionsEnum;
                    }

                    int flags = 0;
                    if (alwaysTestMax || Random().NextBoolean())
                    {
                        flags |= DocsAndPositionsEnum.FLAG_OFFSETS;
                    }
                    if (alwaysTestMax || Random().NextBoolean())
                    {
                        flags |= DocsAndPositionsEnum.FLAG_PAYLOADS;
                    }

                    if (VERBOSE)
                    {
                        Console.WriteLine("  get DocsAndPositionsEnum (but we won't check positions) flags=" + flags);
                    }

                    threadState.ReuseDocsAndPositionsEnum = termsEnum.DocsAndPositions(liveDocs, (DocsAndPositionsEnum)prevDocsEnum, flags);
                    docsEnum = threadState.ReuseDocsAndPositionsEnum;
                    docsAndPositionsEnum = threadState.ReuseDocsAndPositionsEnum;
                }
                else
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("  get DocsEnum");
                    }
                    if (options.Contains(Option.REUSE_ENUMS) && Random().Next(10) < 9)
                    {
                        prevDocsEnum = threadState.ReuseDocsEnum;
                    }
                    threadState.ReuseDocsEnum = termsEnum.Docs(liveDocs, prevDocsEnum, doCheckFreqs ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
                    docsEnum = threadState.ReuseDocsEnum;
                    docsAndPositionsEnum = null;
                }
            }
            else
            {
                if (options.Contains(Option.REUSE_ENUMS) && Random().Next(10) < 9)
                {
                    prevDocsEnum = threadState.ReuseDocsAndPositionsEnum;
                }

                int flags = 0;
                if (alwaysTestMax || doCheckOffsets || Random().Next(3) == 1)
                {
                    flags |= DocsAndPositionsEnum.FLAG_OFFSETS;
                }
                if (alwaysTestMax || doCheckPayloads || Random().Next(3) == 1)
                {
                    flags |= DocsAndPositionsEnum.FLAG_PAYLOADS;
                }

                if (VERBOSE)
                {
                    Console.WriteLine("  get DocsAndPositionsEnum flags=" + flags);
                }

                threadState.ReuseDocsAndPositionsEnum = termsEnum.DocsAndPositions(liveDocs, (DocsAndPositionsEnum)prevDocsEnum, flags);
                docsEnum = threadState.ReuseDocsAndPositionsEnum;
                docsAndPositionsEnum = threadState.ReuseDocsAndPositionsEnum;
            }

            Assert.IsNotNull(docsEnum, "null DocsEnum");
            int initialDocID = docsEnum.DocID();
            Assert.AreEqual(-1, initialDocID, "inital docID should be -1" + docsEnum);

            if (VERBOSE)
            {
                if (prevDocsEnum == null)
                {
                    Console.WriteLine("  got enum=" + docsEnum);
                }
                else if (prevDocsEnum == docsEnum)
                {
                    Console.WriteLine("  got reuse enum=" + docsEnum);
                }
                else
                {
                    Console.WriteLine("  got enum=" + docsEnum + " (reuse of " + prevDocsEnum + " failed)");
                }
            }

            // 10% of the time don't consume all docs:
            int stopAt;
            if (!alwaysTestMax && options.Contains(Option.PARTIAL_DOC_CONSUME) && expected.DocFreq > 1 && Random().Next(10) == 7)
            {
                stopAt = Random().Next(expected.DocFreq - 1);
                if (VERBOSE)
                {
                    Console.WriteLine("  will not consume all docs (" + stopAt + " vs " + expected.DocFreq + ")");
                }
            }
            else
            {
                stopAt = expected.DocFreq;
                if (VERBOSE)
                {
                    Console.WriteLine("  consume all docs");
                }
            }

            double skipChance = alwaysTestMax ? 0.5 : Random().NextDouble();
            int numSkips = expected.DocFreq < 3 ? 1 : TestUtil.NextInt(Random(), 1, Math.Min(20, expected.DocFreq / 3));
            int skipInc = expected.DocFreq / numSkips;
            int skipDocInc = MaxDoc / numSkips;

            // Sometimes do 100% skipping:
            bool doAllSkipping = options.Contains(Option.SKIPPING) && Random().Next(7) == 1;

            double freqAskChance = alwaysTestMax ? 1.0 : Random().NextDouble();
            double payloadCheckChance = alwaysTestMax ? 1.0 : Random().NextDouble();
            double offsetCheckChance = alwaysTestMax ? 1.0 : Random().NextDouble();

            if (VERBOSE)
            {
                if (options.Contains(Option.SKIPPING))
                {
                    Console.WriteLine("  skipChance=" + skipChance + " numSkips=" + numSkips);
                }
                else
                {
                    Console.WriteLine("  no skipping");
                }
                if (doCheckFreqs)
                {
                    Console.WriteLine("  freqAskChance=" + freqAskChance);
                }
                if (doCheckPayloads)
                {
                    Console.WriteLine("  payloadCheckChance=" + payloadCheckChance);
                }
                if (doCheckOffsets)
                {
                    Console.WriteLine("  offsetCheckChance=" + offsetCheckChance);
                }
            }

            while (expected.Upto <= stopAt)
            {
                if (expected.Upto == stopAt)
                {
                    if (stopAt == expected.DocFreq)
                    {
                        Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.NextDoc(), "DocsEnum should have ended but didn't");

                        // Common bug is to forget to set this.Doc=NO_MORE_DOCS in the enum!:
                        Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.DocID(), "DocsEnum should have ended but didn't");
                    }
                    break;
                }

                if (options.Contains(Option.SKIPPING) && (doAllSkipping || Random().NextDouble() <= skipChance))
                {
                    int targetDocID = -1;
                    if (expected.Upto < stopAt && Random().NextBoolean())
                    {
                        // Pick target we know exists:
                        int skipCount = TestUtil.NextInt(Random(), 1, skipInc);
                        for (int skip = 0; skip < skipCount; skip++)
                        {
                            if (expected.NextDoc() == DocsEnum.NO_MORE_DOCS)
                            {
                                break;
                            }
                        }
                    }
                    else
                    {
                        // Pick random target (might not exist):
                        int skipDocIDs = TestUtil.NextInt(Random(), 1, skipDocInc);
                        if (skipDocIDs > 0)
                        {
                            targetDocID = expected.DocID() + skipDocIDs;
                            expected.Advance(targetDocID);
                        }
                    }

                    if (expected.Upto >= stopAt)
                    {
                        int target = Random().NextBoolean() ? MaxDoc : DocsEnum.NO_MORE_DOCS;
                        if (VERBOSE)
                        {
                            Console.WriteLine("  now advance to end (target=" + target + ")");
                        }
                        Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.Advance(target), "DocsEnum should have ended but didn't");
                        break;
                    }
                    else
                    {
                        if (VERBOSE)
                        {
                            if (targetDocID != -1)
                            {
                                Console.WriteLine("  now advance to random target=" + targetDocID + " (" + expected.Upto + " of " + stopAt + ") current=" + docsEnum.DocID());
                            }
                            else
                            {
                                Console.WriteLine("  now advance to known-exists target=" + expected.DocID() + " (" + expected.Upto + " of " + stopAt + ") current=" + docsEnum.DocID());
                            }
                        }
                        int docID = docsEnum.Advance(targetDocID != -1 ? targetDocID : expected.DocID());
                        Assert.AreEqual(expected.DocID(), docID, "docID is wrong");
                    }
                }
                else
                {
                    expected.NextDoc();
                    if (VERBOSE)
                    {
                        Console.WriteLine("  now nextDoc to " + expected.DocID() + " (" + expected.Upto + " of " + stopAt + ")");
                    }
                    int docID = docsEnum.NextDoc();
                    Assert.AreEqual(expected.DocID(), docID, "docID is wrong");
                    if (docID == DocsEnum.NO_MORE_DOCS)
                    {
                        break;
                    }
                }

                if (doCheckFreqs && Random().NextDouble() <= freqAskChance)
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("    now freq()=" + expected.Freq());
                    }
                    int freq = docsEnum.Freq();
                    Assert.AreEqual(expected.Freq(), freq, "freq is wrong");
                }

                if (doCheckPositions)
                {
                    int freq = docsEnum.Freq();
                    int numPosToConsume;
                    if (!alwaysTestMax && options.Contains(Option.PARTIAL_POS_CONSUME) && Random().Next(5) == 1)
                    {
                        numPosToConsume = Random().Next(freq);
                    }
                    else
                    {
                        numPosToConsume = freq;
                    }

                    for (int i = 0; i < numPosToConsume; i++)
                    {
                        int pos = expected.NextPosition();
                        if (VERBOSE)
                        {
                            Console.WriteLine("    now nextPosition to " + pos);
                        }
                        Assert.AreEqual(pos, docsAndPositionsEnum.NextPosition(), "position is wrong");

                        if (doCheckPayloads)
                        {
                            BytesRef expectedPayload = expected.Payload;
                            if (Random().NextDouble() <= payloadCheckChance)
                            {
                                if (VERBOSE)
                                {
                                    Console.WriteLine("      now check expectedPayload length=" + (expectedPayload == null ? 0 : expectedPayload.Length));
                                }
                                if (expectedPayload == null || expectedPayload.Length == 0)
                                {
                                    Assert.IsNull(docsAndPositionsEnum.Payload, "should not have payload");
                                }
                                else
                                {
                                    BytesRef payload = docsAndPositionsEnum.Payload;
                                    Assert.IsNotNull(payload, "should have payload but doesn't");

                                    Assert.AreEqual(expectedPayload.Length, payload.Length, "payload length is wrong");
                                    for (int byteUpto = 0; byteUpto < expectedPayload.Length; byteUpto++)
                                    {
                                        Assert.AreEqual(expectedPayload.Bytes[expectedPayload.Offset + byteUpto], payload.Bytes[payload.Offset + byteUpto], "payload bytes are wrong");
                                    }

                                    // make a deep copy
                                    payload = BytesRef.DeepCopyOf(payload);
                                    Assert.AreEqual(payload, docsAndPositionsEnum.Payload, "2nd call to getPayload returns something different!");
                                }
                            }
                            else
                            {
                                if (VERBOSE)
                                {
                                    Console.WriteLine("      skip check payload length=" + (expectedPayload == null ? 0 : expectedPayload.Length));
                                }
                            }
                        }

                        if (doCheckOffsets)
                        {
                            if (Random().NextDouble() <= offsetCheckChance)
                            {
                                if (VERBOSE)
                                {
                                    Console.WriteLine("      now check offsets: startOff=" + expected.StartOffset() + " endOffset=" + expected.EndOffset());
                                }
                                Assert.AreEqual(expected.StartOffset(), docsAndPositionsEnum.StartOffset(), "startOffset is wrong");
                                Assert.AreEqual(expected.EndOffset(), docsAndPositionsEnum.EndOffset(), "endOffset is wrong");
                            }
                            else
                            {
                                if (VERBOSE)
                                {
                                    Console.WriteLine("      skip check offsets");
                                }
                            }
                        }
                        else if (fieldInfo.FieldIndexOptions < FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
                        {
                            if (VERBOSE)
                            {
                                Console.WriteLine("      now check offsets are -1");
                            }
                            Assert.AreEqual(-1, docsAndPositionsEnum.StartOffset(), "startOffset isn't -1");
                            Assert.AreEqual(-1, docsAndPositionsEnum.EndOffset(), "endOffset isn't -1");
                        }
                    }
                }
            }
        }
Пример #5
0
 public AssertingTermsEnum(TermsEnum @in)
     : base(@in)
 {
 }
Пример #6
0
            internal long Ord_Renamed; // force "real" seek

            public OrdWrappedTermsEnum(DocTermOrds outerInstance, AtomicReader reader)
            {
                this.OuterInstance = outerInstance;

                if (!InstanceFieldsInitialized)
                {
                    InitializeInstanceFields();
                    InstanceFieldsInitialized = true;
                }
                Debug.Assert(outerInstance.IndexedTermsArray != null);
                TermsEnum = reader.Fields.Terms(outerInstance.Field).Iterator(null);
            }
 public override TermsEnum Iterator(TermsEnum reuse)
 {
     TVTermsEnum termsEnum;
     if (reuse != null && reuse is TVTermsEnum)
     {
         termsEnum = (TVTermsEnum)reuse;
     }
     else
     {
         termsEnum = new TVTermsEnum();
     }
     termsEnum.Reset(NumTerms, Flags, PrefixLengths, SuffixLengths, TermFreqs, PositionIndex, Positions, StartOffsets, Lengths, PayloadIndex, PayloadBytes, new ByteArrayDataInput((byte[])(Array)TermBytes.Bytes, TermBytes.Offset, TermBytes.Length));
     return termsEnum;
 }
Пример #8
0
				/// <exception cref="System.IO.IOException"></exception>
				internal SegmentResult(int[] counts, int total, TermsEnum tenum, int startFacetOrd
					, int endFacetOrd) : base(counts, total - counts[0], counts[0], endFacetOrd + 1)
				{
					this.tenum = tenum;
					this.mergePos = startFacetOrd == -1 ? 1 : startFacetOrd + 1;
					if (mergePos < maxTermPos)
					{
						tenum != null.SeekExact(startFacetOrd == -1 ? 0 : startFacetOrd);
						mergeTerm = tenum.Term();
					}
				}
Пример #9
0
            public virtual void _run()
            {
                for (int iter = 0; iter < NUM_TEST_ITER; iter++)
                {
                    FieldData field     = Fields[Random().Next(Fields.Length)];
                    TermsEnum termsEnum = TermsDict.GetTerms(field.FieldInfo.Name).GetIterator(null);
#pragma warning disable 612, 618
                    if (Si.Codec is Lucene3xCodec)
#pragma warning restore 612, 618
                    {
                        // code below expects unicode sort order
                        continue;
                    }

                    int upto = 0;
                    // Test straight enum of the terms:
                    while (true)
                    {
                        BytesRef term = termsEnum.Next();
                        if (term == null)
                        {
                            break;
                        }
                        BytesRef expected = new BytesRef(field.Terms[upto++].Text2);
                        Assert.IsTrue(expected.BytesEquals(term), "expected=" + expected + " vs actual " + term);
                    }
                    Assert.AreEqual(upto, field.Terms.Length);

                    // Test random seek:
                    TermData             term2  = field.Terms[Random().Next(field.Terms.Length)];
                    TermsEnum.SeekStatus status = termsEnum.SeekCeil(new BytesRef(term2.Text2));
                    Assert.AreEqual(status, TermsEnum.SeekStatus.FOUND);
                    Assert.AreEqual(term2.Docs.Length, termsEnum.DocFreq);
                    if (field.OmitTF)
                    {
                        this.VerifyDocs(term2.Docs, term2.Positions, TestUtil.Docs(Random(), termsEnum, null, null, DocsFlags.NONE), false);
                    }
                    else
                    {
                        this.VerifyDocs(term2.Docs, term2.Positions, termsEnum.DocsAndPositions(null, null), true);
                    }

                    // Test random seek by ord:
                    int idx = Random().Next(field.Terms.Length);
                    term2 = field.Terms[idx];
                    bool success = false;
                    try
                    {
                        termsEnum.SeekExact(idx);
                        success = true;
                    }
#pragma warning disable 168
                    catch (System.NotSupportedException uoe)
#pragma warning restore 168
                    {
                        // ok -- skip it
                    }
                    if (success)
                    {
                        Assert.AreEqual(status, TermsEnum.SeekStatus.FOUND);
                        Assert.IsTrue(termsEnum.Term.BytesEquals(new BytesRef(term2.Text2)));
                        Assert.AreEqual(term2.Docs.Length, termsEnum.DocFreq);
                        if (field.OmitTF)
                        {
                            this.VerifyDocs(term2.Docs, term2.Positions, TestUtil.Docs(Random(), termsEnum, null, null, DocsFlags.NONE), false);
                        }
                        else
                        {
                            this.VerifyDocs(term2.Docs, term2.Positions, termsEnum.DocsAndPositions(null, null), true);
                        }
                    }

                    // Test seek to non-existent terms:
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: seek non-exist terms");
                    }
                    for (int i = 0; i < 100; i++)
                    {
                        string text2 = TestUtil.RandomUnicodeString(Random()) + ".";
                        status = termsEnum.SeekCeil(new BytesRef(text2));
                        Assert.IsTrue(status == TermsEnum.SeekStatus.NOT_FOUND || status == TermsEnum.SeekStatus.END);
                    }

                    // Seek to each term, backwards:
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: seek terms backwards");
                    }
                    for (int i = field.Terms.Length - 1; i >= 0; i--)
                    {
                        Assert.AreEqual(TermsEnum.SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef(field.Terms[i].Text2)), Thread.CurrentThread.Name + ": field=" + field.FieldInfo.Name + " term=" + field.Terms[i].Text2);
                        Assert.AreEqual(field.Terms[i].Docs.Length, termsEnum.DocFreq);
                    }

                    // Seek to each term by ord, backwards
                    for (int i = field.Terms.Length - 1; i >= 0; i--)
                    {
                        try
                        {
                            termsEnum.SeekExact(i);
                            Assert.AreEqual(field.Terms[i].Docs.Length, termsEnum.DocFreq);
                            Assert.IsTrue(termsEnum.Term.BytesEquals(new BytesRef(field.Terms[i].Text2)));
                        }
#pragma warning disable 168
                        catch (System.NotSupportedException uoe)
#pragma warning restore 168
                        {
                        }
                    }

                    // Seek to non-existent empty-string term
                    status = termsEnum.SeekCeil(new BytesRef(""));
                    Assert.IsNotNull(status);
                    //Assert.AreEqual(TermsEnum.SeekStatus.NOT_FOUND, status);

                    // Make sure we're now pointing to first term
                    Assert.IsTrue(termsEnum.Term.BytesEquals(new BytesRef(field.Terms[0].Text2)));

                    // Test docs enum
                    termsEnum.SeekCeil(new BytesRef(""));
                    upto = 0;
                    do
                    {
                        term2 = field.Terms[upto];
                        if (Random().Next(3) == 1)
                        {
                            DocsEnum             docs;
                            DocsEnum             docsAndFreqs;
                            DocsAndPositionsEnum postings;
                            if (!field.OmitTF)
                            {
                                postings = termsEnum.DocsAndPositions(null, null);
                                if (postings != null)
                                {
                                    docs = docsAndFreqs = postings;
                                }
                                else
                                {
                                    docs = docsAndFreqs = TestUtil.Docs(Random(), termsEnum, null, null, DocsFlags.FREQS);
                                }
                            }
                            else
                            {
                                postings     = null;
                                docsAndFreqs = null;
                                docs         = TestUtil.Docs(Random(), termsEnum, null, null, DocsFlags.NONE);
                            }
                            Assert.IsNotNull(docs);
                            int  upto2 = -1;
                            bool ended = false;
                            while (upto2 < term2.Docs.Length - 1)
                            {
                                // Maybe skip:
                                int left = term2.Docs.Length - upto2;
                                int doc;
                                if (Random().Next(3) == 1 && left >= 1)
                                {
                                    int inc = 1 + Random().Next(left - 1);
                                    upto2 += inc;
                                    if (Random().Next(2) == 1)
                                    {
                                        doc = docs.Advance(term2.Docs[upto2]);
                                        Assert.AreEqual(term2.Docs[upto2], doc);
                                    }
                                    else
                                    {
                                        doc = docs.Advance(1 + term2.Docs[upto2]);
                                        if (doc == DocIdSetIterator.NO_MORE_DOCS)
                                        {
                                            // skipped past last doc
                                            Debug.Assert(upto2 == term2.Docs.Length - 1);
                                            ended = true;
                                            break;
                                        }
                                        else
                                        {
                                            // skipped to next doc
                                            Debug.Assert(upto2 < term2.Docs.Length - 1);
                                            if (doc >= term2.Docs[1 + upto2])
                                            {
                                                upto2++;
                                            }
                                        }
                                    }
                                }
                                else
                                {
                                    doc = docs.NextDoc();
                                    Assert.IsTrue(doc != -1);
                                    upto2++;
                                }
                                Assert.AreEqual(term2.Docs[upto2], doc);
                                if (!field.OmitTF)
                                {
                                    Assert.AreEqual(term2.Positions[upto2].Length, postings.Freq);
                                    if (Random().Next(2) == 1)
                                    {
                                        this.VerifyPositions(term2.Positions[upto2], postings);
                                    }
                                }
                            }

                            if (!ended)
                            {
                                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docs.NextDoc());
                            }
                        }
                        upto++;
                    } while (termsEnum.Next() != null);

                    Assert.AreEqual(upto, field.Terms.Length);
                }
            }
Пример #10
0
        public virtual void TestFixedPostings()
        {
            const int NUM_TERMS = 100;

            TermData[] terms = new TermData[NUM_TERMS];
            for (int i = 0; i < NUM_TERMS; i++)
            {
                int[]  docs = new int[] { i };
                string text = Convert.ToString(i);
                terms[i] = new TermData(this, text, docs, null);
            }

            FieldInfos.Builder builder = new FieldInfos.Builder();

            FieldData field = new FieldData(this, "field", builder, terms, true, false);

            FieldData[] fields     = new FieldData[] { field };
            FieldInfos  fieldInfos = builder.Finish();

            // LUCENENET specific - BUG: we must wrap this in a using block in case anything in the below loop throws
            using (Directory dir = NewDirectory())
            {
                this.Write(fieldInfos, dir, fields, true);
                Codec       codec = Codec.Default;
                SegmentInfo si    = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null);

                // LUCENENET specific - BUG: we must wrap this in a using block in case anything in the below loop throws
                using (FieldsProducer reader = codec.PostingsFormat.FieldsProducer(new SegmentReadState(dir, si, fieldInfos, NewIOContext(Random()), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR)))
                {
                    IEnumerator <string> fieldsEnum = reader.GetEnumerator();
                    fieldsEnum.MoveNext();
                    string fieldName = fieldsEnum.Current;
                    Assert.IsNotNull(fieldName);
                    Terms terms2 = reader.GetTerms(fieldName);
                    Assert.IsNotNull(terms2);

                    TermsEnum termsEnum = terms2.GetIterator(null);

                    DocsEnum docsEnum = null;
                    for (int i = 0; i < NUM_TERMS; i++)
                    {
                        BytesRef term = termsEnum.Next();
                        Assert.IsNotNull(term);
                        Assert.AreEqual(terms[i].Text2, term.Utf8ToString());

                        // do this twice to stress test the codec's reuse, ie,
                        // make sure it properly fully resets (rewinds) its
                        // internal state:
                        for (int iter = 0; iter < 2; iter++)
                        {
                            docsEnum = TestUtil.Docs(Random(), termsEnum, null, docsEnum, DocsFlags.NONE);
                            Assert.AreEqual(terms[i].Docs[0], docsEnum.NextDoc());
                            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docsEnum.NextDoc());
                        }
                    }
                    Assert.IsNull(termsEnum.Next());

                    for (int i = 0; i < NUM_TERMS; i++)
                    {
                        Assert.AreEqual(termsEnum.SeekCeil(new BytesRef(terms[i].Text2)), TermsEnum.SeekStatus.FOUND);
                    }

                    Assert.IsFalse(fieldsEnum.MoveNext());
                }
            }
        }
Пример #11
0
        public virtual void Test()
        {
            IList <string> postingsList   = new List <string>();
            int            numTerms       = AtLeast(300);
            int            maxTermsPerDoc = TestUtil.NextInt(Random(), 10, 20);

            bool isSimpleText = "SimpleText".Equals(TestUtil.GetPostingsFormat("field"));

            IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            if ((isSimpleText || iwc.MergePolicy is MockRandomMergePolicy) && (TEST_NIGHTLY || RANDOM_MULTIPLIER > 1))
            {
                // Otherwise test can take way too long (> 2 hours)
                numTerms /= 2;
            }

            if (VERBOSE)
            {
                Console.WriteLine("maxTermsPerDoc=" + maxTermsPerDoc);
                Console.WriteLine("numTerms=" + numTerms);
            }

            for (int i = 0; i < numTerms; i++)
            {
                string term = Convert.ToString(i);
                for (int j = 0; j < i; j++)
                {
                    postingsList.Add(term);
                }
            }
            postingsList = CollectionsHelper.Shuffle(postingsList);

            ConcurrentQueue <string> postings = new ConcurrentQueue <string>(postingsList);

            Directory         dir = NewFSDirectory(CreateTempDir("bagofpostings"));
            RandomIndexWriter iw  = new RandomIndexWriter(Random(), dir, iwc);

            int threadCount = TestUtil.NextInt(Random(), 1, 5);

            if (VERBOSE)
            {
                Console.WriteLine("config: " + iw.w.Config);
                Console.WriteLine("threadCount=" + threadCount);
            }

            ThreadClass[]  threads     = new ThreadClass[threadCount];
            CountdownEvent startingGun = new CountdownEvent(1);

            for (int threadID = 0; threadID < threadCount; threadID++)
            {
                threads[threadID] = new ThreadAnonymousInnerClassHelper(this, maxTermsPerDoc, postings, iw, startingGun);
                threads[threadID].Start();
            }
            startingGun.Signal();
            foreach (ThreadClass t in threads)
            {
                t.Join();
            }

            iw.ForceMerge(1);
            DirectoryReader ir = iw.Reader;

            Assert.AreEqual(1, ir.Leaves.Count);
            AtomicReader air   = (AtomicReader)ir.Leaves[0].Reader;
            Terms        terms = air.Terms("field");

            // numTerms-1 because there cannot be a term 0 with 0 postings:
            Assert.AreEqual(numTerms - 1, air.Fields.UniqueTermCount);
            if (iwc.Codec is Lucene3xCodec == false)
            {
                Assert.AreEqual(numTerms - 1, terms.Size());
            }
            TermsEnum termsEnum = terms.Iterator(null);
            BytesRef  term_;

            while ((term_ = termsEnum.Next()) != null)
            {
                int value = Convert.ToInt32(term_.Utf8ToString());
                Assert.AreEqual(value, termsEnum.DocFreq());
                // don't really need to check more than this, as CheckIndex
                // will verify that docFreq == actual number of documents seen
                // from a docsAndPositionsEnum.
            }
            ir.Dispose();
            iw.Dispose();
            dir.Dispose();
        }
Пример #12
0
 public TestTermsEnum(TermsEnum @in)
     : base(@in)
 {
 }
Пример #13
0
 public override TermsEnum GetIterator(TermsEnum reuse)
 {
     return(new TestTermsEnum(base.GetIterator(reuse)));
 }
 public override void Run()
 {
     if (VERBOSE)
     {
         Console.WriteLine(Thread.CurrentThread.Name + ": launch search thread");
     }
     while (Environment.TickCount < stopTimeMS)
     {
         try
         {
             IndexSearcher s = outerInstance.CurrentSearcher;
             try
             {
                 // Verify 1) IW is correctly setting
                 // diagnostics, and 2) segment warming for
                 // merged segments is actually happening:
                 foreach (AtomicReaderContext sub in s.IndexReader.Leaves)
                 {
                     SegmentReader segReader = (SegmentReader)sub.Reader;
                     IDictionary <string, string> diagnostics = segReader.SegmentInfo.Info.Diagnostics;
                     assertNotNull(diagnostics);
                     string source;
                     diagnostics.TryGetValue("source", out source);
                     assertNotNull(source);
                     if (source.Equals("merge", StringComparison.Ordinal))
                     {
                         assertTrue("sub reader " + sub + " wasn't warmed: warmed=" + outerInstance.warmed + " diagnostics=" + diagnostics + " si=" + segReader.SegmentInfo,
                                    !outerInstance.assertMergedSegmentsWarmed || outerInstance.warmed.ContainsKey(segReader.core));
                     }
                 }
                 if (s.IndexReader.NumDocs > 0)
                 {
                     outerInstance.SmokeTestSearcher(s);
                     Fields fields = MultiFields.GetFields(s.IndexReader);
                     if (fields == null)
                     {
                         continue;
                     }
                     Terms terms = fields.GetTerms("body");
                     if (terms == null)
                     {
                         continue;
                     }
                     TermsEnum termsEnum     = terms.GetIterator(null);
                     int       seenTermCount = 0;
                     int       shift;
                     int       trigger;
                     if (totTermCount.Get() < 30)
                     {
                         shift   = 0;
                         trigger = 1;
                     }
                     else
                     {
                         trigger = totTermCount.Get() / 30;
                         shift   = Random().Next(trigger);
                     }
                     while (Environment.TickCount < stopTimeMS)
                     {
                         BytesRef term = termsEnum.Next();
                         if (term == null)
                         {
                             totTermCount.Set(seenTermCount);
                             break;
                         }
                         seenTermCount++;
                         // search 30 terms
                         if ((seenTermCount + shift) % trigger == 0)
                         {
                             //if (VERBOSE) {
                             //System.out.println(Thread.currentThread().getName() + " now search body:" + term.Utf8ToString());
                             //}
                             totHits.AddAndGet(outerInstance.RunQuery(s, new TermQuery(new Term("body", term))));
                         }
                     }
                     //if (VERBOSE) {
                     //System.out.println(Thread.currentThread().getName() + ": search done");
                     //}
                 }
             }
             finally
             {
                 outerInstance.ReleaseSearcher(s);
             }
         }
         catch (Exception t)
         {
             Console.WriteLine(Thread.CurrentThread.Name + ": hit exc");
             outerInstance.failed.Set(true);
             Console.WriteLine(t.ToString());
             throw new Exception(t.ToString(), t);
         }
     }
 }
Пример #15
0
        public virtual void TestArbitraryFields()
        {
            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            int NUM_DOCS = AtLeast(27);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: " + NUM_DOCS + " docs");
            }
            int[] fieldsPerDoc = new int[NUM_DOCS];
            int   baseCount    = 0;

            for (int docCount = 0; docCount < NUM_DOCS; docCount++)
            {
                int fieldCount = TestUtil.NextInt(Random(), 1, 17);
                fieldsPerDoc[docCount] = fieldCount - 1;

                int finalDocCount = docCount;
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: " + fieldCount + " fields in doc " + docCount);
                }

                int finalBaseCount = baseCount;
                baseCount += fieldCount - 1;

                w.AddDocument(new IterableAnonymousInnerClassHelper(this, fieldCount, finalDocCount, finalBaseCount));
            }

            IndexReader r = w.Reader;

            w.Dispose();

            IndexSearcher s       = NewSearcher(r);
            int           counter = 0;

            for (int id = 0; id < NUM_DOCS; id++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: verify doc id=" + id + " (" + fieldsPerDoc[id] + " fields) counter=" + counter);
                }
                TopDocs hits = s.Search(new TermQuery(new Term("id", "" + id)), 1);
                Assert.AreEqual(1, hits.TotalHits);
                int      docID      = hits.ScoreDocs[0].Doc;
                Document doc        = s.Doc(docID);
                int      endCounter = counter + fieldsPerDoc[id];
                while (counter < endCounter)
                {
                    string name    = "f" + counter;
                    int    fieldID = counter % 10;

                    bool stored  = (counter & 1) == 0 || fieldID == 3;
                    bool binary  = fieldID == 3;
                    bool indexed = fieldID != 3;

                    string stringValue;
                    if (fieldID != 3 && fieldID != 9)
                    {
                        stringValue = "text " + counter;
                    }
                    else
                    {
                        stringValue = null;
                    }

                    // stored:
                    if (stored)
                    {
                        IIndexableField f = doc.GetField(name);
                        Assert.IsNotNull(f, "doc " + id + " doesn't have field f" + counter);
                        if (binary)
                        {
                            Assert.IsNotNull(f, "doc " + id + " doesn't have field f" + counter);
                            BytesRef b = f.GetBinaryValue();
                            Assert.IsNotNull(b);
                            Assert.AreEqual(10, b.Length);
                            for (int idx = 0; idx < 10; idx++)
                            {
                                Assert.AreEqual((byte)(idx + counter), b.Bytes[b.Offset + idx]);
                            }
                        }
                        else
                        {
                            Debug.Assert(stringValue != null);
                            Assert.AreEqual(stringValue, f.GetStringValue());
                        }
                    }

                    if (indexed)
                    {
                        bool tv = counter % 2 == 1 && fieldID != 9;
                        if (tv)
                        {
                            Terms tfv = r.GetTermVectors(docID).GetTerms(name);
                            Assert.IsNotNull(tfv);
                            TermsEnum termsEnum = tfv.GetIterator(null);
                            Assert.AreEqual(new BytesRef("" + counter), termsEnum.Next());
                            Assert.AreEqual(1, termsEnum.TotalTermFreq);
                            DocsAndPositionsEnum dpEnum = termsEnum.DocsAndPositions(null, null);
                            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                            Assert.AreEqual(1, dpEnum.Freq);
                            Assert.AreEqual(1, dpEnum.NextPosition());

                            Assert.AreEqual(new BytesRef("text"), termsEnum.Next());
                            Assert.AreEqual(1, termsEnum.TotalTermFreq);
                            dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
                            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                            Assert.AreEqual(1, dpEnum.Freq);
                            Assert.AreEqual(0, dpEnum.NextPosition());

                            Assert.IsNull(termsEnum.Next());

                            // TODO: offsets
                        }
                        else
                        {
                            Fields vectors = r.GetTermVectors(docID);
                            Assert.IsTrue(vectors == null || vectors.GetTerms(name) == null);
                        }

                        BooleanQuery bq = new BooleanQuery();
                        bq.Add(new TermQuery(new Term("id", "" + id)), Occur.MUST);
                        bq.Add(new TermQuery(new Term(name, "text")), Occur.MUST);
                        TopDocs hits2 = s.Search(bq, 1);
                        Assert.AreEqual(1, hits2.TotalHits);
                        Assert.AreEqual(docID, hits2.ScoreDocs[0].Doc);

                        bq = new BooleanQuery();
                        bq.Add(new TermQuery(new Term("id", "" + id)), Occur.MUST);
                        bq.Add(new TermQuery(new Term(name, "" + counter)), Occur.MUST);
                        TopDocs hits3 = s.Search(bq, 1);
                        Assert.AreEqual(1, hits3.TotalHits);
                        Assert.AreEqual(docID, hits3.ScoreDocs[0].Doc);
                    }

                    counter++;
                }
            }

            r.Dispose();
            dir.Dispose();
        }
 public override TermsEnum Iterator(TermsEnum reuse)
 {
     return DocTermOrds.TermsEnum();
 }
Пример #17
0
 /// <summary>
 /// Creates a filtered <seealso cref="TermsEnum"/> on a terms enum. </summary>
 /// <param name="tenum"> the terms enumeration to filter. </param>
 public FilteredTermsEnum(TermsEnum tenum)
     : this(tenum, true)
 {
 }
Пример #18
0
        public virtual void TestMerge()
        {
            Codec       codec = Codec.Default;
            SegmentInfo si    = new SegmentInfo(MergedDir, Constants.LUCENE_MAIN_VERSION, MergedSegment, -1, false, codec, null);

            SegmentMerger merger = new SegmentMerger(new List <AtomicReader> {
                Reader1, Reader2
            }, si, InfoStream.Default, MergedDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, CheckAbort.NONE, new FieldInfos.FieldNumbers(), NewIOContext(Random), true);
            MergeState mergeState = merger.Merge();
            int        docsMerged = mergeState.SegmentInfo.DocCount;

            Assert.IsTrue(docsMerged == 2);
            //Should be able to open a new SegmentReader against the new directory
            SegmentReader mergedReader = new SegmentReader(new SegmentCommitInfo(new SegmentInfo(MergedDir, Constants.LUCENE_MAIN_VERSION, MergedSegment, docsMerged, false, codec, null), 0, -1L, -1L), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random));

            Assert.IsTrue(mergedReader != null);
            Assert.IsTrue(mergedReader.NumDocs == 2);
            Document newDoc1 = mergedReader.Document(0);

            Assert.IsTrue(newDoc1 != null);
            //There are 2 unstored fields on the document
            Assert.IsTrue(DocHelper.NumFields(newDoc1) == DocHelper.NumFields(Doc1) - DocHelper.Unstored.Count);
            Document newDoc2 = mergedReader.Document(1);

            Assert.IsTrue(newDoc2 != null);
            Assert.IsTrue(DocHelper.NumFields(newDoc2) == DocHelper.NumFields(Doc2) - DocHelper.Unstored.Count);

            DocsEnum termDocs = TestUtil.Docs(Random, mergedReader, DocHelper.TEXT_FIELD_2_KEY, new BytesRef("field"), MultiFields.GetLiveDocs(mergedReader), null, 0);

            Assert.IsTrue(termDocs != null);
            Assert.IsTrue(termDocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);

            int tvCount = 0;

            foreach (FieldInfo fieldInfo in mergedReader.FieldInfos)
            {
                if (fieldInfo.HasVectors)
                {
                    tvCount++;
                }
            }

            //System.out.println("stored size: " + stored.Size());
            Assert.AreEqual(3, tvCount, "We do not have 3 fields that were indexed with term vector");

            Terms vector = mergedReader.GetTermVectors(0).GetTerms(DocHelper.TEXT_FIELD_2_KEY);

            Assert.IsNotNull(vector);
            Assert.AreEqual(3, vector.Count);
            TermsEnum termsEnum = vector.GetIterator(null);

            int i = 0;

            while (termsEnum.Next() != null)
            {
                string term = termsEnum.Term.Utf8ToString();
                int    freq = (int)termsEnum.TotalTermFreq;
                //System.out.println("Term: " + term + " Freq: " + freq);
                Assert.IsTrue(DocHelper.FIELD_2_TEXT.IndexOf(term, StringComparison.Ordinal) != -1);
                Assert.IsTrue(DocHelper.FIELD_2_FREQS[i] == freq);
                i++;
            }

            TestSegmentReader.CheckNorms(mergedReader);
            mergedReader.Dispose();
        }
Пример #19
0
				/// <exception cref="System.IO.IOException"></exception>
				internal SegmentResult(int[] counts, int total, int missingCountIndex, TermsEnum 
					tenum, int startFacetOrd, int endFacetOrd) : base(counts, total - counts[missingCountIndex
					], counts[missingCountIndex], endFacetOrd == missingCountIndex + 1 ? missingCountIndex
					 : endFacetOrd)
				{
					this.tenum = tenum;
					this.mergePos = startFacetOrd;
					if (tenum != null)
					{
						tenum.SeekExact(mergePos);
						mergeTerm = tenum.Term();
					}
				}
Пример #20
0
        public virtual void TestStressAdvance_Mem()
        {
            for (int iter = 0; iter < 3; iter++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: iter=" + iter);
                }
                Directory          dir   = NewDirectory();
                RandomIndexWriter  w     = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
                HashSet <int>      aDocs = new HashSet <int>();
                Documents.Document doc   = new Documents.Document();
                Field f = NewStringField("field", "", Field.Store.NO);
                doc.Add(f);
                Field idField = NewStringField("id", "", Field.Store.YES);
                doc.Add(idField);
                int num = AtLeast(4097);
                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: numDocs=" + num);
                }
                for (int id = 0; id < num; id++)
                {
                    if (Random().Next(4) == 3)
                    {
                        f.SetStringValue("a");
                        aDocs.Add(id);
                    }
                    else
                    {
                        f.SetStringValue("b");
                    }
                    idField.SetStringValue("" + id);
                    w.AddDocument(doc);
                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: doc upto " + id);
                    }
                }

                w.ForceMerge(1);

                IList <int> aDocIDs = new List <int>();
                IList <int> bDocIDs = new List <int>();

                DirectoryReader r         = w.Reader;
                int[]           idToDocID = new int[r.MaxDoc];
                for (int docID = 0; docID < idToDocID.Length; docID++)
                {
                    int id = Convert.ToInt32(r.Document(docID).Get("id"));
                    if (aDocs.Contains(id))
                    {
                        aDocIDs.Add(docID);
                    }
                    else
                    {
                        bDocIDs.Add(docID);
                    }
                }
                TermsEnum te = GetOnlySegmentReader(r).Fields.GetTerms("field").GetIterator(null);

                DocsEnum de = null;
                for (int iter2 = 0; iter2 < 10; iter2++)
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: iter=" + iter + " iter2=" + iter2);
                    }
                    Assert.AreEqual(TermsEnum.SeekStatus.FOUND, te.SeekCeil(new BytesRef("a")));
                    de = TestUtil.Docs(Random(), te, null, de, DocsFlags.NONE);
                    TestOne(de, aDocIDs);

                    Assert.AreEqual(TermsEnum.SeekStatus.FOUND, te.SeekCeil(new BytesRef("b")));
                    de = TestUtil.Docs(Random(), te, null, de, DocsFlags.NONE);
                    TestOne(de, bDocIDs);
                }

                w.Dispose();
                r.Dispose();
                dir.Dispose();
            }
        }
Пример #21
0
 // sugar
 private bool SeekExact(TermsEnum te, string term)
 {
     return te.SeekExact(new BytesRef(term));
 }
Пример #22
0
        public virtual void TestPositionReader()
        {
            TermVectorsReader reader = Codec.Default.TermVectorsFormat.VectorsReader(Dir, Seg.Info, FieldInfos, NewIOContext(Random));
            //BytesRef[] terms; // LUCENENET NOTE: Not used in Lucene
            Terms vector = reader.Get(0).GetTerms(TestFields[0]);

            Assert.IsNotNull(vector);
            Assert.AreEqual(TestTerms.Length, vector.Count);
            TermsEnum            termsEnum = vector.GetIterator(null);
            DocsAndPositionsEnum dpEnum    = null;

            for (int i = 0; i < TestTerms.Length; i++)
            {
                BytesRef text = termsEnum.Next();
                Assert.IsNotNull(text);
                string term = text.Utf8ToString();
                //System.out.println("Term: " + term);
                Assert.AreEqual(TestTerms[i], term);

                dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
                Assert.IsNotNull(dpEnum);
                int doc = dpEnum.DocID;
                Assert.AreEqual(-1, doc);
                Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                Assert.AreEqual(dpEnum.Freq, Positions[i].Length);
                for (int j = 0; j < Positions[i].Length; j++)
                {
                    Assert.AreEqual(Positions[i][j], dpEnum.NextPosition());
                }
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());

                dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
                doc    = dpEnum.DocID;
                Assert.AreEqual(-1, doc);
                Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                Assert.IsNotNull(dpEnum);
                Assert.AreEqual(dpEnum.Freq, Positions[i].Length);
                for (int j = 0; j < Positions[i].Length; j++)
                {
                    Assert.AreEqual(Positions[i][j], dpEnum.NextPosition());
                    Assert.AreEqual(j * 10, dpEnum.StartOffset);
                    Assert.AreEqual(j * 10 + TestTerms[i].Length, dpEnum.EndOffset);
                }
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());
            }

            Terms freqVector = reader.Get(0).GetTerms(TestFields[1]); //no pos, no offset

            Assert.IsNotNull(freqVector);
            Assert.AreEqual(TestTerms.Length, freqVector.Count);
            termsEnum = freqVector.GetIterator(null);
            Assert.IsNotNull(termsEnum);
            for (int i = 0; i < TestTerms.Length; i++)
            {
                BytesRef text = termsEnum.Next();
                Assert.IsNotNull(text);
                string term = text.Utf8ToString();
                //System.out.println("Term: " + term);
                Assert.AreEqual(TestTerms[i], term);
                Assert.IsNotNull(termsEnum.Docs(null, null));
                Assert.IsNull(termsEnum.DocsAndPositions(null, null)); // no pos
            }
            reader.Dispose();
        }
Пример #23
0
 internal Iterator(DocTermOrds outerInstance, AtomicReader reader)
 {
     this.OuterInstance = outerInstance;
     this.Reader = reader;
     this.Te = TermsEnum();
 }
Пример #24
0
 public AssertingTermsEnum(TermsEnum @in)
     : base(@in)
 {
 }
Пример #25
0
 public IntDocValuesAnonymousInnerClassHelper(JoinDocFreqValueSource outerInstance, JoinDocFreqValueSource @this, BinaryDocValues terms, TermsEnum termsEnum)
     : base(@this)
 {
     this.outerInstance = outerInstance;
     this.terms = terms;
     this.termsEnum = termsEnum;
     @ref = new BytesRef();
 }
Пример #26
0
        public virtual void TestRandom()
        {
            // token -> docID -> tokens
            IDictionary <string, IDictionary <int?, IList <Token> > > actualTokens = new Dictionary <string, IDictionary <int?, IList <Token> > >();

            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(Random, dir, iwc);

            int numDocs = AtLeast(20);
            //final int numDocs = AtLeast(5);

            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);

            // TODO: randomize what IndexOptions we use; also test
            // changing this up in one IW buffered segment...:
            ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            if (Random.NextBoolean())
            {
                ft.StoreTermVectors         = true;
                ft.StoreTermVectorOffsets   = Random.NextBoolean();
                ft.StoreTermVectorPositions = Random.NextBoolean();
            }

            for (int docCount = 0; docCount < numDocs; docCount++)
            {
                Document doc = new Document();
                doc.Add(new Int32Field("id", docCount, Field.Store.NO));
                IList <Token> tokens    = new List <Token>();
                int           numTokens = AtLeast(100);
                //final int numTokens = AtLeast(20);
                int pos    = -1;
                int offset = 0;
                //System.out.println("doc id=" + docCount);
                for (int tokenCount = 0; tokenCount < numTokens; tokenCount++)
                {
                    string text;
                    if (Random.NextBoolean())
                    {
                        text = "a";
                    }
                    else if (Random.NextBoolean())
                    {
                        text = "b";
                    }
                    else if (Random.NextBoolean())
                    {
                        text = "c";
                    }
                    else
                    {
                        text = "d";
                    }

                    int posIncr = Random.NextBoolean() ? 1 : Random.Next(5);
                    if (tokenCount == 0 && posIncr == 0)
                    {
                        posIncr = 1;
                    }
                    int offIncr     = Random.NextBoolean() ? 0 : Random.Next(5);
                    int tokenOffset = Random.Next(5);

                    Token token = MakeToken(text, posIncr, offset + offIncr, offset + offIncr + tokenOffset);
                    if (!actualTokens.TryGetValue(text, out IDictionary <int?, IList <Token> > postingsByDoc))
                    {
                        actualTokens[text] = postingsByDoc = new Dictionary <int?, IList <Token> >();
                    }
                    if (!postingsByDoc.TryGetValue(docCount, out IList <Token> postings))
                    {
                        postingsByDoc[docCount] = postings = new List <Token>();
                    }
                    postings.Add(token);
                    tokens.Add(token);
                    pos += posIncr;
                    // stuff abs position into type:
                    token.Type = "" + pos;
                    offset    += offIncr + tokenOffset;
                    //System.out.println("  " + token + " posIncr=" + token.getPositionIncrement() + " pos=" + pos + " off=" + token.StartOffset + "/" + token.EndOffset + " (freq=" + postingsByDoc.Get(docCount).Size() + ")");
                }
                doc.Add(new Field("content", new CannedTokenStream(tokens.ToArray()), ft));
                w.AddDocument(doc);
            }
            DirectoryReader r = w.GetReader();

            w.Dispose();

            string[] terms = new string[] { "a", "b", "c", "d" };
            foreach (AtomicReaderContext ctx in r.Leaves)
            {
                // TODO: improve this
                AtomicReader sub = (AtomicReader)ctx.Reader;
                //System.out.println("\nsub=" + sub);
                TermsEnum            termsEnum                  = sub.Fields.GetTerms("content").GetIterator(null);
                DocsEnum             docs                       = null;
                DocsAndPositionsEnum docsAndPositions           = null;
                DocsAndPositionsEnum docsAndPositionsAndOffsets = null;
                FieldCache.Int32s    docIDToID                  = FieldCache.DEFAULT.GetInt32s(sub, "id", false);
                foreach (string term in terms)
                {
                    //System.out.println("  term=" + term);
                    if (termsEnum.SeekExact(new BytesRef(term)))
                    {
                        docs = termsEnum.Docs(null, docs);
                        Assert.IsNotNull(docs);
                        int doc;
                        //System.out.println("    doc/freq");
                        while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                        {
                            IList <Token> expected = actualTokens[term][docIDToID.Get(doc)];
                            //System.out.println("      doc=" + docIDToID.Get(doc) + " docID=" + doc + " " + expected.Size() + " freq");
                            Assert.IsNotNull(expected);
                            Assert.AreEqual(expected.Count, docs.Freq);
                        }

                        // explicitly exclude offsets here
                        docsAndPositions = termsEnum.DocsAndPositions(null, docsAndPositions, DocsAndPositionsFlags.PAYLOADS);
                        Assert.IsNotNull(docsAndPositions);
                        //System.out.println("    doc/freq/pos");
                        while ((doc = docsAndPositions.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                        {
                            IList <Token> expected = actualTokens[term][docIDToID.Get(doc)];
                            //System.out.println("      doc=" + docIDToID.Get(doc) + " " + expected.Size() + " freq");
                            Assert.IsNotNull(expected);
                            Assert.AreEqual(expected.Count, docsAndPositions.Freq);
                            foreach (Token token in expected)
                            {
                                int pos = Convert.ToInt32(token.Type);
                                //System.out.println("        pos=" + pos);
                                Assert.AreEqual(pos, docsAndPositions.NextPosition());
                            }
                        }

                        docsAndPositionsAndOffsets = termsEnum.DocsAndPositions(null, docsAndPositions);
                        Assert.IsNotNull(docsAndPositionsAndOffsets);
                        //System.out.println("    doc/freq/pos/offs");
                        while ((doc = docsAndPositionsAndOffsets.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                        {
                            IList <Token> expected = actualTokens[term][docIDToID.Get(doc)];
                            //System.out.println("      doc=" + docIDToID.Get(doc) + " " + expected.Size() + " freq");
                            Assert.IsNotNull(expected);
                            Assert.AreEqual(expected.Count, docsAndPositionsAndOffsets.Freq);
                            foreach (Token token in expected)
                            {
                                int pos = Convert.ToInt32(token.Type);
                                //System.out.println("        pos=" + pos);
                                Assert.AreEqual(pos, docsAndPositionsAndOffsets.NextPosition());
                                Assert.AreEqual(token.StartOffset, docsAndPositionsAndOffsets.StartOffset);
                                Assert.AreEqual(token.EndOffset, docsAndPositionsAndOffsets.EndOffset);
                            }
                        }
                    }
                }
                // TODO: test advance:
            }
            r.Dispose();
            dir.Dispose();
        }
Пример #27
0
 public override TermsEnum Iterator(TermsEnum reuse)
 {
     return @in.Iterator(reuse);
 }
Пример #28
0
        public abstract TermsEnum GetEnumerator(); // LUCENENET specific - Refactored to require both overloads, so we don't have a strange null parameter unless needed

        /// <summary>
        /// Returns an iterator that will step through all
        /// terms. This method will not return <c>null</c>.
        /// </summary>
        /// <param name="reuse">If you have a previous <see cref="TermsEnum"/>,
        /// for example from a different field, you can pass it for possible
        /// reuse if the implementation can do so.</param>
        public virtual TermsEnum GetEnumerator(TermsEnum reuse) => GetEnumerator(); // LUCENENET specific - Refactored to require both overloads, so we don't have a strange null parameter unless needed
Пример #29
0
        /// <summary>
        /// checks the terms enum sequentially
        /// if deep is false, it does a 'shallow' test that doesnt go down to the docsenums
        /// </summary>
        public void AssertTermsEnumEquals(string info, IndexReader leftReader, TermsEnum leftTermsEnum, TermsEnum rightTermsEnum, bool deep)
        {
            BytesRef term;
            Bits randomBits = new RandomBits(leftReader.MaxDoc, Random().NextDouble(), Random());
            DocsAndPositionsEnum leftPositions = null;
            DocsAndPositionsEnum rightPositions = null;
            DocsEnum leftDocs = null;
            DocsEnum rightDocs = null;

            while ((term = leftTermsEnum.Next()) != null)
            {
                Assert.AreEqual(term, rightTermsEnum.Next(), info);
                AssertTermStatsEquals(info, leftTermsEnum, rightTermsEnum);
                if (deep)
                {
                    AssertDocsAndPositionsEnumEquals(info, leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions));
                    AssertDocsAndPositionsEnumEquals(info, leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions));

                    AssertPositionsSkippingEquals(info, leftReader, leftTermsEnum.DocFreq(), leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions));
                    AssertPositionsSkippingEquals(info, leftReader, leftTermsEnum.DocFreq(), leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions));

                    // with freqs:
                    AssertDocsEnumEquals(info, leftDocs = leftTermsEnum.Docs(null, leftDocs), rightDocs = rightTermsEnum.Docs(null, rightDocs), true);
                    AssertDocsEnumEquals(info, leftDocs = leftTermsEnum.Docs(randomBits, leftDocs), rightDocs = rightTermsEnum.Docs(randomBits, rightDocs), true);

                    // w/o freqs:
                    AssertDocsEnumEquals(info, leftDocs = leftTermsEnum.Docs(null, leftDocs, DocsEnum.FLAG_NONE), rightDocs = rightTermsEnum.Docs(null, rightDocs, DocsEnum.FLAG_NONE), false);
                    AssertDocsEnumEquals(info, leftDocs = leftTermsEnum.Docs(randomBits, leftDocs, DocsEnum.FLAG_NONE), rightDocs = rightTermsEnum.Docs(randomBits, rightDocs, DocsEnum.FLAG_NONE), false);

                    // with freqs:
                    AssertDocsSkippingEquals(info, leftReader, leftTermsEnum.DocFreq(), leftDocs = leftTermsEnum.Docs(null, leftDocs), rightDocs = rightTermsEnum.Docs(null, rightDocs), true);
                    AssertDocsSkippingEquals(info, leftReader, leftTermsEnum.DocFreq(), leftDocs = leftTermsEnum.Docs(randomBits, leftDocs), rightDocs = rightTermsEnum.Docs(randomBits, rightDocs), true);

                    // w/o freqs:
                    AssertDocsSkippingEquals(info, leftReader, leftTermsEnum.DocFreq(), leftDocs = leftTermsEnum.Docs(null, leftDocs, DocsEnum.FLAG_NONE), rightDocs = rightTermsEnum.Docs(null, rightDocs, DocsEnum.FLAG_NONE), false);
                    AssertDocsSkippingEquals(info, leftReader, leftTermsEnum.DocFreq(), leftDocs = leftTermsEnum.Docs(randomBits, leftDocs, DocsEnum.FLAG_NONE), rightDocs = rightTermsEnum.Docs(randomBits, rightDocs, DocsEnum.FLAG_NONE), false);
                }
            }
            Assert.IsNull(rightTermsEnum.Next(), info);
        }
Пример #30
0
 public virtual TermsEnum GetIterator(TermsEnum reuse) => GetEnumerator(reuse);
Пример #31
0
 public TestTermsEnum(TermsEnum @in)
     : base(@in)
 {
 }
Пример #32
0
 /// <summary>
 /// Subclass can override this </summary>
 protected internal virtual void VisitTerm(TermsEnum te, int termNum)
 {
 }
Пример #33
0
 /// <summary>
 /// Returns an iterator that will step through all
 ///  terms. this method will not return null.  If you have
 ///  a previous TermsEnum, for example from a different
 ///  field, you can pass it for possible reuse if the
 ///  implementation can do so.
 /// </summary>
 public abstract TermsEnum Iterator(TermsEnum reuse);
Пример #34
0
        /// <summary>
        /// Call this only once (if you subclass!) </summary>
        protected internal virtual void Uninvert(AtomicReader reader, Bits liveDocs, BytesRef termPrefix)
        {
            FieldInfo info = reader.FieldInfos.FieldInfo(Field);

            if (info != null && info.HasDocValues())
            {
                throw new InvalidOperationException("Type mismatch: " + Field + " was indexed as " + info.DocValuesType);
            }
            //System.out.println("DTO uninvert field=" + field + " prefix=" + termPrefix);
            long startTime = DateTime.Now.Millisecond;

            Prefix = termPrefix == null ? null : BytesRef.DeepCopyOf(termPrefix);

            int maxDoc = reader.MaxDoc;

            int[] index    = new int[maxDoc];     // immediate term numbers, or the index into the byte[] representing the last number
            int[] lastTerm = new int[maxDoc];     // last term we saw for this document
            var   bytes    = new sbyte[maxDoc][]; // list of term numbers for the doc (delta encoded vInts)

            Fields fields = reader.Fields;

            if (fields == null)
            {
                // No terms
                return;
            }
            Terms terms = fields.Terms(Field);

            if (terms == null)
            {
                // No terms
                return;
            }

            TermsEnum te        = terms.Iterator(null);
            BytesRef  seekStart = termPrefix != null ? termPrefix : new BytesRef();

            //System.out.println("seekStart=" + seekStart.utf8ToString());
            if (te.SeekCeil(seekStart) == TermsEnum.SeekStatus.END)
            {
                // No terms match
                return;
            }

            // If we need our "term index wrapper", these will be
            // init'd below:
            IList <BytesRef> indexedTerms      = null;
            PagedBytes       indexedTermsBytes = null;

            bool testedOrd = false;

            // we need a minimum of 9 bytes, but round up to 12 since the space would
            // be wasted with most allocators anyway.
            var tempArr = new sbyte[12];

            //
            // enumerate all terms, and build an intermediate form of the un-inverted field.
            //
            // During this intermediate form, every document has a (potential) byte[]
            // and the int[maxDoc()] array either contains the termNumber list directly
            // or the *end* offset of the termNumber list in it's byte array (for faster
            // appending and faster creation of the final form).
            //
            // idea... if things are too large while building, we could do a range of docs
            // at a time (but it would be a fair amount slower to build)
            // could also do ranges in parallel to take advantage of multiple CPUs

            // OPTIONAL: remap the largest df terms to the lowest 128 (single byte)
            // values.  this requires going over the field first to find the most
            // frequent terms ahead of time.

            int termNum = 0;

            DocsEnum = null;

            // Loop begins with te positioned to first term (we call
            // seek above):
            for (; ;)
            {
                BytesRef t = te.Term();
                if (t == null || (termPrefix != null && !StringHelper.StartsWith(t, termPrefix)))
                {
                    break;
                }
                //System.out.println("visit term=" + t.utf8ToString() + " " + t + " termNum=" + termNum);

                if (!testedOrd)
                {
                    try
                    {
                        OrdBase = (int)te.Ord();
                        //System.out.println("got ordBase=" + ordBase);
                    }
                    catch (System.NotSupportedException uoe)
                    {
                        // Reader cannot provide ord support, so we wrap
                        // our own support by creating our own terms index:
                        indexedTerms      = new List <BytesRef>();
                        indexedTermsBytes = new PagedBytes(15);
                        //System.out.println("NO ORDS");
                    }
                    testedOrd = true;
                }

                VisitTerm(te, termNum);

                if (indexedTerms != null && (termNum & IndexIntervalMask) == 0)
                {
                    // Index this term
                    SizeOfIndexedStrings += t.Length;
                    BytesRef indexedTerm = new BytesRef();
                    indexedTermsBytes.Copy(t, indexedTerm);
                    // TODO: really should 1) strip off useless suffix,
                    // and 2) use FST not array/PagedBytes
                    indexedTerms.Add(indexedTerm);
                }

                int df = te.DocFreq();
                if (df <= MaxTermDocFreq)
                {
                    DocsEnum = te.Docs(liveDocs, DocsEnum, DocsEnum.FLAG_NONE);

                    // dF, but takes deletions into account
                    int actualDF = 0;

                    for (; ;)
                    {
                        int doc = DocsEnum.NextDoc();
                        if (doc == DocIdSetIterator.NO_MORE_DOCS)
                        {
                            break;
                        }
                        //System.out.println("  chunk=" + chunk + " docs");

                        actualDF++;
                        TermInstances++;

                        //System.out.println("    docID=" + doc);
                        // add TNUM_OFFSET to the term number to make room for special reserved values:
                        // 0 (end term) and 1 (index into byte array follows)
                        int delta = termNum - lastTerm[doc] + TNUM_OFFSET;
                        lastTerm[doc] = termNum;
                        int val = index[doc];

                        if ((val & 0xff) == 1)
                        {
                            // index into byte array (actually the end of
                            // the doc-specific byte[] when building)
                            int pos    = (int)((uint)val >> 8);
                            int ilen   = VIntSize(delta);
                            var arr    = bytes[doc];
                            int newend = pos + ilen;
                            if (newend > arr.Length)
                            {
                                // We avoid a doubling strategy to lower memory usage.
                                // this faceting method isn't for docs with many terms.
                                // In hotspot, objects have 2 words of overhead, then fields, rounded up to a 64-bit boundary.
                                // TODO: figure out what array lengths we can round up to w/o actually using more memory
                                // (how much space does a byte[] take up?  Is data preceded by a 32 bit length only?
                                // It should be safe to round up to the nearest 32 bits in any case.
                                int newLen = (newend + 3) & unchecked ((int)0xfffffffc); // 4 byte alignment
                                var newarr = new sbyte[newLen];
                                Array.Copy(arr, 0, newarr, 0, pos);
                                arr        = newarr;
                                bytes[doc] = newarr;
                            }
                            pos        = WriteInt(delta, arr, pos);
                            index[doc] = (pos << 8) | 1; // update pointer to end index in byte[]
                        }
                        else
                        {
                            // OK, this int has data in it... find the end (a zero starting byte - not
                            // part of another number, hence not following a byte with the high bit set).
                            int ipos;
                            if (val == 0)
                            {
                                ipos = 0;
                            }
                            else if ((val & 0x0000ff80) == 0)
                            {
                                ipos = 1;
                            }
                            else if ((val & 0x00ff8000) == 0)
                            {
                                ipos = 2;
                            }
                            else if ((val & 0xff800000) == 0)
                            {
                                ipos = 3;
                            }
                            else
                            {
                                ipos = 4;
                            }

                            //System.out.println("      ipos=" + ipos);

                            int endPos = WriteInt(delta, tempArr, ipos);
                            //System.out.println("      endpos=" + endPos);
                            if (endPos <= 4)
                            {
                                //System.out.println("      fits!");
                                // value will fit in the integer... move bytes back
                                for (int j = ipos; j < endPos; j++)
                                {
                                    val |= (tempArr[j] & 0xff) << (j << 3);
                                }
                                index[doc] = val;
                            }
                            else
                            {
                                // value won't fit... move integer into byte[]
                                for (int j = 0; j < ipos; j++)
                                {
                                    tempArr[j] = (sbyte)val;
                                    val        = (int)((uint)val >> 8);
                                }
                                // point at the end index in the byte[]
                                index[doc] = (endPos << 8) | 1;
                                bytes[doc] = tempArr;
                                tempArr    = new sbyte[12];
                            }
                        }
                    }
                    SetActualDocFreq(termNum, actualDF);
                }

                termNum++;
                if (te.Next() == null)
                {
                    break;
                }
            }

            NumTermsInField = termNum;

            long midPoint = DateTime.Now.Millisecond;

            if (TermInstances == 0)
            {
                // we didn't invert anything
                // lower memory consumption.
                Tnums = null;
            }
            else
            {
                this.Index = index;

                //
                // transform intermediate form into the final form, building a single byte[]
                // at a time, and releasing the intermediate byte[]s as we go to avoid
                // increasing the memory footprint.
                //

                for (int pass = 0; pass < 256; pass++)
                {
                    var target = Tnums[pass];
                    var pos    = 0; // end in target;
                    if (target != null)
                    {
                        pos = target.Length;
                    }
                    else
                    {
                        target = new sbyte[4096];
                    }

                    // loop over documents, 0x00ppxxxx, 0x01ppxxxx, 0x02ppxxxx
                    // where pp is the pass (which array we are building), and xx is all values.
                    // each pass shares the same byte[] for termNumber lists.
                    for (int docbase = pass << 16; docbase < maxDoc; docbase += (1 << 24))
                    {
                        int lim = Math.Min(docbase + (1 << 16), maxDoc);
                        for (int doc = docbase; doc < lim; doc++)
                        {
                            //System.out.println("  pass="******" process docID=" + doc);
                            int val = index[doc];
                            if ((val & 0xff) == 1)
                            {
                                int len = (int)((uint)val >> 8);
                                //System.out.println("    ptr pos=" + pos);
                                index[doc] = (pos << 8) | 1; // change index to point to start of array
                                if ((pos & 0xff000000) != 0)
                                {
                                    // we only have 24 bits for the array index
                                    throw new InvalidOperationException("Too many values for UnInvertedField faceting on field " + Field);
                                }
                                var arr = bytes[doc];

                                /*
                                 * for(byte b : arr) {
                                 * //System.out.println("      b=" + Integer.toHexString((int) b));
                                 * }
                                 */
                                bytes[doc] = null; // IMPORTANT: allow GC to avoid OOM
                                if (target.Length <= pos + len)
                                {
                                    int newlen = target.Length;
                                    /// <summary>
                                    ///* we don't have to worry about the array getting too large
                                    /// since the "pos" param will overflow first (only 24 bits available)
                                    /// if ((newlen<<1) <= 0) {
                                    ///  // overflow...
                                    ///  newlen = Integer.MAX_VALUE;
                                    ///  if (newlen <= pos + len) {
                                    ///    throw new SolrException(400,"Too many terms to uninvert field!");
                                    ///  }
                                    /// } else {
                                    ///  while (newlen <= pos + len) newlen<<=1;  // doubling strategy
                                    /// }
                                    /// ***
                                    /// </summary>
                                    while (newlen <= pos + len) // doubling strategy
                                    {
                                        newlen <<= 1;
                                    }
                                    var newtarget = new sbyte[newlen];
                                    Array.Copy(target, 0, newtarget, 0, pos);
                                    target = newtarget;
                                }
                                Array.Copy(arr, 0, target, pos, len);
                                pos += len + 1; // skip single byte at end and leave it 0 for terminator
                            }
                        }
                    }

                    // shrink array
                    if (pos < target.Length)
                    {
                        var newtarget = new sbyte[pos];
                        Array.Copy(target, 0, newtarget, 0, pos);
                        target = newtarget;
                    }

                    Tnums[pass] = target;

                    if ((pass << 16) > maxDoc)
                    {
                        break;
                    }
                }
            }
            if (indexedTerms != null)
            {
                IndexedTermsArray = indexedTerms.ToArray();
            }

            long endTime = DateTime.Now.Millisecond;

            Total_time  = (int)(endTime - startTime);
            Phase1_time = (int)(midPoint - startTime);
        }
Пример #35
0
 /// <summary>
 /// Creates a filtered <seealso cref="TermsEnum"/> on a terms enum. </summary>
 /// <param name="tenum"> the terms enumeration to filter. </param>
 public FilteredTermsEnum(TermsEnum tenum, bool startWithSeek)
 {
     Debug.Assert(tenum != null);
     this.Tenum = tenum;
     DoSeek = startWithSeek;
 }
Пример #36
0
 /// <summary>
 /// Returns the term (<seealso cref="BytesRef"/>) corresponding to
 ///  the provided ordinal.
 /// </summary>
 public virtual BytesRef LookupTerm(TermsEnum termsEnum, int ord)
 {
     termsEnum.SeekExact(ord);
     return(termsEnum.Term());
 }
Пример #37
0
			/// <exception cref="System.IO.IOException"></exception>
			public override void SetNextReader(AtomicReaderContext context)
			{
				if (segmentFacetCounts != null)
				{
					segmentResults.AddItem(((TermGroupFacetCollector.MV.SegmentResult)CreateSegmentResult
						()));
				}
				groupFieldTermsIndex = FieldCache.DEFAULT.GetTermsIndex(((AtomicReader)context.Reader
					()), groupField);
				facetFieldDocTermOrds = FieldCache.DEFAULT.GetDocTermOrds(((AtomicReader)context.
					Reader()), facetField);
				facetFieldNumTerms = (int)facetFieldDocTermOrds.GetValueCount();
				if (facetFieldNumTerms == 0)
				{
					facetOrdTermsEnum = null;
				}
				else
				{
					facetOrdTermsEnum = facetFieldDocTermOrds.TermsEnum();
				}
				// [facetFieldNumTerms() + 1] for all possible facet values and docs not containing facet field
				segmentFacetCounts = new int[facetFieldNumTerms + 1];
				segmentTotalCount = 0;
				segmentGroupedFacetHits.Clear();
				foreach (GroupedFacetHit groupedFacetHit in groupedFacetHits)
				{
					int groupOrd = groupedFacetHit.groupValue == null ? -1 : groupFieldTermsIndex.LookupTerm
						(groupedFacetHit.groupValue);
					if (groupedFacetHit.groupValue != null && groupOrd < 0)
					{
						continue;
					}
					int facetOrd;
					if (groupedFacetHit.facetValue != null)
					{
						if (facetOrdTermsEnum == null || !facetOrdTermsEnum.SeekExact(groupedFacetHit.facetValue
							))
						{
							continue;
						}
						facetOrd = (int)facetOrdTermsEnum.Ord();
					}
					else
					{
						facetOrd = facetFieldNumTerms;
					}
					// (facetFieldDocTermOrds.numTerms() + 1) for all possible facet values and docs not containing facet field
					int segmentGroupedFacetsIndex = groupOrd * (facetFieldNumTerms + 1) + facetOrd;
					segmentGroupedFacetHits.Put(segmentGroupedFacetsIndex);
				}
				if (facetPrefix != null)
				{
					TermsEnum.SeekStatus seekStatus;
					if (facetOrdTermsEnum != null)
					{
						seekStatus = facetOrdTermsEnum.SeekCeil(facetPrefix);
					}
					else
					{
						seekStatus = TermsEnum.SeekStatus.END;
					}
					if (seekStatus != TermsEnum.SeekStatus.END)
					{
						startFacetOrd = (int)facetOrdTermsEnum.Ord();
					}
					else
					{
						startFacetOrd = 0;
						endFacetOrd = 0;
						return;
					}
					BytesRef facetEndPrefix = BytesRef.DeepCopyOf(facetPrefix);
					facetEndPrefix.Append(UnicodeUtil.BIG_TERM);
					seekStatus = facetOrdTermsEnum.SeekCeil(facetEndPrefix);
					if (seekStatus != TermsEnum.SeekStatus.END)
					{
						endFacetOrd = (int)facetOrdTermsEnum.Ord();
					}
					else
					{
						endFacetOrd = facetFieldNumTerms;
					}
				}
				else
				{
					// Don't include null...
					startFacetOrd = 0;
					endFacetOrd = facetFieldNumTerms + 1;
				}
			}
Пример #38
0
 internal Iterator(DocTermOrds outerInstance, AtomicReader reader)
 {
     this.OuterInstance = outerInstance;
     this.Reader        = reader;
     this.Te            = TermsEnum();
 }
Пример #39
0
 public TermStateAnonymousInnerClassHelper(TermsEnum outerInstance)
 {
     this.OuterInstance = outerInstance;
 }
Пример #40
0
        private void Verify(AtomicReader r, int[][] idToOrds, BytesRef[] termsArray, BytesRef prefixRef)
        {
            DocTermOrds dto = new DocTermOrds(r, r.LiveDocs, "field", prefixRef, int.MaxValue, TestUtil.NextInt32(Random, 2, 10));

            FieldCache.Int32s docIDToID = FieldCache.DEFAULT.GetInt32s(r, "id", false);

            /*
             * for(int docID=0;docID<subR.MaxDoc;docID++) {
             * System.out.println("  docID=" + docID + " id=" + docIDToID[docID]);
             * }
             */

            if (Verbose)
            {
                Console.WriteLine("TEST: verify prefix=" + (prefixRef == null ? "null" : prefixRef.Utf8ToString()));
                Console.WriteLine("TEST: all TERMS:");
                TermsEnum allTE = MultiFields.GetTerms(r, "field").GetIterator(null);
                int       ord   = 0;
                while (allTE.Next() != null)
                {
                    Console.WriteLine("  ord=" + (ord++) + " term=" + allTE.Term.Utf8ToString());
                }
            }

            //final TermsEnum te = subR.Fields.Terms("field").iterator();
            TermsEnum te = dto.GetOrdTermsEnum(r);

            if (dto.NumTerms == 0)
            {
                if (prefixRef == null)
                {
                    Assert.IsNull(MultiFields.GetTerms(r, "field"));
                }
                else
                {
                    Terms terms = MultiFields.GetTerms(r, "field");
                    if (terms != null)
                    {
                        TermsEnum            termsEnum = terms.GetIterator(null);
                        TermsEnum.SeekStatus result    = termsEnum.SeekCeil(prefixRef);
                        if (result != TermsEnum.SeekStatus.END)
                        {
                            Assert.IsFalse(StringHelper.StartsWith(termsEnum.Term, prefixRef), "term=" + termsEnum.Term.Utf8ToString() + " matches prefix=" + prefixRef.Utf8ToString());
                        }
                        else
                        {
                            // ok
                        }
                    }
                    else
                    {
                        // ok
                    }
                }
                return;
            }

            if (Verbose)
            {
                Console.WriteLine("TEST: TERMS:");
                te.SeekExact(0);
                while (true)
                {
                    Console.WriteLine("  ord=" + te.Ord + " term=" + te.Term.Utf8ToString());
                    if (te.Next() == null)
                    {
                        break;
                    }
                }
            }

            SortedSetDocValues iter = dto.GetIterator(r);

            for (int docID = 0; docID < r.MaxDoc; docID++)
            {
                if (Verbose)
                {
                    Console.WriteLine("TEST: docID=" + docID + " of " + r.MaxDoc + " (id=" + docIDToID.Get(docID) + ")");
                }
                iter.SetDocument(docID);
                int[] answers = idToOrds[docIDToID.Get(docID)];
                int   upto    = 0;
                long  ord;
                while ((ord = iter.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                {
                    te.SeekExact(ord);
                    BytesRef expected = termsArray[answers[upto++]];
                    if (Verbose)
                    {
                        Console.WriteLine("  exp=" + expected.Utf8ToString() + " actual=" + te.Term.Utf8ToString());
                    }
                    Assert.AreEqual(expected, te.Term, "expected=" + expected.Utf8ToString() + " actual=" + te.Term.Utf8ToString() + " ord=" + ord);
                }
                Assert.AreEqual(answers.Length, upto);
            }
        }
Пример #41
0
 // sugar
 private string Next(TermsEnum te)
 {
     BytesRef br = te.Next();
     if (br == null)
     {
         return null;
     }
     else
     {
         return br.Utf8ToString();
     }
 }
Пример #42
0
        public virtual void TestSortedTermsEnum()
        {
            Directory         directory = NewDirectory();
            Analyzer          analyzer  = new MockAnalyzer(Random);
            IndexWriterConfig iwconfig  = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);

            iwconfig.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iwriter = new RandomIndexWriter(Random, directory, iwconfig);

            Document doc = new Document();

            doc.Add(new StringField("field", "hello", Field.Store.NO));
            iwriter.AddDocument(doc);

            doc = new Document();
            doc.Add(new StringField("field", "world", Field.Store.NO));
            iwriter.AddDocument(doc);

            doc = new Document();
            doc.Add(new StringField("field", "beer", Field.Store.NO));
            iwriter.AddDocument(doc);
            iwriter.ForceMerge(1);

            DirectoryReader ireader = iwriter.GetReader();

            iwriter.Dispose();

            AtomicReader       ar = GetOnlySegmentReader(ireader);
            SortedSetDocValues dv = FieldCache.DEFAULT.GetDocTermOrds(ar, "field");

            Assert.AreEqual(3, dv.ValueCount);

            TermsEnum termsEnum = dv.GetTermsEnum();

            // next()
            Assert.AreEqual("beer", termsEnum.Next().Utf8ToString());
            Assert.AreEqual(0, termsEnum.Ord);
            Assert.AreEqual("hello", termsEnum.Next().Utf8ToString());
            Assert.AreEqual(1, termsEnum.Ord);
            Assert.AreEqual("world", termsEnum.Next().Utf8ToString());
            Assert.AreEqual(2, termsEnum.Ord);

            // seekCeil()
            Assert.AreEqual(SeekStatus.NOT_FOUND, termsEnum.SeekCeil(new BytesRef("ha!")));
            Assert.AreEqual("hello", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(1, termsEnum.Ord);
            Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef("beer")));
            Assert.AreEqual("beer", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(0, termsEnum.Ord);
            Assert.AreEqual(SeekStatus.END, termsEnum.SeekCeil(new BytesRef("zzz")));

            // seekExact()
            Assert.IsTrue(termsEnum.SeekExact(new BytesRef("beer")));
            Assert.AreEqual("beer", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(0, termsEnum.Ord);
            Assert.IsTrue(termsEnum.SeekExact(new BytesRef("hello")));
            Assert.AreEqual("hello", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(1, termsEnum.Ord);
            Assert.IsTrue(termsEnum.SeekExact(new BytesRef("world")));
            Assert.AreEqual("world", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(2, termsEnum.Ord);
            Assert.IsFalse(termsEnum.SeekExact(new BytesRef("bogus")));

            // seek(ord)
            termsEnum.SeekExact(0);
            Assert.AreEqual("beer", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(0, termsEnum.Ord);
            termsEnum.SeekExact(1);
            Assert.AreEqual("hello", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(1, termsEnum.Ord);
            termsEnum.SeekExact(2);
            Assert.AreEqual("world", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(2, termsEnum.Ord);
            ireader.Dispose();
            directory.Dispose();
        }
Пример #43
0
 /// <summary>
 /// Subclass can override this </summary>
 protected internal virtual void VisitTerm(TermsEnum te, int termNum)
 {
 }
Пример #44
0
        public virtual void Test()
        {
            IList <string> postingsList   = new List <string>();
            int            numTerms       = AtLeast(300);
            int            maxTermsPerDoc = TestUtil.NextInt(Random(), 10, 20);
            bool           isSimpleText   = "SimpleText".Equals(TestUtil.GetPostingsFormat("field"));

            IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            if ((isSimpleText || iwc.MergePolicy is MockRandomMergePolicy) && (TEST_NIGHTLY || RANDOM_MULTIPLIER > 1))
            {
                // Otherwise test can take way too long (> 2 hours)
                numTerms /= 2;
            }
            if (VERBOSE)
            {
                Console.WriteLine("maxTermsPerDoc=" + maxTermsPerDoc);
                Console.WriteLine("numTerms=" + numTerms);
            }
            for (int i = 0; i < numTerms; i++)
            {
                string term = Convert.ToString(i);
                for (int j = 0; j < i; j++)
                {
                    postingsList.Add(term);
                }
            }

            Collections.Shuffle(postingsList);

            ConcurrentQueue <string> postings = new ConcurrentQueue <string>(postingsList);

            Directory dir = NewFSDirectory(CreateTempDir(GetFullMethodName()));

            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

            int threadCount = TestUtil.NextInt(Random(), 1, 5);

            if (VERBOSE)
            {
                Console.WriteLine("config: " + iw.w.Config);
                Console.WriteLine("threadCount=" + threadCount);
            }

            Field     prototype = NewTextField("field", "", Field.Store.NO);
            FieldType fieldType = new FieldType((FieldType)prototype.FieldType);

            if (Random().NextBoolean())
            {
                fieldType.OmitNorms = true;
            }
            int options = Random().Next(3);

            if (options == 0)
            {
                fieldType.IndexOptions     = IndexOptions.DOCS_AND_FREQS; // we dont actually need positions
                fieldType.StoreTermVectors = true;                        // but enforce term vectors when we do this so we check SOMETHING
            }
            else if (options == 1 && !DoesntSupportOffsets.Contains(TestUtil.GetPostingsFormat("field")))
            {
                fieldType.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            }
            // else just positions

            ThreadClass[]  threads     = new ThreadClass[threadCount];
            CountdownEvent startingGun = new CountdownEvent(1);

            for (int threadID = 0; threadID < threadCount; threadID++)
            {
                Random   threadRandom = new Random(Random().Next());
                Document document     = new Document();
                Field    field        = new Field("field", "", fieldType);
                document.Add(field);
                threads[threadID] = new ThreadAnonymousInnerClassHelper(this, numTerms, maxTermsPerDoc, postings, iw, startingGun, threadRandom, document, field);
                threads[threadID].Start();
            }
            startingGun.Signal();
            foreach (ThreadClass t in threads)
            {
                t.Join();
            }

            iw.ForceMerge(1);
            DirectoryReader ir = iw.Reader;

            Assert.AreEqual(1, ir.Leaves.Count);
            AtomicReader air   = (AtomicReader)ir.Leaves[0].Reader;
            Terms        terms = air.GetTerms("field");

            // numTerms-1 because there cannot be a term 0 with 0 postings:
            Assert.AreEqual(numTerms - 1, terms.Count);
            TermsEnum termsEnum = terms.GetIterator(null);
            BytesRef  termBR;

            while ((termBR = termsEnum.Next()) != null)
            {
                int value = Convert.ToInt32(termBR.Utf8ToString());
                Assert.AreEqual(value, termsEnum.TotalTermFreq);
                // don't really need to check more than this, as CheckIndex
                // will verify that totalTermFreq == total number of positions seen
                // from a docsAndPositionsEnum.
            }
            ir.Dispose();
            iw.Dispose();
            dir.Dispose();
        }
Пример #45
0
 /// <summary>
 /// Returns the term (<seealso cref="BytesRef"/>) corresponding to
 ///  the provided ordinal.
 /// </summary>
 public virtual BytesRef LookupTerm(TermsEnum termsEnum, int ord)
 {
     termsEnum.SeekExact(ord);
     return termsEnum.Term();
 }
Пример #46
0
 public override TermsEnum GetIterator(TermsEnum reuse)
 {
     return(m_input.GetIterator(reuse));
 }
 /// <exception cref="System.IO.IOException"></exception>
 public BaseTermsEnumTraverser(AbstractPrefixTreeFilter _enclosing, AtomicReaderContext
     context, IBits acceptDocs)
 {
     this._enclosing = _enclosing;
     //remember to check for null in getDocIdSet
     this.context = context;
     AtomicReader reader = context.AtomicReader;
     this.acceptDocs = acceptDocs;
     maxDoc = reader.MaxDoc;
     Terms terms = reader.Terms(this._enclosing.fieldName);
     if (terms != null)
     {
         termsEnum = terms.Iterator(null);
     }
 }
Пример #48
0
 /// <summary>
 /// Creates a new <see cref="FilterTermsEnum"/> </summary>
 /// <param name="input"> the underlying <see cref="TermsEnum"/> instance. </param>
 public FilterTermsEnum(TermsEnum input)
 {
     this.m_input = input;
 }
Пример #49
0
 public override TermsEnum Iterator(TermsEnum reuse)
 {
     // TODO: should we give this thing a random to be super-evil,
     // and randomly *not* unwrap?
     if (reuse is AssertingTermsEnum)
     {
         reuse = ((AssertingTermsEnum)reuse).TermsEnumIn_Nunit();
     }
     TermsEnum termsEnum = base.Iterator(reuse);
     Debug.Assert(termsEnum != null);
     return new AssertingTermsEnum(termsEnum);
 }
Пример #50
0
 /// <summary>
 /// Creates a filtered <see cref="TermsEnum"/> on a terms enum. </summary>
 /// <param name="tenum"> the terms enumeration to filter. </param>
 public FilteredTermsEnum(TermsEnum tenum)
     : this(tenum, true)
 {
 }
Пример #51
0
 internal SeekingTermSetTermsEnum(TermsEnum tenum, BytesRefHash terms, int[] ords)
     : base(tenum)
 {
     Terms = terms;
     Ords = ords;
     _comparator = BytesRef.UTF8SortedAsUnicodeComparer;
     _lastElement = terms.Size() - 1;
     _lastTerm = terms.Get(ords[_lastElement], new BytesRef());
     _seekTerm = terms.Get(ords[_upto], _spare);
 }
Пример #52
0
 /// <summary>
 /// Creates a filtered <see cref="TermsEnum"/> on a terms enum. </summary>
 /// <param name="tenum"> the terms enumeration to filter. </param>
 /// <param name="startWithSeek"> start with seek </param>
 public FilteredTermsEnum(TermsEnum tenum, bool startWithSeek)
 {
     Debug.Assert(tenum != null);
     this.tenum = tenum;
     doSeek     = startWithSeek;
 }
Пример #53
0
 public BaseTermsEnumTraverser(AbstractPrefixTreeFilter outerInstance, AtomicReaderContext context, Bits acceptDocs)
 {
     this.outerInstance = outerInstance;
     
     this.context = context;
     AtomicReader reader = context.AtomicReader;
     this.acceptDocs = acceptDocs;
     maxDoc = reader.MaxDoc;
     Terms terms = reader.Terms(outerInstance.fieldName);
     if (terms != null)
     {
         termsEnum = terms.Iterator(null);
     }
 }
Пример #54
0
        public virtual void Test()
        {
            IList <string> postingsList   = new List <string>();
            int            numTerms       = AtLeast(300);
            int            maxTermsPerDoc = TestUtil.NextInt32(Random, 10, 20);

            bool isSimpleText = "SimpleText".Equals(TestUtil.GetPostingsFormat("field"), StringComparison.Ordinal);

            IndexWriterConfig iwc = NewIndexWriterConfig(Random, TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            if ((isSimpleText || iwc.MergePolicy is MockRandomMergePolicy) && (TestNightly || RandomMultiplier > 1))
            {
                // Otherwise test can take way too long (> 2 hours)
                //numTerms /= 2;
                // LUCENENET specific - To keep this under the 1 hour free limit
                // of Azure DevOps, this was reduced from /2 to /6.
                numTerms /= 6;
            }

            if (Verbose)
            {
                Console.WriteLine("maxTermsPerDoc=" + maxTermsPerDoc);
                Console.WriteLine("numTerms=" + numTerms);
            }

            for (int i = 0; i < numTerms; i++)
            {
                string term = Convert.ToString(i, CultureInfo.InvariantCulture);
                for (int j = 0; j < i; j++)
                {
                    postingsList.Add(term);
                }
            }
            postingsList.Shuffle(Random);

            ConcurrentQueue <string> postings = new ConcurrentQueue <string>(postingsList);

            Directory         dir = NewFSDirectory(CreateTempDir("bagofpostings"));
            RandomIndexWriter iw  = new RandomIndexWriter(Random, dir, iwc);

            int threadCount = TestUtil.NextInt32(Random, 1, 5);

            if (Verbose)
            {
                Console.WriteLine("config: " + iw.IndexWriter.Config);
                Console.WriteLine("threadCount=" + threadCount);
            }

            ThreadJob[]    threads     = new ThreadJob[threadCount];
            CountdownEvent startingGun = new CountdownEvent(1);

            for (int threadID = 0; threadID < threadCount; threadID++)
            {
                threads[threadID] = new ThreadAnonymousClass(this, maxTermsPerDoc, postings, iw, startingGun);
                threads[threadID].Start();
            }
            startingGun.Signal();
            foreach (ThreadJob t in threads)
            {
                t.Join();
            }

            iw.ForceMerge(1);
            DirectoryReader ir = iw.GetReader();

            Assert.AreEqual(1, ir.Leaves.Count);
            AtomicReader air   = (AtomicReader)ir.Leaves[0].Reader;
            Terms        terms = air.GetTerms("field");

            // numTerms-1 because there cannot be a term 0 with 0 postings:
#pragma warning disable 612, 618
            Assert.AreEqual(numTerms - 1, air.Fields.UniqueTermCount);
            if (iwc.Codec is Lucene3xCodec == false)
#pragma warning restore 612, 618
            {
                Assert.AreEqual(numTerms - 1, terms.Count);
            }
            TermsEnum termsEnum = terms.GetEnumerator();
            while (termsEnum.MoveNext())
            {
                int value = Convert.ToInt32(termsEnum.Term.Utf8ToString(), CultureInfo.InvariantCulture);
                Assert.AreEqual(value, termsEnum.DocFreq);
                // don't really need to check more than this, as CheckIndex
                // will verify that docFreq == actual number of documents seen
                // from a docsAndPositionsEnum.
            }
            ir.Dispose();
            iw.Dispose();
            dir.Dispose();
        }
Пример #55
0
 /// <summary>
 /// Creates a new FilterTermsEnum </summary>
 /// <param name="in"> the underlying TermsEnum instance. </param>
 public FilterTermsEnum(TermsEnum @in)
 {
     this.@in = @in;
 }
Пример #56
0
        public virtual void TestIntersectRandom()
        {
            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);

            int numTerms = AtLeast(300);
            //final int numTerms = 50;

            ISet <string>                terms        = new JCG.HashSet <string>();
            ICollection <string>         pendingTerms = new List <string>();
            IDictionary <BytesRef, int?> termToID     = new Dictionary <BytesRef, int?>();
            int id = 0;

            while (terms.Count != numTerms)
            {
                string s = RandomString;
                if (!terms.Contains(s))
                {
                    terms.Add(s);
                    pendingTerms.Add(s);
                    if (Random.Next(20) == 7)
                    {
                        AddDoc(w, pendingTerms, termToID, id++);
                    }
                }
            }
            AddDoc(w, pendingTerms, termToID, id++);

            BytesRef[]      termsArray = new BytesRef[terms.Count];
            ISet <BytesRef> termsSet   = new JCG.HashSet <BytesRef>();

            {
                int upto = 0;
                foreach (string s in terms)
                {
                    BytesRef b = new BytesRef(s);
                    termsArray[upto++] = b;
                    termsSet.Add(b);
                }
                Array.Sort(termsArray);
            }

            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: indexed terms (unicode order):");
                foreach (BytesRef t in termsArray)
                {
                    Console.WriteLine("  " + t.Utf8ToString() + " -> id:" + termToID[t]);
                }
            }

            IndexReader r = w.GetReader();

            w.Dispose();

            // NOTE: intentional insanity!!
            FieldCache.Int32s docIDToID = FieldCache.DEFAULT.GetInt32s(SlowCompositeReaderWrapper.Wrap(r), "id", false);

            for (int iter = 0; iter < 10 * RANDOM_MULTIPLIER; iter++)
            {
                // TODO: can we also test infinite As here...?

                // From the random terms, pick some ratio and compile an
                // automaton:
                ISet <string>            acceptTerms       = new JCG.HashSet <string>();
                JCG.SortedSet <BytesRef> sortedAcceptTerms = new JCG.SortedSet <BytesRef>();
                double    keepPct = Random.NextDouble();
                Automaton a;
                if (iter == 0)
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: empty automaton");
                    }
                    a = BasicAutomata.MakeEmpty();
                }
                else
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: keepPct=" + keepPct);
                    }
                    foreach (string s in terms)
                    {
                        string s2;
                        if (Random.NextDouble() <= keepPct)
                        {
                            s2 = s;
                        }
                        else
                        {
                            s2 = RandomString;
                        }
                        acceptTerms.Add(s2);
                        sortedAcceptTerms.Add(new BytesRef(s2));
                    }
                    a = BasicAutomata.MakeStringUnion(sortedAcceptTerms);
                }

                if (Random.NextBoolean())
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: reduce the automaton");
                    }
                    a.Reduce();
                }

                CompiledAutomaton c = new CompiledAutomaton(a, true, false);

                BytesRef[]      acceptTermsArray = new BytesRef[acceptTerms.Count];
                ISet <BytesRef> acceptTermsSet   = new JCG.HashSet <BytesRef>();
                int             upto             = 0;
                foreach (string s in acceptTerms)
                {
                    BytesRef b = new BytesRef(s);
                    acceptTermsArray[upto++] = b;
                    acceptTermsSet.Add(b);
                    Assert.IsTrue(Accepts(c, b));
                }
                Array.Sort(acceptTermsArray);

                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: accept terms (unicode order):");
                    foreach (BytesRef t in acceptTermsArray)
                    {
                        Console.WriteLine("  " + t.Utf8ToString() + (termsSet.Contains(t) ? " (exists)" : ""));
                    }
                    Console.WriteLine(a.ToDot());
                }

                for (int iter2 = 0; iter2 < 100; iter2++)
                {
                    BytesRef startTerm = acceptTermsArray.Length == 0 || Random.NextBoolean() ? null : acceptTermsArray[Random.Next(acceptTermsArray.Length)];

                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: iter2=" + iter2 + " startTerm=" + (startTerm == null ? "<null>" : startTerm.Utf8ToString()));

                        if (startTerm != null)
                        {
                            int state = c.RunAutomaton.InitialState;
                            for (int idx = 0; idx < startTerm.Length; idx++)
                            {
                                int label = startTerm.Bytes[startTerm.Offset + idx] & 0xff;
                                Console.WriteLine("  state=" + state + " label=" + label);
                                state = c.RunAutomaton.Step(state, label);
                                Assert.IsTrue(state != -1);
                            }
                            Console.WriteLine("  state=" + state);
                        }
                    }

                    TermsEnum te = MultiFields.GetTerms(r, "f").Intersect(c, startTerm);

                    int loc;
                    if (startTerm == null)
                    {
                        loc = 0;
                    }
                    else
                    {
                        loc = Array.BinarySearch(termsArray, BytesRef.DeepCopyOf(startTerm));
                        if (loc < 0)
                        {
                            loc = -(loc + 1);
                        }
                        else
                        {
                            // startTerm exists in index
                            loc++;
                        }
                    }
                    while (loc < termsArray.Length && !acceptTermsSet.Contains(termsArray[loc]))
                    {
                        loc++;
                    }

                    DocsEnum docsEnum = null;
                    while (loc < termsArray.Length)
                    {
                        BytesRef expected = termsArray[loc];
                        BytesRef actual   = te.Next();
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST:   next() expected=" + expected.Utf8ToString() + " actual=" + (actual == null ? "null" : actual.Utf8ToString()));
                        }
                        Assert.AreEqual(expected, actual);
                        Assert.AreEqual(1, te.DocFreq);
                        docsEnum = TestUtil.Docs(Random, te, null, docsEnum, DocsFlags.NONE);
                        int docID = docsEnum.NextDoc();
                        Assert.IsTrue(docID != DocIdSetIterator.NO_MORE_DOCS);
                        Assert.AreEqual(docIDToID.Get(docID), (int)termToID[expected]);
                        do
                        {
                            loc++;
                        } while (loc < termsArray.Length && !acceptTermsSet.Contains(termsArray[loc]));
                    }
                    Assert.IsNull(te.Next());
                }
            }

            r.Dispose();
            dir.Dispose();
        }
 /// <summary>
 /// checks term-level statistics
 /// </summary>
 public virtual void AssertTermStats(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum)
 {
     Assert.AreEqual(leftTermsEnum.DocFreq(), rightTermsEnum.DocFreq());
     if (leftTermsEnum.TotalTermFreq() != -1 && rightTermsEnum.TotalTermFreq() != -1)
     {
         Assert.AreEqual(leftTermsEnum.TotalTermFreq(), rightTermsEnum.TotalTermFreq());
     }
 }
Пример #58
0
        public virtual void Test()
        {
            Random       random   = new Random(Random.Next());
            LineFileDocs docs     = new LineFileDocs(random, DefaultCodecSupportsDocValues);
            Directory    d        = NewDirectory();
            MockAnalyzer analyzer = new MockAnalyzer(LuceneTestCase.Random);

            analyzer.MaxTokenLength = TestUtil.NextInt32(LuceneTestCase.Random, 1, IndexWriter.MAX_TERM_LENGTH);
            RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                LuceneTestCase.Random, d, analyzer);
            int numDocs = AtLeast(10);

            for (int docCount = 0; docCount < numDocs; docCount++)
            {
                w.AddDocument(docs.NextDoc());
            }
            IndexReader r = w.GetReader();

            w.Dispose();

            List <BytesRef> terms     = new List <BytesRef>();
            TermsEnum       termsEnum = MultiFields.GetTerms(r, "body").GetIterator(null);
            BytesRef        term;

            while ((term = termsEnum.Next()) != null)
            {
                terms.Add(BytesRef.DeepCopyOf(term));
            }
            if (VERBOSE)
            {
                Console.WriteLine("TEST: " + terms.Count + " terms");
            }

            int upto  = -1;
            int iters = AtLeast(200);

            for (int iter = 0; iter < iters; iter++)
            {
                bool isEnd;
                if (upto != -1 && LuceneTestCase.Random.NextBoolean())
                {
                    // next
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: iter next");
                    }
                    isEnd = termsEnum.Next() == null;
                    upto++;
                    if (isEnd)
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine("  end");
                        }
                        Assert.AreEqual(upto, terms.Count);
                        upto = -1;
                    }
                    else
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine("  got term=" + termsEnum.Term.Utf8ToString() + " expected=" + terms[upto].Utf8ToString());
                        }
                        Assert.IsTrue(upto < terms.Count);
                        Assert.AreEqual(terms[upto], termsEnum.Term);
                    }
                }
                else
                {
                    BytesRef target;
                    string   exists;
                    if (LuceneTestCase.Random.NextBoolean())
                    {
                        // likely fake term
                        if (LuceneTestCase.Random.NextBoolean())
                        {
                            target = new BytesRef(TestUtil.RandomSimpleString(LuceneTestCase.Random));
                        }
                        else
                        {
                            target = new BytesRef(TestUtil.RandomRealisticUnicodeString(LuceneTestCase.Random));
                        }
                        exists = "likely not";
                    }
                    else
                    {
                        // real term
                        target = terms[LuceneTestCase.Random.Next(terms.Count)];
                        exists = "yes";
                    }

                    upto = terms.BinarySearch(target);

                    if (LuceneTestCase.Random.NextBoolean())
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: iter seekCeil target=" + target.Utf8ToString() + " exists=" + exists);
                        }
                        // seekCeil
                        TermsEnum.SeekStatus status = termsEnum.SeekCeil(target);
                        if (VERBOSE)
                        {
                            Console.WriteLine("  got " + status);
                        }

                        if (upto < 0)
                        {
                            upto = -(upto + 1);
                            if (upto >= terms.Count)
                            {
                                Assert.AreEqual(TermsEnum.SeekStatus.END, status);
                                upto = -1;
                            }
                            else
                            {
                                Assert.AreEqual(TermsEnum.SeekStatus.NOT_FOUND, status);
                                Assert.AreEqual(terms[upto], termsEnum.Term);
                            }
                        }
                        else
                        {
                            Assert.AreEqual(TermsEnum.SeekStatus.FOUND, status);
                            Assert.AreEqual(terms[upto], termsEnum.Term);
                        }
                    }
                    else
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: iter seekExact target=" + target.Utf8ToString() + " exists=" + exists);
                        }
                        // seekExact
                        bool result = termsEnum.SeekExact(target);
                        if (VERBOSE)
                        {
                            Console.WriteLine("  got " + result);
                        }
                        if (upto < 0)
                        {
                            Assert.IsFalse(result);
                            upto = -1;
                        }
                        else
                        {
                            Assert.IsTrue(result);
                            Assert.AreEqual(target, termsEnum.Term);
                        }
                    }
                }
            }

            r.Dispose();
            d.Dispose();
            docs.Dispose();
        }
Пример #59
0
 /// <summary>
 /// checks term-level statistics
 /// </summary>
 public void AssertTermStatsEquals(string info, TermsEnum leftTermsEnum, TermsEnum rightTermsEnum)
 {
     Assert.AreEqual(leftTermsEnum.DocFreq(), rightTermsEnum.DocFreq(), info);
     if (leftTermsEnum.TotalTermFreq() != -1 && rightTermsEnum.TotalTermFreq() != -1)
     {
         Assert.AreEqual(leftTermsEnum.TotalTermFreq(), rightTermsEnum.TotalTermFreq(), info);
     }
 }
Пример #60
0
        public virtual void VerifyEquals(DirectoryReader r1, DirectoryReader r2, string idField)
        {
            if (Verbose)
            {
                Console.WriteLine("\nr1 docs:");
                PrintDocs(r1);
                Console.WriteLine("\nr2 docs:");
                PrintDocs(r2);
            }
            if (r1.NumDocs != r2.NumDocs)
            {
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(false, () => "r1.NumDocs=" + r1.NumDocs + " vs r2.NumDocs=" + r2.NumDocs);
                }
            }
            bool hasDeletes = !(r1.MaxDoc == r2.MaxDoc && r1.NumDocs == r1.MaxDoc);

            int[] r2r1 = new int[r2.MaxDoc]; // r2 id to r1 id mapping

            // create mapping from id2 space to id2 based on idField
            Fields f1 = MultiFields.GetFields(r1);

            if (f1 == null)
            {
                // make sure r2 is empty
                Assert.IsNull(MultiFields.GetFields(r2));
                return;
            }
            Terms terms1 = f1.GetTerms(idField);

            if (terms1 == null)
            {
                Assert.IsTrue(MultiFields.GetFields(r2) == null || MultiFields.GetFields(r2).GetTerms(idField) == null);
                return;
            }
            TermsEnum termsEnum = terms1.GetEnumerator();

            IBits liveDocs1 = MultiFields.GetLiveDocs(r1);
            IBits liveDocs2 = MultiFields.GetLiveDocs(r2);

            Fields fields = MultiFields.GetFields(r2);

            if (fields == null)
            {
                // make sure r1 is in fact empty (eg has only all
                // deleted docs):
                IBits    liveDocs = MultiFields.GetLiveDocs(r1);
                DocsEnum docs     = null;
                while (termsEnum.MoveNext())
                {
                    docs = TestUtil.Docs(Random, termsEnum, liveDocs, docs, DocsFlags.NONE);
                    while (docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                    {
                        Assert.Fail("r1 is not empty but r2 is");
                    }
                }
                return;
            }
            Terms     terms2     = fields.GetTerms(idField);
            TermsEnum termsEnum2 = terms2.GetEnumerator();

            DocsEnum termDocs1 = null;
            DocsEnum termDocs2 = null;

            while (termsEnum.MoveNext())
            {
                BytesRef term = termsEnum.Term;
                //System.out.println("TEST: match id term=" + term);

                termDocs1 = TestUtil.Docs(Random, termsEnum, liveDocs1, termDocs1, DocsFlags.NONE);
                if (termsEnum2.SeekExact(term))
                {
                    termDocs2 = TestUtil.Docs(Random, termsEnum2, liveDocs2, termDocs2, DocsFlags.NONE);
                }
                else
                {
                    termDocs2 = null;
                }

                if (termDocs1.NextDoc() == DocIdSetIterator.NO_MORE_DOCS)
                {
                    // this doc is deleted and wasn't replaced
                    Assert.IsTrue(termDocs2 == null || termDocs2.NextDoc() == DocIdSetIterator.NO_MORE_DOCS);
                    continue;
                }

                int id1 = termDocs1.DocID;
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, termDocs1.NextDoc());

                Assert.IsTrue(termDocs2.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                int id2 = termDocs2.DocID;
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, termDocs2.NextDoc());

                r2r1[id2] = id1;

                // verify stored fields are equivalent
                try
                {
                    VerifyEquals(r1.Document(id1), r2.Document(id2));
                }
                catch (Exception /*t*/)
                {
                    Console.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term=" + term);
                    Console.WriteLine("  d1=" + r1.Document(id1));
                    Console.WriteLine("  d2=" + r2.Document(id2));
                    throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
                }

                try
                {
                    // verify term vectors are equivalent
                    VerifyEquals(r1.GetTermVectors(id1), r2.GetTermVectors(id2));
                }
                catch (Exception /*e*/)
                {
                    Console.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2);
                    Fields tv1 = r1.GetTermVectors(id1);
                    Console.WriteLine("  d1=" + tv1);
                    if (tv1 != null)
                    {
                        DocsAndPositionsEnum dpEnum = null;
                        DocsEnum             dEnum  = null;
                        foreach (string field in tv1)
                        {
                            Console.WriteLine("    " + field + ":");
                            Terms terms3 = tv1.GetTerms(field);
                            Assert.IsNotNull(terms3);
                            TermsEnum termsEnum3 = terms3.GetEnumerator();
                            while (termsEnum3.MoveNext())
                            {
                                Console.WriteLine("      " + termsEnum3.Term.Utf8ToString() + ": freq=" + termsEnum3.TotalTermFreq);
                                dpEnum = termsEnum3.DocsAndPositions(null, dpEnum);
                                if (dpEnum != null)
                                {
                                    Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                                    int freq = dpEnum.Freq;
                                    Console.WriteLine("        doc=" + dpEnum.DocID + " freq=" + freq);
                                    for (int posUpto = 0; posUpto < freq; posUpto++)
                                    {
                                        Console.WriteLine("          pos=" + dpEnum.NextPosition());
                                    }
                                }
                                else
                                {
                                    dEnum = TestUtil.Docs(Random, termsEnum3, null, dEnum, DocsFlags.FREQS);
                                    Assert.IsNotNull(dEnum);
                                    Assert.IsTrue(dEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                                    int freq = dEnum.Freq;
                                    Console.WriteLine("        doc=" + dEnum.DocID + " freq=" + freq);
                                }
                            }
                        }
                    }

                    Fields tv2 = r2.GetTermVectors(id2);
                    Console.WriteLine("  d2=" + tv2);
                    if (tv2 != null)
                    {
                        DocsAndPositionsEnum dpEnum = null;
                        DocsEnum             dEnum  = null;
                        foreach (string field in tv2)
                        {
                            Console.WriteLine("    " + field + ":");
                            Terms terms3 = tv2.GetTerms(field);
                            Assert.IsNotNull(terms3);
                            TermsEnum termsEnum3 = terms3.GetEnumerator();
                            while (termsEnum3.MoveNext())
                            {
                                Console.WriteLine("      " + termsEnum3.Term.Utf8ToString() + ": freq=" + termsEnum3.TotalTermFreq);
                                dpEnum = termsEnum3.DocsAndPositions(null, dpEnum);
                                if (dpEnum != null)
                                {
                                    Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                                    int freq = dpEnum.Freq;
                                    Console.WriteLine("        doc=" + dpEnum.DocID + " freq=" + freq);
                                    for (int posUpto = 0; posUpto < freq; posUpto++)
                                    {
                                        Console.WriteLine("          pos=" + dpEnum.NextPosition());
                                    }
                                }
                                else
                                {
                                    dEnum = TestUtil.Docs(Random, termsEnum3, null, dEnum, DocsFlags.FREQS);
                                    Assert.IsNotNull(dEnum);
                                    Assert.IsTrue(dEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                                    int freq = dEnum.Freq;
                                    Console.WriteLine("        doc=" + dEnum.DocID + " freq=" + freq);
                                }
                            }
                        }
                    }

                    throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
                }
            }

            //System.out.println("TEST: done match id");

            // Verify postings
            //System.out.println("TEST: create te1");
            Fields fields1 = MultiFields.GetFields(r1);
            IEnumerator <string> fields1Enum = fields1.GetEnumerator();
            Fields fields2 = MultiFields.GetFields(r2);
            IEnumerator <string> fields2Enum = fields2.GetEnumerator();

            string    field1 = null, field2 = null;
            TermsEnum termsEnum1 = null;

            termsEnum2 = null;
            DocsEnum docs1 = null, docs2 = null;

            // pack both doc and freq into single element for easy sorting
            long[] info1 = new long[r1.NumDocs];
            long[] info2 = new long[r2.NumDocs];

            for (; ;)
            {
                BytesRef term1 = null, term2 = null;

                // iterate until we get some docs
                int len1;
                for (; ;)
                {
                    len1 = 0;
                    if (termsEnum1 == null)
                    {
                        if (!fields1Enum.MoveNext())
                        {
                            break;
                        }
                        field1 = fields1Enum.Current;
                        Terms terms = fields1.GetTerms(field1);
                        if (terms == null)
                        {
                            continue;
                        }
                        termsEnum1 = terms.GetEnumerator();
                    }
                    if (!termsEnum1.MoveNext())
                    {
                        term1 = null;
                        // no more terms in this field
                        termsEnum1 = null;
                        continue;
                    }
                    term1 = termsEnum1.Term;

                    //System.out.println("TEST: term1=" + term1);
                    docs1 = TestUtil.Docs(Random, termsEnum1, liveDocs1, docs1, DocsFlags.FREQS);
                    while (docs1.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                    {
                        int d = docs1.DocID;
                        int f = docs1.Freq;
                        info1[len1] = (((long)d) << 32) | (uint)f;
                        len1++;
                    }
                    if (len1 > 0)
                    {
                        break;
                    }
                }

                // iterate until we get some docs
                int len2;
                for (; ;)
                {
                    len2 = 0;
                    if (termsEnum2 == null)
                    {
                        if (!fields2Enum.MoveNext())
                        {
                            break;
                        }
                        field2 = fields2Enum.Current;
                        Terms terms = fields2.GetTerms(field2);
                        if (terms == null)
                        {
                            continue;
                        }
                        termsEnum2 = terms.GetEnumerator();
                    }
                    if (!termsEnum2.MoveNext())
                    {
                        term2 = null;
                        // no more terms in this field
                        termsEnum2 = null;
                        continue;
                    }
                    term2 = termsEnum2.Term;

                    //System.out.println("TEST: term1=" + term1);
                    docs2 = TestUtil.Docs(Random, termsEnum2, liveDocs2, docs2, DocsFlags.FREQS);
                    while (docs2.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                    {
                        int d = r2r1[docs2.DocID];
                        int f = docs2.Freq;
                        info2[len2] = (((long)d) << 32) | (uint)f;
                        len2++;
                    }
                    if (len2 > 0)
                    {
                        break;
                    }
                }

                Assert.AreEqual(len1, len2);
                if (len1 == 0) // no more terms
                {
                    break;
                }

                Assert.AreEqual(field1, field2);
                Assert.IsTrue(term1.BytesEquals(term2));

                if (!hasDeletes)
                {
                    Assert.AreEqual(termsEnum1.DocFreq, termsEnum2.DocFreq);
                }

                Assert.AreEqual(term1, term2, "len1=" + len1 + " len2=" + len2 + " deletes?=" + hasDeletes);

                // sort info2 to get it into ascending docid
                Array.Sort(info2, 0, len2);

                // now compare
                for (int i = 0; i < len1; i++)
                {
                    Assert.AreEqual(info1[i], info2[i], "i=" + i + " len=" + len1 + " d1=" + ((long)((ulong)info1[i] >> 32)) + " f1=" + (info1[i] & int.MaxValue) + " d2=" + ((long)((ulong)info2[i] >> 32)) + " f2=" + (info2[i] & int.MaxValue) + " field=" + field1 + " term=" + term1.Utf8ToString());
                }
            }
        }