/// <summary>
/// Re-positions this instance on the postings of the indexed term and rewinds
/// the current-document cursor (<c>atDoc</c>) to -1.
/// </summary>
/// <remarks>
/// If the field has no terms, or the indexed term cannot be found, a fallback
/// <c>DocsEnumAnonymousInnerClassHelper</c> is installed so that <c>docs</c> is
/// never left null.
/// </remarks>
public virtual void Reset()
{
    // no one should call us for deleted docs?

    if (terms != null)
    {
        TermsEnum termsEnum = terms.Iterator(null);
        if (termsEnum.SeekExact(outerInstance.indexedBytes))
        {
            docs = termsEnum.Docs(null, null);
        }
        else
        {
            // Term is absent from this segment.
            docs = null;
        }
    }
    else
    {
        // Field has no terms at all.
        docs = null;
    }

    if (docs == null)
    {
        docs = new DocsEnumAnonymousInnerClassHelper(this);
    }
    atDoc = -1;
}
 /// <summary>
 /// Drains <paramref name="docsEnum"/> and returns every document id it yields,
 /// in iteration order.
 /// </summary>
 /// <param name="docsEnum"> the enum to exhaust; it is advanced until it reports
 /// <c>DocIdSetIterator.NO_MORE_DOCS</c>. </param>
 /// <returns> the collected document ids, converted via <c>ArrayUtil.ToIntArray</c>. </returns>
 public static int[] ToArray(DocsEnum docsEnum)
 {
     IList<int?> docs = new List<int?>();
     while (docsEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
     {
         // Record the id the enum is now positioned on; without this the
         // returned array would always be empty.
         int docID = docsEnum.DocID();
         docs.Add(docID);
     }
     return ArrayUtil.ToIntArray(docs);
 }
Example #3
        /// <summary>
        /// Re-initializes this enum over a new set of sub-enums and rewinds its position.
        /// </summary>
        /// <param name="subs"> the sub-enums with their slices; each entry is copied into a
        /// fresh <c>EnumWithSlice</c>, so the caller's array is not retained. </param>
        /// <param name="numSubs"> the number of entries to iterate; stored as given. </param>
        /// <returns> this instance, to allow call chaining. </returns>
        internal MultiDocsEnum Reset(EnumWithSlice[] subs, int numSubs)
        {
            this.NumSubs_Renamed = numSubs;

            this.Subs_Renamed = new EnumWithSlice[subs.Length];
            for (int i = 0; i < subs.Length; i++)
            {
                // All three assignments belong to the loop body.
                this.Subs_Renamed[i] = new EnumWithSlice();
                this.Subs_Renamed[i].DocsEnum = subs[i].DocsEnum;
                this.Subs_Renamed[i].Slice = subs[i].Slice;
            }

            // -1 means "before the first sub-enum / before the first document".
            Upto = -1;
            Doc = -1;
            Current = null;
            return this;
        }
 /// <summary>
 /// Create a new FilterDocsEnum that wraps the given enum. </summary>
 /// <param name="in"> the underlying DocsEnum instance. </param>
 public FilterDocsEnum(DocsEnum @in)
 {
     this.@in = @in;
 }
Example #5
        /// <summary>
        /// Default merge impl: append documents, mapping around
        ///  deletes
        /// </summary>
        /// <remarks>
        /// Walks <paramref name="postings"/> and replays each document through
        /// <c>StartDoc</c> (and <c>AddPosition</c> when positions are indexed),
        /// choosing one of four branches by <paramref name="indexOptions"/>.
        /// Returns a <c>TermStats</c> built from <c>df</c> and <c>totTF</c>
        /// (<c>totTF</c> is reported as -1 for DOCS_ONLY).
        /// <para/>
        /// NOTE(review): this listing appears to have lost lines during extraction.
        /// Block braces and the <c>else</c> before the final branch are absent, no
        /// <c>while (true)</c> loop has a visible exit after its NO_MORE_DOCS check,
        /// <c>df</c> is never incremented, and <c>visitedDocs</c> is never used.
        /// Compare against the upstream implementation before relying on it.
        /// </remarks>
        public virtual TermStats Merge(MergeState mergeState, FieldInfo.IndexOptions? indexOptions, DocsEnum postings, FixedBitSet visitedDocs)
            int df = 0;
            long totTF = 0;

            // Branch 1: documents only - frequency is passed as -1.
            if (indexOptions == FieldInfo.IndexOptions.DOCS_ONLY)
                while (true)
                    int doc = postings.NextDoc();
                    // NOTE(review): presumably the loop should exit here - confirm.
                    if (doc == DocIdSetIterator.NO_MORE_DOCS)
                    this.StartDoc(doc, -1);
                totTF = -1;
            // Branch 2: documents and per-document term frequency.
            else if (indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS)
                while (true)
                    int doc = postings.NextDoc();
                    if (doc == DocIdSetIterator.NO_MORE_DOCS)
                    int freq = postings.Freq();
                    this.StartDoc(doc, freq);
                    totTF += freq;
            // Branch 3: positions (and payloads), offsets passed as -1/-1.
            else if (indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
                var postingsEnum = (DocsAndPositionsEnum)postings;
                while (true)
                    int doc = postingsEnum.NextDoc();
                    if (doc == DocIdSetIterator.NO_MORE_DOCS)
                    int freq = postingsEnum.Freq();
                    this.StartDoc(doc, freq);
                    totTF += freq;
                    // One AddPosition call per occurrence of the term in this document.
                    for (int i = 0; i < freq; i++)
                        int position = postingsEnum.NextPosition();
                        BytesRef payload = postingsEnum.Payload;
                        this.AddPosition(position, payload, -1, -1);
                // Branch 4: positions plus start/end offsets (the only remaining option).
                Debug.Assert(indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
                var postingsEnum = (DocsAndPositionsEnum)postings;
                while (true)
                    int doc = postingsEnum.NextDoc();
                    if (doc == DocIdSetIterator.NO_MORE_DOCS)
                    int freq = postingsEnum.Freq();
                    this.StartDoc(doc, freq);
                    totTF += freq;
                    for (int i = 0; i < freq; i++)
                        int position = postingsEnum.NextPosition();
                        BytesRef payload = postingsEnum.Payload;
                        this.AddPosition(position, payload, postingsEnum.StartOffset(), postingsEnum.EndOffset());
            return new TermStats(df, indexOptions == FieldInfo.IndexOptions.DOCS_ONLY ? -1 : totTF);
            /// <summary>
            /// Iterates (possibly out of doc-id order) until the enum is positioned on
            /// <paramref name="target"/> or is exhausted, then builds an explanation from
            /// the score stored for the current ord.
            /// </summary>
            /// <param name="target"> the document id to explain. </param>
            /// <returns> a matching <c>ComplexExplanation</c> whose value is
            /// <c>_scores[_ords[_scoreUpto]]</c> and whose description names the join term. </returns>
            internal Explanation Explain(int target)
            {
                int docId;
                do
                {
                    docId = NextDocOutOfOrder();
                    if (docId < target)
                    {
                        // Still behind the target in the current postings: try to jump to it.
                        int tempDocId = DocsEnum.Advance(target);
                        if (tempDocId == target)
                        {
                            docId = tempDocId;
                            break;
                        }
                    }
                    else if (docId == target)
                    {
                        break;
                    }
                    DocsEnum = null; // goto the next ord.
                } while (docId != DocIdSetIterator.NO_MORE_DOCS);

                return new ComplexExplanation(true, outerInstance._scores[outerInstance._ords[_scoreUpto]],
                    "Score based on join value " + _termsEnum.Term().Utf8ToString());
            }
Example #7
        /// <summary>
        /// Randomized test: indexes documents with random (optionally unique) terms,
        /// randomly deletes some of them by id, then checks that the <c>DocsEnum</c>
        /// obtained for randomly chosen terms yields exactly the non-deleted doc ids
        /// recorded for that term.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this listing appears to have lost lines during extraction
        /// (braces, <c>else</c> keywords, and statements such as the ones that would add
        /// the document to the writer or record deletions). Several branches below have
        /// no visible body; compare with the upstream test before editing.
        /// </remarks>
        public virtual void TestRandom()
            int num = AtLeast(2);

            for (int iter = 0; iter < num; iter++)
                if (Verbose)
                    Console.WriteLine("TEST: iter=" + iter);

                Directory dir = NewDirectory();

                IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));
                // we can do this because we use NoMergePolicy (and dont merge to "nothing")
                w.KeepFullyDeletedSegments = true;

                // Expected state: term -> doc ids, the set of deleted ids, and all terms seen.
                IDictionary <BytesRef, IList <int?> > docs = new Dictionary <BytesRef, IList <int?> >();
                ISet <int?>      deleted = new JCG.HashSet <int?>();
                IList <BytesRef> terms   = new JCG.List <BytesRef>();

                int numDocs            = TestUtil.NextInt32(Random, 1, 100 * RandomMultiplier);
                Documents.Document doc = new Documents.Document();
                Field f = NewStringField("field", "", Field.Store.NO);
                Field id = NewStringField("id", "", Field.Store.NO);

                bool onlyUniqueTerms = Random.NextBoolean();
                if (Verbose)
                    Console.WriteLine("TEST: onlyUniqueTerms=" + onlyUniqueTerms + " numDocs=" + numDocs);
                ISet <BytesRef> uniqueTerms = new JCG.HashSet <BytesRef>();
                for (int i = 0; i < numDocs; i++)
                    if (!onlyUniqueTerms && Random.NextBoolean() && terms.Count > 0)
                        // re-use existing term
                        BytesRef term = terms[Random.Next(terms.Count)];
                        // NOTE(review): the lines below look like the "else" branch (new random term) - confirm.
                        string   s    = TestUtil.RandomUnicodeString(Random, 10);
                        BytesRef term = new BytesRef(s);
                        if (!docs.TryGetValue(term, out IList <int?> docsTerm))
                            docs[term] = docsTerm = new JCG.List <int?>();
                    id.SetStringValue("" + i);
                    // NOTE(review): body of this condition is not visible in the listing.
                    if (Random.Next(4) == 1)
                    // Occasionally delete one of the already-indexed documents by id.
                    if (i > 0 && Random.Next(20) == 1)
                        int delID = Random.Next(i);
                        w.DeleteDocuments(new Term("id", "" + delID));
                        if (Verbose)
                            Console.WriteLine("TEST: delete " + delID);

                // Verbose dump of every term with its doc ids, marking deleted ones.
                if (Verbose)
                    IList <BytesRef> termsList = new JCG.List <BytesRef>(uniqueTerms);
#pragma warning disable 612, 618
#pragma warning restore 612, 618
                    Console.WriteLine("TEST: terms in UTF16 order:");
                    foreach (BytesRef b in termsList)
                        Console.WriteLine("  " + UnicodeUtil.ToHexString(b.Utf8ToString()) + " " + b);
                        foreach (int docID in docs[b])
                            if (deleted.Contains(docID))
                                Console.WriteLine("    " + docID + " (deleted)");
                                Console.WriteLine("    " + docID);

                IndexReader reader = w.GetReader();
                if (Verbose)
                    Console.WriteLine("TEST: reader=" + reader);

                IBits liveDocs = MultiFields.GetLiveDocs(reader);
                // NOTE(review): body of this loop is not visible in the listing.
                foreach (int delDoc in deleted)

                // 100 random lookups: the enum must return exactly the live docs for the term, in order.
                for (int i = 0; i < 100; i++)
                    BytesRef term = terms[Random.Next(terms.Count)];
                    if (Verbose)
                        Console.WriteLine("TEST: seek term=" + UnicodeUtil.ToHexString(term.Utf8ToString()) + " " + term);

                    DocsEnum docsEnum = TestUtil.Docs(Random, reader, "field", term, liveDocs, null, DocsFlags.NONE);

                    foreach (int docID in docs[term])
                        if (!deleted.Contains(docID))
                            Assert.AreEqual(docID, docsEnum.NextDoc());
                    Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docsEnum.NextDoc());

Example #8
        /// <summary>
        /// Indexes the English spelling of 0..numDocs-1 with positions and offsets, then
        /// verifies that the offsets reported by <c>DocsAndPositionsEnum</c> point at the
        /// expected term inside the stored text, both when iterating and after
        /// <c>Advance</c>, and finally that a field indexed without offsets still
        /// resolves each id to exactly one document.
        /// </summary>
        /// <param name="withPayloads"> when true a <c>MockPayloadAnalyzer</c> is used instead
        /// of a <c>MockAnalyzer</c>. </param>
        /// <remarks>
        /// NOTE(review): this listing appears to have lost lines during extraction
        /// (braces and some statements, e.g. nothing visible adds <c>doc</c> to the
        /// writer and the payload checks have no visible assertions). Compare with the
        /// upstream test before editing.
        /// </remarks>
        public virtual void DoTestNumbers(bool withPayloads)
            Directory dir      = NewDirectory();
            Analyzer  analyzer = withPayloads ? (Analyzer) new MockPayloadAnalyzer() : new MockAnalyzer(Random());

            Iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            Iwc.SetMergePolicy(NewLogMergePolicy()); // will rely on docids a bit for skipping
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Iwc);

            FieldType ft = new FieldType(TextField.TYPE_STORED);

            ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            // Randomly enable term vectors as well, with random offset/position storage.
            if (Random().NextBoolean())
                ft.StoreTermVectors         = true;
                ft.StoreTermVectorOffsets   = Random().NextBoolean();
                ft.StoreTermVectorPositions = Random().NextBoolean();

            int numDocs = AtLeast(500);

            // Each document: the number in English, its parity, and an un-analyzed id.
            for (int i = 0; i < numDocs; i++)
                Document doc = new Document();
                doc.Add(new Field("numbers", English.IntToEnglish(i), ft));
                doc.Add(new Field("oddeven", (i % 2) == 0 ? "even" : "odd", ft));
                doc.Add(new StringField("id", "" + i, Field.Store.NO));

            IndexReader reader = w.Reader;


            string[] terms = new string[] { "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", "hundred" };

            // For every occurrence of every term, the offsets must slice the term out of the stored text.
            foreach (string term in terms)
                DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(reader, null, "numbers", new BytesRef(term));
                int doc;
                while ((doc = dp.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                    string storedNumbers = reader.Document(doc).Get("numbers");
                    int    freq          = dp.Freq();
                    for (int i = 0; i < freq; i++)
                        int start = dp.StartOffset();
                        Debug.Assert(start >= 0);
                        int end = dp.EndOffset();
                        Debug.Assert(end >= 0 && end >= start);
                        // check that the offsets correspond to the term in the src text
                        Assert.IsTrue(storedNumbers.Substring(start, end - start).Equals(term));
                        if (withPayloads)
                            // check that we have a payload and it starts with "pos"
                            BytesRef payload = dp.Payload;
                        } // note: withPayloads=false doesnt necessarily mean we dont have them from MockAnalyzer!

            // check we can skip correctly
            int numSkippingTests = AtLeast(50);

            for (int j = 0; j < numSkippingTests; j++)
                // Pick a doc id in [100, min(numDocs - 1, 999)]; Advance is asserted to land exactly on it.
                int num = TestUtil.NextInt(Random(), 100, Math.Min(numDocs - 1, 999));
                DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(reader, null, "numbers", new BytesRef("hundred"));
                int doc = dp.Advance(num);
                Assert.AreEqual(num, doc);
                int freq = dp.Freq();
                for (int i = 0; i < freq; i++)
                    string storedNumbers = reader.Document(doc).Get("numbers");
                    int start = dp.StartOffset();
                    Debug.Assert(start >= 0);
                    int end = dp.EndOffset();
                    Debug.Assert(end >= 0 && end >= start);
                    // check that the offsets correspond to the term in the src text
                    Assert.IsTrue(storedNumbers.Substring(start, end - start).Equals("hundred"));
                    if (withPayloads)
                        // check that we have a payload and it starts with "pos"
                        BytesRef payload = dp.Payload;
                    } // note: withPayloads=false doesnt necessarily mean we dont have them from MockAnalyzer!

            // check that other fields (without offsets) work correctly

            // Each id term must match exactly one document, whose doc id equals i.
            for (int i = 0; i < numDocs; i++)
                DocsEnum dp = MultiFields.GetTermDocsEnum(reader, null, "id", new BytesRef("" + i), 0);
                Assert.AreEqual(i, dp.NextDoc());
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dp.NextDoc());

 /// <summary>
 /// Not supported by this enum; always throws.
 /// </summary>
 /// <exception cref="System.NotSupportedException"> always. </exception>
 public override DocsEnum Docs(Bits liveDocs, DocsEnum reuse, int flags)
 {
     throw new System.NotSupportedException();
 }
 /// <summary>
 /// Returns a <c>DocsEnum</c> for the given term state, reusing a previously returned
 /// enum where possible.
 /// </summary>
 /// <remarks>
 /// When <c>termState.PostingsSize</c> is not -1 the term is served by a
 /// <c>PulsingDocsEnum</c>; otherwise the call is delegated to the wrapped postings
 /// reader. Because callers may alternate between the two kinds of enum, each enum
 /// remembers its counterpart through <c>SetOther</c>/<c>GetOther</c> so that both
 /// can be recycled.
 /// </remarks>
 /// <param name="field"> the field being read. </param>
 /// <param name="_termState"> must be a <c>PulsingTermState</c>. </param>
 /// <param name="liveDocs"> passed through to the enum that is returned. </param>
 /// <param name="reuse"> a prior enum (either kind) offered for reuse; may be null. </param>
 /// <param name="flags"> passed through to the wrapped postings reader. </param>
 public override DocsEnum Docs(FieldInfo field, BlockTermState _termState, Bits liveDocs, DocsEnum reuse,
     int flags)
 {
     PulsingTermState termState = (PulsingTermState) _termState;
     if (termState.PostingsSize != -1)
     {
         PulsingDocsEnum postings;
         if (reuse is PulsingDocsEnum)
         {
             postings = (PulsingDocsEnum) reuse;
             if (!postings.CanReuse(field))
             {
                 postings = new PulsingDocsEnum(field);
             }
         }
         else
         {
             // the 'reuse' is actually the wrapped enum
             PulsingDocsEnum previous = (PulsingDocsEnum) GetOther(reuse);
             if (previous != null && previous.CanReuse(field))
             {
                 postings = previous;
             }
             else
             {
                 postings = new PulsingDocsEnum(field);
             }
         }

         if (reuse != postings)
         {
             SetOther(postings, reuse); // postings.other = reuse
         }
         return postings.Reset(liveDocs, termState);
     }
     else
     {
         if (reuse is PulsingDocsEnum)
         {
             // Hand the wrapped reader its own previous enum rather than ours.
             DocsEnum wrapped = _wrappedPostingsReader.Docs(field, termState.WrappedTermState, liveDocs,
                 GetOther(reuse), flags);
             SetOther(wrapped, reuse); // wrapped.other = reuse
             return wrapped;
         }
         else
         {
             return _wrappedPostingsReader.Docs(field, termState.WrappedTermState, liveDocs, reuse, flags);
         }
     }
 }
Example #11
        }                                           // LUCENENET NOTE: Although this isn't a great candidate for a property, did so to make API consistent

        /// <summary>
        /// Get <see cref="DocsEnum"/> for the current term. Do not
        /// call this when the enum is unpositioned. This method
        /// will not return <c>null</c>.
        /// <para/>
        /// Equivalent to calling the three-argument overload with
        /// <c>DocsFlags.FREQS</c>.
        /// </summary>
        /// <param name="liveDocs"> Unset bits are documents that should not
        /// be returned </param>
        /// <param name="reuse"> Pass a prior <see cref="DocsEnum"/> for possible reuse </param>
        public DocsEnum Docs(IBits liveDocs, DocsEnum reuse)
        {
            return Docs(liveDocs, reuse, DocsFlags.FREQS);
        }
Example #12
 /// <summary>
 /// Advances to the first document at or beyond <paramref name="target"/> across all
 /// sub-enums, translating between sub-enum-local and global doc ids via
 /// <c>CurrentBase</c>.
 /// </summary>
 /// <param name="target"> global doc id to advance to; must be greater than the current
 /// <c>Doc</c>. </param>
 /// <returns> the global doc id now positioned on, or <c>NO_MORE_DOCS</c> once every
 /// sub-enum is exhausted. </returns>
 public override int Advance(int target)
 {
     Debug.Assert(target > Doc);
     while (true)
     {
         if (Current != null)
         {
             int doc;
             if (target < CurrentBase)
             {
                 // target was in the previous slice but there was no matching doc after it
                 doc = Current.NextDoc();
             }
             else
             {
                 doc = Current.Advance(target - CurrentBase);
             }

             if (doc == NO_MORE_DOCS)
             {
                 // This sub-enum is exhausted; the next pass moves on to the following one.
                 Current = null;
             }
             else
             {
                 return this.Doc = doc + CurrentBase;
             }
         }
         else if (Upto == NumSubs_Renamed - 1)
         {
             return this.Doc = NO_MORE_DOCS;
         }
         else
         {
             // Step to the next sub-enum before indexing into Subs_Renamed.
             Upto++;
             Current = Subs_Renamed[Upto].DocsEnum;
             CurrentBase = Subs_Renamed[Upto].Slice.Start;
         }
     }
 }
Example #13
        /// <summary>
        /// Moves to the next document across all sub-enums.
        /// </summary>
        /// <returns> the global doc id (sub-enum doc id plus <c>CurrentBase</c>), or
        /// <c>NO_MORE_DOCS</c> once the last sub-enum is exhausted. </returns>
        public override int NextDoc()
        {
            while (true)
            {
                if (Current == null)
                {
                    if (Upto == NumSubs_Renamed - 1)
                    {
                        return this.Doc = NO_MORE_DOCS;
                    }
                    else
                    {
                        // Step to the next sub-enum before indexing into Subs_Renamed.
                        Upto++;
                        Current = Subs_Renamed[Upto].DocsEnum;
                        CurrentBase = Subs_Renamed[Upto].Slice.Start;
                    }
                }

                int doc = Current.NextDoc();
                if (doc != NO_MORE_DOCS)
                {
                    return this.Doc = CurrentBase + doc;
                }
                else
                {
                    // Exhausted: loop around and pick up the following sub-enum.
                    Current = null;
                }
            }
        }
Example #14
 /// <summary>
 /// Delegates directly to the wrapped <c>Tenum</c>, passing all arguments through unchanged.
 /// </summary>
 public override DocsEnum Docs(Bits bits, DocsEnum reuse, int flags)
 {
     return Tenum.Docs(bits, reuse, flags);
 }
 /// <summary>
 /// for a docsenum, sets the 'other' reused enum.
 /// see GetOther for an example.
 /// </summary>
 /// <remarks>
 /// NOTE(review): the body looks like an unfinished Java-to-C# port - braces are
 /// missing and <c>.put(this, other)</c> is a Java map call passed as the argument
 /// of <c>AddAttribute</c>. The intended attribute API cannot be determined from
 /// this listing; verify against the attribute type's definition.
 /// </remarks>
 /// <param name="de"> the enum whose attributes are updated. </param>
 /// <param name="other"> the counterpart enum to remember for later reuse. </param>
 private DocsEnum SetOther(DocsEnum de, DocsEnum other)
     AttributeSource atts = de.Attributes();
     // The entry is keyed by this reader instance (`this`).
     return atts.AddAttribute(PulsingEnumAttributeImpl.Enums().put(this, other));
 /// <summary>
 /// for a docsenum, gets the 'other' reused enum.
 /// Example: Pulsing(Standard).
 /// when doing a term range query you are switching back and forth
 /// between Pulsing and Standard
 /// The way the reuse works is that Pulsing.other = Standard and
 /// Standard.other = Pulsing.
 /// </summary>
 /// <remarks>
 /// NOTE(review): the body looks like an unfinished Java-to-C# port - braces and
 /// the <c>else</c> are missing, the final statement has an unbalanced parenthesis,
 /// <c>.get(this)</c> is a Java map call, and it names <c>PulsingEnumAttribute</c>
 /// where the sibling setter names <c>PulsingEnumAttributeImpl</c>. Verify against
 /// the attribute type's definition.
 /// </remarks>
 /// <param name="de"> the enum to look up; a null argument yields null. </param>
 private DocsEnum GetOther(DocsEnum de)
     if (de == null)
         return null;
         // The lookup is keyed by this reader instance (`this`).
         AttributeSource atts = de.Attributes();
         return atts.AddAttribute(PulsingEnumAttribute.Enums().get(this);
Example #17
        /// <summary>
        /// Asserts that two readers hold equivalent indexes, matching documents through
        /// the unique <paramref name="idField"/>.
        /// </summary>
        /// <remarks>
        /// Phase 1 walks every id term of <paramref name="r1"/>, finds the same term in
        /// <paramref name="r2"/>, records the r2-to-r1 doc id mapping in <c>r2r1</c>, and
        /// compares stored fields and term vectors of the matched pair. Phase 2 walks the
        /// postings of both readers field by field and term by term, packing
        /// (docID, freq) into a long per posting, and compares them after remapping
        /// r2's doc ids into r1's id space and sorting.
        /// <para/>
        /// NOTE(review): this listing appears to have lost lines during extraction
        /// (braces, <c>try</c>, <c>else</c>, <c>break</c>/<c>return</c> statements and the
        /// <c>len1++</c>/<c>len2++</c> increments are not visible). Several conditions below
        /// have no visible body; compare with the upstream test before editing.
        /// </remarks>
        /// <param name="r1"> first reader. </param>
        /// <param name="r2"> second reader. </param>
        /// <param name="idField"> name of the field holding each document's unique id. </param>
        public virtual void VerifyEquals(DirectoryReader r1, DirectoryReader r2, string idField)
            if (VERBOSE)
                Console.WriteLine("\nr1 docs:");
                Console.WriteLine("\nr2 docs:");
            if (r1.NumDocs != r2.NumDocs)
                Debug.Assert(false, "r1.NumDocs=" + r1.NumDocs + " vs r2.NumDocs=" + r2.NumDocs);
            // False only when both readers have the same MaxDoc and r1 has no deleted docs.
            bool hasDeletes = !(r1.MaxDoc == r2.MaxDoc && r1.NumDocs == r1.MaxDoc);

            int[] r2r1 = new int[r2.MaxDoc]; // r2 id to r1 id mapping

            // create mapping from id2 space to id2 based on idField
            Fields f1 = MultiFields.GetFields(r1);

            if (f1 == null)
                // make sure r2 is empty
            Terms terms1 = f1.GetTerms(idField);

            if (terms1 == null)
                Assert.IsTrue(MultiFields.GetFields(r2) == null || MultiFields.GetFields(r2).GetTerms(idField) == null);
            TermsEnum termsEnum = terms1.GetIterator(null);

            IBits liveDocs1 = MultiFields.GetLiveDocs(r1);
            IBits liveDocs2 = MultiFields.GetLiveDocs(r2);

            Fields fields = MultiFields.GetFields(r2);

            if (fields == null)
                // make sure r1 is in fact empty (eg has only all
                // deleted docs):
                IBits    liveDocs = MultiFields.GetLiveDocs(r1);
                DocsEnum docs     = null;
                while (termsEnum.Next() != null)
                    docs = TestUtil.Docs(Random(), termsEnum, liveDocs, docs, DocsFlags.NONE);
                    while (docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                        Assert.Fail("r1 is not empty but r2 is");
            Terms     terms2     = fields.GetTerms(idField);
            TermsEnum termsEnum2 = terms2.GetIterator(null);

            DocsEnum termDocs1 = null;
            DocsEnum termDocs2 = null;

            // Phase 1: match documents by id term and compare their stored fields / term vectors.
            while (true)
                BytesRef term = termsEnum.Next();
                //System.out.println("TEST: match id term=" + term);
                // NOTE(review): presumably the loop exits here when terms run out - confirm.
                if (term == null)

                termDocs1 = TestUtil.Docs(Random(), termsEnum, liveDocs1, termDocs1, DocsFlags.NONE);
                if (termsEnum2.SeekExact(term))
                    termDocs2 = TestUtil.Docs(Random(), termsEnum2, liveDocs2, termDocs2, DocsFlags.NONE);
                    termDocs2 = null;

                if (termDocs1.NextDoc() == DocIdSetIterator.NO_MORE_DOCS)
                    // this doc is deleted and wasn't replaced
                    Assert.IsTrue(termDocs2 == null || termDocs2.NextDoc() == DocIdSetIterator.NO_MORE_DOCS);

                // Each id term must match exactly one live document in each reader.
                int id1 = termDocs1.DocID;
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, termDocs1.NextDoc());

                Assert.IsTrue(termDocs2.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                int id2 = termDocs2.DocID;
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, termDocs2.NextDoc());

                r2r1[id2] = id1;

                // verify stored fields are equivalent
                    VerifyEquals(r1.Document(id1), r2.Document(id2));
                catch (Exception t)
                    Console.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term=" + term);
                    Console.WriteLine("  d1=" + r1.Document(id1));
                    Console.WriteLine("  d2=" + r2.Document(id2));
                    // NOTE(review): "throw t;" resets the stack trace; a bare "throw;" would preserve it.
                    throw t;

                    // verify term vectors are equivalent
                    VerifyEquals(r1.GetTermVectors(id1), r2.GetTermVectors(id2));
                catch (Exception e)
                    // On failure, dump both documents' term vectors for diagnosis before rethrowing.
                    Console.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2);
                    Fields tv1 = r1.GetTermVectors(id1);
                    Console.WriteLine("  d1=" + tv1);
                    if (tv1 != null)
                        DocsAndPositionsEnum dpEnum = null;
                        DocsEnum             dEnum  = null;
                        foreach (string field in tv1)
                            Console.WriteLine("    " + field + ":");
                            Terms terms3 = tv1.GetTerms(field);
                            TermsEnum termsEnum3 = terms3.GetIterator(null);
                            BytesRef  term2;
                            while ((term2 = termsEnum3.Next()) != null)
                                Console.WriteLine("      " + term2.Utf8ToString() + ": freq=" + termsEnum3.TotalTermFreq);
                                dpEnum = termsEnum3.DocsAndPositions(null, dpEnum);
                                if (dpEnum != null)
                                    Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                                    int freq = dpEnum.Freq;
                                    Console.WriteLine("        doc=" + dpEnum.DocID + " freq=" + freq);
                                    for (int posUpto = 0; posUpto < freq; posUpto++)
                                        Console.WriteLine("          pos=" + dpEnum.NextPosition());
                                    // NOTE(review): the lines below look like the "else" (no positions) branch - confirm.
                                    dEnum = TestUtil.Docs(Random(), termsEnum3, null, dEnum, DocsFlags.FREQS);
                                    Assert.IsTrue(dEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                                    int freq = dEnum.Freq;
                                    Console.WriteLine("        doc=" + dEnum.DocID + " freq=" + freq);

                    Fields tv2 = r2.GetTermVectors(id2);
                    Console.WriteLine("  d2=" + tv2);
                    if (tv2 != null)
                        DocsAndPositionsEnum dpEnum = null;
                        DocsEnum             dEnum  = null;
                        foreach (string field in tv2)
                            Console.WriteLine("    " + field + ":");
                            Terms terms3 = tv2.GetTerms(field);
                            TermsEnum termsEnum3 = terms3.GetIterator(null);
                            BytesRef  term2;
                            while ((term2 = termsEnum3.Next()) != null)
                                Console.WriteLine("      " + term2.Utf8ToString() + ": freq=" + termsEnum3.TotalTermFreq);
                                dpEnum = termsEnum3.DocsAndPositions(null, dpEnum);
                                if (dpEnum != null)
                                    Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                                    int freq = dpEnum.Freq;
                                    Console.WriteLine("        doc=" + dpEnum.DocID + " freq=" + freq);
                                    for (int posUpto = 0; posUpto < freq; posUpto++)
                                        Console.WriteLine("          pos=" + dpEnum.NextPosition());
                                    dEnum = TestUtil.Docs(Random(), termsEnum3, null, dEnum, DocsFlags.FREQS);
                                    Assert.IsTrue(dEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                                    int freq = dEnum.Freq;
                                    Console.WriteLine("        doc=" + dEnum.DocID + " freq=" + freq);

                    // NOTE(review): "throw e;" resets the stack trace; a bare "throw;" would preserve it.
                    throw e;

            //System.out.println("TEST: done match id");

            // Verify postings
            //System.out.println("TEST: create te1");
            // Phase 2: walk both readers' fields and terms in parallel and compare postings.
            Fields fields1 = MultiFields.GetFields(r1);
            IEnumerator <string> fields1Enum = fields1.GetEnumerator();
            Fields fields2 = MultiFields.GetFields(r2);
            IEnumerator <string> fields2Enum = fields2.GetEnumerator();

            string    field1 = null, field2 = null;
            TermsEnum termsEnum1 = null;

            termsEnum2 = null;
            DocsEnum docs1 = null, docs2 = null;

            // pack both doc and freq into single element for easy sorting
            long[] info1 = new long[r1.NumDocs];
            long[] info2 = new long[r2.NumDocs];

            for (; ;)
                BytesRef term1 = null, term2 = null;

                // iterate until we get some docs
                int len1;
                for (; ;)
                    len1 = 0;
                    // A null termsEnum1 means "move on to the next field of r1".
                    if (termsEnum1 == null)
                        if (!fields1Enum.MoveNext())
                        field1 = fields1Enum.Current;
                        Terms terms = fields1.GetTerms(field1);
                        if (terms == null)
                        termsEnum1 = terms.GetIterator(null);
                    term1 = termsEnum1.Next();
                    if (term1 == null)
                        // no more terms in this field
                        termsEnum1 = null;

                    //System.out.println("TEST: term1=" + term1);
                    docs1 = TestUtil.Docs(Random(), termsEnum1, liveDocs1, docs1, DocsFlags.FREQS);
                    while (docs1.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                        int d = docs1.DocID;
                        int f = docs1.Freq;
                        // High 32 bits: doc id; low 32 bits: frequency.
                        info1[len1] = (((long)d) << 32) | (uint)f;
                    if (len1 > 0)

                // iterate until we get some docs
                int len2;
                for (; ;)
                    len2 = 0;
                    // A null termsEnum2 means "move on to the next field of r2".
                    if (termsEnum2 == null)
                        if (!fields2Enum.MoveNext())
                        field2 = fields2Enum.Current;
                        Terms terms = fields2.GetTerms(field2);
                        if (terms == null)
                        termsEnum2 = terms.GetIterator(null);
                    term2 = termsEnum2.Next();
                    if (term2 == null)
                        // no more terms in this field
                        termsEnum2 = null;

                    //System.out.println("TEST: term1=" + term1);
                    docs2 = TestUtil.Docs(Random(), termsEnum2, liveDocs2, docs2, DocsFlags.FREQS);
                    while (docs2.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                        // Translate r2's doc id into r1's id space before packing.
                        int d = r2r1[docs2.DocID];
                        int f = docs2.Freq;
                        info2[len2] = (((long)d) << 32) | (uint)f;
                    if (len2 > 0)

                Assert.AreEqual(len1, len2);
                if (len1 == 0) // no more terms

                Assert.AreEqual(field1, field2);

                // DocFreq is only compared when hasDeletes is false.
                if (!hasDeletes)
                    Assert.AreEqual(termsEnum1.DocFreq, termsEnum2.DocFreq);

                Assert.AreEqual(term1, term2, "len1=" + len1 + " len2=" + len2 + " deletes?=" + hasDeletes);

                // sort info2 to get it into ascending docid
                Array.Sort(info2, 0, len2);

                // now compare
                for (int i = 0; i < len1; i++)
                    Assert.AreEqual(info1[i], info2[i], "i=" + i + " len=" + len1 + " d1=" + ((long)((ulong)info1[i] >> 32)) + " f1=" + (info1[i] & int.MaxValue) + " d2=" + ((long)((ulong)info2[i] >> 32)) + " f2=" + (info2[i] & int.MaxValue) + " field=" + field1 + " term=" + term1.Utf8ToString());
Example #18
        /// <summary>
        /// Call this only once (if you subclass!) </summary>
        protected virtual void Uninvert(AtomicReader reader, IBits liveDocs, BytesRef termPrefix)
            FieldInfo info = reader.FieldInfos.FieldInfo(m_field);

            if (info != null && info.HasDocValues)
                throw IllegalStateException.Create("Type mismatch: " + m_field + " was indexed as " + info.DocValuesType);
            //System.out.println("DTO uninvert field=" + field + " prefix=" + termPrefix);
            long startTime = Environment.TickCount;

            m_prefix = termPrefix == null ? null : BytesRef.DeepCopyOf(termPrefix);

            int maxDoc = reader.MaxDoc;

            int[] index    = new int[maxDoc];     // immediate term numbers, or the index into the byte[] representing the last number
            int[] lastTerm = new int[maxDoc];     // last term we saw for this document
            var   bytes    = new sbyte[maxDoc][]; // list of term numbers for the doc (delta encoded vInts)

            Fields fields = reader.Fields;

            if (fields == null)
                // No terms
            Terms terms = fields.GetTerms(m_field);

            if (terms == null)
                // No terms

            TermsEnum te        = terms.GetEnumerator();
            BytesRef  seekStart = termPrefix ?? new BytesRef();

            //System.out.println("seekStart=" + seekStart.utf8ToString());
            if (te.SeekCeil(seekStart) == TermsEnum.SeekStatus.END)
                // No terms match

            // If we need our "term index wrapper", these will be
            // init'd below:
            IList <BytesRef> indexedTerms      = null;
            PagedBytes       indexedTermsBytes = null;

            bool testedOrd = false;

            // we need a minimum of 9 bytes, but round up to 12 since the space would
            // be wasted with most allocators anyway.
            var tempArr = new sbyte[12];

            // enumerate all terms, and build an intermediate form of the un-inverted field.
            // During this intermediate form, every document has a (potential) byte[]
            // and the int[maxDoc()] array either contains the termNumber list directly
            // or the *end* offset of the termNumber list in it's byte array (for faster
            // appending and faster creation of the final form).
            // idea... if things are too large while building, we could do a range of docs
            // at a time (but it would be a fair amount slower to build)
            // could also do ranges in parallel to take advantage of multiple CPUs

            // OPTIONAL: remap the largest df terms to the lowest 128 (single byte)
            // values.  this requires going over the field first to find the most
            // frequent terms ahead of time.

            int termNum = 0;

            m_docsEnum = null;

            // Loop begins with te positioned to first term (we call
            // seek above):
            for (; ;)
                BytesRef t = te.Term;
                if (t == null || (termPrefix != null && !StringHelper.StartsWith(t, termPrefix)))
                //System.out.println("visit term=" + t.utf8ToString() + " " + t + " termNum=" + termNum);

                if (!testedOrd)
                        m_ordBase = (int)te.Ord;
                        //System.out.println("got ordBase=" + ordBase);
                    catch (Exception uoe) when(uoe.IsUnsupportedOperationException())
                        // Reader cannot provide ord support, so we wrap
                        // our own support by creating our own terms index:
                        indexedTerms      = new List <BytesRef>();
                        indexedTermsBytes = new PagedBytes(15);
                        //System.out.println("NO ORDS");
                    testedOrd = true;

                VisitTerm(te, termNum);

                if (indexedTerms != null && (termNum & indexIntervalMask) == 0)
                    // Index this term
                    m_sizeOfIndexedStrings += t.Length;
                    BytesRef indexedTerm = new BytesRef();
                    indexedTermsBytes.Copy(t, indexedTerm);
                    // TODO: really should 1) strip off useless suffix,
                    // and 2) use FST not array/PagedBytes

                int df = te.DocFreq;
                if (df <= m_maxTermDocFreq)
                    m_docsEnum = te.Docs(liveDocs, m_docsEnum, DocsFlags.NONE);

                    // dF, but takes deletions into account
                    int actualDF = 0;

                    for (; ;)
                        int doc = m_docsEnum.NextDoc();
                        if (doc == DocIdSetIterator.NO_MORE_DOCS)
                        //System.out.println("  chunk=" + chunk + " docs");


                        //System.out.println("    docID=" + doc);
                        // add TNUM_OFFSET to the term number to make room for special reserved values:
                        // 0 (end term) and 1 (index into byte array follows)
                        int delta = termNum - lastTerm[doc] + TNUM_OFFSET;
                        lastTerm[doc] = termNum;
                        int val = index[doc];

                        if ((val & 0xff) == 1)
                            // index into byte array (actually the end of
                            // the doc-specific byte[] when building)
                            int pos    = val.TripleShift(8);
                            int ilen   = VInt32Size(delta);
                            var arr    = bytes[doc];
                            int newend = pos + ilen;
                            if (newend > arr.Length)
                                // We avoid a doubling strategy to lower memory usage.
                                // this faceting method isn't for docs with many terms.
                                // In hotspot, objects have 2 words of overhead, then fields, rounded up to a 64-bit boundary.
                                // TODO: figure out what array lengths we can round up to w/o actually using more memory
                                // (how much space does a byte[] take up?  Is data preceded by a 32 bit length only?
                                // It should be safe to round up to the nearest 32 bits in any case.
                                int newLen = (newend + 3) & unchecked ((int)0xfffffffc); // 4 byte alignment
                                var newarr = new sbyte[newLen];
                                Array.Copy(arr, 0, newarr, 0, pos);
                                arr        = newarr;
                                bytes[doc] = newarr;
                            pos        = WriteInt32(delta, arr, pos);
                            index[doc] = (pos << 8) | 1; // update pointer to end index in byte[]
                            // OK, this int has data in it... find the end (a zero starting byte - not
                            // part of another number, hence not following a byte with the high bit set).
                            int ipos;
                            if (val == 0)
                                ipos = 0;
                            else if ((val & 0x0000ff80) == 0)
                                ipos = 1;
                            else if ((val & 0x00ff8000) == 0)
                                ipos = 2;
                            else if ((val & 0xff800000) == 0)
                                ipos = 3;
                                ipos = 4;

                            //System.out.println("      ipos=" + ipos);

                            int endPos = WriteInt32(delta, tempArr, ipos);
                            //System.out.println("      endpos=" + endPos);
                            if (endPos <= 4)
                                //System.out.println("      fits!");
                                // value will fit in the integer... move bytes back
                                for (int j = ipos; j < endPos; j++)
                                    val |= (tempArr[j] & 0xff) << (j << 3);
                                index[doc] = val;
                                // value won't fit... move integer into byte[]
                                for (int j = 0; j < ipos; j++)
                                    tempArr[j] = (sbyte)val;
                                    val        = val.TripleShift(8);
                                // point at the end index in the byte[]
                                index[doc] = (endPos << 8) | 1;
                                bytes[doc] = tempArr;
                                tempArr    = new sbyte[12];
                    SetActualDocFreq(termNum, actualDF);

                if (!te.MoveNext())

            m_numTermsInField = termNum;

            long midPoint = Environment.TickCount;

            if (m_termInstances == 0)
                // we didn't invert anything
                // lower memory consumption.
                m_tnums = null;
                this.m_index = index;

                // transform intermediate form into the final form, building a single byte[]
                // at a time, and releasing the intermediate byte[]s as we go to avoid
                // increasing the memory footprint.

                for (int pass = 0; pass < 256; pass++)
                    var target = m_tnums[pass];
                    var pos    = 0; // end in target;
                    if (target != null)
                        pos = target.Length;
                        target = new sbyte[4096];

                    // loop over documents, 0x00ppxxxx, 0x01ppxxxx, 0x02ppxxxx
                    // where pp is the pass (which array we are building), and xx is all values.
                    // each pass shares the same byte[] for termNumber lists.
                    for (int docbase = pass << 16; docbase < maxDoc; docbase += (1 << 24))
                        int lim = Math.Min(docbase + (1 << 16), maxDoc);
                        for (int doc = docbase; doc < lim; doc++)
                            //System.out.println("  pass=" + pass + " process docID=" + doc);
                            int val = index[doc];
                            if ((val & 0xff) == 1)
                                int len = val.TripleShift(8);
                                //System.out.println("    ptr pos=" + pos);
                                index[doc] = (pos << 8) | 1; // change index to point to start of array
                                if ((pos & 0xff000000) != 0)
                                    // we only have 24 bits for the array index
                                    throw IllegalStateException.Create("Too many values for UnInvertedField faceting on field " + m_field);
                                var arr = bytes[doc];

                                 * for(byte b : arr) {
                                 * //System.out.println("      b=" + Integer.toHexString((int) b));
                                 * }
                                bytes[doc] = null; // IMPORTANT: allow GC to avoid OOM
                                if (target.Length <= pos + len)
                                    int newlen = target.Length;

                                    //* we don't have to worry about the array getting too large
                                    // since the "pos" param will overflow first (only 24 bits available)
                                    // if ((newlen<<1) <= 0) {
                                    //  // overflow...
                                    //  newlen = Integer.MAX_VALUE;
                                    //  if (newlen <= pos + len) {
                                    //    throw new SolrException(400,"Too many terms to uninvert field!");
                                    //  }
                                    // } else {
                                    //  while (newlen <= pos + len) newlen<<=1;  // doubling strategy
                                    // }
                                    while (newlen <= pos + len) // doubling strategy
                                        newlen <<= 1;
                                    var newtarget = new sbyte[newlen];
                                    Array.Copy(target, 0, newtarget, 0, pos);
                                    target = newtarget;
                                Array.Copy(arr, 0, target, pos, len);
                                pos += len + 1; // skip single byte at end and leave it 0 for terminator

                    // shrink array
                    if (pos < target.Length)
                        var newtarget = new sbyte[pos];
                        Array.Copy(target, 0, newtarget, 0, pos);
                        target = newtarget;

                    m_tnums[pass] = target;

                    if ((pass << 16) > maxDoc)
            if (indexedTerms != null)
                m_indexedTermsArray = new BytesRef[indexedTerms.Count];
                indexedTerms.CopyTo(m_indexedTermsArray, 0);

            long endTime = Environment.TickCount;

            m_total_time  = (int)(endTime - startTime);
            m_phase1_time = (int)(midPoint - startTime);
Example #19
        /// <summary>
        /// Delete by Term: for every term in <paramref name="termsIter"/>, looks the term up in
        /// <paramref name="reader"/>'s postings and passes each matching docID to
        /// <c>rld.Delete</c>. The whole operation runs while holding the monitor on <c>this</c>.
        /// </summary>
        /// <param name="termsIter"> terms to delete by; the assertion below expects fields to arrive in ascending order </param>
        /// <param name="rld"> supplies the live docs used to filter postings and receives the deletes </param>
        /// <param name="reader"> segment reader whose fields/postings are searched </param>
        // NOTE(review): brace, else, break, continue and return lines appear to be missing from
        // this listing, and the method body is cut off after the final Delete call; the control
        // flow described in the comments below is inferred from indentation -- confirm against
        // the original source.
        private long ApplyTermDeletes(IEnumerable <Term> termsIter, ReadersAndUpdates rld, SegmentReader reader)
            lock (this)
                // presumably counts successful deletes and is the return value -- the increment
                // and the return are not visible here; TODO confirm
                long   delCount = 0;
                Fields fields   = reader.Fields;
                if (fields == null)
                    // this reader has no postings

                TermsEnum termsEnum = null;

                string   currentField = null;
                // passed as the "reuse" argument to Docs() below; never reassigned in the visible code
                DocsEnum docs         = null;


                bool any = false;

                //System.out.println(Thread.currentThread().getName() + " del terms reader=" + reader);
                foreach (Term term in termsIter)
                    // Since we visit terms sorted, we gain performance
                    // by re-using the same TermsEnum and seeking only
                    // forwards
                    if (!term.Field().Equals(currentField))
                        // field changed: fields must arrive in strictly ascending order
                        Debug.Assert(currentField == null || currentField.CompareTo(term.Field()) < 0);
                        currentField = term.Field();
                        Terms terms = fields.Terms(currentField);
                        if (terms != null)
                            termsEnum = terms.Iterator(termsEnum);
                            // NOTE(review): an 'else' appears to be missing before the next line
                            // (field absent from this reader => no enum) -- confirm
                            termsEnum = null;

                    // NOTE(review): the statement guarded by this check is missing from the listing
                    if (termsEnum == null)

                    // System.out.println("  term=" + term);

                    if (termsEnum.SeekExact(term.Bytes()))
                        // we don't need term frequencies for this
                        DocsEnum docsEnum = termsEnum.Docs(rld.LiveDocs, docs, DocsEnum.FLAG_NONE);
                        //System.out.println("BDS: got docsEnum=" + docsEnum);

                        if (docsEnum != null)
                            while (true)
                                int docID = docsEnum.NextDoc();
                                //System.out.println(Thread.currentThread().getName() + " del term=" + term + " doc=" + docID);
                                // NO_MORE_DOCS marks the end of this term's postings
                                if (docID == DocIdSetIterator.NO_MORE_DOCS)
                                // remember that at least one document was visited
                                if (!any)
                                    any = true;
                                // NOTE: there is no limit check on the docID
                                // when deleting by Term (unlike by Query)
                                // because on flush we apply all Term deletes to
                                // each segment.  So all Term deleting here is
                                // against prior segments:
                                if (rld.Delete(docID))

            /// <summary>
            /// Advances to the next document by draining the current <c>DocsEnum</c> and, once it is
            /// exhausted, seeking <c>_termsEnum</c> to the next term taken from
            /// <c>outerInstance._terms</c> via <c>outerInstance._ords</c>.
            /// </summary>
            /// <returns> the next docID (also stored in <c>_doc</c>), or
            /// <c>DocIdSetIterator.NO_MORE_DOCS</c> once every term has been consumed </returns>
            // NOTE(review): brace/else lines appear to be missing from this listing; the 'return'
            // after 'DocsEnum = null' most likely belongs to an else branch -- confirm.
            private int NextDocOutOfOrder()
                while (true)
                    if (DocsEnum != null)
                        int docId = DocsEnumNextDoc();
                        if (docId == DocIdSetIterator.NO_MORE_DOCS)
                            // current term exhausted; drop the enum so the next term is loaded
                            DocsEnum = null;
                            return _doc = docId;

                    // all terms have been visited
                    if (_upto == outerInstance._terms.Size())
                        return _doc = DocIdSetIterator.NO_MORE_DOCS;

                    // remember the term slot being processed before _upto is advanced below
                    _scoreUpto = _upto;
                    if (_termsEnum.SeekExact(outerInstance._terms.Get(outerInstance._ords[_upto++], _spare)))
                        // keep the enum in _reuse so it can be handed back on the next call
                        DocsEnum = _reuse = _termsEnum.Docs(_acceptDocs, _reuse, DocsEnum.FLAG_NONE);
Example #21
 /// <summary>
 /// Not supported by this implementation: every call throws.
 /// </summary>
 /// <exception cref="InvalidOperationException"> always </exception>
 public override DocsEnum Docs(IBits liveDocs, DocsEnum reuse, DocsFlags flags)
     throw new InvalidOperationException("this method should never be called");
Example #22
 /// <summary>
 /// Get <seealso cref="DocsEnum"/> for the current term. Do not
 /// call this when the enum is unpositioned. This method
 /// will not return null. Convenience overload that forwards to the
 /// three-argument <c>Docs</c> with <c>DocsEnum.FLAG_FREQS</c>.
 /// </summary>
 /// <param name="liveDocs"> unset bits are documents that should not
 /// be returned </param>
 /// <param name="reuse"> pass a prior DocsEnum for possible reuse </param>
 /// <returns> whatever the three-argument overload returns for the current term </returns>
 public DocsEnum Docs(Bits liveDocs, DocsEnum reuse)
     return Docs(liveDocs, reuse, DocsEnum.FLAG_FREQS);
        /// <summary>
        /// Runs several indexing iterations, each with <c>NUM_THREADS</c> <c>IndexerThread</c>s
        /// sharing one <c>IndexWriter</c>. Fails if a thread dies before adding a document or is
        /// still alive at the hang check, then reopens the index and expects at least one
        /// posting for the term "aaa" in field "field".
        /// </summary>
        // NOTE(review): brace lines and several statements (the tail of the IndexWriter
        // construction, thread Start/Join, the writer close, the count increment, cleanup)
        // appear to be missing from this listing -- confirm against the original source.
        public virtual void TestCloseWithThreads()
            int NUM_THREADS   = 3;
            // more iterations when TestNightly is set
            int numIterations = TestNightly ? 7 : 3;

            for (int iter = 0; iter < numIterations; iter++)
                if (Verbose)
                    Console.WriteLine("\nTEST: iter=" + iter);
                Directory   dir    = NewDirectory();
                IndexWriter writer = new IndexWriter(
                    NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
                    .SetMergeScheduler(new ConcurrentMergeScheduler())

                IndexerThread[] threads = new IndexerThread[NUM_THREADS];

                for (int i = 0; i < NUM_THREADS; i++)
                    threads[i] = new IndexerThread(writer, false, NewField)

                                 // LUCENENET NOTE - ConcurrentMergeScheduler
                                 // used to take too long for this test to index a single document
                                 // so, increased the time from 200 to 300 ms.
                                 // But it has now been restored to 200 ms like Lucene.
                        timeToRunInMilliseconds = 200

                // NOTE(review): loop body missing (presumably starts each thread) -- confirm
                for (int i = 0; i < NUM_THREADS; i++)

                // poll the threads until one of them reports a positive addCount
                bool done = false;
                while (!done)
                    for (int i = 0; i < NUM_THREADS; i++)
                    // only stop when at least one thread has added a doc
                        if (threads[i].addCount > 0)
                            done = true;
                        else if (!threads[i].IsAlive)
                            Assert.Fail("thread failed before indexing a single document");

                if (Verbose)
                    Console.WriteLine("\nTEST: now close");

                // Make sure threads that are adding docs are not hung:
                for (int i = 0; i < NUM_THREADS; i++)
                    // Without fix for LUCENE-1130: one of the
                    // threads will hang
                    if (threads[i].IsAlive)
                        Assert.Fail("thread seems to be hung");

                // Quick test to make sure index is not corrupt:
                IndexReader reader = DirectoryReader.Open(dir);
                DocsEnum    tdocs  = TestUtil.Docs(Random, reader, "field", new BytesRef("aaa"), MultiFields.GetLiveDocs(reader), null, 0);
                int         count  = 0;
                // NOTE(review): loop body missing (presumably increments count) -- confirm
                while (tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                Assert.IsTrue(count > 0);

Example #24
 /// <summary>
 /// Get <seealso cref="DocsEnum"/> for the current term, with
 /// control over whether freqs are required. Do not
 /// call this when the enum is unpositioned. This method
 /// will not return null.
 /// </summary>
 /// <param name="liveDocs"> unset bits are documents that should not
 /// be returned </param>
 /// <param name="reuse"> pass a prior DocsEnum for possible reuse </param>
 /// <param name="flags"> specifies which optional per-document values
 ///        you require; see <see cref="DocsEnum.FLAG_FREQS"/> </param>
 /// <returns> a non-null <see cref="DocsEnum"/> for the current term </returns>
 /// <seealso cref="Docs(Bits, DocsEnum, int)"/>
 public abstract DocsEnum Docs(Bits liveDocs, DocsEnum reuse, int flags);
Example #25
 /// <summary>
 /// Wraps <paramref name="in"/> by chaining to the two-argument constructor with
 /// <c>true</c> as the second argument.
 /// </summary>
 /// <param name="in"> the DocsEnum to wrap </param>
 // NOTE(review): the meaning of the 'true' flag is defined by the two-argument constructor,
 // which is not visible here -- confirm.
 public AssertingDocsEnum(DocsEnum @in)
     : this(@in, true)
Example #26
 /// <summary>
 /// Not supported by this implementation: every call throws.
 /// </summary>
 /// <exception cref="InvalidOperationException"> always </exception>
 public override DocsEnum Docs(Bits liveDocs, DocsEnum reuse, int flags)
     throw new InvalidOperationException("this method should never be called");
 /// <summary>
 /// Checks that both enums return the same docIDs when stepped sequentially with
 /// <c>NextDoc()</c>. Freqs are not compared (see the comment inside the loop).
 /// </summary>
 /// <param name="leftDocs"> reference enum; drives the iteration </param>
 /// <param name="rightDocs"> enum expected to yield the same docID sequence </param>
 // NOTE(review): the statements guarded by the null check are missing from this listing
 // (brace lines were dropped) -- confirm against the original source.
 public virtual void AssertDocsEnum(DocsEnum leftDocs, DocsEnum rightDocs)
     if (leftDocs == null)
     // both enums must start unpositioned, reporting docID -1
     Assert.AreEqual(-1, leftDocs.DocID());
     Assert.AreEqual(-1, rightDocs.DocID());
     int docid;
     while ((docid = leftDocs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
         Assert.AreEqual(docid, rightDocs.NextDoc());
         // we don't assert freqs, they are allowed to be different
     // the right side must be exhausted as well
     Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, rightDocs.NextDoc());
            /// <summary>
            /// Returns a <c>TVDocsEnum</c> reset onto the entry at <c>Ord_Renamed</c>, recycling
            /// <paramref name="reuse"/> when it is already a <c>TVDocsEnum</c>.
            /// </summary>
            /// <param name="liveDocs"> forwarded to <c>TVDocsEnum.Reset</c> </param>
            /// <param name="reuse"> candidate instance to recycle; may be null </param>
            /// <param name="flags"> not read in the visible code </param>
            // NOTE(review): an 'else' appears to be missing before the 'new TVDocsEnum()' line
            // (brace/else lines were dropped from this listing) -- confirm.
            public override sealed DocsEnum Docs(Bits liveDocs, DocsEnum reuse, int flags)
                TVDocsEnum docsEnum;
                if (reuse != null && reuse is TVDocsEnum)
                    docsEnum = (TVDocsEnum)reuse;
                    docsEnum = new TVDocsEnum();

                docsEnum.Reset(liveDocs, TermFreqs[Ord_Renamed], PositionIndex[Ord_Renamed], Positions, StartOffsets, Lengths, Payloads, PayloadIndex);
                return docsEnum;
Example #29
        /// <summary>
        /// Checks advancing docs: steps both enums with a random mix of <c>NextDoc()</c> and
        /// <c>Advance()</c> and asserts they land on the same docID each time, optionally
        /// comparing freqs as well.
        /// </summary>
        /// <param name="info"> message attached to every assertion </param>
        /// <param name="leftReader"> reader whose <c>MaxDoc</c> is used to size the random skips </param>
        /// <param name="docFreq"> used together with <c>MaxDoc</c> to compute the average gap </param>
        /// <param name="leftDocs"> reference enum </param>
        /// <param name="rightDocs"> enum expected to behave identically </param>
        /// <param name="hasFreqs"> when true, <c>Freq()</c> is compared too </param>
        // NOTE(review): brace, else and return lines appear to be missing from this listing
        // (e.g. the null-check body, the else before "advance()", the exit on NO_MORE_DOCS) --
        // confirm against the original source.
        public void AssertDocsSkippingEquals(string info, IndexReader leftReader, int docFreq, DocsEnum leftDocs, DocsEnum rightDocs, bool hasFreqs)
            if (leftDocs == null)
            int docid = -1;
            // integer division; the +1 avoids dividing by zero when docFreq is 0
            int averageGap = leftReader.MaxDoc / (1 + docFreq);
            int skipInterval = 16;

            while (true)
                if (Random().NextBoolean())
                    // nextDoc()
                    docid = leftDocs.NextDoc();
                    Assert.AreEqual(docid, rightDocs.NextDoc(), info);
                    // advance()
                    // target = current doc + skipInterval + a random fraction of the average gap, rounded up
                    int skip = docid + (int)Math.Ceiling(Math.Abs(skipInterval + Random().NextDouble() * averageGap));
                    docid = leftDocs.Advance(skip);
                    Assert.AreEqual(docid, rightDocs.Advance(skip), info);

                if (docid == DocIdSetIterator.NO_MORE_DOCS)
                if (hasFreqs)
                    Assert.AreEqual(leftDocs.Freq(), rightDocs.Freq(), info);
Example #30
        /// <summary>
        /// Indexes 10 "aaa" documents, 16 "bbb" documents and 50 "ccc" documents, then checks
        /// <c>NextDoc()</c> / <c>Advance()</c> on each term's <c>DocsEnum</c>. The assertions
        /// expect "aaa" on docs 0-9, "bbb" on docs 10-25 and "ccc" on docs 26-75, each with
        /// freq 4 where freqs are checked.
        /// </summary>
        /// <param name="indexDivisor"> passed through to <c>DirectoryReader.Open</c> </param>
        // NOTE(review): brace lines and some statements (e.g. the merge/close after the
        // "single segment" comment, reader/directory cleanup) appear to be missing from this
        // listing -- confirm against the original source.
        public virtual void TestSkipTo(int indexDivisor)
            Directory   dir    = NewDirectory();
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));

            Term ta = new Term("content", "aaa");

            for (int i = 0; i < 10; i++)
                AddDoc(writer, "aaa aaa aaa aaa");

            Term tb = new Term("content", "bbb");

            for (int i = 0; i < 16; i++)
                AddDoc(writer, "bbb bbb bbb bbb");

            Term tc = new Term("content", "ccc");

            for (int i = 0; i < 50; i++)
                AddDoc(writer, "ccc ccc ccc ccc");

            // assure that we deal with a single segment

            IndexReader reader = DirectoryReader.Open(dir, indexDivisor);

            DocsEnum tdocs = TestUtil.Docs(Random(), reader, ta.Field(), new BytesRef(ta.Text()), MultiFields.GetLiveDocs(reader), null, DocsEnum.FLAG_FREQS);

            // without optimization (assumption skipInterval == 16)

            // with next
            Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(0, tdocs.DocID());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(1, tdocs.DocID());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.Advance(2) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(2, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(4) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(4, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(9) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(9, tdocs.DocID());
            // doc 9 is the last "aaa" document, so advancing past it must exhaust the enum
            Assert.IsFalse(tdocs.Advance(10) != DocIdSetIterator.NO_MORE_DOCS);

            // without next
            tdocs = TestUtil.Docs(Random(), reader, ta.Field(), new BytesRef(ta.Text()), MultiFields.GetLiveDocs(reader), null, 0);

            Assert.IsTrue(tdocs.Advance(0) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(0, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(4) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(4, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(9) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(9, tdocs.DocID());
            Assert.IsFalse(tdocs.Advance(10) != DocIdSetIterator.NO_MORE_DOCS);

            // exactly skipInterval documents and therefore with optimization

            // with next
            tdocs = TestUtil.Docs(Random(), reader, tb.Field(), new BytesRef(tb.Text()), MultiFields.GetLiveDocs(reader), null, DocsEnum.FLAG_FREQS);

            Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(10, tdocs.DocID());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(11, tdocs.DocID());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.Advance(12) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(12, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(15) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(15, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(24) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(24, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(25) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(25, tdocs.DocID());
            // doc 25 is the last "bbb" document
            Assert.IsFalse(tdocs.Advance(26) != DocIdSetIterator.NO_MORE_DOCS);

            // without next
            tdocs = TestUtil.Docs(Random(), reader, tb.Field(), new BytesRef(tb.Text()), MultiFields.GetLiveDocs(reader), null, DocsEnum.FLAG_FREQS);

            // advancing to a target before the first "bbb" document lands on that first document (10)
            Assert.IsTrue(tdocs.Advance(5) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(10, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(15) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(15, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(24) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(24, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(25) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(25, tdocs.DocID());
            Assert.IsFalse(tdocs.Advance(26) != DocIdSetIterator.NO_MORE_DOCS);

            // much more than skipInterval documents and therefore with optimization

            // with next
            tdocs = TestUtil.Docs(Random(), reader, tc.Field(), new BytesRef(tc.Text()), MultiFields.GetLiveDocs(reader), null, DocsEnum.FLAG_FREQS);

            Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(26, tdocs.DocID());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(27, tdocs.DocID());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.Advance(28) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(28, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(40) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(40, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(57) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(57, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(74) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(74, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(75) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(75, tdocs.DocID());
            // doc 75 is the last "ccc" document
            Assert.IsFalse(tdocs.Advance(76) != DocIdSetIterator.NO_MORE_DOCS);

            //without next
            tdocs = TestUtil.Docs(Random(), reader, tc.Field(), new BytesRef(tc.Text()), MultiFields.GetLiveDocs(reader), null, 0);
            Assert.IsTrue(tdocs.Advance(5) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(26, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(40) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(40, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(57) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(57, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(74) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(74, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(75) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(75, tdocs.DocID());
            Assert.IsFalse(tdocs.Advance(76) != DocIdSetIterator.NO_MORE_DOCS);

Example #31
 /// <summary>
 /// Walks <paramref name="docs"/> with a random mix of <c>NextDoc()</c> and
 /// <c>Advance()</c> calls and asserts that each returned docID matches the corresponding
 /// entry of <paramref name="expected"/>, ending with <c>NO_MORE_DOCS</c>.
 /// </summary>
 /// <param name="docs"> the enum under test </param>
 /// <param name="expected"> the docIDs the enum should produce, in order </param>
 // NOTE(review): brace and else lines appear to be missing from this listing (the body of
 // the first VERBOSE check, the else before "test advance()", the else before the second
 // expectation, and probably an upto increment on the nextDoc path) -- confirm against the
 // original source.
 private void TestOne(DocsEnum docs, IList<int> expected)
     if (VERBOSE)
     // index into 'expected' of the last position reached; -1 means nothing consumed yet
     int upto = -1;
     while (upto < expected.Count)
         if (VERBOSE)
             Console.WriteLine("  cycle upto=" + upto + " of " + expected.Count);
         int docID;
         // take the nextDoc path when Random().Next(4) == 1, and always when positioned on
         // the last expected entry
         if (Random().Next(4) == 1 || upto == expected.Count - 1)
             // test nextDoc()
             if (VERBOSE)
                 Console.WriteLine("    do nextDoc");
             docID = docs.NextDoc();
             // test advance()
             int inc = TestUtil.NextInt(Random(), 1, expected.Count - 1 - upto);
             if (VERBOSE)
                 Console.WriteLine("    do advance inc=" + inc);
             upto += inc;
             docID = docs.Advance(expected[upto]);
         // running off the end of 'expected' must coincide with the enum being exhausted
         if (upto == expected.Count)
             if (VERBOSE)
                 Console.WriteLine("  expect docID=" + DocIdSetIterator.NO_MORE_DOCS + " actual=" + docID);
             Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docID);
             if (VERBOSE)
                 Console.WriteLine("  expect docID=" + expected[upto] + " actual=" + docID);
             Assert.IsTrue(docID != DocIdSetIterator.NO_MORE_DOCS);
             Assert.AreEqual((int)expected[upto], docID);
Example #32
        /// <summary>
        /// Runs several indexing iterations, each with <c>NUM_THREADS</c> <c>IndexerThread</c>s
        /// sharing one <c>IndexWriter</c>. Fails if a thread dies before adding a document or is
        /// still alive at the hang check, then reopens the index and expects at least one
        /// posting for the term "aaa" in field "field".
        /// </summary>
        /// <param name="scheduler"> merge scheduler supplied by the value source; it is not
        /// referenced in the visible lines </param>
        // NOTE(review): brace lines and several statements (the rest of the config expression,
        // which presumably installs 'scheduler', thread Start/Join, the writer close, the
        // count increment, cleanup) appear to be missing from this listing -- confirm against
        // the original source.
        public virtual void TestCloseWithThreads([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")] IConcurrentMergeScheduler scheduler)
            int NUM_THREADS   = 3;
            // more iterations when TEST_NIGHTLY is set
            int numIterations = TEST_NIGHTLY ? 7 : 3;

            for (int iter = 0; iter < numIterations; iter++)
                if (VERBOSE)
                    Console.WriteLine("\nTEST: iter=" + iter);
                Directory dir    = NewDirectory();
                var       config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
                IndexWriter writer = new IndexWriter(dir, config);

                IndexerThread[] threads = new IndexerThread[NUM_THREADS];

                for (int i = 0; i < NUM_THREADS; i++)
                    threads[i] = new IndexerThread(writer, false, NewField);

                // NOTE(review): loop body missing (presumably starts each thread) -- confirm
                for (int i = 0; i < NUM_THREADS; i++)

                // poll the threads until one of them reports a positive AddCount
                bool done = false;
                while (!done)
                    for (int i = 0; i < NUM_THREADS; i++)
                    // only stop when at least one thread has added a doc
                        if (threads[i].AddCount > 0)
                            done = true;
                        else if (!threads[i].IsAlive)
                            Assert.Fail("thread failed before indexing a single document");

                if (VERBOSE)
                    Console.WriteLine("\nTEST: now close");

                // Make sure threads that are adding docs are not hung:
                for (int i = 0; i < NUM_THREADS; i++)
                    // Without fix for LUCENE-1130: one of the
                    // threads will hang
                    if (threads[i].IsAlive)
                        Assert.Fail("thread seems to be hung");

                // Quick test to make sure index is not corrupt:
                IndexReader reader = DirectoryReader.Open(dir);
                DocsEnum    tdocs  = TestUtil.Docs(Random(), reader, "field", new BytesRef("aaa"), MultiFields.GetLiveDocs(reader), null, 0);
                int         count  = 0;
                // NOTE(review): loop body missing (presumably increments count) -- confirm
                while (tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                Assert.IsTrue(count > 0);

 /// <summary>
 /// Forwards the call unchanged to the wrapped instance <c>@in</c>.
 /// </summary>
 /// <param name="liveDocs"> passed through to <c>@in.Docs</c> </param>
 /// <param name="reuse"> passed through to <c>@in.Docs</c> </param>
 /// <param name="flags"> passed through to <c>@in.Docs</c> </param>
 /// <returns> whatever <c>@in.Docs</c> returns </returns>
 public override DocsEnum Docs(Bits liveDocs, DocsEnum reuse, int flags)
     return @in.Docs(liveDocs, reuse, flags);
Example #34
        /// <summary>
        /// Call this only once (if you subclass!) </summary>
        protected internal virtual void Uninvert(AtomicReader reader, Bits liveDocs, BytesRef termPrefix)
            FieldInfo info = reader.FieldInfos.FieldInfo(Field);
            if (info != null && info.HasDocValues())
                throw new InvalidOperationException("Type mismatch: " + Field + " was indexed as " + info.DocValuesType);
            //System.out.println("DTO uninvert field=" + field + " prefix=" + termPrefix);
            long startTime = DateTime.Now.Millisecond;
            Prefix = termPrefix == null ? null : BytesRef.DeepCopyOf(termPrefix);

            int maxDoc = reader.MaxDoc;
            int[] index = new int[maxDoc]; // immediate term numbers, or the index into the byte[] representing the last number
            int[] lastTerm = new int[maxDoc]; // last term we saw for this document
            sbyte[][] bytes = new sbyte[maxDoc][]; // list of term numbers for the doc (delta encoded vInts)

            Fields fields = reader.Fields;
            if (fields == null)
                // No terms
            Terms terms = fields.Terms(Field);
            if (terms == null)
                // No terms

            TermsEnum te = terms.Iterator(null);
            BytesRef seekStart = termPrefix != null ? termPrefix : new BytesRef();
            //System.out.println("seekStart=" + seekStart.utf8ToString());
            if (te.SeekCeil(seekStart) == TermsEnum.SeekStatus.END)
                // No terms match

            // If we need our "term index wrapper", these will be
            // init'd below:
            IList<BytesRef> indexedTerms = null;
            PagedBytes indexedTermsBytes = null;

            bool testedOrd = false;

            // we need a minimum of 9 bytes, but round up to 12 since the space would
            // be wasted with most allocators anyway.
            sbyte[] tempArr = new sbyte[12];

            // enumerate all terms, and build an intermediate form of the un-inverted field.
            // During this intermediate form, every document has a (potential) byte[]
            // and the int[maxDoc()] array either contains the termNumber list directly
            // or the *end* offset of the termNumber list in its byte array (for faster
            // appending and faster creation of the final form).
            // idea... if things are too large while building, we could do a range of docs
            // at a time (but it would be a fair amount slower to build)
            // could also do ranges in parallel to take advantage of multiple CPUs

            // OPTIONAL: remap the largest df terms to the lowest 128 (single byte)
            // values.  this requires going over the field first to find the most
            // frequent terms ahead of time.

            int termNum = 0;
            DocsEnum = null;

            // Loop begins with te positioned to first term (we call
            // seek above):
            for (; ; )
                BytesRef t = te.Term();
                if (t == null || (termPrefix != null && !StringHelper.StartsWith(t, termPrefix)))
                //System.out.println("visit term=" + t.utf8ToString() + " " + t + " termNum=" + termNum);

                if (!testedOrd)
                        OrdBase = (int)te.Ord();
                        //System.out.println("got ordBase=" + ordBase);
                    catch (System.NotSupportedException uoe)
                        // Reader cannot provide ord support, so we wrap
                        // our own support by creating our own terms index:
                        indexedTerms = new List<BytesRef>();
                        indexedTermsBytes = new PagedBytes(15);
                        //System.out.println("NO ORDS");
                    testedOrd = true;

                VisitTerm(te, termNum);

                if (indexedTerms != null && (termNum & IndexIntervalMask) == 0)
                    // Index this term
                    SizeOfIndexedStrings += t.Length;
                    BytesRef indexedTerm = new BytesRef();
                    indexedTermsBytes.Copy(t, indexedTerm);
                    // TODO: really should 1) strip off useless suffix,
                    // and 2) use FST not array/PagedBytes

                int df = te.DocFreq();
                if (df <= MaxTermDocFreq)
                    DocsEnum = te.Docs(liveDocs, DocsEnum, DocsEnum.FLAG_NONE);

                    // dF, but takes deletions into account
                    int actualDF = 0;

                    for (; ; )
                        int doc = DocsEnum.NextDoc();
                        if (doc == DocIdSetIterator.NO_MORE_DOCS)
                        //System.out.println("  chunk=" + chunk + " docs");


                        //System.out.println("    docID=" + doc);
                        // add TNUM_OFFSET to the term number to make room for special reserved values:
                        // 0 (end term) and 1 (index into byte array follows)
                        int delta = termNum - lastTerm[doc] + TNUM_OFFSET;
                        lastTerm[doc] = termNum;
                        int val = index[doc];

                        if ((val & 0xff) == 1)
                            // index into byte array (actually the end of
                            // the doc-specific byte[] when building)
                            int pos = (int)((uint)val >> 8);
                            int ilen = VIntSize(delta);
                            sbyte[] arr = bytes[doc];
                            int newend = pos + ilen;
                            if (newend > arr.Length)
                                // We avoid a doubling strategy to lower memory usage.
                                // this faceting method isn't for docs with many terms.
                                // In hotspot, objects have 2 words of overhead, then fields, rounded up to a 64-bit boundary.
                                // TODO: figure out what array lengths we can round up to w/o actually using more memory
                                // (how much space does a byte[] take up?  Is data preceded by a 32 bit length only?
                                // It should be safe to round up to the nearest 32 bits in any case.
                                int newLen = (newend + 3) & unchecked((int)0xfffffffc); // 4 byte alignment
                                sbyte[] newarr = new sbyte[newLen];
                                Array.Copy(arr, 0, newarr, 0, pos);
                                arr = newarr;
                                bytes[doc] = newarr;
                            pos = WriteInt(delta, arr, pos);
                            index[doc] = (pos << 8) | 1; // update pointer to end index in byte[]
                            // OK, this int has data in it... find the end (a zero starting byte - not
                            // part of another number, hence not following a byte with the high bit set).
                            int ipos;
                            if (val == 0)
                                ipos = 0;
                            else if ((val & 0x0000ff80) == 0)
                                ipos = 1;
                            else if ((val & 0x00ff8000) == 0)
                                ipos = 2;
                            else if ((val & 0xff800000) == 0)
                                ipos = 3;
                                ipos = 4;

                            //System.out.println("      ipos=" + ipos);

                            int endPos = WriteInt(delta, tempArr, ipos);
                            //System.out.println("      endpos=" + endPos);
                            if (endPos <= 4)
                                //System.out.println("      fits!");
                                // value will fit in the integer... move bytes back
                                for (int j = ipos; j < endPos; j++)
                                    val |= (tempArr[j] & 0xff) << (j << 3);
                                index[doc] = val;
                                // value won't fit... move integer into byte[]
                                for (int j = 0; j < ipos; j++)
                                    tempArr[j] = (sbyte)val;
                                    val = (int)((uint)val >> 8);
                                // point at the end index in the byte[]
                                index[doc] = (endPos << 8) | 1;
                                bytes[doc] = tempArr;
                                tempArr = new sbyte[12];
                    SetActualDocFreq(termNum, actualDF);

                if (te.Next() == null)

            NumTermsInField = termNum;

            long midPoint = DateTime.Now.Millisecond;

            if (TermInstances == 0)
                // we didn't invert anything
                // lower memory consumption.
                Tnums = null;
                this.Index = index;

                // transform intermediate form into the final form, building a single byte[]
                // at a time, and releasing the intermediate byte[]s as we go to avoid
                // increasing the memory footprint.

                for (int pass = 0; pass < 256; pass++)
                    sbyte[] target = Tnums[pass];
                    int pos = 0; // end in target;
                    if (target != null)
                        pos = target.Length;
                        target = new sbyte[4096];

                    // loop over documents, 0x00ppxxxx, 0x01ppxxxx, 0x02ppxxxx
                    // where pp is the pass (which array we are building), and xx is all values.
                    // each pass shares the same byte[] for termNumber lists.
                    for (int docbase = pass << 16; docbase < maxDoc; docbase += (1 << 24))
                        int lim = Math.Min(docbase + (1 << 16), maxDoc);
                        for (int doc = docbase; doc < lim; doc++)
                            //System.out.println("  pass="******" process docID=" + doc);
                            int val = index[doc];
                            if ((val & 0xff) == 1)
                                int len = (int)((uint)val >> 8);
                                //System.out.println("    ptr pos=" + pos);
                                index[doc] = (pos << 8) | 1; // change index to point to start of array
                                if ((pos & 0xff000000) != 0)
                                    // we only have 24 bits for the array index
                                    throw new InvalidOperationException("Too many values for UnInvertedField faceting on field " + Field);
                                sbyte[] arr = bytes[doc];
                                for(byte b : arr) {
                                  //System.out.println("      b=" + Integer.toHexString((int) b));
                                bytes[doc] = null; // IMPORTANT: allow GC to avoid OOM
                                if (target.Length <= pos + len)
                                    int newlen = target.Length;
                                    /// <summary>
                                    ///* we don't have to worry about the array getting too large
                                    /// since the "pos" param will overflow first (only 24 bits available)
                                    /// if ((newlen<<1) <= 0) {
                                    ///  // overflow...
                                    ///  newlen = Integer.MAX_VALUE;
                                    ///  if (newlen <= pos + len) {
                                    ///    throw new SolrException(400,"Too many terms to uninvert field!");
                                    ///  }
                                    /// } else {
                                    ///  while (newlen <= pos + len) newlen<<=1;  // doubling strategy
                                    /// }
                                    /// ***
                                    /// </summary>
                                    while (newlen <= pos + len) // doubling strategy
                                        newlen <<= 1;
                                    sbyte[] newtarget = new sbyte[newlen];
                                    Array.Copy(target, 0, newtarget, 0, pos);
                                    target = newtarget;
                                Array.Copy(arr, 0, target, pos, len);
                                pos += len + 1; // skip single byte at end and leave it 0 for terminator

                    // shrink array
                    if (pos < target.Length)
                        sbyte[] newtarget = new sbyte[pos];
                        Array.Copy(target, 0, newtarget, 0, pos);
                        target = newtarget;

                    Tnums[pass] = target;

                    if ((pass << 16) > maxDoc)
            if (indexedTerms != null)
                IndexedTermsArray = indexedTerms.ToArray();

            long endTime = DateTime.Now.Millisecond;

            Total_time = (int)(endTime - startTime);
            Phase1_time = (int)(midPoint - startTime);
 /// <summary>
 /// Not supported by this implementation: every call throws.
 /// </summary>
 /// <param name="liveDocs"> ignored. </param>
 /// <param name="reuse"> ignored. </param>
 /// <param name="flags"> ignored. </param>
 /// <exception cref="System.NotSupportedException"> always. </exception>
 public override DocsEnum Docs(Bits liveDocs, DocsEnum reuse, int flags)
 {
     throw new System.NotSupportedException();
 }
Example #36
 /// <summary>
 /// Not supported by this implementation: every call throws the exception
 /// produced by <c>UnsupportedOperationException.Create()</c>.
 /// </summary>
 /// <param name="liveDocs"> ignored. </param>
 /// <param name="reuse"> ignored. </param>
 /// <param name="flags"> ignored. </param>
 public override DocsEnum Docs(IBits liveDocs, DocsEnum reuse, DocsFlags flags)
 {
     throw UnsupportedOperationException.Create();
 }
Example #37
        /// <summary>
        /// Asserts that two <see cref="Fields"/> instances are equivalent: same field
        /// names in the same order, and for every field the same terms, the same
        /// total term frequency per term, and the same per-document frequency.
        /// When positions are available the positions are compared as well, and
        /// offsets are compared when the first enum exposes an offset attribute.
        /// Document IDs themselves are deliberately not compared.
        /// </summary>
        /// <param name="d1"> the first fields instance (may be <c>null</c>). </param>
        /// <param name="d2"> the second fields instance. </param>
        public static void VerifyEquals(Fields d1, Fields d2)
            // A null d1 is only acceptable when d2 is null or exposes no fields.
            // NOTE(review): no early return is visible after this assert; without one the
            // code below would go on to use a null d1 - confirm.
            if (d1 == null)
                Assert.IsTrue(d2 == null || d2.Count == 0);
            Assert.IsTrue(d2 != null);

            // d2's field names are walked with an explicit enumerator, in lock-step with
            // the foreach over d1.
            IEnumerator <string> fieldsEnum2 = d2.GetEnumerator();

            foreach (string field1 in d1)
                // NOTE(review): no MoveNext() call on fieldsEnum2 is visible before Current
                // is read - confirm the enumerator is advanced once per iteration.
                string field2 = fieldsEnum2.Current;
                Assert.AreEqual(field1, field2);

                Terms terms1 = d1.GetTerms(field1);
                TermsEnum termsEnum1 = terms1.GetIterator(null);

                Terms terms2 = d2.GetTerms(field2);
                TermsEnum termsEnum2 = terms2.GetIterator(null);

                // Enums are declared outside the term loop so they can be passed back in
                // as the "reuse" argument on the next term.
                DocsAndPositionsEnum dpEnum1 = null;
                DocsAndPositionsEnum dpEnum2 = null;
                DocsEnum             dEnum1  = null;
                DocsEnum             dEnum2  = null;

                BytesRef term1;
                while ((term1 = termsEnum1.Next()) != null)
                    BytesRef term2 = termsEnum2.Next();
                    Assert.AreEqual(term1, term2);
                    Assert.AreEqual(termsEnum1.TotalTermFreq, termsEnum2.TotalTermFreq);

                    dpEnum1 = termsEnum1.DocsAndPositions(null, dpEnum1);
                    dpEnum2 = termsEnum2.DocsAndPositions(null, dpEnum2);
                    // Positions branch: taken when the first side can supply positions.
                    if (dpEnum1 != null)
                        int docID1 = dpEnum1.NextDoc();
                        // docIDs are not supposed to be equal
                        //int docID2 = dpEnum2.NextDoc();
                        //Assert.AreEqual(docID1, docID2);
                        Assert.IsTrue(docID1 != DocIdSetIterator.NO_MORE_DOCS);

                        int freq1 = dpEnum1.Freq;
                        int freq2 = dpEnum2.Freq;
                        Assert.AreEqual(freq1, freq2);
                        // Offsets are optional; null means the enum has no offset attribute.
                        IOffsetAttribute offsetAtt1 = dpEnum1.Attributes.HasAttribute <IOffsetAttribute>() ? dpEnum1.Attributes.GetAttribute <IOffsetAttribute>() : null;
                        IOffsetAttribute offsetAtt2 = dpEnum2.Attributes.HasAttribute <IOffsetAttribute>() ? dpEnum2.Attributes.GetAttribute <IOffsetAttribute>() : null;

                        // NOTE(review): the body of this check is not visible here; presumably it
                        // asserts that offsetAtt2 is present/absent to match offsetAtt1 - confirm.
                        if (offsetAtt1 != null)

                        // Compare every position (and offsets, when present) of the single doc.
                        for (int posUpto = 0; posUpto < freq1; posUpto++)
                            int pos1 = dpEnum1.NextPosition();
                            int pos2 = dpEnum2.NextPosition();
                            Assert.AreEqual(pos1, pos2);
                            if (offsetAtt1 != null)
                                Assert.AreEqual(offsetAtt1.StartOffset, offsetAtt2.StartOffset);
                                Assert.AreEqual(offsetAtt1.EndOffset, offsetAtt2.EndOffset);
                        // Each term is expected to match exactly one document on both sides.
                        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum1.NextDoc());
                        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum2.NextDoc());
                        // NOTE(review): docID1/freq1/freq2 are declared again below, so the following
                        // lines presumably form the "no positions" else-branch - confirm.
                        dEnum1 = TestUtil.Docs(Random(), termsEnum1, null, dEnum1, DocsFlags.FREQS);
                        dEnum2 = TestUtil.Docs(Random(), termsEnum2, null, dEnum2, DocsFlags.FREQS);
                        int docID1 = dEnum1.NextDoc();
                        // docIDs are not supposed to be equal
                        //int docID2 = dEnum2.NextDoc();
                        //Assert.AreEqual(docID1, docID2);
                        Assert.IsTrue(docID1 != DocIdSetIterator.NO_MORE_DOCS);
                        int freq1 = dEnum1.Freq;
                        int freq2 = dEnum2.Freq;
                        Assert.AreEqual(freq1, freq2);
                        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dEnum1.NextDoc());
                        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dEnum2.NextDoc());

Example #38
 /// <summary>
 /// Creates a new <see cref="FilterDocsEnum"/> that wraps the given enum.
 /// </summary>
 /// <param name="input"> the underlying <see cref="DocsEnum"/> instance; stored
 /// as-is, without validation. </param>
 public FilterDocsEnum(DocsEnum input)
 {
     this.m_input = input;
 }
Example #39
        // DocValues updates
        /// <summary>
        /// Resolves each doc-values update to the documents it affects in
        /// <paramref name="reader"/> and records the new value for them in
        /// <paramref name="dvUpdatesContainer"/>. For every update the update's term is
        /// looked up in the reader's postings; each live matching document whose ID is
        /// below the update's <c>DocIDUpto</c> gets the update's value. Updates are
        /// processed in the order they are enumerated. The whole method runs while
        /// holding the lock on <c>this</c>.
        /// </summary>
        /// <param name="updates"> the updates to apply, in arrival order. </param>
        /// <param name="rld"> supplies the live docs used to filter matching documents. </param>
        /// <param name="reader"> the segment whose postings are searched. </param>
        /// <param name="dvUpdatesContainer"> receives the per-field update buffers. </param>
        private void ApplyDocValuesUpdates <T1>(IEnumerable <T1> updates, ReadersAndUpdates rld, SegmentReader reader, DocValuesFieldUpdates.Container dvUpdatesContainer) where T1 : DocValuesUpdate
            lock (this)
                Fields fields = reader.Fields;
                // NOTE(review): the statement guarded by this check is not visible here;
                // presumably the method returns early when there are no postings - confirm.
                if (fields == null)
                    // this reader has no postings

                // TODO: we can process the updates per DV field, from last to first so that
                // if multiple terms affect same document for the same field, we add an update
                // only once (that of the last term). To do that, we can keep a bitset which
                // marks which documents have already been updated. So e.g. if term T1
                // updates doc 7, and then we process term T2 and it updates doc 7 as well,
                // we don't apply the update since we know T1 came last and therefore wins
                // the update.
                // We can also use that bitset as 'liveDocs' to pass to, so
                // that these documents aren't even returned.

                // State carried across updates: the field whose TermsEnum is currently open.
                string    currentField = null;
                TermsEnum termsEnum    = null;
                // NOTE(review): docs is passed as the reuse argument below but is never
                // assigned in the visible code, so it looks like it is always null - confirm.
                DocsEnum  docs         = null;

                //System.out.println(Thread.currentThread().getName() + " numericDVUpdate reader=" + reader);
                foreach (DocValuesUpdate update in updates)
                    Term term  = update.Term;
                    // Only documents with an ID strictly below this bound are updated.
                    int  limit = update.DocIDUpto;

                    // TODO: we traverse the terms in update order (not term order) so that we
                    // apply the updates in the correct order, i.e. if two terms update the
                    // same document, the last one that came in wins, irrespective of the
                    // terms lexical order.
                    // we can apply the updates in terms order if we keep an updatesGen (and
                    // increment it with every update) and attach it to each NumericUpdate. Note
                    // that we cannot rely only on docIDUpto because an app may send two updates
                    // which will get same docIDUpto, yet will still need to respect the order
                    // those updates arrived.

                    // Switch to a new field: (re)open the TermsEnum for it.
                    if (!term.Field().Equals(currentField))
                        // if we change the code to process updates in terms order, enable this assert
                        //        assert currentField == null || currentField.compareTo(term.field()) < 0;
                        currentField = term.Field();
                        Terms terms = fields.Terms(currentField);
                        // NOTE(review): an "else" appears to be missing between the next two
                        // assignments (enum obtained when terms exist, cleared otherwise) - confirm.
                        if (terms != null)
                            termsEnum = terms.Iterator(termsEnum);
                            termsEnum = null;
                            continue; // no terms in that field

                    // NOTE(review): the statement guarded by this check is not visible here;
                    // presumably the update is skipped when the field has no terms - confirm.
                    if (termsEnum == null)
                    // System.out.println("  term=" + term);

                    if (termsEnum.SeekExact(term.Bytes()))
                        // we don't need term frequencies for this
                        DocsEnum docsEnum = termsEnum.Docs(rld.LiveDocs, docs, DocsEnum.FLAG_NONE);

                        //System.out.println("BDS: got docsEnum=" + docsEnum);

                        // Fetch the update buffer for this (field, type), creating it on first use.
                        DocValuesFieldUpdates dvUpdates = dvUpdatesContainer.GetUpdates(update.Field, update.Type);
                        if (dvUpdates == null)
                            dvUpdates = dvUpdatesContainer.NewUpdates(update.Field, update.Type, reader.MaxDoc);
                        int doc;
                        while ((doc = docsEnum.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                            //System.out.println(Thread.currentThread().getName() + " numericDVUpdate term=" + term + " doc=" + docID);
                            if (doc >= limit)
                                break; // no more docs that can be updated for this term
                            dvUpdates.Add(doc, update.Value);
            /// <summary>
            /// Delegates to the base implementation and wraps a non-null result in an
            /// <see cref="AssertingDocsEnum"/>. Asserts (debug builds) that this enum is
            /// positioned before docs are requested.
            /// </summary>
            /// <param name="liveDocs"> forwarded unchanged to the base implementation. </param>
            /// <param name="reuse"> a prior enum for possible reuse; when it is an
            /// <see cref="AssertingDocsEnum"/> it is unwrapped first so the base
            /// implementation sees the enum it originally produced. </param>
            /// <param name="flags"> forwarded unchanged to the base implementation. </param>
            /// <returns> the wrapped enum, or <c>null</c> when the base implementation
            /// returns <c>null</c>. </returns>
            public override DocsEnum Docs(Bits liveDocs, DocsEnum reuse, int flags)
            {
                Debug.Assert(State == State_e.POSITIONED, "docs(...) called on unpositioned TermsEnum");

                // TODO: should we give this thing a random to be super-evil,
                // and randomly *not* unwrap?
                if (reuse is AssertingDocsEnum)
                {
                    reuse = ((AssertingDocsEnum)reuse).DocsEnumIn_Nunit();
                }

                DocsEnum docs = base.Docs(liveDocs, reuse, flags);
                return docs == null ? null : new AssertingDocsEnum(docs);
            }
Example #41
 /// <summary>
 /// Gets a <see cref="DocsEnum"/> for the current term, with
 /// control over which optional per-document values (such as freqs)
 /// are required. Do not call this when the enum is unpositioned.
 /// </summary>
 /// <param name="liveDocs"> Unset bits are documents that should not
 /// be returned </param>
 /// <param name="reuse"> Pass a prior <see cref="DocsEnum"/> for possible reuse </param>
 /// <param name="flags"> Specifies which optional per-document values
 ///        you require; <see cref="DocsFlags"/></param>
 /// <returns> The enum for the current term; per this contract it is never <c>null</c>. </returns>
 /// <seealso cref="Docs(IBits, DocsEnum)"/>
 public abstract DocsEnum Docs(IBits liveDocs, DocsEnum reuse, DocsFlags flags);
 /// <summary>
 /// Wraps <paramref name="in"/>, delegating to the two-argument constructor with
 /// <c>failOnUnsupportedDocID</c> set to <c>true</c>.
 /// </summary>
 /// <param name="in"> the <see cref="DocsEnum"/> to wrap. </param>
 public AssertingDocsEnum(DocsEnum @in)
     : this(@in, true)
 {
 }
        /// <summary>
        /// Builds documents of 199 random integer tokens each (values below
        /// <c>max</c>), picks one random token value as the probe term, and then
        /// repeatedly walks a freq-enabled <see cref="DocsEnum"/> for that term over
        /// every leaf reader, checking doc IDs and frequencies against
        /// <c>freqInDoc</c> and occasionally exercising <c>Advance</c>.
        /// </summary>
        public virtual void TestRandomDocs()
            Directory         dir    = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));
            int numDocs = AtLeast(49);
            int max     = 15678;
            // The token value whose postings are verified below.
            int term    = Random().Next(max);

            // Expected frequency of the probe term, indexed by document number.
            int[]     freqInDoc  = new int[numDocs];
            FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);

            customType.OmitNorms = true;
            for (int i = 0; i < numDocs; i++)
                Document      doc     = new Document();
                StringBuilder builder = new StringBuilder();
                for (int j = 0; j < 199; j++)
                    int nextInt = Random().Next(max);
                    builder.Append(nextInt).Append(' ');
                    // NOTE(review): the body of this check is not visible here; presumably it
                    // counts an occurrence of the probe term in freqInDoc[i] - confirm.
                    if (nextInt == term)
                doc.Add(NewField(FieldName, builder.ToString(), customType));
                // NOTE(review): no call adding doc to the writer is visible here - confirm.

            IndexReader reader = writer.Reader;


            int num = AtLeast(13);

            for (int i = 0; i < num; i++)
                BytesRef           bytes            = new BytesRef("" + term);
                IndexReaderContext topReaderContext = reader.Context;
                foreach (AtomicReaderContext context in topReaderContext.Leaves)
                    int      maxDoc   = context.AtomicReader.MaxDoc;
                    DocsEnum docsEnum = TestUtil.Docs(Random(), context.Reader, FieldName, bytes, null, null, DocsFlags.FREQS);
                    // NOTE(review): the body of this check is not visible here; presumably it
                    // handles a leaf in which the probe term occurs in no document - confirm.
                    if (FindNext(freqInDoc, context.DocBase, context.DocBase + maxDoc) == int.MaxValue)
                    // Walk the leaf's documents; j is the leaf-local doc ID.
                    // NOTE(review): no NextDoc()/positioning call is visible before the
                    // DocID assertion below - confirm how the enum is advanced.
                    for (int j = 0; j < maxDoc; j++)
                        if (freqInDoc[context.DocBase + j] != 0)
                            Assert.AreEqual(j, docsEnum.DocID);
                            Assert.AreEqual(docsEnum.Freq, freqInDoc[context.DocBase + j]);
                            // On even outer iterations, randomly jump ahead with Advance().
                            if (i % 2 == 0 && Random().Next(10) == 0)
                                int next       = FindNext(freqInDoc, context.DocBase + j + 1, context.DocBase + maxDoc) - context.DocBase;
                                int advancedTo = docsEnum.Advance(next);
                                // NOTE(review): an "else" appears to be missing between the next
                                // two assertions (past-the-end target vs. in-range target) - confirm.
                                if (next >= maxDoc)
                                    Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, advancedTo);
                                    Assert.IsTrue(next >= advancedTo, "advanced to: " + advancedTo + " but should be <= " + next);
                    // After the walk the enum must be exhausted.
                    Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docsEnum.DocID, "DocBase: " + context.DocBase + " maxDoc: " + maxDoc + " " + docsEnum.GetType());

 /// <summary>
 /// Wraps <paramref name="in"/> and asserts (debug builds) that it starts out
 /// with an initial doc ID of -1.
 /// </summary>
 /// <param name="in"> the <see cref="DocsEnum"/> to wrap. </param>
 /// <param name="failOnUnsupportedDocID"> when <c>true</c>, a
 /// <see cref="System.NotSupportedException"/> raised while reading the initial
 /// doc ID is propagated to the caller; when <c>false</c> it is ignored. </param>
 /// <exception cref="System.NotSupportedException"> if the wrapped enum cannot
 /// report its doc ID and <paramref name="failOnUnsupportedDocID"/> is <c>true</c>. </exception>
 public AssertingDocsEnum(DocsEnum @in, bool failOnUnsupportedDocID)
     : base(@in)
 {
     try
     {
         int docid = @in.DocID();
         Debug.Assert(docid == -1, @in.GetType() + ": invalid initial doc id: " + docid);
     }
     catch (System.NotSupportedException)
     {
         if (failOnUnsupportedDocID)
         {
             // Bare rethrow so the original stack trace is preserved.
             throw;
         }
     }

     Doc = -1;
 }
Example #45
 /// <summary>
 /// Forwards the request unchanged to the wrapped <c>termsEnum</c>.
 /// </summary>
 /// <param name="liveDocs"> passed through to the wrapped enum. </param>
 /// <param name="reuse"> passed through to the wrapped enum. </param>
 /// <param name="flags"> passed through to the wrapped enum. </param>
 /// <returns> whatever the wrapped enum returns. </returns>
 public override DocsEnum Docs(IBits liveDocs, DocsEnum reuse, DocsFlags flags)
 {
     return termsEnum.Docs(liveDocs, reuse, flags);
 }
 /// <summary>
 /// Not supported by this implementation: every call throws.
 /// </summary>
 /// <param name="liveDocs"> ignored. </param>
 /// <param name="reuse"> ignored. </param>
 /// <param name="flags"> ignored. </param>
 /// <exception cref="NotSupportedException"> always. </exception>
 public override DocsEnum Docs(IBits liveDocs, DocsEnum reuse, DocsFlags flags)
 {
     throw new NotSupportedException();
 }
Example #47
        /// <summary>
        /// Builds a <see cref="MultiDocsEnum"/> for the current term by pulling a
        /// <see cref="DocsEnum"/> from each of the first <c>numTop</c> entries of
        /// <c>top</c>, giving each sub-enum the slice of <paramref name="liveDocs"/>
        /// that corresponds to its sub-reader, and then resetting the multi-enum over
        /// the collected sub-enums.
        /// </summary>
        /// <param name="liveDocs"> live-docs bits for the composite view, or <c>null</c>
        /// when there are no deletions. </param>
        /// <param name="reuse"> a prior enum; reused only when it is a
        /// <see cref="MultiDocsEnum"/> for which <c>CanReuse(this)</c> is true. </param>
        /// <param name="flags"> forwarded to every sub-enum. </param>
        public override DocsEnum Docs(IBits liveDocs, DocsEnum reuse, DocsFlags flags)
            MultiDocsEnum docsEnum;

            // Can only reuse if incoming enum is also a MultiDocsEnum
            // NOTE(review): an "else" appears to be missing before the second
            // "new MultiDocsEnum" below (the not-reusable-type case) - confirm.
            if (reuse != null && reuse is MultiDocsEnum)
                docsEnum = (MultiDocsEnum)reuse;
                // ... and was previously created w/ this MultiTermsEnum:
                if (!docsEnum.CanReuse(this))
                    docsEnum = new MultiDocsEnum(this, subs.Length);
                docsEnum = new MultiDocsEnum(this, subs.Length);

            MultiBits multiLiveDocs;

            // multiLiveDocs is non-null only when liveDocs is itself a MultiBits.
            // NOTE(review): an "else" appears to be missing before the null assignment - confirm.
            if (liveDocs is MultiBits)
                multiLiveDocs = (MultiBits)liveDocs;
                multiLiveDocs = null;

            // Number of sub-enums collected into subDocs so far.
            // NOTE(review): no increment of upto is visible in the loop below - confirm.
            int upto = 0;

            for (int i = 0; i < numTop; i++)
                TermsEnumWithSlice entry = top[i];

                // Live docs to hand to this sub-enum (null means no deletions).
                IBits b;

                if (multiLiveDocs != null)
                    // optimize for common case: requested skip docs is a
                    // congruent sub-slice of MultiBits: in this case, we
                    // just pull the liveDocs from the sub reader, rather
                    // than making the inefficient
                    // Slice(Multi(sub-readers)):
                    MultiBits.SubResult sub = multiLiveDocs.GetMatchingSub(entry.SubSlice);
                    // NOTE(review): an "else" appears to be missing between the two
                    // assignments to b below - confirm.
                    if (sub.Matches)
                        b = sub.Result;
                        // custom case: requested skip docs is foreign:
                        // must slice it on every access
                        b = new BitsSlice(liveDocs, entry.SubSlice);
                else if (liveDocs != null)
                    b = new BitsSlice(liveDocs, entry.SubSlice);
                    // no deletions
                    b = null;

                Debug.Assert(entry.Index < docsEnum.subDocsEnum.Length, entry.Index + " vs " + docsEnum.subDocsEnum.Length + "; " + subs.Length);
                // Pass the previously cached sub-enum for this index as the reuse candidate.
                DocsEnum subDocsEnum = entry.Terms.Docs(b, docsEnum.subDocsEnum[entry.Index], flags);
                if (subDocsEnum != null)
                    docsEnum.subDocsEnum[entry.Index] = subDocsEnum;
                    subDocs[upto].DocsEnum            = subDocsEnum;
                    subDocs[upto].Slice = entry.SubSlice;
                    // should this be an error?
                    Debug.Assert(false, "One of our subs cannot provide a docsenum");

            // NOTE(review): the statement for the "no sub-enums" case is not visible here;
            // as written the Reset call reads as guarded by upto == 0 - confirm.
            if (upto == 0)
                return(docsEnum.Reset(subDocs, upto));
Example #48
        /// <summary>
        /// Merges <c>Reader1</c> and <c>Reader2</c> into a new segment, reopens it with
        /// a <see cref="SegmentReader"/>, and checks the merged result: document
        /// count, stored-field counts per document, that postings exist for the term
        /// "field" in <c>DocHelper.TEXT_FIELD_2_KEY</c>, the number of fields carrying
        /// term vectors, and the terms and frequencies of document 0's term vector.
        /// </summary>
        public virtual void TestMerge()
            Codec       codec = Codec.Default;
            SegmentInfo si    = new SegmentInfo(MergedDir, Constants.LUCENE_MAIN_VERSION, MergedSegment, -1, false, codec, null);

            SegmentMerger merger     = new SegmentMerger(Arrays.AsList <AtomicReader>(Reader1, Reader2), si, InfoStream.Default, MergedDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, CheckAbort.NONE, new FieldInfos.FieldNumbers(), NewIOContext(Random()), true);
            MergeState    mergeState = merger.Merge();
            int           docsMerged = mergeState.SegmentInfo.DocCount;

            Assert.IsTrue(docsMerged == 2);
            //Should be able to open a new SegmentReader against the new directory
            SegmentReader mergedReader = new SegmentReader(new SegmentCommitInfo(new SegmentInfo(MergedDir, Constants.LUCENE_MAIN_VERSION, MergedSegment, docsMerged, false, codec, null), 0, -1L, -1L), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random()));

            Assert.IsTrue(mergedReader != null);
            Assert.IsTrue(mergedReader.NumDocs == 2);
            Document newDoc1 = mergedReader.Document(0);

            Assert.IsTrue(newDoc1 != null);
            //There are 2 unstored fields on the document
            // Stored-field count must equal the source doc's field count minus the unstored ones.
            Assert.IsTrue(DocHelper.NumFields(newDoc1) == DocHelper.NumFields(Doc1) - DocHelper.Unstored.Count);
            Document newDoc2 = mergedReader.Document(1);

            Assert.IsTrue(newDoc2 != null);
            Assert.IsTrue(DocHelper.NumFields(newDoc2) == DocHelper.NumFields(Doc2) - DocHelper.Unstored.Count);

            // The merged segment must contain at least one posting for term "field".
            DocsEnum termDocs = TestUtil.Docs(Random(), mergedReader, DocHelper.TEXT_FIELD_2_KEY, new BytesRef("field"), MultiFields.GetLiveDocs(mergedReader), null, 0);

            Assert.IsTrue(termDocs != null);
            Assert.IsTrue(termDocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);

            // Count of fields that have term vectors.
            int tvCount = 0;

            foreach (FieldInfo fieldInfo in mergedReader.FieldInfos)
                // NOTE(review): the body of this check is not visible here; presumably it
                // increments tvCount - confirm.
                if (fieldInfo.HasVectors)

            //System.out.println("stored size: " + stored.Size());
            Assert.AreEqual(3, tvCount, "We do not have 3 fields that were indexed with term vector");

            Terms vector = mergedReader.GetTermVectors(0).GetTerms(DocHelper.TEXT_FIELD_2_KEY);

            Assert.AreEqual(3, vector.Count);
            TermsEnum termsEnum = vector.GetIterator(null);

            // Index into DocHelper.FIELD_2_FREQS for the term currently being checked.
            // NOTE(review): no increment of i is visible in the loop below - confirm.
            int i = 0;

            while (termsEnum.Next() != null)
                string term = termsEnum.Term.Utf8ToString();
                int    freq = (int)termsEnum.TotalTermFreq;
                //System.out.println("Term: " + term + " Freq: " + freq);
                Assert.IsTrue(DocHelper.FIELD_2_TEXT.IndexOf(term) != -1);
                Assert.IsTrue(DocHelper.FIELD_2_FREQS[i] == freq);

Example #49
        public virtual void TestIntersectRandom()
            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            int numTerms = AtLeast(300);
            //final int numTerms = 50;

            HashSet <string>             terms        = new HashSet <string>();
            ICollection <string>         pendingTerms = new List <string>();
            IDictionary <BytesRef, int?> termToID     = new Dictionary <BytesRef, int?>();
            int id = 0;

            while (terms.Count != numTerms)
                string s = RandomString;
                if (!terms.Contains(s))
                    if (Random().Next(20) == 7)
                        AddDoc(w, pendingTerms, termToID, id++);
            AddDoc(w, pendingTerms, termToID, id++);

            BytesRef[]         termsArray = new BytesRef[terms.Count];
            HashSet <BytesRef> termsSet   = new HashSet <BytesRef>();

                int upto = 0;
                foreach (string s in terms)
                    BytesRef b = new BytesRef(s);
                    termsArray[upto++] = b;

            if (VERBOSE)
                Console.WriteLine("\nTEST: indexed terms (unicode order):");
                foreach (BytesRef t in termsArray)
                    Console.WriteLine("  " + t.Utf8ToString() + " -> id:" + termToID[t]);

            IndexReader r = w.Reader;


            // NOTE: intentional insanity!!
            FieldCache.Ints docIDToID = FieldCache.DEFAULT.GetInts(SlowCompositeReaderWrapper.Wrap(r), "id", false);

            for (int iter = 0; iter < 10 * RANDOM_MULTIPLIER; iter++)
                // TODO: can we also test infinite As here...?

                // From the random terms, pick some ratio and compile an
                // automaton:
                HashSet <string>     acceptTerms       = new HashSet <string>();
                SortedSet <BytesRef> sortedAcceptTerms = new SortedSet <BytesRef>();
                double    keepPct = Random().NextDouble();
                Automaton a;
                if (iter == 0)
                    if (VERBOSE)
                        Console.WriteLine("\nTEST: empty automaton");
                    a = BasicAutomata.MakeEmpty();
                    if (VERBOSE)
                        Console.WriteLine("\nTEST: keepPct=" + keepPct);
                    foreach (string s in terms)
                        string s2;
                        if (Random().NextDouble() <= keepPct)
                            s2 = s;
                            s2 = RandomString;
                        sortedAcceptTerms.Add(new BytesRef(s2));
                    a = BasicAutomata.MakeStringUnion(sortedAcceptTerms);

                if (Random().NextBoolean())
                    if (VERBOSE)
                        Console.WriteLine("TEST: reduce the automaton");

                CompiledAutomaton c = new CompiledAutomaton(a, true, false);

                BytesRef[]         acceptTermsArray = new BytesRef[acceptTerms.Count];
                HashSet <BytesRef> acceptTermsSet   = new HashSet <BytesRef>();
                int upto = 0;
                foreach (string s in acceptTerms)
                    BytesRef b = new BytesRef(s);
                    acceptTermsArray[upto++] = b;
                    Assert.IsTrue(Accepts(c, b));

                if (VERBOSE)
                    Console.WriteLine("\nTEST: accept terms (unicode order):");
                    foreach (BytesRef t in acceptTermsArray)
                        Console.WriteLine("  " + t.Utf8ToString() + (termsSet.Contains(t) ? " (exists)" : ""));

                for (int iter2 = 0; iter2 < 100; iter2++)
                    BytesRef startTerm = acceptTermsArray.Length == 0 || Random().NextBoolean() ? null : acceptTermsArray[Random().Next(acceptTermsArray.Length)];

                    if (VERBOSE)
                        Console.WriteLine("\nTEST: iter2=" + iter2 + " startTerm=" + (startTerm == null ? "<null>" : startTerm.Utf8ToString()));

                        if (startTerm != null)
                            int state = c.RunAutomaton.InitialState;
                            for (int idx = 0; idx < startTerm.Length; idx++)
                                int label = startTerm.Bytes[startTerm.Offset + idx] & 0xff;
                                Console.WriteLine("  state=" + state + " label=" + label);
                                state = c.RunAutomaton.Step(state, label);
                                Assert.IsTrue(state != -1);
                            Console.WriteLine("  state=" + state);

                    TermsEnum te = MultiFields.GetTerms(r, "f").Intersect(c, startTerm);

                    int loc;
                    if (startTerm == null)
                        loc = 0;
                        loc = Array.BinarySearch(termsArray, BytesRef.DeepCopyOf(startTerm));
                        if (loc < 0)
                            loc = -(loc + 1);
                            // startTerm exists in index
                    while (loc < termsArray.Length && !acceptTermsSet.Contains(termsArray[loc]))

                    DocsEnum docsEnum = null;
                    while (loc < termsArray.Length)
                        BytesRef expected = termsArray[loc];
                        BytesRef actual   = te.Next();
                        if (VERBOSE)
                            Console.WriteLine("TEST:   next() expected=" + expected.Utf8ToString() + " actual=" + (actual == null ? "null" : actual.Utf8ToString()));
                        Assert.AreEqual(expected, actual);
                        Assert.AreEqual(1, te.DocFreq());
                        docsEnum = TestUtil.Docs(Random(), te, null, docsEnum, DocsEnum.FLAG_NONE);
                        int docID = docsEnum.NextDoc();
                        Assert.IsTrue(docID != DocIdSetIterator.NO_MORE_DOCS);
                        Assert.AreEqual(docIDToID.Get(docID), (int)termToID[expected]);
                        } while (loc < termsArray.Length && !acceptTermsSet.Contains(termsArray[loc]));

Example #50
 /// <summary>
 /// Delegates to <c>tenum.Docs</c>, forwarding <paramref name="bits"/>,
 /// <paramref name="reuse"/> and <paramref name="flags"/> unchanged, and
 /// returns whatever that call produces.
 /// </summary>
 public override DocsEnum Docs(IBits bits, DocsEnum reuse, DocsFlags flags)
 {
     DocsEnum delegated = tenum.Docs(bits, reuse, flags);
     return delegated;
 }
Example #51
 /// <summary>
 /// Asserts that the <c>Doc</c> member agrees with <c>base.DocID()</c>; the failure
 /// message reports the runtime type of <c>DocsEnum</c> and both values.
 /// NOTE(review): no return statement is visible in this excerpt - confirm which
 /// value is returned against the full source.
 /// </summary>
 public override int DocID()
     Debug.Assert(Doc == base.DocID(), " invalid docID() in " + DocsEnum.GetType() + " " + base.DocID() + " instead of " + Doc);
Example #52
        /// <summary>
        /// Stress test that runs three iterations. Each iteration creates a directory and a
        /// <c>RandomIndexWriter</c>, prepares documents whose "field" value is "a" or "b" and whose
        /// stored "id" is the loop counter, then opens a reader and, ten times over, seeks the
        /// terms "a" and "b" and passes the resulting <c>DocsEnum</c> together with the matching
        /// doc id list to <c>TestOne</c>.
        /// NOTE(review): several statements (adding documents, filling the id lists, cleanup)
        /// are not visible in this excerpt - confirm against the full source.
        /// </summary>
        public virtual void TestStressAdvance_Mem()
            for (int iter = 0; iter < 3; iter++)
                if (VERBOSE)
                    Console.WriteLine("\nTEST: iter=" + iter);
                Directory          dir   = NewDirectory();
                RandomIndexWriter  w     = new RandomIndexWriter(Random(), dir);
                // Ids of the documents that received the value "a" (queried below via aDocs.Contains).
                HashSet <int>      aDocs = new HashSet <int>();
                Documents.Document doc   = new Documents.Document();
                // Both field instances are created once and have their StringValue reassigned per document.
                Field f = NewStringField("field", "", Field.Store.NO);
                Field idField = NewStringField("id", "", Field.Store.YES);
                int num = AtLeast(4097);
                if (VERBOSE)
                    Console.WriteLine("\nTEST: numDocs=" + num);
                for (int id = 0; id < num; id++)
                    // Random().Next(4) == 3 selects "a".
                    // NOTE(review): the else separating the "b" assignment is not visible here.
                    if (Random().Next(4) == 3)
                        f.StringValue = "a";
                        f.StringValue = "b";
                    idField.StringValue = "" + id;
                    if (VERBOSE)
                        Console.WriteLine("\nTEST: doc upto " + id);


                // Expected doc id lists for term "a" and term "b"; handed to TestOne below.
                IList <int> aDocIDs = new List <int>();
                IList <int> bDocIDs = new List <int>();

                DirectoryReader r         = w.Reader;
                // One slot per document in the reader (sized by r.MaxDoc).
                int[]           idToDocID = new int[r.MaxDoc];
                for (int docID = 0; docID < idToDocID.Length; docID++)
                    // Parse the stored "id" field of the document back into an int.
                    int id = Convert.ToInt32(r.Document(docID).Get("id"));
                    // NOTE(review): the branches that consume this check are not visible in this excerpt.
                    if (aDocs.Contains(id))
                // Term enumerator over "field", obtained through GetOnlySegmentReader(r).
                TermsEnum te = GetOnlySegmentReader(r).Fields.Terms("field").Iterator(null);

                // Fed back into TestUtil.Docs on every call (presumably as the reuse candidate - confirm).
                DocsEnum de = null;
                for (int iter2 = 0; iter2 < 10; iter2++)
                    if (VERBOSE)
                        Console.WriteLine("\nTEST: iter=" + iter + " iter2=" + iter2);
                    // SeekCeil must land exactly on "a" (status FOUND) before its postings are checked.
                    Assert.AreEqual(TermsEnum.SeekStatus.FOUND, te.SeekCeil(new BytesRef("a")));
                    de = TestUtil.Docs(Random(), te, null, de, DocsEnum.FLAG_NONE);
                    TestOne(de, aDocIDs);

                    // Same check for term "b".
                    Assert.AreEqual(TermsEnum.SeekStatus.FOUND, te.SeekCeil(new BytesRef("b")));
                    de = TestUtil.Docs(Random(), te, null, de, DocsEnum.FLAG_NONE);
                    TestOne(de, bDocIDs);


Example #53
        /// <summary>
        /// Randomized postings test. Builds <c>numDocs</c> documents, each carrying an int "id"
        /// field and a "content" field fed from a canned token stream of random "a"/"b"/"c"/"d"
        /// tokens with random position increments and offsets, while recording the expected
        /// tokens per term and per document in <c>actualTokens</c>. It then walks every leaf
        /// of the reader and, for each term, compares freq, positions and offsets reported by
        /// the enums against the recorded tokens.
        /// NOTE(review): some statements (e.g. storing tokens into the lists, adding the document
        /// to the writer, cleanup) are not visible in this excerpt - confirm against the full source.
        /// </summary>
        public virtual void TestRandom()
            // token -> docID -> tokens
            IDictionary <string, IDictionary <int?, IList <Token> > > actualTokens = new Dictionary <string, IDictionary <int?, IList <Token> > >();

            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(Random(), dir, Iwc);

            int numDocs = AtLeast(20);
            //final int numDocs = AtLeast(5);

            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);

            // TODO: randomize what IndexOptions we use; also test
            // changing this up in one IW buffered segment...:
            ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            // Randomly enable term vectors, with offsets and positions each chosen independently.
            if (Random().NextBoolean())
                ft.StoreTermVectors         = true;
                ft.StoreTermVectorOffsets   = Random().NextBoolean();
                ft.StoreTermVectorPositions = Random().NextBoolean();

            for (int docCount = 0; docCount < numDocs; docCount++)
                Document doc = new Document();
                // The "id" field carries docCount; it is read back below through docIDToID.
                doc.Add(new IntField("id", docCount, Field.Store.NO));
                IList <Token> tokens    = new List <Token>();
                int           numTokens = AtLeast(100);
                //final int numTokens = AtLeast(20);
                // pos starts at -1; the first increment is forced to be non-zero below, so the
                // first absolute position is never negative.
                int pos    = -1;
                int offset = 0;
                //System.out.println("doc id=" + docCount);
                for (int tokenCount = 0; tokenCount < numTokens; tokenCount++)
                    // Pick the token text through a chain of coin flips.
                    // NOTE(review): the final else before the "d" assignment is not visible here.
                    string text;
                    if (Random().NextBoolean())
                        text = "a";
                    else if (Random().NextBoolean())
                        text = "b";
                    else if (Random().NextBoolean())
                        text = "c";
                        text = "d";

                    int posIncr = Random().NextBoolean() ? 1 : Random().Next(5);
                    // The very first token may not have a zero position increment.
                    if (tokenCount == 0 && posIncr == 0)
                        posIncr = 1;
                    int offIncr     = Random().NextBoolean() ? 0 : Random().Next(5);
                    int tokenOffset = Random().Next(5);

                    // Start offset is offset + offIncr; end offset adds tokenOffset on top of that.
                    Token token = MakeToken(text, posIncr, offset + offIncr, offset + offIncr + tokenOffset);
                    // Lazily create the per-term and per-document containers.
                    if (!actualTokens.ContainsKey(text))
                        actualTokens[text] = new Dictionary <int?, IList <Token> >();
                    IDictionary <int?, IList <Token> > postingsByDoc = actualTokens[text];
                    if (!postingsByDoc.ContainsKey(docCount))
                        postingsByDoc[docCount] = new List <Token>();
                    pos += posIncr;
                    // stuff abs position into type:
                    token.Type = "" + pos;
                    offset    += offIncr + tokenOffset;
                    //System.out.println("  " + token + " posIncr=" + token.getPositionIncrement() + " pos=" + pos + " off=" + token.StartOffset() + "/" + token.EndOffset() + " (freq=" + postingsByDoc.Get(docCount).Size() + ")");
                doc.Add(new Field("content", new CannedTokenStream(tokens.ToArray()), ft));
            DirectoryReader r = w.Reader;


            string[] terms = new string[] { "a", "b", "c", "d" };
            foreach (AtomicReaderContext ctx in r.Leaves)
                // TODO: improve this
                AtomicReader sub = (AtomicReader)ctx.Reader;
                //System.out.println("\nsub=" + sub);
                TermsEnum            termsEnum                  = sub.Fields.Terms("content").Iterator(null);
                DocsEnum             docs                       = null;
                DocsAndPositionsEnum docsAndPositions           = null;
                DocsAndPositionsEnum docsAndPositionsAndOffsets = null;
                // Maps a leaf-local doc id to the value of its "id" field.
                FieldCache.Ints      docIDToID                  = FieldCache.DEFAULT.GetInts(sub, "id", false);
                foreach (string term in terms)
                    //System.out.println("  term=" + term);
                    // A term may be absent from this leaf; only verify it when the seek succeeds.
                    if (termsEnum.SeekExact(new BytesRef(term)))
                        // Pass 1: doc ids and freqs only.
                        docs = termsEnum.Docs(null, docs);
                        int doc;
                        //System.out.println("    doc/freq");
                        while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                            IList <Token> expected = actualTokens[term][docIDToID.Get(doc)];
                            //System.out.println("      doc=" + docIDToID.Get(doc) + " docID=" + doc + " " + expected.Size() + " freq");
                            Assert.AreEqual(expected.Count, docs.Freq());

                        // Pass 2: freqs and positions.
                        // explicitly exclude offsets here
                        docsAndPositions = termsEnum.DocsAndPositions(null, docsAndPositions, DocsAndPositionsEnum.FLAG_PAYLOADS);
                        //System.out.println("    doc/freq/pos");
                        while ((doc = docsAndPositions.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                            IList <Token> expected = actualTokens[term][docIDToID.Get(doc)];
                            //System.out.println("      doc=" + docIDToID.Get(doc) + " " + expected.Size() + " freq");
                            Assert.AreEqual(expected.Count, docsAndPositions.Freq());
                            foreach (Token token in expected)
                                // The absolute position was stashed in token.Type when the token was built.
                                int pos = Convert.ToInt32(token.Type);
                                //System.out.println("        pos=" + pos);
                                Assert.AreEqual(pos, docsAndPositions.NextPosition());

                        // Pass 3: freqs, positions and offsets.
                        // NOTE(review): the reuse argument is docsAndPositions rather than
                        // docsAndPositionsAndOffsets - confirm this is intended.
                        docsAndPositionsAndOffsets = termsEnum.DocsAndPositions(null, docsAndPositions);
                        //System.out.println("    doc/freq/pos/offs");
                        while ((doc = docsAndPositionsAndOffsets.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                            IList <Token> expected = actualTokens[term][docIDToID.Get(doc)];
                            //System.out.println("      doc=" + docIDToID.Get(doc) + " " + expected.Size() + " freq");
                            Assert.AreEqual(expected.Count, docsAndPositionsAndOffsets.Freq());
                            foreach (Token token in expected)
                                int pos = Convert.ToInt32(token.Type);
                                //System.out.println("        pos=" + pos);
                                Assert.AreEqual(pos, docsAndPositionsAndOffsets.NextPosition());
                                Assert.AreEqual(token.StartOffset(), docsAndPositionsAndOffsets.StartOffset());
                                Assert.AreEqual(token.EndOffset(), docsAndPositionsAndOffsets.EndOffset());
                // TODO: test advance:
Example #54
 /// <summary>
 /// Delegates to <c>Tenum.Docs</c>, forwarding <paramref name="bits"/>,
 /// <paramref name="reuse"/> and <paramref name="flags"/> unchanged, and
 /// returns whatever that call produces.
 /// </summary>
 public override DocsEnum Docs(Bits bits, DocsEnum reuse, int flags)
 {
     DocsEnum delegated = Tenum.Docs(bits, reuse, flags);
     return delegated;
 }
        /// <summary>
        /// Checks advancing docs: drives <paramref name="leftDocs"/> and <paramref name="rightDocs"/>
        /// in lockstep, randomly choosing between <c>NextDoc()</c> and <c>Advance(skip)</c> on each
        /// step, and asserts that both enums report the same doc id every time.
        /// NOTE(review): the body of the null check, the else between the two branches and the
        /// loop exit are not visible in this excerpt - confirm against the full source.
        /// </summary>
        /// <param name="docFreq">Used only to derive the average gap, <c>MAXDOC / (1 + docFreq)</c>.</param>
        /// <param name="leftDocs">First enum; checked for null before use.</param>
        /// <param name="rightDocs">Second enum, expected to return the same doc ids as the first.</param>
        public virtual void AssertDocsSkipping(int docFreq, DocsEnum leftDocs, DocsEnum rightDocs)
            if (leftDocs == null)
            int docid = -1;
            // The 1 + docFreq divisor avoids a division by zero when docFreq is 0.
            int averageGap = MAXDOC / (1 + docFreq);
            int skipInterval = 16;

            while (true)
                if (Random().NextBoolean())
                    // nextDoc()
                    docid = leftDocs.NextDoc();
                    Assert.AreEqual(docid, rightDocs.NextDoc());
                    // advance()
                    // Target is the current doc id plus ceil(|skipInterval + random * averageGap|).
                    int skip = docid + (int)Math.Ceiling(Math.Abs(skipInterval + Random().NextDouble() * averageGap));
                    docid = leftDocs.Advance(skip);
                    Assert.AreEqual(docid, rightDocs.Advance(skip));

                if (docid == DocIdSetIterator.NO_MORE_DOCS)
                // we don't assert freqs, they are allowed to be different
Example #56
        /// <summary>
        /// Writes a single field containing <c>NUM_TERMS</c> (100) terms, where term <c>i</c> has the
        /// text of <c>i</c> and occurs only in document <c>i</c>, then reads the postings back through
        /// the default codec's <c>FieldsProducer</c>. It checks that terms come back in the expected
        /// order, that each term yields exactly its one document (twice, to exercise enum reuse),
        /// and that every term can be found again with <c>SeekCeil</c>.
        /// NOTE(review): some statements are not visible in this excerpt - confirm against the full source.
        /// </summary>
        public virtual void TestFixedPostings()
            const int NUM_TERMS = 100;

            TermData[] terms = new TermData[NUM_TERMS];
            for (int i = 0; i < NUM_TERMS; i++)
                // Term i lives in exactly one document: doc id i.
                int[]  docs = new int[] { i };
                string text = Convert.ToString(i);
                terms[i] = new TermData(this, text, docs, null);

            FieldInfos.Builder builder = new FieldInfos.Builder();

            FieldData field = new FieldData(this, "field", builder, terms, true, false);

            FieldData[] fields     = new FieldData[] { field };
            FieldInfos  fieldInfos = builder.Finish();

            // LUCENENET specific - BUG: we must wrap this in a using block in case anything in the below loop throws
            using (Directory dir = NewDirectory())
                this.Write(fieldInfos, dir, fields, true);
                Codec       codec = Codec.Default;
                SegmentInfo si    = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null);

                // LUCENENET specific - BUG: we must wrap this in a using block in case anything in the below loop throws
                using (FieldsProducer reader = codec.PostingsFormat.FieldsProducer(new SegmentReadState(dir, si, fieldInfos, NewIOContext(Random), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR)))
                    IEnumerator <string> fieldsEnum = reader.GetEnumerator();
                    // NOTE(review): no MoveNext() call is visible before Current is read - confirm
                    // it exists in the full source.
                    string fieldName = fieldsEnum.Current;
                    Terms terms2 = reader.GetTerms(fieldName);

                    TermsEnum termsEnum = terms2.GetIterator(null);

                    DocsEnum docsEnum = null;
                    for (int i = 0; i < NUM_TERMS; i++)
                        BytesRef term = termsEnum.Next();
                        Assert.AreEqual(terms[i].text2, term.Utf8ToString());

                        // do this twice to stress test the codec's reuse, ie,
                        // make sure it properly fully resets (rewinds) its
                        // internal state:
                        for (int iter = 0; iter < 2; iter++)
                            docsEnum = TestUtil.Docs(Random, termsEnum, null, docsEnum, DocsFlags.NONE);
                            // Exactly one document is expected, followed by exhaustion.
                            Assert.AreEqual(terms[i].docs[0], docsEnum.NextDoc());
                            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docsEnum.NextDoc());

                    // Every written term must be found exactly by SeekCeil.
                    for (int i = 0; i < NUM_TERMS; i++)
                        Assert.AreEqual(termsEnum.SeekCeil(new BytesRef(terms[i].text2)), TermsEnum.SeekStatus.FOUND);


Example #57
 /// <summary>
 /// checks docs + freqs, sequentially: both enums must start unpositioned (DocID() == -1),
 /// return the same doc id from every <c>NextDoc()</c> call, and be exhausted together.
 /// Freqs are compared only when <paramref name="hasFreqs"/> is true.
 /// NOTE(review): the body of the null check on <paramref name="leftDocs"/> is not visible
 /// in this excerpt - confirm against the full source.
 /// </summary>
 /// <param name="info">Message attached to every assertion.</param>
 /// <param name="leftDocs">Enum that drives the iteration.</param>
 /// <param name="rightDocs">Enum expected to mirror <paramref name="leftDocs"/>.</param>
 /// <param name="hasFreqs">Whether <c>Freq()</c> values are compared for each document.</param>
 public void AssertDocsEnumEquals(string info, DocsEnum leftDocs, DocsEnum rightDocs, bool hasFreqs)
     if (leftDocs == null)
     Assert.AreEqual(-1, leftDocs.DocID(), info);
     Assert.AreEqual(-1, rightDocs.DocID(), info);
     int docid;
     while ((docid = leftDocs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
         Assert.AreEqual(docid, rightDocs.NextDoc(), info);
         if (hasFreqs)
             Assert.AreEqual(leftDocs.Freq(), rightDocs.Freq(), info);
     // Once the left enum is exhausted the right one must be exhausted as well.
     Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, rightDocs.NextDoc(), info);
        /// <summary>
        /// Verifies that <paramref name="terms"/> matches the token stream <paramref name="tk"/>
        /// indexed with field type <paramref name="ft"/>: checks the statistics (doc count, size,
        /// sum of doc freqs, positions/offsets/payloads flags), walks the terms in sorted order
        /// checking each term's docs, freqs, positions, offsets and payloads, and finally performs
        /// five random seeks to known terms.
        /// NOTE(review): several statements (else branches, a try matching the visible catch,
        /// assertions on the found* flags) are not visible in this excerpt - confirm against the
        /// full source.
        /// </summary>
        /// <param name="tk">Token stream that supplies the expected terms, freqs, positions, offsets and payloads.</param>
        /// <param name="ft">Field type whose term vector settings determine what the enums must expose.</param>
        /// <param name="terms">The terms instance under test.</param>
        protected internal virtual void AssertEquals(RandomTokenStream tk, FieldType ft, Terms terms)
            Assert.AreEqual(1, terms.DocCount);
            // Number of distinct terms in the token stream.
            int termCount = (new HashSet <string>(Arrays.AsList(tk.Terms))).Count;

            Assert.AreEqual(termCount, terms.Size());
            Assert.AreEqual(termCount, terms.SumDocFreq);
            Assert.AreEqual(ft.StoreTermVectorPositions, terms.HasPositions());
            Assert.AreEqual(ft.StoreTermVectorOffsets, terms.HasOffsets());
            Assert.AreEqual(ft.StoreTermVectorPayloads && tk.HasPayloads(), terms.HasPayloads());
            HashSet <BytesRef> uniqueTerms = new HashSet <BytesRef>();

            foreach (string term in tk.Freqs.Keys)
                uniqueTerms.Add(new BytesRef(term));
            BytesRef[] sortedTerms = uniqueTerms.ToArray(/*new BytesRef[0]*/);
            // Sort with the terms' own comparator so the expected order matches enumeration order.
            Array.Sort(sortedTerms, terms.Comparator);
            // Randomly pass either null or the previously stored enum as the Iterator argument.
            TermsEnum termsEnum = terms.Iterator(Random().NextBoolean() ? null : this.termsEnum.Value);

            this.termsEnum.Value = termsEnum;
            for (int i = 0; i < sortedTerms.Length; ++i)
                BytesRef nextTerm = termsEnum.Next();
                Assert.AreEqual(sortedTerms[i], nextTerm);
                Assert.AreEqual(sortedTerms[i], termsEnum.Term());
                Assert.AreEqual(1, termsEnum.DocFreq());

                // Presumably a fresh FixedBitSet(1) has no bits set, so passing it as the bits
                // argument filters out the only document - confirm.
                FixedBitSet bits     = new FixedBitSet(1);
                DocsEnum    docsEnum = termsEnum.Docs(bits, Random().NextBoolean() ? null : this.docsEnum.Value);
                Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.NextDoc());

                // The second request expects document 0 to be visible, followed by exhaustion.
                docsEnum = termsEnum.Docs(Random().NextBoolean() ? bits : null, Random().NextBoolean() ? null : docsEnum);
                Assert.AreEqual(0, docsEnum.NextDoc());
                Assert.AreEqual(0, docsEnum.DocID());
                Assert.AreEqual(tk.Freqs[termsEnum.Term().Utf8ToString()], (int?)docsEnum.Freq());
                Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.NextDoc());
                this.docsEnum.Value = docsEnum;

                // A positions enum must exist exactly when offsets or positions are stored.
                DocsAndPositionsEnum docsAndPositionsEnum = termsEnum.DocsAndPositions(bits, Random().NextBoolean() ? null : this.docsAndPositionsEnum.Value);
                Assert.AreEqual(ft.StoreTermVectorOffsets || ft.StoreTermVectorPositions, docsAndPositionsEnum != null);
                if (docsAndPositionsEnum != null)
                    Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.NextDoc());

                docsAndPositionsEnum = termsEnum.DocsAndPositions(Random().NextBoolean() ? bits : null, Random().NextBoolean() ? null : docsAndPositionsEnum);
                Assert.AreEqual(ft.StoreTermVectorOffsets || ft.StoreTermVectorPositions, docsAndPositionsEnum != null);
                if (terms.HasPositions() || terms.HasOffsets())
                    Assert.AreEqual(0, docsAndPositionsEnum.NextDoc());
                    int freq = docsAndPositionsEnum.Freq();
                    Assert.AreEqual(tk.Freqs[termsEnum.Term().Utf8ToString()], (int?)freq);
                    if (docsAndPositionsEnum != null)
                        for (int k = 0; k < freq; ++k)
                            int         position = docsAndPositionsEnum.NextPosition();
                            // Candidate token indexes, looked up by position when positions are
                            // available and by start offset in the other visible assignment.
                            // NOTE(review): the else between the two assignments is not visible here.
                            ISet <int?> indexes;
                            if (terms.HasPositions())
                                indexes = tk.PositionToTerms[position];
                                indexes = tk.StartOffsetToTerms[docsAndPositionsEnum.StartOffset()];
                            // Each flag below is set when some candidate token matches the current
                            // term on the attribute being checked.
                            if (terms.HasPositions())
                                bool foundPosition = false;
                                foreach (int index in indexes)
                                    if (tk.TermBytes[index].Equals(termsEnum.Term()) && tk.Positions[index] == position)
                                        foundPosition = true;
                            if (terms.HasOffsets())
                                bool foundOffset = false;
                                foreach (int index in indexes)
                                    if (tk.TermBytes[index].Equals(termsEnum.Term()) && tk.StartOffsets[index] == docsAndPositionsEnum.StartOffset() && tk.EndOffsets[index] == docsAndPositionsEnum.EndOffset())
                                        foundOffset = true;
                            if (terms.HasPayloads())
                                bool foundPayload = false;
                                foreach (int index in indexes)
                                    if (tk.TermBytes[index].Equals(termsEnum.Term()) && Equals(tk.Payloads[index], docsAndPositionsEnum.Payload))
                                        foundPayload = true;
                        // NOTE(review): the try block and the guarded call belonging to this catch
                        // are not visible in this excerpt.
                        catch (Exception e)
                            // ok
                    Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.NextDoc());
                this.docsAndPositionsEnum.Value = docsAndPositionsEnum;
            // Five random seeks to terms known to exist, using either SeekExact or SeekCeil.
            for (int i = 0; i < 5; ++i)
                if (Random().NextBoolean())
                    Assert.IsTrue(termsEnum.SeekExact(RandomInts.RandomFrom(Random(), tk.TermBytes)));
                    Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(RandomInts.RandomFrom(Random(), tk.TermBytes)));
 /// <summary>
 /// Counts the documents remaining in <paramref name="docs"/> by calling
 /// <c>NextDoc()</c> until it returns <c>DocIdSetIterator.NO_MORE_DOCS</c>.
 /// The enumerator is left exhausted as a side effect.
 /// </summary>
 /// <param name="docs">Postings enumerator to drain.</param>
 /// <returns>The number of documents visited.</returns>
 private int CountDocs(DocsEnum docs)
 {
     int count = 0;
     while (docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
     {
         // Tally each document; without this increment the result could never exceed 0.
         count++;
     }
     return count;
 }
 protected virtual void CollectDocs(FixedBitSet bitSet)
     //WARN: keep this specialization in sync
     Debug.Assert(termsEnum != null);
     docsEnum = termsEnum.Docs(acceptDocs, docsEnum, DocsEnum.FLAG_NONE);
     int docid;
     while ((docid = docsEnum.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)