public virtual void TestDocsEnum()
{
    TermVectorsReader reader = Codec.Default.TermVectorsFormat.VectorsReader(dir, seg.Info, fieldInfos, NewIOContext(Random));
    for (int j = 0; j < 5; j++)
    {
        Terms vector = reader.Get(j).GetTerms(testFields[0]);
        Assert.IsNotNull(vector);
        Assert.AreEqual(testTerms.Length, vector.Count);
        TermsEnum termsEnum = vector.GetEnumerator();
        DocsEnum docsEnum = null;
        for (int i = 0; i < testTerms.Length; i++)
        {
            Assert.IsTrue(termsEnum.MoveNext());
            BytesRef text = termsEnum.Term;
            string term = text.Utf8ToString();
            //System.out.println("Term: " + term);
            Assert.AreEqual(testTerms[i], term);

            docsEnum = TestUtil.Docs(Random, termsEnum, null, docsEnum, DocsFlags.NONE);
            Assert.IsNotNull(docsEnum);
            int doc = docsEnum.DocID;
            Assert.AreEqual(-1, doc);
            Assert.IsTrue(docsEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docsEnum.NextDoc());
        }
        Assert.IsFalse(termsEnum.MoveNext());
    }
    reader.Dispose();
}
/// <summary>
/// Returns <see cref="DocsAndPositionsEnum"/> for the specified
/// term. This will return <c>null</c> if the
/// field or term does not exist or positions weren't indexed.
/// </summary>
/// <seealso cref="TermsEnum.DocsAndPositions(IBits, DocsAndPositionsEnum)"/>
public DocsAndPositionsEnum GetTermPositionsEnum(Term term) // LUCENENET specific: Renamed from TermPositionsEnum()
{
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(term.Field != null);
        Debugging.Assert(term.Bytes != null);
    }
    Fields fields = Fields;
    if (fields != null)
    {
        Terms terms = fields.GetTerms(term.Field);
        if (terms != null)
        {
            TermsEnum termsEnum = terms.GetEnumerator();
            if (termsEnum.SeekExact(term.Bytes))
            {
                return termsEnum.DocsAndPositions(LiveDocs, null);
            }
        }
    }
    return null;
}
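// A minimal usage sketch for GetTermPositionsEnum above (the reader parameter and the
// "body"/"lucene" field and term are assumptions, not from the original source). The
// null check is required because the method returns null when the field or term is
// missing or positions were not indexed.
private static void WalkPositions(AtomicReader reader)
{
    DocsAndPositionsEnum postings = reader.GetTermPositionsEnum(new Term("body", "lucene"));
    if (postings != null)
    {
        while (postings.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
        {
            for (int i = 0; i < postings.Freq; i++)
            {
                Console.WriteLine("doc=" + postings.DocID + " pos=" + postings.NextPosition());
            }
        }
    }
}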
public virtual void TestTerms()
{
    Fields fields = MultiFields.GetFields(reader);
    foreach (string field in fields)
    {
        Terms terms = fields.GetTerms(field);
        Assert.IsNotNull(terms);
        TermsEnum termsEnum = terms.GetEnumerator();
        while (termsEnum.MoveNext())
        {
            BytesRef term = termsEnum.Term;
            Assert.IsTrue(term != null);
            string fieldValue = (string)DocHelper.NameValues[field];
            Assert.IsTrue(fieldValue.IndexOf(term.Utf8ToString(), StringComparison.Ordinal) != -1);
        }
    }

    DocsEnum termDocs = TestUtil.Docs(Random, reader, DocHelper.TEXT_FIELD_1_KEY, new BytesRef("field"), MultiFields.GetLiveDocs(reader), null, 0);
    Assert.IsTrue(termDocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);

    termDocs = TestUtil.Docs(Random, reader, DocHelper.NO_NORMS_KEY, new BytesRef(DocHelper.NO_NORMS_TEXT), MultiFields.GetLiveDocs(reader), null, 0);
    Assert.IsTrue(termDocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);

    DocsAndPositionsEnum positions = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), DocHelper.TEXT_FIELD_1_KEY, new BytesRef("field"));
    // NOTE: prior rev of this test was failing to first
    // call next here:
    Assert.IsTrue(positions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    Assert.IsTrue(positions.DocID == 0);
    Assert.IsTrue(positions.NextPosition() >= 0);
}
public virtual void TestZeroTerms()
{
    var d = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random, d);
    Document doc = new Document();
    doc.Add(NewTextField("field", "one two three", Field.Store.NO));
    doc = new Document();
    doc.Add(NewTextField("field2", "one two three", Field.Store.NO));
    w.AddDocument(doc);
    w.Commit();
    w.DeleteDocuments(new Term("field", "one"));
    w.ForceMerge(1);
    IndexReader r = w.GetReader();
    w.Dispose();
    Assert.AreEqual(1, r.NumDocs);
    Assert.AreEqual(1, r.MaxDoc);
    Terms terms = MultiFields.GetTerms(r, "field");
    if (terms != null)
    {
        Assert.IsFalse(terms.GetEnumerator().MoveNext());
    }
    r.Dispose();
    d.Dispose();
}
/// <summary>
/// Returns a <see cref="TermsEnum"/> that implements <see cref="TermsEnum.Ord"/>. If the
/// provided <paramref name="reader"/> supports <see cref="TermsEnum.Ord"/>, we just return its
/// <see cref="TermsEnum"/>; if it does not, we build a "private" terms
/// index internally (WARNING: consumes RAM) and use that
/// index to implement <see cref="TermsEnum.Ord"/>. This also enables <see cref="TermsEnum.Ord"/> on top
/// of a composite reader. The returned <see cref="TermsEnum"/> is
/// unpositioned. This returns <c>null</c> if there are no terms.
///
/// <para/><b>NOTE</b>: you must pass the same reader that was
/// used when creating this class.
/// </summary>
public virtual TermsEnum GetOrdTermsEnum(AtomicReader reader)
{
    if (m_indexedTermsArray == null)
    {
        //System.out.println("GET normal enum");
        Fields fields = reader.Fields;
        if (fields == null)
        {
            return null;
        }
        Terms terms = fields.GetTerms(m_field);
        if (terms == null)
        {
            return null;
        }
        return terms.GetEnumerator();
    }
    else
    {
        //System.out.println("GET wrapped enum ordBase=" + ordBase);
        return new OrdWrappedTermsEnum(this, reader);
    }
}
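// A small consumption sketch for GetOrdTermsEnum (the reader and the "category" field
// name are assumptions). The returned enum supports TermsEnum.Ord even when the
// underlying codec does not, at the RAM cost noted in the summary above.
private static void PrintOrds(AtomicReader reader)
{
    DocTermOrds dto = new DocTermOrds(reader, reader.LiveDocs, "category");
    TermsEnum te = dto.GetOrdTermsEnum(reader);
    if (te != null)
    {
        while (te.MoveNext())
        {
            Console.WriteLine("ord=" + te.Ord + " term=" + te.Term.Utf8ToString());
        }
    }
}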
/// <summary>
/// Creates a <see cref="TermContext"/> from a top-level <see cref="IndexReaderContext"/> and the
/// given <see cref="Term"/>. This method will look up the given term in all context's leaf readers
/// and register each of the readers containing the term in the returned <see cref="TermContext"/>
/// using the leaf reader's ordinal.
/// <para/>
/// Note: the given context must be a top-level context.
/// </summary>
public static TermContext Build(IndexReaderContext context, Term term)
{
    if (Debugging.AssertsEnabled) Debugging.Assert(context != null && context.IsTopLevel);
    string field = term.Field;
    BytesRef bytes = term.Bytes;
    TermContext perReaderTermState = new TermContext(context);
    //if (DEBUG) System.out.println("prts.build term=" + term);
    foreach (AtomicReaderContext ctx in context.Leaves)
    {
        //if (DEBUG) System.out.println(" r=" + leaves[i].reader);
        Fields fields = ctx.AtomicReader.Fields;
        if (fields != null)
        {
            Terms terms = fields.GetTerms(field);
            if (terms != null)
            {
                TermsEnum termsEnum = terms.GetEnumerator();
                if (termsEnum.SeekExact(bytes))
                {
                    TermState termState = termsEnum.GetTermState();
                    //if (DEBUG) System.out.println(" found");
                    perReaderTermState.Register(termState, ctx.Ord, termsEnum.DocFreq, termsEnum.TotalTermFreq);
                }
            }
        }
    }
    return perReaderTermState;
}
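// A brief sketch (the indexReader variable and the term values are assumptions) of the
// intended call pattern for Build: construct the TermContext once from the top-level
// context, then reuse the cached per-leaf statistics, for example via the
// TermQuery(Term, TermContext) constructor.
IndexReaderContext topContext = indexReader.Context;
Term term = new Term("id", "42");
TermContext states = TermContext.Build(topContext, term);
Console.WriteLine("docFreq=" + states.DocFreq + " totalTermFreq=" + states.TotalTermFreq);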
private void VerifyCount(IndexReader ir)
{
    Fields fields = MultiFields.GetFields(ir);
    if (fields == null)
    {
        return;
    }
    foreach (string field in fields)
    {
        Terms terms = fields.GetTerms(field);
        if (terms == null)
        {
            continue;
        }
        int docCount = terms.DocCount;
        FixedBitSet visited = new FixedBitSet(ir.MaxDoc);
        TermsEnum te = terms.GetEnumerator();
        while (te.MoveNext())
        {
            DocsEnum de = TestUtil.Docs(Random, te, null, null, DocsFlags.NONE);
            while (de.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
            {
                visited.Set(de.DocID);
            }
        }
        Assert.AreEqual(visited.Cardinality, docCount);
    }
}
/// <summary>
/// Returns the total number of occurrences of <paramref name="term"/> across all
/// documents (the sum of the freq for each document that contains this term).
/// This method returns 0 if the term or
/// field does not exist. This method does not take into
/// account deleted documents that have not yet been merged
/// away.
/// </summary>
public override sealed long TotalTermFreq(Term term)
{
    Fields fields = Fields;
    if (fields is null)
    {
        return 0;
    }
    Terms terms = fields.GetTerms(term.Field);
    if (terms is null)
    {
        return 0;
    }
    TermsEnum termsEnum = terms.GetEnumerator();
    if (termsEnum.SeekExact(term.Bytes))
    {
        return termsEnum.TotalTermFreq;
    }
    return 0;
}
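// Sketch contrasting the two term-level statistics (the indexReader variable and the
// field/term values are assumptions): DocFreq counts documents that contain the term at
// least once, while TotalTermFreq sums the per-document frequencies, so
// TotalTermFreq >= DocFreq whenever the term exists and frequencies were indexed.
Term term = new Term("body", "search");
int docFreq = indexReader.DocFreq(term);              // number of documents containing "search"
long totalTermFreq = indexReader.TotalTermFreq(term); // total occurrences of "search"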
private void AssertSumDocFreq(IndexReader ir)
{
    // compute sumDocFreq across all fields
    Fields fields = MultiFields.GetFields(ir);
    foreach (string f in fields)
    {
        Terms terms = fields.GetTerms(f);
        long sumDocFreq = terms.SumDocFreq;
        if (sumDocFreq == -1)
        {
            if (Verbose)
            {
                Console.WriteLine("skipping field: " + f + ", codec does not support sumDocFreq");
            }
            continue;
        }

        long computedSumDocFreq = 0;
        TermsEnum termsEnum = terms.GetEnumerator();
        while (termsEnum.MoveNext())
        {
            computedSumDocFreq += termsEnum.DocFreq;
        }
        Assert.AreEqual(computedSumDocFreq, sumDocFreq);
    }
}
public virtual void TestMixupMultiValued()
{
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir);
    Document doc = new Document();
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.StoreTermVectors = true;
    customType.StoreTermVectorPositions = true;
    customType.StoreTermVectorPayloads = true;
    customType.StoreTermVectorOffsets = Random.NextBoolean();
    Field field = new Field("field", "", customType);
    TokenStream ts = new MockTokenizer(new StringReader("here we go"), MockTokenizer.WHITESPACE, true);
    Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
    field.SetTokenStream(ts);
    doc.Add(field);

    Field field2 = new Field("field", "", customType);
    Token withPayload = new Token("withPayload", 0, 11);
    withPayload.Payload = new BytesRef("test");
    ts = new CannedTokenStream(withPayload);
    Assert.IsTrue(ts.HasAttribute<IPayloadAttribute>());
    field2.SetTokenStream(ts);
    doc.Add(field2);

    Field field3 = new Field("field", "", customType);
    ts = new MockTokenizer(new StringReader("nopayload"), MockTokenizer.WHITESPACE, true);
    Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
    field3.SetTokenStream(ts);
    doc.Add(field3);

    writer.AddDocument(doc);
    DirectoryReader reader = writer.GetReader();
    Terms terms = reader.GetTermVector(0, "field");
    if (Debugging.AssertsEnabled) Debugging.Assert(terms != null);
    TermsEnum termsEnum = terms.GetEnumerator();
    Assert.IsTrue(termsEnum.SeekExact(new BytesRef("withPayload")));
    DocsAndPositionsEnum de = termsEnum.DocsAndPositions(null, null);
    Assert.AreEqual(0, de.NextDoc());
    Assert.AreEqual(3, de.NextPosition());
    Assert.AreEqual(new BytesRef("test"), de.GetPayload());
    writer.Dispose();
    reader.Dispose();
    dir.Dispose();
}
public virtual void TestMixupDocs()
{
    Directory dir = NewDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    iwc.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(Random, dir, iwc);
    Document doc = new Document();
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.StoreTermVectors = true;
    customType.StoreTermVectorPositions = true;
    customType.StoreTermVectorPayloads = true;
    customType.StoreTermVectorOffsets = Random.NextBoolean();
    Field field = new Field("field", "", customType);
    TokenStream ts = new MockTokenizer(new StringReader("here we go"), MockTokenizer.WHITESPACE, true);
    Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
    field.SetTokenStream(ts);
    doc.Add(field);
    writer.AddDocument(doc);

    Token withPayload = new Token("withPayload", 0, 11);
    withPayload.Payload = new BytesRef("test");
    ts = new CannedTokenStream(withPayload);
    Assert.IsTrue(ts.HasAttribute<IPayloadAttribute>());
    field.SetTokenStream(ts);
    writer.AddDocument(doc);

    ts = new MockTokenizer(new StringReader("another"), MockTokenizer.WHITESPACE, true);
    Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
    field.SetTokenStream(ts);
    writer.AddDocument(doc);

    DirectoryReader reader = writer.GetReader();
    Terms terms = reader.GetTermVector(1, "field");
    if (Debugging.AssertsEnabled) Debugging.Assert(terms != null);
    TermsEnum termsEnum = terms.GetEnumerator();
    Assert.IsTrue(termsEnum.SeekExact(new BytesRef("withPayload")));
    DocsAndPositionsEnum de = termsEnum.DocsAndPositions(null, null);
    Assert.AreEqual(0, de.NextDoc());
    Assert.AreEqual(0, de.NextPosition());
    Assert.AreEqual(new BytesRef("test"), de.GetPayload());
    writer.Dispose();
    reader.Dispose();
    dir.Dispose();
}
public virtual int[] ToDocsArray(Term term, IBits bits, IndexReader reader)
{
    Fields fields = MultiFields.GetFields(reader);
    Terms cterms = fields.GetTerms(term.Field);
    TermsEnum ctermsEnum = cterms.GetEnumerator();
    if (ctermsEnum.SeekExact(new BytesRef(term.Text)))
    {
        DocsEnum docsEnum = TestUtil.Docs(Random, ctermsEnum, bits, null, DocsFlags.NONE);
        return ToArray(docsEnum);
    }
    return null;
}
public virtual DocsAndPositionsEnum GetDocsAndPositions(AtomicReader reader, BytesRef bytes, IBits liveDocs)
{
    Terms terms = reader.GetTerms(fieldName);
    if (terms != null)
    {
        TermsEnum te = terms.GetEnumerator();
        if (te.SeekExact(bytes))
        {
            return te.DocsAndPositions(liveDocs, null);
        }
    }
    return null;
}
private void CheckTerms(Terms terms, IBits liveDocs, params string[] termsList)
{
    Assert.IsNotNull(terms);
    TermsEnum te = terms.GetEnumerator();
    foreach (string t in termsList)
    {
        Assert.IsTrue(te.MoveNext());
        BytesRef b = te.Term;
        Assert.AreEqual(t, b.Utf8ToString());
        DocsEnum td = TestUtil.Docs(Random, te, liveDocs, null, DocsFlags.NONE);
        Assert.IsTrue(td.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
        Assert.AreEqual(0, td.DocID);
        Assert.AreEqual(td.NextDoc(), DocIdSetIterator.NO_MORE_DOCS);
    }
    Assert.IsFalse(te.MoveNext());
}
public virtual void TestOffsetReader()
{
    TermVectorsReader reader = Codec.Default.TermVectorsFormat.VectorsReader(dir, seg.Info, fieldInfos, NewIOContext(Random));
    Terms vector = reader.Get(0).GetTerms(testFields[0]);
    Assert.IsNotNull(vector);
    TermsEnum termsEnum = vector.GetEnumerator();
    Assert.IsNotNull(termsEnum);
    Assert.AreEqual(testTerms.Length, vector.Count);
    DocsAndPositionsEnum dpEnum = null;
    for (int i = 0; i < testTerms.Length; i++)
    {
        Assert.IsTrue(termsEnum.MoveNext());
        BytesRef text = termsEnum.Term;
        string term = text.Utf8ToString();
        Assert.AreEqual(testTerms[i], term);

        dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
        Assert.IsNotNull(dpEnum);
        Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
        Assert.AreEqual(dpEnum.Freq, positions[i].Length);
        for (int j = 0; j < positions[i].Length; j++)
        {
            Assert.AreEqual(positions[i][j], dpEnum.NextPosition());
        }
        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());

        dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
        Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
        Assert.IsNotNull(dpEnum);
        Assert.AreEqual(dpEnum.Freq, positions[i].Length);
        for (int j = 0; j < positions[i].Length; j++)
        {
            Assert.AreEqual(positions[i][j], dpEnum.NextPosition());
            Assert.AreEqual(j * 10, dpEnum.StartOffset);
            Assert.AreEqual(j * 10 + testTerms[i].Length, dpEnum.EndOffset);
        }
        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());
    }
    reader.Dispose();
}
/// <summary>
/// Returns <see cref="DocsAndPositionsEnum"/> for the specified
/// field and term, with control over whether offsets and payloads are
/// required. Some codecs may be able to optimize
/// their implementation when offsets and/or payloads are not
/// required. This will return <c>null</c> if the field or term does not
/// exist or positions were not indexed. See
/// <see cref="TermsEnum.DocsAndPositions(IBits, DocsAndPositionsEnum, DocsAndPositionsFlags)"/>.
/// </summary>
public static DocsAndPositionsEnum GetTermPositionsEnum(IndexReader r, IBits liveDocs, string field, BytesRef term, DocsAndPositionsFlags flags)
{
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(field != null);
        Debugging.Assert(term != null);
    }
    Terms terms = GetTerms(r, field);
    if (terms != null)
    {
        TermsEnum termsEnum = terms.GetEnumerator();
        if (termsEnum.SeekExact(term))
        {
            return termsEnum.DocsAndPositions(liveDocs, null, flags);
        }
    }
    return null;
}
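// A hedged usage sketch for the overload above (the indexReader variable and the
// field/term values are assumptions): request only the postings features you need.
// Passing DocsAndPositionsFlags.OFFSETS asks the codec for offsets, which some codecs
// can otherwise skip computing.
DocsAndPositionsEnum dpe = MultiFields.GetTermPositionsEnum(
    indexReader, MultiFields.GetLiveDocs(indexReader), "body", new BytesRef("lucene"),
    DocsAndPositionsFlags.OFFSETS);
if (dpe != null && dpe.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
{
    dpe.NextPosition();
    Console.WriteLine("startOffset=" + dpe.StartOffset + " endOffset=" + dpe.EndOffset);
}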
public virtual void TestTermVectors()
{
    Terms result = reader.GetTermVectors(0).GetTerms(DocHelper.TEXT_FIELD_2_KEY);
    Assert.IsNotNull(result);
    Assert.AreEqual(3, result.Count);
    TermsEnum termsEnum = result.GetEnumerator();
    while (termsEnum.MoveNext())
    {
        string term = termsEnum.Term.Utf8ToString();
        int freq = (int)termsEnum.TotalTermFreq;
        Assert.IsTrue(DocHelper.FIELD_2_TEXT.IndexOf(term, StringComparison.Ordinal) != -1);
        Assert.IsTrue(freq > 0);
    }

    Fields results = reader.GetTermVectors(0);
    Assert.IsTrue(results != null);
    Assert.AreEqual(3, results.Count, "We do not have 3 term freq vectors");
}
public virtual void TestReader()
{
    TermVectorsReader reader = Codec.Default.TermVectorsFormat.VectorsReader(dir, seg.Info, fieldInfos, NewIOContext(Random));
    for (int j = 0; j < 5; j++)
    {
        Terms vector = reader.Get(j).GetTerms(testFields[0]);
        Assert.IsNotNull(vector);
        Assert.AreEqual(testTerms.Length, vector.Count);
        TermsEnum termsEnum = vector.GetEnumerator();
        for (int i = 0; i < testTerms.Length; i++)
        {
            Assert.IsTrue(termsEnum.MoveNext());
            BytesRef text = termsEnum.Term;
            string term = text.Utf8ToString();
            //System.out.println("Term: " + term);
            Assert.AreEqual(testTerms[i], term);
        }
        Assert.IsFalse(termsEnum.MoveNext());
    }
    reader.Dispose();
}
private void PrintSegment(StreamWriter @out, SegmentCommitInfo si)
{
    SegmentReader reader = new SegmentReader(si, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random));

    for (int i = 0; i < reader.NumDocs; i++)
    {
        @out.WriteLine(reader.Document(i));
    }

    Fields fields = reader.Fields;
    foreach (string field in fields)
    {
        Terms terms = fields.GetTerms(field);
        Assert.IsNotNull(terms);
        TermsEnum tis = terms.GetEnumerator();
        while (tis.MoveNext())
        {
            @out.Write(" term=" + field + ":" + tis.Term);
            @out.WriteLine(" DF=" + tis.DocFreq);

            DocsAndPositionsEnum positions = tis.DocsAndPositions(reader.LiveDocs, null);
            while (positions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
            {
                @out.Write(" doc=" + positions.DocID);
                @out.Write(" TF=" + positions.Freq);
                @out.Write(" pos=");
                @out.Write(positions.NextPosition());
                for (int j = 1; j < positions.Freq; j++)
                {
                    @out.Write("," + positions.NextPosition());
                }
                @out.WriteLine("");
            }
        }
    }
    reader.Dispose();
}
public override TermsEnum GetEnumerator() => m_input.GetEnumerator();
public virtual void Test()
{
    IList<string> postingsList = new List<string>();
    int numTerms = AtLeast(300);
    int maxTermsPerDoc = TestUtil.NextInt32(Random, 10, 20);

    bool isSimpleText = "SimpleText".Equals(TestUtil.GetPostingsFormat("field"), StringComparison.Ordinal);

    IndexWriterConfig iwc = NewIndexWriterConfig(Random, TEST_VERSION_CURRENT, new MockAnalyzer(Random));

    if ((isSimpleText || iwc.MergePolicy is MockRandomMergePolicy) && (TestNightly || RandomMultiplier > 1))
    {
        // Otherwise test can take way too long (> 2 hours)
        //numTerms /= 2;

        // LUCENENET specific - To keep this under the 1 hour free limit
        // of Azure DevOps, this was reduced from /2 to /6.
        numTerms /= 6;
    }

    if (Verbose)
    {
        Console.WriteLine("maxTermsPerDoc=" + maxTermsPerDoc);
        Console.WriteLine("numTerms=" + numTerms);
    }

    for (int i = 0; i < numTerms; i++)
    {
        string term = Convert.ToString(i, CultureInfo.InvariantCulture);
        for (int j = 0; j < i; j++)
        {
            postingsList.Add(term);
        }
    }
    postingsList.Shuffle(Random);

    ConcurrentQueue<string> postings = new ConcurrentQueue<string>(postingsList);

    Directory dir = NewFSDirectory(CreateTempDir("bagofpostings"));
    RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc);

    int threadCount = TestUtil.NextInt32(Random, 1, 5);
    if (Verbose)
    {
        Console.WriteLine("config: " + iw.IndexWriter.Config);
        Console.WriteLine("threadCount=" + threadCount);
    }

    ThreadJob[] threads = new ThreadJob[threadCount];
    CountdownEvent startingGun = new CountdownEvent(1);

    for (int threadID = 0; threadID < threadCount; threadID++)
    {
        threads[threadID] = new ThreadAnonymousInnerClassHelper(this, maxTermsPerDoc, postings, iw, startingGun);
        threads[threadID].Start();
    }
    startingGun.Signal();
    foreach (ThreadJob t in threads)
    {
        t.Join();
    }

    iw.ForceMerge(1);
    DirectoryReader ir = iw.GetReader();
    Assert.AreEqual(1, ir.Leaves.Count);
    AtomicReader air = (AtomicReader)ir.Leaves[0].Reader;
    Terms terms = air.GetTerms("field");

    // numTerms-1 because there cannot be a term 0 with 0 postings:
#pragma warning disable 612, 618
    Assert.AreEqual(numTerms - 1, air.Fields.UniqueTermCount);
    if (iwc.Codec is Lucene3xCodec == false)
#pragma warning restore 612, 618
    {
        Assert.AreEqual(numTerms - 1, terms.Count);
    }

    TermsEnum termsEnum = terms.GetEnumerator();
    while (termsEnum.MoveNext())
    {
        int value = Convert.ToInt32(termsEnum.Term.Utf8ToString(), CultureInfo.InvariantCulture);
        Assert.AreEqual(value, termsEnum.DocFreq);
        // don't really need to check more than this, as CheckIndex
        // will verify that docFreq == actual number of documents seen
        // from a docsAndPositionsEnum.
    }
    ir.Dispose();
    iw.Dispose();
    dir.Dispose();
}
public virtual void Test()
{
    IList<string> postingsList = new JCG.List<string>();
    int numTerms = AtLeast(300);
    int maxTermsPerDoc = TestUtil.NextInt32(Random, 10, 20);

    bool isSimpleText = "SimpleText".Equals(TestUtil.GetPostingsFormat("field"), StringComparison.Ordinal);

    IndexWriterConfig iwc = NewIndexWriterConfig(Random, TEST_VERSION_CURRENT, new MockAnalyzer(Random));

    if ((isSimpleText || iwc.MergePolicy is MockRandomMergePolicy) && (TestNightly || RandomMultiplier > 1))
    {
        // Otherwise test can take way too long (> 2 hours)
        //numTerms /= 2;

        // LUCENENET specific - To keep this under the 1 hour free limit
        // of Azure DevOps, this was reduced from /2 to /6.
        numTerms /= 6;
    }

    if (Verbose)
    {
        Console.WriteLine("maxTermsPerDoc=" + maxTermsPerDoc);
        Console.WriteLine("numTerms=" + numTerms);
    }

    for (int i = 0; i < numTerms; i++)
    {
        string term = Convert.ToString(i, CultureInfo.InvariantCulture);
        for (int j = 0; j < i; j++)
        {
            postingsList.Add(term);
        }
    }
    postingsList.Shuffle(Random);

    ConcurrentQueue<string> postings = new ConcurrentQueue<string>(postingsList);

    Directory dir = NewFSDirectory(CreateTempDir(GetFullMethodName()));
    RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc);

    int threadCount = TestUtil.NextInt32(Random, 1, 5);
    if (Verbose)
    {
        Console.WriteLine("config: " + iw.IndexWriter.Config);
        Console.WriteLine("threadCount=" + threadCount);
    }

    Field prototype = NewTextField("field", "", Field.Store.NO);
    FieldType fieldType = new FieldType(prototype.FieldType);
    if (Random.NextBoolean())
    {
        fieldType.OmitNorms = true;
    }
    int options = Random.Next(3);
    if (options == 0)
    {
        fieldType.IndexOptions = IndexOptions.DOCS_AND_FREQS; // we don't actually need positions
        fieldType.StoreTermVectors = true; // but enforce term vectors when we do this so we check SOMETHING
    }
    else if (options == 1 && !DoesntSupportOffsets.Contains(TestUtil.GetPostingsFormat("field")))
    {
        fieldType.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
    }
    // else just positions

    ThreadJob[] threads = new ThreadJob[threadCount];
    CountdownEvent startingGun = new CountdownEvent(1);

    for (int threadID = 0; threadID < threadCount; threadID++)
    {
        Random threadRandom = new Random(Random.Next());
        Document document = new Document();
        Field field = new Field("field", "", fieldType);
        document.Add(field);
        threads[threadID] = new ThreadAnonymousClass(this, numTerms, maxTermsPerDoc, postings, iw, startingGun, threadRandom, document, field);
        threads[threadID].Start();
    }
    startingGun.Signal();
    foreach (ThreadJob t in threads)
    {
        t.Join();
    }

    iw.ForceMerge(1);
    DirectoryReader ir = iw.GetReader();
    Assert.AreEqual(1, ir.Leaves.Count);
    AtomicReader air = (AtomicReader)ir.Leaves[0].Reader;
    Terms terms = air.GetTerms("field");

    // numTerms-1 because there cannot be a term 0 with 0 postings:
    Assert.AreEqual(numTerms - 1, terms.Count);
    TermsEnum termsEnum = terms.GetEnumerator();
    while (termsEnum.MoveNext())
    {
        int value = Convert.ToInt32(termsEnum.Term.Utf8ToString(), CultureInfo.InvariantCulture);
        Assert.AreEqual(value, termsEnum.TotalTermFreq);
        // don't really need to check more than this, as CheckIndex
        // will verify that totalTermFreq == total number of positions seen
        // from a docsAndPositionsEnum.
    }
    ir.Dispose();
    iw.Dispose();
    dir.Dispose();
}
public static void VerifyEquals(Fields d1, Fields d2)
{
    if (d1 == null)
    {
        Assert.IsTrue(d2 == null || d2.Count == 0);
        return;
    }
    Assert.IsTrue(d2 != null);

    IEnumerator<string> fieldsEnum2 = d2.GetEnumerator();

    foreach (string field1 in d1)
    {
        fieldsEnum2.MoveNext();
        string field2 = fieldsEnum2.Current;
        Assert.AreEqual(field1, field2);

        Terms terms1 = d1.GetTerms(field1);
        Assert.IsNotNull(terms1);
        TermsEnum termsEnum1 = terms1.GetEnumerator();

        Terms terms2 = d2.GetTerms(field2);
        Assert.IsNotNull(terms2);
        TermsEnum termsEnum2 = terms2.GetEnumerator();

        DocsAndPositionsEnum dpEnum1 = null;
        DocsAndPositionsEnum dpEnum2 = null;
        DocsEnum dEnum1 = null;
        DocsEnum dEnum2 = null;

        BytesRef term1;
        while (termsEnum1.MoveNext())
        {
            term1 = termsEnum1.Term;
            termsEnum2.MoveNext();
            BytesRef term2 = termsEnum2.Term;
            Assert.AreEqual(term1, term2);
            Assert.AreEqual(termsEnum1.TotalTermFreq, termsEnum2.TotalTermFreq);

            dpEnum1 = termsEnum1.DocsAndPositions(null, dpEnum1);
            dpEnum2 = termsEnum2.DocsAndPositions(null, dpEnum2);
            if (dpEnum1 != null)
            {
                Assert.IsNotNull(dpEnum2);
                int docID1 = dpEnum1.NextDoc();
                dpEnum2.NextDoc();
                // docIDs are not supposed to be equal
                //int docID2 = dpEnum2.NextDoc();
                //Assert.AreEqual(docID1, docID2);
                Assert.IsTrue(docID1 != DocIdSetIterator.NO_MORE_DOCS);

                int freq1 = dpEnum1.Freq;
                int freq2 = dpEnum2.Freq;
                Assert.AreEqual(freq1, freq2);
                IOffsetAttribute offsetAtt1 = dpEnum1.Attributes.HasAttribute<IOffsetAttribute>() ? dpEnum1.Attributes.GetAttribute<IOffsetAttribute>() : null;
                IOffsetAttribute offsetAtt2 = dpEnum2.Attributes.HasAttribute<IOffsetAttribute>() ? dpEnum2.Attributes.GetAttribute<IOffsetAttribute>() : null;

                if (offsetAtt1 != null)
                {
                    Assert.IsNotNull(offsetAtt2);
                }
                else
                {
                    Assert.IsNull(offsetAtt2);
                }

                for (int posUpto = 0; posUpto < freq1; posUpto++)
                {
                    int pos1 = dpEnum1.NextPosition();
                    int pos2 = dpEnum2.NextPosition();
                    Assert.AreEqual(pos1, pos2);
                    if (offsetAtt1 != null)
                    {
                        Assert.AreEqual(offsetAtt1.StartOffset, offsetAtt2.StartOffset);
                        Assert.AreEqual(offsetAtt1.EndOffset, offsetAtt2.EndOffset);
                    }
                }
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum1.NextDoc());
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum2.NextDoc());
            }
            else
            {
                dEnum1 = TestUtil.Docs(Random, termsEnum1, null, dEnum1, DocsFlags.FREQS);
                dEnum2 = TestUtil.Docs(Random, termsEnum2, null, dEnum2, DocsFlags.FREQS);
                Assert.IsNotNull(dEnum1);
                Assert.IsNotNull(dEnum2);
                int docID1 = dEnum1.NextDoc();
                dEnum2.NextDoc();
                // docIDs are not supposed to be equal
                //int docID2 = dEnum2.NextDoc();
                //Assert.AreEqual(docID1, docID2);
                Assert.IsTrue(docID1 != DocIdSetIterator.NO_MORE_DOCS);
                int freq1 = dEnum1.Freq;
                int freq2 = dEnum2.Freq;
                Assert.AreEqual(freq1, freq2);
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dEnum1.NextDoc());
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dEnum2.NextDoc());
            }
        }
        Assert.IsFalse(termsEnum2.MoveNext());
    }
    Assert.IsFalse(fieldsEnum2.MoveNext());
}
public virtual void VerifyEquals(DirectoryReader r1, DirectoryReader r2, string idField)
{
    if (Verbose)
    {
        Console.WriteLine("\nr1 docs:");
        PrintDocs(r1);
        Console.WriteLine("\nr2 docs:");
        PrintDocs(r2);
    }
    if (r1.NumDocs != r2.NumDocs)
    {
        if (Debugging.AssertsEnabled) Debugging.Assert(false, "r1.NumDocs={0} vs r2.NumDocs={1}", r1.NumDocs, r2.NumDocs);
    }
    bool hasDeletes = !(r1.MaxDoc == r2.MaxDoc && r1.NumDocs == r1.MaxDoc);

    int[] r2r1 = new int[r2.MaxDoc]; // r2 id to r1 id mapping

    // create mapping from id2 space to id1 based on idField
    Fields f1 = MultiFields.GetFields(r1);
    if (f1 == null)
    {
        // make sure r2 is empty
        Assert.IsNull(MultiFields.GetFields(r2));
        return;
    }
    Terms terms1 = f1.GetTerms(idField);
    if (terms1 == null)
    {
        Assert.IsTrue(MultiFields.GetFields(r2) == null || MultiFields.GetFields(r2).GetTerms(idField) == null);
        return;
    }
    TermsEnum termsEnum = terms1.GetEnumerator();

    IBits liveDocs1 = MultiFields.GetLiveDocs(r1);
    IBits liveDocs2 = MultiFields.GetLiveDocs(r2);

    Fields fields = MultiFields.GetFields(r2);
    if (fields == null)
    {
        // make sure r1 is in fact empty (eg has only all
        // deleted docs):
        IBits liveDocs = MultiFields.GetLiveDocs(r1);
        DocsEnum docs = null;
        while (termsEnum.MoveNext())
        {
            docs = TestUtil.Docs(Random, termsEnum, liveDocs, docs, DocsFlags.NONE);
            while (docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
            {
                Assert.Fail("r1 is not empty but r2 is");
            }
        }
        return;
    }
    Terms terms2 = fields.GetTerms(idField);
    TermsEnum termsEnum2 = terms2.GetEnumerator();

    DocsEnum termDocs1 = null;
    DocsEnum termDocs2 = null;

    while (termsEnum.MoveNext())
    {
        BytesRef term = termsEnum.Term;
        //System.out.println("TEST: match id term=" + term);

        termDocs1 = TestUtil.Docs(Random, termsEnum, liveDocs1, termDocs1, DocsFlags.NONE);
        if (termsEnum2.SeekExact(term))
        {
            termDocs2 = TestUtil.Docs(Random, termsEnum2, liveDocs2, termDocs2, DocsFlags.NONE);
        }
        else
        {
            termDocs2 = null;
        }

        if (termDocs1.NextDoc() == DocIdSetIterator.NO_MORE_DOCS)
        {
            // this doc is deleted and wasn't replaced
            Assert.IsTrue(termDocs2 == null || termDocs2.NextDoc() == DocIdSetIterator.NO_MORE_DOCS);
            continue;
        }

        int id1 = termDocs1.DocID;
        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, termDocs1.NextDoc());

        Assert.IsTrue(termDocs2.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
        int id2 = termDocs2.DocID;
        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, termDocs2.NextDoc());

        r2r1[id2] = id1;

        // verify stored fields are equivalent
        try
        {
            VerifyEquals(r1.Document(id1), r2.Document(id2));
        }
        catch (Exception /*t*/)
        {
            Console.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term=" + term);
            Console.WriteLine(" d1=" + r1.Document(id1));
            Console.WriteLine(" d2=" + r2.Document(id2));
            throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
        }

        try
        {
            // verify term vectors are equivalent
            VerifyEquals(r1.GetTermVectors(id1), r2.GetTermVectors(id2));
        }
        catch (Exception /*e*/)
        {
            Console.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2);
            Fields tv1 = r1.GetTermVectors(id1);
            Console.WriteLine(" d1=" + tv1);
            if (tv1 != null)
            {
                DocsAndPositionsEnum dpEnum = null;
                DocsEnum dEnum = null;
                foreach (string field in tv1)
                {
                    Console.WriteLine(" " + field + ":");
                    Terms terms3 = tv1.GetTerms(field);
                    Assert.IsNotNull(terms3);
                    TermsEnum termsEnum3 = terms3.GetEnumerator();
                    while (termsEnum3.MoveNext())
                    {
                        Console.WriteLine(" " + termsEnum3.Term.Utf8ToString() + ": freq=" + termsEnum3.TotalTermFreq);
                        dpEnum = termsEnum3.DocsAndPositions(null, dpEnum);
                        if (dpEnum != null)
                        {
                            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                            int freq = dpEnum.Freq;
                            Console.WriteLine(" doc=" + dpEnum.DocID + " freq=" + freq);
                            for (int posUpto = 0; posUpto < freq; posUpto++)
                            {
                                Console.WriteLine(" pos=" + dpEnum.NextPosition());
                            }
                        }
                        else
                        {
                            dEnum = TestUtil.Docs(Random, termsEnum3, null, dEnum, DocsFlags.FREQS);
                            Assert.IsNotNull(dEnum);
                            Assert.IsTrue(dEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                            int freq = dEnum.Freq;
                            Console.WriteLine(" doc=" + dEnum.DocID + " freq=" + freq);
                        }
                    }
                }
            }

            Fields tv2 = r2.GetTermVectors(id2);
            Console.WriteLine(" d2=" + tv2);
            if (tv2 != null)
            {
                DocsAndPositionsEnum dpEnum = null;
                DocsEnum dEnum = null;
                foreach (string field in tv2)
                {
                    Console.WriteLine(" " + field + ":");
                    Terms terms3 = tv2.GetTerms(field);
                    Assert.IsNotNull(terms3);
                    TermsEnum termsEnum3 = terms3.GetEnumerator();
                    while (termsEnum3.MoveNext())
                    {
                        Console.WriteLine(" " + termsEnum3.Term.Utf8ToString() + ": freq=" + termsEnum3.TotalTermFreq);
                        dpEnum = termsEnum3.DocsAndPositions(null, dpEnum);
                        if (dpEnum != null)
                        {
                            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                            int freq = dpEnum.Freq;
                            Console.WriteLine(" doc=" + dpEnum.DocID + " freq=" + freq);
                            for (int posUpto = 0; posUpto < freq; posUpto++)
                            {
                                Console.WriteLine(" pos=" + dpEnum.NextPosition());
                            }
                        }
                        else
                        {
                            dEnum = TestUtil.Docs(Random, termsEnum3, null, dEnum, DocsFlags.FREQS);
                            Assert.IsNotNull(dEnum);
                            Assert.IsTrue(dEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                            int freq = dEnum.Freq;
                            Console.WriteLine(" doc=" + dEnum.DocID + " freq=" + freq);
                        }
                    }
                }
            }

            throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
        }
    }

    //System.out.println("TEST: done match id");

    // Verify postings
    //System.out.println("TEST: create te1");
    Fields fields1 = MultiFields.GetFields(r1);
    IEnumerator<string> fields1Enum = fields1.GetEnumerator();
    Fields fields2 = MultiFields.GetFields(r2);
    IEnumerator<string> fields2Enum = fields2.GetEnumerator();

    string field1 = null, field2 = null;
    TermsEnum termsEnum1 = null;
    termsEnum2 = null;
    DocsEnum docs1 = null, docs2 = null;

    // pack both doc and freq into single element for easy sorting
    long[] info1 = new long[r1.NumDocs];
    long[] info2 = new long[r2.NumDocs];

    for (; ;)
    {
        BytesRef term1 = null, term2 = null;

        // iterate until we get some docs
        int len1;
        for (; ;)
        {
            len1 = 0;
            if (termsEnum1 == null)
            {
                if (!fields1Enum.MoveNext())
                {
                    break;
                }
                field1 = fields1Enum.Current;
                Terms terms = fields1.GetTerms(field1);
                if (terms == null)
                {
                    continue;
                }
                termsEnum1 = terms.GetEnumerator();
            }
            if (!termsEnum1.MoveNext())
            {
                // no more terms in this field
                term1 = null;
                termsEnum1 = null;
                continue;
            }
            term1 = termsEnum1.Term;

            //System.out.println("TEST: term1=" + term1);
            docs1 = TestUtil.Docs(Random, termsEnum1, liveDocs1, docs1, DocsFlags.FREQS);
            while (docs1.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
            {
                int d = docs1.DocID;
                int f = docs1.Freq;
                info1[len1] = (((long)d) << 32) | (uint)f;
                len1++;
            }
            if (len1 > 0)
            {
                break;
            }
        }

        // iterate until we get some docs
        int len2;
        for (; ;)
        {
            len2 = 0;
            if (termsEnum2 == null)
            {
                if (!fields2Enum.MoveNext())
                {
                    break;
                }
                field2 = fields2Enum.Current;
                Terms terms = fields2.GetTerms(field2);
                if (terms == null)
                {
                    continue;
                }
                termsEnum2 = terms.GetEnumerator();
            }
            if (!termsEnum2.MoveNext())
            {
                // no more terms in this field
                term2 = null;
                termsEnum2 = null;
                continue;
            }
            term2 = termsEnum2.Term;

            //System.out.println("TEST: term2=" + term2);
            docs2 = TestUtil.Docs(Random, termsEnum2, liveDocs2, docs2, DocsFlags.FREQS);
            while (docs2.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
            {
                int d = r2r1[docs2.DocID];
                int f = docs2.Freq;
                info2[len2] = (((long)d) << 32) | (uint)f;
                len2++;
            }
            if (len2 > 0)
            {
                break;
            }
        }

        Assert.AreEqual(len1, len2);
        if (len1 == 0) // no more terms
        {
            break;
        }

        Assert.AreEqual(field1, field2);
        Assert.IsTrue(term1.BytesEquals(term2));

        if (!hasDeletes)
        {
            Assert.AreEqual(termsEnum1.DocFreq, termsEnum2.DocFreq);
        }

        Assert.AreEqual(term1, term2, "len1=" + len1 + " len2=" + len2 + " deletes?=" + hasDeletes);

        // sort info2 to get it into ascending docid
        Array.Sort(info2, 0, len2);

        // now compare
        for (int i = 0; i < len1; i++)
        {
            Assert.AreEqual(info1[i], info2[i],
                "i=" + i + " len=" + len1
                + " d1=" + ((long)((ulong)info1[i] >> 32)) + " f1=" + (info1[i] & int.MaxValue)
                + " d2=" + ((long)((ulong)info2[i] >> 32)) + " f2=" + (info2[i] & int.MaxValue)
                + " field=" + field1 + " term=" + term1.Utf8ToString());
        }
    }
}
public virtual void TestDoubleOffsetCounting()
{
    Directory dir = NewDirectory();
    IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
    Document doc = new Document();
    FieldType customType = new FieldType(StringField.TYPE_NOT_STORED);
    customType.StoreTermVectors = true;
    customType.StoreTermVectorPositions = true;
    customType.StoreTermVectorOffsets = true;
    Field f = NewField("field", "abcd", customType);
    doc.Add(f);
    doc.Add(f);
    Field f2 = NewField("field", "", customType);
    doc.Add(f2);
    doc.Add(f);
    w.AddDocument(doc);
    w.Dispose();

    IndexReader r = DirectoryReader.Open(dir);
    Terms vector = r.GetTermVectors(0).GetTerms("field");
    Assert.IsNotNull(vector);
    TermsEnum termsEnum = vector.GetEnumerator();
    Assert.IsTrue(termsEnum.MoveNext());
    Assert.AreEqual("", termsEnum.Term.Utf8ToString());

    // Token "" occurred once
    Assert.AreEqual(1, termsEnum.TotalTermFreq);

    DocsAndPositionsEnum dpEnum = termsEnum.DocsAndPositions(null, null);
    Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    dpEnum.NextPosition();
    Assert.AreEqual(8, dpEnum.StartOffset);
    Assert.AreEqual(8, dpEnum.EndOffset);
    Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());

    // Token "abcd" occurred three times
    Assert.IsTrue(termsEnum.MoveNext());
    Assert.AreEqual(new BytesRef("abcd"), termsEnum.Term);
    dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
    Assert.AreEqual(3, termsEnum.TotalTermFreq);

    Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    dpEnum.NextPosition();
    Assert.AreEqual(0, dpEnum.StartOffset);
    Assert.AreEqual(4, dpEnum.EndOffset);

    dpEnum.NextPosition();
    Assert.AreEqual(4, dpEnum.StartOffset);
    Assert.AreEqual(8, dpEnum.EndOffset);

    dpEnum.NextPosition();
    Assert.AreEqual(8, dpEnum.StartOffset);
    Assert.AreEqual(12, dpEnum.EndOffset);

    Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());
    Assert.IsFalse(termsEnum.MoveNext());
    r.Dispose();
    dir.Dispose();
}
// DocValues updates
private void ApplyDocValuesUpdates<T1>(IEnumerable<T1> updates, ReadersAndUpdates rld, SegmentReader reader, DocValuesFieldUpdates.Container dvUpdatesContainer)
    where T1 : DocValuesUpdate
{
    lock (this)
    {
        Fields fields = reader.Fields;
        if (fields == null)
        {
            // this reader has no postings
            return;
        }

        // TODO: we can process the updates per DV field, from last to first so that
        // if multiple terms affect same document for the same field, we add an update
        // only once (that of the last term). To do that, we can keep a bitset which
        // marks which documents have already been updated. So e.g. if term T1
        // updates doc 7, and then we process term T2 and it updates doc 7 as well,
        // we don't apply the update since we know T1 came last and therefore wins
        // the update.
        // We can also use that bitset as 'liveDocs' to pass to TermEnum.docs(), so
        // that these documents aren't even returned.

        string currentField = null;
        TermsEnum termsEnum = null;
        DocsEnum docs = null;

        //System.out.println(Thread.currentThread().getName() + " numericDVUpdate reader=" + reader);
        foreach (DocValuesUpdate update in updates)
        {
            Term term = update.term;
            int limit = update.docIDUpto;

            // TODO: we traverse the terms in update order (not term order) so that we
            // apply the updates in the correct order, i.e. if two terms update the
            // same document, the last one that came in wins, irrespective of the
            // terms lexical order.
            // we can apply the updates in terms order if we keep an updatesGen (and
            // increment it with every update) and attach it to each NumericUpdate. Note
            // that we cannot rely only on docIDUpto because an app may send two updates
            // which will get same docIDUpto, yet will still need to respect the order
            // those updates arrived.

            if (!string.Equals(term.Field, currentField, StringComparison.Ordinal))
            {
                // if we change the code to process updates in terms order, enable this assert
                // assert currentField == null || currentField.CompareToOrdinal(term.Field) < 0;
                currentField = term.Field;
                Terms terms = fields.GetTerms(currentField);
                if (terms != null)
                {
                    termsEnum = terms.GetEnumerator(termsEnum);
                }
                else
                {
                    termsEnum = null;
                    continue; // no terms in that field
                }
            }

            if (termsEnum == null)
            {
                continue;
            }
            // System.out.println("  term=" + term);

            if (termsEnum.SeekExact(term.Bytes))
            {
                // we don't need term frequencies for this
                DocsEnum docsEnum = termsEnum.Docs(rld.LiveDocs, docs, DocsFlags.NONE);
                //System.out.println("BDS: got docsEnum=" + docsEnum);

                DocValuesFieldUpdates dvUpdates = dvUpdatesContainer.GetUpdates(update.field, update.type);
                if (dvUpdates == null)
                {
                    dvUpdates = dvUpdatesContainer.NewUpdates(update.field, update.type, reader.MaxDoc);
                }
                int doc;
                while ((doc = docsEnum.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                {
                    //System.out.println(Thread.currentThread().getName() + " numericDVUpdate term=" + term + " doc=" + docID);
                    if (doc >= limit)
                    {
                        break; // no more docs that can be updated for this term
                    }
                    dvUpdates.Add(doc, update.value);
                }
            }
        }
    }
}
public virtual void TestPositionReader()
{
    TermVectorsReader reader = Codec.Default.TermVectorsFormat.VectorsReader(dir, seg.Info, fieldInfos, NewIOContext(Random));
    //BytesRef[] terms; // LUCENENET NOTE: Not used in Lucene

    Terms vector = reader.Get(0).GetTerms(testFields[0]);
    Assert.IsNotNull(vector);
    Assert.AreEqual(testTerms.Length, vector.Count);
    TermsEnum termsEnum = vector.GetEnumerator();
    DocsAndPositionsEnum dpEnum = null;
    for (int i = 0; i < testTerms.Length; i++)
    {
        Assert.IsTrue(termsEnum.MoveNext());
        BytesRef text = termsEnum.Term;
        string term = text.Utf8ToString();
        //System.out.println("Term: " + term);
        Assert.AreEqual(testTerms[i], term);

        dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
        Assert.IsNotNull(dpEnum);
        int doc = dpEnum.DocID;
        Assert.AreEqual(-1, doc);
        Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
        Assert.AreEqual(dpEnum.Freq, positions[i].Length);
        for (int j = 0; j < positions[i].Length; j++)
        {
            Assert.AreEqual(positions[i][j], dpEnum.NextPosition());
        }
        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());

        dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
        doc = dpEnum.DocID;
        Assert.AreEqual(-1, doc);
        Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
        Assert.IsNotNull(dpEnum);
        Assert.AreEqual(dpEnum.Freq, positions[i].Length);
        for (int j = 0; j < positions[i].Length; j++)
        {
            Assert.AreEqual(positions[i][j], dpEnum.NextPosition());
            Assert.AreEqual(j * 10, dpEnum.StartOffset);
            Assert.AreEqual(j * 10 + testTerms[i].Length, dpEnum.EndOffset);
        }
        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());
    }

    Terms freqVector = reader.Get(0).GetTerms(testFields[1]); // no pos, no offset
    Assert.IsNotNull(freqVector);
    Assert.AreEqual(testTerms.Length, freqVector.Count);
    termsEnum = freqVector.GetEnumerator();
    Assert.IsNotNull(termsEnum);
    for (int i = 0; i < testTerms.Length; i++)
    {
        Assert.IsTrue(termsEnum.MoveNext());
        BytesRef text = termsEnum.Term;
        string term = text.Utf8ToString();
        //System.out.println("Term: " + term);
        Assert.AreEqual(testTerms[i], term);
        Assert.IsNotNull(termsEnum.Docs(null, null));
        Assert.IsNull(termsEnum.DocsAndPositions(null, null)); // no pos
    }
    reader.Dispose();
}
/// <summary>
/// Call this only once (if you subclass!) </summary>
protected virtual void Uninvert(AtomicReader reader, IBits liveDocs, BytesRef termPrefix)
{
    FieldInfo info = reader.FieldInfos.FieldInfo(m_field);
    if (info != null && info.HasDocValues)
    {
        throw IllegalStateException.Create("Type mismatch: " + m_field + " was indexed as " + info.DocValuesType);
    }
    //System.out.println("DTO uninvert field=" + field + " prefix=" + termPrefix);
    long startTime = Environment.TickCount;
    m_prefix = termPrefix == null ? null : BytesRef.DeepCopyOf(termPrefix);

    int maxDoc = reader.MaxDoc;
    int[] index = new int[maxDoc]; // immediate term numbers, or the index into the byte[] representing the last number
    int[] lastTerm = new int[maxDoc]; // last term we saw for this document
    var bytes = new sbyte[maxDoc][]; // list of term numbers for the doc (delta encoded vInts)

    Fields fields = reader.Fields;
    if (fields == null)
    {
        // No terms
        return;
    }
    Terms terms = fields.GetTerms(m_field);
    if (terms == null)
    {
        // No terms
        return;
    }

    TermsEnum te = terms.GetEnumerator();
    BytesRef seekStart = termPrefix ?? new BytesRef();
    //System.out.println("seekStart=" + seekStart.utf8ToString());
    if (te.SeekCeil(seekStart) == TermsEnum.SeekStatus.END)
    {
        // No terms match
        return;
    }

    // If we need our "term index wrapper", these will be
    // init'd below:
    IList<BytesRef> indexedTerms = null;
    PagedBytes indexedTermsBytes = null;

    bool testedOrd = false;

    // we need a minimum of 9 bytes, but round up to 12 since the space would
    // be wasted with most allocators anyway.
    var tempArr = new sbyte[12];

    //
    // enumerate all terms, and build an intermediate form of the un-inverted field.
    //
    // During this intermediate form, every document has a (potential) byte[]
    // and the int[maxDoc()] array either contains the termNumber list directly
    // or the *end* offset of the termNumber list in its byte array (for faster
    // appending and faster creation of the final form).
    //
    // idea... if things are too large while building, we could do a range of docs
    // at a time (but it would be a fair amount slower to build)
    // could also do ranges in parallel to take advantage of multiple CPUs
    //
    // OPTIONAL: remap the largest df terms to the lowest 128 (single byte)
    // values. this requires going over the field first to find the most
    // frequent terms ahead of time.

    int termNum = 0;
    m_docsEnum = null;

    // Loop begins with te positioned to first term (we call
    // seek above):
    for (; ;)
    {
        BytesRef t = te.Term;
        if (t == null || (termPrefix != null && !StringHelper.StartsWith(t, termPrefix)))
        {
            break;
        }
        //System.out.println("visit term=" + t.utf8ToString() + " " + t + " termNum=" + termNum);

        if (!testedOrd)
        {
            try
            {
                m_ordBase = (int)te.Ord;
                //System.out.println("got ordBase=" + ordBase);
            }
            catch (Exception uoe) when (uoe.IsUnsupportedOperationException())
            {
                // Reader cannot provide ord support, so we wrap
                // our own support by creating our own terms index:
                indexedTerms = new List<BytesRef>();
                indexedTermsBytes = new PagedBytes(15);
                //System.out.println("NO ORDS");
            }
            testedOrd = true;
        }

        VisitTerm(te, termNum);

        if (indexedTerms != null && (termNum & indexIntervalMask) == 0)
        {
            // Index this term
            m_sizeOfIndexedStrings += t.Length;
            BytesRef indexedTerm = new BytesRef();
            indexedTermsBytes.Copy(t, indexedTerm);
            // TODO: really should 1) strip off useless suffix,
            // and 2) use FST not array/PagedBytes
            indexedTerms.Add(indexedTerm);
        }

        int df = te.DocFreq;
        if (df <= m_maxTermDocFreq)
        {
            m_docsEnum = te.Docs(liveDocs, m_docsEnum, DocsFlags.NONE);

            // dF, but takes deletions into account
            int actualDF = 0;

            for (; ;)
            {
                int doc = m_docsEnum.NextDoc();
                if (doc == DocIdSetIterator.NO_MORE_DOCS)
                {
                    break;
                }
                //System.out.println(" chunk=" + chunk + " docs");

                actualDF++;
                m_termInstances++;

                //System.out.println(" docID=" + doc);
                // add TNUM_OFFSET to the term number to make room for special reserved values:
                // 0 (end term) and 1 (index into byte array follows)
                int delta = termNum - lastTerm[doc] + TNUM_OFFSET;
                lastTerm[doc] = termNum;
                int val = index[doc];

                if ((val & 0xff) == 1)
                {
                    // index into byte array (actually the end of
                    // the doc-specific byte[] when building)
                    int pos = val.TripleShift(8);
                    int ilen = VInt32Size(delta);
                    var arr = bytes[doc];
                    int newend = pos + ilen;
                    if (newend > arr.Length)
                    {
                        // We avoid a doubling strategy to lower memory usage.
                        // this faceting method isn't for docs with many terms.
                        // In hotspot, objects have 2 words of overhead, then fields, rounded up to a 64-bit boundary.
                        // TODO: figure out what array lengths we can round up to w/o actually using more memory
                        // (how much space does a byte[] take up? Is data preceded by a 32 bit length only?
                        // It should be safe to round up to the nearest 32 bits in any case.
                        int newLen = (newend + 3) & unchecked((int)0xfffffffc); // 4 byte alignment
                        var newarr = new sbyte[newLen];
                        Array.Copy(arr, 0, newarr, 0, pos);
                        arr = newarr;
                        bytes[doc] = newarr;
                    }
                    pos = WriteInt32(delta, arr, pos);
                    index[doc] = (pos << 8) | 1; // update pointer to end index in byte[]
                }
                else
                {
                    // OK, this int has data in it... find the end (a zero starting byte - not
                    // part of another number, hence not following a byte with the high bit set).
                    int ipos;
                    if (val == 0)
                    {
                        ipos = 0;
                    }
                    else if ((val & 0x0000ff80) == 0)
                    {
                        ipos = 1;
                    }
                    else if ((val & 0x00ff8000) == 0)
                    {
                        ipos = 2;
                    }
                    else if ((val & 0xff800000) == 0)
                    {
                        ipos = 3;
                    }
                    else
                    {
                        ipos = 4;
                    }

                    //System.out.println(" ipos=" + ipos);

                    int endPos = WriteInt32(delta, tempArr, ipos);
                    //System.out.println(" endpos=" + endPos);
                    if (endPos <= 4)
                    {
                        //System.out.println(" fits!");
                        // value will fit in the integer... move bytes back
                        for (int j = ipos; j < endPos; j++)
                        {
                            val |= (tempArr[j] & 0xff) << (j << 3);
                        }
                        index[doc] = val;
                    }
                    else
                    {
                        // value won't fit... move integer into byte[]
                        for (int j = 0; j < ipos; j++)
                        {
                            tempArr[j] = (sbyte)val;
                            val = val.TripleShift(8);
                        }
                        // point at the end index in the byte[]
                        index[doc] = (endPos << 8) | 1;
                        bytes[doc] = tempArr;
                        tempArr = new sbyte[12];
                    }
                }
            }
            SetActualDocFreq(termNum, actualDF);
        }

        termNum++;
        if (!te.MoveNext())
        {
            break;
        }
    }

    m_numTermsInField = termNum;

    long midPoint = Environment.TickCount;

    if (m_termInstances == 0)
    {
        // we didn't invert anything
        // lower memory consumption.
        m_tnums = null;
    }
    else
    {
        this.m_index = index;

        //
        // transform intermediate form into the final form, building a single byte[]
        // at a time, and releasing the intermediate byte[]s as we go to avoid
        // increasing the memory footprint.
        //

        for (int pass = 0; pass < 256; pass++)
        {
            var target = m_tnums[pass];
            var pos = 0; // end in target;
            if (target != null)
            {
                pos = target.Length;
            }
            else
            {
                target = new sbyte[4096];
            }

            // loop over documents, 0x00ppxxxx, 0x01ppxxxx, 0x02ppxxxx
            // where pp is the pass (which array we are building), and xx is all values.
            // each pass shares the same byte[] for termNumber lists.
            for (int docbase = pass << 16; docbase < maxDoc; docbase += (1 << 24))
            {
                int lim = Math.Min(docbase + (1 << 16), maxDoc);
                for (int doc = docbase; doc < lim; doc++)
                {
                    //System.out.println(" pass=" + pass + " process docID=" + doc);
                    int val = index[doc];
                    if ((val & 0xff) == 1)
                    {
                        int len = val.TripleShift(8);
                        //System.out.println(" ptr pos=" + pos);
                        index[doc] = (pos << 8) | 1; // change index to point to start of array
                        if ((pos & 0xff000000) != 0)
                        {
                            // we only have 24 bits for the array index
                            throw IllegalStateException.Create("Too many values for UnInvertedField faceting on field " + m_field);
                        }
                        var arr = bytes[doc];
                        /*
                         * for(byte b : arr) {
                         *   //System.out.println(" b=" + Integer.toHexString((int) b));
                         * }
                         */
                        bytes[doc] = null; // IMPORTANT: allow GC to avoid OOM
                        if (target.Length <= pos + len)
                        {
                            int newlen = target.Length;

                            //* we don't have to worry about the array getting too large
                            // since the "pos" param will overflow first (only 24 bits available)
                            // if ((newlen<<1) <= 0) {
                            //   // overflow...
                            //   newlen = Integer.MAX_VALUE;
                            //   if (newlen <= pos + len) {
                            //     throw new SolrException(400,"Too many terms to uninvert field!");
                            //   }
                            // } else {
                            //   while (newlen <= pos + len) newlen<<=1;  // doubling strategy
                            // }
                            //

                            while (newlen <= pos + len) // doubling strategy
                            {
                                newlen <<= 1;
                            }
                            var newtarget = new sbyte[newlen];
                            Array.Copy(target, 0, newtarget, 0, pos);
                            target = newtarget;
                        }
                        Array.Copy(arr, 0, target, pos, len);
                        pos += len + 1; // skip single byte at end and leave it 0 for terminator
                    }
                }
            }

            // shrink array
            if (pos < target.Length)
            {
                var newtarget = new sbyte[pos];
                Array.Copy(target, 0, newtarget, 0, pos);
                target = newtarget;
            }

            m_tnums[pass] = target;

            if ((pass << 16) > maxDoc)
            {
                break;
            }
        }
    }
    if (indexedTerms != null)
    {
        m_indexedTermsArray = new BytesRef[indexedTerms.Count];
        indexedTerms.CopyTo(m_indexedTermsArray, 0);
    }

    long endTime = Environment.TickCount;

    m_total_time = (int)(endTime - startTime);
    m_phase1_time = (int)(midPoint - startTime);
}
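// Illustrative sketch (a standalone helper, not part of DocTermOrds): the per-document
// term lists built above store term-number deltas as variable-width ints, with
// TNUM_OFFSET reserving the small values 0 and 1. For term numbers {5, 12, 40} and
// TNUM_OFFSET = 2 the encoded deltas are 7, 9 and 30. This sketch uses the common
// LSB-first vInt layout for illustration; DocTermOrds' own WriteInt32 uses a different
// (MSB-first) byte order, so treat this as an assumption, not the real encoding.
private static int EncodeDeltas(int[] termNums, int tnumOffset, byte[] arr)
{
    int pos = 0;
    int last = 0;
    foreach (int termNum in termNums)
    {
        int value = termNum - last + tnumOffset; // delta, shifted past reserved values
        last = termNum;
        while ((value & ~0x7F) != 0)
        {
            arr[pos++] = (byte)((value & 0x7F) | 0x80); // low 7 bits, continuation bit set
            value = (int)((uint)value >> 7);
        }
        arr[pos++] = (byte)value; // final byte, high bit clear
    }
    return pos; // end offset, analogous to the "pos" bookkeeping in Uninvert
}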
public virtual void TestArbitraryFields()
{
    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir);

    int NUM_DOCS = AtLeast(27);
    if (Verbose)
    {
        Console.WriteLine("TEST: " + NUM_DOCS + " docs");
    }
    int[] fieldsPerDoc = new int[NUM_DOCS];
    int baseCount = 0;

    for (int docCount = 0; docCount < NUM_DOCS; docCount++)
    {
        int fieldCount = TestUtil.NextInt32(Random, 1, 17);
        fieldsPerDoc[docCount] = fieldCount - 1;

        int finalDocCount = docCount;
        if (Verbose)
        {
            Console.WriteLine("TEST: " + fieldCount + " fields in doc " + docCount);
        }

        int finalBaseCount = baseCount;
        baseCount += fieldCount - 1;

        w.AddDocument(new IterableAnonymousInnerClassHelper(this, fieldCount, finalDocCount, finalBaseCount));
    }

    IndexReader r = w.GetReader();
    w.Dispose();

    IndexSearcher s = NewSearcher(r);
    int counter = 0;
    for (int id = 0; id < NUM_DOCS; id++)
    {
        if (Verbose)
        {
            Console.WriteLine("TEST: verify doc id=" + id + " (" + fieldsPerDoc[id] + " fields) counter=" + counter);
        }
        TopDocs hits = s.Search(new TermQuery(new Term("id", "" + id)), 1);
        Assert.AreEqual(1, hits.TotalHits);
        int docID = hits.ScoreDocs[0].Doc;
        Document doc = s.Doc(docID);
        int endCounter = counter + fieldsPerDoc[id];
        while (counter < endCounter)
        {
            string name = "f" + counter;
            int fieldID = counter % 10;

            bool stored = (counter & 1) == 0 || fieldID == 3;
            bool binary = fieldID == 3;
            bool indexed = fieldID != 3;

            string stringValue;
            if (fieldID != 3 && fieldID != 9)
            {
                stringValue = "text " + counter;
            }
            else
            {
                stringValue = null;
            }

            // stored:
            if (stored)
            {
                IIndexableField f = doc.GetField(name);
                Assert.IsNotNull(f, "doc " + id + " doesn't have field f" + counter);
                if (binary)
                {
                    Assert.IsNotNull(f, "doc " + id + " doesn't have field f" + counter);
                    BytesRef b = f.GetBinaryValue();
                    Assert.IsNotNull(b);
                    Assert.AreEqual(10, b.Length);
                    for (int idx = 0; idx < 10; idx++)
                    {
                        Assert.AreEqual((byte)(idx + counter), b.Bytes[b.Offset + idx]);
                    }
                }
                else
                {
                    if (Debugging.AssertsEnabled) Debugging.Assert(stringValue != null);
                    Assert.AreEqual(stringValue, f.GetStringValue());
                }
            }

            if (indexed)
            {
                bool tv = counter % 2 == 1 && fieldID != 9;
                if (tv)
                {
                    Terms tfv = r.GetTermVectors(docID).GetTerms(name);
                    Assert.IsNotNull(tfv);
                    TermsEnum termsEnum = tfv.GetEnumerator();
                    Assert.IsTrue(termsEnum.MoveNext());
                    Assert.AreEqual(new BytesRef("" + counter), termsEnum.Term);
                    Assert.AreEqual(1, termsEnum.TotalTermFreq);
                    DocsAndPositionsEnum dpEnum = termsEnum.DocsAndPositions(null, null);
                    Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                    Assert.AreEqual(1, dpEnum.Freq);
                    Assert.AreEqual(1, dpEnum.NextPosition());

                    Assert.IsTrue(termsEnum.MoveNext());
                    Assert.AreEqual(new BytesRef("text"), termsEnum.Term);
                    Assert.AreEqual(1, termsEnum.TotalTermFreq);
                    dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
                    Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                    Assert.AreEqual(1, dpEnum.Freq);
                    Assert.AreEqual(0, dpEnum.NextPosition());

                    Assert.IsFalse(termsEnum.MoveNext());

                    // TODO: offsets
                }
                else
                {
                    Fields vectors = r.GetTermVectors(docID);
                    Assert.IsTrue(vectors == null || vectors.GetTerms(name) == null);
                }

                BooleanQuery bq = new BooleanQuery();
                bq.Add(new TermQuery(new Term("id", "" + id)), Occur.MUST);
                bq.Add(new TermQuery(new Term(name, "text")), Occur.MUST);
                TopDocs hits2 = s.Search(bq, 1);
                Assert.AreEqual(1, hits2.TotalHits);
                Assert.AreEqual(docID, hits2.ScoreDocs[0].Doc);

                bq = new BooleanQuery();
                bq.Add(new TermQuery(new Term("id", "" + id)), Occur.MUST);
                bq.Add(new TermQuery(new Term(name, "" + counter)), Occur.MUST);
                TopDocs hits3 = s.Search(bq, 1);
                Assert.AreEqual(1, hits3.TotalHits);
                Assert.AreEqual(docID, hits3.ScoreDocs[0].Doc);
            }

            counter++;
        }
    }

    r.Dispose();
    dir.Dispose();
}
private void Verify(AtomicReader r, int[][] idToOrds, BytesRef[] termsArray, BytesRef prefixRef)
{
    DocTermOrds dto = new DocTermOrds(r, r.LiveDocs, "field", prefixRef, int.MaxValue, TestUtil.NextInt32(Random, 2, 10));

    FieldCache.Int32s docIDToID = FieldCache.DEFAULT.GetInt32s(r, "id", false);
    /*
     * for(int docID=0;docID<subR.MaxDoc;docID++) {
     *   System.out.println(" docID=" + docID + " id=" + docIDToID[docID]);
     * }
     */

    if (Verbose)
    {
        Console.WriteLine("TEST: verify prefix=" + (prefixRef == null ? "null" : prefixRef.Utf8ToString()));
        Console.WriteLine("TEST: all TERMS:");
        TermsEnum allTE = MultiFields.GetTerms(r, "field").GetEnumerator();
        int ord = 0;
        while (allTE.MoveNext())
        {
            Console.WriteLine(" ord=" + (ord++) + " term=" + allTE.Term.Utf8ToString());
        }
    }

    //final TermsEnum te = subR.Fields.Terms("field").iterator();
    TermsEnum te = dto.GetOrdTermsEnum(r);
    if (dto.NumTerms == 0)
    {
        if (prefixRef == null)
        {
            Assert.IsNull(MultiFields.GetTerms(r, "field"));
        }
        else
        {
            Terms terms = MultiFields.GetTerms(r, "field");
            if (terms != null)
            {
                TermsEnum termsEnum = terms.GetEnumerator();
                TermsEnum.SeekStatus result = termsEnum.SeekCeil(prefixRef);
                if (result != TermsEnum.SeekStatus.END)
                {
                    Assert.IsFalse(StringHelper.StartsWith(termsEnum.Term, prefixRef), "term=" + termsEnum.Term.Utf8ToString() + " matches prefix=" + prefixRef.Utf8ToString());
                }
                else
                {
                    // ok
                }
            }
            else
            {
                // ok
            }
        }
        return;
    }

    if (Verbose)
    {
        Console.WriteLine("TEST: TERMS:");
        te.SeekExact(0);
        while (true)
        {
            Console.WriteLine(" ord=" + te.Ord + " term=" + te.Term.Utf8ToString());
            if (!te.MoveNext())
            {
                break;
            }
        }
    }

    SortedSetDocValues iter = dto.GetIterator(r);
    for (int docID = 0; docID < r.MaxDoc; docID++)
    {
        if (Verbose)
        {
            Console.WriteLine("TEST: docID=" + docID + " of " + r.MaxDoc + " (id=" + docIDToID.Get(docID) + ")");
        }
        iter.SetDocument(docID);
        int[] answers = idToOrds[docIDToID.Get(docID)];
        int upto = 0;
        long ord;
        while ((ord = iter.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
        {
            te.SeekExact(ord);
            BytesRef expected = termsArray[answers[upto++]];
            if (Verbose)
            {
                Console.WriteLine(" exp=" + expected.Utf8ToString() + " actual=" + te.Term.Utf8ToString());
            }
            Assert.AreEqual(expected, te.Term, "expected=" + expected.Utf8ToString() + " actual=" + te.Term.Utf8ToString() + " ord=" + ord);
        }
        Assert.AreEqual(answers.Length, upto);
    }
}