예제 #1
0
 internal SegmentResult(int[] counts, int total, TermsEnum tenum, int startFacetOrd, int endFacetOrd)
                 : base(counts, total - counts[0], counts[0], endFacetOrd + 1)
 {
     this.tenum = tenum;
     this.mergePos = startFacetOrd == -1 ? 1 : startFacetOrd + 1;
     if (mergePos < maxTermPos)
     {
         Debug.Assert(tenum != null);
         tenum.SeekExact(startFacetOrd == -1 ? 0 : startFacetOrd);
         mergeTerm = tenum.Term();
     }
 }
예제 #2
0
        public virtual void TestRandom()
        {
            // token -> docID -> tokens
            IDictionary <string, IDictionary <int?, IList <Token> > > actualTokens = new Dictionary <string, IDictionary <int?, IList <Token> > >();

            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(Random, dir, iwc);

            int numDocs = AtLeast(20);
            //final int numDocs = AtLeast(5);

            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);

            // TODO: randomize what IndexOptions we use; also test
            // changing this up in one IW buffered segment...:
            ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            if (Random.NextBoolean())
            {
                ft.StoreTermVectors         = true;
                ft.StoreTermVectorOffsets   = Random.NextBoolean();
                ft.StoreTermVectorPositions = Random.NextBoolean();
            }

            for (int docCount = 0; docCount < numDocs; docCount++)
            {
                Document doc = new Document();
                doc.Add(new Int32Field("id", docCount, Field.Store.NO));
                IList <Token> tokens    = new List <Token>();
                int           numTokens = AtLeast(100);
                //final int numTokens = AtLeast(20);
                int pos    = -1;
                int offset = 0;
                //System.out.println("doc id=" + docCount);
                for (int tokenCount = 0; tokenCount < numTokens; tokenCount++)
                {
                    string text;
                    if (Random.NextBoolean())
                    {
                        text = "a";
                    }
                    else if (Random.NextBoolean())
                    {
                        text = "b";
                    }
                    else if (Random.NextBoolean())
                    {
                        text = "c";
                    }
                    else
                    {
                        text = "d";
                    }

                    int posIncr = Random.NextBoolean() ? 1 : Random.Next(5);
                    if (tokenCount == 0 && posIncr == 0)
                    {
                        posIncr = 1;
                    }
                    int offIncr     = Random.NextBoolean() ? 0 : Random.Next(5);
                    int tokenOffset = Random.Next(5);

                    Token token = MakeToken(text, posIncr, offset + offIncr, offset + offIncr + tokenOffset);
                    if (!actualTokens.TryGetValue(text, out IDictionary <int?, IList <Token> > postingsByDoc))
                    {
                        actualTokens[text] = postingsByDoc = new Dictionary <int?, IList <Token> >();
                    }
                    if (!postingsByDoc.TryGetValue(docCount, out IList <Token> postings))
                    {
                        postingsByDoc[docCount] = postings = new List <Token>();
                    }
                    postings.Add(token);
                    tokens.Add(token);
                    pos += posIncr;
                    // stuff abs position into type:
                    token.Type = "" + pos;
                    offset    += offIncr + tokenOffset;
                    //System.out.println("  " + token + " posIncr=" + token.getPositionIncrement() + " pos=" + pos + " off=" + token.StartOffset + "/" + token.EndOffset + " (freq=" + postingsByDoc.Get(docCount).Size() + ")");
                }
                doc.Add(new Field("content", new CannedTokenStream(tokens.ToArray()), ft));
                w.AddDocument(doc);
            }
            DirectoryReader r = w.GetReader();

            w.Dispose();

            string[] terms = new string[] { "a", "b", "c", "d" };
            foreach (AtomicReaderContext ctx in r.Leaves)
            {
                // TODO: improve this
                AtomicReader sub = (AtomicReader)ctx.Reader;
                //System.out.println("\nsub=" + sub);
                TermsEnum            termsEnum                  = sub.Fields.GetTerms("content").GetEnumerator();
                DocsEnum             docs                       = null;
                DocsAndPositionsEnum docsAndPositions           = null;
                DocsAndPositionsEnum docsAndPositionsAndOffsets = null;
                FieldCache.Int32s    docIDToID                  = FieldCache.DEFAULT.GetInt32s(sub, "id", false);
                foreach (string term in terms)
                {
                    //System.out.println("  term=" + term);
                    if (termsEnum.SeekExact(new BytesRef(term)))
                    {
                        docs = termsEnum.Docs(null, docs);
                        Assert.IsNotNull(docs);
                        int doc;
                        //System.out.println("    doc/freq");
                        while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                        {
                            IList <Token> expected = actualTokens[term][docIDToID.Get(doc)];
                            //System.out.println("      doc=" + docIDToID.Get(doc) + " docID=" + doc + " " + expected.Size() + " freq");
                            Assert.IsNotNull(expected);
                            Assert.AreEqual(expected.Count, docs.Freq);
                        }

                        // explicitly exclude offsets here
                        docsAndPositions = termsEnum.DocsAndPositions(null, docsAndPositions, DocsAndPositionsFlags.PAYLOADS);
                        Assert.IsNotNull(docsAndPositions);
                        //System.out.println("    doc/freq/pos");
                        while ((doc = docsAndPositions.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                        {
                            IList <Token> expected = actualTokens[term][docIDToID.Get(doc)];
                            //System.out.println("      doc=" + docIDToID.Get(doc) + " " + expected.Size() + " freq");
                            Assert.IsNotNull(expected);
                            Assert.AreEqual(expected.Count, docsAndPositions.Freq);
                            foreach (Token token in expected)
                            {
                                int pos = Convert.ToInt32(token.Type);
                                //System.out.println("        pos=" + pos);
                                Assert.AreEqual(pos, docsAndPositions.NextPosition());
                            }
                        }

                        docsAndPositionsAndOffsets = termsEnum.DocsAndPositions(null, docsAndPositions);
                        Assert.IsNotNull(docsAndPositionsAndOffsets);
                        //System.out.println("    doc/freq/pos/offs");
                        while ((doc = docsAndPositionsAndOffsets.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                        {
                            IList <Token> expected = actualTokens[term][docIDToID.Get(doc)];
                            //System.out.println("      doc=" + docIDToID.Get(doc) + " " + expected.Size() + " freq");
                            Assert.IsNotNull(expected);
                            Assert.AreEqual(expected.Count, docsAndPositionsAndOffsets.Freq);
                            foreach (Token token in expected)
                            {
                                int pos = Convert.ToInt32(token.Type);
                                //System.out.println("        pos=" + pos);
                                Assert.AreEqual(pos, docsAndPositionsAndOffsets.NextPosition());
                                Assert.AreEqual(token.StartOffset, docsAndPositionsAndOffsets.StartOffset);
                                Assert.AreEqual(token.EndOffset, docsAndPositionsAndOffsets.EndOffset);
                            }
                        }
                    }
                }
                // TODO: test advance:
            }
            r.Dispose();
            dir.Dispose();
        }
예제 #3
0
        private void Verify(AtomicReader r, int[][] idToOrds, BytesRef[] termsArray, BytesRef prefixRef)
        {
            DocTermOrds dto = new DocTermOrds(r, r.LiveDocs, "field", prefixRef, int.MaxValue, TestUtil.NextInt32(Random, 2, 10));

            FieldCache.Int32s docIDToID = FieldCache.DEFAULT.GetInt32s(r, "id", false);

            /*
             * for(int docID=0;docID<subR.MaxDoc;docID++) {
             * System.out.println("  docID=" + docID + " id=" + docIDToID[docID]);
             * }
             */

            if (Verbose)
            {
                Console.WriteLine("TEST: verify prefix=" + (prefixRef == null ? "null" : prefixRef.Utf8ToString()));
                Console.WriteLine("TEST: all TERMS:");
                TermsEnum allTE = MultiFields.GetTerms(r, "field").GetEnumerator();
                int       ord   = 0;
                while (allTE.MoveNext())
                {
                    Console.WriteLine("  ord=" + (ord++) + " term=" + allTE.Term.Utf8ToString());
                }
            }

            //final TermsEnum te = subR.Fields.Terms("field").iterator();
            TermsEnum te = dto.GetOrdTermsEnum(r);

            if (dto.NumTerms == 0)
            {
                if (prefixRef == null)
                {
                    Assert.IsNull(MultiFields.GetTerms(r, "field"));
                }
                else
                {
                    Terms terms = MultiFields.GetTerms(r, "field");
                    if (terms != null)
                    {
                        TermsEnum            termsEnum = terms.GetEnumerator();
                        TermsEnum.SeekStatus result    = termsEnum.SeekCeil(prefixRef);
                        if (result != TermsEnum.SeekStatus.END)
                        {
                            Assert.IsFalse(StringHelper.StartsWith(termsEnum.Term, prefixRef), "term=" + termsEnum.Term.Utf8ToString() + " matches prefix=" + prefixRef.Utf8ToString());
                        }
                        else
                        {
                            // ok
                        }
                    }
                    else
                    {
                        // ok
                    }
                }
                return;
            }

            if (Verbose)
            {
                Console.WriteLine("TEST: TERMS:");
                te.SeekExact(0);
                while (true)
                {
                    Console.WriteLine("  ord=" + te.Ord + " term=" + te.Term.Utf8ToString());
                    if (!te.MoveNext())
                    {
                        break;
                    }
                }
            }

            SortedSetDocValues iter = dto.GetIterator(r);

            for (int docID = 0; docID < r.MaxDoc; docID++)
            {
                if (Verbose)
                {
                    Console.WriteLine("TEST: docID=" + docID + " of " + r.MaxDoc + " (id=" + docIDToID.Get(docID) + ")");
                }
                iter.SetDocument(docID);
                int[] answers = idToOrds[docIDToID.Get(docID)];
                int   upto    = 0;
                long  ord;
                while ((ord = iter.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                {
                    te.SeekExact(ord);
                    BytesRef expected = termsArray[answers[upto++]];
                    if (Verbose)
                    {
                        Console.WriteLine("  exp=" + expected.Utf8ToString() + " actual=" + te.Term.Utf8ToString());
                    }
                    Assert.AreEqual(expected, te.Term, "expected=" + expected.Utf8ToString() + " actual=" + te.Term.Utf8ToString() + " ord=" + ord);
                }
                Assert.AreEqual(answers.Length, upto);
            }
        }
예제 #4
0
        public virtual void TestSortedTermsEnum()
        {
            Directory         directory = NewDirectory();
            Analyzer          analyzer  = new MockAnalyzer(Random());
            IndexWriterConfig iwconfig  = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);

            iwconfig.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iwriter = new RandomIndexWriter(Random(), directory, iwconfig);

            Document doc = new Document();

            doc.Add(new StringField("field", "hello", Field.Store.NO));
            iwriter.AddDocument(doc);

            doc = new Document();
            doc.Add(new StringField("field", "world", Field.Store.NO));
            iwriter.AddDocument(doc);

            doc = new Document();
            doc.Add(new StringField("field", "beer", Field.Store.NO));
            iwriter.AddDocument(doc);
            iwriter.ForceMerge(1);

            DirectoryReader ireader = iwriter.Reader;

            iwriter.Dispose();

            AtomicReader       ar = GetOnlySegmentReader(ireader);
            SortedSetDocValues dv = FieldCache.DEFAULT.GetDocTermOrds(ar, "field");

            Assert.AreEqual(3, dv.ValueCount);

            TermsEnum termsEnum = dv.GetTermsEnum();

            // next()
            Assert.AreEqual("beer", termsEnum.Next().Utf8ToString());
            Assert.AreEqual(0, termsEnum.Ord);
            Assert.AreEqual("hello", termsEnum.Next().Utf8ToString());
            Assert.AreEqual(1, termsEnum.Ord);
            Assert.AreEqual("world", termsEnum.Next().Utf8ToString());
            Assert.AreEqual(2, termsEnum.Ord);

            // seekCeil()
            Assert.AreEqual(SeekStatus.NOT_FOUND, termsEnum.SeekCeil(new BytesRef("ha!")));
            Assert.AreEqual("hello", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(1, termsEnum.Ord);
            Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef("beer")));
            Assert.AreEqual("beer", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(0, termsEnum.Ord);
            Assert.AreEqual(SeekStatus.END, termsEnum.SeekCeil(new BytesRef("zzz")));

            // seekExact()
            Assert.IsTrue(termsEnum.SeekExact(new BytesRef("beer")));
            Assert.AreEqual("beer", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(0, termsEnum.Ord);
            Assert.IsTrue(termsEnum.SeekExact(new BytesRef("hello")));
            Assert.AreEqual("hello", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(1, termsEnum.Ord);
            Assert.IsTrue(termsEnum.SeekExact(new BytesRef("world")));
            Assert.AreEqual("world", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(2, termsEnum.Ord);
            Assert.IsFalse(termsEnum.SeekExact(new BytesRef("bogus")));

            // seek(ord)
            termsEnum.SeekExact(0);
            Assert.AreEqual("beer", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(0, termsEnum.Ord);
            termsEnum.SeekExact(1);
            Assert.AreEqual("hello", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(1, termsEnum.Ord);
            termsEnum.SeekExact(2);
            Assert.AreEqual("world", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(2, termsEnum.Ord);
            ireader.Dispose();
            directory.Dispose();
        }
예제 #5
0
 /// <summary>
 /// Returns the term (<see cref="BytesRef"/>) corresponding to
 /// the provided ordinal.
 /// </summary>
 public virtual BytesRef LookupTerm(TermsEnum termsEnum, int ord)
 {
     termsEnum.SeekExact(ord);
     return(termsEnum.Term);
 }
        private void AssertEquals(long numOrds, TermsEnum expected, TermsEnum actual)
        {
            BytesRef @ref;

            // sequential next() through all terms
            while ((@ref = expected.Next()) != null)
            {
                Assert.AreEqual(@ref, actual.Next());
                Assert.AreEqual(expected.Ord(), actual.Ord());
                Assert.AreEqual(expected.Term(), actual.Term());
            }
            Assert.IsNull(actual.Next());

            // sequential seekExact(ord) through all terms
            for (long i = 0; i < numOrds; i++)
            {
                expected.SeekExact(i);
                actual.SeekExact(i);
                Assert.AreEqual(expected.Ord(), actual.Ord());
                Assert.AreEqual(expected.Term(), actual.Term());
            }

            // sequential seekExact(BytesRef) through all terms
            for (long i = 0; i < numOrds; i++)
            {
                expected.SeekExact(i);
                Assert.IsTrue(actual.SeekExact(expected.Term()));
                Assert.AreEqual(expected.Ord(), actual.Ord());
                Assert.AreEqual(expected.Term(), actual.Term());
            }

            // sequential seekCeil(BytesRef) through all terms
            for (long i = 0; i < numOrds; i++)
            {
                expected.SeekExact(i);
                Assert.AreEqual(SeekStatus.FOUND, actual.SeekCeil(expected.Term()));
                Assert.AreEqual(expected.Ord(), actual.Ord());
                Assert.AreEqual(expected.Term(), actual.Term());
            }

            // random seekExact(ord)
            for (long i = 0; i < numOrds; i++)
            {
                long randomOrd = TestUtil.NextLong(Random(), 0, numOrds - 1);
                expected.SeekExact(randomOrd);
                actual.SeekExact(randomOrd);
                Assert.AreEqual(expected.Ord(), actual.Ord());
                Assert.AreEqual(expected.Term(), actual.Term());
            }

            // random seekExact(BytesRef)
            for (long i = 0; i < numOrds; i++)
            {
                long randomOrd = TestUtil.NextLong(Random(), 0, numOrds - 1);
                expected.SeekExact(randomOrd);
                actual.SeekExact(expected.Term());
                Assert.AreEqual(expected.Ord(), actual.Ord());
                Assert.AreEqual(expected.Term(), actual.Term());
            }

            // random seekCeil(BytesRef)
            for (long i = 0; i < numOrds; i++)
            {
                BytesRef target = new BytesRef(TestUtil.RandomUnicodeString(Random()));
                SeekStatus expectedStatus = expected.SeekCeil(target);
                Assert.AreEqual(expectedStatus, actual.SeekCeil(target));
                if (expectedStatus != SeekStatus.END)
                {
                    Assert.AreEqual(expected.Ord(), actual.Ord());
                    Assert.AreEqual(expected.Term(), actual.Term());
                }
            }
        }
예제 #7
0
 // sugar
 private bool SeekExact(TermsEnum te, string term)
 {
     return te.SeekExact(new BytesRef(term));
 }
예제 #8
0
 public override void SeekExact(long ord)
 {
     m_input.SeekExact(ord);
 }
예제 #9
0
 public override void SeekExact(long ord)
 {
     @in.SeekExact(ord);
 }
예제 #10
0
 // sugar
 private bool SeekExact(TermsEnum te, string term)
 {
     return(te.SeekExact(new BytesRef(term)));
 }
예제 #11
0
        private void TestRandomSeeks(IndexReader r, params string[] validTermStrings)
        {
            BytesRef[] validTerms = new BytesRef[validTermStrings.Length];
            for (int termIDX = 0; termIDX < validTermStrings.Length; termIDX++)
            {
                validTerms[termIDX] = new BytesRef(validTermStrings[termIDX]);
            }
            Array.Sort(validTerms);
            if (Verbose)
            {
                Console.WriteLine("TEST: " + validTerms.Length + " terms:");
                foreach (BytesRef t in validTerms)
                {
                    Console.WriteLine("  " + t.Utf8ToString() + " " + t);
                }
            }
            TermsEnum te = MultiFields.GetTerms(r, FIELD).GetEnumerator();

            int END_LOC = -validTerms.Length - 1;

            IList <TermAndState> termStates = new List <TermAndState>();

            for (int iter = 0; iter < 100 * RandomMultiplier; iter++)
            {
                BytesRef  t;
                int       loc;
                TermState termState;
                if (Random.Next(6) == 4)
                {
                    // pick term that doens't exist:
                    t         = GetNonExistTerm(validTerms);
                    termState = null;
                    if (Verbose)
                    {
                        Console.WriteLine("\nTEST: invalid term=" + t.Utf8ToString());
                    }
                    loc = Array.BinarySearch(validTerms, t);
                }
                else if (termStates.Count != 0 && Random.Next(4) == 1)
                {
                    TermAndState ts = termStates[Random.Next(termStates.Count)];
                    t   = ts.Term;
                    loc = Array.BinarySearch(validTerms, t);
                    Assert.IsTrue(loc >= 0);
                    termState = ts.State;
                    if (Verbose)
                    {
                        Console.WriteLine("\nTEST: valid termState term=" + t.Utf8ToString());
                    }
                }
                else
                {
                    // pick valid term
                    loc       = Random.Next(validTerms.Length);
                    t         = BytesRef.DeepCopyOf(validTerms[loc]);
                    termState = null;
                    if (Verbose)
                    {
                        Console.WriteLine("\nTEST: valid term=" + t.Utf8ToString());
                    }
                }

                // seekCeil or seekExact:
                bool doSeekExact = Random.NextBoolean();
                if (termState != null)
                {
                    if (Verbose)
                    {
                        Console.WriteLine("  seekExact termState");
                    }
                    te.SeekExact(t, termState);
                }
                else if (doSeekExact)
                {
                    if (Verbose)
                    {
                        Console.WriteLine("  seekExact");
                    }
                    Assert.AreEqual(loc >= 0, te.SeekExact(t));
                }
                else
                {
                    if (Verbose)
                    {
                        Console.WriteLine("  seekCeil");
                    }

                    TermsEnum.SeekStatus result = te.SeekCeil(t);
                    if (Verbose)
                    {
                        Console.WriteLine("  got " + result);
                    }

                    if (loc >= 0)
                    {
                        Assert.AreEqual(TermsEnum.SeekStatus.FOUND, result);
                    }
                    else if (loc == END_LOC)
                    {
                        Assert.AreEqual(TermsEnum.SeekStatus.END, result);
                    }
                    else
                    {
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(loc >= -validTerms.Length);
                        }
                        Assert.AreEqual(TermsEnum.SeekStatus.NOT_FOUND, result);
                    }
                }

                if (loc >= 0)
                {
                    Assert.AreEqual(t, te.Term);
                }
                else if (doSeekExact)
                {
                    // TermsEnum is unpositioned if seekExact returns false
                    continue;
                }
                else if (loc == END_LOC)
                {
                    continue;
                }
                else
                {
                    loc = -loc - 1;
                    Assert.AreEqual(validTerms[loc], te.Term);
                }

                // Do a bunch of next's after the seek
                int numNext = Random.Next(validTerms.Length);

                for (int nextCount = 0; nextCount < numNext; nextCount++)
                {
                    if (Verbose)
                    {
                        Console.WriteLine("\nTEST: next loc=" + loc + " of " + validTerms.Length);
                    }
                    bool moved = te.MoveNext();
                    //BytesRef t2 = te.Term;
                    loc++;
                    if (loc == validTerms.Length)
                    {
                        //Assert.IsNull(t2); // LUCENENET specific - accessing the Term after MoveNext() returns false results in an assertion failure
                        Assert.IsFalse(moved);
                        break;
                    }
                    else
                    {
                        Assert.AreEqual(validTerms[loc], te.Term);
                        if (Random.Next(40) == 17 && termStates.Count < 100)
                        {
                            termStates.Add(new TermAndState(validTerms[loc], te.GetTermState()));
                        }
                    }
                }
            }
        }
예제 #12
0
        public virtual void Test()
        {
            Random       random   = new Random(Random.Next());
            LineFileDocs docs     = new LineFileDocs(random, DefaultCodecSupportsDocValues);
            Directory    d        = NewDirectory();
            MockAnalyzer analyzer = new MockAnalyzer(LuceneTestCase.Random);

            analyzer.MaxTokenLength = TestUtil.NextInt32(LuceneTestCase.Random, 1, IndexWriter.MAX_TERM_LENGTH);
            RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                LuceneTestCase.Random, d, analyzer);
            int numDocs = AtLeast(10);

            for (int docCount = 0; docCount < numDocs; docCount++)
            {
                w.AddDocument(docs.NextDoc());
            }
            IndexReader r = w.GetReader();

            w.Dispose();

            List <BytesRef> terms     = new List <BytesRef>();
            TermsEnum       termsEnum = MultiFields.GetTerms(r, "body").GetEnumerator();

            while (termsEnum.MoveNext())
            {
                terms.Add(BytesRef.DeepCopyOf(termsEnum.Term));
            }
            if (Verbose)
            {
                Console.WriteLine("TEST: " + terms.Count + " terms");
            }

            int upto  = -1;
            int iters = AtLeast(200);

            for (int iter = 0; iter < iters; iter++)
            {
                bool isEnd;
                if (upto != -1 && LuceneTestCase.Random.NextBoolean())
                {
                    // next
                    if (Verbose)
                    {
                        Console.WriteLine("TEST: iter next");
                    }
                    isEnd = termsEnum.MoveNext() == false;
                    upto++;
                    if (isEnd)
                    {
                        if (Verbose)
                        {
                            Console.WriteLine("  end");
                        }
                        Assert.AreEqual(upto, terms.Count);
                        upto = -1;
                    }
                    else
                    {
                        if (Verbose)
                        {
                            Console.WriteLine("  got term=" + termsEnum.Term.Utf8ToString() + " expected=" + terms[upto].Utf8ToString());
                        }
                        Assert.IsTrue(upto < terms.Count);
                        Assert.AreEqual(terms[upto], termsEnum.Term);
                    }
                }
                else
                {
                    BytesRef target;
                    string   exists;
                    if (LuceneTestCase.Random.NextBoolean())
                    {
                        // likely fake term
                        if (LuceneTestCase.Random.NextBoolean())
                        {
                            target = new BytesRef(TestUtil.RandomSimpleString(LuceneTestCase.Random));
                        }
                        else
                        {
                            target = new BytesRef(TestUtil.RandomRealisticUnicodeString(LuceneTestCase.Random));
                        }
                        exists = "likely not";
                    }
                    else
                    {
                        // real term
                        target = terms[LuceneTestCase.Random.Next(terms.Count)];
                        exists = "yes";
                    }

                    upto = terms.BinarySearch(target);

                    if (LuceneTestCase.Random.NextBoolean())
                    {
                        if (Verbose)
                        {
                            Console.WriteLine("TEST: iter seekCeil target=" + target.Utf8ToString() + " exists=" + exists);
                        }
                        // seekCeil
                        TermsEnum.SeekStatus status = termsEnum.SeekCeil(target);
                        if (Verbose)
                        {
                            Console.WriteLine("  got " + status);
                        }

                        if (upto < 0)
                        {
                            upto = -(upto + 1);
                            if (upto >= terms.Count)
                            {
                                Assert.AreEqual(TermsEnum.SeekStatus.END, status);
                                upto = -1;
                            }
                            else
                            {
                                Assert.AreEqual(TermsEnum.SeekStatus.NOT_FOUND, status);
                                Assert.AreEqual(terms[upto], termsEnum.Term);
                            }
                        }
                        else
                        {
                            Assert.AreEqual(TermsEnum.SeekStatus.FOUND, status);
                            Assert.AreEqual(terms[upto], termsEnum.Term);
                        }
                    }
                    else
                    {
                        if (Verbose)
                        {
                            Console.WriteLine("TEST: iter seekExact target=" + target.Utf8ToString() + " exists=" + exists);
                        }
                        // seekExact
                        bool result = termsEnum.SeekExact(target);
                        if (Verbose)
                        {
                            Console.WriteLine("  got " + result);
                        }
                        if (upto < 0)
                        {
                            Assert.IsFalse(result);
                            upto = -1;
                        }
                        else
                        {
                            Assert.IsTrue(result);
                            Assert.AreEqual(target, termsEnum.Term);
                        }
                    }
                }
            }

            r.Dispose();
            d.Dispose();
            docs.Dispose();
        }
예제 #13
0
        // DocValues updates
        private void ApplyDocValuesUpdates <T1>(IEnumerable <T1> updates, ReadersAndUpdates rld, SegmentReader reader, DocValuesFieldUpdates.Container dvUpdatesContainer) where T1 : DocValuesUpdate
        {
            lock (this)
            {
                Fields fields = reader.Fields;
                if (fields == null)
                {
                    // this reader has no postings
                    return;
                }

                // TODO: we can process the updates per DV field, from last to first so that
                // if multiple terms affect same document for the same field, we add an update
                // only once (that of the last term). To do that, we can keep a bitset which
                // marks which documents have already been updated. So e.g. if term T1
                // updates doc 7, and then we process term T2 and it updates doc 7 as well,
                // we don't apply the update since we know T1 came last and therefore wins
                // the update.
                // We can also use that bitset as 'liveDocs' to pass to TermEnum.docs(), so
                // that these documents aren't even returned.

                string    currentField = null;
                TermsEnum termsEnum    = null;
                DocsEnum  docs         = null;

                //System.out.println(Thread.currentThread().getName() + " numericDVUpdate reader=" + reader);
                foreach (DocValuesUpdate update in updates)
                {
                    Term term  = update.term;
                    int  limit = update.docIDUpto;

                    // TODO: we traverse the terms in update order (not term order) so that we
                    // apply the updates in the correct order, i.e. if two terms udpate the
                    // same document, the last one that came in wins, irrespective of the
                    // terms lexical order.
                    // we can apply the updates in terms order if we keep an updatesGen (and
                    // increment it with every update) and attach it to each NumericUpdate. Note
                    // that we cannot rely only on docIDUpto because an app may send two updates
                    // which will get same docIDUpto, yet will still need to respect the order
                    // those updates arrived.

                    if (!string.Equals(term.Field, currentField, StringComparison.Ordinal))
                    {
                        // if we change the code to process updates in terms order, enable this assert
                        //        assert currentField == null || currentField.CompareToOrdinal(term.Field) < 0;
                        currentField = term.Field;
                        Terms terms = fields.GetTerms(currentField);
                        if (terms != null)
                        {
                            termsEnum = terms.GetIterator(termsEnum);
                        }
                        else
                        {
                            termsEnum = null;
                            continue; // no terms in that field
                        }
                    }

                    if (termsEnum == null)
                    {
                        continue;
                    }
                    // System.out.println("  term=" + term);

                    if (termsEnum.SeekExact(term.Bytes))
                    {
                        // we don't need term frequencies for this
                        DocsEnum docsEnum = termsEnum.Docs(rld.LiveDocs, docs, DocsFlags.NONE);

                        //System.out.println("BDS: got docsEnum=" + docsEnum);

                        DocValuesFieldUpdates dvUpdates = dvUpdatesContainer.GetUpdates(update.field, update.type);
                        if (dvUpdates == null)
                        {
                            dvUpdates = dvUpdatesContainer.NewUpdates(update.field, update.type, reader.MaxDoc);
                        }
                        int doc;
                        while ((doc = docsEnum.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                        {
                            //System.out.println(Thread.currentThread().getName() + " numericDVUpdate term=" + term + " doc=" + docID);
                            if (doc >= limit)
                            {
                                break; // no more docs that can be updated for this term
                            }
                            dvUpdates.Add(doc, update.value);
                        }
                    }
                }
            }
        }
예제 #14
0
        // Delete by Term
        private long ApplyTermDeletes(IEnumerable <Term> termsIter, ReadersAndUpdates rld, SegmentReader reader)
        {
            lock (this)
            {
                long   delCount = 0;
                Fields fields   = reader.Fields;
                if (fields == null)
                {
                    // this reader has no postings
                    return(0);
                }

                TermsEnum termsEnum = null;

                string   currentField = null;
                DocsEnum docs         = null;

                Debug.Assert(CheckDeleteTerm(null));

                bool any = false;

                //System.out.println(Thread.currentThread().getName() + " del terms reader=" + reader);
                foreach (Term term in termsIter)
                {
                    // Since we visit terms sorted, we gain performance
                    // by re-using the same TermsEnum and seeking only
                    // forwards
                    if (!string.Equals(term.Field, currentField, StringComparison.Ordinal))
                    {
                        Debug.Assert(currentField == null || currentField.CompareToOrdinal(term.Field) < 0);
                        currentField = term.Field;
                        Terms terms = fields.GetTerms(currentField);
                        if (terms != null)
                        {
                            termsEnum = terms.GetIterator(termsEnum);
                        }
                        else
                        {
                            termsEnum = null;
                        }
                    }

                    if (termsEnum == null)
                    {
                        continue;
                    }
                    Debug.Assert(CheckDeleteTerm(term));

                    // System.out.println("  term=" + term);

                    if (termsEnum.SeekExact(term.Bytes))
                    {
                        // we don't need term frequencies for this
                        DocsEnum docsEnum = termsEnum.Docs(rld.LiveDocs, docs, DocsFlags.NONE);
                        //System.out.println("BDS: got docsEnum=" + docsEnum);

                        if (docsEnum != null)
                        {
                            while (true)
                            {
                                int docID = docsEnum.NextDoc();
                                //System.out.println(Thread.currentThread().getName() + " del term=" + term + " doc=" + docID);
                                if (docID == DocIdSetIterator.NO_MORE_DOCS)
                                {
                                    break;
                                }
                                if (!any)
                                {
                                    rld.InitWritableLiveDocs();
                                    any = true;
                                }
                                // NOTE: there is no limit check on the docID
                                // when deleting by Term (unlike by Query)
                                // because on flush we apply all Term deletes to
                                // each segment.  So all Term deleting here is
                                // against prior segments:
                                if (rld.Delete(docID))
                                {
                                    delCount++;
                                }
                            }
                        }
                    }
                }

                return(delCount);
            }
        }
 protected override void FillDocsAndScores(FixedBitSet matchingDocs, Bits acceptDocs,
     TermsEnum termsEnum)
 {
     BytesRef spare = new BytesRef();
     DocsEnum docsEnum = null;
     for (int i = 0; i < outerInstance._terms.Size(); i++)
     {
         if (termsEnum.SeekExact(outerInstance._terms.Get(outerInstance._ords[i], spare)))
         {
             docsEnum = termsEnum.Docs(acceptDocs, docsEnum, FLAG_NONE);
             float score = outerInstance._scores[outerInstance._ords[i]];
             for (int doc = docsEnum.NextDoc();
                 doc != NO_MORE_DOCS;
                 doc = docsEnum.NextDoc())
             {
                 // I prefer this:
                 /*if (scores[doc] < score) {
                   scores[doc] = score;
                   matchingDocs.set(doc);
                 }*/
                 // But this behaves the same as MVInnerScorer and only then the tests will pass:
                 if (!matchingDocs.Get(doc))
                 {
                     scores[doc] = score;
                     matchingDocs.Set(doc);
                 }
             }
         }
     }
 }
예제 #16
0
            public virtual void _run()
            {
                for (int iter = 0; iter < NUM_TEST_ITER; iter++)
                {
                    FieldData field     = Fields[Random().Next(Fields.Length)];
                    TermsEnum termsEnum = TermsDict.Terms(field.FieldInfo.Name).Iterator(null);
                    if (Si.Codec is Lucene3xCodec)
                    {
                        // code below expects unicode sort order
                        continue;
                    }

                    int upto = 0;
                    // Test straight enum of the terms:
                    while (true)
                    {
                        BytesRef term = termsEnum.Next();
                        if (term == null)
                        {
                            break;
                        }
                        BytesRef expected = new BytesRef(field.Terms[upto++].Text2);
                        Assert.IsTrue(expected.BytesEquals(term), "expected=" + expected + " vs actual " + term);
                    }
                    Assert.AreEqual(upto, field.Terms.Length);

                    // Test random seek:
                    TermData             term2  = field.Terms[Random().Next(field.Terms.Length)];
                    TermsEnum.SeekStatus status = termsEnum.SeekCeil(new BytesRef(term2.Text2));
                    Assert.AreEqual(status, TermsEnum.SeekStatus.FOUND);
                    Assert.AreEqual(term2.Docs.Length, termsEnum.DocFreq());
                    if (field.OmitTF)
                    {
                        this.VerifyDocs(term2.Docs, term2.Positions, TestUtil.Docs(Random(), termsEnum, null, null, DocsEnum.FLAG_NONE), false);
                    }
                    else
                    {
                        this.VerifyDocs(term2.Docs, term2.Positions, termsEnum.DocsAndPositions(null, null), true);
                    }

                    // Test random seek by ord:
                    int idx = Random().Next(field.Terms.Length);
                    term2 = field.Terms[idx];
                    bool success = false;
                    try
                    {
                        termsEnum.SeekExact(idx);
                        success = true;
                    }
                    catch (System.NotSupportedException uoe)
                    {
                        // ok -- skip it
                    }
                    if (success)
                    {
                        Assert.AreEqual(status, TermsEnum.SeekStatus.FOUND);
                        Assert.IsTrue(termsEnum.Term().BytesEquals(new BytesRef(term2.Text2)));
                        Assert.AreEqual(term2.Docs.Length, termsEnum.DocFreq());
                        if (field.OmitTF)
                        {
                            this.VerifyDocs(term2.Docs, term2.Positions, TestUtil.Docs(Random(), termsEnum, null, null, DocsEnum.FLAG_NONE), false);
                        }
                        else
                        {
                            this.VerifyDocs(term2.Docs, term2.Positions, termsEnum.DocsAndPositions(null, null), true);
                        }
                    }

                    // Test seek to non-existent terms:
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: seek non-exist terms");
                    }
                    for (int i = 0; i < 100; i++)
                    {
                        string text2 = TestUtil.RandomUnicodeString(Random()) + ".";
                        status = termsEnum.SeekCeil(new BytesRef(text2));
                        Assert.IsTrue(status == TermsEnum.SeekStatus.NOT_FOUND || status == TermsEnum.SeekStatus.END);
                    }

                    // Seek to each term, backwards:
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: seek terms backwards");
                    }
                    for (int i = field.Terms.Length - 1; i >= 0; i--)
                    {
                        Assert.AreEqual(TermsEnum.SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef(field.Terms[i].Text2)), Thread.CurrentThread.Name + ": field=" + field.FieldInfo.Name + " term=" + field.Terms[i].Text2);
                        Assert.AreEqual(field.Terms[i].Docs.Length, termsEnum.DocFreq());
                    }

                    // Seek to each term by ord, backwards
                    for (int i = field.Terms.Length - 1; i >= 0; i--)
                    {
                        try
                        {
                            termsEnum.SeekExact(i);
                            Assert.AreEqual(field.Terms[i].Docs.Length, termsEnum.DocFreq());
                            Assert.IsTrue(termsEnum.Term().BytesEquals(new BytesRef(field.Terms[i].Text2)));
                        }
                        catch (System.NotSupportedException uoe)
                        {
                        }
                    }

                    // Seek to non-existent empty-string term
                    status = termsEnum.SeekCeil(new BytesRef(""));
                    Assert.IsNotNull(status);
                    //Assert.AreEqual(TermsEnum.SeekStatus.NOT_FOUND, status);

                    // Make sure we're now pointing to first term
                    Assert.IsTrue(termsEnum.Term().BytesEquals(new BytesRef(field.Terms[0].Text2)));

                    // Test docs enum
                    termsEnum.SeekCeil(new BytesRef(""));
                    upto = 0;
                    do
                    {
                        term2 = field.Terms[upto];
                        if (Random().Next(3) == 1)
                        {
                            DocsEnum             docs;
                            DocsEnum             docsAndFreqs;
                            DocsAndPositionsEnum postings;
                            if (!field.OmitTF)
                            {
                                postings = termsEnum.DocsAndPositions(null, null);
                                if (postings != null)
                                {
                                    docs = docsAndFreqs = postings;
                                }
                                else
                                {
                                    docs = docsAndFreqs = TestUtil.Docs(Random(), termsEnum, null, null, DocsEnum.FLAG_FREQS);
                                }
                            }
                            else
                            {
                                postings     = null;
                                docsAndFreqs = null;
                                docs         = TestUtil.Docs(Random(), termsEnum, null, null, DocsEnum.FLAG_NONE);
                            }
                            Assert.IsNotNull(docs);
                            int  upto2 = -1;
                            bool ended = false;
                            while (upto2 < term2.Docs.Length - 1)
                            {
                                // Maybe skip:
                                int left = term2.Docs.Length - upto2;
                                int doc;
                                if (Random().Next(3) == 1 && left >= 1)
                                {
                                    int inc = 1 + Random().Next(left - 1);
                                    upto2 += inc;
                                    if (Random().Next(2) == 1)
                                    {
                                        doc = docs.Advance(term2.Docs[upto2]);
                                        Assert.AreEqual(term2.Docs[upto2], doc);
                                    }
                                    else
                                    {
                                        doc = docs.Advance(1 + term2.Docs[upto2]);
                                        if (doc == DocIdSetIterator.NO_MORE_DOCS)
                                        {
                                            // skipped past last doc
                                            Debug.Assert(upto2 == term2.Docs.Length - 1);
                                            ended = true;
                                            break;
                                        }
                                        else
                                        {
                                            // skipped to next doc
                                            Debug.Assert(upto2 < term2.Docs.Length - 1);
                                            if (doc >= term2.Docs[1 + upto2])
                                            {
                                                upto2++;
                                            }
                                        }
                                    }
                                }
                                else
                                {
                                    doc = docs.NextDoc();
                                    Assert.IsTrue(doc != -1);
                                    upto2++;
                                }
                                Assert.AreEqual(term2.Docs[upto2], doc);
                                if (!field.OmitTF)
                                {
                                    Assert.AreEqual(term2.Positions[upto2].Length, postings.Freq());
                                    if (Random().Next(2) == 1)
                                    {
                                        this.VerifyPositions(term2.Positions[upto2], postings);
                                    }
                                }
                            }

                            if (!ended)
                            {
                                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docs.NextDoc());
                            }
                        }
                        upto++;
                    } while (termsEnum.Next() != null);

                    Assert.AreEqual(upto, field.Terms.Length);
                }
            }
 protected virtual void FillDocsAndScores(FixedBitSet matchingDocs, Bits acceptDocs,
     TermsEnum termsEnum)
 {
     BytesRef spare = new BytesRef();
     DocsEnum docsEnum = null;
     for (int i = 0; i < outerInstance._terms.Size(); i++)
     {
         if (termsEnum.SeekExact(outerInstance._terms.Get(outerInstance._ords[i], spare)))
         {
             docsEnum = termsEnum.Docs(acceptDocs, docsEnum, FLAG_NONE);
             float score = outerInstance._scores[outerInstance._ords[i]];
             for (int doc = docsEnum.NextDoc();
                 doc != NO_MORE_DOCS;
                 doc = docsEnum.NextDoc())
             {
                 matchingDocs.Set(doc);
                 // In the case the same doc is also related to a another doc, a score might be overwritten. I think this
                 // can only happen in a many-to-many relation
                 scores[doc] = score;
             }
         }
     }
 }
예제 #18
0
        protected virtual void AssertEquals(RandomTokenStream tk, FieldType ft, Terms terms)
        {
            Assert.AreEqual(1, terms.DocCount);
            int termCount = new JCG.HashSet <string>(tk.terms).Count;

            Assert.AreEqual((long)termCount, terms.Count);      // LUCENENET specific - cast required because types don't match (xUnit checks this)
            Assert.AreEqual((long)termCount, terms.SumDocFreq); // LUCENENET specific - cast required because types don't match (xUnit checks this)
            Assert.AreEqual(ft.StoreTermVectorPositions, terms.HasPositions);
            Assert.AreEqual(ft.StoreTermVectorOffsets, terms.HasOffsets);
            Assert.AreEqual(ft.StoreTermVectorPayloads && tk.HasPayloads(), terms.HasPayloads);
            ISet <BytesRef> uniqueTerms = new JCG.HashSet <BytesRef>();

            foreach (string term in tk.freqs.Keys)
            {
                uniqueTerms.Add(new BytesRef(term));
            }
            BytesRef[] sortedTerms = uniqueTerms.ToArray(/*new BytesRef[0]*/);
            Array.Sort(sortedTerms, terms.Comparer);
            TermsEnum termsEnum = terms.GetEnumerator(Random.NextBoolean() ? null : this.termsEnum.Value);

            this.termsEnum.Value = termsEnum;
            for (int i = 0; i < sortedTerms.Length; ++i)
            {
                Assert.IsTrue(termsEnum.MoveNext());
                Assert.AreEqual(sortedTerms[i], termsEnum.Term);
                Assert.AreEqual(1, termsEnum.DocFreq);

                FixedBitSet bits     = new FixedBitSet(1);
                DocsEnum    docsEnum = termsEnum.Docs(bits, Random.NextBoolean() ? null : this.docsEnum.Value);
                Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.NextDoc());
                bits.Set(0);

                docsEnum = termsEnum.Docs(Random.NextBoolean() ? bits : null, Random.NextBoolean() ? null : docsEnum);
                Assert.IsNotNull(docsEnum);
                Assert.AreEqual(0, docsEnum.NextDoc());
                Assert.AreEqual(0, docsEnum.DocID);
                Assert.AreEqual(tk.freqs[termsEnum.Term.Utf8ToString()], docsEnum.Freq);
                Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.NextDoc());
                this.docsEnum.Value = docsEnum;

                bits.Clear(0);
                DocsAndPositionsEnum docsAndPositionsEnum = termsEnum.DocsAndPositions(bits, Random.NextBoolean() ? null : this.docsAndPositionsEnum.Value);
                Assert.AreEqual(ft.StoreTermVectorOffsets || ft.StoreTermVectorPositions, docsAndPositionsEnum != null);
                if (docsAndPositionsEnum != null)
                {
                    Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.NextDoc());
                }
                bits.Set(0);

                docsAndPositionsEnum = termsEnum.DocsAndPositions(Random.NextBoolean() ? bits : null, Random.NextBoolean() ? null : docsAndPositionsEnum);
                Assert.AreEqual(ft.StoreTermVectorOffsets || ft.StoreTermVectorPositions, docsAndPositionsEnum != null);
                if (terms.HasPositions || terms.HasOffsets)
                {
                    Assert.AreEqual(0, docsAndPositionsEnum.NextDoc());
                    int freq = docsAndPositionsEnum.Freq;
                    Assert.AreEqual(tk.freqs[termsEnum.Term.Utf8ToString()], freq);
                    if (docsAndPositionsEnum != null)
                    {
                        for (int k = 0; k < freq; ++k)
                        {
                            int        position = docsAndPositionsEnum.NextPosition();
                            ISet <int> indexes;
                            if (terms.HasPositions)
                            {
                                indexes = tk.positionToTerms[position];
                                Assert.IsNotNull(indexes);
                            }
                            else
                            {
                                indexes = tk.startOffsetToTerms[docsAndPositionsEnum.StartOffset];
                                Assert.IsNotNull(indexes);
                            }
                            if (terms.HasPositions)
                            {
                                bool foundPosition = false;
                                foreach (int index in indexes)
                                {
                                    if (tk.termBytes[index].Equals(termsEnum.Term) && tk.positions[index] == position)
                                    {
                                        foundPosition = true;
                                        break;
                                    }
                                }
                                Assert.IsTrue(foundPosition);
                            }
                            if (terms.HasOffsets)
                            {
                                bool foundOffset = false;
                                foreach (int index in indexes)
                                {
                                    if (tk.termBytes[index].Equals(termsEnum.Term) && tk.startOffsets[index] == docsAndPositionsEnum.StartOffset && tk.endOffsets[index] == docsAndPositionsEnum.EndOffset)
                                    {
                                        foundOffset = true;
                                        break;
                                    }
                                }
                                Assert.IsTrue(foundOffset);
                            }
                            if (terms.HasPayloads)
                            {
                                bool foundPayload = false;
                                foreach (int index in indexes)
                                {
                                    if (tk.termBytes[index].Equals(termsEnum.Term) && Equals(tk.payloads[index], docsAndPositionsEnum.GetPayload()))
                                    {
                                        foundPayload = true;
                                        break;
                                    }
                                }
                                Assert.IsTrue(foundPayload);
                            }
                        }
                        try
                        {
                            docsAndPositionsEnum.NextPosition();
                            Assert.Fail();
                        }
                        catch (Exception e) when(e.IsException())
                        {
                            // ok
                        }
                        catch (Exception e) when(e.IsAssertionError())
                        {
                            // ok
                        }
                    }
                    Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.NextDoc());
                }
                this.docsAndPositionsEnum.Value = docsAndPositionsEnum;
            }
            Assert.IsFalse(termsEnum.MoveNext());
            for (int i = 0; i < 5; ++i)
            {
                if (Random.NextBoolean())
                {
                    Assert.IsTrue(termsEnum.SeekExact(RandomPicks.RandomFrom(Random, tk.termBytes)));
                }
                else
                {
                    Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(RandomPicks.RandomFrom(Random, tk.termBytes)));
                }
            }
        }
예제 #19
0
				/// <exception cref="System.IO.IOException"></exception>
				internal SegmentResult(int[] counts, int total, int missingCountIndex, TermsEnum 
					tenum, int startFacetOrd, int endFacetOrd) : base(counts, total - counts[missingCountIndex
					], counts[missingCountIndex], endFacetOrd == missingCountIndex + 1 ? missingCountIndex
					 : endFacetOrd)
				{
					this.tenum = tenum;
					this.mergePos = startFacetOrd;
					if (tenum != null)
					{
						tenum.SeekExact(mergePos);
						mergeTerm = tenum.Term();
					}
				}
예제 #20
0
        protected internal virtual void AssertEquals(RandomTokenStream tk, FieldType ft, Terms terms)
        {
            Assert.AreEqual(1, terms.DocCount);
            int termCount = (new HashSet <string>(Arrays.AsList(tk.Terms))).Count;

            Assert.AreEqual(termCount, terms.Count);
            Assert.AreEqual(termCount, terms.SumDocFreq);
            Assert.AreEqual(ft.StoreTermVectorPositions, terms.HasPositions);
            Assert.AreEqual(ft.StoreTermVectorOffsets, terms.HasOffsets);
            Assert.AreEqual(ft.StoreTermVectorPayloads && tk.HasPayloads(), terms.HasPayloads);
            HashSet <BytesRef> uniqueTerms = new HashSet <BytesRef>();

            foreach (string term in tk.Freqs.Keys)
            {
                uniqueTerms.Add(new BytesRef(term));
            }
            BytesRef[] sortedTerms = uniqueTerms.ToArray(/*new BytesRef[0]*/);
            Array.Sort(sortedTerms, terms.Comparer);
            TermsEnum termsEnum = terms.GetIterator(Random().NextBoolean() ? null : this.termsEnum.Value);

            this.termsEnum.Value = termsEnum;
            for (int i = 0; i < sortedTerms.Length; ++i)
            {
                BytesRef nextTerm = termsEnum.Next();
                Assert.AreEqual(sortedTerms[i], nextTerm);
                Assert.AreEqual(sortedTerms[i], termsEnum.Term);
                Assert.AreEqual(1, termsEnum.DocFreq);

                FixedBitSet bits     = new FixedBitSet(1);
                DocsEnum    docsEnum = termsEnum.Docs(bits, Random().NextBoolean() ? null : this.docsEnum.Value);
                Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.NextDoc());
                bits.Set(0);

                docsEnum = termsEnum.Docs(Random().NextBoolean() ? bits : null, Random().NextBoolean() ? null : docsEnum);
                Assert.IsNotNull(docsEnum);
                Assert.AreEqual(0, docsEnum.NextDoc());
                Assert.AreEqual(0, docsEnum.DocID);
                Assert.AreEqual(tk.Freqs[termsEnum.Term.Utf8ToString()], (int?)docsEnum.Freq);
                Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.NextDoc());
                this.docsEnum.Value = docsEnum;

                bits.Clear(0);
                DocsAndPositionsEnum docsAndPositionsEnum = termsEnum.DocsAndPositions(bits, Random().NextBoolean() ? null : this.docsAndPositionsEnum.Value);
                Assert.AreEqual(ft.StoreTermVectorOffsets || ft.StoreTermVectorPositions, docsAndPositionsEnum != null);
                if (docsAndPositionsEnum != null)
                {
                    Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.NextDoc());
                }
                bits.Set(0);

                docsAndPositionsEnum = termsEnum.DocsAndPositions(Random().NextBoolean() ? bits : null, Random().NextBoolean() ? null : docsAndPositionsEnum);
                Assert.AreEqual(ft.StoreTermVectorOffsets || ft.StoreTermVectorPositions, docsAndPositionsEnum != null);
                if (terms.HasPositions || terms.HasOffsets)
                {
                    Assert.AreEqual(0, docsAndPositionsEnum.NextDoc());
                    int freq = docsAndPositionsEnum.Freq;
                    Assert.AreEqual(tk.Freqs[termsEnum.Term.Utf8ToString()], (int?)freq);
                    if (docsAndPositionsEnum != null)
                    {
                        for (int k = 0; k < freq; ++k)
                        {
                            int         position = docsAndPositionsEnum.NextPosition();
                            ISet <int?> indexes;
                            if (terms.HasPositions)
                            {
                                indexes = tk.PositionToTerms[position];
                                Assert.IsNotNull(indexes);
                            }
                            else
                            {
                                indexes = tk.StartOffsetToTerms[docsAndPositionsEnum.StartOffset];
                                Assert.IsNotNull(indexes);
                            }
                            if (terms.HasPositions)
                            {
                                bool foundPosition = false;
                                foreach (int index in indexes)
                                {
                                    if (tk.TermBytes[index].Equals(termsEnum.Term) && tk.Positions[index] == position)
                                    {
                                        foundPosition = true;
                                        break;
                                    }
                                }
                                Assert.IsTrue(foundPosition);
                            }
                            if (terms.HasOffsets)
                            {
                                bool foundOffset = false;
                                foreach (int index in indexes)
                                {
                                    if (tk.TermBytes[index].Equals(termsEnum.Term) && tk.StartOffsets[index] == docsAndPositionsEnum.StartOffset && tk.EndOffsets[index] == docsAndPositionsEnum.EndOffset)
                                    {
                                        foundOffset = true;
                                        break;
                                    }
                                }
                                Assert.IsTrue(foundOffset);
                            }
                            if (terms.HasPayloads)
                            {
                                bool foundPayload = false;
                                foreach (int index in indexes)
                                {
                                    if (tk.TermBytes[index].Equals(termsEnum.Term) && Equals(tk.Payloads[index], docsAndPositionsEnum.GetPayload()))
                                    {
                                        foundPayload = true;
                                        break;
                                    }
                                }
                                Assert.IsTrue(foundPayload);
                            }
                        }
                        try
                        {
                            docsAndPositionsEnum.NextPosition();
                            Assert.Fail();
                        }
#pragma warning disable 168
                        catch (Exception e)
#pragma warning restore 168
                        {
                            // ok
                        }
                    }
                    Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.NextDoc());
                }
                this.docsAndPositionsEnum.Value = docsAndPositionsEnum;
            }
            Assert.IsNull(termsEnum.Next());
            for (int i = 0; i < 5; ++i)
            {
                if (Random().NextBoolean())
                {
                    Assert.IsTrue(termsEnum.SeekExact(RandomInts.RandomFrom(Random(), tk.TermBytes)));
                }
                else
                {
                    Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(RandomInts.RandomFrom(Random(), tk.TermBytes)));
                }
            }
        }
예제 #21
0
 /// <summary>
 /// Returns the term (<seealso cref="BytesRef"/>) corresponding to
 ///  the provided ordinal.
 /// </summary>
 public virtual BytesRef LookupTerm(TermsEnum termsEnum, int ord)
 {
     termsEnum.SeekExact(ord);
     return termsEnum.Term();
 }
예제 #22
0
        public virtual void VerifyEquals(DirectoryReader r1, DirectoryReader r2, string idField)
        {
            if (VERBOSE)
            {
                Console.WriteLine("\nr1 docs:");
                PrintDocs(r1);
                Console.WriteLine("\nr2 docs:");
                PrintDocs(r2);
            }
            if (r1.NumDocs != r2.NumDocs)
            {
                Debug.Assert(false, "r1.NumDocs=" + r1.NumDocs + " vs r2.NumDocs=" + r2.NumDocs);
            }
            bool hasDeletes = !(r1.MaxDoc == r2.MaxDoc && r1.NumDocs == r1.MaxDoc);

            int[] r2r1 = new int[r2.MaxDoc]; // r2 id to r1 id mapping

            // create mapping from id2 space to id2 based on idField
            Fields f1 = MultiFields.GetFields(r1);

            if (f1 == null)
            {
                // make sure r2 is empty
                Assert.IsNull(MultiFields.GetFields(r2));
                return;
            }
            Terms terms1 = f1.GetTerms(idField);

            if (terms1 == null)
            {
                Assert.IsTrue(MultiFields.GetFields(r2) == null || MultiFields.GetFields(r2).GetTerms(idField) == null);
                return;
            }
            TermsEnum termsEnum = terms1.GetIterator(null);

            IBits liveDocs1 = MultiFields.GetLiveDocs(r1);
            IBits liveDocs2 = MultiFields.GetLiveDocs(r2);

            Fields fields = MultiFields.GetFields(r2);

            if (fields == null)
            {
                // make sure r1 is in fact empty (eg has only all
                // deleted docs):
                IBits    liveDocs = MultiFields.GetLiveDocs(r1);
                DocsEnum docs     = null;
                while (termsEnum.Next() != null)
                {
                    docs = TestUtil.Docs(Random(), termsEnum, liveDocs, docs, DocsFlags.NONE);
                    while (docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                    {
                        Assert.Fail("r1 is not empty but r2 is");
                    }
                }
                return;
            }
            Terms     terms2     = fields.GetTerms(idField);
            TermsEnum termsEnum2 = terms2.GetIterator(null);

            DocsEnum termDocs1 = null;
            DocsEnum termDocs2 = null;

            while (true)
            {
                BytesRef term = termsEnum.Next();
                //System.out.println("TEST: match id term=" + term);
                if (term == null)
                {
                    break;
                }

                termDocs1 = TestUtil.Docs(Random(), termsEnum, liveDocs1, termDocs1, DocsFlags.NONE);
                if (termsEnum2.SeekExact(term))
                {
                    termDocs2 = TestUtil.Docs(Random(), termsEnum2, liveDocs2, termDocs2, DocsFlags.NONE);
                }
                else
                {
                    termDocs2 = null;
                }

                if (termDocs1.NextDoc() == DocIdSetIterator.NO_MORE_DOCS)
                {
                    // this doc is deleted and wasn't replaced
                    Assert.IsTrue(termDocs2 == null || termDocs2.NextDoc() == DocIdSetIterator.NO_MORE_DOCS);
                    continue;
                }

                int id1 = termDocs1.DocID;
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, termDocs1.NextDoc());

                Assert.IsTrue(termDocs2.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                int id2 = termDocs2.DocID;
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, termDocs2.NextDoc());

                r2r1[id2] = id1;

                // verify stored fields are equivalent
                try
                {
                    VerifyEquals(r1.Document(id1), r2.Document(id2));
                }
                catch (Exception t)
                {
                    Console.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term=" + term);
                    Console.WriteLine("  d1=" + r1.Document(id1));
                    Console.WriteLine("  d2=" + r2.Document(id2));
                    throw t;
                }

                try
                {
                    // verify term vectors are equivalent
                    VerifyEquals(r1.GetTermVectors(id1), r2.GetTermVectors(id2));
                }
                catch (Exception e)
                {
                    Console.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2);
                    Fields tv1 = r1.GetTermVectors(id1);
                    Console.WriteLine("  d1=" + tv1);
                    if (tv1 != null)
                    {
                        DocsAndPositionsEnum dpEnum = null;
                        DocsEnum             dEnum  = null;
                        foreach (string field in tv1)
                        {
                            Console.WriteLine("    " + field + ":");
                            Terms terms3 = tv1.GetTerms(field);
                            Assert.IsNotNull(terms3);
                            TermsEnum termsEnum3 = terms3.GetIterator(null);
                            BytesRef  term2;
                            while ((term2 = termsEnum3.Next()) != null)
                            {
                                Console.WriteLine("      " + term2.Utf8ToString() + ": freq=" + termsEnum3.TotalTermFreq);
                                dpEnum = termsEnum3.DocsAndPositions(null, dpEnum);
                                if (dpEnum != null)
                                {
                                    Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                                    int freq = dpEnum.Freq;
                                    Console.WriteLine("        doc=" + dpEnum.DocID + " freq=" + freq);
                                    for (int posUpto = 0; posUpto < freq; posUpto++)
                                    {
                                        Console.WriteLine("          pos=" + dpEnum.NextPosition());
                                    }
                                }
                                else
                                {
                                    dEnum = TestUtil.Docs(Random(), termsEnum3, null, dEnum, DocsFlags.FREQS);
                                    Assert.IsNotNull(dEnum);
                                    Assert.IsTrue(dEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                                    int freq = dEnum.Freq;
                                    Console.WriteLine("        doc=" + dEnum.DocID + " freq=" + freq);
                                }
                            }
                        }
                    }

                    Fields tv2 = r2.GetTermVectors(id2);
                    Console.WriteLine("  d2=" + tv2);
                    if (tv2 != null)
                    {
                        DocsAndPositionsEnum dpEnum = null;
                        DocsEnum             dEnum  = null;
                        foreach (string field in tv2)
                        {
                            Console.WriteLine("    " + field + ":");
                            Terms terms3 = tv2.GetTerms(field);
                            Assert.IsNotNull(terms3);
                            TermsEnum termsEnum3 = terms3.GetIterator(null);
                            BytesRef  term2;
                            while ((term2 = termsEnum3.Next()) != null)
                            {
                                Console.WriteLine("      " + term2.Utf8ToString() + ": freq=" + termsEnum3.TotalTermFreq);
                                dpEnum = termsEnum3.DocsAndPositions(null, dpEnum);
                                if (dpEnum != null)
                                {
                                    Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                                    int freq = dpEnum.Freq;
                                    Console.WriteLine("        doc=" + dpEnum.DocID + " freq=" + freq);
                                    for (int posUpto = 0; posUpto < freq; posUpto++)
                                    {
                                        Console.WriteLine("          pos=" + dpEnum.NextPosition());
                                    }
                                }
                                else
                                {
                                    dEnum = TestUtil.Docs(Random(), termsEnum3, null, dEnum, DocsFlags.FREQS);
                                    Assert.IsNotNull(dEnum);
                                    Assert.IsTrue(dEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                                    int freq = dEnum.Freq;
                                    Console.WriteLine("        doc=" + dEnum.DocID + " freq=" + freq);
                                }
                            }
                        }
                    }

                    throw e;
                }
            }

            //System.out.println("TEST: done match id");

            // Verify postings
            //System.out.println("TEST: create te1");
            Fields fields1 = MultiFields.GetFields(r1);
            IEnumerator <string> fields1Enum = fields1.GetEnumerator();
            Fields fields2 = MultiFields.GetFields(r2);
            IEnumerator <string> fields2Enum = fields2.GetEnumerator();

            string    field1 = null, field2 = null;
            TermsEnum termsEnum1 = null;

            termsEnum2 = null;
            DocsEnum docs1 = null, docs2 = null;

            // pack both doc and freq into single element for easy sorting
            long[] info1 = new long[r1.NumDocs];
            long[] info2 = new long[r2.NumDocs];

            for (; ;)
            {
                BytesRef term1 = null, term2 = null;

                // iterate until we get some docs
                int len1;
                for (; ;)
                {
                    len1 = 0;
                    if (termsEnum1 == null)
                    {
                        if (!fields1Enum.MoveNext())
                        {
                            break;
                        }
                        field1 = fields1Enum.Current;
                        Terms terms = fields1.GetTerms(field1);
                        if (terms == null)
                        {
                            continue;
                        }
                        termsEnum1 = terms.GetIterator(null);
                    }
                    term1 = termsEnum1.Next();
                    if (term1 == null)
                    {
                        // no more terms in this field
                        termsEnum1 = null;
                        continue;
                    }

                    //System.out.println("TEST: term1=" + term1);
                    docs1 = TestUtil.Docs(Random(), termsEnum1, liveDocs1, docs1, DocsFlags.FREQS);
                    while (docs1.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                    {
                        int d = docs1.DocID;
                        int f = docs1.Freq;
                        info1[len1] = (((long)d) << 32) | (uint)f;
                        len1++;
                    }
                    if (len1 > 0)
                    {
                        break;
                    }
                }

                // iterate until we get some docs
                int len2;
                for (; ;)
                {
                    len2 = 0;
                    if (termsEnum2 == null)
                    {
                        if (!fields2Enum.MoveNext())
                        {
                            break;
                        }
                        field2 = fields2Enum.Current;
                        Terms terms = fields2.GetTerms(field2);
                        if (terms == null)
                        {
                            continue;
                        }
                        termsEnum2 = terms.GetIterator(null);
                    }
                    term2 = termsEnum2.Next();
                    if (term2 == null)
                    {
                        // no more terms in this field
                        termsEnum2 = null;
                        continue;
                    }

                    //System.out.println("TEST: term1=" + term1);
                    docs2 = TestUtil.Docs(Random(), termsEnum2, liveDocs2, docs2, DocsFlags.FREQS);
                    while (docs2.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                    {
                        int d = r2r1[docs2.DocID];
                        int f = docs2.Freq;
                        info2[len2] = (((long)d) << 32) | (uint)f;
                        len2++;
                    }
                    if (len2 > 0)
                    {
                        break;
                    }
                }

                Assert.AreEqual(len1, len2);
                if (len1 == 0) // no more terms
                {
                    break;
                }

                Assert.AreEqual(field1, field2);
                Assert.IsTrue(term1.BytesEquals(term2));

                if (!hasDeletes)
                {
                    Assert.AreEqual(termsEnum1.DocFreq, termsEnum2.DocFreq);
                }

                Assert.AreEqual(term1, term2, "len1=" + len1 + " len2=" + len2 + " deletes?=" + hasDeletes);

                // sort info2 to get it into ascending docid
                Array.Sort(info2, 0, len2);

                // now compare
                for (int i = 0; i < len1; i++)
                {
                    Assert.AreEqual(info1[i], info2[i], "i=" + i + " len=" + len1 + " d1=" + ((long)((ulong)info1[i] >> 32)) + " f1=" + (info1[i] & int.MaxValue) + " d2=" + ((long)((ulong)info2[i] >> 32)) + " f2=" + (info2[i] & int.MaxValue) + " field=" + field1 + " term=" + term1.Utf8ToString());
                }
            }
        }