Example #1
0
        /// <summary>
        /// For <c>numIterations</c> random regular expressions, walks the entries of
        /// <c>terms</c> in shuffled order and, for every entry the compiled automaton
        /// accepts, checks that a seek on the "field" terms enum lands on exactly that term.
        /// </summary>
        public virtual void TestSeeking()
        {
            for (int round = 0; round < numIterations; round++)
            {
                string regexp = AutomatonTestUtil.RandomRegexp(Random);
                Automaton acceptor = new RegExp(regexp, RegExpSyntax.NONE).ToAutomaton();
                TermsEnum termsEnum = MultiFields.GetTerms(reader, "field").GetIterator(null);

                // Work on a shuffled copy so the seeks are not issued in the collection's own order.
                IList<BytesRef> shuffled = new List<BytesRef>(terms);
                shuffled.Shuffle(Random);

                foreach (BytesRef candidate in shuffled)
                {
                    if (!BasicOperations.Run(acceptor, candidate.Utf8ToString()))
                    {
                        // Rejected by the automaton: nothing to verify for this term.
                        continue;
                    }

                    // Accepted term: randomly pick one of the two seek flavours.
                    if (Random.NextBoolean())
                    {
                        // Exact seek must report success.
                        Assert.IsTrue(termsEnum.SeekExact(candidate));
                    }
                    else
                    {
                        // Ceiling seek must report FOUND and be positioned on the same term.
                        Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(candidate));
                        Assert.AreEqual(candidate, termsEnum.Term);
                    }
                }
            }
        }
Example #2
0
        /// <summary>
        /// Iterates over <c>terms</c> and, for each one, randomly either steps the enum
        /// with <c>Next()</c>, seeks with <c>SeekCeil</c>, or seeks with <c>SeekExact</c>,
        /// asserting each time that the enum ends up on the current term.
        /// </summary>
        public virtual void TestSeekingAndNexting()
        {
            for (int round = 0; round < numIterations; round++)
            {
                TermsEnum termsEnum = MultiFields.GetTerms(reader, "field").GetIterator(null);

                foreach (BytesRef expected in terms)
                {
                    switch (Random.Next(3))
                    {
                        case 0:
                            // Plain step forward.
                            Assert.AreEqual(expected, termsEnum.Next());
                            break;

                        case 1:
                            // Ceiling seek: must hit the term exactly.
                            Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(expected));
                            Assert.AreEqual(expected, termsEnum.Term);
                            break;

                        default:
                            // Exact seek.
                            Assert.IsTrue(termsEnum.SeekExact(expected));
                            break;
                    }
                }
            }
        }
Example #3
0
        /// <summary>
        /// Opens a reader on <c>dir</c> and checks the first two terms of the "content"
        /// field ("aaa" with doc freq 200, then "bbb" with doc freq 100), first by plain
        /// enumeration and then again after seeking to "aaa".
        /// </summary>
        private void VerifyDocFreq()
        {
            // The using block guarantees the reader is released even when an assertion
            // below fails, so a failure here does not also leave an open reader behind.
            using (IndexReader reader = DirectoryReader.Open(dir))
            {
                TermsEnum termEnum = MultiFields.GetTerms(reader, "content").GetEnumerator();

                // Enumerate from the start: first term must be 'aaa'.
                // MoveNext() is asserted so an empty enum fails clearly instead of
                // dereferencing a missing term.
                Assert.IsTrue(termEnum.MoveNext());
                Assert.AreEqual("aaa", termEnum.Term.Utf8ToString());
                Assert.AreEqual(200, termEnum.DocFreq);

                // Second term must be 'bbb'.
                Assert.IsTrue(termEnum.MoveNext());
                Assert.AreEqual("bbb", termEnum.Term.Utf8ToString());
                Assert.AreEqual(100, termEnum.DocFreq);

                // Re-position on 'aaa' (inclusive) and repeat the same checks.
                termEnum.SeekCeil(new BytesRef("aaa"));
                Assert.AreEqual("aaa", termEnum.Term.Utf8ToString());
                Assert.AreEqual(200, termEnum.DocFreq);

                // Stepping once more must again yield 'bbb'.
                Assert.IsTrue(termEnum.MoveNext());
                Assert.AreEqual("bbb", termEnum.Term.Utf8ToString());
                Assert.AreEqual(100, termEnum.DocFreq);
            }
        }
Example #4
0
        /// <summary>
        /// Enumerates all terms of field "f" in <paramref name="r"/>, asserting that they
        /// come back in strictly increasing order and that each one is contained in
        /// <paramref name="allTerms"/>; afterwards seeks to every term that was seen.
        /// </summary>
        /// <param name="r">Reader whose "f" field is checked.</param>
        /// <param name="allTerms">Set of term strings that are allowed to appear.</param>
        /// <param name="isTop">When true, the seen terms must equal <paramref name="allTerms"/> exactly.</param>
        private void CheckTermsOrder(IndexReader r, ISet <string> allTerms, bool isTop)
        {
            TermsEnum terms = MultiFields.GetFields(r).GetTerms("f").GetEnumerator();

            BytesRef last = new BytesRef();

            ISet <string> seenTerms = new JCG.HashSet <string>();

            while (terms.MoveNext())
            {
                BytesRef term = terms.Term;

                // Each term must compare strictly greater than the previous one.
                Assert.IsTrue(last.CompareTo(term) < 0);
                // Copy the bytes rather than keeping the reference returned by the enum.
                last.CopyBytes(term);

                string s = term.Utf8ToString();
                Assert.IsTrue(allTerms.Contains(s), "term " + TermDesc(s) + " was not added to index (count=" + allTerms.Count + ")");
                seenTerms.Add(s);
            }

            if (isTop)
            {
                Assert.IsTrue(allTerms.SetEquals(seenTerms));
            }

            // Test seeking: every term that was enumerated must be found again by SeekCeil.
            // foreach replaces the hand-driven enumerator so it is disposed automatically.
            foreach (string seen in seenTerms)
            {
                BytesRef tr = new BytesRef(seen);
                Assert.AreEqual(TermsEnum.SeekStatus.FOUND, terms.SeekCeil(tr), "seek failed for term=" + TermDesc(tr.Utf8ToString()));
            }
        }
Example #5
0
        /// <summary>
        /// Advances the wrapped enum (<c>tenum</c>) until <c>Accept</c> approves a term,
        /// honouring any pending seek request recorded in <c>doSeek</c>.
        /// </summary>
        /// <returns>
        /// The next accepted term, or <c>null</c> when there is no seek target, the wrapped
        /// enum is exhausted, or <c>Accept</c> returns <c>END</c>.
        /// </returns>
        public override BytesRef Next()
        {
            while (true)
            {
                if (!doSeek)
                {
                    // No seek pending: simply step the wrapped enum.
                    actualTerm = tenum.Next();
                    if (actualTerm == null)
                    {
                        // wrapped enum exhausted
                        return null;
                    }
                }
                else
                {
                    // A previous Accept() asked for a seek; consume the request.
                    doSeek = false;
                    BytesRef seekTarget = NextSeekTerm(actualTerm);

                    // Seeks must always move forward relative to the current term.
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(actualTerm == null || seekTarget == null || Comparer.Compare(seekTarget, actualTerm) > 0, () => "curTerm=" + actualTerm + " seekTerm=" + seekTarget);
                    }

                    // Either there is nothing left to seek to, or the seek ran off the end.
                    if (seekTarget == null || tenum.SeekCeil(seekTarget) == SeekStatus.END)
                    {
                        return null;
                    }

                    actualTerm = tenum.Term;
                }

                // Ask the filter what to do with the term we are now positioned on.
                switch (Accept(actualTerm))
                {
                    case FilteredTermsEnum.AcceptStatus.YES_AND_SEEK:
                        // Accepted, and the following call must seek first.
                        doSeek = true;
                        return actualTerm;

                    case FilteredTermsEnum.AcceptStatus.YES:
                        return actualTerm;

                    case FilteredTermsEnum.AcceptStatus.END:
                        // The filter asked to terminate the enumeration.
                        return null;

                    case FilteredTermsEnum.AcceptStatus.NO_AND_SEEK:
                        // Rejected; seek on the next loop pass.
                        doSeek = true;
                        break;
                }
                // Any other status leaves doSeek untouched and the loop tries the following term.
            }
        }
Example #6
0
        /// <summary>
        /// Indexes three small documents, copies them into a second directory through a
        /// <c>TestReader</c> wrapper, and then asserts that every term of the "default"
        /// field in the copy contains the letter 'e' and that every posting for "one" has
        /// an odd document id.
        /// </summary>
        public virtual void TestFilterIndexReader()
        {
            Directory source = NewDirectory();
            IndexWriter writer = new IndexWriter(source, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));

            // One document per entry, each with a single stored text field.
            foreach (string text in new string[] { "one two", "one three", "two four" })
            {
                Document doc = new Document();
                doc.Add(NewTextField("default", text, Field.Store.YES));
                writer.AddDocument(doc);
            }

            writer.Dispose();

            Directory target = NewDirectory();

            // We mess with the postings so this can fail:
            ((BaseDirectoryWrapper)target).CrossCheckTermVectorsOnDispose = false;

            // Copy the source index into the target through the wrapping reader.
            writer = new IndexWriter(target, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
            IndexReader reader = new TestReader(DirectoryReader.Open(source));
            writer.AddIndexes(reader);
            writer.Dispose();
            reader.Dispose();

            reader = DirectoryReader.Open(target);

            // Every term in the copied index must contain an 'e'.
            TermsEnum termsEnum = MultiFields.GetTerms(reader, "default").GetEnumerator();
            while (termsEnum.MoveNext())
            {
                string termText = termsEnum.Term.Utf8ToString();
                Assert.IsTrue(termText.IndexOf('e') != -1);
            }

            Assert.AreEqual(TermsEnum.SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef("one")));

            // Every posting for "one" must have an odd document id.
            DocsAndPositionsEnum postings = termsEnum.DocsAndPositions(MultiFields.GetLiveDocs(reader), null);
            while (postings.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
            {
                Assert.IsTrue((postings.DocID % 2) == 1);
            }

            reader.Dispose();
            source.Dispose();
            target.Dispose();
        }
        /// <summary>
        /// For every directory in <c>OldIndexDirs</c>, checks that the "content" field's
        /// terms enum yields only "aaa", that stepping past it returns null, and that
        /// SeekCeil reports FOUND / END / NOT_FOUND for "aaa" / "bbb" / "a" respectively,
        /// with 35 documents counted for "aaa" each time.
        /// </summary>
        public virtual void TestNextIntoWrongField()
        {
            foreach (string indexName in OldNames)
            {
                Directory oldDir = OldIndexDirs[indexName];
                IndexReader indexReader = DirectoryReader.Open(oldDir);
                TermsEnum contentTerms = MultiFields.GetFields(indexReader).Terms("content").Iterator(null);

                // content field only has term aaa, so the second Next() must return null.
                BytesRef firstTerm = contentTerms.Next();
                Assert.IsNotNull(firstTerm);
                Assert.AreEqual("aaa", firstTerm.Utf8ToString());
                Assert.IsNull(contentTerms.Next());

                BytesRef aaaTerm = new BytesRef("aaa");

                // Seeking to the existing term: exact hit, 35 docs, nothing after it.
                Assert.AreEqual(TermsEnum.SeekStatus.FOUND, contentTerms.SeekCeil(aaaTerm));
                Assert.AreEqual(35, CountDocs(TestUtil.Docs(Random(), contentTerms, null, null, DocsEnum.FLAG_NONE)));
                Assert.IsNull(contentTerms.Next());

                // Seeking past the only term: end of field.
                Assert.AreEqual(TermsEnum.SeekStatus.END, contentTerms.SeekCeil(new BytesRef("bbb")));
                Assert.IsNull(contentTerms.Next());

                // Seeking before the only term: lands on aaa without an exact match.
                Assert.AreEqual(TermsEnum.SeekStatus.NOT_FOUND, contentTerms.SeekCeil(new BytesRef("a")));
                Assert.IsTrue(contentTerms.Term().BytesEquals(aaaTerm));
                Assert.AreEqual(35, CountDocs(TestUtil.Docs(Random(), contentTerms, null, null, DocsEnum.FLAG_NONE)));
                Assert.IsNull(contentTerms.Next());

                // The exact seek still works after the enum has been run off its end.
                Assert.AreEqual(TermsEnum.SeekStatus.FOUND, contentTerms.SeekCeil(aaaTerm));
                Assert.AreEqual(35, CountDocs(TestUtil.Docs(Random(), contentTerms, null, null, DocsEnum.FLAG_NONE)));
                Assert.IsNull(contentTerms.Next());

                indexReader.Dispose();
            }
        }
Example #8
0
        /// <summary>
        /// Splits <c>input</c> into three directories with the final argument of
        /// <c>Split</c> set to true, then checks each part's document count, that each
        /// part's first document carries the expected running "id", and that the term for
        /// id <c>NUM_DOCS - 1</c> is absent from the last part.
        /// </summary>
        public void TestSplitSeq()
        {
            MultiPassIndexSplitter splitter = new MultiPassIndexSplitter();

            Directory[] dirs = new Directory[] {
                NewDirectory(),
                NewDirectory(),
                NewDirectory()
            };
            try
            {
                splitter.Split(TEST_VERSION_CURRENT, input, dirs, true);
                Document    doc;
                int         start;
                IndexReader ir;
                using (ir = DirectoryReader.Open(dirs[0]))
                {
                    assertTrue(ir.NumDocs - NUM_DOCS / 3 <= 1);
                    doc = ir.Document(0);
                    assertEquals("0", doc.Get("id"));
                    // Running total of documents seen so far = expected first id of the next part.
                    start = ir.NumDocs;
                }
                using (ir = DirectoryReader.Open(dirs[1]))
                {
                    assertTrue(ir.NumDocs - NUM_DOCS / 3 <= 1);
                    doc = ir.Document(0);
                    assertEquals(start + "", doc.Get("id"));
                    start += ir.NumDocs;
                }
                using (ir = DirectoryReader.Open(dirs[2]))
                {
                    assertTrue(ir.NumDocs - NUM_DOCS / 3 <= 1);
                    doc = ir.Document(0);
                    assertEquals(start + "", doc.Get("id"));
                    // make sure the deleted doc is not here
                    TermsEnum te = MultiFields.GetTerms(ir, "id").GetIterator(null);
                    Term      t  = new Term("id", (NUM_DOCS - 1) + "");
                    assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.SeekCeil(new BytesRef(t.Text())));
                    // Compare by value: a reference comparison between two separately built
                    // strings can never fail, so it would not verify anything.
                    assertTrue(!string.Equals(t.Text(), te.Term.Utf8ToString()));
                }
            }
            finally
            {
                // Always release the directories, whether or not the assertions passed.
                foreach (Directory d in dirs)
                {
                    d.Dispose();
                }
            }
        }
Example #9
0
 /// <summary>
 /// Looks up <paramref name="key"/> by a ceiling seek on <c>te</c>.
 /// </summary>
 /// <param name="key">Term to look up.</param>
 /// <returns>
 /// <c>te.Ord</c> when the seek reports FOUND; otherwise <c>-te.Ord - 1</c>,
 /// computed from the ordinal the enum reports after the seek.
 /// </returns>
 public override long LookupTerm(BytesRef key)
 {
     try
     {
         return te.SeekCeil(key) == SeekStatus.FOUND
             ? te.Ord
             : -te.Ord - 1;
     }
     catch (Exception e) when (e.IsIOException())
     {
         // I/O failures are rethrown wrapped by RuntimeException.Create.
         throw RuntimeException.Create(e);
     }
 }
Example #10
0
 /// <summary>
 /// Looks up <paramref name="key"/> by a ceiling seek on <c>Te</c>.
 /// </summary>
 /// <param name="key">Term to look up.</param>
 /// <returns>
 /// <c>Te.Ord()</c> when the seek reports FOUND; otherwise <c>-Te.Ord() - 1</c>.
 /// </returns>
 public override long LookupTerm(BytesRef key)
 {
     try
     {
         bool exactMatch = Te.SeekCeil(key) == SeekStatus.FOUND;
         long ordinal = Te.Ord();

         // A miss is reported as a negative value derived from the ordinal after the seek.
         return exactMatch ? ordinal : -ordinal - 1;
     }
     catch (System.IO.IOException e)
     {
         // Rethrow I/O failures as a general exception, keeping the original as the cause.
         throw new Exception(e.Message, e);
     }
 }
Example #11
0
 /// <summary>
 /// Looks up <paramref name="key"/> by a ceiling seek on <c>te</c>.
 /// </summary>
 /// <param name="key">Term to look up.</param>
 /// <returns>
 /// <c>te.Ord</c> when the seek reports FOUND; otherwise <c>-te.Ord - 1</c>.
 /// </returns>
 public override long LookupTerm(BytesRef key)
 {
     try
     {
         // Handle the miss first: the result is negative, derived from the ordinal after the seek.
         if (te.SeekCeil(key) != SeekStatus.FOUND)
         {
             return -te.Ord - 1;
         }

         return te.Ord;
     }
     catch (IOException e)
     {
         // Rethrow I/O failures as a general exception, keeping the original as the cause.
         throw new Exception(e.ToString(), e);
     }
 }
Example #12
0
        /// <summary>
        /// Builds an index through <c>AddDocs</c>, then seeks to the term "val" in field
        /// "foo" <paramref name="iter"/> times, iterating all of its documents each time.
        /// Elapsed milliseconds for both phases are printed when <c>VERBOSE</c> is set.
        /// </summary>
        /// <param name="iter">Number of seek-and-iterate rounds.</param>
        /// <param name="ndocs">Passed through to <c>AddDocs</c>; also used in the verbose message as the number of docs.</param>
        /// <param name="maxTF">Passed through to <c>AddDocs</c>.</param>
        /// <param name="percentDocs">Passed through to <c>AddDocs</c>.</param>
        /// <returns>The sum of all document ids visited across every round.</returns>
        public virtual int DoTest(int iter, int ndocs, int maxTF, float percentDocs)
        {
            // Both the directory and the reader are disposed on every exit path;
            // previously neither was ever released.
            using (Directory dir = NewDirectory())
            {
                long start = Environment.TickCount;

                AddDocs(Random(), dir, ndocs, "foo", "val", maxTF, percentDocs);
                long end = Environment.TickCount;

                if (VERBOSE)
                {
                    Console.WriteLine("milliseconds for creation of " + ndocs + " docs = " + (end - start));
                }

                using (IndexReader reader = DirectoryReader.Open(dir))
                {
                    TermsEnum tenum = MultiFields.GetTerms(reader, "foo").GetIterator(null);

                    start = Environment.TickCount;

                    int      ret    = 0;
                    DocsEnum tdocs  = null;
                    Random   random = new Random(Random().Next());

                    for (int i = 0; i < iter; i++)
                    {
                        tenum.SeekCeil(new BytesRef("val"));
                        // The previous DocsEnum is handed back in so it can be reused.
                        tdocs = TestUtil.Docs(random, tenum, MultiFields.GetLiveDocs(reader), tdocs, DocsFlags.NONE);
                        while (tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                        {
                            ret += tdocs.DocID;
                        }
                    }

                    end = Environment.TickCount;
                    if (VERBOSE)
                    {
                        Console.WriteLine("milliseconds for " + iter + " TermDocs iteration: " + (end - start));
                    }

                    return ret;
                }
            }
        }
Example #13
0
        /// <summary>
        /// Builds an index through <c>AddDocs</c>, then seeks to the term "val" in field
        /// "foo" <paramref name="iter"/> times, iterating all of its documents each time.
        /// Elapsed milliseconds for both phases are printed when <c>Verbose</c> is set.
        /// </summary>
        /// <param name="iter">Number of seek-and-iterate rounds.</param>
        /// <param name="ndocs">Passed through to <c>AddDocs</c>; also used in the verbose message as the number of docs.</param>
        /// <param name="maxTF">Passed through to <c>AddDocs</c>.</param>
        /// <param name="percentDocs">Passed through to <c>AddDocs</c>.</param>
        /// <returns>The sum of all document ids visited across every round.</returns>
        public virtual int DoTest(int iter, int ndocs, int maxTF, float percentDocs)
        {
            // Both the directory and the reader are disposed on every exit path;
            // previously neither was ever released.
            using (Directory dir = NewDirectory())
            {
                long start = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results

                AddDocs(LuceneTestCase.Random, dir, ndocs, "foo", "val", maxTF, percentDocs);
                long end = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results

                if (Verbose)
                {
                    Console.WriteLine("milliseconds for creation of " + ndocs + " docs = " + (end - start));
                }

                using (IndexReader reader = DirectoryReader.Open(dir))
                {
                    TermsEnum tenum = MultiFields.GetTerms(reader, "foo").GetEnumerator();

                    start = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results

                    int      ret    = 0;
                    DocsEnum tdocs  = null;
                    Random   random = new Random(Random.Next());

                    for (int i = 0; i < iter; i++)
                    {
                        tenum.SeekCeil(new BytesRef("val"));
                        // The previous DocsEnum is handed back in so it can be reused.
                        tdocs = TestUtil.Docs(random, tenum, MultiFields.GetLiveDocs(reader), tdocs, DocsFlags.NONE);
                        while (tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                        {
                            ret += tdocs.DocID;
                        }
                    }

                    end = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
                    if (Verbose)
                    {
                        Console.WriteLine("milliseconds for " + iter + " TermDocs iteration: " + (end - start));
                    }

                    return ret;
                }
            }
        }
Example #14
0
        /// <summary>
        /// Repeatedly picks a random entry of <paramref name="terms"/> and verifies that a
        /// <c>TermQuery</c> on "field" finds at least one hit and that <c>SeekCeil</c> on
        /// the field's terms enum reports FOUND. All problems are logged to the console
        /// and the method fails once at the end if any occurred.
        /// </summary>
        /// <param name="r">Reader to search and enumerate.</param>
        /// <param name="terms">Terms expected to exist in "field".</param>
        private void TestSavedTerms(IndexReader r, IList <BytesRef> terms)
        {
            Console.WriteLine("TEST: run " + terms.Count + " terms on reader=" + r);
            IndexSearcher searcher = NewSearcher(r);

            terms = CollectionsHelper.Shuffle(terms);
            TermsEnum termsEnum = MultiFields.GetTerms(r, "field").Iterator(null);
            bool anyFailure = false;

            int rounds = 10 * terms.Count;
            for (int round = 0; round < rounds; round++)
            {
                BytesRef term = terms[Random().Next(terms.Count)];
                Console.WriteLine("TEST: search " + term);

                // Time the query and the hit-count check together.
                long startTicks = Environment.TickCount;
                int hits = searcher.Search(new TermQuery(new Term("field", term)), 1).TotalHits;
                if (hits <= 0)
                {
                    Console.WriteLine("  FAILED: count=" + hits);
                    anyFailure = true;
                }
                long endTicks = Environment.TickCount;
                Console.WriteLine("  took " + (endTicks - startTicks) + " millis");

                TermsEnum.SeekStatus status = termsEnum.SeekCeil(term);
                if (status == TermsEnum.SeekStatus.FOUND)
                {
                    continue;
                }

                // Anything other than FOUND is a failure; report which kind it was.
                Console.WriteLine(status == TermsEnum.SeekStatus.END
                    ? "  FAILED: got END"
                    : "  FAILED: wrong term: got " + termsEnum.Term());
                anyFailure = true;
            }

            Assert.IsFalse(anyFailure);
        }
Example #15
0
        /// <summary>
        /// Shuffles <paramref name="terms"/> in place, then repeatedly picks a random entry
        /// and verifies that a <c>TermQuery</c> on "field" finds at least one hit and that
        /// <c>SeekCeil</c> on the field's terms enum reports FOUND. All problems are logged
        /// to the console and the method fails once at the end if any occurred.
        /// </summary>
        /// <param name="r">Reader to search and enumerate.</param>
        /// <param name="terms">Terms expected to exist in "field".</param>
        private void TestSavedTerms(IndexReader r, IList <BytesRef> terms)
        {
            Console.WriteLine("TEST: run " + terms.Count + " terms on reader=" + r);
            IndexSearcher searcher = NewSearcher(r);

            terms.Shuffle(Random);
            TermsEnum termsEnum = MultiFields.GetTerms(r, "field").GetEnumerator();
            bool anyFailure = false;

            int rounds = 10 * terms.Count;
            for (int round = 0; round < rounds; round++)
            {
                BytesRef term = terms[Random.Next(terms.Count)];
                Console.WriteLine("TEST: search " + term);

                // Time the query and the hit-count check together.
                long startMillis = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
                int hits = searcher.Search(new TermQuery(new Term("field", term)), 1).TotalHits;
                if (hits <= 0)
                {
                    Console.WriteLine("  FAILED: count=" + hits);
                    anyFailure = true;
                }
                long endMillis = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
                Console.WriteLine("  took " + (endMillis - startMillis) + " millis");

                TermsEnum.SeekStatus status = termsEnum.SeekCeil(term);
                if (status == TermsEnum.SeekStatus.END)
                {
                    // Ran off the end of the enum.
                    Console.WriteLine("  FAILED: got END");
                    anyFailure = true;
                }
                else if (status != TermsEnum.SeekStatus.FOUND)
                {
                    // Landed on some other term.
                    Console.WriteLine("  FAILED: wrong term: got " + termsEnum.Term);
                    anyFailure = true;
                }
            }

            Assert.IsFalse(anyFailure);
        }
Example #16
0
        /// <summary>
        /// Opens a <c>SegmentReader</c> over <c>Info</c> with the given divisor, seeks the
        /// terms of <c>DocHelper.TEXT_FIELD_2_KEY</c> to "field", and, if a document is
        /// returned, asserts that its id is 0 and its frequency is 3.
        /// </summary>
        /// <param name="indexDivisor">Divisor passed to the reader and expected back from <c>TermInfosIndexDivisor</c>.</param>
        public virtual void TestTermDocs(int indexDivisor)
        {
            // After adding the document, we should be able to read it back in.
            SegmentReader segmentReader = new SegmentReader(Info, indexDivisor, NewIOContext(Random()));

            Assert.IsTrue(segmentReader != null);
            Assert.AreEqual(indexDivisor, segmentReader.TermInfosIndexDivisor);

            TermsEnum termsEnum = segmentReader.Fields.Terms(DocHelper.TEXT_FIELD_2_KEY).Iterator(null);
            termsEnum.SeekCeil(new BytesRef("field"));

            DocsEnum postings = TestUtil.Docs(Random(), termsEnum, segmentReader.LiveDocs, null, DocsEnum.FLAG_FREQS);

            bool hasDocument = postings.NextDoc() != DocIdSetIterator.NO_MORE_DOCS;
            if (hasDocument)
            {
                // The first posting is expected to be document 0 with frequency 3.
                Assert.IsTrue(postings.DocID() == 0);
                Assert.IsTrue(postings.Freq() == 3);
            }

            segmentReader.Dispose();
        }
Example #17
0
        /// <summary>
        /// Adds the same four-field document 177 times, then (in a second pass) force-merges
        /// to one segment; after each pass asserts that seeking "field3" to "abc" reports
        /// END.
        /// </summary>
        public virtual void TestNonFlex()
        {
            const int DOC_COUNT = 177;

            Directory d = NewDirectory();
            IndexWriter w = new IndexWriter(d, (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))).SetMaxBufferedDocs(7).SetMergePolicy(NewLogMergePolicy()));

            for (int pass = 0; pass < 2; pass++)
            {
                if (pass != 0)
                {
                    // Second pass: collapse everything into a single segment.
                    w.ForceMerge(1);
                }
                else
                {
                    // First pass: index DOC_COUNT copies of one document.
                    Documents.Document doc = new Documents.Document();
                    doc.Add(NewTextField("field1", "this is field1", Field.Store.NO));
                    doc.Add(NewTextField("field2", "this is field2", Field.Store.NO));
                    doc.Add(NewTextField("field3", "aaa", Field.Store.NO));
                    doc.Add(NewTextField("field4", "bbb", Field.Store.NO));

                    int added = 0;
                    while (added < DOC_COUNT)
                    {
                        w.AddDocument(doc);
                        added++;
                    }
                }

                IndexReader r = w.GetReader();

                // "abc" sorts after field3's only value "aaa", so the seek must report END.
                TermsEnum field3Terms = MultiFields.GetTerms(r, "field3").GetEnumerator();
                TermsEnum.SeekStatus status = field3Terms.SeekCeil(new BytesRef("abc"));
                Assert.AreEqual(TermsEnum.SeekStatus.END, status);

                r.Dispose();
            }

            w.Dispose();
            d.Dispose();
        }
Example #18
0
        /// <summary>
        /// Randomly exercises SeekExact / SeekCeil / Next on the terms enum of field
        /// <c>FIELD</c> in <paramref name="r"/> and checks every result against a sorted
        /// array built from <paramref name="validTermStrings"/>.
        /// </summary>
        /// <param name="r">Reader whose <c>FIELD</c> terms are enumerated.</param>
        /// <param name="validTermStrings">The terms the test treats as present in the field.</param>
        private void TestRandomSeeks(IndexReader r, params string[] validTermStrings)
        {
            // Build the sorted reference array that every seek result is compared against.
            BytesRef[] validTerms = new BytesRef[validTermStrings.Length];
            for (int termIDX = 0; termIDX < validTermStrings.Length; termIDX++)
            {
                validTerms[termIDX] = new BytesRef(validTermStrings[termIDX]);
            }
            Array.Sort(validTerms);
            if (VERBOSE)
            {
                Console.WriteLine("TEST: " + validTerms.Length + " terms:");
                foreach (BytesRef t in validTerms)
                {
                    Console.WriteLine("  " + t.Utf8ToString() + " " + t);
                }
            }
            TermsEnum te = MultiFields.GetTerms(r, FIELD).GetIterator(null);

            // Value of a binary-search miss whose insertion point is past the last term.
            int END_LOC = -validTerms.Length - 1;

            // Term states captured during Next() calls, replayed later via SeekExact(t, state).
            IList <TermAndState> termStates = new List <TermAndState>();

            for (int iter = 0; iter < 100 * RANDOM_MULTIPLIER; iter++)
            {
                BytesRef  t;
                int       loc;
                TermState termState;
                if (Random.Next(6) == 4)
                {
                    // pick term that doesn't exist:
                    t         = GetNonExistTerm(validTerms);
                    termState = null;
                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: invalid term=" + t.Utf8ToString());
                    }
                    // loc is used below as -(insertion index) - 1 when the term is absent.
                    loc = Array.BinarySearch(validTerms, t);
                }
                else if (termStates.Count != 0 && Random.Next(4) == 1)
                {
                    // Reuse a previously captured (term, state) pair.
                    TermAndState ts = termStates[Random.Next(termStates.Count)];
                    t   = ts.Term;
                    loc = Array.BinarySearch(validTerms, t);
                    Assert.IsTrue(loc >= 0);
                    termState = ts.State;
                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: valid termState term=" + t.Utf8ToString());
                    }
                }
                else
                {
                    // pick valid term
                    loc       = Random.Next(validTerms.Length);
                    t         = BytesRef.DeepCopyOf(validTerms[loc]);
                    termState = null;
                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: valid term=" + t.Utf8ToString());
                    }
                }

                // seekCeil or seekExact:
                // NOTE: the boolean is drawn even when a termState is used, which keeps the
                // sequence of Random calls the same on every path.
                bool doSeekExact = Random.NextBoolean();
                if (termState != null)
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("  seekExact termState");
                    }
                    te.SeekExact(t, termState);
                }
                else if (doSeekExact)
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("  seekExact");
                    }
                    // SeekExact must succeed exactly when the term is in the reference array.
                    Assert.AreEqual(loc >= 0, te.SeekExact(t));
                }
                else
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("  seekCeil");
                    }

                    TermsEnum.SeekStatus result = te.SeekCeil(t);
                    if (VERBOSE)
                    {
                        Console.WriteLine("  got " + result);
                    }

                    // Map the binary-search outcome to the expected seek status.
                    if (loc >= 0)
                    {
                        Assert.AreEqual(TermsEnum.SeekStatus.FOUND, result);
                    }
                    else if (loc == END_LOC)
                    {
                        Assert.AreEqual(TermsEnum.SeekStatus.END, result);
                    }
                    else
                    {
                        Debug.Assert(loc >= -validTerms.Length);
                        Assert.AreEqual(TermsEnum.SeekStatus.NOT_FOUND, result);
                    }
                }

                // Verify the enum's position, or skip the Next() phase when it has none.
                if (loc >= 0)
                {
                    Assert.AreEqual(t, te.Term);
                }
                else if (doSeekExact)
                {
                    // TermsEnum is unpositioned if seekExact returns false
                    continue;
                }
                else if (loc == END_LOC)
                {
                    // SeekCeil hit END: there is no current term to continue from.
                    continue;
                }
                else
                {
                    // Convert the negative search result into the index of the ceiling term.
                    loc = -loc - 1;
                    Assert.AreEqual(validTerms[loc], te.Term);
                }

                // Do a bunch of next's after the seek
                int numNext = Random.Next(validTerms.Length);

                for (int nextCount = 0; nextCount < numNext; nextCount++)
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: next loc=" + loc + " of " + validTerms.Length);
                    }
                    BytesRef t2 = te.Next();
                    loc++;
                    if (loc == validTerms.Length)
                    {
                        // Stepped past the last reference term: the enum must report exhaustion.
                        Assert.IsNull(t2);
                        break;
                    }
                    else
                    {
                        Assert.AreEqual(validTerms[loc], t2);
                        // Occasionally capture the term state (at most 100) for later replay.
                        if (Random.Next(40) == 17 && termStates.Count < 100)
                        {
                            termStates.Add(new TermAndState(validTerms[loc], te.GetTermState()));
                        }
                    }
                }
            }
        }
Example #19
0
        /// <summary>
        /// Randomized check of a terms enumerator over the "body" field. Every term is first copied
        /// into a list; the same enumerator is then driven through a random mix of <c>Next</c>,
        /// <c>SeekCeil</c> and <c>SeekExact</c> calls, and each outcome is validated against a
        /// binary search of that list.
        /// </summary>
        public virtual void Test()
        {
            Random random = new Random(Random.Next());

            // using blocks release the docs source, directory, writer and reader even when an
            // assertion below throws part-way through the test.
            using (LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues))
            using (Directory d = NewDirectory())
            {
                MockAnalyzer analyzer = new MockAnalyzer(LuceneTestCase.Random);
                analyzer.MaxTokenLength = TestUtil.NextInt32(LuceneTestCase.Random, 1, IndexWriter.MAX_TERM_LENGTH);

                IndexReader r;
                using (RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                    this,
#endif
                    LuceneTestCase.Random, d, analyzer))
                {
                    int numDocs = AtLeast(10);

                    for (int docCount = 0; docCount < numDocs; docCount++)
                    {
                        w.AddDocument(docs.NextDoc());
                    }
                    r = w.GetReader();
                }

                using (r)
                {
                    // Snapshot every term; deep copies because the enum hands back the value of Next() directly.
                    List<BytesRef> terms = new List<BytesRef>();
                    TermsEnum termsEnum = MultiFields.GetTerms(r, "body").GetIterator(null);
                    BytesRef term;

                    while ((term = termsEnum.Next()) != null)
                    {
                        terms.Add(BytesRef.DeepCopyOf(term));
                    }
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: " + terms.Count + " terms");
                    }

                    // Index into 'terms' of the term the enum is expected to be on; -1 means unpositioned.
                    int upto = -1;
                    int iters = AtLeast(200);

                    for (int iter = 0; iter < iters; iter++)
                    {
                        if (upto != -1 && LuceneTestCase.Random.NextBoolean())
                        {
                            // next
                            if (VERBOSE)
                            {
                                Console.WriteLine("TEST: iter next");
                            }
                            bool isEnd = termsEnum.Next() == null;
                            upto++;
                            if (isEnd)
                            {
                                if (VERBOSE)
                                {
                                    Console.WriteLine("  end");
                                }
                                // Exhaustion must coincide with stepping past the last snapshot entry.
                                Assert.AreEqual(terms.Count, upto);
                                upto = -1;
                            }
                            else
                            {
                                // Check the bound first so an overrun fails as an assertion rather than
                                // as an out-of-range exception from the verbose print below.
                                Assert.IsTrue(upto < terms.Count);
                                if (VERBOSE)
                                {
                                    Console.WriteLine("  got term=" + termsEnum.Term.Utf8ToString() + " expected=" + terms[upto].Utf8ToString());
                                }
                                Assert.AreEqual(terms[upto], termsEnum.Term);
                            }
                        }
                        else
                        {
                            BytesRef target;
                            string exists;
                            if (LuceneTestCase.Random.NextBoolean())
                            {
                                // likely fake term
                                if (LuceneTestCase.Random.NextBoolean())
                                {
                                    target = new BytesRef(TestUtil.RandomSimpleString(LuceneTestCase.Random));
                                }
                                else
                                {
                                    target = new BytesRef(TestUtil.RandomRealisticUnicodeString(LuceneTestCase.Random));
                                }
                                exists = "likely not";
                            }
                            else
                            {
                                // real term
                                target = terms[LuceneTestCase.Random.Next(terms.Count)];
                                exists = "yes";
                            }

                            // >= 0: index of the target; < 0: encodes the insertion point as -(index + 1).
                            upto = terms.BinarySearch(target);

                            if (LuceneTestCase.Random.NextBoolean())
                            {
                                if (VERBOSE)
                                {
                                    Console.WriteLine("TEST: iter seekCeil target=" + target.Utf8ToString() + " exists=" + exists);
                                }
                                // seekCeil
                                TermsEnum.SeekStatus status = termsEnum.SeekCeil(target);
                                if (VERBOSE)
                                {
                                    Console.WriteLine("  got " + status);
                                }

                                if (upto < 0)
                                {
                                    upto = -(upto + 1);
                                    if (upto >= terms.Count)
                                    {
                                        Assert.AreEqual(TermsEnum.SeekStatus.END, status);
                                        upto = -1;
                                    }
                                    else
                                    {
                                        Assert.AreEqual(TermsEnum.SeekStatus.NOT_FOUND, status);
                                        Assert.AreEqual(terms[upto], termsEnum.Term);
                                    }
                                }
                                else
                                {
                                    Assert.AreEqual(TermsEnum.SeekStatus.FOUND, status);
                                    Assert.AreEqual(terms[upto], termsEnum.Term);
                                }
                            }
                            else
                            {
                                if (VERBOSE)
                                {
                                    Console.WriteLine("TEST: iter seekExact target=" + target.Utf8ToString() + " exists=" + exists);
                                }
                                // seekExact
                                bool result = termsEnum.SeekExact(target);
                                if (VERBOSE)
                                {
                                    Console.WriteLine("  got " + result);
                                }
                                if (upto < 0)
                                {
                                    Assert.IsFalse(result);
                                    // A failed exact seek is treated as leaving the enum unpositioned.
                                    upto = -1;
                                }
                                else
                                {
                                    Assert.IsTrue(result);
                                    Assert.AreEqual(target, termsEnum.Term);
                                }
                            }
                        }
                    }
                }
            }
        }
Example #20
0
            /// <summary>
            /// Moves this enum to <paramref name="target"/>, or to the next term after it when the
            /// target itself is absent, using the outer instance's indexed-terms array to pick the
            /// block from which to start scanning.
            /// </summary>
            /// <param name="target">The term to seek to.</param>
            /// <returns>
            /// <c>FOUND</c> when the enum ends up on <paramref name="target"/>, <c>NOT_FOUND</c> when it
            /// ends up on a later term, and <c>END</c> when no current term remains.
            /// </returns>
            public override SeekStatus SeekCeil(BytesRef target)
            {
                // Fast path: the enum is already sitting on the requested term.
                if (term != null && term.Equals(target))
                {
                    return SeekStatus.FOUND;
                }

                int searchResult = Array.BinarySearch(outerInstance.m_indexedTermsArray, target);

                if (searchResult >= 0)
                {
                    // The target is itself one of the indexed terms.
                    TermsEnum.SeekStatus exactStatus = termsEnum.SeekCeil(target);
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(exactStatus == TermsEnum.SeekStatus.FOUND);
                    }
                    ord = searchResult << outerInstance.indexIntervalBits;
                    SetTerm();
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(term != null);
                    }
                    return SeekStatus.FOUND;
                }

                // A negative result encodes the insertion point as -(index) - 1.
                int blockIdx = -searchResult - 1;

                if (blockIdx == 0)
                {
                    // The target sorts ahead of the very first indexed term.
                    TermsEnum.SeekStatus leadingStatus = termsEnum.SeekCeil(target);
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(leadingStatus == TermsEnum.SeekStatus.NOT_FOUND);
                    }
                    ord = 0;
                    SetTerm();
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(term != null);
                    }
                    return SeekStatus.NOT_FOUND;
                }

                // Step back to the indexed term that opens the block holding the target.
                blockIdx--;

                // When the enum already sits inside that block, at or before the target,
                // the scan below can continue from the current position.
                bool canScanFromHere = (ord >> outerInstance.indexIntervalBits) == blockIdx
                    && term != null
                    && term.CompareTo(target) <= 0;

                if (!canScanFromHere)
                {
                    // Reposition on the first term of the block.
                    TermsEnum.SeekStatus blockStatus = termsEnum.SeekCeil(outerInstance.m_indexedTermsArray[blockIdx]);
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(blockStatus == TermsEnum.SeekStatus.FOUND);
                    }
                    ord = blockIdx << outerInstance.indexIntervalBits;
                    SetTerm();
                    if (Debugging.AssertsEnabled)
                    {
                        // The block's first term comes from the index, so it must resolve.
                        Debugging.Assert(term != null);
                    }
                }

                // Walk forward until the current term is no longer smaller than the target.
                while (term != null && term.CompareTo(target) < 0)
                {
                    Next();
                }

                if (term == null)
                {
                    return SeekStatus.END;
                }

                return term.CompareTo(target) == 0 ? SeekStatus.FOUND : SeekStatus.NOT_FOUND;
            }
        /// <summary>
        /// Asserts that <paramref name="terms"/> reflects the tokens of <paramref name="tk"/> as indexed
        /// with field type <paramref name="ft"/>: aggregate statistics and feature flags first, then every
        /// term in sorted order together with its docs, positions, offsets and payloads, and finally a
        /// handful of random seeks.
        /// </summary>
        /// <param name="tk">Token stream supplying the expected terms, frequencies, positions, offsets and payloads.</param>
        /// <param name="ft">Field type whose StoreTermVector* flags determine which features are expected.</param>
        /// <param name="terms">The terms instance under test; it is asserted to cover exactly one document.</param>
        protected internal virtual void AssertEquals(RandomTokenStream tk, FieldType ft, Terms terms)
        {
            Assert.AreEqual(1, terms.DocCount);
            // Number of distinct term strings in the stream.
            int termCount = (new HashSet <string>(Arrays.AsList(tk.Terms))).Count;

            Assert.AreEqual(termCount, terms.Size());
            Assert.AreEqual(termCount, terms.SumDocFreq);
            Assert.AreEqual(ft.StoreTermVectorPositions, terms.HasPositions());
            Assert.AreEqual(ft.StoreTermVectorOffsets, terms.HasOffsets());
            Assert.AreEqual(ft.StoreTermVectorPayloads && tk.HasPayloads(), terms.HasPayloads());
            HashSet <BytesRef> uniqueTerms = new HashSet <BytesRef>();

            foreach (string term in tk.Freqs.Keys)
            {
                uniqueTerms.Add(new BytesRef(term));
            }
            // Expected iteration order: the unique terms sorted with the comparator 'terms' reports.
            BytesRef[] sortedTerms = uniqueTerms.ToArray(/*new BytesRef[0]*/);
            Array.Sort(sortedTerms, terms.Comparator);
            // Randomly pass either null or the enum stored by a previous call as the reuse argument.
            TermsEnum termsEnum = terms.Iterator(Random().NextBoolean() ? null : this.termsEnum.Value);

            this.termsEnum.Value = termsEnum;
            for (int i = 0; i < sortedTerms.Length; ++i)
            {
                BytesRef nextTerm = termsEnum.Next();
                Assert.AreEqual(sortedTerms[i], nextTerm);
                Assert.AreEqual(sortedTerms[i], termsEnum.Term());
                Assert.AreEqual(1, termsEnum.DocFreq());

                // With bit 0 clear, the docs enum is expected to yield no document at all
                // (presumably 'bits' acts as the live-docs filter -- TODO confirm).
                FixedBitSet bits     = new FixedBitSet(1);
                DocsEnum    docsEnum = termsEnum.Docs(bits, Random().NextBoolean() ? null : this.docsEnum.Value);
                Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.NextDoc());
                bits.Set(0);

                // With bit 0 set (or no bits at all), exactly document 0 is expected.
                docsEnum = termsEnum.Docs(Random().NextBoolean() ? bits : null, Random().NextBoolean() ? null : docsEnum);
                Assert.IsNotNull(docsEnum);
                Assert.AreEqual(0, docsEnum.NextDoc());
                Assert.AreEqual(0, docsEnum.DocID());
                Assert.AreEqual(tk.Freqs[termsEnum.Term().Utf8ToString()], (int?)docsEnum.Freq());
                Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.NextDoc());
                this.docsEnum.Value = docsEnum;

                // Same two-step check for the positions enum, which must be non-null exactly when
                // offsets or positions are stored.
                bits.Clear(0);
                DocsAndPositionsEnum docsAndPositionsEnum = termsEnum.DocsAndPositions(bits, Random().NextBoolean() ? null : this.docsAndPositionsEnum.Value);
                Assert.AreEqual(ft.StoreTermVectorOffsets || ft.StoreTermVectorPositions, docsAndPositionsEnum != null);
                if (docsAndPositionsEnum != null)
                {
                    Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.NextDoc());
                }
                bits.Set(0);

                docsAndPositionsEnum = termsEnum.DocsAndPositions(Random().NextBoolean() ? bits : null, Random().NextBoolean() ? null : docsAndPositionsEnum);
                Assert.AreEqual(ft.StoreTermVectorOffsets || ft.StoreTermVectorPositions, docsAndPositionsEnum != null);
                if (terms.HasPositions() || terms.HasOffsets())
                {
                    Assert.AreEqual(0, docsAndPositionsEnum.NextDoc());
                    int freq = docsAndPositionsEnum.Freq();
                    Assert.AreEqual(tk.Freqs[termsEnum.Term().Utf8ToString()], (int?)freq);
                    // NOTE(review): this null check is redundant -- docsAndPositionsEnum was already
                    // dereferenced by the NextDoc()/Freq() calls just above.
                    if (docsAndPositionsEnum != null)
                    {
                        for (int k = 0; k < freq; ++k)
                        {
                            int         position = docsAndPositionsEnum.NextPosition();
                            // Candidate token indexes for this occurrence, looked up by position when
                            // positions are available and by start offset otherwise.
                            ISet <int?> indexes;
                            if (terms.HasPositions())
                            {
                                indexes = tk.PositionToTerms[position];
                                Assert.IsNotNull(indexes);
                            }
                            else
                            {
                                indexes = tk.StartOffsetToTerms[docsAndPositionsEnum.StartOffset()];
                                Assert.IsNotNull(indexes);
                            }
                            // Each stored feature must match at least one candidate token of the current term.
                            if (terms.HasPositions())
                            {
                                bool foundPosition = false;
                                foreach (int index in indexes)
                                {
                                    if (tk.TermBytes[index].Equals(termsEnum.Term()) && tk.Positions[index] == position)
                                    {
                                        foundPosition = true;
                                        break;
                                    }
                                }
                                Assert.IsTrue(foundPosition);
                            }
                            if (terms.HasOffsets())
                            {
                                bool foundOffset = false;
                                foreach (int index in indexes)
                                {
                                    if (tk.TermBytes[index].Equals(termsEnum.Term()) && tk.StartOffsets[index] == docsAndPositionsEnum.StartOffset() && tk.EndOffsets[index] == docsAndPositionsEnum.EndOffset())
                                    {
                                        foundOffset = true;
                                        break;
                                    }
                                }
                                Assert.IsTrue(foundOffset);
                            }
                            if (terms.HasPayloads())
                            {
                                bool foundPayload = false;
                                foreach (int index in indexes)
                                {
                                    if (tk.TermBytes[index].Equals(termsEnum.Term()) && Equals(tk.Payloads[index], docsAndPositionsEnum.Payload))
                                    {
                                        foundPayload = true;
                                        break;
                                    }
                                }
                                Assert.IsTrue(foundPayload);
                            }
                        }
                        // Asking for one position more than 'freq' is expected to throw.
                        // NOTE(review): Assert.Fail() sits inside the try and the handler catches
                        // Exception; if the assertion framework reports failure by throwing an
                        // Exception subtype, that failure is swallowed here and this check can never
                        // fail -- confirm against the Assert implementation in use.
                        try
                        {
                            docsAndPositionsEnum.NextPosition();
                            Assert.Fail();
                        }
                        catch (Exception e)
                        {
                            // ok
                        }
                    }
                    Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.NextDoc());
                }
                this.docsAndPositionsEnum.Value = docsAndPositionsEnum;
            }
            // The enum must be exhausted once every sorted term has been visited.
            Assert.IsNull(termsEnum.Next());
            // Five random seeks to terms taken from the stream; each of them must be found.
            for (int i = 0; i < 5; ++i)
            {
                if (Random().NextBoolean())
                {
                    Assert.IsTrue(termsEnum.SeekExact(RandomInts.RandomFrom(Random(), tk.TermBytes)));
                }
                else
                {
                    Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(RandomInts.RandomFrom(Random(), tk.TermBytes)));
                }
            }
        }
Example #22
0
            /// <summary>
            /// Moves this enum to <paramref name="target"/>, or to the next term after it when the
            /// target itself is absent, using the outer instance's indexed-terms array to pick the
            /// block from which to start scanning.
            /// </summary>
            /// <param name="target">The term to seek to.</param>
            /// <returns>
            /// <c>FOUND</c> when the enum ends up on <paramref name="target"/>, <c>NOT_FOUND</c> when it
            /// ends up on a later term, and <c>END</c> when no current term remains.
            /// </returns>
            public override SeekStatus SeekCeil(BytesRef target)
            {
                // already here
                if (Term_Renamed != null && Term_Renamed.Equals(target))
                {
                    return SeekStatus.FOUND;
                }

                // Search the indexed-terms array in place. The previous ToList().BinarySearch(...)
                // copied the whole array into a new list on every seek.
                int startIdx = System.Array.BinarySearch(OuterInstance.IndexedTermsArray, target);

                if (startIdx >= 0)
                {
                    // we hit the term exactly... lucky us!
                    TermsEnum.SeekStatus seekStatus = TermsEnum.SeekCeil(target);
                    Debug.Assert(seekStatus == TermsEnum.SeekStatus.FOUND);
                    Ord_Renamed = startIdx << OuterInstance.IndexIntervalBits;
                    SetTerm();
                    Debug.Assert(Term_Renamed != null);
                    return SeekStatus.FOUND;
                }

                // we didn't hit the term exactly: a negative result encodes the insertion point
                startIdx = -startIdx - 1;

                if (startIdx == 0)
                {
                    // our target occurs *before* the first term
                    TermsEnum.SeekStatus seekStatus = TermsEnum.SeekCeil(target);
                    Debug.Assert(seekStatus == TermsEnum.SeekStatus.NOT_FOUND);
                    Ord_Renamed = 0;
                    SetTerm();
                    Debug.Assert(Term_Renamed != null);
                    return SeekStatus.NOT_FOUND;
                }

                // back up to the start of the block
                startIdx--;

                // If we are already in the right block and the current term is at or before the
                // term we want, the scan below can continue from here without seeking.
                bool alreadyInBlock = (Ord_Renamed >> OuterInstance.IndexIntervalBits) == startIdx
                    && Term_Renamed != null
                    && Term_Renamed.CompareTo(target) <= 0;

                if (!alreadyInBlock)
                {
                    // seek to the right block
                    TermsEnum.SeekStatus seekStatus = TermsEnum.SeekCeil(OuterInstance.IndexedTermsArray[startIdx]);
                    Debug.Assert(seekStatus == TermsEnum.SeekStatus.FOUND);
                    Ord_Renamed = startIdx << OuterInstance.IndexIntervalBits;
                    SetTerm();
                    Debug.Assert(Term_Renamed != null); // should be non-null since it's in the index
                }

                // scan forward until the current term is no longer smaller than the target
                while (Term_Renamed != null && Term_Renamed.CompareTo(target) < 0)
                {
                    Next();
                }

                if (Term_Renamed == null)
                {
                    return SeekStatus.END;
                }
                else if (Term_Renamed.CompareTo(target) == 0)
                {
                    return SeekStatus.FOUND;
                }
                else
                {
                    return SeekStatus.NOT_FOUND;
                }
            }
Example #23
0
 /// <summary>
 /// Forwards the ceiling seek for <paramref name="text"/> to <c>m_input</c> and returns its status unchanged.
 /// </summary>
 public override SeekStatus SeekCeil(BytesRef text)
 {
     SeekStatus delegatedStatus = m_input.SeekCeil(text);
     return delegatedStatus;
 }
Example #24
0
        /// <summary>
        /// Builds a <c>DocTermOrds</c> over "field" with the given <paramref name="prefixRef"/> and checks
        /// that the ords it yields for every document resolve, through its ord terms enum, to the terms
        /// expected for that document.
        /// </summary>
        /// <param name="r">Reader to verify.</param>
        /// <param name="idToOrds">Per "id" value, the indexes into <paramref name="termsArray"/> expected for that document, in ord order.</param>
        /// <param name="termsArray">Terms referenced by the indexes stored in <paramref name="idToOrds"/>.</param>
        /// <param name="prefixRef">Prefix handed to the <c>DocTermOrds</c> constructor; may be null.</param>
        private void Verify(AtomicReader r, int[][] idToOrds, BytesRef[] termsArray, BytesRef prefixRef)
        {
            DocTermOrds dto = new DocTermOrds(r, r.LiveDocs, "field", prefixRef, int.MaxValue, TestUtil.NextInt32(Random, 2, 10));
            FieldCache.Int32s docIDToID = FieldCache.DEFAULT.GetInt32s(r, "id", false);

            if (VERBOSE)
            {
                string prefixText = prefixRef == null ? "null" : prefixRef.Utf8ToString();
                Console.WriteLine("TEST: verify prefix=" + prefixText);
                Console.WriteLine("TEST: all TERMS:");
                TermsEnum everyTerm = MultiFields.GetTerms(r, "field").GetIterator(null);
                for (int counter = 0; everyTerm.Next() != null; counter++)
                {
                    Console.WriteLine("  ord=" + counter + " term=" + everyTerm.Term.Utf8ToString());
                }
            }

            TermsEnum te = dto.GetOrdTermsEnum(r);

            if (dto.NumTerms == 0)
            {
                // Nothing was collected: without a prefix the field must have no terms at all; with
                // a prefix, no term of the field may start with it.
                Terms fieldTerms = MultiFields.GetTerms(r, "field");
                if (prefixRef == null)
                {
                    Assert.IsNull(fieldTerms);
                }
                else if (fieldTerms != null)
                {
                    TermsEnum prefixProbe = fieldTerms.GetIterator(null);
                    TermsEnum.SeekStatus probeStatus = prefixProbe.SeekCeil(prefixRef);
                    if (probeStatus != TermsEnum.SeekStatus.END)
                    {
                        Assert.IsFalse(StringHelper.StartsWith(prefixProbe.Term, prefixRef), "term=" + prefixProbe.Term.Utf8ToString() + " matches prefix=" + prefixRef.Utf8ToString());
                    }
                }
                return;
            }

            if (VERBOSE)
            {
                Console.WriteLine("TEST: TERMS:");
                te.SeekExact(0);
                do
                {
                    Console.WriteLine("  ord=" + te.Ord + " term=" + te.Term.Utf8ToString());
                }
                while (te.Next() != null);
            }

            SortedSetDocValues iter = dto.GetIterator(r);

            for (int docID = 0; docID < r.MaxDoc; docID++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: docID=" + docID + " of " + r.MaxDoc + " (id=" + docIDToID.Get(docID) + ")");
                }
                iter.SetDocument(docID);
                int[] answers = idToOrds[docIDToID.Get(docID)];
                int consumed = 0;

                for (long ord = iter.NextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = iter.NextOrd())
                {
                    te.SeekExact(ord);
                    BytesRef expected = termsArray[answers[consumed]];
                    consumed++;
                    if (VERBOSE)
                    {
                        Console.WriteLine("  exp=" + expected.Utf8ToString() + " actual=" + te.Term.Utf8ToString());
                    }
                    Assert.AreEqual(expected, te.Term, "expected=" + expected.Utf8ToString() + " actual=" + te.Term.Utf8ToString() + " ord=" + ord);
                }

                // Every expected ord of this document must have been produced.
                Assert.AreEqual(answers.Length, consumed);
            }
        }
Example #25
0
        /// <summary>
        /// Indexes three single-valued string documents, force-merges to one segment and exercises the
        /// terms enum of the field's doc-term-ords view through Next, SeekCeil, SeekExact by term and
        /// SeekExact by ord.
        /// </summary>
        public virtual void TestSortedTermsEnum()
        {
            // Values in the order they are indexed, and in the order the enum must return them.
            string[] indexedValues = { "hello", "world", "beer" };
            string[] sortedValues = { "beer", "hello", "world" };

            Directory directory = NewDirectory();
            Analyzer analyzer = new MockAnalyzer(Random);
            IndexWriterConfig iwconfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            iwconfig.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iwriter = new RandomIndexWriter(Random, directory, iwconfig);

            foreach (string value in indexedValues)
            {
                Document doc = new Document();
                doc.Add(new StringField("field", value, Field.Store.NO));
                iwriter.AddDocument(doc);
            }
            iwriter.ForceMerge(1);

            DirectoryReader ireader = iwriter.GetReader();
            iwriter.Dispose();

            AtomicReader ar = GetOnlySegmentReader(ireader);
            SortedSetDocValues dv = FieldCache.DEFAULT.GetDocTermOrds(ar, "field");
            Assert.AreEqual(3, dv.ValueCount);

            TermsEnum termsEnum = dv.GetTermsEnum();

            // next(): terms come back in sorted order, each at its ordinal
            for (int ord = 0; ord < sortedValues.Length; ord++)
            {
                Assert.AreEqual(sortedValues[ord], termsEnum.Next().Utf8ToString());
                Assert.AreEqual(ord, termsEnum.Ord);
            }

            // seekCeil(): a miss lands on the following term, a hit on the term itself,
            // and a target past the last term reports END
            Assert.AreEqual(SeekStatus.NOT_FOUND, termsEnum.SeekCeil(new BytesRef("ha!")));
            Assert.AreEqual("hello", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(1, termsEnum.Ord);
            Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef("beer")));
            Assert.AreEqual("beer", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(0, termsEnum.Ord);
            Assert.AreEqual(SeekStatus.END, termsEnum.SeekCeil(new BytesRef("zzz")));

            // seekExact(): every indexed value is found at its ordinal, an unknown value is not
            for (int ord = 0; ord < sortedValues.Length; ord++)
            {
                Assert.IsTrue(termsEnum.SeekExact(new BytesRef(sortedValues[ord])));
                Assert.AreEqual(sortedValues[ord], termsEnum.Term.Utf8ToString());
                Assert.AreEqual(ord, termsEnum.Ord);
            }
            Assert.IsFalse(termsEnum.SeekExact(new BytesRef("bogus")));

            // seek(ord): each ordinal resolves back to its term
            for (int ord = 0; ord < sortedValues.Length; ord++)
            {
                termsEnum.SeekExact(ord);
                Assert.AreEqual(sortedValues[ord], termsEnum.Term.Utf8ToString());
                Assert.AreEqual(ord, termsEnum.Ord);
            }

            ireader.Dispose();
            directory.Dispose();
        }
Example #26
0
        /// <summary>
        /// Writes a single field with <c>NUM_TERMS</c> terms, one document each, through the default
        /// codec, then reads it back and checks the term order, the postings of every term (twice, to
        /// stress enum reuse) and that every term can be found again with <c>SeekCeil</c>.
        /// </summary>
        public virtual void TestFixedPostings()
        {
            const int NUM_TERMS = 100;

            TermData[] terms = new TermData[NUM_TERMS];
            for (int i = 0; i < NUM_TERMS; i++)
            {
                int[]  docs = new int[] { i };
                string text = Convert.ToString(i);
                terms[i] = new TermData(this, text, docs, null);
            }

            FieldInfos.Builder builder = new FieldInfos.Builder();

            FieldData field = new FieldData(this, "field", builder, terms, true, false);

            FieldData[] fields     = new FieldData[] { field };
            FieldInfos  fieldInfos = builder.Finish();

            // LUCENENET specific - BUG: we must wrap this in a using block in case anything in the below loop throws
            using (Directory dir = NewDirectory())
            {
                this.Write(fieldInfos, dir, fields, true);
                Codec       codec = Codec.Default;
                SegmentInfo si    = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null);

                // LUCENENET specific - BUG: we must wrap this in a using block in case anything in the below loop throws
                using (FieldsProducer reader = codec.PostingsFormat.FieldsProducer(new SegmentReadState(dir, si, fieldInfos, NewIOContext(Random), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR)))
                // The field enumerator is disposable as well, so release it with the reader.
                using (IEnumerator<string> fieldsEnum = reader.GetEnumerator())
                {
                    // Current is only meaningful after a successful MoveNext, so assert it.
                    Assert.IsTrue(fieldsEnum.MoveNext());
                    string fieldName = fieldsEnum.Current;
                    Assert.IsNotNull(fieldName);
                    Terms terms2 = reader.GetTerms(fieldName);
                    Assert.IsNotNull(terms2);

                    TermsEnum termsEnum = terms2.GetIterator(null);

                    DocsEnum docsEnum = null;
                    for (int i = 0; i < NUM_TERMS; i++)
                    {
                        BytesRef term = termsEnum.Next();
                        Assert.IsNotNull(term);
                        Assert.AreEqual(terms[i].text2, term.Utf8ToString());

                        // do this twice to stress test the codec's reuse, ie,
                        // make sure it properly fully resets (rewinds) its
                        // internal state:
                        for (int iter = 0; iter < 2; iter++)
                        {
                            docsEnum = TestUtil.Docs(Random, termsEnum, null, docsEnum, DocsFlags.NONE);
                            Assert.AreEqual(terms[i].docs[0], docsEnum.NextDoc());
                            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docsEnum.NextDoc());
                        }
                    }
                    Assert.IsNull(termsEnum.Next());

                    for (int i = 0; i < NUM_TERMS; i++)
                    {
                        // Expected value first, so a failure message reports the two the right way round.
                        Assert.AreEqual(TermsEnum.SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef(terms[i].text2)));
                    }

                    // Only the one field was written.
                    Assert.IsFalse(fieldsEnum.MoveNext());
                }
            }
        }
        /// <summary>
        /// Drives <paramref name="expected"/> and <paramref name="actual"/> through the same sequence
        /// of operations (sequential Next, sequential and random seeks by ord and by term, and random
        /// SeekCeil) and asserts that both report the same ord and term after every step.
        /// </summary>
        /// <param name="numOrds">Number of ordinals; ords <c>0 .. numOrds - 1</c> are sought.</param>
        /// <param name="expected">Reference enum.</param>
        /// <param name="actual">Enum under test.</param>
        private void AssertEquals(long numOrds, TermsEnum expected, TermsEnum actual)
        {
            BytesRef @ref;

            // sequential next() through all terms
            while ((@ref = expected.Next()) != null)
            {
                Assert.AreEqual(@ref, actual.Next());
                Assert.AreEqual(expected.Ord(), actual.Ord());
                Assert.AreEqual(expected.Term(), actual.Term());
            }
            Assert.IsNull(actual.Next());

            // sequential seekExact(ord) through all terms
            for (long i = 0; i < numOrds; i++)
            {
                expected.SeekExact(i);
                actual.SeekExact(i);
                Assert.AreEqual(expected.Ord(), actual.Ord());
                Assert.AreEqual(expected.Term(), actual.Term());
            }

            // sequential seekExact(BytesRef) through all terms
            for (long i = 0; i < numOrds; i++)
            {
                expected.SeekExact(i);
                Assert.IsTrue(actual.SeekExact(expected.Term()));
                Assert.AreEqual(expected.Ord(), actual.Ord());
                Assert.AreEqual(expected.Term(), actual.Term());
            }

            // sequential seekCeil(BytesRef) through all terms
            for (long i = 0; i < numOrds; i++)
            {
                expected.SeekExact(i);
                Assert.AreEqual(SeekStatus.FOUND, actual.SeekCeil(expected.Term()));
                Assert.AreEqual(expected.Ord(), actual.Ord());
                Assert.AreEqual(expected.Term(), actual.Term());
            }

            // random seekExact(ord)
            for (long i = 0; i < numOrds; i++)
            {
                long randomOrd = TestUtil.NextLong(Random(), 0, numOrds - 1);
                expected.SeekExact(randomOrd);
                actual.SeekExact(randomOrd);
                Assert.AreEqual(expected.Ord(), actual.Ord());
                Assert.AreEqual(expected.Term(), actual.Term());
            }

            // random seekExact(BytesRef)
            for (long i = 0; i < numOrds; i++)
            {
                long randomOrd = TestUtil.NextLong(Random(), 0, numOrds - 1);
                expected.SeekExact(randomOrd);
                // The term comes from 'expected', so the seek must succeed; assert it as the
                // sequential loop above does, rather than reading ord/term from a failed seek.
                Assert.IsTrue(actual.SeekExact(expected.Term()));
                Assert.AreEqual(expected.Ord(), actual.Ord());
                Assert.AreEqual(expected.Term(), actual.Term());
            }

            // random seekCeil(BytesRef)
            for (long i = 0; i < numOrds; i++)
            {
                BytesRef target = new BytesRef(TestUtil.RandomUnicodeString(Random()));
                SeekStatus expectedStatus = expected.SeekCeil(target);
                Assert.AreEqual(expectedStatus, actual.SeekCeil(target));
                // Ord and term are only compared when the seek did not run off the end.
                if (expectedStatus != SeekStatus.END)
                {
                    Assert.AreEqual(expected.Ord(), actual.Ord());
                    Assert.AreEqual(expected.Term(), actual.Term());
                }
            }
        }
Example #28
0
            /// <summary>
            /// Runs <c>NUM_TEST_ITER</c> rounds of terms dictionary checks. Each round picks a
            /// random entry of <c>fields</c>, pulls a fresh <see cref="TermsEnum"/> for it from
            /// <c>termsDict</c> and compares it against the expected data in <c>field.terms</c>:
            /// <list type="bullet">
            ///   <item><description>straight enumeration of all terms;</description></item>
            ///   <item><description>a random seek by term and a random seek by ord (skipped when ord seeking throws <see cref="NotSupportedException"/>);</description></item>
            ///   <item><description>seeks to 100 random terms ending in "." that must not be reported as FOUND;</description></item>
            ///   <item><description>seeks to every term in reverse order, by term and by ord;</description></item>
            ///   <item><description>a final enumeration that, for roughly one term in three, walks the postings with a random mix of <c>NextDoc</c> and <c>Advance</c>.</description></item>
            /// </list>
            /// Rounds are skipped entirely when <c>si.Codec</c> is a <c>Lucene3xCodec</c>.
            /// </summary>
            public virtual void _run()
            {
                for (int iter = 0; iter < NUM_TEST_ITER; iter++)
                {
                    FieldData field     = fields[Random.Next(fields.Length)];
                    TermsEnum termsEnum = termsDict.GetTerms(field.fieldInfo.Name).GetIterator(null);
#pragma warning disable 612, 618
                    if (si.Codec is Lucene3xCodec)
#pragma warning restore 612, 618
                    {
                        // code below expects unicode sort order
                        continue;
                    }

                    int upto = 0;
                    // Test straight enum of the terms:
                    while (true)
                    {
                        BytesRef term = termsEnum.Next();
                        if (term == null)
                        {
                            break;
                        }
                        BytesRef expected = new BytesRef(field.terms[upto++].text2);
                        Assert.IsTrue(expected.BytesEquals(term), "expected=" + expected + " vs actual " + term);
                    }
                    // Expected value goes first so a failure message reads correctly.
                    Assert.AreEqual(field.terms.Length, upto);

                    // Test random seek:
                    TermData             term2  = field.terms[Random.Next(field.terms.Length)];
                    TermsEnum.SeekStatus status = termsEnum.SeekCeil(new BytesRef(term2.text2));
                    Assert.AreEqual(TermsEnum.SeekStatus.FOUND, status);
                    Assert.AreEqual(term2.docs.Length, termsEnum.DocFreq);
                    if (field.omitTF)
                    {
                        this.VerifyDocs(term2.docs, term2.positions, TestUtil.Docs(Random, termsEnum, null, null, DocsFlags.NONE), false);
                    }
                    else
                    {
                        this.VerifyDocs(term2.docs, term2.positions, termsEnum.DocsAndPositions(null, null), true);
                    }

                    // Test random seek by ord:
                    int idx = Random.Next(field.terms.Length);
                    term2 = field.terms[idx];
                    bool success = false;
                    try
                    {
                        termsEnum.SeekExact(idx);
                        success = true;
                    }
                    catch (NotSupportedException)
                    {
                        // ok -- skip it
                    }
                    if (success)
                    {
                        // NOTE: status still holds the result of the SeekCeil above; SeekExact(ord)
                        // does not produce a status of its own.
                        Assert.AreEqual(TermsEnum.SeekStatus.FOUND, status);
                        Assert.IsTrue(termsEnum.Term.BytesEquals(new BytesRef(term2.text2)));
                        Assert.AreEqual(term2.docs.Length, termsEnum.DocFreq);
                        if (field.omitTF)
                        {
                            this.VerifyDocs(term2.docs, term2.positions, TestUtil.Docs(Random, termsEnum, null, null, DocsFlags.NONE), false);
                        }
                        else
                        {
                            this.VerifyDocs(term2.docs, term2.positions, termsEnum.DocsAndPositions(null, null), true);
                        }
                    }

                    // Test seek to non-existent terms:
                    if (Verbose)
                    {
                        Console.WriteLine("TEST: seek non-exist terms");
                    }
                    for (int i = 0; i < 100; i++)
                    {
                        string text2 = TestUtil.RandomUnicodeString(Random) + ".";
                        status = termsEnum.SeekCeil(new BytesRef(text2));
                        Assert.IsTrue(status == TermsEnum.SeekStatus.NOT_FOUND || status == TermsEnum.SeekStatus.END);
                    }

                    // Seek to each term, backwards:
                    if (Verbose)
                    {
                        Console.WriteLine("TEST: seek terms backwards");
                    }
                    for (int i = field.terms.Length - 1; i >= 0; i--)
                    {
                        Assert.AreEqual(TermsEnum.SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef(field.terms[i].text2)), Thread.CurrentThread.Name + ": field=" + field.fieldInfo.Name + " term=" + field.terms[i].text2);
                        Assert.AreEqual(field.terms[i].docs.Length, termsEnum.DocFreq);
                    }

                    // Seek to each term by ord, backwards
                    for (int i = field.terms.Length - 1; i >= 0; i--)
                    {
                        try
                        {
                            termsEnum.SeekExact(i);
                            Assert.AreEqual(field.terms[i].docs.Length, termsEnum.DocFreq);
                            Assert.IsTrue(termsEnum.Term.BytesEquals(new BytesRef(field.terms[i].text2)));
                        }
                        catch (NotSupportedException)
                        {
                            // ok -- this enum cannot seek by ord; nothing to verify
                        }
                    }

                    // Seek to non-existent empty-string term
                    status = termsEnum.SeekCeil(new BytesRef(""));
                    Assert.IsNotNull(status);
                    //Assert.AreEqual(TermsEnum.SeekStatus.NOT_FOUND, status);

                    // Make sure we're now pointing to first term
                    Assert.IsTrue(termsEnum.Term.BytesEquals(new BytesRef(field.terms[0].text2)));

                    // Test docs enum
                    termsEnum.SeekCeil(new BytesRef(""));
                    upto = 0;
                    do
                    {
                        term2 = field.terms[upto];
                        if (Random.Next(3) == 1)
                        {
                            DocsEnum             docs;
                            DocsEnum             docsAndFreqs;
                            DocsAndPositionsEnum postings;
                            if (!field.omitTF)
                            {
                                postings = termsEnum.DocsAndPositions(null, null);
                                if (postings != null)
                                {
                                    docs = docsAndFreqs = postings;
                                }
                                else
                                {
                                    // No positions available: fall back to a docs+freqs enum.
                                    docs = docsAndFreqs = TestUtil.Docs(Random, termsEnum, null, null, DocsFlags.FREQS);
                                }
                            }
                            else
                            {
                                postings     = null;
                                docsAndFreqs = null;
                                docs         = TestUtil.Docs(Random, termsEnum, null, null, DocsFlags.NONE);
                            }
                            Assert.IsNotNull(docs);
                            int  upto2 = -1;
                            bool ended = false;
                            while (upto2 < term2.docs.Length - 1)
                            {
                                // Maybe skip:
                                int left = term2.docs.Length - upto2;
                                int doc;
                                if (Random.Next(3) == 1 && left >= 1)
                                {
                                    int inc = 1 + Random.Next(left - 1);
                                    upto2 += inc;
                                    if (Random.Next(2) == 1)
                                    {
                                        // Advance exactly onto an expected doc.
                                        doc = docs.Advance(term2.docs[upto2]);
                                        Assert.AreEqual(term2.docs[upto2], doc);
                                    }
                                    else
                                    {
                                        // Advance just past an expected doc; we must land on the next one (or run out).
                                        doc = docs.Advance(1 + term2.docs[upto2]);
                                        if (doc == DocIdSetIterator.NO_MORE_DOCS)
                                        {
                                            // skipped past last doc
                                            if (Debugging.AssertsEnabled)
                                            {
                                                Debugging.Assert(upto2 == term2.docs.Length - 1);
                                            }
                                            ended = true;
                                            break;
                                        }
                                        else
                                        {
                                            // skipped to next doc
                                            if (Debugging.AssertsEnabled)
                                            {
                                                Debugging.Assert(upto2 < term2.docs.Length - 1);
                                            }
                                            if (doc >= term2.docs[1 + upto2])
                                            {
                                                upto2++;
                                            }
                                        }
                                    }
                                }
                                else
                                {
                                    doc = docs.NextDoc();
                                    Assert.IsTrue(doc != -1);
                                    upto2++;
                                }
                                Assert.AreEqual(term2.docs[upto2], doc);
                                if (!field.omitTF)
                                {
                                    // Read the frequency through docsAndFreqs: it is the enum actually being
                                    // iterated and is non-null here even when no positions enum was available,
                                    // whereas postings may be null in that case.
                                    Assert.AreEqual(term2.positions[upto2].Length, docsAndFreqs.Freq);
                                    if (Random.Next(2) == 1 && postings != null)
                                    {
                                        this.VerifyPositions(term2.positions[upto2], postings);
                                    }
                                }
                            }

                            if (!ended)
                            {
                                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docs.NextDoc());
                            }
                        }
                        upto++;
                    } while (termsEnum.Next() != null);

                    Assert.AreEqual(field.terms.Length, upto);
                }
            }
Example #29
0
        /// <summary>
        /// Builds the un-inverted (per-document) view of the field <c>m_field</c>: every visited
        /// term gets a sequential term number, and for each document the numbers of the terms it
        /// contains are recorded as delta-encoded vInts, either packed directly into the
        /// document's slot of the index array or stored in a byte array that the slot points to.
        /// <para/>
        /// Returns early, without building anything, when the reader has no fields, the field has
        /// no terms, or no term is found at or after the seek start.
        /// <para/>
        /// Call this only once (if you subclass!)
        /// </summary>
        /// <param name="reader">Reader whose field infos, max doc and terms are read.</param>
        /// <param name="liveDocs">Passed to <c>TermsEnum.Docs</c> for every term whose postings are read.</param>
        /// <param name="termPrefix">If non-null, enumeration starts at this prefix and stops at the
        /// first term that does not start with it; a deep copy is kept in <c>m_prefix</c>.
        /// If null, all terms of the field are visited.</param>
        /// <exception cref="InvalidOperationException">If the field has doc values, or if the
        /// position inside a final term-number array no longer fits in 24 bits.</exception>
        protected virtual void Uninvert(AtomicReader reader, IBits liveDocs, BytesRef termPrefix)
        {
            FieldInfo info = reader.FieldInfos.FieldInfo(m_field);

            if (info != null && info.HasDocValues)
            {
                throw new InvalidOperationException("Type mismatch: " + m_field + " was indexed as " + info.DocValuesType);
            }
            //System.out.println("DTO uninvert field=" + field + " prefix=" + termPrefix);
            // Tick counts are only used for the timing statistics stored at the very end.
            long startTime = Environment.TickCount;

            m_prefix = termPrefix == null ? null : BytesRef.DeepCopyOf(termPrefix);

            int maxDoc = reader.MaxDoc;

            int[] index    = new int[maxDoc];     // immediate term numbers, or the index into the byte[] representing the last number
            int[] lastTerm = new int[maxDoc];     // last term we saw for this document
            var   bytes    = new sbyte[maxDoc][]; // list of term numbers for the doc (delta encoded vInts)

            Fields fields = reader.Fields;

            if (fields == null)
            {
                // No terms
                return;
            }
            Terms terms = fields.GetTerms(m_field);

            if (terms == null)
            {
                // No terms
                return;
            }

            TermsEnum te        = terms.GetIterator(null);
            BytesRef  seekStart = termPrefix != null ? termPrefix : new BytesRef();

            //System.out.println("seekStart=" + seekStart.utf8ToString());
            if (te.SeekCeil(seekStart) == TermsEnum.SeekStatus.END)
            {
                // No terms match
                return;
            }

            // If we need our "term index wrapper", these will be
            // init'd below:
            IList <BytesRef> indexedTerms      = null;
            PagedBytes       indexedTermsBytes = null;

            // Ord support is probed once, on the first visited term.
            bool testedOrd = false;

            // we need a minimum of 9 bytes, but round up to 12 since the space would
            // be wasted with most allocators anyway.
            var tempArr = new sbyte[12];

            //
            // enumerate all terms, and build an intermediate form of the un-inverted field.
            //
            // During this intermediate form, every document has a (potential) byte[]
            // and the int[maxDoc()] array either contains the termNumber list directly
            // or the *end* offset of the termNumber list in its byte array (for faster
            // appending and faster creation of the final form).
            //
            // idea... if things are too large while building, we could do a range of docs
            // at a time (but it would be a fair amount slower to build)
            // could also do ranges in parallel to take advantage of multiple CPUs

            // OPTIONAL: remap the largest df terms to the lowest 128 (single byte)
            // values.  this requires going over the field first to find the most
            // frequent terms ahead of time.

            int termNum = 0;

            m_docsEnum = null;

            // Loop begins with te positioned to first term (we call
            // seek above):
            for (; ;)
            {
                BytesRef t = te.Term;
                // Stop at the end of the enum or at the first term outside the requested prefix.
                if (t == null || (termPrefix != null && !StringHelper.StartsWith(t, termPrefix)))
                {
                    break;
                }
                //System.out.println("visit term=" + t.utf8ToString() + " " + t + " termNum=" + termNum);

                if (!testedOrd)
                {
                    try
                    {
                        // The ord of the first visited term becomes the base for term numbers.
                        m_ordBase = (int)te.Ord;
                        //System.out.println("got ordBase=" + ordBase);
                    }
#pragma warning disable 168
                    catch (NotSupportedException uoe)
#pragma warning restore 168
                    {
                        // Reader cannot provide ord support, so we wrap
                        // our own support by creating our own terms index:
                        indexedTerms      = new List <BytesRef>();
                        indexedTermsBytes = new PagedBytes(15);
                        //System.out.println("NO ORDS");
                    }
                    testedOrd = true;
                }

                // Per-term hook (defined elsewhere); invoked for every visited term,
                // including terms whose postings are skipped below.
                VisitTerm(te, termNum);

                // Only reached with a non-null list when the reader has no ord support:
                // sample every term whose number has no bits in common with indexIntervalMask.
                if (indexedTerms != null && (termNum & indexIntervalMask) == 0)
                {
                    // Index this term
                    m_sizeOfIndexedStrings += t.Length;
                    BytesRef indexedTerm = new BytesRef();
                    indexedTermsBytes.Copy(t, indexedTerm);
                    // TODO: really should 1) strip off useless suffix,
                    // and 2) use FST not array/PagedBytes
                    indexedTerms.Add(indexedTerm);
                }

                // Terms more frequent than m_maxTermDocFreq are not un-inverted here,
                // but they still consume a term number (termNum is incremented below).
                int df = te.DocFreq;
                if (df <= m_maxTermDocFreq)
                {
                    m_docsEnum = te.Docs(liveDocs, m_docsEnum, DocsFlags.NONE);

                    // dF, but takes deletions into account
                    int actualDF = 0;

                    for (; ;)
                    {
                        int doc = m_docsEnum.NextDoc();
                        if (doc == DocIdSetIterator.NO_MORE_DOCS)
                        {
                            break;
                        }
                        //System.out.println("  chunk=" + chunk + " docs");

                        actualDF++;
                        m_termInstances++;

                        //System.out.println("    docID=" + doc);
                        // add TNUM_OFFSET to the term number to make room for special reserved values:
                        // 0 (end term) and 1 (index into byte array follows)
                        int delta = termNum - lastTerm[doc] + TNUM_OFFSET;
                        lastTerm[doc] = termNum;
                        int val = index[doc];

                        // Low byte == 1 marks "pointer" form: the upper 24 bits hold an offset.
                        if ((val & 0xff) == 1)
                        {
                            // index into byte array (actually the end of
                            // the doc-specific byte[] when building)
                            int pos    = (int)((uint)val >> 8);
                            int ilen   = VInt32Size(delta);
                            var arr    = bytes[doc];
                            int newend = pos + ilen;
                            if (newend > arr.Length)
                            {
                                // We avoid a doubling strategy to lower memory usage.
                                // this faceting method isn't for docs with many terms.
                                // In hotspot, objects have 2 words of overhead, then fields, rounded up to a 64-bit boundary.
                                // TODO: figure out what array lengths we can round up to w/o actually using more memory
                                // (how much space does a byte[] take up?  Is data preceded by a 32 bit length only?
                                // It should be safe to round up to the nearest 32 bits in any case.
                                int newLen = (newend + 3) & unchecked ((int)0xfffffffc); // 4 byte alignment
                                var newarr = new sbyte[newLen];
                                Array.Copy(arr, 0, newarr, 0, pos);
                                arr        = newarr;
                                bytes[doc] = newarr;
                            }
                            // Append the delta; the returned value is stored as the new end offset.
                            pos        = WriteInt32(delta, arr, pos);
                            index[doc] = (pos << 8) | 1; // update pointer to end index in byte[]
                        }
                        else
                        {
                            // OK, this int has data in it... find the end (a zero starting byte - not
                            // part of another number, hence not following a byte with the high bit set).
                            // Each mask tests the high bit of byte (ipos - 1) together with all of byte ipos:
                            // if both are clear, the inline list ends after ipos bytes.
                            int ipos;
                            if (val == 0)
                            {
                                ipos = 0;
                            }
                            else if ((val & 0x0000ff80) == 0)
                            {
                                ipos = 1;
                            }
                            else if ((val & 0x00ff8000) == 0)
                            {
                                ipos = 2;
                            }
                            else if ((val & 0xff800000) == 0)
                            {
                                ipos = 3;
                            }
                            else
                            {
                                ipos = 4;
                            }

                            //System.out.println("      ipos=" + ipos);

                            // Encode the delta into the scratch buffer at the offset where the inline list ends.
                            int endPos = WriteInt32(delta, tempArr, ipos);
                            //System.out.println("      endpos=" + endPos);
                            if (endPos <= 4)
                            {
                                //System.out.println("      fits!");
                                // value will fit in the integer... move bytes back
                                for (int j = ipos; j < endPos; j++)
                                {
                                    // byte j goes into bits [8*j, 8*j + 7] of the int
                                    val |= (tempArr[j] & 0xff) << (j << 3);
                                }
                                index[doc] = val;
                            }
                            else
                            {
                                // value won't fit... move integer into byte[]
                                // (unpack the existing inline bytes, lowest byte first, in front of the new delta)
                                for (int j = 0; j < ipos; j++)
                                {
                                    tempArr[j] = (sbyte)val;
                                    val        = (int)((uint)val >> 8);
                                }
                                // point at the end index in the byte[]
                                index[doc] = (endPos << 8) | 1;
                                // The scratch buffer becomes this doc's array; allocate a fresh scratch buffer.
                                bytes[doc] = tempArr;
                                tempArr    = new sbyte[12];
                            }
                        }
                    }
                    SetActualDocFreq(termNum, actualDF);
                }

                termNum++;
                if (te.Next() == null)
                {
                    break;
                }
            }

            m_numTermsInField = termNum;

            // End of phase 1 (term/postings enumeration); used for m_phase1_time below.
            long midPoint = Environment.TickCount;

            if (m_termInstances == 0)
            {
                // we didn't invert anything
                // lower memory consumption.
                m_tnums = null;
            }
            else
            {
                this.m_index = index;

                //
                // transform intermediate form into the final form, building a single byte[]
                // at a time, and releasing the intermediate byte[]s as we go to avoid
                // increasing the memory footprint.
                //

                for (int pass = 0; pass < 256; pass++)
                {
                    var target = m_tnums[pass];
                    var pos    = 0; // end in target;
                    if (target != null)
                    {
                        // Continue appending after whatever this slot already holds.
                        pos = target.Length;
                    }
                    else
                    {
                        target = new sbyte[4096];
                    }

                    // loop over documents, 0x00ppxxxx, 0x01ppxxxx, 0x02ppxxxx
                    // where pp is the pass (which array we are building), and xx is all values.
                    // each pass shares the same byte[] for termNumber lists.
                    for (int docbase = pass << 16; docbase < maxDoc; docbase += (1 << 24))
                    {
                        int lim = Math.Min(docbase + (1 << 16), maxDoc);
                        for (int doc = docbase; doc < lim; doc++)
                        {
                            //System.out.println("  pass=" + pass + " process docID=" + doc);
                            int val = index[doc];
                            // Only docs in "pointer" form own a byte[]; inline values stay in index[] untouched.
                            if ((val & 0xff) == 1)
                            {
                                // In the intermediate form the upper 24 bits are the end offset, i.e. the list length.
                                int len = (int)((uint)val >> 8);
                                //System.out.println("    ptr pos=" + pos);
                                index[doc] = (pos << 8) | 1; // change index to point to start of array
                                if ((pos & 0xff000000) != 0)
                                {
                                    // we only have 24 bits for the array index
                                    throw new InvalidOperationException("Too many values for UnInvertedField faceting on field " + m_field);
                                }
                                var arr = bytes[doc];

                                /*
                                 * for(byte b : arr) {
                                 * //System.out.println("      b=" + Integer.toHexString((int) b));
                                 * }
                                 */
                                bytes[doc] = null; // IMPORTANT: allow GC to avoid OOM
                                // "<=" (not "<") keeps room for the zero terminator byte after the list.
                                if (target.Length <= pos + len)
                                {
                                    int newlen = target.Length;

                                    //* we don't have to worry about the array getting too large
                                    // since the "pos" param will overflow first (only 24 bits available)
                                    // if ((newlen<<1) <= 0) {
                                    //  // overflow...
                                    //  newlen = Integer.MAX_VALUE;
                                    //  if (newlen <= pos + len) {
                                    //    throw new SolrException(400,"Too many terms to uninvert field!");
                                    //  }
                                    // } else {
                                    //  while (newlen <= pos + len) newlen<<=1;  // doubling strategy
                                    // }
                                    //
                                    while (newlen <= pos + len) // doubling strategy
                                    {
                                        newlen <<= 1;
                                    }
                                    var newtarget = new sbyte[newlen];
                                    Array.Copy(target, 0, newtarget, 0, pos);
                                    target = newtarget;
                                }
                                Array.Copy(arr, 0, target, pos, len);
                                pos += len + 1; // skip single byte at end and leave it 0 for terminator
                            }
                        }
                    }

                    // shrink array
                    if (pos < target.Length)
                    {
                        var newtarget = new sbyte[pos];
                        Array.Copy(target, 0, newtarget, 0, pos);
                        target = newtarget;
                    }

                    m_tnums[pass] = target;

                    // Later passes would start beyond maxDoc and find no documents.
                    if ((pass << 16) > maxDoc)
                    {
                        break;
                    }
                }
            }
            if (indexedTerms != null)
            {
                // Freeze the sampled terms collected for the no-ord fallback into an array.
                m_indexedTermsArray = new BytesRef[indexedTerms.Count];
                indexedTerms.CopyTo(m_indexedTermsArray, 0);
            }

            long endTime = Environment.TickCount;

            // Timing statistics, as Environment.TickCount differences. Not set on the early-return paths above.
            m_total_time  = (int)(endTime - startTime);
            m_phase1_time = (int)(midPoint - startTime);
        }
Example #30
0
        /// <summary>
        /// Asserts that <paramref name="terms"/> matches the tokens recorded in <paramref name="tk"/>,
        /// given the term vector options of <paramref name="ft"/>.
        /// <para/>
        /// Checks the statistics (one document, one entry per distinct token text), then walks the
        /// terms in <c>terms.Comparer</c> order verifying each term's doc freq, docs enum and, when
        /// positions or offsets are stored, every position/offset/payload. Finishes with five random
        /// seeks to terms that must exist. Enum instances are randomly reused through the
        /// <c>termsEnum</c>, <c>docsEnum</c> and <c>docsAndPositionsEnum</c> holders of this class.
        /// </summary>
        /// <param name="tk">Token stream holding the expected terms, frequencies, positions, offsets and payloads.</param>
        /// <param name="ft">Field type whose <c>StoreTermVector*</c> flags determine what must be present.</param>
        /// <param name="terms">The terms under test.</param>
        protected virtual void AssertEquals(RandomTokenStream tk, FieldType ft, Terms terms)
        {
            // --- statistics and capabilities ---
            Assert.AreEqual(1, terms.DocCount);
            int distinctTermCount = new JCG.HashSet<string>(tk.terms).Count;

            Assert.AreEqual((long)distinctTermCount, terms.Count);      // LUCENENET specific - cast required because types don't match (xUnit checks this)
            Assert.AreEqual((long)distinctTermCount, terms.SumDocFreq); // LUCENENET specific - cast required because types don't match (xUnit checks this)
            Assert.AreEqual(ft.StoreTermVectorPositions, terms.HasPositions);
            Assert.AreEqual(ft.StoreTermVectorOffsets, terms.HasOffsets);
            Assert.AreEqual(ft.StoreTermVectorPayloads && tk.HasPayloads(), terms.HasPayloads);

            // --- expected terms, ordered the way the Terms instance orders them ---
            ISet<BytesRef> expectedTermSet = new JCG.HashSet<BytesRef>();
            foreach (string text in tk.freqs.Keys)
            {
                expectedTermSet.Add(new BytesRef(text));
            }
            BytesRef[] expectedTerms = expectedTermSet.ToArray();
            Array.Sort(expectedTerms, terms.Comparer);

            // Randomly hand a previously used enum back for reuse.
            TermsEnum termsReuse = null;
            if (!Random.NextBoolean())
            {
                termsReuse = this.termsEnum.Value;
            }
            TermsEnum te = terms.GetEnumerator(termsReuse);
            this.termsEnum.Value = te;

            foreach (BytesRef expectedTerm in expectedTerms)
            {
                Assert.IsTrue(te.MoveNext());
                Assert.AreEqual(expectedTerm, te.Term);
                Assert.AreEqual(1, te.DocFreq);

                // No bit is set yet, so the docs enum is expected to be exhausted immediately.
                FixedBitSet liveDocs = new FixedBitSet(1);
                DocsEnum docsReuse = null;
                if (!Random.NextBoolean())
                {
                    docsReuse = this.docsEnum.Value;
                }
                DocsEnum docs = te.Docs(liveDocs, docsReuse);
                Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docs.NextDoc());
                liveDocs.Set(0);

                // With doc 0 live (or no filter at all) exactly doc 0 must come back.
                FixedBitSet liveDocsOrNull = Random.NextBoolean() ? liveDocs : null;
                docsReuse = Random.NextBoolean() ? null : docs;
                docs = te.Docs(liveDocsOrNull, docsReuse);
                Assert.IsNotNull(docs);
                Assert.AreEqual(0, docs.NextDoc());
                Assert.AreEqual(0, docs.DocID);
                Assert.AreEqual(tk.freqs[te.Term.Utf8ToString()], docs.Freq);
                Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docs.NextDoc());
                this.docsEnum.Value = docs;

                // Same two steps for the positions enum, which must exist iff positions or offsets are stored.
                liveDocs.Clear(0);
                DocsAndPositionsEnum postingsReuse = null;
                if (!Random.NextBoolean())
                {
                    postingsReuse = this.docsAndPositionsEnum.Value;
                }
                DocsAndPositionsEnum postings = te.DocsAndPositions(liveDocs, postingsReuse);
                Assert.AreEqual(ft.StoreTermVectorOffsets || ft.StoreTermVectorPositions, postings != null);
                if (postings != null)
                {
                    Assert.AreEqual(DocsEnum.NO_MORE_DOCS, postings.NextDoc());
                }
                liveDocs.Set(0);

                liveDocsOrNull = Random.NextBoolean() ? liveDocs : null;
                postingsReuse = Random.NextBoolean() ? null : postings;
                postings = te.DocsAndPositions(liveDocsOrNull, postingsReuse);
                Assert.AreEqual(ft.StoreTermVectorOffsets || ft.StoreTermVectorPositions, postings != null);
                if (terms.HasPositions || terms.HasOffsets)
                {
                    Assert.AreEqual(0, postings.NextDoc());
                    int freq = postings.Freq;
                    Assert.AreEqual(tk.freqs[te.Term.Utf8ToString()], freq);
                    if (postings != null)
                    {
                        for (int occurrence = 0; occurrence < freq; occurrence++)
                        {
                            int position = postings.NextPosition();

                            // Candidate token indexes are looked up by position when positions
                            // are stored, otherwise by start offset.
                            ISet<int> candidates;
                            if (!terms.HasPositions)
                            {
                                candidates = tk.startOffsetToTerms[postings.StartOffset];
                            }
                            else
                            {
                                candidates = tk.positionToTerms[position];
                            }
                            Assert.IsNotNull(candidates);

                            if (terms.HasPositions)
                            {
                                bool positionMatched = false;
                                foreach (int candidate in candidates)
                                {
                                    if (!(tk.termBytes[candidate].Equals(te.Term) && tk.positions[candidate] == position))
                                    {
                                        continue;
                                    }
                                    positionMatched = true;
                                    break;
                                }
                                Assert.IsTrue(positionMatched);
                            }

                            if (terms.HasOffsets)
                            {
                                bool offsetMatched = false;
                                foreach (int candidate in candidates)
                                {
                                    if (!(tk.termBytes[candidate].Equals(te.Term) && tk.startOffsets[candidate] == postings.StartOffset && tk.endOffsets[candidate] == postings.EndOffset))
                                    {
                                        continue;
                                    }
                                    offsetMatched = true;
                                    break;
                                }
                                Assert.IsTrue(offsetMatched);
                            }

                            if (terms.HasPayloads)
                            {
                                bool payloadMatched = false;
                                foreach (int candidate in candidates)
                                {
                                    if (!(tk.termBytes[candidate].Equals(te.Term) && Equals(tk.payloads[candidate], postings.GetPayload())))
                                    {
                                        continue;
                                    }
                                    payloadMatched = true;
                                    break;
                                }
                                Assert.IsTrue(payloadMatched);
                            }
                        }

                        // Asking for one position too many: anything thrown from inside the try
                        // that satisfies either filter is accepted.
                        try
                        {
                            postings.NextPosition();
                            Assert.Fail();
                        }
                        catch (Exception e) when (e.IsException() || e.IsAssertionError())
                        {
                            // ok
                        }
                    }
                    Assert.AreEqual(DocsEnum.NO_MORE_DOCS, postings.NextDoc());
                }
                this.docsAndPositionsEnum.Value = postings;
            }

            // The enum must be exhausted, yet still able to seek to any known term.
            Assert.IsFalse(te.MoveNext());
            for (int attempt = 0; attempt < 5; attempt++)
            {
                bool useSeekExact = Random.NextBoolean();
                var target = RandomPicks.RandomFrom(Random, tk.termBytes);
                if (useSeekExact)
                {
                    Assert.IsTrue(te.SeekExact(target));
                }
                else
                {
                    Assert.AreEqual(SeekStatus.FOUND, te.SeekCeil(target));
                }
            }
        }
Example #31
0
        /// <summary>
        /// Indexes a randomized mix of documents whose "field" value is either "a" or "b",
        /// calls <c>ForceMerge(1)</c>, and then repeatedly seeks to each term and passes its
        /// <c>DocsEnum</c> together with the expected doc IDs to <c>TestOne</c>.
        /// </summary>
        /// <remarks>
        /// The scenario runs three times, each with a fresh directory and writer. The writer,
        /// reader and directory are released in a <c>finally</c> block so that a failed assertion
        /// or an exception part-way through an iteration does not leave them open.
        /// NOTE(review): the exact checks performed by <c>TestOne</c> are defined outside this
        /// method and are not visible here.
        /// </remarks>
        public virtual void TestStressAdvance_Mem()
        {
            for (int iter = 0; iter < 3; iter++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: iter=" + iter);
                }

                Directory dir = NewDirectory();
                RandomIndexWriter w = null;
                DirectoryReader r = null;
                try
                {
                    w = new RandomIndexWriter(Random(), dir);

                    // IDs (the stored "id" value, not the Lucene doc ID) of documents indexed with "a".
                    HashSet<int> aDocs = new HashSet<int>();

                    // One Document instance is reused for every add; only the field values change.
                    Documents.Document doc = new Documents.Document();
                    Field f = NewStringField("field", "", Field.Store.NO);
                    doc.Add(f);
                    Field idField = NewStringField("id", "", Field.Store.YES);
                    doc.Add(idField);

                    int num = AtLeast(4097);
                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: numDocs=" + num);
                    }

                    for (int id = 0; id < num; id++)
                    {
                        // One outcome in four of Next(4) selects "a"; everything else is "b".
                        if (Random().Next(4) == 3)
                        {
                            f.StringValue = "a";
                            aDocs.Add(id);
                        }
                        else
                        {
                            f.StringValue = "b";
                        }
                        idField.StringValue = "" + id;
                        w.AddDocument(doc);
                        if (VERBOSE)
                        {
                            Console.WriteLine("\nTEST: doc upto " + id);
                        }
                    }

                    w.ForceMerge(1);

                    IList<int> aDocIDs = new List<int>();
                    IList<int> bDocIDs = new List<int>();

                    r = w.Reader;

                    // Sort every doc ID into the "a" or "b" list based on its stored "id" value.
                    int maxDoc = r.MaxDoc;
                    for (int docID = 0; docID < maxDoc; docID++)
                    {
                        int id = Convert.ToInt32(r.Document(docID).Get("id"));
                        if (aDocs.Contains(id))
                        {
                            aDocIDs.Add(docID);
                        }
                        else
                        {
                            bDocIDs.Add(docID);
                        }
                    }

                    TermsEnum te = GetOnlySegmentReader(r).Fields.Terms("field").Iterator(null);

                    // The previous DocsEnum is passed back into TestUtil.Docs on every call.
                    DocsEnum de = null;
                    for (int iter2 = 0; iter2 < 10; iter2++)
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine("\nTEST: iter=" + iter + " iter2=" + iter2);
                        }
                        Assert.AreEqual(TermsEnum.SeekStatus.FOUND, te.SeekCeil(new BytesRef("a")));
                        de = TestUtil.Docs(Random(), te, null, de, DocsEnum.FLAG_NONE);
                        TestOne(de, aDocIDs);

                        Assert.AreEqual(TermsEnum.SeekStatus.FOUND, te.SeekCeil(new BytesRef("b")));
                        de = TestUtil.Docs(Random(), te, null, de, DocsEnum.FLAG_NONE);
                        TestOne(de, bDocIDs);
                    }
                }
                finally
                {
                    // Same teardown order as the success path always used: writer, reader, directory.
                    if (w != null)
                    {
                        w.Dispose();
                    }
                    if (r != null)
                    {
                        r.Dispose();
                    }
                    dir.Dispose();
                }
            }
        }