Пример #1
0
        /// <summary>
        /// Indexes the terms "abc", "abd", "acd" and "bcd" (one per document) and checks
        /// which terms <c>Terms.Intersect</c> returns for the regexp <c>.*d</c> when it is
        /// handed various start terms.
        /// </summary>
        public virtual void TestIntersectStartTerm()
        {
            Directory directory = NewDirectory();
            IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            config.SetMergePolicy(new LogDocMergePolicy());
            RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, config);

            // One single-field document per term; the assertions below expect the
            // doc IDs to follow this insertion order.
            foreach (string value in new[] { "abc", "abd", "acd", "bcd" })
            {
                Document document = new Document();
                document.Add(NewStringField("field", value, Field.Store.NO));
                writer.AddDocument(document);
            }

            writer.ForceMerge(1);
            DirectoryReader reader = writer.Reader;
            writer.Dispose();

            AtomicReader segment = GetOnlySegmentReader(reader);
            Terms fieldTerms = segment.Fields.Terms("field");

            Automaton endsWithD = new RegExp(".*d", RegExp.NONE).ToAutomaton();
            CompiledAutomaton compiled = new CompiledAutomaton(endsWithD, false, false);

            // Start term "aad": should seek to startTerm and then yield every match after it.
            TermsEnum matches = fieldTerms.Intersect(compiled, new BytesRef("aad"));
            string[] afterAad = { "abd", "acd", "bcd" };
            for (int i = 0; i < afterAad.Length; i++)
            {
                Assert.AreEqual(afterAad[i], matches.Next().Utf8ToString());
                // "abd" is expected in doc 1, "acd" in doc 2, "bcd" in doc 3.
                Assert.AreEqual(i + 1, matches.Docs(null, null, DocsEnum.FLAG_NONE).NextDoc());
            }
            Assert.IsNull(matches.Next());

            // Start term "add": should fail to find ceil label on second arc, rewind.
            matches = fieldTerms.Intersect(compiled, new BytesRef("add"));
            Assert.AreEqual("bcd", matches.Next().Utf8ToString());
            Assert.AreEqual(3, matches.Docs(null, null, DocsEnum.FLAG_NONE).NextDoc());
            Assert.IsNull(matches.Next());

            // Start terms at or past the last match: should reach end immediately.
            foreach (string start in new[] { "bcd", "ddd" })
            {
                matches = fieldTerms.Intersect(compiled, new BytesRef(start));
                Assert.IsNull(matches.Next());
            }

            reader.Dispose();
            directory.Dispose();
        }
Пример #2
0
        /// <summary>
        /// Indexes two documents that each contain both the empty string and "abc" in
        /// "field", then checks <c>Terms.Intersect</c> with an accept-all automaton, first
        /// without a start term and then with the empty string as start term.
        /// </summary>
        public virtual void TestIntersectEmptyString()
        {
            Directory directory = NewDirectory();
            IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
            config.SetMergePolicy(new LogDocMergePolicy());
            RandomIndexWriter writer = new RandomIndexWriter(Random, directory, config);

            // Add the empty string to both documents, so that singletonDocID == -1.
            // For an FST-based term dict, we expect the first arc to be
            // flagged with HAS_FINAL_OUTPUT.
            string[][] fieldValuesPerDoc =
            {
                new[] { "", "abc" },
                new[] { "abc", "" },
            };
            foreach (string[] fieldValues in fieldValuesPerDoc)
            {
                Document document = new Document();
                foreach (string value in fieldValues)
                {
                    document.Add(NewStringField("field", value, Field.Store.NO));
                }
                writer.AddDocument(document);
            }

            writer.ForceMerge(1);
            DirectoryReader reader = writer.GetReader();
            writer.Dispose();

            AtomicReader segment = GetOnlySegmentReader(reader);
            Terms fieldTerms = segment.Fields.GetTerms("field");

            // ".*" accepts ALL terms.
            Automaton acceptAll = new RegExp(".*", RegExpSyntax.NONE).ToAutomaton();
            CompiledAutomaton compiled = new CompiledAutomaton(acceptAll, false, false);

            TermsEnum matches = fieldTerms.Intersect(compiled, null);
            DocsEnum postings;

            // No start term: both terms come back, each present in docs 0 and 1.
            foreach (string expectedTerm in new[] { "", "abc" })
            {
                Assert.AreEqual(expectedTerm, matches.Next().Utf8ToString());
                postings = matches.Docs(null, null, DocsFlags.NONE);
                Assert.AreEqual(0, postings.NextDoc());
                Assert.AreEqual(1, postings.NextDoc());
            }
            Assert.IsNull(matches.Next());

            // Pass the empty string as start term: only "abc" is expected.
            matches = fieldTerms.Intersect(compiled, new BytesRef(""));

            Assert.AreEqual("abc", matches.Next().Utf8ToString());
            postings = matches.Docs(null, null, DocsFlags.NONE);
            Assert.AreEqual(0, postings.NextDoc());
            Assert.AreEqual(1, postings.NextDoc());

            Assert.IsNull(matches.Next());

            reader.Dispose();
            directory.Dispose();
        }
Пример #3
0
        /// <summary>
        /// Indexes "aaa", "bbb" and "ccc" (one per document) and checks the terms and
        /// first doc IDs returned by <c>Terms.Intersect</c> with an accept-all automaton,
        /// both without a start term and with the start terms "abc" and "aaa".
        /// </summary>
        public virtual void TestIntersectBasic()
        {
            Directory directory = NewDirectory();
            IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
            config.SetMergePolicy(new LogDocMergePolicy());
            RandomIndexWriter writer = new RandomIndexWriter(Random, directory, config);

            // Note the deliberate mix of text and string fields across the documents.
            Document first = new Document();
            first.Add(NewTextField("field", "aaa", Field.Store.NO));
            writer.AddDocument(first);

            Document second = new Document();
            second.Add(NewStringField("field", "bbb", Field.Store.NO));
            writer.AddDocument(second);

            Document third = new Document();
            third.Add(NewTextField("field", "ccc", Field.Store.NO));
            writer.AddDocument(third);

            writer.ForceMerge(1);
            DirectoryReader reader = writer.GetReader();
            writer.Dispose();

            AtomicReader segment = GetOnlySegmentReader(reader);
            Terms fieldTerms = segment.Fields.GetTerms("field");
            Automaton acceptAll = new RegExp(".*", RegExpSyntax.NONE).ToAutomaton();
            CompiledAutomaton compiled = new CompiledAutomaton(acceptAll, false, false);

            // Index in this array doubles as the expected doc ID of the term.
            string[] indexedTerms = { "aaa", "bbb", "ccc" };

            // No start term: every indexed term is returned.
            TermsEnum matches = fieldTerms.Intersect(compiled, null);
            for (int docId = 0; docId < indexedTerms.Length; docId++)
            {
                Assert.AreEqual(indexedTerms[docId], matches.Next().Utf8ToString());
                Assert.AreEqual(docId, matches.Docs(null, null, DocsFlags.NONE).NextDoc());
            }
            Assert.IsNull(matches.Next());

            // Start terms "abc" (not indexed) and "aaa" (indexed): in both cases
            // enumeration is expected to resume at "bbb".
            foreach (string startTerm in new[] { "abc", "aaa" })
            {
                matches = fieldTerms.Intersect(compiled, new BytesRef(startTerm));
                for (int docId = 1; docId < indexedTerms.Length; docId++)
                {
                    Assert.AreEqual(indexedTerms[docId], matches.Next().Utf8ToString());
                    Assert.AreEqual(docId, matches.Docs(null, null, DocsFlags.NONE).NextDoc());
                }
                Assert.IsNull(matches.Next());
            }

            reader.Dispose();
            directory.Dispose();
        }
Пример #4
0
        /// <summary>
        /// Terms api equivalency: asserts that two <see cref="Terms"/> instances expose
        /// the same statistics, capabilities, enumerated terms and seeking behavior.
        /// </summary>
        /// <param name="info">Message attached to the assertions to identify the failing comparison.</param>
        /// <param name="leftReader">Reader forwarded to <c>AssertTermsEnumEquals</c>.</param>
        /// <param name="leftTerms">First terms instance; may be null only if <paramref name="rightTerms"/> is null too.</param>
        /// <param name="rightTerms">Second terms instance; may be null only if <paramref name="leftTerms"/> is null too.</param>
        /// <param name="deep">
        /// When true, additionally compares the intersections of both instances with
        /// randomly generated regular expressions (count chosen via <c>AtLeast(3)</c>).
        /// </param>
        public void AssertTermsEquals(string info, IndexReader leftReader, Terms leftTerms, Terms rightTerms, bool deep)
        {
            if (leftTerms == null || rightTerms == null)
            {
                // If either side is missing, both must be missing.
                Assert.IsNull(leftTerms, info);
                Assert.IsNull(rightTerms, info);
                return;
            }
            AssertTermsStatisticsEquals(info, leftTerms, rightTerms);

            // Pass info here as well so a capability mismatch is reported with the
            // same context as every other assertion in this method.
            Assert.AreEqual(leftTerms.HasOffsets(), rightTerms.HasOffsets(), info);
            Assert.AreEqual(leftTerms.HasPositions(), rightTerms.HasPositions(), info);
            Assert.AreEqual(leftTerms.HasPayloads(), rightTerms.HasPayloads(), info);

            TermsEnum leftTermsEnum = leftTerms.Iterator(null);
            TermsEnum rightTermsEnum = rightTerms.Iterator(null);
            AssertTermsEnumEquals(info, leftReader, leftTermsEnum, rightTermsEnum, true);

            AssertTermsSeekingEquals(info, leftTerms, rightTerms);

            if (deep)
            {
                int numIntersections = AtLeast(3);
                for (int i = 0; i < numIntersections; i++)
                {
                    string re = AutomatonTestUtil.RandomRegexp(Random());
                    CompiledAutomaton automaton = new CompiledAutomaton((new RegExp(re, RegExp.NONE)).ToAutomaton());
                    // Only automata compiled to the NORMAL type are intersected here.
                    if (automaton.Type == CompiledAutomaton.AUTOMATON_TYPE.NORMAL)
                    {
                        // TODO: test start term too
                        TermsEnum leftIntersection = leftTerms.Intersect(automaton, null);
                        TermsEnum rightIntersection = rightTerms.Intersect(automaton, null);
                        AssertTermsEnumEquals(info, leftReader, leftIntersection, rightIntersection, Rarely());
                    }
                }
            }
        }
        // The following code is almost an exact duplicate of code from TestDuelingCodecs: sorry!

        /// <summary>
        /// Asserts that two <see cref="Terms"/> instances agree on statistics, enumerated
        /// terms and seeking behavior. Offsets/positions/payloads capabilities are
        /// intentionally not compared.
        /// </summary>
        /// <param name="leftTerms">First terms instance; may be null only if <paramref name="rightTerms"/> is null too.</param>
        /// <param name="rightTerms">Second terms instance; may be null only if <paramref name="leftTerms"/> is null too.</param>
        /// <param name="deep">
        /// When true, additionally compares the intersections of both instances with
        /// randomly generated regular expressions (count chosen via <c>AtLeast(3)</c>).
        /// </param>
        public virtual void AssertTerms(Terms leftTerms, Terms rightTerms, bool deep)
        {
            bool eitherMissing = leftTerms == null || rightTerms == null;
            if (eitherMissing)
            {
                // If one side is missing, the other must be missing as well.
                Assert.IsNull(leftTerms);
                Assert.IsNull(rightTerms);
                return;
            }

            AssertTermsStatistics(leftTerms, rightTerms);

            // NOTE: we don't assert hasOffsets/hasPositions/hasPayloads because they are allowed to be different

            TermsEnum leftAll = leftTerms.Iterator(null);
            TermsEnum rightAll = rightTerms.Iterator(null);
            AssertTermsEnum(leftAll, rightAll, true);

            AssertTermsSeeking(leftTerms, rightTerms);

            if (!deep)
            {
                return;
            }

            int rounds = AtLeast(3);
            for (int round = 0; round < rounds; round++)
            {
                string pattern = AutomatonTestUtil.RandomRegexp(Random());
                Automaton parsed = new RegExp(pattern, RegExp.NONE).ToAutomaton();
                CompiledAutomaton compiled = new CompiledAutomaton(parsed);

                // Only automata compiled to the NORMAL type are intersected here.
                if (compiled.Type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL)
                {
                    continue;
                }

                // TODO: test start term too
                TermsEnum leftMatches = leftTerms.Intersect(compiled, null);
                TermsEnum rightMatches = rightTerms.Intersect(compiled, null);
                AssertTermsEnum(leftMatches, rightMatches, Rarely());
            }
        }