Esempio n. 1
0
 /// <summary>
 /// Create a new AutomatonQuery from an <seealso cref="Automaton"/>.
 /// </summary>
 /// <param name="term"> Term containing field and possibly some pattern structure. The
 ///        term text is ignored. </param>
 /// <param name="automaton"> Automaton to run, terms that are accepted are considered a
 ///        match. </param>
 public AutomatonQuery(Term term, Automaton automaton)
     : base(term.Field)
 {
     this.Term = term;
     this.Automaton_Renamed = automaton;
     this.Compiled          = new CompiledAutomaton(automaton);
 }
Esempio n. 2
0
 /// <summary>
 /// Create a new AutomatonQuery from an <seealso cref="Automaton"/>.
 /// </summary>
 /// <param name="term"> Term containing field and possibly some pattern structure. The
 ///        term text is ignored. </param>
 /// <param name="automaton"> Automaton to run, terms that are accepted are considered a
 ///        match. </param>
 public AutomatonQuery(Term term, Automaton automaton)
     : base(term.Field)
 {
     this.Term = term;
     this.Automaton_Renamed = automaton;
     this.Compiled = new CompiledAutomaton(automaton);
 }
Esempio n. 3
0
                internal IntersectTermsEnum(TermsReader outerInstance, CompiledAutomaton compiled, BytesRef startTerm) : base(outerInstance)
                {
                    //if (TEST) System.out.println("Enum init, startTerm=" + startTerm);
                    this.fst        = outerInstance.index;
                    this.fstReader  = fst.GetBytesReader();
                    this.fstOutputs = outerInstance.index.Outputs;
                    this.fsa        = compiled.RunAutomaton;
                    this.level      = -1;
                    this.stack      = new Frame[16];
                    for (int i = 0; i < stack.Length; i++)
                    {
                        this.stack[i] = new Frame();
                    }

                    Frame frame;

                    /*frame = */ LoadVirtualFrame(NewFrame()); // LUCENENET: IDE0059: Remove unnecessary value assignment
                    this.level++;
                    frame = LoadFirstFrame(NewFrame());
                    PushFrame(frame);

                    this.decoded = false;
                    this.pending = false;

                    if (startTerm == null)
                    {
                        pending = IsAccept(TopFrame());
                    }
                    else
                    {
                        DoSeekCeil(startTerm);
                        pending = !startTerm.Equals(term) && IsValid(TopFrame()) && IsAccept(TopFrame());
                    }
                }
Esempio n. 4
0
        public virtual void TestIntersectStartTerm()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            iwc.SetMergePolicy(new LogDocMergePolicy());
            RandomIndexWriter w   = new RandomIndexWriter(Random(), dir, iwc);
            Document          doc = new Document();

            doc.Add(NewStringField("field", "abc", Field.Store.NO));
            w.AddDocument(doc);

            doc = new Document();
            doc.Add(NewStringField("field", "abd", Field.Store.NO));
            w.AddDocument(doc);

            doc = new Document();
            doc.Add(NewStringField("field", "acd", Field.Store.NO));
            w.AddDocument(doc);

            doc = new Document();
            doc.Add(NewStringField("field", "bcd", Field.Store.NO));
            w.AddDocument(doc);

            w.ForceMerge(1);
            DirectoryReader r = w.Reader;

            w.Dispose();
            AtomicReader sub   = GetOnlySegmentReader(r);
            Terms        terms = sub.Fields.Terms("field");

            Automaton         automaton = (new RegExp(".*d", RegExp.NONE)).ToAutomaton();
            CompiledAutomaton ca        = new CompiledAutomaton(automaton, false, false);
            TermsEnum         te;

            // should seek to startTerm
            te = terms.Intersect(ca, new BytesRef("aad"));
            Assert.AreEqual("abd", te.Next().Utf8ToString());
            Assert.AreEqual(1, te.Docs(null, null, DocsEnum.FLAG_NONE).NextDoc());
            Assert.AreEqual("acd", te.Next().Utf8ToString());
            Assert.AreEqual(2, te.Docs(null, null, DocsEnum.FLAG_NONE).NextDoc());
            Assert.AreEqual("bcd", te.Next().Utf8ToString());
            Assert.AreEqual(3, te.Docs(null, null, DocsEnum.FLAG_NONE).NextDoc());
            Assert.IsNull(te.Next());

            // should fail to find ceil label on second arc, rewind
            te = terms.Intersect(ca, new BytesRef("add"));
            Assert.AreEqual("bcd", te.Next().Utf8ToString());
            Assert.AreEqual(3, te.Docs(null, null, DocsEnum.FLAG_NONE).NextDoc());
            Assert.IsNull(te.Next());

            // should reach end
            te = terms.Intersect(ca, new BytesRef("bcd"));
            Assert.IsNull(te.Next());
            te = terms.Intersect(ca, new BytesRef("ddd"));
            Assert.IsNull(te.Next());

            r.Dispose();
            dir.Dispose();
        }
Esempio n. 5
0
 /// <summary>
 /// Create a new AutomatonQuery from an <see cref="Automaton"/>.
 /// </summary>
 /// <param name="term"> <see cref="Term"/> containing field and possibly some pattern structure. The
 ///        term text is ignored. </param>
 /// <param name="automaton"> <see cref="Automaton"/> to run, terms that are accepted are considered a
 ///        match. </param>
 public AutomatonQuery(Term term, Automaton automaton)
     : base(term.Field)
 {
     this.m_term      = term;
     this.m_automaton = automaton;
     this.m_compiled  = new CompiledAutomaton(automaton);
 }
Esempio n. 6
0
        public virtual void TestIntersectEmptyString()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            iwc.SetMergePolicy(new LogDocMergePolicy());
            RandomIndexWriter w   = new RandomIndexWriter(Random, dir, iwc);
            Document          doc = new Document();

            doc.Add(NewStringField("field", "", Field.Store.NO));
            doc.Add(NewStringField("field", "abc", Field.Store.NO));
            w.AddDocument(doc);

            doc = new Document();
            // add empty string to both documents, so that singletonDocID == -1.
            // For a FST-based term dict, we'll expect to see the first arc is
            // flaged with HAS_FINAL_OUTPUT
            doc.Add(NewStringField("field", "abc", Field.Store.NO));
            doc.Add(NewStringField("field", "", Field.Store.NO));
            w.AddDocument(doc);

            w.ForceMerge(1);
            DirectoryReader r = w.GetReader();

            w.Dispose();
            AtomicReader sub   = GetOnlySegmentReader(r);
            Terms        terms = sub.Fields.GetTerms("field");

            Automaton         automaton = (new RegExp(".*", RegExpSyntax.NONE)).ToAutomaton(); // accept ALL
            CompiledAutomaton ca        = new CompiledAutomaton(automaton, false, false);

            TermsEnum te = terms.Intersect(ca, null);
            DocsEnum  de;

            Assert.AreEqual("", te.Next().Utf8ToString());
            de = te.Docs(null, null, DocsFlags.NONE);
            Assert.AreEqual(0, de.NextDoc());
            Assert.AreEqual(1, de.NextDoc());

            Assert.AreEqual("abc", te.Next().Utf8ToString());
            de = te.Docs(null, null, DocsFlags.NONE);
            Assert.AreEqual(0, de.NextDoc());
            Assert.AreEqual(1, de.NextDoc());

            Assert.IsNull(te.Next());

            // pass empty string
            te = terms.Intersect(ca, new BytesRef(""));

            Assert.AreEqual("abc", te.Next().Utf8ToString());
            de = te.Docs(null, null, DocsFlags.NONE);
            Assert.AreEqual(0, de.NextDoc());
            Assert.AreEqual(1, de.NextDoc());

            Assert.IsNull(te.Next());

            r.Dispose();
            dir.Dispose();
        }
Esempio n. 7
0
        public override TermsEnum Intersect(CompiledAutomaton automaton, BytesRef bytes)
        {
            TermsEnum termsEnum = m_input.Intersect(automaton, bytes);

            Debug.Assert(termsEnum != null);
            Debug.Assert(bytes == null || bytes.IsValid());
            return(new AssertingAtomicReader.AssertingTermsEnum(termsEnum));
        }
Esempio n. 8
0
            public override TermsEnum Intersect(CompiledAutomaton automaton, BytesRef bytes)
            {
                TermsEnum termsEnum = @in.Intersect(automaton, bytes);

                Debug.Assert(termsEnum != null);
                Debug.Assert(bytes == null || bytes.Valid);
                return(new AssertingTermsEnum(termsEnum));
            }
Esempio n. 9
0
        public virtual void TestIntersectBasic()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            iwc.SetMergePolicy(new LogDocMergePolicy());
            RandomIndexWriter w   = new RandomIndexWriter(Random, dir, iwc);
            Document          doc = new Document();

            doc.Add(NewTextField("field", "aaa", Field.Store.NO));
            w.AddDocument(doc);

            doc = new Document();
            doc.Add(NewStringField("field", "bbb", Field.Store.NO));
            w.AddDocument(doc);

            doc = new Document();
            doc.Add(NewTextField("field", "ccc", Field.Store.NO));
            w.AddDocument(doc);

            w.ForceMerge(1);
            DirectoryReader r = w.GetReader();

            w.Dispose();
            AtomicReader      sub       = GetOnlySegmentReader(r);
            Terms             terms     = sub.Fields.GetTerms("field");
            Automaton         automaton = (new RegExp(".*", RegExpSyntax.NONE)).ToAutomaton();
            CompiledAutomaton ca        = new CompiledAutomaton(automaton, false, false);
            TermsEnum         te        = terms.Intersect(ca, null);

            Assert.AreEqual("aaa", te.Next().Utf8ToString());
            Assert.AreEqual(0, te.Docs(null, null, DocsFlags.NONE).NextDoc());
            Assert.AreEqual("bbb", te.Next().Utf8ToString());
            Assert.AreEqual(1, te.Docs(null, null, DocsFlags.NONE).NextDoc());
            Assert.AreEqual("ccc", te.Next().Utf8ToString());
            Assert.AreEqual(2, te.Docs(null, null, DocsFlags.NONE).NextDoc());
            Assert.IsNull(te.Next());

            te = terms.Intersect(ca, new BytesRef("abc"));
            Assert.AreEqual("bbb", te.Next().Utf8ToString());
            Assert.AreEqual(1, te.Docs(null, null, DocsFlags.NONE).NextDoc());
            Assert.AreEqual("ccc", te.Next().Utf8ToString());
            Assert.AreEqual(2, te.Docs(null, null, DocsFlags.NONE).NextDoc());
            Assert.IsNull(te.Next());

            te = terms.Intersect(ca, new BytesRef("aaa"));
            Assert.AreEqual("bbb", te.Next().Utf8ToString());
            Assert.AreEqual(1, te.Docs(null, null, DocsFlags.NONE).NextDoc());
            Assert.AreEqual("ccc", te.Next().Utf8ToString());
            Assert.AreEqual(2, te.Docs(null, null, DocsFlags.NONE).NextDoc());
            Assert.IsNull(te.Next());

            r.Dispose();
            dir.Dispose();
        }
Esempio n. 10
0
        private bool Accepts(CompiledAutomaton c, BytesRef b)
        {
            int state = c.RunAutomaton.InitialState;

            for (int idx = 0; idx < b.Length; idx++)
            {
                Assert.IsTrue(state != -1);
                state = c.RunAutomaton.Step(state, b.Bytes[b.Offset + idx] & 0xff);
            }
            return(c.RunAutomaton.IsAccept(state));
        }
Esempio n. 11
0
        /// <summary>
        /// Construct an enumerator based upon an automaton, enumerating the specified
        /// field, working on a supplied <see cref="TermsEnum"/>
        /// <para/>
        /// @lucene.experimental
        /// </summary>
        /// <param name="tenum"> TermsEnum </param>
        /// <param name="compiled"> CompiledAutomaton </param>
        public AutomatonTermsEnum(TermsEnum tenum, CompiledAutomaton compiled)
            : base(tenum)
        {
            this.finite       = compiled.Finite;
            this.runAutomaton = compiled.RunAutomaton;
            Debug.Assert(this.runAutomaton != null);
            this.commonSuffixRef = compiled.CommonSuffixRef;
            this.allTransitions  = compiled.SortedTransitions;

            // used for path tracking, where each bit is a numbered state.
            visited = new long[runAutomaton.Count];

            termComp = Comparer;
        }
Esempio n. 12
0
        /// <summary>
        /// Return an automata-based enum for matching up to <paramref name="editDistance"/> from
        /// <paramref name="lastTerm"/>, if possible
        /// </summary>
        protected virtual TermsEnum GetAutomatonEnum(int editDistance, BytesRef lastTerm)
        {
            IList <CompiledAutomaton> runAutomata = InitAutomata(editDistance);

            if (editDistance < runAutomata.Count)
            {
                //if (BlockTreeTermsWriter.DEBUG) System.out.println("FuzzyTE.getAEnum: ed=" + editDistance + " lastTerm=" + (lastTerm==null ? "null" : lastTerm.utf8ToString()));
                CompiledAutomaton compiled = runAutomata[editDistance];
                return(new AutomatonFuzzyTermsEnum(this, m_terms.Intersect(compiled, lastTerm == null ? null : compiled.Floor(lastTerm, new BytesRef())), runAutomata.SubList(0, editDistance + 1).ToArray(/*new CompiledAutomaton[editDistance + 1]*/)));
            }
            else
            {
                return(null);
            }
        }
Esempio n. 13
0
 /// <summary>
 /// Returns a <see cref="TermsEnum"/> that iterates over all terms that
 /// are accepted by the provided
 /// <see cref="CompiledAutomaton"/>.  If the <paramref name="startTerm"/> is
 /// provided then the returned enum will only accept terms
 /// &gt; <paramref name="startTerm"/>, but you still must call
 /// <see cref="TermsEnum.Next()"/> first to get to the first term.  Note that the
 /// provided <paramref name="startTerm"/> must be accepted by
 /// the automaton.
 ///
 /// <para><b>NOTE</b>: the returned <see cref="TermsEnum"/> cannot
 /// seek</para>.
 /// </summary>
 public virtual TermsEnum Intersect(CompiledAutomaton compiled, BytesRef startTerm)
 {
     // TODO: eventually we could support seekCeil/Exact on
     // the returned enum, instead of only being able to seek
     // at the start
     if (compiled.Type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL)
     {
         throw new System.ArgumentException("please use CompiledAutomaton.getTermsEnum instead");
     }
     if (startTerm == null)
     {
         return(new AutomatonTermsEnum(GetIterator(null), compiled));
     }
     else
     {
         return(new AutomatonTermsEnumAnonymousInnerClassHelper(this, GetIterator(null), compiled, startTerm));
     }
 }
Esempio n. 14
0
 /// <summary>
 /// Returns a TermsEnum that iterates over all terms that
 ///  are accepted by the provided {@link
 ///  CompiledAutomaton}.  If the <code>startTerm</code> is
 ///  provided then the returned enum will only accept terms
 ///  > <code>startTerm</code>, but you still must call
 ///  next() first to get to the first term.  Note that the
 ///  provided <code>startTerm</code> must be accepted by
 ///  the automaton.
 ///
 /// <p><b>NOTE</b>: the returned TermsEnum cannot
 /// seek</p>.
 /// </summary>
 public virtual TermsEnum Intersect(CompiledAutomaton compiled, BytesRef startTerm)
 {
     // TODO: eventually we could support seekCeil/Exact on
     // the returned enum, instead of only being able to seek
     // at the start
     if (compiled.Type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL)
     {
         throw new System.ArgumentException("please use CompiledAutomaton.getTermsEnum instead");
     }
     if (startTerm == null)
     {
         return new AutomatonTermsEnum(Iterator(null), compiled);
     }
     else
     {
         return new AutomatonTermsEnumAnonymousInnerClassHelper(this, Iterator(null), compiled, startTerm);
     }
 }
Esempio n. 15
0
        public virtual void TestIntersect()
        {
            for (int i = 0; i < numIterations; i++)
            {
                string                   reg       = AutomatonTestUtil.RandomRegexp(Random);
                Automaton                automaton = (new RegExp(reg, RegExpSyntax.NONE)).ToAutomaton();
                CompiledAutomaton        ca        = new CompiledAutomaton(automaton, SpecialOperations.IsFinite(automaton), false);
                TermsEnum                te        = MultiFields.GetTerms(reader, "field").Intersect(ca, null);
                Automaton                expected  = BasicOperations.Intersection(termsAutomaton, automaton);
                JCG.SortedSet <BytesRef> found     = new JCG.SortedSet <BytesRef>();
                while (te.Next() != null)
                {
                    found.Add(BytesRef.DeepCopyOf(te.Term));
                }

                Automaton actual = BasicAutomata.MakeStringUnion(found);
                Assert.IsTrue(BasicOperations.SameLanguage(expected, actual));
            }
        }
Esempio n. 16
0
        public override TermsEnum Intersect(CompiledAutomaton compiled, BytesRef startTerm)
        {
            IList <MultiTermsEnum.TermsEnumIndex> termsEnums = new List <MultiTermsEnum.TermsEnumIndex>();

            for (int i = 0; i < subs.Length; i++)
            {
                TermsEnum termsEnum = subs[i].Intersect(compiled, startTerm);
                if (termsEnum != null)
                {
                    termsEnums.Add(new MultiTermsEnum.TermsEnumIndex(termsEnum, i));
                }
            }

            if (termsEnums.Count > 0)
            {
                return((new MultiTermsEnum(subSlices)).Reset(termsEnums.ToArray(/*MultiTermsEnum.TermsEnumIndex.EMPTY_ARRAY*/)));
            }
            else
            {
                return(TermsEnum.EMPTY);
            }
        }
Esempio n. 17
0
        // following code is almost an exact dup of code from TestDuelingCodecs: sorry!

        public virtual void AssertTerms(Terms leftTerms, Terms rightTerms, bool deep)
        {
            if (leftTerms == null || rightTerms == null)
            {
                Assert.IsNull(leftTerms);
                Assert.IsNull(rightTerms);
                return;
            }
            AssertTermsStatistics(leftTerms, rightTerms);

            // NOTE: we don't assert hasOffsets/hasPositions/hasPayloads because they are allowed to be different

            TermsEnum leftTermsEnum  = leftTerms.GetIterator(null);
            TermsEnum rightTermsEnum = rightTerms.GetIterator(null);

            AssertTermsEnum(leftTermsEnum, rightTermsEnum, true);

            AssertTermsSeeking(leftTerms, rightTerms);

            if (deep)
            {
                int numIntersections = AtLeast(3);
                for (int i = 0; i < numIntersections; i++)
                {
                    string            re        = AutomatonTestUtil.RandomRegexp(Random);
                    CompiledAutomaton automaton = new CompiledAutomaton((new RegExp(re, RegExpSyntax.NONE)).ToAutomaton());
                    if (automaton.Type == CompiledAutomaton.AUTOMATON_TYPE.NORMAL)
                    {
                        // TODO: test start term too
                        TermsEnum leftIntersection  = leftTerms.Intersect(automaton, null);
                        TermsEnum rightIntersection = rightTerms.Intersect(automaton, null);
                        AssertTermsEnum(leftIntersection, rightIntersection, Rarely());
                    }
                }
            }
        }
Esempio n. 18
0
        public virtual void TestIntersectRandom()
        {
            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);

            int numTerms = AtLeast(300);
            //final int numTerms = 50;

            HashSet <string>             terms        = new HashSet <string>();
            ICollection <string>         pendingTerms = new List <string>();
            IDictionary <BytesRef, int?> termToID     = new Dictionary <BytesRef, int?>();
            int id = 0;

            while (terms.Count != numTerms)
            {
                string s = RandomString;
                if (!terms.Contains(s))
                {
                    terms.Add(s);
                    pendingTerms.Add(s);
                    if (Random.Next(20) == 7)
                    {
                        AddDoc(w, pendingTerms, termToID, id++);
                    }
                }
            }
            AddDoc(w, pendingTerms, termToID, id++);

            BytesRef[]         termsArray = new BytesRef[terms.Count];
            HashSet <BytesRef> termsSet   = new HashSet <BytesRef>();

            {
                int upto = 0;
                foreach (string s in terms)
                {
                    BytesRef b = new BytesRef(s);
                    termsArray[upto++] = b;
                    termsSet.Add(b);
                }
                Array.Sort(termsArray);
            }

            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: indexed terms (unicode order):");
                foreach (BytesRef t in termsArray)
                {
                    Console.WriteLine("  " + t.Utf8ToString() + " -> id:" + termToID[t]);
                }
            }

            IndexReader r = w.GetReader();

            w.Dispose();

            // NOTE: intentional insanity!!
            FieldCache.Int32s docIDToID = FieldCache.DEFAULT.GetInt32s(SlowCompositeReaderWrapper.Wrap(r), "id", false);

            for (int iter = 0; iter < 10 * RANDOM_MULTIPLIER; iter++)
            {
                // TODO: can we also test infinite As here...?

                // From the random terms, pick some ratio and compile an
                // automaton:
                HashSet <string>     acceptTerms       = new HashSet <string>();
                SortedSet <BytesRef> sortedAcceptTerms = new SortedSet <BytesRef>();
                double    keepPct = Random.NextDouble();
                Automaton a;
                if (iter == 0)
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: empty automaton");
                    }
                    a = BasicAutomata.MakeEmpty();
                }
                else
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: keepPct=" + keepPct);
                    }
                    foreach (string s in terms)
                    {
                        string s2;
                        if (Random.NextDouble() <= keepPct)
                        {
                            s2 = s;
                        }
                        else
                        {
                            s2 = RandomString;
                        }
                        acceptTerms.Add(s2);
                        sortedAcceptTerms.Add(new BytesRef(s2));
                    }
                    a = BasicAutomata.MakeStringUnion(sortedAcceptTerms);
                }

                if (Random.NextBoolean())
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: reduce the automaton");
                    }
                    a.Reduce();
                }

                CompiledAutomaton c = new CompiledAutomaton(a, true, false);

                BytesRef[]         acceptTermsArray = new BytesRef[acceptTerms.Count];
                HashSet <BytesRef> acceptTermsSet   = new HashSet <BytesRef>();
                int upto = 0;
                foreach (string s in acceptTerms)
                {
                    BytesRef b = new BytesRef(s);
                    acceptTermsArray[upto++] = b;
                    acceptTermsSet.Add(b);
                    Assert.IsTrue(Accepts(c, b));
                }
                Array.Sort(acceptTermsArray);

                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: accept terms (unicode order):");
                    foreach (BytesRef t in acceptTermsArray)
                    {
                        Console.WriteLine("  " + t.Utf8ToString() + (termsSet.Contains(t) ? " (exists)" : ""));
                    }
                    Console.WriteLine(a.ToDot());
                }

                for (int iter2 = 0; iter2 < 100; iter2++)
                {
                    BytesRef startTerm = acceptTermsArray.Length == 0 || Random.NextBoolean() ? null : acceptTermsArray[Random.Next(acceptTermsArray.Length)];

                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: iter2=" + iter2 + " startTerm=" + (startTerm == null ? "<null>" : startTerm.Utf8ToString()));

                        if (startTerm != null)
                        {
                            int state = c.RunAutomaton.InitialState;
                            for (int idx = 0; idx < startTerm.Length; idx++)
                            {
                                int label = startTerm.Bytes[startTerm.Offset + idx] & 0xff;
                                Console.WriteLine("  state=" + state + " label=" + label);
                                state = c.RunAutomaton.Step(state, label);
                                Assert.IsTrue(state != -1);
                            }
                            Console.WriteLine("  state=" + state);
                        }
                    }

                    TermsEnum te = MultiFields.GetTerms(r, "f").Intersect(c, startTerm);

                    int loc;
                    if (startTerm == null)
                    {
                        loc = 0;
                    }
                    else
                    {
                        loc = Array.BinarySearch(termsArray, BytesRef.DeepCopyOf(startTerm));
                        if (loc < 0)
                        {
                            loc = -(loc + 1);
                        }
                        else
                        {
                            // startTerm exists in index
                            loc++;
                        }
                    }
                    while (loc < termsArray.Length && !acceptTermsSet.Contains(termsArray[loc]))
                    {
                        loc++;
                    }

                    DocsEnum docsEnum = null;
                    while (loc < termsArray.Length)
                    {
                        BytesRef expected = termsArray[loc];
                        BytesRef actual   = te.Next();
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST:   next() expected=" + expected.Utf8ToString() + " actual=" + (actual == null ? "null" : actual.Utf8ToString()));
                        }
                        Assert.AreEqual(expected, actual);
                        Assert.AreEqual(1, te.DocFreq);
                        docsEnum = TestUtil.Docs(Random, te, null, docsEnum, DocsFlags.NONE);
                        int docID = docsEnum.NextDoc();
                        Assert.IsTrue(docID != DocIdSetIterator.NO_MORE_DOCS);
                        Assert.AreEqual(docIDToID.Get(docID), (int)termToID[expected]);
                        do
                        {
                            loc++;
                        } while (loc < termsArray.Length && !acceptTermsSet.Contains(termsArray[loc]));
                    }
                    Assert.IsNull(te.Next());
                }
            }

            r.Dispose();
            dir.Dispose();
        }
                public DirectIntersectTermsEnum(DirectPostingsFormat.DirectField outerInstance,
                    CompiledAutomaton compiled, BytesRef startTerm)
                {
                    this.outerInstance = outerInstance;
                    runAutomaton = compiled.RunAutomaton;
                    compiledAutomaton = compiled;
                    termOrd = -1;
                    states = new State[1];
                    states[0] = new State(this);
                    states[0].changeOrd = outerInstance.terms.Length;
                    states[0].state = runAutomaton.InitialState;
                    states[0].transitions = compiledAutomaton.SortedTransitions[states[0].state];
                    states[0].transitionUpto = -1;
                    states[0].transitionMax = -1;

                    //System.out.println("IE.init startTerm=" + startTerm);

                    if (startTerm != null)
                    {
                        int skipUpto = 0;
                        if (startTerm.Length == 0)
                        {
                            if (outerInstance.terms.Length > 0 && outerInstance.termOffsets[1] == 0)
                            {
                                termOrd = 0;
                            }
                        }
                        else
                        {
                            termOrd++;

                            for (int i = 0; i < startTerm.Length; i++)
                            {
                                int label = startTerm.Bytes[startTerm.Offset + i] & 0xFF;

                                while (label > states[i].transitionMax)
                                {
                                    states[i].transitionUpto++;
                                    Debug.Assert(states[i].transitionUpto < states[i].transitions.Length);
                                    states[i].transitionMin = states[i].transitions[states[i].transitionUpto].Min;
                                    states[i].transitionMax = states[i].transitions[states[i].transitionUpto].Max;
                                    Debug.Assert(states[i].transitionMin >= 0);
                                    Debug.Assert(states[i].transitionMin <= 255);
                                    Debug.Assert(states[i].transitionMax >= 0);
                                    Debug.Assert(states[i].transitionMax <= 255);
                                }

                                // Skip forwards until we find a term matching
                                // the label at this position:
                                while (termOrd < outerInstance.terms.Length)
                                {
                                    int skipOffset = outerInstance.skipOffsets[termOrd];
                                    int numSkips = outerInstance.skipOffsets[termOrd + 1] - skipOffset;
                                    int termOffset_i = outerInstance.termOffsets[termOrd];
                                    int termLength = outerInstance.termOffsets[1 + termOrd] - termOffset_i;

                                    // if (DEBUG) {
                                    //   System.out.println("  check termOrd=" + termOrd + " term=" + new BytesRef(termBytes, termOffset, termLength).utf8ToString() + " skips=" + Arrays.toString(skips) + " i=" + i);
                                    // }

                                    if (termOrd == states[stateUpto].changeOrd)
                                    {
                                        // if (DEBUG) {
                                        //   System.out.println("  end push return");
                                        // }
                                        stateUpto--;
                                        termOrd--;
                                        return;
                                    }

                                    if (termLength == i)
                                    {
                                        termOrd++;
                                        skipUpto = 0;
                                        // if (DEBUG) {
                                        //   System.out.println("    term too short; next term");
                                        // }
                                    }
                                    else if (label < (outerInstance.termBytes[termOffset_i + i] & 0xFF))
                                    {
                                        termOrd--;
                                        // if (DEBUG) {
                                        //   System.out.println("  no match; already beyond; return termOrd=" + termOrd);
                                        // }
                                        stateUpto -= skipUpto;
                                        Debug.Assert(stateUpto >= 0);
                                        return;
                                    }
                                    else if (label == (outerInstance.termBytes[termOffset_i + i] & 0xFF))
                                    {
                                        // if (DEBUG) {
                                        //   System.out.println("    label[" + i + "] matches");
                                        // }
                                        if (skipUpto < numSkips)
                                        {
                                            Grow();

                                            int nextState = runAutomaton.Step(states[stateUpto].state, label);

                                            // Automaton is required to accept startTerm:
                                            Debug.Assert(nextState != -1);

                                            stateUpto++;
                                            states[stateUpto].changeOrd = outerInstance.skips[skipOffset + skipUpto++];
                                            states[stateUpto].state = nextState;
                                            states[stateUpto].transitions =
                                                compiledAutomaton.SortedTransitions[nextState];
                                            states[stateUpto].transitionUpto = -1;
                                            states[stateUpto].transitionMax = -1;
                                            //System.out.println("  push " + states[stateUpto].transitions.length + " trans");

                                            // if (DEBUG) {
                                            //   System.out.println("    push skip; changeOrd=" + states[stateUpto].changeOrd);
                                            // }

                                            // Match next label at this same term:
                                            goto nextLabelContinue;
                                        }
                                        else
                                        {
                                            // if (DEBUG) {
                                            //   System.out.println("    linear scan");
                                            // }
                                            // Index exhausted: just scan now (the
                                            // number of scans required will be less
                                            // than the minSkipCount):

                                            int startTermOrd = termOrd;
                                            while (termOrd < outerInstance.terms.Length &&
                                                   outerInstance.Compare(termOrd, startTerm) <= 0)
                                            {
                                                Debug.Assert(termOrd == startTermOrd ||
                                                             outerInstance.skipOffsets[termOrd] ==
                                                             outerInstance.skipOffsets[termOrd + 1]);
                                                termOrd++;
                                            }
                                            Debug.Assert(termOrd - startTermOrd < outerInstance.minSkipCount);
                                            termOrd--;
                                            stateUpto -= skipUpto;
                                            // if (DEBUG) {
                                            //   System.out.println("  end termOrd=" + termOrd);
                                            // }
                                            return;
                                        }
                                    }
                                    else
                                    {
                                        if (skipUpto < numSkips)
                                        {
                                            termOrd = outerInstance.skips[skipOffset + skipUpto];
                                            // if (DEBUG) {
                                            //   System.out.println("  no match; skip to termOrd=" + termOrd);
                                            // }
                                        }
                                        else
                                        {
                                            // if (DEBUG) {
                                            //   System.out.println("  no match; next term");
                                            // }
                                            termOrd++;
                                        }
                                        skipUpto = 0;
                                    }
                                }

                                // startTerm is >= last term so enum will not
                                // return any terms:
                                termOrd--;
                                // if (DEBUG) {
                                //   System.out.println("  beyond end; no terms will match");
                                // }
                                return;
                                nextLabelContinue:
                                ;
                            }
                            nextLabelBreak:
                            ;
                        }

                        int termOffset = outerInstance.termOffsets[termOrd];
                        int termLen = outerInstance.termOffsets[1 + termOrd] - termOffset;

                        if (termOrd >= 0 &&
                            !startTerm.Equals(new BytesRef(outerInstance.termBytes, termOffset, termLen)))
                        {
                            stateUpto -= skipUpto;
                            termOrd--;
                        }
                        // if (DEBUG) {
                        //   System.out.println("  loop end; return termOrd=" + termOrd + " stateUpto=" + stateUpto);
                        // }
                    }
                }
Esempio n. 20
0
        public virtual void TestIntersectStartTerm()
        {
            Directory dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            iwc.SetMergePolicy(new LogDocMergePolicy());

            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, iwc);
            Document doc = new Document();
            doc.Add(NewStringField("field", "abc", Field.Store.NO));
            w.AddDocument(doc);

            doc = new Document();
            doc.Add(NewStringField("field", "abd", Field.Store.NO));
            w.AddDocument(doc);

            doc = new Document();
            doc.Add(NewStringField("field", "acd", Field.Store.NO));
            w.AddDocument(doc);

            doc = new Document();
            doc.Add(NewStringField("field", "bcd", Field.Store.NO));
            w.AddDocument(doc);

            w.ForceMerge(1);
            DirectoryReader r = w.Reader;
            w.Dispose();
            AtomicReader sub = GetOnlySegmentReader(r);
            Terms terms = sub.Fields.Terms("field");

            Automaton automaton = (new RegExp(".*d", RegExp.NONE)).ToAutomaton();
            CompiledAutomaton ca = new CompiledAutomaton(automaton, false, false);
            TermsEnum te;

            // should seek to startTerm
            te = terms.Intersect(ca, new BytesRef("aad"));
            Assert.AreEqual("abd", te.Next().Utf8ToString());
            Assert.AreEqual(1, te.Docs(null, null, DocsEnum.FLAG_NONE).NextDoc());
            Assert.AreEqual("acd", te.Next().Utf8ToString());
            Assert.AreEqual(2, te.Docs(null, null, DocsEnum.FLAG_NONE).NextDoc());
            Assert.AreEqual("bcd", te.Next().Utf8ToString());
            Assert.AreEqual(3, te.Docs(null, null, DocsEnum.FLAG_NONE).NextDoc());
            Assert.IsNull(te.Next());

            // should fail to find ceil label on second arc, rewind
            te = terms.Intersect(ca, new BytesRef("add"));
            Assert.AreEqual("bcd", te.Next().Utf8ToString());
            Assert.AreEqual(3, te.Docs(null, null, DocsEnum.FLAG_NONE).NextDoc());
            Assert.IsNull(te.Next());

            // should reach end
            te = terms.Intersect(ca, new BytesRef("bcd"));
            Assert.IsNull(te.Next());
            te = terms.Intersect(ca, new BytesRef("ddd"));
            Assert.IsNull(te.Next());

            r.Dispose();
            dir.Dispose();
        }
Esempio n. 21
0
 public AutomatonTermsEnumAnonymousInnerClassHelper(Terms outerInstance, Lucene.Net.Index.TermsEnum iterator, CompiledAutomaton compiled, BytesRef startTerm)
     : base(iterator, compiled)
 {
     this.outerInstance = outerInstance;
     this.startTerm     = startTerm;
 }
Esempio n. 22
0
        /// <summary>
        /// Terms api equivalency
        /// </summary>
        public void AssertTermsEquals(string info, IndexReader leftReader, Terms leftTerms, Terms rightTerms, bool deep)
        {
            if (leftTerms == null || rightTerms == null)
            {
                Assert.IsNull(leftTerms, info);
                Assert.IsNull(rightTerms, info);
                return;
            }
            AssertTermsStatisticsEquals(info, leftTerms, rightTerms);
            Assert.AreEqual(leftTerms.HasOffsets(), rightTerms.HasOffsets());
            Assert.AreEqual(leftTerms.HasPositions(), rightTerms.HasPositions());
            Assert.AreEqual(leftTerms.HasPayloads(), rightTerms.HasPayloads());

            TermsEnum leftTermsEnum = leftTerms.Iterator(null);
            TermsEnum rightTermsEnum = rightTerms.Iterator(null);
            AssertTermsEnumEquals(info, leftReader, leftTermsEnum, rightTermsEnum, true);

            AssertTermsSeekingEquals(info, leftTerms, rightTerms);

            if (deep)
            {
                int numIntersections = AtLeast(3);
                for (int i = 0; i < numIntersections; i++)
                {
                    string re = AutomatonTestUtil.RandomRegexp(Random());
                    CompiledAutomaton automaton = new CompiledAutomaton((new RegExp(re, RegExp.NONE)).ToAutomaton());
                    if (automaton.Type == CompiledAutomaton.AUTOMATON_TYPE.NORMAL)
                    {
                        // TODO: test start term too
                        TermsEnum leftIntersection = leftTerms.Intersect(automaton, null);
                        TermsEnum rightIntersection = rightTerms.Intersect(automaton, null);
                        AssertTermsEnumEquals(info, leftReader, leftIntersection, rightIntersection, Rarely());
                    }
                }
            }
        }
Esempio n. 23
0
            public AutomatonFuzzyTermsEnum(FuzzyTermsEnum outerInstance, TermsEnum tenum, CompiledAutomaton[] compiled)
                : base(tenum, false)
            {
                this.OuterInstance = outerInstance;

                if (!InstanceFieldsInitialized)
                {
                    InitializeInstanceFields();
                    InstanceFieldsInitialized = true;
                }
                this.Matchers = new ByteRunAutomaton[compiled.Length];
                for (int i = 0; i < compiled.Length; i++)
                {
                    this.Matchers[i] = compiled[i].RunAutomaton;
                }
                TermRef = new BytesRef(outerInstance.Term_Renamed.Text());
            }
Esempio n. 24
0
 public AutomatonTermsEnumAnonymousInnerClassHelper(TermsEnum iterator, CompiledAutomaton compiled, BytesRef startTerm)
     : base(iterator, compiled)
 {
     this.startTerm = startTerm;
 }
Esempio n. 25
0
 public AutomatonTermsEnumAnonymousInnerClassHelper(Terms outerInstance, Lucene.Net.Index.TermsEnum iterator, CompiledAutomaton compiled, BytesRef startTerm)
     : base(iterator, compiled)
 {
     this.OuterInstance = outerInstance;
     this.StartTerm = startTerm;
 }
Esempio n. 26
0
 public override TermsEnum Intersect(CompiledAutomaton automaton, BytesRef bytes)
 {
     TermsEnum termsEnum = @in.Intersect(automaton, bytes);
     Debug.Assert(termsEnum != null);
     Debug.Assert(bytes == null || bytes.Valid);
     return new AssertingTermsEnum(termsEnum);
 }
Esempio n. 27
0
        public virtual void TestIntersectRandom()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            int numTerms = AtLeast(300);
            //final int numTerms = 50;

            HashSet<string> terms = new HashSet<string>();
            ICollection<string> pendingTerms = new List<string>();
            IDictionary<BytesRef, int?> termToID = new Dictionary<BytesRef, int?>();
            int id = 0;
            while (terms.Count != numTerms)
            {
                string s = RandomString;
                if (!terms.Contains(s))
                {
                    terms.Add(s);
                    pendingTerms.Add(s);
                    if (Random().Next(20) == 7)
                    {
                        AddDoc(w, pendingTerms, termToID, id++);
                    }
                }
            }
            AddDoc(w, pendingTerms, termToID, id++);

            BytesRef[] termsArray = new BytesRef[terms.Count];
            HashSet<BytesRef> termsSet = new HashSet<BytesRef>();
            {
                int upto = 0;
                foreach (string s in terms)
                {
                    BytesRef b = new BytesRef(s);
                    termsArray[upto++] = b;
                    termsSet.Add(b);
                }
                Array.Sort(termsArray);
            }

            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: indexed terms (unicode order):");
                foreach (BytesRef t in termsArray)
                {
                    Console.WriteLine("  " + t.Utf8ToString() + " -> id:" + termToID[t]);
                }
            }

            IndexReader r = w.Reader;
            w.Dispose();

            // NOTE: intentional insanity!!
            FieldCache.Ints docIDToID = FieldCache.DEFAULT.GetInts(SlowCompositeReaderWrapper.Wrap(r), "id", false);

            for (int iter = 0; iter < 10 * RANDOM_MULTIPLIER; iter++)
            {
                // TODO: can we also test infinite As here...?

                // From the random terms, pick some ratio and compile an
                // automaton:
                HashSet<string> acceptTerms = new HashSet<string>();
                SortedSet<BytesRef> sortedAcceptTerms = new SortedSet<BytesRef>();
                double keepPct = Random().NextDouble();
                Automaton a;
                if (iter == 0)
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: empty automaton");
                    }
                    a = BasicAutomata.MakeEmpty();
                }
                else
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: keepPct=" + keepPct);
                    }
                    foreach (string s in terms)
                    {
                        string s2;
                        if (Random().NextDouble() <= keepPct)
                        {
                            s2 = s;
                        }
                        else
                        {
                            s2 = RandomString;
                        }
                        acceptTerms.Add(s2);
                        sortedAcceptTerms.Add(new BytesRef(s2));
                    }
                    a = BasicAutomata.MakeStringUnion(sortedAcceptTerms);
                }

                if (Random().NextBoolean())
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: reduce the automaton");
                    }
                    a.Reduce();
                }

                CompiledAutomaton c = new CompiledAutomaton(a, true, false);

                BytesRef[] acceptTermsArray = new BytesRef[acceptTerms.Count];
                HashSet<BytesRef> acceptTermsSet = new HashSet<BytesRef>();
                int upto = 0;
                foreach (string s in acceptTerms)
                {
                    BytesRef b = new BytesRef(s);
                    acceptTermsArray[upto++] = b;
                    acceptTermsSet.Add(b);
                    Assert.IsTrue(Accepts(c, b));
                }
                Array.Sort(acceptTermsArray);

                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: accept terms (unicode order):");
                    foreach (BytesRef t in acceptTermsArray)
                    {
                        Console.WriteLine("  " + t.Utf8ToString() + (termsSet.Contains(t) ? " (exists)" : ""));
                    }
                    Console.WriteLine(a.ToDot());
                }

                for (int iter2 = 0; iter2 < 100; iter2++)
                {
                    BytesRef startTerm = acceptTermsArray.Length == 0 || Random().NextBoolean() ? null : acceptTermsArray[Random().Next(acceptTermsArray.Length)];

                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: iter2=" + iter2 + " startTerm=" + (startTerm == null ? "<null>" : startTerm.Utf8ToString()));

                        if (startTerm != null)
                        {
                            int state = c.RunAutomaton.InitialState;
                            for (int idx = 0; idx < startTerm.Length; idx++)
                            {
                                int label = startTerm.Bytes[startTerm.Offset + idx] & 0xff;
                                Console.WriteLine("  state=" + state + " label=" + label);
                                state = c.RunAutomaton.Step(state, label);
                                Assert.IsTrue(state != -1);
                            }
                            Console.WriteLine("  state=" + state);
                        }
                    }

                    TermsEnum te = MultiFields.GetTerms(r, "f").Intersect(c, startTerm);

                    int loc;
                    if (startTerm == null)
                    {
                        loc = 0;
                    }
                    else
                    {
                        loc = Array.BinarySearch(termsArray, BytesRef.DeepCopyOf(startTerm));
                        if (loc < 0)
                        {
                            loc = -(loc + 1);
                        }
                        else
                        {
                            // startTerm exists in index
                            loc++;
                        }
                    }
                    while (loc < termsArray.Length && !acceptTermsSet.Contains(termsArray[loc]))
                    {
                        loc++;
                    }

                    DocsEnum docsEnum = null;
                    while (loc < termsArray.Length)
                    {
                        BytesRef expected = termsArray[loc];
                        BytesRef actual = te.Next();
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST:   next() expected=" + expected.Utf8ToString() + " actual=" + (actual == null ? "null" : actual.Utf8ToString()));
                        }
                        Assert.AreEqual(expected, actual);
                        Assert.AreEqual(1, te.DocFreq());
                        docsEnum = TestUtil.Docs(Random(), te, null, docsEnum, DocsEnum.FLAG_NONE);
                        int docID = docsEnum.NextDoc();
                        Assert.IsTrue(docID != DocIdSetIterator.NO_MORE_DOCS);
                        Assert.AreEqual(docIDToID.Get(docID), (int)termToID[expected]);
                        do
                        {
                            loc++;
                        } while (loc < termsArray.Length && !acceptTermsSet.Contains(termsArray[loc]));
                    }
                    Assert.IsNull(te.Next());
                }
            }

            r.Dispose();
            dir.Dispose();
        }
Esempio n. 28
0
 public override TermsEnum Intersect(CompiledAutomaton compiled, BytesRef startTerm)
 {
     return(new IntersectTermsEnum(this, compiled, startTerm));
 }
Esempio n. 29
0
 public override TermsEnum Intersect(CompiledAutomaton compiled, BytesRef startTerm)
 {
     if (compiled.Type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL)
     {
         throw new System.ArgumentException("please use CompiledAutomaton.getTermsEnum instead");
     }
     return new IntersectEnum(this, compiled, startTerm);
 }
 public override TermsEnum Intersect(CompiledAutomaton compiled,
     BytesRef startTerm)
 {
     return _delegateTerms.Intersect(compiled, startTerm);
 }
        // following code is almost an exact dup of code from TestDuelingCodecs: sorry!

        public virtual void AssertTerms(Terms leftTerms, Terms rightTerms, bool deep)
        {
            if (leftTerms == null || rightTerms == null)
            {
                Assert.IsNull(leftTerms);
                Assert.IsNull(rightTerms);
                return;
            }
            AssertTermsStatistics(leftTerms, rightTerms);

            // NOTE: we don't assert hasOffsets/hasPositions/hasPayloads because they are allowed to be different

            TermsEnum leftTermsEnum = leftTerms.Iterator(null);
            TermsEnum rightTermsEnum = rightTerms.Iterator(null);
            AssertTermsEnum(leftTermsEnum, rightTermsEnum, true);

            AssertTermsSeeking(leftTerms, rightTerms);

            if (deep)
            {
                int numIntersections = AtLeast(3);
                for (int i = 0; i < numIntersections; i++)
                {
                    string re = AutomatonTestUtil.RandomRegexp(Random());
                    CompiledAutomaton automaton = new CompiledAutomaton((new RegExp(re, RegExp.NONE)).ToAutomaton());
                    if (automaton.Type == CompiledAutomaton.AUTOMATON_TYPE.NORMAL)
                    {
                        // TODO: test start term too
                        TermsEnum leftIntersection = leftTerms.Intersect(automaton, null);
                        TermsEnum rightIntersection = rightTerms.Intersect(automaton, null);
                        AssertTermsEnum(leftIntersection, rightIntersection, Rarely());
                    }
                }
            }
        }
Esempio n. 32
0
        public virtual void TestIntersectEmptyString()
        {
            Directory dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            iwc.SetMergePolicy(new LogDocMergePolicy());
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, iwc);
            Document doc = new Document();
            doc.Add(NewStringField("field", "", Field.Store.NO));
            doc.Add(NewStringField("field", "abc", Field.Store.NO));
            w.AddDocument(doc);

            doc = new Document();
            // add empty string to both documents, so that singletonDocID == -1.
            // For a FST-based term dict, we'll expect to see the first arc is
            // flaged with HAS_FINAL_OUTPUT
            doc.Add(NewStringField("field", "abc", Field.Store.NO));
            doc.Add(NewStringField("field", "", Field.Store.NO));
            w.AddDocument(doc);

            w.ForceMerge(1);
            DirectoryReader r = w.Reader;
            w.Dispose();
            AtomicReader sub = GetOnlySegmentReader(r);
            Terms terms = sub.Fields.Terms("field");

            Automaton automaton = (new RegExp(".*", RegExp.NONE)).ToAutomaton(); // accept ALL
            CompiledAutomaton ca = new CompiledAutomaton(automaton, false, false);

            TermsEnum te = terms.Intersect(ca, null);
            DocsEnum de;

            Assert.AreEqual("", te.Next().Utf8ToString());
            de = te.Docs(null, null, DocsEnum.FLAG_NONE);
            Assert.AreEqual(0, de.NextDoc());
            Assert.AreEqual(1, de.NextDoc());

            Assert.AreEqual("abc", te.Next().Utf8ToString());
            de = te.Docs(null, null, DocsEnum.FLAG_NONE);
            Assert.AreEqual(0, de.NextDoc());
            Assert.AreEqual(1, de.NextDoc());

            Assert.IsNull(te.Next());

            // pass empty string
            te = terms.Intersect(ca, new BytesRef(""));

            Assert.AreEqual("abc", te.Next().Utf8ToString());
            de = te.Docs(null, null, DocsEnum.FLAG_NONE);
            Assert.AreEqual(0, de.NextDoc());
            Assert.AreEqual(1, de.NextDoc());

            Assert.IsNull(te.Next());

            r.Dispose();
            dir.Dispose();
        }
Esempio n. 33
0
 public override TermsEnum Intersect(CompiledAutomaton compiled, BytesRef startTerm)
 {
     return(new SortingTermsEnum(m_input.Intersect(compiled, startTerm), docMap, indexOptions));
 }
 public override TermsEnum Intersect(CompiledAutomaton compiled, BytesRef startTerm)
 {
     return new DirectIntersectTermsEnum(this, compiled, startTerm);
 }
 public override TermsEnum Intersect(CompiledAutomaton compiled,
                                     BytesRef startTerm)
 {
     return(_delegateTerms.Intersect(compiled, startTerm));
 }
Esempio n. 36
0
                // TODO: in some cases we can filter by length?  eg
                // regexp foo*bar must be at least length 6 bytes
                public IntersectEnum(BlockTreeTermsReader.FieldReader outerInstance, CompiledAutomaton compiled, BytesRef startTerm)
                {
                    this.OuterInstance = outerInstance;
                    // if (DEBUG) {
                    //   System.out.println("\nintEnum.init seg=" + segment + " commonSuffix=" + brToString(compiled.commonSuffixRef));
                    // }
                    runAutomaton = compiled.RunAutomaton;
                    CompiledAutomaton = compiled;
                    @in = (IndexInput)[email protected]();
                    Stack = new Frame[5];
                    for (int idx = 0; idx < Stack.Length; idx++)
                    {
                        Stack[idx] = new Frame(this, idx);
                    }
                    for (int arcIdx = 0; arcIdx < Arcs.Length; arcIdx++)
                    {
                        Arcs[arcIdx] = new FST<BytesRef>.Arc<BytesRef>();
                    }

                    if (outerInstance.Index == null)
                    {
                        FstReader = null;
                    }
                    else
                    {
                        FstReader = outerInstance.Index.BytesReader;
                    }

                    // TODO: if the automaton is "smallish" we really
                    // should use the terms index to seek at least to
                    // the initial term and likely to subsequent terms
                    // (or, maybe just fallback to ATE for such cases).
                    // Else the seek cost of loading the frames will be
                    // too costly.

                    FST<BytesRef>.Arc<BytesRef> arc = outerInstance.Index.GetFirstArc(Arcs[0]);
                    // Empty string prefix must have an output in the index!
                    Debug.Assert(arc.Final);

                    // Special pushFrame since it's the first one:
                    Frame f = Stack[0];
                    f.Fp = f.FpOrig = outerInstance.RootBlockFP;
                    f.Prefix = 0;
                    f.State = runAutomaton.InitialState;
                    f.Arc = arc;
                    f.OutputPrefix = arc.Output;
                    f.Load(outerInstance.RootCode);

                    // for assert:
                    Debug.Assert(SetSavedStartTerm(startTerm));

                    CurrentFrame = f;
                    if (startTerm != null)
                    {
                        SeekToStartTerm(startTerm);
                    }
                }
Esempio n. 37
0
                internal IntersectTermsEnum(FSTTermsReader.TermsReader outerInstance, CompiledAutomaton compiled, BytesRef startTerm) : base(outerInstance)
                {
                    this.outerInstance = outerInstance;
                    //if (TEST) System.out.println("Enum init, startTerm=" + startTerm);
                    this.fst        = outerInstance.dict;
                    this.fstReader  = fst.GetBytesReader();
                    this.fstOutputs = outerInstance.dict.Outputs;
                    this.fsa        = compiled.RunAutomaton;
                    this.level      = -1;
                    this.stack      = new Frame[16];
                    for (int i = 0; i < stack.Length; i++)
                    {
                        this.stack[i] = new Frame(this);
                    }

                    Frame frame;

                    frame = LoadVirtualFrame(NewFrame());
                    this.level++;
                    frame = LoadFirstFrame(NewFrame());
                    PushFrame(frame);

                    this.meta     = null;
                    this.metaUpto = 1;
                    this.decoded  = false;
                    this.pending  = false;

                    if (startTerm == null)
                    {
                        pending = IsAccept(TopFrame());
                    }
                    else
                    {
                        DoSeekCeil(startTerm);
                        pending = !startTerm.Equals(term) && IsValid(TopFrame()) && IsAccept(TopFrame());
                    }
                }
Esempio n. 38
0
 private bool Accepts(CompiledAutomaton c, BytesRef b)
 {
     int state = c.RunAutomaton.InitialState;
     for (int idx = 0; idx < b.Length; idx++)
     {
         Assert.IsTrue(state != -1);
         state = c.RunAutomaton.Step(state, b.Bytes[b.Offset + idx] & 0xff);
     }
     return c.RunAutomaton.IsAccept(state);
 }