예제 #1
0
        /// <summary>
        /// Runs <paramref name="surfaceForm"/> through the index analyzer, turns the
        /// resulting token stream into an automaton and returns the finite strings
        /// obtained from that automaton.
        /// </summary>
        /// <param name="surfaceForm">Surface form to analyze; it is decoded with
        /// <c>Utf8ToString()</c> before being handed to the analyzer.</param>
        /// <param name="ts2a">Converter that builds the automaton from the analyzed
        /// token stream.</param>
        /// <returns>The value returned by <c>SpecialOperations.GetFiniteStrings</c> for
        /// the processed automaton, called with <c>maxGraphExpansions</c> as its second
        /// argument (presumably a cap on the number of expanded paths; confirm against
        /// that method's documentation).</returns>
        internal ISet<Int32sRef> ToFiniteStrings(BytesRef surfaceForm, TokenStreamToAutomaton ts2a)
        {
            // Step 1: analyze the surface form.
            string      surfaceText = surfaceForm.Utf8ToString();
            TokenStream tokens      = indexAnalyzer.GetTokenStream("", surfaceText);

            // Step 2: build the automaton. Its labels are the bytes of each analyzed
            // token, and byte 0 acts as the separator between tokens.
            Automaton graph;
            try
            {
                graph = ts2a.ToAutomaton(tokens);
            }
            finally
            {
                // Always release the token stream, whether or not the conversion succeeded.
                IOUtils.DisposeWhileHandlingException(tokens);
            }

            // Step 3: post-process. ReplaceSep must run before ConvertAutomaton.
            ReplaceSep(graph);
            graph = ConvertAutomaton(graph);

            // The finiteness check only runs when assertions are switched on.
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(SpecialOperations.IsFinite(graph));
            }

            // Step 4: collect every path through the automaton. There may be several,
            // e.g. when the analyzer produced a graph via SynFilter or WDF.
            //
            // TODO: walking and adding at the same time would avoid allocating the
            // (possibly large) intermediate HashSet in RAM.
            ISet<Int32sRef> finiteStrings = SpecialOperations.GetFiniteStrings(graph, maxGraphExpansions);
            return finiteStrings;
        }
예제 #2
0
        /// <summary>
        /// For <c>NumIterations</c> rounds, builds an automaton from a random regular
        /// expression, enumerates the terms of "field" that intersect it, and asserts
        /// that the union of the enumerated terms accepts the same language as the
        /// intersection of <c>TermsAutomaton</c> with that automaton.
        /// </summary>
        public virtual void TestIntersect()
        {
            int round = 0;
            while (round < NumIterations)
            {
                // Random regular expression, parsed with no optional syntax enabled.
                string pattern = AutomatonTestUtil.RandomRegexp(Random());
                RegExp parsed = new RegExp(pattern, RegExpSyntax.NONE);
                Automaton regexAutomaton = parsed.ToAutomaton();

                // Compile the automaton and open the intersecting enumerator first;
                // the expected automaton is computed afterwards, as in the original order.
                bool finite = SpecialOperations.IsFinite(regexAutomaton);
                CompiledAutomaton compiled = new CompiledAutomaton(regexAutomaton, finite, false);
                TermsEnum matches = MultiFields.GetTerms(Reader, "field").Intersect(compiled, null);
                Automaton expectedLanguage = BasicOperations.Intersection(TermsAutomaton, regexAutomaton);

                // Gather a deep copy of every term the enumerator yields.
                SortedSet<BytesRef> collected = new SortedSet<BytesRef>();
                while (matches.Next() != null)
                {
                    BytesRef copy = BytesRef.DeepCopyOf(matches.Term);
                    collected.Add(copy);
                }

                // The collected terms must describe exactly the expected language.
                Automaton actualLanguage = BasicAutomata.MakeStringUnion(collected);
                bool sameLanguage = BasicOperations.SameLanguage(expectedLanguage, actualLanguage);
                Assert.IsTrue(sameLanguage);

                round++;
            }
        }