/// <summary>
/// Analyzes <paramref name="surfaceForm"/> with the index analyzer, converts the
/// resulting token stream into an automaton and returns the finite strings
/// produced from that automaton.
/// </summary>
/// <param name="surfaceForm">Surface form; decoded via <c>Utf8ToString()</c> before analysis.</param>
/// <param name="ts2a">Converter that builds an automaton from the analyzed token stream.</param>
/// <returns>
/// The result of <c>SpecialOperations.GetFiniteStrings</c> for the processed
/// automaton, called with <c>maxGraphExpansions</c>.
/// </returns>
internal ISet<Int32sRef> ToFiniteStrings(BytesRef surfaceForm, TokenStreamToAutomaton ts2a)
{
    // Step 1: run the surface form through the index-time analyzer.
    string surfaceText = surfaceForm.Utf8ToString();
    TokenStream tokens = indexAnalyzer.GetTokenStream("", surfaceText);

    // Step 2: build the token automaton. Its labels are the bytes of each
    // analyzed token, and byte 0 separates consecutive tokens.
    Automaton tokenAutomaton;
    try
    {
        tokenAutomaton = ts2a.ToAutomaton(tokens);
    }
    finally
    {
        // The stream is released whether or not the conversion succeeded.
        IOUtils.DisposeWhileHandlingException(tokens);
    }

    // Step 3: post-process the separators in place, then apply the
    // automaton conversion hook.
    ReplaceSep(tokenAutomaton);
    Automaton convertedAutomaton = ConvertAutomaton(tokenAutomaton);

    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(SpecialOperations.IsFinite(convertedAutomaton));
    }

    // Step 4: enumerate every path through the automaton. Several paths are
    // possible, e.g. when the analyzer produced a graph (SynFilter or WDF).
    // TODO: we could walk & add simultaneously, so we
    // don't have to alloc [possibly biggish]
    // intermediate HashSet in RAM:
    ISet<Int32sRef> finiteStrings = SpecialOperations.GetFiniteStrings(convertedAutomaton, maxGraphExpansions);
    return finiteStrings;
}
/// <summary>
/// For <c>NumIterations</c> random regular expressions, intersects the terms of
/// "field" with the compiled automaton and asserts that the union of the terms
/// returned by the enum accepts the same language as the intersection of
/// <c>TermsAutomaton</c> with that automaton.
/// </summary>
public virtual void TestIntersect()
{
    for (int iteration = 0; iteration < NumIterations; iteration++)
    {
        // Build a random automaton and compile it for term intersection.
        string pattern = AutomatonTestUtil.RandomRegexp(Random());
        RegExp regExp = new RegExp(pattern, RegExpSyntax.NONE);
        Automaton regexAutomaton = regExp.ToAutomaton();
        bool isFinite = SpecialOperations.IsFinite(regexAutomaton);
        CompiledAutomaton compiled = new CompiledAutomaton(regexAutomaton, isFinite, false);

        // Open the intersecting enum first, then compute the expected language.
        Terms fieldTerms = MultiFields.GetTerms(Reader, "field");
        TermsEnum matches = fieldTerms.Intersect(compiled, null);
        Automaton expectedLanguage = BasicOperations.Intersection(TermsAutomaton, regexAutomaton);

        // Collect a deep copy of every term the enum yields.
        SortedSet<BytesRef> collected = new SortedSet<BytesRef>();
        for (; matches.Next() != null;)
        {
            BytesRef termCopy = BytesRef.DeepCopyOf(matches.Term);
            collected.Add(termCopy);
        }

        // The union of the collected terms must match the expected language.
        Automaton actualLanguage = BasicAutomata.MakeStringUnion(collected);
        bool sameLanguage = BasicOperations.SameLanguage(expectedLanguage, actualLanguage);
        Assert.IsTrue(sameLanguage);
    }
}