Automaton
RegExp
toAutomaton(null)
/// <summary>
/// Gathers the automata for the operands of a chain of nested expressions of the same kind.
/// A sub-expression whose kind equals <paramref name="kind"/> is descended into
/// (<c>exp1</c> first, then <c>exp2</c>); any other sub-expression is converted with
/// <c>ToAutomaton</c> and appended to <paramref name="list"/>.
/// </summary>
/// <param name="exp">Expression node to inspect.</param>
/// <param name="kind">Kind of node that is descended into rather than converted.</param>
/// <param name="list">Receives the converted automata, in traversal order.</param>
/// <param name="automata">Passed through unchanged to <c>ToAutomaton</c>.</param>
/// <param name="automaton_provider">Passed through unchanged to <c>ToAutomaton</c>.</param>
private void FindLeaves(RegExp exp, Kind kind, IList<Automaton> list, IDictionary<string, Automaton> automata, IAutomatonProvider automaton_provider)
{
    // A node of a different kind ends the chain: convert it and stop.
    if (exp.kind != kind)
    {
        list.Add(exp.ToAutomaton(automata, automaton_provider));
        return;
    }

    // Same kind: keep walking both operands, left before right.
    FindLeaves(exp.exp1, kind, list, automata, automaton_provider);
    FindLeaves(exp.exp2, kind, list, automata, automaton_provider);
}
/// <summary>
/// Checks that the automaton built from a fixed regular expression accepts a fixed input
/// both when run over the string (<c>CharacterRunAutomaton</c>) and when run over the
/// bytes obtained from <c>input.GetBytes(Encoding.UTF8)</c> (<c>ByteRunAutomaton</c>).
/// </summary>
public void TestSpecialCase3()
{
    RegExp re = new RegExp("(\\鯺)*(.)*\\Ӕ");
    string input = "\u5cfd\ufffd\ub2f7\u0033\ue304\u51d7\u3692\udb50\udfb3\u0576\udae2\udc62\u0053\u0449\u04d4";
    Automaton automaton = re.ToAutomaton();
    CharacterRunAutomaton cra = new CharacterRunAutomaton(automaton);
    ByteRunAutomaton bra = new ByteRunAutomaton(automaton);

    Assert.IsTrue(cra.Run(input));

    // Let the compiler pick the element type returned by GetBytes instead of forcing
    // sbyte[], so the buffer always matches what ByteRunAutomaton.Run expects.
    var bytes = input.GetBytes(Encoding.UTF8);
    Assert.IsTrue(bra.Run(bytes, 0, bytes.Length));
}
/// <summary>
/// Checks that the automaton built from <c>".+\u0775"</c> accepts a fixed input both when
/// run over the string (<c>CharacterRunAutomaton</c>) and when run over the bytes obtained
/// from <c>input.GetBytes(Encoding.UTF8)</c> (<c>ByteRunAutomaton</c>).
/// </summary>
public void TestSpecialCase2()
{
    RegExp re = new RegExp(".+\u0775");
    string input = "\ufadc\ufffd\ub80b\uda5a\udc68\uf234\u0056\uda5b\udcc1\ufffd\ufffd\u0775";
    Automaton automaton = re.ToAutomaton();
    CharacterRunAutomaton cra = new CharacterRunAutomaton(automaton);
    ByteRunAutomaton bra = new ByteRunAutomaton(automaton);

    Assert.IsTrue(cra.Run(input));

    // Let the compiler pick the element type returned by GetBytes instead of forcing
    // sbyte[], so the buffer always matches what ByteRunAutomaton.Run expects.
    var bytes = input.GetBytes(Encoding.UTF8);
    Assert.IsTrue(bra.Run(bytes, 0, bytes.Length)); // this one fails!
}
/// <summary>
/// Checks that the automaton built from <c>".?"</c> accepts the empty input, both through
/// <c>CharacterRunAutomaton</c> and through <c>ByteRunAutomaton</c>.
/// </summary>
public void TestSpecialCase()
{
    Automaton optionalAnyChar = new RegExp(".?").ToAutomaton();
    CharacterRunAutomaton charRunner = new CharacterRunAutomaton(optionalAnyChar);
    ByteRunAutomaton byteRunner = new ByteRunAutomaton(optionalAnyChar);

    // make sure character dfa accepts empty string
    Assert.IsTrue(charRunner.IsAccept(charRunner.InitialState));
    Assert.IsTrue(charRunner.Run(""));
    Assert.IsTrue(charRunner.Run(new char[0], 0, 0));

    // make sure byte dfa accepts empty string
    Assert.IsTrue(byteRunner.IsAccept(byteRunner.InitialState));
    Assert.IsTrue(byteRunner.Run(new byte[0], 0, 0));
}
/// <summary>
/// For a number of randomly generated regular expressions, repeatedly draws a string from
/// <c>RandomAcceptedStrings</c> and asserts that the originating automaton accepts it.
/// On any failure the regexp and the drawn code points (in hex) are written to the console
/// before the exception is rethrown.
/// </summary>
public virtual void TestGetRandomAcceptedString()
{
    int regexpRounds = AtLeast(100);
    int stringsPerRegexp = AtLeast(100);

    for (int round = 0; round < regexpRounds; round++)
    {
        RegExp re = new RegExp(AutomatonTestUtil.RandomRegexp(Random), RegExpSyntax.NONE);
        Automaton a = re.ToAutomaton();
        Assert.IsFalse(BasicOperations.IsEmpty(a));

        RandomAcceptedStrings generator = new RandomAcceptedStrings(a);
        for (int attempt = 0; attempt < stringsPerRegexp; attempt++)
        {
            // Declared outside the try so the catch block can dump it when available.
            int[] acc = null;
            try
            {
                acc = generator.GetRandomAcceptedString(Random);
                Assert.IsTrue(BasicOperations.Run(a, UnicodeUtil.NewString(acc, 0, acc.Length)));
            }
            catch (Exception /*t*/)
            {
                Console.WriteLine("regexp: " + re);
                if (acc != null)
                {
                    Console.WriteLine("fail acc re=" + re + " count=" + acc.Length);
                    foreach (int codePoint in acc)
                    {
                        Console.WriteLine(" " + codePoint.ToString("x"));
                    }
                }

                throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
            }
        }
    }
}
/// <summary>
/// For a number of randomly generated regular expressions, repeatedly draws a string from
/// <c>AutomatonTestUtil.RandomAcceptedStrings</c> and asserts that the originating
/// automaton accepts it. On any failure the regexp and the drawn code points (in hex) are
/// written to the console before the exception is rethrown.
/// </summary>
public virtual void TestGetRandomAcceptedString()
{
    int ITER1 = AtLeast(100);
    int ITER2 = AtLeast(100);
    for (int i = 0; i < ITER1; i++)
    {
        RegExp re = new RegExp(AutomatonTestUtil.RandomRegexp(Random()), RegExp.NONE);
        Automaton a = re.ToAutomaton();
        Assert.IsFalse(BasicOperations.IsEmpty(a));

        AutomatonTestUtil.RandomAcceptedStrings rx = new AutomatonTestUtil.RandomAcceptedStrings(a);
        for (int j = 0; j < ITER2; j++)
        {
            // Declared outside the try so the catch block can dump it when available.
            int[] acc = null;
            try
            {
                acc = rx.GetRandomAcceptedString(Random());
                string s = UnicodeUtil.NewString(acc, 0, acc.Length);
                Assert.IsTrue(BasicOperations.Run(a, s));
            }
            catch (Exception)
            {
                Console.WriteLine("regexp: " + re);
                if (acc != null)
                {
                    Console.WriteLine("fail acc re=" + re + " count=" + acc.Length);
                    for (int k = 0; k < acc.Length; k++)
                    {
                        Console.WriteLine(" " + acc[k].ToString("x"));
                    }
                }

                // Bare rethrow keeps the original stack trace; "throw t;" would reset it
                // to this line and hide where the failure actually happened (CA2200).
                throw;
            }
        }
    }
}
/// <summary>
/// Builds the automaton for this expression node by dispatching on <c>kind</c>.
/// Results of the compound operations (union, concatenation, intersection, optional,
/// the repeat variants and complement) are passed to
/// <c>MinimizationOperations.Minimize</c>; the basic automata are returned as built.
/// </summary>
/// <param name="automata">Optional map from identifier to automaton, consulted first for
/// <c>Kind.REGEXP_AUTOMATON</c> nodes; may be <c>null</c>.</param>
/// <param name="automaton_provider">Optional provider consulted when the map yields no
/// automaton; may be <c>null</c>.</param>
/// <returns>The automaton for this node, or <c>null</c> if <c>kind</c> matches none of
/// the handled cases.</returns>
/// <exception cref="ArgumentException">A named automaton could not be found in either
/// source, or the provider failed with an exception for which <c>IsIOException()</c>
/// is true.</exception>
private Automaton ToAutomaton(IDictionary<string, Automaton> automata, IAutomatonProvider automaton_provider)
{
    IList<Automaton> list;
    Automaton a = null;
    switch (kind)
    {
        case Kind.REGEXP_UNION:
            // Flatten nested unions so that all operands are combined in one call.
            list = new List<Automaton>();
            FindLeaves(exp1, Kind.REGEXP_UNION, list, automata, automaton_provider);
            FindLeaves(exp2, Kind.REGEXP_UNION, list, automata, automaton_provider);
            a = BasicOperations.Union(list);
            MinimizationOperations.Minimize(a);
            break;

        case Kind.REGEXP_CONCATENATION:
            // Flatten nested concatenations so that all operands are combined in one call.
            list = new List<Automaton>();
            FindLeaves(exp1, Kind.REGEXP_CONCATENATION, list, automata, automaton_provider);
            FindLeaves(exp2, Kind.REGEXP_CONCATENATION, list, automata, automaton_provider);
            a = BasicOperations.Concatenate(list);
            MinimizationOperations.Minimize(a);
            break;

        case Kind.REGEXP_INTERSECTION:
            a = exp1.ToAutomaton(automata, automaton_provider).Intersection(exp2.ToAutomaton(automata, automaton_provider));
            MinimizationOperations.Minimize(a);
            break;

        case Kind.REGEXP_OPTIONAL:
            a = exp1.ToAutomaton(automata, automaton_provider).Optional();
            MinimizationOperations.Minimize(a);
            break;

        case Kind.REGEXP_REPEAT:
            a = exp1.ToAutomaton(automata, automaton_provider).Repeat();
            MinimizationOperations.Minimize(a);
            break;

        case Kind.REGEXP_REPEAT_MIN:
            a = exp1.ToAutomaton(automata, automaton_provider).Repeat(min);
            MinimizationOperations.Minimize(a);
            break;

        case Kind.REGEXP_REPEAT_MINMAX:
            a = exp1.ToAutomaton(automata, automaton_provider).Repeat(min, max);
            MinimizationOperations.Minimize(a);
            break;

        case Kind.REGEXP_COMPLEMENT:
            a = exp1.ToAutomaton(automata, automaton_provider).Complement();
            MinimizationOperations.Minimize(a);
            break;

        case Kind.REGEXP_CHAR:
            a = BasicAutomata.MakeChar(c);
            break;

        case Kind.REGEXP_CHAR_RANGE:
            a = BasicAutomata.MakeCharRange(from, to);
            break;

        case Kind.REGEXP_ANYCHAR:
            a = BasicAutomata.MakeAnyChar();
            break;

        case Kind.REGEXP_EMPTY:
            a = BasicAutomata.MakeEmpty();
            break;

        case Kind.REGEXP_STRING:
            a = BasicAutomata.MakeString(s);
            break;

        case Kind.REGEXP_ANYSTRING:
            a = BasicAutomata.MakeAnyString();
            break;

        case Kind.REGEXP_AUTOMATON:
            Automaton aa = null;
            if (automata != null)
            {
                // The IDictionary indexer throws KeyNotFoundException for a missing key,
                // which would skip the provider fallback and the "not found" error below.
                // TryGetValue leaves aa null on a miss so both remain reachable.
                automata.TryGetValue(s, out aa);
            }
            if (aa == null && automaton_provider != null)
            {
                try
                {
                    aa = automaton_provider.GetAutomaton(s);
                }
                catch (Exception e) when (e.IsIOException())
                {
                    throw new ArgumentException(e.ToString(), e);
                }
            }
            if (aa == null)
            {
                throw new ArgumentException("'" + s + "' not found");
            }
            a = (Automaton)aa.Clone(); // always clone here (ignore allow_mutate)
            break;

        case Kind.REGEXP_INTERVAL:
            a = BasicAutomata.MakeInterval(min, max, digits);
            break;
    }
    return a;
}
/// <summary>
/// Checks that the automaton built from a fixed regular expression accepts a fixed input
/// both when run over the string (<c>CharacterRunAutomaton</c>) and when run over the
/// bytes obtained from <c>input.GetBytes(Encoding.UTF8)</c> (<c>ByteRunAutomaton</c>).
/// </summary>
public void TestSpecialCase3()
{
    string input = "\u5cfd\ufffd\ub2f7\u0033\ue304\u51d7\u3692\udb50\udfb3\u0576\udae2\udc62\u0053\u0449\u04d4";
    Automaton compiled = new RegExp("(\\鯺)*(.)*\\Ӕ").ToAutomaton();

    // Character-level run over the string.
    CharacterRunAutomaton charRunner = new CharacterRunAutomaton(compiled);
    ByteRunAutomaton byteRunner = new ByteRunAutomaton(compiled);
    Assert.IsTrue(charRunner.Run(input));

    // Byte-level run over the encoded form of the same input.
    var encoded = input.GetBytes(Encoding.UTF8);
    Assert.IsTrue(byteRunner.Run(encoded, 0, encoded.Length));
}
/// <summary>
/// Checks that the automaton built from <c>".+\u0775"</c> accepts a fixed input both when
/// run over the string (<c>CharacterRunAutomaton</c>) and when run over the bytes obtained
/// from <c>input.GetBytes(Encoding.UTF8)</c> (<c>ByteRunAutomaton</c>).
/// </summary>
public void TestSpecialCase2()
{
    string input = "\ufadc\ufffd\ub80b\uda5a\udc68\uf234\u0056\uda5b\udcc1\ufffd\ufffd\u0775";
    Automaton compiled = new RegExp(".+\u0775").ToAutomaton();

    // Character-level run over the string.
    CharacterRunAutomaton charRunner = new CharacterRunAutomaton(compiled);
    ByteRunAutomaton byteRunner = new ByteRunAutomaton(compiled);
    Assert.IsTrue(charRunner.Run(input));

    // Byte-level run over the encoded form of the same input.
    var encoded = input.GetBytes(Encoding.UTF8);
    Assert.IsTrue(byteRunner.Run(encoded, 0, encoded.Length)); // this one fails!
}
/// <summary>
/// Gathers the automata for the operands of a chain of nested expressions of the same kind.
/// A sub-expression whose kind equals <paramref name="kind"/> is descended into
/// (<c>Exp1</c> first, then <c>Exp2</c>); any other sub-expression is converted with
/// <c>ToAutomaton</c> and appended to <paramref name="list"/>.
/// </summary>
/// <param name="exp">Expression node to inspect.</param>
/// <param name="kind">Kind of node that is descended into rather than converted.</param>
/// <param name="list">Receives the converted automata, in traversal order.</param>
/// <param name="automata">Passed through unchanged to <c>ToAutomaton</c>.</param>
/// <param name="automaton_provider">Passed through unchanged to <c>ToAutomaton</c>.</param>
private void FindLeaves(RegExp exp, Kind kind, IList<Automaton> list, IDictionary<string, Automaton> automata, AutomatonProvider automaton_provider)
{
    // A node of a different kind ends the chain: convert it and stop.
    if (exp.kind != kind)
    {
        list.Add(exp.ToAutomaton(automata, automaton_provider));
        return;
    }

    // Same kind: keep walking both operands, left before right.
    FindLeaves(exp.Exp1, kind, list, automata, automaton_provider);
    FindLeaves(exp.Exp2, kind, list, automata, automaton_provider);
}
/// <summary>
/// Creates the query for the field of <paramref name="term"/>, compiling the term's text
/// as a regular expression with the given syntax flags and storing the resulting automaton.
/// </summary>
/// <param name="term">Supplies the field (passed to the base constructor) and the regexp text.</param>
/// <param name="flags">Syntax flags handed to the <c>RegExp</c> constructor.</param>
internal DumbRegexpQuery(Term term, RegExpSyntax flags)
    : base(term.Field)
{
    automaton = new RegExp(term.Text, flags).ToAutomaton();
}