/// <summary>
/// Type initializer: builds the solver and the automata used by the yield type checker.
/// </summary>
static YieldTypeChecker()
{
    yieldTypeCheckerAutomatonSolver = new CharSetSolver(BitWidth.BV7);

    // The product with the automaton for ^[1-9A-D]*$ restricts the property automaton
    // to words made only of the characters 1-9 and A-D.
    var propertyAutomaton = yieldTypeCheckerAutomatonSolver.Convert(yieldTypeCheckerRegex);
    var alphabetAutomaton = yieldTypeCheckerAutomatonSolver.Convert(@"^[1-9A-D]*$");
    yieldTypeCheckerAutomaton = Automaton<BvSet>.MkProduct(
        propertyAutomaton,
        alphabetAutomaton,
        yieldTypeCheckerAutomatonSolver);

    // Determinize first, then minimize the deterministic result.
    var determinized = yieldTypeCheckerAutomaton.Determinize(yieldTypeCheckerAutomatonSolver);
    minimizedTypeCheckerAutomaton = determinized.Minimize(yieldTypeCheckerAutomatonSolver);

#if DEBUG && !DEBUG_DETAIL
    yieldTypeCheckerAutomatonSolver.ShowGraph(minimizedTypeCheckerAutomaton, "minimizedPropertyAutomaton.dgml");
#endif
}
/// <summary>
/// Checks that the DFA of <c>PDLIntEq(PDLAllPos, 2)</c> over {a, b, c} is equivalent to the
/// automaton of <c>^(a|b|c){2}$</c>. The MSO translation of the predicate is printed first.
/// </summary>
public void intEq2()
{
    var solver = new CharSetSolver(BitWidth.BV64);
    var alphabet = new HashSet<char>(new List<char> { 'a', 'b', 'c' });

    PDLPred phi = new PDLIntEq(new PDLAllPos(), 2);

    // Dump the MSO form of the predicate for inspection.
    var msoText = new StringBuilder();
    phi.ToMSO(new FreshGen()).ToString(msoText);
    System.Console.WriteLine(msoText);

    var actualDfa = phi.GetDFA(alphabet, solver);
    var expectedAut = solver.Convert(@"^(a|b|c){2}$");

    Assert.IsTrue(actualDfa.IsEquivalentWith(expectedAut, solver));

    // Optional dot export (disabled):
    ////string file = "../../../TestPDL/DotFiles/IntEq2";
    //solver.SaveAsDot(actualDfa, "aut", file);
}
/// <summary>
/// Creates a move sequence from a regex: the regex is wrapped as <c>^(regex)$</c>,
/// converted to an automaton, determinized and minimized. The current state starts at 0.
/// </summary>
/// <param name="regex">Regular expression describing the moves.</param>
public MoveSequence(string regex)
{
    solver = new CharSetSolver(BitWidth.BV7);

    string anchoredPattern = "^(" + regex + ")$";
    var converted = solver.Convert(anchoredPattern);
    var deterministic = converted.Determinize(solver);
    moveAutomaton = deterministic.Minimize(solver);

    currentState = 0;

    // Debug helpers (disabled):
    //solver.ShowGraph(moveAutomaton, "D");
    //ComputeDeadStates();
}
/// <summary>
/// Compares automaton acceptance with <see cref="Regex.IsMatch(string, string)"/> for a
/// pattern whose bounded loop has lower bound 0.
/// </summary>
public void TestLoopThatStartsWith0()
{
    const string pattern = @"(w(a|bc){0,2})";
    const string input = "w.-J_";

    var css = new CharSetSolver(BitWidth.BV7);
    var automaton = css.Convert(pattern, RegexOptions.Singleline, true);
    //css.ShowGraph(automaton, "CornerCase");

    bool acceptedByAutomaton = css.Accepts(automaton, input);
    bool matchedByRegex = Regex.IsMatch(input, pattern);

    Assert.AreEqual(matchedByRegex, acceptedByAutomaton);
}
/// <summary>
/// Converts the sample regex <c>a|b</c> into an automaton, verifies that it only accepts
/// strings over the alphabet {a, b}, and pairs it with that alphabet.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
/// <exception cref="PDLException">
/// The regex cannot be converted, or it accepts a string outside (a|b)*.
/// </exception>
static void Main(string[] args)
{
    HashSet<char> al = new HashSet<char>(new char[] { 'a', 'b' });
    CharSetSolver solver = new CharSetSolver(BitWidth.BV64);
    string rexpr = "a|b";

    // Group the expression before anchoring it: "^a|b$" would parse as (^a)|(b$),
    // which is not the anchored form of "a|b".
    var escapedRexpr = string.Format("^({0})$", rexpr);

    Automaton<BDD> aut = null;
    try
    {
        aut = solver.Convert(escapedRexpr);
    }
    catch (System.ArgumentException e)
    {
        // Same handling as the sibling LorisTest uses for malformed patterns.
        throw new PDLException("The input is not a well formatted regular expression." + e.Message);
    }
    catch (AutomataException e)
    {
        throw new PDLException("The input is not a well formatted regular expression." + e.Message);
    }

    // Whatever remains after removing (a|b)* is a word using a foreign character.
    // Subtracting only "^c$" would leave every a/b word and always trip the check.
    var diff = aut.Minus(solver.Convert("^(a|b)*$"), solver);
    if (!diff.IsEmpty)
    {
        throw new PDLException("The regular expression should only accept strings over (a|b)*.");
    }

    var auttt = new Pair<HashSet<char>, Automaton<BDD>>(al, aut);
}
/// <summary>
/// Minimizes a small NFA twice: once as-is and once with its determinism flag forced to
/// true. Asserts that the two results differ, that the regular result is equivalent to the
/// NFA, and that "abab" is rejected by the NFA but accepted by the forced result.
/// </summary>
public void TestNFA2()
{
    var solver = new CharSetSolver(BitWidth.BV7);
    var isA = solver.MkCharConstraint('a');
    var isNotA = solver.MkNot(isA);

    var moves = new Move<BDD>[]
    {
        new Move<BDD>(0, 1, solver.True),
        new Move<BDD>(0, 2, solver.True),
        new Move<BDD>(2, 1, solver.True),
        new Move<BDD>(1, 1, isA),
        new Move<BDD>(1, 2, isNotA)
    };
    var nfa = Automaton<BDD>.Create(solver, 0, new int[] { 1 }, moves);

    var minimized = nfa.Minimize();

    // Pretend the NFA is deterministic so that the deterministic minimization path runs;
    // per the original note, that path yields the wrong result for this automaton.
    nfa.isDeterministic = true;
    var minimizedWrong = nfa.Minimize();

    // Restore the truthful flags before comparing.
    nfa.isDeterministic = false;
    minimizedWrong.isDeterministic = false;

    //minimized.ShowGraph("min_nfa");
    //minimizedWrong.ShowGraph("min_nfa_wrong");
    //minimized.Determinize().Minimize().ShowGraph("min_nfa1");
    //nfa.Determinize().Minimize().ShowGraph("dfa");
    //nfa.ShowGraph("nfa");
    //minimizedWrong.Determinize().Minimize().ShowGraph("min_nfa2");

    Assert.IsFalse(minimized.IsEquivalentWith(minimizedWrong));
    Assert.IsTrue(minimized.IsEquivalentWith(nfa));

    // Concrete witness: "abab" distinguishes nfa from the wrongly minimized automaton.
    Assert.IsTrue(solver.Convert("^abab$").Intersect(nfa).IsEmpty);
    Assert.IsFalse(solver.Convert("^abab$").Intersect(minimizedWrong).IsEmpty);
}
/// <summary>
/// Verifies that the epsilon-free automaton of <c>^[\0-\x7E]*(([01]|01)0)$</c> is reported
/// as nondeterministic and that its determinization is reported as deterministic.
/// </summary>
public void MkCheckDeterminismTest2()
{
    var converter = new CharSetSolver(BitWidth.BV7);
    var regex = @"^[\0-\x7E]*(([01]|01)0)$";

    var fa = converter.Convert(regex, System.Text.RegularExpressions.RegexOptions.None);
    fa = fa.RemoveEpsilons();
    fa.CheckDeterminism();
    Assert.IsFalse(fa.IsDeterministic, "fa expected to be nondeterministic");

    var fad = fa.Determinize();
    //converter.ShowGraph(fa, "fa.dgml");
    //converter.ShowGraph(fad, "fad.dgml");
    fad.CheckDeterminism(true);

    // The message names the automaton actually being checked (fad, not fa).
    Assert.IsTrue(fad.IsDeterministic, "fad expected to be deterministic");
}
/// <summary>
/// Checks the edit distance from two sample strings to the language of
/// <c>^((ab|b){1,2}cc)*$</c>: 5 for "absabaasd" and 2 for "aba".
/// </summary>
public void EDTestNew()
{
    CharSetSolver solver = new CharSetSolver();
    string a = "absabaasd";
    var aut = solver.Convert("^((ab|b){1,2}cc)*$").Determinize().Minimize();

    int dist;
    var output = EditDistance.GetClosestElement(a, aut, solver, out dist);
    Console.WriteLine("string: {0}, distance: {1}", output, dist);
    // AreEqual reports the actual distance on failure, unlike IsTrue(dist == 5).
    Assert.AreEqual(5, dist);

    output = EditDistance.GetClosestElement("aba", aut, solver, out dist);
    Console.WriteLine("string: {0}, distance: {1}", output, dist);
    Assert.AreEqual(2, dist);
}
/// <summary>
/// Checks the edit distance from two sample strings to the language of
/// <c>^(a|b){3}$</c>: 1 for "aa" and 2 for "bc".
/// </summary>
public void EDTest2()
{
    CharSetSolver solver = new CharSetSolver();
    string a = "aa";
    var aut = solver.Convert("^(a|b){3}$").Determinize().Minimize();

    int dist;
    var output = EditDistance.GetClosestElement(a, aut, solver, out dist);
    Console.WriteLine("string: {0}, distance: {1}", output, dist);
    // AreEqual reports the actual distance on failure, unlike IsTrue(dist == 1).
    Assert.AreEqual(1, dist);

    output = EditDistance.GetClosestElement("bc", aut, solver, out dist);
    Console.WriteLine("string: {0}, distance: {1}", output, dist);
    Assert.AreEqual(2, dist);
}
/// <summary>
/// Smoke test: converts a regex mixing case-insensitive and explicit character classes,
/// then determinizes, normalizes and compiles it. No assertions are currently enabled.
/// </summary>
public void CheckDeterminize()
{
    const string pattern = @"^[0-9]+(?i:[a-d])$|^[0-9]+[a-dA-D]{2}$";

    var solver = new CharSetSolver();
    var fa = solver.Convert(pattern, System.Text.RegularExpressions.RegexOptions.Singleline);
    //fa.ShowGraph("fa");

    var determinized = fa.Determinize();
    var fadet = determinized.Normalize();
    //fadet.ShowGraph("fadet");
    //fadet.Normalize().ShowGraph("fadet_norm");

    var cs = fadet.Compile();

    // Transition checks, disabled in the source:
    //var aut = cs.Automaton;
    //Assert.IsFalse(fadet.IsFinalState(fadet.Transition(0, "ab01".ToCharArray())));
    //Assert.IsTrue(fadet.IsFinalState(fadet.Transition(0, "01ab".ToCharArray())));
    //Assert.IsFalse(fadet.IsFinalState(fadet.Transition(0, "0881abc".ToCharArray())));
    //Assert.IsTrue(fadet.IsFinalState(fadet.Transition(0, "0881ac".ToCharArray())));
    //Assert.IsFalse(fadet.IsFinalState(fadet.Transition(0, "013333a.".ToCharArray())));
}
/// <summary>
/// Converts <c>^\b[A@]\b$</c> to an automaton, eliminates its boundary states, and checks
/// that the automaton agrees with the .NET regex on the input "@" before running the
/// general validity check.
/// </summary>
public void TestTrivialWordBoundary4()
{
    Regex r = new Regex(@"^\b[A@]\b$", RegexOptions.None);
    CharSetSolver css = new CharSetSolver(BitWidth.BV16);

    var aut = css.Convert(r.ToString(), RegexOptions.Singleline, true);
    //css.ShowGraph(aut, "TrivialWordBoundary4_with_b");
    css.RegexConverter.EliminateBoundaryStates(aut);
    var aut1 = aut.RemoveEpsilons().Determinize().Minimize();
    //css.ShowGraph(aut, "TrivialWordBoundary4");

    string s = "@";
    bool ismatchExpected = r.IsMatch(s);
    bool ismatchActual = css.Accepts(aut1, s);

    // Both values were computed but never compared; assert that they agree.
    Assert.AreEqual(ismatchExpected, ismatchActual);

    CheckValidity(css, aut, r);
}
/// <summary>
/// Smoke test: assembles a regex from one-, two-, three- and four-unit alternatives, converts
/// it, determinizes it and minimizes it with Hopcroft's algorithm. No assertions are made.
/// </summary>
public void Utf8regex()
{
    var solver = new CharSetSolver(BitWidth.BV16);

    var oneUnit = @"[\0-\x7F]";
    var twoUnits = @"[\xC2-\xDF][\x80-\xBF]";
    var threeUnits = @"(\xE0[\xA0-\xBF]|\xED[\x80-\x9F]|[\xE1-\xEC\xEE\xEF][\x80-\xBF])[\x80-\xBF]";
    var fourUnits = @"(\xF0[\x90-\x9F]|\xF4[\x80-\x8F]|[\xF1-\xF3][\x80-\xBF])[\x80-\xBF]{2}";

    var pattern = string.Format("^({0}|{1}|{2}|{3})*$", oneUnit, twoUnits, threeUnits, fourUnits);

    var aut = solver.Convert(pattern);
    var determinized = aut.Determinize();
    var aut2 = determinized.MinimizeHopcroft();

    // Further experiments, disabled in the source:
    //var aut22 = aut.Determinize(solver).Minimize2(solver);
    //var aut3 = aut2.Complement(solver);
    //solver.ShowGraph(aut, "Utf8");
    //solver.ShowGraph(aut2, "aut2");
    //solver.ShowGraph(aut22, "aut22");
    //solver.ShowGraph(aut3, "Utf8compl");
}
/// <summary>
/// Compiles the minimized automaton of <c>ab|ac</c> and checks its transitions and its
/// final/sink state classification.
/// </summary>
public void CompileTest()
{
    var solver = new CharSetSolver();
    string regex = @"ab|ac";
    var fa = solver.Convert(regex);
    var fadet = fa.Determinize().Minimize();
    //fadet.ShowGraph("fadet");
    var cs = fadet.Compile();
    var aut = cs.Automaton;

    // Assert.AreEqual takes (expected, actual); the original passed them swapped, which
    // would mislabel the two values in a failure message.
    Assert.AreEqual<int>(0, aut.Transition(0, 'd'));
    Assert.AreEqual<int>(1, aut.Transition(0, 'c', 'a'));
    Assert.AreEqual<int>(2, aut.Transition(1, 'b', 'f', 'o', 'o'));

    Assert.IsTrue(aut.IsFinalState(2));
    Assert.IsTrue(aut.IsSinkState(2));
    Assert.IsFalse(aut.IsSinkState(1));
    Assert.IsFalse(aut.IsFinalState(0));
}
/// <summary>
/// Checks that the DFA of <c>PDLModSetEq(PDLAllPos, 6, 3)</c> over {a, b} is equivalent to
/// the automaton of <c>^((a|b){6})*(a|b){3}$</c>. Prints the MSO form and the DFA build time.
/// </summary>
public void mod6()
{
    var solver = new CharSetSolver(BitWidth.BV64);
    var alphabet = new HashSet<char>(new List<char> { 'a', 'b' });

    PDLPred phi = new PDLModSetEq(new PDLAllPos(), 6, 3);

    var msoText = new StringBuilder();
    phi.ToMSO(new FreshGen()).ToString(msoText);
    System.Console.WriteLine(msoText);

    // Time the DFA construction.
    Stopwatch timer = Stopwatch.StartNew();
    var dfa = phi.GetDFA(alphabet, solver);
    timer.Stop();
    Console.WriteLine(timer.ElapsedMilliseconds);

    // Second timed construction, disabled in the source:
    //Stopwatch sw1 = new Stopwatch();
    //sw1.Start();
    //var dfa1 = phi.GetDFA(alphabet, solver);
    //sw1.Stop();
    //Console.WriteLine(sw1.ElapsedMilliseconds);

    var converted = solver.Convert(@"^((a|b){6})*(a|b){3}$");
    var expected = converted.Determinize(solver).Minimize(solver);

    //string file = "../../../TestPDL/DotFiles/mod6";
    //solver.SaveAsDot(dfa, "aut", file);
    //solver.SaveAsDot(expected, "aut", file+"t");

    Console.Write(phi.ToMSO().ToString());
    Assert.IsTrue(dfa.IsEquivalentWith(expected, solver));
}
/// <summary>
/// Builds a WS1S formula from label, successor and subset constraints over X, Y1, Y2 and Z,
/// checks that its DFA over {a, b} is equivalent to <c>^(a|b)*aba(a|b)*$</c>, and saves the
/// DFA as a dot file.
/// </summary>
public void WS1SSubset()
{
    var solver = new CharSetSolver(BitWidth.BV64);
    var alphabet = new HashSet<char>(new List<char> { 'a', 'b' });

    // Label constraints: X and Z carry 'a', Y1 carries 'b'.
    WS1SFormula labelX = new WS1SUnaryPred("X", solver.MkCharConstraint(false, 'a'));
    WS1SFormula labelY1 = new WS1SUnaryPred("Y1", solver.MkCharConstraint(false, 'b'));
    WS1SFormula labelZ = new WS1SUnaryPred("Z", solver.MkCharConstraint(false, 'a'));
    WS1SFormula labels = new WS1SAnd(labelX, new WS1SAnd(labelY1, labelZ));

    // Structural constraints: succ(X, Y1), succ(Y2, Z) and Y1 subset of Y2.
    WS1SFormula succXY1 = new WS1SSucc("X", "Y1");
    WS1SFormula succY2Z = new WS1SSucc("Y2", "Z");
    WS1SFormula y1InY2 = new WS1SSubset("Y1", "Y2");
    WS1SFormula structure = new WS1SAnd(new WS1SAnd(succXY1, succY2Z), y1InY2);

    WS1SFormula phi =
        new WS1SExists("X",
            new WS1SExists("Y1",
                new WS1SExists("Y2",
                    new WS1SExists("Z", new WS1SAnd(labels, structure)))));

    // A plain subset formula is also compiled; its DFA is not inspected further.
    WS1SFormula subsetOnly = new WS1SExists("X", new WS1SExists("Y", new WS1SSubset("X", "Y")));
    var subsetDfa = subsetOnly.getDFA(alphabet, solver);
    //solver.SaveAsDot(subsetOnly.getDFA(alphabet, solver), "bla","bla.dot");

    var dfa = phi.getDFA(alphabet, solver);
    var expected = solver.Convert(@"^(a|b)*aba(a|b)*$");
    Assert.IsTrue(dfa.IsEquivalentWith(expected, solver));

    // Per the original note, the .dot extension is added automatically when missing.
    string dotPath = "../../../MSOZ3Test/DotFiles/aba";
    solver.SaveAsDot(dfa, "aut", dotPath);
}
/// <summary>
/// Checks that the DFA of "exists X. singleton(X)" over {a, b} is equivalent to the
/// minimized automaton of <c>^(a|b)+$</c>. The DFA is saved as a dot file before the assertion.
/// </summary>
public void WS1SSingleton()
{
    var solver = new CharSetSolver(BitWidth.BV64);
    var alphabet = new HashSet<char>(new List<char> { 'a', 'b' });

    WS1SFormula singleton = new WS1SSingleton("X");
    WS1SFormula phi = new WS1SExists("X", singleton);

    var dfa = phi.getDFA(alphabet, solver);
    var converted = solver.Convert(@"^(a|b)+$");
    var expected = converted.Determinize(solver).Minimize(solver);

    // Per the original note, the .dot extension is added automatically when missing.
    string dotPath = "../../../MSOZ3Test/DotFiles/singletona";
    solver.SaveAsDot(dfa, "aut", dotPath);

    Assert.IsTrue(dfa.IsEquivalentWith(expected, solver));
}
/// <summary>
/// Checks that "the last position is the successor of the first" over {a, b} yields a DFA
/// equivalent to <c>^(a|b){2}$</c>.
/// </summary>
public void FirstLast()
{
    var solver = new CharSetSolver(BitWidth.BV64);
    var alphabet = new HashSet<char>(new List<char> { 'a', 'b' });

    PDLPred phi = new PDLIsSuccessor(new PDLFirst(), new PDLLast());

    var actualDfa = phi.GetDFA(alphabet, solver);
    var expectedAut = solver.Convert(@"^(a|b){2}$");

    Assert.IsTrue(actualDfa.IsEquivalentWith(expectedAut, solver));

    // Optional dot export (disabled):
    //string file = "../../../TestPDL/DotFiles/FirstLast";
    //solver.SaveAsDot(actualDfa, "aut", file);
}
/// <summary>
/// Checks that "there is a position x holding 'a'" over {a, b} yields a DFA equivalent to
/// <c>^b*a(a|b)*$</c>.
/// </summary>
public void Exists()
{
    var solver = new CharSetSolver(BitWidth.BV64);
    var alphabet = new HashSet<char>(new List<char> { 'a', 'b' });

    PDLPred atX = new PDLAtPos('a', new PDLPosVar("x"));
    PDLPred phi = new PDLExistsFO("x", atX);

    var actualDfa = phi.GetDFA(alphabet, solver);
    var expectedAut = solver.Convert(@"^b*a(a|b)*$");

    Assert.IsTrue(actualDfa.IsEquivalentWith(expectedAut, solver));

    // Optional dot export (disabled):
    ////string file = "../../../TestPDL/DotFiles/exists";
    ////solver.SaveAsDot(actualDfa, "aut", file);
}
/// <summary>
/// Converts every supported regex from the C4C sample file and validates each automaton
/// against the corresponding .NET regex (Singleline). Not currently registered as a test.
/// </summary>
//[TestMethod]
public void TestC4Cregexes()
{
    string[] regexes = File.ReadAllLines(@"..\..\..\Samples\C4C\mathiasbynens_url_regex.txt");
    CharSetSolver css = new CharSetSolver(BitWidth.BV7);

    // Line indices whose regexes use constructs the converter does not support.
    var unsupported = new HashSet<int>
    {
        1,  // uses require (?=)
        4,  // uses prevent (?!)
        9,  // uses lazy a*?
        11, // uses prevent (?!)
    };

    for (int index = 0; index < regexes.Length; index++)
    {
        if (unsupported.Contains(index))
        {
            continue;
        }

        string pattern = regexes[index];
        var automaton = css.Convert(pattern);
        CheckValidity(css, automaton, new Regex(pattern, RegexOptions.Singleline));
    }
}
/// <summary>
/// Checks that "exists X, Y. succ(X, Y)" over {a, b} yields a DFA equivalent to
/// <c>^(a|b){2,}$</c>. The DFA is saved as a dot file before the assertion.
/// </summary>
public void WS1SSucc()
{
    var solver = new CharSetSolver(BitWidth.BV64);
    var alphabet = new HashSet<char>(new List<char> { 'a', 'b' });

    WS1SFormula succ = new WS1SSucc("X", "Y");
    WS1SFormula phi = new WS1SExists("X", new WS1SExists("Y", succ));

    var dfa = phi.getDFA(alphabet, solver);
    var expected = solver.Convert(@"^(a|b){2,}$");

    // Per the original note, the .dot extension is added automatically when missing.
    string dotPath = "../../../MSOZ3Test/DotFiles/containsab";
    solver.SaveAsDot(dfa, "aut", dotPath);

    Assert.IsTrue(dfa.IsEquivalentWith(expected, solver));
}
/// <summary>
/// Checks that "there is a position x holding 'a'" over {a, b} yields a DFA equivalent to
/// <c>^b*a(a|b)*$</c>.
/// </summary>
public void Exists()
{
    var solver = new CharSetSolver(BitWidth.BV64);
    var alphabet = new HashSet<char>(new List<char> { 'a', 'b' });

    PDLPred atX = new PDLAtPos('a', new PDLPosVar("x"));
    PDLPred phi = new PDLExistsFO("x", atX);

    var actualDfa = phi.GetDFA(alphabet, solver);
    var expectedAut = solver.Convert(@"^b*a(a|b)*$");

    Assert.IsTrue(actualDfa.IsEquivalentWith(expectedAut, solver));

    // Optional dot export (disabled):
    ////string file = "../../../TestPDL/DotFiles/exists";
    ////solver.SaveAsDot(actualDfa, "aut", file);
}
/// <summary>
/// Checks that "exists X. true" over {a, b} yields a DFA equivalent to <c>^(a|b)*$</c>.
/// The DFA is saved as a dot file before the assertion.
/// </summary>
public void WS1STrue()
{
    var solver = new CharSetSolver(BitWidth.BV64);
    var alphabet = new HashSet<char>(new List<char> { 'a', 'b' });

    WS1SFormula phi = new WS1SExists("X", new WS1STrue());

    var dfa = phi.getDFA(alphabet, solver);
    var expected = solver.Convert(@"^(a|b)*$");

    // Per the original note, the .dot extension is added automatically when missing.
    string dotPath = "../../../MSOZ3Test/DotFiles/true";
    solver.SaveAsDot(dfa, "true", dotPath);

    Assert.IsTrue(dfa.IsEquivalentWith(expected, solver));
}
/// <summary>
/// Computes the strongly connected components of the minimal DFA of <c>^a(ab)*$</c>,
/// prints them, and checks that there are two components which together contain as many
/// states as the automaton has.
/// </summary>
public void SCCTest1()
{
    CharSetSolver solver = new CharSetSolver();
    var aut = solver.Convert("^a(ab)*$").Determinize().Minimize();
    var sccs = GraphAlgorithms.GetStronglyConnectedComponents(aut);

    // Collect every state seen in any component while printing one component per line.
    List<int> total = new List<int>();
    foreach (var scc in sccs)
    {
        Console.WriteLine();
        foreach (var st in scc)
        {
            total.Add(st);
            Console.Write(st + ",");
        }
    }

    // AreEqual reports the actual counts on failure, unlike IsTrue(a == b).
    Assert.AreEqual(2, sccs.ToArray().Length);
    Assert.AreEqual(aut.StateCount, total.Count);
}
/// <summary>
/// Checks that "the last position is the successor of the first" over {a, b} yields a DFA
/// equivalent to <c>^(a|b){2}$</c>.
/// </summary>
public void FirstLast()
{
    var solver = new CharSetSolver(BitWidth.BV64);
    var alphabet = new HashSet<char>(new List<char> { 'a', 'b' });

    PDLPred phi = new PDLIsSuccessor(new PDLFirst(), new PDLLast());

    var actualDfa = phi.GetDFA(alphabet, solver);
    var expectedAut = solver.Convert(@"^(a|b){2}$");

    Assert.IsTrue(actualDfa.IsEquivalentWith(expectedAut, solver));

    // Optional dot export (disabled):
    //string file = "../../../TestPDL/DotFiles/FirstLast";
    //solver.SaveAsDot(actualDfa, "aut", file);
}
/// <summary>
/// Converts every supported regex from the C4C sample file and validates each automaton
/// against the corresponding .NET regex (Singleline). Not currently registered as a test.
/// </summary>
//[TestMethod]
public void TestC4Cregexes()
{
    string[] regexes = File.ReadAllLines(@"..\..\..\Samples\C4C\mathiasbynens_url_regex.txt");
    CharSetSolver css = new CharSetSolver(BitWidth.BV7);

    // Line indices whose regexes use constructs the converter does not support.
    var unsupported = new HashSet<int>
    {
        1,  // uses require (?=)
        4,  // uses prevent (?!)
        9,  // uses lazy a*?
        11, // uses prevent (?!)
    };

    for (int index = 0; index < regexes.Length; index++)
    {
        if (unsupported.Contains(index))
        {
            continue;
        }

        string pattern = regexes[index];
        var automaton = css.Convert(pattern);
        CheckValidity(css, automaton, new Regex(pattern, RegexOptions.Singleline));
    }
}
/// <summary>
/// Generates a C# matcher from the minimized automaton of <c>^(\w\d)+$</c> and checks that
/// it accepts "a1b2b4" and rejects "r5t6uu".
/// </summary>
public void gen_csharp_TestRegex2csharp()
{
    const string pattern = @"^(\w\d)+$";

    var solver = new CharSetSolver();
    var converted = solver.Convert(pattern, RegexOptions.Singleline);
    var sfa = converted.RemoveEpsilons();
    var sfaDet = sfa.Determinize();
    var sfaMin = sfaDet.Minimize();
    //solver.ShowGraph(sfa, "sfa");
    //solver.ShowGraph(sfaDet, "sfaDet");
    //solver.ShowGraph(sfaMin, "sfaMin");

    var matcher = solver.ToCS(sfaMin, true, "Regex1", "RegexTransfomer");

    // Evaluate both inputs before asserting, as the original does.
    bool acceptsGood = matcher.IsMatch("a1b2b4");
    bool acceptsBad = matcher.IsMatch("r5t6uu");
    //Console.WriteLine(matcher.SourceCode);
    //Console.ReadLine();

    Assert.IsTrue(acceptsGood);
    Assert.IsFalse(acceptsBad);
}
/// <summary>
/// Checks that "exists X. every position in X is labelled per the 'a' constraint" over
/// {a, b} yields a DFA equivalent to the minimized automaton of <c>^(a|b)*$</c>.
/// The DFA is saved as a dot file before the assertion.
/// </summary>
public void WS1SLabelTest()
{
    var solver = new CharSetSolver(BitWidth.BV64);
    var alphabet = new HashSet<char>(new List<char> { 'a', 'b' });

    WS1SFormula labelX = new WS1SUnaryPred("X", solver.MkCharConstraint(false, 'a'));
    WS1SFormula phi = new WS1SExists("X", labelX);

    var dfa = phi.getDFA(alphabet, solver);
    var converted = solver.Convert(@"^(a|b)*$");
    var expected = converted.Determinize(solver).Minimize(solver);

    // Per the original note, the .dot extension is added automatically when missing.
    string dotPath = "../../../MSOZ3Test/DotFiles/sigmastar";
    solver.SaveAsDot(dfa, "aut", dotPath);

    Assert.IsTrue(dfa.IsEquivalentWith(expected, solver));
}
/// <summary>
/// Checks that "ex x. last(x)" over {a, b} uses its variables correctly and yields a DFA
/// equivalent to <c>^(a|b)+$</c>; the DFA is then saved as a dot file.
/// </summary>
public void MSOLast()
{
    var solver = new CharSetSolver(BitWidth.BV64);
    var alphabet = new HashSet<char>(new List<char> { 'a', 'b' });

    // ex x. last(x)
    MSOFormula formula = new MSOExistsFO("x", new MSOLast("x"));
    Assert.IsTrue(formula.CheckUseOfVars());

    var dfa = formula.getDFA(alphabet, solver);
    var expected = solver.Convert(@"^(a|b)+$");
    Assert.IsTrue(dfa.IsEquivalentWith(expected, solver));

    // Per the original note, the .dot extension is added automatically when missing.
    string dotPath = "../../../MSOZ3Test/DotFiles/exlast";
    solver.SaveAsDot(dfa, "aut", dotPath);
}
/// <summary>
/// Times the backtracking .NET regex on growing runs of 'a', asserts that the last
/// measurement exceeds 100 ms, then converts, determinizes, minimizes and compiles the
/// same pattern as an automaton.
/// </summary>
public void CompileEvilRegex()
{
    var regex = @"^(([a-z])+.)+[A-Z]([a-z])+$";
    Regex EvilRegex = new Regex(regex, RegexOptions.Compiled | (RegexOptions.Singleline));

    // Per the original note, matching takes time exponential in the length of the input.
    string input = "aaaaaaaaaaaaaaaaaaaa";
    int t = 0;
    for (int round = 0; round < 10; round++)
    {
        int startTicks = System.Environment.TickCount;
        EvilRegex.IsMatch(input);
        t = System.Environment.TickCount - startTicks;
        input += "a";
    }

    // Only the timing of the final (longest measured) input is checked.
    Assert.IsTrue(t > 100);

    var solver = new CharSetSolver();
    var fa = solver.Convert(regex);
    var determinized = fa.Determinize();
    var fadet = determinized.Minimize();
    var cs = fadet.Compile();
    //fadet.ShowGraph("EvilRegex");
}
/// <summary>
/// Computes the strongly connected components of the minimal DFA of <c>^a(ab)*$</c>,
/// prints them, and checks that there are two components which together contain as many
/// states as the automaton has.
/// </summary>
public void SCCTest1()
{
    CharSetSolver solver = new CharSetSolver();
    var aut = solver.Convert("^a(ab)*$").Determinize().Minimize();
    var sccs = GraphAlgorithms.GetStronglyConnectedComponents(aut);

    // Collect every state seen in any component while printing one component per line.
    List<int> total = new List<int>();
    foreach (var scc in sccs)
    {
        Console.WriteLine();
        foreach (var st in scc)
        {
            total.Add(st);
            Console.Write(st + ",");
        }
    }

    // AreEqual reports the actual counts on failure, unlike IsTrue(a == b).
    Assert.AreEqual(2, sccs.ToArray().Length);
    Assert.AreEqual(aut.StateCount, total.Count);
}
/// <summary>
/// Checks that "first position equals last position" over {a, b} yields a DFA equivalent
/// to <c>^(a|b){1}$</c>. The WS1S form of the predicate is printed before the assertion.
/// </summary>
public void FirstLastEq()
{
    var solver = new CharSetSolver(BitWidth.BV64);
    var alphabet = new HashSet<char>(new List<char> { 'a', 'b' });

    PDLPred phi = new PDLPosEq(new PDLFirst(), new PDLLast());

    var actualDfa = phi.GetDFA(alphabet, solver);
    var expectedAut = solver.Convert(@"^(a|b){1}$");

    var ws1s = phi.ToMSO(new FreshGen()).ToWS1S(solver);
    Console.WriteLine(ws1s.ToString());

    Assert.IsTrue(actualDfa.IsEquivalentWith(expectedAut, solver));

    // Optional dot export (disabled):
    //string file = "../../../TestPDL/DotFiles/FirstLastEq";
    //solver.SaveAsDot(actualDfa, "aut", file);
}
/// <summary>
/// Re-expresses the automaton of <c>(?i:a)</c> over a BV algebra built from the regex's
/// minterms, then checks the size of the determinized/minimized result and that 'a' and
/// 'A' map to the same atom.
/// </summary>
public void TestBVAlgebraBasicOperations()
{
    var css = new CharSetSolver();
    var ignoreCaseA = new Regex("(?i:a)");
    var aut = css.Convert("(?i:a)");

    // Build the BV algebra from the minterms of the symbolic regex.
    var symbolicRegex = css.RegexConverter.ConvertToSymbolicRegex(ignoreCaseA, true);
    var minterms = symbolicRegex.ComputeMinterms();
    var bvAlgebra = BVAlgebra.Create(css, minterms);

    // Translate each BDD predicate of the automaton into the BV algebra.
    var autBV = aut.ReplaceAlgebra<BV>(bdd => bvAlgebra.MapPredToBV(bdd, minterms), bvAlgebra);
    //autBV.ShowGraph("aut_BV");
    var autBVDet = autBV.Determinize().Minimize();
    //autBVDet.ShowGraph("aut_BV_det");

    Assert.AreEqual<int>(2, autBVDet.StateCount);
    Assert.AreEqual<int>(3, autBVDet.MoveCount);

    var lowerConstraint = bvAlgebra.MkCharConstraint('a');
    var lowerId = bvAlgebra.GetIdOfChar('a');
    var upperId = bvAlgebra.GetIdOfChar('A');

    Assert.AreEqual<int>(lowerId, upperId);
    Assert.AreEqual<BV>(lowerConstraint, bvAlgebra.atoms[lowerId]);
    Assert.AreEqual<int>(3, autBVDet.MoveCount);
    Assert.AreEqual<int>(2, bvAlgebra.atoms.Length);
}
/// <summary>
/// Checks that "ex x. last(x)" over {a, b} uses its variables correctly and yields a DFA
/// equivalent to <c>^(a|b)+$</c>; the DFA is then saved as a dot file.
/// </summary>
public void MSOLast()
{
    var solver = new CharSetSolver(BitWidth.BV64);
    var alphabet = new HashSet<char>(new List<char> { 'a', 'b' });

    // ex x. last(x)
    MSOFormula formula = new MSOExistsFO("x", new MSOLast("x"));
    Assert.IsTrue(formula.CheckUseOfVars());

    var dfa = formula.getDFA(alphabet, solver);
    var expected = solver.Convert(@"^(a|b)+$");
    Assert.IsTrue(dfa.IsEquivalentWith(expected, solver));

    // Per the original note, the .dot extension is added automatically when missing.
    string dotPath = "../../../MSOZ3Test/DotFiles/exlast";
    solver.SaveAsDot(dfa, "aut", dotPath);
}
/// <summary>
/// Grades the regex <c>(a|b)*</c> against <c>(a|b)+</c> over the alphabet {a, b} and prints
/// the grade and the feedback items as an HTML fragment.
/// </summary>
/// <exception cref="PDLException">
/// Either regex cannot be converted, or it accepts a string outside (a|b)*.
/// </exception>
public void LorisTest()
{
    HashSet<char> al = new HashSet<char>(new char[] { 'a', 'b' });
    CharSetSolver solver = new CharSetSolver(BitWidth.BV64);

    Automaton<BDD> aut1 = ConvertRegexOverAB(solver, "(a|b)*");
    Automaton<BDD> aut2 = ConvertRegexOverAB(solver, "(a|b)+");

    var feedbackGrade = DFAGrading.GetGrade(aut1, aut2, al, solver, 2000, 10, FeedbackLevel.Solution, false, true, true);

    // Build the feedback list with a StringBuilder instead of repeated concatenation.
    var feedString = new System.Text.StringBuilder("<ul>");
    foreach (var feed in feedbackGrade.Second)
    {
        feedString.AppendFormat("<li>{0}</li>", feed);
    }
    feedString.Append("</ul>");

    Console.WriteLine(string.Format("<div>Grade: {0} <br /> Feedback: {1}</div>", feedbackGrade.First, feedString));
}

/// <summary>
/// Converts <paramref name="rexpr"/>, anchored as <c>^(rexpr)$</c>, into an automaton and
/// verifies that it accepts nothing outside (a|b)*.
/// </summary>
/// <param name="solver">Solver used for the conversion and the language difference.</param>
/// <param name="rexpr">Unanchored regular expression.</param>
/// <returns>The automaton of the anchored expression.</returns>
/// <exception cref="PDLException">
/// The expression is malformed, or it accepts a string outside (a|b)*.
/// </exception>
private static Automaton<BDD> ConvertRegexOverAB(CharSetSolver solver, string rexpr)
{
    var escapedRexpr = string.Format("^({0})$", rexpr);

    Automaton<BDD> aut = null;
    try
    {
        aut = solver.Convert(escapedRexpr);
    }
    catch (ArgumentException e)
    {
        throw new PDLException("The input is not a well formatted regular expression.\n" + e.ToString());
    }
    catch (AutomataException e)
    {
        throw new PDLException("The input is not a well formatted regular expression.\n" + e.ToString());
    }

    // Anything left after removing (a|b)* uses a character outside the alphabet.
    var diff = aut.Minus(solver.Convert("^(a|b)*$"), solver);
    if (!diff.IsEmpty)
    {
        throw new PDLException("The regular expression should only accept strings over (a|b)*.");
    }

    return aut;
}
//private static void TestIgnoreCase()
//{
//    Microsoft.Automata.Utilities.IgnoreCaseRelationGenerator.Generate(
//        "Microsoft.Automata.Generated",
//        "IgnoreCaseRelation",
//        @"C:\GitHub\AutomataDotNet\Automata\src\Automata\Internal\Generated");
//}

/// <summary>
/// Benchmarks generated C++ matchers against .NET regexes. The regexes are converted to
/// automata, positive and negative sample strings are generated for each, the C++ code is
/// compiled, both engines are run over the samples, and throughput and speedup are printed.
/// </summary>
/// <param name="regexes">
/// Regexes to benchmark. Entries that are trivial (full or empty language) or that fail to
/// convert are replaced IN PLACE with <c>^dummy$</c>.
/// </param>
static void TestCppCodeGen(Regex[] regexes)
{
    Automaton<BDD>[] automata = new Automaton<BDD>[regexes.Length];
    Automaton<BDD>[] Cautomata = new Automaton<BDD>[regexes.Length];
    var solver = new CharSetSolver();

    #region convert the regexes to automata
    Console.Write("Converting {0} regexes to automata and minimizing the automata ...", regexes.Length);
    int t = System.Environment.TickCount;

    // An automaton is "full" when it is a single final state with one self-loop labelled True.
    Func<Automaton<BDD>, bool> IsFull = (a => a.StateCount == 1
        && a.IsFinalState(a.InitialState)
        && a.IsLoopState(a.InitialState)
        && a.GetMovesCountFrom(a.InitialState) == 1
        && a.GetMoveFrom(a.InitialState).Label.Equals(solver.True));

    for (int i = 0; i < regexes.Length; i++)
    {
        try
        {
            var aut = CppCodeGenerator.Regex2Automaton(solver, regexes[i]);
            automata[i] = aut;
            if (IsFull(automata[i]) || automata[i].IsEmpty)
            {
                // The original format string printed the index in the quotes and dropped
                // the regex argument; show both, as the failure message below does.
                Console.WriteLine("\nReplacing trivial regex {0}: \"{1}\" with \"^dummy$\"", i, regexes[i]);
                regexes[i] = new Regex("^dummy$");
                automata[i] = CppCodeGenerator.Regex2Automaton(solver, regexes[i]);
            }
        }
        catch (Exception e)
        {
            // Best effort: any conversion failure falls back to the dummy regex.
            Console.WriteLine("\nCoverting regex {0}: '{1}' failed, reason: {2}, replacing with \"^dummy$\"", i, regexes[i], e.Message);
            regexes[i] = new Regex("^dummy$");
            automata[i] = CppCodeGenerator.Regex2Automaton(solver, regexes[i]);
        }
    }
    t = System.Environment.TickCount - t;
    Console.WriteLine(string.Format(" done ({0}ms)", t));
    #endregion

    #region complement the automata
    t = System.Environment.TickCount;
    Console.Write("Creating complements of autmata ...");
    for (int i = 0; i < regexes.Length; i++)
    {
        Cautomata[i] = automata[i].Complement().Minimize();
    }
    t = System.Environment.TickCount - t;
    Console.WriteLine(string.Format(" done ({0}ms)", t));
    #endregion

    #region generate positive test strings
    Console.Write("Generating a positive test set for all automata ");
    t = System.Environment.TickCount;
    List<string[]> members = new List<string[]>();
    List<string[]> Cmembers = new List<string[]>();
    for (int id = 0; id < automata.Length; id++)
    {
        Console.Write(".");
        // Restrict members to characters \0-\x7F and to at most MaxStringLength characters.
        var M = automata[id].Intersect(solver.Convert("^[\0-\x7F]{0," + CodeGenTests.MaxStringLength + "}$", RegexOptions.Singleline)).Determinize();
        var tmp = new string[NrOfStrings];
        for (int i = 0; i < NrOfStrings; i++)
        {
            tmp[i] = solver.GenerateMemberUniformly(M);
        }
        members.Add(tmp);
    }
    t = System.Environment.TickCount - t;
    Console.WriteLine(string.Format(" done ({0}ms)", t));
    #endregion

    #region generate negative test strings
    t = System.Environment.TickCount;
    Console.Write("Generating a negative test set for all automata ");
    for (int id = 0; id < Cautomata.Length; id++)
    {
        Console.Write(".");
        //var M = Cautomata[id].Intersect(solver.Convert("^[^\uD800-\uDFFF]{0,100}$", RegexOptions.Singleline), solver).Determinize(solver);
        var M = Cautomata[id].Intersect(solver.Convert("^[\0-\uFFFF]{0,100}$", RegexOptions.Singleline)).Determinize();
        var tmp = new string[NrOfStrings];
        for (int i = 0; i < NrOfStrings; i++)
        {
            tmp[i] = solver.GenerateMemberUniformly(M);
        }
        Cmembers.Add(tmp);
    }
    t = System.Environment.TickCount - t;
    Console.WriteLine(string.Format(" done ({0}ms)", t));
    #endregion

    #region generate c++
    int t2 = System.Environment.TickCount;
    CppTest.Compile(automata, solver, true);
    t2 = System.Environment.TickCount - t2;
    Console.WriteLine(string.Format(" done ({0}ms)", t2));
    #endregion

    #region convert the test strings to UTF8
    List<byte[][]> membersUTF8 = new List<byte[][]>();
    List<byte[][]> CmembersUTF8 = new List<byte[][]>();
    for (int id = 0; id < automata.Length; id++)
    {
        var tmp = new byte[NrOfStrings][];
        for (int i = 0; i < NrOfStrings; i++)
        {
            tmp[i] = Encoding.UTF8.GetBytes(members[id][i]);
        }
        membersUTF8.Add(tmp);
    }
    for (int id = 0; id < Cautomata.Length; id++)
    {
        var tmp = new byte[NrOfStrings][];
        for (int i = 0; i < NrOfStrings; i++)
        {
            tmp[i] = Encoding.UTF8.GetBytes(Cmembers[id][i]);
        }
        CmembersUTF8.Add(tmp);
    }
    #endregion

    #region compute tot nr of bits
    double bits = 0;
    for (int id = 0; id < automata.Length; id++)
    {
        int nrBytes = 0;
        for (int i = 0; i < NrOfStrings; i++)
        {
            nrBytes += membersUTF8[id][i].Length + CmembersUTF8[id][i].Length;
        }
        bits += (nrBytes * 8.0);
    }
    bits = bits * CodeGenTests.Repetitions; //repeated Reps times
    #endregion

    #region run c++ tests
    Console.Write("Running c++ tests ... ");
    double totsec_cpp = 0;
    for (int id = 0; id < automata.Length; id++)
    {
        double sec_cpp = 0;
        // Only the elapsed seconds are used; the accepted counts are not checked here.
        CppTest.Test(true, id, membersUTF8[id], members[id], out sec_cpp);
        totsec_cpp += sec_cpp;
        CppTest.Test(false, id, CmembersUTF8[id], Cmembers[id], out sec_cpp);
        totsec_cpp += sec_cpp;
    }
    double bps_cpp = bits / totsec_cpp;
    double mbps_cpp = (bps_cpp / 1000000.0);
    int Mbps_cpp = (int)Math.Round(mbps_cpp);
    Console.WriteLine(string.Format("{0}sec, throughput = {1}Mbps", totsec_cpp, Mbps_cpp));
    #endregion

    #region run .NET tests
    Console.Write("Running .NET tests ... ");
    double totsec_net = 0;
    for (int id = 0; id < automata.Length; id++)
    {
        DotNetTest.Compile(regexes[id]); //make sure each regex is precompiled
        double sec_net;
        DotNetTest.Test(true, members[id], out sec_net);
        totsec_net += sec_net;
        DotNetTest.Test(false, Cmembers[id], out sec_net);
        totsec_net += sec_net;
    }
    double bps_net = bits / totsec_net;
    double mbps_net = (bps_net / 1000000.0);
    int Mbps_net = (int)Math.Round(mbps_net);
    Console.WriteLine(string.Format("{0}sec, throughput = {1}Mbps", totsec_net, Mbps_net));
    #endregion

    Console.WriteLine(string.Format("speedup (.NET-time/c++-time) = {0}X", ((int)Math.Round(totsec_net / totsec_cpp))));
}
/// <summary>
/// For a prefix of the sample regex file (excluding known-problematic entries), generates a
/// C# acceptor from the minimized automaton and cross-checks it and the regex engine on
/// randomly generated positive and negative samples. Mismatches and timeouts are written to
/// the console; they do not fail the test.
/// </summary>
public void gen_chsarp_TestSampleRegexes2csharp()
{
    var solver = new CharSetSolver(BitWidth.BV16);
    List<string> regexesAll = new List<string>(File.ReadAllLines(regexesFile));
    List<int> timedout = new List<int>();

    // A hash set gives constant-time lookups in the filter loop below.
    HashSet<int> excluded = new HashSet<int>(new int[] { 36, 50, 64, 65, 162, 166, 210, 238, 334, 355, 392, 455, 471, 490, 594, 611, 612, 671, 725, 731, 741, 760, 775, 800, 852, 870, 873, 880, 882, 893, 923, 991, 997, 1053, 1062, 1164, 1220, 1228, 1273, 1318, 1339, 1352, 1386, 1404, 1413, 1414, 1423, 1424, 1429, 1431, 1434, 1482, 1487, 1516, 1517, 1518, 1519, 1520, 1537, 1565, 1566, 1635, 1744, 1749, 1829, 1868 });

    List<string> regexes = new List<string>();
    for (int i = 1; i < regexesAll.Count; i++)
    {
        if (!excluded.Contains(i))
        {
            regexes.Add(regexesAll[i]);
        }
    }

    int K = 10; //number of pos/neg strings to be generated for each regex

    // Never index past the end when fewer than 100 regexes survive the filter.
    int upperBound = Math.Min(100, regexes.Count);
    for (int i = 1; i < upperBound; i++)
    {
        try
        {
            var regex = regexes[i];
            var aut = solver.Convert(regex, RegexOptions.Singleline);
            var autDet = aut.Determinize(2000);
            var autMin = autDet.Minimize();
            // NOTE(review): the complement is taken from aut, not from autMin - confirm intended.
            var autMinC = aut.Complement();
            if (autMin.IsEmpty || autMinC.IsEmpty || autMinC.IsEpsilon)
            {
                continue;
            }

            CheckIsClean(autMin);

            //var autMinExpr = z3.ConvertAutomatonGuardsToExpr(autMin);
            //var sfa = new SFA<FuncDecl, Expr, Sort>(z3, z3.CharacterSort, autMinExpr);
            //var stbb = new STBuilder<FuncDecl, Expr, Sort>(z3);
            //var st = ST<FuncDecl, Expr, Sort>.SFAtoST(sfa);
            //var stb = st.ToSTb();
            ////var csAcceptor = stb.Compile("RegexTransfomer", "SampleAcceptor", false, true);

            var csAcceptor = solver.ToCS(autMin);

            HashSet<string> posSamples = new HashSet<string>();
            HashSet<string> negSamples = new HashSet<string>();

            // Positive samples: members of autMin at most 3x as long as its shortest final path.
            int k = autMin.FindShortestFinalPath(autMin.InitialState).Item1.Length;
            var maxLengthAut = solver.Convert("^.{0," + (3 * k) + "}$").Determinize().Minimize();
            int tries = 0;
            var aut1 = autMin.Intersect(maxLengthAut);
            while (posSamples.Count < K && tries < 10 * K)
            {
                var s = solver.GenerateMemberUniformly(aut1);
                // Every rejected sample (trailing newline or duplicate) counts as a try, so
                // the loop terminates even if only newline-terminated strings are generated.
                if (s.EndsWith("\n", StringComparison.Ordinal) || !posSamples.Add(s))
                {
                    tries++;
                }
            }

            // Negative samples: drawn the same way from the complement.
            tries = 0;
            int k2 = autMinC.FindShortestFinalPath(autMinC.InitialState).Item1.Length;
            var maxLengthAut2 = solver.Convert("^.{0," + (3 * k2) + "}$").Determinize().Minimize();
            var autMinCprefix = autMinC.Intersect(maxLengthAut2);
            while (negSamples.Count < K && tries < 10 * K)
            {
                var s = solver.GenerateMemberUniformly(autMinCprefix);
                if (s.EndsWith("\n", StringComparison.Ordinal) || !negSamples.Add(s))
                {
                    tries++;
                }
            }

            foreach (string s in posSamples)
            {
                if (!RexEngine.IsMatch(s, regex, RegexOptions.Singleline))
                {
                    Console.WriteLine("match expected regex:" + i);
                    break;
                }
                if (!csAcceptor.IsMatch(s))
                {
                    Console.WriteLine("match expected regex:" + i);
                    break;
                }
            }

            foreach (string s in negSamples)
            {
                if (RexEngine.IsMatch(s, regex, RegexOptions.Singleline))
                {
                    Console.WriteLine("mismatch expected regex:" + i);
                    break;
                }
                if (csAcceptor.IsMatch(s))
                {
                    Console.WriteLine("mismatch expected regex:" + i);
                    break;
                }
            }
        }
        catch (TimeoutException)
        {
            // Timeouts are reported and recorded; the run moves on to the next regex.
            Console.WriteLine("timeout regex:" + i);
            timedout.Add(i);
            continue;
        }
    }
}
/// <summary>
/// Checks that the generated UTF-8 encoders agree with the reference encoder on one surrogate
/// pair and on generated inputs without surrogates, then prints rough timings for each encoder.
/// </summary>
public void TestGeneratedUtf8EncodeFlat()
{
    int sampleCount = 100;    //number of strings
    int sampleLength = 10000; //length of each string

    // Smoke check on a single surrogate pair before the bulk comparison.
    string flatPair;
    string fullPair;
    string actualPair;
    TryGeneratedUtf8EncodeFlat("\uDAE1\uDCA5", out flatPair);
    TryGeneratedUtf8Encode_F("\uDAE1\uDCA5", out fullPair);
    TryActualUtf8Encode("\uDAE1\uDCA5", out actualPair);
    Assert.AreEqual<string>(flatPair, fullPair);
    Assert.AreEqual<string>(flatPair, actualPair);

    CharSetSolver css = new CharSetSolver(BitWidth.BV16);
    var fixedLength = css.Convert("^.{" + sampleLength + "}$");
    //var utf16 = css.Convert(@"^([\0-\uD7FF\uE000-\uFFFD]|([\uD800-\uDBFF][\uDC00-\uDFFF]))*$");
    //var utf16 = css.Convert(@"^([\uD800-\uDBFF][\uDC00-\uDFFF])*$");
    var utf16 = css.Convert(@"^([\0-\uD7FF\uE000-\uFFFD])*$");
    var inputLanguage = Automaton<BDD>.MkProduct(fixedLength, utf16);
    //css.Chooser.RandomSeed = 123;

    string[] inputs = new string[sampleCount];
    for (int idx = 0; idx < inputs.Length; idx++)
    {
        inputs[idx] = css.GenerateMember(inputLanguage);
    }

    // Correctness: every generated encoder must produce the reference output.
    foreach (string input in inputs)
    {
        string expected;
        string flat;
        string stream;
        string full;
        TryActualUtf8Encode(input, out expected);
        TryGeneratedUtf8EncodeFlat(input, out flat);
        TryGeneratedUtf8EncodeStream(input, out stream);
        TryGeneratedUtf8Encode_F(input, out full);
        Assert.AreEqual<string>(expected, full);
        Assert.AreEqual<string>(expected, flat);
        Assert.AreEqual<string>(expected, stream);
    }

    // Timing: one pass over all inputs per encoder.
    int started = System.Environment.TickCount;
    foreach (string input in inputs)
    {
        string ignored;
        TryGeneratedUtf8EncodeFlat(input, out ignored);
    }
    int timeOur = System.Environment.TickCount - started;

    started = System.Environment.TickCount;
    foreach (string input in inputs)
    {
        string ignored;
        TryGeneratedUtf8EncodeStream(input, out ignored);
    }
    int timeOurStream = System.Environment.TickCount - started;

    started = System.Environment.TickCount;
    foreach (string input in inputs)
    {
        string ignored;
        TryGeneratedUtf8Encode(input, out ignored);
    }
    int timeOurOrig = System.Environment.TickCount - started;

    started = System.Environment.TickCount;
    foreach (string input in inputs)
    {
        string ignored;
        TryActualUtf8Encode(input, out ignored);
    }
    int timeSys = System.Environment.TickCount - started;

    Console.WriteLine("timeOurStream:{3}ms, timeOur:{0}ms, timeOurOrig:{1}ms, timeSys:{2}ms", timeOur, timeOurOrig, timeSys, timeOurStream);
}
/// <summary>
/// Converts <paramref name="regex"/> to a minimized automaton, compiles it to C# via
/// <c>solver.ToCS</c>, prints the original and minimized regex, and times 100000 matches of
/// the original regex versus the generated matcher on one member and one non-member string.
/// Waits for a key press before returning.
/// </summary>
static void TestRegex(Regex regex)
{
    var solver = new CharSetSolver();
    string pattern = regex.ToString();

    //Regex.CompileToAssembly(new RegexCompilationInfo[] { new RegexCompilationInfo(myregex, RegexOptions.None, "EvilRegex", "RegexTransfomer", true) },
    //    new System.Reflection.AssemblyName("EvilRegex"));

    var epsilonFree = solver.Convert(pattern, regex.Options).RemoveEpsilons();
    var deterministic = epsilonFree.Determinize();
    var minimal = deterministic.Minimize();

    //solver.ShowGraph(sfa, "sfa");
    //solver.ShowGraph(sfaDet, "sfaDet");
    //solver.ShowGraph(sfaMin, "sfaMin");

    var matcher = solver.ToCS(minimal, true, "MyRegex", "RegexTransfomer");
    var minimalPattern = solver.ConvertToRegex(minimal);

    Console.WriteLine("------- given regex --------");
    Console.WriteLine(pattern);
    Console.WriteLine("----------------------------");
    Console.WriteLine("-------- regexMin ----------");
    Console.WriteLine(minimalPattern);
    Console.WriteLine("----------------------------");
    Console.WriteLine("-------- cs ----------------");
    // Console.WriteLine(cs.SourceCode);
    Console.WriteLine("----------------------------");

    string member = solver.GenerateMember(minimal);
    string nonMember = solver.GenerateMember(minimal.Complement());

    // First round uses the accepted string, second round the rejected one.
    foreach (string candidate in new string[] { member, nonMember })
    {
        //original regex
        int regexStart = System.Environment.TickCount;
        bool regexResult = false;
        for (int rep = 0; rep < 100000; rep++)
        {
            regexResult = Regex.IsMatch(candidate, regex.ToString(), regex.Options);
        }
        //res1 = evilregex.IsMatch(s);
        int regexMs = System.Environment.TickCount - regexStart;

        ////minimized regex
        //t2 = System.Environment.TickCount;
        //bool res2 = false;
        //for (int j = 0; j < 100000; j++)
        //    res2 = Regex.IsMatch(s, regexMin, regex.Options);
        //t2 = System.Environment.TickCount - t2;

        //code from minimized regex
        int codeStart = System.Environment.TickCount;
        bool codeResult = false;
        for (int rep = 0; rep < 100000; rep++)
        {
            codeResult = matcher.IsMatch(candidate);
        }
        int codeMs = System.Environment.TickCount - codeStart;

        Console.WriteLine(String.Format("{0}ms({1}), {2}ms({3})", regexMs, regexResult, codeMs, codeResult));
    }

    Console.WriteLine("done...(press any key)");
    Console.ReadKey();
}
//check if delta(S,T,c) exists
/// <summary>
/// Searches breadth-first for a string that takes <paramref name="aut"/> from state
/// <paramref name="S"/> to state <paramref name="T"/> and is also accepted by the automaton
/// built from the escaped character <paramref name="c"/>.
/// </summary>
/// <param name="c">Character whose escaped form is converted to the constraining automaton.</param>
/// <param name="S">Start state; used as the initial state of the restricted automaton.</param>
/// <param name="T">Target state; used as the only final state of the restricted automaton.</param>
/// <param name="aut">Automaton whose moves are reused.</param>
/// <param name="limit">States whose distance from the start exceeds this value are not expanded.</param>
/// <param name="solver">Solver used to build automata and to pick witness characters.</param>
/// <returns>The empty string when S equals T; otherwise the string spelled along the path to the first accepting state discovered.</returns>
/// <exception cref="AutomataException">Thrown when the search ends without reaching an accepting state.</exception>
static string ShortStringStoTwithC(char c, int S, int T, Automaton<BDD> aut, int limit, CharSetSolver solver)
{
    if (S == T)
    {
        return "";
    }

    var aut1 = Automaton<BDD>.Create(solver, S, new int[] { T }, aut.GetMoves());
    // NOTE(review): the pattern is unanchored, so this presumably accepts any string containing c - confirm against Convert.
    var autR = solver.Convert(System.Text.RegularExpressions.Regex.Escape(c.ToString()));
    var contst = aut1.Intersect(autR).Determinize().Minimize();
    var finst = contst.GetFinalStates();

    // Witness string and distance recorded for every discovered state.
    var strings = new Dictionary<int, string>();
    var dist = new Dictionary<int, int>();
    var visited = new HashSet<int>();
    // FIFO frontier; a Queue avoids the linear cost of removing from the front of a list.
    var toVisit = new Queue<int>();

    strings[contst.InitialState] = "";
    dist[contst.InitialState] = 0;
    visited.Add(contst.InitialState);
    toVisit.Enqueue(contst.InitialState);

    while (toVisit.Count > 0)
    {
        var curr = toVisit.Dequeue();
        if (dist[curr] <= limit)
        {
            foreach (var move in contst.GetMovesFrom(curr))
            {
                if (!visited.Contains(move.TargetState))
                {
                    dist[move.TargetState] = dist[move.SourceState] + 1;
                    visited.Add(move.TargetState);
                    toVisit.Enqueue(move.TargetState);

                    // Take the first character offered for the move label; 'a' is the fallback when none is produced.
                    char wit = 'a';
                    foreach (var w in solver.GenerateAllCharacters(move.Label, false))
                    {
                        wit = w;
                        break;
                    }

                    strings[move.TargetState] = strings[move.SourceState] + wit;
                    if (finst.Contains(move.TargetState))
                    {
                        return strings[move.TargetState];
                    }
                }
            }
        }
    }

    throw new AutomataException("this code shouldn't be reachable");
}
/// <summary>
/// Converts the bare word-boundary pattern to an epsilon-free, determinized, minimized
/// automaton and passes it to <c>CheckValidity</c> together with the equivalent .NET regex.
/// </summary>
public void TestTrivialWordBoundary1()
{
    string pattern = @"\b";
    var solver = new CharSetSolver(BitWidth.BV7);

    var epsilonFree = solver.Convert(pattern).RemoveEpsilons();
    var minimal = epsilonFree.Determinize().Minimize();
    //css.ShowGraph(aut, "TrivialWordBoundary1");

    var reference = new Regex(pattern, RegexOptions.Singleline);
    CheckValidity(solver, minimal, reference);
}
/// <summary>
/// Parses two regular expressions over the alphabet {a, b}, verifies that neither accepts
/// anything outside (a|b)*, grades the first against the second with
/// <c>DFAGrading.GetGrade</c>, and prints the grade and feedback as an HTML fragment.
/// </summary>
public void LorisTest()
{
    HashSet<char> al = new HashSet<char>(new char[] { 'a', 'b' });
    CharSetSolver solver = new CharSetSolver(BitWidth.BV64);

    // The two expressions are handled identically, in order: parse, then alphabet check.
    string[] expressions = new string[] { "(a|b)*", "(a|b)+" };
    Automaton<BDD>[] parsed = new Automaton<BDD>[expressions.Length];
    for (int n = 0; n < expressions.Length; n++)
    {
        var anchored = string.Format("^({0})$", expressions[n]);
        try
        {
            parsed[n] = solver.Convert(anchored);
        }
        catch (ArgumentException e)
        {
            throw new PDLException("The input is not a well formatted regular expression.\n" + e.ToString());
        }
        catch (AutomataException e)
        {
            throw new PDLException("The input is not a well formatted regular expression.\n" + e.ToString());
        }

        // Anything left after subtracting (a|b)* uses a symbol outside the alphabet.
        var outsideAlphabet = parsed[n].Minus(solver.Convert("^(a|b)*$"), solver);
        if (!outsideAlphabet.IsEmpty)
        {
            throw new PDLException("The regular expression should only accept strings over (a|b)*.");
        }
    }

    var feedbackGrade = DFAGrading.GetGrade(parsed[0], parsed[1], al, solver, 2000, 10, FeedbackLevel.Solution, false, true, true);

    var items = new System.Text.StringBuilder("<ul>");
    foreach (var feed in feedbackGrade.Second)
    {
        items.AppendFormat("<li>{0}</li>", feed);
    }
    items.Append("</ul>");
    var feedString = items.ToString();

    Console.WriteLine(string.Format("<div>Grade: {0} <br /> Feedback: {1}</div>", feedbackGrade.First, feedString));
}
//private static void TestIgnoreCase()
//{
//    Microsoft.Automata.Internal.Utilities.IgnoreCaseRelationGenerator.Generate(
//        "Microsoft.Automata.Internal.Generated",
//        "IgnoreCaseRelation",
//        @"C:\GitHub\AutomataDotNet\Automata\src\Automata\Internal\Generated");
//}

/// <summary>
/// Benchmarks generated C++ matchers against .NET regexes: converts <paramref name="regexes"/>
/// to automata (replacing trivial or failing ones with "^dummy$" in place), generates
/// positive and negative test strings, runs both test harnesses, and prints throughput and
/// the resulting speedup.
/// </summary>
/// <param name="regexes">Regexes to benchmark; entries may be overwritten with the dummy regex.</param>
static void TestCppCodeGen(Regex[] regexes)
{
    Automaton<BDD>[] automata = new Automaton<BDD>[regexes.Length];
    Automaton<BDD>[] Cautomata = new Automaton<BDD>[regexes.Length];
    var solver = new CharSetSolver();

    #region convert the regexes to automata
    Console.Write("Converting {0} regexes to automata and minimizing the automata ...", regexes.Length);
    int t = System.Environment.TickCount;
    // True for a one-state automaton whose single move is a final self-loop labelled solver.True.
    Func<Automaton<BDD>, bool> IsFull = (a => a.StateCount == 1 && a.IsFinalState(a.InitialState) && a.IsLoopState(a.InitialState) && a.GetMovesCountFrom(a.InitialState) == 1 && a.GetMoveFrom(a.InitialState).Label.Equals(solver.True));
    for (int i = 0; i < regexes.Length; i++)
    {
        try
        {
            var aut = CppCodeGenerator.Regex2Automaton(solver, regexes[i]);
            automata[i] = aut;
            if (IsFull(automata[i]) || automata[i].IsEmpty)
            {
                // Report both the index and the regex; the old format string had a single
                // placeholder and therefore printed only the index.
                Console.WriteLine("\nReplacing trivial regex {0}: \"{1}\" with \"^dummy$\"", i, regexes[i]);
                regexes[i] = new Regex("^dummy$");
                automata[i] = CppCodeGenerator.Regex2Automaton(solver, regexes[i]);
            }
        }
        catch (Exception e)
        {
            Console.WriteLine("\nCoverting regex {0}: '{1}' failed, reason: {2}, replacing with \"^dummy$\"", i, regexes[i], e.Message);
            regexes[i] = new Regex("^dummy$");
            automata[i] = CppCodeGenerator.Regex2Automaton(solver, regexes[i]);
        }
    }
    t = System.Environment.TickCount - t;
    Console.WriteLine(string.Format(" done ({0}ms)", t));
    #endregion

    #region complement the automata
    t = System.Environment.TickCount;
    Console.Write("Creating complements of autmata ...");
    for (int i = 0; i < regexes.Length; i++)
    {
        Cautomata[i] = automata[i].Complement().Minimize();
    }
    t = System.Environment.TickCount - t;
    Console.WriteLine(string.Format(" done ({0}ms)", t));
    #endregion

    #region generate positive test strings
    Console.Write("Generating a positive test set for all automata ");
    t = System.Environment.TickCount;
    List<string[]> members = new List<string[]>();
    List<string[]> Cmembers = new List<string[]>();
    for (int id = 0; id < automata.Length; id++)
    {
        Console.Write(".");
        var M = automata[id].Intersect(solver.Convert("^[\0-\x7F]{0," + CodeGenTests.MaxStringLength + "}$", RegexOptions.Singleline)).Determinize();
        var tmp = new string[NrOfStrings];
        for (int i = 0; i < NrOfStrings; i++)
        {
            tmp[i] = solver.GenerateMemberUniformly(M);
            //if (i % 10 == 0)
            //    Console.Write(".");
        }
        members.Add(tmp);
    }
    t = System.Environment.TickCount - t;
    Console.WriteLine(string.Format(" done ({0}ms)", t));
    #endregion

    #region generate negative test strings
    t = System.Environment.TickCount;
    Console.Write("Generating a negative test set for all automata ");
    for (int id = 0; id < Cautomata.Length; id++)
    {
        Console.Write(".");
        //var M = Cautomata[id].Intersect(solver.Convert("^[^\uD800-\uDFFF]{0,100}$", RegexOptions.Singleline), solver).Determinize(solver);
        var M = Cautomata[id].Intersect(solver.Convert("^[\0-\uFFFF]{0,100}$", RegexOptions.Singleline)).Determinize();
        var tmp = new string[NrOfStrings];
        for (int i = 0; i < NrOfStrings; i++)
        {
            tmp[i] = solver.GenerateMemberUniformly(M);
            //if (i % 10 == 0)
            //    Console.Write(".");
        }
        Cmembers.Add(tmp);
    }
    t = System.Environment.TickCount - t;
    Console.WriteLine(string.Format(" done ({0}ms)", t));
    #endregion

    #region generate c++
    int t2 = System.Environment.TickCount;
    CppTest.Compile(automata, solver, true);
    t2 = System.Environment.TickCount - t2;
    Console.WriteLine(string.Format(" done ({0}ms)", t2));
    #endregion

    #region convert the test strings to UTF8
    List<byte[][]> membersUTF8 = new List<byte[][]>();
    List<byte[][]> CmembersUTF8 = new List<byte[][]>();
    for (int id = 0; id < automata.Length; id++)
    {
        var tmp = new byte[NrOfStrings][];
        for (int i = 0; i < NrOfStrings; i++)
        {
            tmp[i] = Encoding.UTF8.GetBytes(members[id][i]);
        }
        membersUTF8.Add(tmp);
    }
    for (int id = 0; id < Cautomata.Length; id++)
    {
        var tmp = new byte[NrOfStrings][];
        for (int i = 0; i < NrOfStrings; i++)
        {
            tmp[i] = Encoding.UTF8.GetBytes(Cmembers[id][i]);
        }
        CmembersUTF8.Add(tmp);
    }
    #endregion

    #region compute tot nr of bits
    double bits = 0;
    for (int id = 0; id < automata.Length; id++)
    {
        int nrBytes = 0;
        for (int i = 0; i < NrOfStrings; i++)
        {
            nrBytes += membersUTF8[id][i].Length + CmembersUTF8[id][i].Length;
        }
        bits += (nrBytes * 8.0);
    }
    bits = bits * CodeGenTests.Repetitions; //repeated Reps times
    #endregion

    #region run c++ tests
    Console.Write("Running c++ tests ... ");
    double totsec_cpp = 0;
    for (int id = 0; id < automata.Length; id++)
    {
        // The returned accept counts are not used here; only the elapsed seconds are accumulated.
        double sec_cpp = 0;
        CppTest.Test(true, id, membersUTF8[id], members[id], out sec_cpp);
        totsec_cpp += sec_cpp;
        CppTest.Test(false, id, CmembersUTF8[id], Cmembers[id], out sec_cpp);
        totsec_cpp += sec_cpp;
    }
    double bps_cpp = bits / totsec_cpp;
    double mbps_cpp = (bps_cpp / 1000000.0);
    int Mbps_cpp = (int)Math.Round(mbps_cpp);
    Console.WriteLine(string.Format("{0}sec, throughput = {1}Mbps", totsec_cpp, Mbps_cpp));
    #endregion

    #region run .NET tests
    Console.Write("Running .NET tests ... ");
    double totsec_net = 0;
    for (int id = 0; id < automata.Length; id++)
    {
        DotNetTest.Compile(regexes[id]); //make sure each regex is precompiled
        double sec_net;
        DotNetTest.Test(true, members[id], out sec_net);
        totsec_net += sec_net;
        DotNetTest.Test(false, Cmembers[id], out sec_net);
        totsec_net += sec_net;
    }
    double bps_net = bits / totsec_net;
    double mbps_net = (bps_net / 1000000.0);
    int Mbps_net = (int)Math.Round(mbps_net);
    Console.WriteLine(string.Format("{0}sec, throughput = {1}Mbps", totsec_net, Mbps_net));
    #endregion

    Console.WriteLine(string.Format("speedup (.NET-time/c++-time) = {0}X", ((int)Math.Round(totsec_net / totsec_cpp))));
}
/// <summary>
/// Generates 1000 inputs of well-formed UTF-16 containing at least 50 characters and checks
/// that the three generated CSS encoders produce the same output as the reference encoder and
/// the same status as each other. Prints how the reference status codes were distributed.
/// </summary>
public void TestGeneratedCssEncode()
{
    CharSetSolver css = new CharSetSolver(BitWidth.BV16);
    var longEnough = css.Convert(".{50,}"); //pattern requires 50 or more characters
    var utf16 = css.Convert(@"^([\0-\uD7FF\uE000-\uFFFD]|([\uD800-\uDBFF][\uDC00-\uDFFF]))*$");
    var inputLanguage = longEnough.Intersect(utf16);
    //css.Chooser.RandomSeed = 123;

    int okCnt = 0;
    int error1Cnt = 0;
    int error2Cnt = 0;
    int diffErrors = 0;

    for (int round = 0; round < 1000; round++)
    {
        string input = css.GenerateMember(inputLanguage);

        string expected;
        string plain;
        string boolean;
        string full;
        int expectedStatus = TryActualCssEncode(input, out expected);
        int plainStatus = TryGeneratedCssEncode(input, out plain);
        int booleanStatus = TryGeneratedCssEncode_B(input, out boolean);
        int fullStatus = TryGeneratedCssEncode_F(input, out full);

        Assert.AreEqual<string>(expected, plain);
        Assert.AreEqual<string>(expected, boolean);
        Assert.AreEqual<string>(expected, full);
        Assert.AreEqual<int>(plainStatus, booleanStatus);
        Assert.AreEqual<int>(plainStatus, fullStatus);

        if (expectedStatus != plainStatus)
        {
            diffErrors += 1;
        }

        // Bucket by the reference status: 0, 1, or anything else.
        switch (expectedStatus)
        {
            case 0:
                okCnt += 1;
                break;
            case 1:
                error1Cnt += 1;
                break;
            default:
                error2Cnt += 1;
                break;
        }
    }

    Console.WriteLine("okCnt={0}, error1Cnt={1}, error2Cnt={2}, diffErrors={3}", okCnt, error1Cnt, error2Cnt, diffErrors);
}
/// <summary>
/// Times the AntiXss CSS encoder against the three generated encoders on 100 generated
/// strings of at least 100 characters, repeated 100 times, and prints the averages and the
/// standard deviations.
/// </summary>
public void TestGeneratedCssEncodePerformance()
{
    CharSetSolver css = new CharSetSolver(BitWidth.BV16);
    var longEnough = css.Convert("^.{100,}$"); //pattern requires 100 or more characters
    var utf16 = css.Convert(@"^([\0-\uD7FF\uE000-\uFFFD]|([\uD800-\uDBFF][\uDC00-\uDFFF]))*$");
    var inputLanguage = longEnough.Intersect(utf16);
    //css.Chooser.RandomSeed = 123;

    //construct a sample set of 100 strings accepted by the automaton above
    List<string> samples = new List<string>();
    for (int n = 0; n < 100; n++)
    {
        samples.Add(css.GenerateMember(inputLanguage)); //margus
        // if (TryActualCssEncode(input, out tmp) == 0)
        //    samples.Add(input);
    }

    //now use the sample set for performance comparison
    var antiXssTimes = new List<int>();
    var CssEncodeTimes = new List<int>();
    var CssEncodeTimes_B = new List<int>();
    var CssEncodeTimes_F = new List<int>();
    int NrOfReps = 100;
    for (int rep = 0; rep < NrOfReps; rep++)
    {
        //the AntiXss encoder
        int started = System.Environment.TickCount;
        foreach (string sample in samples)
        {
            System.Web.Security.AntiXss.AntiXssEncoder.CssEncode(sample);
        }
        antiXssTimes.Add(System.Environment.TickCount - started);

        //generated encoder without exploration
        started = System.Environment.TickCount;
        foreach (string sample in samples)
        {
            CssEncode.Apply(sample);
        }
        CssEncodeTimes.Add(System.Environment.TickCount - started);

        //generated encoder with Boolean exploration
        started = System.Environment.TickCount;
        foreach (string sample in samples)
        {
            CssEncode_B.Apply(sample);
        }
        CssEncodeTimes_B.Add(System.Environment.TickCount - started);

        //generated encoder with Full exploration
        started = System.Environment.TickCount;
        foreach (string sample in samples)
        {
            CssEncode_F.Apply(sample);
        }
        CssEncodeTimes_F.Add(System.Environment.TickCount - started);
    }

    //compute the average times
    int antiXssTime = ComputeAverage(antiXssTimes);
    int CssEncodeTime = ComputeAverage(CssEncodeTimes);
    int CssEncodeTime_B = ComputeAverage(CssEncodeTimes_B);
    int CssEncodeTime_F = ComputeAverage(CssEncodeTimes_F);
    double[] stdevs = CombinedStandardDeviation(antiXssTimes, CssEncodeTimes, CssEncodeTimes_B, CssEncodeTimes_F);

    Console.WriteLine("antiXssTime={0}, CssEncodeTime={1}, CssEncodeTime_B={2}, CssEncodeTime_F={3}, stddvAntiXSS={4}, stddvCssEncode={5}, stddvCssEncodeB={6}, stddvCssEncodeF={7}", antiXssTime, CssEncodeTime, CssEncodeTime_B, CssEncodeTime_F, stdevs[0], stdevs[1], stdevs[2], stdevs[3]);
}
/// <summary>
/// Reads regexes from <c>regexesFile</c>, converts a window of them to minimized automata,
/// compiles each automaton with <c>solver.ToCS</c>, and cross-checks generated member and
/// non-member strings against both <c>RexEngine.IsMatch</c> and the compiled acceptor.
/// Disagreements and timeouts are only reported on the console.
/// </summary>
public void gen_chsarp_TestSampleRegexes2csharp()
{
    var solver = new CharSetSolver(BitWidth.BV16);
    List<string> regexesAll = new List<string>(File.ReadAllLines(regexesFile));
    List<int> timedout = new List<int>();
    // Line indices that are skipped; a set keeps the membership test constant-time.
    HashSet<int> excluded = new HashSet<int>(new int[] { 36, 50, 64, 65, 162, 166, 210, 238, 334, 355, 392, 455, 471, 490, 594, 611, 612, 671, 725, 731, 741, 760, 775, 800, 852, 870, 873, 880, 882, 893, 923, 991, 997, 1053, 1062, 1164, 1220, 1228, 1273, 1318, 1339, 1352, 1386, 1404, 1413, 1414, 1423, 1424, 1429, 1431, 1434, 1482, 1487, 1516, 1517, 1518, 1519, 1520, 1537, 1565, 1566, 1635, 1744, 1749, 1829, 1868 });
    List<string> regexes = new List<string>();
    for (int i = 1; i < regexesAll.Count; i++)
    {
        if (!excluded.Contains(i))
        {
            regexes.Add(regexesAll[i]);
        }
    }

    int K = 50; //number of pos/neg strings to be generated for each regex
    // Do not index past the end of the filtered list when the input file is short.
    int upperBound = Math.Min(100, regexes.Count);
    for (int i = 1; i < upperBound; i++)
    {
        try
        {
            var regex = regexes[i];
            var aut = solver.Convert(regex, RegexOptions.Singleline);
            var autDet = aut.Determinize(2000);
            var autMin = autDet.Minimize();
            var autMinC = aut.Complement();
            if (autMin.IsEmpty || autMinC.IsEmpty)
            {
                continue;
            }

            CheckIsClean(autMin);

            //var autMinExpr = z3.ConvertAutomatonGuardsToExpr(autMin);
            //var sfa = new SFA<FuncDecl, Expr, Sort>(z3, z3.CharacterSort, autMinExpr);
            //var stbb = new STBuilder<FuncDecl, Expr, Sort>(z3);
            //var st = ST<FuncDecl, Expr, Sort>.SFAtoST(sfa);
            //var stb = st.ToSTb();
            ////var csAcceptor = stb.Compile("RegexTransfomer", "SampleAcceptor", false, true);
            var csAcceptor = solver.ToCS(autMin);

            HashSet<string> posSamples = new HashSet<string>();
            HashSet<string> negSamples = new HashSet<string>();

            // Positive samples are limited to three times the length of a shortest accepting path.
            int k = autMin.FindShortestFinalPath(autMin.InitialState).Item1.Length;
            var maxLengthAut = solver.Convert("^.{0," + (3 * k) + "}$").Determinize().Minimize();
            int tries = 0;
            var aut1 = autMin.Intersect(maxLengthAut);
            while (posSamples.Count < K && tries < 10 * K)
            {
                var s = solver.GenerateMemberUniformly(aut1);
                // Every rejected candidate counts as a try; otherwise a language whose members
                // all end with a newline would keep this loop running forever.
                if (s.EndsWith("\n", StringComparison.Ordinal) || !posSamples.Add(s))
                {
                    tries++;
                }
            }

            tries = 0;
            // The path search must start from the complement's own initial state, not autMin's.
            int k2 = autMinC.FindShortestFinalPath(autMinC.InitialState).Item1.Length;
            var maxLengthAut2 = solver.Convert("^.{0," + (3 * k2) + "}$").Determinize().Minimize();
            var autMinCprefix = autMinC.Intersect(maxLengthAut2);
            while (negSamples.Count < K && tries < 10 * K)
            {
                var s = solver.GenerateMemberUniformly(autMinCprefix);
                if (s.EndsWith("\n", StringComparison.Ordinal) || !negSamples.Add(s))
                {
                    tries++;
                }
            }

            foreach (string s in posSamples)
            {
                if (!RexEngine.IsMatch(s, regex, RegexOptions.Singleline))
                {
                    Console.WriteLine("match expected regex:" + i);
                    break;
                }
                if (!csAcceptor.IsMatch(s))
                {
                    Console.WriteLine("match expected regex:" + i);
                    break;
                }
            }

            foreach (string s in negSamples)
            {
                if (RexEngine.IsMatch(s, regex, RegexOptions.Singleline))
                {
                    Console.WriteLine("mismatch expected regex:" + i);
                    break;
                }
                if (csAcceptor.IsMatch(s))
                {
                    Console.WriteLine("mismatch expected regex:" + i);
                    break;
                }
            }
        }
        catch (TimeoutException)
        {
            Console.WriteLine("timeout regex:" + i);
            timedout.Add(i);
            continue;
        }
    }
}
/// <summary>
/// Times the AntiXss CSS encoder against the three generated encoders on 100 generated
/// strings of at least 100 characters, repeated 100 times, and prints the averages and the
/// standard deviations.
/// </summary>
public void TestGeneratedCssEncodePerformance()
{
    CharSetSolver css = new CharSetSolver(BitWidth.BV16);
    var minLength = css.Convert("^.{100,}$"); //pattern requires 100 or more characters
    var utf16 = css.Convert(@"^([\0-\uD7FF\uE000-\uFFFD]|([\uD800-\uDBFF][\uDC00-\uDFFF]))*$");
    var validInputs = minLength.Intersect(utf16, css);
    //css.Chooser.RandomSeed = 123;

    //construct a sample set of 100 strings accepted by the automaton above
    List<string> samples = new List<string>();
    for (int n = 0; n < 100; n++)
    {
        samples.Add(css.GenerateMember(validInputs)); //margus
        // if (TryActualCssEncode(input, out tmp) == 0)
        //    samples.Add(input);
    }

    //now use the sample set for performance comparison
    var antiXssTimes = new List<int>();
    var CssEncodeTimes = new List<int>();
    var CssEncodeTimes_B = new List<int>();
    var CssEncodeTimes_F = new List<int>();
    int NrOfReps = 100;
    for (int rep = 0; rep < NrOfReps; rep++)
    {
        int tick;

        //the AntiXss encoder
        tick = System.Environment.TickCount;
        foreach (string text in samples)
        {
            System.Web.Security.AntiXss.AntiXssEncoder.CssEncode(text);
        }
        antiXssTimes.Add(System.Environment.TickCount - tick);

        //generated encoder without exploration
        tick = System.Environment.TickCount;
        foreach (string text in samples)
        {
            CssEncode.Apply(text);
        }
        CssEncodeTimes.Add(System.Environment.TickCount - tick);

        //generated encoder with Boolean exploration
        tick = System.Environment.TickCount;
        foreach (string text in samples)
        {
            CssEncode_B.Apply(text);
        }
        CssEncodeTimes_B.Add(System.Environment.TickCount - tick);

        //generated encoder with Full exploration
        tick = System.Environment.TickCount;
        foreach (string text in samples)
        {
            CssEncode_F.Apply(text);
        }
        CssEncodeTimes_F.Add(System.Environment.TickCount - tick);
    }

    //compute the average times
    int antiXssTime = ComputeAverage(antiXssTimes);
    int CssEncodeTime = ComputeAverage(CssEncodeTimes);
    int CssEncodeTime_B = ComputeAverage(CssEncodeTimes_B);
    int CssEncodeTime_F = ComputeAverage(CssEncodeTimes_F);
    double[] stdevs = CombinedStandardDeviation(antiXssTimes, CssEncodeTimes, CssEncodeTimes_B, CssEncodeTimes_F);

    Console.WriteLine("antiXssTime={0}, CssEncodeTime={1}, CssEncodeTime_B={2}, CssEncodeTime_F={3}, stddvAntiXSS={4}, stddvCssEncode={5}, stddvCssEncodeB={6}, stddvCssEncodeF={7}", antiXssTime, CssEncodeTime, CssEncodeTime_B, CssEncodeTime_F, stdevs[0], stdevs[1], stdevs[2], stdevs[3]);
}
/// <summary>
/// Round-trips an automaton through <c>SerializeAutomaton</c> and <c>DeserializeAutomaton</c>
/// and asserts that the result is equivalent to the original.
/// </summary>
public void SerializeDeserializeTest()
{
    var solver = new CharSetSolver();

    //.Net regex
    string pattern = @"^[A-Za-z0-9]{1,3}$";

    //corresponding SFA
    var original = solver.Convert(pattern);

    string serialized = solver.SerializeAutomaton(original);
    var restored = solver.DeserializeAutomaton(serialized);

    bool sameLanguage = original.IsEquivalentWith(restored);
    Assert.IsTrue(sameLanguage);
}
/// <summary>
/// Converts <paramref name="regex"/> to a minimized automaton, compiles it to C# via
/// <c>solver.ToCS</c>, prints the original regex, the minimized regex and the generated
/// source, and times 100000 matches of the original regex versus the generated matcher on one
/// member and one non-member string. Waits for a key press before returning.
/// </summary>
static void TestRegex(Regex regex)
{
    var solver = new CharSetSolver();
    string pattern = regex.ToString();

    //Regex.CompileToAssembly(new RegexCompilationInfo[] { new RegexCompilationInfo(myregex, RegexOptions.None, "EvilRegex", "RegexTransfomer", true) },
    //    new System.Reflection.AssemblyName("EvilRegex"));

    var epsilonFree = solver.Convert(pattern, regex.Options).RemoveEpsilons();
    var deterministic = epsilonFree.Determinize();
    var minimal = deterministic.Minimize();

    //solver.ShowGraph(sfa, "sfa");
    //solver.ShowGraph(sfaDet, "sfaDet");
    //solver.ShowGraph(sfaMin, "sfaMin");

    var matcher = solver.ToCS(minimal, true, "MyRegex", "RegexTransfomer");
    var minimalPattern = solver.ConvertToRegex(minimal);

    Console.WriteLine("------- given regex --------");
    Console.WriteLine(pattern);
    Console.WriteLine("----------------------------");
    Console.WriteLine("-------- regexMin ----------");
    Console.WriteLine(minimalPattern);
    Console.WriteLine("----------------------------");
    Console.WriteLine("-------- cs ----------------");
    Console.WriteLine(matcher.SourceCode);
    Console.WriteLine("----------------------------");

    string member = solver.GenerateMember(minimal);
    string nonMember = solver.GenerateMember(minimal.Complement());

    // First round uses the accepted string, second round the rejected one.
    foreach (string candidate in new string[] { member, nonMember })
    {
        //original regex
        int regexStart = System.Environment.TickCount;
        bool regexResult = false;
        for (int rep = 0; rep < 100000; rep++)
        {
            regexResult = Regex.IsMatch(candidate, regex.ToString(), regex.Options);
        }
        //res1 = evilregex.IsMatch(s);
        int regexMs = System.Environment.TickCount - regexStart;

        ////minimized regex
        //t2 = System.Environment.TickCount;
        //bool res2 = false;
        //for (int j = 0; j < 100000; j++)
        //    res2 = Regex.IsMatch(s, regexMin, regex.Options);
        //t2 = System.Environment.TickCount - t2;

        //code from minimized regex
        int codeStart = System.Environment.TickCount;
        bool codeResult = false;
        for (int rep = 0; rep < 100000; rep++)
        {
            codeResult = matcher.IsMatch(candidate);
        }
        int codeMs = System.Environment.TickCount - codeStart;

        Console.WriteLine(String.Format("{0}ms({1}), {2}ms({3})", regexMs, regexResult, codeMs, codeResult));
    }

    Console.WriteLine("done...(press any key)");
    Console.ReadKey();
}
/// <summary>
/// Checks that the generated UTF-8 encoders agree with the reference encoder on one surrogate
/// pair and on generated inputs without surrogates, then prints rough timings for each encoder.
/// </summary>
public void TestGeneratedUtf8EncodeFlat()
{
    int stringCount = 100;    //number of strings
    int stringLength = 10000; //length of each string

    // Smoke check on a single surrogate pair before the bulk comparison.
    string pairFlat;
    string pairFull;
    string pairActual;
    TryGeneratedUtf8EncodeFlat("\uDAE1\uDCA5", out pairFlat);
    TryGeneratedUtf8Encode_F("\uDAE1\uDCA5", out pairFull);
    TryActualUtf8Encode("\uDAE1\uDCA5", out pairActual);
    Assert.AreEqual<string>(pairFlat, pairFull);
    Assert.AreEqual<string>(pairFlat, pairActual);

    CharSetSolver css = new CharSetSolver(BitWidth.BV16);
    var exactLength = css.Convert("^.{" + stringLength + "}$");
    //var utf16 = css.Convert(@"^([\0-\uD7FF\uE000-\uFFFD]|([\uD800-\uDBFF][\uDC00-\uDFFF]))*$");
    //var utf16 = css.Convert(@"^([\uD800-\uDBFF][\uDC00-\uDFFF])*$");
    var utf16 = css.Convert(@"^([\0-\uD7FF\uE000-\uFFFD])*$");
    var generator = Automaton<BDD>.MkProduct(exactLength, utf16, css);
    //css.Chooser.RandomSeed = 123;

    string[] inputs = new string[stringCount];
    for (int slot = 0; slot < inputs.Length; slot++)
    {
        inputs[slot] = css.GenerateMember(generator);
    }

    // Correctness: every generated encoder must produce the reference output.
    foreach (string text in inputs)
    {
        string reference;
        string flat;
        string stream;
        string full;
        TryActualUtf8Encode(text, out reference);
        TryGeneratedUtf8EncodeFlat(text, out flat);
        TryGeneratedUtf8EncodeStream(text, out stream);
        TryGeneratedUtf8Encode_F(text, out full);
        Assert.AreEqual<string>(reference, full);
        Assert.AreEqual<string>(reference, flat);
        Assert.AreEqual<string>(reference, stream);
    }

    // Timing: one pass over all inputs per encoder.
    int tick = System.Environment.TickCount;
    foreach (string text in inputs)
    {
        string unused;
        TryGeneratedUtf8EncodeFlat(text, out unused);
    }
    int timeOur = System.Environment.TickCount - tick;

    tick = System.Environment.TickCount;
    foreach (string text in inputs)
    {
        string unused;
        TryGeneratedUtf8EncodeStream(text, out unused);
    }
    int timeOurStream = System.Environment.TickCount - tick;

    tick = System.Environment.TickCount;
    foreach (string text in inputs)
    {
        string unused;
        TryGeneratedUtf8Encode(text, out unused);
    }
    int timeOurOrig = System.Environment.TickCount - tick;

    tick = System.Environment.TickCount;
    foreach (string text in inputs)
    {
        string unused;
        TryActualUtf8Encode(text, out unused);
    }
    int timeSys = System.Environment.TickCount - tick;

    Console.WriteLine("timeOurStream:{3}ms, timeOur:{0}ms, timeOurOrig:{1}ms, timeSys:{2}ms", timeOur, timeOurOrig, timeSys, timeOurStream);
}
/// <summary>
/// Builds the first-order MSO formula "for all x, position x is labelled 'b'" over the
/// alphabet {a, b}, asserts that its DFA is equivalent to the regex ^b*$, and saves the DFA
/// as a dot file.
/// </summary>
public void MSOForall()
{
    var solver = new CharSetSolver(BitWidth.BV64);
    HashSet<char> al = new HashSet<char>(new List<char> { 'a', 'b' });

    //all x. b(x)
    MSOFormula formula = new MSOForallFO("x", new MSOLabel("x", 'b'));

    Assert.IsTrue(formula.CheckUseOfVars());

    var dfa = formula.getDFA(al, solver);

    var test = solver.Convert(@"^b*$");

    Assert.IsTrue(dfa.IsEquivalentWith(test, solver));

    string file = "../../../MSOZ3Test/DotFiles/bstar";

    solver.SaveAsDot(dfa, "aut", file); //extension .dot is added automatically when missing
}
/// <summary>
/// Checks a word-boundary pattern on a fixed set of matching and non-matching strings, first
/// with the .NET regex engine and then with the converted automaton, and finally runs
/// <c>CheckValidity</c> on the automaton.
/// </summary>
public void TestWordBoundary()
{
    var solver = new CharSetSolver(BitWidth.BV7);
    var pattern = @"\b(@|A)B\b";

    string[] matching = new string[] { "AB", "A@B", "A@B&", "+AB+" };
    string[] nonMatching = new string[] { "@B", "A@B_" };

    // Baseline: what the .NET regex engine says about each sample.
    foreach (string sample in matching)
    {
        Assert.IsTrue(Regex.IsMatch(sample, pattern));
    }
    foreach (string sample in nonMatching)
    {
        Assert.IsFalse(Regex.IsMatch(sample, pattern));
    }

    var automaton = solver.Convert(pattern);//.RemoveEpsilons(css.MkOr).Determinize(css).Minimize(css);
    //css.ShowGraph(aut, "aut");

    // The automaton must give the same verdicts.
    foreach (string sample in matching)
    {
        Assert.IsTrue(solver.Accepts(automaton, sample));
    }
    foreach (string sample in nonMatching)
    {
        Assert.IsFalse(solver.Accepts(automaton, sample));
    }

    CheckValidity(solver, automaton, new Regex(pattern, RegexOptions.Singleline));
}
/// <summary>
/// Converts ^\b[A@]\b$ to an automaton, eliminates its boundary states, and
/// checks the result against the .NET regex: once on the sample input "@"
/// using the determinized/minimized automaton, and once via CheckValidity.
/// </summary>
public void TestTrivialWordBoundary4()
{
    Regex r = new Regex(@"^\b[A@]\b$", RegexOptions.None);
    CharSetSolver css = new CharSetSolver(BitWidth.BV16);
    var aut = css.Convert(r.ToString(), RegexOptions.Singleline, true);
    //css.ShowGraph(aut, "TrivialWordBoundary4_with_b");
    css.RegexConverter.EliminateBoundaryStates(aut);
    var aut1 = aut.RemoveEpsilons().Determinize().Minimize();
    //css.ShowGraph(aut, "TrivialWordBoundary4");

    string s = "@";
    bool ismatchExpected = r.IsMatch(s);
    bool ismatchActual = css.Accepts(aut1, s);
    // Both verdicts were previously computed and then ignored; comparing them
    // is what actually exercises aut1.
    Assert.AreEqual(ismatchExpected, ismatchActual);

    CheckValidity(css, aut, r);
}
/// <summary>
/// Builds the <see cref="STModel"/> for a <c>replace</c> construct.
/// </summary>
/// <remarks>
/// Outline of the construction, as performed below:
/// <list type="number">
/// <item>Each case pattern is converted (with a start anchor) to a minimal automaton that must be a
/// nonempty sequence. Its moves are lifted to <c>BV2</c> labels and an extra end move, labelled with
/// the case index, is appended. The lifted automata are combined into <c>N</c> by intersecting
/// complements and complementing the result.</item>
/// <item>The distance of every non-final state of <c>N</c> from the initial state and the union of its
/// outgoing guards are computed; outputs are checked for out-of-range pattern variables.</item>
/// <item>Forward moves store the current character in the register at index "distance". When the
/// replace has an else case, completion moves back to / via the initial state are added.</item>
/// <item>For every move into the final state of <c>N</c> the case output is attached: either on the
/// transitions entering the last pattern state (cases recorded in <c>hasNoEndAnchor</c>) or as a
/// final output rule.</item>
/// <item>The moves are finally grouped per state into switch / if-then-else branching rules.</item>
/// </list>
/// </remarks>
/// <param name="repl">The replace construct whose cases (and optional else output) are converted.</param>
/// <returns>The resulting model; its <c>ST</c> property refers to the underlying ST and vice versa.</returns>
/// <exception cref="BekParseException">A pattern is infeasible, not a sequence or empty; an output
/// refers to a pattern variable that is out of range; or an output is neither a string constant nor a
/// <c>string(...)</c> call.</exception>
/// <exception cref="BekException">An internal consistency check fails.</exception>
STModel ConvertReplace(replace repl)
{
    //create a disjunction of all the regexes
    //each case terminated by the identifier
    int K = 0; //max pattern length

    //for efficiency keep a lookup table from character predicates to sets
    Dictionary<Expr, BDD> predLookup = new Dictionary<Expr, BDD>();
    Automaton<BV2> N = Automaton<BV2>.MkFull(css2);
    var hasNoEndAnchor = new HashSet<int>();

    for (int i = 0; i < repl.CaseCount; i++)
    {
        replacecase rcase = repl.GetCase(i);
        var pat = "^" + rcase.Pattern.val;
        var M = css.Convert(pat, System.Text.RegularExpressions.RegexOptions.Singleline).Determinize().Minimize();

        #region check that the pattern is a feasible nonempty sequence
        if (M.IsEmpty)
        {
            throw new BekParseException(string.Format("Semantic error: pattern {0} is infeasible.", rcase.Pattern.ToString()));
        }
        int _K;
        if (!M.CheckIfSequence(out _K))
        {
            throw new BekParseException(string.Format("Semantic error: pattern {0} is not a sequence.", rcase.Pattern.ToString()));
        }
        if (_K == 0)
        {
            throw new BekParseException(string.Format("Semantic error: empty pattern {0} is not allowed.", rcase.Pattern.ToString()));
        }
        K = Math.Max(_K, K);
        #endregion

        var liftedMoves = new List<Move<BV2>>();
        var st = M.InitialState;
        var newFinalState = M.MaxState + 1;
        var endAnchor = css.MkCharConstraint((char)i);

        //lift the moves to BV2 moves, adding end-markers
        while (!M.IsFinalState(st))
        {
            var mv = M.GetMoveFrom(st);
            var pair_cond = new BV2(mv.Label, css.False);
            var liftedMove = new Move<BV2>(mv.SourceState, mv.TargetState, pair_cond);
            liftedMoves.Add(liftedMove);
            if (M.IsFinalState(mv.TargetState))
            {
                var end_cond = new BV2(css.False, endAnchor);
                if (M.IsLoopState(mv.TargetState))
                {
                    //remembered so that the output is later attached to the incoming
                    //transitions instead of a final output rule
                    hasNoEndAnchor.Add(i);
                }
                var endMove = new Move<BV2>(mv.TargetState, newFinalState, end_cond);
                liftedMoves.Add(endMove);
            }
            st = mv.TargetState;
        }

        var N_i = Automaton<BV2>.Create(css2, M.InitialState, new int[] { newFinalState }, liftedMoves);
        N = N.Intersect(N_i.Complement());
        //note: overlapping patterns are not rejected here; when several cases end on the
        //same move the minimum case identifier is picked further below
    }

    N = N.Complement().Minimize();

    var D = new Dictionary<int, int>();
    var G = new Dictionary<int, BDD>();

    #region compute distance from initial state and compute guard unions
    var S = new Stack<int>();
    D[N.InitialState] = 0;
    G[N.InitialState] = css.False;
    S.Push(N.InitialState);
    while (S.Count > 0)
    {
        var q = S.Pop();
        foreach (var move in N.GetMovesFrom(q))
        {
            G[q] = css.MkOr(G[q], move.Label.Item1);
            var p = move.TargetState;
            var d = D[q] + 1;
            if (!(N.IsFinalState(p)) && !D.ContainsKey(p))
            {
                D[p] = d;
                G[p] = css.False;
                S.Push(p);
            }
            if (!(N.IsFinalState(p)) && D[p] != d)
            {
                throw new BekException(string.Format("Unexpected error, inconsitent distances {0} and {1} to state {2}", D[p], d, p));
            }
        }
    }
    #endregion

    #region check that outputs do not have out of bound variables
    foreach (var fs in N.GetFinalStates())
    {
        foreach (var move in N.GetMovesTo(fs))
        {
            if (move.Label.Item2.IsEmpty)
            {
                throw new BekException("Internal error: missing end anchor");
            }
            //pick the minimum case identifier when there are several, essentially pick the earliest case
            int id = (int)css.GetMin(move.Label.Item2);
            int distFromRoot = D[move.SourceState];
            var caseOutput = repl.GetCase(id).Output;
            foreach (var v in caseOutput.GetBoundVars())
            {
                if (v.GetVarId() >= distFromRoot)
                {
                    throw new BekParseException(v.line, v.pos, string.Format("Syntax error: pattern variable '{0}' is out ouf bounds, valid range is from '#0' to '#{1}']", v.name, distFromRoot - 1));
                }
            }
        }
    }
    #endregion

    int finalState = N.FinalState;
    K = K - 1; //this many registers are needed
    var zeroChar = stb.Solver.MkCharExpr('\0');

    var STmoves = new List<Move<Rule<Expr>>>();
    var STstates = new HashSet<int>();
    var STdelta = new Dictionary<int, List<Move<Rule<Expr>>>>();
    var STdeltaInv = new Dictionary<int, List<Move<Rule<Expr>>>>();
    var FinalSTstates = new HashSet<int>();

    //registers a move and keeps the state set and the forward/backward indexes in sync
    Action<Move<Rule<Expr>>> STmovesAdd = r =>
    {
        var p = r.SourceState;
        var q = r.TargetState;
        STmoves.Add(r);
        if (STstates.Add(p))
        {
            STdelta[p] = new List<Move<Rule<Expr>>>();
            STdeltaInv[p] = new List<Move<Rule<Expr>>>();
        }
        if (STstates.Add(q))
        {
            STdelta[q] = new List<Move<Rule<Expr>>>();
            STdeltaInv[q] = new List<Move<Rule<Expr>>>();
        }
        if (r.Label.IsFinal)
        {
            FinalSTstates.Add(p);
        }
        STdelta[p].Add(r);
        STdeltaInv[q].Add(r);
    };

    //the register is a K-tuple of characters, initially all '\0'
    var regsorts = new Sort[K];
    for (int j = 0; j < K; j++)
    {
        regsorts[j] = stb.Solver.CharSort;
    }
    var regsort = stb.Solver.MkTupleSort(regsorts);
    var regvar = stb.MkRegister(regsort);
    var initialRegisterValues = new Expr[K];
    for (int j = 0; j < K; j++)
    {
        initialRegisterValues[j] = zeroChar;
    }
    var initialRegister = stb.Solver.MkTuple(initialRegisterValues);

    //a state whose single outgoing move has an empty first label component
    Predicate<int> IsCaseEndState = s =>
    {
        return N.OutDegree(s) == 1 && N.GetMoveFrom(s).Label.Item1.IsEmpty;
    };

    #region compute the forward moves and the completion moves
    var V = new HashSet<int>();
    S.Push(N.InitialState);
    while (S.Count > 0)
    {
        var p = S.Pop();

        #region forward moves
        foreach (var move in N.GetMovesFrom(p))
        {
            var q = move.TargetState;
            //this move occurs if p has both an end-move and a non-end-move
            //note that if p is a case-end-state then it is never pushed to S
            if (N.IsFinalState(q))
            {
                continue;
            }
            var distance = D[p];
            Expr chExpr;
            Expr chPred;
            MkExprPred(move.Label.Item1, out chExpr, out chPred);
            predLookup[chPred] = move.Label.Item1;

            //store the current character at index 'distance', keep all other registers
            Expr[] regUpds = new Expr[K];
            for (int i = 0; i < K; i++)
            {
                if (i == distance)
                {
                    regUpds[i] = chExpr;
                }
                else
                {
                    regUpds[i] = stb.Solver.MkProj(i, regvar);
                }
            }
            Expr regExpr = stb.Solver.MkTuple(regUpds);
            var moveST = stb.MkRule(p, q, chPred, regExpr); //there are no yields
            STmovesAdd(moveST);
            if (V.Add(q) && !IsCaseEndState(q))
            {
                S.Push(q);
            }
        }
        #endregion

        #region completion is only enabled if there exists an else case
        if (repl.HasElseCase)
        {
            var guards = G[p];
            var guards0 = G[N.InitialState];

            #region nonmatching cases to the initial state
            var nomatch = css.MkNot(css.MkOr(guards, guards0));
            if (!nomatch.IsEmpty)
            {
                Expr chExpr;
                Expr nomatchPred;
                MkExprPred(nomatch, out chExpr, out nomatchPred);
                predLookup[nomatchPred] = nomatch;
                //else-output for every buffered register and for the current input character
                var else_yields_list = new List<Expr>();
                for (int i = 0; i < D[p]; i++)
                {
                    else_yields_list.AddRange(GetElseYieldInstance(repl.ElseOutput, stb.Solver.MkProj(i, regvar)));
                }
                else_yields_list.AddRange(GetElseYieldInstance(repl.ElseOutput, stb.MkInputVariable(stb.Solver.CharSort)));
                var else_yields = else_yields_list.ToArray();
                var resetMove = stb.MkRule(p, N.InitialState, nomatchPred, initialRegister, else_yields);
                STmovesAdd(resetMove);
            }
            #endregion

            #region matching cases via the initial state
            foreach (var move0 in N.GetMovesFrom(N.InitialState))
            {
                var g0 = move0.Label.Item1;
                var match = css.MkAnd(css.MkNot(guards), g0);
                if (!match.IsEmpty)
                {
                    Expr chExpr;
                    Expr matchPred;
                    MkExprPred(match, out chExpr, out matchPred);
                    predLookup[matchPred] = match;
                    var resetYieldsList = new List<Expr>();
                    //for all unprocessed inputs produce the output yield according to the else case
                    for (int i = 0; i < D[p]; i++)
                    {
                        resetYieldsList.AddRange(GetElseYieldInstance(repl.ElseOutput, stb.Solver.MkProj(i, regvar)));
                    }
                    var resetYields = resetYieldsList.ToArray();
                    //restart the match: current character in register 0, the rest zeroed
                    Expr[] regupd = new Expr[K];
                    regupd[0] = chExpr;
                    for (int j = 1; j < K; j++)
                    {
                        regupd[j] = zeroChar;
                    }
                    var regupdExpr = stb.Solver.MkTuple(regupd);
                    var resetMove = stb.MkRule(p, move0.TargetState, matchPred, regupdExpr, resetYields);
                    STmovesAdd(resetMove);
                }
            }
            #endregion
        }
        #endregion
    }
    #endregion

    foreach (var last_move in N.GetMovesTo(finalState))
    {
        //i is the case identifier
        int i = (int)css.GetMin(last_move.Label.Item2);
        int distFromRoot = D[last_move.SourceState];
        var outp = repl.GetCase(i).Output;

        //Converts the case output into yield terms under the given register map.
        //Shared by both variants below so that they cannot drift apart: each character of a
        //string constant is escaped before being wrapped in quotes. The end-of-input variant
        //used to pass the raw character, which produces text like ''' for a quote character.
        Func<Func<ident, Expr>, Expr[]> convertOutput = mapRegister =>
        {
            if (outp is strconst)
            {
                var text = ((strconst)outp).val;
                return Array.ConvertAll(text.ToCharArray(), c => this.str_handler.iter_handler.expr_handler.Convert(new charconst("'" + StringUtility.Escape(c) + "'"), mapRegister));
            }
            //must be an explicit list construct
            if (!(outp is functioncall) || !((functioncall)outp).id.name.Equals("string"))
            {
                throw new BekParseException("Invalid pattern output.");
            }
            var elems = ((functioncall)outp).args;
            return Array.ConvertAll(elems.ToArray(), e => this.str_handler.iter_handler.expr_handler.Convert(e, mapRegister));
        };

        if (hasNoEndAnchor.Contains(i))
        {
            #region this corresponds to looping back to the initial state on the given input
            //the final outputs produced after a successful pattern match
            Func<ident, Expr> loopRegisterMap = id =>
            {
                if (!id.IsVar || id.GetVarId() >= distFromRoot)
                {
                    throw new BekParseException(id.Line, id.Pos, string.Format("illeagal variable '{0}' in output", id.name));
                }
                if (id.GetVarId() == distFromRoot - 1)
                {
                    //the last reg update refers to the current variable
                    return stb.MkInputVariable(stb.Solver.CharSort);
                }
                return stb.Solver.MkProj(id.GetVarId(), regvar);
            };
            Expr[] yields = convertOutput(loopRegisterMap);

            //shortcut all the incoming transitions to the initial state
            foreach (var move in STdeltaInv[last_move.SourceState])
            {
                //go to the initial state, i.e. the matching wraps around
                int p = move.SourceState;
                int q0 = N.InitialState;
                //incoming yields come first, followed by the case output
                List<Expr> yields1 = new List<Expr>(move.Label.Yields);
                yields1.AddRange(yields);
                var rule = stb.MkRule(p, q0, move.Label.Guard, initialRegister, yields1.ToArray());
                STmovesAdd(rule);
                STmoves.Remove(move); //the move has been replaced
            }
            #endregion
        }
        else
        {
            #region this is the end of the input stream case
            Func<ident, Expr> finalRegisterMap = id =>
            {
                if (!id.IsVar || id.GetVarId() >= distFromRoot)
                {
                    throw new BekParseException(id.Line, id.Pos, string.Format("illeagal variable '{0}' in output", id.name));
                }
                return stb.Solver.MkProj(id.GetVarId(), regvar);
            };
            Expr[] yields = convertOutput(finalRegisterMap);

            int p = last_move.SourceState;
            var rule = stb.MkFinalOutput(p, stb.Solver.True, yields);
            STmovesAdd(rule);
            #endregion
        }
    }

    if (repl.HasElseCase)
    {
        #region final completion (upon end of input) for all non-final states
        foreach (var p in STstates)
        {
            //there is no final rule for p, so add the default one
            if (!FinalSTstates.Contains(p) && !IsCaseEndState(p))
            {
                Expr[] finalYields = new Expr[D[p]];
                for (int i = 0; i < finalYields.Length; i++)
                {
                    finalYields[i] = stb.Solver.MkProj(i, regvar);
                }
                var p_finalMove = stb.MkFinalOutput(p, stb.Solver.True, finalYields);
                STmovesAdd(p_finalMove);
            }
        }
        #endregion
    }
    else
    {
        //in this case there is a final rule from the initial state
        var q0_finalMove = stb.MkFinalOutput(N.InitialState, stb.Solver.True);
        STmovesAdd(q0_finalMove);
    }

    var resST = stb.MkST(name, initialRegister, stb.Solver.CharSort, stb.Solver.CharSort, regsort, N.InitialState, STmoves);
    var resSTb = new STModel(stb.Solver, name, stb.Solver.CharSort, stb.Solver.CharSort, regsort, initialRegister, N.InitialState);

    //create STb from the moves, we use here the knowledge that the ST is deterministic
    //we also use the lookup table of conditions to eliminate dead code
    #region compute the rules of the resulting STb
    foreach (var st in resST.GetStates())
    {
        var condUnion = css.False;
        var st_moves = new List<Move<Rule<Expr>>>();
        foreach (var move in resST.GetNonFinalMovesFrom(st))
        {
            condUnion = css.MkOr(condUnion, predLookup[move.Label.Guard]);
            st_moves.Add(move);
        }

        BranchingRule<Expr> st_rule;
        if (st_moves.Count > 0)
        {
            //collect all rules with singleton guards and put them into a switch statement
            var st_rules1 = new List<KeyValuePair<Expr, BranchingRule<Expr>>>();
            var st_moves2 = new List<Move<Rule<Expr>>>();
            foreach (var move in st_moves)
            {
                if (css.ComputeDomainSize(predLookup[move.Label.Guard]) == 1)
                {
                    var v = stb.Solver.MkNumeral(css.Choose(predLookup[move.Label.Guard]), stb.Solver.CharSort);
                    var r = new BaseRule<Expr>(new Sequence<Expr>(move.Label.Yields), move.Label.Update, move.TargetState);
                    st_rules1.Add(new KeyValuePair<Expr, BranchingRule<Expr>>(v, r));
                }
                else
                {
                    st_moves2.Add(move);
                }
            }

            BranchingRule<Expr> defaultcase = new UndefRule<Expr>("reject");
            //make st_moves2 into an ite rule
            if (st_moves2.Count > 0)
            {
                for (int j = st_moves2.Count - 1; j >= 0; j--)
                {
                    var r = new BaseRule<Expr>(new Sequence<Expr>(st_moves2[j].Label.Yields), st_moves2[j].Label.Update, st_moves2[j].TargetState);
                    if (j == (st_moves2.Count - 1) && condUnion.IsFull)
                    {
                        //the guards cover every character, so the last move needs no test
                        defaultcase = r;
                    }
                    else
                    {
                        defaultcase = new IteRule<Expr>(st_moves2[j].Label.Guard, r, defaultcase);
                    }
                }
            }
            else if (condUnion.IsFull)
            {
                //only singleton guards and they cover everything: the last one becomes the default
                defaultcase = st_rules1[st_rules1.Count - 1].Value;
                st_rules1.RemoveAt(st_rules1.Count - 1);
            }

            if (st_rules1.Count == 0)
            {
                st_rule = defaultcase;
            }
            else
            {
                st_rule = new SwitchRule<Expr>(stb.MkInputVariable(stb.Solver.CharSort), defaultcase, st_rules1.ToArray());
            }
        }
        else
        {
            st_rule = new UndefRule<Expr>("reject");
        }
        resSTb.AssignRule(st, st_rule);

        var st_finalrules = new List<Rule<Expr>>(resST.GetFinalRules(st));
        if (st_finalrules.Count > 1)
        {
            throw new BekException("Unexpected error: multiple final rules per state.");
        }
        if (st_finalrules.Count > 0)
        {
            resSTb.AssignFinalRule(st, new BaseRule<Expr>(new Sequence<Expr>(st_finalrules[0].Yields), initialRegister, st));
        }
    }
    resSTb.ST = resST;
    resST.STb = resSTb;
    #endregion

    return resSTb;
}
/// <summary>
/// Checks that the automaton for the bare word-boundary regex \b, after
/// boundary-state elimination, accepts "abc", and that the .NET regex
/// engine matches the same input.
/// </summary>
public void TestWordBoundaryCase()
{
    const string input = "abc";
    const string pattern = @"\b";

    CharSetSolver solver = new CharSetSolver(BitWidth.BV7);
    var automaton = solver.Convert(pattern, RegexOptions.Singleline, true);
    solver.RegexConverter.EliminateBoundaryStates(automaton);

    bool acceptedByAutomaton = solver.Accepts(automaton, input);
    Assert.IsTrue(acceptedByAutomaton);

    bool matchedByRegex = Regex.IsMatch(input, pattern, RegexOptions.Singleline);
    Assert.IsTrue(matchedByRegex);
}