//private static void TestIgnoreCase()
//{
//    Microsoft.Automata.Utilities.IgnoreCaseRelationGenerator.Generate(
//        "Microsoft.Automata.Generated",
//        "IgnoreCaseRelation",
//        @"C:\GitHub\AutomataDotNet\Automata\src\Automata\Internal\Generated");
//}

/// <summary>
/// Benchmarks generated C++ matchers against .NET regexes on the same inputs.
/// Steps: convert each regex to an automaton, complement it, sample
/// <c>NrOfStrings</c> accepted and rejected strings per regex, compile the
/// automata through <c>CppTest.Compile</c>, then time <c>CppTest.Test</c> and
/// <c>DotNetTest.Test</c> and print throughput and the speedup ratio.
/// </summary>
/// <param name="regexes">
/// Regexes to benchmark. Entries that are trivial (full or empty language) or
/// that fail to convert are overwritten IN PLACE with <c>^dummy$</c>.
/// </param>
static void TestCppCodeGen(Regex[] regexes)
{
    Automaton<BDD>[] automata = new Automaton<BDD>[regexes.Length];
    Automaton<BDD>[] Cautomata = new Automaton<BDD>[regexes.Length];
    var solver = new CharSetSolver();

    #region convert the regexes to automata
    Console.Write("Converting {0} regexes to automata and minimizing the automata ...", regexes.Length);
    int t = System.Environment.TickCount;
    // An automaton is "full" when its single state is initial, final and has one self-loop labelled True.
    Func<Automaton<BDD>, bool> IsFull = (a =>
        a.StateCount == 1
        && a.IsFinalState(a.InitialState)
        && a.IsLoopState(a.InitialState)
        && a.GetMovesCountFrom(a.InitialState) == 1
        && a.GetMoveFrom(a.InitialState).Label.Equals(solver.True));
    for (int i = 0; i < regexes.Length; i++)
    {
        try
        {
            automata[i] = CppCodeGenerator.Regex2Automaton(solver, regexes[i]);
            if (IsFull(automata[i]) || automata[i].IsEmpty)
            {
                // Report both the index and the pattern; previously the index was
                // printed inside the quotes and the pattern argument was ignored.
                Console.WriteLine("\nReplacing trivial regex {0}: '{1}' with \"^dummy$\"", i, regexes[i]);
                regexes[i] = new Regex("^dummy$");
                automata[i] = CppCodeGenerator.Regex2Automaton(solver, regexes[i]);
            }
        }
        catch (Exception e)
        {
            Console.WriteLine("\nCoverting regex {0}: '{1}' failed, reason: {2}, replacing with \"^dummy$\"", i, regexes[i], e.Message);
            regexes[i] = new Regex("^dummy$");
            automata[i] = CppCodeGenerator.Regex2Automaton(solver, regexes[i]);
        }
    }
    t = System.Environment.TickCount - t;
    Console.WriteLine(" done ({0}ms)", t);
    #endregion

    #region complement the automata
    t = System.Environment.TickCount;
    Console.Write("Creating complements of autmata ...");
    for (int i = 0; i < regexes.Length; i++)
    {
        Cautomata[i] = automata[i].Complement().Minimize();
    }
    t = System.Environment.TickCount - t;
    Console.WriteLine(" done ({0}ms)", t);
    #endregion

    #region generate positive test strings
    Console.Write("Generating a positive test set for all automata ");
    t = System.Environment.TickCount;
    List<string[]> members = new List<string[]>();
    List<string[]> Cmembers = new List<string[]>();
    for (int id = 0; id < automata.Length; id++)
    {
        Console.Write(".");
        // Positive samples are restricted to \0-\x7F with a bounded length.
        var M = automata[id].Intersect(solver.Convert("^[\0-\x7F]{0," + CodeGenTests.MaxStringLength + "}$", RegexOptions.Singleline)).Determinize();
        var tmp = new string[NrOfStrings];
        for (int i = 0; i < NrOfStrings; i++)
        {
            tmp[i] = solver.GenerateMemberUniformly(M);
            //if (i % 10 == 0)
            //    Console.Write(".");
        }
        members.Add(tmp);
    }
    t = System.Environment.TickCount - t;
    Console.WriteLine(" done ({0}ms)", t);
    #endregion

    #region generate negative test strings
    t = System.Environment.TickCount;
    Console.Write("Generating a negative test set for all automata ");
    for (int id = 0; id < Cautomata.Length; id++)
    {
        Console.Write(".");
        //var M = Cautomata[id].Intersect(solver.Convert("^[^\uD800-\uDFFF]{0,100}$", RegexOptions.Singleline), solver).Determinize(solver);
        var M = Cautomata[id].Intersect(solver.Convert("^[\0-\uFFFF]{0,100}$", RegexOptions.Singleline)).Determinize();
        var tmp = new string[NrOfStrings];
        for (int i = 0; i < NrOfStrings; i++)
        {
            tmp[i] = solver.GenerateMemberUniformly(M);
            //if (i % 10 == 0)
            //    Console.Write(".");
        }
        Cmembers.Add(tmp);
    }
    t = System.Environment.TickCount - t;
    Console.WriteLine(" done ({0}ms)", t);
    #endregion

    #region generate c++
    int t2 = System.Environment.TickCount;
    CppTest.Compile(automata, solver, true);
    t2 = System.Environment.TickCount - t2;
    Console.WriteLine(" done ({0}ms)", t2);
    #endregion

    #region convert the test strings to UTF8
    List<byte[][]> membersUTF8 = new List<byte[][]>();
    List<byte[][]> CmembersUTF8 = new List<byte[][]>();
    for (int id = 0; id < automata.Length; id++)
    {
        var tmp = new byte[NrOfStrings][];
        for (int i = 0; i < NrOfStrings; i++)
        {
            tmp[i] = Encoding.UTF8.GetBytes(members[id][i]);
        }
        membersUTF8.Add(tmp);
    }
    for (int id = 0; id < Cautomata.Length; id++)
    {
        var tmp = new byte[NrOfStrings][];
        for (int i = 0; i < NrOfStrings; i++)
        {
            tmp[i] = Encoding.UTF8.GetBytes(Cmembers[id][i]);
        }
        CmembersUTF8.Add(tmp);
    }
    #endregion

    #region compute tot nr of bits
    double bits = 0;
    for (int id = 0; id < automata.Length; id++)
    {
        int nrBytes = 0;
        for (int i = 0; i < NrOfStrings; i++)
        {
            nrBytes += membersUTF8[id][i].Length + CmembersUTF8[id][i].Length;
        }
        bits += (nrBytes * 8.0);
    }
    bits = bits * CodeGenTests.Repetitions; //repeated Reps times
    #endregion

    #region run c++ tests
    Console.Write("Running c++ tests ... ");
    double totsec_cpp = 0;
    for (int id = 0; id < automata.Length; id++)
    {
        double sec_cpp = 0;
        // Only the elapsed time is used; the returned acceptance counts are not checked here.
        CppTest.Test(true, id, membersUTF8[id], members[id], out sec_cpp);
        totsec_cpp += sec_cpp;
        CppTest.Test(false, id, CmembersUTF8[id], Cmembers[id], out sec_cpp);
        totsec_cpp += sec_cpp;
    }
    double bps_cpp = bits / totsec_cpp;
    double mbps_cpp = (bps_cpp / 1000000.0);
    int Mbps_cpp = (int)Math.Round(mbps_cpp);
    Console.WriteLine("{0}sec, throughput = {1}Mbps", totsec_cpp, Mbps_cpp);
    #endregion

    #region run .NET tests
    Console.Write("Running .NET tests ... ");
    double totsec_net = 0;
    for (int id = 0; id < automata.Length; id++)
    {
        DotNetTest.Compile(regexes[id]); //make sure each regex is precompiled
        double sec_net;
        DotNetTest.Test(true, members[id], out sec_net);
        totsec_net += sec_net;
        DotNetTest.Test(false, Cmembers[id], out sec_net);
        totsec_net += sec_net;
    }
    double bps_net = bits / totsec_net;
    double mbps_net = (bps_net / 1000000.0);
    int Mbps_net = (int)Math.Round(mbps_net);
    Console.WriteLine("{0}sec, throughput = {1}Mbps", totsec_net, Mbps_net);
    #endregion

    Console.WriteLine("speedup (.NET-time/c++-time) = {0}X", ((int)Math.Round(totsec_net / totsec_cpp)));
}
/// <summary>
/// Cross-checks the C# acceptor produced by <c>solver.ToCS</c> against
/// <c>RexEngine.IsMatch</c> for a sample of regexes read from <c>regexesFile</c>.
/// For each regex it samples up to K accepted strings and up to K strings from
/// the complement, then writes a console message for the first string on which
/// either matcher disagrees with the expected result.
/// Regexes whose processing throws <see cref="TimeoutException"/> are logged and skipped.
/// </summary>
public void gen_chsarp_TestSampleRegexes2csharp()
{
    var solver = new CharSetSolver(BitWidth.BV16);
    List<string> regexesAll = new List<string>(File.ReadAllLines(regexesFile));
    List<int> timedout = new List<int>();
    // A set gives constant-time membership tests in the filter loop below.
    HashSet<int> excluded = new HashSet<int>(new int[] { 36, 50, 64, 65, 162, 166, 210, 238, 334, 355, 392, 455, 471, 490, 594, 611, 612, 671, 725, 731, 741, 760, 775, 800, 852, 870, 873, 880, 882, 893, 923, 991, 997, 1053, 1062, 1164, 1220, 1228, 1273, 1318, 1339, 1352, 1386, 1404, 1413, 1414, 1423, 1424, 1429, 1431, 1434, 1482, 1487, 1516, 1517, 1518, 1519, 1520, 1537, 1565, 1566, 1635, 1744, 1749, 1829, 1868 });
    List<string> regexes = new List<string>();
    for (int i = 1; i < regexesAll.Count; i++)
    {
        if (!excluded.Contains(i))
        {
            regexes.Add(regexesAll[i]);
        }
    }

    int K = 50; //number of pos/neg strings to be generated for each regex

    // Clamp to the number of available regexes so a short input file cannot index out of range.
    for (int i = 1; i < 100 && i < regexes.Count; i++)
    {
        try
        {
            var regex = regexes[i];
            var aut = solver.Convert(regex, RegexOptions.Singleline);
            var autDet = aut.Determinize(2000);
            var autMin = autDet.Minimize();
            var autMinC = aut.Complement();
            if (autMin.IsEmpty || autMinC.IsEmpty)
            {
                continue;
            }

            CheckIsClean(autMin);

            //var autMinExpr = z3.ConvertAutomatonGuardsToExpr(autMin);
            //var sfa = new SFA<FuncDecl, Expr, Sort>(z3, z3.CharacterSort, autMinExpr);
            //var stbb = new STBuilder<FuncDecl, Expr, Sort>(z3);
            //var st = ST<FuncDecl, Expr, Sort>.SFAtoST(sfa);
            //var stb = st.ToSTb();
            ////var csAcceptor = stb.Compile("RegexTransfomer", "SampleAcceptor", false, true);

            var csAcceptor = solver.ToCS(autMin);

            HashSet<string> posSamples = new HashSet<string>();
            HashSet<string> negSamples = new HashSet<string>();

            // Bound sample length to three times the length of the shortest final path.
            int k = autMin.FindShortestFinalPath(autMin.InitialState).Item1.Length;
            var maxLengthAut = solver.Convert("^.{0," + (3 * k) + "}$").Determinize().Minimize();
            var aut1 = autMin.Intersect(maxLengthAut);
            int tries = 0;
            while (posSamples.Count < K && tries < 10 * K)
            {
                var s = solver.GenerateMemberUniformly(aut1);
                // Every rejected draw (newline-terminated or duplicate) consumes a try,
                // so the loop terminates even if no usable sample can be drawn.
                if (s.EndsWith("\n", StringComparison.Ordinal) || !posSamples.Add(s))
                {
                    tries++;
                }
            }

            // The shortest path must be searched from the complement's own initial state.
            int k2 = autMinC.FindShortestFinalPath(autMinC.InitialState).Item1.Length;
            var maxLengthAut2 = solver.Convert("^.{0," + (3 * k2) + "}$").Determinize().Minimize();
            var autMinCprefix = autMinC.Intersect(maxLengthAut2);
            tries = 0;
            while (negSamples.Count < K && tries < 10 * K)
            {
                var s = solver.GenerateMemberUniformly(autMinCprefix);
                if (s.EndsWith("\n", StringComparison.Ordinal) || !negSamples.Add(s))
                {
                    tries++;
                }
            }

            // Accepted samples must match in both the reference engine and the generated acceptor.
            foreach (string s in posSamples)
            {
                if (!RexEngine.IsMatch(s, regex, RegexOptions.Singleline))
                {
                    Console.WriteLine("match expected regex:" + i);
                    break;
                }
                if (!csAcceptor.IsMatch(s))
                {
                    Console.WriteLine("match expected regex:" + i);
                    break;
                }
            }

            // Complement samples must be rejected by both.
            foreach (string s in negSamples)
            {
                if (RexEngine.IsMatch(s, regex, RegexOptions.Singleline))
                {
                    Console.WriteLine("mismatch expected regex:" + i);
                    break;
                }
                if (csAcceptor.IsMatch(s))
                {
                    Console.WriteLine("mismatch expected regex:" + i);
                    break;
                }
            }
        }
        catch (TimeoutException)
        {
            Console.WriteLine("timeout regex:" + i);
            timedout.Add(i);
            continue;
        }
    }
}
/// <summary>
/// Compares the C# acceptor generated by <c>solver.ToCS</c> with
/// <c>RexEngine.IsMatch</c> on sampled strings for regexes listed in
/// <c>regexesFile</c> (lines whose index is in the exclusion set are dropped).
/// Up to K members of each regex language and up to K members of its
/// complement are sampled; a console line is written for the first sample on
/// which a matcher gives the unexpected answer.
/// A <see cref="TimeoutException"/> for a regex is logged and that regex is skipped.
/// </summary>
public void gen_chsarp_TestSampleRegexes2csharp()
{
    var solver = new CharSetSolver(BitWidth.BV16);
    List<string> regexesAll = new List<string>(File.ReadAllLines(regexesFile));
    List<int> timedout = new List<int>();
    // Hash-based lookup instead of a linear List.Contains scan per input line.
    HashSet<int> excluded = new HashSet<int>(new int[] { 36, 50, 64, 65, 162, 166, 210, 238, 334, 355, 392, 455, 471, 490, 594, 611, 612, 671, 725, 731, 741, 760, 775, 800, 852, 870, 873, 880, 882, 893, 923, 991, 997, 1053, 1062, 1164, 1220, 1228, 1273, 1318, 1339, 1352, 1386, 1404, 1413, 1414, 1423, 1424, 1429, 1431, 1434, 1482, 1487, 1516, 1517, 1518, 1519, 1520, 1537, 1565, 1566, 1635, 1744, 1749, 1829, 1868 });
    List<string> regexes = new List<string>();
    for (int line = 1; line < regexesAll.Count; line++)
    {
        if (excluded.Contains(line))
        {
            continue;
        }
        regexes.Add(regexesAll[line]);
    }

    int K = 10; //number of pos/neg strings to be generated for each regex
    int maxTries = 10 * K;

    // Stop at the end of the list when fewer than 100 regexes survive the filter.
    for (int i = 1; i < 100 && i < regexes.Count; i++)
    {
        try
        {
            var regex = regexes[i];
            var aut = solver.Convert(regex, RegexOptions.Singleline);
            var autDet = aut.Determinize(2000);
            var autMin = autDet.Minimize();
            var autMinC = aut.Complement();
            if (autMin.IsEmpty || autMinC.IsEmpty || autMinC.IsEpsilon)
            {
                continue;
            }

            CheckIsClean(autMin);

            //var autMinExpr = z3.ConvertAutomatonGuardsToExpr(autMin);
            //var sfa = new SFA<FuncDecl, Expr, Sort>(z3, z3.CharacterSort, autMinExpr);
            //var stbb = new STBuilder<FuncDecl, Expr, Sort>(z3);
            //var st = ST<FuncDecl, Expr, Sort>.SFAtoST(sfa);
            //var stb = st.ToSTb();
            ////var csAcceptor = stb.Compile("RegexTransfomer", "SampleAcceptor", false, true);

            var csAcceptor = solver.ToCS(autMin);

            HashSet<string> posSamples = new HashSet<string>();
            HashSet<string> negSamples = new HashSet<string>();

            // Positive samples: members of the language no longer than 3x the shortest final path.
            int k = autMin.FindShortestFinalPath(autMin.InitialState).Item1.Length;
            var maxLengthAut = solver.Convert("^.{0," + (3 * k) + "}$").Determinize().Minimize();
            var aut1 = autMin.Intersect(maxLengthAut);
            for (int tries = 0; posSamples.Count < K && tries < maxTries; )
            {
                var candidate = solver.GenerateMemberUniformly(aut1);
                bool usable = !candidate.EndsWith("\n", StringComparison.Ordinal);
                // Count newline-terminated draws as well as duplicates so the loop
                // is guaranteed to finish after at most maxTries rejected draws.
                if (!usable || !posSamples.Add(candidate))
                {
                    tries++;
                }
            }

            // Negative samples: same scheme applied to the complement automaton.
            int k2 = autMinC.FindShortestFinalPath(autMinC.InitialState).Item1.Length;
            var maxLengthAut2 = solver.Convert("^.{0," + (3 * k2) + "}$").Determinize().Minimize();
            var autMinCprefix = autMinC.Intersect(maxLengthAut2);
            for (int tries = 0; negSamples.Count < K && tries < maxTries; )
            {
                var candidate = solver.GenerateMemberUniformly(autMinCprefix);
                bool usable = !candidate.EndsWith("\n", StringComparison.Ordinal);
                if (!usable || !negSamples.Add(candidate))
                {
                    tries++;
                }
            }

            foreach (string s in posSamples)
            {
                if (!RexEngine.IsMatch(s, regex, RegexOptions.Singleline))
                {
                    Console.WriteLine("match expected regex:" + i);
                    break;
                }
                if (!csAcceptor.IsMatch(s))
                {
                    Console.WriteLine("match expected regex:" + i);
                    break;
                }
            }

            foreach (string s in negSamples)
            {
                if (RexEngine.IsMatch(s, regex, RegexOptions.Singleline))
                {
                    Console.WriteLine("mismatch expected regex:" + i);
                    break;
                }
                if (csAcceptor.IsMatch(s))
                {
                    Console.WriteLine("mismatch expected regex:" + i);
                    break;
                }
            }
        }
        catch (TimeoutException)
        {
            Console.WriteLine("timeout regex:" + i);
            timedout.Add(i);
            continue;
        }
    }
}
//private static void TestIgnoreCase()
//{
//    Microsoft.Automata.Internal.Utilities.IgnoreCaseRelationGenerator.Generate(
//        "Microsoft.Automata.Internal.Generated",
//        "IgnoreCaseRelation",
//        @"C:\GitHub\AutomataDotNet\Automata\src\Automata\Internal\Generated");
//}

/// <summary>
/// Measures the throughput of the generated C++ matchers versus .NET regexes.
/// The regexes are turned into automata and complemented, <c>NrOfStrings</c>
/// positive and negative strings are sampled for each, the automata are
/// compiled via <c>CppTest.Compile</c>, and the accumulated times reported by
/// <c>CppTest.Test</c> and <c>DotNetTest.Test</c> are printed as Mbps figures
/// together with the .NET/C++ time ratio.
/// </summary>
/// <param name="regexes">
/// Input regexes. The array is modified: any regex that is trivial (accepts
/// everything or nothing) or cannot be converted is replaced by <c>^dummy$</c>.
/// </param>
static void TestCppCodeGen(Regex[] regexes)
{
    Automaton<BDD>[] automata = new Automaton<BDD>[regexes.Length];
    Automaton<BDD>[] Cautomata = new Automaton<BDD>[regexes.Length];
    var solver = new CharSetSolver();

    // Draws NrOfStrings members from the given automaton.
    Func<Automaton<BDD>, string[]> sampleStrings = (source =>
    {
        var samples = new string[NrOfStrings];
        for (int i = 0; i < NrOfStrings; i++)
        {
            samples[i] = solver.GenerateMemberUniformly(source);
            //if (i % 10 == 0)
            //    Console.Write(".");
        }
        return samples;
    });

    // Encodes the first NrOfStrings strings of a sample set as UTF-8.
    Func<string[], byte[][]> toUtf8 = (strings =>
    {
        var encoded = new byte[NrOfStrings][];
        for (int i = 0; i < NrOfStrings; i++)
        {
            encoded[i] = Encoding.UTF8.GetBytes(strings[i]);
        }
        return encoded;
    });

    #region convert the regexes to automata
    Console.Write("Converting {0} regexes to automata and minimizing the automata ...", regexes.Length);
    int t = System.Environment.TickCount;
    Func<Automaton<BDD>, bool> IsFull = (a => a.StateCount == 1 && a.IsFinalState(a.InitialState) && a.IsLoopState(a.InitialState) && a.GetMovesCountFrom(a.InitialState) == 1 && a.GetMoveFrom(a.InitialState).Label.Equals(solver.True));
    for (int i = 0; i < regexes.Length; i++)
    {
        try
        {
            automata[i] = CppCodeGenerator.Regex2Automaton(solver, regexes[i]);
            bool trivial = IsFull(automata[i]) || automata[i].IsEmpty;
            if (trivial)
            {
                // The message now shows the index and the pattern; the original
                // format string quoted the index and dropped the pattern argument.
                Console.WriteLine("\nReplacing trivial regex {0}: '{1}' with \"^dummy$\"", i, regexes[i]);
                regexes[i] = new Regex("^dummy$");
                automata[i] = CppCodeGenerator.Regex2Automaton(solver, regexes[i]);
            }
        }
        catch (Exception e)
        {
            Console.WriteLine("\nCoverting regex {0}: '{1}' failed, reason: {2}, replacing with \"^dummy$\"", i, regexes[i], e.Message);
            regexes[i] = new Regex("^dummy$");
            automata[i] = CppCodeGenerator.Regex2Automaton(solver, regexes[i]);
        }
    }
    t = System.Environment.TickCount - t;
    Console.WriteLine(" done ({0}ms)", t);
    #endregion

    #region complement the automata
    t = System.Environment.TickCount;
    Console.Write("Creating complements of autmata ...");
    for (int i = 0; i < regexes.Length; i++)
    {
        Cautomata[i] = automata[i].Complement().Minimize();
    }
    t = System.Environment.TickCount - t;
    Console.WriteLine(" done ({0}ms)", t);
    #endregion

    #region generate positive test strings
    Console.Write("Generating a positive test set for all automata ");
    t = System.Environment.TickCount;
    List<string[]> members = new List<string[]>();
    List<string[]> Cmembers = new List<string[]>();
    for (int id = 0; id < automata.Length; id++)
    {
        Console.Write(".");
        var lengthBound = solver.Convert("^[\0-\x7F]{0," + CodeGenTests.MaxStringLength + "}$", RegexOptions.Singleline);
        var M = automata[id].Intersect(lengthBound).Determinize();
        members.Add(sampleStrings(M));
    }
    t = System.Environment.TickCount - t;
    Console.WriteLine(" done ({0}ms)", t);
    #endregion

    #region generate negative test strings
    t = System.Environment.TickCount;
    Console.Write("Generating a negative test set for all automata ");
    for (int id = 0; id < Cautomata.Length; id++)
    {
        Console.Write(".");
        //var M = Cautomata[id].Intersect(solver.Convert("^[^\uD800-\uDFFF]{0,100}$", RegexOptions.Singleline), solver).Determinize(solver);
        var lengthBound = solver.Convert("^[\0-\uFFFF]{0,100}$", RegexOptions.Singleline);
        var M = Cautomata[id].Intersect(lengthBound).Determinize();
        Cmembers.Add(sampleStrings(M));
    }
    t = System.Environment.TickCount - t;
    Console.WriteLine(" done ({0}ms)", t);
    #endregion

    #region generate c++
    int t2 = System.Environment.TickCount;
    CppTest.Compile(automata, solver, true);
    t2 = System.Environment.TickCount - t2;
    Console.WriteLine(" done ({0}ms)", t2);
    #endregion

    #region convert the test strings to UTF8
    List<byte[][]> membersUTF8 = new List<byte[][]>();
    List<byte[][]> CmembersUTF8 = new List<byte[][]>();
    for (int id = 0; id < automata.Length; id++)
    {
        membersUTF8.Add(toUtf8(members[id]));
    }
    for (int id = 0; id < Cautomata.Length; id++)
    {
        CmembersUTF8.Add(toUtf8(Cmembers[id]));
    }
    #endregion

    #region compute tot nr of bits
    double bits = 0;
    for (int id = 0; id < automata.Length; id++)
    {
        int nrBytes = 0;
        for (int i = 0; i < NrOfStrings; i++)
        {
            nrBytes += membersUTF8[id][i].Length + CmembersUTF8[id][i].Length;
        }
        bits += (nrBytes * 8.0);
    }
    bits = bits * CodeGenTests.Repetitions; //repeated Reps times
    #endregion

    #region run c++ tests
    Console.Write("Running c++ tests ... ");
    double totsec_cpp = 0;
    for (int id = 0; id < automata.Length; id++)
    {
        double sec_cpp = 0;
        // The acceptance counts returned by Test are not inspected; only the timings are accumulated.
        CppTest.Test(true, id, membersUTF8[id], members[id], out sec_cpp);
        totsec_cpp += sec_cpp;
        CppTest.Test(false, id, CmembersUTF8[id], Cmembers[id], out sec_cpp);
        totsec_cpp += sec_cpp;
    }
    double bps_cpp = bits / totsec_cpp;
    double mbps_cpp = (bps_cpp / 1000000.0);
    int Mbps_cpp = (int)Math.Round(mbps_cpp);
    Console.WriteLine("{0}sec, throughput = {1}Mbps", totsec_cpp, Mbps_cpp);
    #endregion

    #region run .NET tests
    Console.Write("Running .NET tests ... ");
    double totsec_net = 0;
    for (int id = 0; id < automata.Length; id++)
    {
        DotNetTest.Compile(regexes[id]); //make sure each regex is precompiled
        double sec_net;
        DotNetTest.Test(true, members[id], out sec_net);
        totsec_net += sec_net;
        DotNetTest.Test(false, Cmembers[id], out sec_net);
        totsec_net += sec_net;
    }
    double bps_net = bits / totsec_net;
    double mbps_net = (bps_net / 1000000.0);
    int Mbps_net = (int)Math.Round(mbps_net);
    Console.WriteLine("{0}sec, throughput = {1}Mbps", totsec_net, Mbps_net);
    #endregion

    Console.WriteLine("speedup (.NET-time/c++-time) = {0}X", ((int)Math.Round(totsec_net / totsec_cpp)));
}