/// <summary>
/// Builds an MSO formula over a 7-bit character solver stating that there are two distinct
/// positions x and y contained in a set X, all of whose positions carry a character matching
/// <c>(c|C)</c>, and asserts that the existentially closed formula produces equivalent automata
/// whether compiled through the Cartesian algebra (then restricted) or directly through the solver.
/// </summary>
public void TestWS1S_GetAutomatonBDD_eq_GetAutomaton()
{
    var solver = new CharSetSolver(BitWidth.BV7);
    //var nrOfLabelBits = (int)BitWidth.BV7;
    var isDigit = solver.MkCharSetFromRegexCharClass(@"\d");
    var isLetter = solver.MkCharSetFromRegexCharClass(@"(c|C)");

    var x = new Variable("x", false);
    var y = new Variable("y", false);
    var z = new Variable("z", false);
    var X = new Variable("X", false);

    // x and y are two distinct singleton positions
    var distinctPositions = new MSOAnd<BDD>(
        new MSONot<BDD>(new MSOEq<BDD>(x, y)),
        new MSOAnd<BDD>(new MSOIsSingleton<BDD>(x), new MSOIsSingleton<BDD>(y)));

    // some set X contains x and y, and every position z in X carries a character satisfying isLetter
    var x_sub_X = new MSOSubset<BDD>(x, X);
    var y_sub_X = new MSOSubset<BDD>(y, X);
    var z_sub_X = new MSOSubset<BDD>(z, X);
    var isletter_z = new MSOPredicate<BDD>(isLetter, z);
    var psi = new MSOExists<BDD>(
        X,
        (x_sub_X & y_sub_X & ~(new MSOExists<BDD>(z, ~((~((new MSOIsSingleton<BDD>(z)) & z_sub_X)) | isletter_z)))));

    var openFormula = distinctPositions & psi;
    var closedFormula = new MSOExists<BDD>(x, (new MSOExists<BDD>(y, openFormula)));

    // The open formula is compiled both ways as well; the results are not compared.
    var autBDD = openFormula.GetAutomaton(solver);
    var ca = new CartesianAlgebraBDD<BDD>(solver);
    var autPROD = openFormula.GetAutomaton(ca);
    //autBDD.ShowGraph("autBDD");
    //autPROD.ShowGraph("autPROD");

    var viaCartesianAlgebra = BasicAutomata.Restrict(closedFormula.GetAutomaton(ca));
    var viaSolver = closedFormula.GetAutomaton(solver);
    //aut_atLeast2wEE1.ShowGraph("aut_atLeast2wEE1");
    //aut_atLeast2wEE2.ShowGraph("aut_atLeast2wEE2");

    Assert.IsTrue(viaCartesianAlgebra.IsEquivalentWith(viaSolver));
}
/// <summary>
/// Runs the generic <c>TestWS1S_UseOfCharRangePreds</c> scenario with BDD predicates: a 7-bit
/// character solver, the <c>\d</c> and <c>\w</c> character sets, and the solver's regex converter.
/// </summary>
public void TestWS1S_UseOfCharRangePreds_BDD()
{
    var bddSolver = new CharSetSolver(BitWidth.BV7);
    var digits = bddSolver.MkCharSetFromRegexCharClass(@"\d");
    var wordLetters = bddSolver.MkCharSetFromRegexCharClass(@"\w");

    TestWS1S_UseOfCharRangePreds<BDD>(bddSolver, digits, wordLetters, bddSolver.RegexConverter);
}
/// <summary>
/// Builds, with the overloaded WS1S variable operators, a formula stating that there are two
/// distinct positions x and y inside a set X whose positions all carry a character matching
/// <c>(c|C)</c>, and asserts that the closed formula yields equivalent automata from
/// <c>GetAutomaton</c> (Cartesian algebra, then restricted) and from <c>GetAutomatonBDD</c>.
/// </summary>
public void TestWS1S_GetAutomatonBDD_eq_GetAutomaton()
{
    var solver = new CharSetSolver(BitWidth.BV7);
    var labelBitCount = (int)BitWidth.BV7;
    var isDigit = solver.MkCharSetFromRegexCharClass(@"\d");
    var isLetter = solver.MkCharSetFromRegexCharClass(@"(c|C)");

    var x = new WS1SVariable<BDD>("x");
    var y = new WS1SVariable<BDD>("y");
    var z = new WS1SVariable<BDD>("z");
    var X = new WS1SVariable<BDD>("X");

    // there are at least two distinct positions x and y
    var distinctPositions = (x != y) & !x & !y;

    // there is a set X containing x and y, and all positions z in X have characters that satisfy isLetter
    var psi = X ^ ((x <= X) & (y <= X) & ~(z ^ ~(~(!z & z <= X) | isLetter % z)));

    var openFormula = distinctPositions & psi;
    var closedFormula = x ^ (y ^ openFormula);

    // The open formula (free variables x, y) is compiled both ways too; the results are not compared.
    var autBDD = openFormula.GetAutomatonBDD(solver, labelBitCount, x, y);
    var ca = new CartesianAlgebraBDD<BDD>(solver);
    var autPROD = openFormula.GetAutomaton(ca, x, y);
    //autBDD.ShowGraph("autBDD");
    //autPROD.ShowGraph("autPROD");

    var viaCartesianAlgebra = BasicAutomata.Restrict(closedFormula.GetAutomaton(ca));
    var viaBdd = closedFormula.GetAutomatonBDD(solver, labelBitCount);
    //aut_atLeast2wEE1.ShowGraph("aut_atLeast2wEE1");
    //aut_atLeast2wEE2.ShowGraph("aut_atLeast2wEE2");

    Assert.IsTrue(viaCartesianAlgebra.IsEquivalentWith(viaBdd, solver));
}
/// <summary>
/// Checks the size of the 16-bit character set <c>[\w-[\d]]</c>: the domain size reported by the
/// solver must equal both the summed lengths of the set's ranges and the number of UTF-16 code
/// units whose Unicode category is one of the regex <c>\w</c> categories and that are not digits.
/// </summary>
public void TestCardinality3()
{
    var solver = new CharSetSolver(BitWidth.BV16);
    BDD wordNonDigits = solver.MkCharSetFromRegexCharClass(@"[\w-[\d]]");
    int originalNodeCount = wordNonDigits.CountNodes();
    Pair<uint, uint>[] ranges = solver.ToRanges(wordNonDigits);
    BDD rebuilt = solver.MkCharSetFromRanges(ranges);
    int rebuiltNodeCount = rebuilt.CountNodes();
    int domainSize = (int)solver.ComputeDomainSize(rebuilt);

    int sumOfRangeLengths = 0;
    for (int r = 0; r < ranges.Length; r++)
    {
        sumOfRangeLengths += (int)(ranges[r].Second - ranges[r].First) + 1;
    }
    Assert.AreEqual<int>(sumOfRangeLengths, domainSize);

    int referenceCount = 0;
    for (int code = 0; code <= 0xFFFF; code++)
    {
        char ch = (char)code;
        int category = (int)char.GetUnicodeCategory(ch);
        // Categories 0..5, 8 and 18 are the ones regex \w accepts.
        bool isWordCategory = (category >= 0 && category <= 5) || category == 8 || category == 18;
        if (isWordCategory && !char.IsDigit(ch))
        {
            referenceCount++;
        }
    }
    Assert.AreEqual<int>(referenceCount, domainSize);
}
/// <summary>
/// Checks the size of the 16-bit character set <c>\d</c>: the domain size reported by the solver
/// must equal both the summed lengths of the set's ranges and the number of UTF-16 code units
/// for which <c>char.IsDigit</c> is true.
/// </summary>
public void TestCardinality()
{
    var solver = new CharSetSolver(BitWidth.BV16);
    BDD digits = solver.MkCharSetFromRegexCharClass(@"\d");
    int originalNodeCount = digits.CountNodes();
    Pair<uint, uint>[] ranges = solver.ToRanges(digits);
    BDD rebuilt = solver.MkCharSetFromRanges(ranges);
    int rebuiltNodeCount = rebuilt.CountNodes();
    int domainSize = (int)solver.ComputeDomainSize(rebuilt);

    int sumOfRangeLengths = 0;
    for (int r = 0; r < ranges.Length; r++)
    {
        sumOfRangeLengths += (int)(ranges[r].Second - ranges[r].First) + 1;
    }
    Assert.AreEqual<int>(sumOfRangeLengths, domainSize);

    int referenceCount = 0;
    for (int code = 0; code <= 0xFFFF; code++)
    {
        referenceCount += char.IsDigit((char)code) ? 1 : 0;
    }
    Assert.AreEqual<int>(referenceCount, domainSize);
}
/// <summary>
/// Round-trips the 16-bit <c>\d</c> character set through <c>ToRanges</c> and
/// <c>MkCharSetFromRanges</c>, once with the ranges in their original order and once reversed,
/// and checks that both orders yield the same BDD, that the rebuilt set is equivalent to the
/// original, and that the set consists of 31 ranges. Also writes the set to "digits.dot".
/// </summary>
public void TestRanges3()
{
    CharSetSolver solver = new CharSetSolver(BitWidth.BV16);
    BDD cond = solver.MkCharSetFromRegexCharClass(@"\d");
    int cnt = cond.CountNodes();
    Pair<uint, uint>[] ranges = solver.ToRanges(cond);
    BDD set = solver.MkCharSetFromRanges(ranges);
    int nodes = set.CountNodes();

    // Rebuild the set from the same ranges in reverse order.
    var ranges2 = new List<Pair<uint, uint>>(ranges);
    ranges2.Reverse();
    BDD set2 = solver.MkCharSetFromRanges(ranges2);
    // Count the nodes of set2 (the original counted set again, which made the
    // node-count comparison below compare a value with itself).
    int nodes2 = set2.CountNodes();

    var ranges3 = solver.ToRanges(set2);
    BDD set3 = solver.MkCharSetFromRanges(ranges3);
    // NOTE(review): cnt, cnt2 and cnt3 are not asserted on; presumably kept for inspection while debugging.
    int cnt2 = set2.CountNodes();
    int cnt3 = set3.CountNodes();

    Assert.IsTrue(set2 == set3);
    Assert.AreEqual<int>(nodes, nodes2);
    Assert.AreSame(set, set2);
    set.ToDot("digits.dot");

    // check equivalence: the symmetric difference of cond and set must be the solver's False
    bool equiv = solver.MkOr(solver.MkAnd(cond, solver.MkNot(set)), solver.MkAnd(set, solver.MkNot(cond))) == solver.False;
    Assert.IsTrue(equiv);

    Assert.AreEqual<int>(31, ranges.Length);
}
/// <summary>
/// Checks that the BDD a 16-bit solver builds for the class <c>[\x00-\u7FFF]</c> has 3 nodes.
/// </summary>
public void TestNodeCount()
{
    var solver = new CharSetSolver(BitWidth.BV16);
    BDD lowerHalf = solver.MkCharSetFromRegexCharClass(@"[\x00-\u7FFF]");

    int nodeCount = lowerHalf.CountNodes();

    Assert.AreEqual<int>(3, nodeCount);
}
/// <summary>
/// Converts a regex to a symbolic regex, restricts it to the character set <c>[d-x0-8]</c>,
/// and checks the string form of the restricted symbolic regex.
/// </summary>
public void TestSymbolicRegex_Restrict()
{
    CharSetSolver solver = new CharSetSolver();
    var regex = new Regex("^([5-8]|[d-g]+)+([a-k]|()|[1-9][1-9])(?(d)[de]|f)(?([a-k])[de]|f)def[a-g]*(e|8)+$");
    var sr = solver.RegexConverter.ConvertToSymbolicRegex(regex, true);

    var sr1 = sr.Restrict(solver.MkCharSetFromRegexCharClass("[d-x0-8]"));

    // AreEqual (rather than IsTrue on ==) so that a failure reports the actual string.
    Assert.AreEqual<string>(
        "^([5-8]|[d-g]+)+(()|[1-8][1-8]|[d-k])(?(d)[de]|f)(?([d-k])[de]|f)def[d-g]*[8e]+$",
        sr1.ToString());
}
/// <summary>
/// Checks that the 16-bit character set <c>[\u0000-\u7FFF]</c> has a 3-node BDD and contains
/// 2^15 elements.
/// </summary>
public void TestLargeRange()
{
    var solver = new CharSetSolver(BitWidth.BV16);
    BDD lowerHalf = solver.MkCharSetFromRegexCharClass(@"[\u0000-\u7FFF]");

    int elementCount = (int)solver.ComputeDomainSize(lowerHalf);
    int nodeCount = lowerHalf.CountNodes();

    Assert.AreEqual<int>(3, nodeCount);
    Assert.AreEqual<int>(0x8000, elementCount);
}
/// <summary>
/// Checks the 16-bit character set <c>[\u0000-\u7FFF\uA000-\uA00F]</c>: its BDD has 14 nodes,
/// it decomposes into exactly the two ranges 0..0x7FFF and 0xA000..0xA00F, and it contains
/// 2^15 + 2^4 elements.
/// </summary>
public void TestLargeRange2()
{
    CharSetSolver solver = new CharSetSolver(BitWidth.BV16);
    BDD cond = solver.MkCharSetFromRegexCharClass(@"[\u0000-\u7FFF\uA000-\uA00F]");
    uint elems = (uint)solver.ComputeDomainSize(cond);
    int nodes = cond.CountNodes();
    Assert.AreEqual<int>(14, nodes);

    var ranges = solver.ToRanges(cond);
    Assert.AreEqual<int>(2, ranges.Length);
    // Expected value goes first so that a failure message labels expected/actual correctly.
    Assert.AreEqual<uint>(0, ranges[0].First);
    Assert.AreEqual<uint>(0x7FFF, ranges[0].Second);
    Assert.AreEqual<uint>(0xA000, ranges[1].First);
    Assert.AreEqual<uint>(0xA00F, ranges[1].Second);

    Assert.AreEqual<uint>(((uint)1 << 15) + ((uint)1 << 4), elems);
}
/// <summary>
/// Checks the 16-bit character set for the Unicode category <c>\p{Cs}</c> (surrogates): its BDD
/// has 7 nodes, it is the single range 0xD800..0xDFFF, and it contains 2048 elements.
/// Also writes the BDD to "surr.dot".
/// </summary>
public void TestSurrogateRange()
{
    CharSetSolver solver = new CharSetSolver(BitWidth.BV16);
    //high and low surrogate pair elements
    BDD cond = solver.MkCharSetFromRegexCharClass(@"\p{Cs}");
    cond.ToDot("surr.dot");
    int elems = (int)solver.ComputeDomainSize(cond);
    int nodes = cond.CountNodes();
    Assert.AreEqual<int>(7, nodes); //highly compact BDD representation

    var ranges = solver.ToRanges(cond);
    Assert.AreEqual<int>(1, ranges.Length);
    // Expected value goes first so that a failure message labels expected/actual correctly.
    Assert.AreEqual<uint>(0xd800, ranges[0].First);
    Assert.AreEqual<uint>(0xdFFF, ranges[0].Second);

    //the total number of surrogates (there are 1024 low surrogates and 1024 high surrogates)
    Assert.AreEqual<int>(2048, elems);
}
/// <summary>
/// Round-trips the 16-bit <c>\w</c> character set through <c>ToRanges</c> and
/// <c>MkCharSetFromRanges</c> twice, checks that every rebuilt set is the same object as the
/// original, and checks that the first four ranges are '0'-'9', 'A'-'Z', '_' and 'a'-'z'.
/// </summary>
public void TestRanges2b()
{
    BitWidth encoding = BitWidth.BV16;
    var solver = new CharSetSolver(encoding);
    BDD wordChars = solver.MkCharSetFromRegexCharClass(@"\w");

    var firstRanges = solver.ToRanges(wordChars);
    var firstRebuild = solver.MkCharSetFromRanges(firstRanges);
    Tuple<uint, uint>[] ranges = solver.ToRanges(firstRebuild);
    var secondRebuild = solver.MkCharSetFromRanges(ranges);

    Assert.AreSame(firstRebuild, secondRebuild);
    Assert.AreSame(wordChars, firstRebuild);
    //cond.ToDot("cond.dot");

    // Lower and upper bound of each of the first four expected ranges, in order.
    char[][] expectedBounds =
    {
        new[] { '0', '9' },
        new[] { 'A', 'Z' },
        new[] { '_', '_' },
        new[] { 'a', 'z' },
    };
    for (int i = 0; i < expectedBounds.Length; i++)
    {
        Assert.AreEqual<uint>((uint)expectedBounds[i][0], ranges[i].Item1);
        Assert.AreEqual<uint>((uint)expectedBounds[i][1], ranges[i].Item2);
    }
}
/// <summary>
/// Round-trips the 7-bit <c>\w</c> character set through <c>ToRanges</c> and
/// <c>MkCharSetFromRanges</c>, checks that the rebuilt set is the same object as the original,
/// and checks that the set is exactly the four ranges '0'-'9', 'A'-'Z', '_' and 'a'-'z'.
/// </summary>
public void TestRanges2()
{
    BitWidth encoding = BitWidth.BV7;
    var solver = new CharSetSolver(encoding);
    BDD wordChars = solver.MkCharSetFromRegexCharClass(@"\w");
    int originalNodeCount = wordChars.CountNodes();

    Pair<uint, uint>[] ranges = solver.ToRanges(wordChars);
    BDD rebuilt = solver.MkCharSetFromRanges(ranges);
    Assert.AreSame(wordChars, rebuilt);
    int rebuiltNodeCount = rebuilt.CountNodes();

    // Lower and upper bound of each expected range, in order.
    char[][] expectedBounds =
    {
        new[] { '0', '9' },
        new[] { 'A', 'Z' },
        new[] { '_', '_' },
        new[] { 'a', 'z' },
    };
    for (int i = 0; i < expectedBounds.Length; i++)
    {
        Assert.AreEqual<uint>((uint)expectedBounds[i][0], ranges[i].First);
        Assert.AreEqual<uint>((uint)expectedBounds[i][1], ranges[i].Second);
    }
    Assert.AreEqual<int>(4, ranges.Length);
}
/// <summary>
/// Checks the 16-bit character set for the Unicode category <c>\p{Cs}</c> (surrogates): its BDD
/// has 7 nodes, it is the single range 0xD800..0xDFFF, and it contains 2048 elements.
/// Also writes the BDD to "surr.dot".
/// </summary>
public void TestSurrogateRange()
{
    CharSetSolver solver = new CharSetSolver(BitWidth.BV16);
    //high and low surrogate pair elements
    BDD cond = solver.MkCharSetFromRegexCharClass(@"\p{Cs}");
    cond.ToDot("surr.dot");
    int elems = (int)solver.ComputeDomainSize(cond);
    int nodes = cond.CountNodes();
    Assert.AreEqual<int>(7, nodes); //highly compact BDD representation

    var ranges = solver.ToRanges(cond);
    Assert.AreEqual<int>(1, ranges.Length);
    // Expected value goes first so that a failure message labels expected/actual correctly.
    Assert.AreEqual<uint>(0xd800, ranges[0].First);
    Assert.AreEqual<uint>(0xdFFF, ranges[0].Second);

    //the total number of surrogates (there are 1024 low surrogates and 1024 high surrogates)
    Assert.AreEqual<int>(2048, elems);
}
/// <summary>
/// Round-trips the 16-bit <c>\d</c> character set through <c>ToRanges</c> and
/// <c>MkCharSetFromRanges</c>, once with the ranges in their original order and once reversed,
/// and checks that both orders yield the same BDD, that the rebuilt set is equivalent to the
/// original, and that the set consists of 31 ranges. Also writes the set to "digits.dot".
/// </summary>
public void TestRanges3()
{
    CharSetSolver solver = new CharSetSolver(BitWidth.BV16);
    BDD cond = solver.MkCharSetFromRegexCharClass(@"\d");
    int cnt = cond.CountNodes();
    Pair<uint, uint>[] ranges = solver.ToRanges(cond);
    BDD set = solver.MkCharSetFromRanges(ranges);
    int nodes = set.CountNodes();

    // Rebuild the set from the same ranges in reverse order.
    var ranges2 = new List<Pair<uint, uint>>(ranges);
    ranges2.Reverse();
    BDD set2 = solver.MkCharSetFromRanges(ranges2);
    // Count the nodes of set2 (the original counted set again, which made the
    // node-count comparison below compare a value with itself).
    int nodes2 = set2.CountNodes();

    var ranges3 = solver.ToRanges(set2);
    BDD set3 = solver.MkCharSetFromRanges(ranges3);
    // NOTE(review): cnt, cnt2 and cnt3 are not asserted on; presumably kept for inspection while debugging.
    int cnt2 = set2.CountNodes();
    int cnt3 = set3.CountNodes();

    Assert.IsTrue(set2 == set3);
    Assert.AreEqual<int>(nodes, nodes2);
    Assert.AreSame(set, set2);
    set.ToDot("digits.dot");

    // check equivalence: the symmetric difference of cond and set must be the solver's False
    bool equiv = solver.MkOr(solver.MkAnd(cond, solver.MkNot(set)), solver.MkAnd(set, solver.MkNot(cond))) == solver.False;
    Assert.IsTrue(equiv);

    Assert.AreEqual<int>(31, ranges.Length);
}
/// <summary>
/// Round-trips the 16-bit <c>\w</c> character set through <c>ToRanges</c> and
/// <c>MkCharSetFromRanges</c> twice, checks that every rebuilt set is the same object as the
/// original, that the first four ranges are '0'-'9', 'A'-'Z', '_' and 'a'-'z', and that the
/// set consists of 426 ranges in total.
/// </summary>
public void TestRanges2b()
{
    BitWidth encoding = BitWidth.BV16;
    var solver = new CharSetSolver(encoding);
    BDD wordChars = solver.MkCharSetFromRegexCharClass(@"\w");

    var firstRanges = solver.ToRanges(wordChars);
    var firstRebuild = solver.MkCharSetFromRanges(firstRanges);
    Pair<uint, uint>[] ranges = solver.ToRanges(firstRebuild);
    var secondRebuild = solver.MkCharSetFromRanges(ranges);

    Assert.AreSame(firstRebuild, secondRebuild);
    Assert.AreSame(wordChars, firstRebuild);
    //cond.ToDot("cond.dot");

    // Lower and upper bound of each of the first four expected ranges, in order.
    char[][] expectedBounds =
    {
        new[] { '0', '9' },
        new[] { 'A', 'Z' },
        new[] { '_', '_' },
        new[] { 'a', 'z' },
    };
    for (int i = 0; i < expectedBounds.Length; i++)
    {
        Assert.AreEqual<uint>((uint)expectedBounds[i][0], ranges[i].First);
        Assert.AreEqual<uint>((uint)expectedBounds[i][1], ranges[i].Second);
    }
    Assert.AreEqual<int>(426, ranges.Length);
}
/// <summary>
/// Round-trips the 7-bit <c>\w</c> character set through <c>ToRanges</c> and
/// <c>MkCharSetFromRanges</c>, checks that the rebuilt set is the same object as the original,
/// and checks that the set is exactly the four ranges '0'-'9', 'A'-'Z', '_' and 'a'-'z'.
/// </summary>
public void TestRanges2()
{
    BitWidth encoding = BitWidth.BV7;
    var solver = new CharSetSolver(encoding);
    BDD wordChars = solver.MkCharSetFromRegexCharClass(@"\w");
    int originalNodeCount = wordChars.CountNodes();

    Pair<uint, uint>[] ranges = solver.ToRanges(wordChars);
    BDD rebuilt = solver.MkCharSetFromRanges(ranges);
    Assert.AreSame(wordChars, rebuilt);
    int rebuiltNodeCount = rebuilt.CountNodes();

    // Lower and upper bound of each expected range, in order.
    char[][] expectedBounds =
    {
        new[] { '0', '9' },
        new[] { 'A', 'Z' },
        new[] { '_', '_' },
        new[] { 'a', 'z' },
    };
    for (int i = 0; i < expectedBounds.Length; i++)
    {
        Assert.AreEqual<uint>((uint)expectedBounds[i][0], ranges[i].First);
        Assert.AreEqual<uint>((uint)expectedBounds[i][1], ranges[i].Second);
    }
    Assert.AreEqual<int>(4, ranges.Length);
}
/// <summary>
/// Checks that the BDD a 16-bit solver builds for the class <c>[\x00-\u7FFF]</c> has 3 nodes.
/// </summary>
public void TestNodeCount()
{
    var solver = new CharSetSolver(BitWidth.BV16);
    BDD lowerHalf = solver.MkCharSetFromRegexCharClass(@"[\x00-\u7FFF]");

    int nodeCount = lowerHalf.CountNodes();

    Assert.AreEqual<int>(3, nodeCount);
}
/// <summary>
/// Checks the 16-bit character set <c>[\u0000-\u7FFF\uA000-\uA00F]</c>: its BDD has 14 nodes,
/// it decomposes into exactly the two ranges 0..0x7FFF and 0xA000..0xA00F, and it contains
/// 2^15 + 2^4 elements.
/// </summary>
public void TestLargeRange2()
{
    CharSetSolver solver = new CharSetSolver(BitWidth.BV16);
    BDD cond = solver.MkCharSetFromRegexCharClass(@"[\u0000-\u7FFF\uA000-\uA00F]");
    uint elems = (uint)solver.ComputeDomainSize(cond);
    int nodes = cond.CountNodes();
    Assert.AreEqual<int>(14, nodes);

    var ranges = solver.ToRanges(cond);
    Assert.AreEqual<int>(2, ranges.Length);
    // Expected value goes first so that a failure message labels expected/actual correctly.
    Assert.AreEqual<uint>(0, ranges[0].First);
    Assert.AreEqual<uint>(0x7FFF, ranges[0].Second);
    Assert.AreEqual<uint>(0xA000, ranges[1].First);
    Assert.AreEqual<uint>(0xA00F, ranges[1].Second);

    Assert.AreEqual<uint>(((uint)1 << 15) + ((uint)1 << 4), elems);
}
/// <summary>
/// Checks that the 16-bit character set <c>[\u0000-\u7FFF]</c> has a 3-node BDD and contains
/// 2^15 elements.
/// </summary>
public void TestLargeRange()
{
    var solver = new CharSetSolver(BitWidth.BV16);
    BDD lowerHalf = solver.MkCharSetFromRegexCharClass(@"[\u0000-\u7FFF]");

    int elementCount = (int)solver.ComputeDomainSize(lowerHalf);
    int nodeCount = lowerHalf.CountNodes();

    Assert.AreEqual<int>(3, nodeCount);
    Assert.AreEqual<int>(0x8000, elementCount);
}
/// <summary>
/// Checks the size of the 16-bit character set <c>[\w-[\d]]</c>: the domain size reported by the
/// solver must equal both the summed lengths of the set's ranges and the number of UTF-16 code
/// units whose Unicode category is one of the regex <c>\w</c> categories and that are not digits.
/// </summary>
public void TestCardinality3()
{
    var solver = new CharSetSolver(BitWidth.BV16);
    BDD wordNonDigits = solver.MkCharSetFromRegexCharClass(@"[\w-[\d]]");
    int originalNodeCount = wordNonDigits.CountNodes();
    Pair<uint, uint>[] ranges = solver.ToRanges(wordNonDigits);
    BDD rebuilt = solver.MkCharSetFromRanges(ranges);
    int rebuiltNodeCount = rebuilt.CountNodes();
    int domainSize = (int)solver.ComputeDomainSize(rebuilt);

    int sumOfRangeLengths = 0;
    for (int r = 0; r < ranges.Length; r++)
    {
        sumOfRangeLengths += (int)(ranges[r].Second - ranges[r].First) + 1;
    }
    Assert.AreEqual<int>(sumOfRangeLengths, domainSize);

    int referenceCount = 0;
    for (int code = 0; code <= 0xFFFF; code++)
    {
        char ch = (char)code;
        int category = (int)char.GetUnicodeCategory(ch);
        // Categories 0..5, 8 and 18 are the ones regex \w accepts.
        bool isWordCategory = (category >= 0 && category <= 5) || category == 8 || category == 18;
        if (isWordCategory && !char.IsDigit(ch))
        {
            referenceCount++;
        }
    }
    Assert.AreEqual<int>(referenceCount, domainSize);
}
/// <summary>
/// Checks the size of the 16-bit character set <c>\d</c>: the domain size reported by the solver
/// must equal both the summed lengths of the set's ranges and the number of UTF-16 code units
/// for which <c>char.IsDigit</c> is true.
/// </summary>
public void TestCardinality()
{
    var solver = new CharSetSolver(BitWidth.BV16);
    BDD digits = solver.MkCharSetFromRegexCharClass(@"\d");
    int originalNodeCount = digits.CountNodes();
    Pair<uint, uint>[] ranges = solver.ToRanges(digits);
    BDD rebuilt = solver.MkCharSetFromRanges(ranges);
    int rebuiltNodeCount = rebuilt.CountNodes();
    int domainSize = (int)solver.ComputeDomainSize(rebuilt);

    int sumOfRangeLengths = 0;
    for (int r = 0; r < ranges.Length; r++)
    {
        sumOfRangeLengths += (int)(ranges[r].Second - ranges[r].First) + 1;
    }
    Assert.AreEqual<int>(sumOfRangeLengths, domainSize);

    int referenceCount = 0;
    for (int code = 0; code <= 0xFFFF; code++)
    {
        referenceCount += char.IsDigit((char)code) ? 1 : 0;
    }
    Assert.AreEqual<int>(referenceCount, domainSize);
}
/// <summary>
/// Checks the UTF-8 encodings derived from the BDD of a character class against the framework's
/// UTF-8 encoder. The "actual" set is built from the one-, two- and three-byte encodings
/// extracted from the BDD; the "expected" set is built by UTF-8 encoding every non-surrogate
/// UTF-16 code unit that the regex <c>^testClass$</c> matches. The two sets must be equal.
/// </summary>
/// <param name="testClass">Regex character class to test, e.g. <c>\w</c>.</param>
public void ConvertUTF16BDDtoUTF8Test_Helper(string testClass)
{
    var css = new CharSetSolver();
    var bdd = css.MkCharSetFromRegexCharClass(testClass);
    var ascii = bdd & css.MkCharSetFromRange('\0', '\x7F');
    var onebyte_encodings = bdd & ascii;
    var threebyte_encodings = Microsoft.Automata.Utilities.UTF8Encoding.Extract3ByteUTF8Encodings(bdd);
    var twobyte_encodings = Microsoft.Automata.Utilities.UTF8Encoding.Extract2ByteUTF8Encodings(bdd);

    HashSet<Sequence<byte>> utf8_encoding_actual = new HashSet<Sequence<byte>>();

    // One-byte encodings: each character is its own single byte.
    foreach (var c in css.GenerateAllCharacters(onebyte_encodings))
    {
        utf8_encoding_actual.Add(new Sequence<byte>((byte)c));
    }

    // The moves describe the same encodings as an automaton (state 0 initial, state 1 final,
    // fresh intermediate states from 2 upwards); they are only consumed by the commented-out
    // automaton construction near the end of this method.
    List<Move<BDD>> moves = new List<Move<BDD>>();
    int q = 2;
    moves.Add(Move<BDD>.Create(0, 1, onebyte_encodings));

    // Two-byte encodings: every combination of a first byte and a second byte of each pair.
    for (int i = 0; i < twobyte_encodings.Length; i += 1)
    {
        moves.Add(Move<BDD>.Create(0, q, twobyte_encodings[i].Item1));
        moves.Add(Move<BDD>.Create(q, 1, twobyte_encodings[i].Item2));
        q += 1;
        foreach (var first_byte in css.GenerateAllCharacters(twobyte_encodings[i].Item1))
        {
            foreach (var second_byte in css.GenerateAllCharacters(twobyte_encodings[i].Item2))
            {
                utf8_encoding_actual.Add(new Sequence<byte>((byte)first_byte, (byte)second_byte));
            }
        }
    }

    // Three-byte encodings: a first-byte set paired with a list of (second, third) byte sets.
    foreach (var triple in threebyte_encodings)
    {
        foreach (var pair in triple.Item2)
        {
            moves.Add(Move<BDD>.Create(0, q, triple.Item1));
            moves.Add(Move<BDD>.Create(q, q + 1, pair.Item1));
            moves.Add(Move<BDD>.Create(q + 1, 1, pair.Item2));
            q += 2;
            foreach (var first_byte in css.GenerateAllCharacters(triple.Item1))
            {
                foreach (var second_byte in css.GenerateAllCharacters(pair.Item1))
                {
                    foreach (var third_byte in css.GenerateAllCharacters(pair.Item2))
                    {
                        utf8_encoding_actual.Add(new Sequence<byte>((byte)first_byte, (byte)second_byte, (byte)third_byte));
                    }
                }
            }
        }
    }

    // Reference encodings from the framework. The pattern is loop-invariant, so the regex is
    // built once instead of being re-resolved for each of the 65536 code units.
    var classMatcher = new Regex("^" + testClass + "$");
    HashSet<Sequence<byte>> utf8_encoding_expected = new HashSet<Sequence<byte>>();
    for (int i = 0; i <= 0xFFFF; i++)
    {
        char c = (char)i;
        if (!char.IsSurrogate(c) && classMatcher.IsMatch(c.ToString()))
        {
            var bytes = new Sequence<byte>(System.Text.UnicodeEncoding.UTF8.GetBytes(new char[] { c }));
            utf8_encoding_expected.Add(bytes);
        }
    }

    //Automaton<BDD> aut = Automaton<BDD>.Create(css, 0, new int[] { 1 }, moves).Determinize().Minimize();
    //aut.ShowGraph();

    bool encoding_ok = utf8_encoding_expected.SetEquals(utf8_encoding_actual);
    Assert.IsTrue(encoding_ok, "incorrectly encoded character class: " + testClass);
}