/// <summary>
/// Computes byte-level predicates for the members of <paramref name="set"/> that lie in
/// U+0800..U+FFFF, excluding the surrogate range U+D800..U+DFFF.
/// </summary>
/// <param name="set">Character set; its algebra must be a <see cref="CharSetSolver"/>.</param>
/// <returns>
/// One tuple per element of <c>Partition(11)</c> of the restricted set. Item1 is the
/// predicate intersected with the 1110xxxx mask (bits 7, 6, 5 set, bit 4 clear, within
/// \0..\xFF). Item2 holds one pair per element of <c>Partition(5)</c> of the element's
/// Item1; both members of each pair are intersected with the 10xxxxxx mask
/// (bit 7 set, bit 6 clear, within \0..\xFF).
/// </returns>
/// <exception cref="AutomataException">
/// Thrown with <see cref="AutomataExceptionKind.NotSupported"/> when the algebra of
/// <paramref name="set"/> is not a <see cref="CharSetSolver"/>.
/// </exception>
public static Tuple<BDD, Tuple<BDD, BDD>[]>[] Extract3ByteUTF8Encodings(BDD set)
{
    var algebra = set.algebra;
    var solver = algebra as CharSetSolver;
    if (solver == null)
    {
        throw new AutomataException(AutomataExceptionKind.NotSupported);
    }

    // Restrict the input to the characters handled by this method.
    var surrogateRange = solver.MkCharSetFromRange('\uD800', '\uDFFF');
    var threeByteChars = solver.MkCharSetFromRange('\u0800', '\uFFFF').Diff(surrogateRange);
    var singleByteRange = solver.MkCharSetFromRange('\0', '\xFF');
    var restricted = set & threeByteChars;
    var outerParts = restricted.Partition(11);

    // Bit predicates used to assemble the two byte masks.
    var bit5Set = algebra.MkBitTrue(5);
    var bit6Set = algebra.MkBitTrue(6);
    var bit7Set = algebra.MkBitTrue(7);
    var bit4Clear = algebra.MkBitFalse(4);
    var bit6Clear = algebra.MkBitFalse(6);
    var leadMask = bit7Set & bit6Set & bit5Set & bit4Clear & singleByteRange;   // 1110xxxx
    var trailMask = bit7Set & bit6Clear & singleByteRange;                      // 10xxxxxx

    var result = new Tuple<BDD, Tuple<BDD, BDD>[]>[outerParts.Length];
    for (int i = 0; i < outerParts.Length; i++)
    {
        var outer = outerParts[i];
        var leadByte = solver.OmitBitsAbove(outer.Item2 >> 12, 4) & leadMask;

        var innerParts = outer.Item1.Partition(5);
        var trailingBytes = new Tuple<BDD, BDD>[innerParts.Length];
        for (int j = 0; j < innerParts.Length; j++)
        {
            var inner = innerParts[j];
            var secondByte = solver.OmitBitsAbove(inner.Item2 >> 6, 6) & trailMask;
            var thirdByte = inner.Item1 & trailMask;
            trailingBytes[j] = new Tuple<BDD, BDD>(secondByte, thirdByte);
        }

        result[i] = new Tuple<BDD, Tuple<BDD, BDD>[]>(leadByte, trailingBytes);
    }

    return result;
}
/// <summary>
/// Computes byte-level predicates for the members of <paramref name="set"/> that lie in
/// U+0080..U+07FF.
/// </summary>
/// <param name="set">Character set; its algebra must be a <see cref="CharSetSolver"/>.</param>
/// <returns>
/// One pair per element of <c>Partition(5)</c> of the restricted set. Item1 is intersected
/// with the 110xxxxx mask (bits 7 and 6 set, bit 5 clear, within \0..\xFF); Item2 is
/// intersected with the 10xxxxxx mask (bit 7 set, bit 6 clear, within \0..\xFF).
/// </returns>
/// <exception cref="AutomataException">
/// Thrown with <see cref="AutomataExceptionKind.NotSupported"/> when the algebra of
/// <paramref name="set"/> is not a <see cref="CharSetSolver"/>.
/// </exception>
public static Tuple<BDD, BDD>[] Extract2ByteUTF8Encodings(BDD set)
{
    var solver = set.algebra as CharSetSolver;
    if (solver == null)
    {
        throw new AutomataException(AutomataExceptionKind.NotSupported);
    }

    var twoByteChars = solver.MkCharSetFromRange('\x80', '\u07FF');
    var singleByteRange = solver.MkCharSetFromRange('\0', '\xFF');

    // Bit predicates used to assemble the two byte masks.
    var bit6Set = set.algebra.MkBitTrue(6);
    var bit7Set = set.algebra.MkBitTrue(7);
    var bit5Clear = set.algebra.MkBitFalse(5);
    var bit6Clear = set.algebra.MkBitFalse(6);
    var leadMask = bit7Set & bit6Set & bit5Clear & singleByteRange;   // 110xxxxx
    var trailMask = bit7Set & bit6Clear & singleByteRange;            // 10xxxxxx

    var restricted = set & twoByteChars;
    var parts = restricted.Partition(5);

    var encodings = new Tuple<BDD, BDD>[parts.Length];
    for (int i = 0; i < parts.Length; i++)
    {
        var part = parts[i];
        var firstByte = solver.OmitBitsAbove(part.Item2 >> 6, 5) & leadMask;
        var secondByte = part.Item1 & trailMask;
        encodings[i] = new Tuple<BDD, BDD>(firstByte, secondByte);
    }

    return encodings;
}
/// <summary>
/// Builds a dictionary that maps every character having a distinct upper- or lower-case
/// variant to the set (as a BDD) of all characters collected into the same class.
/// </summary>
/// <param name="solver">Solver used to create and enumerate the character sets.</param>
/// <returns>
/// A dictionary in which all members of a class share the same BDD value. Characters
/// without case variants, and characters rejected by the regex check below, are absent.
/// </returns>
/// <remarks>
/// NOTE(review): <c>char.ToUpper</c>, <c>char.ToLower</c> and the <c>(?i:)</c> regex option
/// all follow the current culture, so the table may vary with the culture of the machine
/// that runs this code - confirm whether that is intended.
/// </remarks>
private static Dictionary<char, BDD> ComputeIgnoreCaseDistionary(CharSetSolver solver)
{
    var ignoreCase = new Dictionary<char, BDD>();
    for (uint i = 0; i <= 0xFFFF; i++)
    {
        char c = (char)i;
        char cU = char.ToUpper(c);
        char cL = char.ToLower(c);

        // Nothing to record when c is its own upper- and lower-case form.
        if (c == cU && c == cL)
        {
            continue;
        }

        // Make sure that the regex engine considers c as being equivalent to cU and cL,
        // else ignore c. In some cases c != cU but the regex engine does not consider the
        // characters equivalent wrt the ignore-case option. These characters are:
        //   c=\xB5,cU=\u039C      c=\u0131,cU=I         c=\u017F,cU=S
        //   c=\u0345,cU=\u0399    c=\u03C2,cU=\u03A3    c=\u03D0,cU=\u0392
        //   c=\u03D1,cU=\u0398    c=\u03D5,cU=\u03A6    c=\u03D6,cU=\u03A0
        //   c=\u03F0,cU=\u039A    c=\u03F1,cU=\u03A1    c=\u03F5,cU=\u0395
        //   c=\u1E9B,cU=\u1E60    c=\u1FBE,cU=\u0399
        bool regexAgrees = System.Text.RegularExpressions.Regex.IsMatch(
            cU.ToString() + cL.ToString(),
            "^(?i:" + StringUtility.Escape(c) + ")+$");
        if (!regexAgrees)
        {
            continue;
        }

        // Merge the classes already recorded for c, cU and cL (single lookup each).
        BDD equiv = solver.False;
        BDD known;
        if (ignoreCase.TryGetValue(c, out known))
        {
            equiv = equiv.Or(known);
        }
        if (ignoreCase.TryGetValue(cU, out known))
        {
            equiv = equiv.Or(known);
        }
        if (ignoreCase.TryGetValue(cL, out known))
        {
            equiv = equiv.Or(known);
        }
        equiv = equiv.Or(solver.MkCharSetFromRange(c, c))
                     .Or(solver.MkCharSetFromRange(cU, cU))
                     .Or(solver.MkCharSetFromRange(cL, cL));

        // Every member of the merged class must point at the merged set.
        foreach (char d in solver.GenerateAllCharacters(equiv))
        {
            ignoreCase[d] = equiv;
        }
    }
    return ignoreCase;
}
/// <summary>
/// Builds an automaton from transitions given as integer quadruples.
/// Each transition has the form int[]{fromState, intervalStart, intervalEnd, toState}.
/// If intervalStart = intervalEnd = -1 then this is an epsilon move.
/// </summary>
/// <param name="solver">Solver used to build the character-range predicates.</param>
/// <param name="initialState">Initial state of the automaton.</param>
/// <param name="finalStates">Final states of the automaton.</param>
/// <param name="transitions">Transitions in the format described above.</param>
/// <returns>The automaton created from the given states and moves.</returns>
/// <remarks>
/// Only intervalStart is tested against -1 to recognize an epsilon move. All ranges that
/// connect the same pair of states are combined into one move by disjunction.
/// </remarks>
public static Automaton<BDD> ReadFromRanges(CharSetSolver solver, int initialState, int[] finalStates, IEnumerable<int[]> transitions)
{
    var moves = new Dictionary<Pair<int, int>, BDD>();
    var allmoves = new List<Move<BDD>>();

    foreach (var elems in transitions)
    {
        if (elems[1] == -1)
        {
            // Epsilon moves are never merged, so no dictionary key is needed for them.
            allmoves.Add(Move<BDD>.Epsilon(elems[0], elems[3]));
            continue;
        }

        var key = new Pair<int, int>(elems[0], elems[3]);
        var pred = solver.MkCharSetFromRange((char)elems[1], (char)elems[2]);

        // Single lookup: extend the predicate already stored for this state pair, if any.
        BDD existing;
        if (moves.TryGetValue(key, out existing))
        {
            moves[key] = solver.MkOr(existing, pred);
        }
        else
        {
            moves[key] = pred;
        }
    }

    foreach (var kv in moves)
    {
        allmoves.Add(Move<BDD>.Create(kv.Key.First, kv.Key.Second, kv.Value));
    }

    return Automaton<BDD>.Create(solver, initialState, finalStates, allmoves);
}
/// <summary>
/// Creates an automaton from a list of integer-encoded transitions.
/// Each transition has the form int[]{fromState, intervalStart, intervalEnd, toState}.
/// If intervalStart = intervalEnd = -1 then this is an epsilon move.
/// </summary>
/// <param name="solver">Solver used to build the character-range predicates.</param>
/// <param name="initialState">Initial state of the automaton.</param>
/// <param name="finalStates">Final states of the automaton.</param>
/// <param name="transitions">Transitions in the format described above.</param>
/// <returns>The automaton created from the given states and moves.</returns>
/// <remarks>
/// Only intervalStart is compared with -1 when detecting an epsilon move. Ranges between
/// the same source and target state are merged into a single move by disjunction.
/// </remarks>
public static Automaton<BDD> ReadFromRanges(CharSetSolver solver, int initialState, int[] finalStates, IEnumerable<int[]> transitions)
{
    var moves = new Dictionary<Pair<int, int>, BDD>();
    var allmoves = new List<Move<BDD>>();

    foreach (var elems in transitions)
    {
        int source = elems[0];
        int target = elems[3];

        if (elems[1] == -1)
        {
            // Epsilon moves go straight to the result; they take no part in merging.
            allmoves.Add(Move<BDD>.Epsilon(source, target));
        }
        else
        {
            var key = new Pair<int, int>(source, target);
            var pred = solver.MkCharSetFromRange((char)elems[1], (char)elems[2]);

            // One dictionary lookup instead of ContainsKey followed by the indexer.
            BDD merged;
            if (moves.TryGetValue(key, out merged))
            {
                moves[key] = solver.MkOr(merged, pred);
            }
            else
            {
                moves[key] = pred;
            }
        }
    }

    foreach (var kv in moves)
    {
        allmoves.Add(Move<BDD>.Create(kv.Key.First, kv.Key.Second, kv.Value));
    }

    return Automaton<BDD>.Create(solver, initialState, finalStates, allmoves);
}
/// <summary>
/// Parses an automaton from text. The first non-empty line holds the initial state, the
/// second the final states, and every further line one transition of the form
/// "fromState intervalStart intervalEnd toState"; intervalStart = -1 marks an epsilon move.
/// </summary>
/// <param name="solver">Solver used to build the character-range predicates.</param>
/// <param name="automaton">Textual description; lines end with '\n' and/or '\r'.</param>
/// <returns>The automaton created from the parsed states and moves.</returns>
/// <remarks>
/// Numbers are parsed with the invariant culture because the text is machine-readable and
/// must not depend on the culture of the current thread. Fields may be separated by any
/// run of spaces or tabs. Empty lines are dropped when the text is split into lines, so an
/// automaton without final states needs a second line that contains only whitespace.
/// Whitespace-only transition lines are skipped.
/// </remarks>
public static Automaton<BDD> ReadFromString(CharSetSolver solver, string automaton)
{
    var fieldSeparators = new char[] { ' ', '\t' };
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    Converter<string, int> parseInt = s => int.Parse(s, invariant);

    var lines = automaton.Split(new char[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);
    int initialState = int.Parse(lines[0], invariant);
    int[] finals = Array.ConvertAll(lines[1].Split(fieldSeparators, StringSplitOptions.RemoveEmptyEntries), parseInt);

    var moves = new Dictionary<Pair<int, int>, BDD>();
    var allmoves = new List<Move<BDD>>();

    for (int i = 2; i < lines.Length; i++)
    {
        int[] elems = Array.ConvertAll(lines[i].Split(fieldSeparators, StringSplitOptions.RemoveEmptyEntries), parseInt);
        if (elems.Length == 0)
        {
            // Line contained only whitespace.
            continue;
        }

        if (elems[1] == -1)
        {
            allmoves.Add(Move<BDD>.Epsilon(elems[0], elems[3]));
            continue;
        }

        var key = new Pair<int, int>(elems[0], elems[3]);
        var pred = solver.MkCharSetFromRange((char)elems[1], (char)elems[2]);

        // Ranges between the same pair of states are merged by disjunction (single lookup).
        BDD existing;
        if (moves.TryGetValue(key, out existing))
        {
            moves[key] = solver.MkOr(existing, pred);
        }
        else
        {
            moves[key] = pred;
        }
    }

    foreach (var kv in moves)
    {
        allmoves.Add(Move<BDD>.Create(kv.Key.First, kv.Key.Second, kv.Value));
    }

    return Automaton<BDD>.Create(solver, initialState, finals, allmoves);
}
/// <summary>
/// Writes the C# source of a <c>public static ulong[] ignorecase</c> field to
/// <paramref name="sw"/>. The array elements are the serialized form of a BDD built from
/// the dictionary returned by <c>ComputeIgnoreCaseDistionary</c>.
/// </summary>
/// <param name="sw">Writer that receives the generated source lines.</param>
private static void CreateUlongArray(StreamWriter sw)
{
    // Doc comment and opening of the generated array declaration.
    sw.WriteLine("/// <summary>");
    sw.WriteLine("/// Serialized BDD for mapping characters to their case-ignoring equivalence classes.");
    sw.WriteLine("/// </summary>");
    sw.WriteLine("public static ulong[] ignorecase = new ulong[]{");

    var charSolver = new CharSetSolver();
    Dictionary<char, BDD> classes = ComputeIgnoreCaseDistionary(charSolver);

    // Combine, for every entry, the key shifted left by 16 bits with the entry's class.
    // Presumably this puts the key in bits 16..31 and the class member in bits 0..15.
    BDD relation = charSolver.False;
    foreach (var entry in classes)
    {
        var keySingleton = charSolver.MkCharSetFromRange(entry.Key, entry.Key);
        var shiftedKey = keySingleton.ShiftLeft(16);
        relation = relation.Or(shiftedKey.And(entry.Value));
    }

    // One 16-digit hexadecimal literal per serialized element.
    var serialized = charSolver.Serialize(relation);
    foreach (var word in serialized)
    {
        sw.WriteLine("0x{0:X16},", word);
    }

    sw.WriteLine("};"); //end of array
}
/// <summary>
/// Builds the BDD for the range \0..\x0F with a 7-bit solver and writes it in dot format
/// to "bdd2.dot".
/// </summary>
public void TestDotGen()
{
    var sevenBitSolver = new CharSetSolver(BitWidth.BV7);
    BDD lowRange = sevenBitSolver.MkCharSetFromRange('\u0000', '\u000F');

    // The domain size is computed but not asserted on.
    int domainSize = (int)sevenBitSolver.ComputeDomainSize(lowRange);

    lowRange.ToDot(@"bdd2.dot");
}
/// <summary>
/// Builds the BDD for the digits '0'..'9' with a 7-bit solver and writes it in dot format.
/// </summary>
/// <remarks>
/// NOTE(review): the output path is an absolute path on one particular machine; this
/// method presumably fails wherever that directory does not exist.
/// </remarks>
public void TestDotGenTmp()
{
    var sevenBitSolver = new CharSetSolver(BitWidth.BV7);
    BDD digits = sevenBitSolver.MkCharSetFromRange('0', '9');

    // The domain size is computed but not asserted on.
    int domainSize = (int)sevenBitSolver.ComputeDomainSize(digits);

    digits.ToDot(@"C:\git\loris\msrpapers\CACM\figures\is_digit_bdd.dot");
}
/// <summary>
/// Creates a generator for the given automaton.
/// </summary>
/// <param name="automaton">Automaton stored by the generator.</param>
/// <param name="solver">Character set solver stored by the generator and used to build the ASCII set.</param>
/// <param name="classname">Class name stored by the generator.</param>
/// <param name="namespacename">Namespace name stored by the generator.</param>
/// <param name="OptimzeForAsciiInput">Passed on to the <c>HelperPredicates</c> constructor.</param>
public CSharpGenerator(Automaton<BDD> automaton, CharSetSolver solver, string classname, string namespacename, bool OptimzeForAsciiInput = true)
{
    // Plain copies of the constructor arguments.
    this.automaton = automaton;
    this.solver = solver;
    this.classname = classname;
    this.namespacename = namespacename;

    // Values derived from the solver: the set \0..\x7F, then the helper predicates.
    ASCII = solver.MkCharSetFromRange('\u0000', '\u007F');
    helper_predicates = new HelperPredicates(solver, OptimzeForAsciiInput);
}
/// <summary>
/// Initializes the generator with an automaton, its solver and the names to store.
/// </summary>
/// <param name="automaton">Automaton stored by the generator.</param>
/// <param name="solver">Character set solver stored by the generator and used to build the ASCII set.</param>
/// <param name="classname">Class name stored by the generator.</param>
/// <param name="namespacename">Namespace name stored by the generator.</param>
/// <param name="OptimzeForAsciiInput">Forwarded to the <c>HelperPredicates</c> constructor.</param>
public CSharpGenerator(Automaton<BDD> automaton, CharSetSolver solver, string classname, string namespacename, bool OptimzeForAsciiInput = true)
{
    // Names first, then the automaton and its solver.
    this.classname = classname;
    this.namespacename = namespacename;
    this.automaton = automaton;
    this.solver = solver;

    // The set \0..\x7F is created before the helper predicates, as in the original order.
    ASCII = solver.MkCharSetFromRange('\0', (char)0x7F);
    helper_predicates = new HelperPredicates(solver, OptimzeForAsciiInput);
}
/// <summary>
/// Checks that the set 'A'..'Y' built with a 16-bit solver converts back to exactly one
/// range with bounds 'A' and 'Y'.
/// </summary>
public void TestRanges()
{
    const char low = 'A';
    const char high = 'Y';

    var solver16 = new CharSetSolver(BitWidth.BV16);
    BDD letters = solver16.MkCharSetFromRange(low, high);

    Pair<uint, uint>[] intervals = solver16.ToRanges(letters);

    Assert.AreEqual<int>(1, intervals.Length);
    Assert.AreEqual<uint>((uint)low, intervals[0].First);
    Assert.AreEqual<uint>((uint)high, intervals[0].Second);
}
/// <summary>
/// Parses an automaton from text. The first non-empty line holds the initial state, the
/// second the final states, and every further line one transition of the form
/// "fromState intervalStart intervalEnd toState"; intervalStart = -1 marks an epsilon move.
/// </summary>
/// <param name="solver">Solver used to build the character-range predicates.</param>
/// <param name="automaton">Textual description; lines end with '\n' and/or '\r'.</param>
/// <returns>The automaton created from the parsed states and moves.</returns>
/// <remarks>
/// Numbers are parsed with the invariant culture so that the result does not depend on
/// the culture of the current thread. Fields may be separated by any run of spaces or
/// tabs. Empty lines are dropped when the text is split into lines, so an automaton
/// without final states needs a second line that contains only whitespace.
/// Whitespace-only transition lines are skipped.
/// </remarks>
public static Automaton<BDD> ReadFromString(CharSetSolver solver, string automaton)
{
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    var lineBreaks = new char[] { '\n', '\r' };
    var blanks = new char[] { ' ', '\t' };

    var lines = automaton.Split(lineBreaks, StringSplitOptions.RemoveEmptyEntries);
    int initialState = int.Parse(lines[0], invariant);

    string[] finalTokens = lines[1].Split(blanks, StringSplitOptions.RemoveEmptyEntries);
    int[] finals = Array.ConvertAll(finalTokens, s => int.Parse(s, invariant));

    var moves = new Dictionary<Pair<int, int>, BDD>();
    var allmoves = new List<Move<BDD>>();

    for (int i = 2; i < lines.Length; i++)
    {
        string[] tokens = lines[i].Split(blanks, StringSplitOptions.RemoveEmptyEntries);
        if (tokens.Length == 0)
        {
            // Line contained only whitespace.
            continue;
        }

        int[] elems = Array.ConvertAll(tokens, s => int.Parse(s, invariant));
        if (elems[1] == -1)
        {
            allmoves.Add(Move<BDD>.Epsilon(elems[0], elems[3]));
        }
        else
        {
            var key = new Pair<int, int>(elems[0], elems[3]);
            var pred = solver.MkCharSetFromRange((char)elems[1], (char)elems[2]);

            // One lookup; ranges between the same pair of states are merged by disjunction.
            BDD soFar;
            if (moves.TryGetValue(key, out soFar))
            {
                moves[key] = solver.MkOr(soFar, pred);
            }
            else
            {
                moves[key] = pred;
            }
        }
    }

    foreach (var kv in moves)
    {
        allmoves.Add(Move<BDD>.Create(kv.Key.First, kv.Key.Second, kv.Value));
    }

    return Automaton<BDD>.Create(solver, initialState, finals, allmoves);
}
/// <summary>
/// Reads an automaton from a text file. Line 1 holds the initial state, line 2 the final
/// states, and every further line one transition of the form
/// "fromState intervalStart intervalEnd toState"; intervalStart = -1 marks an epsilon move.
/// </summary>
/// <param name="solver">Solver used to build the character-range predicates.</param>
/// <param name="file">Path of the file to read.</param>
/// <returns>The automaton created from the parsed states and moves.</returns>
/// <remarks>
/// Numbers are parsed with the invariant culture so that the result does not depend on
/// the culture of the current thread. Fields may be separated by any run of spaces or
/// tabs. An empty or whitespace-only second line yields an empty array of final states,
/// and empty or whitespace-only transition lines are skipped.
/// </remarks>
public static Automaton<BDD> Read(CharSetSolver solver, string file)
{
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    var blanks = new char[] { ' ', '\t' };
    Converter<string, int> parseInt = s => int.Parse(s, invariant);

    var lines = System.IO.File.ReadAllLines(file);
    int initialState = int.Parse(lines[0], invariant);
    int[] finals = Array.ConvertAll(lines[1].Split(blanks, StringSplitOptions.RemoveEmptyEntries), parseInt);

    var moves = new Dictionary<Tuple<int, int>, BDD>();
    var allmoves = new List<Move<BDD>>();

    for (int i = 2; i < lines.Length; i++)
    {
        int[] elems = Array.ConvertAll(lines[i].Split(blanks, StringSplitOptions.RemoveEmptyEntries), parseInt);
        if (elems.Length == 0)
        {
            // Blank line.
            continue;
        }

        if (elems[1] == -1)
        {
            allmoves.Add(Move<BDD>.Epsilon(elems[0], elems[3]));
            continue;
        }

        var key = new Tuple<int, int>(elems[0], elems[3]);
        var pred = solver.MkCharSetFromRange((char)elems[1], (char)elems[2]);

        // One lookup; ranges between the same pair of states are merged by disjunction.
        BDD existing;
        if (moves.TryGetValue(key, out existing))
        {
            moves[key] = solver.MkOr(existing, pred);
        }
        else
        {
            moves[key] = pred;
        }
    }

    foreach (var kv in moves)
    {
        allmoves.Add(Move<BDD>.Create(kv.Key.Item1, kv.Key.Item2, kv.Value));
    }

    return Automaton<BDD>.Create(solver, initialState, finals, allmoves);
}
/// <summary>
/// Verifies that converting the set 'A'..'Y' (16-bit solver) to ranges yields a single
/// range whose bounds are 'A' and 'Y'.
/// </summary>
public void TestRanges()
{
    var charSolver = new CharSetSolver(BitWidth.BV16);
    BDD upperCaseRun = charSolver.MkCharSetFromRange('A', 'Y');

    Pair<uint, uint>[] found = charSolver.ToRanges(upperCaseRun);

    // Exactly one range is expected ...
    Assert.AreEqual<int>(1, found.Length);

    // ... and it must span 'A' to 'Y'.
    uint expectedFirst = 'A';
    uint expectedLast = 'Y';
    Assert.AreEqual<uint>(expectedFirst, found[0].First);
    Assert.AreEqual<uint>(expectedLast, found[0].Second);
}
/// <summary>
/// Checks that the one-, two- and three-byte predicates derived from the character class
/// <paramref name="testClass"/> denote exactly the UTF-8 encodings of the non-surrogate
/// characters that the class matches.
/// </summary>
/// <param name="testClass">Regex character class under test.</param>
public void ConvertUTF16BDDtoUTF8Test_Helper(string testClass)
{
    var solver = new CharSetSolver();
    var classBdd = solver.MkCharSetFromRegexCharClass(testClass);
    var asciiPart = classBdd & solver.MkCharSetFromRange('\0', '\x7F');
    var oneByte = classBdd & asciiPart;
    var threeByte = Microsoft.Automata.Utilities.UTF8Encoding.Extract3ByteUTF8Encodings(classBdd);
    var twoByte = Microsoft.Automata.Utilities.UTF8Encoding.Extract2ByteUTF8Encodings(classBdd);

    // Byte sequences denoted by the extracted predicates.
    var actual = new HashSet<Sequence<byte>>();
    foreach (var single in solver.GenerateAllCharacters(oneByte))
    {
        actual.Add(new Sequence<byte>((byte)single));
    }

    // The moves describe the same encodings as an automaton (state 0 = start, 1 = final).
    // They are only needed for manual inspection, e.g.:
    //   Automaton<BDD>.Create(solver, 0, new int[] { 1 }, moves).Determinize().Minimize().ShowGraph();
    var moves = new List<Move<BDD>>();
    int nextState = 2;
    moves.Add(Move<BDD>.Create(0, 1, oneByte));

    foreach (var encoding in twoByte)
    {
        moves.Add(Move<BDD>.Create(0, nextState, encoding.Item1));
        moves.Add(Move<BDD>.Create(nextState, 1, encoding.Item2));
        nextState += 1;

        foreach (var b1 in solver.GenerateAllCharacters(encoding.Item1))
        {
            foreach (var b2 in solver.GenerateAllCharacters(encoding.Item2))
            {
                actual.Add(new Sequence<byte>((byte)b1, (byte)b2));
            }
        }
    }

    foreach (var lead in threeByte)
    {
        foreach (var tail in lead.Item2)
        {
            moves.Add(Move<BDD>.Create(0, nextState, lead.Item1));
            moves.Add(Move<BDD>.Create(nextState, nextState + 1, tail.Item1));
            moves.Add(Move<BDD>.Create(nextState + 1, 1, tail.Item2));
            nextState += 2;

            foreach (var b1 in solver.GenerateAllCharacters(lead.Item1))
            {
                foreach (var b2 in solver.GenerateAllCharacters(tail.Item1))
                {
                    foreach (var b3 in solver.GenerateAllCharacters(tail.Item2))
                    {
                        actual.Add(new Sequence<byte>((byte)b1, (byte)b2, (byte)b3));
                    }
                }
            }
        }
    }

    // Reference result: encode every non-surrogate character that the regex engine accepts.
    string anchoredPattern = "^" + testClass + "$";
    var expected = new HashSet<Sequence<byte>>();
    for (int code = 0; code <= 0xFFFF; code++)
    {
        char candidate = (char)code;
        if (char.IsSurrogate(candidate))
        {
            continue;
        }
        if (!Regex.IsMatch(candidate.ToString(), anchoredPattern))
        {
            continue;
        }

        byte[] utf8 = System.Text.UnicodeEncoding.UTF8.GetBytes(new char[] { candidate });
        expected.Add(new Sequence<byte>(utf8));
    }

    // Both sets must contain each other.
    bool sameEncodings = expected.IsSubsetOf(actual) && actual.IsSubsetOf(expected);
    Assert.IsTrue(sameEncodings, "incorrectly ecoded character class: " + testClass);
}
/// <summary>
/// Emits, to <paramref name="sw"/>, the C# source of a <c>public static ulong[] ignorecase</c>
/// field whose elements are the serialized BDD built from the dictionary returned by
/// <c>ComputeIgnoreCaseDistionary</c>.
/// </summary>
/// <param name="sw">Writer that receives the generated source lines.</param>
private static void CreateUlongArray(StreamWriter sw)
{
    // Header of the generated field.
    sw.WriteLine("/// <summary>");
    sw.WriteLine("/// Serialized BDD for mapping characters to their case-ignoring equivalence classes.");
    sw.WriteLine("/// </summary>");
    sw.WriteLine("public static ulong[] ignorecase = new ulong[]{");

    var bddSolver = new CharSetSolver();
    Dictionary<char, BDD> table = ComputeIgnoreCaseDistionary(bddSolver);

    // Accumulate (key shifted left by 16 bits) AND (class of the key) over all entries.
    BDD combined = bddSolver.False;
    foreach (var pair in table)
    {
        var singleton = bddSolver.MkCharSetFromRange(pair.Key, pair.Key);
        var classOfKey = pair.Value;
        combined = combined.Or(singleton.ShiftLeft(16).And(classOfKey));
    }

    // Body of the array: one hexadecimal literal per serialized element.
    var words = bddSolver.Serialize(combined);
    int index = 0;
    while (index < words.Length)
    {
        sw.WriteLine("0x{0:X16},", words[index]);
        index++;
    }

    sw.WriteLine("};"); //end of array
}
/// <summary>
/// Builds a dictionary that maps every character having a distinct upper- or lower-case
/// variant to the set (as a BDD) of all characters collected into the same class.
/// </summary>
/// <param name="solver">Solver used to create and enumerate the character sets.</param>
/// <returns>
/// A dictionary in which all members of a class share the same BDD value. Characters
/// without case variants, and characters rejected by the regex check below, are absent.
/// </returns>
/// <remarks>
/// NOTE(review): <c>char.ToUpper</c>, <c>char.ToLower</c> and the <c>(?i:)</c> regex option
/// all follow the current culture, so the table may vary with the culture of the machine
/// that runs this code - confirm whether that is intended.
/// </remarks>
private static Dictionary<char, BDD> ComputeIgnoreCaseDistionary(CharSetSolver solver)
{
    var ignoreCase = new Dictionary<char, BDD>();
    for (uint i = 0; i <= 0xFFFF; i++)
    {
        char c = (char)i;
        char cU = char.ToUpper(c);
        char cL = char.ToLower(c);

        // Skip characters that are their own upper- and lower-case form.
        if (c == cU && c == cL)
        {
            continue;
        }

        // Make sure that the regex engine considers c as being equivalent to cU and cL,
        // else ignore c. In some cases c != cU but the regex engine does not consider the
        // characters equivalent wrt the ignore-case option. These characters are:
        //   c=\xB5,cU=\u039C      c=\u0131,cU=I         c=\u017F,cU=S
        //   c=\u0345,cU=\u0399    c=\u03C2,cU=\u03A3    c=\u03D0,cU=\u0392
        //   c=\u03D1,cU=\u0398    c=\u03D5,cU=\u03A6    c=\u03D6,cU=\u03A0
        //   c=\u03F0,cU=\u039A    c=\u03F1,cU=\u03A1    c=\u03F5,cU=\u0395
        //   c=\u1E9B,cU=\u1E60    c=\u1FBE,cU=\u0399
        string variants = cU.ToString() + cL.ToString();
        string pattern = "^(?i:" + StringUtility.Escape(c) + ")+$";
        if (!System.Text.RegularExpressions.Regex.IsMatch(variants, pattern))
        {
            continue;
        }

        // Union of the classes already known for c, cU and cL (one lookup per character).
        BDD equiv = solver.False;
        BDD previous;
        if (ignoreCase.TryGetValue(c, out previous))
        {
            equiv = equiv.Or(previous);
        }
        if (ignoreCase.TryGetValue(cU, out previous))
        {
            equiv = equiv.Or(previous);
        }
        if (ignoreCase.TryGetValue(cL, out previous))
        {
            equiv = equiv.Or(previous);
        }
        equiv = equiv.Or(solver.MkCharSetFromRange(c, c))
                     .Or(solver.MkCharSetFromRange(cU, cU))
                     .Or(solver.MkCharSetFromRange(cL, cL));

        // Re-point every member of the merged class at the merged set.
        foreach (char d in solver.GenerateAllCharacters(equiv))
        {
            ignoreCase[d] = equiv;
        }
    }
    return ignoreCase;
}