예제 #1
0
        /// <summary>
        /// Restricts <paramref name="set"/> to the range U+0800..U+FFFF without the surrogates
        /// U+D800..U+DFFF and turns the result into byte-level predicates for a three-byte encoding.
        /// </summary>
        /// <param name="set">Character set whose algebra must be a <c>CharSetSolver</c>.</param>
        /// <returns>
        /// One entry per element of <c>Partition(11)</c>: Item1 is the first-byte predicate and Item2 holds,
        /// for every element of the nested <c>Partition(5)</c>, the pair (second-byte predicate, third-byte predicate).
        /// </returns>
        /// <exception cref="AutomataException">The algebra of <paramref name="set"/> is not a <c>CharSetSolver</c>.</exception>
        public static Tuple <BDD, Tuple <BDD, BDD>[]>[] Extract3ByteUTF8Encodings(BDD set)
        {
            var algebra = set.algebra;
            var solver  = algebra as CharSetSolver;

            if (solver == null)
            {
                throw new AutomataException(AutomataExceptionKind.NotSupported);
            }

            var surrogateChars = solver.MkCharSetFromRange('\uD800', '\uDFFF');
            var threeByteChars = solver.MkCharSetFromRange('\u0800', '\uFFFF').Diff(surrogateChars);
            var anyByte        = solver.MkCharSetFromRange('\0', '\xFF');

            var candidates = set & threeByteChars;
            var outerParts = candidates.Partition(11);

            var bit5Set   = algebra.MkBitTrue(5);
            var bit6Set   = algebra.MkBitTrue(6);
            var bit7Set   = algebra.MkBitTrue(7);
            var bit4Clear = algebra.MkBitFalse(4);
            var bit6Clear = algebra.MkBitFalse(6);

            // First byte: bits 7, 6 and 5 required, bit 4 excluded, value limited to 0x00..0xFF
            // (presumably the 1110xxxx lead-byte pattern of UTF-8).
            var leadMask = bit7Set & bit6Set & bit5Set & bit4Clear & anyByte;
            // Following bytes: bit 7 required, bit 6 excluded (presumably the 10xxxxxx continuation pattern).
            var tailMask = bit7Set & bit6Clear & anyByte;

            var encodings = new Tuple <BDD, Tuple <BDD, BDD>[]>[outerParts.Length];
            for (int i = 0; i < outerParts.Length; i++)
            {
                var outer = outerParts[i];

                // The bits above position 11 of the character become the payload of the first byte.
                BDD firstByte = solver.OmitBitsAbove(outer.Item2 >> 12, 4) & leadMask;

                var innerParts = outer.Item1.Partition(5);
                var tails      = new Tuple <BDD, BDD>[innerParts.Length];
                for (int j = 0; j < innerParts.Length; j++)
                {
                    var inner      = innerParts[j];
                    BDD secondByte = solver.OmitBitsAbove(inner.Item2 >> 6, 6) & tailMask;
                    BDD thirdByte  = inner.Item1 & tailMask;
                    tails[j] = new Tuple <BDD, BDD>(secondByte, thirdByte);
                }

                encodings[i] = new Tuple <BDD, Tuple <BDD, BDD>[]>(firstByte, tails);
            }

            return(encodings);
        }
예제 #2
0
        /// <summary>
        /// Restricts <paramref name="set"/> to the range U+0080..U+07FF and turns the result into
        /// byte-level predicates for a two-byte encoding.
        /// </summary>
        /// <param name="set">Character set whose algebra must be a <c>CharSetSolver</c>.</param>
        /// <returns>
        /// One pair per element of <c>Partition(5)</c>: Item1 is the first-byte predicate,
        /// Item2 the second-byte predicate.
        /// </returns>
        /// <exception cref="AutomataException">The algebra of <paramref name="set"/> is not a <c>CharSetSolver</c>.</exception>
        public static Tuple <BDD, BDD>[] Extract2ByteUTF8Encodings(BDD set)
        {
            var solver = set.algebra as CharSetSolver;

            if (solver == null)
            {
                throw new AutomataException(AutomataExceptionKind.NotSupported);
            }

            var twoByteChars = solver.MkCharSetFromRange('\x80', '\u07FF');
            var anyByte      = solver.MkCharSetFromRange('\0', '\xFF');

            var bit6Set   = set.algebra.MkBitTrue(6);
            var bit7Set   = set.algebra.MkBitTrue(7);
            var bit5Clear = set.algebra.MkBitFalse(5);
            var bit6Clear = set.algebra.MkBitFalse(6);

            // First byte: bits 7 and 6 required, bit 5 excluded (presumably the 110xxxxx lead-byte pattern).
            var leadMask = bit7Set & bit6Set & bit5Clear & anyByte;
            // Second byte: bit 7 required, bit 6 excluded (presumably the 10xxxxxx continuation pattern).
            var tailMask = bit7Set & bit6Clear & anyByte;

            var parts     = (set & twoByteChars).Partition(5);
            var encodings = new Tuple <BDD, BDD>[parts.Length];
            for (int i = 0; i < parts.Length; i++)
            {
                var part = parts[i];

                // The bits above position 5 of the character become the payload of the first byte.
                BDD firstByte  = solver.OmitBitsAbove(part.Item2 >> 6, 5) & leadMask;
                BDD secondByte = part.Item1 & tailMask;
                encodings[i] = new Tuple <BDD, BDD>(firstByte, secondByte);
            }

            return(encodings);
        }
        /// <summary>
        /// Scans all 16-bit character codes and groups characters that differ only by case into
        /// equivalence classes, keeping a group only when the .NET regex engine confirms it under
        /// the ignore-case option.
        /// </summary>
        /// <param name="solver">Solver used to create and union the character sets.</param>
        /// <returns>Map from every grouped character to the BDD of its whole equivalence class.</returns>
        private static Dictionary<char, BDD> ComputeIgnoreCaseDistionary(CharSetSolver solver)
        {
            var classes = new Dictionary<char, BDD>();
            for (int code = char.MinValue; code <= char.MaxValue; code++)
            {
                char ch = (char)code;
                char upper = char.ToUpper(ch);
                char lower = char.ToLower(ch);

                // A character that equals both of its case conversions has no case variants.
                if (ch == upper && ch == lower)
                    continue;

                // char.ToUpper/ToLower and the regex engine do not always agree: for some characters
                // ch != upper, yet the engine does not treat them as equivalent under (?i).
                // Those characters (ch -> upper) are:
                //   \xB5 -> \u039C, \u0131 -> I, \u017F -> S, \u0345 -> \u0399, \u03C2 -> \u03A3,
                //   \u03D0 -> \u0392, \u03D1 -> \u0398, \u03D5 -> \u03A6, \u03D6 -> \u03A0,
                //   \u03F0 -> \u039A, \u03F1 -> \u03A1, \u03F5 -> \u0395, \u1E9B -> \u1E60, \u1FBE -> \u0399
                // They are skipped by requiring that (?i:ch)+ matches the string "upper lower".
                string probe = upper.ToString() + lower.ToString();
                string pattern = "^(?i:" + StringUtility.Escape(ch) + ")+$";
                if (!System.Text.RegularExpressions.Regex.IsMatch(probe, pattern))
                    continue;

                // Start from the classes already recorded for any of the three characters ...
                BDD merged = solver.False;
                BDD known;
                if (classes.TryGetValue(ch, out known))
                    merged = merged.Or(known);
                if (classes.TryGetValue(upper, out known))
                    merged = merged.Or(known);
                if (classes.TryGetValue(lower, out known))
                    merged = merged.Or(known);

                // ... and add the three characters themselves.
                merged = merged.Or(solver.MkCharSetFromRange(ch, ch)).Or(solver.MkCharSetFromRange(upper, upper)).Or(solver.MkCharSetFromRange(lower, lower));

                // Every member of the merged class must point at the merged class.
                foreach (char member in solver.GenerateAllCharacters(merged))
                    classes[member] = merged;
            }
            return classes;
        }
        /// <summary>
        /// Builds an automaton from integer-encoded transitions.
        /// Each transition has the form int[]{fromState, intervalStart, intervalEnd, toState};
        /// a transition whose intervalStart is -1 is added as an epsilon move.
        /// Character intervals between the same pair of states are combined into a single move.
        /// </summary>
        /// <param name="solver">Solver used to create and union the interval predicates.</param>
        /// <param name="initialState">Initial state of the automaton.</param>
        /// <param name="finalStates">Final states of the automaton.</param>
        /// <param name="transitions">Transitions in the format described above.</param>
        /// <returns>The automaton created from the given states and moves.</returns>
        public static Automaton<BDD> ReadFromRanges(CharSetSolver solver, int initialState, int[] finalStates, IEnumerable<int[]> transitions)
        {
            var guards = new Dictionary<Pair<int, int>, BDD>();
            var result = new List<Move<BDD>>();

            foreach (var t in transitions)
            {
                int source = t[0];
                int target = t[3];

                if (t[1] == -1)
                {
                    result.Add(Move<BDD>.Epsilon(source, target));
                    continue;
                }

                var edge = new Pair<int, int>(source, target);
                var range = solver.MkCharSetFromRange((char)t[1], (char)t[2]);

                BDD existing;
                if (guards.TryGetValue(edge, out existing))
                    guards[edge] = solver.MkOr(existing, range);
                else
                    guards[edge] = range;
            }

            // Epsilon moves were added above; now append one move per (source, target) pair.
            foreach (var entry in guards)
                result.Add(Move<BDD>.Create(entry.Key.First, entry.Key.Second, entry.Value));

            return Automaton<BDD>.Create(solver, initialState, finalStates, result);
        }
        /// <summary>
        /// Creates an automaton from transitions given as integer arrays.
        /// Each transition has the form int[]{fromState, intervalStart, intervalEnd, toState}.
        /// If intervalStart is -1 the transition becomes an epsilon move; otherwise the interval is
        /// unioned into the predicate kept for the (fromState, toState) pair.
        /// </summary>
        /// <param name="solver">Solver used to create and union the interval predicates.</param>
        /// <param name="initialState">Initial state of the automaton.</param>
        /// <param name="finalStates">Final states of the automaton.</param>
        /// <param name="transitions">Transitions in the format described above.</param>
        /// <returns>The automaton created from the given states and moves.</returns>
        public static Automaton <BDD> ReadFromRanges(CharSetSolver solver, int initialState, int[] finalStates, IEnumerable <int[]> transitions)
        {
            var predicateByEdge = new Dictionary <Pair <int, int>, BDD>();
            var moveList        = new List <Move <BDD> >();

            foreach (var transition in transitions)
            {
                var edge = new Pair <int, int>(transition[0], transition[3]);

                bool isEpsilon = transition[1] == -1;
                if (isEpsilon)
                {
                    moveList.Add(Move <BDD> .Epsilon(transition[0], transition[3]));
                    continue;
                }

                var interval = solver.MkCharSetFromRange((char)transition[1], (char)transition[2]);

                BDD soFar;
                if (!predicateByEdge.TryGetValue(edge, out soFar))
                {
                    predicateByEdge[edge] = interval;
                }
                else
                {
                    predicateByEdge[edge] = solver.MkOr(soFar, interval);
                }
            }

            // One character move per distinct edge, appended after the epsilon moves.
            foreach (var pair in predicateByEdge)
            {
                moveList.Add(Move <BDD> .Create(pair.Key.First, pair.Key.Second, pair.Value));
            }

            return(Automaton <BDD> .Create(solver, initialState, finalStates, moveList));
        }
        /// <summary>
        /// Parses an automaton from its textual form.
        /// After empty lines are removed, the first line is the initial state, the second line lists the
        /// final states separated by spaces, and every further line is one transition
        /// "fromState intervalStart intervalEnd toState". An intervalStart of -1 denotes an epsilon move.
        /// Character intervals between the same pair of states are combined into a single move.
        /// </summary>
        /// <remarks>
        /// Numbers are parsed with the invariant culture so the result does not depend on the culture of
        /// the current thread. Leading, trailing and repeated spaces are tolerated, and lines that contain
        /// only spaces are skipped (a space-only second line yields an empty set of final states).
        /// </remarks>
        /// <param name="solver">Solver used to create the interval predicates and the automaton.</param>
        /// <param name="automaton">Textual description in the format above.</param>
        /// <returns>The parsed automaton.</returns>
        public static Automaton<BDD> ReadFromString(CharSetSolver solver, string automaton)
        {
            var invariant = System.Globalization.CultureInfo.InvariantCulture;
            var space = new char[] { ' ' };

            var lines = automaton.Split(new char[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);
            int initialState = int.Parse(lines[0], invariant);
            var moves = new Dictionary<Pair<int, int>, BDD>();
            var allmoves = new List<Move<BDD>>();

            // RemoveEmptyEntries replaces the former TrimEnd(' '): it also copes with leading and repeated spaces,
            // which previously produced empty tokens and a FormatException in int.Parse.
            int[] finals = Array.ConvertAll(lines[1].Split(space, StringSplitOptions.RemoveEmptyEntries), s => int.Parse(s, invariant));
            for (int i = 2; i < lines.Length; i++)
            {
                int[] elems = Array.ConvertAll(lines[i].Split(space, StringSplitOptions.RemoveEmptyEntries), s => int.Parse(s, invariant));
                if (elems.Length == 0)
                    continue; // line consisted of spaces only

                if (elems[1] == -1)
                {
                    allmoves.Add(Move<BDD>.Epsilon(elems[0], elems[3]));
                    continue;
                }

                var key = new Pair<int, int>(elems[0], elems[3]);
                var pred = solver.MkCharSetFromRange((char)elems[1], (char)elems[2]);

                BDD existing;
                if (moves.TryGetValue(key, out existing))
                    moves[key] = solver.MkOr(existing, pred);
                else
                    moves[key] = pred;
            }
            foreach (var kv in moves)
                allmoves.Add(Move<BDD>.Create(kv.Key.First, kv.Key.Second, kv.Value));

            return Automaton<BDD>.Create(solver, initialState, finals, allmoves);
        }
예제 #7
0
        /// <summary>
        /// Writes the C# source text of a <c>public static ulong[] ignorecase</c> array to
        /// <paramref name="sw"/>. The array elements are the serialized form of one BDD that combines,
        /// for every entry of the ignore-case dictionary, the key character shifted left by 16 bits
        /// with the BDD of its equivalence class.
        /// </summary>
        /// <param name="sw">Writer that receives the generated source lines.</param>
        private static void CreateUlongArray(StreamWriter sw)
        {
            // Header of the generated field.
            sw.WriteLine("/// <summary>");
            sw.WriteLine("/// Serialized BDD for mapping characters to their case-ignoring equivalence classes.");
            sw.WriteLine("/// </summary>");
            sw.WriteLine("public static ulong[] ignorecase = new ulong[]{");

            var charSolver = new CharSetSolver();
            Dictionary <char, BDD> classes = ComputeIgnoreCaseDistionary(charSolver);

            // Union of (key << 16) AND class over all dictionary entries.
            BDD relation = charSolver.False;
            foreach (var entry in classes)
            {
                var shiftedKey = charSolver.MkCharSetFromRange(entry.Key, entry.Key).ShiftLeft(16);
                relation = relation.Or(shiftedKey.And(entry.Value));
            }

            // One array element per line, as a 16-digit hexadecimal literal.
            foreach (var word in charSolver.Serialize(relation))
            {
                sw.WriteLine("0x{0:X16},", word);
            }

            sw.WriteLine("};"); //end of array
        }
예제 #8
0
        /// <summary>
        /// Smoke test: creates the character set '\0'..'\x0F' with a <c>BitWidth.BV7</c> solver,
        /// computes its domain size and writes the BDD to "bdd2.dot" via <c>ToDot</c>.
        /// The test contains no assertions; it only checks that these calls complete.
        /// </summary>
        public void TestDotGen()
        {
            var sevenBitSolver = new CharSetSolver(BitWidth.BV7);
            BDD lowSixteen     = sevenBitSolver.MkCharSetFromRange('\0', '\x0F');

            // The size is computed but not checked.
            int domainSize = (int)sevenBitSolver.ComputeDomainSize(lowSixteen);

            lowSixteen.ToDot(@"bdd2.dot");
        }
예제 #9
0
        /// <summary>
        /// Smoke test: creates the character set '0'..'9' with a <c>BitWidth.BV7</c> solver,
        /// computes its domain size and writes the BDD to "is_digit_bdd.dot" via <c>ToDot</c>.
        /// The test contains no assertions; it only checks that these calls complete.
        /// </summary>
        public void TestDotGenTmp()
        {
            CharSetSolver solver = new CharSetSolver(BitWidth.BV7);
            BDD           digits = solver.MkCharSetFromRange('0', '9');

            // The size is computed but not checked.
            solver.ComputeDomainSize(digits);

            // Use a file name relative to the working directory (as TestDotGen does) instead of an
            // absolute path that exists only on the original author's machine.
            digits.ToDot(@"is_digit_bdd.dot");
        }
예제 #10
0
 /// <summary>
 /// Stores the automaton, solver and target names, creates the character set '\0'..'\x7F'
 /// and creates the helper predicates.
 /// </summary>
 /// <param name="automaton">Automaton stored in this generator.</param>
 /// <param name="solver">Solver stored in this generator and used to build the ASCII set and helper predicates.</param>
 /// <param name="classname">Class name stored in this generator.</param>
 /// <param name="namespacename">Namespace name stored in this generator.</param>
 /// <param name="OptimzeForAsciiInput">Flag passed on unchanged to <c>HelperPredicates</c>.</param>
 public CSharpGenerator(Automaton <BDD> automaton, CharSetSolver solver, string classname, string namespacename, bool OptimzeForAsciiInput = true)
 {
     // Plain copies of the constructor arguments.
     this.automaton     = automaton;
     this.solver        = solver;
     this.classname     = classname;
     this.namespacename = namespacename;

     // Objects derived from the solver.
     ASCII             = solver.MkCharSetFromRange('\0', '\x7F');
     helper_predicates = new HelperPredicates(solver, OptimzeForAsciiInput);
 }
예제 #11
0
 /// <summary>
 /// Initializes the generator: keeps the given automaton, solver, class name and namespace name,
 /// builds the character set for '\0'..'\x7F' and constructs the helper predicates.
 /// </summary>
 /// <param name="automaton">Automaton kept by the generator.</param>
 /// <param name="solver">Solver kept by the generator; also used for the ASCII set and the helper predicates.</param>
 /// <param name="classname">Class name kept by the generator.</param>
 /// <param name="namespacename">Namespace name kept by the generator.</param>
 /// <param name="OptimzeForAsciiInput">Forwarded as-is to the <c>HelperPredicates</c> constructor.</param>
 public CSharpGenerator(Automaton<BDD> automaton, CharSetSolver solver, string classname, string namespacename, bool OptimzeForAsciiInput = true)
 {
     // Naming information.
     this.namespacename = namespacename;
     this.classname = classname;

     // Automaton and its solver.
     this.automaton = automaton;
     this.solver = solver;

     // Solver-derived members.
     ASCII = solver.MkCharSetFromRange('\0', '\x7F');
     helper_predicates = new HelperPredicates(solver, OptimzeForAsciiInput);
 }
예제 #12
0
        /// <summary>
        /// Verifies that converting the character set 'A'..'Y' back with <c>ToRanges</c> yields exactly
        /// one range whose bounds are the codes of 'A' and 'Y'.
        /// </summary>
        public void TestRanges()
        {
            const char lowest  = 'A';
            const char highest = 'Y';

            var solver16 = new CharSetSolver(BitWidth.BV16);
            BDD letters  = solver16.MkCharSetFromRange(lowest, highest);

            Pair <uint, uint>[] intervals = solver16.ToRanges(letters);

            Assert.AreEqual <int>(1, intervals.Length);
            Assert.AreEqual <uint>((uint)lowest, intervals[0].First);
            Assert.AreEqual <uint>((uint)highest, intervals[0].Second);
        }
        /// <summary>
        /// Parses an automaton from its textual form.
        /// After empty lines are removed, the first line is the initial state, the second line lists the
        /// final states separated by spaces, and every further line is one transition
        /// "fromState intervalStart intervalEnd toState". An intervalStart of -1 denotes an epsilon move.
        /// Character intervals between the same pair of states are combined into a single move.
        /// </summary>
        /// <remarks>
        /// Numbers are parsed with the invariant culture so the result does not depend on the culture of
        /// the current thread. Leading, trailing and repeated spaces are tolerated, and lines that contain
        /// only spaces are skipped (a space-only second line yields an empty set of final states).
        /// </remarks>
        /// <param name="solver">Solver used to create the interval predicates and the automaton.</param>
        /// <param name="automaton">Textual description in the format above.</param>
        /// <returns>The parsed automaton.</returns>
        public static Automaton <BDD> ReadFromString(CharSetSolver solver, string automaton)
        {
            var invariant = System.Globalization.CultureInfo.InvariantCulture;
            var space     = new char[] { ' ' };

            var lines        = automaton.Split(new char[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);
            int initialState = int.Parse(lines[0], invariant);
            var moves        = new Dictionary <Pair <int, int>, BDD>();
            var allmoves     = new List <Move <BDD> >();

            // RemoveEmptyEntries replaces the former TrimEnd(' '): it also copes with leading and repeated spaces,
            // which previously produced empty tokens and a FormatException in int.Parse.
            int[] finals = Array.ConvertAll(lines[1].Split(space, StringSplitOptions.RemoveEmptyEntries), s => int.Parse(s, invariant));
            for (int i = 2; i < lines.Length; i++)
            {
                int[] elems = Array.ConvertAll(lines[i].Split(space, StringSplitOptions.RemoveEmptyEntries), s => int.Parse(s, invariant));
                if (elems.Length == 0)
                {
                    continue; // line consisted of spaces only
                }

                if (elems[1] == -1)
                {
                    allmoves.Add(Move <BDD> .Epsilon(elems[0], elems[3]));
                    continue;
                }

                var key  = new Pair <int, int>(elems[0], elems[3]);
                var pred = solver.MkCharSetFromRange((char)elems[1], (char)elems[2]);

                BDD existing;
                if (moves.TryGetValue(key, out existing))
                {
                    moves[key] = solver.MkOr(existing, pred);
                }
                else
                {
                    moves[key] = pred;
                }
            }
            foreach (var kv in moves)
            {
                allmoves.Add(Move <BDD> .Create(kv.Key.First, kv.Key.Second, kv.Value));
            }

            var aut = Automaton <BDD> .Create(solver, initialState, finals, allmoves);

            return(aut);
        }
예제 #14
0
        /// <summary>
        /// Reads an automaton from a text file.
        /// The first line is the initial state, the second line lists the final states separated by
        /// spaces, and every following line is one transition "fromState intervalStart intervalEnd toState".
        /// An intervalStart of -1 denotes an epsilon move. Character intervals between the same pair of
        /// states are combined into a single move.
        /// </summary>
        /// <remarks>
        /// Numbers are parsed with the invariant culture so the result does not depend on the culture of
        /// the current thread. An empty second line yields an automaton without final states, blank
        /// transition lines are skipped, and leading, trailing and repeated spaces are tolerated.
        /// </remarks>
        /// <param name="solver">Solver used to create the interval predicates and the automaton.</param>
        /// <param name="file">Path of the file to read.</param>
        /// <returns>The automaton described by the file.</returns>
        public static Automaton <BDD> Read(CharSetSolver solver, string file)
        {
            var invariant = System.Globalization.CultureInfo.InvariantCulture;
            var space     = new char[] { ' ' };

            var lines        = System.IO.File.ReadAllLines(file);
            int initialState = int.Parse(lines[0], invariant);
            var moves        = new Dictionary <Tuple <int, int>, BDD>();
            var allmoves     = new List <Move <BDD> >();

            // Splitting with RemoveEmptyEntries turns an empty line into an empty token list instead of
            // the single empty token that previously made int.Parse throw a FormatException.
            int[] finals = Array.ConvertAll(lines[1].Split(space, StringSplitOptions.RemoveEmptyEntries), s => int.Parse(s, invariant));
            for (int i = 2; i < lines.Length; i++)
            {
                int[] elems = Array.ConvertAll(lines[i].Split(space, StringSplitOptions.RemoveEmptyEntries), s => int.Parse(s, invariant));
                if (elems.Length == 0)
                {
                    continue; // blank line, e.g. at the end of the file
                }

                if (elems[1] == -1)
                {
                    allmoves.Add(Move <BDD> .Epsilon(elems[0], elems[3]));
                    continue;
                }

                var key  = new Tuple <int, int>(elems[0], elems[3]);
                var pred = solver.MkCharSetFromRange((char)elems[1], (char)elems[2]);

                BDD existing;
                if (moves.TryGetValue(key, out existing))
                {
                    moves[key] = solver.MkOr(existing, pred);
                }
                else
                {
                    moves[key] = pred;
                }
            }
            foreach (var kv in moves)
            {
                allmoves.Add(Move <BDD> .Create(kv.Key.Item1, kv.Key.Item2, kv.Value));
            }

            var aut = Automaton <BDD> .Create(solver, initialState, finals, allmoves);

            return(aut);
        }
예제 #15
0
 /// <summary>
 /// Builds the character set 'A'..'Y' with a <c>BitWidth.BV16</c> solver and checks that
 /// <c>ToRanges</c> returns a single range starting at 'A' and ending at 'Y'.
 /// </summary>
 public void TestRanges()
 {
     const char rangeStart = 'A';
     const char rangeEnd = 'Y';

     var bv16Solver = new CharSetSolver(BitWidth.BV16);
     BDD upperLetters = bv16Solver.MkCharSetFromRange(rangeStart, rangeEnd);
     Pair<uint, uint>[] found = bv16Solver.ToRanges(upperLetters);

     // Exactly one interval is expected, with the original bounds.
     Assert.AreEqual<int>(1, found.Length);
     Assert.AreEqual<uint>((uint)rangeStart, found[0].First);
     Assert.AreEqual<uint>((uint)rangeEnd, found[0].Second);
 }
예제 #16
0
 /// <summary>
 /// Smoke test without assertions: builds the set '\0'..'\x0F' with a <c>BitWidth.BV7</c> solver,
 /// computes its domain size and calls <c>ToDot</c> with the file name "bdd2.dot".
 /// </summary>
 public void TestDotGen()
 {
     var bv7Solver = new CharSetSolver(BitWidth.BV7);
     BDD firstSixteen = bv7Solver.MkCharSetFromRange('\0', '\x0F');

     // Computed only to exercise the call; the value is not checked.
     int elementCount = (int)bv7Solver.ComputeDomainSize(firstSixteen);

     firstSixteen.ToDot(@"bdd2.dot");
 }
예제 #17
0
        /// <summary>
        /// Checks that the byte sequences obtained from the one-, two- and three-byte encodings of the
        /// BDD for <paramref name="testClass"/> are exactly the UTF-8 byte sequences of all non-surrogate
        /// characters that the regex engine matches against that class.
        /// </summary>
        /// <param name="testClass">
        /// Regex character class; it is converted to a BDD and also placed between "^" and "$" to
        /// compute the expected set of characters.
        /// </param>
        public void ConvertUTF16BDDtoUTF8Test_Helper(string testClass)
        {
            var solver   = new CharSetSolver();
            var classBdd = solver.MkCharSetFromRegexCharClass(testClass);

            var asciiPart = classBdd & solver.MkCharSetFromRange('\0', '\x7F');
            var oneByte   = classBdd & asciiPart;

            var threeByte = Microsoft.Automata.Utilities.UTF8Encoding.Extract3ByteUTF8Encodings(classBdd);
            var twoByte   = Microsoft.Automata.Utilities.UTF8Encoding.Extract2ByteUTF8Encodings(classBdd);

            // ---- actual: every byte sequence described by the extracted predicates ----
            var actual = new HashSet <Sequence <byte> >();

            foreach (var single in solver.GenerateAllCharacters(oneByte))
            {
                actual.Add(new Sequence <byte>((byte)single));
            }

            // The moves describe the same encodings as an automaton (state 0 = start, state 1 = final);
            // they are only consumed by the disabled visualization at the end of this method.
            var moves     = new List <Move <BDD> >();
            int nextState = 2;

            moves.Add(Move <BDD> .Create(0, 1, oneByte));

            foreach (var encoding in twoByte)
            {
                moves.Add(Move <BDD> .Create(0, nextState, encoding.Item1));
                moves.Add(Move <BDD> .Create(nextState, 1, encoding.Item2));
                nextState++;

                foreach (var b1 in solver.GenerateAllCharacters(encoding.Item1))
                {
                    foreach (var b2 in solver.GenerateAllCharacters(encoding.Item2))
                    {
                        actual.Add(new Sequence <byte>((byte)b1, (byte)b2));
                    }
                }
            }

            foreach (var encoding in threeByte)
            {
                foreach (var tail in encoding.Item2)
                {
                    moves.Add(Move <BDD> .Create(0, nextState, encoding.Item1));
                    moves.Add(Move <BDD> .Create(nextState, nextState + 1, tail.Item1));
                    moves.Add(Move <BDD> .Create(nextState + 1, 1, tail.Item2));
                    nextState += 2;

                    foreach (var b1 in solver.GenerateAllCharacters(encoding.Item1))
                    {
                        foreach (var b2 in solver.GenerateAllCharacters(tail.Item1))
                        {
                            foreach (var b3 in solver.GenerateAllCharacters(tail.Item2))
                            {
                                actual.Add(new Sequence <byte>((byte)b1, (byte)b2, (byte)b3));
                            }
                        }
                    }
                }
            }

            // ---- expected: UTF-8 bytes of every non-surrogate char the regex engine accepts ----
            var    expected        = new HashSet <Sequence <byte> >();
            string anchoredPattern = "^" + testClass + "$";

            for (int code = 0; code <= 0xFFFF; code++)
            {
                char candidate = (char)code;
                if (char.IsSurrogate(candidate))
                {
                    continue;
                }
                if (!Regex.IsMatch(candidate.ToString(), anchoredPattern))
                {
                    continue;
                }

                byte[] utf8 = System.Text.UnicodeEncoding.UTF8.GetBytes(new char[] { candidate });
                expected.Add(new Sequence <byte>(utf8));
            }

            //Automaton<BDD> aut = Automaton<BDD>.Create(css, 0, new int[] { 1 }, moves).Determinize().Minimize();
            //aut.ShowGraph();

            // Both sets must contain each other, i.e. be equal.
            bool encoding_ok = actual.IsSubsetOf(expected) &&
                               expected.IsSubsetOf(actual);

            Assert.IsTrue(encoding_ok, "incorrectly ecoded character class: " + testClass);
        }
        /// <summary>
        /// Emits, to <paramref name="sw"/>, the source text of a <c>public static ulong[] ignorecase</c>
        /// array holding the serialized BDD built from the ignore-case dictionary: for each entry the
        /// key character (shifted left by 16 bits) is conjoined with the BDD of its equivalence class,
        /// and all of these are unioned.
        /// </summary>
        /// <param name="sw">Writer that receives the generated source lines.</param>
        private static void CreateUlongArray(StreamWriter sw)
        {
            // Doc comment and opening of the generated array.
            sw.WriteLine("/// <summary>");
            sw.WriteLine("/// Serialized BDD for mapping characters to their case-ignoring equivalence classes.");
            sw.WriteLine("/// </summary>");
            sw.WriteLine("public static ulong[] ignorecase = new ulong[]{");

            var bddSolver = new CharSetSolver();
            Dictionary<char, BDD> equivalenceClasses = ComputeIgnoreCaseDistionary(bddSolver);

            BDD combined = bddSolver.False;
            foreach (var pair in equivalenceClasses)
            {
                var singleton = bddSolver.MkCharSetFromRange(pair.Key, pair.Key);
                var keyed = singleton.ShiftLeft(16).And(pair.Value);
                combined = combined.Or(keyed);
            }

            // Each serialized element becomes one 16-digit hexadecimal literal line.
            var serialized = bddSolver.Serialize(combined);
            foreach (var element in serialized)
                sw.WriteLine("0x{0:X16},", element);

            sw.WriteLine("};"); //end of array
        }
예제 #19
0
        /// <summary>
        /// Computes the case-insensitive equivalence classes of all 16-bit characters.
        /// A character takes part only if it differs from its upper- or lower-case conversion and the
        /// .NET regex engine, with the ignore-case option, matches both conversions against it.
        /// </summary>
        /// <param name="solver">Solver used to create and union the character sets.</param>
        /// <returns>Map from every participating character to the BDD of its equivalence class.</returns>
        private static Dictionary <char, BDD> ComputeIgnoreCaseDistionary(CharSetSolver solver)
        {
            var classOf = new Dictionary <char, BDD>();

            for (int code = char.MinValue; code <= char.MaxValue; code++)
            {
                char current = (char)code;
                char asUpper = char.ToUpper(current);
                char asLower = char.ToLower(current);

                bool hasCaseVariant = current != asUpper || current != asLower;
                if (!hasCaseVariant)
                {
                    continue;
                }

                // The case conversions and the regex engine can disagree: for the characters below
                // current != asUpper, but the engine does not treat the two as equal under (?i).
                //   \xB5 -> \u039C, \u0131 -> I, \u017F -> S, \u0345 -> \u0399, \u03C2 -> \u03A3,
                //   \u03D0 -> \u0392, \u03D1 -> \u0398, \u03D5 -> \u03A6, \u03D6 -> \u03A0,
                //   \u03F0 -> \u039A, \u03F1 -> \u03A1, \u03F5 -> \u0395, \u1E9B -> \u1E60, \u1FBE -> \u0399
                // Such characters are left out by asking the engine directly.
                string bothCases = asUpper.ToString() + asLower.ToString();
                string ignoreCasePattern = "^(?i:" + StringUtility.Escape(current) + ")+$";
                bool engineAgrees = System.Text.RegularExpressions.Regex.IsMatch(bothCases, ignoreCasePattern);
                if (!engineAgrees)
                {
                    continue;
                }

                // Union of the classes already known for the three characters.
                BDD union = solver.False;
                BDD previous;

                if (classOf.TryGetValue(current, out previous))
                {
                    union = union.Or(previous);
                }
                if (classOf.TryGetValue(asUpper, out previous))
                {
                    union = union.Or(previous);
                }
                if (classOf.TryGetValue(asLower, out previous))
                {
                    union = union.Or(previous);
                }

                union = union.Or(solver.MkCharSetFromRange(current, current)).Or(solver.MkCharSetFromRange(asUpper, asUpper)).Or(solver.MkCharSetFromRange(asLower, asLower));

                // Re-point every member at the enlarged class.
                foreach (char member in solver.GenerateAllCharacters(union))
                {
                    classOf[member] = union;
                }
            }
            return(classOf);
        }