Пример #1
0
        /// <summary>
        /// Builds a <see cref="BV64Algebra"/> from a set of minterms.
        /// </summary>
        /// <param name="solver">solver used to build the decision tree and to turn each minterm into ranges</param>
        /// <param name="minterms">the minterms; at most 64 are accepted</param>
        /// <returns>an algebra built from the decision tree over the minterms and one interval set per minterm</returns>
        /// <exception cref="AutomataException">thrown with kind NrOfMintermsCanBeAtMost64 when more than 64 minterms are passed</exception>
        public static BV64Algebra Create(CharSetSolver solver, BDD[] minterms)
        {
            if (minterms.Length > 64)
            {
                throw new AutomataException(AutomataExceptionKind.NrOfMintermsCanBeAtMost64);
            }

            var tree = DecisionTree.Create(solver, minterms);

            // One interval set per minterm, in the same order as the minterms.
            var intervals = new IntervalSet[minterms.Length];
            for (int i = 0; i < minterms.Length; i++)
            {
                intervals[i] = new IntervalSet(solver.ToRanges(minterms[i]));
            }

            return new BV64Algebra(tree, intervals);
        }
Пример #2
0
        /// <summary>
        /// Creates a Boolean decision tree that tells whether a character belongs to <paramref name="domain"/>.
        /// The intended contract is that references to solver and domain are not saved in the resulting decision tree.
        /// </summary>
        /// <param name="solver">character algebra</param>
        /// <param name="domain">elements that map to true</param>
        /// <param name="precomputeLimit">
        /// largest character code whose answer is precomputed into an array (default is 0xFF, i.e. extended ASCII);
        /// 0 means that nothing is precomputed and the search tree covers the whole range 0..0xFFFF
        /// </param>
        /// <returns>decision tree made of the precomputed array and a search tree for the remaining characters (null if none remain)</returns>
        internal static BooleanDecisionTree Create(CharSetSolver solver, BDD domain, ushort precomputeLimit = 0xFF)
        {
            // Two blocks: index 0 is the complement of the domain, index 1 is the domain itself.
            var blocks = new BDD[] { solver.MkNot(domain), domain };

            if (precomputeLimit == 0)
            {
                var nothingPrecomputed = new bool[0];
                var wholeRangeTree     = MkBST(new DecisionTree.PartitionCut(solver, blocks), 0, 0xFFFF);
                return new BooleanDecisionTree(nothingPrecomputed, wholeRangeTree);
            }

            bool[] table = Precompute(solver, domain, precomputeLimit);

            // A search tree is needed only when some characters lie above the precomputed prefix.
            DecisionTree.BST remainder = precomputeLimit < ushort.MaxValue
                ? MkBST(new DecisionTree.PartitionCut(solver, blocks), precomputeLimit + 1, ushort.MaxValue)
                : null;

            return new BooleanDecisionTree(table, remainder);
        }
Пример #3
0
        /// <summary>
        /// Builds a lookup table with one entry for every character code from 0 up to and including
        /// <paramref name="precomputeLimit"/>. An entry is true when the single-character constraint
        /// for that code is satisfiable together with <paramref name="domain"/>.
        /// </summary>
        /// <param name="solver">solver used to build and test the constraints</param>
        /// <param name="domain">set of characters that map to true</param>
        /// <param name="precomputeLimit">largest character code to include in the table</param>
        /// <returns>array of length precomputeLimit + 1</returns>
        private static bool[] Precompute(CharSetSolver solver, BDD domain, int precomputeLimit)
        {
            var table = new bool[precomputeLimit + 1];

            for (int code = 0; code < table.Length; code++)
            {
                var single  = solver.MkCharConstraint((char)code);
                table[code] = solver.IsSatisfiable(solver.MkAnd(single, domain));
            }

            return table;
        }
Пример #4
0
        /// <summary>
        /// Creates a decision tree that maps a character to the id of the partition block containing it.
        /// </summary>
        /// <param name="solver">character algebra</param>
        /// <param name="partition">partition of the whole set of all characters into pairwise disjoint nonempty sets</param>
        /// <param name="precomputeLimit">
        /// upper limit of the character codes whose block ids are precomputed in an array (default is 0xFF, i.e. extended ASCII);
        /// 0 means that nothing is precomputed and the search tree covers the whole range 0..0xFFFF
        /// </param>
        /// <returns>decision tree made of the precomputed array and a search tree for the remaining characters (null if none remain)</returns>
        internal static DecisionTree Create(CharSetSolver solver, BDD[] partition, ushort precomputeLimit = 0xFF)
        {
            if (partition.Length == 1)
            {
                // There is no actual partition: everything maps to the single id 0, e.g. as in .*
                // NOTE(review): this table has precomputeLimit entries, not precomputeLimit + 1 -- confirm
                // that the lookup code does not index it at precomputeLimit itself.
                var allZero = new int[precomputeLimit];
                return new DecisionTree(allZero, new BST(0, null, null));
            }

            if (precomputeLimit == 0)
            {
                var nothingPrecomputed = new int[0];
                var wholeRangeTree     = MkBST(new PartitionCut(solver, partition), 0, 0xFFFF);
                return new DecisionTree(nothingPrecomputed, wholeRangeTree);
            }

            int[] table = Precompute(solver, partition, precomputeLimit);

            // A search tree is needed only when some characters lie above the precomputed prefix.
            BST remainder = precomputeLimit < ushort.MaxValue
                ? MkBST(new PartitionCut(solver, partition), precomputeLimit + 1, ushort.MaxValue)
                : null;

            return new DecisionTree(table, remainder);
        }
Пример #5
0
        /// <summary>
        /// Renders a character set as regex syntax.
        /// </summary>
        /// <remarks>
        /// Tried in this order: the shorthand classes \d \D \w \W \s \S, a single Unicode category
        /// (\p{..}), a single escaped character, and finally a bracket expression listing the ranges.
        /// </remarks>
        /// <param name="label">the character set to render</param>
        /// <param name="categorizer">provides the category, word-letter and whitespace conditions to compare against</param>
        /// <param name="solver">solver used to complement conditions and to convert the label to ranges</param>
        /// <returns>regex text for the set</returns>
        internal static string ToRegexCharSet(BDD label, IUnicodeCategoryTheory <BDD> categorizer, CharSetSolver solver)
        {
            // Category 8 is treated as the digit class here
            // (presumably UnicodeCategory.DecimalDigitNumber -- confirm against the categorizer).
            if (categorizer.CategoryCondition(8) == label)
            {
                return @"\d";
            }
            if (solver.MkNot(categorizer.CategoryCondition(8)) == label)
            {
                return @"\D";
            }
            if (categorizer.WordLetterCondition == label)
            {
                return @"\w";
            }
            if (solver.MkNot(categorizer.WordLetterCondition) == label)
            {
                return @"\W";
            }
            if (categorizer.WhiteSpaceCondition == label)
            {
                return @"\s";
            }
            if (solver.MkNot(categorizer.WhiteSpaceCondition) == label)
            {
                return @"\S";
            }

            for (int i = 0; i < categorizer.UnicodeCategoryStandardAbbreviations.Length; i++)
            {
                if (categorizer.CategoryCondition(i) == label)
                {
                    // Lower-case \p matches the characters IN the category. The upper-case \P form
                    // is the negation and would describe the complement of the label.
                    return @"\p{" + categorizer.UnicodeCategoryStandardAbbreviations[i] + "}";
                }
            }

            var ranges = solver.ToRanges(label);

            // A set with exactly one character is written as that character, without brackets.
            if (ranges.Length == 1 && ranges[0].Item1 == ranges[0].Item2)
            {
                return StringUtility.Escape((char)ranges[0].Item1);
            }

            // NOTE(review): if ToRanges yields no ranges for an empty label, the result is "[]" -- confirm
            // that callers never pass an empty set or can cope with that text.
            var res = new StringBuilder("[");

            for (int i = 0; i < ranges.Length; i++)
            {
                var range = ranges[i];

                res.Append(StringUtility.EscapeWithNumericSpace((char)range.Item1));
                if (range.Item1 != range.Item2)
                {
                    // Two adjacent characters are listed one after the other; longer runs use "lo-hi".
                    if (range.Item1 != range.Item2 - 1)
                    {
                        res.Append("-");
                    }
                    res.Append(StringUtility.EscapeWithNumericSpace((char)range.Item2));
                }
            }

            res.Append("]");
            return res.ToString();
        }
Пример #6
0
 /// <summary>
 /// Creates the transformer: stores the solver and loads the generated ignore-case relation.
 /// </summary>
 /// <param name="charSetSolver">solver that is stored and used to deserialize the relation</param>
 public IgnoreCaseTransformer(CharSetSolver charSetSolver)
 {
     solver = charSetSolver;

     // Rebuild the relation from its generated serialized form.
     IgnoreCaseRel = charSetSolver.Deserialize(Microsoft.SRM.Generated.IgnoreCaseRelation.ignorecase);

     // NOTE(review): presumably the shift drops the low 16 bits of the relation, leaving the set of
     // characters that have case variants -- confirm against the operator's definition.
     domain = IgnoreCaseRel >> 16;
 }
Пример #7
0
        /// <summary>
        /// Writes C# source text with tables describing the Unicode categories, the whitespace
        /// characters and the word characters of every character code representable in the given bit width.
        /// </summary>
        /// <remarks>
        /// The compact BDD tables are always written. The plain range tables (per category, whitespace,
        /// word character) are written only when the encoding is 7 bits wide.
        /// Every per-category table is emitted in category-code order (0 through 29), so that the position
        /// of an entry equals its category code.
        /// </remarks>
        /// <param name="encoding">bit width of the encoding; character codes 0 .. 2^bits - 1 are classified</param>
        /// <param name="sw">writer that receives the generated source text</param>
        /// <param name="field">name prefix of the generated fields</param>
        private static void WriteRangeFields(BitWidth encoding, StreamWriter sw, string field)
        {
            // Number of UnicodeCategory codes (0 through 29).
            const int CategoryCount = 30;

            int bits    = (int)encoding;
            int maxChar = (1 << bits) - 1;

            // Indexed by category code. An array (instead of a dictionary) makes the emission order
            // below explicit rather than dependent on unspecified dictionary enumeration order.
            var catRanges = new Ranges[CategoryCount];

            for (int c = 0; c < CategoryCount; c++)
            {
                catRanges[c] = new Ranges();
            }
            Ranges whitespace    = new Ranges();
            Ranges wordcharacter = new Ranges();

            for (int i = 0; i <= maxChar; i++)
            {
                char ch = (char)i;
                if (char.IsWhiteSpace(ch))
                {
                    whitespace.Add(i);
                }
                int catCode = (int)char.GetUnicodeCategory(ch);
                catRanges[catCode].Add(i);

                // The word character range table is only emitted for the 7-bit encoding, so it is only
                // collected then. Word characters are categories 0,1,2,3,4,5,8,18.
                if (bits == 7)
                {
                    if (catCode == 0 || catCode == 1 || catCode == 2 || catCode == 3 || catCode == 4 || catCode == 5 || catCode == 8 || catCode == 18)
                    {
                        wordcharacter.Add(i);
                    }
                }
            }

            //generate bdd reprs for each of the category ranges
            BDD[]         catBDDs = new BDD[CategoryCount];
            CharSetSolver bddb    = new CharSetSolver(encoding);

            for (int c = 0; c < CategoryCount; c++)
            {
                catBDDs[c] = bddb.MkBddForIntRanges(catRanges[c].ranges);
            }

            BDD whitespaceBdd = bddb.MkBddForIntRanges(whitespace.ranges);

            // Word characters: union of categories 0,1,2,3,4,5,8,18 -- the same set as used for the
            // range table above (category 5 IS included).
            BDD wordCharBdd = bddb.MkOr(catBDDs[0],
                                        bddb.MkOr(catBDDs[1],
                                                  bddb.MkOr(catBDDs[2],
                                                            bddb.MkOr(catBDDs[3],
                                                                      bddb.MkOr(catBDDs[4],
                                                                                bddb.MkOr(catBDDs[5],
                                                                                          bddb.MkOr(catBDDs[8], catBDDs[18])))))));

            if (bits == 7)
            {
                sw.WriteLine(@"/// <summary>
/// Array of 30 UnicodeCategory ranges. Each entry is a pair of integers. 
/// corresponding to the lower and upper bounds of the unicodes of the characters
/// that have the given UnicodeCategory code (between 0 and 29).
/// </summary>");
                sw.WriteLine("public static int[][][] " + field + " = new int[][][]{");
                for (int c = 0; c < CategoryCount; c++)
                {
                    sw.WriteLine("//{0}({1}):", (UnicodeCategory)c, c);
                    if (catRanges[c].Count == 0)
                    {
                        sw.WriteLine("null,");
                    }
                    else
                    {
                        sw.WriteLine("new int[][]{");
                        foreach (int[] range in catRanges[c].ranges)
                        {
                            sw.WriteLine("    new int[]{" + string.Format("{0},{1}", range[0], range[1]) + "},");
                        }
                        sw.WriteLine("},");
                    }
                }
                sw.WriteLine("};");
            }

            sw.WriteLine(@"/// <summary>
/// Compact BDD encodings of the categories.
/// </summary>");
            sw.WriteLine("public static int[][] " + field + "Bdd = new int[][]{");
            for (int c = 0; c < CategoryCount; c++)
            {
                sw.WriteLine("//{0}({1}):", (UnicodeCategory)c, c);
                BDD catBdd = catBDDs[c];
                if (catBdd == null || catBdd.IsEmpty)
                {
                    sw.WriteLine("null, //false");
                }
                else if (catBdd.IsFull)
                {
                    sw.WriteLine("new int[]{0,0}, //true");
                }
                else
                {
                    sw.WriteLine("new int[]{");
                    foreach (var arc in bddb.SerializeCompact(catBdd))
                    {
                        sw.WriteLine("{0},", arc);
                    }
                    sw.WriteLine("},");
                }
            }
            sw.WriteLine("};");

            if (bits == 7)
            {
                sw.WriteLine(@"/// <summary>
/// Whitespace character ranges.
/// </summary>");
                sw.WriteLine("public static int[][] " + field + "Whitespace = new int[][]{");
                foreach (int[] range in whitespace.ranges)
                {
                    sw.WriteLine("    new int[]{" + string.Format("{0},{1}", range[0], range[1]) + "},");
                }
                sw.WriteLine("};");

                sw.WriteLine(@"/// <summary>
/// Word character ranges.
/// </summary>");
                sw.WriteLine("public static int[][] " + field + "WordCharacter = new int[][]{");
                foreach (int[] range in wordcharacter.ranges)
                {
                    sw.WriteLine("    new int[]{" + string.Format("{0},{1}", range[0], range[1]) + "},");
                }
                sw.WriteLine("};");
            }

            sw.WriteLine(@"/// <summary>
/// Compact BDD encoding of the whitespace characters.
/// </summary>");
            sw.WriteLine("public static int[] " + field + "WhitespaceBdd = new int[]{");
            foreach (var arc in bddb.SerializeCompact(whitespaceBdd))
            {
                sw.WriteLine("{0},", arc);
            }
            sw.WriteLine("};");

            sw.WriteLine(@"/// <summary>
/// Compact BDD encoding of word characters
/// </summary>");
            sw.WriteLine("public static int[] " + field + "WordCharacterBdd = new int[]{");
            foreach (var arc in bddb.SerializeCompact(wordCharBdd))
            {
                sw.WriteLine("{0},", arc);
            }
            sw.WriteLine("};");
        }
Пример #8
0
 /// <summary>
 /// Type initializer: creates the shared character set solver and the regex-to-automaton
 /// converter that is built on that solver.
 /// </summary>
 static Regex()
 {
     var sharedSolver = new CharSetSolver();

     solver    = sharedSolver;
     converter = new RegexToAutomatonConverter <BDD>(sharedSolver);
 }
Пример #9
0
 /// <summary>
 /// Creates a partition cut that keeps the given solver and blocks.
 /// </summary>
 /// <param name="solver">solver stored for later use</param>
 /// <param name="blocks">blocks stored for later use; the array is kept by reference, not copied</param>
 internal PartitionCut(CharSetSolver solver, BDD[] blocks)
 {
     this.solver = solver;
     this.blocks = blocks;
 }
Пример #10
0
        /// <summary>
        /// Computes, for every 16-bit character whose upper or lower case form differs from it, the set of
        /// characters considered equivalent to it when case is ignored.
        /// </summary>
        /// <remarks>
        /// NOTE(review): char.ToUpper/char.ToLower and the (?i:) regex check are called without an explicit
        /// culture, so the result presumably depends on the current culture -- confirm this is intended.
        /// </remarks>
        /// <param name="solver">solver used to build and combine the character sets</param>
        /// <returns>
        /// dictionary in which every character enumerated from an equivalence set maps to that same set
        /// </returns>
        private static Dictionary <char, BDD> ComputeIgnoreCaseDistionary(CharSetSolver solver)
        {
            var table = new Dictionary <char, BDD>();

            // uint counter so that the loop can include 0xFFFF without wrapping around.
            for (uint code = 0; code <= 0xFFFF; code++)
            {
                char c     = (char)code;
                char upper = char.ToUpper(c);
                char lower = char.ToLower(c);

                // Characters that are their own upper and lower case form have no case variants.
                if (c == upper && c == lower)
                {
                    continue;
                }

                // Make sure that the regex engine considers c as being equivalent to upper and lower, else ignore c.
                // In some cases c != upper but the regex engine does not consider the characters equivalent
                // with respect to the ignore-case option. These characters are:
                //   c=\xB5,   upper=\u039C
                //   c=\u0131, upper=I
                //   c=\u017F, upper=S
                //   c=\u0345, upper=\u0399
                //   c=\u03C2, upper=\u03A3
                //   c=\u03D0, upper=\u0392
                //   c=\u03D1, upper=\u0398
                //   c=\u03D5, upper=\u03A6
                //   c=\u03D6, upper=\u03A0
                //   c=\u03F0, upper=\u039A
                //   c=\u03F1, upper=\u03A1
                //   c=\u03F5, upper=\u0395
                //   c=\u1E9B, upper=\u1E60
                //   c=\u1FBE, upper=\u0399
                string bothCases = upper.ToString() + lower.ToString();
                string pattern   = "^(?i:" + StringUtility.Escape(c) + ")+$";
                if (!System.Text.RegularExpressions.Regex.IsMatch(bothCases, pattern))
                {
                    continue;
                }

                char[] variants = { c, upper, lower };
                BDD    equiv    = solver.False;

                // Merge the sets already recorded for any of the three variants ...
                foreach (char v in variants)
                {
                    BDD known;
                    if (table.TryGetValue(v, out known))
                    {
                        equiv = equiv | known;
                    }
                }

                // ... and add the three variants themselves.
                foreach (char v in variants)
                {
                    equiv = equiv | solver.MkCharSetFromRange(v, v);
                }

                // Every character enumerated from the merged set now points to that set.
                foreach (char member in solver.GenerateAllCharacters(equiv))
                {
                    table[member] = equiv;
                }
            }

            return table;
        }