Exemplo n.º 1
0
        /// <summary>
        /// Crteate a Boolean decision tree.
        /// References to solver and domain are not saved in the resulting decision tree.
        /// </summary>
        /// <param name="solver">character alberbra</param>
        /// <param name="domain">elements that map to true</param>
        /// <param name="precomputeLimit">upper limit for block ids for characters to be precomputed in an array (default is 0xFF, i.e. extended ASCII)</param>
        /// <returns></returns>
        internal static BooleanDecisionTree Create(CharSetSolver solver, BDD domain, ushort precomputeLimit = 0xFF)
        {
            BDD domain_compl = solver.MkNot(domain);
            var partition    = new BDD[] { domain_compl, domain };

            if (precomputeLimit == 0)
            {
                return(new BooleanDecisionTree(new bool[] { }, MkBST(new DecisionTree.PartitionCut(solver, partition), 0, 0xFFFF)));
            }

            bool[]           precomp = Precompute(solver, domain, precomputeLimit);
            DecisionTree.BST bst     = null;
            if (precomputeLimit < ushort.MaxValue)
            {
                bst = MkBST(new DecisionTree.PartitionCut(solver, partition), precomputeLimit + 1, ushort.MaxValue);
            }

            return(new BooleanDecisionTree(precomp, bst));
        }
Exemplo n.º 2
0
        /// <summary>
        /// Assumes that set is a union of some minterms (or empty).
        /// If null then null is returned.
        /// </summary>
        public BV ConvertFromCharSet(BDD set)
        {
            if (set == null)
            {
                return(null);
            }
            var alg = set.algebra;
            BV  res = this.zero;

            for (int i = 0; i < partition.Length; i++)
            {
                BDD bdd_i = partition[i].AsBDD(alg);
                var conj  = alg.MkAnd(bdd_i, set);
                if (alg.IsSatisfiable(conj))
                {
                    res = res | atoms[i];
                }
            }
            return(res);
        }
Exemplo n.º 3
0
        private static bool[] Precompute(CharSetSolver solver, BDD domain, int precomputeLimit)
        {
            bool[]           precomp = new bool[precomputeLimit + 1];
            Func <int, bool> F       = i =>
            {
                var bdd = solver.MkCharConstraint((char)i);
                if (solver.IsSatisfiable(solver.MkAnd(bdd, domain)))
                {
                    return(true);
                }
                else
                {
                    return(false);
                }
            };

            for (int c = 0; c <= precomputeLimit; c++)
            {
                precomp[c] = F(c);
            }
            return(precomp);
        }
Exemplo n.º 4
0
        internal static string ToRegexCharSet(BDD label, IUnicodeCategoryTheory <BDD> categorizer, CharSetSolver solver)
        {
            if (categorizer.CategoryCondition(8) == label)
            {
                return(@"\d");
            }
            if (solver.MkNot(categorizer.CategoryCondition(8)) == label)
            {
                return(@"\D");
            }
            if (categorizer.WordLetterCondition == label)
            {
                return(@"\w");
            }
            if (solver.MkNot(categorizer.WordLetterCondition) == label)
            {
                return(@"\W");
            }
            if (categorizer.WhiteSpaceCondition == label)
            {
                return(@"\s");
            }
            if (solver.MkNot(categorizer.WhiteSpaceCondition) == label)
            {
                return(@"\S");
            }
            for (int i = 0; i < categorizer.UnicodeCategoryStandardAbbreviations.Length; i++)
            {
                if (categorizer.CategoryCondition(i) == label)
                {
                    return(@"\P{" + categorizer.UnicodeCategoryStandardAbbreviations[i] + "}");
                }
            }

            var ranges = solver.ToRanges(label);

            if (ranges.Length == 1 && ranges[0].Item1 == ranges[0].Item2)
            {
                return(StringUtility.Escape((char)ranges[0].Item1));
            }

            var res = new StringBuilder("[");

            for (int i = 0; i < ranges.Length; i++)
            {
                var range = ranges[i];
                if (range.Item1 == range.Item2)
                {
                    res.Append(StringUtility.EscapeWithNumericSpace((char)range.Item1));
                }
                else if (range.Item1 == range.Item2 - 1)
                {
                    res.Append(StringUtility.EscapeWithNumericSpace((char)range.Item1));
                    res.Append(StringUtility.EscapeWithNumericSpace((char)range.Item2));
                }
                else
                {
                    res.Append(StringUtility.EscapeWithNumericSpace((char)range.Item1));
                    res.Append("-");
                    res.Append(StringUtility.EscapeWithNumericSpace((char)range.Item2));
                }
            }
            res.Append("]");
            return(res.ToString());
        }
Exemplo n.º 5
0
 public IgnoreCaseTransformer(CharSetSolver charSetSolver)
 {
     this.solver   = charSetSolver;
     IgnoreCaseRel = charSetSolver.Deserialize(Microsoft.SRM.Generated.IgnoreCaseRelation.ignorecase);
     domain        = IgnoreCaseRel >> 16;
 }
Exemplo n.º 6
0
        private static void WriteRangeFields(BitWidth encoding, StreamWriter sw, string field)
        {
            int bits    = (int)encoding;
            int maxChar = (1 << bits) - 1;
            var catMap  = new Dictionary <UnicodeCategory, Ranges>();

            for (int c = 0; c < 30; c++)
            {
                catMap[(UnicodeCategory)c] = new Ranges();
            }
            Ranges whitespace    = new Ranges();
            Ranges wordcharacter = new Ranges();

            for (int i = 0; i <= maxChar; i++)
            {
                char ch = (char)i;
                if (char.IsWhiteSpace(ch))
                {
                    whitespace.Add(i);
                }
                UnicodeCategory cat = char.GetUnicodeCategory(ch);
                catMap[cat].Add(i);
                int catCode = (int)cat;
                //in .NET 3.5
                if (bits == 7)
                {
                    if (catCode == 0 || catCode == 1 || catCode == 2 || catCode == 3 || catCode == 4 || catCode == 5 || catCode == 8 || catCode == 18)
                    {
                        wordcharacter.Add(i);
                    }
                }
            }
            //generate bdd reprs for each of the category ranges
            BDD[]         catBDDs = new BDD[30];
            CharSetSolver bddb    = new CharSetSolver(encoding);

            for (int c = 0; c < 30; c++)
            {
                catBDDs[c] = bddb.MkBddForIntRanges(catMap[(UnicodeCategory)c].ranges);
            }

            BDD whitespaceBdd = bddb.MkBddForIntRanges(whitespace.ranges);

            //in .NET 3.5 category 5 was NOT a word character
            //union of categories 0,1,2,3,4,8,18
            BDD wordCharBdd = bddb.MkOr(catBDDs[0],
                                        bddb.MkOr(catBDDs[1],
                                                  bddb.MkOr(catBDDs[2],
                                                            bddb.MkOr(catBDDs[3],
                                                                      bddb.MkOr(catBDDs[4],
                                                                                bddb.MkOr(catBDDs[5],
                                                                                          bddb.MkOr(catBDDs[8], catBDDs[18])))))));

            if (bits == 7)
            {
                sw.WriteLine(@"/// <summary>
/// Array of 30 UnicodeCategory ranges. Each entry is a pair of integers. 
/// corresponding to the lower and upper bounds of the unicodes of the characters
/// that have the given UnicodeCategory code (between 0 and 29).
/// </summary>");
                sw.WriteLine("public static int[][][] " + field + " = new int[][][]{");
                foreach (UnicodeCategory c in catMap.Keys)
                {
                    sw.WriteLine("//{0}({1}):", c, (int)c);
                    if (catMap[c].Count == 0)
                    {
                        sw.WriteLine("null,");
                    }
                    else
                    {
                        sw.WriteLine("new int[][]{");
                        foreach (int[] range in catMap[c].ranges)
                        {
                            sw.WriteLine("    new int[]{" + string.Format("{0},{1}", range[0], range[1]) + "},");
                        }
                        sw.WriteLine("},");
                    }
                }
                sw.WriteLine("};");
            }

            sw.WriteLine(@"/// <summary>
/// Compact BDD encodings of the categories.
/// </summary>");
            sw.WriteLine("public static int[][] " + field + "Bdd = new int[][]{");
            foreach (UnicodeCategory c in catMap.Keys)
            {
                sw.WriteLine("//{0}({1}):", c, (int)c);
                BDD catBdd = catBDDs[(int)c];
                if (catBdd == null || catBdd.IsEmpty)
                {
                    sw.WriteLine("null, //false");
                }
                else if (catBdd.IsFull)
                {
                    sw.WriteLine("new int[]{0,0}, //true");
                }
                else
                {
                    sw.WriteLine("new int[]{");
                    foreach (var arc in bddb.SerializeCompact(catBdd))
                    {
                        sw.WriteLine("{0},", arc);
                    }
                    sw.WriteLine("},");
                }
            }
            sw.WriteLine("};");

            if (bits == 7)
            {
                sw.WriteLine(@"/// <summary>
/// Whitespace character ranges.
/// </summary>");
                sw.WriteLine("public static int[][] " + field + "Whitespace = new int[][]{");
                foreach (int[] range in whitespace.ranges)
                {
                    sw.WriteLine("    new int[]{" + string.Format("{0},{1}", range[0], range[1]) + "},");
                }
                sw.WriteLine("};");

                sw.WriteLine(@"/// <summary>
/// Word character ranges.
/// </summary>");
                sw.WriteLine("public static int[][] " + field + "WordCharacter = new int[][]{");
                foreach (int[] range in wordcharacter.ranges)
                {
                    sw.WriteLine("    new int[]{" + string.Format("{0},{1}", range[0], range[1]) + "},");
                }
                sw.WriteLine("};");
            }

            sw.WriteLine(@"/// <summary>
/// Compact BDD encoding of the whitespace characters.
/// </summary>");
            sw.WriteLine("public static int[] " + field + "WhitespaceBdd = new int[]{");
            foreach (var arc in bddb.SerializeCompact(whitespaceBdd))
            {
                sw.WriteLine("{0},", arc);
            }
            sw.WriteLine("};");

            sw.WriteLine(@"/// <summary>
/// Compact BDD encoding of word characters
/// </summary>");
            sw.WriteLine("public static int[] " + field + "WordCharacterBdd = new int[]{");
            foreach (var arc in bddb.SerializeCompact(wordCharBdd))
            {
                sw.WriteLine("{0},", arc);
            }
            sw.WriteLine("};");
        }
Exemplo n.º 7
0
        Tuple <uint, uint>[] ToRanges1(BDD set)
        {
            Tuple <uint, uint>[] ranges;
            if (!rangeCache.TryGetValue(set, out ranges))
            {
                int  b    = set.Ordinal;
                uint mask = (uint)1 << b;
                if (set.Zero.IsEmpty)
                {
                    #region 0-case is empty
                    if (set.One.IsFull)
                    {
                        var range = new Tuple <uint, uint>(mask, (mask << 1) - 1);
                        ranges = new Tuple <uint, uint>[] { range };
                    }
                    else //1-case is neither full nor empty
                    {
                        var ranges1 = LiftRanges(b, (b - set.One.Ordinal) - 1, ToRanges1(set.One));
                        ranges = new Tuple <uint, uint> [ranges1.Length];
                        for (int i = 0; i < ranges1.Length; i++)
                        {
                            ranges[i] = new Tuple <uint, uint>(ranges1[i].Item1 | mask, ranges1[i].Item2 | mask);
                        }
                    }
                    #endregion
                }
                else if (set.Zero.IsFull)
                {
                    #region 0-case is full
                    if (set.One.IsEmpty)
                    {
                        var range = new Tuple <uint, uint>(0, mask - 1);
                        ranges = new Tuple <uint, uint>[] { range };
                    }
                    else
                    {
                        var rangesR = LiftRanges(b, (b - set.One.Ordinal) - 1, ToRanges1(set.One));
                        var range   = rangesR[0];
                        if (range.Item1 == 0)
                        {
                            ranges    = new Tuple <uint, uint> [rangesR.Length];
                            ranges[0] = new Tuple <uint, uint>(0, range.Item2 | mask);
                            for (int i = 1; i < rangesR.Length; i++)
                            {
                                ranges[i] = new Tuple <uint, uint>(rangesR[i].Item1 | mask, rangesR[i].Item2 | mask);
                            }
                        }
                        else
                        {
                            ranges    = new Tuple <uint, uint> [rangesR.Length + 1];
                            ranges[0] = new Tuple <uint, uint>(0, mask - 1);
                            for (int i = 0; i < rangesR.Length; i++)
                            {
                                ranges[i + 1] = new Tuple <uint, uint>(rangesR[i].Item1 | mask, rangesR[i].Item2 | mask);
                            }
                        }
                    }
                    #endregion
                }
                else
                {
                    #region 0-case is neither full nor empty
                    var rangesL = LiftRanges(b, (b - set.Zero.Ordinal) - 1, ToRanges1(set.Zero));
                    var last    = rangesL[rangesL.Length - 1];

                    if (set.One.IsEmpty)
                    {
                        ranges = rangesL;
                    }

                    else if (set.One.IsFull)
                    {
                        var ranges1 = new List <Tuple <uint, uint> >();
                        for (int i = 0; i < rangesL.Length - 1; i++)
                        {
                            ranges1.Add(rangesL[i]);
                        }
                        if (last.Item2 == (mask - 1))
                        {
                            ranges1.Add(new Tuple <uint, uint>(last.Item1, (mask << 1) - 1));
                        }
                        else
                        {
                            ranges1.Add(last);
                            ranges1.Add(new Tuple <uint, uint>(mask, (mask << 1) - 1));
                        }
                        ranges = ranges1.ToArray();
                    }
                    else //general case: neither 0-case, not 1-case is full or empty
                    {
                        var rangesR0 = ToRanges1(set.One);

                        var rangesR = LiftRanges(b, (b - set.One.Ordinal) - 1, rangesR0);

                        var first = rangesR[0];

                        if (last.Item2 == (mask - 1) && first.Item1 == 0) //merge together the last and first ranges
                        {
                            ranges = new Tuple <uint, uint> [rangesL.Length + rangesR.Length - 1];
                            for (int i = 0; i < rangesL.Length - 1; i++)
                            {
                                ranges[i] = rangesL[i];
                            }
                            ranges[rangesL.Length - 1] = new Tuple <uint, uint>(last.Item1, first.Item2 | mask);
                            for (int i = 1; i < rangesR.Length; i++)
                            {
                                ranges[rangesL.Length - 1 + i] = new Tuple <uint, uint>(rangesR[i].Item1 | mask, rangesR[i].Item2 | mask);
                            }
                        }
                        else
                        {
                            ranges = new Tuple <uint, uint> [rangesL.Length + rangesR.Length];
                            for (int i = 0; i < rangesL.Length; i++)
                            {
                                ranges[i] = rangesL[i];
                            }
                            for (int i = 0; i < rangesR.Length; i++)
                            {
                                ranges[rangesL.Length + i] = new Tuple <uint, uint>(rangesR[i].Item1 | mask, rangesR[i].Item2 | mask);
                            }
                        }
                    }
                    #endregion
                }
                rangeCache[set] = ranges;
            }
            return(ranges);
        }
Exemplo n.º 8
0
        private static Dictionary <char, BDD> ComputeIgnoreCaseDistionary(CharSetSolver solver)
        {
            var ignoreCase = new Dictionary <char, BDD>();

            for (uint i = 0; i <= 0xFFFF; i++)
            {
                char c  = (char)i;
                char cU = char.ToUpper(c); // (char.IsLetter(char.ToUpper(c)) ? char.ToUpper(c) : c);
                char cL = char.ToLower(c); // (char.IsLetter(char.ToLower(c)) ? char.ToLower(c) : c);
                if (c != cU || c != cL || cU != cL)
                {
                    //make sure that the regex engine considers c as being equivalent to cU and cL, else ignore c
                    //in some cases c != cU but the regex engine does not consider the chacarters equivalent wrt the ignore-case option.
                    //These characters are:
                    //c=\xB5,cU=\u039C
                    //c=\u0131,cU=I
                    //c=\u017F,cU=S
                    //c=\u0345,cU=\u0399
                    //c=\u03C2,cU=\u03A3
                    //c=\u03D0,cU=\u0392
                    //c=\u03D1,cU=\u0398
                    //c=\u03D5,cU=\u03A6
                    //c=\u03D6,cU=\u03A0
                    //c=\u03F0,cU=\u039A
                    //c=\u03F1,cU=\u03A1
                    //c=\u03F5,cU=\u0395
                    //c=\u1E9B,cU=\u1E60
                    //c=\u1FBE,cU=\u0399
                    if (System.Text.RegularExpressions.Regex.IsMatch(cU.ToString() + cL.ToString(), "^(?i:" + StringUtility.Escape(c) + ")+$"))
                    {
                        BDD equiv = solver.False;

                        if (ignoreCase.ContainsKey(c))
                        {
                            equiv = equiv | ignoreCase[c];
                        }
                        if (ignoreCase.ContainsKey(cU))
                        {
                            equiv = equiv | ignoreCase[cU];
                        }
                        if (ignoreCase.ContainsKey(cL))
                        {
                            equiv = equiv | ignoreCase[cL];
                        }

                        equiv = equiv | solver.MkCharSetFromRange(c, c) | solver.MkCharSetFromRange(cU, cU) | solver.MkCharSetFromRange(cL, cL);

                        foreach (char d in solver.GenerateAllCharacters(equiv))
                        {
                            ignoreCase[d] = equiv;
                        }
                    }
                    //else
                    //{
                    //    outp += "c=" + StringUtility.Escape(c) + "," + "cU=" + StringUtility.Escape(cU);
                    //    Console.WriteLine("c=" + StringUtility.Escape(c) + "," + "cL=" + StringUtility.Escape(cL) + "," + "cU=" + StringUtility.Escape(cU));
                    //}
                }
            }
            return(ignoreCase);
        }