Exemple #1
0
        /// <summary>
        /// Writes the C# source text of a string-array field named <c>ignorecase</c> to
        /// <paramref name="sw"/>, emitting one escaped string per distinct value of the
        /// ignore-case dictionary.
        /// </summary>
        /// <param name="sw">Writer that receives the generated source lines.</param>
        private static void CreateStringArray(StreamWriter sw)
        {
            // Header of the generated declaration: doc comment plus the opening of the initializer.
            sw.WriteLine("/// <summary>");
            sw.WriteLine("/// Each string correponds to an equivalence class of characters when case is ignored.");
            sw.WriteLine("/// </summary>");
            sw.WriteLine("public static string[] ignorecase = new string[]{");

            CharSetSolver charSolver = new CharSetSolver();
            Dictionary<char, BDD> classByChar = ComputeIgnoreCaseDistionary(charSolver);

            // The same set can be stored under several keys; each set is written only once.
            HashSet<BDD> written = new HashSet<BDD>();

            foreach (BDD equivalenceClass in classByChar.Values)
            {
                if (!written.Add(equivalenceClass))
                {
                    continue;
                }

                // Expand every [low, high] range of the set into its individual characters.
                var ranges = charSolver.ToRanges(equivalenceClass);
                List<char> members = new List<char>();
                for (int r = 0; r < ranges.Length; r++)
                {
                    int low = (int)ranges[r].Item1;
                    int high = (int)ranges[r].Item2;
                    for (int code = low; code <= high; code++)
                    {
                        members.Add((char)code);
                    }
                }

                var escaped = StringUtility.Escape(new string(members.ToArray()));
                sw.WriteLine(@"{0},", escaped);
            }

            sw.WriteLine("};"); // closes the generated array initializer
        }
Exemple #2
0
        /// <summary>
        /// Renders the inclusive character interval [<paramref name="m"/>, <paramref name="n"/>] as text.
        /// </summary>
        /// <param name="m">Code of the first character of the interval.</param>
        /// <param name="n">Code of the last character of the interval.</param>
        /// <returns>
        /// <c>"."</c> for the interval 0..0xFFFF; a single escaped character when both bounds are equal;
        /// otherwise the two escaped bounds, joined by <c>"-"</c> unless they are adjacent.
        /// </returns>
        private static string ToCharacterClassInterval(uint m, uint n)
        {
            bool coversAllChars = m == 0 && n == 0xFFFF;
            if (coversAllChars)
            {
                return ".";
            }

            string lowerBound = StringUtility.Escape((char)m);
            if (m == n)
            {
                return lowerBound;
            }

            // Adjacent bounds are written back to back; a dash is only needed when something lies between them.
            string separator = (n > m + 1) ? "-" : "";
            return lowerBound + separator + StringUtility.Escape((char)n);
        }
Exemple #3
0
        /// <summary>
        /// Renders the character set <paramref name="label"/> as a regex character-class expression.
        /// </summary>
        /// <remarks>
        /// The shorthands <c>\d \D \w \W \s \S</c> are returned when the label compares equal (==) to the
        /// corresponding predicate of <paramref name="categorizer"/> or to its complement, and
        /// <c>\p{Xx}</c> when it compares equal to a single Unicode category predicate. A set consisting
        /// of exactly one character is returned as that escaped character. Everything else becomes a
        /// bracket expression listing the ranges of the set.
        /// </remarks>
        /// <param name="label">The character set to render.</param>
        /// <param name="categorizer">Provides the category, word-letter and white-space predicates.</param>
        /// <param name="solver">Used to complement predicates and to enumerate the ranges of the label.</param>
        /// <returns>The regex text for the set.</returns>
        internal static string ToRegexCharSet(BDD label, IUnicodeCategoryTheory <BDD> categorizer, CharSetSolver solver)
        {
            // Category index rendered as \d. In System.Globalization.UnicodeCategory the value 8 is
            // DecimalDigitNumber; presumably the categorizer indices follow that enum (confirm).
            const int decimalDigitCategory = 8;

            if (categorizer.CategoryCondition(decimalDigitCategory) == label)
            {
                return(@"\d");
            }
            if (solver.MkNot(categorizer.CategoryCondition(decimalDigitCategory)) == label)
            {
                return(@"\D");
            }
            if (categorizer.WordLetterCondition == label)
            {
                return(@"\w");
            }
            if (solver.MkNot(categorizer.WordLetterCondition) == label)
            {
                return(@"\W");
            }
            if (categorizer.WhiteSpaceCondition == label)
            {
                return(@"\s");
            }
            if (solver.MkNot(categorizer.WhiteSpaceCondition) == label)
            {
                return(@"\S");
            }
            for (int i = 0; i < categorizer.UnicodeCategoryStandardAbbreviations.Length; i++)
            {
                if (categorizer.CategoryCondition(i) == label)
                {
                    // Lower-case \p{..} matches the category itself; upper-case \P{..} would match its complement.
                    return(@"\p{" + categorizer.UnicodeCategoryStandardAbbreviations[i] + "}");
                }
            }

            var ranges = solver.ToRanges(label);

            // A set with exactly one member needs no brackets.
            if (ranges.Length == 1 && ranges[0].Item1 == ranges[0].Item2)
            {
                return(StringUtility.Escape((char)ranges[0].Item1));
            }

            // NOTE(review): an empty range list produces "[]"; confirm callers never pass an empty set.
            var res = new StringBuilder("[");

            for (int i = 0; i < ranges.Length; i++)
            {
                var range = ranges[i];
                if (range.Item1 == range.Item2)
                {
                    // Single character.
                    res.Append(StringUtility.EscapeWithNumericSpace((char)range.Item1));
                }
                else if (range.Item1 == range.Item2 - 1)
                {
                    // Two adjacent characters: list both instead of writing a range.
                    res.Append(StringUtility.EscapeWithNumericSpace((char)range.Item1));
                    res.Append(StringUtility.EscapeWithNumericSpace((char)range.Item2));
                }
                else
                {
                    // Three or more characters: low-high.
                    res.Append(StringUtility.EscapeWithNumericSpace((char)range.Item1));
                    res.Append("-");
                    res.Append(StringUtility.EscapeWithNumericSpace((char)range.Item2));
                }
            }
            res.Append("]");
            return(res.ToString());
        }
Exemple #4
0
        /// <summary>
        /// Builds a dictionary that maps each character having case variants to the set of characters
        /// it is grouped with when case is ignored.
        /// </summary>
        /// <remarks>
        /// All characters 0..0xFFFF are visited. A character is grouped with its upper- and lower-case
        /// forms only when the .NET regex engine, under the ignore-case option, also matches both forms
        /// against that character. Every member of a group is mapped to the same set.
        /// NOTE(review): <c>char.ToUpper(char)</c> and <c>char.ToLower(char)</c> use the current culture,
        /// so the result may depend on the culture of the running thread.
        /// </remarks>
        /// <param name="solver">Solver used to create and enumerate the character sets.</param>
        /// <returns>The dictionary from character to its ignore-case set.</returns>
        private static Dictionary <char, BDD> ComputeIgnoreCaseDistionary(CharSetSolver solver)
        {
            var ignoreCase = new Dictionary <char, BDD>();

            for (uint i = 0; i <= 0xFFFF; i++)
            {
                char c  = (char)i;
                char cU = char.ToUpper(c);
                char cL = char.ToLower(c);

                // Nothing to record when c is its own upper- and lower-case form.
                if (c == cU && c == cL)
                {
                    continue;
                }

                //make sure that the regex engine considers c as being equivalent to cU and cL, else ignore c
                //in some cases c != cU but the regex engine does not consider the characters equivalent wrt the ignore-case option.
                //These characters are:
                //c=\xB5,cU=\u039C
                //c=\u0131,cU=I
                //c=\u017F,cU=S
                //c=\u0345,cU=\u0399
                //c=\u03C2,cU=\u03A3
                //c=\u03D0,cU=\u0392
                //c=\u03D1,cU=\u0398
                //c=\u03D5,cU=\u03A6
                //c=\u03D6,cU=\u03A0
                //c=\u03F0,cU=\u039A
                //c=\u03F1,cU=\u03A1
                //c=\u03F5,cU=\u0395
                //c=\u1E9B,cU=\u1E60
                //c=\u1FBE,cU=\u0399
                if (!System.Text.RegularExpressions.Regex.IsMatch(cU.ToString() + cL.ToString(), "^(?i:" + StringUtility.Escape(c) + ")+$"))
                {
                    continue;
                }

                // Merge any sets already recorded for c, cU or cL so that the groups stay closed.
                BDD equiv = solver.False;
                BDD known;

                if (ignoreCase.TryGetValue(c, out known))
                {
                    equiv = equiv | known;
                }
                if (ignoreCase.TryGetValue(cU, out known))
                {
                    equiv = equiv | known;
                }
                if (ignoreCase.TryGetValue(cL, out known))
                {
                    equiv = equiv | known;
                }

                equiv = equiv | solver.MkCharSetFromRange(c, c) | solver.MkCharSetFromRange(cU, cU) | solver.MkCharSetFromRange(cL, cL);

                // Point every member of the merged set at the merged set.
                foreach (char d in solver.GenerateAllCharacters(equiv))
                {
                    ignoreCase[d] = equiv;
                }
            }
            return(ignoreCase);
        }