/// <summary>
/// Writes the source text of a public static string array named <c>ignorecase</c> to
/// <paramref name="sw"/>, emitting one entry per distinct value found in the dictionary
/// returned by <c>ComputeIgnoreCaseDistionary</c>.
/// </summary>
/// <param name="sw">Writer that receives the generated declaration.</param>
private static void CreateStringArray(StreamWriter sw)
{
    // Header of the generated declaration (doc comment + array opener).
    sw.WriteLine("/// <summary>");
    sw.WriteLine("/// Each string correponds to an equivalence class of characters when case is ignored.");
    sw.WriteLine("/// </summary>");
    sw.WriteLine("public static string[] ignorecase = new string[]{");

    CharSetSolver solver = new CharSetSolver();
    Dictionary<char, BDD> classByChar = ComputeIgnoreCaseDistionary(solver);

    // Several characters map to the same BDD value; each value is emitted only once.
    HashSet<BDD> emitted = new HashSet<BDD>();
    foreach (var entry in classByChar)
    {
        BDD charClass = entry.Value;
        if (!emitted.Add(charClass))
        {
            continue;
        }

        // Expand every [low, high] range into its individual characters.
        StringBuilder members = new StringBuilder();
        foreach (var range in solver.ToRanges(charClass))
        {
            int low = (int)range.Item1;
            int high = (int)range.Item2;
            for (int code = low; code <= high; code++)
            {
                members.Append((char)code);
            }
        }

        var escaped = StringUtility.Escape(members.ToString());
        sw.WriteLine(@"{0},", escaped);
    }

    // Close the generated array initializer.
    sw.WriteLine("};");
}
/// <summary>
/// Formats the inclusive interval [<paramref name="m"/>, <paramref name="n"/>] as text for
/// use in a character class.
/// </summary>
/// <param name="m">Lower bound; it is cast to <c>char</c> before escaping.</param>
/// <param name="n">Upper bound; it is cast to <c>char</c> before escaping.</param>
/// <returns>
/// "." when the interval is 0..0xFFFF; the escaped single character when both bounds are
/// equal; otherwise the escaped lower bound followed by the escaped upper bound, with a
/// "-" between them only when <c>n &gt; m + 1</c>.
/// </returns>
private static string ToCharacterClassInterval(uint m, uint n)
{
    bool coversEverything = (m == 0 && n == 0xFFFF);
    if (coversEverything)
    {
        return ".";
    }

    string lower = StringUtility.Escape((char)m);
    if (m == n)
    {
        return lower;
    }

    // No hyphen is inserted unless n exceeds m + 1; the two endpoints are then simply adjacent.
    string separator = (n > m + 1) ? "-" : "";
    return lower + separator + StringUtility.Escape((char)n);
}
/// <summary>
/// Renders a character-set label as regex character-class text.
/// </summary>
/// <remarks>
/// The label is compared, in this order, against: category condition 8 and its complement
/// (\d, \D), the word-letter condition and its complement (\w, \W), the white-space
/// condition and its complement (\s, \S), and each Unicode category condition (\p{Abbrev}).
/// If none of these is equal to the label, the label's ranges are written out: a lone
/// character is returned escaped on its own, anything else is wrapped in [...].
/// A label with no ranges yields "[]".
/// </remarks>
/// <param name="label">The character set to render.</param>
/// <param name="categorizer">Supplies the predefined category conditions and abbreviations.</param>
/// <param name="solver">Used to complement conditions and to enumerate the ranges of the label.</param>
/// <returns>The regex text for <paramref name="label"/>.</returns>
internal static string ToRegexCharSet(BDD label, IUnicodeCategoryTheory<BDD> categorizer, CharSetSolver solver)
{
    // Category index 8 is rendered as \d.
    // NOTE(review): presumably indices follow System.Globalization.UnicodeCategory,
    // where 8 is DecimalDigitNumber -- confirm against the categorizer implementation.
    const int DigitCategory = 8;

    if (categorizer.CategoryCondition(DigitCategory) == label)
    {
        return @"\d";
    }
    if (solver.MkNot(categorizer.CategoryCondition(DigitCategory)) == label)
    {
        return @"\D";
    }
    if (categorizer.WordLetterCondition == label)
    {
        return @"\w";
    }
    if (solver.MkNot(categorizer.WordLetterCondition) == label)
    {
        return @"\W";
    }
    if (categorizer.WhiteSpaceCondition == label)
    {
        return @"\s";
    }
    if (solver.MkNot(categorizer.WhiteSpaceCondition) == label)
    {
        return @"\S";
    }

    var abbreviations = categorizer.UnicodeCategoryStandardAbbreviations;
    for (int i = 0; i < abbreviations.Length; i++)
    {
        if (categorizer.CategoryCondition(i) == label)
        {
            // Lower-case \p{..} denotes membership in the category. The previous upper-case
            // \P{..} is the negated class in .NET regex syntax and described the complement
            // of the label instead of the label itself.
            return @"\p{" + abbreviations[i] + "}";
        }
    }

    var ranges = solver.ToRanges(label);

    // A single character needs no brackets.
    if (ranges.Length == 1 && ranges[0].Item1 == ranges[0].Item2)
    {
        return StringUtility.Escape((char)ranges[0].Item1);
    }

    var res = new StringBuilder("[");
    for (int i = 0; i < ranges.Length; i++)
    {
        var range = ranges[i];
        if (range.Item1 == range.Item2)
        {
            // Singleton range: just the character.
            res.Append(StringUtility.EscapeWithNumericSpace((char)range.Item1));
        }
        else if (range.Item1 == range.Item2 - 1)
        {
            // Two adjacent characters: list both, no hyphen needed.
            res.Append(StringUtility.EscapeWithNumericSpace((char)range.Item1));
            res.Append(StringUtility.EscapeWithNumericSpace((char)range.Item2));
        }
        else
        {
            // Proper interval: low-high.
            res.Append(StringUtility.EscapeWithNumericSpace((char)range.Item1));
            res.Append("-");
            res.Append(StringUtility.EscapeWithNumericSpace((char)range.Item2));
        }
    }
    res.Append("]");
    return res.ToString();
}
/// <summary>
/// Builds a map from characters to the set of characters they are grouped with when case
/// is ignored, scanning every code unit from 0 to 0xFFFF.
/// </summary>
/// <param name="solver">Solver used to create and combine the character sets.</param>
/// <returns>
/// A dictionary whose value for a character is the set built for its group. Characters whose
/// upper- and lower-case forms both equal the character itself, or that fail the regex
/// check described below, are not given an entry by their own iteration.
/// </returns>
private static Dictionary<char, BDD> ComputeIgnoreCaseDistionary(CharSetSolver solver)
{
    var classes = new Dictionary<char, BDD>();

    for (uint code = 0; code <= 0xFFFF; code++)
    {
        char c = (char)code;
        char upper = char.ToUpper(c);
        char lower = char.ToLower(c);

        // Nothing to do when the character has no distinct case variant.
        if (c == upper && c == lower)
        {
            continue;
        }

        // Make sure that the regex engine considers c equivalent to both its upper- and
        // lower-case forms; otherwise c is ignored. In some cases c differs from its
        // upper-case form but the regex engine does not treat the two characters as
        // equivalent under the ignore-case option. These characters are:
        //   c=\xB5,   cU=\u039C
        //   c=\u0131, cU=I
        //   c=\u017F, cU=S
        //   c=\u0345, cU=\u0399
        //   c=\u03C2, cU=\u03A3
        //   c=\u03D0, cU=\u0392
        //   c=\u03D1, cU=\u0398
        //   c=\u03D5, cU=\u03A6
        //   c=\u03D6, cU=\u03A0
        //   c=\u03F0, cU=\u039A
        //   c=\u03F1, cU=\u03A1
        //   c=\u03F5, cU=\u0395
        //   c=\u1E9B, cU=\u1E60
        //   c=\u1FBE, cU=\u0399
        string casePair = upper.ToString() + lower.ToString();
        string pattern = "^(?i:" + StringUtility.Escape(c) + ")+$";
        if (!System.Text.RegularExpressions.Regex.IsMatch(casePair, pattern))
        {
            continue;
        }

        char[] variants = { c, upper, lower };

        // Start from whatever sets the three variants already belong to ...
        BDD merged = solver.False;
        BDD existing;
        foreach (char variant in variants)
        {
            if (classes.TryGetValue(variant, out existing))
            {
                merged = merged | existing;
            }
        }

        // ... then add the three variants themselves.
        foreach (char variant in variants)
        {
            merged = merged | solver.MkCharSetFromRange(variant, variant);
        }

        // Point every character produced for the merged set at that set.
        foreach (char member in solver.GenerateAllCharacters(merged))
        {
            classes[member] = merged;
        }
    }

    return classes;
}