/// <summary>
/// Builds a textual description of a set of characters and stores it in
/// <c>description</c> under the given set.
/// </summary>
/// <remarks>
/// Every character of the set is added to a <c>Utilities.Ranges</c> instance.
/// Each entry of its <c>ranges</c> list is then rendered either as a single
/// escaped character (when both bounds are equal) or as <c>[lo-hi]</c> with
/// both bounds escaped; the rendered entries are separated by <c>|</c>.
/// </remarks>
/// <param name="label">The set of characters to describe.</param>
/// <returns>The description text; an empty string when there are no ranges.</returns>
public new string Describe(HashSet<char> label)
{
    var charRanges = new Utilities.Ranges();
    foreach (char member in label)
    {
        charRanges.Add((int)member);
    }

    string text = "";
    for (int index = 0; index < charRanges.ranges.Count; index++)
    {
        // A separator goes between entries, never before the first one.
        if (index > 0)
        {
            text += "|";
        }

        var bounds = charRanges.ranges[index];
        var low = Rex.RexEngine.Escape((char)bounds[0]);
        if (bounds[0] == bounds[1])
        {
            // Degenerate range: a single character.
            text += low;
        }
        else
        {
            var high = Rex.RexEngine.Escape((char)bounds[1]);
            text += "[" + low + "-" + high + "]";
        }
    }

    description[label] = text;
    return text;
}
/// <summary>
/// Writes C# source text declaring Unicode category lookup tables to
/// <paramref name="sw"/>.
/// </summary>
/// <remarks>
/// Every code point from 0 up to the largest value representable in the given
/// bit width is classified by <see cref="char.GetUnicodeCategory(char)"/> and
/// <see cref="char.IsWhiteSpace(char)"/>. The method always emits the compact
/// BDD encodings (per category, whitespace, word characters). The plain
/// integer range tables (per category, whitespace, word characters) are
/// emitted only when the bit width is 7.
/// </remarks>
/// <param name="encoding">Bit width of the encoding; its integer value is the number of bits per character.</param>
/// <param name="sw">Writer that receives the generated declarations.</param>
/// <param name="field">
/// Base name of the generated fields. The suffixes <c>Bdd</c>, <c>Whitespace</c>,
/// <c>WordCharacter</c>, <c>WhitespaceBdd</c> and <c>WordCharacterBdd</c> are appended to it.
/// </param>
private static void WriteRangeFields(BitWidth encoding, StreamWriter sw, string field)
{
    // UnicodeCategory codes run from 0 to 29.
    const int categoryCount = 30;

    int bits = (int)encoding;
    int maxChar = (1 << bits) - 1;

    var catMap = new Dictionary<UnicodeCategory, Ranges>();
    for (int c = 0; c < categoryCount; c++)
    {
        catMap[(UnicodeCategory)c] = new Ranges();
    }

    Ranges whitespace = new Ranges();
    Ranges wordcharacter = new Ranges();
    for (int i = 0; i <= maxChar; i++)
    {
        char ch = (char)i;
        if (char.IsWhiteSpace(ch))
        {
            whitespace.Add(i);
        }

        UnicodeCategory cat = char.GetUnicodeCategory(ch);
        catMap[cat].Add(i);
        int catCode = (int)cat;

        // The explicit word-character ranges are only collected (and only
        // emitted below) for the 7-bit encoding. The category set is the same
        // one used for the word-character BDD: 0,1,2,3,4,5,8,18.
        if (bits == 7)
        {
            if (catCode == 0 || catCode == 1 || catCode == 2 || catCode == 3 ||
                catCode == 4 || catCode == 5 || catCode == 8 || catCode == 18)
            {
                wordcharacter.Add(i);
            }
        }
    }

    // Generate BDD representations for each of the category ranges.
    BDD[] catBDDs = new BDD[categoryCount];
    CharSetSolver bddb = new CharSetSolver(encoding);
    for (int c = 0; c < categoryCount; c++)
    {
        catBDDs[c] = bddb.MkBddForIntRanges(catMap[(UnicodeCategory)c].ranges);
    }

    BDD whitespaceBdd = bddb.MkBddForIntRanges(whitespace.ranges);

    // Union of categories 0,1,2,3,4,5,8,18 (the letter categories,
    // NonSpacingMark, DecimalDigitNumber and ConnectorPunctuation).
    // Category 5 IS part of the union here; an earlier comment claiming it
    // was excluded did not match the code.
    BDD wordCharBdd = bddb.MkOr(catBDDs[0],
                      bddb.MkOr(catBDDs[1],
                      bddb.MkOr(catBDDs[2],
                      bddb.MkOr(catBDDs[3],
                      bddb.MkOr(catBDDs[4],
                      bddb.MkOr(catBDDs[5],
                      bddb.MkOr(catBDDs[8], catBDDs[18])))))));

    if (bits == 7)
    {
        sw.WriteLine(@"/// <summary> /// Array of 30 UnicodeCategory ranges. Each entry is a pair of integers. /// corresponding to the lower and upper bounds of the unicodes of the characters /// that have the given UnicodeCategory code (between 0 and 29). 
/// </summary>");
        sw.WriteLine("public static int[][][] " + field + " = new int[][][]{");

        // Iterate by category code instead of over catMap.Keys: the generated
        // array describes its entries by category code, and Dictionary
        // enumeration order is not guaranteed.
        for (int code = 0; code < categoryCount; code++)
        {
            UnicodeCategory c = (UnicodeCategory)code;
            sw.WriteLine("//{0}({1}):", c, (int)c);
            if (catMap[c].Count == 0)
            {
                sw.WriteLine("null,");
            }
            else
            {
                sw.WriteLine("new int[][]{");
                foreach (int[] range in catMap[c].ranges)
                {
                    sw.WriteLine(" new int[]{" + string.Format("{0},{1}", range[0], range[1]) + "},");
                }
                sw.WriteLine("},");
            }
        }
        sw.WriteLine("};");
    }

    sw.WriteLine(@"/// <summary> /// Compact BDD encodings of the categories. /// </summary>");
    sw.WriteLine("public static int[][] " + field + "Bdd = new int[][]{");

    // Same ordering concern as above: emit strictly in category-code order.
    for (int code = 0; code < categoryCount; code++)
    {
        UnicodeCategory c = (UnicodeCategory)code;
        sw.WriteLine("//{0}({1}):", c, (int)c);
        BDD catBdd = catBDDs[code];
        if (catBdd == null || catBdd.IsEmpty)
        {
            sw.WriteLine("null, //false");
        }
        else if (catBdd.IsFull)
        {
            sw.WriteLine("new int[]{0,0}, //true");
        }
        else
        {
            sw.WriteLine("new int[]{");
            foreach (var arc in bddb.SerializeCompact(catBdd))
            {
                sw.WriteLine("{0},", arc);
            }
            sw.WriteLine("},");
        }
    }
    sw.WriteLine("};");

    if (bits == 7)
    {
        sw.WriteLine(@"/// <summary> /// Whitespace character ranges. /// </summary>");
        sw.WriteLine("public static int[][] " + field + "Whitespace = new int[][]{");
        foreach (int[] range in whitespace.ranges)
        {
            sw.WriteLine(" new int[]{" + string.Format("{0},{1}", range[0], range[1]) + "},");
        }
        sw.WriteLine("};");

        sw.WriteLine(@"/// <summary> /// Word character ranges. /// </summary>");
        sw.WriteLine("public static int[][] " + field + "WordCharacter = new int[][]{");
        foreach (int[] range in wordcharacter.ranges)
        {
            sw.WriteLine(" new int[]{" + string.Format("{0},{1}", range[0], range[1]) + "},");
        }
        sw.WriteLine("};");
    }

    sw.WriteLine(@"/// <summary> /// Compact BDD encoding of the whitespace characters. 
/// </summary>");
    sw.WriteLine("public static int[] " + field + "WhitespaceBdd = new int[]{");
    foreach (var arc in bddb.SerializeCompact(whitespaceBdd))
    {
        sw.WriteLine("{0},", arc);
    }
    sw.WriteLine("};");

    sw.WriteLine(@"/// <summary> /// Compact BDD encoding of word characters /// </summary>");
    sw.WriteLine("public static int[] " + field + "WordCharacterBdd = new int[]{");
    foreach (var arc in bddb.SerializeCompact(wordCharBdd))
    {
        sw.WriteLine("{0},", arc);
    }
    sw.WriteLine("};");
}