/// <summary>
/// Builds a <see cref="BV64Algebra"/> over the given minterms.
/// </summary>
/// <param name="solver">solver used to build the decision tree and to convert each minterm into ranges</param>
/// <param name="minterms">the minterms; at most 64 are accepted</param>
/// <returns>an algebra constructed from the decision tree and one interval set per minterm</returns>
/// <exception cref="AutomataException">thrown when more than 64 minterms are supplied</exception>
public static BV64Algebra Create(CharSetSolver solver, BDD[] minterms)
{
    if (minterms.Length > 64)
    {
        throw new AutomataException(AutomataExceptionKind.NrOfMintermsCanBeAtMost64);
    }

    var tree = DecisionTree.Create(solver, minterms);

    // One interval set per minterm, in the same order as the minterms.
    var blocks = new IntervalSet[minterms.Length];
    for (int k = 0; k < minterms.Length; k++)
    {
        blocks[k] = new IntervalSet(solver.ToRanges(minterms[k]));
    }

    return new BV64Algebra(tree, blocks);
}
/// <summary>
/// Creates a Boolean decision tree for membership in <paramref name="domain"/>.
/// References to solver and domain are not saved in the resulting decision tree.
/// </summary>
/// <param name="solver">character algebra</param>
/// <param name="domain">elements that map to true</param>
/// <param name="precomputeLimit">characters up to and including this code are precomputed in an array (default is 0xFF, i.e. extended ASCII); 0 disables the array</param>
/// <returns>a decision tree made of the precomputed array and, when the array does not cover all characters, a search tree for the rest</returns>
internal static BooleanDecisionTree Create(CharSetSolver solver, BDD domain, ushort precomputeLimit = 0xFF)
{
    // Two blocks: index 0 is the complement of the domain, index 1 is the domain itself.
    var blocks = new BDD[] { solver.MkNot(domain), domain };

    if (precomputeLimit == 0)
    {
        // Nothing is precomputed: the search tree spans 0..0xFFFF.
        return new BooleanDecisionTree(
            new bool[] { },
            MkBST(new DecisionTree.PartitionCut(solver, blocks), 0, 0xFFFF));
    }

    bool[] table = Precompute(solver, domain, precomputeLimit);

    // The search tree is only built when the table leaves some characters uncovered.
    DecisionTree.BST rest = null;
    if (precomputeLimit < ushort.MaxValue)
    {
        rest = MkBST(new DecisionTree.PartitionCut(solver, blocks), precomputeLimit + 1, ushort.MaxValue);
    }

    return new BooleanDecisionTree(table, rest);
}
/// <summary>
/// Computes, for every character code from 0 to <paramref name="precomputeLimit"/> inclusive,
/// whether the single-character constraint for that code intersects <paramref name="domain"/>.
/// </summary>
/// <param name="solver">solver used to build and test the constraints</param>
/// <param name="domain">the set being tested for membership</param>
/// <param name="precomputeLimit">largest character code to include</param>
/// <returns>an array of length precomputeLimit + 1 indexed by character code</returns>
private static bool[] Precompute(CharSetSolver solver, BDD domain, int precomputeLimit)
{
    var table = new bool[precomputeLimit + 1];
    for (int code = 0; code < table.Length; code++)
    {
        var single = solver.MkCharConstraint((char)code);
        table[code] = solver.IsSatisfiable(solver.MkAnd(single, domain));
    }
    return table;
}
/// <summary>
/// Creates a decision tree that maps a character to a partition block id.
/// </summary>
/// <param name="solver">character algebra</param>
/// <param name="partition">partition of the whole set of all characters into pairwise disjoint nonempty sets</param>
/// <param name="precomputeLimit">upper limit for the characters whose block ids are precomputed in an array (default is 0xFF, i.e. extended ASCII); 0 disables the array</param>
/// <returns>a decision tree made of the precomputed array and, when needed, a search tree for the remaining characters</returns>
internal static DecisionTree Create(CharSetSolver solver, BDD[] partition, ushort precomputeLimit = 0xFF)
{
    if (partition.Length == 1)
    {
        // There is no actual partition: everything maps to the single id 0, e.g. as in .*
        // NOTE(review): this array has precomputeLimit entries (all zero); confirm whether
        // precomputeLimit + 1 was intended.
        var zeros = new int[(int)precomputeLimit];
        return new DecisionTree(zeros, new BST(0, null, null));
    }

    if (precomputeLimit == 0)
    {
        // Nothing is precomputed: the search tree spans 0..0xFFFF.
        return new DecisionTree(
            new int[] { },
            MkBST(new PartitionCut(solver, partition), 0, 0xFFFF));
    }

    int[] table = Precompute(solver, partition, precomputeLimit);

    // The search tree is only built when the table leaves some characters uncovered.
    BST rest = null;
    if (precomputeLimit < ushort.MaxValue)
    {
        rest = MkBST(new PartitionCut(solver, partition), precomputeLimit + 1, ushort.MaxValue);
    }

    return new DecisionTree(table, rest);
}
/// <summary>
/// Renders a character predicate as regex character-class text.
/// Well-known predicates are rendered with their shorthand (\d, \D, \w, \W, \s, \S or \p{..});
/// a single character is rendered as an escaped character; anything else is rendered as a
/// bracketed set of characters and ranges.
/// </summary>
/// <param name="label">the predicate to render</param>
/// <param name="categorizer">provides the predicates of the Unicode categories, word letters and whitespace</param>
/// <param name="solver">solver used for complementing predicates and for converting the label to ranges</param>
/// <returns>the regex text for the label</returns>
internal static string ToRegexCharSet(BDD label, IUnicodeCategoryTheory<BDD> categorizer, CharSetSolver solver)
{
    // Category 8 is the one rendered as the \d shorthand.
    BDD digit = categorizer.CategoryCondition(8);
    if (digit == label)
    {
        return @"\d";
    }
    if (solver.MkNot(digit) == label)
    {
        return @"\D";
    }

    BDD word = categorizer.WordLetterCondition;
    if (word == label)
    {
        return @"\w";
    }
    if (solver.MkNot(word) == label)
    {
        return @"\W";
    }

    BDD space = categorizer.WhiteSpaceCondition;
    if (space == label)
    {
        return @"\s";
    }
    if (solver.MkNot(space) == label)
    {
        return @"\S";
    }

    for (int i = 0; i < categorizer.UnicodeCategoryStandardAbbreviations.Length; i++)
    {
        if (categorizer.CategoryCondition(i) == label)
        {
            // The label IS the category, so the positive form \p{..} is required;
            // \P{..} would denote the complement of the category.
            return @"\p{" + categorizer.UnicodeCategoryStandardAbbreviations[i] + "}";
        }
    }

    var ranges = solver.ToRanges(label);

    // A single character needs no brackets.
    if (ranges.Length == 1 && ranges[0].Item1 == ranges[0].Item2)
    {
        return StringUtility.Escape((char)ranges[0].Item1);
    }

    var res = new StringBuilder("[");
    for (int i = 0; i < ranges.Length; i++)
    {
        var range = ranges[i];
        res.Append(StringUtility.EscapeWithNumericSpace((char)range.Item1));
        if (range.Item1 == range.Item2)
        {
            // Singleton range: the one character has already been written.
            continue;
        }
        if (range.Item1 != range.Item2 - 1)
        {
            // Two adjacent characters are listed individually; longer ranges use a dash.
            res.Append("-");
        }
        res.Append(StringUtility.EscapeWithNumericSpace((char)range.Item2));
    }
    res.Append("]");
    return res.ToString();
}
/// <summary>
/// Creates the transformer: stores the solver, deserializes the generated ignore-case
/// relation with it, and derives the domain from that relation.
/// </summary>
/// <param name="charSetSolver">solver used to deserialize the relation and kept for later use</param>
public IgnoreCaseTransformer(CharSetSolver charSetSolver)
{
    solver = charSetSolver;

    IgnoreCaseRel = charSetSolver.Deserialize(
        Microsoft.SRM.Generated.IgnoreCaseRelation.ignorecase);

    // The domain is the relation shifted right by 16.
    // NOTE(review): presumably this drops the low 16 bits of each encoded pair - confirm
    // against the definition of the >> operator on BDD.
    domain = IgnoreCaseRel >> 16;
}
/// <summary>
/// Writes C# source text declaring the Unicode category tables for the given encoding.
/// For every encoding the compact BDD forms of the categories, of whitespace and of word
/// characters are written; for the 7-bit encoding the explicit range tables are written too.
/// </summary>
/// <param name="encoding">bit width of the character encoding; characters 0 .. 2^bits - 1 are classified</param>
/// <param name="sw">writer that receives the generated source text</param>
/// <param name="field">base name of the generated fields</param>
private static void WriteRangeFields(BitWidth encoding, StreamWriter sw, string field)
{
    // Number of UnicodeCategory codes handled (codes 0..29).
    const int categoryCount = 30;

    int bits = (int)encoding;
    int maxChar = (1 << bits) - 1;

    var catMap = new Dictionary<UnicodeCategory, Ranges>();
    for (int c = 0; c < categoryCount; c++)
    {
        catMap[(UnicodeCategory)c] = new Ranges();
    }
    Ranges whitespace = new Ranges();
    Ranges wordcharacter = new Ranges();

    for (int i = 0; i <= maxChar; i++)
    {
        char ch = (char)i;
        if (char.IsWhiteSpace(ch))
        {
            whitespace.Add(i);
        }
        UnicodeCategory cat = char.GetUnicodeCategory(ch);
        catMap[cat].Add(i);
        int catCode = (int)cat;
        // The explicit word-character ranges are only collected for the 7-bit encoding
        // (the original note here read "in .NET 3.5").
        if (bits == 7)
        {
            if (catCode == 0 || catCode == 1 || catCode == 2 || catCode == 3 ||
                catCode == 4 || catCode == 5 || catCode == 8 || catCode == 18)
            {
                wordcharacter.Add(i);
            }
        }
    }

    // Generate BDD representations for each of the category ranges.
    BDD[] catBDDs = new BDD[categoryCount];
    CharSetSolver bddb = new CharSetSolver(encoding);
    for (int c = 0; c < categoryCount; c++)
    {
        catBDDs[c] = bddb.MkBddForIntRanges(catMap[(UnicodeCategory)c].ranges);
    }
    BDD whitespaceBdd = bddb.MkBddForIntRanges(whitespace.ranges);

    // Word characters are the union of categories 0,1,2,3,4,5,8,18, i.e. the letter categories,
    // NonSpacingMark, DecimalDigitNumber and ConnectorPunctuation.
    // An earlier comment stated that category 5 was NOT a word character in .NET 3.5;
    // the code has included it, both here and in the range table above.
    BDD wordCharBdd =
        bddb.MkOr(catBDDs[0],
        bddb.MkOr(catBDDs[1],
        bddb.MkOr(catBDDs[2],
        bddb.MkOr(catBDDs[3],
        bddb.MkOr(catBDDs[4],
        bddb.MkOr(catBDDs[5],
        bddb.MkOr(catBDDs[8], catBDDs[18])))))));

    if (bits == 7)
    {
        sw.WriteLine(@"/// <summary> /// Array of 30 UnicodeCategory ranges. Each entry is a pair of integers. /// corresponding to the lower and upper bounds of the unicodes of the characters /// that have the given UnicodeCategory code (between 0 and 29). 
/// </summary>");
        sw.WriteLine("public static int[][][] " + field + " = new int[][][]{");
        // Entry k of the generated array must describe category k, so the categories are
        // visited by code rather than in the (unspecified) enumeration order of the dictionary keys.
        for (int code = 0; code < categoryCount; code++)
        {
            UnicodeCategory c = (UnicodeCategory)code;
            sw.WriteLine("//{0}({1}):", c, (int)c);
            if (catMap[c].Count == 0)
            {
                sw.WriteLine("null,");
            }
            else
            {
                sw.WriteLine("new int[][]{");
                foreach (int[] range in catMap[c].ranges)
                {
                    sw.WriteLine(" new int[]{" + string.Format("{0},{1}", range[0], range[1]) + "},");
                }
                sw.WriteLine("},");
            }
        }
        sw.WriteLine("};");
    }

    sw.WriteLine(@"/// <summary> /// Compact BDD encodings of the categories. /// </summary>");
    sw.WriteLine("public static int[][] " + field + "Bdd = new int[][]{");
    // Same ordering requirement as above: entry k belongs to category k.
    for (int code = 0; code < categoryCount; code++)
    {
        UnicodeCategory c = (UnicodeCategory)code;
        sw.WriteLine("//{0}({1}):", c, (int)c);
        BDD catBdd = catBDDs[(int)c];
        if (catBdd == null || catBdd.IsEmpty)
        {
            sw.WriteLine("null, //false");
        }
        else if (catBdd.IsFull)
        {
            sw.WriteLine("new int[]{0,0}, //true");
        }
        else
        {
            sw.WriteLine("new int[]{");
            foreach (var arc in bddb.SerializeCompact(catBdd))
            {
                sw.WriteLine("{0},", arc);
            }
            sw.WriteLine("},");
        }
    }
    sw.WriteLine("};");

    if (bits == 7)
    {
        sw.WriteLine(@"/// <summary> /// Whitespace character ranges. /// </summary>");
        sw.WriteLine("public static int[][] " + field + "Whitespace = new int[][]{");
        foreach (int[] range in whitespace.ranges)
        {
            sw.WriteLine(" new int[]{" + string.Format("{0},{1}", range[0], range[1]) + "},");
        }
        sw.WriteLine("};");

        sw.WriteLine(@"/// <summary> /// Word character ranges. /// </summary>");
        sw.WriteLine("public static int[][] " + field + "WordCharacter = new int[][]{");
        foreach (int[] range in wordcharacter.ranges)
        {
            sw.WriteLine(" new int[]{" + string.Format("{0},{1}", range[0], range[1]) + "},");
        }
        sw.WriteLine("};");
    }

    sw.WriteLine(@"/// <summary> /// Compact BDD encoding of the whitespace characters. 
/// </summary>");
    sw.WriteLine("public static int[] " + field + "WhitespaceBdd = new int[]{");
    foreach (var arc in bddb.SerializeCompact(whitespaceBdd))
    {
        sw.WriteLine("{0},", arc);
    }
    sw.WriteLine("};");

    sw.WriteLine(@"/// <summary> /// Compact BDD encoding of word characters /// </summary>");
    sw.WriteLine("public static int[] " + field + "WordCharacterBdd = new int[]{");
    foreach (var arc in bddb.SerializeCompact(wordCharBdd))
    {
        sw.WriteLine("{0},", arc);
    }
    sw.WriteLine("};");
}
/// <summary>
/// Type initializer: creates the shared character-set solver and then the
/// regex-to-automaton converter that is built on top of it.
/// </summary>
static Regex()
{
    // The solver must exist first because the converter is constructed from it.
    solver = new CharSetSolver();
    converter = new RegexToAutomatonConverter<BDD>(solver);
}
/// <summary>
/// Creates a partition cut that holds on to the given solver and blocks.
/// </summary>
/// <param name="solver">solver stored for later use by this instance</param>
/// <param name="blocks">the blocks; the array is stored by reference, not copied</param>
internal PartitionCut(CharSetSolver solver, BDD[] blocks)
{
    this.solver = solver;
    this.blocks = blocks;
}
/// <summary>
/// Computes, for every character that has a distinct upper- or lower-case form accepted by
/// the regex engine under the ignore-case option, the set of characters equivalent to it.
/// </summary>
/// <param name="solver">solver used to build and enumerate the character sets</param>
/// <returns>a map from each such character to the set of its case-equivalent characters</returns>
private static Dictionary<char, BDD> ComputeIgnoreCaseDistionary(CharSetSolver solver)
{
    var classes = new Dictionary<char, BDD>();

    // uint counter so that the loop can terminate after the last character 0xFFFF.
    for (uint code = 0; code <= 0xFFFF; code++)
    {
        char c = (char)code;
        char cU = char.ToUpper(c);
        char cL = char.ToLower(c);

        // Characters without any distinct case variant contribute nothing.
        if (c == cU && c == cL)
        {
            continue;
        }

        // Make sure that the regex engine considers c as being equivalent to cU and cL, else ignore c.
        // In some cases c != cU but the regex engine does not consider the characters equivalent
        // with respect to the ignore-case option. These characters are:
        //   c=\xB5,cU=\u039C      c=\u0131,cU=I         c=\u017F,cU=S
        //   c=\u0345,cU=\u0399    c=\u03C2,cU=\u03A3    c=\u03D0,cU=\u0392
        //   c=\u03D1,cU=\u0398    c=\u03D5,cU=\u03A6    c=\u03D6,cU=\u03A0
        //   c=\u03F0,cU=\u039A    c=\u03F1,cU=\u03A1    c=\u03F5,cU=\u0395
        //   c=\u1E9B,cU=\u1E60    c=\u1FBE,cU=\u0399
        string input = cU.ToString() + cL.ToString();
        string pattern = "^(?i:" + StringUtility.Escape(c) + ")+$";
        if (!System.Text.RegularExpressions.Regex.IsMatch(input, pattern))
        {
            continue;
        }

        // Merge the classes already recorded for c, cU and cL with the three characters themselves.
        BDD equiv = solver.False;
        BDD known;
        if (classes.TryGetValue(c, out known))
        {
            equiv = equiv | known;
        }
        if (classes.TryGetValue(cU, out known))
        {
            equiv = equiv | known;
        }
        if (classes.TryGetValue(cL, out known))
        {
            equiv = equiv | known;
        }
        equiv = equiv | solver.MkCharSetFromRange(c, c) | solver.MkCharSetFromRange(cU, cU) | solver.MkCharSetFromRange(cL, cL);

        // Every member of the merged class now maps to the whole class.
        foreach (char member in solver.GenerateAllCharacters(equiv))
        {
            classes[member] = equiv;
        }
    }

    return classes;
}