/// <summary>
/// Create a Boolean decision tree for membership in <paramref name="domain"/>.
/// References to solver and domain are not saved in the resulting decision tree.
/// </summary>
/// <param name="solver">character algebra</param>
/// <param name="domain">elements that map to true</param>
/// <param name="precomputeLimit">
/// upper limit for character codes whose result is precomputed into an array
/// (default is 0xFF, i.e. extended ASCII); 0 means nothing is precomputed
/// </param>
/// <returns>
/// a decision tree made of the precomputed array and, unless the array already
/// reaches ushort.MaxValue, a search tree built by MkBST for the remaining codes
/// </returns>
internal static BooleanDecisionTree Create(CharSetSolver solver, BDD domain, ushort precomputeLimit = 0xFF)
{
    // Two-block partition: index 0 is "not in domain", index 1 is "in domain".
    BDD domain_compl = solver.MkNot(domain);
    var partition = new BDD[] { domain_compl, domain };

    if (precomputeLimit == 0)
    {
        // No lookup table at all: the search tree is built for 0..0xFFFF.
        var fullTree = MkBST(new DecisionTree.PartitionCut(solver, partition), 0, 0xFFFF);
        return(new BooleanDecisionTree(new bool[] { }, fullTree));
    }

    bool[] precomp = Precompute(solver, domain, precomputeLimit);

    // A search tree is only needed when the table does not reach ushort.MaxValue.
    DecisionTree.BST bst = precomputeLimit < ushort.MaxValue
        ? MkBST(new DecisionTree.PartitionCut(solver, partition), precomputeLimit + 1, ushort.MaxValue)
        : null;

    return(new BooleanDecisionTree(precomp, bst));
}
/// <summary>
/// Converts a character set into the union of the atoms whose partition block
/// intersects it. Assumes that set is a union of some minterms (or empty).
/// If null then null is returned.
/// </summary>
/// <param name="set">character set to convert, may be null</param>
/// <returns>the combined atoms, or null when <paramref name="set"/> is null</returns>
public BV ConvertFromCharSet(BDD set)
{
    if (set == null)
    {
        return(null);
    }

    var algebra = set.algebra;
    BV result = this.zero;
    for (int block = 0; block < partition.Length; block++)
    {
        BDD blockBdd = partition[block].AsBDD(algebra);
        // The block contributes its atom as soon as it shares one character with the set.
        if (algebra.IsSatisfiable(algebra.MkAnd(blockBdd, set)))
        {
            result |= atoms[block];
        }
    }
    return(result);
}
/// <summary>
/// Builds a lookup table with one entry per character code in
/// 0..<paramref name="precomputeLimit"/> (inclusive); an entry is true
/// when that character belongs to <paramref name="domain"/>.
/// </summary>
/// <param name="solver">character algebra used to build and test the singleton sets</param>
/// <param name="domain">elements that map to true</param>
/// <param name="precomputeLimit">highest character code included in the table</param>
/// <returns>array of length precomputeLimit + 1</returns>
private static bool[] Precompute(CharSetSolver solver, BDD domain, int precomputeLimit)
{
    bool[] table = new bool[precomputeLimit + 1];
    for (int code = 0; code < table.Length; code++)
    {
        // The character is in the domain iff {character} AND domain is non-empty.
        var singleton = solver.MkCharConstraint((char)code);
        table[code] = solver.IsSatisfiable(solver.MkAnd(singleton, domain));
    }
    return(table);
}
/// <summary>
/// Renders a character set as regex character-class syntax.
/// Well-known sets are printed with their shorthand (\d, \D, \w, \W, \s, \S),
/// a set equal to a single Unicode category is printed as \p{Abbreviation},
/// a single character is printed escaped on its own, and anything else is
/// printed as a bracketed list of characters and ranges.
/// </summary>
/// <param name="label">the character set to print</param>
/// <param name="categorizer">provides the category, word-letter and whitespace conditions</param>
/// <param name="solver">character algebra used for complement and range extraction</param>
/// <returns>regex text for <paramref name="label"/></returns>
internal static string ToRegexCharSet(BDD label, IUnicodeCategoryTheory <BDD> categorizer, CharSetSolver solver)
{
    // Category 8 is UnicodeCategory.DecimalDigitNumber.
    if (categorizer.CategoryCondition(8) == label)
    {
        return(@"\d");
    }
    if (solver.MkNot(categorizer.CategoryCondition(8)) == label)
    {
        return(@"\D");
    }
    if (categorizer.WordLetterCondition == label)
    {
        return(@"\w");
    }
    if (solver.MkNot(categorizer.WordLetterCondition) == label)
    {
        return(@"\W");
    }
    if (categorizer.WhiteSpaceCondition == label)
    {
        return(@"\s");
    }
    if (solver.MkNot(categorizer.WhiteSpaceCondition) == label)
    {
        return(@"\S");
    }

    for (int i = 0; i < categorizer.UnicodeCategoryStandardAbbreviations.Length; i++)
    {
        if (categorizer.CategoryCondition(i) == label)
        {
            // Lower-case \p{..} matches characters IN the category. The upper-case
            // form \P{..} is the negation and would describe the complement of label.
            return(@"\p{" + categorizer.UnicodeCategoryStandardAbbreviations[i] + "}");
        }
    }

    var ranges = solver.ToRanges(label);

    // Exactly one character: no brackets needed.
    if (ranges.Length == 1 && ranges[0].Item1 == ranges[0].Item2)
    {
        return(StringUtility.Escape((char)ranges[0].Item1));
    }

    // NOTE(review): if ToRanges returns no ranges the result is "[]";
    // presumably callers never pass an empty set - confirm.
    var res = new StringBuilder("[");
    for (int i = 0; i < ranges.Length; i++)
    {
        var range = ranges[i];
        if (range.Item1 == range.Item2)
        {
            // single character
            res.Append(StringUtility.EscapeWithNumericSpace((char)range.Item1));
        }
        else if (range.Item1 == range.Item2 - 1)
        {
            // two adjacent characters: list both instead of writing "a-b"
            res.Append(StringUtility.EscapeWithNumericSpace((char)range.Item1));
            res.Append(StringUtility.EscapeWithNumericSpace((char)range.Item2));
        }
        else
        {
            res.Append(StringUtility.EscapeWithNumericSpace((char)range.Item1));
            res.Append("-");
            res.Append(StringUtility.EscapeWithNumericSpace((char)range.Item2));
        }
    }
    res.Append("]");
    return(res.ToString());
}
/// <summary>
/// Creates a transformer that uses the given solver and loads the generated
/// ignore-case relation into it.
/// </summary>
/// <param name="charSetSolver">solver used to deserialize and manipulate the relation</param>
public IgnoreCaseTransformer(CharSetSolver charSetSolver)
{
    solver = charSetSolver;

    // The relation is shipped as pre-generated serialized data.
    IgnoreCaseRel = charSetSolver.Deserialize(Microsoft.SRM.Generated.IgnoreCaseRelation.ignorecase);

    // NOTE(review): presumably the relation packs a pair of 16-bit characters and
    // shifting right by 16 leaves the set of characters that have an entry - confirm.
    domain = IgnoreCaseRel >> 16;
}
/// <summary>
/// Writes C# field declarations describing the Unicode categories, whitespace
/// characters and word characters for all character codes representable in
/// the given bit width.
/// For every width the compact BDD encodings are written; the plain range
/// tables are written only for the 7-bit encoding.
/// </summary>
/// <param name="encoding">bit width of the character encoding; its integer value is the number of bits</param>
/// <param name="sw">writer receiving the generated source text</param>
/// <param name="field">base name of the generated fields; suffixes such as "Bdd" are appended</param>
private static void WriteRangeFields(BitWidth encoding, StreamWriter sw, string field)
{
    // UnicodeCategory has 30 members with codes 0..29.
    const int categoryCount = 30;

    int bits = (int)encoding;
    int maxChar = (1 << bits) - 1;

    var catMap = new Dictionary <UnicodeCategory, Ranges>();
    for (int c = 0; c < categoryCount; c++)
    {
        catMap[(UnicodeCategory)c] = new Ranges();
    }

    // Classify every character code of the encoding.
    Ranges whitespace = new Ranges();
    Ranges wordcharacter = new Ranges();
    for (int i = 0; i <= maxChar; i++)
    {
        char ch = (char)i;
        if (char.IsWhiteSpace(ch))
        {
            whitespace.Add(i);
        }
        UnicodeCategory cat = char.GetUnicodeCategory(ch);
        catMap[cat].Add(i);
        int catCode = (int)cat;
        // Word-character ranges are only collected (and only written below) for 7 bits.
        if (bits == 7)
        {
            if (catCode == 0 || catCode == 1 || catCode == 2 || catCode == 3 ||
                catCode == 4 || catCode == 5 || catCode == 8 || catCode == 18)
            {
                wordcharacter.Add(i);
            }
        }
    }

    // generate bdd reprs for each of the category ranges
    BDD[] catBDDs = new BDD[categoryCount];
    CharSetSolver bddb = new CharSetSolver(encoding);
    for (int c = 0; c < categoryCount; c++)
    {
        catBDDs[c] = bddb.MkBddForIntRanges(catMap[(UnicodeCategory)c].ranges);
    }
    BDD whitespaceBdd = bddb.MkBddForIntRanges(whitespace.ranges);

    // Union of categories 0,1,2,3,4,5,8,18: the five letter categories,
    // NonSpacingMark, DecimalDigitNumber and ConnectorPunctuation.
    // Historical note from the original author: in .NET 3.5 category 5 was NOT a
    // word character. This code includes it, matching the per-character test above.
    BDD wordCharBdd = bddb.MkOr(catBDDs[0],
                      bddb.MkOr(catBDDs[1],
                      bddb.MkOr(catBDDs[2],
                      bddb.MkOr(catBDDs[3],
                      bddb.MkOr(catBDDs[4],
                      bddb.MkOr(catBDDs[5],
                      bddb.MkOr(catBDDs[8], catBDDs[18])))))));

    // The generated arrays are indexed by category code, so the entries are written
    // by counting 0..29 explicitly rather than by enumerating the dictionary, whose
    // enumeration order is not guaranteed.
    if (bits == 7)
    {
        sw.WriteLine(@"/// <summary> /// Array of 30 UnicodeCategory ranges. Each entry is a pair of integers. /// corresponding to the lower and upper bounds of the unicodes of the characters /// that have the given UnicodeCategory code (between 0 and 29). 
/// </summary>");
        sw.WriteLine("public static int[][][] " + field + " = new int[][][]{");
        for (int code = 0; code < categoryCount; code++)
        {
            UnicodeCategory category = (UnicodeCategory)code;
            sw.WriteLine("//{0}({1}):", category, code);
            if (catMap[category].Count == 0)
            {
                sw.WriteLine("null,");
            }
            else
            {
                sw.WriteLine("new int[][]{");
                foreach (int[] range in catMap[category].ranges)
                {
                    sw.WriteLine(" new int[]{" + string.Format("{0},{1}", range[0], range[1]) + "},");
                }
                sw.WriteLine("},");
            }
        }
        sw.WriteLine("};");
    }

    sw.WriteLine(@"/// <summary> /// Compact BDD encodings of the categories. /// </summary>");
    sw.WriteLine("public static int[][] " + field + "Bdd = new int[][]{");
    for (int code = 0; code < categoryCount; code++)
    {
        sw.WriteLine("//{0}({1}):", (UnicodeCategory)code, code);
        BDD catBdd = catBDDs[code];
        if (catBdd == null || catBdd.IsEmpty)
        {
            sw.WriteLine("null, //false");
        }
        else if (catBdd.IsFull)
        {
            sw.WriteLine("new int[]{0,0}, //true");
        }
        else
        {
            sw.WriteLine("new int[]{");
            foreach (var arc in bddb.SerializeCompact(catBdd))
            {
                sw.WriteLine("{0},", arc);
            }
            sw.WriteLine("},");
        }
    }
    sw.WriteLine("};");

    if (bits == 7)
    {
        sw.WriteLine(@"/// <summary> /// Whitespace character ranges. /// </summary>");
        sw.WriteLine("public static int[][] " + field + "Whitespace = new int[][]{");
        foreach (int[] range in whitespace.ranges)
        {
            sw.WriteLine(" new int[]{" + string.Format("{0},{1}", range[0], range[1]) + "},");
        }
        sw.WriteLine("};");

        sw.WriteLine(@"/// <summary> /// Word character ranges. /// </summary>");
        sw.WriteLine("public static int[][] " + field + "WordCharacter = new int[][]{");
        foreach (int[] range in wordcharacter.ranges)
        {
            sw.WriteLine(" new int[]{" + string.Format("{0},{1}", range[0], range[1]) + "},");
        }
        sw.WriteLine("};");
    }

    sw.WriteLine(@"/// <summary> /// Compact BDD encoding of the whitespace characters. 
/// </summary>");
    sw.WriteLine("public static int[] " + field + "WhitespaceBdd = new int[]{");
    foreach (var arc in bddb.SerializeCompact(whitespaceBdd))
    {
        sw.WriteLine("{0},", arc);
    }
    sw.WriteLine("};");

    sw.WriteLine(@"/// <summary> /// Compact BDD encoding of word characters /// </summary>");
    sw.WriteLine("public static int[] " + field + "WordCharacterBdd = new int[]{");
    foreach (var arc in bddb.SerializeCompact(wordCharBdd))
    {
        sw.WriteLine("{0},", arc);
    }
    sw.WriteLine("};");
}
/// <summary>
/// Converts a BDD node into an array of inclusive (lower, upper) ranges.
/// The ranges of the 0-branch come first, followed by the ranges of the
/// 1-branch with this node's bit set; where the last range of the 0-branch
/// ends at mask - 1 and the first range of the 1-branch starts at 0, the two
/// are merged into one range. Results are memoized in rangeCache.
/// </summary>
/// <param name="set">the BDD node to convert; its Zero and One branches are read</param>
/// <returns>the ranges for <paramref name="set"/></returns>
Tuple <uint, uint>[] ToRanges1(BDD set)
{
    Tuple <uint, uint>[] result;
    if (rangeCache.TryGetValue(set, out result))
    {
        return(result);
    }

    int bit = set.Ordinal;
    uint mask = (uint)1 << bit;

    // Moves a range into the half of the value space where this node's bit is 1.
    Func <Tuple <uint, uint>, Tuple <uint, uint> > withBit =
        r => new Tuple <uint, uint>(r.Item1 | mask, r.Item2 | mask);

    if (set.Zero.IsEmpty)
    {
        #region 0-case is empty
        if (set.One.IsFull)
        {
            // every value with the bit set, nothing else
            result = new Tuple <uint, uint>[] { new Tuple <uint, uint>(mask, (mask << 1) - 1) };
        }
        else //1-case is neither full nor empty
        {
            var upper = LiftRanges(bit, (bit - set.One.Ordinal) - 1, ToRanges1(set.One));
            result = new Tuple <uint, uint> [upper.Length];
            for (int k = 0; k < upper.Length; k++)
            {
                result[k] = withBit(upper[k]);
            }
        }
        #endregion
    }
    else if (set.Zero.IsFull)
    {
        #region 0-case is full
        if (set.One.IsEmpty)
        {
            // every value with the bit clear, nothing else
            result = new Tuple <uint, uint>[] { new Tuple <uint, uint>(0, mask - 1) };
        }
        else
        {
            var upper = LiftRanges(bit, (bit - set.One.Ordinal) - 1, ToRanges1(set.One));
            var firstUpper = upper[0];

            // When the first upper range starts at 0 it is adjacent to the full lower
            // half and is absorbed into slot 0; otherwise the lower half is prepended
            // and every upper range moves one slot to the right.
            bool absorbFirst = firstUpper.Item1 == 0;
            int offset = absorbFirst ? 0 : 1;

            result = new Tuple <uint, uint> [upper.Length + offset];
            result[0] = absorbFirst
                ? new Tuple <uint, uint>(0, firstUpper.Item2 | mask)
                : new Tuple <uint, uint>(0, mask - 1);
            for (int k = 1 - offset; k < upper.Length; k++)
            {
                result[k + offset] = withBit(upper[k]);
            }
        }
        #endregion
    }
    else
    {
        #region 0-case is neither full nor empty
        var lower = LiftRanges(bit, (bit - set.Zero.Ordinal) - 1, ToRanges1(set.Zero));
        var lastLower = lower[lower.Length - 1];
        // true when the last lower range touches the boundary to the upper half
        bool lowerReachesBoundary = lastLower.Item2 == (mask - 1);

        if (set.One.IsEmpty)
        {
            result = lower;
        }
        else if (set.One.IsFull)
        {
            if (lowerReachesBoundary)
            {
                // extend the last lower range across the whole upper half
                result = new Tuple <uint, uint> [lower.Length];
                Array.Copy(lower, result, lower.Length - 1);
                result[lower.Length - 1] = new Tuple <uint, uint>(lastLower.Item1, (mask << 1) - 1);
            }
            else
            {
                // keep all lower ranges and append the whole upper half
                result = new Tuple <uint, uint> [lower.Length + 1];
                Array.Copy(lower, result, lower.Length);
                result[lower.Length] = new Tuple <uint, uint>(mask, (mask << 1) - 1);
            }
        }
        else //general case: neither 0-case, not 1-case is full or empty
        {
            var upper = LiftRanges(bit, (bit - set.One.Ordinal) - 1, ToRanges1(set.One));
            var firstUpper = upper[0];

            if (lowerReachesBoundary && firstUpper.Item1 == 0) //merge together the last and first ranges
            {
                int seam = lower.Length - 1;
                result = new Tuple <uint, uint> [lower.Length + upper.Length - 1];
                Array.Copy(lower, result, seam);
                result[seam] = new Tuple <uint, uint>(lastLower.Item1, firstUpper.Item2 | mask);
                for (int k = 1; k < upper.Length; k++)
                {
                    result[seam + k] = withBit(upper[k]);
                }
            }
            else
            {
                result = new Tuple <uint, uint> [lower.Length + upper.Length];
                Array.Copy(lower, result, lower.Length);
                for (int k = 0; k < upper.Length; k++)
                {
                    result[lower.Length + k] = withBit(upper[k]);
                }
            }
        }
        #endregion
    }

    rangeCache[set] = result;
    return(result);
}
/// <summary>
/// Builds, for every character in 0..0xFFFF that has a distinct upper-case or
/// lower-case form, the set of characters treated as equivalent to it under
/// the regex ignore-case option. All members of an equivalence set map to the
/// same BDD.
/// </summary>
/// <param name="solver">character algebra used to build the sets</param>
/// <returns>map from character to its ignore-case equivalence set</returns>
private static Dictionary <char, BDD> ComputeIgnoreCaseDistionary(CharSetSolver solver)
{
    var table = new Dictionary <char, BDD>();
    for (uint code = 0; code <= 0xFFFF; code++)
    {
        char c = (char)code;
        char cU = char.ToUpper(c);
        char cL = char.ToLower(c);

        // Nothing to record when the character is its own upper and lower case
        // (that also implies cU == cL).
        if (c == cU && c == cL)
        {
            continue;
        }

        //make sure that the regex engine considers c as being equivalent to cU and cL, else ignore c
        //in some cases c != cU but the regex engine does not consider the characters equivalent wrt the ignore-case option.
        //These characters are:
        //c=\xB5,cU=\u039C
        //c=\u0131,cU=I
        //c=\u017F,cU=S
        //c=\u0345,cU=\u0399
        //c=\u03C2,cU=\u03A3
        //c=\u03D0,cU=\u0392
        //c=\u03D1,cU=\u0398
        //c=\u03D5,cU=\u03A6
        //c=\u03D6,cU=\u03A0
        //c=\u03F0,cU=\u039A
        //c=\u03F1,cU=\u03A1
        //c=\u03F5,cU=\u0395
        //c=\u1E9B,cU=\u1E60
        //c=\u1FBE,cU=\u0399
        // NOTE(review): char.ToUpper/ToLower and the (?i:) match below are
        // culture-sensitive by default - confirm the generator's culture is fixed.
        string pattern = "^(?i:" + StringUtility.Escape(c) + ")+$";
        if (!System.Text.RegularExpressions.Regex.IsMatch(cU.ToString() + cL.ToString(), pattern))
        {
            continue;
        }

        // Start from whatever is already known about c, cU and cL, then add the three
        // characters themselves.
        BDD equiv = solver.False;
        BDD known;
        if (table.TryGetValue(c, out known))
        {
            equiv = equiv | known;
        }
        if (table.TryGetValue(cU, out known))
        {
            equiv = equiv | known;
        }
        if (table.TryGetValue(cL, out known))
        {
            equiv = equiv | known;
        }
        equiv = equiv | solver.MkCharSetFromRange(c, c) | solver.MkCharSetFromRange(cU, cU) | solver.MkCharSetFromRange(cL, cL);

        // Re-point every member at the merged set so the whole class stays consistent.
        foreach (char d in solver.GenerateAllCharacters(equiv))
        {
            table[d] = equiv;
        }
    }
    return(table);
}