/// <summary>
/// Initializes the transformer and precomputes the character sets used for the
/// culture-dependent handling of 'i' and 'I'.
/// </summary>
/// <param name="solver">The solver used to build and combine all character sets.</param>
public IgnoreCaseTransformer(CharSetSolver solver)
{
    _solver = solver;

    // { 'i', 'I' }
    var lowerI = solver.CharConstraint('i');
    var upperI = solver.CharConstraint('I');
    _i_Invariant = solver.Or(lowerI, upperI);

    // { 'i', 'I', Turkish_I_WithDot }
    var dottedUpperI = solver.CharConstraint(Turkish_I_WithDot);
    _i_Default = solver.Or(_i_Invariant, dottedUpperI);

    // { 'i', Turkish_I_WithDot }
    var turkishLowerI = solver.CharConstraint('i');
    var turkishDottedUpperI = solver.CharConstraint(Turkish_I_WithDot);
    _i_Turkish = solver.Or(turkishLowerI, turkishDottedUpperI);

    // { 'I', Turkish_i_WithoutDot }
    var turkishUpperI = solver.CharConstraint('I');
    var dotlessLowerI = solver.CharConstraint(Turkish_i_WithoutDot);
    _I_Turkish = solver.Or(turkishUpperI, dotlessLowerI);
}
/// <summary>
/// Builds the case-insensitivity equivalence classes for the given culture by grouping
/// every character with the lowercase character it maps to.
/// </summary>
/// <param name="solver">The solver used to create and union character sets.</param>
/// <param name="culture">The culture whose lowercasing rules are applied.</param>
/// <returns>
/// One equivalence class per distinct lowercase target, in the order the targets were first seen.
/// Characters that are their own lowercase form do not start or join a class on their own.
/// </returns>
private static List<EquivalenceClass> ComputeIgnoreCaseEquivalenceClasses(CharSetSolver solver, CultureInfo culture)
{
    var classesByLowercase = new Dictionary<char, EquivalenceClass>();
    var orderedClasses = new List<EquivalenceClass>();

    // Scan from 'A' (code 65) through the last UTF-16 code unit. The counter is a uint
    // so that incrementing past 0xFFFF does not wrap around.
    for (uint code = 'A'; code <= char.MaxValue; code++)
    {
        char original = (char)code;
        char lowered = char.ToLower(original, culture);

        if (lowered != original)
        {
            // First time this lowercase target is seen: start a class seeded with it.
            if (!classesByLowercase.TryGetValue(lowered, out EquivalenceClass? cls))
            {
                cls = new EquivalenceClass(solver.CharConstraint(lowered));
                classesByLowercase[lowered] = cls;
                orderedClasses.Add(cls);
            }

            // Add the character that lowercases to the target.
            cls._set = solver.Or(cls._set, solver.CharConstraint(original));
        }
    }

    return orderedClasses;
}
/// <summary>
/// Writes the declaration of <c>IgnoreCaseEnUsSerializedBDD</c> (a <c>long[]</c>) to <paramref name="sw"/>.
/// The serialized BDD is the union, over every entry of the ignore-case dictionary computed for
/// <c>DefaultCultureName</c>, of the entry's key (shifted left by 16) combined with the entry's set.
/// </summary>
/// <param name="sw">The writer that receives the generated source text.</param>
private static void WriteIgnoreCaseBDD(StreamWriter sw)
{
    sw.WriteLine(" /// <summary>Serialized BDD for mapping characters to their case-ignoring equivalence classes in the default (en-US) culture.</summary>");

    var solver = new CharSetSolver();
    Dictionary<char, BDD> equivalents = ComputeIgnoreCaseDictionary(solver, new CultureInfo(DefaultCultureName));

    BDD table = solver.False;
    foreach (KeyValuePair<char, BDD> entry in equivalents)
    {
        // Singleton set for the key, then its equivalence set.
        BDD keySet = solver.CreateCharSetFromRange(entry.Key, entry.Key);
        BDD equivalentSet = entry.Value;

        // Pair the shifted key with its equivalents and fold the result into the table.
        var shiftedKey = solver.ShiftLeft(keySet, 16);
        table = solver.Or(table, solver.And(shiftedKey, equivalentSet));
    }

    sw.Write(" public static readonly long[] IgnoreCaseEnUsSerializedBDD = ");
    GeneratorHelper.WriteInt64ArrayInitSyntax(sw, table.Serialize());
    sw.WriteLine(";");
}
/// <summary>
/// Writes the declaration of <c>IgnoreCaseEnUsSerializedBDD</c> (a <c>byte[]</c>) to <paramref name="sw"/>.
/// The serialized BDD is the union of the Cartesian product of each ignore-case equivalence class
/// (computed for <c>DefaultCultureName</c>) with itself.
/// </summary>
/// <param name="sw">The writer that receives the generated source text.</param>
private static void WriteIgnoreCaseBDD(StreamWriter sw)
{
    sw.WriteLine(" /// <summary>Serialized BDD for mapping characters to their case-ignoring equivalence classes in the default (en-US) culture.</summary>");

    var solver = new CharSetSolver();
    List<EquivalenceClass> classes = ComputeIgnoreCaseEquivalenceClasses(solver, new CultureInfo(DefaultCultureName));

    BDD lookup = solver.False;
    foreach (EquivalenceClass cls in classes)
    {
        // Cartesian product of the class with itself: one copy shifted left by 16, one unshifted.
        var shifted = solver.ShiftLeft(cls._set, 16);
        BDD product = solver.And(shifted, cls._set);

        // Merge this class's product into the overall lookup table.
        lookup = solver.Or(lookup, product);
    }

    sw.Write(" public static readonly byte[] IgnoreCaseEnUsSerializedBDD = ");
    GeneratorHelper.WriteByteArrayInitSyntax(sw, lookup.SerializeToBytes());
    sw.WriteLine(";");
}
/// <summary>
/// Get the set of CI-equivalent characters to c.
/// This operation depends on culture for i, I, '\u0130', and '\u0131';
/// culture="" means InvariantCulture while culture=null means to use the current culture.
/// </summary>
/// <param name="c">The character whose case-insensitive equivalents are requested.</param>
/// <param name="culture">
/// Culture name; empty string selects the invariant rules, <see langword="null"/> selects
/// <see cref="CultureInfo.CurrentCulture"/>.
/// </param>
/// <returns>The set of characters considered equivalent to <paramref name="c"/> when ignoring case.</returns>
public BDD Apply(char c, string? culture = null)
{
    // Fast path: culture-independent results are cached per character.
    if (Volatile.Read(ref _cultureIndependentChars[c]) is BDD cached)
    {
        return cached;
    }

    culture ??= CultureInfo.CurrentCulture.Name;

    BDD result;
    switch (c)
    {
        // Do not cache in _cultureIndependentChars values that are culture-dependent
        case 'i':
            return culture == string.Empty ? _i_Invariant :
                   IsTurkishAlphabet(culture) ? _i_Turkish :
                   _i_Default; // for all other cultures, case-sensitivity is the same as for en-US

        case 'I':
            return culture == string.Empty ? _i_Invariant :
                   IsTurkishAlphabet(culture) ? _I_Turkish : // different from 'i' above
                   _i_Default;

        case Turkish_I_WithDot:
            return culture == string.Empty ? _solver.CharConstraint(Turkish_I_WithDot) :
                   IsTurkishAlphabet(culture) ? _i_Turkish :
                   _i_Default;

        case Turkish_i_WithoutDot:
            return IsTurkishAlphabet(culture) ? _I_Turkish :
                   _solver.CharConstraint(Turkish_i_WithoutDot);

        // Everything below is culture-independent and is cached after the switch.
        case 'k':
        case 'K':
        case KelvinSign:
            result = _solver.Or(
                _solver.Or(_solver.CharConstraint('k'), _solver.CharConstraint('K')),
                _solver.CharConstraint(KelvinSign));
            break;

        case <= '\x7F':
            // For ASCII range other than letters i, I, k, and K, the case-conversion is independent of culture and does
            // not include case-insensitive-equivalent non-ASCII. The invariant conversions are used because the
            // result is cached and shared regardless of the current culture; 'i' and 'I' never reach this branch.
            result = _solver.Or(
                _solver.CharConstraint(char.ToLowerInvariant(c)),
                _solver.CharConstraint(char.ToUpperInvariant(c)));
            break;

        default:
            // Bring in the full transformation relation, but here it does not actually depend on culture
            // so it is safe to store the result for c.
            result = Apply(_solver.CharConstraint(c));
            break;
    }

    // Cache in _cultureIndependentChars entries that are culture-independent.
    // BDDs are idempotent, so while we use volatile to ensure proper adherence
    // to ECMA's memory model, we don't need Interlocked.CompareExchange.
    // Return the local rather than re-reading the array slot with a plain (non-volatile) read.
    Volatile.Write(ref _cultureIndependentChars[c], result);
    return result;
}
/// <summary>
/// Computes, for the given culture, a map from each character that takes part in case-insensitive
/// matching to the set of characters it is equivalent to.
/// </summary>
/// <remarks>
/// <see cref="CultureInfo.CurrentCulture"/> is temporarily replaced with <paramref name="culture"/>
/// while the map is built and is restored afterwards, including when an exception is thrown.
/// </remarks>
/// <param name="solver">The solver used to create and union character sets.</param>
/// <param name="culture">The culture whose casing rules are applied.</param>
/// <returns>A dictionary in which every member of an equivalence class maps to that class's set.</returns>
private static Dictionary<char, BDD> ComputeIgnoreCaseDictionary(CharSetSolver solver, CultureInfo culture)
{
    CultureInfo originalCulture = CultureInfo.CurrentCulture;
    try
    {
        // The Regex.IsMatch call below runs under the ambient culture, so switch it for the whole computation.
        CultureInfo.CurrentCulture = culture;

        var ignoreCase = new Dictionary<char, BDD>();
        for (uint i = 0; i <= 0xFFFF; i++)
        {
            char c = (char)i;

            // Pass the culture explicitly; it equals the current culture set above.
            char cUpper = char.ToUpper(c, culture);
            char cLower = char.ToLower(c, culture);
            if (cUpper == cLower)
            {
                continue;
            }

            // c may be different from both cUpper as well as cLower.
            // Make sure that the regex engine considers c as being equivalent to cUpper and cLower, else ignore c.
            // In some cases c != cU but the regex engine does not consider the characters equivalent wrt the ignore-case option.
            if (!Regex.IsMatch($"{cUpper}{cLower}", $"^(?i:\\u{i:X4}\\u{i:X4})$"))
            {
                continue;
            }

            // Start from whatever classes c, cUpper and cLower already belong to.
            BDD equiv = solver.False;
            if (ignoreCase.TryGetValue(c, out BDD? knownForChar))
            {
                equiv = solver.Or(equiv, knownForChar);
            }

            if (ignoreCase.TryGetValue(cUpper, out BDD? knownForUpper))
            {
                equiv = solver.Or(equiv, knownForUpper);
            }

            if (ignoreCase.TryGetValue(cLower, out BDD? knownForLower))
            {
                equiv = solver.Or(equiv, knownForLower);
            }

            // Make sure all characters are included initially or when some is still missing
            equiv = solver.Or(
                equiv,
                solver.Or(
                    solver.CreateCharSetFromRange(c, c),
                    solver.Or(
                        solver.CreateCharSetFromRange(cUpper, cUpper),
                        solver.CreateCharSetFromRange(cLower, cLower))));

            // Update all the members with their case-invariance equivalence classes
            foreach (char d in solver.GenerateAllCharacters(equiv))
            {
                ignoreCase[d] = equiv;
            }
        }

        return ignoreCase;
    }
    finally
    {
        CultureInfo.CurrentCulture = originalCulture;
    }
}