/// <summary>
/// Collects every character that belongs to the given character class.
/// </summary>
/// <param name="charClass">The character class whose member characters are collected.</param>
/// <returns>A set holding all characters contained in the character class.</returns>
private static CharSet GetCharClassSet(string charClass)
{
    CharSet result = new CharSet();

    if (RegexCharClass.IsSubtraction(charClass) || RegexCharClass.ContainsCategory(charClass))
    {
        // Subtraction or Unicode categories are involved: test every UTF-16 code unit one by one.
        for (int code = char.MinValue; code <= char.MaxValue; code++)
        {
            char candidate = (char)code;
            if (RegexCharClass.CharInClass(candidate, charClass))
            {
                result.Add(candidate);
            }
        }
        return result;
    }

    // Without subtraction and Unicode categories the range list alone is enough, which is faster.
    // The list is read as alternating (inclusive start, exclusive end) code units; when the final
    // end is missing, the last range extends through char.MaxValue.
    string ranges = RegexCharClass.GetCharClassRanges(charClass);
    int upperBound = char.MaxValue + 1;

    if (RegexCharClass.IsNegated(charClass))
    {
        // Negated class: add the gaps between the listed ranges.
        int gapStart = 0;
        for (int pair = 0; pair < ranges.Length; pair += 2)
        {
            int gapEnd = ranges[pair];
            for (int code = gapStart; code < gapEnd; code++)
            {
                result.Add((char)code);
            }
            int endIndex = pair + 1;
            gapStart = endIndex < ranges.Length ? ranges[endIndex] : upperBound;
        }

        // Everything after the last listed range also belongs to the negated class.
        for (int code = gapStart; code < upperBound; code++)
        {
            result.Add((char)code);
        }
    }
    else
    {
        // Plain class: add the listed ranges themselves.
        for (int pair = 0; pair < ranges.Length; pair += 2)
        {
            int endIndex = pair + 1;
            int rangeEnd = endIndex < ranges.Length ? ranges[endIndex] : upperBound;
            for (int code = ranges[pair]; code < rangeEnd; code++)
            {
                result.Add((char)code);
            }
        }
    }

    return result;
}
/// <summary>
/// Builds the solver predicate that corresponds to an encoded character class string.
/// </summary>
/// <param name="ignoreCase">Passed on to <c>solver.MkRangeConstraint</c> for every character range.</param>
/// <param name="set">
/// The encoded character class. It is read through <c>RegexCharClass.IsNegated</c>,
/// <c>ComputeRanges</c>, and the <c>SETLENGTH</c>, <c>CATEGORYLENGTH</c> and <c>SETSTART</c> offsets;
/// any characters after the category section are treated as a nested subtractor class.
/// </param>
/// <returns>
/// The combined predicate: the disjunction of the range and category conditions (or the conjunction
/// of their negations when the class is negated), conjoined with the negation of the subtractor
/// predicate when a subtractor is present.
/// </returns>
internal S CreateConditionFromSet(bool ignoreCase, string set)
{
    bool negate = RegexCharClass.IsNegated(set);

    // Conditions over the characters in the set. They become the disjuncts of a single
    // disjunction, or the conjuncts of a conjunction when the set is negated; the negation
    // is pushed into each condition as it is created.
    List<S> conditions = new List<S>();

    #region ranges
    var ranges = ComputeRanges(set);
    foreach (var range in ranges)
    {
        S cond = solver.MkRangeConstraint(range.Item1, range.Item2, ignoreCase);
        conditions.Add(negate ? solver.MkNot(cond) : cond);
    }
    #endregion

    #region categories
    int setLength = set[SETLENGTH];
    int catLength = set[CATEGORYLENGTH];
    int catStart = setLength + SETSTART;
    int j = catStart;
    while (j < catStart + catLength)
    {
        // Singleton categories are stored as characters whose code, read as a short, is
        // 1 + the Unicode category code, so 1 is subtracted to extract the actual code.
        // The category itself may be negated (negative code), e.g. \D instead of \d.
        short catCode = (short)set[j++];
        if (catCode != 0)
        {
            // A negated category inside a negated set cancels out (double negation).
            S cond = MapCategoryCodeToCondition(Math.Abs(catCode) - 1);
            conditions.Add((catCode < 0) ^ negate ? solver.MkNot(cond) : cond);
        }
        else
        {
            // Special case: a whole group G of categories surrounded by 0's,
            // i.e. 0 C1 C2 ... Cn 0 ==> G = (C1 | C2 | ... | Cn).
            catCode = (short)set[j++];
            if (catCode == 0)
            {
                continue; // empty group of categories
            }

            // If the first category code is negative, the group as a whole is negated.
            bool negGroup = catCode < 0;

            // Collect the individual category codes of the group.
            var catCodes = new HashSet<int>();
            while (catCode != 0)
            {
                catCodes.Add(Math.Abs(catCode) - 1);
                catCode = (short)set[j++];
            }

            // C1 | C2 | ... | Cn
            S catCondDisj = MapCategoryCodeSetToCondition(catCodes);
            S catGroupCond = (negate ^ negGroup) ? solver.MkNot(catCondDisj) : catCondDisj;
            conditions.Add(catGroupCond);
        }
    }
    #endregion

    #region subtractor
    // Anything left after the category section is a subtractor set: all of its characters are
    // excluded from this set. Subtractors may be nested, e.g. in r = [a-z-[b-g-[cd]]] the
    // subtractor [b-g-[cd]] itself has the subtractor [cd], so r is a..z except b, e, f, g.
    //
    // Presence is tracked with an explicit flag instead of comparing against default(S):
    // S is an unconstrained type parameter, so default(S) can be a legitimate predicate value
    // (e.g. the zero value of a value type) and is not a reliable "no subtractor" marker.
    bool hasSubtractor = set.Length > j;
    S subtractorCond = default(S);
    if (hasSubtractor)
    {
        subtractorCond = CreateConditionFromSet(ignoreCase, set.Substring(j));
    }
    #endregion

    S moveCond;
    if (conditions.Count == 0)
    {
        // No ranges and no categories. Per the original author's note this situation arises
        // for '.' under the SingleLine regex option and means that all characters are accepted.
        // NOTE(review): this is the reverse of the usual empty-disjunction = False /
        // empty-conjunction = True convention; kept unchanged as documented intent - confirm
        // against ComputeRanges before altering.
        moveCond = negate ? solver.False : solver.True;
    }
    else
    {
        moveCond = negate ? solver.MkAnd(conditions) : solver.MkOr(conditions);
    }

    // Subtlety of regex semantics: the subtractor is not within the scope of the negation
    // (if there is one), so the negated subtractor is conjoined with moveCond only after
    // the negation has been applied above.
    if (hasSubtractor)
    {
        moveCond = solver.MkAnd(moveCond, solver.MkNot(subtractorCond));
    }

    return moveCond;
}