Exemple #1
0
        /// <summary>
        /// Collects every character that belongs to the given character class.
        /// </summary>
        /// <param name="charClass">The character class whose member characters are enumerated.</param>
        /// <returns>A set holding all characters contained in the character class.</returns>
        private static CharSet GetCharClassSet(string charClass)
        {
            // One past the largest UTF-16 code unit; used as the exclusive upper limit everywhere below.
            const int upperBound = char.MaxValue + 1;
            CharSet result = new CharSet();

            // Slow path: classes with a subtraction or a Unicode category are resolved
            // by probing every possible character individually.
            if (RegexCharClass.IsSubtraction(charClass) || RegexCharClass.ContainsCategory(charClass))
            {
                for (int code = 0; code < upperBound; code++)
                {
                    char candidate = (char)code;
                    if (RegexCharClass.CharInClass(candidate, charClass))
                    {
                        result.Add(candidate);
                    }
                }
                return result;
            }

            // Fast path: the class is described by ranges only. The range string is read
            // as pairs (inclusive start, exclusive end); a trailing start without a
            // partner is treated as running up to char.MaxValue.
            string bounds = RegexCharClass.GetCharClassRanges(charClass);

            if (!RegexCharClass.IsNegated(charClass))
            {
                // Plain class: add the characters inside each range.
                for (int pos = 0; pos < bounds.Length; pos += 2)
                {
                    int limit = upperBound;
                    if (pos + 1 < bounds.Length)
                    {
                        limit = bounds[pos + 1];
                    }
                    for (int code = bounds[pos]; code < limit; code++)
                    {
                        result.Add((char)code);
                    }
                }
                return result;
            }

            // Negated class: add the gaps that lie between consecutive ranges.
            int gapStart = 0;
            for (int pos = 0; pos < bounds.Length; pos += 2)
            {
                int gapEnd = bounds[pos];
                for (int code = gapStart; code < gapEnd; code++)
                {
                    result.Add((char)code);
                }
                // The next gap begins where the current range ends.
                gapStart = upperBound;
                if (pos + 1 < bounds.Length)
                {
                    gapStart = bounds[pos + 1];
                }
            }

            // Everything after the last range up to char.MaxValue also belongs to the negated class.
            for (int code = gapStart; code < upperBound; code++)
            {
                result.Add((char)code);
            }
            return result;
        }
Exemple #2
0
        /// <summary>
        /// Builds a solver condition describing the characters matched by a serialized
        /// character-class string.
        /// </summary>
        /// <param name="ignoreCase">Forwarded to the solver when range constraints are built and to the recursive call made for a subtractor set.</param>
        /// <param name="set">The serialized character class. Its range section length is read from <c>set[SETLENGTH]</c>, its category section length from <c>set[CATEGORYLENGTH]</c>, and anything after the category section is treated as a nested subtractor set.</param>
        /// <returns>
        /// The disjunction of all range and category conditions, or the conjunction of their
        /// negations when the set is negated, conjoined with the negation of the subtractor
        /// condition when a subtractor is present.
        /// </returns>
        internal S CreateConditionFromSet(bool ignoreCase, string set)
        {
            bool isNegated = RegexCharClass.IsNegated(set);

            // Individual conditions over characters. They are combined into one disjunction,
            // or, for a negated set, into a conjunction of conditions that were each
            // negated at the moment they were created.
            var parts = new List<S>();

            // ---- ranges ----
            foreach (var interval in ComputeRanges(set))
            {
                S rangeCond = solver.MkRangeConstraint(interval.Item1, interval.Item2, ignoreCase);
                if (isNegated)
                {
                    rangeCond = solver.MkNot(rangeCond);
                }
                parts.Add(rangeCond);
            }

            // ---- categories ----
            // The category section starts right after the range section.
            int categoriesBegin = set[SETLENGTH] + SETSTART;
            int categoriesEnd = categoriesBegin + set[CATEGORYLENGTH];

            int cursor = categoriesBegin;
            while (cursor < categoriesEnd)
            {
                // Each entry is read as a signed 16-bit value: the magnitude minus one is the
                // category code handed to the mapper, and a negative sign marks a negated
                // category (e.g. \D rather than \d, per the original author's note).
                short code = (short)set[cursor];
                cursor++;

                if (code == 0)
                {
                    // A zero opens a group of categories that is closed by another zero:
                    // 0 C1 C2 ... Cn 0  ==>  (C1 | C2 | ... | Cn)
                    code = (short)set[cursor];
                    cursor++;
                    if (code == 0)
                    {
                        // Two adjacent zeros: the group is empty and contributes nothing.
                        continue;
                    }

                    // The sign of the first member decides whether the whole group is negated.
                    bool groupNegated = code < 0;

                    var members = new HashSet<int>();
                    do
                    {
                        members.Add(Math.Abs(code) - 1);
                        code = (short)set[cursor];
                        cursor++;
                    }
                    while (code != 0);

                    S groupCond = MapCategoryCodeSetToCondition(members);
                    if (isNegated != groupNegated)
                    {
                        groupCond = solver.MkNot(groupCond);
                    }
                    parts.Add(groupCond);
                }
                else
                {
                    // Single category; a negated category inside a negated set cancels out.
                    S categoryCond = MapCategoryCodeToCondition(Math.Abs(code) - 1);
                    if ((code < 0) != isNegated)
                    {
                        categoryCond = solver.MkNot(categoryCond);
                    }
                    parts.Add(categoryCond);
                }
            }

            // ---- subtractor ----
            // Whatever follows the category section is a subtractor set whose characters are
            // excluded from this set. Subtractors may nest, e.g. [a-z-[b-g-[cd]]], which is
            // why the remainder is handled by a recursive call.
            S excluded = default(S);
            if (cursor < set.Length)
            {
                excluded = CreateConditionFromSet(ignoreCase, set.Substring(cursor));
            }

            // ---- combine ----
            S result;
            if (parts.Count == 0)
            {
                // No ranges and no categories. The original author notes this arises for '.'
                // under the Singleline regex option, where every character is accepted.
                if (isNegated)
                {
                    result = solver.False;
                }
                else
                {
                    result = solver.True;
                }
            }
            else if (isNegated)
            {
                result = solver.MkAnd(parts);
            }
            else
            {
                result = solver.MkOr(parts);
            }

            // The subtractor is outside the scope of the set's negation, so its negation is
            // conjoined only after the (possibly negated) main condition has been formed.
            // A subtractor condition equal to default(S) is treated as "no subtractor".
            if (object.Equals(excluded, default(S)))
            {
                return result;
            }
            return solver.MkAnd(result, solver.MkNot(excluded));
        }