Пример #1
0
        /// <summary>
        /// Scans the front of a <see cref="RegexTree"/> and reports the first
        /// anchor node it reaches.
        /// </summary>
        /// <param name="tree">The tree whose <c>Root</c> node is the starting point of the scan.</param>
        /// <returns>
        /// The value of <c>AnchorFromType</c> for the first anchor node found, or 0
        /// when the scan stops before reaching any anchor.
        /// </returns>
        public static int Anchors(RegexTree tree)
        {
            RegexNode node     = tree.Root;
            RegexNode sequence = null;   // concatenation currently being walked, if any
            int       position = 0;      // index of the next child of 'sequence' to visit

            while (true)
            {
                var kind = node.NType;

                // Wrapper nodes: descend into the single child and forget any
                // concatenation that was being walked.
                if (kind == RegexNode.Greedy || kind == RegexNode.Capture)
                {
                    sequence = null;
                    node     = node.Child(0);
                    continue;
                }

                switch (kind)
                {
                    case RegexNode.Bol:
                    case RegexNode.Eol:
                    case RegexNode.Boundary:
                    case RegexNode.ECMABoundary:
                    case RegexNode.Beginning:
                    case RegexNode.Start:
                    case RegexNode.EndZ:
                    case RegexNode.End:
                        // The first anchor encountered ends the scan.
                        return AnchorFromType(kind);

                    case RegexNode.Concatenate:
                        // A non-empty concatenation becomes the sequence to walk;
                        // an empty one leaves the current sequence untouched.
                        if (node.ChildCount() > 0)
                        {
                            sequence = node;
                            position = 0;
                        }
                        break;

                    case RegexNode.Empty:
                    case RegexNode.Require:
                    case RegexNode.Prevent:
                        // Skipped: move on to the next sibling.
                        break;

                    default:
                        // Any other node type ends the scan without an anchor.
                        return 0;
                }

                // Advance to the next child of the active concatenation, if one remains.
                bool exhausted = sequence == null || position >= sequence.ChildCount();
                if (exhausted)
                {
                    return 0;
                }

                node = sequence.Child(position);
                position++;
            }
        }
Пример #2
0
        /// <summary>
        /// Collapses a chain of nested repeaters into one by multiplying their
        /// bounds together, as long as each nested repeater is of a compatible
        /// type and is not too lumpy.
        /// </summary>
        /// <returns>
        /// The innermost repeater reached (with its <c>M</c> and <c>N</c> updated to
        /// the multiplied bounds), or a new <c>Nothing</c> node when the multiplied
        /// minimum saturates at <see cref="int.MaxValue"/>.
        /// </returns>
        private RegexNode ReduceRep()
        {
            RegexNode current   = this;
            int       outerType = Type();
            int       lower     = M;
            int       upper     = N;

            while (current.ChildCount() != 0)
            {
                RegexNode inner     = current.Child(0);
                int       innerType = inner.Type();

                // multiply reps of the same type only: a single-item loop may fold
                // into a Loop, and a single-item lazy loop into a Lazyloop
                if (innerType != outerType)
                {
                    bool greedyPair = outerType == Loop &&
                                      innerType >= Oneloop && innerType <= Setloop;
                    bool lazyPair   = outerType == Lazyloop &&
                                      innerType >= Onelazy && innerType <= Setlazy;

                    if (!greedyPair && !lazyPair)
                    {
                        break;
                    }
                }

                // child can be too lumpy to blur, e.g., (a {100,105}) {3} or (a {2,})?
                // [but things like (a {2,})+ are not too lumpy...]
                if ((current.M == 0 && inner.M > 1) || inner.N < inner.M * 2)
                {
                    break;
                }

                current = inner;

                // Multiply the bounds, saturating at int.MaxValue instead of overflowing.
                if (current.M > 0)
                {
                    lower     = (int.MaxValue - 1) / current.M < lower ? int.MaxValue : current.M * lower;
                    current.M = lower;
                }

                if (current.N > 0)
                {
                    upper     = (int.MaxValue - 1) / current.N < upper ? int.MaxValue : current.N * upper;
                    current.N = upper;
                }
            }

            if (lower == int.MaxValue)
            {
                return new RegexNode(Nothing, Options);
            }

            return current;
        }
Пример #3
0
        /// <summary>
        /// Scans the front of a <see cref="RegexTree"/> for a literal leading
        /// substring. The scan is deliberately simple and gives up easily.
        /// </summary>
        /// <param name="tree">The tree whose <c>Root</c> node is the starting point of the scan.</param>
        /// <returns>
        /// A <see cref="RegexPrefix"/> built from the first One, Multi, Oneloop or
        /// Onelazy node reached, or <c>RegexPrefix.Empty</c> when the scan stops
        /// before finding one.
        /// </returns>
        public static RegexPrefix Prefix(RegexTree tree)
        {
            // In release, cutoff at a length to which we can still reasonably construct a string
            // In debug, use a smaller cutoff to exercise the cutoff path in tests
            #if DEBUG
            const int Cutoff = 50;
            #else
            const int Cutoff = 1_000_000;
            #endif

            // True when the node carries the IgnoreCase option.
            bool IgnoresCase(RegexNode n) => (n.Options & RegexOptions.IgnoreCase) != 0;

            RegexNode node     = tree.Root;
            RegexNode sequence = null;   // concatenation currently being walked, if any
            int       position = 0;      // index of the next child of 'sequence' to visit

            while (true)
            {
                switch (node.NType)
                {
                    case RegexNode.Greedy:
                    case RegexNode.Capture:
                        // Wrapper nodes: descend into the single child and forget
                        // any concatenation that was being walked.
                        sequence = null;
                        node     = node.Child(0);
                        continue;

                    case RegexNode.One:
                        return new RegexPrefix(node.Ch.ToString(), IgnoresCase(node));

                    case RegexNode.Multi:
                        return new RegexPrefix(node.Str, IgnoresCase(node));

                    case RegexNode.Oneloop:
                    case RegexNode.Onelazy:
                        // Only a positive minimum count below the cutoff yields a prefix.
                        if (node.M <= 0 || node.M >= Cutoff)
                        {
                            return RegexPrefix.Empty;
                        }

                        // The prefix is the loop's character repeated M times.
                        return new RegexPrefix(new string(node.Ch, node.M), IgnoresCase(node));

                    case RegexNode.Concatenate:
                        // A non-empty concatenation becomes the sequence to walk;
                        // an empty one leaves the current sequence untouched.
                        if (node.ChildCount() > 0)
                        {
                            sequence = node;
                            position = 0;
                        }
                        break;

                    case RegexNode.Bol:
                    case RegexNode.Eol:
                    case RegexNode.Boundary:
                    case RegexNode.ECMABoundary:
                    case RegexNode.Beginning:
                    case RegexNode.Start:
                    case RegexNode.EndZ:
                    case RegexNode.End:
                    case RegexNode.Empty:
                    case RegexNode.Require:
                    case RegexNode.Prevent:
                        // Skipped: move on to the next sibling.
                        break;

                    default:
                        // Any other node type ends the scan without a prefix.
                        return RegexPrefix.Empty;
                }

                // Advance to the next child of the active concatenation, if one remains.
                bool exhausted = sequence == null || position >= sequence.ChildCount();
                if (exhausted)
                {
                    return RegexPrefix.Empty;
                }

                node = sequence.Child(position);
                position++;
            }
        }