ChildCount() private method

private ChildCount ( ) : int
return int
 /// <summary>
 /// Builds the replacement tables from an already-parsed replacement pattern.
 /// Consecutive literal pieces are merged into one entry of the string table;
 /// each rule is either a non-negative index into that table or a negative,
 /// encoded group number (<c>-5 - group</c>).
 /// </summary>
 /// <param name="rep">Replacement pattern text; stored unchanged in <c>_rep</c>.</param>
 /// <param name="concat">Parsed pattern; its type code must be 0x19 and its children must have type code 9, 12 or 13.</param>
 /// <param name="_caps">Optional lookup applied to non-negative group numbers; skipped when null.</param>
 /// <exception cref="ArgumentException">The root or one of its children has an unexpected type code.</exception>
 internal RegexReplacement(string rep, RegexNode concat, System.Collections.Generic.Dictionary<object,object> _caps)
 {
     this._rep = rep;

     if (concat.Type() != 0x19)
     {
         throw new ArgumentException(RegExRes.GetString(0x25));
     }

     StringBuilder pending = new StringBuilder();   // literal text not yet written to the table
     ArrayList literalTable = new ArrayList();
     ArrayList ruleTable = new ArrayList();

     int index = 0;
     while (index < concat.ChildCount())
     {
         RegexNode child = concat.Child(index);

         switch (child.Type())
         {
             case 9:
                 pending.Append(child._ch);
                 break;

             case 12:
                 pending.Append(child._str);
                 break;

             case 13:
                 // A group reference ends the current literal run.
                 if (pending.Length > 0)
                 {
                     ruleTable.Add(literalTable.Count);
                     literalTable.Add(pending.ToString());
                     pending.Length = 0;
                 }

                 int slot = child._m;
                 if ((_caps != null) && (slot >= 0))
                 {
                     slot = (int) _caps[slot];
                 }

                 ruleTable.Add(-5 - slot);
                 break;

             default:
                 throw new ArgumentException(RegExRes.GetString(0x25));
         }

         index++;
     }

     // Flush whatever literal text trails the last group reference.
     if (pending.Length > 0)
     {
         ruleTable.Add(literalTable.Count);
         literalTable.Add(pending.ToString());
     }

     this._strings = new string[literalTable.Count];
     literalTable.CopyTo(0, this._strings, 0, literalTable.Count);

     this._rules = new int[ruleTable.Count];
     int position = 0;
     foreach (object boxedRule in ruleTable)
     {
         this._rules[position] = (int) boxedRule;
         position++;
     }
 }
Esempio n. 2
0
        /// <summary>
        /// Builds a replacement from its parsed form. RegexReplacement shares its
        /// parser with Regex, so <paramref name="concat"/> is a concatenation node
        /// whose children are constant strings and backreferences.
        /// </summary>
        /// <param name="rep">The replacement pattern text; stored unchanged.</param>
        /// <param name="concat">Root of the parsed replacement; must be of type <c>RegexNode.Concatenate</c>.</param>
        /// <param name="_caps">Optional map applied to non-negative backreference numbers; ignored when null.</param>
        /// <exception cref="ArgumentException">
        /// <paramref name="concat"/> is not a concatenation, or one of its children is
        /// not a Multi, One or Ref node.
        /// </exception>
        internal RegexReplacement(String rep, RegexNode concat, Dictionary<Int32, Int32> _caps)
        {
            _rep = rep;

            if (concat.Type() != RegexNode.Concatenate)
            {
                throw new ArgumentException(SR.ReplacementError);
            }

            var pending = new StringBuilder();   // literal text not yet flushed to the table
            var literals = new List<String>();
            var ruleList = new List<Int32>();

            for (int index = 0; index < concat.ChildCount(); index++)
            {
                RegexNode part = concat.Child(index);
                var partType = part.Type();

                if (partType == RegexNode.Multi)
                {
                    pending.Append(part._str);
                }
                else if (partType == RegexNode.One)
                {
                    pending.Append(part._ch);
                }
                else if (partType == RegexNode.Ref)
                {
                    // A backreference ends the current literal run.
                    if (pending.Length > 0)
                    {
                        ruleList.Add(literals.Count);
                        literals.Add(pending.ToString());
                        pending.Length = 0;
                    }

                    int slot = part._m;
                    if (_caps != null && slot >= 0)
                    {
                        slot = (int)_caps[slot];
                    }

                    // Backreferences are stored as negative rule values.
                    ruleList.Add(-Specials - 1 - slot);
                }
                else
                {
                    throw new ArgumentException(SR.ReplacementError);
                }
            }

            // Flush any literal text that follows the last backreference.
            if (pending.Length > 0)
            {
                ruleList.Add(literals.Count);
                literals.Add(pending.ToString());
            }

            _strings = literals;
            _rules = ruleList;
        }
Esempio n. 3
0
        private readonly List<string> _strings; // table of string constants

        #endregion Fields

        #region Constructors

        /// <summary>
        /// Creates a replacement from the parsed form of a replacement pattern.
        /// The replacement parser is shared with Regex, so <paramref name="concat"/>
        /// is a concatenation of constant strings and backreferences.
        /// </summary>
        /// <param name="rep">The replacement pattern text; stored unchanged.</param>
        /// <param name="concat">Root of the parsed replacement; must be of type <c>RegexNode.Concatenate</c>.</param>
        /// <param name="_caps">Optional table applied to non-negative backreference numbers; ignored when null.</param>
        /// <exception cref="ArgumentException">
        /// <paramref name="concat"/> is not a concatenation, or a child is not a Multi, One or Ref node.
        /// </exception>
        internal RegexReplacement(string rep, RegexNode concat, Hashtable _caps)
        {
            if (concat.Type() != RegexNode.Concatenate)
            {
                throw new ArgumentException(SR.ReplacementError);
            }

            StringBuilder pending = StringBuilderCache.Acquire();
            var literals = new List<string>();
            var ruleList = new List<int>();

            int index = 0;
            while (index < concat.ChildCount())
            {
                RegexNode part = concat.Child(index);

                switch (part.Type())
                {
                    case RegexNode.Multi:
                        pending.Append(part._str);
                        break;

                    case RegexNode.One:
                        pending.Append(part._ch);
                        break;

                    case RegexNode.Ref:
                        // A backreference closes the literal run collected so far.
                        if (pending.Length > 0)
                        {
                            ruleList.Add(literals.Count);
                            literals.Add(pending.ToString());
                            pending.Length = 0;
                        }

                        int slot = part._m;
                        int mapped = (_caps != null && slot >= 0) ? (int)_caps[slot] : slot;

                        // Backreferences are stored as negative rule values.
                        ruleList.Add(-Specials - 1 - mapped);
                        break;

                    default:
                        throw new ArgumentException(SR.ReplacementError);
                }

                index++;
            }

            // Flush literal text that follows the last backreference.
            if (pending.Length > 0)
            {
                ruleList.Add(literals.Count);
                literals.Add(pending.ToString());
            }

            StringBuilderCache.Release(pending);

            _rep = rep;
            _strings = literals;
            _rules = ruleList;
        }
 /// <summary>
 /// Builds the replacement tables from an already-parsed replacement pattern.
 /// Literal pieces are merged into string-table entries; each rule is either a
 /// non-negative string-table index or a negative, encoded group number
 /// (<c>-5 - group</c>).
 /// </summary>
 /// <param name="rep">Replacement pattern text; stored unchanged in <c>_rep</c>.</param>
 /// <param name="concat">Parsed pattern; its type code must be 0x19 and its children must have type code 9, 12 or 13.</param>
 /// <param name="_caps">Optional table applied to non-negative group numbers; skipped when null.</param>
 /// <exception cref="ArgumentException">The root or one of its children has an unexpected type code.</exception>
 internal RegexReplacement(string rep, RegexNode concat, Hashtable _caps)
 {
     this._rep = rep;

     if (concat.Type() != 0x19)
     {
         throw new ArgumentException(SR.GetString("ReplacementError"));
     }

     StringBuilder pending = new StringBuilder();
     List<string> literals = new List<string>();
     List<int> ruleList = new List<int>();

     for (int index = 0; index < concat.ChildCount(); index++)
     {
         RegexNode part = concat.Child(index);
         var partType = part.Type();

         if (partType == 9)
         {
             pending.Append(part._ch);
         }
         else if (partType == 12)
         {
             pending.Append(part._str);
         }
         else if (partType == 13)
         {
             // A group reference ends the current literal run.
             if (pending.Length > 0)
             {
                 ruleList.Add(literals.Count);
                 literals.Add(pending.ToString());
                 pending.Length = 0;
             }

             int slot = part._m;
             if ((_caps != null) && (slot >= 0))
             {
                 slot = (int) _caps[slot];
             }

             ruleList.Add(-5 - slot);
         }
         else
         {
             throw new ArgumentException(SR.GetString("ReplacementError"));
         }
     }

     // Flush literal text that follows the last group reference.
     if (pending.Length > 0)
     {
         ruleList.Add(literals.Count);
         literals.Add(pending.ToString());
     }

     this._strings = literals;
     this._rules = ruleList;
 }
        /// <summary>
        /// Walks down the chain of first children starting at this node and, while
        /// the child is compatible, multiplies the child's <c>_m</c> / <c>_n</c>
        /// bounds by the accumulated bounds (saturating at <see cref="int.MaxValue"/>).
        /// </summary>
        /// <returns>
        /// The innermost node reached, or a new node of type code 0x16 when the
        /// accumulated lower bound saturated at <see cref="int.MaxValue"/>.
        /// </returns>
        internal RegexNode ReduceRep()
        {
            RegexNode current = this;
            int outerType = this.Type();
            int minReps = this._m;
            int maxReps = this._n;

            while (current.ChildCount() != 0)
            {
                RegexNode inner = current.Child(0);

                // Only fold children of the same type, or of the type ranges
                // 3..5 (under outer type 0x1a) and 6..8 (under outer type 0x1b).
                if (inner.Type() != outerType)
                {
                    int innerType = inner.Type();
                    bool foldsInto1A = (innerType >= 3) && (innerType <= 5) && (outerType == 0x1a);
                    bool foldsInto1B = (innerType >= 6) && (innerType <= 8) && (outerType == 0x1b);
                    if (!foldsInto1A && !foldsInto1B)
                    {
                        break;
                    }
                }

                // Stop when the bounds of the two levels cannot be combined.
                bool zeroOverMany = (current._m == 0) && (inner._m > 1);
                if (zeroOverMany || (inner._n < (inner._m * 2)))
                {
                    break;
                }

                current = inner;

                if (current._m > 0)
                {
                    minReps = (((int.MaxValue - 1) / current._m) < minReps) ? int.MaxValue : (current._m * minReps);
                    current._m = minReps;
                }

                if (current._n > 0)
                {
                    maxReps = (((int.MaxValue - 1) / current._n) < maxReps) ? int.MaxValue : (current._n * maxReps);
                    current._n = maxReps;
                }
            }

            return (minReps == int.MaxValue) ? new RegexNode(0x16, this._options) : current;
        }
Esempio n. 6
0
        /// <summary>
        /// The top level RegexCode generator. It does a depth-first walk
        /// through the tree and calls EmitFragment to emit code before
        /// and after each child of an interior node and at each leaf.
        /// It also computes various information about the tree, such as
        /// prefix data to help with optimizations.
        /// </summary>
        /// <param name="tree">The parsed tree; its Root, CapNumList, CapTop and Caps members are read here.</param>
        /// <param name="culture">Passed through unchanged to the resulting <see cref="RegexCode"/>.</param>
        /// <returns>A new <see cref="RegexCode"/> built from the emitted codes, the string table and the capture mapping.</returns>
        public RegexCode RegexCodeFromRegexTree(RegexTree tree, CultureInfo culture)
        {
            // Construct sparse capnum mapping if some numbers are unused.
            int capsize;

            if (tree.CapNumList == null || tree.CapTop == tree.CapNumList.Length)
            {
                // No mapping is needed in this case.
                capsize = tree.CapTop;
                _caps   = null;
            }
            else
            {
                // Map each capture number in CapNumList to its index in that list.
                // NOTE(review): this writes into the object returned by tree.Caps, not a copy - confirm that is intended.
                capsize = tree.CapNumList.Length;
                _caps   = tree.Caps;
                for (int i = 0; i < tree.CapNumList.Length; i++)
                {
                    _caps[tree.CapNumList[i]] = i;
                }
            }

            // Every written code begins with a lazy branch.  This will be back-patched
            // to point to the ending Stop after the whole expression has been written.
            Emit(RegexOpcode.Lazybranch, 0);

            // Emit every node.
            // The walk is iterative: _intStack holds the child index taken at each
            // ancestor, and curNode.Parent is used to climb back up.
            RegexNode curNode  = tree.Root;
            int       curChild = 0;

            while (true)
            {
                int curNodeChildCount = curNode.ChildCount();
                if (curNodeChildCount == 0)
                {
                    // Leaf: emitted exactly once.
                    EmitFragment(curNode.Kind, curNode, 0);
                }
                else if (curChild < curNodeChildCount)
                {
                    // Interior node with children left: emit the "before" fragment and descend.
                    EmitFragment(curNode.Kind | BeforeChild, curNode, curChild);

                    curNode = curNode.Child(curChild);
                    _intStack.Append(curChild);
                    curChild = 0;
                    continue;
                }

                // Nothing left on the stack means the root has been fully processed.
                if (_intStack.Length == 0)
                {
                    break;
                }

                // Climb to the parent, emit the "after" fragment for the child just
                // finished, and move on to its next sibling.
                curChild = _intStack.Pop();
                curNode  = curNode.Parent !;

                EmitFragment(curNode.Kind | AfterChild, curNode, curChild);
                curChild++;
            }

            // Patch the starting Lazybranch, emit the final Stop, and get the resulting code array.
            PatchJump(0, _emitted.Length);
            Emit(RegexOpcode.Stop);
            int[] emitted = _emitted.AsSpan().ToArray();

            // Convert the string table into an ordered string array.
            // Each entry's value is used as the index of its key in the array.
            var strings = new string[_stringTable.Count];

            foreach (KeyValuePair <string, int> stringEntry in _stringTable)
            {
                strings[stringEntry.Value] = stringEntry.Key;
            }

            // Return all that in a RegexCode object.
            return(new RegexCode(tree, culture, emitted, strings, _trackCount, _caps, capsize));
        }
        /// <summary>
        /// Builds a replacement from its parsed form. RegexReplacement shares its
        /// parser with Regex, so <paramref name="concat"/> is a concatenation node
        /// whose children are constant strings and backreferences.
        /// </summary>
        /// <param name="rep">The replacement pattern text; stored unchanged.</param>
        /// <param name="concat">Root of the parsed replacement; must be of type <c>RegexNode.Concatenate</c>.</param>
        /// <param name="_caps">Optional table applied to non-negative backreference numbers; ignored when null.</param>
        /// <exception cref="ArgumentException">
        /// <paramref name="concat"/> is not a concatenation, or a child is not a Multi, One or Ref node.
        /// </exception>
        internal RegexReplacement(String rep, RegexNode concat, Hashtable _caps)
        {
            _rep = rep;

            if (concat.Type() != RegexNode.Concatenate)
            {
                throw new ArgumentException(SR.GetString(SR.ReplacementError));
            }

            StringBuilder pending = new StringBuilder();   // literal text not yet flushed
            ArrayList literals = new ArrayList();
            ArrayList ruleList = new ArrayList();

            int index = 0;
            while (index < concat.ChildCount())
            {
                RegexNode part = concat.Child(index);

                switch (part.Type())
                {
                    case RegexNode.Multi:
                        pending.Append(part._str);
                        break;

                    case RegexNode.One:
                        pending.Append(part._ch);
                        break;

                    case RegexNode.Ref:
                        // A backreference closes the literal run collected so far.
                        if (pending.Length > 0)
                        {
                            ruleList.Add(literals.Count);
                            literals.Add(pending.ToString());
                            pending.Length = 0;
                        }

                        int slot = part._m;
                        if (_caps != null && slot >= 0)
                        {
                            slot = (int)_caps[slot];
                        }

                        // Backreferences are stored as negative rule values.
                        ruleList.Add(-Specials - 1 - slot);
                        break;

                    default:
                        throw new ArgumentException(SR.GetString(SR.ReplacementError));
                }

                index++;
            }

            // Flush literal text that follows the last backreference.
            if (pending.Length > 0)
            {
                ruleList.Add(literals.Count);
                literals.Add(pending.ToString());
            }

            _strings = literals;
            _rules = ruleList;
        }
Esempio n. 8
0
        /// <summary>
        /// Restores the parser state that was pushed earlier (called in response to a ')').
        /// </summary>
        /// <remarks>
        /// The saved concatenation, alternation and group nodes are taken off
        /// <c>_stack</c> by following their <c>_next</c> links. If the restored group
        /// is a Testgroup without children, the pending <c>_unit</c> becomes its
        /// first child.
        /// </remarks>
        internal void PopGroup()
        {
            _concatenation = _stack;
            _alternation = _concatenation._next;
            _group = _alternation._next;
            _stack = _group._next;

            // Only a still-empty Testgroup needs the extra step below.
            if (_group.Type() != RegexNode.Testgroup || _group.ChildCount() != 0)
            {
                return;
            }

            // The first () inside a Testgroup group goes directly to the group
            if (_unit == null)
            {
                throw MakeException(SR.IllegalCondition);
            }

            _group.AddChild(_unit);
            _unit = null;
        }
Esempio n. 9
0
        private readonly int[] _rules;      // negative -> group #, positive -> string #

        /// <summary>
        /// Creates a replacement from the parsed form of a replacement pattern.
        /// The replacement parser is shared with Regex, so <paramref name="concat"/>
        /// is a concatenation of constant strings and backreferences.
        /// </summary>
        /// <param name="rep">The replacement pattern text; exposed through <c>Pattern</c>.</param>
        /// <param name="concat">Root of the parsed replacement; must be of type <c>RegexNode.Concatenate</c>.</param>
        /// <param name="_caps">Optional table applied to non-negative backreference numbers; ignored when null.</param>
        public RegexReplacement(string rep, RegexNode concat, Hashtable _caps)
        {
            if (concat.Type != RegexNode.Concatenate)
            {
                throw ThrowHelper.CreateArgumentException(ExceptionResource.ReplacementError);
            }

            // Scratch storage starts on the stack; the builders decide what happens beyond that.
            Span<char> literalScratch = stackalloc char[256];
            var pending = new ValueStringBuilder(literalScratch);
            FourStackStrings inlineStrings = default;
            var literals = new ValueListBuilder<string>(MemoryMarshal.CreateSpan(ref inlineStrings.Item1!, 4));
            var ruleList = new ValueListBuilder<int>(stackalloc int[64]);

            int total = concat.ChildCount();

            for (int index = 0; index < total; index++)
            {
                RegexNode part = concat.Child(index);
                var partType = part.Type;

                if (partType == RegexNode.Multi)
                {
                    pending.Append(part.Str!);
                }
                else if (partType == RegexNode.One)
                {
                    pending.Append(part.Ch);
                }
                else if (partType == RegexNode.Ref)
                {
                    // A backreference closes the literal run collected so far.
                    if (pending.Length > 0)
                    {
                        ruleList.Append(literals.Length);
                        literals.Append(pending.ToString());
                        pending = new ValueStringBuilder(literalScratch);
                    }

                    int slot = part.M;
                    if (_caps != null && slot >= 0)
                    {
                        slot = (int)_caps[slot]!;
                    }

                    // Backreferences are stored as negative rule values.
                    ruleList.Append(-Specials - 1 - slot);
                }
                else
                {
                    throw ThrowHelper.CreateArgumentException(ExceptionResource.ReplacementError);
                }
            }

            // Flush literal text that follows the last backreference.
            if (pending.Length > 0)
            {
                ruleList.Append(literals.Length);
                literals.Append(pending.ToString());
            }

            Pattern = rep;
            _strings = literals.AsSpan().ToArray();
            _rules = ruleList.AsSpan().ToArray();

            ruleList.Dispose();
        }
        /// <summary>Computes the leading substring in <paramref name="tree"/>.</summary>
        /// <remarks>The analysis is deliberately simple and gives up easily; when it does, an empty string is returned.</remarks>
        /// <returns>
        /// The prefix text together with a flag that is true when the node supplying it
        /// has <see cref="RegexOptions.IgnoreCase"/> set; <c>(string.Empty, false)</c> when no prefix was found.
        /// </returns>
        public static (string Prefix, bool CaseInsensitive) ComputeLeadingSubstring(RegexTree tree)
        {
            // In release, cutoff at a length to which we can still reasonably construct a string and Boyer-Moore search.
            // In debug, use a smaller cutoff to exercise the cutoff path in tests
            const int Cutoff =
#if DEBUG
                50;
#else
                RegexBoyerMoore.MaxLimit;
#endif

            RegexNode node = tree.Root;
            RegexNode? enclosingConcat = null;   // concatenation whose children are being scanned, if any
            int childIndex = 0;                  // next child of enclosingConcat to visit

            for (;;)
            {
                switch (node.Type)
                {
                    case RegexNode.Concatenate:
                        if (node.ChildCount() > 0)
                        {
                            enclosingConcat = node;
                            childIndex = 0;
                        }
                        break;

                    case RegexNode.Atomic:
                    case RegexNode.Capture:
                        // Look through the wrapper at its first child.
                        node = node.Child(0);
                        enclosingConcat = null;
                        continue;

                    case RegexNode.Oneloop:
                    case RegexNode.Oneloopatomic:
                    case RegexNode.Onelazy:
                        if (node.M <= 0 || node.M >= Cutoff)
                        {
                            return (string.Empty, false);
                        }

                        // The guaranteed repetitions of the character form the prefix.
                        return (new string(node.Ch, node.M), (node.Options & RegexOptions.IgnoreCase) != 0);

                    case RegexNode.One:
                        return (node.Ch.ToString(), (node.Options & RegexOptions.IgnoreCase) != 0);

                    case RegexNode.Multi:
                        return (node.Str!, (node.Options & RegexOptions.IgnoreCase) != 0);

                    case RegexNode.Bol:
                    case RegexNode.Eol:
                    case RegexNode.Boundary:
                    case RegexNode.ECMABoundary:
                    case RegexNode.Beginning:
                    case RegexNode.Start:
                    case RegexNode.EndZ:
                    case RegexNode.End:
                    case RegexNode.Empty:
                    case RegexNode.Require:
                    case RegexNode.Prevent:
                        // These node types are skipped; the scan moves on to the next sibling.
                        break;

                    default:
                        return (string.Empty, false);
                }

                // Advance to the next child of the enclosing concatenation, if there is one.
                if (enclosingConcat != null && childIndex < enclosingConcat.ChildCount())
                {
                    node = enclosingConcat.Child(childIndex);
                    childIndex++;
                    continue;
                }

                return (string.Empty, false);
            }
        }
Esempio n. 11
0
        /// <summary>
        /// The main RegexCode generator. It does a depth-first walk
        /// through the tree and calls EmitFragment to emit code before
        /// and after each child of an interior node, and at each leaf.
        /// </summary>
        /// <param name="nodetype">
        /// The node type, combined with <c>BeforeChild</c> or <c>AfterChild</c> for
        /// the interior-node cases; this value selects the case below.
        /// </param>
        /// <param name="node">The node code is emitted for; its options, M, N, Ch and Str are read as needed.</param>
        /// <param name="curIndex">Index of the child being entered or left; used by the interior-node cases.</param>
        /// <exception cref="ArgumentException"><paramref name="nodetype"/> matches none of the handled cases.</exception>
        private void EmitFragment(int nodetype, RegexNode node, int curIndex)
        {
            // Option flags that are OR-ed into the opcodes emitted for the leaf cases below.
            int bits = 0;

            if (node.UseOptionR())
            {
                bits |= RegexCode.Rtl;
            }
            if ((node.Options & RegexOptions.IgnoreCase) != 0)
            {
                bits |= RegexCode.Ci;
            }

            switch (nodetype)
            {
            case RegexNode.Concatenate | BeforeChild:
            case RegexNode.Concatenate | AfterChild:
            case RegexNode.Empty:
                break;

            case RegexNode.Alternate | BeforeChild:
                // Every alternative except the last is preceded by a Lazybranch; its
                // position is kept on _intStack and patched in the AfterChild step.
                if (curIndex < node.ChildCount() - 1)
                {
                    _intStack.Append(_emitted.Length);
                    Emit(RegexCode.Lazybranch, 0);
                }
                break;

            case RegexNode.Alternate | AfterChild:
            {
                if (curIndex < node.ChildCount() - 1)
                {
                    // Not the last alternative: emit a Goto (patched later) and point
                    // the preceding Lazybranch at the code that follows it.
                    int LBPos = _intStack.Pop();
                    _intStack.Append(_emitted.Length);
                    Emit(RegexCode.Goto, 0);
                    PatchJump(LBPos, _emitted.Length);
                }
                else
                {
                    // Last alternative: patch the Goto left by each of the curIndex
                    // earlier alternatives so they all jump to the current position.
                    int I;
                    for (I = 0; I < curIndex; I++)
                    {
                        PatchJump(_intStack.Pop(), _emitted.Length);
                    }
                }
                break;
            }

            case RegexNode.Testref | BeforeChild:
                switch (curIndex)
                {
                case 0:
                    Emit(RegexCode.Setjump);
                    _intStack.Append(_emitted.Length);
                    Emit(RegexCode.Lazybranch, 0);
                    Emit(RegexCode.Testref, MapCapnum(node.M));
                    Emit(RegexCode.Forejump);
                    break;
                }
                break;

            case RegexNode.Testref | AfterChild:
                switch (curIndex)
                {
                case 0:
                {
                    int Branchpos = _intStack.Pop();
                    _intStack.Append(_emitted.Length);
                    Emit(RegexCode.Goto, 0);
                    PatchJump(Branchpos, _emitted.Length);
                    Emit(RegexCode.Forejump);
                    // With a second child, the Goto is patched when that child is left (case 1).
                    if (node.ChildCount() > 1)
                    {
                        break;
                    }
                    // else fallthrough
                    goto case 1;
                }

                case 1:
                    PatchJump(_intStack.Pop(), _emitted.Length);
                    break;
                }
                break;

            case RegexNode.Testgroup | BeforeChild:
                switch (curIndex)
                {
                case 0:
                    Emit(RegexCode.Setjump);
                    Emit(RegexCode.Setmark);
                    _intStack.Append(_emitted.Length);
                    Emit(RegexCode.Lazybranch, 0);
                    break;
                }
                break;

            case RegexNode.Testgroup | AfterChild:
                switch (curIndex)
                {
                case 0:
                    Emit(RegexCode.Getmark);
                    Emit(RegexCode.Forejump);
                    break;

                case 1:
                    int Branchpos = _intStack.Pop();
                    _intStack.Append(_emitted.Length);
                    Emit(RegexCode.Goto, 0);
                    PatchJump(Branchpos, _emitted.Length);
                    Emit(RegexCode.Getmark);
                    Emit(RegexCode.Forejump);

                    // With a third child, the Goto is patched when that child is left (case 2).
                    if (node.ChildCount() > 2)
                    {
                        break;
                    }
                    // else fallthrough
                    goto case 2;

                case 2:
                    PatchJump(_intStack.Pop(), _emitted.Length);
                    break;
                }
                break;

            case RegexNode.Loop | BeforeChild:
            case RegexNode.Lazyloop | BeforeChild:

                // The count opcodes are used when there is a finite upper bound or a
                // minimum above 1; otherwise the mark opcodes are used.
                if (node.N < int.MaxValue || node.M > 1)
                {
                    Emit(node.M == 0 ? RegexCode.Nullcount : RegexCode.Setcount, node.M == 0 ? 0 : 1 - node.M);
                }
                else
                {
                    Emit(node.M == 0 ? RegexCode.Nullmark : RegexCode.Setmark);
                }

                // A zero minimum gets a Goto that the AfterChild step patches to the branch opcode.
                if (node.M == 0)
                {
                    _intStack.Append(_emitted.Length);
                    Emit(RegexCode.Goto, 0);
                }
                _intStack.Append(_emitted.Length);
                break;

            case RegexNode.Loop | AfterChild:
            case RegexNode.Lazyloop | AfterChild:
            {
                int StartJumpPos = _emitted.Length;
                // Offset of this node type from Loop (0 for Loop itself); added to the branch opcode.
                int Lazy         = (nodetype - (RegexNode.Loop | AfterChild));

                if (node.N < int.MaxValue || node.M > 1)
                {
                    Emit(RegexCode.Branchcount + Lazy, _intStack.Pop(), node.N == int.MaxValue ? int.MaxValue : node.N - node.M);
                }
                else
                {
                    Emit(RegexCode.Branchmark + Lazy, _intStack.Pop());
                }

                if (node.M == 0)
                {
                    PatchJump(_intStack.Pop(), StartJumpPos);
                }
            }
            break;

            case RegexNode.Group | BeforeChild:
            case RegexNode.Group | AfterChild:
                break;

            case RegexNode.Capture | BeforeChild:
                Emit(RegexCode.Setmark);
                break;

            case RegexNode.Capture | AfterChild:
                Emit(RegexCode.Capturemark, MapCapnum(node.M), MapCapnum(node.N));
                break;

            case RegexNode.Require | BeforeChild:
                // NOTE: the following line causes lookahead/lookbehind to be
                // NON-BACKTRACKING. It can be commented out with (*)
                Emit(RegexCode.Setjump);


                Emit(RegexCode.Setmark);
                break;

            case RegexNode.Require | AfterChild:
                Emit(RegexCode.Getmark);

                // NOTE: the following line causes lookahead/lookbehind to be
                // NON-BACKTRACKING. It can be commented out with (*)
                Emit(RegexCode.Forejump);

                break;

            case RegexNode.Prevent | BeforeChild:
                Emit(RegexCode.Setjump);
                _intStack.Append(_emitted.Length);
                Emit(RegexCode.Lazybranch, 0);
                break;

            case RegexNode.Prevent | AfterChild:
                Emit(RegexCode.Backjump);
                PatchJump(_intStack.Pop(), _emitted.Length);
                Emit(RegexCode.Forejump);
                break;

            case RegexNode.Atomic | BeforeChild:
                Emit(RegexCode.Setjump);
                break;

            case RegexNode.Atomic | AfterChild:
                Emit(RegexCode.Forejump);
                break;

            case RegexNode.One:
            case RegexNode.Notone:
                Emit(node.Type | bits, node.Ch);
                break;

            case RegexNode.Notoneloop:
            case RegexNode.Notoneloopatomic:
            case RegexNode.Notonelazy:
            case RegexNode.Oneloop:
            case RegexNode.Oneloopatomic:
            case RegexNode.Onelazy:
                // The mandatory M repetitions are emitted first, followed by the
                // optional remainder (N - M, or unbounded).
                if (node.M > 0)
                {
                    Emit(((node.Type == RegexNode.Oneloop || node.Type == RegexNode.Oneloopatomic || node.Type == RegexNode.Onelazy) ?
                          RegexCode.Onerep : RegexCode.Notonerep) | bits, node.Ch, node.M);
                }
                if (node.N > node.M)
                {
                    Emit(node.Type | bits, node.Ch, node.N == int.MaxValue ? int.MaxValue : node.N - node.M);
                }
                break;

            case RegexNode.Setloop:
            case RegexNode.Setloopatomic:
            case RegexNode.Setlazy:
            {
                // Same split as above, with the set passed as a string-table code.
                int stringCode = StringCode(node.Str !);
                if (node.M > 0)
                {
                    Emit(RegexCode.Setrep | bits, stringCode, node.M);
                }
                if (node.N > node.M)
                {
                    Emit(node.Type | bits, stringCode, (node.N == int.MaxValue) ? int.MaxValue : node.N - node.M);
                }
            }
            break;

            case RegexNode.Multi:
                Emit(node.Type | bits, StringCode(node.Str !));
                break;

            case RegexNode.Set:
                Emit(node.Type | bits, StringCode(node.Str !));
                break;

            case RegexNode.Ref:
                Emit(node.Type | bits, MapCapnum(node.M));
                break;

            case RegexNode.Nothing:
            case RegexNode.Bol:
            case RegexNode.Eol:
            case RegexNode.Boundary:
            case RegexNode.Nonboundary:
            case RegexNode.ECMABoundary:
            case RegexNode.NonECMABoundary:
            case RegexNode.Beginning:
            case RegexNode.Start:
            case RegexNode.EndZ:
            case RegexNode.End:
                // These are emitted as the bare node type, without option bits or operands.
                Emit(node.Type);
                break;

            default:
                throw new ArgumentException(SR.Format(SR.UnexpectedOpcode, nodetype.ToString()));
            }
        }
Esempio n. 12
0
        /// <summary>
        /// Scans the leading nodes of <paramref name="tree"/> for a character- or
        /// set-based repetition whose upper bound is <see cref="int.MaxValue"/> and
        /// builds a <see cref="RegexPrefix"/> from it.
        /// </summary>
        /// <param name="tree">The tree to scan, starting at its <c>_root</c>.</param>
        /// <returns>
        /// A prefix holding the set string and the node's ignore-case flag, or
        /// <c>null</c> when the scan reaches a node it cannot use.
        /// </returns>
        internal static RegexPrefix ScanChars(RegexTree tree)
        {
            RegexNode current = tree._root;
            RegexNode container = null;   // node with type code 0x19 whose children are being scanned
            int nextIndex = 0;            // next child of container to visit
            string charSet = null;
            bool ignoreCase = false;

            while (true)
            {
                switch (current._type)
                {
                    case 3:
                    case 6:
                        if (current._n != int.MaxValue)
                        {
                            return null;
                        }
                        charSet = RegexCharClass.SetFromChar(current._ch);
                        ignoreCase = RegexOptions.None != (current._options & RegexOptions.IgnoreCase);
                        break;

                    case 4:
                    case 7:
                        if (current._n != int.MaxValue)
                        {
                            return null;
                        }
                        charSet = RegexCharClass.SetInverseFromChar(current._ch);
                        ignoreCase = RegexOptions.None != (current._options & RegexOptions.IgnoreCase);
                        break;

                    case 5:
                    case 8:
                        // Usable only when unbounded and _str2 is null or empty.
                        if ((current._n != int.MaxValue) || ((current._str2 != null) && (current._str2.Length != 0)))
                        {
                            return null;
                        }
                        charSet = current._str;
                        ignoreCase = RegexOptions.None != (current._options & RegexOptions.IgnoreCase);
                        break;

                    case 14:
                    case 15:
                    case 0x10:
                    case 0x12:
                    case 0x13:
                    case 20:
                    case 0x15:
                    case 0x17:
                    case 30:
                    case 0x1f:
                    case 0x29:
                        // These node types are skipped; the scan moves on to the next sibling.
                        break;

                    case 0x19:
                        if (current.ChildCount() > 0)
                        {
                            container = current;
                            nextIndex = 0;
                        }
                        break;

                    case 0x1c:
                    case 0x20:
                        // Look through the wrapper at its first child.
                        current = current.Child(0);
                        container = null;
                        continue;

                    default:
                        return null;
                }

                if (charSet != null)
                {
                    return new RegexPrefix(charSet, ignoreCase);
                }

                if ((container == null) || (nextIndex >= container.ChildCount()))
                {
                    return null;
                }

                current = container.Child(nextIndex++);
            }
        }
Esempio n. 13
0
        private bool _hasBackreferences;    // true if the replacement has any backreferences; otherwise, false

        /// <summary>
        /// Builds a replacement from an already-parsed replacement pattern.
        /// Because the replacement parser is shared with Regex, the input arrives as a
        /// <see cref="RegexNode"/> concatenation whose children are literal text
        /// (single characters or strings) and backreferences.
        /// </summary>
        /// <param name="rep">The replacement pattern text; stored in <c>Pattern</c>.</param>
        /// <param name="concat">The concatenation node whose children are turned into strings and rules.</param>
        /// <param name="_caps">Optional table used to translate a non-negative backreference number into a slot; may be null.</param>
        public RegexReplacement(string rep, RegexNode concat, Hashtable _caps)
        {
            Debug.Assert(concat.Kind == RegexNodeKind.Concatenate, $"Expected Concatenate, got {concat.Kind}");

            // Scratch buffers: the literal run currently being accumulated, the finished
            // literal strings, and the rule list that references them.
            var literal = new ValueStringBuilder(stackalloc char[256]);
            FourStackStrings stackStrings = default;
            var literals = new ValueListBuilder<string>(MemoryMarshal.CreateSpan(ref stackStrings.Item1!, 4));
            var ruleList = new ValueListBuilder<int>(stackalloc int[64]);

            for (int index = 0, total = concat.ChildCount(); index < total; index++)
            {
                RegexNode part = concat.Child(index);
                RegexNodeKind kind = part.Kind;

                if (kind == RegexNodeKind.One)
                {
                    literal.Append(part.Ch);
                }
                else if (kind == RegexNodeKind.Multi)
                {
                    literal.Append(part.Str!);
                }
                else if (kind == RegexNodeKind.Backreference)
                {
                    // A backreference ends the current literal run: record the run as a
                    // rule (its index into the string table) before recording the reference.
                    if (literal.Length > 0)
                    {
                        ruleList.Append(literals.Length);
                        literals.Append(literal.AsSpan().ToString());
                        literal.Length = 0;
                    }

                    int slot = part.M;
                    if (_caps != null && slot >= 0)
                    {
                        slot = (int)_caps[slot]!;
                    }

                    // Backreference rules are stored as negative numbers offset by Specials,
                    // which keeps them distinct from the non-negative string-table indices.
                    ruleList.Append(-Specials - 1 - slot);
                    _hasBackreferences = true;
                }
                else
                {
                    Debug.Fail($"Unexpected child kind {kind}");
                }
            }

            // Flush whatever literal text trails the last backreference.
            if (literal.Length > 0)
            {
                ruleList.Append(literals.Length);
                literals.Append(literal.ToString());
            }
            literal.Dispose();

            Pattern  = rep;
            _strings = literals.AsSpan().ToArray();
            _rules   = ruleList.AsSpan().ToArray();

            ruleList.Dispose();
        }
Esempio n. 14
0
        /*
         * This is a related computation: it takes a RegexTree and computes the
         * leading literal substring if it sees one. It is deliberately simple and
         * gives up easily.
         *
         * Returns a RegexPrefix holding the literal text and whether the node that
         * supplied it had RegexOptions.IgnoreCase set, or RegexPrefix.Empty when no
         * prefix is found (including when a one-character loop is too long to be
         * worth materializing as a string).
         */
        internal static RegexPrefix Prefix(RegexTree tree)
        {
            // Upper bound on the prefix built from a one-character loop. Without it a
            // pattern with a huge minimum repeat count would try to allocate a string
            // of that many characters and could fail with an OutOfMemoryException.
            const int MaxLoopPrefixLength = 1000000;

            RegexNode curNode    = tree._root;
            RegexNode concatNode = null;   // concatenation whose children are being scanned, if any
            int       nextChild  = 0;      // index of the next child of concatNode to look at

            for (; ;)
            {
                switch (curNode._type)
                {
                case RegexNode.Concatenate:
                    // Start scanning this concatenation's children from the first one.
                    if (curNode.ChildCount() > 0)
                    {
                        concatNode = curNode;
                        nextChild  = 0;
                    }
                    break;

                case RegexNode.Greedy:
                case RegexNode.Capture:
                    // Look through the wrapper at its only child; siblings of the
                    // wrapper are no longer considered.
                    curNode    = curNode.Child(0);
                    concatNode = null;
                    continue;

                case RegexNode.Oneloop:
                case RegexNode.Onelazy:
                    // The first _m repetitions are mandatory, so they form the prefix.
                    if (curNode._m > 0 && curNode._m < MaxLoopPrefixLength)
                    {
                        string pref = new string(curNode._ch, curNode._m);
                        return new RegexPrefix(pref, 0 != (curNode._options & RegexOptions.IgnoreCase));
                    }

                    return RegexPrefix.Empty;

                case RegexNode.One:
                    return new RegexPrefix(curNode._ch.ToString(), 0 != (curNode._options & RegexOptions.IgnoreCase));

                case RegexNode.Multi:
                    return new RegexPrefix(curNode._str, 0 != (curNode._options & RegexOptions.IgnoreCase));

                case RegexNode.Bol:
                case RegexNode.Eol:
                case RegexNode.Boundary:
                case RegexNode.ECMABoundary:
                case RegexNode.Beginning:
                case RegexNode.Start:
                case RegexNode.EndZ:
                case RegexNode.End:
                case RegexNode.Empty:
                case RegexNode.Require:
                case RegexNode.Prevent:
                    // These nodes are skipped: move on to the next sibling.
                    break;

                default:
                    return RegexPrefix.Empty;
                }

                // Advance to the next child of the enclosing concatenation, if there is one.
                if (concatNode == null || nextChild >= concatNode.ChildCount())
                {
                    return RegexPrefix.Empty;
                }

                curNode = concatNode.Child(nextChild++);
            }
        }
Esempio n. 15
0
        /// <summary>
        /// The top level RegexCode generator. It does a depth-first walk
        /// through the tree and calls EmitFragment to emit code before
        /// and after each child of an interior node and at each leaf.
        /// It also computes various information about the tree, such as
        /// prefix data to help with optimizations.
        /// </summary>
        /// <param name="tree">The parsed expression tree to generate code for.</param>
        /// <returns>
        /// A RegexCode built from the tree, the emitted codes, the string table, the track count,
        /// the capture mapping and size, the prefix data, the anchors and the right-to-left flag.
        /// </returns>
        public RegexCode RegexCodeFromRegexTree(RegexTree tree)
        {
            // Construct sparse capnum mapping if some numbers are unused.
            int capsize;

            if (tree.CapNumList == null || tree.CapTop == tree.CapNumList.Length)
            {
                // No capture-number list, or its length equals CapTop: no mapping table is kept.
                capsize = tree.CapTop;
                _caps   = null;
            }
            else
            {
                // Map every listed capture number to its index within CapNumList.
                // NOTE(review): _caps is assigned from tree.Caps, so this loop appears to write
                // into the tree's own table rather than a copy - confirm that is intended.
                capsize = tree.CapNumList.Length;
                _caps   = tree.Caps;
                for (int i = 0; i < tree.CapNumList.Length; i++)
                {
                    _caps[tree.CapNumList[i]] = i;
                }
            }

            // Every written code begins with a lazy branch.  This will be back-patched
            // to point to the ending Stop after the whole expression has been written.
            Emit(RegexCode.Lazybranch, 0);

            // Emit every node.
            RegexNode curNode  = tree.Root;
            int       curChild = 0;

            while (true)
            {
                int curNodeChildCount = curNode.ChildCount();
                if (curNodeChildCount == 0)
                {
                    // Leaf: emitted once, with no before/after flag.
                    EmitFragment(curNode.Type, curNode, 0);
                }
                else if (curChild < curNodeChildCount)
                {
                    // Interior node with children left: emit the "before child" fragment,
                    // remember which child we are in, and descend into it.
                    EmitFragment(curNode.Type | BeforeChild, curNode, curChild);

                    curNode = curNode.Child(curChild);
                    _intStack.Append(curChild);
                    curChild = 0;
                    continue;
                }

                // An empty stack here means there is nothing left to return to: the walk is done.
                if (_intStack.Length == 0)
                {
                    break;
                }

                // Step back to the node we descended from (presumably Next is the parent link -
                // confirm against RegexNode), emit its "after child" fragment for the child just
                // completed, then move on to the following child.
                curChild = _intStack.Pop();
                curNode  = curNode.Next !;

                EmitFragment(curNode.Type | AfterChild, curNode, curChild);
                curChild++;
            }

            // Patch the starting Lazybranch, emit the final Stop, and get the resulting code array.
            PatchJump(0, _emitted.Length);
            Emit(RegexCode.Stop);
            int[] emitted = _emitted.AsSpan().ToArray();

            bool rtl = (tree.Options & RegexOptions.RightToLeft) != 0;

            // Compute prefixes to help optimize FindFirstChar.
            // Exactly one of bmPrefix / fcPrefix is assigned below; the other stays null.
            RegexBoyerMoore?bmPrefix = null;
            RegexPrefix?    fcPrefix = null;
            RegexPrefix     prefix   = RegexFCD.Prefix(tree);

            if (prefix.Prefix.Length > 1 && prefix.Prefix.Length <= RegexBoyerMoore.MaxLimit) // if it's <= 1 || > MaxLimit, perf is better using fcPrefix
            {
                // Compute a Boyer-Moore prefix if we find a single string of sufficient length that always begins the expression.
                CultureInfo culture = (tree.Options & RegexOptions.CultureInvariant) != 0 ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture;
                bmPrefix = new RegexBoyerMoore(prefix.Prefix, prefix.CaseInsensitive, rtl, culture);
            }
            else
            {
                // If we didn't find such a string, try to compute the characters set that might begin the string.
                fcPrefix = RegexFCD.FirstChars(tree);
            }

            // Compute any anchors starting the expression.
            int anchors = RegexFCD.Anchors(tree);

            // Convert the string table into an ordered string array.
            // Each entry's value is used as the index at which its key is stored.
            var strings = new string[_stringTable.Count];

            foreach (KeyValuePair <string, int> stringEntry in _stringTable)
            {
                strings[stringEntry.Value] = stringEntry.Key;
            }

            // Return all that in a RegexCode object.
            return(new RegexCode(tree, emitted, strings, _trackCount, _caps, capsize, bmPrefix, fcPrefix, anchors, rtl));
        }
Esempio n. 16
0
        /*
         * This is a related computation: it takes a RegexTree and computes the
         * leading literal substring if it sees one. It is deliberately simple and
         * gives up easily.
         *
         * Returns a RegexPrefix holding the literal text and whether the node that
         * supplied it had RegexOptions.IgnoreCase set, or RegexPrefix.Empty when no
         * prefix is found (including when a one-character loop is too long to be
         * worth materializing as a string).
         */
        internal static RegexPrefix Prefix(RegexTree tree)
        {
            // Upper bound on the prefix built from a one-character loop. Without it a
            // pattern with a huge minimum repeat count would try to build a string of
            // that many characters and could fail with an OutOfMemoryException.
            const int MaxLoopPrefixLength = 1000000;

            RegexNode curNode    = tree._root;
            RegexNode concatNode = null;   // concatenation whose children are being scanned, if any
            int       nextChild  = 0;      // index of the next child of concatNode to look at

            for (;;)
            {
                switch (curNode._type)
                {
                case RegexNode.Concatenate:
                    // Start scanning this concatenation's children from the first one.
                    if (curNode.ChildCount() > 0)
                    {
                        concatNode = curNode;
                        nextChild  = 0;
                    }
                    break;

                case RegexNode.Greedy:
                case RegexNode.Capture:
                    // Look through the wrapper at its only child; siblings of the
                    // wrapper are no longer considered.
                    curNode    = curNode.Child(0);
                    concatNode = null;
                    continue;

                case RegexNode.Multi:
                    return new RegexPrefix(curNode._str, 0 != (curNode._options & RegexOptions.IgnoreCase));

                case RegexNode.Oneloop:
                case RegexNode.Onelazy:
                    // The first _m repetitions are mandatory, so they form the prefix.
                    if (curNode._m > 0 && curNode._m < MaxLoopPrefixLength)
                    {
                        string pref = new string(curNode._ch, curNode._m);
                        return new RegexPrefix(pref, 0 != (curNode._options & RegexOptions.IgnoreCase));
                    }

                    return RegexPrefix.Empty;

                case RegexNode.Bol:
                case RegexNode.Eol:
                case RegexNode.Boundary:
#if ECMA
                case RegexNode.ECMABoundary:
#endif
                case RegexNode.Beginning:
                case RegexNode.Start:
                case RegexNode.EndZ:
                case RegexNode.End:
                case RegexNode.Empty:
                case RegexNode.Require:
                case RegexNode.Prevent:
                    // These nodes are skipped: move on to the next sibling.
                    break;

                default:
                    return RegexPrefix.Empty;
                }

                // Advance to the next child of the enclosing concatenation, if there is one.
                if (concatNode == null || nextChild >= concatNode.ChildCount())
                {
                    return RegexPrefix.Empty;
                }

                curNode = concatNode.Child(nextChild++);
            }
        }
Esempio n. 17
0
        /*
         * This is a related computation: it takes a RegexTree and computes the
         * leading []* construct if it sees one. It is deliberately simple and
         * gives up easily.
         *
         * Returns a RegexPrefix holding the character set of the leading unbounded
         * loop and whether that loop had RegexOptions.IgnoreCase set, or null when
         * no such construct is found.
         */
        internal static RegexPrefix ScanChars(RegexTree tree)
        {
            RegexNode current   = tree._root;
            RegexNode enclosing = null;   // concatenation whose children are being scanned, if any
            int       childIdx  = 0;      // index of the next child of enclosing to look at

            while (true)
            {
                // Set by the loop cases below when the current node supplies the answer.
                string charSet = null;

                switch (current._type)
                {
                case RegexNode.Greedy:
                case RegexNode.Capture:
                    // Look through the wrapper at its only child.
                    current   = current.Child(0);
                    enclosing = null;
                    continue;

                case RegexNode.Concatenate:
                    if (current.ChildCount() > 0)
                    {
                        enclosing = current;
                        childIdx  = 0;
                    }
                    break;

                case RegexNode.Oneloop:
                case RegexNode.Onelazy:
                    // Only loops whose upper bound is infinite qualify.
                    if (current._n == infinite)
                    {
                        charSet = RegexCharClass.SetFromChar(current._ch);
                        break;
                    }
                    return null;

                case RegexNode.Notoneloop:
                case RegexNode.Notonelazy:
                    if (current._n == infinite)
                    {
                        charSet = RegexCharClass.SetInverseFromChar(current._ch);
                        break;
                    }
                    return null;

                case RegexNode.Setloop:
                case RegexNode.Setlazy:
                    // A set loop additionally qualifies only when _str2 is null or empty.
                    if (current._n == infinite && (current._str2 == null || current._str2.Length == 0))
                    {
                        charSet = current._str;
                        break;
                    }
                    return null;

                case RegexNode.Bol:
                case RegexNode.Eol:
                case RegexNode.Boundary:
#if ECMA
                case RegexNode.ECMABoundary:
#endif
                case RegexNode.Beginning:
                case RegexNode.Start:
                case RegexNode.EndZ:
                case RegexNode.End:
                case RegexNode.Empty:
                case RegexNode.Require:
                case RegexNode.Prevent:
                    // These nodes are skipped: move on to the next sibling.
                    break;

                default:
                    return null;
                }

                if (charSet != null)
                {
                    return new RegexPrefix(charSet, 0 != (current._options & RegexOptions.IgnoreCase));
                }

                // Advance to the next child of the enclosing concatenation, if there is one.
                if (enclosing == null || childIdx >= enclosing.ChildCount())
                {
                    return null;
                }

                current = enclosing.Child(childIdx);
                childIdx++;
            }
        }
Esempio n. 18
0
        /// <summary>
        /// This is a related computation: it takes a RegexTree and computes the
        /// leading literal substring if it sees one. It is deliberately simple and gives up easily.
        /// </summary>
        /// <param name="tree">The tree whose root is the starting point of the scan.</param>
        /// <returns>
        /// The literal prefix together with whether the supplying node had
        /// <see cref="RegexOptions.IgnoreCase"/> set, or <c>RegexPrefix.Empty</c> when none is found.
        /// </returns>
        public static RegexPrefix Prefix(RegexTree tree)
        {
            // In release, cutoff at a length to which we can still reasonably construct a string
            // In debug, use a smaller cutoff to exercise the cutoff path in tests
            const int MaxLoopPrefixLength =
#if DEBUG
                50;
#else
                1_000_000;
#endif

            RegexNode  current         = tree.Root;
            RegexNode? enclosingConcat = null;   // concatenation whose children are being scanned, if any
            int        pendingIndex    = 0;      // index of the next child of enclosingConcat to look at

            for (;;)
            {
                bool ignoreCase = 0 != (current.Options & RegexOptions.IgnoreCase);

                switch (current.Type)
                {
                case RegexNode.One:
                    return new RegexPrefix(current.Ch.ToString(), ignoreCase);

                case RegexNode.Multi:
                    return new RegexPrefix(current.Str!, ignoreCase);

                case RegexNode.Oneloop:
                case RegexNode.Oneloopatomic:
                case RegexNode.Onelazy:
                    // The first M repetitions are mandatory, so they form the prefix,
                    // provided there are any and the string stays below the cutoff.
                    return current.M > 0 && current.M < MaxLoopPrefixLength
                        ? new RegexPrefix(new string(current.Ch, current.M), ignoreCase)
                        : RegexPrefix.Empty;

                case RegexNode.Atomic:
                case RegexNode.Capture:
                    // Look through the wrapper at its only child.
                    current         = current.Child(0);
                    enclosingConcat = null;
                    continue;

                case RegexNode.Concatenate:
                    if (current.ChildCount() > 0)
                    {
                        enclosingConcat = current;
                        pendingIndex    = 0;
                    }
                    break;

                case RegexNode.Bol:
                case RegexNode.Eol:
                case RegexNode.Boundary:
                case RegexNode.ECMABoundary:
                case RegexNode.Beginning:
                case RegexNode.Start:
                case RegexNode.EndZ:
                case RegexNode.End:
                case RegexNode.Empty:
                case RegexNode.Require:
                case RegexNode.Prevent:
                    // These nodes are skipped: move on to the next sibling.
                    break;

                default:
                    return RegexPrefix.Empty;
                }

                // Advance to the next child of the enclosing concatenation, if there is one.
                if (enclosingConcat is null || pendingIndex >= enclosingConcat.ChildCount())
                {
                    return RegexPrefix.Empty;
                }

                current = enclosingConcat.Child(pendingIndex);
                pendingIndex++;
            }
        }
Esempio n. 19
0
            // Appends to the builder whatever literal prefix text the given node contributes.
            // The return value tells the caller whether nodes following this one may still
            // add to the prefix (true) or whether the prefix ends here (false).
            static bool Process(RegexNode node, ref ValueStringBuilder vsb)
            {
                // Too deep on the stack: stop looking for more prefix rather than recurse further.
                if (!StackHelper.TryEnsureSufficientExecutionStack())
                {
                    return false;
                }

                // Reversed input isn't handled, so a RightToLeft node never lets
                // processing continue past itself.
                bool leftToRight   = (node.Options & RegexOptions.RightToLeft) == 0;
                bool caseSensitive = (node.Options & RegexOptions.IgnoreCase) == 0;

                switch (node.Type)
                {
                // Concatenation: each child extends the prefix until one says stop.
                case RegexNode.Concatenate:
                {
                    for (int i = 0, count = node.ChildCount(); i < count; i++)
                    {
                        if (!Process(node.Child(i), ref vsb))
                        {
                            return false;
                        }
                    }

                    return leftToRight;
                }

                // Alternation: keep only the text that every branch starts with.
                case RegexNode.Alternate:
                {
                    int branchCount = node.ChildCount();

                    // The first branch is written straight into the target builder.
                    int  start       = vsb.Length;
                    bool canContinue = Process(node.Child(0), ref vsb);
                    int  shared      = vsb.Length - start;

                    // Nothing was added by the first branch, so there is nothing to intersect.
                    if (shared == 0)
                    {
                        return leftToRight && canContinue;
                    }

                    var branchText = new ValueStringBuilder(64);

                    // Intersect with each remaining branch, stopping once nothing is shared.
                    for (int branch = 1; branch < branchCount && shared != 0; branch++)
                    {
                        branchText.Length = 0;

                        // Continuing past the alternation is only possible if every branch
                        // allows it and every branch supplied exactly the same amount of text.
                        bool branchContinues = Process(node.Child(branch), ref branchText);
                        canContinue = canContinue && branchContinues && branchText.Length == shared;

                        // Shrink the shared length to the common leading run of both texts.
                        shared = Math.Min(shared, branchText.Length);

                        int common = 0;
                        while (common < shared && vsb[start + common] == branchText[common])
                        {
                            common++;
                        }

                        if (common < shared)
                        {
                            shared      = common;
                            canContinue = false;
                        }
                    }

                    branchText.Dispose();

                    // Drop whatever the first branch added beyond the shared part.
                    vsb.Length = start + shared;

                    return leftToRight && canContinue;
                }

                // A single case-sensitive character
                case RegexNode.One when caseSensitive:
                    vsb.Append(node.Ch);
                    return leftToRight;

                // A case-sensitive string
                case RegexNode.Multi when caseSensitive:
                    vsb.Append(node.Str);
                    return leftToRight;

                // Case-sensitive loop of one character with at least one mandatory iteration
                case RegexNode.Oneloop or RegexNode.Oneloopatomic or RegexNode.Onelazy when node.M > 0 && caseSensitive:
                {
                    const int SingleCharIterationLimit = 32; // arbitrary cut-off to avoid creating super long strings unnecessarily
                    int repeat = Math.Min(node.M, SingleCharIterationLimit);
                    vsb.Append(node.Ch, repeat);

                    // Continue only when the appended count equals the loop's upper bound.
                    return repeat == node.N && leftToRight;
                }

                // Loop of a node with at least one mandatory iteration
                case RegexNode.Loop or RegexNode.Lazyloop when node.M > 0:
                {
                    const int NodeIterationLimit = 4; // arbitrary cut-off to avoid creating super long strings unnecessarily
                    int iterations = Math.Min(node.M, NodeIterationLimit);
                    for (int i = 0; i < iterations; i++)
                    {
                        if (!Process(node.Child(0), ref vsb))
                        {
                            return false;
                        }
                    }

                    // Continue only when the processed count equals the loop's upper bound.
                    return iterations == node.N && leftToRight;
                }

                // Grouping nodes: only their single child matters.
                case RegexNode.Atomic:
                case RegexNode.Capture:
                    return Process(node.Child(0), ref vsb);

                // Zero-width anchors and assertions add nothing and don't end the prefix.
                case RegexNode.Bol:
                case RegexNode.Eol:
                case RegexNode.Boundary:
                case RegexNode.ECMABoundary:
                case RegexNode.NonBoundary:
                case RegexNode.NonECMABoundary:
                case RegexNode.Beginning:
                case RegexNode.Start:
                case RegexNode.EndZ:
                case RegexNode.End:
                case RegexNode.Empty:
                case RegexNode.UpdateBumpalong:
                case RegexNode.Require:
                case RegexNode.Prevent:
                    return true;

                // Anything else ends the prefix.
                default:
                    return false;
                }
            }
Esempio n. 20
0
        /// <summary>
        /// The top level RegexCode generator. It does a depth-first walk
        /// through the tree and calls EmitFragment to emit code before
        /// and after each child of an interior node and at each leaf.
        /// It also computes various information about the tree, such as
        /// prefix data to help with optimizations.
        /// </summary>
        public RegexCode RegexCodeFromRegexTree(RegexTree tree)
        {
            // Construct sparse capnum mapping if some numbers are unused.
            int capsize;

            if (tree.CapNumList == null || tree.CapTop == tree.CapNumList.Length)
            {
                capsize = tree.CapTop;
                _caps   = null;
            }
            else
            {
                capsize = tree.CapNumList.Length;
                _caps   = tree.Caps;
                for (int i = 0; i < tree.CapNumList.Length; i++)
                {
                    _caps[tree.CapNumList[i]] = i;
                }
            }

            // Every written code begins with a lazy branch.  This will be back-patched
            // to point to the ending Stop after the whole expression has been written.
            Emit(RegexCode.Lazybranch, 0);

            // Emit every node.
            RegexNode curNode  = tree.Root;
            int       curChild = 0;

            while (true)
            {
                int curNodeChildCount = curNode.ChildCount();
                if (curNodeChildCount == 0)
                {
                    EmitFragment(curNode.Type, curNode, 0);
                }
                else if (curChild < curNodeChildCount)
                {
                    EmitFragment(curNode.Type | BeforeChild, curNode, curChild);

                    curNode = curNode.Child(curChild);
                    _intStack.Append(curChild);
                    curChild = 0;
                    continue;
                }

                if (_intStack.Length == 0)
                {
                    break;
                }

                curChild = _intStack.Pop();
                curNode  = curNode.Next !;

                EmitFragment(curNode.Type | AfterChild, curNode, curChild);
                curChild++;
            }

            // Patch the starting Lazybranch, emit the final Stop, and get the resulting code array.
            PatchJump(0, _emitted.Length);
            Emit(RegexCode.Stop);
            int[] emitted = _emitted.AsSpan().ToArray();

            bool rtl      = (tree.Options & RegexOptions.RightToLeft) != 0;
            bool compiled = (tree.Options & RegexOptions.Compiled) != 0;

            // Compute prefixes to help optimize FindFirstChar.
            RegexBoyerMoore?boyerMoorePrefix = null;

            (string CharClass, bool CaseInsensitive)[]? leadingCharClasses = null;
Esempio n. 21
0
        // Scans from the root of the tree for a leading literal string.
        // Returns a RegexPrefix holding that text and whether the supplying node had
        // RegexOptions.IgnoreCase set, or RegexPrefix.Empty when none is found
        // (including when a one-character loop is too long to be worth materializing).
        //
        // NOTE(review): the numeric node-type codes come from decompilation. Judging by
        // how they are used here, 3 and 6 look like one-character loops, 12 a multi-character
        // literal, 0x19 a concatenation and 0x1c / 0x20 single-child wrappers - confirm
        // against the RegexNode constants.
        internal static RegexPrefix Prefix(RegexTree tree)
        {
            // Upper bound on the prefix built from a one-character loop. Without it a
            // pattern with a huge minimum repeat count would try to build a string of
            // that many characters and could fail with an OutOfMemoryException.
            const int MaxLoopPrefixLength = 1000000;

            RegexNode concat    = null;   // node whose children are being scanned, if any
            int       nextIndex = 0;      // index of the next child of concat to look at
            RegexNode node      = tree._root;

            while (true)
            {
                switch (node._type)
                {
                case 3:
                case 6:
                    // The first _m repetitions of _ch form the prefix.
                    if (node._m > 0 && node._m < MaxLoopPrefixLength)
                    {
                        string repeated = new string(node._ch, node._m);
                        return new RegexPrefix(repeated, RegexOptions.None != (node._options & RegexOptions.IgnoreCase));
                    }

                    return RegexPrefix.Empty;

                case 12:
                    return new RegexPrefix(node._str, RegexOptions.None != (node._options & RegexOptions.IgnoreCase));

                case 14:
                case 15:
                case 0x10:
                case 0x12:
                case 0x13:
                case 20:
                case 0x15:
                case 0x17:
                case 30:
                case 0x1f:
                case 0x29:
                    // These node types are skipped: move on to the next sibling.
                    break;

                case 0x19:
                    // Start scanning this node's children from the first one.
                    if (node.ChildCount() > 0)
                    {
                        concat    = node;
                        nextIndex = 0;
                    }
                    break;

                case 0x1c:
                case 0x20:
                    // Look through the wrapper at its first child.
                    node   = node.Child(0);
                    concat = null;
                    continue;

                default:
                    return RegexPrefix.Empty;
                }

                // Advance to the next child of the node being scanned, if there is one.
                if (concat == null || nextIndex >= concat.ChildCount())
                {
                    return RegexPrefix.Empty;
                }

                node = concat.Child(nextIndex);
                nextIndex++;
            }
        }
Esempio n. 22
0
            static bool TryAnalyze(RegexNode node, AnalysisResults results, bool isAtomicByAncestor, bool isInLoop)
            {
                if (!StackHelper.TryEnsureSufficientExecutionStack())
                {
                    return(false);
                }

                // Track whether we've seen any nodes with various options set.
                results._hasIgnoreCase  |= (node.Options & RegexOptions.IgnoreCase) != 0;
                results._hasRightToLeft |= (node.Options & RegexOptions.RightToLeft) != 0;

                // Track whether this node is inside of a loop.
                if (isInLoop)
                {
                    (results._inLoops ??= new HashSet <RegexNode>()).Add(node);
                }

                if (isAtomicByAncestor)
                {
                    // We've been told by our parent that we should be considered atomic, so add ourselves
                    // to the atomic collection.
                    results._isAtomicByAncestor.Add(node);
                }
                else
                {
                    // Certain kinds of nodes incur backtracking logic themselves: add them to the backtracking collection.
                    // We may later find that a node contains another that has backtracking; we'll add nodes based on that
                    // after examining the children.
                    switch (node.Kind)
                    {
                    case RegexNodeKind.Alternate:
                    case RegexNodeKind.Loop or RegexNodeKind.Lazyloop when node.M != node.N:
                    case RegexNodeKind.Oneloop or RegexNodeKind.Notoneloop or RegexNodeKind.Setloop or RegexNodeKind.Onelazy or RegexNodeKind.Notonelazy or RegexNodeKind.Setlazy when node.M != node.N:
                        (results._mayBacktrack ??= new HashSet <RegexNode>()).Add(node);
                        break;
                    }
                }

                // Update state for certain node types.
                bool isAtomicBySelf = false;

                switch (node.Kind)
                {
                // Some node types add atomicity around what they wrap.  Set isAtomicBySelfOrParent to true for such nodes
                // even if it was false upon entering the method.
                case RegexNodeKind.Atomic:
                case RegexNodeKind.NegativeLookaround:
                case RegexNodeKind.PositiveLookaround:
                    isAtomicBySelf = true;
                    break;

                // Track any nodes that are themselves captures.
                case RegexNodeKind.Capture:
                    results._containsCapture.Add(node);
                    break;

                // Track whether we've recurred into a loop
                case RegexNodeKind.Loop:
                case RegexNodeKind.Lazyloop:
                    isInLoop = true;
                    break;
                }

                // Process each child.
                int childCount = node.ChildCount();

                for (int i = 0; i < childCount; i++)
                {
                    RegexNode child = node.Child(i);

                    // Determine whether the child should be treated as atomic (whether anything
                    // can backtrack into it), which is influenced by whether this node (the child's
                    // parent) is considered atomic by itself or by its parent.
                    bool treatChildAsAtomic = (isAtomicByAncestor | isAtomicBySelf) && node.Kind switch
                    {
                        // If the parent is atomic, so is the child.  That's the whole purpose
                        // of the Atomic node, and lookarounds are also implicitly atomic.
                        RegexNodeKind.Atomic or RegexNodeKind.NegativeLookaround or RegexNodeKind.PositiveLookaround => true,

                        // Each branch is considered independently, so any atomicity applied to the alternation also applies
                        // to each individual branch.  This is true as well for conditionals.
                         RegexNodeKind.Alternate or RegexNodeKind.BackreferenceConditional or RegexNodeKind.ExpressionConditional => true,

                        // Captures don't impact atomicity: if the parent of a capture is atomic, the capture is also atomic.
                         RegexNodeKind.Capture => true,

                        // If the parent is a concatenation and this is the last node, any atomicity
                        // applying to the concatenation applies to this node, too.
                         RegexNodeKind.Concatenate => i == childCount - 1,

                        // For loops with a max iteration count of 1, they themselves can be considered
                        // atomic as can whatever they wrap, as they won't ever iterate more than once
                        // and thus we don't need to worry about one iteration consuming input destined
                        // for a subsequent iteration.
                         RegexNodeKind.Loop or RegexNodeKind.Lazyloop when node.N == 1 => true,

                        // For any other parent type, give up on trying to prove atomicity.
                         _ => false,
                    };

                    // Now analyze the child.
                    if (!TryAnalyze(child, results, treatChildAsAtomic, isInLoop))
                    {
                        return(false);
                    }

                    // If the child contains captures, so too does this parent.
                    if (results._containsCapture.Contains(child))
                    {
                        results._containsCapture.Add(node);
                    }

                    // If the child might require backtracking into it, so too might the parent,
                    // unless the parent is itself considered atomic.  Here we don't consider parental
                    // atomicity, as we need to surface upwards to the parent whether any backtracking
                    // will be visible from this node to it.
                    if (!isAtomicBySelf && (results._mayBacktrack?.Contains(child) == true))
                    {
                        (results._mayBacktrack ??= new HashSet <RegexNode>()).Add(node);
                    }
                }

                // Successfully analyzed the node.
                return(true);
            }
Esempio n. 23
0
        /// <summary>Runs a last round of whole-tree optimizations before the tree is handed off for use.</summary>
        /// <remarks>
        /// Must be invoked on the root node, which is expected to be the implicit Capture with exactly one child.
        /// When the options are left-to-right and Compiled, the trailing construct of the expression is made
        /// atomic: a trailing Oneloop/Notoneloop/Setloop is switched to its atomic variant, and a trailing
        /// Alternate/Loop/Lazyloop is wrapped in an Atomic node.  Afterwards, an Atomic node sitting directly
        /// beneath the root is unwrapped.
        /// </remarks>
        /// <returns>The root of the optimized tree, i.e. this node.</returns>
        internal RegexNode FinalOptimize()
        {
            RegexNode root = this;

            Debug.Assert(root.Type == Capture && root.ChildCount() == 1);

            // Nothing can ever backtrack into whatever construct ends the regex, so that construct can be made
            // non-backtracking, which in turn makes it usable by implementations without backtracking support.
            // RTL is skipped to avoid extra code for the rarer case, and the rewrite is restricted to Compiled
            // as that is the only time it makes a meaningful difference.
            bool isLeftToRight = (Options & RegexOptions.RightToLeft) == 0;
            bool isCompiled = (Options & RegexOptions.Compiled) != 0;

            if (isLeftToRight && isCompiled)
            {
                // Begin at the single child of the implicit root capture and keep descending to the trailing node.
                RegexNode current = root.Child(0);
                bool finished = false;

                while (!finished)
                {
                    var kind = current.Type;

                    if (kind == Atomic)
                    {
                        // Look through an existing atomic wrapper at what it contains.
                        current = current.Child(0);
                    }
                    else if (kind == Capture || kind == Concatenate)
                    {
                        int lastIndex = current.ChildCount() - 1;
                        RegexNode lastChild = current.Child(lastIndex);
                        var lastKind = lastChild.Type;

                        if (lastKind == Alternate || lastKind == Loop || lastKind == Lazyloop)
                        {
                            // Wrap the trailing backtracking construct in an Atomic node.  The current node is
                            // left as-is, so the next pass sees the new wrapper as its last child and descends.
                            var wrapper = new RegexNode(Atomic, Options);
                            wrapper.AddChild(lastChild);
                            current.ReplaceChild(lastIndex, wrapper);
                        }
                        else
                        {
                            current = lastChild;
                        }
                    }
                    else
                    {
                        // Any other node ends the walk; single-character loops are first converted
                        // to their atomic counterparts.
                        if (kind == Oneloop)
                        {
                            current.Type = Oneloopatomic;
                        }
                        else if (kind == Notoneloop)
                        {
                            current.Type = Notoneloopatomic;
                        }
                        else if (kind == Setloop)
                        {
                            current.Type = Setloopatomic;
                        }

                        finished = true;
                    }
                }
            }

            // An Atomic node directly under the implicit root Capture serves no purpose, since nothing
            // exists that could backtrack into it; substitute its child for it.
            RegexNode topChild = root.Child(0);
            if (topChild.Type == Atomic)
            {
                root.ReplaceChild(0, topChild.Child(0));
            }

            // All optimizations applied; hand back the tree.
            return root;
        }