internal RegexReplacement(string rep, RegexNode concat, System.Collections.Generic.Dictionary<object,object> _caps)
 {
     // Flattens the children of `concat` into two tables:
     //   _strings - the literal text runs, in order of appearance;
     //   _rules   - one entry per output piece: a non-negative index into
     //              _strings, or (-5 - n) for a type-13 child, where n is the
     //              child's _m (looked up in _caps when _caps is non-null
     //              and n is non-negative).
     // Throws ArgumentException when `concat` is not a type-0x19 node or when
     // a child has a type other than 9, 12 or 13.
     _rep = rep;

     if (concat.Type() != 0x19)
     {
         throw new ArgumentException(RegExRes.GetString(0x25));
     }

     StringBuilder pending = new StringBuilder();
     ArrayList literals = new ArrayList();
     ArrayList ruleList = new ArrayList();

     int index = 0;
     while (index < concat.ChildCount())
     {
         RegexNode child = concat.Child(index);
         int kind = child.Type();

         if (kind == 9)
         {
             pending.Append(child._ch);
         }
         else if (kind == 12)
         {
             pending.Append(child._str);
         }
         else if (kind == 13)
         {
             // Flush the literal text gathered so far before recording the reference.
             if (pending.Length > 0)
             {
                 ruleList.Add(literals.Count);
                 literals.Add(pending.ToString());
                 pending.Length = 0;
             }

             int slot = child._m;
             if (slot >= 0 && _caps != null)
             {
                 slot = (int) _caps[slot];
             }
             ruleList.Add(-5 - slot);
         }
         else
         {
             throw new ArgumentException(RegExRes.GetString(0x25));
         }

         index++;
     }

     // Trailing literal text, if any, becomes the final piece.
     if (pending.Length > 0)
     {
         ruleList.Add(literals.Count);
         literals.Add(pending.ToString());
     }

     _strings = new string[literals.Count];
     literals.CopyTo(0, _strings, 0, literals.Count);

     _rules = new int[ruleList.Count];
     for (int r = 0; r < ruleList.Count; r++)
     {
         _rules[r] = (int) ruleList[r];
     }
 }
 internal RegexTree(RegexNode root, Hashtable caps, Object[] capnumlist, int captop, Hashtable capnames, String[] capslist, RegexOptions opts)
 {
     // Stores every argument in the matching field; nothing is copied or validated.
     this._root = root;
     this._options = opts;

     this._caps = caps;
     this._capnames = capnames;
     this._capnumlist = capnumlist;
     this._capslist = capslist;
     this._captop = captop;
 }
Beispiel #3
0
        /*
         * RegexReplacement reuses the Regex parser, so this constructor is
         * handed a RegexNode that must be a Concatenate whose children are
         * literal pieces (One / Multi) and backreferences (Ref). Any other
         * shape is rejected with an ArgumentException.
         */
        internal RegexReplacement(String rep, RegexNode concat, Dictionary<Int32, Int32> _caps)
        {
            _rep = rep;

            if (concat.Type() != RegexNode.Concatenate)
            {
                throw new ArgumentException(SR.ReplacementError);
            }

            StringBuilder literal = new StringBuilder();
            List<String> stringTable = new List<String>();
            List<Int32> ruleTable = new List<Int32>();

            for (int index = 0; index < concat.ChildCount(); index++)
            {
                RegexNode piece = concat.Child(index);
                int pieceType = piece.Type();

                if (pieceType == RegexNode.Multi)
                {
                    literal.Append(piece._str);
                }
                else if (pieceType == RegexNode.One)
                {
                    literal.Append(piece._ch);
                }
                else if (pieceType == RegexNode.Ref)
                {
                    // Close off the literal run collected so far; the rule
                    // entry is its index in the string table.
                    if (literal.Length > 0)
                    {
                        ruleTable.Add(stringTable.Count);
                        stringTable.Add(literal.ToString());
                        literal.Length = 0;
                    }

                    int group = piece._m;
                    if (group >= 0 && _caps != null)
                    {
                        group = (int)_caps[group];
                    }

                    // References are stored as negative rule values.
                    ruleTable.Add(-Specials - 1 - group);
                }
                else
                {
                    throw new ArgumentException(SR.ReplacementError);
                }
            }

            // Whatever literal text remains becomes the last piece.
            if (literal.Length > 0)
            {
                ruleTable.Add(stringTable.Count);
                stringTable.Add(literal.ToString());
            }

            _strings = stringTable;
            _rules = ruleTable;
        }
Beispiel #4
0
 internal RegexTree(RegexNode root, Dictionary<Int32, Int32> caps, Int32[] capnumlist, int captop, Dictionary<String, Int32> capnames, String[] capslist, RegexOptions opts)
 {
     // Pure field initialization: each argument is kept by reference / value as passed.
     this._root = root;
     this._options = opts;

     // Capture bookkeeping supplied by the caller.
     this._caps = caps;
     this._capnames = capnames;
     this._capnumlist = capnumlist;
     this._capslist = capslist;
     this._captop = captop;
 }
 internal RegexTree(RegexNode root, Hashtable caps, int[] capnumlist, int captop, Hashtable capnames, string[] capslist, RegexOptions opts)
 {
     // Records the arguments in the corresponding fields without copying them.
     _root = root;
     _options = opts;

     _caps = caps;
     _capnames = capnames;
     _capnumlist = capnumlist;
     _capslist = capslist;
     _captop = captop;
 }
Beispiel #6
0
        private readonly List<string> _strings; // table of string constants

        #endregion Fields

        #region Constructors

        /// <summary>
        /// Builds the replacement tables from a parsed replacement pattern.
        /// RegexReplacement uses the same parser as Regex, so the input is a
        /// RegexNode that must be a Concatenate of literal pieces
        /// (One / Multi) and backreferences (Ref).
        /// </summary>
        /// <exception cref="ArgumentException">
        /// <paramref name="concat"/> is not a Concatenate node, or one of its
        /// children is neither One, Multi nor Ref.
        /// </exception>
        internal RegexReplacement(string rep, RegexNode concat, Hashtable _caps)
        {
            if (concat.Type() != RegexNode.Concatenate)
            {
                throw new ArgumentException(SR.ReplacementError);
            }

            StringBuilder literal = StringBuilderCache.Acquire();
            List<string> stringTable = new List<string>();
            List<int> ruleTable = new List<int>();

            int position = 0;
            while (position < concat.ChildCount())
            {
                RegexNode piece = concat.Child(position);

                switch (piece.Type())
                {
                    case RegexNode.One:
                        literal.Append(piece._ch);
                        break;

                    case RegexNode.Multi:
                        literal.Append(piece._str);
                        break;

                    case RegexNode.Ref:
                    {
                        // Emit the pending literal run first so that pieces
                        // stay in source order.
                        if (literal.Length > 0)
                        {
                            ruleTable.Add(stringTable.Count);
                            stringTable.Add(literal.ToString());
                            literal.Length = 0;
                        }

                        int group = piece._m;
                        if (group >= 0 && _caps != null)
                        {
                            group = (int)_caps[group];
                        }

                        // References are encoded as negative rule values.
                        ruleTable.Add(-Specials - 1 - group);
                        break;
                    }

                    default:
                        throw new ArgumentException(SR.ReplacementError);
                }

                position++;
            }

            // Trailing literal text becomes the final piece.
            if (literal.Length > 0)
            {
                ruleTable.Add(stringTable.Count);
                stringTable.Add(literal.ToString());
            }

            StringBuilderCache.Release(literal);

            _strings = stringTable;
            _rules = ruleTable;
            _rep = rep;
        }
Beispiel #7
0
 internal RegexTree(RegexNode root, System.Collections.Generic.Dictionary<object,object> caps, 
     object[] capnumlist, int captop, System.Collections.Generic.Dictionary<object,object> capnames, string[] capslist, RegexOptions opts)
 {
     // Field-by-field initialization; the arguments are stored exactly as received.
     _root = root;
     _options = opts;

     _caps = caps;
     _capnames = capnames;
     _capnumlist = capnumlist;
     _capslist = capslist;
     _captop = captop;
 }
 internal void AddAlternate()
 {
     // The finished concatenation (after ReverseLeft) is attached to _group
     // when _group is of type 0x22 or 0x21, and to _alternation otherwise.
     // A fresh type-0x19 node then becomes the current concatenation.
     if ((_group.Type() != 0x22) && (_group.Type() != 0x21))
     {
         _alternation.AddChild(_concatenation.ReverseLeft());
     }
     else
     {
         _group.AddChild(_concatenation.ReverseLeft());
     }

     _concatenation = new RegexNode(0x19, _options);
 }
 internal RegexReplacement(string rep, RegexNode concat, Hashtable _caps)
 {
     // Converts the children of `concat` into the _strings / _rules tables.
     // A rule is either a non-negative index into _strings (literal text) or
     // (-5 - n) for a type-13 child, n being the child's _m after the optional
     // _caps lookup. Throws ArgumentException when `concat` is not a
     // type-0x19 node or a child has a type other than 9, 12 or 13.
     _rep = rep;

     if (concat.Type() != 0x19)
     {
         throw new ArgumentException(SR.GetString("ReplacementError"));
     }

     StringBuilder pending = new StringBuilder();
     List<string> literals = new List<string>();
     List<int> ruleList = new List<int>();

     for (int index = 0; index < concat.ChildCount(); index++)
     {
         RegexNode child = concat.Child(index);

         switch (child.Type())
         {
             case 9:
                 pending.Append(child._ch);
                 break;

             case 12:
                 pending.Append(child._str);
                 break;

             case 13:
             {
                 // Flush collected literal text ahead of the reference.
                 if (pending.Length > 0)
                 {
                     ruleList.Add(literals.Count);
                     literals.Add(pending.ToString());
                     pending.Length = 0;
                 }

                 int slot = child._m;
                 if (slot >= 0 && _caps != null)
                 {
                     slot = (int) _caps[slot];
                 }
                 ruleList.Add(-5 - slot);
                 break;
             }

             default:
                 throw new ArgumentException(SR.GetString("ReplacementError"));
         }
     }

     // Remaining literal text forms the last piece.
     if (pending.Length > 0)
     {
         ruleList.Add(literals.Count);
         literals.Add(pending.ToString());
     }

     _strings = literals;
     _rules = ruleList;
 }
Beispiel #10
0
        internal void AddChild(RegexNode newChild)
        {
            // The child list is created on first use, with room for four entries.
            if (_children == null)
            {
                _children = new List <RegexNode>(4);
            }

            // The node that is stored is the result of Reduce(), not
            // necessarily newChild itself; its _next link is pointed back at
            // this node.
            RegexNode reduced = newChild.Reduce();
            _children.Add(reduced);
            reduced._next = this;
        }
Beispiel #11
0
        /// <summary>
        /// Collapses nested repeaters by multiplying their bounds together,
        /// as long as the inner repeater is not "too lumpy" to be merged.
        /// </summary>
        /// <returns>
        /// The innermost repeater reached (with its M / N updated), or a new
        /// Nothing node when the combined minimum saturates at int.MaxValue.
        /// </returns>
        private RegexNode ReduceRep()
        {
            RegexNode current = this;
            int outerType = Type();
            int combinedMin = M;
            int combinedMax = N;

            while (current.ChildCount() != 0)
            {
                RegexNode inner = current.Child(0);

                // Only repeaters of the same kind are multiplied together:
                // either the identical type, or a one/set loop under a Loop,
                // or a one/set lazy under a Lazyloop.
                if (inner.Type() != outerType)
                {
                    int innerType = inner.Type();
                    bool greedyPair = outerType == Loop && innerType >= Oneloop && innerType <= Setloop;
                    bool lazyPair = outerType == Lazyloop && innerType >= Onelazy && innerType <= Setlazy;

                    if (!greedyPair && !lazyPair)
                    {
                        break;
                    }
                }

                // The inner repeater can be too lumpy to blur,
                // e.g. (a {100,105}) {3} or (a {2,})?
                // [things like (a {2,})+ are not too lumpy]
                bool tooLumpy = (current.M == 0 && inner.M > 1) || (inner.N < inner.M * 2);
                if (tooLumpy)
                {
                    break;
                }

                current = inner;

                // Multiply the bounds, saturating at int.MaxValue instead of overflowing.
                if (current.M > 0)
                {
                    if ((int.MaxValue - 1) / current.M < combinedMin)
                    {
                        combinedMin = int.MaxValue;
                    }
                    else
                    {
                        combinedMin = current.M * combinedMin;
                    }
                    current.M = combinedMin;
                }

                if (current.N > 0)
                {
                    if ((int.MaxValue - 1) / current.N < combinedMax)
                    {
                        combinedMax = int.MaxValue;
                    }
                    else
                    {
                        combinedMax = current.N * combinedMax;
                    }
                    current.N = combinedMax;
                }
            }

            if (combinedMin == int.MaxValue)
            {
                return new RegexNode(Nothing, Options);
            }

            return current;
        }
Beispiel #12
0
        private RegexFC RegexFCFromRegexTree(RegexTree tree)
        {
            // Non-recursive walk over the tree starting at tree._root. The int
            // stack remembers, for each node descended into, which child index
            // was taken. CalculateFC is called with the bare node type for
            // childless nodes, with (type | 0x40) before a child and with
            // (type | 0x80) after returning from one.
            // Returns null when _failed is set after an "after child" step or
            // when no RegexFC is left at the end; otherwise the popped RegexFC.
            RegexNode current = tree._root;
            int childIndex = 0;

            while (true)
            {
                if (current._children == null)
                {
                    this.CalculateFC(current._type, current, 0);
                }
                else if ((childIndex < current._children.Count) && !this._skipAllChildren)
                {
                    this.CalculateFC(current._type | 0x40, current, childIndex);

                    if (this._skipchild)
                    {
                        // CalculateFC asked to skip this child: move to the next sibling.
                        childIndex++;
                        this._skipchild = false;
                    }
                    else
                    {
                        // Descend into the child, remembering where we were.
                        current = current._children[childIndex];
                        this.PushInt(childIndex);
                        childIndex = 0;
                    }
                    continue;
                }

                this._skipAllChildren = false;

                if (this.IntIsEmpty())
                {
                    break;
                }

                // Climb back to the parent and report the child as finished.
                childIndex = this.PopInt();
                current = current._next;
                this.CalculateFC(current._type | 0x80, current, childIndex);
                if (this._failed)
                {
                    return(null);
                }
                childIndex++;
            }

            return(this.FCIsEmpty() ? null : this.PopFC());
        }
        internal RegexNode ReduceRep()
        {
            // Walks down a chain of first children, multiplying the _m / _n
            // bounds of nested repeaters into each other (saturating at
            // 0x7fffffff). Returns the last node reached, or a new type-0x16
            // node when the combined minimum has saturated.
            RegexNode current = this;
            int outerType = this.Type();
            int combinedMin = this._m;
            int combinedMax = this._n;

            while (current.ChildCount() != 0)
            {
                RegexNode inner = current.Child(0);

                if (inner.Type() != outerType)
                {
                    // A differing type is only accepted for types 3..5 under
                    // a 0x1a node, or types 6..8 under a 0x1b node.
                    int innerType = inner.Type();
                    bool firstPairing = (outerType == 0x1a) && (innerType >= 3) && (innerType <= 5);
                    bool secondPairing = (outerType == 0x1b) && (innerType >= 6) && (innerType <= 8);

                    if (!firstPairing && !secondPairing)
                    {
                        break;
                    }
                }

                // Stop when the bounds of the two nodes cannot be merged.
                bool cannotMerge = ((current._m == 0) && (inner._m > 1)) || (inner._n < (inner._m * 2));
                if (cannotMerge)
                {
                    break;
                }

                current = inner;

                if (current._m > 0)
                {
                    if ((0x7ffffffe / current._m) < combinedMin)
                    {
                        combinedMin = 0x7fffffff;
                    }
                    else
                    {
                        combinedMin = current._m * combinedMin;
                    }
                    current._m = combinedMin;
                }

                if (current._n > 0)
                {
                    if ((0x7ffffffe / current._n) < combinedMax)
                    {
                        combinedMax = 0x7fffffff;
                    }
                    else
                    {
                        combinedMax = current._n * combinedMax;
                    }
                    current._n = combinedMax;
                }
            }

            if (combinedMin == 0x7fffffff)
            {
                return(new RegexNode(0x16, this._options));
            }

            return(current);
        }
        internal RegexFC RegexFCFromRegexTree(RegexTree tree)
        {
            // Non-recursive walk over the tree starting at tree._root. The int
            // stack remembers which child index was taken at each level.
            // CalculateFC is called with the bare node type for childless
            // nodes, with (type | 0x40) before a child and with (type | 0x80)
            // after returning from one. When no RegexFC is left at the end, a
            // new RegexFC("\0", true, false) is returned instead.
            RegexNode current = tree._root;
            int childIndex = 0;

            while (true)
            {
                if (current._children == null)
                {
                    this.CalculateFC(current._type, current, 0);
                }
                else if ((childIndex < current._children.Count) && !this._earlyexit)
                {
                    this.CalculateFC(current._type | 0x40, current, childIndex);

                    if (this._skipchild)
                    {
                        // CalculateFC asked to skip this child: move to the next sibling.
                        childIndex++;
                        this._skipchild = false;
                    }
                    else
                    {
                        // Descend into the child, remembering where we were.
                        current = (RegexNode)current._children[childIndex];
                        this.PushInt(childIndex);
                        childIndex = 0;
                    }
                    continue;
                }

                this._earlyexit = false;

                if (this.EmptyInt())
                {
                    break;
                }

                // Climb back to the parent and report the child as finished.
                childIndex = this.PopInt();
                current = current._next;
                this.CalculateFC(current._type | 0x80, current, childIndex);
                childIndex++;
            }

            if (!this.EmptyFC())
            {
                return(this.PopFC());
            }

            return(new RegexFC("\0", true, false));
        }
Beispiel #15
0
        /// <summary>
        /// The main RegexCode generator. It does a depth-first walk
        /// through the tree and calls EmitFragment to emits code before
        /// and after each child of an interior node, and at each leaf.
        /// </summary>
        private void EmitFragment(int nodetype, RegexNode node, int curIndex)
        {
            int bits = 0;

            if (nodetype <= RegexNode.Ref)
            {
                if (node.UseOptionR())
                {
                    bits |= RegexCode.Rtl;
                }
                if ((node.Options & RegexOptions.IgnoreCase) != 0)
                {
                    bits |= RegexCode.Ci;
                }
            }

            switch (nodetype)
            {
            case RegexNode.Concatenate | BeforeChild:
            case RegexNode.Concatenate | AfterChild:
            case RegexNode.Empty:
                break;

            case RegexNode.Alternate | BeforeChild:
                if (curIndex < node.Children !.Count - 1)
                {
                    _intStack.Append(_emitted.Length);
                    Emit(RegexCode.Lazybranch, 0);
                }
                break;

            case RegexNode.Alternate | AfterChild:
            {
                if (curIndex < node.Children !.Count - 1)
                {
                    int LBPos = _intStack.Pop();
                    _intStack.Append(_emitted.Length);
                    Emit(RegexCode.Goto, 0);
                    PatchJump(LBPos, _emitted.Length);
                }
Beispiel #16
0
        internal RegexTree(RegexNode root, int captureCount, string[]? captureNames, Hashtable? captureNameToNumberMapping, Hashtable? captureNumberSparseMapping, RegexOptions options, CultureInfo? culture)
        {
#if DEBUG
            // Debug-only checks that document how the capture tables relate to each other:
            // the name table and the name-to-number map come as a pair, a sparse
            // number map implies names, and every table has captureCount entries.
            Debug.Assert((captureNames is null) == (captureNameToNumberMapping is null));
            Debug.Assert(captureNumberSparseMapping is null || captureNames is not null);
            Debug.Assert(captureNames is null || captureCount == captureNames.Length);
            Debug.Assert(captureNameToNumberMapping is null || captureCount == captureNameToNumberMapping.Count);
            Debug.Assert(captureNumberSparseMapping is null || captureCount == captureNumberSparseMapping.Count);

            if (captureNames is not null)
            {
                Debug.Assert(captureNameToNumberMapping is not null);

                // Each name must map (through the sparse table when there is one)
                // back to its own position in captureNames.
                for (int index = 0; index < captureNames.Length; index++)
                {
                    string name = captureNames[index];

                    int? number = captureNameToNumberMapping[name] as int?;
                    Debug.Assert(number is not null);

                    if (captureNumberSparseMapping is not null)
                    {
                        number = captureNumberSparseMapping[number] as int?;
                        Debug.Assert(number is not null);
                    }

                    Debug.Assert(number == index);
                }
            }
#endif

            Root = root;
            Options = options;
            Culture = culture;

            CaptureCount = captureCount;
            CaptureNames = captureNames;
            CaptureNameToNumberMapping = captureNameToNumberMapping;
            CaptureNumberSparseMapping = captureNumberSparseMapping;

            FindOptimizations = new RegexFindOptimizations(root, options);
        }
        internal RegexNode MakeQuantifier(bool lazy, int min, int max)
        {
            // {0,0}: the node is replaced by a fresh type-0x17 node.
            if (min == 0 && max == 0)
            {
                return(new RegexNode(0x17, this._options));
            }

            // {1,1}: the quantifier changes nothing.
            if (min == 1 && max == 1)
            {
                return(this);
            }

            // Node types 9, 10 and 11 are converted in place by MakeRep
            // (base type 6 when lazy, 3 otherwise).
            if (this._type >= 9 && this._type <= 11)
            {
                int repType = lazy ? 6 : 3;
                this.MakeRep(repType, min, max);
                return(this);
            }

            // Every other node is wrapped in a new loop node
            // (type 0x1b when lazy, 0x1a otherwise) carrying the bounds.
            int loopType = lazy ? 0x1b : 0x1a;
            RegexNode wrapper = new RegexNode(loopType, this._options, min, max);
            wrapper.AddChild(this);
            return(wrapper);
        }
Beispiel #18
0
        /*
         * Companion computation: walks a RegexTree looking for a literal
         * leading substring. It is deliberately simple and answers
         * RegexPrefix.Empty as soon as it meets a node it does not handle.
         */
        internal static RegexPrefix Prefix(RegexTree tree)
        {
            RegexNode current = tree._root;
            RegexNode enclosingConcat = null;
            int childCursor = 0;

            while (true)
            {
                switch (current._type)
                {
                case RegexNode.Concatenate:
                    // Remember a non-empty concatenation so its children
                    // can be visited one after another.
                    if (current.ChildCount() > 0)
                    {
                        enclosingConcat = current;
                        childCursor = 0;
                    }
                    break;

                case RegexNode.Greedy:
                case RegexNode.Capture:
                    // Look through the wrapper at its first child.
                    current = current.Child(0);
                    enclosingConcat = null;
                    continue;

                case RegexNode.Oneloop:
                case RegexNode.Onelazy:
                    if (current._m <= 0)
                    {
                        return(RegexPrefix.Empty);
                    }

                    // The prefix is the character repeated _m times.
                    return(new RegexPrefix(new string(current._ch, current._m),
                                           (current._options & RegexOptions.IgnoreCase) != 0));

                case RegexNode.One:
                    return(new RegexPrefix(current._ch.ToString(),
                                           (current._options & RegexOptions.IgnoreCase) != 0));

                case RegexNode.Multi:
                    return(new RegexPrefix(current._str,
                                           (current._options & RegexOptions.IgnoreCase) != 0));

                // These node types are stepped over; the scan moves on to
                // the next child of the enclosing concatenation.
                case RegexNode.Bol:
                case RegexNode.Eol:
                case RegexNode.Boundary:
                case RegexNode.ECMABoundary:
                case RegexNode.Beginning:
                case RegexNode.Start:
                case RegexNode.EndZ:
                case RegexNode.End:
                case RegexNode.Empty:
                case RegexNode.Require:
                case RegexNode.Prevent:
                    break;

                default:
                    return(RegexPrefix.Empty);
                }

                bool hasNextChild = enclosingConcat != null && childCursor < enclosingConcat.ChildCount();
                if (!hasNextChild)
                {
                    return(RegexPrefix.Empty);
                }

                current = enclosingConcat.Child(childCursor);
                childCursor++;
            }
        }
 internal void AddGroup()
 {
     // For a _group of type 0x22 or 0x21 the finished concatenation is added
     // straight to the group, which may then hold at most 2 children (type
     // 0x21) or 3 children (type 0x22). For any other group the concatenation
     // goes into _alternation, which in turn is added to the group.
     // Either way the group becomes the current unit.
     bool addsDirectly = (_group.Type() == 0x22) || (_group.Type() == 0x21);

     if (!addsDirectly)
     {
         _alternation.AddChild(_concatenation.ReverseLeft());
         _group.AddChild(_alternation);
     }
     else
     {
         _group.AddChild(_concatenation.ReverseLeft());

         int maxChildren = (_group.Type() == 0x21) ? 2 : 3;
         if (_group.ChildCount() > maxChildren)
         {
             throw MakeException(SR.GetString("TooManyAlternates"));
         }
     }

     _unit = _group;
 }
Beispiel #20
0
 // Stub constructor: the body is empty, so rep, concat and caps are accepted
 // but not used and no state is initialized here.
 public RegexReplacement(string rep, RegexNode concat, Hashtable caps)
 {
 }
Beispiel #21
0
        /*
         * The main RegexCode generator. The tree is walked depth-first and
         * EmitFragment is called to emit code before and after each child
         * of an interior node, and at each leaf.
         *
         * nodetype is the node's type, combined with BeforeChild or
         * AfterChild for the interior-node cases below; CurIndex is the
         * index of the child concerned. Jump targets that are not yet known
         * are emitted as 0, their positions are kept on the int stack
         * (PushInt / PopInt) and fixed up later with PatchJump.
         *
         * An unrecognized nodetype raises the exception built by
         * MakeException with the UnexpectedOpcode message.
         */
        internal void EmitFragment(int nodetype, RegexNode node, int CurIndex)
        {
            // Flag bits OR-ed into the opcode by the leaf cases further down.
            int bits = 0;

            // Only node types up to RegexNode.Ref pick up the Rtl / Ci bits.
            if (nodetype <= RegexNode.Ref)
            {
                if (node.UseOptionR())
                {
                    bits |= RegexCode.Rtl;
                }
                if ((node._options & RegexOptions.IgnoreCase) != 0)
                {
                    bits |= RegexCode.Ci;
                }
            }

            switch (nodetype)
            {
            // Nothing is emitted for concatenations or empty nodes.
            case RegexNode.Concatenate | BeforeChild:
            case RegexNode.Concatenate | AfterChild:
            case RegexNode.Empty:
                break;

            case RegexNode.Alternate | BeforeChild:
                // Every alternative except the last starts with a Lazybranch
                // whose target is patched once the alternative has been emitted.
                if (CurIndex < node._children.Count - 1)
                {
                    PushInt(CurPos());
                    Emit(RegexCode.Lazybranch, 0);
                }
                break;

            case RegexNode.Alternate | AfterChild: {
                if (CurIndex < node._children.Count - 1)
                {
                    // Not the last alternative: emit a Goto (target patched
                    // after the last alternative) and point the earlier
                    // Lazybranch at the position right after it.
                    int LBPos = PopInt();
                    PushInt(CurPos());
                    Emit(RegexCode.Goto, 0);
                    PatchJump(LBPos, CurPos());
                }
                else
                {
                    // Last alternative: patch the CurIndex saved jump
                    // positions to the current position.
                    int I;
                    for (I = 0; I < CurIndex; I++)
                    {
                        PatchJump(PopInt(), CurPos());
                    }
                }
                break;
            }

            case RegexNode.Testref | BeforeChild:
                // Only the first child (index 0) gets a prologue.
                switch (CurIndex)
                {
                case 0:
                    Emit(RegexCode.Setjump);
                    PushInt(CurPos());
                    Emit(RegexCode.Lazybranch, 0);
                    Emit(RegexCode.Testref, MapCapnum(node._m));
                    Emit(RegexCode.Forejump);
                    break;
                }
                break;

            case RegexNode.Testref | AfterChild:
                switch (CurIndex)
                {
                case 0: {
                    int Branchpos = PopInt();
                    PushInt(CurPos());
                    Emit(RegexCode.Goto, 0);
                    PatchJump(Branchpos, CurPos());
                    Emit(RegexCode.Forejump);
                    // With a second child present, the Goto is patched when
                    // that child (index 1) has been emitted.
                    if (node._children.Count > 1)
                    {
                        break;
                    }
                    // else fallthrough
                    goto case 1;
                }

                case 1:
                    PatchJump(PopInt(), CurPos());
                    break;
                }
                break;

            case RegexNode.Testgroup | BeforeChild:
                // Only the first child (index 0) gets a prologue.
                switch (CurIndex)
                {
                case 0:
                    Emit(RegexCode.Setjump);
                    Emit(RegexCode.Setmark);
                    PushInt(CurPos());
                    Emit(RegexCode.Lazybranch, 0);
                    break;
                }
                break;

            case RegexNode.Testgroup | AfterChild:
                switch (CurIndex)
                {
                case 0:
                    Emit(RegexCode.Getmark);
                    Emit(RegexCode.Forejump);
                    break;

                case 1:
                    int Branchpos = PopInt();
                    PushInt(CurPos());
                    Emit(RegexCode.Goto, 0);
                    PatchJump(Branchpos, CurPos());
                    Emit(RegexCode.Getmark);
                    Emit(RegexCode.Forejump);

                    // With a third child present, the Goto is patched when
                    // that child (index 2) has been emitted.
                    if (node._children.Count > 2)
                    {
                        break;
                    }
                    // else fallthrough
                    goto case 2;

                case 2:
                    PatchJump(PopInt(), CurPos());
                    break;
                }
                break;

            case RegexNode.Loop | BeforeChild:
            case RegexNode.Lazyloop | BeforeChild:

                // A counter opcode is used when the loop has a finite upper
                // bound or a minimum above 1; otherwise a mark opcode is used.
                if (node._n < infinite || node._m > 1)
                {
                    Emit(node._m == 0 ? RegexCode.Nullcount : RegexCode.Setcount, node._m == 0 ? 0 : 1 - node._m);
                }
                else
                {
                    Emit(node._m == 0 ? RegexCode.Nullmark : RegexCode.Setmark);
                }

                // With a minimum of 0 a Goto is emitted first; its target is
                // patched in the AfterChild case.
                if (node._m == 0)
                {
                    PushInt(CurPos());
                    Emit(RegexCode.Goto, 0);
                }
                // Position of the loop body, consumed by the branch emitted after the child.
                PushInt(CurPos());
                break;

            case RegexNode.Loop | AfterChild:
            case RegexNode.Lazyloop | AfterChild: {
                int StartJumpPos = CurPos();
                // 0 for Loop; for Lazyloop, the difference between the two
                // node type values, added to the branch opcode below.
                int Lazy         = (nodetype - (RegexNode.Loop | AfterChild));

                if (node._n < infinite || node._m > 1)
                {
                    Emit(RegexCode.Branchcount + Lazy, PopInt(), node._n == infinite ? infinite : node._n - node._m);
                }
                else
                {
                    Emit(RegexCode.Branchmark + Lazy, PopInt());
                }

                // Point the Goto emitted before the child at the branch instruction.
                if (node._m == 0)
                {
                    PatchJump(PopInt(), StartJumpPos);
                }
            }
            break;

            // Nothing is emitted for plain groups.
            case RegexNode.Group | BeforeChild:
            case RegexNode.Group | AfterChild:
                break;

            case RegexNode.Capture | BeforeChild:
                Emit(RegexCode.Setmark);
                break;

            case RegexNode.Capture | AfterChild:
                Emit(RegexCode.Capturemark, MapCapnum(node._m), MapCapnum(node._n));
                break;

            case RegexNode.Require | BeforeChild:
                // NOTE: the following line causes lookahead/lookbehind to be
                // NON-BACKTRACKING. It can be commented out with (*)
                Emit(RegexCode.Setjump);


                Emit(RegexCode.Setmark);
                break;

            case RegexNode.Require | AfterChild:
                Emit(RegexCode.Getmark);

                // NOTE: the following line causes lookahead/lookbehind to be
                // NON-BACKTRACKING. It can be commented out with (*)
                Emit(RegexCode.Forejump);

                break;

            case RegexNode.Prevent | BeforeChild:
                Emit(RegexCode.Setjump);
                PushInt(CurPos());
                Emit(RegexCode.Lazybranch, 0);
                break;

            case RegexNode.Prevent | AfterChild:
                Emit(RegexCode.Backjump);
                PatchJump(PopInt(), CurPos());
                Emit(RegexCode.Forejump);
                break;

            case RegexNode.Greedy | BeforeChild:
                Emit(RegexCode.Setjump);
                break;

            case RegexNode.Greedy | AfterChild:
                Emit(RegexCode.Forejump);
                break;

            // Leaf nodes: the node type itself (plus option bits) is the opcode.
            case RegexNode.One:
            case RegexNode.Notone:
                Emit(node._type | bits, (int)node._ch);
                break;

            case RegexNode.Notoneloop:
            case RegexNode.Notonelazy:
            case RegexNode.Oneloop:
            case RegexNode.Onelazy:
                // The mandatory first _m repetitions are emitted as a fixed
                // repeat; the remaining (_n - _m) as the loop opcode itself.
                if (node._m > 0)
                {
                    Emit(((node._type == RegexNode.Oneloop || node._type == RegexNode.Onelazy) ?
                          RegexCode.Onerep : RegexCode.Notonerep) | bits, (int)node._ch, node._m);
                }
                if (node._n > node._m)
                {
                    Emit(node._type | bits, (int)node._ch, node._n == infinite ?
                         infinite : node._n - node._m);
                }
                break;

            case RegexNode.Setloop:
            case RegexNode.Setlazy:
                // Same split as above: a fixed Setrep for the first _m
                // repetitions, then the loop opcode for the remainder.
                if (node._m > 0)
                {
                    Emit(RegexCode.Setrep | bits, StringCode(node._str), StringCode(node._str2), node._m);
                }
                if (node._n > node._m)
                {
                    Emit(node._type | bits, StringCode(node._str), StringCode(node._str2),
                         (node._n == infinite) ? infinite : node._n - node._m);
                }
                break;

            case RegexNode.Multi:
                Emit(node._type | bits, StringCode(node._str));
                break;

            case RegexNode.Set:
                Emit(node._type | bits, StringCode(node._str), StringCode(node._str2));
                break;

            case RegexNode.Ref:
                Emit(node._type | bits, MapCapnum(node._m));
                break;

            // These node types are emitted as a bare opcode without operands.
            case RegexNode.Nothing:
            case RegexNode.Bol:
            case RegexNode.Eol:
            case RegexNode.Boundary:
            case RegexNode.Nonboundary:
#if ECMA
            case RegexNode.ECMABoundary:
            case RegexNode.NonECMABoundary:
#endif
            case RegexNode.Beginning:
            case RegexNode.Start:
            case RegexNode.EndZ:
            case RegexNode.End:
                Emit(node._type);
                break;

            default:
                throw MakeException(SR.GetString(SR.UnexpectedOpcode, nodetype.ToString()));
            }
        }
Beispiel #22
0
        /// <summary>
        /// Emits the RegexCode for a single step of the tree walk: it handles one leaf
        /// node, or the point just before or just after one child of an interior node.
        /// </summary>
        /// <param name="nodetype">
        /// The node type for a leaf, or the node type OR'ed with BeforeChild or
        /// AfterChild for an interior node.
        /// </param>
        /// <param name="node">The node that code is being emitted for.</param>
        /// <param name="curIndex">
        /// Index of the child being entered or left. It is not consulted by the leaf cases.
        /// </param>
        /// <exception cref="ArgumentException">
        /// <paramref name="nodetype"/> is not one of the values handled below.
        /// </exception>
        private void EmitFragment(int nodetype, RegexNode node, int curIndex)
        {
            // Modifier bits that are OR'ed into the opcode of the leaf cases further down.
            int bits = 0;

            // Only node types up to RegexNode.Ref pick up the right-to-left and
            // case-insensitive modifiers.
            if (nodetype <= RegexNode.Ref)
            {
                if (node.UseOptionR())
                {
                    bits |= RegexCode.Rtl;
                }
                if ((node.Options & RegexOptions.IgnoreCase) != 0)
                {
                    bits |= RegexCode.Ci;
                }
            }

            switch (nodetype)
            {
            // Nothing is emitted around the children of a concatenation, nor for Empty.
            case RegexNode.Concatenate | BeforeChild:
            case RegexNode.Concatenate | AfterChild:
            case RegexNode.Empty:
                break;

            case RegexNode.Alternate | BeforeChild:
                // Every alternative except the last starts with a Lazybranch whose
                // target is still unknown; its position is pushed so it can be patched.
                if (curIndex < node.Children.Count - 1)
                {
                    _intStack.Append(_emitted.Length);
                    Emit(RegexCode.Lazybranch, 0);
                }
                break;

            case RegexNode.Alternate | AfterChild:
            {
                if (curIndex < node.Children.Count - 1)
                {
                    // End of a non-final alternative: emit a Goto (patched after the
                    // last alternative) and point this alternative's Lazybranch at
                    // the code that follows.
                    int LBPos = _intStack.Pop();
                    _intStack.Append(_emitted.Length);
                    Emit(RegexCode.Goto, 0);
                    PatchJump(LBPos, _emitted.Length);
                }
                else
                {
                    // End of the last alternative: patch the Goto pushed by each of
                    // the curIndex earlier alternatives to the current position.
                    int I;
                    for (I = 0; I < curIndex; I++)
                    {
                        PatchJump(_intStack.Pop(), _emitted.Length);
                    }
                }
                break;
            }

            case RegexNode.Testref | BeforeChild:
                // Only the first child gets a prologue: Setjump, a Lazybranch to be
                // patched later, the Testref on the mapped capture number, and Forejump.
                switch (curIndex)
                {
                case 0:
                    Emit(RegexCode.Setjump);
                    _intStack.Append(_emitted.Length);
                    Emit(RegexCode.Lazybranch, 0);
                    Emit(RegexCode.Testref, MapCapnum(node.M));
                    Emit(RegexCode.Forejump);
                    break;
                }
                break;

            case RegexNode.Testref | AfterChild:
                switch (curIndex)
                {
                case 0:
                {
                    // After the first child: emit a Goto (patched in case 1) and
                    // point the Lazybranch from the prologue at the code that follows.
                    int Branchpos = _intStack.Pop();
                    _intStack.Append(_emitted.Length);
                    Emit(RegexCode.Goto, 0);
                    PatchJump(Branchpos, _emitted.Length);
                    Emit(RegexCode.Forejump);
                    if (node.Children.Count > 1)
                    {
                        break;
                    }
                    // No second child: patch the Goto right away.
                    goto case 1;
                }

                case 1:
                    PatchJump(_intStack.Pop(), _emitted.Length);
                    break;
                }
                break;

            case RegexNode.Testgroup | BeforeChild:
                // Only the first child gets a prologue: Setjump, Setmark and a
                // Lazybranch whose position is pushed for later patching.
                switch (curIndex)
                {
                case 0:
                    Emit(RegexCode.Setjump);
                    Emit(RegexCode.Setmark);
                    _intStack.Append(_emitted.Length);
                    Emit(RegexCode.Lazybranch, 0);
                    break;
                }
                break;

            case RegexNode.Testgroup | AfterChild:
                switch (curIndex)
                {
                case 0:
                    Emit(RegexCode.Getmark);
                    Emit(RegexCode.Forejump);
                    break;

                case 1:
                    // After the second child: emit a Goto (patched in case 2) and
                    // point the prologue's Lazybranch at the code that follows.
                    int Branchpos = _intStack.Pop();
                    _intStack.Append(_emitted.Length);
                    Emit(RegexCode.Goto, 0);
                    PatchJump(Branchpos, _emitted.Length);
                    Emit(RegexCode.Getmark);
                    Emit(RegexCode.Forejump);

                    if (node.Children.Count > 2)
                    {
                        break;
                    }
                    // No third child: patch the Goto right away.
                    goto case 2;

                case 2:
                    PatchJump(_intStack.Pop(), _emitted.Length);
                    break;
                }
                break;

            case RegexNode.Loop | BeforeChild:
            case RegexNode.Lazyloop | BeforeChild:

                // A loop with a finite upper bound or a minimum above one is set up
                // with a count (Nullcount/Setcount); any other loop only gets a mark
                // (Nullmark/Setmark).
                if (node.N < int.MaxValue || node.M > 1)
                {
                    Emit(node.M == 0 ? RegexCode.Nullcount : RegexCode.Setcount, node.M == 0 ? 0 : 1 - node.M);
                }
                else
                {
                    Emit(node.M == 0 ? RegexCode.Nullmark : RegexCode.Setmark);
                }

                // With a minimum of zero a Goto is emitted in front of the body; the
                // AfterChild case patches it to the loop's branch instruction.
                if (node.M == 0)
                {
                    _intStack.Append(_emitted.Length);
                    Emit(RegexCode.Goto, 0);
                }
                // Position of the loop body, used as the branch operand in AfterChild.
                _intStack.Append(_emitted.Length);
                break;

            case RegexNode.Loop | AfterChild:
            case RegexNode.Lazyloop | AfterChild:
            {
                int StartJumpPos = _emitted.Length;
                // Zero for Loop and non-zero for Lazyloop; added to the branch opcode below.
                int Lazy         = (nodetype - (RegexNode.Loop | AfterChild));

                // Same counted/marked split as in the BeforeChild case.
                if (node.N < int.MaxValue || node.M > 1)
                {
                    Emit(RegexCode.Branchcount + Lazy, _intStack.Pop(), node.N == int.MaxValue ? int.MaxValue : node.N - node.M);
                }
                else
                {
                    Emit(RegexCode.Branchmark + Lazy, _intStack.Pop());
                }

                // Patch the Goto that BeforeChild emitted for a zero minimum.
                if (node.M == 0)
                {
                    PatchJump(_intStack.Pop(), StartJumpPos);
                }
            }
            break;

            // Group nodes emit nothing around their child.
            case RegexNode.Group | BeforeChild:
            case RegexNode.Group | AfterChild:
                break;

            case RegexNode.Capture | BeforeChild:
                Emit(RegexCode.Setmark);
                break;

            case RegexNode.Capture | AfterChild:
                // Both capture numbers (node.M and node.N) go through MapCapnum.
                Emit(RegexCode.Capturemark, MapCapnum(node.M), MapCapnum(node.N));
                break;

            case RegexNode.Require | BeforeChild:
                // NOTE: the following line causes lookahead/lookbehind to be
                // NON-BACKTRACKING. It can be commented out with (*)
                Emit(RegexCode.Setjump);


                Emit(RegexCode.Setmark);
                break;

            case RegexNode.Require | AfterChild:
                Emit(RegexCode.Getmark);

                // NOTE: the following line causes lookahead/lookbehind to be
                // NON-BACKTRACKING. It can be commented out with (*)
                Emit(RegexCode.Forejump);

                break;

            case RegexNode.Prevent | BeforeChild:
                // Setjump, then a Lazybranch that the AfterChild case patches.
                Emit(RegexCode.Setjump);
                _intStack.Append(_emitted.Length);
                Emit(RegexCode.Lazybranch, 0);
                break;

            case RegexNode.Prevent | AfterChild:
                // The Lazybranch is patched to land just after the Backjump.
                Emit(RegexCode.Backjump);
                PatchJump(_intStack.Pop(), _emitted.Length);
                Emit(RegexCode.Forejump);
                break;

            case RegexNode.Greedy | BeforeChild:
                Emit(RegexCode.Setjump);
                break;

            case RegexNode.Greedy | AfterChild:
                Emit(RegexCode.Forejump);
                break;

            // Leaf nodes: the node type doubles as the opcode, combined with the
            // modifier bits computed above.
            case RegexNode.One:
            case RegexNode.Notone:
                Emit(node.NType | bits, node.Ch);
                break;

            case RegexNode.Notoneloop:
            case RegexNode.Notonelazy:
            case RegexNode.Oneloop:
            case RegexNode.Onelazy:
                // The required minimum is emitted as a fixed repetition
                // (Onerep/Notonerep) of node.M characters.
                if (node.M > 0)
                {
                    Emit(((node.NType == RegexNode.Oneloop || node.NType == RegexNode.Onelazy) ?
                          RegexCode.Onerep : RegexCode.Notonerep) | bits, node.Ch, node.M);
                }
                // The remaining optional iterations use the node's own opcode;
                // an unbounded maximum stays int.MaxValue.
                if (node.N > node.M)
                {
                    Emit(node.NType | bits, node.Ch, node.N == int.MaxValue ?
                         int.MaxValue : node.N - node.M);
                }
                break;

            case RegexNode.Setloop:
            case RegexNode.Setlazy:
                // Same minimum/remainder split as above, with the set string
                // passed through StringCode.
                if (node.M > 0)
                {
                    Emit(RegexCode.Setrep | bits, StringCode(node.Str), node.M);
                }
                if (node.N > node.M)
                {
                    Emit(node.NType | bits, StringCode(node.Str),
                         (node.N == int.MaxValue) ? int.MaxValue : node.N - node.M);
                }
                break;

            case RegexNode.Multi:
                Emit(node.NType | bits, StringCode(node.Str));
                break;

            case RegexNode.Set:
                Emit(node.NType | bits, StringCode(node.Str));
                break;

            case RegexNode.Ref:
                Emit(node.NType | bits, MapCapnum(node.M));
                break;

            // These node types are emitted as a bare opcode, without modifier bits
            // or operands.
            case RegexNode.Nothing:
            case RegexNode.Bol:
            case RegexNode.Eol:
            case RegexNode.Boundary:
            case RegexNode.Nonboundary:
            case RegexNode.ECMABoundary:
            case RegexNode.NonECMABoundary:
            case RegexNode.Beginning:
            case RegexNode.Start:
            case RegexNode.EndZ:
            case RegexNode.End:
                Emit(node.NType);
                break;

            default:
                throw new ArgumentException(SR.Format(SR.UnexpectedOpcode, nodetype.ToString(CultureInfo.CurrentCulture)));
            }
        }
Beispiel #23
0
        /// <summary>
        /// Computes the leading literal substring of <paramref name="tree"/>, when one
        /// can be found by a simple walk from the root.
        /// </summary>
        /// <remarks>
        /// The walk descends into the first child of Atomic and Capture nodes and steps
        /// over anchor, boundary, Empty, Require and Prevent children of a concatenation.
        /// It stops at the first One, Multi or single-character loop node and gives up
        /// with <see cref="RegexPrefix.Empty"/> on anything else.
        /// </remarks>
        /// <param name="tree">The parsed expression to inspect.</param>
        /// <returns>The prefix found, or <see cref="RegexPrefix.Empty"/>.</returns>
        public static RegexPrefix Prefix(RegexTree tree)
        {
            // In release, cap the repeat count at a length for which building the string
            // is still reasonable; in debug, use a small cap so tests hit the cutoff path.
#if DEBUG
            const int MaxRepeat = 50;
#else
            const int MaxRepeat = 1_000_000;
#endif

            RegexNode  current  = tree.Root;
            RegexNode? sequence = null;   // concatenation whose children are being scanned
            int        position = 0;      // index of the next child of 'sequence' to visit

            for (;;)
            {
                switch (current.Type)
                {
                    case RegexNode.One:
                        return new RegexPrefix(current.Ch.ToString(), (current.Options & RegexOptions.IgnoreCase) != 0);

                    case RegexNode.Multi:
                        return new RegexPrefix(current.Str!, (current.Options & RegexOptions.IgnoreCase) != 0);

                    case RegexNode.Oneloop:
                    case RegexNode.Oneloopatomic:
                    case RegexNode.Onelazy:
                        // Only the guaranteed minimum repetitions can serve as a prefix.
                        if (current.M <= 0 || current.M >= MaxRepeat)
                        {
                            return RegexPrefix.Empty;
                        }

                        string repeated = new string(current.Ch, current.M);
                        return new RegexPrefix(repeated, (current.Options & RegexOptions.IgnoreCase) != 0);

                    case RegexNode.Atomic:
                    case RegexNode.Capture:
                        // Look inside the wrapper; any concatenation being scanned is dropped.
                        current  = current.Child(0);
                        sequence = null;
                        continue;

                    case RegexNode.Concatenate:
                        if (current.ChildCount() > 0)
                        {
                            sequence = current;
                            position = 0;
                        }
                        break;

                    case RegexNode.Bol:
                    case RegexNode.Eol:
                    case RegexNode.Boundary:
                    case RegexNode.ECMABoundary:
                    case RegexNode.Beginning:
                    case RegexNode.Start:
                    case RegexNode.EndZ:
                    case RegexNode.End:
                    case RegexNode.Empty:
                    case RegexNode.Require:
                    case RegexNode.Prevent:
                        // Skipped: move on to the next sibling below.
                        break;

                    default:
                        return RegexPrefix.Empty;
                }

                // Advance to the next child of the concatenation, if there is one.
                if (sequence != null && position < sequence.ChildCount())
                {
                    current = sequence.Child(position++);
                    continue;
                }

                return RegexPrefix.Empty;
            }
        }
Beispiel #24
0
        /*
         * Makes the current unit a Notone node for the given character.
         * When UseOptionI() is true the character is lower-cased with the
         * parser's culture first.
         */
        internal void AddUnitNotone(char ch)
        {
            char excluded = UseOptionI() ? _culture.TextInfo.ToLower(ch) : ch;

            _unit = new RegexNode(RegexNode.Notone, _options, excluded);
        }
Beispiel #25
0
        /// <summary>
        /// Top-level RegexCode generator. It walks the tree depth-first, calling
        /// EmitFragment at each leaf and before and after each child of an interior
        /// node, then packages the emitted code together with the prefix and anchor
        /// information into a <see cref="RegexCode"/>.
        /// </summary>
        /// <param name="tree">The parsed expression to compile.</param>
        /// <returns>The generated <see cref="RegexCode"/>.</returns>
        public RegexCode RegexCodeFromRegexTree(RegexTree tree)
        {
            Span<int> codeBuffer  = stackalloc int[EmittedSize];
            Span<int> stackBuffer = stackalloc int[IntStackSize];
            RegexWriter writer    = new RegexWriter(codeBuffer, stackBuffer);

            // Build the sparse capture-number mapping only when some numbers are unused.
            int capsize;
            bool sparseCaptures = tree._capnumlist != null && tree._captop != tree._capnumlist.Length;

            if (sparseCaptures)
            {
                capsize      = tree._capnumlist.Length;
                writer._caps = tree._caps;

                for (int slot = 0; slot < tree._capnumlist.Length; slot++)
                {
                    writer._caps[tree._capnumlist[slot]] = slot;
                }
            }
            else
            {
                capsize      = tree._captop;
                writer._caps = null;
            }

            RegexNode current    = tree._root;
            int       childIndex = 0;

            // Leading Lazybranch; its target is patched once the walk has finished.
            writer.Emit(RegexCode.Lazybranch, 0);

            while (true)
            {
                if (current._children != null && childIndex < current._children.Count)
                {
                    // Descend into the next child, remembering which child it is.
                    writer.EmitFragment(current._type | BeforeChild, current, childIndex);

                    current = current._children[childIndex];
                    writer._intStack.Append(childIndex);
                    childIndex = 0;
                    continue;
                }

                if (current._children == null)
                {
                    // Leaf node.
                    writer.EmitFragment(current._type, current, 0);
                }

                // An empty stack means the walk has returned to the root.
                if (writer._intStack.Length == 0)
                {
                    break;
                }

                // Climb back to the parent and finish the child just left.
                childIndex = writer._intStack.Pop();
                current    = current._next;

                writer.EmitFragment(current._type | AfterChild, current, childIndex);
                childIndex++;
            }

            writer.PatchJump(0, writer._emitted.Length);
            writer.Emit(RegexCode.Stop);

            RegexPrefix fcPrefix = RegexFCD.FirstChars(tree);
            RegexPrefix prefix   = RegexFCD.Prefix(tree);
            bool        rtl      = (tree._options & RegexOptions.RightToLeft) != 0;

            CultureInfo culture = (tree._options & RegexOptions.CultureInvariant) != 0
                ? CultureInfo.InvariantCulture
                : CultureInfo.CurrentCulture;

            // A Boyer-Moore searcher is only built for a non-empty literal prefix.
            RegexBoyerMoore bmPrefix = null;

            if (prefix != null && prefix.Prefix.Length > 0)
            {
                bmPrefix = new RegexBoyerMoore(prefix.Prefix, prefix.CaseInsensitive, rtl, culture);
            }

            int anchors = RegexFCD.Anchors(tree);

            int[] emitted = writer._emitted.AsReadOnlySpan().ToArray();

            // Clean up and return the borrowed arrays.
            writer._emitted.Dispose();
            writer._intStack.Dispose();

            return new RegexCode(emitted, writer._stringTable, writer._trackCount, writer._caps, capsize, bmPrefix, fcPrefix, anchors, rtl);
        }
Beispiel #26
0
        private readonly int[] _rules;      // negative -> group #, positive -> string #

        /// <summary>
        /// Builds a replacement from a parsed replacement pattern. Because
        /// RegexReplacement shares its parser with Regex, the pattern arrives as a
        /// RegexNode that is a concatenation of constant strings and backreferences.
        /// </summary>
        /// <param name="rep">The original replacement pattern text; stored in Pattern.</param>
        /// <param name="concat">The Concatenate node produced by the parser.</param>
        /// <param name="_caps">
        /// Optional capture-number map. When non-null, non-negative group numbers are
        /// looked up through it.
        /// </param>
        /// <exception cref="ArgumentException">
        /// <paramref name="concat"/> is not a Concatenate node, or one of its children
        /// is not a Multi, One or Ref node.
        /// </exception>
        public RegexReplacement(string rep, RegexNode concat, Hashtable _caps)
        {
            if (concat.Type != RegexNode.Concatenate)
            {
                throw ThrowHelper.CreateArgumentException(ExceptionResource.ReplacementError);
            }

            Span<char>       literalBuffer = stackalloc char[256];
            var              literal       = new ValueStringBuilder(literalBuffer);
            FourStackStrings inlineStrings = default;
            var              literals      = new ValueListBuilder<string>(MemoryMarshal.CreateSpan(ref inlineStrings.Item1!, 4));
            var              ruleList      = new ValueListBuilder<int>(stackalloc int[64]);

            for (int index = 0, total = concat.ChildCount(); index < total; index++)
            {
                RegexNode part = concat.Child(index);

                switch (part.Type)
                {
                    case RegexNode.One:
                        literal.Append(part.Ch);
                        continue;

                    case RegexNode.Multi:
                        literal.Append(part.Str!);
                        continue;

                    case RegexNode.Ref:
                        // Handled below the switch.
                        break;

                    default:
                        throw ThrowHelper.CreateArgumentException(ExceptionResource.ReplacementError);
                }

                // Backreference: first flush any literal text gathered so far as its own
                // rule (a non-negative index into the string table).
                if (literal.Length > 0)
                {
                    ruleList.Append(literals.Length);
                    literals.Append(literal.ToString());
                    literal = new ValueStringBuilder(literalBuffer);
                }

                int group = part.M;

                if (_caps != null && group >= 0)
                {
                    group = (int)_caps[group]!;
                }

                // Group references are stored as negative rules.
                ruleList.Append(-Specials - 1 - group);
            }

            // Flush the trailing literal text, if any.
            if (literal.Length > 0)
            {
                ruleList.Append(literals.Length);
                literals.Append(literal.ToString());
            }

            Pattern  = rep;
            _strings = literals.AsSpan().ToArray();
            _rules   = ruleList.AsSpan().ToArray();

            ruleList.Dispose();
        }
Beispiel #27
0
        /*
         * Since RegexReplacement shares the same parser as Regex,
         * the constructor takes a RegexNode which is a concatenation
         * of constant strings and backreferences.
         */
#if SILVERLIGHT
        internal RegexReplacement(String rep, RegexNode concat, Dictionary <Int32, Int32> _caps)
        {
        /// <summary>Finds the literal text that <paramref name="tree"/> begins with.</summary>
        /// <remarks>
        /// The search is deliberately simple: it descends into the first child of Atomic
        /// and Capture nodes, steps over anchor, boundary, Empty, Require and Prevent
        /// children of a concatenation, and stops at the first One, Multi or
        /// single-character loop node. In every other situation it gives up and returns
        /// an empty string with <c>false</c>.
        /// </remarks>
        /// <param name="tree">The parsed expression to inspect.</param>
        /// <returns>The leading substring and whether it is to be matched case-insensitively.</returns>
        public static (string Prefix, bool CaseInsensitive) ComputeLeadingSubstring(RegexTree tree)
        {
            // In release, cap the repeat count at a length for which the string can still
            // reasonably be built and Boyer-Moore searched; in debug, use a small cap so
            // tests hit the cutoff path.
#if DEBUG
            const int MaxRepeat = 50;
#else
            const int MaxRepeat = RegexBoyerMoore.MaxLimit;
#endif

            RegexNode  current  = tree.Root;
            RegexNode? sequence = null;   // concatenation whose children are being scanned
            int        position = 0;      // index of the next child of 'sequence' to visit

            for (;;)
            {
                switch (current.Type)
                {
                    case RegexNode.One:
                        return (current.Ch.ToString(), (current.Options & RegexOptions.IgnoreCase) != 0);

                    case RegexNode.Multi:
                        return (current.Str!, (current.Options & RegexOptions.IgnoreCase) != 0);

                    case RegexNode.Oneloop:
                    case RegexNode.Oneloopatomic:
                    case RegexNode.Onelazy:
                        // Only the guaranteed minimum repetitions can serve as a prefix.
                        if (current.M <= 0 || current.M >= MaxRepeat)
                        {
                            return (string.Empty, false);
                        }

                        return (new string(current.Ch, current.M), (current.Options & RegexOptions.IgnoreCase) != 0);

                    case RegexNode.Atomic:
                    case RegexNode.Capture:
                        // Look inside the wrapper; any concatenation being scanned is dropped.
                        current  = current.Child(0);
                        sequence = null;
                        continue;

                    case RegexNode.Concatenate:
                        if (current.ChildCount() > 0)
                        {
                            sequence = current;
                            position = 0;
                        }
                        break;

                    case RegexNode.Bol:
                    case RegexNode.Eol:
                    case RegexNode.Boundary:
                    case RegexNode.ECMABoundary:
                    case RegexNode.Beginning:
                    case RegexNode.Start:
                    case RegexNode.EndZ:
                    case RegexNode.End:
                    case RegexNode.Empty:
                    case RegexNode.Require:
                    case RegexNode.Prevent:
                        // Skipped: move on to the next sibling below.
                        break;

                    default:
                        return (string.Empty, false);
                }

                // Advance to the next child of the concatenation, if there is one.
                if (sequence != null && position < sequence.ChildCount())
                {
                    current = sequence.Child(position++);
                    continue;
                }

                return (string.Empty, false);
            }
        }
Beispiel #29
0
        // Compiles a RegexTree into a RegexCode.
        //
        // The tree is walked twice: once with _counting set, and once more after
        // _emitted has been allocated with _count entries. Each walk visits the
        // tree depth-first, calling EmitFragment at every leaf and before (0x40)
        // and after (0x80) each child of an interior node.
        internal RegexCode RegexCodeFromRegexTree(RegexTree tree)
        {
            int capSize;

            // A capture-number map is only needed when the numbers in use are sparse.
            bool sparseCaptures = (tree._capnumlist != null) && (tree._captop != tree._capnumlist.Length);

            if (sparseCaptures)
            {
                capSize    = tree._capnumlist.Length;
                this._caps = tree._caps;
                for (int slot = 0; slot < tree._capnumlist.Length; slot++)
                {
                    this._caps[tree._capnumlist[slot]] = slot;
                }
            }
            else
            {
                capSize    = tree._captop;
                this._caps = null;
            }

            this._counting = true;

            while (true)
            {
                // Second pass: allocate the code array from the first pass's count.
                if (!this._counting)
                {
                    this._emitted = new int[this._count];
                }

                RegexNode current    = tree._root;
                int       childIndex = 0;

                // Leading opcode 0x17 with a placeholder operand, patched after the walk.
                this.Emit(0x17, 0);

                while (true)
                {
                    if (current._children == null)
                    {
                        // Leaf node.
                        this.EmitFragment(current._type, current, 0);
                    }
                    else if (childIndex < current._children.Count)
                    {
                        // Descend into the next child, remembering which child it is.
                        this.EmitFragment(current._type | 0x40, current, childIndex);
                        current = (RegexNode)current._children[childIndex];
                        this.PushInt(childIndex);
                        childIndex = 0;
                        continue;
                    }

                    // An empty stack means the walk has returned to the root.
                    if (this.EmptyStack())
                    {
                        break;
                    }

                    // Climb back to the parent and finish the child just left.
                    childIndex = this.PopInt();
                    current    = current._next;
                    this.EmitFragment(current._type | 0x80, current, childIndex);
                    childIndex++;
                }

                this.PatchJump(0, this.CurPos());
                this.Emit(40);

                if (!this._counting)
                {
                    break;
                }

                this._counting = false;
            }

            RegexPrefix firstChars = RegexFCD.FirstChars(tree);

            // The first-character prefix is discarded when SetSize reports a positive size.
            if ((firstChars != null) && (RegexCharClass.SetSize(firstChars.Prefix) > 0))
            {
                firstChars = null;
            }

            RegexPrefix scanChars = null;
            RegexPrefix leading   = RegexFCD.Prefix(tree);
            bool        rtl       = (tree._options & RegexOptions.RightToLeft) != RegexOptions.None;
            CultureInfo culture   = ((tree._options & RegexOptions.CultureInvariant) != RegexOptions.None)
                ? CultureInfo.InvariantCulture
                : CultureInfo.CurrentCulture;

            // A Boyer-Moore searcher is only built for a non-empty literal prefix.
            RegexBoyerMoore boyerMoore = null;

            if ((leading != null) && (leading.Prefix.Length > 0))
            {
                boyerMoore = new RegexBoyerMoore(leading.Prefix, leading.CaseInsensitive, rtl, culture);
            }

            return new RegexCode(this._emitted, this._stringtable, this._trackcount, this._caps, capSize, boyerMoore, firstChars, scanChars, RegexFCD.Anchors(tree), rtl);
        }
Beispiel #30
0
        // Emits the code for a single step of the tree walk.
        //
        //   nodetype - the node type for a leaf, or the node type OR'ed with
        //              0x40 (before a child) or 0x80 (after a child) for an
        //              interior node.
        //   node     - the node that code is being emitted for.
        //   CurIndex - index of the child being entered or left; not consulted
        //              by the leaf cases.
        //
        // Throws the exception built by MakeException when nodetype is not one
        // of the handled values.
        //
        // NOTE(review): the numeric opcodes are decompiler output. Where a
        // comment names an opcode, the name is inferred from the
        // symbolic-constant version of this routine and should be confirmed
        // against RegexCode / RegexNode.
        internal void EmitFragment(int nodetype, RegexNode node, int CurIndex)
        {
            // Modifier bits OR'ed into leaf opcodes; only node types up to 13 get them.
            int flags = 0;

            if (nodetype <= 13)
            {
                if (node.UseOptionR())
                {
                    flags |= 0x40;
                }
                if ((node._options & RegexOptions.IgnoreCase) != RegexOptions.None)
                {
                    flags |= 0x200;
                }
            }

            switch (nodetype)
            {
            // Single-character loops: the required minimum is emitted as a fixed
            // repetition (opcode 0 or 1), the optional remainder with the node's
            // own type as opcode.
            case 3:
            case 4:
            case 6:
            case 7:
                if (node._m > 0)
                {
                    this.Emit((((node._type == 3) || (node._type == 6)) ? 0 : 1) | flags, node._ch, node._m);
                }
                if (node._n > node._m)
                {
                    this.Emit(node._type | flags, node._ch, (node._n == 0x7fffffff) ? 0x7fffffff : (node._n - node._m));
                }
                return;

            // Set loops: same minimum/remainder split, using opcode 2 for the minimum.
            case 5:
            case 8:
                if (node._m > 0)
                {
                    this.Emit(2 | flags, this.StringCode(node._str), this.StringCode(node._str2), node._m);
                }
                if (node._n > node._m)
                {
                    this.Emit(node._type | flags, this.StringCode(node._str), this.StringCode(node._str2), (node._n == 0x7fffffff) ? 0x7fffffff : (node._n - node._m));
                }
                return;

            case 9:
            case 10:
                this.Emit(node._type | flags, node._ch);
                return;

            case 11:
                this.Emit(node._type | flags, this.StringCode(node._str), this.StringCode(node._str2));
                return;

            case 12:
                this.Emit(node._type | flags, this.StringCode(node._str));
                return;

            case 13:
                this.Emit(node._type | flags, this.MapCapnum(node._m));
                return;

            // Node types emitted as a bare opcode, without modifier bits or operands.
            case 14:
            case 15:
            case 0x10:
            case 0x11:
            case 0x12:
            case 0x13:
            case 20:
            case 0x15:
            case 0x16:
            case 0x29:
            case 0x2a:
                this.Emit(node._type);
                return;

            // Nothing is emitted for these (presumably Empty and the before/after
            // steps of Concatenate and Group).
            case 0x17:
            case 0x59:
            case 0x5d:
            case 0x99:
            case 0x9d:
                return;

            // Before an alternative: every alternative except the last starts with
            // a branch (0x17) whose position is pushed for later patching.
            case 0x58:
                if (CurIndex < (node._children.Count - 1))
                {
                    this.PushInt(this.CurPos());
                    this.Emit(0x17, 0);
                }
                return;

            // Before a loop body: unbounded loops with a minimum of at most one
            // get opcode 30/0x1f, all others opcode 0x1a/0x1b with an operand.
            case 90:
            case 0x5b:
                if ((node._n >= 0x7fffffff) && (node._m <= 1))
                {
                    this.Emit((node._m == 0) ? 30 : 0x1f);
                }
                else
                {
                    this.Emit((node._m == 0) ? 0x1a : 0x1b, (node._m == 0) ? 0 : (1 - node._m));
                }
                // With a minimum of zero a jump (0x26) is emitted in front of the
                // body; the after-child case patches it.
                if (node._m == 0)
                {
                    this.PushInt(this.CurPos());
                    this.Emit(0x26, 0);
                }
                // Position of the loop body, used as branch operand after the child.
                this.PushInt(this.CurPos());
                return;

            case 0x5c:
                this.Emit(0x1f);
                return;

            case 0x5e:
                this.Emit(0x22);
                this.Emit(0x1f);
                return;

            case 0x5f:
                this.Emit(0x22);
                this.PushInt(this.CurPos());
                this.Emit(0x17, 0);
                return;

            case 0x60:
                this.Emit(0x22);
                return;

            // Before a child of a reference conditional (presumably Testref):
            // only the first child gets a prologue.
            case 0x61:
                if (CurIndex == 0)
                {
                    this.Emit(0x22);
                    this.PushInt(this.CurPos());
                    this.Emit(0x17, 0);
                    this.Emit(0x25, this.MapCapnum(node._m));
                    this.Emit(0x24);
                }
                return;

            // Before a child of an expression conditional (presumably Testgroup):
            // only the first child gets a prologue.
            case 0x62:
                if (CurIndex == 0)
                {
                    this.Emit(0x22);
                    this.Emit(0x1f);
                    this.PushInt(this.CurPos());
                    this.Emit(0x17, 0);
                }
                return;

            // After an alternative.
            case 0x98:
            {
                if (CurIndex >= (node._children.Count - 1))
                {
                    // Last alternative: patch the jump pushed by each earlier one.
                    for (int i = 0; i < CurIndex; i++)
                    {
                        this.PatchJump(this.PopInt(), this.CurPos());
                    }
                    return;
                }
                // Non-final alternative: emit a jump (patched after the last
                // alternative) and point this alternative's branch past it.
                int branchPos = this.PopInt();
                this.PushInt(this.CurPos());
                this.Emit(0x26, 0);
                this.PatchJump(branchPos, this.CurPos());
                return;
            }

            // After a loop body.
            case 0x9a:
            case 0x9b:
            {
                int jumpDest   = this.CurPos();
                // Zero for 0x9a and one for 0x9b; added to the branch opcode below.
                int lazyOffset = nodetype - 0x9a;
                if ((node._n >= 0x7fffffff) && (node._m <= 1))
                {
                    this.Emit(0x18 + lazyOffset, this.PopInt());
                }
                else
                {
                    this.Emit(0x1c + lazyOffset, this.PopInt(), (node._n == 0x7fffffff) ? 0x7fffffff : (node._n - node._m));
                }
                // Patch the jump that the before-child case emitted for a zero minimum.
                if (node._m == 0)
                {
                    this.PatchJump(this.PopInt(), jumpDest);
                }
                return;
            }

            case 0x9c:
                this.Emit(0x20, this.MapCapnum(node._m), this.MapCapnum(node._n));
                return;

            case 0x9e:
                this.Emit(0x21);
                this.Emit(0x24);
                return;

            case 0x9f:
                this.Emit(0x23);
                this.PatchJump(this.PopInt(), this.CurPos());
                this.Emit(0x24);
                return;

            case 160:
                this.Emit(0x24);
                return;

            // After a child of a reference conditional.
            case 0xa1:
                switch (CurIndex)
                {
                case 0:
                {
                    // Emit a jump (patched in case 1) and point the prologue's
                    // branch at the code that follows.
                    int branchPos = this.PopInt();
                    this.PushInt(this.CurPos());
                    this.Emit(0x26, 0);
                    this.PatchJump(branchPos, this.CurPos());
                    this.Emit(0x24);
                    if (node._children.Count > 1)
                    {
                        return;
                    }
                    // No second child: the jump just pushed must be patched now.
                    // The decompiled original returned here and had no case 1,
                    // so the jump was never popped or patched.
                    goto case 1;
                }

                case 1:
                    this.PatchJump(this.PopInt(), this.CurPos());
                    return;
                }
                return;

            // After a child of an expression conditional.
            case 0xa2:
                switch (CurIndex)
                {
                case 0:
                    this.Emit(0x21);
                    this.Emit(0x24);
                    return;

                case 1:
                {
                    // Emit a jump (patched in case 2) and point the prologue's
                    // branch at the code that follows.
                    int branchPos = this.PopInt();
                    this.PushInt(this.CurPos());
                    this.Emit(0x26, 0);
                    this.PatchJump(branchPos, this.CurPos());
                    this.Emit(0x21);
                    this.Emit(0x24);
                    if (node._children.Count > 2)
                    {
                        return;
                    }
                    // No third child: patch the jump right away.
                    goto case 2;
                }

                case 2:
                    this.PatchJump(this.PopInt(), this.CurPos());
                    return;
                }
                return;

            default:
                throw MakeException(RegExRes.GetString(4, nodetype.ToString()));
            }
        }
 /// <summary>
 /// Sets the current unit to a single-character node (type 9). When
 /// UseOptionI() reports true, the character is lower-cased with the
 /// parser's culture before the node is created.
 /// </summary>
 /// <param name="ch">The character the new node will carry.</param>
 internal void AddUnitOne(char ch)
 {
     char unitChar = this.UseOptionI() ? char.ToLower(ch, this._culture) : ch;

     this._unit = new RegexNode(9, this._options, unitChar);
 }
Beispiel #32
0
 /// <summary>
 /// Sets the current unit to a single set node built from the given
 /// character-class string and the current options.
 /// </summary>
 /// <param name="cc">The character-class string stored in the set node.</param>
 internal void AddUnitSet(string cc)
 {
     RegexNode setNode = new RegexNode(RegexNode.Set, _options, cc);

     _unit = setNode;
 }
 /// <summary>
 /// Sets the current unit to a new node of the given type, created with
 /// the current options.
 /// </summary>
 /// <param name="type">The node type code passed to the RegexNode constructor.</param>
 internal void AddUnitType(int type)
 {
     RegexNode typedNode = new RegexNode(type, this._options);

     this._unit = typedNode;
 }
Beispiel #34
0
        /// <summary>
        /// Finishes the current group (in response to a ')' or the end of the pattern)
        /// and makes the finished group the current unit.
        /// </summary>
        internal void AddGroup()
        {
            bool isConditional = _group.Type() == RegexNode.Testgroup || _group.Type() == RegexNode.Testref;

            if (!isConditional)
            {
                // Ordinary group: the concatenation becomes one more alternative,
                // and the alternation becomes the group's child.
                _alternation.AddChild(_concatenation.ReverseLeft());
                _group.AddChild(_alternation);
            }
            else
            {
                // Conditional group: each branch is attached directly to the group.
                _group.AddChild(_concatenation.ReverseLeft());

                // A Testref may hold at most 2 children; otherwise the limit is 3.
                if ((_group.Type() == RegexNode.Testref && _group.ChildCount() > 2) || _group.ChildCount() > 3)
                {
                    throw MakeException(SR.TooManyAlternates);
                }
            }

            _unit = _group;
        }
Beispiel #35
0
        /// <summary>
        /// Performs the first-character (FC) bookkeeping for one visited node type:
        /// depending on the type code it pushes a new RegexFC, merges the top two
        /// entries of the FC stack, marks the top entry nullable, or skips children.
        /// </summary>
        /// <param name="NodeType">Numeric node-type code being visited.</param>
        /// <param name="node">The node whose character, string, count and option fields are read.</param>
        /// <param name="CurIndex">Child index associated with the visit; decides whether a merge happens.</param>
        /// <exception cref="ArgumentException">Thrown when <paramref name="NodeType"/> is not a handled code.</exception>
        private void CalculateFC(int NodeType, RegexNode node, int CurIndex)
        {
            // The option flags are only consulted for node types up to 13.
            bool readsOptions    = NodeType <= 13;
            bool caseInsensitive = readsOptions && (node._options & RegexOptions.IgnoreCase) != RegexOptions.None;
            bool rightToLeft     = readsOptions && (node._options & RegexOptions.RightToLeft) != RegexOptions.None;

            switch (NodeType)
            {
            case 3:
            case 6:
                // Character, not negated; nullable when the minimum count is zero.
                this.PushFC(new RegexFC(node._ch, false, node._m == 0, caseInsensitive));
                break;

            case 4:
            case 7:
                // Character, negated; nullable when the minimum count is zero.
                this.PushFC(new RegexFC(node._ch, true, node._m == 0, caseInsensitive));
                break;

            case 5:
            case 8:
                // String form; nullable when the minimum count is zero.
                this.PushFC(new RegexFC(node._str, node._m == 0, caseInsensitive));
                break;

            case 9:
            case 10:
                // Single character; type 10 is the negated form.
                this.PushFC(new RegexFC(node._ch, NodeType == 10, false, caseInsensitive));
                break;

            case 11:
                this.PushFC(new RegexFC(node._str, false, caseInsensitive));
                break;

            case 12:
                if (node._str.Length == 0)
                {
                    this.PushFC(new RegexFC(true));
                }
                else
                {
                    // The leading character is the last one when matching right-to-left.
                    int leadIndex = rightToLeft ? node._str.Length - 1 : 0;
                    this.PushFC(new RegexFC(node._str[leadIndex], false, false, caseInsensitive));
                }
                break;

            case 13:
                this.PushFC(new RegexFC("\0\x0001\0\0", true, false));
                break;

            case 14:
            case 15:
            case 0x10:
            case 0x11:
            case 0x12:
            case 0x13:
            case 20:
            case 0x15:
            case 0x16:
            case 0x17:
            case 0x29:
            case 0x2a:
                // These types all push an FC constructed with 'true'.
                this.PushFC(new RegexFC(true));
                break;

            case 0x58:
            case 0x59:
            case 90:
            case 0x5b:
            case 0x5c:
            case 0x5d:
            case 0x60:
            case 0x61:
            case 0x9c:
            case 0x9d:
            case 0x9e:
            case 0x9f:
            case 160:
                // Nothing to do for these codes.
                break;

            case 0x5e:
            case 0x5f:
                this.SkipChild();
                this.PushFC(new RegexFC(true));
                break;

            case 0x62:
                if (CurIndex == 0)
                {
                    this.SkipChild();
                }
                break;

            case 0x98:
            case 0xa1:
                if (CurIndex != 0)
                {
                    // Fold the newest FC into the one beneath it (AddFC flag false).
                    RegexFC branch = this.PopFC();
                    this._failed = !this.TopFC().AddFC(branch, false);
                }
                break;

            case 0x99:
                if (CurIndex != 0)
                {
                    // Fold the newest FC into the one beneath it (AddFC flag true).
                    RegexFC piece = this.PopFC();
                    this._failed = !this.TopFC().AddFC(piece, true);
                }
                if (!this.TopFC()._nullable)
                {
                    this._skipAllChildren = true;
                }
                break;

            case 0x9a:
            case 0x9b:
                if (node._m == 0)
                {
                    this.TopFC()._nullable = true;
                }
                break;

            case 0xa2:
                if (CurIndex > 1)
                {
                    RegexFC alternative = this.PopFC();
                    this._failed = !this.TopFC().AddFC(alternative, false);
                }
                break;

            default:
                throw new ArgumentException(SR.GetString("UnexpectedOpcode", new object[] { NodeType.ToString(CultureInfo.CurrentCulture) }));
            }
        }
Beispiel #36
0
        /// <summary>
        /// Simple parsing for replacement patterns: literal runs are added to a
        /// concatenation as text, and each '$' is handed to ScanDollar().
        /// </summary>
        /// <returns>The concatenation node built from the replacement pattern.</returns>
        internal RegexNode ScanReplacement()
        {
            _concatenation = new RegexNode(RegexNode.Concatenate, _options);

            int remaining;
            while ((remaining = CharsRight()) != 0)
            {
                int literalStart = Textpos();

                // Advance over the literal run, stopping in front of the next '$'.
                for (; remaining > 0 && RightChar() != '$'; remaining--)
                {
                    MoveRight();
                }

                AddConcatenate(literalStart, Textpos() - literalStart, true);

                if (remaining > 0)
                {
                    // Input remains, so the scan stopped on a '$'.
                    if (MoveRightGetChar() == '$')
                    {
                        AddUnitNode(ScanDollar());
                    }

                    AddConcatenate();
                }
            }

            return _concatenation;
        }
Beispiel #37
0
        /// <summary>
        /// The top level RegexCode generator. Walks the tree depth-first, calling
        /// EmitFragment before and after each child of an interior node and once
        /// at each leaf, then packages the emitted code together with prefix and
        /// anchor information.
        /// </summary>
        /// <param name="tree">The parsed tree to generate code for.</param>
        /// <returns>A RegexCode built from the emitted code and the computed metadata.</returns>
        public RegexCode RegexCodeFromRegexTree(RegexTree tree)
        {
            // Build the sparse capnum mapping only when some capture numbers are unused.
            int capsize;

            if (tree.CapNumList != null && tree.CapTop != tree.CapNumList.Length)
            {
                capsize = tree.CapNumList.Length;
                _caps   = tree.Caps;
                for (int slot = 0; slot < tree.CapNumList.Length; slot++)
                {
                    _caps[tree.CapNumList[slot]] = slot;
                }
            }
            else
            {
                capsize = tree.CapTop;
                _caps   = null;
            }

            RegexNode current    = tree.Root;
            int       childIndex = 0;

            // Placeholder branch at position 0; patched after the walk completes.
            Emit(RegexCode.Lazybranch, 0);

            while (true)
            {
                if (current.Children == null)
                {
                    // No children: emit the node once.
                    EmitFragment(current.NType, current, 0);
                }
                else if (childIndex < current.Children.Count)
                {
                    // Emit the before-child fragment, then descend into that child,
                    // remembering which child index we came through.
                    EmitFragment(current.NType | BeforeChild, current, childIndex);

                    current = current.Children[childIndex];
                    _intStack.Append(childIndex);
                    childIndex = 0;
                    continue;
                }

                // Nothing left on the stack means the walk is finished.
                if (_intStack.Length == 0)
                {
                    break;
                }

                // Step back through Next and emit the after-child fragment there.
                childIndex = _intStack.Pop();
                current    = current.Next;

                EmitFragment(current.NType | AfterChild, current, childIndex);
                childIndex++;
            }

            PatchJump(0, _emitted.Length);
            Emit(RegexCode.Stop);

            RegexPrefix? fcPrefix = RegexFCD.FirstChars(tree);
            RegexPrefix  prefix   = RegexFCD.Prefix(tree);
            bool         rtl      = (tree.Options & RegexOptions.RightToLeft) != 0;

            CultureInfo culture;
            if ((tree.Options & RegexOptions.CultureInvariant) != 0)
            {
                culture = CultureInfo.InvariantCulture;
            }
            else
            {
                culture = CultureInfo.CurrentCulture;
            }

            // A Boyer-Moore prefix is only built when the literal prefix is non-empty.
            RegexBoyerMoore bmPrefix = prefix.Prefix.Length > 0
                ? new RegexBoyerMoore(prefix.Prefix, prefix.CaseInsensitive, rtl, culture)
                : null;

            int anchors = RegexFCD.Anchors(tree);

            int[] emitted = _emitted.AsSpan().ToArray();

            return new RegexCode(emitted, _stringTable, _trackCount, _caps, capsize, bmPrefix, fcPrefix, anchors, rtl);
        }
 /// <summary>
 /// Saves the current group, alternation and concatenation by chaining them
 /// through their _next fields on top of the existing stack, leaving the
 /// concatenation as the new stack top.
 /// </summary>
 internal void PushGroup()
 {
     // Chain: concatenation -> alternation -> group -> previous stack top.
     _group._next         = _stack;
     _alternation._next   = _group;
     _concatenation._next = _alternation;

     _stack = _concatenation;
 }
Beispiel #39
0
        /// <summary>
        /// The top level RegexCode generator. It does a depth-first walk
        /// through the tree and calls EmitFragment to emit code before
        /// and after each child of an interior node and at each leaf.
        /// It also computes various information about the tree, such as
        /// prefix data to help with optimizations.
        /// </summary>
        /// <param name="tree">The parsed tree whose nodes are walked and emitted.</param>
        public RegexCode RegexCodeFromRegexTree(RegexTree tree)
        {
            // Construct sparse capnum mapping if some numbers are unused.
            int capsize;

            if (tree.CapNumList == null || tree.CapTop == tree.CapNumList.Length)
            {
                // No mapping table is kept in this case; the size comes straight from CapTop.
                capsize = tree.CapTop;
                _caps   = null;
            }
            else
            {
                // Map each listed capture number to its index in CapNumList.
                capsize = tree.CapNumList.Length;
                _caps   = tree.Caps;
                for (int i = 0; i < tree.CapNumList.Length; i++)
                {
                    _caps[tree.CapNumList[i]] = i;
                }
            }

            // Every written code begins with a lazy branch.  This will be back-patched
            // to point to the ending Stop after the whole expression has been written.
            Emit(RegexCode.Lazybranch, 0);

            // Emit every node.
            RegexNode curNode  = tree.Root;
            int       curChild = 0;

            while (true)
            {
                int curNodeChildCount = curNode.ChildCount();
                if (curNodeChildCount == 0)
                {
                    // No children: emit the node once.
                    EmitFragment(curNode.Type, curNode, 0);
                }
                else if (curChild < curNodeChildCount)
                {
                    // Emit the before-child fragment, then descend into that child,
                    // remembering on _intStack which child index we came through.
                    EmitFragment(curNode.Type | BeforeChild, curNode, curChild);

                    curNode = curNode.Child(curChild);
                    _intStack.Append(curChild);
                    curChild = 0;
                    continue;
                }

                // An empty stack means there is nothing left to return to: the walk is done.
                if (_intStack.Length == 0)
                {
                    break;
                }

                // Step back through Next (presumably the parent node - confirm) and
                // emit the after-child fragment for the child just finished.
                curChild = _intStack.Pop();
                curNode  = curNode.Next !;

                EmitFragment(curNode.Type | AfterChild, curNode, curChild);
                curChild++;
            }

            // Patch the starting Lazybranch, emit the final Stop, and get the resulting code array.
            PatchJump(0, _emitted.Length);
            Emit(RegexCode.Stop);
            int[] emitted = _emitted.AsSpan().ToArray();

            bool rtl      = (tree.Options & RegexOptions.RightToLeft) != 0;
            bool compiled = (tree.Options & RegexOptions.Compiled) != 0;

            // Compute prefixes to help optimize FindFirstChar.
            RegexBoyerMoore?boyerMoorePrefix = null;

            (string CharClass, bool CaseInsensitive)[]? leadingCharClasses = null;
            // NOTE(review): this listing is cut off here; the remainder of the method is not shown.
 /// <summary>
 /// Wraps the current unit in a quantifier, appends the result to the
 /// current concatenation, and clears the current unit.
 /// </summary>
 /// <param name="lazy">Passed through to MakeQuantifier.</param>
 /// <param name="min">Passed through to MakeQuantifier.</param>
 /// <param name="max">Passed through to MakeQuantifier.</param>
 internal void AddConcatenate(bool lazy, int min, int max)
 {
     RegexNode quantified = this._unit.MakeQuantifier(lazy, min, max);

     this._concatenation.AddChild(quantified);
     this._unit = null;
 }
        /// <summary>
        /// Simplifies this concatenation in place: nested type-0x19 children with the
        /// same RightToLeft setting are flattened into this node, adjacent text
        /// children (types 9 and 12) with matching options are merged into one
        /// type-12 string, and type-0x17 children are dropped.
        /// </summary>
        /// <returns>
        /// A new type-0x17 node when there are no children; otherwise the result of
        /// StripEnation(0x17).
        /// </returns>
        internal RegexNode ReduceConcatenation()
        {
            if (this._children == null)
            {
                return new RegexNode(0x17, this._options);
            }

            bool         prevWasText     = false;
            RegexOptions prevTextOptions = RegexOptions.None;

            // 'read' scans every child; 'write' is the slot the next kept child lands in.
            int read;
            int write = 0;

            for (read = 0; read < this._children.Count; read++, write++)
            {
                RegexNode current = (RegexNode)this._children[read];

                if (write < read)
                {
                    this._children[write] = current;
                }

                if ((current._type == 0x19) && ((current._options & RegexOptions.RightToLeft) == (this._options & RegexOptions.RightToLeft)))
                {
                    // Splice the nested node's children in right after it and re-parent them;
                    // the nested node itself is not kept.
                    for (int k = 0; k < current._children.Count; k++)
                    {
                        ((RegexNode)current._children[k])._next = this;
                    }
                    this._children.InsertRange(read + 1, current._children);
                    write--;
                }
                else if ((current._type == 12) || (current._type == 9))
                {
                    RegexOptions textOptions = current._options & (RegexOptions.RightToLeft | RegexOptions.IgnoreCase);

                    if (prevWasText && (prevTextOptions == textOptions))
                    {
                        // Merge into the previously kept text node.
                        RegexNode target = (RegexNode)this._children[--write];

                        if (target._type == 9)
                        {
                            // Promote a single character to a one-character string first.
                            target._type = 12;
                            target._str  = target._ch.ToString();
                        }

                        string addition = (current._type == 9) ? current._ch.ToString() : current._str;

                        if ((textOptions & RegexOptions.RightToLeft) == RegexOptions.None)
                        {
                            target._str = target._str + addition;
                        }
                        else
                        {
                            // Right-to-left text is prepended rather than appended.
                            target._str = addition + target._str;
                        }
                    }
                    else
                    {
                        prevWasText     = true;
                        prevTextOptions = textOptions;
                    }
                }
                else if (current._type == 0x17)
                {
                    // Dropped: its slot will be overwritten by the next kept child.
                    write--;
                }
                else
                {
                    prevWasText = false;
                }
            }

            if (write < read)
            {
                this._children.RemoveRange(write, read - write);
            }

            return this.StripEnation(0x17);
        }
 /// <summary>
 /// Appends the current unit to the current concatenation and clears
 /// the current unit.
 /// </summary>
 internal void AddConcatenate()
 {
     RegexNode finishedUnit = this._unit;

     this._concatenation.AddChild(finishedUnit);
     this._unit = null;
 }
Beispiel #43
0
        /// <summary>
        /// FC computation and shortcut cases for each node type. Leaves push a new
        /// RegexFC; after-child visits of interior nodes merge or adjust the entries
        /// already on the FC stack.
        /// </summary>
        /// <param name="NodeType">Node type, possibly combined with BeforeChild or AfterChild.</param>
        /// <param name="node">The node being visited.</param>
        /// <param name="CurIndex">Index of the child the visit refers to.</param>
        /// <exception cref="ArgumentException">Thrown for a node type that is not handled.</exception>
        private void CalculateFC(int NodeType, RegexNode node, int CurIndex)
        {
            // Option flags are only read for node types up to RegexNode.Ref.
            bool readsOptions = NodeType <= RegexNode.Ref;
            bool ci           = readsOptions && (node._options & RegexOptions.IgnoreCase) != 0;
            bool rtl          = readsOptions && (node._options & RegexOptions.RightToLeft) != 0;

            switch (NodeType)
            {
            // ---- Visits that need no work ----
            case RegexNode.Concatenate | BeforeChild:
            case RegexNode.Alternate | BeforeChild:
            case RegexNode.Testref | BeforeChild:
            case RegexNode.Loop | BeforeChild:
            case RegexNode.Lazyloop | BeforeChild:
            case RegexNode.Group | BeforeChild:
            case RegexNode.Group | AfterChild:
            case RegexNode.Capture | BeforeChild:
            case RegexNode.Capture | AfterChild:
            case RegexNode.Greedy | BeforeChild:
            case RegexNode.Greedy | AfterChild:
            case RegexNode.Require | AfterChild:
            case RegexNode.Prevent | AfterChild:
                break;

            // ---- Leaves ----
            case RegexNode.One:
            case RegexNode.Notone:
                PushFC(new RegexFC(node._ch, NodeType == RegexNode.Notone, false, ci));
                break;

            case RegexNode.Oneloop:
            case RegexNode.Onelazy:
            case RegexNode.Notoneloop:
            case RegexNode.Notonelazy:
            {
                // Nullable when the loop's minimum count is zero.
                bool negated = NodeType == RegexNode.Notoneloop || NodeType == RegexNode.Notonelazy;
                PushFC(new RegexFC(node._ch, negated, node._m == 0, ci));
                break;
            }

            case RegexNode.Multi:
                if (node._str.Length == 0)
                {
                    PushFC(new RegexFC(true));
                }
                else
                {
                    // The leading character is the last one when matching right-to-left.
                    char lead = rtl ? node._str[node._str.Length - 1] : node._str[0];
                    PushFC(new RegexFC(lead, false, false, ci));
                }
                break;

            case RegexNode.Set:
                PushFC(new RegexFC(node._str, false, ci));
                break;

            case RegexNode.Setloop:
            case RegexNode.Setlazy:
                PushFC(new RegexFC(node._str, node._m == 0, ci));
                break;

            case RegexNode.Ref:
                PushFC(new RegexFC(RegexCharClass.AnyClass, true, false));
                break;

            case RegexNode.Empty:
            case RegexNode.Nothing:
            case RegexNode.Bol:
            case RegexNode.Eol:
            case RegexNode.Boundary:
            case RegexNode.Nonboundary:
            case RegexNode.ECMABoundary:
            case RegexNode.NonECMABoundary:
            case RegexNode.Beginning:
            case RegexNode.Start:
            case RegexNode.EndZ:
            case RegexNode.End:
                PushFC(new RegexFC(true));
                break;

            // ---- Interior nodes ----
            case RegexNode.Require | BeforeChild:
            case RegexNode.Prevent | BeforeChild:
                // The child is skipped and an FC built with 'true' stands in for it.
                SkipChild();
                PushFC(new RegexFC(true));
                break;

            case RegexNode.Testgroup | BeforeChild:
                if (CurIndex == 0)
                {
                    SkipChild();
                }
                break;

            case RegexNode.Concatenate | AfterChild:
                if (CurIndex != 0)
                {
                    RegexFC piece = PopFC();
                    _failed = !TopFC().AddFC(piece, true);
                }

                // Once the accumulated FC is not nullable, the remaining children are skipped.
                if (!TopFC()._nullable)
                {
                    _skipAllChildren = true;
                }
                break;

            case RegexNode.Alternate | AfterChild:
            case RegexNode.Testref | AfterChild:
                if (CurIndex != 0)
                {
                    RegexFC branch = PopFC();
                    _failed = !TopFC().AddFC(branch, false);
                }
                break;

            case RegexNode.Testgroup | AfterChild:
                // Child 0 was skipped, so merging only starts after child index 1.
                if (CurIndex > 1)
                {
                    RegexFC branch = PopFC();
                    _failed = !TopFC().AddFC(branch, false);
                }
                break;

            case RegexNode.Loop | AfterChild:
            case RegexNode.Lazyloop | AfterChild:
                if (node._m == 0)
                {
                    TopFC()._nullable = true;
                }
                break;

            default:
                throw new ArgumentException(SR.Format(SR.UnexpectedOpcode, NodeType.ToString(CultureInfo.CurrentCulture)));
            }
        }
 /// <summary>
 /// Begins a new group: records the opening group node and creates a fresh
 /// alternation (type 0x18) and concatenation (type 0x19) with the current options.
 /// </summary>
 /// <param name="openGroup">The node that becomes the current group.</param>
 internal void StartGroup(RegexNode openGroup)
 {
     _group = openGroup;

     _alternation   = new RegexNode(0x18, _options);
     _concatenation = new RegexNode(0x19, _options);
 }
Beispiel #45
0
            // Recursively analyzes 'node' and its children, recording findings in 'results':
            // which options were seen, which nodes are inside loops, which nodes are atomic
            // because of an ancestor, which contain captures, and which may backtrack.
            //
            //   node               - the node to analyze.
            //   results            - accumulator that the analysis writes into.
            //   isAtomicByAncestor - true when the caller determined this node should be treated as atomic.
            //   isInLoop           - true when this node is nested inside a Loop or Lazyloop.
            //
            // Returns false when there is not enough execution stack left to keep recursing
            // (for this node or any descendant); returns true once the whole subtree is analyzed.
            static bool TryAnalyze(RegexNode node, AnalysisResults results, bool isAtomicByAncestor, bool isInLoop)
            {
                if (!StackHelper.TryEnsureSufficientExecutionStack())
                {
                    return(false);
                }

                // Track whether we've seen any nodes with various options set.
                results._hasIgnoreCase  |= (node.Options & RegexOptions.IgnoreCase) != 0;
                results._hasRightToLeft |= (node.Options & RegexOptions.RightToLeft) != 0;

                // Track whether this node is inside of a loop.
                if (isInLoop)
                {
                    (results._inLoops ??= new HashSet <RegexNode>()).Add(node);
                }

                if (isAtomicByAncestor)
                {
                    // We've been told by our parent that we should be considered atomic, so add ourselves
                    // to the atomic collection.
                    results._isAtomicByAncestor.Add(node);
                }
                else
                {
                    // Certain kinds of nodes incur backtracking logic themselves: add them to the backtracking collection.
                    // We may later find that a node contains another that has backtracking; we'll add nodes based on that
                    // after examining the children.
                    switch (node.Kind)
                    {
                    case RegexNodeKind.Alternate:
                    case RegexNodeKind.Loop or RegexNodeKind.Lazyloop when node.M != node.N:
                    case RegexNodeKind.Oneloop or RegexNodeKind.Notoneloop or RegexNodeKind.Setloop or RegexNodeKind.Onelazy or RegexNodeKind.Notonelazy or RegexNodeKind.Setlazy when node.M != node.N:
                        (results._mayBacktrack ??= new HashSet <RegexNode>()).Add(node);
                        break;
                    }
                }

                // Update state for certain node types.
                bool isAtomicBySelf = false;

                switch (node.Kind)
                {
                // Some node types add atomicity around what they wrap.  Set isAtomicBySelf to true for such nodes
                // even if isAtomicByAncestor was false upon entering the method.
                case RegexNodeKind.Atomic:
                case RegexNodeKind.NegativeLookaround:
                case RegexNodeKind.PositiveLookaround:
                    isAtomicBySelf = true;
                    break;

                // Track any nodes that are themselves captures.
                case RegexNodeKind.Capture:
                    results._containsCapture.Add(node);
                    break;

                // Track whether we've recurred into a loop
                case RegexNodeKind.Loop:
                case RegexNodeKind.Lazyloop:
                    isInLoop = true;
                    break;
                }

                // Process each child.
                int childCount = node.ChildCount();

                for (int i = 0; i < childCount; i++)
                {
                    RegexNode child = node.Child(i);

                    // Determine whether the child should be treated as atomic (whether anything
                    // can backtrack into it), which is influenced by whether this node (the child's
                    // parent) is considered atomic by itself or by its parent.
                    bool treatChildAsAtomic = (isAtomicByAncestor | isAtomicBySelf) && node.Kind switch
                    {
                        // If the parent is atomic, so is the child.  That's the whole purpose
                        // of the Atomic node, and lookarounds are also implicitly atomic.
                        RegexNodeKind.Atomic or RegexNodeKind.NegativeLookaround or RegexNodeKind.PositiveLookaround => true,

                        // Each branch is considered independently, so any atomicity applied to the alternation also applies
                        // to each individual branch.  This is true as well for conditionals.
                         RegexNodeKind.Alternate or RegexNodeKind.BackreferenceConditional or RegexNodeKind.ExpressionConditional => true,

                        // Captures don't impact atomicity: if the parent of a capture is atomic, the capture is also atomic.
                         RegexNodeKind.Capture => true,

                        // If the parent is a concatenation and this is the last node, any atomicity
                        // applying to the concatenation applies to this node, too.
                         RegexNodeKind.Concatenate => i == childCount - 1,

                        // For loops with a max iteration count of 1, they themselves can be considered
                        // atomic as can whatever they wrap, as they won't ever iterate more than once
                        // and thus we don't need to worry about one iteration consuming input destined
                        // for a subsequent iteration.
                         RegexNodeKind.Loop or RegexNodeKind.Lazyloop when node.N == 1 => true,

                        // For any other parent type, give up on trying to prove atomicity.
                         _ => false,
                    };

                    // Now analyze the child.  A failure (insufficient stack) aborts the whole analysis.
                    if (!TryAnalyze(child, results, treatChildAsAtomic, isInLoop))
                    {
                        return(false);
                    }

                    // If the child contains captures, so too does this parent.
                    if (results._containsCapture.Contains(child))
                    {
                        results._containsCapture.Add(node);
                    }

                    // If the child might require backtracking into it, so too might the parent,
                    // unless the parent is itself considered atomic.  Here we don't consider parental
                    // atomicity, as we need to surface upwards to the parent whether any backtracking
                    // will be visible from this node to it.
                    if (!isAtomicBySelf && (results._mayBacktrack?.Contains(child) == true))
                    {
                        (results._mayBacktrack ??= new HashSet <RegexNode>()).Add(node);
                    }
                }

                // Successfully analyzed the node.
                return(true);
            }
        /// <summary>
        /// Simplifies this alternation in place: nested type-0x18 children are
        /// flattened into this node, adjacent single-character/set children
        /// (types 9 and 11) with matching options are merged into one type-11 set,
        /// and type-0x16 children are dropped.
        /// </summary>
        /// <returns>
        /// A new type-0x16 node when there are no children; otherwise the result of
        /// StripEnation(0x16).
        /// </returns>
        internal RegexNode ReduceAlternation()
        {
            if (this._children == null)
            {
                return new RegexNode(0x16, this._options);
            }

            bool         prevWasSet     = false;
            RegexOptions prevSetOptions = RegexOptions.None;

            // 'read' scans every child; 'write' is the slot the next kept child lands in.
            int read;
            int write = 0;

            for (read = 0; read < this._children.Count; read++, write++)
            {
                RegexNode current = (RegexNode)this._children[read];

                if (write < read)
                {
                    this._children[write] = current;
                }

                if (current._type == 0x18)
                {
                    // Splice the nested node's children in right after it and re-parent them;
                    // the nested node itself is not kept.
                    for (int k = 0; k < current._children.Count; k++)
                    {
                        ((RegexNode)current._children[k])._next = this;
                    }
                    this._children.InsertRange(read + 1, current._children);
                    write--;
                }
                else if ((current._type == 11) || (current._type == 9))
                {
                    RegexOptions setOptions = current._options & (RegexOptions.RightToLeft | RegexOptions.IgnoreCase);

                    if (prevWasSet && (prevSetOptions == setOptions))
                    {
                        // Merge into the previously kept character/set node.
                        RegexNode target = (RegexNode)this._children[--write];

                        if (target._type == 9)
                        {
                            // Promote a single character to a set first.
                            target._type = 11;
                            target._str  = RegexCharClass.SetFromChar(target._ch);
                        }

                        if (current._type == 9)
                        {
                            target._str = RegexCharClass.SetUnion(target._str, RegexCharClass.SetFromChar(current._ch));
                        }
                        else
                        {
                            target._str  = RegexCharClass.SetUnion(target._str, current._str);
                            target._str2 = RegexCharClass.CategoryUnion(target._str2, current._str2);
                        }
                    }
                    else
                    {
                        prevWasSet     = true;
                        prevSetOptions = setOptions;
                    }
                }
                else if (current._type == 0x16)
                {
                    // Dropped: its slot will be overwritten by the next kept child.
                    write--;
                }
                else
                {
                    prevWasSet = false;
                }
            }

            if (write < read)
            {
                this._children.RemoveRange(write, read - write);
            }

            return this.StripEnation(0x16);
        }
 /// <summary>
 /// Sets the current unit to the given, already constructed node.
 /// </summary>
 /// <param name="node">The node that becomes the current unit.</param>
 internal void AddUnitNode(RegexNode node)
 {
     _unit = node;
 }
        /// <summary>
        /// Reduces <paramref name="newChild"/>, appends the reduced node to this
        /// node's child list (creating the list on first use), and points the
        /// reduced node's _next back at this node.
        /// </summary>
        /// <param name="newChild">The node to reduce and append.</param>
        internal void AddChild(RegexNode newChild) {
            // The child list is created lazily, before the child is reduced.
            if (_children == null) {
                _children = new List<RegexNode>(4);
            }

            RegexNode reduced = newChild.Reduce();

            _children.Add(reduced);
            reduced._next = this;
        }
 /// <summary>
 /// Sets the current unit to a type-11 node built from the given
 /// character-class string and the current options.
 /// </summary>
 /// <param name="cc">The character-class string stored in the node.</param>
 internal void AddUnitSet(string cc)
 {
     RegexNode setNode = new RegexNode(11, this._options, cc);

     this._unit = setNode;
 }
Beispiel #50
0
        /*
         * FC computation and shortcut cases for each node type.
         *
         * NodeType is the node's type, optionally combined with BeforeChild or
         * AfterChild for interior nodes; node is the node being visited; CurIndex
         * is the index of the child the walk is currently before/after.
         * Results are communicated through the FC stack (PushFC/PopFC/TopFC) and
         * the _failed / _skipAllChildren flags.
         */
        private void CalculateFC(int NodeType, RegexNode node, int CurIndex)
        {
            // The option bits are only consulted for node types up to Ref.
            bool readsOptions = NodeType <= RegexNode.Ref;
            bool ignoreCase   = readsOptions && (node._options & RegexOptions.IgnoreCase) != 0;
            bool rightToLeft  = readsOptions && (node._options & RegexOptions.RightToLeft) != 0;

            switch (NodeType)
            {
                // Steps that neither push nor merge anything.
                case RegexNode.Concatenate | BeforeChild:
                case RegexNode.Alternate | BeforeChild:
                case RegexNode.Testref | BeforeChild:
                case RegexNode.Loop | BeforeChild:
                case RegexNode.Lazyloop | BeforeChild:
                case RegexNode.Group | BeforeChild:
                case RegexNode.Group | AfterChild:
                case RegexNode.Capture | BeforeChild:
                case RegexNode.Capture | AfterChild:
                case RegexNode.Greedy | BeforeChild:
                case RegexNode.Greedy | AfterChild:
                case RegexNode.Require | AfterChild:
                case RegexNode.Prevent | AfterChild:
                    break;

                // The first child of a Testgroup is skipped.
                case RegexNode.Testgroup | BeforeChild:
                    if (CurIndex == 0)
                    {
                        SkipChild();
                    }
                    break;

                // Fold each later child into the accumulated FC (concatenation mode), and
                // stop visiting further children once the accumulated FC is not nullable.
                case RegexNode.Concatenate | AfterChild:
                {
                    if (CurIndex != 0)
                    {
                        RegexFC latest = PopFC();
                        _failed = !TopFC().AddFC(latest, true);
                    }

                    if (!TopFC()._nullable)
                    {
                        _skipAllChildren = true;
                    }
                    break;
                }

                // Merging only starts after the child at index 1 has been visited.
                case RegexNode.Testgroup | AfterChild:
                {
                    if (CurIndex > 1)
                    {
                        RegexFC latest = PopFC();
                        _failed = !TopFC().AddFC(latest, false);
                    }
                    break;
                }

                // Every branch after the first is merged into the accumulated FC.
                case RegexNode.Alternate | AfterChild:
                case RegexNode.Testref | AfterChild:
                {
                    if (CurIndex != 0)
                    {
                        RegexFC latest = PopFC();
                        _failed = !TopFC().AddFC(latest, false);
                    }
                    break;
                }

                // A loop with a minimum of zero makes its FC nullable.
                case RegexNode.Loop | AfterChild:
                case RegexNode.Lazyloop | AfterChild:
                    if (node._m == 0)
                    {
                        TopFC()._nullable = true;
                    }
                    break;

                // The child is skipped and a nullable FC is pushed in its place.
                case RegexNode.Require | BeforeChild:
                case RegexNode.Prevent | BeforeChild:
                    SkipChild();
                    PushFC(new RegexFC(true));
                    break;

                case RegexNode.One:
                case RegexNode.Notone:
                {
                    bool negated = NodeType == RegexNode.Notone;
                    PushFC(new RegexFC(node._ch, negated, false, ignoreCase));
                    break;
                }

                case RegexNode.Oneloop:
                case RegexNode.Onelazy:
                {
                    bool nullable = node._m == 0;
                    PushFC(new RegexFC(node._ch, false, nullable, ignoreCase));
                    break;
                }

                case RegexNode.Notoneloop:
                case RegexNode.Notonelazy:
                {
                    bool nullable = node._m == 0;
                    PushFC(new RegexFC(node._ch, true, nullable, ignoreCase));
                    break;
                }

                // Uses the first character of the string, or the last one when right-to-left.
                case RegexNode.Multi:
                {
                    string text = node._str;
                    if (text.Length == 0)
                    {
                        PushFC(new RegexFC(true));
                    }
                    else
                    {
                        char edge = rightToLeft ? text[text.Length - 1] : text[0];
                        PushFC(new RegexFC(edge, false, false, ignoreCase));
                    }
                    break;
                }

                case RegexNode.Set:
                    PushFC(new RegexFC(node._str, false, ignoreCase));
                    break;

                case RegexNode.Setloop:
                case RegexNode.Setlazy:
                {
                    bool nullable = node._m == 0;
                    PushFC(new RegexFC(node._str, nullable, ignoreCase));
                    break;
                }

                case RegexNode.Ref:
                    PushFC(new RegexFC(RegexCharClass.AnyClass, true, false));
                    break;

                // All of these push the same nullable FC.
                case RegexNode.Empty:
                case RegexNode.Nothing:
                case RegexNode.Bol:
                case RegexNode.Eol:
                case RegexNode.Boundary:
                case RegexNode.Nonboundary:
                case RegexNode.ECMABoundary:
                case RegexNode.NonECMABoundary:
                case RegexNode.Beginning:
                case RegexNode.Start:
                case RegexNode.EndZ:
                case RegexNode.End:
                    PushFC(new RegexFC(true));
                    break;

                default:
                    throw new ArgumentException(SR.Format(SR.UnexpectedOpcode, NodeType.ToString(CultureInfo.CurrentCulture)));
            }
        }
 /// <summary>
 /// Scans the remaining text as a replacement pattern: runs of characters up to the next
 /// '$' are added as literal text, and each '$' is handed to <c>ScanDollar</c>.
 /// </summary>
 /// <returns>The node of type 0x19 that collected all the pieces.</returns>
 internal RegexNode ScanReplacement()
 {
     // NOTE(review): 0x19 is presumably the Concatenate node type - confirm against RegexNode.
     _concatenation = new RegexNode(0x19, _options);

     for (int remaining = CharsRight(); remaining != 0; remaining = CharsRight())
     {
         // Consume the literal run preceding the next '$' (or the end of the text).
         int literalStart = Textpos();
         for (; remaining > 0 && RightChar() != '$'; remaining--)
         {
             MoveRight();
         }
         AddConcatenate(literalStart, Textpos() - literalStart, true);

         // Characters are left over only when the run stopped at a '$'.
         if (remaining > 0)
         {
             if (MoveRightGetChar() == '$')
             {
                 AddUnitNode(ScanDollar());
             }
             AddConcatenate();
         }
     }

     return _concatenation;
 }
Beispiel #52
0
            // Processes the node, appending to the builder any literal text that every match of
            // the node must begin with. Text already in the builder is never removed.
            // Returns true when the caller may go on appending text from the nodes that follow
            // this one, and false when prefix collection has to stop here.
            static bool Process(RegexNode node, ref ValueStringBuilder vsb)
            {
                if (!StackHelper.TryEnsureSufficientExecutionStack())
                {
                    // If we're too deep on the stack, just give up finding any more prefix.
                    return false;
                }

                // We don't bother to handle reversed input, so process at most one node
                // when handling RightToLeft.
                bool rtl = (node.Options & RegexOptions.RightToLeft) != 0;

                switch (node.Type)
                {
                    // Concatenation: process children in order until one says to stop.
                    case RegexNode.Concatenate:
                    {
                        int childCount = node.ChildCount();
                        for (int i = 0; i < childCount; i++)
                        {
                            if (!Process(node.Child(i), ref vsb))
                            {
                                return false;
                            }
                        }
                        return !rtl;
                    }

                    // Alternation: find a string that's a shared prefix of all branches
                    case RegexNode.Alternate:
                    {
                        int childCount = node.ChildCount();

                        // Store the initial branch into the target builder
                        int initialLength = vsb.Length;
                        bool keepExploring = Process(node.Child(0), ref vsb);
                        int addedLength = vsb.Length - initialLength;

                        if (addedLength != 0)
                        {
                            // Then explore the rest of the branches, finding the length
                            // a prefix they all share in common with the initial branch.
                            var alternateSb = new ValueStringBuilder(64);

                            // Process each branch.  If we reach a point where we've proven there's
                            // no overlap, we can bail early.
                            for (int i = 1; i < childCount && addedLength != 0; i++)
                            {
                                alternateSb.Length = 0;

                                // Process the branch.  We want to keep exploring after this alternation,
                                // but we can't if either this branch doesn't allow for it or if the prefix
                                // supplied by this branch doesn't entirely match all the previous ones.
                                keepExploring &= Process(node.Child(i), ref alternateSb);
                                keepExploring &= alternateSb.Length == addedLength;

                                addedLength = Math.Min(addedLength, alternateSb.Length);
                                for (int j = 0; j < addedLength; j++)
                                {
                                    if (vsb[initialLength + j] != alternateSb[j])
                                    {
                                        addedLength = j;
                                        keepExploring = false;
                                        break;
                                    }
                                }
                            }

                            alternateSb.Dispose();

                            // Then cull back on what was added based on the other branches.
                            vsb.Length = initialLength + addedLength;
                        }
                        else if (childCount > 1)
                        {
                            // The first branch contributed no text (e.g. it is an anchor or empty),
                            // so the remaining branches were never examined. Any of them may consume
                            // characters, which means text following the alternation is not
                            // guaranteed to sit at the start of a match: stop exploring here.
                            keepExploring = false;
                        }

                        return !rtl && keepExploring;
                    }

                    // One character
                    case RegexNode.One when (node.Options & RegexOptions.IgnoreCase) == 0:
                        vsb.Append(node.Ch);
                        return !rtl;

                    // Multiple characters
                    case RegexNode.Multi when (node.Options & RegexOptions.IgnoreCase) == 0:
                        vsb.Append(node.Str);
                        return !rtl;

                    // Loop of one character
                    case RegexNode.Oneloop or RegexNode.Oneloopatomic or RegexNode.Onelazy when node.M > 0 && (node.Options & RegexOptions.IgnoreCase) == 0:
                    {
                        const int SingleCharIterationLimit = 32; // arbitrary cut-off to avoid creating super long strings unnecessarily
                        int count = Math.Min(node.M, SingleCharIterationLimit);
                        vsb.Append(node.Ch, count);

                        // Only continue when the loop is an exact repeat that was fully emitted.
                        return count == node.N && !rtl;
                    }

                    // Loop of a node
                    case RegexNode.Loop or RegexNode.Lazyloop when node.M > 0:
                    {
                        const int NodeIterationLimit = 4; // arbitrary cut-off to avoid creating super long strings unnecessarily
                        int limit = Math.Min(node.M, NodeIterationLimit);
                        for (int i = 0; i < limit; i++)
                        {
                            if (!Process(node.Child(0), ref vsb))
                            {
                                return false;
                            }
                        }

                        // Only continue when the loop is an exact repeat that was fully emitted.
                        return limit == node.N && !rtl;
                    }

                    // Grouping nodes for which we only care about their single child
                    case RegexNode.Atomic:
                    case RegexNode.Capture:
                        return Process(node.Child(0), ref vsb);

                    // Zero-width anchors and assertions
                    case RegexNode.Bol:
                    case RegexNode.Eol:
                    case RegexNode.Boundary:
                    case RegexNode.ECMABoundary:
                    case RegexNode.NonBoundary:
                    case RegexNode.NonECMABoundary:
                    case RegexNode.Beginning:
                    case RegexNode.Start:
                    case RegexNode.EndZ:
                    case RegexNode.End:
                    case RegexNode.Empty:
                    case RegexNode.UpdateBumpalong:
                    case RegexNode.Require:
                    case RegexNode.Prevent:
                        return true;

                    // Give up for anything else
                    default:
                        return false;
                }
            }
Beispiel #53
0
        /// <summary>
        /// Top-level RegexCode generator. Walks the tree depth-first, calling
        /// EmitFragment before and after each child of an interior node and once
        /// for each leaf, then packages the emitted code, string table and capture
        /// information into a <see cref="RegexCode"/>.
        /// </summary>
        /// <param name="tree">The tree to generate code for.</param>
        /// <param name="culture">Passed through unchanged to the RegexCode constructor.</param>
        /// <returns>The RegexCode built from the emitted opcodes.</returns>
        public RegexCode RegexCodeFromRegexTree(RegexTree tree, CultureInfo culture)
        {
            // A sparse capnum mapping is only needed when some capture numbers are unused.
            bool denseCaptures = tree.CapNumList == null || tree.CapTop == tree.CapNumList.Length;
            int capsize;

            if (denseCaptures)
            {
                _caps   = null;
                capsize = tree.CapTop;
            }
            else
            {
                _caps   = tree.Caps;
                capsize = tree.CapNumList.Length;

                // Map each listed capture number to its position in the list.
                for (int slot = 0; slot < tree.CapNumList.Length; slot++)
                {
                    _caps[tree.CapNumList[slot]] = slot;
                }
            }

            // The code always starts with a lazy branch whose target is back-patched
            // to the final Stop once everything else has been written.
            Emit(RegexOpcode.Lazybranch, 0);

            RegexNode current    = tree.Root;
            int       childIndex = 0;

            for (;;)
            {
                int childCount = current.ChildCount();

                if (childCount != 0 && childIndex < childCount)
                {
                    // Descend into the next unvisited child, remembering where we were.
                    EmitFragment(current.Kind | BeforeChild, current, childIndex);

                    current = current.Child(childIndex);
                    _intStack.Append(childIndex);
                    childIndex = 0;
                    continue;
                }

                if (childCount == 0)
                {
                    // Leaf node.
                    EmitFragment(current.Kind, current, 0);
                }

                // Nothing left to return to: the walk is complete.
                if (_intStack.Length == 0)
                {
                    break;
                }

                // Return to the parent and finish the child we just left.
                childIndex = _intStack.Pop();
                current    = current.Parent!;

                EmitFragment(current.Kind | AfterChild, current, childIndex);
                childIndex++;
            }

            // Patch the starting Lazybranch, emit the final Stop, and snapshot the code array.
            PatchJump(0, _emitted.Length);
            Emit(RegexOpcode.Stop);
            int[] code = _emitted.AsSpan().ToArray();

            // Lay the string table out as an array indexed by each string's assigned code.
            var orderedStrings = new string[_stringTable.Count];
            foreach (KeyValuePair<string, int> entry in _stringTable)
            {
                orderedStrings[entry.Value] = entry.Key;
            }

            return new RegexCode(tree, culture, code, orderedStrings, _trackCount, _caps, capsize);
        }
        /// <summary>
        /// Applies a repetition of <paramref name="min"/> to <paramref name="max"/> to this node.
        /// </summary>
        /// <param name="lazy">Selects the lazy node types instead of the greedy ones.</param>
        /// <param name="min">Minimum repeat count.</param>
        /// <param name="max">Maximum repeat count.</param>
        /// <returns>
        /// A new Empty node for {0,0}; this node unchanged for {1,1}; this node converted in
        /// place via MakeRep when it is a One, Notone or Set; otherwise a new Loop/Lazyloop
        /// node that has this node as its child.
        /// </returns>
        internal RegexNode MakeQuantifier(bool lazy, int min, int max)
        {
            if (min == 0 && max == 0)
            {
                return new RegexNode(RegexNode.Empty, _options);
            }

            if (min == 1 && max == 1)
            {
                return this;
            }

            bool isSingleUnit = _type == RegexNode.One
                             || _type == RegexNode.Notone
                             || _type == RegexNode.Set;

            if (isSingleUnit)
            {
                // The One-based loop type is passed for all three node types, as before.
                int repType = lazy ? RegexNode.Onelazy : RegexNode.Oneloop;
                MakeRep(repType, min, max);
                return this;
            }

            int loopType = lazy ? RegexNode.Lazyloop : RegexNode.Loop;
            RegexNode loop = new RegexNode(loopType, _options, min, max);
            loop.AddChild(this);
            return loop;
        }
 /// <summary>
 /// Restores the concatenation, alternation and group nodes from the chain that starts at
 /// <c>_stack</c> (linked through <c>_next</c>), then advances <c>_stack</c> past them.
 /// If the restored group has type 0x22 and no children yet, the pending unit becomes
 /// its first child.
 /// </summary>
 /// <exception cref="System.Exception">
 /// The exception produced by MakeException for "IllegalCondition" when such a group has
 /// no pending unit.
 /// </exception>
 internal void PopGroup()
 {
     _concatenation = _stack;
     _alternation   = _concatenation._next;
     _group         = _alternation._next;
     _stack         = _group._next;

     // NOTE(review): 0x22 is presumably the Testgroup (conditional) node type - confirm.
     bool needsCondition = _group.Type() == 0x22 && _group.ChildCount() == 0;
     if (!needsCondition)
     {
         return;
     }

     if (_unit == null)
     {
         throw MakeException(SR.GetString("IllegalCondition"));
     }

     _group.AddChild(_unit);
     _unit = null;
 }
Beispiel #56
0
        /// <summary>
        /// Simplifies this node's child list in place: children of type 0x18 are replaced by
        /// their own children, children of type 0x16 are dropped, and adjacent children of
        /// type 9 / 11 with matching RightToLeft/IgnoreCase options are merged into a single
        /// type-11 node when the character classes allow it.
        /// </summary>
        /// <returns>
        /// A new node of type 0x16 when there are no children; otherwise the result of
        /// <c>StripEnation(0x16)</c>.
        /// </returns>
        internal RegexNode ReduceAlternation()
        {
            // NOTE(review): the numeric types are presumably 9 = One, 11 = Set, 0x16 = Nothing
            // and 0x18 = Alternate - confirm against the RegexNode constants.
            if (_children == null)
            {
                return new RegexNode(0x16, _options);
            }

            bool         prevIsCharNode  = false;   // previous kept child was a type 9 / 11 node
            bool         prevCannotMerge = false;   // previous kept child was a non-mergeable set
            RegexOptions prevOptions     = RegexOptions.None;
            int          read;                      // index of the child being examined
            int          write;                     // index where the next kept child goes

            for (read = 0, write = 0; read < _children.Count; read++, write++)
            {
                RegexNode current = _children[read];
                if (write < read)
                {
                    _children[write] = current;
                }

                if (current._type == 0x18)
                {
                    // Splice the nested node's children in right after it and drop the node itself.
                    foreach (RegexNode grandchild in current._children)
                    {
                        grandchild._next = this;
                    }
                    _children.InsertRange(read + 1, current._children);
                    write--;
                    continue;
                }

                bool isSet  = current._type == 11;
                bool isChar = current._type == 9;

                if (!isSet && !isChar)
                {
                    if (current._type == 0x16)
                    {
                        // Dropped from the list.
                        write--;
                    }
                    else
                    {
                        // Any other node breaks a run of mergeable nodes.
                        prevIsCharNode  = false;
                        prevCannotMerge = false;
                    }
                    continue;
                }

                RegexOptions currentOptions = current._options & (RegexOptions.RightToLeft | RegexOptions.IgnoreCase);

                bool canMerge = prevIsCharNode
                             && prevOptions == currentOptions
                             && !prevCannotMerge
                             && (isChar || RegexCharClass.IsMergeable(current._str));

                if (!canMerge)
                {
                    // This node starts a new potential run.
                    prevIsCharNode  = true;
                    prevCannotMerge = isSet && !RegexCharClass.IsMergeable(current._str);
                    prevOptions     = currentOptions;
                    continue;
                }

                // Merge the current node into the previously kept one.
                write--;
                RegexNode previous = _children[write];

                RegexCharClass merged;
                if (previous._type == 9)
                {
                    merged = new RegexCharClass();
                    merged.AddChar(previous._ch);
                }
                else
                {
                    merged = RegexCharClass.Parse(previous._str);
                }

                if (isChar)
                {
                    merged.AddChar(current._ch);
                }
                else
                {
                    merged.AddCharClass(RegexCharClass.Parse(current._str));
                }

                previous._type = 11;
                previous._str  = merged.ToStringClass();
            }

            // Trim the slots left over after compaction.
            if (write < read)
            {
                _children.RemoveRange(write, read - write);
            }

            return StripEnation(0x16);
        }
 /// <summary>
 /// Returns the parser state to its starting values and installs
 /// <paramref name="topopts"/> as the current options.
 /// </summary>
 /// <param name="topopts">Options to make current.</param>
 internal void Reset(RegexOptions topopts)
 {
     _currentPos      = 0;
     _autocap         = 1;
     _ignoreNextParen = false;

     // Removes the first Count - 1 entries of the options stack; the last entry is kept.
     int stacked = _optionsStack.Count;
     if (stacked > 0)
     {
         _optionsStack.RemoveRange(0, stacked - 1);
     }

     _options = topopts;
     _stack   = null;
 }
        /*
         * RegexReplacement shares its parser with Regex, so the constructor
         * receives a RegexNode that must be a concatenation of constant
         * strings (Multi / One) and backreferences (Ref).
         *
         * Consecutive literal children are joined into one string. Each rule
         * is either the index of a literal in the string list, or the value
         * -Specials - 1 - slot for a backreference to capture slot "slot".
         * An ArgumentException is thrown when concat is not a Concatenate
         * node or has a child of any other type.
         */
        internal RegexReplacement(String rep, RegexNode concat, Hashtable _caps)
        {
            _rep = rep;

            if (concat.Type() != RegexNode.Concatenate)
            {
                throw new ArgumentException(SR.GetString(SR.ReplacementError));
            }

            StringBuilder pendingLiteral = new StringBuilder();
            ArrayList     literalList    = new ArrayList();
            ArrayList     ruleList       = new ArrayList();

            for (int i = 0; i < concat.ChildCount(); i++)
            {
                RegexNode child     = concat.Child(i);
                int       childType = child.Type();

                if (childType == RegexNode.Multi)
                {
                    pendingLiteral.Append(child._str);
                }
                else if (childType == RegexNode.One)
                {
                    pendingLiteral.Append(child._ch);
                }
                else if (childType == RegexNode.Ref)
                {
                    // Flush the literal text collected so far before recording the backreference.
                    if (pendingLiteral.Length > 0)
                    {
                        ruleList.Add(literalList.Count);
                        literalList.Add(pendingLiteral.ToString());
                        pendingLiteral.Length = 0;
                    }

                    // Non-negative slots are translated through the caps table when one is given.
                    int slot = child._m;
                    if (_caps != null && slot >= 0)
                    {
                        slot = (int)_caps[slot];
                    }

                    ruleList.Add(-Specials - 1 - slot);
                }
                else
                {
                    throw new ArgumentException(SR.GetString(SR.ReplacementError));
                }
            }

            // Flush any trailing literal text.
            if (pendingLiteral.Length > 0)
            {
                ruleList.Add(literalList.Count);
                literalList.Add(pendingLiteral.ToString());
            }

            _strings = literalList;
            _rules   = ruleList;
        }
 /// <summary>
 /// Adds <paramref name="cch"/> characters of the pattern, starting at
 /// <paramref name="pos"/>, to the current concatenation as a literal node: a type-12 node
 /// holding a string when more than one character is requested, otherwise a type-9 node
 /// holding a single character. Does nothing when <paramref name="cch"/> is zero.
 /// </summary>
 /// <param name="pos">Index of the first character in the pattern.</param>
 /// <param name="cch">Number of characters to add.</param>
 /// <param name="isReplacement">When true, the text is never lower-cased.</param>
 internal void AddConcatenate(int pos, int cch, bool isReplacement)
 {
     if (cch == 0)
     {
         return;
     }

     // Text is lower-cased only when the case-insensitive option is on and this is not a replacement.
     bool lowerCase = UseOptionI() && !isReplacement;

     RegexNode literal;
     if (cch > 1)
     {
         string text = _pattern.Substring(pos, cch);
         if (lowerCase)
         {
             // Lower-case one character at a time using the parser's culture.
             char[] lowered = text.ToCharArray();
             for (int i = 0; i < lowered.Length; i++)
             {
                 lowered[i] = char.ToLower(lowered[i], _culture);
             }
             text = new string(lowered);
         }
         literal = new RegexNode(12, _options, text);
     }
     else
     {
         char single = _pattern[pos];
         if (lowerCase)
         {
             single = char.ToLower(single, _culture);
         }
         literal = new RegexNode(9, _options, single);
     }

     _concatenation.AddChild(literal);
 }
Beispiel #60
0
        /// <summary>
        /// Emits the code for a single step of the depth-first tree walk: either a leaf
        /// node, or the point just before / just after one child of an interior node.
        /// Jump targets that are not yet known are emitted as 0, their positions are
        /// remembered with PushInt, and they are fixed up later with PatchJump.
        /// </summary>
        /// <param name="nodetype">
        /// The node's type, combined with BeforeChild or AfterChild for interior nodes.
        /// </param>
        /// <param name="node">The node being visited.</param>
        /// <param name="curIndex">Index of the child the walk is currently before or after.</param>
        /// <exception cref="ArgumentException">
        /// <paramref name="nodetype"/> is not one of the handled values.
        /// </exception>
        private void EmitFragment(int nodetype, RegexNode node, int curIndex)
        {
            // Modifier bits OR-ed into the opcode; only computed for node types up to Ref.
            int bits = 0;

            if (nodetype <= RegexNode.Ref)
            {
                if (node.UseOptionR())
                    bits |= RegexCode.Rtl;
                if ((node._options & RegexOptions.IgnoreCase) != 0)
                    bits |= RegexCode.Ci;
            }

            switch (nodetype)
            {
                // Nothing is emitted for these.
                case RegexNode.Concatenate | BeforeChild:
                case RegexNode.Concatenate | AfterChild:
                case RegexNode.Empty:
                    break;

                // Every branch except the last starts with a Lazybranch whose target is patched
                // once the branch has been emitted.
                case RegexNode.Alternate | BeforeChild:
                    if (curIndex < node._children.Count - 1)
                    {
                        PushInt(CurPos());
                        Emit(RegexCode.Lazybranch, 0);
                    }
                    break;

                case RegexNode.Alternate | AfterChild:
                    {
                        if (curIndex < node._children.Count - 1)
                        {
                            // End of a non-final branch: emit a Goto (target patched after the
                            // last branch) and point this branch's Lazybranch just past it.
                            int LBPos = PopInt();
                            PushInt(CurPos());
                            Emit(RegexCode.Goto, 0);
                            PatchJump(LBPos, CurPos());
                        }
                        else
                        {
                            // End of the last branch: patch one pending Goto per earlier branch
                            // to the current position.
                            int I;
                            for (I = 0; I < curIndex; I++)
                            {
                                PatchJump(PopInt(), CurPos());
                            }
                        }
                        break;
                    }

                // Only the first child gets a prologue; it tests the capture mapped from node._m.
                case RegexNode.Testref | BeforeChild:
                    switch (curIndex)
                    {
                        case 0:
                            Emit(RegexCode.Setjump);
                            PushInt(CurPos());
                            Emit(RegexCode.Lazybranch, 0);
                            Emit(RegexCode.Testref, MapCapnum(node._m));
                            Emit(RegexCode.Forejump);
                            break;
                    }
                    break;

                case RegexNode.Testref | AfterChild:
                    switch (curIndex)
                    {
                        case 0:
                            {
                                // After the first child: emit a Goto to be patched later and
                                // point the pending Lazybranch at the code that follows it.
                                int Branchpos = PopInt();
                                PushInt(CurPos());
                                Emit(RegexCode.Goto, 0);
                                PatchJump(Branchpos, CurPos());
                                Emit(RegexCode.Forejump);
                                if (node._children.Count > 1)
                                    break;
                                // With a single child there is no second child to wait for,
                                // so the Goto is patched right away.
                                goto case 1;
                            }
                        case 1:
                            PatchJump(PopInt(), CurPos());
                            break;
                    }
                    break;

                // Only the first child gets a prologue.
                case RegexNode.Testgroup | BeforeChild:
                    switch (curIndex)
                    {
                        case 0:
                            Emit(RegexCode.Setjump);
                            Emit(RegexCode.Setmark);
                            PushInt(CurPos());
                            Emit(RegexCode.Lazybranch, 0);
                            break;
                    }
                    break;

                case RegexNode.Testgroup | AfterChild:
                    switch (curIndex)
                    {
                        case 0:
                            Emit(RegexCode.Getmark);
                            Emit(RegexCode.Forejump);
                            break;
                        case 1:
                            // After the second child: emit a Goto to be patched later and
                            // point the pending Lazybranch at the code that follows it.
                            int Branchpos = PopInt();
                            PushInt(CurPos());
                            Emit(RegexCode.Goto, 0);
                            PatchJump(Branchpos, CurPos());
                            Emit(RegexCode.Getmark);
                            Emit(RegexCode.Forejump);

                            if (node._children.Count > 2)
                                break;
                            // With no third child the Goto is patched right away.
                            goto case 2;
                        case 2:
                            PatchJump(PopInt(), CurPos());
                            break;
                    }
                    break;

                case RegexNode.Loop | BeforeChild:
                case RegexNode.Lazyloop | BeforeChild:

                    // Loops with a finite maximum or a minimum above 1 use the counting
                    // opcodes; all other loops use the mark opcodes.
                    if (node._n < Int32.MaxValue || node._m > 1)
                        Emit(node._m == 0 ? RegexCode.Nullcount : RegexCode.Setcount, node._m == 0 ? 0 : 1 - node._m);
                    else
                        Emit(node._m == 0 ? RegexCode.Nullmark : RegexCode.Setmark);

                    // With a minimum of zero, a Goto is emitted here and patched after the body
                    // to land on the loop's branch instruction.
                    if (node._m == 0)
                    {
                        PushInt(CurPos());
                        Emit(RegexCode.Goto, 0);
                    }
                    // Remember where the loop body starts.
                    PushInt(CurPos());
                    break;

                case RegexNode.Loop | AfterChild:
                case RegexNode.Lazyloop | AfterChild:
                    {
                        int StartJumpPos = CurPos();
                        // Offset added to the branch opcode; it is zero for Loop.
                        int Lazy = (nodetype - (RegexNode.Loop | AfterChild));

                        // Same counting-versus-mark choice as in the BeforeChild case.
                        if (node._n < Int32.MaxValue || node._m > 1)
                            Emit(RegexCode.Branchcount + Lazy, PopInt(), node._n == Int32.MaxValue ? Int32.MaxValue : node._n - node._m);
                        else
                            Emit(RegexCode.Branchmark + Lazy, PopInt());

                        // Patch the Goto emitted before the body to jump to the branch instruction.
                        if (node._m == 0)
                            PatchJump(PopInt(), StartJumpPos);
                    }
                    break;

                // Nothing is emitted for plain groups.
                case RegexNode.Group | BeforeChild:
                case RegexNode.Group | AfterChild:
                    break;

                case RegexNode.Capture | BeforeChild:
                    Emit(RegexCode.Setmark);
                    break;

                case RegexNode.Capture | AfterChild:
                    Emit(RegexCode.Capturemark, MapCapnum(node._m), MapCapnum(node._n));
                    break;

                case RegexNode.Require | BeforeChild:
                    // NOTE: the following line causes lookahead/lookbehind to be
                    // NON-BACKTRACKING. It can be commented out with (*)
                    Emit(RegexCode.Setjump);


                    Emit(RegexCode.Setmark);
                    break;

                case RegexNode.Require | AfterChild:
                    Emit(RegexCode.Getmark);

                    // NOTE: the following line causes lookahead/lookbehind to be
                    // NON-BACKTRACKING. It can be commented out with (*)
                    Emit(RegexCode.Forejump);

                    break;

                case RegexNode.Prevent | BeforeChild:
                    Emit(RegexCode.Setjump);
                    PushInt(CurPos());
                    Emit(RegexCode.Lazybranch, 0);
                    break;

                case RegexNode.Prevent | AfterChild:
                    Emit(RegexCode.Backjump);
                    // The Lazybranch emitted before the child lands just after the Backjump.
                    PatchJump(PopInt(), CurPos());
                    Emit(RegexCode.Forejump);
                    break;

                case RegexNode.Greedy | BeforeChild:
                    Emit(RegexCode.Setjump);
                    break;

                case RegexNode.Greedy | AfterChild:
                    Emit(RegexCode.Forejump);
                    break;

                // The node type value is used directly as the opcode.
                case RegexNode.One:
                case RegexNode.Notone:
                    Emit(node._type | bits, (int)node._ch);
                    break;

                // Emitted in two parts: a fixed repeat for the first _m iterations, then the
                // loop opcode for the remaining _n - _m (or Int32.MaxValue when _n is Int32.MaxValue).
                case RegexNode.Notoneloop:
                case RegexNode.Notonelazy:
                case RegexNode.Oneloop:
                case RegexNode.Onelazy:
                    if (node._m > 0)
                        Emit(((node._type == RegexNode.Oneloop || node._type == RegexNode.Onelazy) ?
                              RegexCode.Onerep : RegexCode.Notonerep) | bits, (int)node._ch, node._m);
                    if (node._n > node._m)
                        Emit(node._type | bits, (int)node._ch, node._n == Int32.MaxValue ?
                             Int32.MaxValue : node._n - node._m);
                    break;

                // Same two-part scheme as above; the operand is the StringCode of the set string.
                case RegexNode.Setloop:
                case RegexNode.Setlazy:
                    if (node._m > 0)
                        Emit(RegexCode.Setrep | bits, StringCode(node._str), node._m);
                    if (node._n > node._m)
                        Emit(node._type | bits, StringCode(node._str),
                             (node._n == Int32.MaxValue) ? Int32.MaxValue : node._n - node._m);
                    break;

                case RegexNode.Multi:
                    Emit(node._type | bits, StringCode(node._str));
                    break;

                case RegexNode.Set:
                    Emit(node._type | bits, StringCode(node._str));
                    break;

                case RegexNode.Ref:
                    Emit(node._type | bits, MapCapnum(node._m));
                    break;

                // These emit the node type as the opcode, with no operands and no modifier bits.
                case RegexNode.Nothing:
                case RegexNode.Bol:
                case RegexNode.Eol:
                case RegexNode.Boundary:
                case RegexNode.Nonboundary:
                case RegexNode.ECMABoundary:
                case RegexNode.NonECMABoundary:
                case RegexNode.Beginning:
                case RegexNode.Start:
                case RegexNode.EndZ:
                case RegexNode.End:
                    Emit(node._type);
                    break;

                default:
                    throw new ArgumentException(SR.Format(SR.UnexpectedOpcode, nodetype.ToString(CultureInfo.CurrentCulture)));
            }
        }