/// <summary>
        /// The main RegexCode generator. A depth-first walk through the tree
        /// calls EmitFragment to emit code before and after each child of an
        /// interior node, and at each leaf.
        /// </summary>
        /// <param name="nodetype">
        /// The node type to emit for. Interior node types arrive combined with
        /// BeforeChild or AfterChild; leaf node types are matched as-is.
        /// </param>
        /// <param name="node">The node whose operands (M, N, Ch, Str, Options, Children) are read.</param>
        /// <param name="curIndex">
        /// Index of the child being entered or left. Only the Alternate, Testref
        /// and Testgroup cases consult it.
        /// </param>
        /// <exception cref="ArgumentException">nodetype matches none of the handled cases.</exception>
        private void EmitFragment(int nodetype, RegexNode node, int curIndex)
        {
            // Option bits OR'ed into the opcode by the character, set, multi and ref cases below.
            int bits = 0;

            // Only node types numerically at or below Ref pick up the
            // right-to-left / case-insensitive bits.
            if (nodetype <= RegexNode.Ref)
            {
                if (node.UseOptionR())
                {
                    bits |= RegexCode.Rtl;
                }
                if ((node.Options & RegexOptions.IgnoreCase) != 0)
                {
                    bits |= RegexCode.Ci;
                }
            }

            // Throughout this switch, _emitted.Length is used as the current emit
            // position, and _intStack holds positions of instructions whose jump
            // operand was emitted as 0 and is filled in later through PatchJump.
            // NOTE(review): _intStack is presumably LIFO - confirm against its definition.
            switch (nodetype)
            {
            // Nothing to emit for these.
            case RegexNode.Concatenate | BeforeChild:
            case RegexNode.Concatenate | AfterChild:
            case RegexNode.Empty:
                break;

            case RegexNode.Alternate | BeforeChild:
                // Every alternative except the last starts with a Lazybranch whose
                // operand is patched in the AfterChild case; remember where it is.
                if (curIndex < node.Children.Count - 1)
                {
                    _intStack.Append(_emitted.Length);
                    Emit(RegexCode.Lazybranch, 0);
                }
                break;

            case RegexNode.Alternate | AfterChild:
            {
                if (curIndex < node.Children.Count - 1)
                {
                    // Not the last alternative: swap the pending Lazybranch position
                    // for the position of a new Goto (patched after the last
                    // alternative), then patch the Lazybranch with the position
                    // right after that Goto.
                    int LBPos = _intStack.Pop();
                    _intStack.Append(_emitted.Length);
                    Emit(RegexCode.Goto, 0);
                    PatchJump(LBPos, _emitted.Length);
                }
                else
                {
                    // Last alternative: each of the curIndex earlier alternatives
                    // left one Goto position on the stack; patch them all with
                    // the current position.
                    int I;
                    for (I = 0; I < curIndex; I++)
                    {
                        PatchJump(_intStack.Pop(), _emitted.Length);
                    }
                }
                break;
            }

            case RegexNode.Testref | BeforeChild:
                // Only the first child gets a prologue; other indices emit nothing here.
                switch (curIndex)
                {
                case 0:
                    Emit(RegexCode.Setjump);
                    _intStack.Append(_emitted.Length);      // position of the Lazybranch, patched in AfterChild
                    Emit(RegexCode.Lazybranch, 0);
                    Emit(RegexCode.Testref, MapCapnum(node.M));
                    Emit(RegexCode.Forejump);
                    break;
                }
                break;

            case RegexNode.Testref | AfterChild:
                switch (curIndex)
                {
                case 0:
                {
                    // After the first child: emit a Goto (patched in case 1) and
                    // patch the Lazybranch from BeforeChild with the position of
                    // the Forejump emitted right after the Goto.
                    int Branchpos = _intStack.Pop();
                    _intStack.Append(_emitted.Length);
                    Emit(RegexCode.Goto, 0);
                    PatchJump(Branchpos, _emitted.Length);
                    Emit(RegexCode.Forejump);
                    // With more than one child, the Goto is patched when child 1 is left.
                    if (node.Children.Count > 1)
                    {
                        break;
                    }
                    // else fallthrough
                    goto case 1;
                }

                case 1:
                    // Patch the Goto emitted in case 0 with the current position.
                    PatchJump(_intStack.Pop(), _emitted.Length);
                    break;
                }
                break;

            case RegexNode.Testgroup | BeforeChild:
                // Only the first child gets a prologue; other indices emit nothing here.
                switch (curIndex)
                {
                case 0:
                    Emit(RegexCode.Setjump);
                    Emit(RegexCode.Setmark);
                    _intStack.Append(_emitted.Length);      // position of the Lazybranch, patched in AfterChild case 1
                    Emit(RegexCode.Lazybranch, 0);
                    break;
                }
                break;

            case RegexNode.Testgroup | AfterChild:
                switch (curIndex)
                {
                case 0:
                    // After the first child only Getmark and Forejump are emitted;
                    // the Lazybranch position stays on the stack.
                    Emit(RegexCode.Getmark);
                    Emit(RegexCode.Forejump);
                    break;

                case 1:
                    // After the second child: emit a Goto (patched in case 2) and
                    // patch the Lazybranch with the position of the Getmark that
                    // follows the Goto.
                    int Branchpos = _intStack.Pop();
                    _intStack.Append(_emitted.Length);
                    Emit(RegexCode.Goto, 0);
                    PatchJump(Branchpos, _emitted.Length);
                    Emit(RegexCode.Getmark);
                    Emit(RegexCode.Forejump);

                    // With more than two children, the Goto is patched when child 2 is left.
                    if (node.Children.Count > 2)
                    {
                        break;
                    }
                    // else fallthrough
                    goto case 2;

                case 2:
                    // Patch the Goto emitted in case 1 with the current position.
                    PatchJump(_intStack.Pop(), _emitted.Length);
                    break;
                }
                break;

            case RegexNode.Loop | BeforeChild:
            case RegexNode.Lazyloop | BeforeChild:

                // Counted form (Nullcount / Setcount) when N is finite or M > 1,
                // otherwise mark form (Nullmark / Setmark). The AfterChild case
                // tests the same condition to pick Branchcount vs. Branchmark.
                if (node.N < int.MaxValue || node.M > 1)
                {
                    Emit(node.M == 0 ? RegexCode.Nullcount : RegexCode.Setcount, node.M == 0 ? 0 : 1 - node.M);
                }
                else
                {
                    Emit(node.M == 0 ? RegexCode.Nullmark : RegexCode.Setmark);
                }

                // With a minimum of zero, emit a Goto that AfterChild patches with
                // the position of the branch instruction emitted after the body.
                if (node.M == 0)
                {
                    _intStack.Append(_emitted.Length);
                    Emit(RegexCode.Goto, 0);
                }
                // Position where the loop body starts; becomes the branch operand in AfterChild.
                _intStack.Append(_emitted.Length);
                break;

            case RegexNode.Loop | AfterChild:
            case RegexNode.Lazyloop | AfterChild:
            {
                // Position of the branch instruction emitted below.
                int StartJumpPos = _emitted.Length;
                // 0 for Loop; for Lazyloop, the numeric gap between the two node types.
                // NOTE(review): adding this to Branchcount / Branchmark assumes the lazy
                // opcodes sit at that same offset - confirm against RegexCode.
                int Lazy         = (nodetype - (RegexNode.Loop | AfterChild));

                if (node.N < int.MaxValue || node.M > 1)
                {
                    // Operands: body start position, then N - M (int.MaxValue is passed through unchanged).
                    Emit(RegexCode.Branchcount + Lazy, _intStack.Pop(), node.N == int.MaxValue ? int.MaxValue : node.N - node.M);
                }
                else
                {
                    Emit(RegexCode.Branchmark + Lazy, _intStack.Pop());
                }

                // Patch the Goto emitted in BeforeChild (only present when M == 0).
                if (node.M == 0)
                {
                    PatchJump(_intStack.Pop(), StartJumpPos);
                }
            }
            break;

            // Nothing to emit for these.
            case RegexNode.Group | BeforeChild:
            case RegexNode.Group | AfterChild:
                break;

            case RegexNode.Capture | BeforeChild:
            {
                int mappedCapnum = MapCapnum(node.M);
                if (_capPositions[mappedCapnum] == default)             // Note only the first one in the case of a branch reset group
                {
                    _capPositions[mappedCapnum] = _emitted.Length;      // Note that this capture group starts here
                }
                Emit(RegexCode.Setmark);
                break;
            }

            case RegexNode.Capture | AfterChild:
                // Operands are the mapped capture numbers taken from M and N.
                Emit(RegexCode.Capturemark, MapCapnum(node.M), MapCapnum(node.N));
                break;

            case RegexNode.Require | BeforeChild:
                // NOTE: the following line causes lookahead/lookbehind to be
                // NON-BACKTRACKING. It can be commented out with (*)
                Emit(RegexCode.Setjump);


                Emit(RegexCode.Setmark);
                break;

            case RegexNode.Require | AfterChild:
                Emit(RegexCode.Getmark);

                // NOTE: the following line causes lookahead/lookbehind to be
                // NON-BACKTRACKING. It can be commented out with (*)
                Emit(RegexCode.Forejump);

                break;

            case RegexNode.Prevent | BeforeChild:
                Emit(RegexCode.Setjump);
                _intStack.Append(_emitted.Length);          // position of the Lazybranch, patched in AfterChild
                Emit(RegexCode.Lazybranch, 0);
                break;

            case RegexNode.Prevent | AfterChild:
                // The Lazybranch is patched with the position right after the
                // Backjump, i.e. the position of the Forejump.
                Emit(RegexCode.Backjump);
                PatchJump(_intStack.Pop(), _emitted.Length);
                Emit(RegexCode.Forejump);
                break;

            // The child of a Greedy node is bracketed by Setjump / Forejump.
            case RegexNode.Greedy | BeforeChild:
                Emit(RegexCode.Setjump);
                break;

            case RegexNode.Greedy | AfterChild:
                Emit(RegexCode.Forejump);
                break;

            case RegexNode.One:
            case RegexNode.Notone:
                // The opcode is the node type plus the option bits; the operand is the character.
                Emit(node.NType | bits, node.Ch);
                break;

            case RegexNode.Notoneloop:
            case RegexNode.Notonelazy:
            case RegexNode.Oneloop:
            case RegexNode.Onelazy:
                // Emitted as up to two instructions: a fixed repetition of M
                // (Onerep / Notonerep) when M > 0, then the loop opcode with
                // operand N - M when N > M (int.MaxValue is passed through unchanged).
                if (node.M > 0)
                {
                    Emit(((node.NType == RegexNode.Oneloop || node.NType == RegexNode.Onelazy) ?
                          RegexCode.Onerep : RegexCode.Notonerep) | bits, node.Ch, node.M);
                }
                if (node.N > node.M)
                {
                    Emit(node.NType | bits, node.Ch, node.N == int.MaxValue ?
                         int.MaxValue : node.N - node.M);
                }
                break;

            case RegexNode.Setloop:
            case RegexNode.Setlazy:
                // Same two-instruction split as the single-character loops above,
                // with Setrep for the fixed part and StringCode(node.Str) as the operand.
                if (node.M > 0)
                {
                    Emit(RegexCode.Setrep | bits, StringCode(node.Str), node.M);
                }
                if (node.N > node.M)
                {
                    Emit(node.NType | bits, StringCode(node.Str),
                         (node.N == int.MaxValue) ? int.MaxValue : node.N - node.M);
                }
                break;

            case RegexNode.Multi:
                Emit(node.NType | bits, StringCode(node.Str));
                break;

            case RegexNode.Set:
                Emit(node.NType | bits, StringCode(node.Str));
                break;

            case RegexNode.Ref:
                Emit(node.NType | bits, MapCapnum(node.M));
                break;

            // These are emitted as the bare node type: no operands and no option bits.
            case RegexNode.Nothing:
            case RegexNode.Bol:
            case RegexNode.Eol:
            case RegexNode.Boundary:
            case RegexNode.Nonboundary:
            case RegexNode.ECMABoundary:
            case RegexNode.NonECMABoundary:
            case RegexNode.Beginning:
            case RegexNode.Start:
            case RegexNode.EndZ:
            case RegexNode.End:
                Emit(node.NType);
                break;

            case RegexNode.ResetMatchStart:
                // Record that this node type was seen, in addition to emitting it.
                _resetMatchStartFound = true;
                Emit(node.NType);
                break;

            case RegexNode.CallSubroutine:
                Emit(RegexCode.CallSubroutine, MapCapnum(node.M));
                break;

            case RegexNode.BacktrackingVerb:
                // The value emitted as the opcode is taken directly from node.M.
                Emit(node.M);
                break;

            default:
                throw new ArgumentException(string.Format(SR.UnexpectedOpcode, nodetype.ToString()));
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// The main RegexCode generator. A depth-first walk through the tree
        /// calls EmitFragment to emit code before and after each child of an
        /// interior node, and at each leaf.
        /// </summary>
        /// <param name="nodetype">
        /// The node type to emit for. Interior node types arrive combined with
        /// BeforeChild or AfterChild; leaf node types are matched as-is.
        /// </param>
        /// <param name="node">The node whose fields (_m, _n, _ch, _str, _options, _children) are read.</param>
        /// <param name="curIndex">
        /// Index of the child being entered or left. Only the Alternate, Testref
        /// and Testgroup cases consult it.
        /// </param>
        /// <exception cref="ArgumentException">nodetype matches none of the handled cases.</exception>
        private void EmitFragment(int nodetype, RegexNode node, int curIndex)
        {
            // Option bits OR'ed into the opcode by the character, set, multi and ref cases below.
            int bits = 0;

            // Only node types numerically at or below Ref pick up the
            // right-to-left / case-insensitive bits.
            if (nodetype <= RegexNode.Ref)
            {
                if (node.UseOptionR())
                {
                    bits |= RegexCode.Rtl;
                }
                if ((node._options & RegexOptions.IgnoreCase) != 0)
                {
                    bits |= RegexCode.Ci;
                }
            }

            // Throughout this switch, PushInt(CurPos()) records the position of an
            // instruction whose jump operand is emitted as 0 and filled in later
            // through PatchJump(PopInt(), ...).
            // NOTE(review): presumably CurPos() is the current emit position and
            // PushInt / PopInt form a LIFO stack - confirm against their definitions.
            switch (nodetype)
            {
            // Nothing to emit for these.
            case RegexNode.Concatenate | BeforeChild:
            case RegexNode.Concatenate | AfterChild:
            case RegexNode.Empty:
                break;

            case RegexNode.Alternate | BeforeChild:
                // Every alternative except the last starts with a Lazybranch whose
                // operand is patched in the AfterChild case; remember where it is.
                if (curIndex < node._children.Count - 1)
                {
                    PushInt(CurPos());
                    Emit(RegexCode.Lazybranch, 0);
                }
                break;

            case RegexNode.Alternate | AfterChild:
            {
                if (curIndex < node._children.Count - 1)
                {
                    // Not the last alternative: swap the pending Lazybranch position
                    // for the position of a new Goto (patched after the last
                    // alternative), then patch the Lazybranch with the position
                    // right after that Goto.
                    int LBPos = PopInt();
                    PushInt(CurPos());
                    Emit(RegexCode.Goto, 0);
                    PatchJump(LBPos, CurPos());
                }
                else
                {
                    // Last alternative: each of the curIndex earlier alternatives
                    // left one Goto position on the stack; patch them all with
                    // the current position.
                    int I;
                    for (I = 0; I < curIndex; I++)
                    {
                        PatchJump(PopInt(), CurPos());
                    }
                }
                break;
            }

            case RegexNode.Testref | BeforeChild:
                // Only the first child gets a prologue; other indices emit nothing here.
                switch (curIndex)
                {
                case 0:
                    Emit(RegexCode.Setjump);
                    PushInt(CurPos());                      // position of the Lazybranch, patched in AfterChild
                    Emit(RegexCode.Lazybranch, 0);
                    Emit(RegexCode.Testref, MapCapnum(node._m));
                    Emit(RegexCode.Forejump);
                    break;
                }
                break;

            case RegexNode.Testref | AfterChild:
                switch (curIndex)
                {
                case 0:
                {
                    // After the first child: emit a Goto (patched in case 1) and
                    // patch the Lazybranch from BeforeChild with the position of
                    // the Forejump emitted right after the Goto.
                    int Branchpos = PopInt();
                    PushInt(CurPos());
                    Emit(RegexCode.Goto, 0);
                    PatchJump(Branchpos, CurPos());
                    Emit(RegexCode.Forejump);
                    // With more than one child, the Goto is patched when child 1 is left.
                    if (node._children.Count > 1)
                    {
                        break;
                    }
                    // else fallthrough
                    goto case 1;
                }

                case 1:
                    // Patch the Goto emitted in case 0 with the current position.
                    PatchJump(PopInt(), CurPos());
                    break;
                }
                break;

            case RegexNode.Testgroup | BeforeChild:
                // Only the first child gets a prologue; other indices emit nothing here.
                switch (curIndex)
                {
                case 0:
                    Emit(RegexCode.Setjump);
                    Emit(RegexCode.Setmark);
                    PushInt(CurPos());                      // position of the Lazybranch, patched in AfterChild case 1
                    Emit(RegexCode.Lazybranch, 0);
                    break;
                }
                break;

            case RegexNode.Testgroup | AfterChild:
                switch (curIndex)
                {
                case 0:
                    // After the first child only Getmark and Forejump are emitted;
                    // the Lazybranch position stays on the stack.
                    Emit(RegexCode.Getmark);
                    Emit(RegexCode.Forejump);
                    break;

                case 1:
                    // After the second child: emit a Goto (patched in case 2) and
                    // patch the Lazybranch with the position of the Getmark that
                    // follows the Goto.
                    int Branchpos = PopInt();
                    PushInt(CurPos());
                    Emit(RegexCode.Goto, 0);
                    PatchJump(Branchpos, CurPos());
                    Emit(RegexCode.Getmark);
                    Emit(RegexCode.Forejump);

                    // With more than two children, the Goto is patched when child 2 is left.
                    if (node._children.Count > 2)
                    {
                        break;
                    }
                    // else fallthrough
                    goto case 2;

                case 2:
                    // Patch the Goto emitted in case 1 with the current position.
                    PatchJump(PopInt(), CurPos());
                    break;
                }
                break;

            case RegexNode.Loop | BeforeChild:
            case RegexNode.Lazyloop | BeforeChild:

                // Counted form (Nullcount / Setcount) when _n is finite or _m > 1,
                // otherwise mark form (Nullmark / Setmark). The AfterChild case
                // tests the same condition to pick Branchcount vs. Branchmark.
                if (node._n < int.MaxValue || node._m > 1)
                {
                    Emit(node._m == 0 ? RegexCode.Nullcount : RegexCode.Setcount, node._m == 0 ? 0 : 1 - node._m);
                }
                else
                {
                    Emit(node._m == 0 ? RegexCode.Nullmark : RegexCode.Setmark);
                }

                // With a minimum of zero, emit a Goto that AfterChild patches with
                // the position of the branch instruction emitted after the body.
                if (node._m == 0)
                {
                    PushInt(CurPos());
                    Emit(RegexCode.Goto, 0);
                }
                // Position where the loop body starts; becomes the branch operand in AfterChild.
                PushInt(CurPos());
                break;

            case RegexNode.Loop | AfterChild:
            case RegexNode.Lazyloop | AfterChild:
            {
                // Position of the branch instruction emitted below.
                int StartJumpPos = CurPos();
                // 0 for Loop; for Lazyloop, the numeric gap between the two node types.
                // NOTE(review): adding this to Branchcount / Branchmark assumes the lazy
                // opcodes sit at that same offset - confirm against RegexCode.
                int Lazy         = (nodetype - (RegexNode.Loop | AfterChild));

                if (node._n < int.MaxValue || node._m > 1)
                {
                    // Operands: body start position, then _n - _m (int.MaxValue is passed through unchanged).
                    Emit(RegexCode.Branchcount + Lazy, PopInt(), node._n == int.MaxValue ? int.MaxValue : node._n - node._m);
                }
                else
                {
                    Emit(RegexCode.Branchmark + Lazy, PopInt());
                }

                // Patch the Goto emitted in BeforeChild (only present when _m == 0).
                if (node._m == 0)
                {
                    PatchJump(PopInt(), StartJumpPos);
                }
            }
            break;

            // Nothing to emit for these.
            case RegexNode.Group | BeforeChild:
            case RegexNode.Group | AfterChild:
                break;

            case RegexNode.Capture | BeforeChild:
                // NOTE(review): this reads the _curpos field directly while every other
                // case calls CurPos() - presumably equivalent; confirm.
                // The entry is overwritten unconditionally each time this case runs.
                _capPositions[MapCapnum(node._m)] = _curpos;        // Note that this capture group starts here
                Emit(RegexCode.Setmark);
                break;

            case RegexNode.Capture | AfterChild:
                // Operands are the mapped capture numbers taken from _m and _n.
                Emit(RegexCode.Capturemark, MapCapnum(node._m), MapCapnum(node._n));
                break;

            case RegexNode.Require | BeforeChild:
                // NOTE: the following line causes lookahead/lookbehind to be
                // NON-BACKTRACKING. It can be commented out with (*)
                Emit(RegexCode.Setjump);


                Emit(RegexCode.Setmark);
                break;

            case RegexNode.Require | AfterChild:
                Emit(RegexCode.Getmark);

                // NOTE: the following line causes lookahead/lookbehind to be
                // NON-BACKTRACKING. It can be commented out with (*)
                Emit(RegexCode.Forejump);

                break;

            case RegexNode.Prevent | BeforeChild:
                Emit(RegexCode.Setjump);
                PushInt(CurPos());                          // position of the Lazybranch, patched in AfterChild
                Emit(RegexCode.Lazybranch, 0);
                break;

            case RegexNode.Prevent | AfterChild:
                // The Lazybranch is patched with the position right after the
                // Backjump, i.e. the position of the Forejump.
                Emit(RegexCode.Backjump);
                PatchJump(PopInt(), CurPos());
                Emit(RegexCode.Forejump);
                break;

            // The child of a Greedy node is bracketed by Setjump / Forejump.
            case RegexNode.Greedy | BeforeChild:
                Emit(RegexCode.Setjump);
                break;

            case RegexNode.Greedy | AfterChild:
                Emit(RegexCode.Forejump);
                break;

            case RegexNode.One:
            case RegexNode.Notone:
                // The opcode is the node type plus the option bits; the operand is the character.
                Emit(node._type | bits, node._ch);
                break;

            case RegexNode.Notoneloop:
            case RegexNode.Notonelazy:
            case RegexNode.Oneloop:
            case RegexNode.Onelazy:
                // Emitted as up to two instructions: a fixed repetition of _m
                // (Onerep / Notonerep) when _m > 0, then the loop opcode with
                // operand _n - _m when _n > _m (int.MaxValue is passed through unchanged).
                if (node._m > 0)
                {
                    Emit(((node._type == RegexNode.Oneloop || node._type == RegexNode.Onelazy) ?
                          RegexCode.Onerep : RegexCode.Notonerep) | bits, node._ch, node._m);
                }
                if (node._n > node._m)
                {
                    Emit(node._type | bits, node._ch, node._n == int.MaxValue ?
                         int.MaxValue : node._n - node._m);
                }
                break;

            case RegexNode.Setloop:
            case RegexNode.Setlazy:
                // Same two-instruction split as the single-character loops above,
                // with Setrep for the fixed part and StringCode(node._str) as the operand.
                if (node._m > 0)
                {
                    Emit(RegexCode.Setrep | bits, StringCode(node._str), node._m);
                }
                if (node._n > node._m)
                {
                    Emit(node._type | bits, StringCode(node._str),
                         (node._n == int.MaxValue) ? int.MaxValue : node._n - node._m);
                }
                break;

            case RegexNode.Multi:
                Emit(node._type | bits, StringCode(node._str));
                break;

            case RegexNode.Set:
                Emit(node._type | bits, StringCode(node._str));
                break;

            case RegexNode.Ref:
                Emit(node._type | bits, MapCapnum(node._m));
                break;

            // These are emitted as the bare node type: no operands and no option bits.
            case RegexNode.Nothing:
            case RegexNode.Bol:
            case RegexNode.Eol:
            case RegexNode.Boundary:
            case RegexNode.Nonboundary:
            case RegexNode.ECMABoundary:
            case RegexNode.NonECMABoundary:
            case RegexNode.Beginning:
            case RegexNode.Start:
            case RegexNode.EndZ:
            case RegexNode.End:
                Emit(node._type);
                break;

            case RegexNode.ResetMatchStart:
                // Record that this node type was seen, in addition to emitting it.
                _resetMatchStartFound = true;
                Emit(node._type);
                break;

            case RegexNode.CallSubroutine:
                Emit(RegexCode.CallSubroutine, MapCapnum(node._m));
                break;

            default:
                throw new ArgumentException(SR.Format(SR.UnexpectedOpcode, nodetype.ToString(CultureInfo.CurrentCulture)));
            }
        }