internal void AddChild(RegexNode newChild)
        {
            RegexNode reducedChild;

            if (_children == null)
            {
                _children = new List <RegexNode>(4);
            }

            reducedChild = newChild.Reduce();

            _children.Add(reducedChild);
            reducedChild._next = this;
        }
        /// <summary>
        /// The main RegexCode generator. It does a depth-first walk
        /// through the tree and calls EmitFragment to emits code before
        /// and after each child of an interior node, and at each leaf.
        /// </summary>
        private void EmitFragment(int nodetype, RegexNode node, int curIndex)
        {
            int bits = 0;

            if (nodetype <= RegexNode.Ref)
            {
                if (node.UseOptionR())
                {
                    bits |= RegexCode.Rtl;
                }
                if ((node._options & RegexOptions.IgnoreCase) != 0)
                {
                    bits |= RegexCode.Ci;
                }
            }

            switch (nodetype)
            {
            case RegexNode.Concatenate | BeforeChild:
            case RegexNode.Concatenate | AfterChild:
            case RegexNode.Empty:
                break;

            case RegexNode.Alternate | BeforeChild:
                if (curIndex < node._children.Count - 1)
                {
                    PushInt(CurPos());
                    Emit(RegexCode.Lazybranch, 0);
                }
                break;

            case RegexNode.Alternate | AfterChild:
            {
                if (curIndex < node._children.Count - 1)
                {
                    int LBPos = PopInt();
                    PushInt(CurPos());
                    Emit(RegexCode.Goto, 0);
                    PatchJump(LBPos, CurPos());
                }
                else
                {
                    int I;
                    for (I = 0; I < curIndex; I++)
                    {
                        PatchJump(PopInt(), CurPos());
                    }
                }
                break;
            }

            case RegexNode.Testref | BeforeChild:
                switch (curIndex)
                {
                case 0:
                    Emit(RegexCode.Setjump);
                    PushInt(CurPos());
                    Emit(RegexCode.Lazybranch, 0);
                    Emit(RegexCode.Testref, MapCapnum(node._m));
                    Emit(RegexCode.Forejump);
                    break;
                }
                break;

            case RegexNode.Testref | AfterChild:
                switch (curIndex)
                {
                case 0:
                {
                    int Branchpos = PopInt();
                    PushInt(CurPos());
                    Emit(RegexCode.Goto, 0);
                    PatchJump(Branchpos, CurPos());
                    Emit(RegexCode.Forejump);
                    if (node._children.Count > 1)
                    {
                        break;
                    }
                    // else fallthrough
                    goto case 1;
                }

                case 1:
                    PatchJump(PopInt(), CurPos());
                    break;
                }
                break;

            case RegexNode.Testgroup | BeforeChild:
                switch (curIndex)
                {
                case 0:
                    Emit(RegexCode.Setjump);
                    Emit(RegexCode.Setmark);
                    PushInt(CurPos());
                    Emit(RegexCode.Lazybranch, 0);
                    break;
                }
                break;

            case RegexNode.Testgroup | AfterChild:
                switch (curIndex)
                {
                case 0:
                    Emit(RegexCode.Getmark);
                    Emit(RegexCode.Forejump);
                    break;

                case 1:
                    int Branchpos = PopInt();
                    PushInt(CurPos());
                    Emit(RegexCode.Goto, 0);
                    PatchJump(Branchpos, CurPos());
                    Emit(RegexCode.Getmark);
                    Emit(RegexCode.Forejump);

                    if (node._children.Count > 2)
                    {
                        break;
                    }
                    // else fallthrough
                    goto case 2;

                case 2:
                    PatchJump(PopInt(), CurPos());
                    break;
                }
                break;

            case RegexNode.Loop | BeforeChild:
            case RegexNode.Lazyloop | BeforeChild:

                if (node._n < int.MaxValue || node._m > 1)
                {
                    Emit(node._m == 0 ? RegexCode.Nullcount : RegexCode.Setcount, node._m == 0 ? 0 : 1 - node._m);
                }
                else
                {
                    Emit(node._m == 0 ? RegexCode.Nullmark : RegexCode.Setmark);
                }

                if (node._m == 0)
                {
                    PushInt(CurPos());
                    Emit(RegexCode.Goto, 0);
                }
                PushInt(CurPos());
                break;

            case RegexNode.Loop | AfterChild:
            case RegexNode.Lazyloop | AfterChild:
            {
                int StartJumpPos = CurPos();
                int Lazy         = (nodetype - (RegexNode.Loop | AfterChild));

                if (node._n < int.MaxValue || node._m > 1)
                {
                    Emit(RegexCode.Branchcount + Lazy, PopInt(), node._n == int.MaxValue ? int.MaxValue : node._n - node._m);
                }
                else
                {
                    Emit(RegexCode.Branchmark + Lazy, PopInt());
                }

                if (node._m == 0)
                {
                    PatchJump(PopInt(), StartJumpPos);
                }
            }
            break;

            case RegexNode.Group | BeforeChild:
            case RegexNode.Group | AfterChild:
                break;

            case RegexNode.Capture | BeforeChild:
                Emit(RegexCode.Setmark);
                break;

            case RegexNode.Capture | AfterChild:
                Emit(RegexCode.Capturemark, MapCapnum(node._m), MapCapnum(node._n));
                break;

            case RegexNode.Require | BeforeChild:
                // NOTE: the following line causes lookahead/lookbehind to be
                // NON-BACKTRACKING. It can be commented out with (*)
                Emit(RegexCode.Setjump);


                Emit(RegexCode.Setmark);
                break;

            case RegexNode.Require | AfterChild:
                Emit(RegexCode.Getmark);

                // NOTE: the following line causes lookahead/lookbehind to be
                // NON-BACKTRACKING. It can be commented out with (*)
                Emit(RegexCode.Forejump);

                break;

            case RegexNode.Prevent | BeforeChild:
                Emit(RegexCode.Setjump);
                PushInt(CurPos());
                Emit(RegexCode.Lazybranch, 0);
                break;

            case RegexNode.Prevent | AfterChild:
                Emit(RegexCode.Backjump);
                PatchJump(PopInt(), CurPos());
                Emit(RegexCode.Forejump);
                break;

            case RegexNode.Greedy | BeforeChild:
                Emit(RegexCode.Setjump);
                break;

            case RegexNode.Greedy | AfterChild:
                Emit(RegexCode.Forejump);
                break;

            case RegexNode.One:
            case RegexNode.Notone:
                Emit(node._type | bits, node._ch);
                break;

            case RegexNode.Notoneloop:
            case RegexNode.Notonelazy:
            case RegexNode.Oneloop:
            case RegexNode.Onelazy:
                if (node._m > 0)
                {
                    Emit(((node._type == RegexNode.Oneloop || node._type == RegexNode.Onelazy) ?
                          RegexCode.Onerep : RegexCode.Notonerep) | bits, node._ch, node._m);
                }
                if (node._n > node._m)
                {
                    Emit(node._type | bits, node._ch, node._n == int.MaxValue ?
                         int.MaxValue : node._n - node._m);
                }
                break;

            case RegexNode.Setloop:
            case RegexNode.Setlazy:
                if (node._m > 0)
                {
                    Emit(RegexCode.Setrep | bits, StringCode(node._str), node._m);
                }
                if (node._n > node._m)
                {
                    Emit(node._type | bits, StringCode(node._str),
                         (node._n == int.MaxValue) ? int.MaxValue : node._n - node._m);
                }
                break;

            case RegexNode.Multi:
                Emit(node._type | bits, StringCode(node._str));
                break;

            case RegexNode.Set:
                Emit(node._type | bits, StringCode(node._str));
                break;

            case RegexNode.Ref:
                Emit(node._type | bits, MapCapnum(node._m));
                break;

            case RegexNode.Nothing:
            case RegexNode.Bol:
            case RegexNode.Eol:
            case RegexNode.Boundary:
            case RegexNode.Nonboundary:
            case RegexNode.ECMABoundary:
            case RegexNode.NonECMABoundary:
            case RegexNode.Beginning:
            case RegexNode.Start:
            case RegexNode.EndZ:
            case RegexNode.End:
                Emit(node._type);
                break;

            default:
                throw new ArgumentException("");
            }
        }
Example #3
0
        /*
         * This is a related computation: it takes a RegexTree and computes the
         * leading substring if it see one. It's quite trivial and gives up easily.
         */
        internal static RegexPrefix Prefix(RegexTree tree)
        {
            RegexNode curNode;
            RegexNode concatNode = null;
            int       nextChild  = 0;

            curNode = tree._root;

            for (; ;)
            {
                switch (curNode._type)
                {
                case RegexNode.Concatenate:
                    if (curNode.ChildCount() > 0)
                    {
                        concatNode = curNode;
                        nextChild  = 0;
                    }
                    break;

                case RegexNode.Greedy:
                case RegexNode.Capture:
                    curNode    = curNode.Child(0);
                    concatNode = null;
                    continue;

                case RegexNode.Oneloop:
                case RegexNode.Onelazy:
                    if (curNode._m > 0)
                    {
                        string pref = string.Empty.PadRight(curNode._m, curNode._ch);
                        return(new RegexPrefix(pref, 0 != (curNode._options & RegexOptions.IgnoreCase)));
                    }
                    else
                    {
                        return(RegexPrefix.Empty);
                    }

                case RegexNode.One:
                    return(new RegexPrefix(curNode._ch.ToString(), 0 != (curNode._options & RegexOptions.IgnoreCase)));

                case RegexNode.Multi:
                    return(new RegexPrefix(curNode._str, 0 != (curNode._options & RegexOptions.IgnoreCase)));

                case RegexNode.Bol:
                case RegexNode.Eol:
                case RegexNode.Boundary:
                case RegexNode.ECMABoundary:
                case RegexNode.Beginning:
                case RegexNode.Start:
                case RegexNode.EndZ:
                case RegexNode.End:
                case RegexNode.Empty:
                case RegexNode.Require:
                case RegexNode.Prevent:
                    break;

                default:
                    return(RegexPrefix.Empty);
                }

                if (concatNode == null || nextChild >= concatNode.ChildCount())
                {
                    return(RegexPrefix.Empty);
                }

                curNode = concatNode.Child(nextChild++);
            }
        }
Example #4
0
        /*
         * FC computation and shortcut cases for each node type
         */
        private void CalculateFC(int NodeType, RegexNode node, int CurIndex)
        {
            bool ci  = false;
            bool rtl = false;

            if (NodeType <= RegexNode.Ref)
            {
                if ((node._options & RegexOptions.IgnoreCase) != 0)
                {
                    ci = true;
                }
                if ((node._options & RegexOptions.RightToLeft) != 0)
                {
                    rtl = true;
                }
            }

            switch (NodeType)
            {
            case RegexNode.Concatenate | BeforeChild:
            case RegexNode.Alternate | BeforeChild:
            case RegexNode.Testref | BeforeChild:
            case RegexNode.Loop | BeforeChild:
            case RegexNode.Lazyloop | BeforeChild:
                break;

            case RegexNode.Testgroup | BeforeChild:
                if (CurIndex == 0)
                {
                    SkipChild();
                }
                break;

            case RegexNode.Empty:
                PushFC(new RegexFC(true));
                break;

            case RegexNode.Concatenate | AfterChild:
                if (CurIndex != 0)
                {
                    RegexFC child = PopFC();
                    RegexFC cumul = TopFC();

                    _failed = !cumul.AddFC(child, true);
                }

                if (!TopFC()._nullable)
                {
                    _skipAllChildren = true;
                }
                break;

            case RegexNode.Testgroup | AfterChild:
                if (CurIndex > 1)
                {
                    RegexFC child = PopFC();
                    RegexFC cumul = TopFC();

                    _failed = !cumul.AddFC(child, false);
                }
                break;

            case RegexNode.Alternate | AfterChild:
            case RegexNode.Testref | AfterChild:
                if (CurIndex != 0)
                {
                    RegexFC child = PopFC();
                    RegexFC cumul = TopFC();

                    _failed = !cumul.AddFC(child, false);
                }
                break;

            case RegexNode.Loop | AfterChild:
            case RegexNode.Lazyloop | AfterChild:
                if (node._m == 0)
                {
                    TopFC()._nullable = true;
                }
                break;

            case RegexNode.Group | BeforeChild:
            case RegexNode.Group | AfterChild:
            case RegexNode.Capture | BeforeChild:
            case RegexNode.Capture | AfterChild:
            case RegexNode.Greedy | BeforeChild:
            case RegexNode.Greedy | AfterChild:
                break;

            case RegexNode.Require | BeforeChild:
            case RegexNode.Prevent | BeforeChild:
                SkipChild();
                PushFC(new RegexFC(true));
                break;

            case RegexNode.Require | AfterChild:
            case RegexNode.Prevent | AfterChild:
                break;

            case RegexNode.One:
            case RegexNode.Notone:
                PushFC(new RegexFC(node._ch, NodeType == RegexNode.Notone, false, ci));
                break;

            case RegexNode.Oneloop:
            case RegexNode.Onelazy:
                PushFC(new RegexFC(node._ch, false, node._m == 0, ci));
                break;

            case RegexNode.Notoneloop:
            case RegexNode.Notonelazy:
                PushFC(new RegexFC(node._ch, true, node._m == 0, ci));
                break;

            case RegexNode.Multi:
                if (node._str.Length == 0)
                {
                    PushFC(new RegexFC(true));
                }
                else if (!rtl)
                {
                    PushFC(new RegexFC(node._str[0], false, false, ci));
                }
                else
                {
                    PushFC(new RegexFC(node._str[node._str.Length - 1], false, false, ci));
                }
                break;

            case RegexNode.Set:
                PushFC(new RegexFC(node._str, false, ci));
                break;

            case RegexNode.Setloop:
            case RegexNode.Setlazy:
                PushFC(new RegexFC(node._str, node._m == 0, ci));
                break;

            case RegexNode.Ref:
                PushFC(new RegexFC(RegexCharClass.AnyClass, true, false));
                break;

            case RegexNode.Nothing:
            case RegexNode.Bol:
            case RegexNode.Eol:
            case RegexNode.Boundary:
            case RegexNode.Nonboundary:
            case RegexNode.ECMABoundary:
            case RegexNode.NonECMABoundary:
            case RegexNode.Beginning:
            case RegexNode.Start:
            case RegexNode.EndZ:
            case RegexNode.End:
                PushFC(new RegexFC(true));
                break;

            default:
                throw new ArgumentException("");
            }
        }