Example #1
0
        /*
         * Since RegexReplacement shares the same parser as Regex,
         * the constructor takes a RegexNode which is a concatenation
         * of constant strings and backreferences.
         */
        internal RegexReplacement(String rep, RegexNode concat, Hashtable _caps)
        {
            StringBuilder sb;
            ArrayList     strings;
            ArrayList     rules;
            int           slot;

            _rep = rep;

            if (concat.Type() != RegexNode.Concatenate)
            {
                throw new ArgumentException(SR.GetString(SR.ReplacementError));
            }

            sb      = new StringBuilder();
            strings = new ArrayList();
            rules   = new ArrayList();

            for (int i = 0; i < concat.ChildCount(); i++)
            {
                RegexNode child = concat.Child(i);

                switch (child.Type())
                {
                case RegexNode.Multi:
                    sb.Append(child._str);
                    break;

                case RegexNode.One:
                    sb.Append(child._ch);
                    break;

                case RegexNode.Ref:
                    if (sb.Length > 0)
                    {
                        rules.Add(strings.Count);
                        strings.Add(sb.ToString());
                        sb.Length = 0;
                    }
                    slot = child._m;

                    if (_caps != null && slot >= 0)
                    {
                        slot = (int)_caps[slot];
                    }

                    rules.Add(-Specials - 1 - slot);
                    break;

                default:
                    throw new ArgumentException(SR.GetString(SR.ReplacementError));
                }
            }

            if (sb.Length > 0)
            {
                rules.Add(strings.Count);
                strings.Add(sb.ToString());
            }

            _strings = strings;
            _rules   = rules;
        }
Example #2
0
        /*
         * This is a related computation: it takes a RegexTree and computes the
         * leading substring if it see one. It's quite trivial and gives up easily.
         */
        internal static RegexPrefix Prefix(RegexTree tree)
        {
            RegexNode curNode;
            RegexNode concatNode = null;
            int       nextChild  = 0;

            curNode = tree._root;

            for (;;)
            {
                switch (curNode._type)
                {
                case RegexNode.Concatenate:
                    if (curNode.ChildCount() > 0)
                    {
                        concatNode = curNode;
                        nextChild  = 0;
                    }
                    break;

                case RegexNode.Greedy:
                case RegexNode.Capture:
                    curNode    = curNode.Child(0);
                    concatNode = null;
                    continue;

                case RegexNode.Oneloop:
                case RegexNode.Onelazy:
                    if (curNode._m > 0)
                    {
                        string pref = String.Empty.PadRight(curNode._m, curNode._ch);
                        return(new RegexPrefix(pref, 0 != (curNode._options & RegexOptions.IgnoreCase)));
                    }
                    else
                    {
                        return(RegexPrefix.Empty);
                    }

                case RegexNode.One:
                    return(new RegexPrefix(curNode._ch.ToString(CultureInfo.InvariantCulture), 0 != (curNode._options & RegexOptions.IgnoreCase)));

                case RegexNode.Multi:
                    return(new RegexPrefix(curNode._str, 0 != (curNode._options & RegexOptions.IgnoreCase)));

                case RegexNode.Bol:
                case RegexNode.Eol:
                case RegexNode.Boundary:
                case RegexNode.ECMABoundary:
                case RegexNode.Beginning:
                case RegexNode.Start:
                case RegexNode.EndZ:
                case RegexNode.End:
                case RegexNode.Empty:
                case RegexNode.Require:
                case RegexNode.Prevent:
                    break;

                default:
                    return(RegexPrefix.Empty);
                }

                if (concatNode == null || nextChild >= concatNode.ChildCount())
                {
                    return(RegexPrefix.Empty);
                }

                curNode = concatNode.Child(nextChild++);
            }
        }
Example #3
0
        /*
         * The main RegexCode generator. It does a depth-first walk
         * through the tree and calls EmitFragment to emits code before
         * and after each child of an interior node, and at each leaf.
         */
        internal void EmitFragment(int nodetype, RegexNode node, int CurIndex)
        {
            int bits = 0;

            if (nodetype <= RegexNode.Ref)
            {
                if (node.UseOptionR())
                {
                    bits |= RegexCode.Rtl;
                }
                if ((node._options & RegexOptions.IgnoreCase) != 0)
                {
                    bits |= RegexCode.Ci;
                }
            }

            switch (nodetype)
            {
            case RegexNode.Concatenate | BeforeChild:
            case RegexNode.Concatenate | AfterChild:
            case RegexNode.Empty:
                break;

            case RegexNode.Alternate | BeforeChild:
                if (CurIndex < node._children.Count - 1)
                {
                    PushInt(CurPos());
                    Emit(RegexCode.Lazybranch, 0);
                }
                break;

            case RegexNode.Alternate | AfterChild: {
                if (CurIndex < node._children.Count - 1)
                {
                    int LBPos = PopInt();
                    PushInt(CurPos());
                    Emit(RegexCode.Goto, 0);
                    PatchJump(LBPos, CurPos());
                }
                else
                {
                    int I;
                    for (I = 0; I < CurIndex; I++)
                    {
                        PatchJump(PopInt(), CurPos());
                    }
                }
                break;
            }

            case RegexNode.Testref | BeforeChild:
                switch (CurIndex)
                {
                case 0:
                    Emit(RegexCode.Setjump);
                    PushInt(CurPos());
                    Emit(RegexCode.Lazybranch, 0);
                    Emit(RegexCode.Testref, MapCapnum(node._m));
                    Emit(RegexCode.Forejump);
                    break;
                }
                break;

            case RegexNode.Testref | AfterChild:
                switch (CurIndex)
                {
                case 0: {
                    int Branchpos = PopInt();
                    PushInt(CurPos());
                    Emit(RegexCode.Goto, 0);
                    PatchJump(Branchpos, CurPos());
                    Emit(RegexCode.Forejump);
                    if (node._children.Count > 1)
                    {
                        break;
                    }
                    // else fallthrough
                    goto case 1;
                }

                case 1:
                    PatchJump(PopInt(), CurPos());
                    break;
                }
                break;

            case RegexNode.Testgroup | BeforeChild:
                switch (CurIndex)
                {
                case 0:
                    Emit(RegexCode.Setjump);
                    Emit(RegexCode.Setmark);
                    PushInt(CurPos());
                    Emit(RegexCode.Lazybranch, 0);
                    break;
                }
                break;

            case RegexNode.Testgroup | AfterChild:
                switch (CurIndex)
                {
                case 0:
                    Emit(RegexCode.Getmark);
                    Emit(RegexCode.Forejump);
                    break;

                case 1:
                    int Branchpos = PopInt();
                    PushInt(CurPos());
                    Emit(RegexCode.Goto, 0);
                    PatchJump(Branchpos, CurPos());
                    Emit(RegexCode.Getmark);
                    Emit(RegexCode.Forejump);

                    if (node._children.Count > 2)
                    {
                        break;
                    }
                    // else fallthrough
                    goto case 2;

                case 2:
                    PatchJump(PopInt(), CurPos());
                    break;
                }
                break;

            case RegexNode.Loop | BeforeChild:
            case RegexNode.Lazyloop | BeforeChild:

                if (node._n < Int32.MaxValue || node._m > 1)
                {
                    Emit(node._m == 0 ? RegexCode.Nullcount : RegexCode.Setcount, node._m == 0 ? 0 : 1 - node._m);
                }
                else
                {
                    Emit(node._m == 0 ? RegexCode.Nullmark : RegexCode.Setmark);
                }

                if (node._m == 0)
                {
                    PushInt(CurPos());
                    Emit(RegexCode.Goto, 0);
                }
                PushInt(CurPos());
                break;

            case RegexNode.Loop | AfterChild:
            case RegexNode.Lazyloop | AfterChild: {
                int StartJumpPos = CurPos();
                int Lazy         = (nodetype - (RegexNode.Loop | AfterChild));

                if (node._n < Int32.MaxValue || node._m > 1)
                {
                    Emit(RegexCode.Branchcount + Lazy, PopInt(), node._n == Int32.MaxValue ? Int32.MaxValue : node._n - node._m);
                }
                else
                {
                    Emit(RegexCode.Branchmark + Lazy, PopInt());
                }

                if (node._m == 0)
                {
                    PatchJump(PopInt(), StartJumpPos);
                }
            }
            break;

            case RegexNode.Group | BeforeChild:
            case RegexNode.Group | AfterChild:
                break;

            case RegexNode.Capture | BeforeChild:
                Emit(RegexCode.Setmark);
                break;

            case RegexNode.Capture | AfterChild:
                Emit(RegexCode.Capturemark, MapCapnum(node._m), MapCapnum(node._n));
                break;

            case RegexNode.Require | BeforeChild:
                // NOTE: the following line causes lookahead/lookbehind to be
                // NON-BACKTRACKING. It can be commented out with (*)
                Emit(RegexCode.Setjump);


                Emit(RegexCode.Setmark);
                break;

            case RegexNode.Require | AfterChild:
                Emit(RegexCode.Getmark);

                // NOTE: the following line causes lookahead/lookbehind to be
                // NON-BACKTRACKING. It can be commented out with (*)
                Emit(RegexCode.Forejump);

                break;

            case RegexNode.Prevent | BeforeChild:
                Emit(RegexCode.Setjump);
                PushInt(CurPos());
                Emit(RegexCode.Lazybranch, 0);
                break;

            case RegexNode.Prevent | AfterChild:
                Emit(RegexCode.Backjump);
                PatchJump(PopInt(), CurPos());
                Emit(RegexCode.Forejump);
                break;

            case RegexNode.Greedy | BeforeChild:
                Emit(RegexCode.Setjump);
                break;

            case RegexNode.Greedy | AfterChild:
                Emit(RegexCode.Forejump);
                break;

            case RegexNode.One:
            case RegexNode.Notone:
                Emit(node._type | bits, (int)node._ch);
                break;

            case RegexNode.Notoneloop:
            case RegexNode.Notonelazy:
            case RegexNode.Oneloop:
            case RegexNode.Onelazy:
                if (node._m > 0)
                {
                    Emit(((node._type == RegexNode.Oneloop || node._type == RegexNode.Onelazy) ?
                          RegexCode.Onerep : RegexCode.Notonerep) | bits, (int)node._ch, node._m);
                }
                if (node._n > node._m)
                {
                    Emit(node._type | bits, (int)node._ch, node._n == Int32.MaxValue ?
                         Int32.MaxValue : node._n - node._m);
                }
                break;

            case RegexNode.Setloop:
            case RegexNode.Setlazy:
                if (node._m > 0)
                {
                    Emit(RegexCode.Setrep | bits, StringCode(node._str), node._m);
                }
                if (node._n > node._m)
                {
                    Emit(node._type | bits, StringCode(node._str),
                         (node._n == Int32.MaxValue) ? Int32.MaxValue : node._n - node._m);
                }
                break;

            case RegexNode.Multi:
                Emit(node._type | bits, StringCode(node._str));
                break;

            case RegexNode.Set:
                Emit(node._type | bits, StringCode(node._str));
                break;

            case RegexNode.Ref:
                Emit(node._type | bits, MapCapnum(node._m));
                break;

            case RegexNode.Nothing:
            case RegexNode.Bol:
            case RegexNode.Eol:
            case RegexNode.Boundary:
            case RegexNode.Nonboundary:
            case RegexNode.ECMABoundary:
            case RegexNode.NonECMABoundary:
            case RegexNode.Beginning:
            case RegexNode.Start:
            case RegexNode.EndZ:
            case RegexNode.End:
                Emit(node._type);
                break;

            default:
                throw MakeException(SR.GetString(SR.UnexpectedOpcode, nodetype.ToString(CultureInfo.CurrentCulture)));
            }
        }
Example #4
0
        /*
         * FC computation and shortcut cases for each node type
         */
        private void CalculateFC(int NodeType, RegexNode node, int CurIndex)
        {
            bool ci  = false;
            bool rtl = false;

            if (NodeType <= RegexNode.Ref)
            {
                if ((node._options & RegexOptions.IgnoreCase) != 0)
                {
                    ci = true;
                }
                if ((node._options & RegexOptions.RightToLeft) != 0)
                {
                    rtl = true;
                }
            }

            switch (NodeType)
            {
            case RegexNode.Concatenate | BeforeChild:
            case RegexNode.Alternate | BeforeChild:
            case RegexNode.Testref | BeforeChild:
            case RegexNode.Loop | BeforeChild:
            case RegexNode.Lazyloop | BeforeChild:
                break;

            case RegexNode.Testgroup | BeforeChild:
                if (CurIndex == 0)
                {
                    SkipChild();
                }
                break;

            case RegexNode.Empty:
                PushFC(new RegexFC(true));
                break;

            case RegexNode.Concatenate | AfterChild:
                if (CurIndex != 0)
                {
                    RegexFC child = PopFC();
                    RegexFC cumul = TopFC();

                    _failed = !cumul.AddFC(child, true);
                }

                if (!TopFC()._nullable)
                {
                    _skipAllChildren = true;
                }
                break;

            case RegexNode.Testgroup | AfterChild:
                if (CurIndex > 1)
                {
                    RegexFC child = PopFC();
                    RegexFC cumul = TopFC();

                    _failed = !cumul.AddFC(child, false);
                }
                break;

            case RegexNode.Alternate | AfterChild:
            case RegexNode.Testref | AfterChild:
                if (CurIndex != 0)
                {
                    RegexFC child = PopFC();
                    RegexFC cumul = TopFC();

                    _failed = !cumul.AddFC(child, false);
                }
                break;

            case RegexNode.Loop | AfterChild:
            case RegexNode.Lazyloop | AfterChild:
                if (node._m == 0)
                {
                    TopFC()._nullable = true;
                }
                break;

            case RegexNode.Group | BeforeChild:
            case RegexNode.Group | AfterChild:
            case RegexNode.Capture | BeforeChild:
            case RegexNode.Capture | AfterChild:
            case RegexNode.Greedy | BeforeChild:
            case RegexNode.Greedy | AfterChild:
                break;

            case RegexNode.Require | BeforeChild:
            case RegexNode.Prevent | BeforeChild:
                SkipChild();
                PushFC(new RegexFC(true));
                break;

            case RegexNode.Require | AfterChild:
            case RegexNode.Prevent | AfterChild:
                break;

            case RegexNode.One:
            case RegexNode.Notone:
                PushFC(new RegexFC(node._ch, NodeType == RegexNode.Notone, false, ci));
                break;

            case RegexNode.Oneloop:
            case RegexNode.Onelazy:
                PushFC(new RegexFC(node._ch, false, node._m == 0, ci));
                break;

            case RegexNode.Notoneloop:
            case RegexNode.Notonelazy:
                PushFC(new RegexFC(node._ch, true, node._m == 0, ci));
                break;

            case RegexNode.Multi:
                if (node._str.Length == 0)
                {
                    PushFC(new RegexFC(true));
                }
                else if (!rtl)
                {
                    PushFC(new RegexFC(node._str[0], false, false, ci));
                }
                else
                {
                    PushFC(new RegexFC(node._str[node._str.Length - 1], false, false, ci));
                }
                break;

            case RegexNode.Set:
                PushFC(new RegexFC(node._str, false, ci));
                break;

            case RegexNode.Setloop:
            case RegexNode.Setlazy:
                PushFC(new RegexFC(node._str, node._m == 0, ci));
                break;

            case RegexNode.Ref:
                PushFC(new RegexFC(RegexCharClass.AnyClass, true, false));
                break;

            case RegexNode.Nothing:
            case RegexNode.Bol:
            case RegexNode.Eol:
            case RegexNode.Boundary:
            case RegexNode.Nonboundary:
            case RegexNode.ECMABoundary:
            case RegexNode.NonECMABoundary:
            case RegexNode.Beginning:
            case RegexNode.Start:
            case RegexNode.EndZ:
            case RegexNode.End:
                PushFC(new RegexFC(true));
                break;

            default:
                throw new ArgumentException(SR.GetString(SR.UnexpectedOpcode, NodeType.ToString(CultureInfo.CurrentCulture)));
            }
        }