Beispiel #1
0
        /*
         * This is the one of the only two functions that should be called from outside.
         * It takes a RegexTree and computes the set of chars that can start it.
         */
        internal static RegexPrefix FirstChars(RegexTree t)
        {
            RegexFCD s = new RegexFCD();
            RegexFC fc = s.RegexFCFromRegexTree(t);

            if (fc == null || fc._nullable)
                return null;

            CultureInfo culture = ((t._options & RegexOptions.CultureInvariant) != 0) ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture;
            return new RegexPrefix(fc.GetFirstChars(culture), fc.IsCaseInsensitive());
        }
Beispiel #2
0
        /// <summary>
        /// This is the only function that should be called from outside.
        /// It takes a RegexTree and creates a corresponding RegexCode.
        /// </summary>
        internal static RegexCode Write(RegexTree t)
        {
            RegexWriter w = new RegexWriter();
            RegexCode retval = w.RegexCodeFromRegexTree(t);
#if DEBUG
            if (t.Debug)
            {
                t.Dump();
                retval.Dump();
            }
#endif
            return retval;
        }
Beispiel #3
0
        /// <summary>
        /// The top level RegexCode generator. It does a depth-first walk
        /// through the tree and calls EmitFragment to emits code before
        /// and after each child of an interior node, and at each leaf.
        ///
        /// It runs two passes, first to count the size of the generated
        /// code, and second to generate the code.
        ///
        /// We should time it against the alternative, which is
        /// to just generate the code and grow the array as we go.
        /// </summary>
        private RegexCode RegexCodeFromRegexTree(RegexTree tree)
        {
            RegexNode       curNode;
            int             curChild;
            int             capsize;
            RegexPrefix     fcPrefix;
            RegexPrefix     prefix;
            int             anchors;
            RegexBoyerMoore bmPrefix;
            bool            rtl;

            // construct sparse capnum mapping if some numbers are unused

            if (tree._capnumlist == null || tree._captop == tree._capnumlist.Length)
            {
                capsize = tree._captop;
                _caps   = null;
            }
            else
            {
                capsize = tree._capnumlist.Length;
                _caps   = tree._caps;
                for (int i = 0; i < tree._capnumlist.Length; i++)
                {
                    _caps[tree._capnumlist[i]] = i;
                }
            }

            _counting = true;

            for (; ;)
            {
                if (!_counting)
                {
                    _emitted = new int[_count];
                }

                curNode  = tree._root;
                curChild = 0;

                Emit(RegexCode.Lazybranch, 0);

                for (; ;)
                {
                    if (curNode._children == null)
                    {
                        EmitFragment(curNode._type, curNode, 0);
                    }
                    else if (curChild < curNode._children.Count)
                    {
                        EmitFragment(curNode._type | BeforeChild, curNode, curChild);

                        curNode = curNode._children[curChild];
                        PushInt(curChild);
                        curChild = 0;
                        continue;
                    }

                    if (EmptyStack())
                    {
                        break;
                    }

                    curChild = PopInt();
                    curNode  = curNode._next;

                    EmitFragment(curNode._type | AfterChild, curNode, curChild);
                    curChild++;
                }

                PatchJump(0, CurPos());
                Emit(RegexCode.Stop);

                if (!_counting)
                {
                    break;
                }

                _counting = false;
            }

            fcPrefix = RegexFCD.FirstChars(tree);

            prefix = RegexFCD.Prefix(tree);
            rtl    = ((tree._options & RegexOptions.RightToLeft) != 0);

            CultureInfo culture = (tree._options & RegexOptions.CultureInvariant) != 0 ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture;

            if (prefix != null && prefix.Prefix.Length > 0)
            {
                bmPrefix = new RegexBoyerMoore(prefix.Prefix, prefix.CaseInsensitive, rtl, culture);
            }
            else
            {
                bmPrefix = null;
            }

            anchors = RegexFCD.Anchors(tree);

            return(new RegexCode(_emitted, _stringtable, _trackcount, _caps, capsize, bmPrefix, fcPrefix, anchors, rtl));
        }
Beispiel #4
0
        /*
         * This is a related computation: it takes a RegexTree and computes the
         * leading substring if it see one. It's quite trivial and gives up easily.
         */
        internal static RegexPrefix Prefix(RegexTree tree)
        {
            RegexNode curNode;
            RegexNode concatNode = null;
            int       nextChild  = 0;

            curNode = tree._root;

            for (; ;)
            {
                switch (curNode._type)
                {
                case RegexNode.Concatenate:
                    if (curNode.ChildCount() > 0)
                    {
                        concatNode = curNode;
                        nextChild  = 0;
                    }
                    break;

                case RegexNode.Greedy:
                case RegexNode.Capture:
                    curNode    = curNode.Child(0);
                    concatNode = null;
                    continue;

                case RegexNode.Oneloop:
                case RegexNode.Onelazy:
                    if (curNode._m > 0)
                    {
                        string pref = string.Empty.PadRight(curNode._m, curNode._ch);
                        return(new RegexPrefix(pref, 0 != (curNode._options & RegexOptions.IgnoreCase)));
                    }
                    else
                    {
                        return(RegexPrefix.Empty);
                    }

                case RegexNode.One:
                    return(new RegexPrefix(curNode._ch.ToString(), 0 != (curNode._options & RegexOptions.IgnoreCase)));

                case RegexNode.Multi:
                    return(new RegexPrefix(curNode._str, 0 != (curNode._options & RegexOptions.IgnoreCase)));

                case RegexNode.Bol:
                case RegexNode.Eol:
                case RegexNode.Boundary:
                case RegexNode.ECMABoundary:
                case RegexNode.Beginning:
                case RegexNode.Start:
                case RegexNode.EndZ:
                case RegexNode.End:
                case RegexNode.Empty:
                case RegexNode.Require:
                case RegexNode.Prevent:
                    break;

                default:
                    return(RegexPrefix.Empty);
                }

                if (concatNode == null || nextChild >= concatNode.ChildCount())
                {
                    return(RegexPrefix.Empty);
                }

                curNode = concatNode.Child(nextChild++);
            }
        }
Beispiel #5
0
        /*
         * The main FC computation. It does a shortcutted depth-first walk
         * through the tree and calls CalculateFC to emits code before
         * and after each child of an interior node, and at each leaf.
         */
        private RegexFC RegexFCFromRegexTree(RegexTree tree)
        {
            RegexNode curNode;
            int       curChild;

            curNode  = tree._root;
            curChild = 0;

            for (; ;)
            {
                if (curNode._children == null)
                {
                    // This is a leaf node
                    CalculateFC(curNode._type, curNode, 0);
                }
                else if (curChild < curNode._children.Count && !_skipAllChildren)
                {
                    // This is an interior node, and we have more children to analyze
                    CalculateFC(curNode._type | BeforeChild, curNode, curChild);

                    if (!_skipchild)
                    {
                        curNode = curNode._children[curChild];
                        // this stack is how we get a depth first walk of the tree.
                        PushInt(curChild);
                        curChild = 0;
                    }
                    else
                    {
                        curChild++;
                        _skipchild = false;
                    }
                    continue;
                }

                // This is an interior node where we've finished analyzing all the children, or
                // the end of a leaf node.
                _skipAllChildren = false;

                if (IntIsEmpty())
                {
                    break;
                }

                curChild = PopInt();
                curNode  = curNode._next;

                CalculateFC(curNode._type | AfterChild, curNode, curChild);
                if (_failed)
                {
                    return(null);
                }

                curChild++;
            }

            if (FCIsEmpty())
            {
                return(null);
            }

            return(PopFC());
        }
Beispiel #6
0
        /*
         * This is a related computation: it takes a RegexTree and computes the
         * leading substring if it see one. It's quite trivial and gives up easily.
         */
        internal static RegexPrefix Prefix(RegexTree tree)
        {
            RegexNode curNode;
            RegexNode concatNode = null;
            int nextChild = 0;

            curNode = tree._root;

            for (; ;)
            {
                switch (curNode._type)
                {
                    case RegexNode.Concatenate:
                        if (curNode.ChildCount() > 0)
                        {
                            concatNode = curNode;
                            nextChild = 0;
                        }
                        break;

                    case RegexNode.Greedy:
                    case RegexNode.Capture:
                        curNode = curNode.Child(0);
                        concatNode = null;
                        continue;

                    case RegexNode.Oneloop:
                    case RegexNode.Onelazy:
                        if (curNode._m > 0)
                        {
                            string pref = string.Empty.PadRight(curNode._m, curNode._ch);
                            return new RegexPrefix(pref, 0 != (curNode._options & RegexOptions.IgnoreCase));
                        }
                        else
                            return RegexPrefix.Empty;

                    case RegexNode.One:
                        return new RegexPrefix(curNode._ch.ToString(), 0 != (curNode._options & RegexOptions.IgnoreCase));

                    case RegexNode.Multi:
                        return new RegexPrefix(curNode._str, 0 != (curNode._options & RegexOptions.IgnoreCase));

                    case RegexNode.Bol:
                    case RegexNode.Eol:
                    case RegexNode.Boundary:
                    case RegexNode.ECMABoundary:
                    case RegexNode.Beginning:
                    case RegexNode.Start:
                    case RegexNode.EndZ:
                    case RegexNode.End:
                    case RegexNode.Empty:
                    case RegexNode.Require:
                    case RegexNode.Prevent:
                        break;

                    default:
                        return RegexPrefix.Empty;
                }

                if (concatNode == null || nextChild >= concatNode.ChildCount())
                    return RegexPrefix.Empty;

                curNode = concatNode.Child(nextChild++);
            }
        }
Beispiel #7
0
        /*
         * The main FC computation. It does a shortcutted depth-first walk
         * through the tree and calls CalculateFC to emits code before
         * and after each child of an interior node, and at each leaf.
         */
        private RegexFC RegexFCFromRegexTree(RegexTree tree)
        {
            RegexNode curNode;
            int curChild;

            curNode = tree._root;
            curChild = 0;

            for (; ;)
            {
                if (curNode._children == null)
                {
                    // This is a leaf node
                    CalculateFC(curNode._type, curNode, 0);
                }
                else if (curChild < curNode._children.Count && !_skipAllChildren)
                {
                    // This is an interior node, and we have more children to analyze
                    CalculateFC(curNode._type | BeforeChild, curNode, curChild);

                    if (!_skipchild)
                    {
                        curNode = curNode._children[curChild];
                        // this stack is how we get a depth first walk of the tree.
                        PushInt(curChild);
                        curChild = 0;
                    }
                    else
                    {
                        curChild++;
                        _skipchild = false;
                    }
                    continue;
                }

                // This is an interior node where we've finished analyzing all the children, or
                // the end of a leaf node.
                _skipAllChildren = false;

                if (IntIsEmpty())
                    break;

                curChild = PopInt();
                curNode = curNode._next;

                CalculateFC(curNode._type | AfterChild, curNode, curChild);
                if (_failed)
                    return null;

                curChild++;
            }

            if (FCIsEmpty())
                return null;

            return PopFC();
        }
Beispiel #8
0
        /*
         * Yet another related computation: it takes a RegexTree and computes the
         * leading anchors that it encounters.
         */
        internal static int Anchors(RegexTree tree)
        {
            RegexNode curNode;
            RegexNode concatNode = null;
            int nextChild = 0;
            int result = 0;

            curNode = tree._root;

            for (; ;)
            {
                switch (curNode._type)
                {
                    case RegexNode.Concatenate:
                        if (curNode.ChildCount() > 0)
                        {
                            concatNode = curNode;
                            nextChild = 0;
                        }
                        break;

                    case RegexNode.Greedy:
                    case RegexNode.Capture:
                        curNode = curNode.Child(0);
                        concatNode = null;
                        continue;

                    case RegexNode.Bol:
                    case RegexNode.Eol:
                    case RegexNode.Boundary:
                    case RegexNode.ECMABoundary:
                    case RegexNode.Beginning:
                    case RegexNode.Start:
                    case RegexNode.EndZ:
                    case RegexNode.End:
                        return result | AnchorFromType(curNode._type);

                    case RegexNode.Empty:
                    case RegexNode.Require:
                    case RegexNode.Prevent:
                        break;

                    default:
                        return result;
                }

                if (concatNode == null || nextChild >= concatNode.ChildCount())
                    return result;

                curNode = concatNode.Child(nextChild++);
            }
        }
Beispiel #9
0
        /// <summary>
        /// The top level RegexCode generator. It does a depth-first walk
        /// through the tree and calls EmitFragment to emits code before
        /// and after each child of an interior node, and at each leaf.
        ///
        /// It runs two passes, first to count the size of the generated
        /// code, and second to generate the code.
        ///
        /// We should time it against the alternative, which is
        /// to just generate the code and grow the array as we go.
        /// </summary>
        private RegexCode RegexCodeFromRegexTree(RegexTree tree)
        {
            RegexNode curNode;
            int curChild;
            int capsize;
            RegexPrefix fcPrefix;
            RegexPrefix prefix;
            int anchors;
            RegexBoyerMoore bmPrefix;
            bool rtl;

            // construct sparse capnum mapping if some numbers are unused

            if (tree._capnumlist == null || tree._captop == tree._capnumlist.Length)
            {
                capsize = tree._captop;
                _caps = null;
            }
            else
            {
                capsize = tree._capnumlist.Length;
                _caps = tree._caps;
                for (int i = 0; i < tree._capnumlist.Length; i++)
                    _caps[tree._capnumlist[i]] = i;
            }

            _counting = true;

            for (; ;)
            {
                if (!_counting)
                    _emitted = new int[_count];

                curNode = tree._root;
                curChild = 0;

                Emit(RegexCode.Lazybranch, 0);

                for (; ;)
                {
                    if (curNode._children == null)
                    {
                        EmitFragment(curNode._type, curNode, 0);
                    }
                    else if (curChild < curNode._children.Count)
                    {
                        EmitFragment(curNode._type | BeforeChild, curNode, curChild);

                        curNode = curNode._children[curChild];
                        PushInt(curChild);
                        curChild = 0;
                        continue;
                    }

                    if (EmptyStack())
                        break;

                    curChild = PopInt();
                    curNode = curNode._next;

                    EmitFragment(curNode._type | AfterChild, curNode, curChild);
                    curChild++;
                }

                PatchJump(0, CurPos());
                Emit(RegexCode.Stop);

                if (!_counting)
                    break;

                _counting = false;
            }

            fcPrefix = RegexFCD.FirstChars(tree);

            prefix = RegexFCD.Prefix(tree);
            rtl = ((tree._options & RegexOptions.RightToLeft) != 0);

            CultureInfo culture = (tree._options & RegexOptions.CultureInvariant) != 0 ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture;
            if (prefix != null && prefix.Prefix.Length > 0)
                bmPrefix = new RegexBoyerMoore(prefix.Prefix, prefix.CaseInsensitive, rtl, culture);
            else
                bmPrefix = null;

            anchors = RegexFCD.Anchors(tree);

            return new RegexCode(_emitted, _stringtable, _trackcount, _caps, capsize, bmPrefix, fcPrefix, anchors, rtl);
        }