/// <summary>
/// Writes a description of this compiled regex to the debug output: the scan
/// direction, the first-char, Boyer-Moore and scan-char prefixes, the anchors,
/// and then one line per opcode found in <c>_codes</c>.
/// </summary>
internal void Dump()
        {
            // Header section. A prefix that is absent is reported as "n/a".
            Debug.WriteLine("Direction:  " + (!_rightToLeft ? "left-to-right" : "right-to-left"));
            Debug.WriteLine("Firstchars: " + (_fcPrefix != null ? RegexCharClass.SetDescription(_fcPrefix.Prefix) : "n/a"));
            Debug.WriteLine("Prefix:     " + (_bmPrefix != null ? Regex.Escape(_bmPrefix.ToString()) : "n/a"));
            Debug.WriteLine("Anchors:    " + RegexFCD.AnchorDescription(_anchors));
            Debug.WriteLine("Scanchars:  " + (_scPrefix != null ? RegexCharClass.SetDescription(_scPrefix.Prefix) : "n/a"));
            Debug.WriteLine("");

            // The detailed Boyer-Moore dump is left disabled in this version:
            //
            //   if (_bmPrefix != null) {
            //       Debug.WriteLine("BoyerMoore:");
            //       Debug.WriteLine(_bmPrefix.Dump("    "));
            //   }

            // Walk the code array; each step advances by OpcodeSize of the
            // opcode stored at the current offset.
            int offset = 0;
            while (offset < _codes.Length)
            {
                Debug.WriteLine(OpcodeDescription(offset));
                offset += OpcodeSize(_codes[offset]);
            }

            Debug.WriteLine("");
        }
        /*
         * One of the only two functions that should be called from outside.
         * Takes a RegexTree and computes the set of chars that can start it.
         * Returns null when the analysis produces no result, or a result that
         * is flagged nullable.
         */
        internal static RegexPrefix FirstChars(RegexTree t) {
            RegexFC info = new RegexFCD().RegexFCFromRegexTree(t);

            if (info != null && !info._nullable) {
                // The culture used to build the first-char set follows the tree's CultureInvariant option.
                CultureInfo culture = ((t._options & RegexOptions.CultureInvariant) == 0) ? CultureInfo.CurrentCulture : CultureInfo.InvariantCulture;
                return new RegexPrefix(info.GetFirstChars(culture), info.IsCaseInsensitive());
            }

            return null;
        }
Exemple #3
0
        /// <summary>
        /// Computes the set of characters that can start the given tree.
        /// </summary>
        /// <param name="t">The tree to analyze; its options select the culture that is used.</param>
        /// <returns>
        /// A <see cref="RegexPrefix"/> built from the computed first chars and their
        /// case-insensitivity flag, or null when the analysis produced nothing or its
        /// result is flagged nullable.
        /// </returns>
        internal static RegexPrefix FirstChars(RegexTree t)
        {
            RegexFCD analyzer   = new RegexFCD();
            RegexFC  firstChars = analyzer.RegexFCFromRegexTree(t);

            if (firstChars == null)
            {
                return null;
            }

            if (firstChars._nullable)
            {
                return null;
            }

            CultureInfo culture;
            if ((t._options & RegexOptions.CultureInvariant) == RegexOptions.None)
            {
                culture = CultureInfo.CurrentCulture;
            }
            else
            {
                culture = CultureInfo.InvariantCulture;
            }

            return new RegexPrefix(firstChars.GetFirstChars(culture), firstChars.IsCaseInsensitive());
        }
Exemple #4
0
        /// <summary>
        /// Computes, for the given RegexTree, the set of chars that can start it.
        /// Yields null when the analysis returns nothing or a result flagged nullable.
        /// </summary>
        public static RegexPrefix FirstChars(RegexTree t)
        {
            RegexFC fc = new RegexFCD().RegexFCFromRegexTree(t);

            bool hasUsableResult = fc != null && !fc._nullable;
            if (!hasUsableResult)
            {
                return null;
            }

            // CultureInvariant on the tree picks the invariant culture; otherwise the current culture is used.
            bool useInvariant = (t.Options & RegexOptions.CultureInvariant) != 0;
            CultureInfo culture = useInvariant ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture;

            return new RegexPrefix(fc.GetFirstChars(culture), fc.IsCaseInsensitive());
        }
Exemple #5
0
        /// <summary>
        /// This is one of the only two functions that should be called from outside.
        /// It takes a RegexTree and computes the set of chars that can start it.
        /// </summary>
        /// <param name="t">The tree to analyze; its Options select the culture used for the first-char set.</param>
        /// <returns>
        /// The first-char prefix, or null when the analysis yields no result or a
        /// result flagged nullable.
        /// </returns>
        public static RegexPrefix? FirstChars(RegexTree t)
        {
            // Create/rent buffers
            Span<int> intSpan = stackalloc int[StackBufferSize];

            RegexFCD s = new RegexFCD(intSpan);
            RegexFC  fc;

            try
            {
                fc = s.RegexFCFromRegexTree(t);
            }
            finally
            {
                // Dispose in a finally block so the analyzer's buffers are released
                // even when the tree walk throws (presumably they are rented once the
                // stack buffer is outgrown - confirm against RegexFCD).
                s.Dispose();
            }

            if (fc == null || fc._nullable)
            {
                return null;
            }

            CultureInfo culture = ((t.Options & RegexOptions.CultureInvariant) != 0) ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture;

            return new RegexPrefix(fc.GetFirstChars(culture), fc.CaseInsensitive);
        }
Exemple #6
0
        /// <summary>
        /// Writes a description of this compiled regex to the debug output: the scan
        /// direction, the first-char and Boyer-Moore prefixes, the anchors, the
        /// Boyer-Moore dump when such a prefix exists, and one line per opcode in
        /// <c>Codes</c>.
        /// </summary>
        public void Dump()
        {
            // Header section. A prefix that is absent is reported as "n/a".
            Debug.WriteLine("Direction:  " + (!RightToLeft ? "left-to-right" : "right-to-left"));
            Debug.WriteLine("Firstchars: " + (FCPrefix != null ? RegexCharClass.SetDescription(FCPrefix.GetValueOrDefault().Prefix) : "n/a"));
            Debug.WriteLine("Prefix:     " + (BMPrefix != null ? Regex.Escape(BMPrefix.ToString()) : "n/a"));
            Debug.WriteLine("Anchors:    " + RegexFCD.AnchorDescription(Anchors));
            Debug.WriteLine("");

            if (BMPrefix != null)
            {
                Debug.WriteLine("BoyerMoore:");
                Debug.WriteLine(BMPrefix.Dump("    "));
            }

            // Walk the code array; each step advances by OpcodeSize of the
            // opcode stored at the current offset.
            int offset = 0;
            while (offset < Codes.Length)
            {
                Debug.WriteLine(OpcodeDescription(offset));
                offset += OpcodeSize(Codes[offset]);
            }

            Debug.WriteLine("");
        }
Exemple #7
0
        /*
         * Top-level RegexCode generator. Walks the tree depth-first and calls
         * EmitFragment before and after each child of an interior node, and
         * once at each leaf.
         *
         * The walk runs twice: a first pass with _counting set (to size the
         * generated code), after which _emitted is allocated with _count
         * entries, and a second pass with _counting cleared that generates
         * the code.
         *
         * CONSIDER: time this against the alternative of generating the code
         * in one pass and growing the array as we go.
         */
        internal RegexCode RegexCodeFromRegexTree(RegexTree tree)
        {
            // Build the sparse capnum mapping only when some capture numbers are unused.
            int capsize;

            if (tree._capnumlist != null && tree._captop != tree._capnumlist.Length)
            {
                capsize = tree._capnumlist.Length;
                _caps   = tree._caps;
                for (int slot = 0; slot < tree._capnumlist.Length; slot++)
                {
                    _caps[tree._capnumlist[slot]] = slot;
                }
            }
            else
            {
                capsize = tree._captop;
                _caps   = null;
            }

            _counting = true;

            bool anotherPass;
            do
            {
                // Second pass only: the first pass has established _count.
                if (!_counting)
                {
                    _emitted = new int[_count];
                }

                RegexNode node       = tree._root;
                int       childIndex = 0;

                Emit(RegexCode.Lazybranch, 0);

                while (true)
                {
                    if (node._children == null)
                    {
                        // Leaf: emitted exactly once.
                        EmitFragment(node._type, node, 0);
                    }
                    else if (childIndex < node._children.Count)
                    {
                        // Interior node with children left: emit the "before" fragment,
                        // remember our position, and descend.
                        EmitFragment(node._type | BeforeChild, node, childIndex);

                        node = (RegexNode)node._children[childIndex];
                        PushInt(childIndex);
                        childIndex = 0;
                        continue;
                    }

                    if (EmptyStack())
                    {
                        break;
                    }

                    // Back up to the parent and emit the "after" fragment for the child just finished.
                    childIndex = PopInt();
                    node       = node._next;

                    EmitFragment(node._type | AfterChild, node, childIndex);
                    childIndex++;
                }

                // Point the leading Lazybranch at the current position, then terminate.
                PatchJump(0, CurPos());
                Emit(RegexCode.Stop);

                anotherPass = _counting;
                if (anotherPass)
                {
                    _counting = false;
                }
            }
            while (anotherPass);

            // If the set of possible first chars is very large, don't bother
            // scanning for it (common case: . == [^\n]).
            RegexPrefix fcPrefix = RegexFCD.FirstChars(tree);

            // REVIEW (historical note, ChrisAn/DavidGut, 11/21/2000): the threshold
            // below reportedly used to be written "0XFFF" with a capital X, which the
            // C# compiler of the time evaluated as 0; switching to "0xFFF" caused
            // failures, so the comparison was left against 0. As written, any prefix
            // whose SetSize is positive is dropped. The intended threshold is unclear.
            if (fcPrefix != null && RegexCharClass.SetSize(fcPrefix.Prefix) > 0)
            {
                fcPrefix = null;
            }

            // REVIEW: is scPrefix used anywhere? RegexFCD.ScanChars(tree) is not called here.
            RegexPrefix scPrefix = null;
            RegexPrefix prefix   = RegexFCD.Prefix(tree);
            bool        rtl      = (tree._options & RegexOptions.RightToLeft) != 0;

            CultureInfo culture = (tree._options & RegexOptions.CultureInvariant) != 0 ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture;

            // A Boyer-Moore matcher is built only for a non-empty literal prefix.
            RegexBoyerMoore bmPrefix = (prefix != null && prefix.Prefix.Length > 0)
                ? new RegexBoyerMoore(prefix.Prefix, prefix.CaseInsensitive, rtl, culture)
                : null;

            int anchors = RegexFCD.Anchors(tree);

            return new RegexCode(_emitted, _stringtable, _trackcount, _caps, capsize, bmPrefix, fcPrefix, scPrefix, anchors, rtl);
        }
Exemple #8
0
        /// <summary>
        /// The top level RegexCode generator. Performs a depth-first walk of the
        /// tree, calling EmitFragment before and after each child of an interior
        /// node and once at each leaf, then packages the emitted code together
        /// with prefix and anchor information into a RegexCode.
        /// </summary>
        public RegexCode RegexCodeFromRegexTree(RegexTree tree)
        {
            // Build the sparse capnum mapping only when some capture numbers are unused.
            int capsize;

            if (tree.CapNumList != null && tree.CapTop != tree.CapNumList.Length)
            {
                capsize = tree.CapNumList.Length;
                _caps   = tree.Caps;
                for (int slot = 0; slot < tree.CapNumList.Length; slot++)
                {
                    _caps[tree.CapNumList[slot]] = slot;
                }
            }
            else
            {
                capsize = tree.CapTop;
                _caps   = null;
            }

            RegexNode? node       = tree.Root;
            int        childIndex = 0;

            Emit(RegexCode.Lazybranch, 0);

            for (;;)
            {
                if (node.Children == null)
                {
                    // Leaf: emitted exactly once.
                    EmitFragment(node.NType, node, 0);
                }
                else if (childIndex < node.Children.Count)
                {
                    // Children remain: emit the "before" fragment, remember our position, descend.
                    EmitFragment(node.NType | BeforeChild, node, childIndex);

                    node = node.Children[childIndex];
                    _intStack.Append(childIndex);
                    childIndex = 0;
                    continue;
                }

                if (_intStack.Length == 0)
                {
                    break;
                }

                // Back up to the parent and emit the "after" fragment for the child just finished.
                childIndex = _intStack.Pop();
                node       = node.Next;

                EmitFragment(node!.NType | AfterChild, node, childIndex);
                childIndex++;
            }

            // Point the leading Lazybranch at the end of the emitted code, then terminate.
            PatchJump(0, _emitted.Length);
            Emit(RegexCode.Stop);

            RegexPrefix? fcPrefix = RegexFCD.FirstChars(tree);
            RegexPrefix  prefix   = RegexFCD.Prefix(tree);
            bool         rtl      = (tree.Options & RegexOptions.RightToLeft) != 0;

            CultureInfo culture = (tree.Options & RegexOptions.CultureInvariant) != 0 ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture;

            // A Boyer-Moore matcher is built only for a non-empty literal prefix.
            RegexBoyerMoore? bmPrefix = prefix.Prefix.Length > 0
                ? new RegexBoyerMoore(prefix.Prefix, prefix.CaseInsensitive, rtl, culture)
                : null;

            int anchors = RegexFCD.Anchors(tree);

            // Snapshot the emitted code into its own array.
            int[] emitted = _emitted.AsSpan().ToArray();

            return new RegexCode(emitted, _stringTable, _trackCount, _caps, capsize, bmPrefix, fcPrefix, anchors, rtl);
        }
Exemple #9
0
        /*
         * Top-level RegexCode generator: a depth-first walk over the tree that
         * calls EmitFragment before and after each child of an interior node,
         * and at each leaf.
         *
         * Two passes are made. The first runs with _counting set (to size the
         * generated code); the second, with _counting cleared, runs after
         * _emitted has been allocated with _count entries and generates the code.
         *
         * We should time it against the alternative, which is to just
         * generate the code and grow the array as we go.
         */
        internal RegexCode RegexCodeFromRegexTree(RegexTree tree)
        {
            // Construct the sparse capnum mapping if some numbers are unused.
            int  capsize;
            bool capsAreDense = tree._capnumlist == null || tree._captop == tree._capnumlist.Length;

            if (capsAreDense)
            {
                capsize = tree._captop;
                _caps   = null;
            }
            else
            {
                capsize = tree._capnumlist.Length;
                _caps   = tree._caps;

                int index = 0;
                while (index < tree._capnumlist.Length)
                {
                    _caps[tree._capnumlist[index]] = index;
                    index++;
                }
            }

            _counting = true;

            while (true)
            {
                // Second pass only: the first pass has established _count.
                if (!_counting)
                {
                    _emitted = new int[_count];
                }

                RegexNode current = tree._root;
                int       child   = 0;

                Emit(RegexCode.Lazybranch, 0);

                while (true)
                {
                    if (current._children != null)
                    {
                        if (child < current._children.Count)
                        {
                            // Children remain: emit the "before" fragment, remember our position, descend.
                            EmitFragment(current._type | BeforeChild, current, child);

                            current = (RegexNode)current._children[child];
                            PushInt(child);
                            child = 0;
                            continue;
                        }
                    }
                    else
                    {
                        // Leaf: emitted exactly once.
                        EmitFragment(current._type, current, 0);
                    }

                    if (EmptyStack())
                    {
                        break;
                    }

                    // Back up to the parent and emit the "after" fragment for the child just finished.
                    child   = PopInt();
                    current = current._next;

                    EmitFragment(current._type | AfterChild, current, child);
                    child++;
                }

                // Point the leading Lazybranch at the current position, then terminate.
                PatchJump(0, CurPos());
                Emit(RegexCode.Stop);

                if (!_counting)
                {
                    break;
                }

                _counting = false;
            }

            RegexPrefix fcPrefix = RegexFCD.FirstChars(tree);
            RegexPrefix prefix   = RegexFCD.Prefix(tree);
            bool        rtl      = (tree._options & RegexOptions.RightToLeft) != 0;

            CultureInfo culture = (tree._options & RegexOptions.CultureInvariant) != 0 ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture;

            // A Boyer-Moore matcher is built only for a non-empty literal prefix.
            RegexBoyerMoore bmPrefix = null;
            if (prefix != null && prefix.Prefix.Length > 0)
            {
                bmPrefix = new RegexBoyerMoore(prefix.Prefix, prefix.CaseInsensitive, rtl, culture);
            }

            int anchors = RegexFCD.Anchors(tree);

            return new RegexCode(_emitted, _stringtable, _trackcount, _caps, capsize, bmPrefix, fcPrefix, anchors, rtl);
        }
Exemple #10
0
        /// <summary>
        /// Generates a RegexCode from the given tree. The tree is walked depth-first,
        /// calling EmitFragment at each leaf and before/after each child of an interior
        /// node. The walk is performed twice: once with <c>_counting</c> set, and once
        /// more after <c>_emitted</c> has been allocated with <c>_count</c> entries.
        /// </summary>
        internal RegexCode RegexCodeFromRegexTree(RegexTree tree)
        {
            // Capture-number mapping: only built when some capture numbers are unused.
            int length;

            if ((tree._capnumlist != null) && (tree._captop != tree._capnumlist.Length))
            {
                length = tree._capnumlist.Length;
                _caps  = tree._caps;
                for (int slot = 0; slot < tree._capnumlist.Length; slot++)
                {
                    _caps[tree._capnumlist[slot]] = slot;
                }
            }
            else
            {
                length = tree._captop;
                _caps  = null;
            }

            _counting = true;

            while (true)
            {
                // Second pass only: the first pass has established _count.
                if (!_counting)
                {
                    _emitted = new int[_count];
                }

                RegexNode node     = tree._root;
                int       curIndex = 0;

                // NOTE(review): 0x17 is presumably the Lazybranch opcode - confirm against RegexCode.
                Emit(0x17, 0);

                while (true)
                {
                    if (node._children == null)
                    {
                        // Leaf: emitted exactly once.
                        EmitFragment(node._type, node, 0);
                    }
                    else if (curIndex < node._children.Count)
                    {
                        // NOTE(review): 0x40 presumably marks the "before child" fragment - confirm.
                        EmitFragment(node._type | 0x40, node, curIndex);
                        node = (RegexNode)node._children[curIndex];
                        PushInt(curIndex);
                        curIndex = 0;
                        continue;
                    }

                    if (EmptyStack())
                    {
                        break;
                    }

                    // Back up to the parent for the child just finished.
                    // NOTE(review): 0x80 presumably marks the "after child" fragment - confirm.
                    curIndex = PopInt();
                    node     = node._next;
                    EmitFragment(node._type | 0x80, node, curIndex);
                    curIndex++;
                }

                // Point the jump at position 0 to the current position, then emit the final opcode.
                // NOTE(review): 40 is presumably the Stop opcode - confirm against RegexCode.
                PatchJump(0, CurPos());
                Emit(40);

                if (!_counting)
                {
                    break;
                }

                _counting = false;
            }

            RegexPrefix fcPrefix = RegexFCD.FirstChars(tree);

            // As written, any prefix whose SetSize is positive is dropped.
            if ((fcPrefix != null) && (RegexCharClass.SetSize(fcPrefix.Prefix) > 0))
            {
                fcPrefix = null;
            }

            RegexPrefix scPrefix    = null;
            RegexPrefix literal     = RegexFCD.Prefix(tree);
            bool        rightToLeft = (tree._options & RegexOptions.RightToLeft) != RegexOptions.None;
            CultureInfo culture     = ((tree._options & RegexOptions.CultureInvariant) != RegexOptions.None) ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture;

            // A Boyer-Moore matcher is built only for a non-empty literal prefix.
            RegexBoyerMoore moore = ((literal != null) && (literal.Prefix.Length > 0))
                ? new RegexBoyerMoore(literal.Prefix, literal.CaseInsensitive, rightToLeft, culture)
                : null;

            return new RegexCode(_emitted, _stringtable, _trackcount, _caps, length, moore, fcPrefix, scPrefix, RegexFCD.Anchors(tree), rightToLeft);
        }
Exemple #11
0
        /// <summary>
        /// The top level RegexCode generator. It does a depth-first walk
        /// through the tree and calls EmitFragment to emit code before
        /// and after each child of an interior node, and at each leaf.
        /// </summary>
        /// <param name="tree">The tree to generate code for.</param>
        /// <returns>
        /// A RegexCode holding a copy of the emitted code plus the string table, track
        /// count, capture mapping, Boyer-Moore / first-char prefixes, anchors and direction.
        /// </returns>
        public RegexCode RegexCodeFromRegexTree(RegexTree tree)
        {
            Span<int>   emittedSpan  = stackalloc int[EmittedSize];
            Span<int>   intStackSpan = stackalloc int[IntStackSize];
            RegexWriter writer       = new RegexWriter(emittedSpan, intStackSpan);

            // The writer's buffers are released in the finally block below, so they are
            // cleaned up even if emission or prefix analysis throws part-way through.
            try
            {
                // construct sparse capnum mapping if some numbers are unused
                int capsize;

                if (tree._capnumlist == null || tree._captop == tree._capnumlist.Length)
                {
                    capsize      = tree._captop;
                    writer._caps = null;
                }
                else
                {
                    capsize      = tree._capnumlist.Length;
                    writer._caps = tree._caps;
                    for (int i = 0; i < tree._capnumlist.Length; i++)
                    {
                        writer._caps[tree._capnumlist[i]] = i;
                    }
                }

                RegexNode curNode  = tree._root;
                int       curChild = 0;

                writer.Emit(RegexCode.Lazybranch, 0);

                while (true)
                {
                    if (curNode._children == null)
                    {
                        // Leaf: emitted exactly once.
                        writer.EmitFragment(curNode._type, curNode, 0);
                    }
                    else if (curChild < curNode._children.Count)
                    {
                        // Children remain: emit the "before" fragment, remember our position, descend.
                        writer.EmitFragment(curNode._type | BeforeChild, curNode, curChild);

                        curNode = curNode._children[curChild];
                        writer._intStack.Append(curChild);
                        curChild = 0;
                        continue;
                    }

                    if (writer._intStack.Length == 0)
                    {
                        break;
                    }

                    // Back up to the parent and emit the "after" fragment for the child just finished.
                    curChild = writer._intStack.Pop();
                    curNode  = curNode._next;

                    writer.EmitFragment(curNode._type | AfterChild, curNode, curChild);
                    curChild++;
                }

                // Point the leading Lazybranch at the end of the emitted code, then terminate.
                writer.PatchJump(0, writer._emitted.Length);
                writer.Emit(RegexCode.Stop);

                RegexPrefix fcPrefix = RegexFCD.FirstChars(tree);
                RegexPrefix prefix   = RegexFCD.Prefix(tree);
                bool        rtl      = (tree._options & RegexOptions.RightToLeft) != 0;

                CultureInfo culture = (tree._options & RegexOptions.CultureInvariant) != 0 ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture;

                // A Boyer-Moore matcher is built only for a non-empty literal prefix.
                RegexBoyerMoore bmPrefix = null;
                if (prefix != null && prefix.Prefix.Length > 0)
                {
                    bmPrefix = new RegexBoyerMoore(prefix.Prefix, prefix.CaseInsensitive, rtl, culture);
                }

                int anchors = RegexFCD.Anchors(tree);

                // Copy the emitted code out before the buffers are released.
                int[] emitted = writer._emitted.AsReadOnlySpan().ToArray();

                return new RegexCode(emitted, writer._stringTable, writer._trackCount, writer._caps, capsize, bmPrefix, fcPrefix, anchors, rtl);
            }
            finally
            {
                // Cleaning up and returning the borrowed arrays
                writer._emitted.Dispose();
                writer._intStack.Dispose();
            }
        }
Exemple #12
0
 /// <summary>
 /// Computes the set of characters that can start the given tree.
 /// </summary>
 /// <param name="t">The tree to analyze; its options select the culture that is used.</param>
 /// <returns>
 /// A RegexPrefix built from the computed first chars and their case-insensitivity
 /// flag, or null when the analysis yields no result or a result flagged nullable.
 /// </returns>
 internal static RegexPrefix FirstChars(RegexTree t)
 {
     RegexFC xfc = new RegexFCD().RegexFCFromRegexTree(t);

     // The analysis may yield no result at all; treat that as "no prefix"
     // instead of dereferencing null.
     if (xfc == null || xfc._nullable)
     {
         return null;
     }

     CultureInfo culture = ((t._options & RegexOptions.CultureInvariant) != RegexOptions.None) ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture;
     return new RegexPrefix(xfc.GetFirstChars(culture), xfc.IsCaseInsensitive());
 }
Exemple #13
0
        /// <summary>
        /// The top level RegexCode generator. Walks the tree depth-first, calling
        /// EmitFragment before and after each child of an interior node and once at
        /// each leaf. It also gathers supporting information about the tree (prefix
        /// data and anchors) that is handed to the resulting RegexCode.
        /// </summary>
        public RegexCode RegexCodeFromRegexTree(RegexTree tree)
        {
            // Build the sparse capnum mapping only when some capture numbers are unused.
            int capsize;

            if (tree.CapNumList != null && tree.CapTop != tree.CapNumList.Length)
            {
                capsize = tree.CapNumList.Length;
                _caps   = tree.Caps;
                for (int slot = 0; slot < tree.CapNumList.Length; slot++)
                {
                    _caps[tree.CapNumList[slot]] = slot;
                }
            }
            else
            {
                capsize = tree.CapTop;
                _caps   = null;
            }

            // The code always starts with a lazy branch. It is back-patched to point at
            // the final Stop once the whole expression has been written.
            Emit(RegexCode.Lazybranch, 0);

            // Emit every node.
            RegexNode node       = tree.Root;
            int       childIndex = 0;

            for (;;)
            {
                int childCount = node.ChildCount();

                if (childCount == 0)
                {
                    // Leaf: emitted exactly once.
                    EmitFragment(node.Type, node, 0);
                }
                else if (childIndex < childCount)
                {
                    // Children remain: emit the "before" fragment, remember our position, descend.
                    EmitFragment(node.Type | BeforeChild, node, childIndex);

                    node = node.Child(childIndex);
                    _intStack.Append(childIndex);
                    childIndex = 0;
                    continue;
                }

                if (_intStack.Length == 0)
                {
                    break;
                }

                // Back up to the parent and emit the "after" fragment for the child just finished.
                childIndex = _intStack.Pop();
                node       = node.Next!;

                EmitFragment(node.Type | AfterChild, node, childIndex);
                childIndex++;
            }

            // Patch the starting Lazybranch, emit the final Stop, and snapshot the code array.
            PatchJump(0, _emitted.Length);
            Emit(RegexCode.Stop);
            int[] emitted = _emitted.AsSpan().ToArray();

            bool rtl = (tree.Options & RegexOptions.RightToLeft) != 0;

            // Compute prefixes to help optimize FindFirstChar.
            RegexBoyerMoore? bmPrefix = null;
            RegexPrefix?     fcPrefix = null;
            RegexPrefix      prefix   = RegexFCD.Prefix(tree);

            // If the length is <= 1 or > MaxLimit, perf is better using fcPrefix.
            bool useBoyerMoore = prefix.Prefix.Length > 1 && prefix.Prefix.Length <= RegexBoyerMoore.MaxLimit;

            if (!useBoyerMoore)
            {
                // No suitable single string: try to compute the set of characters that might begin a match.
                fcPrefix = RegexFCD.FirstChars(tree);
            }
            else
            {
                // A single string of sufficient length always begins the expression: build a Boyer-Moore prefix.
                CultureInfo culture = (tree.Options & RegexOptions.CultureInvariant) != 0 ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture;
                bmPrefix = new RegexBoyerMoore(prefix.Prefix, prefix.CaseInsensitive, rtl, culture);
            }

            // Compute any anchors starting the expression.
            int anchors = RegexFCD.Anchors(tree);

            // Convert the string table into an array in which each string sits at its table value.
            string[] strings = new string[_stringTable.Count];

            foreach (KeyValuePair<string, int> entry in _stringTable)
            {
                strings[entry.Value] = entry.Key;
            }

            // Return all that in a RegexCode object.
            return new RegexCode(tree, emitted, strings, _trackCount, _caps, capsize, bmPrefix, fcPrefix, anchors, rtl);
        }