Esempio n. 1
0
        /// <summary>
        /// Entry point of the RegexInterpreterCode writer. Walks the regex tree
        /// depth-first using an explicit stack (no recursion) and calls EmitFragment
        /// once for every leaf, and before and after every child of an interior node.
        /// </summary>
        /// <returns>
        /// A RegexInterpreterCode built from the tree's FindOptimizations and Options,
        /// the emitted code array, the string table flattened into an array indexed by
        /// each string's table value, and the track count.
        /// </returns>
        private RegexInterpreterCode EmitCode()
        {
            // The program always opens with a Lazybranch whose operand is a placeholder;
            // it is back-patched below once the position of the final Stop is known.
            Emit(RegexOpcode.Lazybranch, 0);

            RegexNode node = _tree.Root;
            int childIndex = 0;

            for (; ; )
            {
                int childCount = node.ChildCount();

                if (childCount != 0 && childIndex < childCount)
                {
                    // Interior node with a child still to visit: emit the "before" fragment,
                    // remember which child we are entering, and descend into it.
                    EmitFragment(node.Kind | BeforeChild, node, childIndex);
                    _intStack.Append(childIndex);
                    node = node.Child(childIndex);
                    childIndex = 0;
                    continue;
                }

                if (childCount == 0)
                {
                    // Leaf: a single fragment with no before/after decoration.
                    EmitFragment(node.Kind, node, 0);
                }

                // An empty stack means we are back at the root with nothing left to visit.
                if (_intStack.Length == 0)
                {
                    break;
                }

                // Climb back to the parent, emit the "after" fragment for the child we
                // just finished, and move on to the next sibling.
                childIndex = _intStack.Pop();
                node = node.Parent!;
                EmitFragment(node.Kind | AfterChild, node, childIndex);
                childIndex++;
            }

            // Point the leading Lazybranch at the position where Stop is about to be
            // emitted, append the Stop, then snapshot the emitted code.
            PatchJump(0, _emitted.Length);
            Emit(RegexOpcode.Stop);
            int[] program = _emitted.AsSpan().ToArray();

            // Flatten the string table: each entry's value is its slot in the array.
            var orderedStrings = new string[_stringTable.Count];
            foreach (KeyValuePair<string, int> pair in _stringTable)
            {
                orderedStrings[pair.Value] = pair.Key;
            }

            return new RegexInterpreterCode(_tree.FindOptimizations, _tree.Options, program, orderedStrings, _trackCount);
        }
Esempio n. 2
0
        /// <summary>
        /// The top level RegexCode generator. Performs an iterative depth-first walk
        /// of the tree, calling EmitFragment to emit code before and after each child
        /// of an interior node, and once at each leaf. It then gathers first-char,
        /// prefix and anchor information and packages everything into a RegexCode.
        /// </summary>
        /// <param name="tree">The regex tree to generate code for.</param>
        /// <returns>
        /// A RegexCode holding the emitted code array, the string table, the track count,
        /// the capture mapping and size, and the computed prefix/anchor data.
        /// </returns>
        public RegexCode RegexCodeFromRegexTree(RegexTree tree)
        {
            // Capture numbering: when CapNumList exists and its length differs from CapTop,
            // record each listed capture number's index in tree.Caps; otherwise no mapping is kept.
            int capsize;
            var capNumbers = tree.CapNumList;

            if (capNumbers != null && tree.CapTop != capNumbers.Length)
            {
                capsize = capNumbers.Length;
                _caps = tree.Caps;
                for (int index = 0; index < capNumbers.Length; index++)
                {
                    _caps[capNumbers[index]] = index;
                }
            }
            else
            {
                capsize = tree.CapTop;
                _caps = null;
            }

            // Placeholder branch at offset 0; patched after the walk to target the final Stop.
            Emit(RegexCode.Lazybranch, 0);

            RegexNode? node = tree.Root;
            int childIndex = 0;

            for (; ; )
            {
                var children = node.Children;

                if (children != null && childIndex < children.Count)
                {
                    // Child still pending: emit the "before" fragment, remember the
                    // child's index, and descend into it.
                    EmitFragment(node.NType | BeforeChild, node, childIndex);
                    _intStack.Append(childIndex);
                    node = children[childIndex];
                    childIndex = 0;
                    continue;
                }

                if (children == null)
                {
                    // A node without a child list is emitted as a single fragment.
                    EmitFragment(node.NType, node, 0);
                }

                // Nothing left on the stack: the walk is complete.
                if (_intStack.Length == 0)
                {
                    break;
                }

                // Step back up through Next, emit the "after" fragment for the child just
                // completed, and advance to the following child.
                childIndex = _intStack.Pop();
                node = node.Next;
                EmitFragment(node!.NType | AfterChild, node, childIndex);
                childIndex++;
            }

            PatchJump(0, _emitted.Length);
            Emit(RegexCode.Stop);
            int[] program = _emitted.AsSpan().ToArray();

            // Optimization data derived from the tree.
            RegexPrefix? fcPrefix = RegexFCD.FirstChars(tree);
            RegexPrefix prefix = RegexFCD.Prefix(tree);
            bool rtl = (tree.Options & RegexOptions.RightToLeft) != 0;
            CultureInfo culture = (tree.Options & RegexOptions.CultureInvariant) == 0
                ? CultureInfo.CurrentCulture
                : CultureInfo.InvariantCulture;

            // A Boyer-Moore matcher is only built when a non-empty prefix was found.
            RegexBoyerMoore? bmPrefix = prefix.Prefix.Length > 0
                ? new RegexBoyerMoore(prefix.Prefix, prefix.CaseInsensitive, rtl, culture)
                : null;

            int anchors = RegexFCD.Anchors(tree);

            return new RegexCode(program, _stringTable, _trackCount, _caps, capsize, bmPrefix, fcPrefix, anchors, rtl);
        }
Esempio n. 3
0
        /// <summary>
        /// The top level RegexCode generator. Walks the tree depth-first with an explicit
        /// stack, calling EmitFragment before and after each child of an interior node
        /// and once at each leaf, then bundles the emitted code, the ordered string
        /// table and the capture information into a RegexCode.
        /// </summary>
        /// <param name="tree">The regex tree to generate code for.</param>
        /// <param name="culture">Culture passed through unchanged to the RegexCode constructor.</param>
        /// <returns>The RegexCode constructed from the tree and the emitted program.</returns>
        public RegexCode RegexCodeFromRegexTree(RegexTree tree, CultureInfo culture)
        {
            // Capture numbering: when CapNumList exists and its length differs from CapTop,
            // record each listed capture number's index in tree.Caps; otherwise no mapping is kept.
            int capsize;
            var capNumbers = tree.CapNumList;

            if (capNumbers != null && tree.CapTop != capNumbers.Length)
            {
                capsize = capNumbers.Length;
                _caps = tree.Caps;
                for (int index = 0; index < capNumbers.Length; index++)
                {
                    _caps[capNumbers[index]] = index;
                }
            }
            else
            {
                capsize = tree.CapTop;
                _caps = null;
            }

            // The program always opens with a Lazybranch whose operand is a placeholder;
            // it is back-patched below once the position of the final Stop is known.
            Emit(RegexOpcode.Lazybranch, 0);

            RegexNode node = tree.Root;
            int childIndex = 0;

            for (; ; )
            {
                int childCount = node.ChildCount();

                if (childCount != 0 && childIndex < childCount)
                {
                    // Interior node with a child still to visit: emit the "before" fragment,
                    // remember which child we are entering, and descend into it.
                    EmitFragment(node.Kind | BeforeChild, node, childIndex);
                    _intStack.Append(childIndex);
                    node = node.Child(childIndex);
                    childIndex = 0;
                    continue;
                }

                if (childCount == 0)
                {
                    // Leaf: a single fragment with no before/after decoration.
                    EmitFragment(node.Kind, node, 0);
                }

                // An empty stack means we are back at the root with nothing left to visit.
                if (_intStack.Length == 0)
                {
                    break;
                }

                // Climb back to the parent, emit the "after" fragment for the child we
                // just finished, and move on to the next sibling.
                childIndex = _intStack.Pop();
                node = node.Parent!;
                EmitFragment(node.Kind | AfterChild, node, childIndex);
                childIndex++;
            }

            // Point the leading Lazybranch at the position where Stop is about to be
            // emitted, append the Stop, then snapshot the emitted code.
            PatchJump(0, _emitted.Length);
            Emit(RegexOpcode.Stop);
            int[] program = _emitted.AsSpan().ToArray();

            // Flatten the string table: each entry's value is its slot in the array.
            var orderedStrings = new string[_stringTable.Count];
            foreach (KeyValuePair<string, int> pair in _stringTable)
            {
                orderedStrings[pair.Value] = pair.Key;
            }

            return new RegexCode(tree, culture, program, orderedStrings, _trackCount, _caps, capsize);
        }
Esempio n. 4
0
        /// <summary>
        /// The top level RegexCode generator. It does a depth-first walk
        /// through the tree and calls EmitFragment to emit code before
        /// and after each child of an interior node and at each leaf.
        /// It also computes various information about the tree, such as
        /// prefix data to help with optimizations.
        /// </summary>
        /// <param name="tree">The regex tree to generate code for.</param>
        /// <returns>
        /// A RegexCode instance. NOTE(review): this excerpt ends before the method's
        /// return statement, so how the result is constructed is not visible here.
        /// </returns>
        public RegexCode RegexCodeFromRegexTree(RegexTree tree)
        {
            // Construct sparse capnum mapping if some numbers are unused.
            int capsize;

            if (tree.CapNumList == null || tree.CapTop == tree.CapNumList.Length)
            {
                // No capture-number list, or its length equals CapTop: no mapping is kept.
                capsize = tree.CapTop;
                _caps   = null;
            }
            else
            {
                // Record, for every listed capture number, its index within CapNumList.
                capsize = tree.CapNumList.Length;
                _caps   = tree.Caps;
                for (int i = 0; i < tree.CapNumList.Length; i++)
                {
                    _caps[tree.CapNumList[i]] = i;
                }
            }

            // Every written code begins with a lazy branch.  This will be back-patched
            // to point to the ending Stop after the whole expression has been written.
            Emit(RegexCode.Lazybranch, 0);

            // Emit every node.
            RegexNode curNode  = tree.Root;
            int       curChild = 0;

            // Iterative depth-first traversal: _intStack remembers, for each level we have
            // descended, the index of the child being visited.
            while (true)
            {
                int curNodeChildCount = curNode.ChildCount();
                if (curNodeChildCount == 0)
                {
                    // Leaf node: emit its fragment directly.
                    EmitFragment(curNode.Type, curNode, 0);
                }
                else if (curChild < curNodeChildCount)
                {
                    // Interior node with a child still to visit: emit the "before" fragment,
                    // then descend into that child and restart the loop there.
                    EmitFragment(curNode.Type | BeforeChild, curNode, curChild);

                    curNode = curNode.Child(curChild);
                    _intStack.Append(curChild);
                    curChild = 0;
                    continue;
                }

                // Nothing left on the stack means the walk has returned to the root and is done.
                if (_intStack.Length == 0)
                {
                    break;
                }

                // Restore the child index we were at and step back up.
                // NOTE(review): Next is presumably the link to the parent node here — confirm.
                curChild = _intStack.Pop();
                curNode  = curNode.Next !;

                // Emit the "after" fragment for the child just completed, then move to the next child.
                EmitFragment(curNode.Type | AfterChild, curNode, curChild);
                curChild++;
            }

            // Patch the starting Lazybranch, emit the final Stop, and get the resulting code array.
            PatchJump(0, _emitted.Length);
            Emit(RegexCode.Stop);
            int[] emitted = _emitted.AsSpan().ToArray();

            // Option flags read from the tree; their consumers lie beyond this excerpt.
            bool rtl      = (tree.Options & RegexOptions.RightToLeft) != 0;
            bool compiled = (tree.Options & RegexOptions.Compiled) != 0;

            // Compute prefixes to help optimize FindFirstChar.
            RegexBoyerMoore?boyerMoorePrefix = null;

            // NOTE(review): the excerpt is truncated after this declaration; the code that
            // fills in these prefix values and returns the RegexCode is not shown.
            (string CharClass, bool CaseInsensitive)[]? leadingCharClasses = null;
Esempio n. 5
0
        /// <summary>
        /// The top level RegexCode generator. Walks the tree depth-first with an explicit
        /// stack, calling EmitFragment before and after each child of an interior node
        /// and once at each leaf. Afterwards it computes prefix and anchor data used to
        /// optimize FindFirstChar and bundles everything into a RegexCode.
        /// </summary>
        /// <param name="tree">The regex tree to generate code for.</param>
        /// <returns>
        /// A RegexCode built from the tree, the emitted code array, the ordered string
        /// array, the track count, the capture mapping and size, and the prefix/anchor data.
        /// </returns>
        public RegexCode RegexCodeFromRegexTree(RegexTree tree)
        {
            // Capture numbering: when CapNumList exists and its length differs from CapTop,
            // record each listed capture number's index in tree.Caps; otherwise no mapping is kept.
            int capsize;
            var capNumbers = tree.CapNumList;

            if (capNumbers != null && tree.CapTop != capNumbers.Length)
            {
                capsize = capNumbers.Length;
                _caps = tree.Caps;
                for (int index = 0; index < capNumbers.Length; index++)
                {
                    _caps[capNumbers[index]] = index;
                }
            }
            else
            {
                capsize = tree.CapTop;
                _caps = null;
            }

            // The program always opens with a Lazybranch whose operand is a placeholder;
            // it is back-patched below once the position of the final Stop is known.
            Emit(RegexCode.Lazybranch, 0);

            RegexNode node = tree.Root;
            int childIndex = 0;

            for (; ; )
            {
                int childCount = node.ChildCount();

                if (childCount != 0 && childIndex < childCount)
                {
                    // Interior node with a child still to visit: emit the "before" fragment,
                    // remember which child we are entering, and descend into it.
                    EmitFragment(node.Type | BeforeChild, node, childIndex);
                    _intStack.Append(childIndex);
                    node = node.Child(childIndex);
                    childIndex = 0;
                    continue;
                }

                if (childCount == 0)
                {
                    // Leaf: a single fragment with no before/after decoration.
                    EmitFragment(node.Type, node, 0);
                }

                // An empty stack means we are back at the root with nothing left to visit.
                if (_intStack.Length == 0)
                {
                    break;
                }

                // Step back up through Next, emit the "after" fragment for the child we
                // just finished, and move on to the next child.
                childIndex = _intStack.Pop();
                node = node.Next!;
                EmitFragment(node.Type | AfterChild, node, childIndex);
                childIndex++;
            }

            // Point the leading Lazybranch at the position where Stop is about to be
            // emitted, append the Stop, then snapshot the emitted code.
            PatchJump(0, _emitted.Length);
            Emit(RegexCode.Stop);
            int[] program = _emitted.AsSpan().ToArray();

            bool rtl = (tree.Options & RegexOptions.RightToLeft) != 0;

            // Prefix data to help optimize FindFirstChar. Exactly one of the two is populated.
            RegexBoyerMoore? bmPrefix = null;
            RegexPrefix? fcPrefix = null;
            RegexPrefix prefix = RegexFCD.Prefix(tree);
            int prefixLength = prefix.Prefix.Length;

            if (prefixLength <= 1 || prefixLength > RegexBoyerMoore.MaxLimit)
            {
                // Prefix too short or too long for Boyer-Moore to pay off: fall back to
                // computing the set of characters that might begin a match.
                fcPrefix = RegexFCD.FirstChars(tree);
            }
            else
            {
                // A single string of suitable length always begins the expression:
                // build a Boyer-Moore matcher for it using the culture the options select.
                CultureInfo culture = (tree.Options & RegexOptions.CultureInvariant) == 0
                    ? CultureInfo.CurrentCulture
                    : CultureInfo.InvariantCulture;
                bmPrefix = new RegexBoyerMoore(prefix.Prefix, prefix.CaseInsensitive, rtl, culture);
            }

            // Compute any anchors starting the expression.
            int anchors = RegexFCD.Anchors(tree);

            // Convert the string table into an ordered string array: each entry's value is its slot.
            var orderedStrings = new string[_stringTable.Count];
            foreach (KeyValuePair<string, int> pair in _stringTable)
            {
                orderedStrings[pair.Value] = pair.Key;
            }

            return new RegexCode(tree, program, orderedStrings, _trackCount, _caps, capsize, bmPrefix, fcPrefix, anchors, rtl);
        }
Esempio n. 6
0
 /// <summary>Returns the value obtained by calling Pop on the integer stack.</summary>
 private int PopInt() => _intStack.Pop();