Пример #1
0
        /// <summary>
        /// Simplifies the branches of this alternation in a single left-to-right
        /// pass that compacts the child list in place:
        ///
        /// a|b|c|def|g|h -> [a-c]|def|[gh]
        ///     adjacent single-character / set branches are folded into one set
        /// apple|(?:orange|pear)|grape -> apple|orange|pear|grape
        ///     a directly nested alternation is spliced into this one
        ///
        /// Branches of type Nothing are dropped from the list.
        /// </summary>
        /// <returns>
        /// A new Nothing node carrying this node's options when there is no child
        /// list; otherwise whatever <c>StripEnation(Nothing)</c> returns after the
        /// child list has been compacted.
        /// </returns>
        internal RegexNode ReduceAlternation()
        {
            if (_children == null)
            {
                return new RegexNode(Nothing, _options);
            }

            // What we remember about the most recent Set/One branch that was kept.
            bool seenCharNode = false;        // a Set or One precedes us with no other kind of branch resetting it
            bool prevBlocksMerge = false;     // that branch was a Set that IsMergeable rejected
            RegexOptions prevOptions = 0;     // its RightToLeft / IgnoreCase bits

            // 'read' walks every child; 'write' trails behind, pointing at the slot
            // where the next surviving child belongs. Both are needed after the loop.
            int read;
            int write;

            for (read = 0, write = 0; read < _children.Count; read++, write++)
            {
                RegexNode current = _children[read];

                if (write < read)
                {
                    _children[write] = current;
                }

                if (current._type == Alternate)
                {
                    // Re-parent the nested branches and queue them right after the
                    // current position so the following iterations visit them.
                    foreach (RegexNode branch in current._children)
                    {
                        branch._next = this;
                    }

                    _children.InsertRange(read + 1, current._children);

                    // The nested alternation itself does not survive; reuse its slot.
                    write--;
                    continue;
                }

                if (current._type == Nothing)
                {
                    // Dropped; the remembered Set/One state is intentionally left alone.
                    write--;
                    continue;
                }

                if (current._type != Set && current._type != One)
                {
                    // Any other kind of branch ends the current run of mergeable nodes.
                    seenCharNode = false;
                    prevBlocksMerge = false;
                    continue;
                }

                // From here on 'current' is a Set or a One.
                RegexOptions currentOptions = current._options & (RegexOptions.RightToLeft | RegexOptions.IgnoreCase);

                // Only a Set can be unmergeable; a One never blocks merging.
                bool currentBlocksMerge = current._type == Set && !RegexCharClass.IsMergeable(current._str);

                bool canMerge = seenCharNode
                                && prevOptions == currentOptions
                                && !prevBlocksMerge
                                && !currentBlocksMerge;

                if (!canMerge)
                {
                    // Keep this branch as-is and make it the candidate for the next merge.
                    seenCharNode = true;
                    prevBlocksMerge = currentBlocksMerge;
                    prevOptions = currentOptions;
                    continue;
                }

                // Fold 'current' into the previously kept Set/One, which sits one slot back.
                write--;
                RegexNode target = _children[write];

                RegexCharClass merged;
                if (target._type != One)
                {
                    merged = RegexCharClass.Parse(target._str);
                }
                else
                {
                    merged = new RegexCharClass();
                    merged.AddChar(target._ch);
                }

                if (current._type != One)
                {
                    merged.AddCharClass(RegexCharClass.Parse(current._str));
                }
                else
                {
                    merged.AddChar(current._ch);
                }

                // The merged result is always a Set, even when both inputs were Ones.
                target._type = Set;
                target._str = merged.ToStringClass();
            }

            // Trim the tail left over after compaction.
            if (write < read)
            {
                _children.RemoveRange(write, read - write);
            }

            return StripEnation(Nothing);
        }
Пример #2
0
        /// <summary>
        /// Reduces this alternation node by rewriting its child list in place:
        ///
        /// a|b|c|def|g|h -> [a-c]|def|[gh]
        ///     neighbouring One/Set branches with compatible options become one Set
        /// apple|(?:orange|pear)|grape -> apple|orange|pear|grape
        ///     branches of a directly nested Alternate are pulled up into this node
        ///
        /// Nothing branches are removed while scanning.
        /// </summary>
        /// <returns>
        /// A fresh Nothing node (with this node's options) if the child list is
        /// null; otherwise the value returned by <c>StripEnation(Nothing)</c>
        /// once the child list has been rewritten.
        /// </returns>
        internal RegexNode ReduceAlternation()
        {
            if (_children == null)
            {
                return new RegexNode(Nothing, _options);
            }

            // Description of the last Set/One branch that was kept in the list.
            bool haveMergeCandidate = false;      // a Set/One was kept and nothing has reset the run since
            bool candidateUnmergeable = false;    // the candidate is a Set for which IsMergeable returned false
            RegexOptions candidateOptions = 0;    // the candidate's RightToLeft / IgnoreCase bits

            // 'src' visits each child in turn, 'dst' is where the next kept child
            // is stored. Both outlive the loop so the unused tail can be removed.
            int src;
            int dst;

            for (src = 0, dst = 0; src < _children.Count; src++, dst++)
            {
                RegexNode node = _children[src];

                if (dst < src)
                {
                    _children[dst] = node;
                }

                if (node._type == Alternate)
                {
                    // Hand the nested branches over to this node and insert them
                    // immediately after 'src' so they are processed next.
                    foreach (RegexNode nested in node._children)
                    {
                        nested._next = this;
                    }

                    _children.InsertRange(src + 1, node._children);

                    // The nested Alternate is discarded: give its slot back.
                    dst--;
                    continue;
                }

                if (node._type == Nothing)
                {
                    // Discarded without touching the merge-candidate state.
                    dst--;
                    continue;
                }

                bool isCharNode = node._type == Set || node._type == One;
                if (!isCharNode)
                {
                    // A non Set/One branch breaks the run; nothing may merge across it.
                    haveMergeCandidate = false;
                    candidateUnmergeable = false;
                    continue;
                }

                RegexOptions nodeOptions = node._options & (RegexOptions.RightToLeft | RegexOptions.IgnoreCase);

                // A One is always mergeable; a Set only when IsMergeable says so.
                bool nodeUnmergeable = node._type == Set && !RegexCharClass.IsMergeable(node._str);

                bool mustKeepSeparate = !haveMergeCandidate
                                        || candidateOptions != nodeOptions
                                        || candidateUnmergeable
                                        || nodeUnmergeable;

                if (mustKeepSeparate)
                {
                    // This branch stays in the list and becomes the new merge candidate.
                    haveMergeCandidate = true;
                    candidateUnmergeable = nodeUnmergeable;
                    candidateOptions = nodeOptions;
                    continue;
                }

                // Merge 'node' into the candidate stored in the previous slot.
                dst--;
                RegexNode candidate = _children[dst];

                RegexCharClass combined;
                if (candidate._type != One)
                {
                    combined = RegexCharClass.Parse(candidate._str);
                }
                else
                {
                    combined = new RegexCharClass();
                    combined.AddChar(candidate._ch);
                }

                if (node._type != One)
                {
                    combined.AddCharClass(RegexCharClass.Parse(node._str));
                }
                else
                {
                    combined.AddChar(node._ch);
                }

                // The candidate is now a Set regardless of what it was before.
                candidate._type = Set;
                candidate._str = combined.ToStringClass();
            }

            // Drop the slots that compaction left unused at the end of the list.
            if (dst < src)
            {
                _children.RemoveRange(dst, src - dst);
            }

            return StripEnation(Nothing);
        }