/// <summary>
/// Simplifies this alternation node in place. Adjacent single-character and
/// set alternatives are folded into one set, nested alternations are spliced
/// directly into this node's child list, and Nothing children are discarded:
///
///     a|b|c|def|g|h                -> [a-c]|def|[gh]
///     apple|(?:orange|pear)|grape  -> apple|orange|pear|grape
/// </summary>
/// <returns>
/// A new Nothing node carrying this node's options when there is no child
/// list; otherwise whatever StripEnation(Nothing) returns once the child list
/// has been compacted.
/// </returns>
internal RegexNode ReduceAlternation()
{
    if (_children == null)
    {
        return new RegexNode(Nothing, _options);
    }

    // Description of the most recently kept child, used to decide whether the
    // next Set/One child may be folded into it.
    bool previousWasCharLike = false;   // previous kept child was a Set or a One
    bool previousBlocksMerge = false;   // previous kept child was a Set rejected by IsMergeable
    RegexOptions previousOptions = 0;

    int read = 0;    // index of the child currently being examined
    int write = 0;   // slot that the next kept child will occupy

    for (; read < _children.Count; read++, write++)
    {
        RegexNode current = _children[read];

        // Compact the list as we go: slide the child down over dropped slots.
        if (write < read)
        {
            _children[write] = current;
        }

        if (current._type == Alternate)
        {
            // Flatten: re-parent the nested alternatives and queue them right
            // after the current position so the following iterations visit them.
            foreach (RegexNode nested in current._children)
            {
                nested._next = this;
            }

            _children.InsertRange(read + 1, current._children);
            write--;    // the nested Alternate node itself is dropped
            continue;
        }

        if (current._type == Nothing)
        {
            write--;    // drop the child; merge state is left untouched
            continue;
        }

        if (current._type != Set && current._type != One)
        {
            // Any other node kind interrupts a run of mergeable alternatives.
            previousWasCharLike = false;
            previousBlocksMerge = false;
            continue;
        }

        // Only the RightToLeft and IgnoreCase bits have to agree for a merge.
        RegexOptions currentOptions = current._options & (RegexOptions.RightToLeft | RegexOptions.IgnoreCase);

        // A One never blocks merging; a Set blocks it when IsMergeable rejects it.
        bool currentBlocksMerge = current._type == Set && !RegexCharClass.IsMergeable(current._str);

        bool canMerge = previousWasCharLike
            && previousOptions == currentOptions
            && !previousBlocksMerge
            && !currentBlocksMerge;

        if (!canMerge)
        {
            // Keep this child as-is; it becomes the candidate for the next merge.
            previousWasCharLike = true;
            previousBlocksMerge = currentBlocksMerge;
            previousOptions = currentOptions;
            continue;
        }

        // Fold the current child into the previously kept Set/One node.
        write--;
        RegexNode target = _children[write];

        RegexCharClass merged;
        if (target._type == One)
        {
            merged = new RegexCharClass();
            merged.AddChar(target._ch);
        }
        else
        {
            merged = RegexCharClass.Parse(target._str);
        }

        if (current._type == One)
        {
            merged.AddChar(current._ch);
        }
        else
        {
            merged.AddCharClass(RegexCharClass.Parse(current._str));
        }

        target._type = Set;
        target._str = merged.ToStringClass();
    }

    // Trim the tail slots left behind by dropped or merged children.
    if (write < read)
    {
        _children.RemoveRange(write, read - write);
    }

    return StripEnation(Nothing);
}
/// <summary>
/// Basic alternation optimization, performed in place on this node's children.
/// Neighbouring one-character and set branches are combined into a single set
/// node, and an alternation nested directly inside this one is flattened into
/// the parent's list of branches:
///
///     a|b|c|def|g|h                -> [a-c]|def|[gh]
///     apple|(?:orange|pear)|grape  -> apple|orange|pear|grape
///
/// Branches of type Nothing are removed along the way.
/// </summary>
/// <returns>
/// A fresh Nothing node (with this node's options) if the child list is null;
/// otherwise the value produced by StripEnation(Nothing).
/// </returns>
internal RegexNode ReduceAlternation()
{
    if (_children == null)
    {
        return new RegexNode(Nothing, _options);
    }

    // What we remember about the last branch that was kept.
    bool lastWasSetOrOne = false;
    bool lastUnmergeable = false;
    RegexOptions lastOpts = 0;

    int src = 0;    // branch being inspected
    int dst = 0;    // position the next surviving branch is written to

    while (src < _children.Count)
    {
        RegexNode node = _children[src];

        // Earlier removals leave a gap; shift the branch down to close it.
        if (dst < src)
        {
            _children[dst] = node;
        }

        bool nodeIsSet = node._type == Set;
        bool nodeIsOne = node._type == One;

        if (node._type == Alternate)
        {
            // Hoist the nested branches into this node: point them back at
            // this node and insert them immediately after the current index,
            // then give up the slot held by the nested alternation.
            for (int n = 0; n < node._children.Count; n++)
            {
                node._children[n]._next = this;
            }

            _children.InsertRange(src + 1, node._children);
            dst--;
        }
        else if (nodeIsSet || nodeIsOne)
        {
            // Merging requires matching RightToLeft/IgnoreCase bits and that
            // neither side is a Set that RegexCharClass.IsMergeable rejects.
            RegexOptions nodeOpts = node._options & (RegexOptions.RightToLeft | RegexOptions.IgnoreCase);
            bool nodeUnmergeable = nodeIsSet && !RegexCharClass.IsMergeable(node._str);

            bool startsNewRun = !lastWasSetOrOne
                || lastOpts != nodeOpts
                || lastUnmergeable
                || nodeUnmergeable;

            if (startsNewRun)
            {
                // Keep the branch untouched and remember it for the next one.
                lastWasSetOrOne = true;
                lastUnmergeable = nodeUnmergeable;
                lastOpts = nodeOpts;
            }
            else
            {
                // Same options and both sides mergeable: absorb this branch
                // into the branch kept just before it.
                dst--;
                RegexNode kept = _children[dst];

                RegexCharClass combined = kept._type == One
                    ? new RegexCharClass()
                    : RegexCharClass.Parse(kept._str);

                if (kept._type == One)
                {
                    combined.AddChar(kept._ch);
                }

                if (nodeIsOne)
                {
                    combined.AddChar(node._ch);
                }
                else
                {
                    RegexCharClass nodeClass = RegexCharClass.Parse(node._str);
                    combined.AddCharClass(nodeClass);
                }

                kept._type = Set;
                kept._str = combined.ToStringClass();
            }
        }
        else if (node._type == Nothing)
        {
            // Discard the branch without disturbing the merge bookkeeping.
            dst--;
        }
        else
        {
            // Any other kind of branch ends the current mergeable run.
            lastWasSetOrOne = false;
            lastUnmergeable = false;
        }

        src++;
        dst++;
    }

    // Remove the now-unused slots at the end of the list.
    if (dst < src)
    {
        _children.RemoveRange(dst, src - dst);
    }

    return StripEnation(Nothing);
}