// private methods

/// <summary>
/// Parses pattern text starting at the current <c>ptr</c> position until the enclosing
/// group ends (a ')' is consumed or the pattern runs out) and appends the resulting
/// expression tree to <paramref name="group"/>.
/// </summary>
/// <param name="group">
/// Receives the parsed content. When it is a <c>RegularExpression</c> the call is treated
/// as the top level: a ')' is then an error, while for any other group a missing ')' is.
/// </param>
/// <param name="options">
/// Options in effect when parsing starts. They are passed by reference to
/// <c>ParseGroupingConstruct</c>, so they can change while the group is being parsed.
/// </param>
/// <param name="assertion">
/// When non-null, the branches separated by '|' are stored in its <c>TrueExpression</c> and
/// <c>FalseExpression</c> and the assertion itself is appended to <paramref name="group"/>;
/// when null, branches are collected in an <c>Alternation</c>.
/// </param>
/// <remarks>
/// Throws the exception built by <c>NewParseException</c> for a quantifier with nothing to
/// repeat, for unbalanced parentheses and for a third '|' inside a conditional.
/// </remarks>
private void ParseGroup (Group group, RegexOptions options, Assertion assertion) {
    bool is_top_level = group is RegularExpression;

    Alternation alternation = null;
    string literal = null;          // plain characters collected but not yet appended to 'current'

    Group current = new Group ();   // branch currently being built
    Expression expr = null;
    bool closed = false;            // set once the terminating ')' has been consumed

    while (true) {
        ConsumeWhitespace (IsIgnorePatternWhitespace (options));
        if (ptr >= pattern.Length)
            break;

        // (1) Parse for Expressions

        char ch = pattern[ptr ++];

        switch (ch) {
        case '^': {
            Position pos = IsMultiline (options) ? Position.StartOfLine : Position.Start;
            expr = new PositionAssertion (pos);
            break;
        }

        case '$': {
            Position pos = IsMultiline (options) ? Position.EndOfLine : Position.End;
            expr = new PositionAssertion (pos);
            break;
        }

        case '.': {
            Category cat = IsSingleline (options) ? Category.AnySingleline : Category.Any;
            expr = new CharacterClass (cat, false);
            break;
        }

        case '\\': {
            int c = ParseEscape (false);
            if (c >= 0)
                ch = (char)c;       // escape resolved to a single literal character
            else {
                expr = ParseSpecial (options);

                if (expr == null)
                    ch = pattern[ptr ++];   // default escape
            }
            break;
        }

        case '[': {
            expr = ParseCharacterClass (options);
            break;
        }

        case '(': {
            bool ignore = IsIgnoreCase (options);
            expr = ParseGroupingConstruct (ref options);
            if (expr == null) {
                // No expression was produced, but 'options' may have been changed through
                // the ref argument. The pending literal was read under the previous
                // case-sensitivity, so it is flushed with that earlier setting rather
                // than the new one.
                if (literal != null && IsIgnoreCase (options) != ignore) {
                    current.AppendExpression (new Literal (literal, ignore));
                    literal = null;
                }

                continue;
            }
            break;
        }

        case ')': {
            closed = true;
            goto EndOfGroup;
        }

        case '|': {
            // Finish the current branch and start a new one.
            if (literal != null) {
                current.AppendExpression (new Literal (literal, IsIgnoreCase (options)));
                literal = null;
            }

            if (assertion != null) {
                if (assertion.TrueExpression == null)
                    assertion.TrueExpression = current;
                else if (assertion.FalseExpression == null)
                    assertion.FalseExpression = current;
                else
                    throw NewParseException ("Too many | in (?()|).");
            } else {
                if (alternation == null)
                    alternation = new Alternation ();

                alternation.AddAlternative (current);
            }

            current = new Group ();
            continue;
        }

        case '*': case '+': case '?': {
            // A quantifier here has no preceding expression to apply to.
            throw NewParseException ("Bad quantifier.");
        }

        default:
            break;      // literal character
        }

        ConsumeWhitespace (IsIgnorePatternWhitespace (options));

        // (2) Check for Repetitions

        if (ptr < pattern.Length) {
            char k = pattern[ptr];
            int min = 0, max = 0;
            bool lazy = false;
            bool haveRep = false;

            if (k == '?' || k == '*' || k == '+') {
                ++ ptr;
                haveRep = true;

                switch (k) {
                case '?': min = 0; max = 1; break;
                case '*': min = 0; max = 0x7fffffff; break;
                case '+': min = 1; max = 0x7fffffff; break;
                }
            } else if (k == '{' && ptr + 1 < pattern.Length) {
                // Try to read "{...}" bounds; on failure rewind so '{' is parsed
                // as an ordinary character on the next iteration.
                int saved_ptr = ptr;
                ++ ptr;
                haveRep = ParseRepetitionBounds (out min, out max, options);
                if (!haveRep)
                    ptr = saved_ptr;
            }

            if (haveRep) {
                ConsumeWhitespace (IsIgnorePatternWhitespace (options));
                if (ptr < pattern.Length && pattern[ptr] == '?') {
                    ++ ptr;
                    lazy = true;
                }

                // It doesn't make sense to assert a given position more than once:
                // a greedy quantifier with min > 0 is dropped entirely, any other
                // quantifier on a position assertion is capped at one iteration.
                bool ignore_repetition = false;
                if (expr is PositionAssertion) {
                    ignore_repetition = min > 0 && !lazy;
                    max = 1;
                }

                if (!ignore_repetition) {
                    Repetition repetition = new Repetition (min, max, lazy);

                    // A quantifier after a plain character applies to that character only.
                    if (expr == null)
                        repetition.Expression = new Literal (ch.ToString (), IsIgnoreCase (options));
                    else
                        repetition.Expression = expr;

                    expr = repetition;
                }
            }
        }

        // (3) Append Expression and/or Literal

        if (expr == null) {
            if (literal == null)
                literal = "";
            literal += ch;
        } else {
            // Flush the pending literal first so ordering within the branch is kept.
            if (literal != null) {
                current.AppendExpression (new Literal (literal, IsIgnoreCase (options)));
                literal = null;
            }

            current.AppendExpression (expr);
            expr = null;
        }

        if (is_top_level && ptr >= pattern.Length)
            goto EndOfGroup;
    }

EndOfGroup:
    if (is_top_level && closed)
        throw NewParseException ("Too many )'s.");
    if (!is_top_level && !closed)
        throw NewParseException ("Not enough )'s.");

    // clean up literals and alternations

    if (literal != null)
        current.AppendExpression (new Literal (literal, IsIgnoreCase (options)));

    if (assertion != null) {
        // NOTE(review): when two '|' were seen, FalseExpression is already set and is
        // overwritten here, so the middle branch is silently discarded. Confirm whether
        // this case should raise "Too many | in (?()|)." instead.
        if (assertion.TrueExpression == null)
            assertion.TrueExpression = current;
        else
            assertion.FalseExpression = current;

        group.AppendExpression (assertion);
    } else if (alternation != null) {
        alternation.AddAlternative (current);
        group.AppendExpression (alternation);
    } else
        group.AppendExpression (current);
}
// private methods

/// <summary>
/// Parses pattern text starting at the current <c>ptr</c> position until the enclosing
/// group ends (a ')' is consumed or the pattern runs out) and appends the resulting
/// expression tree to <paramref name="group"/>.
/// </summary>
/// <param name="group">
/// Receives the parsed content. When it is a <c>RegularExpression</c> the call is treated
/// as the top level: a ')' is then an error, while for any other group a missing ')' is.
/// </param>
/// <param name="options">
/// Options in effect when parsing starts. They are passed by reference to
/// <c>ParseGroupingConstruct</c>, so they can change while the group is being parsed.
/// </param>
/// <param name="assertion">
/// When non-null, the branches separated by '|' are stored in its <c>TrueExpression</c> and
/// <c>FalseExpression</c> and the assertion itself is appended to <paramref name="group"/>;
/// when null, branches are collected in an <c>Alternation</c>.
/// </param>
/// <remarks>
/// Throws the exception built by <c>NewParseException</c> for a quantifier with nothing to
/// repeat, for unbalanced parentheses and for a third '|' inside a conditional.
/// </remarks>
private void ParseGroup(Group group, RegexOptions options, Assertion assertion)
{
    bool is_top_level = group is RegularExpression;

    Alternation alternation = null;
    string literal = null;          // plain characters collected but not yet appended to 'current'

    Group current = new Group();    // branch currently being built
    Expression expr = null;
    bool closed = false;            // set once the terminating ')' has been consumed

    while (true)
    {
        ConsumeWhitespace(IsIgnorePatternWhitespace(options));
        if (ptr >= pattern.Length)
        {
            break;
        }

        // (1) Parse for Expressions

        char ch = pattern[ptr++];

        switch (ch)
        {
            case '^':
                {
                    Position pos = IsMultiline(options) ? Position.StartOfLine : Position.Start;
                    expr = new PositionAssertion(pos);
                    break;
                }

            case '$':
                {
                    Position pos = IsMultiline(options) ? Position.EndOfLine : Position.End;
                    expr = new PositionAssertion(pos);
                    break;
                }

            case '.':
                {
                    Category cat = IsSingleline(options) ? Category.AnySingleline : Category.Any;
                    expr = new CharacterClass(cat, false);
                    break;
                }

            case '\\':
                {
                    int c = ParseEscape();
                    if (c >= 0)
                    {
                        ch = (char)c;   // escape resolved to a single literal character
                    }
                    else
                    {
                        expr = ParseSpecial(options);

                        if (expr == null)
                        {
                            ch = pattern[ptr++];    // default escape
                        }
                    }
                    break;
                }

            case '[':
                {
                    expr = ParseCharacterClass(options);
                    break;
                }

            case '(':
                {
                    bool ignore = IsIgnoreCase(options);
                    expr = ParseGroupingConstruct(ref options);
                    if (expr == null)
                    {
                        // No expression was produced, but 'options' may have been changed
                        // through the ref argument. The pending literal was read under the
                        // previous case-sensitivity, so it is flushed with that earlier
                        // setting rather than the new one.
                        if (literal != null && IsIgnoreCase(options) != ignore)
                        {
                            current.AppendExpression(new Literal(literal, ignore));
                            literal = null;
                        }

                        continue;
                    }
                    break;
                }

            case ')':
                {
                    closed = true;
                    goto EndOfGroup;
                }

            case '|':
                {
                    // Finish the current branch and start a new one.
                    if (literal != null)
                    {
                        current.AppendExpression(new Literal(literal, IsIgnoreCase(options)));
                        literal = null;
                    }

                    if (assertion != null)
                    {
                        if (assertion.TrueExpression == null)
                        {
                            assertion.TrueExpression = current;
                        }
                        else if (assertion.FalseExpression == null)
                        {
                            assertion.FalseExpression = current;
                        }
                        else
                        {
                            throw NewParseException("Too many | in (?()|).");
                        }
                    }
                    else
                    {
                        if (alternation == null)
                        {
                            alternation = new Alternation();
                        }

                        alternation.AddAlternative(current);
                    }

                    current = new Group();
                    continue;
                }

            case '*':
            case '+':
            case '?':
                {
                    // A quantifier here has no preceding expression to apply to.
                    throw NewParseException("Bad quantifier.");
                }

            default:
                break;      // literal character
        }

        ConsumeWhitespace(IsIgnorePatternWhitespace(options));

        // (2) Check for Repetitions

        if (ptr < pattern.Length)
        {
            char k = pattern[ptr];
            int min = 0, max = 0;
            bool lazy = false;
            bool haveRep = false;

            if (k == '?' || k == '*' || k == '+')
            {
                ++ptr;
                haveRep = true;

                switch (k)
                {
                    case '?': min = 0; max = 1; break;
                    case '*': min = 0; max = 0xffff; break;
                    case '+': min = 1; max = 0xffff; break;
                }
            }
            else if (k == '{' && ptr + 1 < pattern.Length)
            {
                // Try to read "{...}" bounds; on failure rewind so '{' is parsed
                // as an ordinary character on the next iteration.
                int saved_ptr = ptr;
                ++ptr;
                haveRep = ParseRepetitionBounds(out min, out max, options);
                if (!haveRep)
                {
                    ptr = saved_ptr;
                }
            }

            if (haveRep)
            {
                ConsumeWhitespace(IsIgnorePatternWhitespace(options));
                if (ptr < pattern.Length && pattern[ptr] == '?')
                {
                    ++ptr;
                    lazy = true;
                }

                Repetition repetition = new Repetition(min, max, lazy);

                // A quantifier after a plain character applies to that character only.
                if (expr == null)
                {
                    repetition.Expression = new Literal(ch.ToString(), IsIgnoreCase(options));
                }
                else
                {
                    repetition.Expression = expr;
                }

                expr = repetition;
            }
        }

        // (3) Append Expression and/or Literal

        if (expr == null)
        {
            if (literal == null)
            {
                literal = "";
            }
            literal += ch;
        }
        else
        {
            // Flush the pending literal first so ordering within the branch is kept.
            if (literal != null)
            {
                current.AppendExpression(new Literal(literal, IsIgnoreCase(options)));
                literal = null;
            }

            current.AppendExpression(expr);
            expr = null;
        }

        if (is_top_level && ptr >= pattern.Length)
        {
            goto EndOfGroup;
        }
    }

EndOfGroup:
    if (is_top_level && closed)
    {
        throw NewParseException("Too many )'s.");
    }
    if (!is_top_level && !closed)
    {
        throw NewParseException("Not enough )'s.");
    }

    // clean up literals and alternations

    if (literal != null)
    {
        current.AppendExpression(new Literal(literal, IsIgnoreCase(options)));
    }

    if (assertion != null)
    {
        // NOTE(review): when two '|' were seen, FalseExpression is already set and is
        // overwritten here, so the middle branch is silently discarded. Confirm whether
        // this case should raise "Too many | in (?()|)." instead.
        if (assertion.TrueExpression == null)
        {
            assertion.TrueExpression = current;
        }
        else
        {
            assertion.FalseExpression = current;
        }

        group.AppendExpression(assertion);
    }
    else if (alternation != null)
    {
        alternation.AddAlternative(current);
        group.AppendExpression(alternation);
    }
    else
    {
        group.AppendExpression(current);
    }
}