/// <summary>
/// Parses the grouping construct that starts at <c>pattern[ptr]</c>, i.e. the text that
/// follows an opening '(' already consumed by the caller. Handles plain groups, (?:...),
/// (?&gt;...), inline options, look-around assertions, named and balancing groups,
/// conditionals (?(...)...) and (?#...) comments.
/// </summary>
/// <param name="options">
/// Options in effect for the construct. Overwritten only when the construct is an inline
/// option change of the form (?imnsx-imnsx) that applies to the enclosing group.
/// </param>
/// <returns>
/// The parsed expression, or <c>null</c> when the construct produces no expression
/// (an inline option change or a comment).
/// </returns>
private Expression ParseGroupingConstruct (ref RegexOptions options)
{
    // No '?' after the '(': a plain group, capturing unless explicit capture is on.
    if (pattern[ptr] != '?') {
        Group group;

        if (IsExplicitCapture (options))
            group = new Group ();
        else {
            group = new CapturingGroup ();
            caps.Add (group);
        }

        ParseGroup (group, options, null);
        return group;
    }

    ++ ptr;     // skip the '?'

    switch (pattern[ptr]) {
    case ':': {                     // non-capturing group
        ++ ptr;
        Group group = new Group ();
        ParseGroup (group, options, null);

        return group;
    }

    case '>': {                     // non-backtracking group
        ++ ptr;
        Group group = new NonBacktrackingGroup ();
        ParseGroup (group, options, null);

        return group;
    }

    case 'i': case 'm': case 'n':
    case 's': case 'x': case '-': { // options
        // Work on a copy so the caller's options only change for the "(?opts)" form.
        RegexOptions o = options;
        ParseOptions (ref o, false);
        if (pattern[ptr] == '-') {
            ++ ptr;
            ParseOptions (ref o, true);
        }

        if (pattern[ptr] == ':') {          // pass options to child group
            ++ ptr;
            Group group = new Group ();
            ParseGroup (group, o, null);

            return group;
        }
        else if (pattern[ptr] == ')') {     // change options of enclosing group
            ++ ptr;
            options = o;

            return null;
        }
        else
            throw NewParseException ("Bad options");
    }

    case '<': case '=': case '!': { // lookahead/lookbehind
        ExpressionAssertion asn = new ExpressionAssertion ();
        if (!ParseAssertionType (asn))
            goto case '\'';         // it's a (?<name> ) construct

        Group test = new Group ();
        ParseGroup (test, options, null);

        asn.TestExpression = test;
        return asn;
    }

    case '\'': {                    // named/balancing group
        // The closing delimiter mirrors the opening one: <name> or 'name'.
        char delim;
        if (pattern[ptr] == '<')
            delim = '>';
        else
            delim = '\'';

        ++ ptr;
        string name = ParseName ();

        if (pattern[ptr] == delim) {
            // capturing group

            if (name == null)
                throw NewParseException ("Bad group name.");

            ++ ptr;
            CapturingGroup cap = new CapturingGroup ();
            cap.Name = name;
            caps.Add (cap);
            ParseGroup (cap, options, null);

            return cap;
        }
        else if (pattern[ptr] == '-') {
            // balancing group

            ++ ptr;
            string balance_name = ParseName ();
            if (balance_name == null || pattern[ptr] != delim)
                throw NewParseException ("Bad balancing group name.");

            ++ ptr;
            BalancingGroup bal = new BalancingGroup ();
            bal.Name = name;

            // Only a named balancing group is registered as a capture.
            if (bal.IsNamed) {
                caps.Add (bal);
            }

            refs.Add (bal, balance_name);

            ParseGroup (bal, options, null);

            return bal;
        }
        else
            throw NewParseException ("Bad group name.");
    }

    case '(': {                     // expression/capture test
        Assertion asn;

        ++ ptr;
        int p = ptr;                // remembered so the name attempt can be undone
        string name = ParseName ();
        if (name == null || pattern[ptr] != ')') {  // expression test
            // FIXME MS implementation doesn't seem to
            // implement this version of (?(x) ...)

            ptr = p;
            ExpressionAssertion expr_asn = new ExpressionAssertion ();

            if (pattern[ptr] == '?') {
                ++ ptr;
                if (!ParseAssertionType (expr_asn))
                    throw NewParseException ("Bad conditional.");
            }
            else {
                expr_asn.Negate = false;
                expr_asn.Reverse = false;
            }

            Group test = new Group ();
            ParseGroup (test, options, null);
            expr_asn.TestExpression = test;
            asn = expr_asn;
        }
        else {                      // capture test
            ++ ptr;
            asn = new CaptureAssertion (new Literal (name, IsIgnoreCase (options)));
            refs.Add (asn, name);
        }

        // The yes/no branches are parsed by ParseGroup, which fills in the assertion.
        Group group = new Group ();
        ParseGroup (group, options, asn);
        return group;
    }

    case '#': {                     // comment
        ++ ptr;
        // Check the bound before every read so that an unterminated comment,
        // including an empty one at the very end of the pattern, is reported
        // as a parse error instead of indexing past the end of the pattern.
        while (true) {
            if (ptr >= pattern.Length)
                throw NewParseException ("Unterminated (?#...) comment.");
            if (pattern[ptr ++] == ')')
                break;
        }
        return null;
    }

    default:                        // error
        throw NewParseException ("Bad grouping construct.");
    }
}
// private methods

/// <summary>
/// Parses pattern text starting at <c>ptr</c> into <paramref name="group"/>, stopping at
/// the matching ')' or, for the top-level group (a <c>RegularExpression</c>), at the end
/// of the pattern.
/// </summary>
/// <param name="group">Group that receives the parsed content.</param>
/// <param name="options">Options in effect; inline (?imnsx-imnsx) constructs update the local copy.</param>
/// <param name="assertion">
/// When not <c>null</c>, the '|'-separated branches are stored as the assertion's
/// TrueExpression / FalseExpression and the assertion itself is appended to the group;
/// otherwise '|' builds an <c>Alternation</c>.
/// </param>
/// <remarks>
/// Raises the exception built by <c>NewParseException</c> for a dangling quantifier,
/// too many '|' in a conditional, and unbalanced parentheses.
/// </remarks>
private void ParseGroup (Group group, RegexOptions options, Assertion assertion)
{
    bool is_top_level = group is RegularExpression;

    Alternation alternation = null;
    string literal = null;          // run of literal characters not yet appended

    Group current = new Group ();   // branch currently being filled
    Expression expr = null;
    bool closed = false;

    while (true) {
        ConsumeWhitespace (IsIgnorePatternWhitespace (options));
        if (ptr >= pattern.Length)
            break;

        // (1) Parse for Expressions

        char ch = pattern[ptr ++];

        switch (ch) {
        case '^': {
            Position pos =
                IsMultiline (options) ? Position.StartOfLine : Position.Start;
            expr = new PositionAssertion (pos);
            break;
        }

        case '$': {
            Position pos =
                IsMultiline (options) ? Position.EndOfLine : Position.End;
            expr = new PositionAssertion (pos);
            break;
        }

        case '.': {
            Category cat =
                IsSingleline (options) ? Category.AnySingleline : Category.Any;
            expr = new CharacterClass (cat, false);
            break;
        }

        case '\\': {
            int c = ParseEscape (false);
            if (c >= 0)
                ch = (char)c;
            else {
                expr = ParseSpecial (options);

                if (expr == null)
                    ch = pattern[ptr ++];   // default escape
            }
            break;
        }

        case '[': {
            expr = ParseCharacterClass (options);
            break;
        }

        case '(': {
            bool ignore = IsIgnoreCase (options);
            expr = ParseGroupingConstruct (ref options);
            if (expr == null) {
                // The construct produced no expression (option change or comment).
                // If case sensitivity changed, flush the pending literal with the
                // flag it was parsed under, not the new one, so that in
                // "abc(?i)def" only "def" becomes case-insensitive.
                if (literal != null && IsIgnoreCase (options) != ignore) {
                    current.AppendExpression (new Literal (literal, ignore));
                    literal = null;
                }

                continue;
            }
            break;
        }

        case ')': {
            closed = true;
            goto EndOfGroup;
        }

        case '|': {
            if (literal != null) {
                current.AppendExpression (new Literal (literal, IsIgnoreCase (options)));
                literal = null;
            }

            if (assertion != null) {
                if (assertion.TrueExpression == null)
                    assertion.TrueExpression = current;
                else if (assertion.FalseExpression == null)
                    assertion.FalseExpression = current;
                else
                    throw NewParseException ("Too many | in (?()|).");
            }
            else {
                if (alternation == null)
                    alternation = new Alternation ();

                alternation.AddAlternative (current);
            }

            current = new Group ();
            continue;
        }

        case '*': case '+': case '?': {
            // A quantifier here has nothing to apply to.
            throw NewParseException ("Bad quantifier.");
        }

        default:
            break;      // literal character
        }

        ConsumeWhitespace (IsIgnorePatternWhitespace (options));

        // (2) Check for Repetitions

        if (ptr < pattern.Length) {
            char k = pattern[ptr];
            int min = 0, max = 0;
            bool lazy = false;
            bool haveRep = false;

            if (k == '?' || k == '*' || k == '+') {
                ++ ptr;
                haveRep = true;

                switch (k) {
                case '?': min = 0; max = 1; break;
                case '*': min = 0; max = 0x7fffffff; break;
                case '+': min = 1; max = 0x7fffffff; break;
                }
            } else if (k == '{' && ptr + 1 < pattern.Length) {
                // Not every '{' starts a quantifier; rewind when the bounds do not parse.
                int saved_ptr = ptr;
                ++ptr;
                haveRep = ParseRepetitionBounds (out min, out max, options);
                if (!haveRep)
                    ptr = saved_ptr;
            }

            if (haveRep) {
                ConsumeWhitespace (IsIgnorePatternWhitespace (options));
                if (ptr < pattern.Length && pattern[ptr] == '?') {
                    ++ ptr;
                    lazy = true;
                }

                //It doesn't make sense to assert a given position more than once.
                bool ignore_repetition = false;
                if (expr is PositionAssertion) {
                    ignore_repetition = min > 0 && !lazy;
                    max = 1;
                }

                if (!ignore_repetition) {
                    Repetition repetition = new Repetition (min, max, lazy);

                    // A quantified plain character becomes its own one-character literal.
                    if (expr == null)
                        repetition.Expression = new Literal (ch.ToString (), IsIgnoreCase (options));
                    else
                        repetition.Expression = expr;

                    expr = repetition;
                }
            }
        }

        // (3) Append Expression and/or Literal

        if (expr == null) {
            if (literal == null)
                literal = "";
            literal += ch;
        }
        else {
            if (literal != null) {
                current.AppendExpression (new Literal (literal, IsIgnoreCase (options)));
                literal = null;
            }

            current.AppendExpression (expr);
            expr = null;
        }

        if (is_top_level && ptr >= pattern.Length)
            goto EndOfGroup;
    }

EndOfGroup:
    if (is_top_level && closed)
        throw NewParseException ("Too many )'s.");
    if (!is_top_level && !closed)
        throw NewParseException ("Not enough )'s.");

    // clean up literals and alternations

    if (literal != null)
        current.AppendExpression (new Literal (literal, IsIgnoreCase (options)));

    if (assertion != null) {
        // NOTE(review): after two '|' in a conditional, the branch stored by the
        // second '|' is overwritten here rather than rejected - confirm intent.
        if (assertion.TrueExpression == null)
            assertion.TrueExpression = current;
        else
            assertion.FalseExpression = current;

        group.AppendExpression (assertion);
    }
    else if (alternation != null) {
        alternation.AddAlternative (current);
        group.AppendExpression (alternation);
    }
    else
        group.AppendExpression (current);
}
/// <summary>
/// Parses the grouping construct that starts at <c>pattern[ptr]</c>, i.e. the text that
/// follows an opening '(' already consumed by the caller. Handles plain groups, (?:...),
/// (?&gt;...), inline options, look-around assertions, named and balancing groups,
/// conditionals (?(...)...) and (?#...) comments.
/// </summary>
/// <param name="options">
/// Options in effect for the construct. Overwritten only when the construct is an inline
/// option change of the form (?imnsx-imnsx) that applies to the enclosing group.
/// </param>
/// <returns>
/// The parsed expression, or <c>null</c> when the construct produces no expression
/// (an inline option change or a comment).
/// </returns>
private Expression ParseGroupingConstruct(ref RegexOptions options)
{
    // No '?' after the '(': a plain group, capturing unless explicit capture is on.
    if (pattern[ptr] != '?')
    {
        Group group;
        if (IsExplicitCapture(options))
        {
            group = new Group();
        }
        else
        {
            group = new CapturingGroup();
            caps.Add(group);
        }
        ParseGroup(group, options, null);
        return group;
    }

    ptr++; // skip the '?'
    switch (pattern[ptr])
    {
        case ':':
        {
            // Non-capturing group.
            ptr++;
            Group group6 = new Group();
            ParseGroup(group6, options, null);
            return group6;
        }
        case '>':
        {
            // Non-backtracking group.
            ptr++;
            Group group2 = new NonBacktrackingGroup();
            ParseGroup(group2, options, null);
            return group2;
        }
        case '-':
        case 'i':
        case 'm':
        case 'n':
        case 's':
        case 'x':
        {
            // Inline options. Work on a copy so the caller's options only change
            // for the "(?opts)" form.
            RegexOptions options2 = options;
            ParseOptions(ref options2, negate: false);
            if (pattern[ptr] == '-')
            {
                ptr++;
                ParseOptions(ref options2, negate: true);
            }
            if (pattern[ptr] == ':')
            {
                // Options apply to the child group only.
                ptr++;
                Group group3 = new Group();
                ParseGroup(group3, options2, null);
                return group3;
            }
            if (pattern[ptr] == ')')
            {
                // Options apply to the rest of the enclosing group.
                ptr++;
                options = options2;
                return null;
            }
            throw NewParseException("Bad options");
        }
        case '!':
        case '<':
        case '=':
        {
            // Look-ahead / look-behind; when ParseAssertionType rejects it, the
            // text is treated as a (?<name>...) construct instead.
            ExpressionAssertion expressionAssertion2 = new ExpressionAssertion();
            if (!ParseAssertionType(expressionAssertion2))
            {
                goto case '\'';
            }
            Group group7 = new Group();
            ParseGroup(group7, options, null);
            expressionAssertion2.TestExpression = group7;
            return expressionAssertion2;
        }
        case '\'':
        {
            // Named or balancing group; the closing delimiter mirrors the opening
            // one: <name> or 'name'.
            char c = (pattern[ptr] != '<') ? '\'' : '>';
            ptr++;
            string text2 = ParseName();
            if (pattern[ptr] == c)
            {
                if (text2 == null)
                {
                    throw NewParseException("Bad group name.");
                }
                ptr++;
                CapturingGroup capturingGroup = new CapturingGroup();
                capturingGroup.Name = text2;
                caps.Add(capturingGroup);
                ParseGroup(capturingGroup, options, null);
                return capturingGroup;
            }
            if (pattern[ptr] == '-')
            {
                ptr++;
                string text3 = ParseName();
                if (text3 == null || pattern[ptr] != c)
                {
                    throw NewParseException("Bad balancing group name.");
                }
                ptr++;
                BalancingGroup balancingGroup = new BalancingGroup();
                balancingGroup.Name = text2;
                // Only a named balancing group is registered as a capture.
                if (balancingGroup.IsNamed)
                {
                    caps.Add(balancingGroup);
                }
                refs.Add(balancingGroup, text3);
                ParseGroup(balancingGroup, options, null);
                return balancingGroup;
            }
            throw NewParseException("Bad group name.");
        }
        case '(':
        {
            // Conditional: either a capture test "(?(name)...)" or an expression test.
            ptr++;
            int num = ptr; // remembered so the name attempt can be undone
            string text = ParseName();
            Assertion assertion;
            if (text == null || pattern[ptr] != ')')
            {
                ptr = num;
                ExpressionAssertion expressionAssertion = new ExpressionAssertion();
                if (pattern[ptr] == '?')
                {
                    ptr++;
                    if (!ParseAssertionType(expressionAssertion))
                    {
                        throw NewParseException("Bad conditional.");
                    }
                }
                else
                {
                    expressionAssertion.Negate = false;
                    expressionAssertion.Reverse = false;
                }
                Group group4 = new Group();
                ParseGroup(group4, options, null);
                expressionAssertion.TestExpression = group4;
                assertion = expressionAssertion;
            }
            else
            {
                ptr++;
                assertion = new CaptureAssertion(new Literal(text, IsIgnoreCase(options)));
                refs.Add(assertion, text);
            }
            // The yes/no branches are parsed by ParseGroup, which fills in the assertion.
            Group group5 = new Group();
            ParseGroup(group5, options, assertion);
            return group5;
        }
        case '#':
            // Comment. Check the bound before every read so that an unterminated
            // comment, including an empty one at the very end of the pattern, is
            // reported as a parse error instead of indexing past the pattern.
            ptr++;
            while (true)
            {
                if (ptr >= pattern.Length)
                {
                    throw NewParseException("Unterminated (?#...) comment.");
                }
                if (pattern[ptr++] == ')')
                {
                    break;
                }
            }
            return null;
        default:
            throw NewParseException("Bad grouping construct.");
    }
}
/// <summary>
/// Parses pattern text starting at <c>ptr</c> into <paramref name="group"/>, stopping at
/// the matching ')' or, for the top-level group (a <c>RegularExpression</c>), at the end
/// of the pattern.
/// </summary>
/// <param name="group">Group that receives the parsed content.</param>
/// <param name="options">Options in effect; inline (?imnsx-imnsx) constructs update the local copy.</param>
/// <param name="assertion">
/// When not <c>null</c>, the '|'-separated branches are stored as the assertion's
/// TrueExpression / FalseExpression and the assertion itself is appended to the group;
/// otherwise '|' builds an <c>Alternation</c>.
/// </param>
/// <remarks>
/// Raises the exception built by <c>NewParseException</c> for a dangling quantifier,
/// too many '|' in a conditional, and unbalanced parentheses.
/// </remarks>
private void ParseGroup(Group group, RegexOptions options, Assertion assertion)
{
    bool isTopLevel = group is RegularExpression;
    Alternation alternation = null;
    string literal = null;         // run of literal characters not yet appended
    Group current = new Group();   // branch currently being filled
    Expression expr = null;
    bool closed = false;
    while (true)
    {
        ConsumeWhitespace(IsIgnorePatternWhitespace(options));
        if (ptr >= pattern.Length)
        {
            break;
        }
        char c = pattern[ptr++];
        switch (c)
        {
            case '^':
            {
                Position pos = (!IsMultiline(options)) ? Position.Start : Position.StartOfLine;
                expr = new PositionAssertion(pos);
                goto default;
            }
            case '$':
            {
                Position pos2 = (!IsMultiline(options)) ? Position.End : Position.EndOfLine;
                expr = new PositionAssertion(pos2);
                goto default;
            }
            case '.':
            {
                Category cat = (!IsSingleline(options)) ? Category.Any : Category.AnySingleline;
                expr = new CharacterClass(cat, negate: false);
                goto default;
            }
            case '\\':
            {
                int escaped = ParseEscape();
                if (escaped >= 0)
                {
                    c = (char)escaped;
                }
                else
                {
                    expr = ParseSpecial(options);
                    if (expr == null)
                    {
                        // Default escape: the next character stands for itself.
                        c = pattern[ptr++];
                    }
                }
                goto default;
            }
            case '[':
                expr = ParseCharacterClass(options);
                goto default;
            case '(':
            {
                bool ignore = IsIgnoreCase(options);
                expr = ParseGroupingConstruct(ref options);
                if (expr == null)
                {
                    // The construct produced no expression (option change or comment).
                    // If case sensitivity changed, flush the pending literal with the
                    // flag it was parsed under, not the new one, so that in
                    // "abc(?i)def" only "def" becomes case-insensitive.
                    if (literal != null && IsIgnoreCase(options) != ignore)
                    {
                        current.AppendExpression(new Literal(literal, ignore));
                        literal = null;
                    }
                    continue;
                }
                goto default;
            }
            case ')':
                // Leaves the switch; the break after the switch then ends the loop.
                closed = true;
                break;
            case '|':
                if (literal != null)
                {
                    current.AppendExpression(new Literal(literal, IsIgnoreCase(options)));
                    literal = null;
                }
                if (assertion != null)
                {
                    if (assertion.TrueExpression == null)
                    {
                        assertion.TrueExpression = current;
                    }
                    else
                    {
                        if (assertion.FalseExpression != null)
                        {
                            throw NewParseException("Too many | in (?()|).");
                        }
                        assertion.FalseExpression = current;
                    }
                }
                else
                {
                    if (alternation == null)
                    {
                        alternation = new Alternation();
                    }
                    alternation.AddAlternative(current);
                }
                current = new Group();
                continue;
            case '*':
            case '+':
            case '?':
                // A quantifier here has nothing to apply to.
                throw NewParseException("Bad quantifier.");
            default:
                // Reached for literal characters and, via goto, for every case that
                // produced an expression: look for a quantifier, then append.
                ConsumeWhitespace(IsIgnorePatternWhitespace(options));
                if (ptr < pattern.Length)
                {
                    char k = pattern[ptr];
                    int min = 0;
                    int max = 0;
                    bool lazy = false;
                    bool haveRep = false;
                    switch (k)
                    {
                        case '*':
                        case '+':
                        case '?':
                            ptr++;
                            haveRep = true;
                            switch (k)
                            {
                                case '?':
                                    min = 0;
                                    max = 1;
                                    break;
                                case '*':
                                    min = 0;
                                    max = int.MaxValue;
                                    break;
                                case '+':
                                    min = 1;
                                    max = int.MaxValue;
                                    break;
                            }
                            break;
                        case '{':
                            if (ptr + 1 < pattern.Length)
                            {
                                // Not every '{' starts a quantifier; rewind when the
                                // bounds do not parse.
                                int savedPtr = ptr;
                                ptr++;
                                haveRep = ParseRepetitionBounds(out min, out max, options);
                                if (!haveRep)
                                {
                                    ptr = savedPtr;
                                }
                            }
                            break;
                    }
                    if (haveRep)
                    {
                        ConsumeWhitespace(IsIgnorePatternWhitespace(options));
                        if (ptr < pattern.Length && pattern[ptr] == '?')
                        {
                            ptr++;
                            lazy = true;
                        }
                        Repetition repetition = new Repetition(min, max, lazy);
                        // A quantified plain character becomes its own one-character literal.
                        if (expr == null)
                        {
                            repetition.Expression = new Literal(c.ToString(), IsIgnoreCase(options));
                        }
                        else
                        {
                            repetition.Expression = expr;
                        }
                        expr = repetition;
                    }
                }
                if (expr == null)
                {
                    if (literal == null)
                    {
                        literal = string.Empty;
                    }
                    literal += c;
                }
                else
                {
                    if (literal != null)
                    {
                        current.AppendExpression(new Literal(literal, IsIgnoreCase(options)));
                        literal = null;
                    }
                    current.AppendExpression(expr);
                    expr = null;
                }
                if (isTopLevel && ptr >= pattern.Length)
                {
                    break;
                }
                continue;
        }
        break;
    }

    if (isTopLevel && closed)
    {
        throw NewParseException("Too many )'s.");
    }
    if (!isTopLevel && !closed)
    {
        throw NewParseException("Not enough )'s.");
    }

    // Flush what is still pending and attach the result to the group.
    if (literal != null)
    {
        current.AppendExpression(new Literal(literal, IsIgnoreCase(options)));
    }
    if (assertion != null)
    {
        // NOTE(review): after two '|' in a conditional, the branch stored by the
        // second '|' is overwritten here rather than rejected - confirm intent.
        if (assertion.TrueExpression == null)
        {
            assertion.TrueExpression = current;
        }
        else
        {
            assertion.FalseExpression = current;
        }
        group.AppendExpression(assertion);
    }
    else if (alternation != null)
    {
        alternation.AddAlternative(current);
        group.AppendExpression(alternation);
    }
    else
    {
        group.AppendExpression(current);
    }
}
/// <summary>
/// Parses the grouping construct that starts at <c>pattern[ptr]</c>, i.e. the text that
/// follows an opening '(' already consumed by the caller. Handles plain groups, (?:...),
/// (?&gt;...), inline options, look-around assertions, named and balancing groups,
/// conditionals (?(...)...) and (?#...) comments.
/// </summary>
/// <param name="options">
/// Options in effect for the construct. Overwritten only when the construct is an inline
/// option change of the form (?imnsx-imnsx) that applies to the enclosing group.
/// </param>
/// <returns>
/// The parsed expression, or <c>null</c> when the construct produces no expression
/// (an inline option change or a comment).
/// </returns>
private Expression ParseGroupingConstruct(ref RegexOptions options)
{
    // No '?' after the '(': a plain group, capturing unless explicit capture is on.
    if (pattern[ptr] != '?')
    {
        Group group;

        if (IsExplicitCapture(options))
        {
            group = new Group();
        }
        else
        {
            group = new CapturingGroup();
            caps.Add(group);
        }

        ParseGroup(group, options, null);
        return(group);
    }

    ++ptr; // skip the '?'

    switch (pattern[ptr])
    {
        case ':':
        {
            // non-capturing group
            ++ptr;
            Group group = new Group();
            ParseGroup(group, options, null);

            return(group);
        }

        case '>':
        {
            // non-backtracking group
            ++ptr;
            Group group = new NonBacktrackingGroup();
            ParseGroup(group, options, null);

            return(group);
        }

        case 'i':
        case 'm':
        case 'n':
        case 's':
        case 'x':
        case '-':
        {
            // options; work on a copy so the caller's options only change for
            // the "(?opts)" form
            RegexOptions o = options;
            ParseOptions(ref o, false);
            if (pattern[ptr] == '-')
            {
                ++ptr;
                ParseOptions(ref o, true);
            }

            if (pattern[ptr] == ':')
            {
                // pass options to child group
                ++ptr;
                Group group = new Group();
                ParseGroup(group, o, null);

                return(group);
            }
            else if (pattern[ptr] == ')')
            {
                // change options of enclosing group
                ++ptr;
                options = o;

                return(null);
            }
            else
            {
                throw NewParseException("Bad options");
            }
        }

        case '<':
        case '=':
        case '!':
        {
            // lookahead/lookbehind
            ExpressionAssertion asn = new ExpressionAssertion();
            if (!ParseAssertionType(asn))
            {
                goto case '\'';     // it's a (?<name> ) construct
            }

            Group test = new Group();
            ParseGroup(test, options, null);

            asn.TestExpression = test;
            return(asn);
        }

        case '\'':
        {
            // named/balancing group; the closing delimiter mirrors the opening
            // one: <name> or 'name'
            char delim;
            if (pattern[ptr] == '<')
            {
                delim = '>';
            }
            else
            {
                delim = '\'';
            }

            ++ptr;
            string name = ParseName();

            if (pattern[ptr] == delim)
            {
                // capturing group

                if (name == null)
                {
                    throw NewParseException("Bad group name.");
                }

                ++ptr;
                CapturingGroup cap = new CapturingGroup();
                cap.Name = name;
                caps.Add(cap);
                ParseGroup(cap, options, null);

                return(cap);
            }
            else if (pattern[ptr] == '-')
            {
                // balancing group

                ++ptr;
                string balance_name = ParseName();
                if (balance_name == null || pattern[ptr] != delim)
                {
                    throw NewParseException("Bad balancing group name.");
                }

                ++ptr;
                BalancingGroup bal = new BalancingGroup();
                bal.Name = name;

                // Only a named balancing group is registered as a capture.
                if (bal.IsNamed)
                {
                    caps.Add(bal);
                }

                refs.Add(bal, balance_name);

                ParseGroup(bal, options, null);

                return(bal);
            }
            else
            {
                throw NewParseException("Bad group name.");
            }
        }

        case '(':
        {
            // expression/capture test
            Assertion asn;

            ++ptr;
            int p = ptr;    // remembered so the name attempt can be undone
            string name = ParseName();
            if (name == null || pattern[ptr] != ')')
            {
                // expression test
                // FIXME MS implementation doesn't seem to
                // implement this version of (?(x) ...)

                ptr = p;
                ExpressionAssertion expr_asn = new ExpressionAssertion();

                if (pattern[ptr] == '?')
                {
                    ++ptr;
                    if (!ParseAssertionType(expr_asn))
                    {
                        throw NewParseException("Bad conditional.");
                    }
                }
                else
                {
                    expr_asn.Negate = false;
                    expr_asn.Reverse = false;
                }

                Group test = new Group();
                ParseGroup(test, options, null);
                expr_asn.TestExpression = test;
                asn = expr_asn;
            }
            else
            {
                // capture test
                ++ptr;
                asn = new CaptureAssertion();
                refs.Add(asn, name);
            }

            // The yes/no branches are parsed by ParseGroup, which fills in the assertion.
            Group group = new Group();
            ParseGroup(group, options, asn);
            return(group);
        }

        case '#':
        {
            // comment
            ++ptr;
            // Check the bound before every read so that an unterminated comment,
            // including an empty one at the very end of the pattern, is reported
            // as a parse error instead of indexing past the end of the pattern.
            while (true)
            {
                if (ptr >= pattern.Length)
                {
                    throw NewParseException("Unterminated (?#...) comment.");
                }
                if (pattern[ptr++] == ')')
                {
                    break;
                }
            }
            return(null);
        }

        default:
            // error
            throw NewParseException("Bad grouping construct.");
    }
}
// private methods

/// <summary>
/// Parses pattern text starting at <c>ptr</c> into <paramref name="group"/>, stopping at
/// the matching ')' or, for the top-level group (a <c>RegularExpression</c>), at the end
/// of the pattern.
/// </summary>
/// <param name="group">Group that receives the parsed content.</param>
/// <param name="options">Options in effect; inline (?imnsx-imnsx) constructs update the local copy.</param>
/// <param name="assertion">
/// When not <c>null</c>, the '|'-separated branches are stored as the assertion's
/// TrueExpression / FalseExpression and the assertion itself is appended to the group;
/// otherwise '|' builds an <c>Alternation</c>.
/// </param>
/// <remarks>
/// Raises the exception built by <c>NewParseException</c> for a dangling quantifier,
/// too many '|' in a conditional, and unbalanced parentheses.
/// </remarks>
private void ParseGroup(Group group, RegexOptions options, Assertion assertion)
{
    bool is_top_level = group is RegularExpression;

    Alternation alternation = null;
    string literal = null;          // run of literal characters not yet appended

    Group current = new Group();    // branch currently being filled
    Expression expr = null;
    bool closed = false;

    while (true)
    {
        ConsumeWhitespace(IsIgnorePatternWhitespace(options));
        if (ptr >= pattern.Length)
        {
            break;
        }

        // (1) Parse for Expressions

        char ch = pattern[ptr++];

        switch (ch)
        {
            case '^':
            {
                Position pos =
                    IsMultiline(options) ? Position.StartOfLine : Position.Start;
                expr = new PositionAssertion(pos);
                break;
            }

            case '$':
            {
                Position pos =
                    IsMultiline(options) ? Position.EndOfLine : Position.End;
                expr = new PositionAssertion(pos);
                break;
            }

            case '.':
            {
                Category cat =
                    IsSingleline(options) ? Category.AnySingleline : Category.Any;
                expr = new CharacterClass(cat, false);
                break;
            }

            case '\\':
            {
                int c = ParseEscape();
                if (c >= 0)
                {
                    ch = (char)c;
                }
                else
                {
                    expr = ParseSpecial(options);

                    if (expr == null)
                    {
                        ch = pattern[ptr++];    // default escape
                    }
                }
                break;
            }

            case '[':
            {
                expr = ParseCharacterClass(options);
                break;
            }

            case '(':
            {
                bool ignore = IsIgnoreCase(options);
                expr = ParseGroupingConstruct(ref options);
                if (expr == null)
                {
                    // The construct produced no expression (option change or comment).
                    // If case sensitivity changed, flush the pending literal with the
                    // flag it was parsed under, not the new one, so that in
                    // "abc(?i)def" only "def" becomes case-insensitive.
                    if (literal != null && IsIgnoreCase(options) != ignore)
                    {
                        current.AppendExpression(new Literal(literal, ignore));
                        literal = null;
                    }

                    continue;
                }
                break;
            }

            case ')':
            {
                closed = true;
                goto EndOfGroup;
            }

            case '|':
            {
                if (literal != null)
                {
                    current.AppendExpression(new Literal(literal, IsIgnoreCase(options)));
                    literal = null;
                }

                if (assertion != null)
                {
                    if (assertion.TrueExpression == null)
                    {
                        assertion.TrueExpression = current;
                    }
                    else if (assertion.FalseExpression == null)
                    {
                        assertion.FalseExpression = current;
                    }
                    else
                    {
                        throw NewParseException("Too many | in (?()|).");
                    }
                }
                else
                {
                    if (alternation == null)
                    {
                        alternation = new Alternation();
                    }

                    alternation.AddAlternative(current);
                }

                current = new Group();
                continue;
            }

            case '*':
            case '+':
            case '?':
            {
                // A quantifier here has nothing to apply to.
                throw NewParseException("Bad quantifier.");
            }

            default:
                break;      // literal character
        }

        ConsumeWhitespace(IsIgnorePatternWhitespace(options));

        // (2) Check for Repetitions

        if (ptr < pattern.Length)
        {
            char k = pattern[ptr];
            int min = 0, max = 0;
            bool lazy = false;
            bool haveRep = false;

            if (k == '?' || k == '*' || k == '+')
            {
                ++ptr;
                haveRep = true;

                // NOTE(review): 0xffff is the upper bound used for '*' and '+' here;
                // confirm it matches what Repetition treats as unbounded.
                switch (k)
                {
                    case '?': min = 0; max = 1; break;
                    case '*': min = 0; max = 0xffff; break;
                    case '+': min = 1; max = 0xffff; break;
                }
            }
            else if (k == '{' && ptr + 1 < pattern.Length)
            {
                // Not every '{' starts a quantifier; rewind when the bounds do not parse.
                int saved_ptr = ptr;
                ++ptr;
                haveRep = ParseRepetitionBounds(out min, out max, options);
                if (!haveRep)
                {
                    ptr = saved_ptr;
                }
            }

            if (haveRep)
            {
                ConsumeWhitespace(IsIgnorePatternWhitespace(options));
                if (ptr < pattern.Length && pattern[ptr] == '?')
                {
                    ++ptr;
                    lazy = true;
                }

                Repetition repetition = new Repetition(min, max, lazy);

                // A quantified plain character becomes its own one-character literal.
                if (expr == null)
                {
                    repetition.Expression = new Literal(ch.ToString(), IsIgnoreCase(options));
                }
                else
                {
                    repetition.Expression = expr;
                }

                expr = repetition;
            }
        }

        // (3) Append Expression and/or Literal

        if (expr == null)
        {
            if (literal == null)
            {
                literal = "";
            }
            literal += ch;
        }
        else
        {
            if (literal != null)
            {
                current.AppendExpression(new Literal(literal, IsIgnoreCase(options)));
                literal = null;
            }

            current.AppendExpression(expr);
            expr = null;
        }

        if (is_top_level && ptr >= pattern.Length)
        {
            goto EndOfGroup;
        }
    }

EndOfGroup:
    if (is_top_level && closed)
    {
        throw NewParseException("Too many )'s.");
    }
    if (!is_top_level && !closed)
    {
        throw NewParseException("Not enough )'s.");
    }

    // clean up literals and alternations

    if (literal != null)
    {
        current.AppendExpression(new Literal(literal, IsIgnoreCase(options)));
    }

    if (assertion != null)
    {
        // NOTE(review): after two '|' in a conditional, the branch stored by the
        // second '|' is overwritten here rather than rejected - confirm intent.
        if (assertion.TrueExpression == null)
        {
            assertion.TrueExpression = current;
        }
        else
        {
            assertion.FalseExpression = current;
        }

        group.AppendExpression(assertion);
    }
    else if (alternation != null)
    {
        alternation.AddAlternative(current);
        group.AppendExpression(alternation);
    }
    else
    {
        group.AppendExpression(current);
    }
}