// Renumbers the capture groups that were declared with an explicit numeric
// name which did not coincide with the next free group number.
//
// The list is sorted in place first (ordering is whatever CapturingGroup's
// comparison defines -- presumably by Index; confirm against CapturingGroup).
// The 'gap' field is then advanced past every leading entry whose Index equals
// the running counter, and all remaining entries receive consecutive numbers
// starting at the new 'gap'. Entries that shared one original Index end up
// sharing one new Index.
private void HandleExplicitNumericGroups(ArrayList explicit_numeric_groups)
{
    int next = gap;
    int cursor = 0;
    int total = explicit_numeric_groups.Count;

    explicit_numeric_groups.Sort();

    // Phase 1: skip over the entries that already sit on the running counter,
    // bumping the counter each time one matches it exactly.
    while (cursor < total)
    {
        CapturingGroup candidate = (CapturingGroup)explicit_numeric_groups[cursor];
        if (candidate.Index > next)
        {
            break;
        }

        if (candidate.Index == next)
        {
            next++;
        }

        cursor++;
    }

    gap = next;

    // Phase 2: squeeze the remaining indexes together. 'lastOriginal' remembers
    // the original Index of the previous entry so duplicates get the number
    // that was just handed out (next - 1) instead of a fresh one.
    int lastOriginal = next;
    while (cursor < total)
    {
        CapturingGroup current = (CapturingGroup)explicit_numeric_groups[cursor];
        if (current.Index != lastOriginal)
        {
            lastOriginal = current.Index;
            current.Index = next++;
        }
        else
        {
            current.Index = next - 1;
        }

        cursor++;
    }
}
// Fills 'mapping' with name -> group-index entries and returns the 'gap' field.
//
// The entry "0" -> 0 is always added first. Each group in 'caps' is then
// registered under its Name, or under the decimal string of its Index when it
// has no Name. A key that is already present must map to the same index;
// otherwise a SystemException("invalid state") is thrown.
public int GetMapping(Hashtable mapping)
{
    int count = caps.Count;
    mapping.Add("0", 0);

    for (int pos = 0; pos < count; pos++)
    {
        CapturingGroup entry = (CapturingGroup)caps[pos];

        // Unnamed groups are keyed by their number.
        string key = entry.Name;
        if (key == null)
        {
            key = entry.Index.ToString();
        }

        if (!mapping.Contains(key))
        {
            mapping.Add(key, entry.Index);
            continue;
        }

        // Same key seen before: it has to refer to the same group number.
        if ((int)mapping[key] != entry.Index)
        {
            throw new SystemException("invalid state");
        }
    }

    return gap;
}
// Parses the construct that follows an opening parenthesis, starting at
// pattern[ptr], and returns the resulting expression.
//
// Returns null for constructs that produce no expression: an inline option
// change "(?opts)" (which instead updates 'options' for the enclosing group)
// and a "(?#...)" comment. Malformed constructs raise the exception built by
// NewParseException.
private Expression ParseGroupingConstruct(ref RegexOptions options)
{
    // No '?' after the parenthesis: an ordinary group, which captures unless
    // explicit-capture mode is on.
    if (pattern[ptr] != '?')
    {
        Group plain;
        if (IsExplicitCapture(options))
        {
            plain = new Group();
        }
        else
        {
            plain = new CapturingGroup();
            caps.Add(plain);
        }

        ParseGroup(plain, options, null);
        return plain;
    }

    ++ptr; // step over '?'

    switch (pattern[ptr])
    {
        case ':':
        {
            // non-capturing group
            ++ptr;
            Group nonCapturing = new Group();
            ParseGroup(nonCapturing, options, null);
            return nonCapturing;
        }

        case '>':
        {
            // non-backtracking group
            ++ptr;
            Group atomic = new NonBacktrackingGroup();
            ParseGroup(atomic, options, null);
            return atomic;
        }

        case 'i':
        case 'm':
        case 'n':
        case 's':
        case 'x':
        case '-':
        {
            // inline options: work on a copy so the caller's options only
            // change in the "(?opts)" form
            RegexOptions updated = options;
            ParseOptions(ref updated, false);
            if (pattern[ptr] == '-')
            {
                ++ptr;
                ParseOptions(ref updated, true);
            }

            char terminator = pattern[ptr];
            if (terminator == ':')
            {
                // pass options to child group
                ++ptr;
                Group scoped = new Group();
                ParseGroup(scoped, updated, null);
                return scoped;
            }

            if (terminator == ')')
            {
                // change options of enclosing group
                ++ptr;
                options = updated;
                return null;
            }

            throw NewParseException("Bad options");
        }

        case '<':
        case '=':
        case '!':
        {
            // lookahead/lookbehind
            ExpressionAssertion lookaround = new ExpressionAssertion();
            if (ParseAssertionType(lookaround))
            {
                Group body = new Group();
                ParseGroup(body, options, null);
                lookaround.TestExpression = body;
                return lookaround;
            }

            // not an assertion after all: it's a (?<name> ) construct
            goto case '\'';
        }

        case '\'':
        {
            // named/balancing group; the closing delimiter mirrors the opener
            char closing = pattern[ptr] == '<' ? '>' : '\'';
            ++ptr;

            string groupName = ParseName();
            char afterName = pattern[ptr];

            if (afterName == closing)
            {
                // capturing group
                if (groupName == null)
                {
                    throw NewParseException("Bad group name.");
                }

                ++ptr;
                CapturingGroup named = new CapturingGroup();
                named.Name = groupName;
                caps.Add(named);
                ParseGroup(named, options, null);
                return named;
            }

            if (afterName == '-')
            {
                // balancing group
                ++ptr;
                string balance_name = ParseName();
                if (balance_name == null || pattern[ptr] != closing)
                {
                    throw NewParseException("Bad balancing group name.");
                }

                ++ptr;
                BalancingGroup balancing = new BalancingGroup();
                balancing.Name = groupName;
                if (balancing.IsNamed)
                {
                    caps.Add(balancing);
                }

                // the balanced group is looked up by name later, via 'refs'
                refs.Add(balancing, balance_name);
                ParseGroup(balancing, options, null);
                return balancing;
            }

            throw NewParseException("Bad group name.");
        }

        case '(':
        {
            // expression/capture test
            ++ptr;
            int restart = ptr;
            string captureName = ParseName();

            Assertion condition;
            if (captureName != null && pattern[ptr] == ')')
            {
                // capture test
                ++ptr;
                condition = new CaptureAssertion(new Literal(captureName, IsIgnoreCase(options)));
                refs.Add(condition, captureName);
            }
            else
            {
                // expression test
                // FIXME MS implementation doesn't seem to
                // implement this version of (?(x) ...)
                ptr = restart;
                ExpressionAssertion expressionTest = new ExpressionAssertion();
                if (pattern[ptr] == '?')
                {
                    ++ptr;
                    if (!ParseAssertionType(expressionTest))
                    {
                        throw NewParseException("Bad conditional.");
                    }
                }
                else
                {
                    expressionTest.Negate = false;
                    expressionTest.Reverse = false;
                }

                Group testBody = new Group();
                ParseGroup(testBody, options, null);
                expressionTest.TestExpression = testBody;
                condition = expressionTest;
            }

            Group conditional = new Group();
            ParseGroup(conditional, options, condition);
            return conditional;
        }

        case '#':
        {
            // comment: consume everything up to and including the next ')'
            ++ptr;
            for (;;)
            {
                if (pattern[ptr++] == ')')
                {
                    break;
                }

                if (ptr >= pattern.Length)
                {
                    throw NewParseException("Unterminated (?#...) comment.");
                }
            }

            return null;
        }

        default:
            // error
            throw NewParseException("Bad grouping construct.");
    }
}
// Assigns a number to every capturing group collected in 'caps' and then binds
// every pending reference in 'refs' to the group it names.
//
// Numbering happens in two passes over 'caps': unnamed groups are numbered
// first (1, 2, ...), then named groups. A named group whose name is entirely a
// decimal number keeps that number; when it does not coincide with the next
// free number it is queued for HandleExplicitNumericGroups. Groups that repeat
// an already-seen name share the index of the first occurrence.
//
// Side effects visible here: sets Index on each group, increments
// 'num_groups', sets 'gap', and fills CapturingGroup/Balance on the
// referencing expressions. Throws the exception built by NewParseException
// when a reference names a group that does not exist.
private void ResolveReferences()
{
    int gid = 1;
    Hashtable dict = new Hashtable();

    // Created lazily, the first time a group needs deferred renumbering.
    ArrayList explicit_numeric_groups = null;

    // number unnamed groups
    foreach (CapturingGroup group in caps)
    {
        if (group.Name != null)
        {
            continue;
        }

        dict.Add(gid.ToString(), group);
        group.Index = gid++;
        ++num_groups;
    }

    // number named groups
    foreach (CapturingGroup group in caps)
    {
        if (group.Name == null)
        {
            continue;
        }

        if (dict.Contains(group.Name))
        {
            // Name already registered: reuse the earlier group's index.
            CapturingGroup prev = (CapturingGroup)dict[group.Name];
            group.Index = prev.Index;

            if (group.Index == gid)
            {
                gid++;
            }
            else if (group.Index > gid)
            {
                // Guard the lazily created list here as well, instead of
                // relying on an earlier iteration having allocated it.
                if (explicit_numeric_groups == null)
                {
                    explicit_numeric_groups = new ArrayList(4);
                }

                explicit_numeric_groups.Add(group);
            }

            continue;
        }

        if (Char.IsDigit(group.Name[0]))
        {
            // Note: this local 'ptr' is a cursor into the group name, not the
            // parser's pattern position.
            int ptr = 0;
            int group_gid = ParseDecimal(group.Name, ref ptr);
            if (ptr == group.Name.Length)
            {
                // The whole name is a number: the group asks for that index.
                group.Index = group_gid;
                dict.Add(group.Name, group);
                ++num_groups;

                if (group_gid == gid)
                {
                    gid++;
                }
                else
                {
                    // all numbers before 'gid' are already in the dictionary.  So, we know group_gid > gid
                    if (explicit_numeric_groups == null)
                    {
                        explicit_numeric_groups = new ArrayList(4);
                    }

                    explicit_numeric_groups.Add(group);
                }

                continue;
            }
        }

        // Ordinary named group: take the next number not already in use.
        string gid_s = gid.ToString();
        while (dict.Contains(gid_s))
        {
            gid_s = (++gid).ToString();
        }

        dict.Add(gid_s, group);
        dict.Add(group.Name, group);
        group.Index = gid++;
        ++num_groups;
    }

    gap = gid; // == 1 + num_groups, if explicit_numeric_groups == null

    if (explicit_numeric_groups != null)
    {
        HandleExplicitNumericGroups(explicit_numeric_groups);
    }

    // resolve references
    foreach (Expression expr in refs.Keys)
    {
        string name = (string)refs[expr];
        if (!dict.Contains(name))
        {
            // A capture test on a non-numeric name that matches no group is
            // left unbound rather than reported as an error.
            if (expr is CaptureAssertion && !Char.IsDigit(name[0]))
            {
                continue;
            }

            // A backslash-number gets a chance to resolve itself against the
            // dictionary before the reference is declared undefined.
            BackslashNumber bn = expr as BackslashNumber;
            if (bn != null && bn.ResolveReference(name, dict))
            {
                continue;
            }

            throw NewParseException("Reference to undefined group " +
                                    (Char.IsDigit(name[0]) ? "number " : "name ") +
                                    name);
        }

        CapturingGroup group = (CapturingGroup)dict[name];
        if (expr is Reference)
        {
            ((Reference)expr).CapturingGroup = group;
        }
        else if (expr is CaptureAssertion)
        {
            ((CaptureAssertion)expr).CapturingGroup = group;
        }
        else if (expr is BalancingGroup)
        {
            ((BalancingGroup)expr).Balance = group;
        }
    }
}