/// <summary>
/// Assigns an index to every capturing group in <c>caps</c> and then binds each
/// expression recorded in <c>refs</c> to the group it refers to.
/// </summary>
/// <remarks>
/// Numbering happens in two passes over <c>caps</c>: unnamed groups first, named
/// groups second. A named group whose whole name parses as a decimal number takes
/// that number as its index; any other named group takes the next free index.
/// Groups with an explicit number beyond the running counter are collected and
/// handed to <c>HandleExplicitNumericGroups</c>. The final counter value is stored
/// in <c>gap</c>, and <c>num_groups</c> is incremented for every newly registered group.
/// A parse exception is thrown for a reference whose target cannot be resolved.
/// </remarks>
private void ResolveReferences()
{
    Hashtable groupsByKey = new Hashtable();
    ArrayList explicitlyNumbered = null;
    int nextId = 1;

    // Pass 1: unnamed groups take consecutive numbers starting at 1.
    foreach (CapturingGroup unnamed in caps)
    {
        if (unnamed.Name == null)
        {
            groupsByKey.Add(nextId.ToString(), unnamed);
            unnamed.Index = nextId;
            nextId++;
            ++num_groups;
        }
    }

    // Pass 2: named groups.
    foreach (CapturingGroup named in caps)
    {
        string groupName = named.Name;
        if (groupName == null)
        {
            continue;
        }

        if (groupsByKey.Contains(groupName))
        {
            // The key is already registered: share the index of the group that owns it.
            CapturingGroup owner = (CapturingGroup)groupsByKey[groupName];
            named.Index = owner.Index;

            int sharedIndex = named.Index;
            if (sharedIndex == nextId)
            {
                nextId++;
            }
            else if (sharedIndex > nextId)
            {
                // NOTE(review): no null check here - this appears to rely on the list
                // having been created when the owning group was registered with an
                // explicit number above the counter. Confirm before restructuring.
                explicitlyNumbered.Add(named);
            }
            continue;
        }

        if (Char.IsDigit(groupName[0]))
        {
            int consumed = 0;
            int requestedId = ParseDecimal(groupName, ref consumed);

            // Only a name that is a number from start to finish counts as explicit numbering.
            if (consumed == groupName.Length)
            {
                named.Index = requestedId;
                groupsByKey.Add(groupName, named);
                ++num_groups;

                if (requestedId != nextId)
                {
                    // Every number below the counter is already in the dictionary,
                    // so the requested number must lie above the counter.
                    if (explicitlyNumbered == null)
                    {
                        explicitlyNumbered = new ArrayList(4);
                    }
                    explicitlyNumbered.Add(named);
                }
                else
                {
                    nextId++;
                }
                continue;
            }
        }

        // Ordinary name: skip over numbers that are already taken, then register
        // the group under both its number and its name.
        string numberKey = nextId.ToString();
        while (groupsByKey.Contains(numberKey))
        {
            ++nextId;
            numberKey = nextId.ToString();
        }
        groupsByKey.Add(numberKey, named);
        groupsByKey.Add(groupName, named);
        named.Index = nextId;
        nextId++;
        ++num_groups;
    }

    gap = nextId; // == 1 + num_groups when no explicitly numbered groups were collected

    if (explicitlyNumbered != null)
    {
        HandleExplicitNumericGroups(explicitlyNumbered);
    }

    // Bind every recorded reference to its target group.
    foreach (Expression expr in refs.Keys)
    {
        string name = (string)refs[expr];

        if (groupsByKey.Contains(name))
        {
            CapturingGroup target = (CapturingGroup)groupsByKey[name];

            // The order of these checks matters: the first matching type wins.
            Reference asReference = expr as Reference;
            if (asReference != null)
            {
                asReference.CapturingGroup = target;
                continue;
            }

            CaptureAssertion asAssertion = expr as CaptureAssertion;
            if (asAssertion != null)
            {
                asAssertion.CapturingGroup = target;
                continue;
            }

            BalancingGroup asBalancing = expr as BalancingGroup;
            if (asBalancing != null)
            {
                asBalancing.Balance = target;
            }
            continue;
        }

        // Unknown key. A capture assertion with a non-numeric name is left unbound.
        if (expr is CaptureAssertion && !Char.IsDigit(name[0]))
        {
            continue;
        }

        // A backslash-number gets a chance to resolve itself against the dictionary.
        BackslashNumber numbered = expr as BackslashNumber;
        if (numbered != null && numbered.ResolveReference(name, groupsByKey))
        {
            continue;
        }

        string kind = Char.IsDigit(name[0]) ? "number " : "name ";
        throw NewParseException("Reference to undefined group " + kind + name);
    }
}
/// <summary>
/// Parses the escape whose identifying character is at <c>pattern[ptr]</c>
/// (presumably the character following a backslash - confirm against the caller)
/// and returns the matching expression: a character category, a position
/// assertion, or a backreference.
/// </summary>
/// <param name="options">Options consulted for ECMAScript mode and case-insensitivity.</param>
/// <returns>
/// The parsed expression, or <c>null</c> when the character is not one of the
/// escapes handled here or a numeric reference could not be read; in both of
/// those cases <c>ptr</c> is restored to its value on entry.
/// </returns>
/// <remarks>
/// Backreferences are recorded in <c>refs</c> under the referenced group's number
/// or name. A parse exception is thrown for a malformed <c>\k</c> reference.
/// </remarks>
private Expression ParseSpecial(RegexOptions options)
{
    int start = ptr;
    bool ecma = IsECMAScript(options);
    char escape = pattern[ptr++];

    switch (escape)
    {
        // categories - the upper-case letter selects the negated class
        case 'd':
        case 'D':
            return new CharacterClass(ecma ? Category.EcmaDigit : Category.Digit, escape == 'D');

        case 'w':
        case 'W':
            return new CharacterClass(ecma ? Category.EcmaWord : Category.Word, escape == 'W');

        case 's':
        case 'S':
            return new CharacterClass(ecma ? Category.EcmaWhiteSpace : Category.WhiteSpace, escape == 'S');

        case 'p':
        case 'P':
            // Oddity: ECMAScript is not supposed to support Unicode, yet \p{..}
            // compiles and runs under the MS implementation identically to
            // canonical mode. That is why the ecma flag is ignored here.
            return new CharacterClass(ParseUnicodeCategory(), escape == 'P');

        // positions
        case 'A':
            return new PositionAssertion(Position.StartOfString);
        case 'Z':
            return new PositionAssertion(Position.End);
        case 'z':
            return new PositionAssertion(Position.EndOfString);
        case 'G':
            return new PositionAssertion(Position.StartOfScan);
        case 'b':
            return new PositionAssertion(Position.Boundary);
        case 'B':
            return new PositionAssertion(Position.NonBoundary);

        // references
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
        {
            // Step back so the digit just consumed is part of the number.
            --ptr;
            int groupNumber = ParseNumber(10, 1, 0);
            if (groupNumber < 0)
            {
                ptr = start;
                return null;
            }

            // FIXME test if number is within number of assigned groups
            // this may present a problem for right-to-left matching
            Reference numbered = new BackslashNumber(IsIgnoreCase(options), ecma);
            refs.Add(numbered, groupNumber.ToString());
            return numbered;
        }

        case 'k':
        {
            // The name is delimited either as <name> or as 'name'.
            char opener = pattern[ptr++];
            char closer;
            if (opener == '<')
            {
                closer = '>';
            }
            else if (opener == '\'')
            {
                closer = '\'';
            }
            else
            {
                throw NewParseException("Malformed \\k<...> named backreference.");
            }

            string name = ParseName();
            if (name == null || pattern[ptr] != closer)
            {
                throw NewParseException("Malformed \\k<...> named backreference.");
            }
            ++ptr;

            Reference byName = new Reference(IsIgnoreCase(options));
            refs.Add(byName, name);
            return byName;
        }
    }

    // Not an escape handled here: rewind and let the caller decide.
    ptr = start;
    return null;
}