/// <summary>
/// Assigns an index to every capturing group in <c>caps</c> and then binds
/// every expression recorded in <c>refs</c> to the group it refers to.
/// </summary>
/// <remarks>
/// Unnamed groups are numbered first, starting at 1. Named groups are numbered
/// in a second pass: a name that is already known shares the index of the
/// earlier group, a name that is entirely a decimal number uses that number
/// as its index, and any other name takes the next free number. Groups whose
/// explicit number does not equal the running counter are collected and
/// passed to <c>HandleExplicitNumericGroups</c>. Updates <c>num_groups</c>
/// and <c>gap</c> as a side effect.
/// Throws the exception produced by <c>NewParseException</c> when a reference
/// cannot be resolved.
/// </remarks>
private void ResolveReferences()
{
    int gid = 1;
    Hashtable dict = new Hashtable();
    ArrayList explicit_numeric_groups = null;

    // number unnamed groups
    foreach (CapturingGroup group in caps)
    {
        if (group.Name != null)
        {
            continue;
        }

        dict.Add(gid.ToString(), group);
        group.Index = gid++;
        ++num_groups;
    }

    // number named groups
    foreach (CapturingGroup group in caps)
    {
        if (group.Name == null)
        {
            continue;
        }

        // A name seen before: share the index of the earlier group.
        if (dict.Contains(group.Name))
        {
            CapturingGroup prev = (CapturingGroup)dict[group.Name];
            group.Index = prev.Index;
            if (group.Index == gid)
            {
                gid++;
            }
            else if (group.Index > gid)
            {
                // The list is created lazily; do not rely on the numeric-name
                // branch below having allocated it already.
                if (explicit_numeric_groups == null)
                {
                    explicit_numeric_groups = new ArrayList(4);
                }
                explicit_numeric_groups.Add(group);
            }
            continue;
        }

        // A name that is entirely a decimal number is used as the index itself.
        if (Char.IsDigit(group.Name[0]))
        {
            int ptr = 0;
            int group_gid = ParseDecimal(group.Name, ref ptr);
            if (ptr == group.Name.Length)
            {
                group.Index = group_gid;
                dict.Add(group.Name, group);
                ++num_groups;

                if (group_gid == gid)
                {
                    gid++;
                }
                else
                {
                    // all numbers before 'gid' are already in the dictionary.  So, we know group_gid > gid
                    if (explicit_numeric_groups == null)
                    {
                        explicit_numeric_groups = new ArrayList(4);
                    }
                    explicit_numeric_groups.Add(group);
                }
                continue;
            }
        }

        // Any other name takes the next number that is not yet in the dictionary
        // and is registered under both that number and its name.
        string gid_s = gid.ToString();
        while (dict.Contains(gid_s))
        {
            gid_s = (++gid).ToString();
        }
        dict.Add(gid_s, group);
        dict.Add(group.Name, group);
        group.Index = gid++;
        ++num_groups;
    }

    gap = gid; // == 1 + num_groups, if explicit_numeric_groups == null

    if (explicit_numeric_groups != null)
    {
        HandleExplicitNumericGroups(explicit_numeric_groups);
    }

    // resolve references
    foreach (Expression expr in refs.Keys)
    {
        string name = (string)refs[expr];
        if (!dict.Contains(name))
        {
            // A capture assertion with a non-numeric, unknown name is left unbound.
            if (expr is CaptureAssertion && !Char.IsDigit(name[0]))
            {
                continue;
            }

            // A backslash-number gets a chance to resolve itself against the dictionary.
            BackslashNumber bn = expr as BackslashNumber;
            if (bn != null && bn.ResolveReference(name, dict))
            {
                continue;
            }

            throw NewParseException("Reference to undefined group " +
                (Char.IsDigit(name[0]) ? "number " : "name ") +
                name);
        }

        CapturingGroup group = (CapturingGroup)dict[name];
        if (expr is Reference)
        {
            ((Reference)expr).CapturingGroup = group;
        }
        else if (expr is CaptureAssertion)
        {
            ((CaptureAssertion)expr).CapturingGroup = group;
        }
        else if (expr is BalancingGroup)
        {
            ((BalancingGroup)expr).Balance = group;
        }
    }
}
/// <summary>
/// Parses the escape code at <c>pattern[ptr]</c> when it denotes a character
/// category, a position assertion or a backreference.
/// </summary>
/// <param name="options">Options consulted through <c>IsECMAScript</c> and <c>IsIgnoreCase</c>.</param>
/// <returns>
/// The expression built for the escape code, or <c>null</c> when the code is
/// not one handled here or <c>ParseNumber</c> returns a negative value. In
/// both <c>null</c> cases <c>ptr</c> is reset to its value on entry.
/// </returns>
/// <remarks>
/// Backreferences are recorded in <c>refs</c> together with the group number
/// or name they mention. A malformed <c>\k</c> reference throws the exception
/// produced by <c>NewParseException</c>.
/// NOTE(review): presumably called with <c>ptr</c> just past a backslash - confirm against the caller.
/// </remarks>
private Expression ParseSpecial(RegexOptions options)
{
    int start = ptr;
    bool ecmaMode = IsECMAScript(options);
    char code = pattern[ptr++];
    Expression result;

    switch (code)
    {
        // categories: the upper-case code builds the negated class
        case 'd':
        case 'D':
            result = new CharacterClass(ecmaMode ? Category.EcmaDigit : Category.Digit, code == 'D');
            break;

        case 'w':
        case 'W':
            result = new CharacterClass(ecmaMode ? Category.EcmaWord : Category.Word, code == 'W');
            break;

        case 's':
        case 'S':
            result = new CharacterClass(ecmaMode ? Category.EcmaWhiteSpace : Category.WhiteSpace, code == 'S');
            break;

        case 'p':
        case 'P':
            // this is odd - ECMAScript isn't supposed to support Unicode,
            // yet \p{..} compiles and runs under the MS implementation
            // identically to canonical mode. That's why the ECMAScript
            // flag is ignored here.
            result = new CharacterClass(ParseUnicodeCategory(), code == 'P');
            break;

        // positions
        case 'A':
            result = new PositionAssertion(Position.StartOfString);
            break;

        case 'Z':
            result = new PositionAssertion(Position.End);
            break;

        case 'z':
            result = new PositionAssertion(Position.EndOfString);
            break;

        case 'G':
            result = new PositionAssertion(Position.StartOfScan);
            break;

        case 'b':
            result = new PositionAssertion(Position.Boundary);
            break;

        case 'B':
            result = new PositionAssertion(Position.NonBoundary);
            break;

        // references
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
        {
            // Step back so ParseNumber starts at the digit that selected this case.
            --ptr;
            int number = ParseNumber(10, 1, 0);
            if (number < 0)
            {
                ptr = start;
                return null;
            }

            // FIXME test if number is within number of assigned groups
            // this may present a problem for right-to-left matching
            Reference numbered = new BackslashNumber(IsIgnoreCase(options), ecmaMode);
            refs.Add(numbered, number.ToString());
            result = numbered;
            break;
        }

        case 'k':
        {
            // The name is delimited either by <...> or by '...'.
            char opener = pattern[ptr++];
            char closer;
            if (opener == '<')
            {
                closer = '>';
            }
            else if (opener == '\'')
            {
                closer = opener;
            }
            else
            {
                throw NewParseException("Malformed \\k<...> named backreference.");
            }

            string name = ParseName();
            if (name == null || pattern[ptr] != closer)
            {
                throw NewParseException("Malformed \\k<...> named backreference.");
            }
            ++ptr;

            Reference named = new Reference(IsIgnoreCase(options));
            refs.Add(named, name);
            result = named;
            break;
        }

        default:
            // Not an escape handled here: undo the read and report nothing.
            ptr = start;
            return null;
    }

    return result;
}
/// <summary>
/// Reads one escape code from <c>pattern</c> at <c>ptr</c> and turns it into a
/// character-class, position-assertion or backreference expression.
/// </summary>
/// <param name="options">Options passed on to <c>IsECMAScript</c> and <c>IsIgnoreCase</c>.</param>
/// <returns>
/// The resulting expression. <c>null</c> is returned, with <c>ptr</c> put back
/// to where it was on entry, when the code is not recognised here or when
/// <c>ParseNumber</c> yields a negative value.
/// </returns>
/// <remarks>
/// Numbered and named backreferences are added to <c>refs</c>, keyed by the
/// reference expression. A <c>\k</c> reference with a bad delimiter or
/// missing name throws the exception created by <c>NewParseException</c>.
/// </remarks>
private Expression ParseSpecial(RegexOptions options)
{
    int savedPtr = ptr;
    bool ecma = IsECMAScript(options);

    // Choose the ECMAScript or the regular flavour of each category once.
    Category digitCategory = ecma ? Category.EcmaDigit : Category.Digit;
    Category wordCategory = ecma ? Category.EcmaWord : Category.Word;
    Category spaceCategory = ecma ? Category.EcmaWhiteSpace : Category.WhiteSpace;

    Expression parsed = null;
    switch (pattern[ptr++])
    {
        // character categories
        case 'd':
            parsed = new CharacterClass(digitCategory, negate: false);
            break;
        case 'D':
            parsed = new CharacterClass(digitCategory, negate: true);
            break;
        case 'w':
            parsed = new CharacterClass(wordCategory, negate: false);
            break;
        case 'W':
            parsed = new CharacterClass(wordCategory, negate: true);
            break;
        case 's':
            parsed = new CharacterClass(spaceCategory, negate: false);
            break;
        case 'S':
            parsed = new CharacterClass(spaceCategory, negate: true);
            break;

        // Unicode categories are parsed the same way regardless of the ECMAScript flag
        case 'p':
            parsed = new CharacterClass(ParseUnicodeCategory(), negate: false);
            break;
        case 'P':
            parsed = new CharacterClass(ParseUnicodeCategory(), negate: true);
            break;

        // position assertions
        case 'A':
            parsed = new PositionAssertion(Position.StartOfString);
            break;
        case 'Z':
            parsed = new PositionAssertion(Position.End);
            break;
        case 'z':
            parsed = new PositionAssertion(Position.EndOfString);
            break;
        case 'G':
            parsed = new PositionAssertion(Position.StartOfScan);
            break;
        case 'b':
            parsed = new PositionAssertion(Position.Boundary);
            break;
        case 'B':
            parsed = new PositionAssertion(Position.NonBoundary);
            break;

        // numbered backreference
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
        {
            // Re-read from the digit itself; only one character has been consumed so far.
            ptr = savedPtr;
            int groupNumber = ParseNumber(10, 1, 0);
            if (groupNumber < 0)
            {
                ptr = savedPtr;
                return null;
            }

            Reference byNumber = new BackslashNumber(IsIgnoreCase(options), ecma);
            refs.Add(byNumber, groupNumber.ToString());
            parsed = byNumber;
            break;
        }

        // named backreference, delimited by <...> or '...'
        case 'k':
        {
            char terminator = pattern[ptr++];
            if (terminator == '<')
            {
                terminator = '>';
            }
            else if (terminator != '\'')
            {
                throw NewParseException("Malformed \\k<...> named backreference.");
            }

            string groupName = ParseName();
            bool wellFormed = groupName != null && pattern[ptr] == terminator;
            if (!wellFormed)
            {
                throw NewParseException("Malformed \\k<...> named backreference.");
            }
            ptr++;

            Reference byName = new Reference(IsIgnoreCase(options));
            refs.Add(byName, groupName);
            parsed = byName;
            break;
        }

        default:
            parsed = null;
            break;
    }

    // Nothing recognised: leave the input position untouched for the caller.
    if (parsed == null)
    {
        ptr = savedPtr;
    }
    return parsed;
}
/// <summary>
/// Consumes one escape code from <c>pattern</c> and returns the matching
/// category, position or backreference expression.
/// </summary>
/// <param name="options">Options handed to <c>IsECMAScript</c> and <c>IsIgnoreCase</c>.</param>
/// <returns>
/// The new expression, or <c>null</c> (with <c>ptr</c> restored to its entry
/// value) if the code is not handled here or <c>ParseNumber</c> returns a
/// negative number.
/// </returns>
/// <remarks>
/// Backreferences are registered in <c>refs</c> with the number or name they
/// mention. A malformed <c>\k</c> reference throws the exception built by
/// <c>NewParseException</c>.
/// </remarks>
private Expression ParseSpecial (RegexOptions options)
{
	int origin = ptr;
	bool ecma = IsECMAScript (options);
	char escape = pattern[ptr ++];

	// references by number: \1 .. \9 (possibly followed by more digits)

	if (escape >= '1' && escape <= '9') {
		// put the digit back so that ParseNumber reads it
		-- ptr;
		int number = ParseNumber (10, 1, 0);
		if (number < 0) {
			ptr = origin;
			return null;
		}

		// FIXME test if number is within number of assigned groups
		// this may present a problem for right-to-left matching

		Reference numbered = new BackslashNumber (IsIgnoreCase (options), ecma);
		refs.Add (numbered, number.ToString ());
		return numbered;
	}

	switch (escape) {

	// categories

	case 'd':
		return new CharacterClass (ecma ? Category.EcmaDigit : Category.Digit, false);

	case 'w':
		return new CharacterClass (ecma ? Category.EcmaWord : Category.Word, false);

	case 's':
		return new CharacterClass (ecma ? Category.EcmaWhiteSpace : Category.WhiteSpace, false);

	case 'p':
		// this is odd - ECMAScript isn't supposed to support Unicode,
		// yet \p{..} compiles and runs under the MS implementation
		// identically to canonical mode. That's why the value of
		// ecma is ignored here.
		return new CharacterClass (ParseUnicodeCategory (), false);

	case 'D':
		return new CharacterClass (ecma ? Category.EcmaDigit : Category.Digit, true);

	case 'W':
		return new CharacterClass (ecma ? Category.EcmaWord : Category.Word, true);

	case 'S':
		return new CharacterClass (ecma ? Category.EcmaWhiteSpace : Category.WhiteSpace, true);

	case 'P':
		return new CharacterClass (ParseUnicodeCategory (), true);

	// positions

	case 'A':
		return new PositionAssertion (Position.StartOfString);

	case 'Z':
		return new PositionAssertion (Position.End);

	case 'z':
		return new PositionAssertion (Position.EndOfString);

	case 'G':
		return new PositionAssertion (Position.StartOfScan);

	case 'b':
		return new PositionAssertion (Position.Boundary);

	case 'B':
		return new PositionAssertion (Position.NonBoundary);

	// references by name

	case 'k': {
		char open = pattern[ptr ++];
		bool angle = open == '<';
		if (!angle && open != '\'') {
			throw NewParseException ("Malformed \\k<...> named backreference.");
		}
		char close = angle ? '>' : open;

		string name = ParseName ();
		if (name == null || pattern[ptr] != close) {
			throw NewParseException ("Malformed \\k<...> named backreference.");
		}
		++ ptr;

		Reference named = new Reference (IsIgnoreCase (options));
		refs.Add (named, name);
		return named;
	}

	default:
		// not one of ours: rewind and let the caller deal with it
		ptr = origin;
		return null;
	}
}
/// <summary>
/// Gives every capturing group in <c>caps</c> its index and then attaches each
/// expression stored in <c>refs</c> to the group it names.
/// </summary>
/// <remarks>
/// The first pass numbers unnamed groups from 1 upwards. The second pass
/// handles named groups: a repeated name reuses the earlier group's index, a
/// name that parses completely as a decimal number becomes the index itself,
/// and every other name receives the next number not yet taken. Groups whose
/// explicit number differs from the running counter are gathered and given to
/// <c>HandleExplicitNumericGroups</c>. <c>num_groups</c> and <c>gap</c> are
/// updated along the way.
/// Throws the exception produced by <c>NewParseException</c> when a reference
/// names a group that cannot be resolved.
/// </remarks>
private void ResolveReferences()
{
    int nextIndex = 1;
    Hashtable groupsByKey = new Hashtable();
    ArrayList explicitNumeric = null;

    // Pass 1: unnamed groups, keyed by their number rendered as a string.
    foreach (CapturingGroup cap in caps)
    {
        if (cap.Name == null)
        {
            groupsByKey.Add(nextIndex.ToString(), cap);
            cap.Index = nextIndex++;
            num_groups++;
        }
    }

    // Pass 2: named groups.
    foreach (CapturingGroup cap in caps)
    {
        if (cap.Name == null)
        {
            continue;
        }

        // Repeated name: reuse the index of the group registered earlier.
        if (groupsByKey.Contains(cap.Name))
        {
            CapturingGroup earlier = (CapturingGroup)groupsByKey[cap.Name];
            cap.Index = earlier.Index;
            if (cap.Index == nextIndex)
            {
                nextIndex++;
            }
            else if (cap.Index > nextIndex)
            {
                // The list is allocated lazily, so make sure it exists here
                // instead of depending on the numeric-name branch below.
                if (explicitNumeric == null)
                {
                    explicitNumeric = new ArrayList(4);
                }
                explicitNumeric.Add(cap);
            }
            continue;
        }

        // Name made up entirely of decimal digits: the number is the index.
        if (char.IsDigit(cap.Name[0]))
        {
            int consumed = 0;
            int explicitIndex = ParseDecimal(cap.Name, ref consumed);
            if (consumed == cap.Name.Length)
            {
                cap.Index = explicitIndex;
                groupsByKey.Add(cap.Name, cap);
                num_groups++;

                if (explicitIndex == nextIndex)
                {
                    nextIndex++;
                }
                else
                {
                    if (explicitNumeric == null)
                    {
                        explicitNumeric = new ArrayList(4);
                    }
                    explicitNumeric.Add(cap);
                }
                continue;
            }
        }

        // Ordinary name: skip numbers already taken, then register the group
        // under both the chosen number and its name.
        string numberKey = nextIndex.ToString();
        while (groupsByKey.Contains(numberKey))
        {
            numberKey = (++nextIndex).ToString();
        }
        groupsByKey.Add(numberKey, cap);
        groupsByKey.Add(cap.Name, cap);
        cap.Index = nextIndex++;
        num_groups++;
    }

    gap = nextIndex;

    if (explicitNumeric != null)
    {
        HandleExplicitNumericGroups(explicitNumeric);
    }

    // Pass 3: bind each recorded reference to its group.
    foreach (Expression expr in refs.Keys)
    {
        string target = (string)refs[expr];

        if (groupsByKey.Contains(target))
        {
            CapturingGroup resolved = (CapturingGroup)groupsByKey[target];

            Reference reference = expr as Reference;
            if (reference != null)
            {
                reference.CapturingGroup = resolved;
                continue;
            }

            CaptureAssertion assertion = expr as CaptureAssertion;
            if (assertion != null)
            {
                assertion.CapturingGroup = resolved;
                continue;
            }

            BalancingGroup balancing = expr as BalancingGroup;
            if (balancing != null)
            {
                balancing.Balance = resolved;
            }
            continue;
        }

        // Unknown target. A capture assertion with a non-numeric name is left unbound.
        if (expr is CaptureAssertion && !char.IsDigit(target[0]))
        {
            continue;
        }

        // A backslash-number may still be able to resolve itself against the table.
        BackslashNumber backslashNumber = expr as BackslashNumber;
        if (backslashNumber != null && backslashNumber.ResolveReference(target, groupsByKey))
        {
            continue;
        }

        throw NewParseException("Reference to undefined group " +
            (char.IsDigit(target[0]) ? "number " : "name ") +
            target);
    }
}