Exemplo n.º 1
0
        // Gives every capturing group in 'caps' its final index, then binds each pending
        // entry of 'refs' to the group it names.
        //
        // Numbering makes two passes over 'caps'. Unnamed groups are numbered first, from 1
        // upwards. Named groups follow: a name that is already in the table shares the
        // earlier group's index, a name that ParseDecimal consumes completely is used as
        // the index itself, and any other name receives the next number that is still free.
        // 'num_groups' is incremented for every group that gets a fresh table entry and
        // 'gap' receives the value of the counter once numbering is finished.
        //
        // Throws the exception built by NewParseException when a reference names a key that
        // is missing from the table and none of the fallbacks in the last loop applies.
        private void ResolveReferences()
        {
            Hashtable groupsByKey  = new Hashtable();
            ArrayList sparseGroups = null;      // created on first use
            int       nextIndex    = 1;

            // Pass 1: unnamed groups, keyed by the decimal text of their index.
            foreach (CapturingGroup unnamed in caps)
            {
                if (unnamed.Name == null)
                {
                    groupsByKey.Add(nextIndex.ToString(), unnamed);
                    unnamed.Index = nextIndex;
                    nextIndex++;
                    num_groups++;
                }
            }

            // Pass 2: named groups.
            foreach (CapturingGroup named in caps)
            {
                if (named.Name == null)
                {
                    continue;
                }

                if (groupsByKey.Contains(named.Name))
                {
                    // The key is already taken: reuse the index of the registered group.
                    CapturingGroup earlier = (CapturingGroup)groupsByKey[named.Name];
                    named.Index = earlier.Index;

                    if (named.Index > nextIndex)
                    {
                        // NOTE(review): no null check here. An index ahead of the counter
                        // appears to be reachable only through the explicit-number branch
                        // below, which creates the list before its first Add.
                        sparseGroups.Add(named);
                    }
                    else if (named.Index == nextIndex)
                    {
                        nextIndex++;
                    }

                    continue;
                }

                if (Char.IsDigit(named.Name[0]))
                {
                    int consumed      = 0;
                    int explicitIndex = ParseDecimal(named.Name, ref consumed);

                    // The name is treated as an explicit number only when ParseDecimal
                    // consumed all of it; otherwise it is handled like any other name.
                    if (consumed == named.Name.Length)
                    {
                        named.Index = explicitIndex;
                        groupsByKey.Add(named.Name, named);
                        num_groups++;

                        if (explicitIndex != nextIndex)
                        {
                            // Does not continue the running sequence: collect it for
                            // HandleExplicitNumericGroups.
                            if (sparseGroups == null)
                            {
                                sparseGroups = new ArrayList(4);
                            }
                            sparseGroups.Add(named);
                        }
                        else
                        {
                            nextIndex++;
                        }

                        continue;
                    }
                }

                // Ordinary name: advance the counter past every number already in the table,
                // then register the group under both that number and its name.
                string numericKey = nextIndex.ToString();
                for (; groupsByKey.Contains(numericKey); numericKey = nextIndex.ToString())
                {
                    nextIndex++;
                }

                groupsByKey.Add(numericKey, named);
                groupsByKey.Add(named.Name, named);
                named.Index = nextIndex;
                nextIndex++;
                num_groups++;
            }

            // Equals 1 + num_groups when no group was collected in sparseGroups.
            gap = nextIndex;

            if (sparseGroups != null)
            {
                HandleExplicitNumericGroups(sparseGroups);
            }

            // Bind every recorded reference to its group.
            foreach (Expression pending in refs.Keys)
            {
                string target = (string)refs[pending];

                if (groupsByKey.Contains(target))
                {
                    CapturingGroup resolved = (CapturingGroup)groupsByKey[target];

                    Reference backreference = pending as Reference;
                    if (backreference != null)
                    {
                        backreference.CapturingGroup = resolved;
                        continue;
                    }

                    CaptureAssertion assertion = pending as CaptureAssertion;
                    if (assertion != null)
                    {
                        assertion.CapturingGroup = resolved;
                        continue;
                    }

                    BalancingGroup balancing = pending as BalancingGroup;
                    if (balancing != null)
                    {
                        balancing.Balance = resolved;
                    }

                    continue;
                }

                // Unknown key. A capture assertion whose key does not start with a digit
                // is left unbound without raising an error.
                if (pending is CaptureAssertion && !Char.IsDigit(target[0]))
                {
                    continue;
                }

                // A backslash-number reference gets a chance to resolve itself.
                BackslashNumber numbered = pending as BackslashNumber;
                if (numbered != null && numbered.ResolveReference(target, groupsByKey))
                {
                    continue;
                }

                string kind = Char.IsDigit(target[0]) ? "number " : "name ";
                throw NewParseException("Reference to undefined group " + kind + target);
            }
        }
Exemplo n.º 2
0
        // Parses one escape construct whose identifying character is at pattern[ptr]
        // (presumably the caller has already consumed the backslash; this method never
        // looks at it).
        //
        // Returns the expression built for a category class (d, w, s, p and their
        // upper-case negations), a position assertion (A, Z, z, G, b, B), a numbered
        // backreference (starting with 1-9) or a \k named backreference. For any other
        // character, or when ParseNumber reports a negative value, 'ptr' is put back where
        // it started and null is returned. A malformed \k construct raises the exception
        // built by NewParseException. Every backreference created is also recorded in
        // 'refs' together with the key of the group it refers to.
        private Expression ParseSpecial(RegexOptions options)
        {
            int  start    = ptr;
            bool ecmaMode = IsECMAScript(options);
            char escape   = pattern[ptr++];

            switch (escape)
            {
            // categories: the upper-case letter selects the negated class

            case 'd':
            case 'D':
                return new CharacterClass(ecmaMode ? Category.EcmaDigit : Category.Digit, escape == 'D');

            case 'w':
            case 'W':
                return new CharacterClass(ecmaMode ? Category.EcmaWord : Category.Word, escape == 'W');

            case 's':
            case 'S':
                return new CharacterClass(ecmaMode ? Category.EcmaWhiteSpace : Category.WhiteSpace, escape == 'S');

            case 'p':
            case 'P':
                // The ECMAScript flag is deliberately not consulted here: ECMAScript is
                // not supposed to support Unicode, yet \p{..} compiles and runs under the
                // MS implementation exactly as it does in canonical mode.
                return new CharacterClass(ParseUnicodeCategory(), escape == 'P');

            // positions

            case 'A':
                return new PositionAssertion(Position.StartOfString);

            case 'Z':
                return new PositionAssertion(Position.End);

            case 'z':
                return new PositionAssertion(Position.EndOfString);

            case 'G':
                return new PositionAssertion(Position.StartOfScan);

            case 'b':
                return new PositionAssertion(Position.Boundary);

            case 'B':
                return new PositionAssertion(Position.NonBoundary);

            // references

            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
            {
                // Step back onto the digit so that ParseNumber reads the number from its
                // first character.
                ptr = start;
                int groupNumber = ParseNumber(10, 1, 0);
                if (groupNumber < 0)
                {
                    ptr = start;
                    return null;
                }

                // FIXME test if number is within number of assigned groups
                // this may present a problem for right-to-left matching

                Reference numbered = new BackslashNumber(IsIgnoreCase(options), ecmaMode);
                refs.Add(numbered, groupNumber.ToString());
                return numbered;
            }

            case 'k':
            {
                // The name is wrapped either in <...> or in '...'.
                char opener = pattern[ptr++];
                char closer;
                if (opener == '<')
                {
                    closer = '>';
                }
                else if (opener == '\'')
                {
                    closer = '\'';
                }
                else
                {
                    throw NewParseException("Malformed \\k<...> named backreference.");
                }

                string groupName = ParseName();
                if (groupName == null || pattern[ptr] != closer)
                {
                    throw NewParseException("Malformed \\k<...> named backreference.");
                }

                ptr++;      // consume the closing delimiter
                Reference named = new Reference(IsIgnoreCase(options));
                refs.Add(named, groupName);
                return named;
            }

            default:
                // Not a construct handled here: undo the read and let the caller decide.
                ptr = start;
                return null;
            }
        }
Exemplo n.º 3
0
        // Parses one escape construct identified by the character at pattern[ptr]
        // (presumably the character following a backslash consumed by the caller).
        //
        // Handles the category classes d/w/s/p and their negated upper-case forms, the
        // position assertions A, Z, z, G, b and B, numbered backreferences that start with
        // 1-9, and \k named backreferences. Returns the resulting expression; when the
        // character is not one of these, or ParseNumber returns a negative value, 'ptr' is
        // reset to its value on entry and null is returned. Backreferences are recorded in
        // 'refs'. A malformed \k construct raises the exception built by NewParseException.
        private Expression ParseSpecial(RegexOptions options)
        {
            int        startPtr = ptr;
            bool       ecma     = IsECMAScript(options);
            Expression result;

            switch (pattern[ptr++])
            {
            // Category classes; the second constructor argument requests negation.
            case 'd':
                result = new CharacterClass(ecma ? Category.EcmaDigit : Category.Digit, false);
                break;

            case 'w':
                result = new CharacterClass(ecma ? Category.EcmaWord : Category.Word, false);
                break;

            case 's':
                result = new CharacterClass(ecma ? Category.EcmaWhiteSpace : Category.WhiteSpace, false);
                break;

            case 'p':
                // The ECMAScript flag is not consulted for \p and \P.
                result = new CharacterClass(ParseUnicodeCategory(), false);
                break;

            case 'D':
                result = new CharacterClass(ecma ? Category.EcmaDigit : Category.Digit, true);
                break;

            case 'W':
                result = new CharacterClass(ecma ? Category.EcmaWord : Category.Word, true);
                break;

            case 'S':
                result = new CharacterClass(ecma ? Category.EcmaWhiteSpace : Category.WhiteSpace, true);
                break;

            case 'P':
                result = new CharacterClass(ParseUnicodeCategory(), true);
                break;

            // Position assertions.
            case 'A':
                result = new PositionAssertion(Position.StartOfString);
                break;

            case 'Z':
                result = new PositionAssertion(Position.End);
                break;

            case 'z':
                result = new PositionAssertion(Position.EndOfString);
                break;

            case 'G':
                result = new PositionAssertion(Position.StartOfScan);
                break;

            case 'b':
                result = new PositionAssertion(Position.Boundary);
                break;

            case 'B':
                result = new PositionAssertion(Position.NonBoundary);
                break;

            // Numbered backreference.
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
            {
                // Move back onto the digit so ParseNumber sees the whole number.
                ptr = startPtr;
                int groupNumber = ParseNumber(10, 1, 0);
                if (groupNumber < 0)
                {
                    ptr = startPtr;
                    return null;
                }

                Reference numbered = new BackslashNumber(IsIgnoreCase(options), ecma);
                refs.Add(numbered, groupNumber.ToString());
                result = numbered;
                break;
            }

            // Named backreference: \k<name> or \k'name'.
            case 'k':
            {
                char terminator = pattern[ptr++];
                if (terminator == '<')
                {
                    terminator = '>';
                }
                else if (terminator != '\'')
                {
                    throw NewParseException("Malformed \\k<...> named backreference.");
                }

                string groupName = ParseName();
                if (groupName == null || pattern[ptr] != terminator)
                {
                    throw NewParseException("Malformed \\k<...> named backreference.");
                }

                ptr++;      // step over the closing delimiter
                Reference named = new Reference(IsIgnoreCase(options));
                refs.Add(named, groupName);
                result = named;
                break;
            }

            default:
                result = null;
                break;
            }

            if (result != null)
            {
                return result;
            }

            // Nothing recognised: restore the position seen on entry.
            ptr = startPtr;
            return null;
        }
Exemplo n.º 4
0
		// Parses one escape construct identified by the character at pattern[ptr]
		// (presumably the character right after a backslash that the caller consumed).
		//
		// Produces a character class for d/w/s/p (negated for the upper-case letter), a
		// position assertion for A, Z, z, G, b and B, a numbered backreference for a
		// number starting with 1-9, or a named backreference for \k<name> / \k'name'.
		// Backreferences are also recorded in 'refs'. Returns null, with 'ptr' restored to
		// its value on entry, when the character is none of these or ParseNumber returns a
		// negative value. A malformed \k construct raises the exception built by
		// NewParseException.
		private Expression ParseSpecial (RegexOptions options) {
			int start = ptr;
			bool ecma = IsECMAScript (options);
			char code = pattern[ptr ++];
			Expression parsed = null;

			switch (code) {

			// categories (upper-case letter = negated class)

			case 'd': case 'D':
				parsed = new CharacterClass (ecma ? Category.EcmaDigit : Category.Digit, code == 'D');
				break;

			case 'w': case 'W':
				parsed = new CharacterClass (ecma ? Category.EcmaWord : Category.Word, code == 'W');
				break;

			case 's': case 'S':
				parsed = new CharacterClass (ecma ? Category.EcmaWhiteSpace : Category.WhiteSpace, code == 'S');
				break;

			case 'p': case 'P':
				// 'ecma' is intentionally ignored: ECMAScript is not supposed to
				// support Unicode, yet \p{..} compiles and runs under the MS
				// implementation identically to canonical mode.
				parsed = new CharacterClass (ParseUnicodeCategory (), code == 'P');
				break;

			// positions

			case 'A':
				parsed = new PositionAssertion (Position.StartOfString);
				break;

			case 'Z':
				parsed = new PositionAssertion (Position.End);
				break;

			case 'z':
				parsed = new PositionAssertion (Position.EndOfString);
				break;

			case 'G':
				parsed = new PositionAssertion (Position.StartOfScan);
				break;

			case 'b':
				parsed = new PositionAssertion (Position.Boundary);
				break;

			case 'B':
				parsed = new PositionAssertion (Position.NonBoundary);
				break;

			// references

			case '1': case '2': case '3':
			case '4': case '5': case '6':
			case '7': case '8': case '9': {
				// Back onto the digit so ParseNumber reads the complete number.
				ptr = start;
				int number = ParseNumber (10, 1, 0);
				if (number < 0) {
					ptr = start;
					return null;
				}

				// FIXME test if number is within number of assigned groups
				// this may present a problem for right-to-left matching

				Reference numbered = new BackslashNumber (IsIgnoreCase (options), ecma);
				refs.Add (numbered, number.ToString ());
				parsed = numbered;
				break;
			}

			case 'k': {
				// The name is delimited by <...> or by '...'.
				char closing = pattern[ptr ++];
				if (closing == '<') {
					closing = '>';
				} else if (closing != '\'') {
					throw NewParseException ("Malformed \\k<...> named backreference.");
				}

				string groupName = ParseName ();
				if (groupName == null || pattern[ptr] != closing) {
					throw NewParseException ("Malformed \\k<...> named backreference.");
				}

				ptr ++;	// consume the closing delimiter
				Reference named = new Reference (IsIgnoreCase (options));
				refs.Add (named, groupName);
				parsed = named;
				break;
			}

			default:
				// not handled here; 'parsed' stays null
				break;
			}

			if (parsed != null) {
				return parsed;
			}

			// Nothing recognised: undo the read.
			ptr = start;
			return null;
		}
Exemplo n.º 5
0
        // Numbers the capturing groups collected in 'caps' and then resolves the entries of
        // 'refs' against them.
        //
        // Unnamed groups are numbered first (1, 2, ...). Named groups come second: a name
        // already present in the table takes over the index stored there, a name that
        // ParseDecimal consumes in full is used as the index, and every other name gets the
        // next number that is not yet a key of the table. 'num_groups' counts the groups
        // that received a new table entry; 'gap' is set to the counter after numbering.
        //
        // Throws the exception built by NewParseException for a reference whose key is not
        // in the table, unless one of the fallbacks in the final loop applies.
        private void ResolveReferences()
        {
            int       counter  = 1;
            Hashtable table    = new Hashtable();
            ArrayList deferred = null;      // groups handed to HandleExplicitNumericGroups

            // First pass: unnamed groups only.
            foreach (CapturingGroup g in caps)
            {
                if (g.Name != null)
                {
                    continue;
                }

                table.Add(counter.ToString(), g);
                g.Index = counter++;
                num_groups++;
            }

            // Second pass: named groups only.
            foreach (CapturingGroup g in caps)
            {
                string name = g.Name;
                if (name == null)
                {
                    continue;
                }

                if (table.Contains(name))
                {
                    // Key already registered: take over the index stored in the table.
                    g.Index = ((CapturingGroup)table[name]).Index;

                    if (g.Index == counter)
                    {
                        counter++;
                    }
                    else if (g.Index > counter)
                    {
                        // NOTE(review): 'deferred' is not null-checked here; an index
                        // above the counter seems to be possible only after the
                        // explicit-number branch below has created the list.
                        deferred.Add(g);
                    }

                    continue;
                }

                // Work out whether the whole name is a decimal number.
                bool nameIsNumber = false;
                int  number       = 0;
                if (char.IsDigit(name[0]))
                {
                    int stop = 0;
                    number       = ParseDecimal(name, ref stop);
                    nameIsNumber = stop == name.Length;
                }

                if (nameIsNumber)
                {
                    g.Index = number;
                    table.Add(name, g);
                    num_groups++;

                    if (number == counter)
                    {
                        counter++;
                    }
                    else
                    {
                        if (deferred == null)
                        {
                            deferred = new ArrayList(4);
                        }
                        deferred.Add(g);
                    }

                    continue;
                }

                // Any other name: skip numbers that are already keys, then register the
                // group under both the free number and the name.
                string slot = counter.ToString();
                while (table.Contains(slot))
                {
                    counter++;
                    slot = counter.ToString();
                }

                table.Add(slot, g);
                table.Add(name, g);
                g.Index = counter++;
                num_groups++;
            }

            gap = counter;

            if (deferred != null)
            {
                HandleExplicitNumericGroups(deferred);
            }

            // Final loop: attach each recorded reference to its group.
            foreach (Expression pending in refs.Keys)
            {
                string wanted = (string)refs[pending];

                if (table.Contains(wanted))
                {
                    CapturingGroup target = (CapturingGroup)table[wanted];

                    if (pending is Reference)
                    {
                        ((Reference)pending).CapturingGroup = target;
                    }
                    else if (pending is CaptureAssertion)
                    {
                        ((CaptureAssertion)pending).CapturingGroup = target;
                    }
                    else if (pending is BalancingGroup)
                    {
                        ((BalancingGroup)pending).Balance = target;
                    }

                    continue;
                }

                // Key not found. A capture assertion whose key does not begin with a digit
                // is skipped without an error.
                if (pending is CaptureAssertion && !char.IsDigit(wanted[0]))
                {
                    continue;
                }

                // A backslash-number reference may still resolve itself.
                BackslashNumber numeric = pending as BackslashNumber;
                if (numeric != null && numeric.ResolveReference(wanted, table))
                {
                    continue;
                }

                throw NewParseException("Reference to undefined group " + (char.IsDigit(wanted[0]) ? "number " : "name ") + wanted);
            }
        }