Пример #1
0
        /// <summary>
        /// Parses a bracketed character set from <c>pattern</c>, starting at <c>ptr</c>,
        /// and returns it as a <see cref="CharacterClass"/>.
        /// </summary>
        /// <remarks>
        /// Presumably <c>ptr</c> points just past the opening '[' on entry (the caller is
        /// not visible here - confirm). On success <c>ptr</c> is left just past the
        /// closing ']'. A parse exception from <c>NewParseException</c> is thrown when the
        /// set is not closed, when a range ends in a category escape (e.g. <c>[a-\s]</c>),
        /// or when a range is in reverse order (e.g. <c>[z-a]</c>).
        /// </remarks>
        /// <param name="options">Options consulted for ECMAScript and ignore-case behaviour.</param>
        /// <returns>The character class built from the set.</returns>
        private Expression ParseCharacterClass(RegexOptions options)
        {
            bool negate = false;

            // A leading '^' negates the whole set.
            if (pattern[ptr] == '^')
            {
                negate = true;
                ++ptr;
            }

            bool           ecma = IsECMAScript(options);
            CharacterClass cls  = new CharacterClass(negate, IsIgnoreCase(options));

            // A ']' in first position is a literal, not the terminator.
            if (pattern[ptr] == ']')
            {
                cls.AddCharacter(']');
                ++ptr;
            }

            int  c      = -1;
            int  last   = -1;       // previous literal that may start a range; -1 when none
            bool range  = false;    // true after "x-" while the upper bound is still pending
            bool closed = false;

            while (ptr < pattern.Length)
            {
                c = pattern[ptr++];

                if (c == ']')
                {
                    closed = true;
                    break;
                }

                // '-' only starts a range when a literal precedes it and no range is pending.
                if (c == '-' && last >= 0 && !range)
                {
                    range = true;
                    continue;
                }

                if (c == '\\')
                {
                    c = ParseEscape();
                    if (c >= 0)
                    {
                        goto char_recognized;
                    }

                    // didn't recognize escape: look at the escaped character ourselves
                    c = pattern [ptr++];
                    switch (c)
                    {
                    case 'b':
                        // inside a set, \b is the backspace character
                        c = '\b';
                        goto char_recognized;

                    case 'd':
                    case 'D':
                        cls.AddCategory(ecma ? Category.EcmaDigit : Category.Digit, c == 'D');
                        break;

                    case 'w':
                    case 'W':
                        cls.AddCategory(ecma ? Category.EcmaWord : Category.Word, c == 'W');
                        break;

                    case 's':
                    case 'S':
                        cls.AddCategory(ecma ? Category.EcmaWhiteSpace : Category.WhiteSpace, c == 'S');
                        break;

                    case 'p':
                    case 'P':
                        cls.AddCategory(ParseUnicodeCategory(), c == 'P');                              // ignore ecma
                        break;

                    default:                                    // add escaped character
                        goto char_recognized;
                    }

                    // if the pattern looks like [a-\s] ...
                    if (range)
                    {
                        // cast to char so the message shows "\s" rather than a numeric code point
                        throw NewParseException("character range cannot have category \\" + (char)c);
                    }

                    // a category cannot be the lower bound of a range
                    last = -1;
                    continue;
                }

char_recognized:
                if (range)
                {
                    // if 'range' is true, we know that 'last >= 0'
                    if (c < last)
                    {
                        // cast to char so the message shows "[z-a]" rather than "[122-97]"
                        throw NewParseException("[" + (char)last + "-" + (char)c + "] range in reverse order.");
                    }
                    cls.AddRange((char)last, (char)c);
                    last  = -1;
                    range = false;
                    continue;
                }

                cls.AddCharacter((char)c);
                last = c;
            }

            if (!closed)
            {
                throw NewParseException("Unterminated [] set.");
            }

            // a trailing "x-" leaves a pending dash, which is a literal '-'
            if (range)
            {
                cls.AddCharacter('-');
            }

            return cls;
        }
Пример #2
0
		/// <summary>
		/// Parses a bracketed character set from <c>pattern</c>, starting at <c>ptr</c>,
		/// and returns it as a <see cref="CharacterClass"/>.
		/// </summary>
		/// <remarks>
		/// Presumably <c>ptr</c> points just past the opening '[' on entry (the caller is
		/// not visible here - confirm). On success <c>ptr</c> is left just past the
		/// closing ']'. A parse exception from <c>NewParseException</c> is thrown for an
		/// unterminated set, a range whose upper bound is a category escape, or a range
		/// in reverse order.
		/// </remarks>
		/// <param name="options">Options consulted for ECMAScript and ignore-case behaviour.</param>
		/// <returns>The character class built from the set.</returns>
		private Expression ParseCharacterClass (RegexOptions options) {
			// A leading '^' negates the whole set.
			bool negate = false;
			if (pattern[ptr] == '^') {
				negate = true;
				++ ptr;
			}
			
			bool ecma = IsECMAScript (options);
			CharacterClass cls = new CharacterClass (negate, IsIgnoreCase (options));

			// A ']' in first position is a literal, not the terminator.
			if (pattern[ptr] == ']') {
				cls.AddCharacter (']');
				++ ptr;
			}

			int c = -1;
			int last = -1;		// previous literal that may start a range; -1 when none
			bool range = false;	// true after "x-" while the upper bound is still pending
			bool closed = false;
			while (ptr < pattern.Length) {
				c = pattern[ptr ++];

				if (c == ']') {
					closed = true;
					break;
				}

				// '-' only starts a range when a literal precedes it and no range is pending.
				if (c == '-' && last >= 0 && !range) {
					range = true;
					continue;
				}

				if (c == '\\') {
					c = ParseEscape (true);
					if (c >= 0)
						goto char_recognized;

					// didn't recognize escape: look at the escaped character ourselves
					c = pattern [ptr ++];
					switch (c) {
					case 'b':
						// inside a set, \b is the backspace character
						c = '\b';
						goto char_recognized;

					case 'd': case 'D':
						cls.AddCategory (ecma ? Category.EcmaDigit : Category.Digit, c == 'D');
						break;
						
					case 'w': case 'W':
						cls.AddCategory (ecma ? Category.EcmaWord : Category.Word, c == 'W');
						break;
						
					case 's': case 'S':
						cls.AddCategory (ecma ? Category.EcmaWhiteSpace : Category.WhiteSpace, c == 'S');
						break;
						
					case 'p': case 'P':
						cls.AddCategory (ParseUnicodeCategory (), c == 'P');	// ignore ecma
						break;

					default:		// add escaped character
						goto char_recognized;
					}

					// if the pattern looks like [a-\s] ...
					// (cast to char so the message shows "\s" rather than a numeric code point)
					if (range)
						throw NewParseException ("character range cannot have category \\" + (char)c);

					// a category cannot be the lower bound of a range
					last = -1;
					continue;
				}

			char_recognized:
				if (range) {
					// if 'range' is true, we know that 'last >= 0'
					// (cast to char so the message shows "[z-a]" rather than "[122-97]")
					if (c < last)
						throw NewParseException ("[" + (char)last + "-" + (char)c + "] range in reverse order.");
					cls.AddRange ((char)last, (char)c);
					last = -1;
					range = false;
					continue;
				}

				cls.AddCharacter ((char)c);
				last = c;
			}

			if (!closed)
				throw NewParseException ("Unterminated [] set.");

			// a trailing "x-" leaves a pending dash, which is a literal '-'
			if (range)
				cls.AddCharacter ('-');

			return cls;
		}
Пример #3
0
        /// <summary>
        /// Parses a bracketed character set from <c>pattern</c>, starting at <c>ptr</c>,
        /// and returns it as a <see cref="CharacterClass"/>.
        /// </summary>
        /// <remarks>
        /// Presumably <c>ptr</c> points just past the opening '[' on entry (the caller is
        /// not visible here - confirm). On success <c>ptr</c> is left just past the
        /// closing ']'. A parse exception from <c>NewParseException</c> is thrown for an
        /// unterminated set, a range whose upper bound is a category escape, or a range
        /// in reverse order.
        /// </remarks>
        /// <param name="options">Options consulted for ECMAScript and ignore-case behaviour.</param>
        /// <returns>The character class built from the set.</returns>
        private Expression ParseCharacterClass(RegexOptions options)
        {
            bool negate = false;

            // A leading '^' negates the whole set.
            if (pattern[ptr] == '^')
            {
                negate = true;
                ptr++;
            }
            bool           ecma = IsECMAScript(options);
            CharacterClass cls  = new CharacterClass(negate, IsIgnoreCase(options));

            // A ']' in first position is a literal, not the terminator.
            if (pattern[ptr] == ']')
            {
                cls.AddCharacter(']');
                ptr++;
            }
            int  c      = -1;
            int  last   = -1;       // previous literal that may start a range; -1 when none
            bool range  = false;    // true after "x-" while the upper bound is still pending
            bool closed = false;

            while (ptr < pattern.Length)
            {
                c = pattern[ptr++];
                if (c == ']')
                {
                    closed = true;
                    break;
                }
                // '-' only starts a range when a literal precedes it and no range is pending.
                if (c == '-' && last >= 0 && !range)
                {
                    range = true;
                    continue;
                }
                if (c == '\\')
                {
                    c = ParseEscape();
                    if (c < 0)
                    {
                        // ParseEscape did not recognise the escape: inspect the escaped character.
                        c = pattern[ptr++];
                        bool categoryAdded = true;
                        switch (c)
                        {
                        case 'b':
                            // inside a set, \b is the backspace character
                            c             = '\b';
                            categoryAdded = false;
                            break;

                        case 'd':
                        case 'D':
                            cls.AddCategory(ecma ? Category.EcmaDigit : Category.Digit, c == 'D');
                            break;

                        case 'w':
                        case 'W':
                            cls.AddCategory(ecma ? Category.EcmaWord : Category.Word, c == 'W');
                            break;

                        case 's':
                        case 'S':
                            cls.AddCategory(ecma ? Category.EcmaWhiteSpace : Category.WhiteSpace, c == 'S');
                            break;

                        case 'p':
                        case 'P':
                            cls.AddCategory(ParseUnicodeCategory(), c == 'P');
                            break;

                        default:
                            // any other escaped character is taken literally
                            categoryAdded = false;
                            break;
                        }

                        if (categoryAdded)
                        {
                            // the pattern looks like [a-\s]
                            if (range)
                            {
                                // cast to char so the message shows "\s" rather than a numeric code point
                                throw NewParseException("character range cannot have category \\" + (char)c);
                            }
                            // a category cannot be the lower bound of a range
                            last = -1;
                            continue;
                        }
                    }
                }
                if (range)
                {
                    // 'range' is only set while 'last >= 0'
                    if (c < last)
                    {
                        // cast to char so the message shows "[z-a]" rather than "[122-97]"
                        throw NewParseException("[" + (char)last + "-" + (char)c + "] range in reverse order.");
                    }
                    cls.AddRange((char)last, (char)c);
                    last  = -1;
                    range = false;
                }
                else
                {
                    cls.AddCharacter((char)c);
                    last = c;
                }
            }
            if (!closed)
            {
                throw NewParseException("Unterminated [] set.");
            }
            // a trailing "x-" leaves a pending dash, which is a literal '-'
            if (range)
            {
                cls.AddCharacter('-');
            }
            return cls;
        }
Пример #4
0
		/// <summary>
		/// Reads a bracketed character set from <c>pattern</c> at <c>ptr</c> and builds
		/// the matching <see cref="CharacterClass"/>.
		/// </summary>
		/// <remarks>
		/// Presumably entered with <c>ptr</c> just past the opening '[' (caller not
		/// visible here - confirm). Throws via <c>NewParseException</c> when the set is
		/// never closed or a range is in reverse order.
		/// </remarks>
		/// <param name="options">Options consulted for ECMAScript and ignore-case behaviour.</param>
		/// <returns>The character class built from the set.</returns>
		private Expression ParseCharacterClass (RegexOptions options) {
			// A leading '^' negates the whole set.
			bool negate = pattern[ptr] == '^';
			if (negate)
				++ ptr;

			bool ecma = IsECMAScript (options);
			CharacterClass cls = new CharacterClass (negate, IsIgnoreCase (options));

			// A ']' in first position is a literal member of the set.
			if (pattern[ptr] == ']') {
				cls.AddCharacter (']');
				++ ptr;
			}

			int prev = -1;			// last plain character added, or -1
			bool dashSeen = false;		// a '-' is waiting for its right-hand side
			bool terminated = false;

			while (ptr < pattern.Length) {
				int ch = pattern[ptr ++];

				if (ch == ']') {
					terminated = true;
					break;
				}

				if (ch == '-') {
					dashSeen = true;
					continue;
				}

				if (ch == '\\') {
					ch = ParseEscape ();
					if (ch < 0) {
						// ParseEscape gave up; classify the escaped character here.
						ch = pattern[ptr ++];
						bool isCategory = true;

						switch (ch) {
						case 'b':
							ch = '\b';
							isCategory = false;
							break;

						case 'd': case 'D':
							cls.AddCategory (ecma ? Category.EcmaDigit : Category.Digit, ch == 'D');
							break;

						case 'w': case 'W':
							cls.AddCategory (ecma ? Category.EcmaWord : Category.Word, ch == 'W');
							break;

						case 's': case 'S':
							cls.AddCategory (ecma ? Category.EcmaWhiteSpace : Category.WhiteSpace, ch == 'S');
							break;

						case 'p': case 'P':
							// the ECMAScript option is not consulted for \p / \P
							cls.AddCategory (ParseUnicodeCategory (), ch == 'P');
							break;

						default:
							// any other escaped character is added literally below
							isCategory = false;
							break;
						}

						if (isCategory) {
							prev = -1;
							continue;
						}
					}
				}

				if (!dashSeen) {
					cls.AddCharacter ((char)ch);
					prev = ch;
					continue;
				}

				// A dash is pending: either close a range or treat the dash literally.
				if (ch < prev)
					throw NewParseException ("[x-y] range in reverse order.");

				if (prev < 0) {
					cls.AddCharacter ((char)ch);
					cls.AddCharacter ('-');
				}
				else
					cls.AddRange ((char)prev, (char)ch);

				dashSeen = false;
				prev = -1;
			}

			if (!terminated)
				throw NewParseException ("Unterminated [] set.");

			// A dash right before the closing ']' is a literal '-'.
			if (dashSeen)
				cls.AddCharacter ('-');

			return cls;
		}
Пример #5
0
        /// <summary>
        /// Reads a bracketed character set from <c>pattern</c> at <c>ptr</c> and builds
        /// the matching <see cref="CharacterClass"/>.
        /// </summary>
        /// <remarks>
        /// Presumably entered with <c>ptr</c> just past the opening '[' (caller not
        /// visible here - confirm). Throws via <c>NewParseException</c> when the set is
        /// never closed or a range is in reverse order.
        /// </remarks>
        /// <param name="options">Options consulted for ECMAScript and ignore-case behaviour.</param>
        /// <returns>The character class built from the set.</returns>
        private Expression ParseCharacterClass(RegexOptions options)
        {
            // A leading '^' negates the whole set.
            bool negate = pattern[ptr] == '^';
            if (negate)
            {
                ptr++;
            }

            bool           ecma = IsECMAScript(options);
            CharacterClass set  = new CharacterClass(negate, IsIgnoreCase(options));

            // A ']' in first position is a literal member of the set.
            if (pattern[ptr] == ']')
            {
                set.AddCharacter(']');
                ptr++;
            }

            int  previous    = -1;      // last plain character added, or -1
            bool dashPending = false;   // a '-' is waiting for its right-hand side
            bool terminated  = false;

            while (ptr < pattern.Length)
            {
                int current = pattern[ptr++];

                if (current == ']')
                {
                    terminated = true;
                    break;
                }

                if (current == '-')
                {
                    dashPending = true;
                    continue;
                }

                if (current == '\\')
                {
                    current = ParseEscape();
                    if (current < 0)
                    {
                        // ParseEscape gave up; classify the escaped character here.
                        current = pattern[ptr++];

                        if (current == 'b')
                        {
                            current = '\b';
                        }
                        else if (current == 'd' || current == 'D')
                        {
                            set.AddCategory(ecma ? Category.EcmaDigit : Category.Digit, current == 'D');
                            previous = -1;
                            continue;
                        }
                        else if (current == 'w' || current == 'W')
                        {
                            set.AddCategory(ecma ? Category.EcmaWord : Category.Word, current == 'W');
                            previous = -1;
                            continue;
                        }
                        else if (current == 's' || current == 'S')
                        {
                            set.AddCategory(ecma ? Category.EcmaWhiteSpace : Category.WhiteSpace, current == 'S');
                            previous = -1;
                            continue;
                        }
                        else if (current == 'p' || current == 'P')
                        {
                            // the ECMAScript option is not consulted for \p / \P
                            set.AddCategory(ParseUnicodeCategory(), current == 'P');
                            previous = -1;
                            continue;
                        }
                        // any other escaped character is added literally below
                    }
                }

                if (dashPending)
                {
                    if (current < previous)
                    {
                        throw NewParseException("[x-y] range in reverse order.");
                    }

                    if (previous < 0)
                    {
                        // no left-hand side: keep both the character and the dash as literals
                        set.AddCharacter((char)current);
                        set.AddCharacter('-');
                    }
                    else
                    {
                        set.AddRange((char)previous, (char)current);
                    }

                    dashPending = false;
                    previous    = -1;
                    continue;
                }

                set.AddCharacter((char)current);
                previous = current;
            }

            if (!terminated)
            {
                throw NewParseException("Unterminated [] set.");
            }

            // A dash right before the closing ']' is a literal '-'.
            if (dashPending)
            {
                set.AddCharacter('-');
            }

            return set;
        }