Ejemplo n.º 1
0
            /// <summary>
            /// Consumes the number that starts at <c>index</c> and decides whether it denotes
            /// a numbered backreference or an octal character escape.
            /// </summary>
            /// <returns>
            /// A backreference when the number is 1-9, or when it is 10 or greater, does not
            /// begin with '8' or '9', and is smaller than <c>groupNames.Count</c>; an octal
            /// escape for every other number of 10 or greater.
            /// </returns>
            /// <exception cref="InvalidOperationException">The parsed number is less than 1.</exception>
            private IExpression ParseOctalEscapeOrNumberedBackreference()
            {
                // NOTE(review): GetNumberEndIndex presumably returns the position just past
                // the last digit of the number - confirm against its definition.
                int    stop   = GetNumberEndIndex();
                string digits = regex.Substring(index, stop - index);

                index = stop; // step past the number before interpreting it
                int value = Int32.Parse(digits);

                if (value < 1)
                {
                    // The Regex constructor is expected to reject backreferences to \0 and
                    // invalid octal codes, so this point should never be reached.
                    throw new InvalidOperationException();
                }

                if (value <= 9)
                {
                    return Backreference.For(value);
                }

                // From here on the value is 10 or greater.
                char leadingDigit          = digits[0];
                bool startsWithEightOrNine = leadingDigit == '8' || leadingDigit == '9';

                if (!startsWithEightOrNine && value < groupNames.Count)
                {
                    return Backreference.For(value);
                }

                return CharacterEscapes.Octal(digits);
            }
Ejemplo n.º 2
0
            /// <summary>
            /// Parses a control-character escape: skips the 'c', reads the character that
            /// follows it, and leaves <c>index</c> just past that character.
            /// </summary>
            /// <returns>The escape built by <c>CharacterEscapes.ControlCharacter</c>.</returns>
            private IExpression ParseControlCharacterEscape()
            {
                // Pre-increment moves past 'c' so the indexer reads the control character itself.
                char controlCharacter = regex[++index];

                index++; // move past the control character
                return CharacterEscapes.ControlCharacter(controlCharacter);
            }
Ejemplo n.º 3
0
            /// <summary>
            /// Parses a <c>\u</c> escape: skips the 'u' and consumes the code that follows,
            /// looking no further than four characters ahead or the end of the pattern.
            /// </summary>
            /// <returns>The escape built by <c>CharacterEscapes.Unicode</c> from the consumed text.</returns>
            private IExpression ParseUnicodeCharacterEscape()
            {
                ++index; // step over 'u'

                // Never look beyond four characters or past the end of the pattern.
                int    limit     = Math.Min(index + 4, regex.Length);
                int    stop      = GetHexidecimalNumberEndIndex(limit);
                string hexDigits = regex.Substring(index, stop - index);

                index = stop; // step over the code
                return CharacterEscapes.Unicode(hexDigits);
            }
Ejemplo n.º 4
0
            /// <summary>
            /// Parses a <c>\x</c> escape: skips the 'x' and consumes the hex number that follows,
            /// looking no further than two characters ahead or the end of the pattern.
            /// </summary>
            /// <returns>The escape built by <c>CharacterEscapes.Hexidecimal</c> from the consumed text.</returns>
            private IExpression ParseHexidecimalEscape()
            {
                ++index; // step over 'x'

                // Never look beyond two characters or past the end of the pattern.
                int    limit     = Math.Min(index + 2, regex.Length);
                int    stop      = GetHexidecimalNumberEndIndex(limit);
                string hexDigits = regex.Substring(index, stop - index);

                index = stop; // step over the hex number
                return CharacterEscapes.Hexidecimal(hexDigits);
            }
Ejemplo n.º 5
0
            /// <summary>
            /// Parses a Unicode category escape of the form <c>\p{...}</c> or <c>\P{...}</c>.
            /// When no '{' follows the letter, the letter is returned as a plain character escape.
            /// </summary>
            /// <param name="isPositive">
            /// <c>true</c> when the escape letter is 'p'; <c>false</c> when it is 'P'.
            /// </param>
            /// <returns>
            /// A <c>UnicodeCategory</c> holding the escape text, or a plain character escape
            /// for the letter when no category block follows it.
            /// </returns>
            /// <exception cref="InvalidOperationException">
            /// A '{' was found but there is no closing '}' after it.
            /// </exception>
            private IExpression ParseUnicodeCategory(bool isPositive)
            {
                // Computed once so both return paths keep the case of the original letter.
                char escapeLetter = isPositive ? 'p' : 'P';

                ++index; // swallow 'p' or 'P'
                if (regex[index] != '{')
                {
                    // No category block: emit the escaped letter itself. This used to
                    // return 'p' unconditionally, which turned "\P" into "\p".
                    return(CharacterEscapes.For(escapeLetter));
                }
                ++index; // swallow {
                int endIndex = regex.IndexOf('}', index);

                if (endIndex == -1)
                {
                    // The Regex constructor should prevent this from happening.
                    throw new InvalidOperationException("Unicode category escape is missing its closing '}'.");
                }
                string category = regex.Substring(index, endIndex - index);

                index = endIndex + 1; // swallow category and '}'
                return(new UnicodeCategory($@"\{escapeLetter}{{{category}}}"));
            }
Ejemplo n.º 6
0
            /// <summary>
            /// Advances <c>index</c> by one, reads the character found there and dispatches
            /// to the parser for the escape sequence that character selects.
            /// </summary>
            /// <param name="context">
            /// Where the escape appears; it only affects <c>\b</c>, which is a backspace
            /// inside a character group and an anchor everywhere else.
            /// </param>
            /// <returns>The expression produced by the selected parser.</returns>
            private IExpression ParseEscapeSequenceInternal(ExpressionContext context)
            {
                // NOTE(review): presumably index is on the backslash on entry - confirm with callers.
                ++index;
                char escaped = regex[index];

                // Any decimal digit starts either an octal escape or a numbered backreference.
                if (escaped >= '0' && escaped <= '9')
                {
                    return ParseOctalEscapeOrNumberedBackreference();
                }

                // \b is the only escape whose meaning depends on the surrounding context.
                if (escaped == 'b')
                {
                    if (context == ExpressionContext.CharacterGroup)
                    {
                        return ParseCharacterEscape(CharacterEscapes.Backspace);
                    }

                    return ParseAnchor(Anchors.b);
                }

                switch (escaped)
                {
                    // Anchors
                    case 'A':
                        return ParseAnchor(Anchors.A);
                    case 'Z':
                        return ParseAnchor(Anchors.Z);
                    case 'z':
                        return ParseAnchor(Anchors.z);
                    case 'G':
                        return ParseAnchor(Anchors.G);
                    case 'B':
                        return ParseAnchor(Anchors.B);

                    // Named backreference
                    case 'k':
                        return ParseNamedBackreference();

                    // Character classes
                    case 'w':
                        return ParseCharacterClass(CharacterClasses.Word);
                    case 'W':
                        return ParseCharacterClass(CharacterClasses.NonWord);
                    case 's':
                        return ParseCharacterClass(CharacterClasses.Whitespace);
                    case 'S':
                        return ParseCharacterClass(CharacterClasses.NonWhitespace);
                    case 'd':
                        return ParseCharacterClass(CharacterClasses.Digit);
                    case 'D':
                        return ParseCharacterClass(CharacterClasses.NonDigit);

                    // Unicode categories
                    case 'p':
                        return ParseUnicodeCategory(isPositive: true);
                    case 'P':
                        return ParseUnicodeCategory(isPositive: false);

                    // Single-letter character escapes
                    case 'a':
                        return ParseCharacterEscape(CharacterEscapes.Bell);
                    case 't':
                        return ParseCharacterEscape(CharacterEscapes.Tab);
                    case 'r':
                        return ParseCharacterEscape(CharacterEscapes.CarriageReturn);
                    case 'v':
                        return ParseCharacterEscape(CharacterEscapes.VerticalTab);
                    case 'f':
                        return ParseCharacterEscape(CharacterEscapes.FormFeed);
                    case 'n':
                        return ParseCharacterEscape(CharacterEscapes.NewLine);
                    case 'e':
                        return ParseCharacterEscape(CharacterEscapes.Escape);

                    // Escaped metacharacters
                    case '.':
                        return ParseCharacterEscape(CharacterEscapes.Period);
                    case '$':
                        return ParseCharacterEscape(CharacterEscapes.Dollar);
                    case '^':
                        return ParseCharacterEscape(CharacterEscapes.Carot);
                    case '{':
                        return ParseCharacterEscape(CharacterEscapes.LeftCurlyBrace);
                    case '[':
                        return ParseCharacterEscape(CharacterEscapes.LeftSquareBracket);
                    case '(':
                        return ParseCharacterEscape(CharacterEscapes.LeftParenthesis);
                    case '|':
                        return ParseCharacterEscape(CharacterEscapes.Pipe);
                    case ')':
                        return ParseCharacterEscape(CharacterEscapes.RightParenthesis);
                    case '*':
                        return ParseCharacterEscape(CharacterEscapes.Asterisk);
                    case '+':
                        return ParseCharacterEscape(CharacterEscapes.Plus);
                    case '?':
                        return ParseCharacterEscape(CharacterEscapes.QuestionMark);
                    case '\\':
                        return ParseCharacterEscape(CharacterEscapes.Backslash);

                    // Escapes that carry a payload
                    case 'x':
                        return ParseHexidecimalEscape();
                    case 'c':
                        return ParseControlCharacterEscape();
                    case 'u':
                        return ParseUnicodeCharacterEscape();

                    // Anything else is an escape of the character itself.
                    default:
                        return ParseCharacterEscape(CharacterEscapes.For(escaped));
                }
            }