/// <summary>
/// Parses the run of digits at the current position and classifies it as
/// either a numbered backreference or an octal character escape.
/// </summary>
/// <returns>
/// A backreference for values 1-9, or for values of 10 and above that are
/// below <c>groupNames.Count</c>; otherwise an octal escape built from the
/// digit text.
/// </returns>
/// <exception cref="InvalidOperationException">
/// The digits match none of the handled cases (for example a value of zero).
/// </exception>
private IExpression ParseOctalEscapeOrNumberedBackreference()
{
    int digitsEnd = GetNumberEndIndex();
    string digits = regex.Substring(index, digitsEnd - index);
    index = digitsEnd; // swallow number

    int value = int.Parse(digits);

    // Values 1 through 9 are always treated as backreferences.
    if (value >= 1 && value <= 9)
    {
        return Backreference.For(value);
    }

    // Digit runs that start with '8' or '9' are handed to the octal escape factory.
    char leadingDigit = digits[0];
    if (leadingDigit == '8' || leadingDigit == '9')
    {
        return CharacterEscapes.Octal(digits);
    }

    if (value >= 10)
    {
        // Larger numbers are a backreference only while they stay below the
        // number of entries in groupNames; anything else becomes an octal escape.
        if (value < groupNames.Count)
        {
            return Backreference.For(value);
        }

        return CharacterEscapes.Octal(digits);
    }

    // The Regex constructor should prevent backreferences to \0 and invalid octal codes.
    throw new InvalidOperationException();
}
/// <summary>
/// Parses a control-character escape: skips the 'c', reads the single
/// character that follows it, and advances past that character.
/// </summary>
/// <returns>The control-character escape for the character that was read.</returns>
private IExpression ParseControlCharacterEscape()
{
    index += 1; // swallow 'c'

    char target = regex[index];
    index += 1; // swallow control character

    return CharacterEscapes.ControlCharacter(target);
}
/// <summary>
/// Parses a Unicode character escape: skips the 'u' and then consumes the
/// code text that follows it.
/// </summary>
/// <returns>The Unicode character escape built from the consumed code text.</returns>
private IExpression ParseUnicodeCharacterEscape()
{
    index++; // swallow 'u'

    // Look at most four characters ahead, clamped to the end of the pattern.
    int scanLimit = Math.Min(index + 4, regex.Length);
    int codeEnd = GetHexidecimalNumberEndIndex(scanLimit);

    int codeLength = codeEnd - index;
    string code = regex.Substring(index, codeLength);
    index = codeEnd; // swallow code

    return CharacterEscapes.Unicode(code);
}
/// <summary>
/// Parses a hexadecimal character escape: skips the 'x' and then consumes
/// the hex number text that follows it.
/// </summary>
/// <returns>The hexadecimal character escape built from the consumed text.</returns>
private IExpression ParseHexidecimalEscape()
{
    index++; // swallow 'x'

    // Look at most two characters ahead, clamped to the end of the pattern.
    int scanLimit = Math.Min(index + 2, regex.Length);
    int hexEnd = GetHexidecimalNumberEndIndex(scanLimit);

    int hexLength = hexEnd - index;
    string hexDigits = regex.Substring(index, hexLength);
    index = hexEnd; // swallow hex number

    return CharacterEscapes.Hexidecimal(hexDigits);
}
/// <summary>
/// Parses a Unicode category escape of the form <c>\p{name}</c> or
/// <c>\P{name}</c>, starting at the 'p' / 'P' character.
/// </summary>
/// <param name="isPositive">
/// <c>true</c> when the escape letter is 'p'; <c>false</c> when it is 'P'.
/// </param>
/// <returns>
/// A <c>UnicodeCategory</c> built from the escape text, or a plain character
/// escape for the escape letter when no '{' follows it.
/// </returns>
/// <exception cref="InvalidOperationException">
/// No closing '}' exists after the opening '{'.
/// </exception>
private IExpression ParseUnicodeCategory(bool isPositive)
{
    // The letter that introduced this escape; reused for the fallback and for the category text.
    char escapeLetter = isPositive ? 'p' : 'P';

    ++index; // swallow 'p' or 'P'

    if (regex[index] != '{')
    {
        // Not a category escape after all. Report the letter that was actually
        // written, so that \P is not silently turned into \p.
        return CharacterEscapes.For(escapeLetter);
    }

    ++index; // swallow {

    int endIndex = regex.IndexOf('}', index);
    if (endIndex == -1)
    {
        // The Regex constructor should prevent this from happening.
        throw new InvalidOperationException();
    }

    string category = regex.Substring(index, endIndex - index);
    index = endIndex + 1; // swallow category and '}'

    return new UnicodeCategory($@"\{escapeLetter}{{{category}}}");
}
/// <summary>
/// Advances one position, reads the character found there, and dispatches
/// to the parser for the escape sequence that character selects.
/// </summary>
/// <param name="context">
/// Where the escape appears; it only affects <c>b</c>, which is parsed as a
/// backspace escape inside a character group and as an anchor elsewhere.
/// </param>
/// <returns>The expression produced by the selected parser.</returns>
private IExpression ParseEscapeSequenceInternal(ExpressionContext context)
{
    index++; // NOTE(review): presumably steps over the leading backslash - confirm against caller

    char escapeCode = regex[index];
    switch (escapeCode)
    {
        // Anchors
        case 'A':
            return ParseAnchor(Anchors.A);
        case 'Z':
            return ParseAnchor(Anchors.Z);
        case 'z':
            return ParseAnchor(Anchors.z);
        case 'G':
            return ParseAnchor(Anchors.G);
        case 'B':
            return ParseAnchor(Anchors.B);
        case 'b':
            // 'b' is a backspace escape inside a character group and an anchor otherwise.
            if (context == ExpressionContext.CharacterGroup)
            {
                return ParseCharacterEscape(CharacterEscapes.Backspace);
            }
            else
            {
                return ParseAnchor(Anchors.b);
            }

        // Backreferences
        case 'k':
            return ParseNamedBackreference();
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
            return ParseOctalEscapeOrNumberedBackreference();

        // Character classes
        case 'w':
            return ParseCharacterClass(CharacterClasses.Word);
        case 'W':
            return ParseCharacterClass(CharacterClasses.NonWord);
        case 's':
            return ParseCharacterClass(CharacterClasses.Whitespace);
        case 'S':
            return ParseCharacterClass(CharacterClasses.NonWhitespace);
        case 'd':
            return ParseCharacterClass(CharacterClasses.Digit);
        case 'D':
            return ParseCharacterClass(CharacterClasses.NonDigit);

        // Unicode categories
        case 'p':
            return ParseUnicodeCategory(isPositive: true);
        case 'P':
            return ParseUnicodeCategory(isPositive: false);

        // Letter escapes
        case 'a':
            return ParseCharacterEscape(CharacterEscapes.Bell);
        case 't':
            return ParseCharacterEscape(CharacterEscapes.Tab);
        case 'r':
            return ParseCharacterEscape(CharacterEscapes.CarriageReturn);
        case 'v':
            return ParseCharacterEscape(CharacterEscapes.VerticalTab);
        case 'f':
            return ParseCharacterEscape(CharacterEscapes.FormFeed);
        case 'n':
            return ParseCharacterEscape(CharacterEscapes.NewLine);
        case 'e':
            return ParseCharacterEscape(CharacterEscapes.Escape);

        // Escaped punctuation
        case '.':
            return ParseCharacterEscape(CharacterEscapes.Period);
        case '$':
            return ParseCharacterEscape(CharacterEscapes.Dollar);
        case '^':
            return ParseCharacterEscape(CharacterEscapes.Carot);
        case '{':
            return ParseCharacterEscape(CharacterEscapes.LeftCurlyBrace);
        case '[':
            return ParseCharacterEscape(CharacterEscapes.LeftSquareBracket);
        case '(':
            return ParseCharacterEscape(CharacterEscapes.LeftParenthesis);
        case ')':
            return ParseCharacterEscape(CharacterEscapes.RightParenthesis);
        case '|':
            return ParseCharacterEscape(CharacterEscapes.Pipe);
        case '*':
            return ParseCharacterEscape(CharacterEscapes.Asterisk);
        case '+':
            return ParseCharacterEscape(CharacterEscapes.Plus);
        case '?':
            return ParseCharacterEscape(CharacterEscapes.QuestionMark);
        case '\\':
            return ParseCharacterEscape(CharacterEscapes.Backslash);

        // Escapes with their own dedicated parsers
        case 'x':
            return ParseHexidecimalEscape();
        case 'c':
            return ParseControlCharacterEscape();
        case 'u':
            return ParseUnicodeCharacterEscape();

        // Any other character gets a generic escape built from the character itself.
        default:
            return ParseCharacterEscape(CharacterEscapes.For(escapeCode));
    }
}