/// <summary>
/// Initializes a new instance of the <see cref="RegexCapture"/> class by decoding
/// the group that begins at the buffer's current offset.
/// </summary>
/// <param name="buffer">The buffer to read the group from.</param>
/// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
/// <exception cref="Exception">
/// The group does not start with '?' and could not be handled as a plain capture.
/// </exception>
public RegexCapture(RegexBuffer buffer)
{
    if (buffer == null)
    {
        throw new ArgumentNullException(nameof(buffer), "RegexBuffer is null");
    }

    // Remember where the group starts, then step over its first character.
    this.startLocation = buffer.Offset;
    buffer.MoveNext();

    // A group interrupts any run of ordinary characters, so reset that state.
    buffer.Clearinseries();

    if (buffer.Current != '?')
    {
        // No '?' prefix: this can only be a plain capture.
        if (!this.HandlePlainOldCapture(buffer))
        {
            throw new Exception($"Unrecognized capture: {buffer.String}");
        }
    }
    else
    {
        // Try each "(?..." form in order; || short-circuits, so later checks
        // only run while every earlier one has reported failure.
        bool recognized =
            this.CheckNamed(buffer)
            || this.CheckBalancedGroup(buffer)
            || this.CheckNonCapturing(buffer)
            || this.CheckOptions(buffer)
            || this.CheckLookahead(buffer)
            || this.CheckNonBacktracking(buffer);

        if (!recognized)
        {
            // Last resort; its outcome is not inspected.
            this.CheckConditional(buffer);
        }
    }

    // Register this item for the span of text it consumed.
    buffer.AddLookup(this, this.startLocation, buffer.Offset - 1);
}
/// <summary>
/// Skips a comment by advancing the buffer until it reaches a line terminator
/// or the end of the input. The terminator itself is not consumed.
/// </summary>
/// <param name="buffer">The buffer positioned inside the comment.</param>
private static void EatComment(RegexBuffer buffer)
{
    // Check AtEnd first so a comment on the final line (no terminator) cannot run
    // past the end of the input, and stop on '\n' as well as '\r' so patterns with
    // Unix line endings end the comment at the right place.
    while (!buffer.AtEnd && buffer.Current != '\r' && buffer.Current != '\n')
    {
        buffer.MoveNext();
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="RegexCharClass"/> class by decoding
/// the character class that begins at the buffer's current offset.
/// </summary>
/// <param name="buffer">The buffer to read the character class from.</param>
/// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
public RegexCharClass(RegexBuffer buffer)
{
    if (buffer == null)
    {
        throw new ArgumentNullException(nameof(buffer), "RegexBuffer is null");
    }

    int startLoc = buffer.Offset;
    buffer.MoveNext();

    // The pattern is matched against buffer.String and the match length is added to
    // buffer.Offset, so the match has to begin at the very start of that text:
    //   \A          - never let the match slide forward to a later ']'
    //   \\.         - an escaped character (including "\]") stays inside the class
    //   [^\\]       - the first member may be any other character, even a literal ']'
    //   [^\]\\]     - later members stop at the first unescaped ']'
    // Singleline lets "\\." cover an escaped newline as well.
    Regex regex = new Regex(
        @"\A(?<Negated>\^?)(?<Class>(?:\\.|[^\\])(?:\\.|[^\]\\])*)\]",
        RegexOptions.Singleline);
    Match match = regex.Match(buffer.String);

    if (match.Success)
    {
        string members = match.Groups["Class"].Value;

        // The Negated group is either empty or the single character '^'.
        this.description = match.Groups["Negated"].Length != 0
            ? $"Any character not in \"{members}\""
            : $"Any character in \"{members}\"";

        buffer.Offset += match.Length;
    }
    else
    {
        this.description = "missing ']' in character class";
    }

    // Register this item for the span of text it consumed.
    buffer.AddLookup(this, startLoc, buffer.Offset - 1);
}
/// <summary>
/// Initializes a new instance of the <see cref="RegexCharClass"/> class by decoding
/// the character class that begins at the buffer's current offset.
/// </summary>
/// <param name="buffer">The buffer to read the character class from.</param>
/// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
public RegexCharClass(RegexBuffer buffer)
{
    if (buffer == null)
    {
        // nameof keeps the parameter name correct if the parameter is ever renamed.
        throw new ArgumentNullException(nameof(buffer), "RegexBuffer is null");
    }

    int startLoc = buffer.Offset;
    buffer.MoveNext();

    // The match length is added to buffer.Offset below, so the match must begin at
    // the start of buffer.String (\A). Escaped characters ("\\.") are kept inside the
    // class so that "\]" does not end it early; the first member may be a literal ']'.
    // Singleline lets an escaped newline be consumed as part of the class.
    Regex regex = new Regex(
        @"\A(?<Negated>\^?)(?<Class>(?:\\.|[^\\])(?:\\.|[^\]\\])*)\]",
        RegexOptions.Singleline);
    Match match = regex.Match(buffer.String);

    if (!match.Success)
    {
        this.description = "missing ']' in character class";
    }
    else
    {
        string members = match.Groups["Class"].Value;
        bool negated = match.Groups["Negated"].Length != 0;

        this.description = negated
            ? $"Any character not in \"{members}\""
            : $"Any character in \"{members}\"";

        buffer.Offset += match.Length;
    }

    // Register this item for the span of text it consumed.
    buffer.AddLookup(this, startLoc, buffer.Offset - 1);
}
/// <summary>
/// Initializes a new instance of the <see cref="RegexCapture"/> class by decoding
/// the group that begins at the buffer's current offset.
/// </summary>
/// <param name="buffer">The buffer to read the group from.</param>
/// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
/// <exception cref="Exception">
/// The group does not start with '?' and could not be handled as a plain capture.
/// </exception>
public RegexCapture(RegexBuffer buffer)
{
    if (buffer == null)
    {
        // nameof keeps the parameter name correct if the parameter is ever renamed.
        throw new ArgumentNullException(nameof(buffer), "RegexBuffer is null");
    }

    this.startLocation = buffer.Offset;
    buffer.MoveNext();

    // we're not in a series of normal characters, so clear
    buffer.Clearinseries();

    // if the first character of the capture is a '?',
    // we need to decode what comes after it.
    if (buffer.Current == '?')
    {
        // Each check runs only while all earlier checks have failed.
        bool decoded = this.CheckNamed(buffer);

        if (!decoded)
        {
            decoded = this.CheckBalancedGroup(buffer);
        }

        if (!decoded)
        {
            decoded = this.CheckNonCapturing(buffer);
        }

        if (!decoded)
        {
            decoded = this.CheckOptions(buffer);
        }

        if (!decoded)
        {
            decoded = this.CheckLookahead(buffer);
        }

        if (!decoded)
        {
            decoded = this.CheckNonBacktracking(buffer);
        }

        if (!decoded)
        {
            // Last attempt; its outcome is not inspected.
            this.CheckConditional(buffer);
        }
    }
    else if (!this.HandlePlainOldCapture(buffer))
    {
        // plain old capture that could not be handled
        throw new Exception($"Unrecognized capture: {buffer.String}");
    }

    // Register this item for the span of text it consumed.
    buffer.AddLookup(this, this.startLocation, buffer.Offset - 1);
}
/// <summary>
/// Initializes a new instance of the <see cref="RegexQuantifier"/> class by decoding
/// the "{n}", "{n,}" or "{n,m}" quantifier that begins at the buffer's current offset.
/// </summary>
/// <param name="buffer">The buffer to read the quantifier from.</param>
/// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
public RegexQuantifier(RegexBuffer buffer)
{
    if (buffer == null)
    {
        throw new ArgumentNullException(nameof(buffer), "RegexBuffer is null");
    }

    int startLoc = buffer.Offset;
    buffer.MoveNext();

    // look for "n}", "n,}", or "n,m}".
    // \A pins the match to the start of buffer.String: the match length is added to
    // buffer.Offset below, so matching a quantifier that appears further along in the
    // text would describe the wrong quantifier and move the offset to the wrong place.
    Regex regex = new Regex(@"\A(?<n>\d+)(?<Comma>,?)(?<m>\d*)\}");
    Match match = regex.Match(buffer.String);

    if (match.Success)
    {
        string minimum = match.Groups["n"].Value;
        string maximum = match.Groups["m"].Value;

        if (maximum.Length != 0)
        {
            this.description = $"At least {minimum}, but not more than {maximum} times";
        }
        else if (match.Groups["Comma"].Length != 0)
        {
            this.description = $"At least {minimum} times";
        }
        else
        {
            this.description = $"Exactly {minimum} times";
        }

        buffer.Offset += match.Length;

        // A '?' directly after the quantifier marks it as non-greedy.
        if (!buffer.AtEnd && buffer.Current == '?')
        {
            this.description += " (non-greedy)";
            buffer.MoveNext();
        }
    }
    else
    {
        this.description = "missing '}' in quantifier";
    }

    // Register this item for the span of text it consumed.
    buffer.AddLookup(this, startLoc, buffer.Offset - 1);
}
/// <summary>
/// Decodes the escape sequence that follows a backslash and stores its description
/// in <c>character</c>.
/// </summary>
/// <remarks>
/// The escaped character is first looked up in the <c>Escaped</c> table. If it is not
/// there, a back reference is tried, and finally the \u, \c, \x and "\ " forms are
/// handled explicitly; anything else is described as the literal character.
/// </remarks>
/// <param name="buffer">The buffer positioned on the backslash.</param>
private void DecodeEscape(RegexBuffer buffer)
{
    // Step over the backslash to the escaped character.
    buffer.MoveNext();

    this.character = (string)Escaped[buffer.Current];
    if (this.character != null)
    {
        // Known escape from the lookup table.
        this.special = true;
        buffer.MoveNext();
        return;
    }

    if (this.CheckBackReference(buffer))
    {
        return;
    }

    // TODO: Handle other items below:
    switch (buffer.Current)
    {
        case 'u':
            buffer.MoveNext();

            // Take up to four characters; clamping avoids an
            // ArgumentOutOfRangeException when the pattern ends inside the escape.
            int unicodeLength = Math.Min(4, buffer.String.Length);
            this.character = "Unicode " + buffer.String.Substring(0, unicodeLength);
            buffer.Offset += unicodeLength;
            break;

        case ' ':
            this.character = "' ' (space)";
            this.special = false;
            buffer.MoveNext();
            break;

        case 'c':
            buffer.MoveNext();
            this.character = "CTRL-" + buffer.Current;
            buffer.MoveNext();
            break;

        case 'x':
            buffer.MoveNext();

            // Take up to two characters, clamped for the same reason as \u above.
            int hexLength = Math.Min(2, buffer.String.Length);
            this.character = "Hex " + buffer.String.Substring(0, hexLength);
            buffer.Offset += hexLength;
            break;

        default:
            // Unrecognised escape: describe it as the literal character.
            this.character = new string(buffer.Current, 1);
            this.special = false;
            buffer.MoveNext();
            break;
    }
}
/// <summary>
/// Walks the buffer and appends one item to <c>items</c> for every construct found,
/// stopping at the end of the input or at a closing parenthesis.
/// </summary>
/// <param name="buffer">The buffer to parse.</param>
private void Parse(RegexBuffer buffer)
{
    while (!buffer.AtEnd)
    {
        char current = buffer.Current;
        bool isBlank = current == ' ' || current == '\r' || current == '\n' || current == '\t';

        // Blanks carry no meaning when the pattern ignores whitespace.
        if (buffer.IgnorePatternWhitespace && isBlank)
        {
            buffer.MoveNext();
            continue;
        }

        switch (current)
        {
            case ')':
                // End of the enclosing group: hand control back to the caller.
                return;

            case '(':
                this.items.Add(new RegexCapture(buffer));
                break;

            case '[':
                this.items.Add(new RegexCharClass(buffer));
                break;

            case '{':
                this.items.Add(new RegexQuantifier(buffer));
                break;

            case '|':
                this.items.Add(new RegexAlternate(buffer));
                break;

            case '#':
                // '#' starts a comment only when whitespace is ignored;
                // otherwise it is an ordinary character.
                if (buffer.IgnorePatternWhitespace)
                {
                    EatComment(buffer);
                    break;
                }

                goto default;

            case '\\':
            default:
                this.items.Add(new RegexCharacter(buffer));
                break;
        }
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="RegexCharacter"/> class by decoding
/// the single character, metacharacter or escape at the buffer's current offset.
/// </summary>
/// <param name="buffer">The buffer to read the character from.</param>
/// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
public RegexCharacter(RegexBuffer buffer)
{
    if (buffer == null)
    {
        // nameof keeps the parameter name correct if the parameter is ever renamed.
        throw new ArgumentNullException(nameof(buffer), "RegexBuffer is null");
    }

    int startLoc = buffer.Offset;

    // Set for '+', '*' and '?', which may be followed by a non-greedy '?' suffix.
    bool quantifier = false;

    switch (buffer.Current)
    {
        case '.':
            this.character = ". (any character)";
            buffer.MoveNext();
            this.special = true;
            break;

        case '+':
            this.character = "+ (one or more times)";
            buffer.MoveNext();
            this.special = true;
            quantifier = true;
            break;

        case '*':
            this.character = "* (zero or more times)";
            buffer.MoveNext();
            this.special = true;
            quantifier = true;
            break;

        case '?':
            this.character = "? (zero or one time)";
            buffer.MoveNext();
            this.special = true;
            quantifier = true;
            break;

        case '^':
            this.character = "^ (anchor to start of string)";
            buffer.MoveNext();
            break;

        case '$':
            this.character = "$ (anchor to end of string)";
            buffer.MoveNext();
            break;

        case ' ':
            this.character = "' ' (space)";
            buffer.MoveNext();
            break;

        case '\\':
            // DecodeEscape advances the buffer itself.
            this.DecodeEscape(buffer);
            break;

        default:
            this.character = buffer.Current.ToString(CultureInfo.CurrentCulture);
            buffer.MoveNext();
            this.special = false;
            break;
    }

    // A '?' directly after a quantifier marks it as non-greedy.
    if (quantifier && !buffer.AtEnd && buffer.Current == '?')
    {
        this.character += " (non-greedy)";
        buffer.MoveNext();
    }

    // Register this item for the span it consumed; the last argument is true
    // when the description is a single character.
    buffer.AddLookup(this, startLoc, buffer.Offset - 1, this.character.Length == 1);
}