/// <summary>
/// Verifies that the buffer is positioned on a closing parenthesis and consumes it.
/// </summary>
/// <param name="buffer">The buffer being parsed.</param>
private void CheckClosingParen(RegexBuffer buffer)
{
    char closing;

    try
    {
        closing = buffer.Current;
    }
    catch (Exception inner)
    {
        // Reading the current character failed, so there is nothing left that
        // could close the group; highlight the start of this capture instead.
        buffer.ErrorLocation = this.startLocation;
        buffer.ErrorLength = 1;
        throw new Exception(
            "Missing closing \')\' in capture", inner);
    }

    if (closing == ')')
    {
        // Step over the closing parenthesis.
        buffer.Offset++;
        return;
    }

    throw new Exception($"Unterminated closure at offset {buffer.Offset}");
}
/// <summary>
/// Tries to decode a named capture written as <c>?&lt;Name&gt;</c> or <c>?'Name'</c>.
/// </summary>
/// <param name="buffer">The buffer, positioned on the '?' that follows the opening parenthesis.</param>
/// <returns><c>true</c> if a named capture was recognized and parsed; otherwise <c>false</c>.</returns>
private bool CheckNamed(RegexBuffer buffer)
{
    // Group names may contain any word character (letters, digits and '_'),
    // so the name is matched with \w rather than only ASCII letters and digits.
    Regex regex = new Regex(
        @"
        ^                         # anchor to start of string
        \?(\<|')                  # ?< or ?'
        (?<Name>\w+?)             # Capture name
        (\>|')                    # > or '
        (?<Rest>.+)               # The rest of the string
        ",
        RegexOptions.IgnorePatternWhitespace);

    Match match = regex.Match(buffer.String);
    if (!match.Success)
    {
        return false;
    }

    this.description = $"Capture to <{match.Groups["Name"]}>";

    // Advance the buffer to the rest of the expression, parse the group body,
    // and require the closing parenthesis.
    buffer.Offset += match.Groups["Rest"].Index;
    this.expression = new RegexExpression(buffer);
    this.CheckClosingParen(buffer);
    return true;
}
/// <summary>
/// Initializes a new instance of the <see cref="RegexCharClass"/> class by
/// decoding a character class from the buffer.
/// </summary>
/// <param name="buffer">The buffer.</param>
/// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
public RegexCharClass(RegexBuffer buffer)
{
    if (buffer == null)
    {
        throw new ArgumentNullException(nameof(buffer), "RegexBuffer is null");
    }

    int startLoc = buffer.Offset;
    buffer.MoveNext();

    // The match must start at the current position because the offset is
    // advanced by the match length only; Singleline lets the class body
    // contain a line break instead of skipping ahead to a later line.
    Regex regex = new Regex(@"^(?<Negated>\^?)(?<Class>.+?)\]", RegexOptions.Singleline);
    Match match = regex.Match(buffer.String);
    if (match.Success)
    {
        string members = match.Groups["Class"].Value;
        this.description = match.Groups["Negated"].Value == "^"
            ? $"Any character not in \"{members}\""
            : $"Any character in \"{members}\"";
        buffer.Offset += match.Groups[0].Length;
    }
    else
    {
        this.description = "missing ']' in character class";
    }

    buffer.AddLookup(this, startLoc, buffer.Offset - 1);
}
/// <summary>
/// Initializes a new instance of the <see cref="RegexCapture"/> class by
/// decoding a parenthesized group from the buffer.
/// </summary>
/// <param name="buffer">The buffer.</param>
public RegexCapture(RegexBuffer buffer)
{
    if (buffer == null)
    {
        throw new ArgumentNullException(nameof(buffer), "RegexBuffer is null");
    }

    this.startLocation = buffer.Offset;
    buffer.MoveNext();

    // A group interrupts any run of normal characters.
    buffer.Clearinseries();

    if (buffer.Current != '?')
    {
        // Plain old capture.
        if (!this.HandlePlainOldCapture(buffer))
        {
            throw new Exception($"Unrecognized capture: {buffer.String}");
        }
    }
    else
    {
        // A leading '?' introduces a special group; try each decoder in turn
        // and stop at the first one that recognizes the construct.
        bool recognized =
            this.CheckNamed(buffer)
            || this.CheckBalancedGroup(buffer)
            || this.CheckNonCapturing(buffer)
            || this.CheckOptions(buffer)
            || this.CheckLookahead(buffer)
            || this.CheckNonBacktracking(buffer);

        if (!recognized)
        {
            this.CheckConditional(buffer);
        }
    }

    buffer.AddLookup(this, this.startLocation, buffer.Offset - 1);
}
/// <summary>
/// Tries to decode a balancing group written as <c>?&lt;Name1-Name2&gt;</c>
/// or <c>?'Name1-Name2'</c>.
/// </summary>
/// <param name="buffer">The buffer, positioned on the '?' that follows the opening parenthesis.</param>
/// <returns><c>true</c> if a balancing group was recognized and parsed; otherwise <c>false</c>.</returns>
private bool CheckBalancedGroup(RegexBuffer buffer)
{
    // Inside a character class '|' is a literal, so the delimiter classes list
    // only the real delimiters. Names accept any word character so that
    // digits and '_' are recognized as well.
    Regex regex = new Regex(
        @"
        ^                         # anchor to start of string
        \?[<']                    # ?< or ?'
        (?<Name1>\w+?)            # Capture name1
        -
        (?<Name2>\w+?)            # Capture name2
        [>']                      # > or '
        (?<Rest>.+)               # The rest of the expression
        ",
        RegexOptions.IgnorePatternWhitespace);

    Match match = regex.Match(buffer.String);
    if (!match.Success)
    {
        return false;
    }

    this.description = $"Balancing Group <{match.Groups["Name1"]}>-<{match.Groups["Name2"]}>";

    // Skip the group header, parse the body, and require the closing parenthesis.
    buffer.Offset += match.Groups["Rest"].Index;
    this.expression = new RegexExpression(buffer);
    this.CheckClosingParen(buffer);
    return true;
}
/// <summary>
/// Initializes a new instance of the <see cref="RegexCharClass"/> class by
/// decoding a character class from the buffer.
/// </summary>
/// <param name="buffer">The buffer.</param>
/// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
public RegexCharClass(RegexBuffer buffer)
{
    if (buffer == null)
    {
        throw new ArgumentNullException(nameof(buffer), "RegexBuffer is null");
    }

    int startLoc = buffer.Offset;
    buffer.MoveNext();

    // The match must start at the current position because the offset is
    // advanced by the match length only; Singleline lets the class body
    // contain a line break instead of skipping ahead to a later line.
    Regex regex = new Regex(@"^(?<Negated>\^?)(?<Class>.+?)\]", RegexOptions.Singleline);
    Match match = regex.Match(buffer.String);
    if (match.Success)
    {
        string members = match.Groups["Class"].Value;
        this.description = match.Groups["Negated"].Value == "^"
            ? $"Any character not in \"{members}\""
            : $"Any character in \"{members}\"";
        buffer.Offset += match.Groups[0].Length;
    }
    else
    {
        this.description = "missing ']' in character class";
    }

    buffer.AddLookup(this, startLoc, buffer.Offset - 1);
}
/// <summary>
/// Initializes a new instance of the <see cref="RegexCapture"/> class by
/// decoding a parenthesized group from the buffer.
/// </summary>
/// <param name="buffer">The buffer.</param>
/// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
public RegexCapture(RegexBuffer buffer)
{
    if (buffer == null)
    {
        throw new ArgumentNullException(nameof(buffer), "RegexBuffer is null");
    }

    this.startLocation = buffer.Offset;
    buffer.MoveNext();

    // we're not in a series of normal characters, so clear
    buffer.Clearinseries();

    // if the first character of the capture is a '?',
    // we need to decode what comes after it.
    if (buffer.Current == '?')
    {
        // Try each decoder in turn; the first one that recognizes the
        // construct wins and the remaining ones are not called.
        bool decoded =
            this.CheckNamed(buffer)
            || this.CheckBalancedGroup(buffer)
            || this.CheckNonCapturing(buffer)
            || this.CheckOptions(buffer)
            || this.CheckLookahead(buffer)
            || this.CheckNonBacktracking(buffer);

        if (!decoded)
        {
            this.CheckConditional(buffer);
        }
    }
    else if (!this.HandlePlainOldCapture(buffer))
    {
        // plain old capture that could not be handled
        throw new Exception($"Unrecognized capture: {buffer.String}");
    }

    buffer.AddLookup(this, this.startLocation, buffer.Offset - 1);
}
/// <summary>
/// Skips an end-of-line comment by advancing the buffer until it reaches a
/// line break or the end of the pattern, whichever comes first.
/// </summary>
/// <param name="buffer">The buffer, positioned inside the comment.</param>
private static void EatComment(RegexBuffer buffer)
{
    // Stop at the end of the buffer so a comment on the last line does not
    // read past the pattern, and accept '\n' as well as '\r' so that
    // patterns with Unix line endings terminate the comment too.
    while (!buffer.AtEnd && buffer.Current != '\r' && buffer.Current != '\n')
    {
        buffer.MoveNext();
    }
}
/// <summary>
/// Tries to decode a non-backtracking subexpression introduced by <c>?&gt;</c>.
/// </summary>
/// <param name="buffer">The buffer being parsed.</param>
/// <returns><c>true</c> if the construct was recognized and parsed; otherwise <c>false</c>.</returns>
private bool CheckNonBacktracking(RegexBuffer buffer)
{
    Regex headerPattern = new Regex(
        @"
        ^                         # anchor to start of string
        \?\>
        (?<Rest>.+)               # The rest of the expression
        ",
        RegexOptions.IgnorePatternWhitespace);

    Match header = headerPattern.Match(buffer.String);
    if (!header.Success)
    {
        return false;
    }

    this.description = "Non-backtracking subexpression";

    // Skip the "?>" header, parse the body, then require the closing parenthesis.
    buffer.Offset += header.Groups["Rest"].Index;
    this.expression = new RegexExpression(buffer);
    this.CheckClosingParen(buffer);
    return true;
}
/// <summary>
/// Decodes the escape sequence that follows a backslash and stores its
/// description in <c>character</c>.
/// </summary>
/// <param name="buffer">The buffer, positioned on the backslash.</param>
private void DecodeEscape(RegexBuffer buffer)
{
    buffer.MoveNext();

    // Escapes listed in the lookup table are described directly.
    this.character = (string)Escaped[buffer.Current];
    if (this.character != null)
    {
        this.special = true;
        buffer.MoveNext();
        return;
    }

    if (this.CheckBackReference(buffer))
    {
        return;
    }

    // TODO: Handle other items below:
    switch (buffer.Current)
    {
        case 'u':
            buffer.MoveNext();
            this.character = "Unicode " + ReadEscapeDigits(buffer, 4);
            break;

        case ' ':
            this.character = "' ' (space)";
            this.special = false;
            buffer.MoveNext();
            break;

        case 'c':
            buffer.MoveNext();
            this.character = "CTRL-" + buffer.Current;
            buffer.MoveNext();
            break;

        case 'x':
            buffer.MoveNext();
            this.character = "Hex " + ReadEscapeDigits(buffer, 2);
            break;

        default:
            this.character = new string(buffer.Current, 1);
            this.special = false;
            buffer.MoveNext();
            break;
    }
}

/// <summary>
/// Reads up to <paramref name="count"/> characters from the start of the
/// remaining text and advances the buffer past them.
/// </summary>
/// <param name="buffer">The buffer to read from.</param>
/// <param name="count">The number of characters the escape normally carries.</param>
/// <returns>The characters read; shorter than <paramref name="count"/> when the pattern ends early.</returns>
private static string ReadEscapeDigits(RegexBuffer buffer, int count)
{
    // Clamp to what is actually left so a truncated escape such as "\u12"
    // is described as far as it goes instead of making Substring throw.
    string remaining = buffer.String;
    int available = remaining.Length < count ? remaining.Length : count;
    string digits = remaining.Substring(0, available);
    buffer.Offset += available;
    return digits;
}
/// <summary>
/// Parses an ordinary parenthesized group whose body starts at the current
/// buffer position.
/// </summary>
/// <param name="buffer">The buffer being parsed.</param>
/// <returns>Always <c>true</c>.</returns>
private bool HandlePlainOldCapture(RegexBuffer buffer)
{
    // When the buffer reports explicit capture, the group is described as
    // non-capturing.
    if (buffer.ExplicitCapture)
    {
        this.description = "Non-capturing Group";
    }

    // The buffer already sits on the group body: parse it as a nested
    // expression and then insist on the closing parenthesis.
    RegexExpression body = new RegexExpression(buffer);
    this.expression = body;
    this.CheckClosingParen(buffer);

    return true;
}
/// <summary>
/// Reads items from the buffer until it is exhausted or a closing
/// parenthesis is reached, adding each decoded item to <c>items</c>.
/// </summary>
/// <param name="buffer">The buffer being parsed.</param>
private void Parse(RegexBuffer buffer)
{
    while (!buffer.AtEnd)
    {
        // With pattern whitespace ignored, blanks and line breaks carry no meaning.
        if (buffer.IgnorePatternWhitespace && " \r\n\t".IndexOf(buffer.Current) >= 0)
        {
            buffer.MoveNext();
            continue;
        }

        switch (buffer.Current)
        {
            case ')':
                // End of the enclosing group; leave the ')' for the caller.
                return;

            case '(':
                this.items.Add(new RegexCapture(buffer));
                break;

            case '[':
                this.items.Add(new RegexCharClass(buffer));
                break;

            case '{':
                this.items.Add(new RegexQuantifier(buffer));
                break;

            case '|':
                this.items.Add(new RegexAlternate(buffer));
                break;

            case '#':
                // '#' starts a comment only when pattern whitespace is ignored;
                // otherwise it is an ordinary character.
                if (buffer.IgnorePatternWhitespace)
                {
                    EatComment(buffer);
                    break;
                }

                goto default;

            case '\\':
            default:
                this.items.Add(new RegexCharacter(buffer));
                break;
        }
    }
}
/// <summary>
/// Parses the text of the regex text box and shows either its interpretation
/// or the parse error message in the interpretation text box.
/// </summary>
private void InterpretRegEx()
{
    // Build the buffer completely before publishing it to the field.
    RegexBuffer parseBuffer = new RegexBuffer(this.textBoxRegex.Text);
    parseBuffer.RegexOptions = this.CreateRegexOptions();
    this.buffer = parseBuffer;

    try
    {
        RegexExpression parsed = new RegexExpression(this.buffer);
        this.textBoxInterpretation.Text = parsed.ToString(0);
        this.textBoxInterpretation.ForeColor = Color.Black;
    }
    catch (Exception failure)
    {
        // Show the failure in place of the interpretation, greyed out, and
        // return focus to the regex text box.
        this.textBoxInterpretation.Text = "We have a situation...\r\n\r\n (" + failure.Message + ")";
        this.textBoxRegex.Focus();
        this.textBoxInterpretation.ForeColor = Color.Gray;
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="RegexConditional"/> class by
/// parsing two parenthesis-terminated expressions in sequence: the condition
/// and then the yes/no part.
/// </summary>
/// <param name="buffer">The buffer.</param>
/// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
public RegexConditional(RegexBuffer buffer)
{
    if (buffer == null)
    {
        throw new ArgumentNullException(nameof(buffer), "RegexBuffer is null");
    }

    this.startLocation = buffer.Offset;

    // First expression, terminated by its own ')'.
    this.expression = new RegexExpression(buffer);
    this.CheckClosingParen(buffer);

    // Second expression, terminated by the ')' that closes the construct.
    this.yesNo = new RegexExpression(buffer);
    this.CheckClosingParen(buffer);

    buffer.AddLookup(this, this.startLocation, buffer.Offset - 1);
}
/// <summary>
/// Initializes a new instance of the <see cref="RegexQuantifier"/> class by
/// decoding a <c>{n}</c>, <c>{n,}</c> or <c>{n,m}</c> quantifier from the buffer.
/// </summary>
/// <param name="buffer">The buffer.</param>
/// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
public RegexQuantifier(RegexBuffer buffer)
{
    if (buffer == null)
    {
        throw new ArgumentNullException(nameof(buffer), "RegexBuffer is null");
    }

    int startLoc = buffer.Offset;
    buffer.MoveNext();

    // look for "n}", "n,}", or "n,m}". The pattern is anchored so that only a
    // quantifier at the current position matches; without the anchor a
    // quantifier further along the pattern could be picked up and the offset
    // advanced by the wrong amount.
    Regex regex = new Regex(@"^(?<n>\d+)(?<Comma>,?)(?<m>\d*)\}");
    Match match = regex.Match(buffer.String);
    if (match.Success)
    {
        string min = match.Groups["n"].Value;
        string max = match.Groups["m"].Value;

        if (max.Length != 0)
        {
            this.description = $"At least {min}, but not more than {max} times";
        }
        else if (match.Groups["Comma"].Length != 0)
        {
            this.description = $"At least {min} times";
        }
        else
        {
            this.description = $"Exactly {min} times";
        }

        buffer.Offset += match.Groups[0].Length;

        // A trailing '?' makes the quantifier non-greedy.
        if (!buffer.AtEnd && buffer.Current == '?')
        {
            this.description += " (non-greedy)";
            buffer.MoveNext();
        }
    }
    else
    {
        this.description = "missing '}' in quantifier";
    }

    buffer.AddLookup(this, startLoc, buffer.Offset - 1);
}
/// <summary>
/// Tries to decode a zero-width assertion: <c>?=</c>, <c>?!</c>,
/// <c>?&lt;=</c> or <c>?&lt;!</c>.
/// </summary>
/// <param name="buffer">The buffer being parsed.</param>
/// <returns><c>true</c> if an assertion was recognized and parsed; otherwise <c>false</c>.</returns>
private bool CheckLookahead(RegexBuffer buffer)
{
    Regex assertionPattern = new Regex(
        @"
        ^                         # anchor to start of string
        \?
        (?<Assertion><=|<!|=|!)   # assertion char
        (?<Rest>.+)               # The rest of the expression
        ",
        RegexOptions.IgnorePatternWhitespace);

    Match found = assertionPattern.Match(buffer.String);
    if (!found.Success)
    {
        return false;
    }

    // Describe the assertion according to its marker.
    switch (found.Groups["Assertion"].Value)
    {
        case "<=":
            this.description = "zero-width positive lookbehind";
            break;
        case "<!":
            this.description = "zero-width negative lookbehind";
            break;
        case "=":
            this.description = "zero-width positive lookahead";
            break;
        case "!":
            this.description = "zero-width negative lookahead";
            break;
    }

    // Skip the marker, parse the body, then require the closing parenthesis.
    buffer.Offset += found.Groups["Rest"].Index;
    this.expression = new RegexExpression(buffer);
    this.CheckClosingParen(buffer);
    return true;
}
/// <summary>
/// Tries to decode a named backreference of the form <c>\k&lt;name&gt;</c>.
/// </summary>
/// <param name="buffer">The buffer, positioned on the character that follows the backslash.</param>
/// <returns><c>true</c> if a backreference was recognized and consumed; otherwise <c>false</c>.</returns>
private bool CheckBackReference(RegexBuffer buffer)
{
    // look for \k<name>. The pattern is anchored so that only a backreference
    // at the current position is accepted; unanchored, any other escape
    // would be reported as a backreference found later in the pattern.
    Regex regex = new Regex(
        @"
        ^k\<(?<Name>.+?)\>
        ",
        RegexOptions.IgnorePatternWhitespace);

    Match match = regex.Match(buffer.String);
    if (!match.Success)
    {
        return false;
    }

    this.special = true;
    this.character = $"Backreference to match: {match.Groups["Name"]}";
    buffer.Offset += match.Groups[0].Length;
    return true;
}
/// <summary>
/// Decodes a conditional subexpression of the form
/// <c>(?(name)yesmatch|nomatch)</c>, where the name may also be an
/// expression. Does nothing when the buffer does not start with <c>?(</c>.
/// </summary>
/// <param name="buffer">The buffer being parsed.</param>
private void CheckConditional(RegexBuffer buffer)
{
    Regex conditionalPattern = new Regex(
        @"
        ^                         # anchor to start of string
        \?\(
        (?<Rest>.+)               # The rest of the expression
        ",
        RegexOptions.IgnorePatternWhitespace);

    Match found = conditionalPattern.Match(buffer.String);
    if (!found.Success)
    {
        return;
    }

    this.description = "Conditional Subexpression";

    // Skip the "?(" header and hand the remainder to the conditional parser.
    buffer.Offset += found.Groups["Rest"].Index;
    this.expression = new RegexConditional(buffer);
}
/// <summary>
/// Tries to decode an inline options header of the form <c>?imnsx-imnsx:</c>.
/// </summary>
/// <param name="buffer">The buffer being parsed.</param>
/// <returns><c>true</c> if an options header was recognized and consumed; otherwise <c>false</c>.</returns>
private bool CheckOptions(RegexBuffer buffer)
{
    Regex optionsPattern = new Regex(
        @"
        ^                         # anchor to start of string
        \?(?<Options>[imnsx-]+):
        ",
        RegexOptions.IgnorePatternWhitespace);

    Match found = optionsPattern.Match(buffer.String);
    if (!found.Success)
    {
        return false;
    }

    // Describe the options via the lookup table; only the header is consumed
    // here and no sub-expression is attached.
    string option = found.Groups["Options"].Value;
    this.description = $"Set options to {OptionNames[option]}";
    this.expression = null;
    buffer.Offset += found.Groups[0].Length;
    return true;
}
/// <summary>
/// Verifies that the buffer is positioned on a closing parenthesis and consumes it.
/// </summary>
/// <param name="buffer">The buffer.</param>
private void CheckClosingParen(RegexBuffer buffer)
{
    // check for closing ")"
    char current;
    try
    {
        current = buffer.Current;
    }
    catch (Exception e)
    {
        // no closing brace. Set highlight for this capture...
        buffer.ErrorLocation = this.startLocation;
        buffer.ErrorLength = 1;

        // The message has no placeholders, so it is passed as a plain literal.
        throw new Exception("Missing closing ')' in capture", e);
    }

    if (current != ')')
    {
        throw new Exception($"Unterminated closure at offset {buffer.Offset}");
    }

    // eat closing parenthesis
    buffer.Offset++;
}
/// <summary>
/// Tries to decode an inline options header of the form <c>?imnsx-imnsx:</c>.
/// </summary>
/// <param name="buffer">The buffer.</param>
/// <returns><c>true</c> if an options header was recognized and consumed; otherwise <c>false</c>.</returns>
private bool CheckOptions(RegexBuffer buffer)
{
    // look for ?imnsx-imnsx:
    Regex regex = new Regex(
        @"
        ^                         # anchor to start of string
        \?(?<Options>[imnsx-]+):
        ",
        RegexOptions.IgnorePatternWhitespace);

    Match match = regex.Match(buffer.String);
    if (!match.Success)
    {
        return false;
    }

    // Only the header is consumed here; no sub-expression is attached.
    string option = match.Groups["Options"].Value;
    this.description = $"Set options to {OptionNames[option]}";
    this.expression = null;
    buffer.Offset += match.Groups[0].Length;
    return true;
}
/// <summary>
/// Initializes a new instance of the <see cref="RegexExpression"/> class by
/// parsing items from the buffer.
/// </summary>
/// <param name="buffer">The buffer.</param>
/// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
public RegexExpression(RegexBuffer buffer)
{
    // Fail with the same explicit error as the other item constructors rather
    // than a NullReferenceException from inside Parse.
    if (buffer == null)
    {
        throw new ArgumentNullException(nameof(buffer), "RegexBuffer is null");
    }

    this.Parse(buffer);
}
/// <summary>
/// Initializes a new instance of the <see cref="RegexCharacter"/> class by
/// decoding a single character, metacharacter or escape from the buffer.
/// </summary>
/// <param name="buffer">The buffer.</param>
/// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
public RegexCharacter(RegexBuffer buffer)
{
    if (buffer == null)
    {
        throw new ArgumentNullException(nameof(buffer), "RegexBuffer is null");
    }

    int startLoc = buffer.Offset;
    bool quantifier = false;

    switch (buffer.Current)
    {
        case '.':
            this.character = ". (any character)";
            buffer.MoveNext();
            this.special = true;
            break;

        case '+':
            this.character = "+ (one or more times)";
            buffer.MoveNext();
            this.special = true;
            quantifier = true;
            break;

        case '*':
            this.character = "* (zero or more times)";
            buffer.MoveNext();
            this.special = true;
            quantifier = true;
            break;

        case '?':
            this.character = "? (zero or one time)";
            buffer.MoveNext();
            this.special = true;
            quantifier = true;
            break;

        case '^':
            this.character = "^ (anchor to start of string)";
            buffer.MoveNext();
            break;

        case '$':
            this.character = "$ (anchor to end of string)";
            buffer.MoveNext();
            break;

        case ' ':
            this.character = "' ' (space)";
            buffer.MoveNext();
            break;

        case '\\':
            this.DecodeEscape(buffer);
            break;

        default:
            this.character = buffer.Current.ToString(CultureInfo.CurrentCulture);
            buffer.MoveNext();
            this.special = false;
            break;
    }

    // A '?' directly after a quantifier makes that quantifier non-greedy.
    if (quantifier && !buffer.AtEnd && buffer.Current == '?')
    {
        this.character += " (non-greedy)";
        buffer.MoveNext();
    }

    // The final argument is true when the description is a single character.
    buffer.AddLookup(this, startLoc, buffer.Offset - 1, this.character.Length == 1);
}
/// <summary>
/// Tries to decode a zero-width assertion: <c>?=</c>, <c>?!</c>,
/// <c>?&lt;=</c> or <c>?&lt;!</c>.
/// </summary>
/// <param name="buffer">The buffer being parsed.</param>
/// <returns><c>true</c> if an assertion was recognized and parsed; otherwise <c>false</c>.</returns>
private bool CheckLookahead(RegexBuffer buffer)
{
    Regex assertionPattern = new Regex(
        @"
        ^                         # anchor to start of string
        \?
        (?<Assertion><=|<!|=|!)   # assertion char
        (?<Rest>.+)               # The rest of the expression
        ",
        RegexOptions.IgnorePatternWhitespace);

    Match found = assertionPattern.Match(buffer.String);
    if (!found.Success)
    {
        return false;
    }

    // Describe the assertion according to its marker.
    string marker = found.Groups["Assertion"].Value;
    if (marker == "=")
    {
        this.description = "zero-width positive lookahead";
    }
    else if (marker == "!")
    {
        this.description = "zero-width negative lookahead";
    }
    else if (marker == "<=")
    {
        this.description = "zero-width positive lookbehind";
    }
    else if (marker == "<!")
    {
        this.description = "zero-width negative lookbehind";
    }

    // Skip the marker, parse the body, then require the closing parenthesis.
    buffer.Offset += found.Groups["Rest"].Index;
    this.expression = new RegexExpression(buffer);
    this.CheckClosingParen(buffer);
    return true;
}
/// <summary>
/// Tries to decode a balancing group written as <c>?&lt;Name1-Name2&gt;</c>
/// or <c>?'Name1-Name2'</c>.
/// </summary>
/// <param name="buffer">The buffer, positioned on the '?' that follows the opening parenthesis.</param>
/// <returns><c>true</c> if a balancing group was recognized and parsed; otherwise <c>false</c>.</returns>
private bool CheckBalancedGroup(RegexBuffer buffer)
{
    // Inside a character class '|' is a literal, so the delimiter classes list
    // only the real delimiters. Names accept any word character so that
    // digits and '_' are recognized as well.
    Regex regex = new Regex(
        @"
        ^                         # anchor to start of string
        \?[<']                    # ?< or ?'
        (?<Name1>\w+?)            # Capture name1
        -
        (?<Name2>\w+?)            # Capture name2
        [>']                      # > or '
        (?<Rest>.+)               # The rest of the expression
        ",
        RegexOptions.IgnorePatternWhitespace);

    Match match = regex.Match(buffer.String);
    if (!match.Success)
    {
        return false;
    }

    this.description = $"Balancing Group <{match.Groups["Name1"]}>-<{match.Groups["Name2"]}>";

    // Skip the group header, parse the body, and require the closing parenthesis.
    buffer.Offset += match.Groups["Rest"].Index;
    this.expression = new RegexExpression(buffer);
    this.CheckClosingParen(buffer);
    return true;
}
/// <summary>
/// Tries to decode a named backreference of the form <c>\k&lt;name&gt;</c>.
/// </summary>
/// <param name="buffer">The buffer, positioned on the character that follows the backslash.</param>
/// <returns><c>true</c> if a backreference was recognized and consumed; otherwise <c>false</c>.</returns>
private bool CheckBackReference(RegexBuffer buffer)
{
    // look for \k<name>. The pattern is anchored so that only a backreference
    // at the current position is accepted; unanchored, any other escape
    // would be reported as a backreference found later in the pattern.
    Regex regex = new Regex(
        @"
        ^k\<(?<Name>.+?)\>
        ",
        RegexOptions.IgnorePatternWhitespace);

    Match match = regex.Match(buffer.String);
    if (!match.Success)
    {
        return false;
    }

    this.special = true;
    this.character = $"Backreference to match: {match.Groups["Name"]}";
    buffer.Offset += match.Groups[0].Length;
    return true;
}
/// <summary>
/// Tries to decode a conditional subexpression of the form
/// <c>(?(name)yesmatch|nomatch)</c>, where the name may also be an expression.
/// </summary>
/// <param name="buffer">The buffer.</param>
/// <returns><c>true</c> if a conditional was recognized and parsed; otherwise <c>false</c>.</returns>
private bool CheckConditional(RegexBuffer buffer)
{
    Regex regex = new Regex(
        @"
        ^                         # anchor to start of string
        \?\(
        (?<Rest>.+)               # The rest of the expression
        ",
        RegexOptions.IgnorePatternWhitespace);

    Match match = regex.Match(buffer.String);
    if (!match.Success)
    {
        return false;
    }

    // The description has no placeholders, so it is assigned as a plain literal.
    this.description = "Conditional Subexpression";

    // Skip the "?(" header and hand the remainder to the conditional parser.
    buffer.Offset += match.Groups["Rest"].Index;
    this.expression = new RegexConditional(buffer);
    return true;
}
/// <summary>
/// Tries to decode a named capture written as <c>?&lt;Name&gt;</c> or <c>?'Name'</c>.
/// </summary>
/// <param name="buffer">The buffer, positioned on the '?' that follows the opening parenthesis.</param>
/// <returns><c>true</c> if a named capture was recognized and parsed; otherwise <c>false</c>.</returns>
private bool CheckNamed(RegexBuffer buffer)
{
    // Group names may contain any word character (letters, digits and '_'),
    // so the name is matched with \w rather than only ASCII letters and digits.
    Regex regex = new Regex(
        @"
        ^                         # anchor to start of string
        \?(\<|')                  # ?< or ?'
        (?<Name>\w+?)             # Capture name
        (\>|')                    # > or '
        (?<Rest>.+)               # The rest of the string
        ",
        RegexOptions.IgnorePatternWhitespace);

    Match match = regex.Match(buffer.String);
    if (!match.Success)
    {
        return false;
    }

    this.description = $"Capture to <{match.Groups["Name"]}>";

    // Advance the buffer to the rest of the expression, parse the group body,
    // and require the closing parenthesis.
    buffer.Offset += match.Groups["Rest"].Index;
    this.expression = new RegexExpression(buffer);
    this.CheckClosingParen(buffer);
    return true;
}
/// <summary>
/// Parses an ordinary parenthesized group whose body starts at the current
/// buffer position.
/// </summary>
/// <param name="buffer">The buffer.</param>
/// <returns>Always <c>true</c>.</returns>
private bool HandlePlainOldCapture(RegexBuffer buffer)
{
    // When the buffer reports explicit capture, the group is described as
    // non-capturing. The text has no placeholders, so it is a plain literal.
    if (buffer.ExplicitCapture)
    {
        this.description = "Non-capturing Group";
    }

    // we're already at the expression. Just create a new expression, and
    // make sure that we're at a ")" when we're done
    this.expression = new RegexExpression(buffer);
    this.CheckClosingParen(buffer);
    return true;
}
/// <summary>
/// Tries to decode a non-backtracking subexpression introduced by <c>?&gt;</c>.
/// </summary>
/// <param name="buffer">The buffer.</param>
/// <returns><c>true</c> if the construct was recognized and parsed; otherwise <c>false</c>.</returns>
private bool CheckNonBacktracking(RegexBuffer buffer)
{
    // Look for non-backtracking sub-expression ?>
    Regex regex = new Regex(
        @"
        ^                         # anchor to start of string
        \?\>
        (?<Rest>.+)               # The rest of the expression
        ",
        RegexOptions.IgnorePatternWhitespace);

    Match match = regex.Match(buffer.String);
    if (!match.Success)
    {
        return false;
    }

    // The description has no placeholders, so it is assigned as a plain literal.
    this.description = "Non-backtracking subexpression";

    // Skip the "?>" header, parse the body, then require the closing parenthesis.
    buffer.Offset += match.Groups["Rest"].Index;
    this.expression = new RegexExpression(buffer);
    this.CheckClosingParen(buffer);
    return true;
}