// Parses a character class ("[...]" or "[^...]") and stores a readable
// description of it.
//
// buffer: positioned on the character that opens the class. When a closing
//         ']' is found the offset is advanced past it; otherwise only the
//         opening character is consumed and the description reports the
//         missing bracket.
public RegexCharClass(RegexBuffer buffer)
{
    int startLoc = buffer.Offset;
    buffer.MoveNext();

    // The pattern is anchored because the offset below is advanced by the
    // match length, which is only correct when the match starts at the very
    // beginning of buffer.String (assumed to be the text from the current
    // offset onward -- the offset arithmetic in this class relies on that).
    //
    // Class body: the first member may be any character (a leading ']' is a
    // literal member); after that an unescaped ']' closes the class, while
    // an escaped one ("\]") stays inside it. Singleline lets a backslash be
    // followed by a newline.
    Regex regex = new Regex(
        @"^(?<Negated>\^?)(?<Class>(?:\\.|[^\\])(?:\\.|[^\]\\])*)\]",
        RegexOptions.Singleline);
    Match match = regex.Match(buffer.String);

    if (match.Success)
    {
        if (match.Groups["Negated"].ToString() == "^")
        {
            description = String.Format("Any character not in \"{0}\"",
                match.Groups["Class"]);
        }
        else
        {
            description = String.Format("Any character in \"{0}\"",
                match.Groups["Class"]);
        }

        // Skip everything up to and including the closing ']'.
        buffer.Offset += match.Groups[0].Length;
    }
    else
    {
        description = "missing ']' in character class";
    }

    buffer.AddLookup(this, startLoc, buffer.Offset - 1);
}
// Verifies that the buffer is positioned on the ')' that closes this capture
// and consumes it.
//
// Throws Exception when reading the current character fails (the error
// highlight is set to this capture's start first) or when the current
// character is not ')'.
void CheckClosingParen(RegexBuffer buffer)
{
    char closer = ' ';
    try
    {
        closer = buffer.Current;
    }
    catch (Exception inner)
    {
        // No closing parenthesis could be read: highlight the start of
        // this capture so the caller can show where the problem is.
        buffer.ErrorLocation = startLocation;
        buffer.ErrorLength = 1;
        throw new Exception("Missing closing ')' in capture", inner);
    }

    if (closer == ')')
    {
        buffer.Offset += 1;     // eat closing parenthesis
        return;
    }

    throw new Exception(
        String.Format("Unterminated closure at offset {0}", buffer.Offset));
}
// Recognizes a balancing group: ?<Name1-Name2> or ?'Name1-Name2'.
//
// On a match the description is set, the buffer is advanced to the text
// following the group header, the nested expression is parsed and the
// closing ')' is consumed.
//
// Returns true when the construct was recognized, false otherwise (the
// buffer is left untouched in that case).
bool CheckBalancedGroup(RegexBuffer buffer)
{
    // The delimiters are written as plain character classes. The previous
    // form, [\<|'], also accepted a literal '|' because '|' has no special
    // meaning inside a class. Digits are allowed in names to agree with the
    // named-capture check.
    Regex regex = new Regex(@"
        ^                           # anchor to start of string
        \?[<']                      # ?< or ?'
        (?<Name1>[a-zA-Z0-9]+?)     # Capture name1
        -
        (?<Name2>[a-zA-Z0-9]+?)     # Capture name2
        [>']                        # > or '
        (?<Rest>.+)                 # The rest of the expression
        ",
        RegexOptions.IgnorePatternWhitespace);

    Match match = regex.Match(buffer.String);
    if (!match.Success)
    {
        return false;
    }

    description = String.Format("Balancing Group <{0}>-<{1}>",
        match.Groups["Name1"],
        match.Groups["Name2"]);

    // The pattern is anchored, so the index of Rest equals the number of
    // characters taken up by the group header.
    buffer.Offset += match.Groups["Rest"].Index;
    expression = new RegexExpression(buffer);
    CheckClosingParen(buffer);
    return true;
}
// Builds a buffer over the given pattern text, parses it into an expression
// and returns that expression rendered with ToString(0).
string Interpret(string regex)
{
    RegexExpression parsed = new RegexExpression(new RegexBuffer(regex));
    return parsed.ToString(0);
}
// Skips a '#' comment: advances the buffer until the end of the current
// line or the end of the buffer, whichever comes first. The line-break
// character itself is not consumed.
void EatComment(RegexBuffer buffer)
{
    // Stop at '\n' as well as '\r' so patterns with LF-only line endings
    // do not lose the lines that follow the comment, and check AtEnd so a
    // comment on the last line does not read past the end of the buffer.
    while (!buffer.AtEnd
        && buffer.Current != '\r'
        && buffer.Current != '\n')
    {
        buffer.MoveNext();
    }
}
// Recognizes a named capture: ?<Name> or ?'Name'.
//
// On a match the description is set, the buffer is advanced to the text
// after the name, the nested expression is parsed and the closing ')' is
// consumed. Returns true when recognized, false otherwise.
bool CheckNamed(RegexBuffer buffer)
{
    Regex namedGroup = new Regex(@"
        ^                           # anchor to start of string
        \?(\<|')                    # ?< or ?'
        (?<Name>[a-zA-Z0-9]+?)      # Capture name
        (\>|')                      # ?> or ?'
        (?<Rest>.+)                 # The rest of the string
        ",
        RegexOptions.IgnorePatternWhitespace);

    Match found = namedGroup.Match(buffer.String);
    if (!found.Success)
    {
        return false;
    }

    description = String.Format("Capture to <{0}>", found.Groups["Name"]);

    // Skip the group header; Rest begins right after it.
    buffer.Offset += found.Groups["Rest"].Index;
    expression = new RegexExpression(buffer);
    CheckClosingParen(buffer);
    return true;
}
// Main parse loop: reads the buffer until it is exhausted or a ')' is met,
// appending one item to "items" for every construct encountered.
//
// A ')' is not consumed here; the method simply returns so that whoever
// opened the group can check for it.
void Parse(RegexBuffer buffer)
{
    while (!buffer.AtEnd)
    {
        char current = buffer.Current;

        // When the pattern ignores whitespace, blanks and line breaks carry
        // no meaning and are stepped over.
        if (buffer.IgnorePatternWhitespace && " \r\n\t".IndexOf(current) >= 0)
        {
            buffer.MoveNext();
            continue;
        }

        switch (current)
        {
            case ')':
                // end of closure; just return.
                return;

            case '(':
                items.Add(new RegexCapture(buffer));
                break;

            case '[':
                items.Add(new RegexCharClass(buffer));
                break;

            case '{':
                items.Add(new RegexQuantifier(buffer));
                break;

            case '|':
                items.Add(new RegexAlternate(buffer));
                break;

            case '#':
                // '#' starts a comment only in ignore-whitespace mode;
                // otherwise it is an ordinary character.
                if (buffer.IgnorePatternWhitespace)
                {
                    EatComment(buffer);
                }
                else
                {
                    items.Add(new RegexCharacter(buffer));
                }
                break;

            default:
                // Everything else, including a backslash escape, is
                // handled by RegexCharacter.
                items.Add(new RegexCharacter(buffer));
                break;
        }
    }
}
// Parses the given pattern text and returns the resulting expression
// rendered with ToString(0).
string Interpret(string regex)
{
    RegexBuffer source = new RegexBuffer(regex);
    return new RegexExpression(source).ToString(0);
}
// Parses a parenthesized group. A group whose first character is '?' is
// offered to each of the special-construct checks in turn, stopping at the
// first one that recognizes it; any other group is a plain capture.
public RegexCapture(RegexBuffer buffer)
{
    startLocation = buffer.Offset;
    buffer.MoveNext();

    // we're not in a series of normal characters, so clear
    buffer.ClearInSeries();

    if (buffer.Current != '?')
    {
        // plain old capture...
        if (!HandlePlainOldCapture(buffer))
        {
            throw new Exception(
                String.Format("Unrecognized capture: {0}", buffer.String));
        }
    }
    else
    {
        // Short-circuit evaluation keeps the original order and stops at
        // the first check that succeeds.
        // NOTE(review): the result is not inspected, so a "(?" construct
        // that none of the checks recognizes passes through silently --
        // confirm whether that should raise an error instead.
        bool decoded =
               CheckNamed(buffer)
            || CheckBalancedGroup(buffer)
            || CheckNonCapturing(buffer)
            || CheckOptions(buffer)
            || CheckLookahead(buffer)
            || CheckNonBacktracking(buffer)
            || CheckConditional(buffer);
    }

    buffer.AddLookup(this, startLocation, buffer.Offset - 1);
}
// Decodes a backslash escape and stores its description in "character".
//
// The buffer is advanced once on entry; the character then found is looked
// up in the "escaped" table, tried as a \k<name> backreference, and finally
// handled by the switch below (\u, \x, \c, escaped space, or a literal).
void DecodeEscape(RegexBuffer buffer)
{
    buffer.MoveNext();

    character = (string)escaped[buffer.Current];
    if (character != null)
    {
        // Known escape from the lookup table.
        special = true;
        buffer.MoveNext();
        return;
    }

    if (CheckBackReference(buffer))
    {
        return;
    }

    // TODO: Handle other items below:
    switch (buffer.Current)
    {
        case 'u':
        {
            buffer.MoveNext();
            // Take at most four characters: a pattern that ends in the
            // middle of the escape used to make Substring throw
            // ArgumentOutOfRangeException.
            string rest = buffer.String;
            int take = Math.Min(4, rest.Length);
            character = "Unicode " + rest.Substring(0, take);
            if (take < 4)
            {
                character += " (incomplete escape)";
            }
            buffer.Offset += take;
            break;
        }

        case ' ':
            character = "' ' (space)";
            special = false;
            buffer.MoveNext();
            break;

        case 'c':
            buffer.MoveNext();
            character = "CTRL-" + buffer.Current;
            buffer.MoveNext();
            break;

        case 'x':
        {
            buffer.MoveNext();
            // Same guard as for \u, with two characters expected.
            string rest = buffer.String;
            int take = Math.Min(2, rest.Length);
            character = "Hex " + rest.Substring(0, take);
            if (take < 2)
            {
                character += " (incomplete escape)";
            }
            buffer.Offset += take;
            break;
        }

        default:
            // Any other escaped character stands for itself.
            character = new String(buffer.Current, 1);
            special = false;
            buffer.MoveNext();
            break;
    }
}
// Handles (?(expression)yes|no).
// On entry the buffer points at the first character of the condition
// expression. The condition is parsed and its ')' consumed, then the
// yes|no part is parsed and the ')' after it consumed as well.
public RegexConditional(RegexBuffer buffer)
{
    this.startLocation = buffer.Offset;

    // Condition, followed by the ')' that ends it.
    this.expression = new RegexExpression(buffer);
    this.CheckClosingParen(buffer);

    // yes|no branches, followed by the ')' that ends the whole construct.
    this.yesNo = new RegexExpression(buffer);
    this.CheckClosingParen(buffer);

    int lastConsumed = buffer.Offset - 1;
    buffer.AddLookup(this, this.startLocation, lastConsumed);
}
// Handles a group with no "?" prefix. The buffer already points at the
// group's contents, so the nested expression is parsed and the closing ')'
// is then required. Always returns true.
bool HandlePlainOldCapture(RegexBuffer buffer)
{
    // When the buffer reports ExplicitCapture, the group is described as
    // non-capturing.
    if (buffer.ExplicitCapture)
    {
        description = "Non-capturing Group";
    }

    expression = new RegexExpression(buffer);
    CheckClosingParen(buffer);

    return true;
}
// Parses a brace quantifier -- "{n}", "{n,}" or "{n,m}", optionally followed
// by '?' for the non-greedy form -- and stores a readable description.
//
// buffer: positioned on the character that opens the quantifier. When the
//         text that follows is not a well-formed quantifier only that
//         character is consumed and the description reports the problem.
public RegexQuantifier(RegexBuffer buffer)
{
    int startLoc = buffer.Offset;
    buffer.MoveNext();

    // look for "n}", "n,}", or "n,m}"
    // Anchored: the offset is advanced by the match length below, which is
    // only valid when the match starts at the current position. Without the
    // anchor a quantifier further along in the pattern could be picked up
    // (assumes buffer.String is the text from the current offset onward).
    Regex regex = new Regex(@"^(?<n>\d+)(?<Comma>,?)(?<m>\d*)\}");
    Match match = regex.Match(buffer.String);

    if (match.Success)
    {
        if (match.Groups["m"].Length != 0)
        {
            description = String.Format("At least {0}, but not more than {1} times",
                match.Groups["n"], match.Groups["m"]);
        }
        else if (match.Groups["Comma"].Length != 0)
        {
            description = String.Format("At least {0} times",
                match.Groups["n"]);
        }
        else
        {
            description = String.Format("Exactly {0} times",
                match.Groups["n"]);
        }

        buffer.Offset += match.Groups[0].Length;

        // A trailing '?' makes the quantifier non-greedy.
        if (!buffer.AtEnd && buffer.Current == '?')
        {
            description += " (non-greedy)";
            buffer.MoveNext();
        }
    }
    else
    {
        description = "missing '}' in quantifier";
    }

    buffer.AddLookup(this, startLoc, buffer.Offset - 1);
}
// Recognizes a named backreference, \k<name>. It is matched against
// buffer.String, which is expected to start at the 'k'.
//
// On a match, marks the character as special, stores the description and
// advances the buffer past "k<name>". Returns true when recognized.
bool CheckBackReference(RegexBuffer buffer)
{
    // look for \k<name>
    // The '^' anchor is required: without it any escape that happened to be
    // followed later in the pattern by "k<...>" was reported as a
    // backreference, and the offset was advanced by the wrong amount.
    Regex regex = new Regex(@"
        ^k\<(?<Name>.+?)\>
        ",
        RegexOptions.IgnorePatternWhitespace);

    Match match = regex.Match(buffer.String);
    if (!match.Success)
    {
        return false;
    }

    special = true;
    this.character = String.Format("Backreference to match: {0}",
        match.Groups["Name"]);
    buffer.Offset += match.Groups[0].Length;
    return true;
}
// Recognizes the four zero-width assertions: ?= ?! ?<= ?<!
//
// On a match the description is set according to the assertion, the buffer
// is advanced past the assertion prefix, the nested expression is parsed
// and the closing ')' is consumed. Returns true when recognized.
bool CheckLookahead(RegexBuffer buffer)
{
    Regex assertionPattern = new Regex(@"
        ^                           # anchor to start of string
        \?
        (?<Assertion><=|<!|=|!)     # assertion char
        (?<Rest>.+)                 # The rest of the expression
        ",
        RegexOptions.IgnorePatternWhitespace);

    Match found = assertionPattern.Match(buffer.String);
    if (!found.Success)
    {
        return false;
    }

    string assertion = found.Groups["Assertion"].Value;
    if (assertion == "=")
    {
        description = "zero-width positive lookahead";
    }
    else if (assertion == "!")
    {
        description = "zero-width negative lookahead";
    }
    else if (assertion == "<=")
    {
        description = "zero-width positive lookbehind";
    }
    else if (assertion == "<!")
    {
        description = "zero-width negative lookbehind";
    }

    buffer.Offset += found.Groups["Rest"].Index;
    expression = new RegexExpression(buffer);
    CheckClosingParen(buffer);
    return true;
}
// Recognizes an inline option group of the form ?imnsx-imnsx:
//
// On a match the description is built from the optionNames entry for the
// option text, the expression is cleared, and the buffer is advanced past
// the ':'. Returns true when recognized.
bool CheckOptions(RegexBuffer buffer)
{
    // look for ?imnsx-imnsx:
    Regex optionPattern = new Regex(@"
        ^                           # anchor to start of string
        \?(?<Options>[imnsx-]+):
        ",
        RegexOptions.IgnorePatternWhitespace);

    Match found = optionPattern.Match(buffer.String);
    if (!found.Success)
    {
        return false;
    }

    string option = found.Groups["Options"].Value;
    description = String.Format("Set options to {0}", optionNames[option]);

    // NOTE(review): the subexpression after ':' is neither parsed here nor
    // is its closing ')' checked -- confirm this is intended.
    expression = null;
    buffer.Offset += found.Groups[0].Length;
    return true;
}
// Recognizes a non-backtracking subexpression, ?>
//
// On a match the description is set, the buffer is advanced past the "?>"
// prefix, the nested expression is parsed and the closing ')' is consumed.
// Returns true when recognized.
bool CheckNonBacktracking(RegexBuffer buffer)
{
    // Look for non-backtracking sub-expression ?>
    Regex regex = new Regex(@"
        ^                           # anchor to start of string
        \?\>
        (?<Rest>.+)                 # The rest of the expression
        ",
        RegexOptions.IgnorePatternWhitespace);

    Match match = regex.Match(buffer.String);
    if (!match.Success)
    {
        return false;
    }

    // The description previously read "subexpressio" (missing final 'n').
    description = "Non-backtracking subexpression";

    buffer.Offset += match.Groups["Rest"].Index;
    expression = new RegexExpression(buffer);
    this.CheckClosingParen(buffer);
    return true;
}
// Recognizes a conditional, (?(name)yesmatch|nomatch); "name" may also be
// an expression.
//
// On a match the buffer is advanced past "?(" and the remainder is handed
// to RegexConditional. Returns true when recognized.
bool CheckConditional(RegexBuffer buffer)
{
    Regex conditionalPattern = new Regex(@"
        ^                           # anchor to start of string
        \?\(
        (?<Rest>.+)                 # The rest of the expression
        ",
        RegexOptions.IgnorePatternWhitespace);

    Match found = conditionalPattern.Match(buffer.String);
    if (!found.Success)
    {
        return false;
    }

    description = "Conditional Subexpression";

    // Rest begins right after "?(", i.e. at the condition itself.
    buffer.Offset += found.Groups["Rest"].Index;
    expression = new RegexConditional(buffer);
    return true;
}
// Parses a single pattern character (or a backslash escape) and stores its
// description in "character". The quantifiers + * ? may be followed by a
// '?' that marks them as non-greedy.
public RegexCharacter(RegexBuffer buffer)
{
    int startLoc = buffer.Offset;
    bool quantifier = false;
    char symbol = buffer.Current;

    if (symbol == '\\')
    {
        // Escapes do their own buffer advancing.
        DecodeEscape(buffer);
    }
    else
    {
        switch (symbol)
        {
            case '.':
                character = ". (any character)";
                special = true;
                break;

            case '+':
                character = "+ (one or more times)";
                special = true;
                quantifier = true;
                break;

            case '*':
                character = "* (zero or more times)";
                special = true;
                quantifier = true;
                break;

            case '?':
                character = "? (zero or one time)";
                special = true;
                quantifier = true;
                break;

            case '^':
                character = "^ (anchor to start of string)";
                break;

            case '$':
                character = "$ (anchor to end of string)";
                break;

            case ' ':
                character = "' ' (space)";
                break;

            default:
                character = symbol.ToString();
                special = false;
                break;
        }

        // Every non-escape case consumes exactly one character.
        buffer.MoveNext();
    }

    if (quantifier && !buffer.AtEnd && buffer.Current == '?')
    {
        character += " (non-greedy)";
        buffer.MoveNext();
    }

    // The last argument is true when the description is one character long.
    bool singleCharacter = character.Length == 1;
    buffer.AddLookup(this, startLoc, buffer.Offset - 1, singleCharacter);
}
// Recognizes a named backreference, \k<name>. It is matched against
// buffer.String, which is expected to start at the 'k'.
//
// On a match, marks the character as special, stores the description and
// advances the buffer past "k<name>". Returns true when recognized.
bool CheckBackReference(RegexBuffer buffer)
{
    // look for \k<name>
    // Anchored with '^' so only a backreference at the current position is
    // accepted. Unanchored, a "k<...>" anywhere later in the pattern made
    // an unrelated escape look like a backreference and moved the offset by
    // the wrong amount.
    Regex regex = new Regex(@"
        ^k\<(?<Name>.+?)\>
        ",
        RegexOptions.IgnorePatternWhitespace);

    Match match = regex.Match(buffer.String);
    if (match.Success)
    {
        special = true;
        this.character = String.Format("Backreference to match: {0}",
            match.Groups["Name"]);
        buffer.Offset += match.Groups[0].Length;
        return true;
    }

    return false;
}
// Decodes a backslash escape and stores its description in "character".
//
// The buffer is advanced once on entry; the character then found is looked
// up in the "escaped" table, tried as a \k<name> backreference, and finally
// handled by the switch below (\u, \x, \c, escaped space, or a literal).
void DecodeEscape(RegexBuffer buffer)
{
    buffer.MoveNext();

    character = (string) escaped[buffer.Current];
    if (character != null)
    {
        // Known escape from the lookup table.
        special = true;
        buffer.MoveNext();
        return;
    }

    if (CheckBackReference(buffer))
    {
        return;
    }

    // TODO: Handle other items below:
    switch (buffer.Current)
    {
        case 'u':
        {
            buffer.MoveNext();
            // Clamp to what is left: Substring(0, 4) threw
            // ArgumentOutOfRangeException when the pattern ended before
            // four characters were available.
            string remaining = buffer.String;
            int available = Math.Min(4, remaining.Length);
            character = "Unicode " + remaining.Substring(0, available);
            if (available < 4)
            {
                character += " (incomplete escape)";
            }
            buffer.Offset += available;
            break;
        }

        case ' ':
            character = "' ' (space)";
            special = false;
            buffer.MoveNext();
            break;

        case 'c':
            buffer.MoveNext();
            character = "CTRL-" + buffer.Current;
            buffer.MoveNext();
            break;

        case 'x':
        {
            buffer.MoveNext();
            // Same clamp as for \u, with two characters expected.
            string remaining = buffer.String;
            int available = Math.Min(2, remaining.Length);
            character = "Hex " + remaining.Substring(0, available);
            if (available < 2)
            {
                character += " (incomplete escape)";
            }
            buffer.Offset += available;
            break;
        }

        default:
            // Any other escaped character stands for itself.
            character = new String(buffer.Current, 1);
            special = false;
            buffer.MoveNext();
            break;
    }
}
// Represents the alternation operator. Registers a one-character lookup at
// the current offset and then steps over the "|".
public RegexAlternate(RegexBuffer buffer)
{
    int position = buffer.Offset;
    buffer.AddLookup(this, position, position);

    // skip "|"
    buffer.MoveNext();
}
// Recognizes a conditional, (?(name)yesmatch|nomatch); "name" may also be
// an expression.
//
// On a match the buffer is advanced past "?(" and parsing continues in
// RegexConditional. Returns true when recognized, false otherwise.
bool CheckConditional(RegexBuffer buffer)
{
    Regex pattern = new Regex(@"
        ^                           # anchor to start of string
        \?\(
        (?<Rest>.+)                 # The rest of the expression
        ",
        RegexOptions.IgnorePatternWhitespace);

    Match result = pattern.Match(buffer.String);
    bool recognized = result.Success;

    if (recognized)
    {
        description = "Conditional Subexpression";

        // Move to the first character of the condition.
        int headerLength = result.Groups["Rest"].Index;
        buffer.Offset += headerLength;
        expression = new RegexConditional(buffer);
    }

    return recognized;
}
// Recognizes an inline option group of the form ?imnsx-imnsx:
//
// On a match the description is built from the optionNames entry for the
// option text, the expression is cleared, and the buffer is advanced past
// the ':'. Returns true when recognized, false otherwise.
bool CheckOptions(RegexBuffer buffer)
{
    Regex pattern = new Regex(@"
        ^                           # anchor to start of string
        \?(?<Options>[imnsx-]+):
        ",
        RegexOptions.IgnorePatternWhitespace);

    Match result = pattern.Match(buffer.String);
    bool recognized = result.Success;

    if (recognized)
    {
        // The whole option text (for example "i" or "im-sx") is the key.
        string option = result.Groups["Options"].Value;
        description = String.Format("Set options to {0}", optionNames[option]);

        // NOTE(review): no nested expression is parsed for this construct
        // and no closing ')' is checked -- confirm this is intended.
        expression = null;
        buffer.Offset += result.Groups[0].Length;
    }

    return recognized;
}
// Recognizes the four zero-width assertions: ?= ?! ?<= ?<!
//
// On a match the description names the assertion, the buffer is advanced
// past the assertion prefix, the nested expression is parsed and the
// closing ')' is consumed. Returns true when recognized, false otherwise.
bool CheckLookahead(RegexBuffer buffer)
{
    Regex pattern = new Regex(@"
        ^                           # anchor to start of string
        \?
        (?<Assertion><=|<!|=|!)     # assertion char
        (?<Rest>.+)                 # The rest of the expression
        ",
        RegexOptions.IgnorePatternWhitespace);

    Match result = pattern.Match(buffer.String);
    if (!result.Success)
    {
        return false;
    }

    string kind = result.Groups["Assertion"].Value;
    switch (kind)
    {
        case "<=":
            description = "zero-width positive lookbehind";
            break;

        case "<!":
            description = "zero-width negative lookbehind";
            break;

        case "=":
            description = "zero-width positive lookahead";
            break;

        case "!":
            description = "zero-width negative lookahead";
            break;
    }

    int headerLength = result.Groups["Rest"].Index;
    buffer.Offset += headerLength;
    expression = new RegexExpression(buffer);
    CheckClosingParen(buffer);
    return true;
}
// Recognizes a named capture: ?<Name> or ?'Name'.
//
// On a match the description is set, the buffer is advanced to the text
// after the name, the nested expression is parsed and the closing ')' is
// consumed. Returns true when recognized, false otherwise.
bool CheckNamed(RegexBuffer buffer)
{
    Regex pattern = new Regex(@"
        ^                           # anchor to start of string
        \?(\<|')                    # ?< or ?'
        (?<Name>[a-zA-Z0-9]+?)      # Capture name
        (\>|')                      # ?> or ?'
        (?<Rest>.+)                 # The rest of the string
        ",
        RegexOptions.IgnorePatternWhitespace);

    Match result = pattern.Match(buffer.String);
    bool recognized = result.Success;

    if (recognized)
    {
        description = String.Format("Capture to <{0}>", result.Groups["Name"]);

        // advance buffer to the rest of the expression
        int headerLength = result.Groups["Rest"].Index;
        buffer.Offset += headerLength;

        expression = new RegexExpression(buffer);
        CheckClosingParen(buffer);
    }

    return recognized;
}
// Handles a group that has no "?" prefix. The buffer is already at the
// group's contents: parse them as a nested expression, then require the
// closing ')'. Always returns true.
bool HandlePlainOldCapture(RegexBuffer buffer)
{
    bool explicitOnly = buffer.ExplicitCapture;
    if (explicitOnly)
    {
        // When the buffer reports ExplicitCapture, the group is described
        // as non-capturing.
        this.description = "Non-capturing Group";
    }

    this.expression = new RegexExpression(buffer);
    this.CheckClosingParen(buffer);
    return true;
}
// Builds an expression by running Parse over the given buffer.
public RegexExpression(RegexBuffer buffer)
{
    this.Parse(buffer);
}
// Recognizes a balancing group: ?<Name1-Name2> or ?'Name1-Name2'.
//
// On a match the description is set, the buffer is advanced to the text
// following the group header, the nested expression is parsed and the
// closing ')' is consumed.
//
// Returns true when the construct was recognized, false otherwise.
bool CheckBalancedGroup(RegexBuffer buffer)
{
    // look for ?<Name1-Name2> or ?'Name1-Name2' syntax...
    // The delimiter classes no longer contain '|': inside a character class
    // it is a literal, so [\<|'] also accepted "?|Name1-Name2|". Names may
    // contain digits, in line with the named-capture check.
    Regex regex = new Regex(@"
        ^                           # anchor to start of string
        \?[<']                      # ?< or ?'
        (?<Name1>[a-zA-Z0-9]+?)     # Capture name1
        -
        (?<Name2>[a-zA-Z0-9]+?)     # Capture name2
        [>']                        # > or '
        (?<Rest>.+)                 # The rest of the expression
        ",
        RegexOptions.IgnorePatternWhitespace);

    Match match = regex.Match(buffer.String);
    if (match.Success)
    {
        description = String.Format("Balancing Group <{0}>-<{1}>",
            match.Groups["Name1"],
            match.Groups["Name2"]);

        // Anchored pattern: the index of Rest is the header length.
        buffer.Offset += match.Groups["Rest"].Index;
        expression = new RegexExpression(buffer);
        CheckClosingParen(buffer);
        return true;
    }

    return false;
}
// Recognizes a non-capturing group, ?:
//
// On a match the description is set, the buffer is advanced past the "?:"
// prefix, the nested expression is parsed and the closing ')' is consumed.
// Returns true when recognized, false otherwise.
bool CheckNonCapturing(RegexBuffer buffer)
{
    Regex pattern = new Regex(@"
        ^                           # anchor to start of string
        \?:
        (?<Rest>.+)                 # The rest of the expression
        ",
        RegexOptions.IgnorePatternWhitespace);

    Match result = pattern.Match(buffer.String);
    if (!result.Success)
    {
        return false;
    }

    description = "Non-capturing Group";

    int headerLength = result.Groups["Rest"].Index;
    buffer.Offset += headerLength;
    expression = new RegexExpression(buffer);
    this.CheckClosingParen(buffer);
    return true;
}