/// <summary>
/// Parses a character class ("[...]") and records a human-readable
/// description of it.
/// </summary>
/// <param name="buffer">
/// Buffer expected to be positioned on the opening '['; one MoveNext call
/// steps past it before the class body is matched.
/// </param>
public RegexCharClass(RegexBuffer buffer)
{
    int startLoc = buffer.Offset;
    buffer.MoveNext();

    // The pattern is anchored with '^' so the class has to start at the
    // current position: Offset is advanced by the match length only, so a
    // match found further along would leave the buffer out of sync.
    // (Assumes buffer.String is the text from the current offset onward,
    // which is what advancing Offset by the match length implies.)
    //
    // The class body is a run of either escaped pairs ("\]" must not close
    // the class) or any non-backslash character, newlines included.
    // A ']' directly after '[' or '[^' is still taken as a literal member,
    // because the body needs at least one character.
    Regex regex = new Regex(
        @"^(?<Negated>\^?)(?<Class>(?:\\.|[^\\])+?)\]",
        RegexOptions.Singleline);
    Match match = regex.Match(buffer.String);

    if (match.Success)
    {
        string format = match.Groups["Negated"].Length != 0
            ? "Any character not in \"{0}\""
            : "Any character in \"{0}\"";
        description = String.Format(format, match.Groups["Class"]);

        // Consume the class body together with the closing ']'.
        buffer.Offset += match.Length;
    }
    else
    {
        description = "missing ']' in character class";
    }

    buffer.AddLookup(this, startLoc, buffer.Offset - 1);
}
/// <summary>
/// Eats the whole comment: advances the buffer until the end of the line
/// or the end of the pattern, whichever comes first.
/// </summary>
/// <param name="buffer">Buffer positioned inside a '#' comment.</param>
void EatComment(RegexBuffer buffer)
{
    // Stop at either line-ending character so '\n'-only input terminates the
    // comment too, and never read past the end when the comment is the last
    // thing in the pattern. The terminator itself is left in the buffer.
    while (!buffer.AtEnd && buffer.Current != '\r' && buffer.Current != '\n')
    {
        buffer.MoveNext();
    }
}
/// <summary>
/// Walks the buffer and appends one item to <c>items</c> for each construct
/// found, returning early when a ')' is reached.
/// </summary>
/// <param name="buffer">Buffer to consume.</param>
void Parse(RegexBuffer buffer)
{
    while (!buffer.AtEnd)
    {
        char next = buffer.Current;

        if (buffer.IgnorePatternWhitespace)
        {
            // if this regex ignores whitespace, blanks are skipped...
            if (next == ' ' || next == '\r' || next == '\n' || next == '\t')
            {
                buffer.MoveNext();
                continue;
            }

            // ...and '#' starts a comment.
            if (next == '#')
            {
                EatComment(buffer);
                continue;
            }
        }

        switch (next)
        {
            case ')':
                // end of closure; just return.
                return;

            case '(':
                items.Add(new RegexCapture(buffer));
                break;

            case '[':
                items.Add(new RegexCharClass(buffer));
                break;

            case '{':
                items.Add(new RegexQuantifier(buffer));
                break;

            case '|':
                items.Add(new RegexAlternate(buffer));
                break;

            default:
                // Escapes ('\\'), a '#' when whitespace is significant, and
                // every other character are all handled by RegexCharacter.
                items.Add(new RegexCharacter(buffer));
                break;
        }
    }
}
/// <summary>
/// Decodes the escape sequence that follows a backslash and stores its
/// description in <c>character</c>.
/// </summary>
/// <param name="buffer">
/// Buffer positioned on the backslash; one MoveNext call steps past it.
/// </param>
void DecodeEscape(RegexBuffer buffer)
{
    buffer.MoveNext();

    // Try the lookup table of known escapes first.
    character = (string)escaped[buffer.Current];
    if (character != null)
    {
        special = true;
        buffer.MoveNext();
        return;
    }

    // Not in the table: it may be a back reference, which
    // CheckBackReference handles itself when it returns true.
    if (CheckBackReference(buffer))
    {
        return;
    }

    // TODO: Handle other items below:
    switch (buffer.Current)
    {
        case 'u':
            buffer.MoveNext();
            character = "Unicode " + ReadEscapeDigits(buffer, 4);
            break;

        case ' ':
            character = "' ' (space)";
            special = false;
            buffer.MoveNext();
            break;

        case 'c':
            buffer.MoveNext();
            character = "CTRL-" + buffer.Current;
            buffer.MoveNext();
            break;

        case 'x':
            buffer.MoveNext();
            character = "Hex " + ReadEscapeDigits(buffer, 2);
            break;

        default:
            character = new String(buffer.Current, 1);
            special = false;
            buffer.MoveNext();
            break;
    }
}

// Takes up to `count` characters from the front of buffer.String and
// advances the buffer past them. The length is clamped so that an escape
// truncated by the end of the pattern (e.g. a trailing "\u12") no longer
// makes Substring throw ArgumentOutOfRangeException.
private static string ReadEscapeDigits(RegexBuffer buffer, int count)
{
    string remaining = buffer.String;
    int available = remaining.Length < count ? remaining.Length : count;
    string digits = remaining.Substring(0, available);
    buffer.Offset += available;
    return digits;
}
/// <summary>
/// Parses a brace quantifier ("{n}", "{n,}" or "{n,m}", optionally followed
/// by '?') and records a human-readable description of it.
/// </summary>
/// <param name="buffer">
/// Buffer expected to be positioned on the opening '{'; one MoveNext call
/// steps past it before the quantifier body is matched.
/// </param>
public RegexQuantifier(RegexBuffer buffer)
{
    int startLoc = buffer.Offset;
    buffer.MoveNext();

    // look for "n}", "n,}", or "n,m}"
    // Anchored with '^' so the quantifier must start at the current
    // position. Unanchored, a '{' that does not open a valid quantifier
    // (e.g. "{abc}x{3}") would pick up a later "3}", be described with the
    // wrong counts, and advance Offset by a length unrelated to where the
    // match actually was.
    Regex regex = new Regex(@"^(?<n>\d+)(?<Comma>,?)(?<m>\d*)\}");
    Match match = regex.Match(buffer.String);

    if (match.Success)
    {
        if (match.Groups["m"].Length != 0)
        {
            description = String.Format(
                "At least {0}, but not more than {1} times",
                match.Groups["n"],
                match.Groups["m"]);
        }
        else if (match.Groups["Comma"].Length != 0)
        {
            description = String.Format("At least {0} times", match.Groups["n"]);
        }
        else
        {
            description = String.Format("Exactly {0} times", match.Groups["n"]);
        }

        // Consume the counts together with the closing '}'.
        buffer.Offset += match.Length;

        // A '?' directly after the quantifier makes it non-greedy.
        if (!buffer.AtEnd && buffer.Current == '?')
        {
            description += " (non-greedy)";
            buffer.MoveNext();
        }
    }
    else
    {
        description = "missing '}' in quantifier";
    }

    buffer.AddLookup(this, startLoc, buffer.Offset - 1);
}
/// <summary>
/// Parses a parenthesised group and registers it with the buffer's lookup.
/// </summary>
/// <param name="buffer">
/// Buffer positioned at the start of the group; one MoveNext call steps
/// past the first character before the contents are examined.
/// </param>
/// <exception cref="Exception">
/// Thrown when a group that does not start with '?' cannot be handled as a
/// plain capture.
/// </exception>
public RegexCapture(RegexBuffer buffer)
{
    startLocation = buffer.Offset;
    buffer.MoveNext();

    // we're not in a series of normal characters, so clear
    buffer.ClearInSeries();

    if (buffer.Current != '?')
    {
        // plain old capture...
        bool handled = HandlePlainOldCapture(buffer);
        if (!handled)
        {
            string message = String.Format("Unrecognized capture: {0}", buffer.String);
            throw new Exception(message);
        }
    }
    else
    {
        // The capture starts with '?', so decode what comes after it.
        // Each decoder is tried in turn and the first one that succeeds
        // stops the chain. If none succeeds, nothing further is done here.
        bool decoded =
            CheckNamed(buffer)
            || CheckBalancedGroup(buffer)
            || CheckNonCapturing(buffer)
            || CheckOptions(buffer)
            || CheckLookahead(buffer)
            || CheckNonBacktracking(buffer)
            || CheckConditional(buffer);
    }

    buffer.AddLookup(this, startLocation, buffer.Offset - 1);
}
/// <summary>
/// Parses a single character, metacharacter, or escape sequence and
/// registers its description with the buffer's lookup.
/// </summary>
/// <param name="buffer">Buffer positioned on the character to describe.</param>
public RegexCharacter(RegexBuffer buffer)
{
    int startLoc = buffer.Offset;
    bool quantifier = false;
    char current = buffer.Current;

    if (current == '\\')
    {
        // Escapes do their own buffer advancing.
        DecodeEscape(buffer);
    }
    else
    {
        switch (current)
        {
            case '.':
                special = true;
                character = ". (any character)";
                break;

            case '+':
                special = true;
                quantifier = true;
                character = "+ (one or more times)";
                break;

            case '*':
                special = true;
                quantifier = true;
                character = "* (zero or more times)";
                break;

            case '?':
                special = true;
                quantifier = true;
                character = "? (zero or one time)";
                break;

            case '^':
                character = "^ (anchor to start of string)";
                break;

            case '$':
                character = "$ (anchor to end of string)";
                break;

            case ' ':
                character = "' ' (space)";
                break;

            default:
                special = false;
                character = current.ToString();
                break;
        }

        // Every non-escape case consumes exactly one character.
        buffer.MoveNext();
    }

    // A '?' directly after +, * or ? makes that quantifier non-greedy.
    if (quantifier && !buffer.AtEnd && buffer.Current == '?')
    {
        character += " (non-greedy)";
        buffer.MoveNext();
    }

    // The final flag is true when the description is a single character.
    bool singleCharacter = character.Length == 1;
    buffer.AddLookup(this, startLoc, buffer.Offset - 1, singleCharacter);
}
/// <summary>
/// Registers an alternation marker at the buffer's current position and
/// then steps past it.
/// </summary>
/// <param name="buffer">Buffer positioned on the "|" character.</param>
public RegexAlternate(RegexBuffer buffer)
{
    // The lookup entry covers a single position: start and end are the same.
    int position = buffer.Offset;
    buffer.AddLookup(this, position, position);

    // skip "|"
    buffer.MoveNext();
}