/// <summary>
/// Renders this charset entry as regular expression text
/// </summary>
/// <returns>
/// The escaped characters written out one by one when the entry spans two or three
/// consecutive characters, otherwise the escaped bounds joined by a hyphen
/// </returns>
public override string ToString()
{
    // Distance between the two bounds decides which textual form is used.
    var span = Last - First;
    switch (span)
    {
        case 1:
            // Two adjacent characters: list both, no hyphen.
            return RegexExpression.EscapeRangeChar(First)
                + RegexExpression.EscapeRangeChar(Last);
        case 2:
            // Three consecutive characters: list all three, no hyphen.
            return RegexExpression.EscapeRangeChar(First)
                + RegexExpression.EscapeRangeChar((char)(First + 1))
                + RegexExpression.EscapeRangeChar(Last);
        default:
            // Everything else is written as "first-last".
            return RegexExpression.EscapeRangeChar(First)
                + "-"
                + RegexExpression.EscapeRangeChar(Last);
    }
}
/// <summary>
/// Builds a concatenation from a left hand expression followed by one or more right hand expressions
/// </summary>
/// <param name="left">The left expression</param>
/// <param name="right">The right expressions, applied in order</param>
public RegexConcatExpression(RegexExpression left, params RegexExpression[] right)
{
    Left = left;
    var lastIndex = right.Length - 1;
    for (var index = 0; index < right.Length; ++index)
    {
        var current = right[index];
        if (null == Right)
        {
            Right = current;
        }
        if (index == lastIndex)
        {
            // The final operand stays in Right; nothing more to fold.
            continue;
        }
        // More operands follow: push the pair gathered so far down into a nested
        // concatenation on the left, freeing Right for the next operand.
        var folded = new RegexConcatExpression { Left = Left, Right = Right };
        Right = null;
        Left = folded;
    }
}
/// <summary>
/// Renders this charset entry as regular expression text
/// </summary>
/// <returns>The escaped first and last characters joined by a hyphen</returns>
public override string ToString()
{
    var lower = RegexExpression.EscapeRangeChar(First);
    var upper = RegexExpression.EscapeRangeChar(Last);
    return lower + "-" + upper;
}
/// <summary>
/// Renders this charset entry as regular expression text
/// </summary>
/// <returns>The escaped form of the entry's character value</returns>
public override string ToString()
{
    var escaped = RegexExpression.EscapeRangeChar(Value);
    return escaped;
}
/// <summary>
/// Creates a repeat expression over the specified target expression with the given minimum and maximum occurrences
/// </summary>
/// <param name="expression">The target expression</param>
/// <param name="minOccurs">The minimum number of times the target expression can occur, or -1 (the default)</param>
/// <param name="maxOccurs">The maximum number of times the target expression can occur, or -1 (the default)</param>
public RegexRepeatExpression(RegexExpression expression, int minOccurs = -1, int maxOccurs = -1)
{
    this.Expression = expression;
    this.MinOccurs = minOccurs;
    this.MaxOccurs = maxOccurs;
}
/// <summary>
/// Creates an optional expression that wraps the specified target expression
/// </summary>
/// <param name="expression">The target expression to make optional</param>
public RegexOptionalExpression(RegexExpression expression)
{
    this.Expression = expression;
}
/// <summary>
/// Wraps an expression in a repeat or optional expression when the cursor is on a modifier
/// (<c>*</c>, <c>+</c>, <c>?</c> or <c>{min,max}</c>)
/// </summary>
/// <param name="expr">The expression the modifier applies to</param>
/// <param name="pc">The parse context positioned just after <paramref name="expr"/></param>
/// <returns>The wrapped expression, or <paramref name="expr"/> unchanged when no modifier is present</returns>
static RegexExpression _ParseModifier(RegexExpression expr, ParseContext pc)
{
    // Capture where the modifier begins; the wrapping expression is stamped with it.
    var startLine = pc.Line;
    var startColumn = pc.Column;
    var startPosition = pc.Position;
    var current = pc.Current;

    if ('*' == current)
    {
        expr = new RegexRepeatExpression(expr);
        expr.SetLocation(startLine, startColumn, startPosition);
        pc.Advance();
    }
    else if ('+' == current)
    {
        expr = new RegexRepeatExpression(expr, 1);
        expr.SetLocation(startLine, startColumn, startPosition);
        pc.Advance();
    }
    else if ('?' == current)
    {
        expr = new RegexOptionalExpression(expr);
        expr.SetLocation(startLine, startColumn, startPosition);
        pc.Advance();
    }
    else if ('{' == current)
    {
        pc.Advance();
        pc.TrySkipWhiteSpace();
        pc.Expecting('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ',', '}');
        var minOccurs = -1;
        var maxOccurs = -1;
        // Optional lower bound: absent in "{,n}" and "{}".
        if (',' != pc.Current && '}' != pc.Current)
        {
            var mark = pc.CaptureBuffer.Length;
            pc.TryReadDigits();
            minOccurs = int.Parse(pc.GetCapture(mark));
            pc.TrySkipWhiteSpace();
        }
        if (',' != pc.Current)
        {
            // No comma: "{n}" uses the same value for both bounds.
            maxOccurs = minOccurs;
        }
        else
        {
            pc.Advance();
            pc.TrySkipWhiteSpace();
            pc.Expecting('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '}');
            // Optional upper bound: absent in "{n,}".
            if ('}' != pc.Current)
            {
                var mark = pc.CaptureBuffer.Length;
                pc.TryReadDigits();
                maxOccurs = int.Parse(pc.GetCapture(mark));
                pc.TrySkipWhiteSpace();
            }
        }
        pc.Expecting('}');
        pc.Advance();
        expr = new RegexRepeatExpression(expr, minOccurs, maxOccurs);
        expr.SetLocation(startLine, startColumn, startPosition);
    }

    return expr;
}
/// <summary>
/// Parses a regular expression from the specified <see cref="ParseContext"/>
/// </summary>
/// <remarks>
/// Parsing stops at end of input or at an unmatched <c>)</c>, which is left unconsumed so that
/// the caller handling the matching <c>(</c> can verify and consume it.
/// </remarks>
/// <param name="pc">The parse context to use</param>
/// <returns>A new abstract syntax tree representing the expression, or null if no expression was present</returns>
public static RegexExpression Parse(ParseContext pc)
{
    RegexExpression result = null, next = null;
    int ich;
    pc.EnsureStarted();
    // Location of the element currently being parsed; refreshed after every element.
    var line = pc.Line;
    var column = pc.Column;
    var position = pc.Position;
    while (true)
    {
        switch (pc.Current)
        {
            case -1:
                return result;
            case '.':
                next = new RegexCharsetExpression(new RegexCharsetEntry[] { new RegexCharsetRangeEntry(char.MinValue, char.MaxValue) }, false);
                next.SetLocation(line, column, position);
                pc.Advance();
                // A modifier binds to the '.' alone, so apply it BEFORE concatenating.
                // Applying it afterwards would turn "ab.*" into "(ab.)*".
                next = _ParseModifier(next, pc);
                if (null == result)
                {
                    result = next;
                }
                else
                {
                    result = new RegexConcatExpression(result, next);
                    result.SetLocation(line, column, position);
                }
                line = pc.Line;
                column = pc.Column;
                position = pc.Position;
                break;
            case '\\':
                pc.Advance();
                pc.Expecting();
                switch (pc.Current)
                {
                    case 'd':
                        next = new RegexCharsetExpression(new RegexCharsetEntry[] { new RegexCharsetClassEntry("digit") });
                        pc.Advance();
                        break;
                    case 'D':
                        next = new RegexCharsetExpression(new RegexCharsetEntry[] { new RegexCharsetClassEntry("digit") }, true);
                        pc.Advance();
                        break;
                    case 'h':
                        next = new RegexCharsetExpression(new RegexCharsetEntry[] { new RegexCharsetClassEntry("blank") });
                        pc.Advance();
                        break;
                    case 'l':
                        next = new RegexCharsetExpression(new RegexCharsetEntry[] { new RegexCharsetClassEntry("lower") });
                        pc.Advance();
                        break;
                    case 's':
                        next = new RegexCharsetExpression(new RegexCharsetEntry[] { new RegexCharsetClassEntry("space") });
                        pc.Advance();
                        break;
                    case 'S':
                        next = new RegexCharsetExpression(new RegexCharsetEntry[] { new RegexCharsetClassEntry("space") }, true);
                        pc.Advance();
                        break;
                    case 'u':
                        next = new RegexCharsetExpression(new RegexCharsetEntry[] { new RegexCharsetClassEntry("upper") });
                        pc.Advance();
                        break;
                    case 'w':
                        next = new RegexCharsetExpression(new RegexCharsetEntry[] { new RegexCharsetClassEntry("word") });
                        pc.Advance();
                        break;
                    case 'W':
                        next = new RegexCharsetExpression(new RegexCharsetEntry[] { new RegexCharsetClassEntry("word") }, true);
                        pc.Advance();
                        break;
                    default:
                        if (-1 != (ich = _ParseEscapePart(pc)))
                        {
                            next = new RegexLiteralExpression((char)ich);
                        }
                        else
                        {
                            pc.Expecting(); // throw an error
                            return null;    // doesn't execute
                        }
                        break;
                }
                next.SetLocation(line, column, position);
                next = _ParseModifier(next, pc);
                if (null != result)
                {
                    result = new RegexConcatExpression(result, next);
                    result.SetLocation(line, column, position);
                }
                else
                {
                    result = next;
                }
                line = pc.Line;
                column = pc.Column;
                position = pc.Position;
                break;
            case ')':
                // Leave the ')' for the caller that opened the group.
                return result;
            case '(':
                pc.Advance();
                pc.Expecting();
                next = Parse(pc);
                pc.Expecting(')');
                pc.Advance();
                next = _ParseModifier(next, pc);
                if (null == result)
                {
                    result = next;
                }
                else
                {
                    result = new RegexConcatExpression(result, next);
                    result.SetLocation(line, column, position);
                }
                line = pc.Line;
                column = pc.Column;
                position = pc.Position;
                break;
            case '|':
                if (-1 != pc.Advance())
                {
                    // Everything after the '|' becomes the right hand alternative.
                    next = Parse(pc);
                    result = new RegexOrExpression(result, next);
                    result.SetLocation(line, column, position);
                }
                else
                {
                    // Trailing '|': the right hand alternative is empty.
                    result = new RegexOrExpression(result, null);
                    result.SetLocation(line, column, position);
                }
                line = pc.Line;
                column = pc.Column;
                position = pc.Position;
                break;
            case '[':
                pc.ClearCapture();
                pc.Advance();
                pc.Expecting();
                bool not = false;
                if ('^' == pc.Current)
                {
                    not = true;
                    pc.Advance();
                    pc.Expecting();
                }
                // An empty set is passed through as-is. The debugger break that used to
                // fire here was a development leftover and must not run in production.
                var ranges = _ParseRanges(pc);
                pc.Expecting(']');
                pc.Advance();
                next = new RegexCharsetExpression(ranges, not);
                next.SetLocation(line, column, position);
                next = _ParseModifier(next, pc);
                if (null == result)
                {
                    result = next;
                }
                else
                {
                    result = new RegexConcatExpression(result, next);
                    // Use the saved element location, consistent with every other branch.
                    result.SetLocation(line, column, position);
                }
                line = pc.Line;
                column = pc.Column;
                position = pc.Position;
                break;
            default:
                // Any other character is a literal.
                ich = pc.Current;
                next = new RegexLiteralExpression((char)ich);
                next.SetLocation(line, column, position);
                pc.Advance();
                next = _ParseModifier(next, pc);
                if (null == result)
                {
                    result = next;
                }
                else
                {
                    result = new RegexConcatExpression(result, next);
                    result.SetLocation(line, column, position);
                }
                line = pc.Line;
                column = pc.Column;
                position = pc.Position;
                break;
        }
    }
}
/// <summary>
/// Builds a regular expression from the transitions reachable from a state machine state
/// </summary>
/// <param name="fa">The state to convert</param>
/// <param name="visited">The states already converted; a state found here is not converted again</param>
/// <returns>The expression for <paramref name="fa"/>, or null if it was already visited or has no transitions</returns>
static RegexExpression _FromFA<TAccept>(CharFA<TAccept> fa, HashSet<CharFA<TAccept>> visited)
{
    if (!visited.Add(fa))
    {
        // Already handled on this walk.
        return null;
    }

    var transitions = fa.FillInputTransitionRangesGroupedByState();
    bool accepting = fa.IsAccepting;
    RegexExpression result = null;

    foreach (var transition in transitions)
    {
        RegexExpression term;
        if (1 == transition.Value.Count && 1 == transition.Value[0].Length)
        {
            // A single one-character range is emitted as a plain literal.
            term = new RegexLiteralExpression(transition.Value[0][0]);
        }
        else
        {
            // Otherwise emit a character set with one entry per range.
            var entries = new List<RegexCharsetEntry>();
            foreach (var range in transition.Value)
            {
                if (range.First == range.Last)
                {
                    entries.Add(new RegexCharsetCharEntry(range.First));
                }
                else
                {
                    entries.Add(new RegexCharsetRangeEntry(range.First, range.Last));
                }
            }
            term = new RegexCharsetExpression(entries);
        }

        // Append whatever follows from the destination state.
        var tail = _FromFA(transition.Key, visited);
        if (null != tail)
        {
            term = new RegexConcatExpression(term, tail);
        }

        // Each destination state contributes one alternative.
        if (null == result)
        {
            result = term;
        }
        else
        {
            result = new RegexOrExpression(result, term);
        }
    }

    // Check whether this state appears among its own descendants.
    var loops = false;
    foreach (var descendant in fa.Descendants)
    {
        if (descendant == fa)
        {
            loops = true;
            break;
        }
    }

    if (!accepting || fa.IsFinal || loops)
    {
        return result;
    }
    // Accepting, not final and not looping: what follows is wrapped as optional.
    return new RegexOptionalExpression(result);
}