/// <summary>
/// Initializes a new instance of the <see cref = "RegExp" /> class from a string.
/// </summary>
/// <remarks>
/// An empty string yields the expression produced by <c>MakeString(string.Empty)</c>;
/// any other input is parsed with <c>ParseUnionExp</c>, and the whole input must be consumed.
/// </remarks>
/// <param name = "s">A string with the regular expression.</param>
/// <param name = "syntaxFlags">Boolean 'or' of optional syntax constructs to be enabled.</param>
/// <exception cref = "ArgumentNullException"><paramref name = "s" /> is <c>null</c>.</exception>
/// <exception cref = "ArgumentException">Input remains after the top-level expression has been parsed.</exception>
public RegExp(string s, RegExpSyntaxOptions syntaxFlags)
{
    // Fail with a descriptive exception instead of a NullReferenceException at s.Length below.
    if (s == null)
    {
        throw new ArgumentNullException("s");
    }

    this.b = s;
    this.flags = syntaxFlags;

    RegExp e;
    if (s.Length == 0)
    {
        e = RegExp.MakeString(string.Empty);
    }
    else
    {
        e = this.ParseUnionExp();
        if (this.pos < this.b.Length)
        {
            throw new ArgumentException("end-of-string expected at position " + this.pos);
        }
    }

    // The parser returns a separate node; copy its state into this instance.
    this.Kind = e.Kind;
    this.Expr1 = e.Expr1;
    this.Expr2 = e.Expr2;
    this.SourceRegExpr = e.SourceRegExpr;
    this.Char = e.Char;
    this.Min = e.Min;
    this.Max = e.Max;
    this.Digits = e.Digits;
    this.FromChar = e.FromChar;
    this.ToChar = e.ToChar;

    // The parse buffer is only needed during construction.
    this.b = null;
}
/// <summary>
/// Initializes a new instance of the <see cref = "RegExp" /> class from a string,
/// optionally overriding the alphabet used by the parser.
/// </summary>
/// <remarks>
/// An empty string yields the expression produced by <c>MakeString(string.Empty)</c>;
/// any other input is parsed with <c>ParseUnionExp</c>, and the whole input must be consumed.
/// </remarks>
/// <param name = "s">A string with the regular expression.</param>
/// <param name = "anyCharAlphabet">
/// The alphabet stored on this instance before parsing starts; the parser passes it to
/// <c>MakeAnyPrintableASCIIChar</c> when it builds the '.' expression and the shorthand
/// character classes. When <c>null</c>, the instance's existing alphabet value is left unchanged.
/// </param>
/// <param name = "syntaxFlags">Boolean 'or' of optional syntax constructs to be enabled.</param>
/// <exception cref = "ArgumentNullException"><paramref name = "s" /> is <c>null</c>.</exception>
/// <exception cref = "ArgumentException">Input remains after the top-level expression has been parsed.</exception>
public RegExp(string s, string anyCharAlphabet, RegExpSyntaxOptions syntaxFlags)
{
    // Fail with a descriptive exception instead of a NullReferenceException at s.Length below.
    if (s == null)
    {
        throw new ArgumentNullException("s");
    }

    this.b = s;
    this.flags = syntaxFlags;

    // Must be assigned before parsing, because the parser reads it while building nodes.
    if (anyCharAlphabet != null)
    {
        this.anyCharAlphabet = anyCharAlphabet;
    }

    RegExp e;
    if (s.Length == 0)
    {
        e = RegExp.MakeString(string.Empty);
    }
    else
    {
        e = this.ParseUnionExp();
        if (this.pos < this.b.Length)
        {
            throw new ArgumentException("end-of-string expected at position " + this.pos);
        }
    }

    // The parser returns a separate node; copy its state into this instance.
    this.kind = e.kind;
    this.exp1 = e.exp1;
    this.exp2 = e.exp2;
    this.s = e.s;
    this.c = e.c;
    this.min = e.min;
    this.max = e.max;
    this.digits = e.digits;
    this.from = e.from;
    this.to = e.to;

    // The parse buffer is only needed during construction.
    this.b = null;
}
/// <summary>
/// Builds the concatenation of two expressions, merging adjacent char/string operands
/// into a single string expression where possible.
/// </summary>
/// <param name = "exp1">The left operand.</param>
/// <param name = "exp2">The right operand.</param>
/// <returns>
/// A string expression when both operands are chars or strings; otherwise a
/// concatenation node whose touching char/string parts have been merged.
/// </returns>
private static RegExp MakeConcatenation(RegExp exp1, RegExp exp2)
{
    RegExp left = exp1;
    RegExp right = exp2;

    if (exp1.kind == Kind.RegexpChar || exp1.kind == Kind.RegexpString)
    {
        // Literal on the left: merge with a literal right operand outright ...
        if (exp2.kind == Kind.RegexpChar || exp2.kind == Kind.RegexpString)
        {
            return RegExp.MakeString(exp1, exp2);
        }

        // ... or with the literal that starts a right-hand concatenation.
        if (exp2.kind == Kind.RegexpConcatenation
            && (exp2.exp1.kind == Kind.RegexpChar || exp2.exp1.kind == Kind.RegexpString))
        {
            left = RegExp.MakeString(exp1, exp2.exp1);
            right = exp2.exp2;
        }
    }
    else if (exp1.kind == Kind.RegexpConcatenation
        && (exp1.exp2.kind == Kind.RegexpChar || exp1.exp2.kind == Kind.RegexpString)
        && (exp2.kind == Kind.RegexpChar || exp2.kind == Kind.RegexpString))
    {
        // The left-hand concatenation ends in a literal and the right operand is a literal.
        left = exp1.exp1;
        right = RegExp.MakeString(exp1.exp2, exp2);
    }

    return new RegExp
    {
        kind = Kind.RegexpConcatenation,
        exp1 = left,
        exp2 = right
    };
}
/// <summary>
/// Initializes a new instance of the <see cref = "RegExp" /> class from a string.
/// </summary>
/// <remarks>
/// The text that gets parsed is the result of <c>ReplaceShorthandCharacterClasses</c>
/// applied to <paramref name = "s" />; the empty-input test is made on <paramref name = "s" /> itself.
/// </remarks>
/// <param name = "s">A string with the regular expression.</param>
/// <param name = "syntaxFlags">Boolean 'or' of optional syntax constructs to be enabled.</param>
/// <exception cref = "ArgumentException">Input remains after the top-level expression has been parsed.</exception>
internal RegExp(string s, RegExpSyntaxOptions syntaxFlags)
{
    this.b = RegExp.ReplaceShorthandCharacterClasses(s);
    this.flags = syntaxFlags;

    RegExp parsed;
    if (s.Length != 0)
    {
        parsed = this.ParseUnionExp();

        // Anything left over means the expression ended before the input did.
        if (this.pos < this.b.Length)
        {
            throw new ArgumentException("end-of-string expected at position " + this.pos);
        }
    }
    else
    {
        parsed = RegExp.MakeString(string.Empty);
    }

    // Take over the parsed node's state.
    this.kind = parsed.kind;
    this.exp1 = parsed.exp1;
    this.exp2 = parsed.exp2;
    this.s = parsed.s;
    this.c = parsed.c;
    this.min = parsed.min;
    this.max = parsed.max;
    this.digits = parsed.digits;
    this.from = parsed.from;
    this.to = parsed.to;

    // Drop the parse buffer once construction is done.
    this.b = null;
}
/// <summary>
/// Builds a string expression from the text of two expressions, left operand first.
/// </summary>
/// <param name = "exp1">The left operand.</param>
/// <param name = "exp2">The right operand.</param>
/// <returns>The result of <c>MakeString</c> applied to the combined text.</returns>
private static RegExp MakeString(RegExp exp1, RegExp exp2)
{
    var text = new StringBuilder();

    foreach (RegExp operand in new[] { exp1, exp2 })
    {
        // A string expression contributes its string; any other kind contributes its char.
        if (operand.kind == Kind.RegexpString)
        {
            text.Append(operand.s);
        }
        else
        {
            text.Append(operand.c);
        }
    }

    return RegExp.MakeString(text.ToString());
}
/// <summary>
/// Builds a string expression from the text of two expressions, left operand first.
/// </summary>
/// <param name = "exp1">The left operand.</param>
/// <param name = "exp2">The right operand.</param>
/// <returns>The result of <c>MakeString</c> applied to the combined text.</returns>
private static RegExp MakeString(RegExp exp1, RegExp exp2)
{
    // A string expression contributes its source text; any other kind contributes its char.
    string head = exp1.Kind == Kind.RegexpString
        ? exp1.SourceRegExpr
        : exp1.Char.ToString();

    string tail = exp2.Kind == Kind.RegexpString
        ? exp2.SourceRegExpr
        : exp2.Char.ToString();

    return RegExp.MakeString(string.Concat(head, tail));
}
/// <summary>
/// Parses one simple expression at the current position.
/// </summary>
/// <remarks>
/// Constructs are tried in this order: '.', '#' (with the Empty option), '@' (with the
/// Anystring option), a double-quoted string, a parenthesised group, a '&lt;...&gt;'
/// automaton identifier or numeric interval (with the Automaton / Interval options),
/// and the escapes '\\', '\d', '\D', '\s', '\S', '\w' and '\W'. Anything else is handed
/// to <c>ParseCharExp</c> and wrapped as a single-character expression.
/// </remarks>
/// <returns>The expression that was parsed.</returns>
/// <exception cref = "ArgumentException">
/// A closing '"', ')' or '&gt;' is missing, or the content of '&lt;...&gt;' is not allowed
/// by the enabled options or is not a well-formed interval that fits in an <see cref = "int" />.
/// </exception>
private RegExp ParseSimpleExp()
{
    if (this.Match('.'))
    {
        return MakeAnyPrintableASCIIChar(this.anyCharAlphabet);
    }

    if (this.Check(RegExpSyntaxOptions.Empty) && this.Match('#'))
    {
        return RegExp.MakeEmpty();
    }

    if (this.Check(RegExpSyntaxOptions.Anystring) && this.Match('@'))
    {
        return RegExp.MakeAnyString();
    }

    if (this.Match('"'))
    {
        int start = this.pos;
        while (this.More() && !this.Peek("\""))
        {
            this.Next();
        }

        if (!this.Match('"'))
        {
            throw new ArgumentException("expected '\"' at position " + this.pos);
        }

        // pos is now past the closing quote, so the content ends at pos - 1.
        return RegExp.MakeString(this.b.Substring(start, (this.pos - 1) - start));
    }

    if (this.Match('('))
    {
        if (this.Match('?'))
        {
            this.SkipNonCapturingSubpatternExp();
        }

        if (this.Match(')'))
        {
            return RegExp.MakeString(string.Empty);
        }

        RegExp e = this.ParseUnionExp();
        if (!this.Match(')'))
        {
            throw new ArgumentException("expected ')' at position " + this.pos);
        }

        return e;
    }

    if ((this.Check(RegExpSyntaxOptions.Automaton) || this.Check(RegExpSyntaxOptions.Interval)) && this.Match('<'))
    {
        int start = this.pos;
        while (this.More() && !this.Peek(">"))
        {
            this.Next();
        }

        if (!this.Match('>'))
        {
            throw new ArgumentException("expected '>' at position " + this.pos);
        }

        string str = this.b.Substring(start, (this.pos - 1) - start);
        int i = str.IndexOf('-');
        if (i == -1)
        {
            // No '-': the content is an automaton identifier.
            if (!this.Check(RegExpSyntaxOptions.Automaton))
            {
                throw new ArgumentException("interval syntax error at position " + (this.pos - 1));
            }

            return RegExp.MakeAutomaton(str);
        }

        if (!this.Check(RegExpSyntaxOptions.Interval))
        {
            throw new ArgumentException("illegal identifier at position " + (this.pos - 1));
        }

        try
        {
            // Exactly one '-' is allowed, and it must have text on both sides.
            if (i == 0 || i == str.Length - 1 || i != str.LastIndexOf('-'))
            {
                throw new FormatException();
            }

            string smin = str.Substring(0, i);
            string smax = str.Substring(i + 1);

            // Pattern text is machine-readable, so parse it independently of the ambient culture.
            int imin = int.Parse(smin, System.Globalization.CultureInfo.InvariantCulture);
            int imax = int.Parse(smax, System.Globalization.CultureInfo.InvariantCulture);

            // A fixed digit count is recorded only when both bounds are written with the same width.
            int numdigits = smin.Length == smax.Length ? smin.Length : 0;
            if (imin > imax)
            {
                int t = imin;
                imin = imax;
                imax = t;
            }

            return RegExp.MakeInterval(imin, imax, numdigits);
        }
        catch (FormatException)
        {
            throw new ArgumentException("interval syntax error at position " + (this.pos - 1));
        }
        catch (OverflowException)
        {
            // A bound outside the int range is reported like any other malformed interval
            // instead of escaping as an OverflowException.
            throw new ArgumentException("interval syntax error at position " + (this.pos - 1));
        }
    }

    if (this.Match('\\'))
    {
        // Escaped '\' character.
        if (this.Match('\\'))
        {
            return MakeChar('\\');
        }

        // True for the lower-case (inclusive) form, false for the upper-case (negated) form.
        bool inclusion;

        // Digits.
        if ((inclusion = this.Match('d')) || this.Match('D'))
        {
            RegExp digitChars = MakeCharRange('0', '9');
            return inclusion
                ? (this.anyCharAlphabet == null
                    ? digitChars
                    : MakeAnyPrintableASCIIChar(new string(this.anyCharAlphabet.ToCharArray().Where(char.IsDigit).ToArray())))
                : ExcludeChars(digitChars, MakeAnyPrintableASCIIChar(this.anyCharAlphabet));
        }

        // Whitespace chars only (space and tab).
        if ((inclusion = this.Match('s')) || this.Match('S'))
        {
            RegExp whitespaceChars = MakeUnion(MakeChar(' '), MakeChar('\t'));
            return inclusion
                ? (this.anyCharAlphabet == null
                    ? whitespaceChars
                    : MakeAnyPrintableASCIIChar(new string(this.anyCharAlphabet.ToCharArray().Where(char.IsWhiteSpace).ToArray())))
                : ExcludeChars(whitespaceChars, MakeAnyPrintableASCIIChar(this.anyCharAlphabet));
        }

        // Word character. Range is [A-Za-z0-9_]
        if ((inclusion = this.Match('w')) || this.Match('W'))
        {
            var ranges = new[] { MakeCharRange('A', 'Z'), MakeCharRange('a', 'z'), MakeCharRange('0', '9') };
            RegExp wordChars = ranges.Aggregate(MakeChar('_'), MakeUnion);
            return inclusion
                ? (this.anyCharAlphabet == null
                    ? wordChars
                    : MakeAnyPrintableASCIIChar(new string(this.anyCharAlphabet.ToCharArray()
                        .Where(x => char.IsLetter(x) || char.IsDigit(x) || x == '_').ToArray())))
                : ExcludeChars(wordChars, MakeAnyPrintableASCIIChar(this.anyCharAlphabet));
        }

        // Any other escape falls through; the backslash has already been consumed.
    }

    return RegExp.MakeChar(this.ParseCharExp());
}
/// <summary>
/// Parses one simple expression at the current position.
/// </summary>
/// <remarks>
/// Constructs are tried in this order: '.', '@' (with the Anystring option), a
/// double-quoted string, a parenthesised group, a '&lt;...&gt;' automaton identifier or
/// numeric interval (with the Automaton / Interval options), and the escapes '\\',
/// '\d', '\D', '\s', '\S', '\w' and '\W'. Anything else is handed to
/// <c>ParseCharExp</c> and wrapped as a single-character expression.
/// </remarks>
/// <returns>The expression that was parsed.</returns>
/// <exception cref = "ArgumentException">
/// A closing '"', ')' or '&gt;' is missing, or the content of '&lt;...&gt;' is not allowed
/// by the enabled options or is not a well-formed interval that fits in an <see cref = "int" />.
/// </exception>
private RegExp ParseSimpleExp()
{
    if (this.Match('.'))
    {
        return MakeAnyPrintableASCIIChar();
    }

    /* Issue 32, https://github.com/moodmosaic/Fare/issues/32
     * The intent of the original code is a little unclear. The comment for the Empty value in the
     * enum is 'Enables empty language.' Using '#' as token seems non-standard, and caused
     * unhandled exception in some cases. The best option at this point is to remove handling of the
     * Empty option until a proper implementation is proposed.
     */
    // if (this.Check(RegExpSyntaxOptions.Empty) && this.Match('#'))
    // {
    //     return RegExp.MakeEmpty();
    // }

    if (this.Check(RegExpSyntaxOptions.Anystring) && this.Match('@'))
    {
        return RegExp.MakeAnyString();
    }

    if (this.Match('"'))
    {
        int start = this.pos;
        while (this.More() && !this.Peek("\""))
        {
            this.Next();
        }

        if (!this.Match('"'))
        {
            throw new ArgumentException("expected '\"' at position " + this.pos);
        }

        // pos is now past the closing quote, so the content ends at pos - 1.
        return RegExp.MakeString(this.b.Substring(start, (this.pos - 1) - start));
    }

    if (this.Match('('))
    {
        if (this.Match('?'))
        {
            this.SkipNonCapturingSubpatternExp();
        }

        if (this.Match(')'))
        {
            return RegExp.MakeString(string.Empty);
        }

        RegExp e = this.ParseUnionExp();
        if (!this.Match(')'))
        {
            throw new ArgumentException("expected ')' at position " + this.pos);
        }

        return e;
    }

    if ((this.Check(RegExpSyntaxOptions.Automaton) || this.Check(RegExpSyntaxOptions.Interval)) && this.Match('<'))
    {
        int start = this.pos;
        while (this.More() && !this.Peek(">"))
        {
            this.Next();
        }

        if (!this.Match('>'))
        {
            throw new ArgumentException("expected '>' at position " + this.pos);
        }

        string str = this.b.Substring(start, (this.pos - 1) - start);
        int i = str.IndexOf('-');
        if (i == -1)
        {
            // No '-': the content is an automaton identifier.
            if (!this.Check(RegExpSyntaxOptions.Automaton))
            {
                throw new ArgumentException("interval syntax error at position " + (this.pos - 1));
            }

            return RegExp.MakeAutomaton(str);
        }

        if (!this.Check(RegExpSyntaxOptions.Interval))
        {
            throw new ArgumentException("illegal identifier at position " + (this.pos - 1));
        }

        try
        {
            // Exactly one '-' is allowed, and it must have text on both sides.
            if (i == 0 || i == str.Length - 1 || i != str.LastIndexOf('-'))
            {
                throw new FormatException();
            }

            string smin = str.Substring(0, i);
            string smax = str.Substring(i + 1);

            // Pattern text is machine-readable, so parse it independently of the ambient culture.
            int imin = int.Parse(smin, System.Globalization.CultureInfo.InvariantCulture);
            int imax = int.Parse(smax, System.Globalization.CultureInfo.InvariantCulture);

            // A fixed digit count is recorded only when both bounds are written with the same width.
            int numdigits = smin.Length == smax.Length ? smin.Length : 0;
            if (imin > imax)
            {
                int t = imin;
                imin = imax;
                imax = t;
            }

            return RegExp.MakeInterval(imin, imax, numdigits);
        }
        catch (FormatException)
        {
            throw new ArgumentException("interval syntax error at position " + (this.pos - 1));
        }
        catch (OverflowException)
        {
            // A bound outside the int range is reported like any other malformed interval
            // instead of escaping as an OverflowException.
            throw new ArgumentException("interval syntax error at position " + (this.pos - 1));
        }
    }

    if (this.Match('\\'))
    {
        // Escaped '\' character.
        if (this.Match('\\'))
        {
            return MakeChar('\\');
        }

        // True for the lower-case (inclusive) form, false for the upper-case (negated) form.
        bool inclusion;

        // Digits.
        if ((inclusion = this.Match('d')) || this.Match('D'))
        {
            RegExp digitChars = MakeCharRange('0', '9');
            return inclusion ? digitChars : ExcludeChars(digitChars, MakeAnyPrintableASCIIChar());
        }

        // Whitespace chars only.
        if ((inclusion = this.Match('s')) || this.Match('S'))
        {
            // Do not add line breaks, as usually RegExp is single line.
            RegExp whitespaceChars = MakeUnion(MakeChar(' '), MakeChar('\t'));
            return inclusion ? whitespaceChars : ExcludeChars(whitespaceChars, MakeAnyPrintableASCIIChar());
        }

        // Word character. Range is [A-Za-z0-9_]
        if ((inclusion = this.Match('w')) || this.Match('W'))
        {
            var ranges = new[] { MakeCharRange('A', 'Z'), MakeCharRange('a', 'z'), MakeCharRange('0', '9') };
            RegExp wordChars = ranges.Aggregate(MakeChar('_'), MakeUnion);
            return inclusion ? wordChars : ExcludeChars(wordChars, MakeAnyPrintableASCIIChar());
        }

        // Any other escape falls through; the backslash has already been consumed.
    }

    return RegExp.MakeChar(this.ParseCharExp());
}
/// <summary>
/// Parses one simple expression at the current position.
/// </summary>
/// <remarks>
/// Constructs are tried in this order: '.', '#' (with the Empty option), '@' (with the
/// Anystring option), a double-quoted string, a parenthesised group, and a '&lt;...&gt;'
/// automaton identifier or numeric interval (with the Automaton / Interval options).
/// Anything else is handed to <c>ParseCharExp</c> and wrapped as a single-character expression.
/// </remarks>
/// <returns>The expression that was parsed.</returns>
/// <exception cref = "ArgumentException">
/// A closing '"', ')' or '&gt;' is missing, or the content of '&lt;...&gt;' is not allowed
/// by the enabled options or is not a well-formed interval that fits in an <see cref = "int" />.
/// </exception>
private RegExp ParseSimpleExp()
{
    if (this.Match('.'))
    {
        return RegExp.MakeAnyChar();
    }

    if (this.Check(RegExpSyntaxOptions.Empty) && this.Match('#'))
    {
        return RegExp.MakeEmpty();
    }

    if (this.Check(RegExpSyntaxOptions.Anystring) && this.Match('@'))
    {
        return RegExp.MakeAnyString();
    }

    if (this.Match('"'))
    {
        int start = this.pos;
        while (this.More() && !this.Peek("\""))
        {
            this.Next();
        }

        if (!this.Match('"'))
        {
            throw new ArgumentException("expected '\"' at position " + this.pos);
        }

        // pos is now past the closing quote, so the content ends at pos - 1.
        return RegExp.MakeString(this.b.Substring(start, (this.pos - 1) - start));
    }

    if (this.Match('('))
    {
        if (this.Match('?'))
        {
            this.SkipNonCapturingSubpatternExp();
        }

        if (this.Match(')'))
        {
            return RegExp.MakeString(string.Empty);
        }

        RegExp e = this.ParseUnionExp();
        if (!this.Match(')'))
        {
            throw new ArgumentException("expected ')' at position " + this.pos);
        }

        return e;
    }

    if ((this.Check(RegExpSyntaxOptions.Automaton) || this.Check(RegExpSyntaxOptions.Interval)) && this.Match('<'))
    {
        int start = this.pos;
        while (this.More() && !this.Peek(">"))
        {
            this.Next();
        }

        if (!this.Match('>'))
        {
            throw new ArgumentException("expected '>' at position " + this.pos);
        }

        string str = this.b.Substring(start, (this.pos - 1) - start);
        int i = str.IndexOf('-');
        if (i == -1)
        {
            // No '-': the content is an automaton identifier.
            if (!this.Check(RegExpSyntaxOptions.Automaton))
            {
                throw new ArgumentException("interval syntax error at position " + (this.pos - 1));
            }

            return RegExp.MakeAutomaton(str);
        }

        if (!this.Check(RegExpSyntaxOptions.Interval))
        {
            throw new ArgumentException("illegal identifier at position " + (this.pos - 1));
        }

        try
        {
            // Exactly one '-' is allowed, and it must have text on both sides.
            if (i == 0 || i == str.Length - 1 || i != str.LastIndexOf('-'))
            {
                throw new FormatException();
            }

            string smin = str.Substring(0, i);
            string smax = str.Substring(i + 1);

            // Pattern text is machine-readable, so parse it independently of the ambient culture.
            int imin = int.Parse(smin, CultureInfo.InvariantCulture);
            int imax = int.Parse(smax, CultureInfo.InvariantCulture);

            // A fixed digit count is recorded only when both bounds are written with the same width.
            int numdigits = smin.Length == smax.Length ? smin.Length : 0;
            if (imin > imax)
            {
                int t = imin;
                imin = imax;
                imax = t;
            }

            return RegExp.MakeInterval(imin, imax, numdigits);
        }
        catch (FormatException)
        {
            throw new ArgumentException("interval syntax error at position " + (this.pos - 1));
        }
        catch (OverflowException)
        {
            // A bound outside the int range is reported like any other malformed interval
            // instead of escaping as an OverflowException.
            throw new ArgumentException("interval syntax error at position " + (this.pos - 1));
        }
    }

    return RegExp.MakeChar(this.ParseCharExp());
}