/// <summary>
/// Builds a node of kind <c>Kind.RegexpIntersection</c> whose two operands are
/// <paramref name="exp1"/> and <paramref name="exp2"/>.
/// </summary>
/// <param name="exp1">Stored as the first operand of the new node.</param>
/// <param name="exp2">Stored as the second operand of the new node.</param>
/// <returns>The newly created node.</returns>
private static RegExp MakeIntersection(RegExp exp1, RegExp exp2)
{
    return new RegExp
    {
        kind = Kind.RegexpIntersection,
        exp1 = exp1,
        exp2 = exp2
    };
}
/// <summary>
/// Builds a node of kind <c>Kind.RegexpRepeatMin</c> that wraps <paramref name="exp"/>
/// and records <paramref name="min"/> as its minimum.
/// </summary>
/// <param name="exp">Stored as the operand of the new node.</param>
/// <param name="min">Stored as the node's <c>min</c> value.</param>
/// <returns>The newly created node.</returns>
private static RegExp MakeRepeat(RegExp exp, int min)
{
    return new RegExp
    {
        kind = Kind.RegexpRepeatMin,
        exp1 = exp,
        min = min
    };
}
/// <summary>
/// Parses a complement expression: when the Complement syntax option is enabled and a
/// <c>~</c> is matched, the following complement expression is parsed recursively and wrapped
/// with <c>MakeComplement</c>; otherwise parsing is delegated to <c>ParseCharClassExp</c>.
/// </summary>
/// <returns>The parsed expression.</returns>
private RegExp ParseComplExp()
{
    // The option check comes first so that '~' is only consumed when complement syntax is on.
    bool complemented = this.Check(RegExpSyntaxOptions.Complement) && this.Match('~');
    if (!complemented)
    {
        return this.ParseCharClassExp();
    }

    RegExp operand = this.ParseComplExp();
    return RegExp.MakeComplement(operand);
}
/// <summary>
/// Builds a node of kind <c>Kind.RegexpCharRange</c> with the given end points.
/// </summary>
/// <param name="from">Stored as the node's <c>from</c> character.</param>
/// <param name="to">Stored as the node's <c>to</c> character.</param>
/// <returns>The newly created node.</returns>
private static RegExp MakeCharRange(char from, char to)
{
    return new RegExp
    {
        kind = Kind.RegexpCharRange,
        from = from,
        to = to
    };
}
/// <summary>
/// Builds a node of kind <c>Kind.RegexpRepeatMinMax</c> that wraps <paramref name="exp"/>
/// and records both bounds.
/// </summary>
/// <param name="exp">Stored as the operand (<c>Expr1</c>) of the new node.</param>
/// <param name="min">Stored as the node's <c>Min</c> value.</param>
/// <param name="max">Stored as the node's <c>Max</c> value.</param>
/// <returns>The newly created node.</returns>
private static RegExp MakeRepeat(RegExp exp, int min, int max)
{
    return new RegExp
    {
        Kind = Kind.RegexpRepeatMinMax,
        Expr1 = exp,
        Min = min,
        Max = max
    };
}
/// <summary>
/// Builds a node of kind <c>Kind.RegexpInterval</c> carrying the given bounds and digit count.
/// </summary>
/// <param name="min">Stored as the node's <c>min</c> value.</param>
/// <param name="max">Stored as the node's <c>max</c> value.</param>
/// <param name="digits">Stored as the node's <c>digits</c> value.</param>
/// <returns>The newly created node.</returns>
private static RegExp MakeInterval(int min, int max, int digits)
{
    return new RegExp
    {
        kind = Kind.RegexpInterval,
        min = min,
        max = max,
        digits = digits
    };
}
/// <summary>
/// Parses one character class via <c>ParseCharClass</c> and keeps parsing further ones,
/// folding each into a union with <c>MakeUnion</c>, until the input is exhausted or
/// <c>Peek("]")</c> reports a closing bracket.
/// </summary>
/// <returns>The single class, or the left-nested union of all classes parsed.</returns>
private RegExp ParseCharClasses()
{
    RegExp union = this.ParseCharClass();

    while (true)
    {
        if (!this.More() || this.Peek("]"))
        {
            break;
        }

        RegExp next = this.ParseCharClass();
        union = RegExp.MakeUnion(union, next);
    }

    return union;
}
/// <summary>
/// Parses a repeat expression and, if more input follows that does not end the concatenation,
/// concatenates it with the remainder (parsed recursively, so the result nests to the right).
/// </summary>
/// <returns>The parsed expression.</returns>
private RegExp ParseConcatExp()
{
    RegExp head = this.ParseRepeatExp();

    // Stop at end of input or when Peek(")|") matches.
    if (!this.More() || this.Peek(")|"))
    {
        return head;
    }

    // With intersection syntax enabled, a pending '&' also ends the concatenation.
    if (this.Check(RegExpSyntaxOptions.Intersection) && this.Peek("&"))
    {
        return head;
    }

    RegExp tail = this.ParseConcatExp();
    return RegExp.MakeConcatenation(head, tail);
}
/// <summary>
/// Parses an intersection expression and, when a <c>|</c> is matched, unions it with the
/// union expression that follows (parsed recursively).
/// </summary>
/// <returns>The parsed expression.</returns>
private RegExp ParseUnionExp()
{
    RegExp left = this.ParseInterExp();
    if (!this.Match('|'))
    {
        return left;
    }

    RegExp right = this.ParseUnionExp();
    return RegExp.MakeUnion(left, right);
}
/// <summary>
/// Parses a concatenation expression and, when the Intersection syntax option is enabled and
/// an <c>&amp;</c> is matched, intersects it with the intersection expression that follows
/// (parsed recursively).
/// </summary>
/// <returns>The parsed expression.</returns>
private RegExp ParseInterExp()
{
    RegExp left = this.ParseConcatExp();

    // The option check comes first so that '&' is only consumed when intersection syntax is on.
    bool hasOperator = this.Check(RegExpSyntaxOptions.Intersection) && this.Match('&');
    if (!hasOperator)
    {
        return left;
    }

    RegExp right = this.ParseInterExp();
    return RegExp.MakeIntersection(left, right);
}
/// <summary>
/// Diagnostic helper: parses a fixed sample pattern, writes the parsed expression's
/// <c>ToString()</c> to the test output, then fails on purpose.
/// </summary>
public void DumpRegEx()
{
    const string rawRegEx = @"\d{3}\d3}";
    var parsed = new Fare.RegExp(rawRegEx);

    _testOutput.WriteLine(parsed.ToString());

    // fail test to get output
    Assert.True(false);
}
/// <summary>
/// Diagnostic helper: converts a fixed sample pattern to an automaton, writes the pattern
/// (as a <c>/* ... */</c> line) and the result of <c>Fare.DotFormatter.ToDot</c> to the test
/// output, then fails on purpose.
/// </summary>
public void DumpDot()
{
    // The earlier sample pattern \d{3}-\d{2}-(a|q)J was assigned and immediately overwritten,
    // so it never had any effect; only the pattern below was ever used.
    string rawRegEx = @"0x[0-9A-Fa-f]{4}";

    Fare.RegExp e = new Fare.RegExp(rawRegEx);
    var a = e.ToAutomaton();

    _testOutput.WriteLine($"/* {rawRegEx} */");
    _testOutput.WriteLine(Fare.DotFormatter.ToDot(a));

    // fail test to get output
    Assert.True(false);
}
/// <summary>
/// Parses one item of a bracketed character class: a single character, a
/// <c>from-to</c> range, or a character followed by a trailing <c>-</c>.
/// </summary>
/// <returns>
/// A single-character node, a character-range node, or (when the <c>-</c> is followed by
/// <c>]</c>) the union of the character and a literal <c>-</c>.
/// </returns>
private RegExp ParseCharClass()
{
    char first = this.ParseCharExp();

    if (!this.Match('-'))
    {
        return RegExp.MakeChar(first);
    }

    // A '-' directly before the closing bracket is a literal dash, not a range operator.
    if (this.Peek("]"))
    {
        RegExp literal = RegExp.MakeChar(first);
        RegExp dash = RegExp.MakeChar('-');
        return RegExp.MakeUnion(literal, dash);
    }

    char last = this.ParseCharExp();
    return RegExp.MakeCharRange(first, last);
}
/// <summary>
/// Walks <paramref name="exp"/> while nodes are of kind <paramref name="regExpKind"/>,
/// descending into both operands (first, then second); every node of a different kind is
/// converted with <c>ToAutomaton</c> and appended to <paramref name="list"/>.
/// </summary>
/// <param name="exp">Root of the (sub)expression to walk.</param>
/// <param name="regExpKind">Node kind that is descended into rather than converted.</param>
/// <param name="list">Receives the automata of the leaves, in visiting order.</param>
/// <param name="automata">Passed through to <c>ToAutomaton</c>.</param>
/// <param name="automatonProvider">Passed through to <c>ToAutomaton</c>.</param>
/// <param name="minimize">Passed through to <c>ToAutomaton</c>.</param>
private void FindLeaves(
    RegExp exp,
    Kind regExpKind,
    IList<Automaton> list,
    IDictionary<String, Automaton> automata,
    IAutomatonProvider automatonProvider,
    bool minimize)
{
    if (exp.kind != regExpKind)
    {
        Automaton leaf = exp.ToAutomaton(automata, automatonProvider, minimize);
        list.Add(leaf);
        return;
    }

    this.FindLeaves(exp.exp1, regExpKind, list, automata, automatonProvider, minimize);
    this.FindLeaves(exp.exp2, regExpKind, list, automata, automatonProvider, minimize);
}
/// <summary>
/// Converts this expression to an automaton. When <c>allowMutation</c> is set, the
/// allow-mutate switch is turned on through <c>RegExp.SetAllowMutate(true)</c> for the
/// duration of the conversion and then set back to the value that call returned.
/// </summary>
/// <param name="automata">Passed through to <c>ToAutomaton</c>.</param>
/// <param name="automatonProvider">Passed through to <c>ToAutomaton</c>.</param>
/// <param name="minimize">Passed through to <c>ToAutomaton</c>.</param>
/// <returns>The automaton produced by <c>ToAutomaton</c>.</returns>
private Automaton ToAutomatonAllowMutate(
    IDictionary<string, Automaton> automata,
    IAutomatonProvider automatonProvider,
    bool minimize)
{
    if (!allowMutation)
    {
        return this.ToAutomaton(automata, automatonProvider, minimize);
    }

    // This is not thread safe: the switch is toggled through a static method.
    // SetAllowMutate presumably returns the previous setting; it is handed back afterwards.
    bool previous = RegExp.SetAllowMutate(true);
    try
    {
        return this.ToAutomaton(automata, automatonProvider, minimize);
    }
    finally
    {
        // Restore even when the conversion throws, so a failed conversion does not leave
        // the switch turned on for later callers.
        RegExp.SetAllowMutate(previous);
    }
}
/// <summary>
/// Diagnostic helper: converts a fixed sample pattern to an automaton and writes its state
/// and transition counts, then every state and each of its sorted transitions, to the test
/// output. Fails on purpose at the end.
/// </summary>
public void DumpAST()
{
    const string rawRegEx = @"\d{3}\d3}";
    var parsed = new Fare.RegExp(rawRegEx);
    var automaton = parsed.ToAutomaton();

    _testOutput.WriteLine($"expr: {rawRegEx} has {automaton.NumberOfStates} states and {automaton.NumberOfTransitions} transistions");

    foreach (var node in automaton.GetStates())
    {
        _testOutput.WriteLine($"\tState (Id:{node.Id}, Number:{node.Number}, Accept: {node.Accept}) has {node.Transitions.Count} transitions");

        foreach (var edge in node.GetSortedTransitions(false))
        {
            _testOutput.WriteLine($"\t\tTransition to: {edge.To.Id} [{edge.Min} - {edge.Max}]");
        }
    }

    // fail test to get output
    Assert.True(false);
}
/// <summary>
/// Merges two nodes into one string node. For each operand, in order, its
/// <c>SourceRegExpr</c> is appended when it is of kind <c>Kind.RegexpString</c>, otherwise
/// its <c>Char</c> is appended; the concatenated text is passed to <c>MakeString(string)</c>.
/// </summary>
/// <param name="exp1">Supplies the first part of the text.</param>
/// <param name="exp2">Supplies the second part of the text.</param>
/// <returns>The node built from the combined text.</returns>
private static RegExp MakeString(RegExp exp1, RegExp exp2)
{
    var text = new StringBuilder();

    foreach (RegExp part in new[] { exp1, exp2 })
    {
        if (part.Kind == Kind.RegexpString)
        {
            text.Append(part.SourceRegExpr);
        }
        else
        {
            text.Append(part.Char);
        }
    }

    return RegExp.MakeString(text.ToString());
}
/// <summary>
/// Initializes a new instance of the <see cref="Xeger"/> class.
/// </summary>
/// <param name="regex">The regular expression; must not be null or empty.</param>
/// <param name="random">The random number source; must not be null.</param>
/// <param name="anyCharAlphabet">The list of characters used for computing the possible values for classes "." "\s", "\d", "\w" (and "\S", "\D", "\W"). It does not check explicitly defined chars in regexp.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="regex"/> is null or empty, or <paramref name="random"/> is null.
/// </exception>
public Xeger(string regex, Random random, string anyCharAlphabet = null)
{
    if (string.IsNullOrEmpty(regex))
    {
        throw new ArgumentNullException(nameof(regex));
    }

    if (random == null)
    {
        throw new ArgumentNullException(nameof(random));
    }

    // Only a caller-supplied alphabet replaces whatever the field currently holds.
    if (anyCharAlphabet != null)
    {
        this.anyCharAlphabet = anyCharAlphabet;
    }

    string pattern = RemoveStartEndMarkers(regex);
    var parsed = new RegExp(pattern, anyCharAlphabet, AllExceptAnyString);

    this.UsedAlphabet = parsed.UsedAlphabet();
    this.automaton = parsed.ToAutomaton();
    this.random = random;
}
/// <summary>
/// Merges two nodes into one string node. For each operand, in order, its string
/// <c>s</c> is appended when it is of kind <c>Kind.RegexpString</c>, otherwise its
/// character <c>c</c> is appended; the concatenated text is passed to
/// <c>MakeString(string)</c>.
/// </summary>
/// <param name="exp1">Supplies the first part of the text.</param>
/// <param name="exp2">Supplies the second part of the text.</param>
/// <returns>The node built from the combined text.</returns>
private static RegExp MakeString(RegExp exp1, RegExp exp2)
{
    var text = new StringBuilder();

    foreach (RegExp part in new[] { exp1, exp2 })
    {
        if (part.kind == Kind.RegexpString)
        {
            text.Append(part.s);
        }
        else
        {
            text.Append(part.c);
        }
    }

    return RegExp.MakeString(text.ToString());
}
/// <summary>
/// Parses one simple expression at the current position. Recognised, in this order:
/// <c>.</c> (any character), <c>#</c> (when the Empty option is enabled), <c>@</c> (when the
/// Anystring option is enabled), a double-quoted string, a parenthesised group, a
/// <c>&lt;...&gt;</c> named automaton or numeric interval (when the Automaton or Interval
/// option is enabled), and finally a single character obtained from <c>ParseCharExp</c>.
/// </summary>
/// <returns>The expression node for the construct that was consumed.</returns>
/// <exception cref="ArgumentException">
/// A closing <c>"</c>, <c>)</c> or <c>&gt;</c> is missing, an identifier or interval is used
/// without its syntax option, or an interval is malformed or has a bound that does not fit
/// in an <see cref="int"/>.
/// </exception>
private RegExp ParseSimpleExp()
{
    if (this.Match('.'))
    {
        return RegExp.MakeAnyChar();
    }

    if (this.Check(RegExpSyntaxOptions.Empty) && this.Match('#'))
    {
        return RegExp.MakeEmpty();
    }

    if (this.Check(RegExpSyntaxOptions.Anystring) && this.Match('@'))
    {
        return RegExp.MakeAnyString();
    }

    if (this.Match('"'))
    {
        int start = this.pos;
        while (this.More() && !this.Peek("\""))
        {
            this.Next();
        }

        if (!this.Match('"'))
        {
            throw new ArgumentException("expected '\"' at position " + this.pos);
        }

        // pos is now one past the closing quote, hence the "- 1".
        return RegExp.MakeString(this.b.Substring(start, (this.pos - 1) - start));
    }

    if (this.Match('('))
    {
        if (this.Match('?'))
        {
            this.SkipNonCapturingSubpatternExp();
        }

        // An empty group yields the empty string.
        if (this.Match(')'))
        {
            return RegExp.MakeString(string.Empty);
        }

        RegExp e = this.ParseUnionExp();
        if (!this.Match(')'))
        {
            throw new ArgumentException("expected ')' at position " + this.pos);
        }

        return e;
    }

    if ((this.Check(RegExpSyntaxOptions.Automaton) || this.Check(RegExpSyntaxOptions.Interval)) && this.Match('<'))
    {
        int start = this.pos;
        while (this.More() && !this.Peek(">"))
        {
            this.Next();
        }

        if (!this.Match('>'))
        {
            throw new ArgumentException("expected '>' at position " + this.pos);
        }

        string str = this.b.Substring(start, (this.pos - 1) - start);
        int i = str.IndexOf('-');

        // No dash: the text between the angle brackets is an automaton identifier.
        if (i == -1)
        {
            if (!this.Check(RegExpSyntaxOptions.Automaton))
            {
                throw new ArgumentException("interval syntax error at position " + (this.pos - 1));
            }

            return RegExp.MakeAutomaton(str);
        }

        if (!this.Check(RegExpSyntaxOptions.Interval))
        {
            throw new ArgumentException("illegal identifier at position " + (this.pos - 1));
        }

        // Exactly one dash is allowed, and it must have text on both sides.
        if (i == 0 || i == str.Length - 1 || i != str.LastIndexOf('-'))
        {
            throw new ArgumentException("interval syntax error at position " + (this.pos - 1));
        }

        string smin = str.Substring(0, i);
        string smax = str.Substring(i + 1);

        // TryParse reports both malformed and out-of-range numbers, so an oversized bound
        // becomes the same ArgumentException as any other malformed interval instead of
        // escaping as an OverflowException. The pattern is not user-locale text, so the
        // invariant culture is used.
        int imin;
        int imax;
        if (!int.TryParse(smin, NumberStyles.Integer, CultureInfo.InvariantCulture, out imin)
            || !int.TryParse(smax, NumberStyles.Integer, CultureInfo.InvariantCulture, out imax))
        {
            throw new ArgumentException("interval syntax error at position " + (this.pos - 1));
        }

        // A digit count is only recorded when both bounds are written with the same length.
        int numdigits = smin.Length == smax.Length ? smin.Length : 0;

        if (imin > imax)
        {
            int t = imin;
            imin = imax;
            imax = t;
        }

        return RegExp.MakeInterval(imin, imax, numdigits);
    }

    return RegExp.MakeChar(this.ParseCharExp());
}
/// <summary>
/// Parses one simple expression at the current position. Recognised, in this order:
/// <c>.</c>, <c>#</c> (when the Empty option is enabled), <c>@</c> (when the Anystring option
/// is enabled), a double-quoted string, a parenthesised group, a <c>&lt;...&gt;</c> named
/// automaton or numeric interval (when the Automaton or Interval option is enabled), the
/// backslash escapes <c>\\</c>, <c>\d \D \s \S \w \W</c>, and finally a single character
/// obtained from <c>ParseCharExp</c>.
/// </summary>
/// <remarks>
/// When <c>anyCharAlphabet</c> is not null, <c>\d</c>, <c>\s</c> and <c>\w</c> are built from
/// the characters of that alphabet that satisfy <c>char.IsDigit</c>,
/// <c>char.IsWhiteSpace</c>, or letter/digit/underscore respectively.
/// </remarks>
/// <returns>The expression node for the construct that was consumed.</returns>
/// <exception cref="ArgumentException">
/// A closing <c>"</c>, <c>)</c> or <c>&gt;</c> is missing, an identifier or interval is used
/// without its syntax option, or an interval is malformed or has a bound that does not fit
/// in an <see cref="int"/>.
/// </exception>
private RegExp ParseSimpleExp()
{
    if (this.Match('.'))
    {
        return MakeAnyPrintableASCIIChar(this.anyCharAlphabet);
    }

    if (this.Check(RegExpSyntaxOptions.Empty) && this.Match('#'))
    {
        return RegExp.MakeEmpty();
    }

    if (this.Check(RegExpSyntaxOptions.Anystring) && this.Match('@'))
    {
        return RegExp.MakeAnyString();
    }

    if (this.Match('"'))
    {
        int start = pos;
        while (this.More() && !this.Peek("\""))
        {
            this.Next();
        }

        if (!this.Match('"'))
        {
            throw new ArgumentException("expected '\"' at position " + pos);
        }

        // pos is now one past the closing quote, hence the "- 1".
        return RegExp.MakeString(b.Substring(start, (pos - 1) - start));
    }

    if (this.Match('('))
    {
        if (this.Match('?'))
        {
            this.SkipNonCapturingSubpatternExp();
        }

        // An empty group yields the empty string.
        if (this.Match(')'))
        {
            return RegExp.MakeString(string.Empty);
        }

        RegExp e = this.ParseUnionExp();
        if (!this.Match(')'))
        {
            throw new ArgumentException("expected ')' at position " + pos);
        }

        return e;
    }

    if ((this.Check(RegExpSyntaxOptions.Automaton) || this.Check(RegExpSyntaxOptions.Interval)) && this.Match('<'))
    {
        int start = pos;
        while (this.More() && !this.Peek(">"))
        {
            this.Next();
        }

        if (!this.Match('>'))
        {
            throw new ArgumentException("expected '>' at position " + pos);
        }

        string str = b.Substring(start, (pos - 1) - start);
        int i = str.IndexOf('-');

        // No dash: the text between the angle brackets is an automaton identifier.
        if (i == -1)
        {
            if (!this.Check(RegExpSyntaxOptions.Automaton))
            {
                throw new ArgumentException("interval syntax error at position " + (pos - 1));
            }

            return RegExp.MakeAutomaton(str);
        }

        if (!this.Check(RegExpSyntaxOptions.Interval))
        {
            throw new ArgumentException("illegal identifier at position " + (pos - 1));
        }

        // Exactly one dash is allowed, and it must have text on both sides.
        if (i == 0 || i == str.Length - 1 || i != str.LastIndexOf('-'))
        {
            throw new ArgumentException("interval syntax error at position " + (pos - 1));
        }

        string smin = str.Substring(0, i);
        string smax = str.Substring(i + 1);

        // TryParse reports both malformed and out-of-range numbers, so an oversized bound
        // becomes the same ArgumentException as any other malformed interval instead of
        // escaping as an OverflowException. The pattern is not user-locale text, so the
        // invariant culture is used.
        int imin;
        int imax;
        if (!int.TryParse(smin, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out imin)
            || !int.TryParse(smax, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out imax))
        {
            throw new ArgumentException("interval syntax error at position " + (pos - 1));
        }

        // A digit count is only recorded when both bounds are written with the same length.
        int numdigits = smin.Length == smax.Length ? smin.Length : 0;

        if (imin > imax)
        {
            int t = imin;
            imin = imax;
            imax = t;
        }

        return RegExp.MakeInterval(imin, imax, numdigits);
    }

    if (this.Match('\\'))
    {
        // Escaped '\' character.
        if (this.Match('\\'))
        {
            return MakeChar('\\');
        }

        // Digits: \d is the class itself, \D is everything else in the alphabet.
        bool inclusion = this.Match('d');
        if (inclusion || this.Match('D'))
        {
            RegExp digitChars = MakeCharRange('0', '9');
            if (!inclusion)
            {
                return ExcludeChars(digitChars, MakeAnyPrintableASCIIChar(this.anyCharAlphabet));
            }

            if (this.anyCharAlphabet == null)
            {
                return digitChars;
            }

            return MakeAnyPrintableASCIIChar(
                new string(this.anyCharAlphabet.ToCharArray().Where(char.IsDigit).ToArray()));
        }

        // Whitespace chars only.
        inclusion = this.Match('s');
        if (inclusion || this.Match('S'))
        {
            RegExp whitespaceChars = MakeUnion(MakeChar(' '), MakeChar('\t'));
            if (!inclusion)
            {
                return ExcludeChars(whitespaceChars, MakeAnyPrintableASCIIChar(this.anyCharAlphabet));
            }

            if (this.anyCharAlphabet == null)
            {
                return whitespaceChars;
            }

            return MakeAnyPrintableASCIIChar(
                new string(this.anyCharAlphabet.ToCharArray().Where(char.IsWhiteSpace).ToArray()));
        }

        // Word character. Range is [A-Za-z0-9_]
        inclusion = this.Match('w');
        if (inclusion || this.Match('W'))
        {
            var ranges = new[] { MakeCharRange('A', 'Z'), MakeCharRange('a', 'z'), MakeCharRange('0', '9') };
            RegExp wordChars = ranges.Aggregate(MakeChar('_'), MakeUnion);
            if (!inclusion)
            {
                return ExcludeChars(wordChars, MakeAnyPrintableASCIIChar(this.anyCharAlphabet));
            }

            if (this.anyCharAlphabet == null)
            {
                return wordChars;
            }

            return MakeAnyPrintableASCIIChar(
                new string(this.anyCharAlphabet.ToCharArray()
                    .Where(x => char.IsLetter(x) || char.IsDigit(x) || x == '_')
                    .ToArray()));
        }

        // Any other escape: the backslash has been consumed and the next character is
        // handed to ParseCharExp below.
    }

    return RegExp.MakeChar(this.ParseCharExp());
}
/// <summary>
/// Builds an expression by wrapping <paramref name="charset"/> in square brackets and
/// parsing the resulting pattern with the <c>RegExp(string, null)</c> constructor.
/// </summary>
/// <param name="charset">
/// Text placed verbatim between <c>[</c> and <c>]</c>; it is not escaped here.
/// </param>
/// <returns>The expression parsed from the bracketed pattern.</returns>
private static RegExp MakeAnyFromCharset(string charset)
{
    string pattern = "[" + charset + "]";
    return new RegExp(pattern, null);
}
/// <summary>
/// Builds the expression "<paramref name="allChars"/> but not <paramref name="exclusion"/>"
/// as the intersection of <paramref name="allChars"/> with the complement of
/// <paramref name="exclusion"/>.
/// </summary>
/// <param name="exclusion">Expression to exclude.</param>
/// <param name="allChars">Expression to exclude it from.</param>
/// <returns>The intersection node.</returns>
private static RegExp ExcludeChars(RegExp exclusion, RegExp allChars)
{
    RegExp everythingElse = MakeComplement(exclusion);
    return MakeIntersection(allChars, everythingElse);
}
/// <summary>
/// Parses a complement expression followed by any number of repetition suffixes:
/// <c>?</c>, <c>*</c>, <c>+</c>, <c>{n}</c>, <c>{n,}</c> or <c>{n,m}</c>. Each suffix wraps
/// the expression built so far.
/// </summary>
/// <returns>The parsed expression with all suffixes applied.</returns>
/// <exception cref="ArgumentException">
/// A <c>{</c> is not followed by an integer, a count does not fit in an <see cref="int"/>,
/// or the closing <c>}</c> is missing.
/// </exception>
private RegExp ParseRepeatExp()
{
    RegExp e = this.ParseComplExp();

    while (this.Peek("?*+{"))
    {
        if (this.Match('?'))
        {
            e = RegExp.MakeOptional(e);
        }
        else if (this.Match('*'))
        {
            e = RegExp.MakeRepeat(e);
        }
        else if (this.Match('+'))
        {
            e = RegExp.MakeRepeat(e, 1);
        }
        else if (this.Match('{'))
        {
            int start = pos;
            while (this.Peek("0123456789"))
            {
                this.Next();
            }

            if (start == pos)
            {
                throw new ArgumentException("integer expected at position " + pos);
            }

            // The text is digits only, so the only way parsing can fail is overflow.
            // Report it as an ArgumentException like every other pattern error instead of
            // letting an OverflowException escape.
            int n;
            if (!int.TryParse(b.Substring(start, pos - start), System.Globalization.NumberStyles.None, System.Globalization.CultureInfo.InvariantCulture, out n))
            {
                throw new ArgumentException("integer out of range at position " + start);
            }

            // m == -1 means "no upper bound" ({n,}).
            int m = -1;
            if (this.Match(','))
            {
                start = pos;
                while (this.Peek("0123456789"))
                {
                    this.Next();
                }

                if (start != pos
                    && !int.TryParse(b.Substring(start, pos - start), System.Globalization.NumberStyles.None, System.Globalization.CultureInfo.InvariantCulture, out m))
                {
                    throw new ArgumentException("integer out of range at position " + start);
                }
            }
            else
            {
                // {n} is shorthand for {n,n}.
                m = n;
            }

            if (!this.Match('}'))
            {
                throw new ArgumentException("expected '}' at position " + pos);
            }

            e = m == -1 ? RegExp.MakeRepeat(e, n) : RegExp.MakeRepeat(e, n, m);
        }
    }

    return e;
}
/// <summary>
/// Parses one simple expression at the current position. Recognised, in this order:
/// <c>.</c>, <c>@</c> (when the Anystring option is enabled), a double-quoted string, a
/// parenthesised group, a <c>&lt;...&gt;</c> named automaton or numeric interval (when the
/// Automaton or Interval option is enabled), the backslash escapes <c>\\</c>,
/// <c>\d \D \s \S \w \W</c>, and finally a single character obtained from
/// <c>ParseCharExp</c>.
/// </summary>
/// <returns>The expression node for the construct that was consumed.</returns>
/// <exception cref="ArgumentException">
/// A closing <c>"</c>, <c>)</c> or <c>&gt;</c> is missing, an identifier or interval is used
/// without its syntax option, or an interval is malformed or has a bound that does not fit
/// in an <see cref="int"/>.
/// </exception>
private RegExp ParseSimpleExp()
{
    if (this.Match('.'))
    {
        return MakeAnyPrintableASCIIChar();
    }

    /* Issue 32, https://github.com/moodmosaic/Fare/issues/32
     * The intent of the original code is a little unclear. The comment for the Empty value in the
     * enum is 'Enables empty language.' Using '#' as token seems non-standard, and caused
     * unhandled exception in some cases. The best option at this point is to remove handling of the
     * Empty option until a proper implementation is proposed.
     */
    // if (this.Check(RegExpSyntaxOptions.Empty) && this.Match('#'))
    // {
    //     return RegExp.MakeEmpty();
    // }

    if (this.Check(RegExpSyntaxOptions.Anystring) && this.Match('@'))
    {
        return RegExp.MakeAnyString();
    }

    if (this.Match('"'))
    {
        int start = pos;
        while (this.More() && !this.Peek("\""))
        {
            this.Next();
        }

        if (!this.Match('"'))
        {
            throw new ArgumentException("expected '\"' at position " + pos);
        }

        // pos is now one past the closing quote, hence the "- 1".
        return RegExp.MakeString(b.Substring(start, (pos - 1) - start));
    }

    if (this.Match('('))
    {
        if (this.Match('?'))
        {
            this.SkipNonCapturingSubpatternExp();
        }

        // An empty group yields the empty string.
        if (this.Match(')'))
        {
            return RegExp.MakeString(string.Empty);
        }

        RegExp e = this.ParseUnionExp();
        if (!this.Match(')'))
        {
            throw new ArgumentException("expected ')' at position " + pos);
        }

        return e;
    }

    if ((this.Check(RegExpSyntaxOptions.Automaton) || this.Check(RegExpSyntaxOptions.Interval)) && this.Match('<'))
    {
        int start = pos;
        while (this.More() && !this.Peek(">"))
        {
            this.Next();
        }

        if (!this.Match('>'))
        {
            throw new ArgumentException("expected '>' at position " + pos);
        }

        string str = b.Substring(start, (pos - 1) - start);
        int i = str.IndexOf('-');

        // No dash: the text between the angle brackets is an automaton identifier.
        if (i == -1)
        {
            if (!this.Check(RegExpSyntaxOptions.Automaton))
            {
                throw new ArgumentException("interval syntax error at position " + (pos - 1));
            }

            return RegExp.MakeAutomaton(str);
        }

        if (!this.Check(RegExpSyntaxOptions.Interval))
        {
            throw new ArgumentException("illegal identifier at position " + (pos - 1));
        }

        // Exactly one dash is allowed, and it must have text on both sides.
        if (i == 0 || i == str.Length - 1 || i != str.LastIndexOf('-'))
        {
            throw new ArgumentException("interval syntax error at position " + (pos - 1));
        }

        string smin = str.Substring(0, i);
        string smax = str.Substring(i + 1);

        // TryParse reports both malformed and out-of-range numbers, so an oversized bound
        // becomes the same ArgumentException as any other malformed interval instead of
        // escaping as an OverflowException. The pattern is not user-locale text, so the
        // invariant culture is used.
        int imin;
        int imax;
        if (!int.TryParse(smin, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out imin)
            || !int.TryParse(smax, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out imax))
        {
            throw new ArgumentException("interval syntax error at position " + (pos - 1));
        }

        // A digit count is only recorded when both bounds are written with the same length.
        int numdigits = smin.Length == smax.Length ? smin.Length : 0;

        if (imin > imax)
        {
            int t = imin;
            imin = imax;
            imax = t;
        }

        return RegExp.MakeInterval(imin, imax, numdigits);
    }

    if (this.Match('\\'))
    {
        // Escaped '\' character.
        if (this.Match('\\'))
        {
            return MakeChar('\\');
        }

        // Digits: \d is the class itself, \D is every other printable character.
        bool inclusion = this.Match('d');
        if (inclusion || this.Match('D'))
        {
            RegExp digitChars = MakeCharRange('0', '9');
            return inclusion ? digitChars : ExcludeChars(digitChars, MakeAnyPrintableASCIIChar());
        }

        // Whitespace chars only.
        inclusion = this.Match('s');
        if (inclusion || this.Match('S'))
        {
            // Do not add line breaks, as usually RegExp is single line.
            RegExp whitespaceChars = MakeUnion(MakeChar(' '), MakeChar('\t'));
            return inclusion ? whitespaceChars : ExcludeChars(whitespaceChars, MakeAnyPrintableASCIIChar());
        }

        // Word character. Range is [A-Za-z0-9_]
        inclusion = this.Match('w');
        if (inclusion || this.Match('W'))
        {
            var ranges = new[] { MakeCharRange('A', 'Z'), MakeCharRange('a', 'z'), MakeCharRange('0', '9') };
            RegExp wordChars = ranges.Aggregate(MakeChar('_'), MakeUnion);
            return inclusion ? wordChars : ExcludeChars(wordChars, MakeAnyPrintableASCIIChar());
        }

        // Any other escape: the backslash has been consumed and the next character is
        // handed to ParseCharExp below.
    }

    return RegExp.MakeChar(this.ParseCharExp());
}