/// <summary>
/// Parses one or more character classes and combines them, left to right,
/// into a union. Parsing stops when no input remains or when the next
/// character is <c>]</c>.
/// </summary>
/// <returns>The single parsed class, or the union of all parsed classes.</returns>
internal RegExp ParseCharClasses()
{
    RegExp union = ParseCharClass();
    while (true)
    {
        // Stop at end of input or at the closing bracket (which is not consumed here).
        if (!More() || Peek("]"))
        {
            break;
        }
        union = MakeUnion(union, ParseCharClass());
    }
    return union;
}
/// <summary>
/// Parses a concatenation: a repeat expression followed, when more input
/// belongs to the same concatenation, by a recursively parsed concatenation.
/// </summary>
/// <returns>
/// The repeat expression alone, or a concatenation node whose right-hand
/// side is the rest of the concatenation.
/// </returns>
internal RegExp ParseConcatExp()
{
    RegExp head = ParseRepeatExp();

    // Nothing left, or the next character is ')' or '|': the concatenation ends here.
    if (!More() || Peek(")|"))
    {
        return head;
    }

    // With intersection syntax enabled, '&' also terminates the concatenation.
    if (Check(RegExpSyntax.INTERSECTION) && Peek("&"))
    {
        return head;
    }

    return MakeConcatenation(head, ParseConcatExp());
}
/// <summary>
/// Creates a <see cref="Kind.REGEXP_INTERVAL"/> node.
/// </summary>
/// <param name="min">Value stored in the node's <c>min</c> field.</param>
/// <param name="max">Value stored in the node's <c>max</c> field.</param>
/// <param name="digits">Value stored in the node's <c>digits</c> field.</param>
/// <returns>The newly created node.</returns>
internal static RegExp MakeInterval(int min, int max, int digits)
{
    return new RegExp
    {
        kind = Kind.REGEXP_INTERVAL,
        min = min,
        max = max,
        digits = digits
    };
}
/// <summary>
/// Parses a union: an intersection expression optionally followed by
/// <c>|</c> and another (recursively parsed) union.
/// </summary>
/// <returns>
/// The intersection expression alone when no <c>|</c> is matched; otherwise
/// a union node of the left operand and the remaining union.
/// </returns>
internal RegExp ParseUnionExp()
{
    RegExp left = ParseInterExp();
    return Match('|')
        ? MakeUnion(left, ParseUnionExp())
        : left;
}
/// <summary>
/// Creates a <see cref="Kind.REGEXP_REPEAT_MIN"/> node wrapping
/// <paramref name="exp"/>.
/// </summary>
/// <param name="exp">Expression stored in the node's <c>exp1</c> field.</param>
/// <param name="min">Value stored in the node's <c>min</c> field.</param>
/// <returns>The newly created node.</returns>
internal static RegExp MakeRepeat(RegExp exp, int min)
{
    RegExp node = new RegExp();
    node.kind = Kind.REGEXP_REPEAT_MIN;
    node.exp1 = exp;
    node.min = min;
    return node;
}
/// <summary>
/// Creates a <see cref="Kind.REGEXP_UNION"/> node with the two given operands.
/// </summary>
/// <param name="exp1">Expression stored in the node's <c>exp1</c> field.</param>
/// <param name="exp2">Expression stored in the node's <c>exp2</c> field.</param>
/// <returns>The newly created node.</returns>
internal static RegExp MakeUnion(RegExp exp1, RegExp exp2) =>
    new RegExp
    {
        kind = Kind.REGEXP_UNION,
        exp1 = exp1,
        exp2 = exp2
    };
/// <summary>
/// Creates a <see cref="Kind.REGEXP_INTERSECTION"/> node with the two given
/// operands.
/// </summary>
/// <param name="exp1">Expression stored in the node's <c>exp1</c> field.</param>
/// <param name="exp2">Expression stored in the node's <c>exp2</c> field.</param>
/// <returns>The newly created node.</returns>
internal static RegExp MakeIntersection(RegExp exp1, RegExp exp2)
{
    RegExp node = new RegExp();
    node.kind = Kind.REGEXP_INTERSECTION;
    node.exp1 = exp1;
    node.exp2 = exp2;
    return node;
}
/// <summary>
/// Creates a <see cref="Kind.REGEXP_REPEAT_MINMAX"/> node wrapping
/// <paramref name="exp"/>.
/// </summary>
/// <param name="exp">Expression stored in the node's <c>exp1</c> field.</param>
/// <param name="min">Value stored in the node's <c>min</c> field.</param>
/// <param name="max">Value stored in the node's <c>max</c> field.</param>
/// <returns>The newly created node.</returns>
internal static RegExp MakeRepeat(RegExp exp, int min, int max)
{
    return new RegExp
    {
        kind = Kind.REGEXP_REPEAT_MINMAX,
        exp1 = exp,
        min = min,
        max = max
    };
}
/// <summary>
/// Parses an intersection: a concatenation optionally followed by
/// <c>&amp;</c> and another (recursively parsed) intersection. The
/// <c>&amp;</c> operator is only recognized when
/// <see cref="RegExpSyntax.INTERSECTION"/> is enabled.
/// </summary>
/// <returns>
/// The concatenation alone, or an intersection node of the left operand and
/// the remaining intersection.
/// </returns>
internal RegExp ParseInterExp()
{
    RegExp left = ParseConcatExp();

    // Check() is evaluated first so '&' is never consumed when the syntax flag is off.
    if (!Check(RegExpSyntax.INTERSECTION) || !Match('&'))
    {
        return left;
    }

    return MakeIntersection(left, ParseInterExp());
}
/// <summary>
/// Creates a <see cref="Kind.REGEXP_REPEAT_MINMAX"/> node wrapping
/// <paramref name="exp"/>.
/// </summary>
/// <param name="exp">Expression stored in the node's <c>exp1</c> field.</param>
/// <param name="min">Value stored in the node's <c>min</c> field.</param>
/// <param name="max">Value stored in the node's <c>max</c> field.</param>
/// <returns>The newly created node.</returns>
internal static RegExp MakeRepeat(RegExp exp, int min, int max)
{
    RegExp node = new RegExp();
    node.kind = Kind.REGEXP_REPEAT_MINMAX;
    node.exp1 = exp;
    node.min = min;
    node.max = max;
    return node;
}
/// <summary>
/// Walks the tree rooted at <paramref name="exp"/>, descending through
/// every node whose kind equals <paramref name="kind"/>, and appends the
/// automaton of each node of a different kind to <paramref name="list"/>.
/// </summary>
/// <param name="exp">Root of the (sub)tree to traverse.</param>
/// <param name="kind">Node kind that is descended into rather than converted.</param>
/// <param name="list">Receives the automata, in left-to-right order.</param>
/// <param name="automata">Passed through to <c>ToAutomaton</c>.</param>
/// <param name="automaton_provider">Passed through to <c>ToAutomaton</c>.</param>
private void FindLeaves(RegExp exp, Kind kind, IList <Automaton> list, IDictionary <string, Automaton> automata, IAutomatonProvider automaton_provider)
{
    // A node of a different kind is a leaf for this traversal: convert it and stop.
    if (exp.kind != kind)
    {
        list.Add(exp.ToAutomaton(automata, automaton_provider));
        return;
    }

    // Same kind: visit the left operand first, then the right.
    FindLeaves(exp.exp1, kind, list, automata, automaton_provider);
    FindLeaves(exp.exp2, kind, list, automata, automaton_provider);
}
/// <summary>
/// Creates a <see cref="Kind.REGEXP_CHAR_RANGE"/> node covering
/// <paramref name="from"/> through <paramref name="to"/>.
/// </summary>
/// <param name="from">Value stored in the node's <c>from</c> field.</param>
/// <param name="to">Value stored in the node's <c>to</c> field.</param>
/// <returns>The newly created node.</returns>
/// <exception cref="ArgumentException">
/// <paramref name="from"/> is greater than <paramref name="to"/>.
/// </exception>
internal static RegExp MakeCharRange(int from, int to)
{
    bool reversed = from > to;
    if (reversed)
    {
        throw new ArgumentException("invalid range: from (" + from + ") cannot be > to (" + to + ")");
    }

    return new RegExp
    {
        kind = Kind.REGEXP_CHAR_RANGE,
        from = from,
        to = to
    };
}
/// <summary>
/// Builds a string expression from the text of two operands. An operand of
/// kind <see cref="Kind.REGEXP_STRING"/> contributes its <c>s</c> field;
/// any other operand contributes its <c>c</c> field as a code point.
/// </summary>
/// <param name="exp1">Left operand; its text comes first.</param>
/// <param name="exp2">Right operand; its text comes second.</param>
/// <returns>The result of <c>MakeString</c> applied to the combined text.</returns>
private static RegExp MakeString(RegExp exp1, RegExp exp2)
{
    StringBuilder text = new StringBuilder();

    // Left operand.
    if (exp1.kind != Kind.REGEXP_STRING)
    {
        text.AppendCodePoint(exp1.c);
    }
    else
    {
        text.Append(exp1.s);
    }

    // Right operand.
    if (exp2.kind != Kind.REGEXP_STRING)
    {
        text.AppendCodePoint(exp2.c);
    }
    else
    {
        text.Append(exp2.s);
    }

    string combined = text.ToString();
    return MakeString(combined);
}
/// <summary>
/// Parses a simple expression. The alternatives are tried in this order:
/// <list type="bullet">
/// <item><description><c>.</c> - any character.</description></item>
/// <item><description><c>#</c> - empty language (only with <see cref="RegExpSyntax.EMPTY"/>).</description></item>
/// <item><description><c>@</c> - any string (only with <see cref="RegExpSyntax.ANYSTRING"/>).</description></item>
/// <item><description><c>"..."</c> - a literal string; everything up to the next <c>"</c> is taken verbatim.</description></item>
/// <item><description><c>()</c> - the empty string; <c>(exp)</c> - a parenthesized union expression.</description></item>
/// <item><description><c>&lt;name&gt;</c> - a named automaton (with <see cref="RegExpSyntax.AUTOMATON"/>), or
/// <c>&lt;n-m&gt;</c> - a numeric interval (with <see cref="RegExpSyntax.INTERVAL"/>).</description></item>
/// <item><description>Anything else - a single character parsed by <c>ParseCharExp</c>.</description></item>
/// </list>
/// </summary>
/// <returns>The expression node for the matched alternative.</returns>
/// <exception cref="System.ArgumentException">
/// A closing <c>"</c>, <c>)</c> or <c>&gt;</c> is missing, the content between
/// <c>&lt;</c> and <c>&gt;</c> is not permitted by the enabled syntax flags, or
/// an interval is malformed (including bounds that do not fit in an <see cref="int"/>).
/// </exception>
internal RegExp ParseSimpleExp()
{
    if (Match('.'))
    {
        return MakeAnyChar();
    }
    else if (Check(RegExpSyntax.EMPTY) && Match('#'))
    {
        return MakeEmpty();
    }
    else if (Check(RegExpSyntax.ANYSTRING) && Match('@'))
    {
        return MakeAnyString();
    }
    else if (Match('"'))
    {
        int start = pos;
        while (More() && !Peek("\""))
        {
            Next();
        }
        if (!Match('"'))
        {
            throw new System.ArgumentException("expected '\"' at position " + pos);
        }
        // pos is now one past the closing quote, hence the "- 1".
        return MakeString(b.Substring(start, pos - 1 - start));
    }
    else if (Match('('))
    {
        if (Match(')'))
        {
            return MakeString("");
        }
        RegExp e = ParseUnionExp();
        if (!Match(')'))
        {
            throw new System.ArgumentException("expected ')' at position " + pos);
        }
        return e;
    }
    else if ((Check(RegExpSyntax.AUTOMATON) || Check(RegExpSyntax.INTERVAL)) && Match('<'))
    {
        int start = pos;
        while (More() && !Peek(">"))
        {
            Next();
        }
        if (!Match('>'))
        {
            throw new System.ArgumentException("expected '>' at position " + pos);
        }
        // pos is now one past the closing '>', hence the "- 1".
        string s = b.Substring(start, pos - 1 - start);
        int i = s.IndexOf('-');
        if (i == -1)
        {
            // No '-': the content is an automaton identifier.
            if (!Check(RegExpSyntax.AUTOMATON))
            {
                throw new System.ArgumentException("interval syntax error at position " + (pos - 1));
            }
            return MakeAutomaton(s);
        }

        // Contains '-': the content is a numeric interval "min-max".
        if (!Check(RegExpSyntax.INTERVAL))
        {
            throw new System.ArgumentException("illegal identifier at position " + (pos - 1));
        }

        // Exactly one '-' is allowed, and it may not be the first or last character.
        if (i == 0 || i == s.Length - 1 || i != s.LastIndexOf('-'))
        {
            throw new System.ArgumentException("interval syntax error at position " + (pos - 1));
        }

        string smin = s.Substring(0, i);
        string smax = s.Substring(i + 1, s.Length - (i + 1));

        // TryParse reports both malformed and out-of-range bounds as a failure,
        // so neither FormatException nor OverflowException can escape the parser.
        int imin;
        int imax;
        if (!int.TryParse(smin, NumberStyles.Integer, CultureInfo.InvariantCulture, out imin)
            || !int.TryParse(smax, NumberStyles.Integer, CultureInfo.InvariantCulture, out imax))
        {
            throw new System.ArgumentException("interval syntax error at position " + (pos - 1));
        }

        // digits is the shared text length when both bounds have the same length, otherwise 0.
        int digits;
        if (smin.Length == smax.Length)
        {
            digits = smin.Length;
        }
        else
        {
            digits = 0;
        }

        // Accept the bounds in either order.
        if (imin > imax)
        {
            int t = imin;
            imin = imax;
            imax = t;
        }
        return MakeInterval(imin, imax, digits);
    }
    else
    {
        return MakeChar(ParseCharExp());
    }
}
/// <summary>
/// Parses a complement expression followed by any number of repetition
/// suffixes, applied left to right:
/// <c>?</c> (optional), <c>*</c> (repeat), <c>+</c> (repeat with minimum 1),
/// <c>{n}</c> (minimum and maximum both n), <c>{n,}</c> (minimum n) and
/// <c>{n,m}</c> (minimum n, maximum m).
/// </summary>
/// <returns>The complement expression wrapped in one node per suffix.</returns>
/// <exception cref="System.ArgumentException">
/// No digits follow <c>{</c>, a repeat count does not fit in an
/// <see cref="int"/>, or the closing <c>}</c> is missing.
/// </exception>
internal RegExp ParseRepeatExp()
{
    RegExp e = ParseComplExp();
    while (Peek("?*+{"))
    {
        if (Match('?'))
        {
            e = MakeOptional(e);
        }
        else if (Match('*'))
        {
            e = MakeRepeat(e);
        }
        else if (Match('+'))
        {
            e = MakeRepeat(e, 1);
        }
        else if (Match('{'))
        {
            int start = pos;
            while (Peek("0123456789"))
            {
                Next();
            }
            if (start == pos)
            {
                throw new System.ArgumentException("integer expected at position " + pos);
            }

            // The text is digits only, so TryParse can only fail on overflow.
            // Report that as a syntax error instead of leaking OverflowException.
            int n;
            if (!int.TryParse(b.Substring(start, pos - start), NumberStyles.Integer, CultureInfo.InvariantCulture, out n))
            {
                throw new System.ArgumentException("integer out of range at position " + start);
            }

            // m == -1 means "no upper bound was given".
            int m = -1;
            if (Match(','))
            {
                start = pos;
                while (Peek("0123456789"))
                {
                    Next();
                }
                if (start != pos)
                {
                    if (!int.TryParse(b.Substring(start, pos - start), NumberStyles.Integer, CultureInfo.InvariantCulture, out m))
                    {
                        throw new System.ArgumentException("integer out of range at position " + start);
                    }
                }
            }
            else
            {
                // "{n}" is shorthand for "{n,n}".
                m = n;
            }

            if (!Match('}'))
            {
                throw new System.ArgumentException("expected '}' at position " + pos);
            }

            if (m == -1)
            {
                e = MakeRepeat(e, n);
            }
            else
            {
                e = MakeRepeat(e, n, m);
            }
        }
    }
    return e;
}
/// <summary>
/// Parses a simple expression. The alternatives are tried in this order:
/// <c>.</c> (any character); <c>#</c> (empty, with
/// <see cref="RegExpSyntax.EMPTY"/>); <c>@</c> (any string, with
/// <see cref="RegExpSyntax.ANYSTRING"/>); a double-quoted literal string;
/// <c>()</c> or a parenthesized union expression; <c>&lt;name&gt;</c>
/// (automaton, with <see cref="RegExpSyntax.AUTOMATON"/>) or
/// <c>&lt;n-m&gt;</c> (interval, with <see cref="RegExpSyntax.INTERVAL"/>);
/// and finally a single character parsed by <c>ParseCharExp</c>.
/// </summary>
/// <returns>The expression node for the matched alternative.</returns>
/// <exception cref="ArgumentException">
/// A closing <c>"</c>, <c>)</c> or <c>&gt;</c> is missing, the content between
/// <c>&lt;</c> and <c>&gt;</c> is not permitted by the enabled syntax flags, or
/// an interval is malformed.
/// </exception>
internal RegExp ParseSimpleExp()
{
    if (Match('.'))
    {
        return MakeAnyChar();
    }

    if (Check(RegExpSyntax.EMPTY) && Match('#'))
    {
        return MakeEmpty();
    }

    if (Check(RegExpSyntax.ANYSTRING) && Match('@'))
    {
        return MakeAnyString();
    }

    if (Match('"'))
    {
        int quoteStart = pos;
        while (More() && !Peek("\""))
        {
            Next();
        }
        if (!Match('"'))
        {
            throw new ArgumentException("expected '\"' at position " + pos);
        }
        // pos is one past the closing quote at this point.
        int quotedLength = pos - 1 - quoteStart;
        return MakeString(b.Substring(quoteStart, quotedLength));
    }

    if (Match('('))
    {
        if (Match(')'))
        {
            return MakeString("");
        }
        RegExp inner = ParseUnionExp();
        if (!Match(')'))
        {
            throw new ArgumentException("expected ')' at position " + pos);
        }
        return inner;
    }

    if ((Check(RegExpSyntax.AUTOMATON) || Check(RegExpSyntax.INTERVAL)) && Match('<'))
    {
        int bodyStart = pos;
        while (More() && !Peek(">"))
        {
            Next();
        }
        if (!Match('>'))
        {
            throw new ArgumentException("expected '>' at position " + pos);
        }

        // pos is one past the closing '>' at this point.
        string body = b.Substring(bodyStart, pos - 1 - bodyStart);
        int dash = body.IndexOf('-');

        // No '-': the body names an automaton.
        if (dash == -1)
        {
            if (!Check(RegExpSyntax.AUTOMATON))
            {
                throw new ArgumentException("interval syntax error at position " + (pos - 1));
            }
            return MakeAutomaton(body);
        }

        // Otherwise the body is a numeric interval "min-max".
        if (!Check(RegExpSyntax.INTERVAL))
        {
            throw new ArgumentException("illegal identifier at position " + (pos - 1));
        }

        // Exactly one '-' is allowed, and not as the first or last character.
        bool dashMisplaced = dash == 0 || dash == body.Length - 1 || dash != body.LastIndexOf('-');
        if (dashMisplaced)
        {
            throw new ArgumentException("interval syntax error at position " + (pos - 1));
        }

        string lowText = body.Substring(0, dash);
        string highText = body.Substring(dash + 1);
        if (!int.TryParse(lowText, NumberStyles.Integer, CultureInfo.InvariantCulture, out int low)
            || !int.TryParse(highText, NumberStyles.Integer, CultureInfo.InvariantCulture, out int high))
        {
            throw new ArgumentException("interval syntax error at position " + (pos - 1));
        }

        // digits is the shared text length when both bounds have the same length, otherwise 0.
        int digits = lowText.Length == highText.Length ? lowText.Length : 0;

        // The bounds are accepted in either order.
        return MakeInterval(Math.Min(low, high), Math.Max(low, high), digits);
    }

    return MakeChar(ParseCharExp());
}
/// <summary>
/// Parses a complement expression followed by any number of repetition
/// suffixes, applied left to right:
/// <c>?</c> (optional), <c>*</c> (repeat), <c>+</c> (repeat with minimum 1),
/// <c>{n}</c> (minimum and maximum both n), <c>{n,}</c> (minimum n) and
/// <c>{n,m}</c> (minimum n, maximum m).
/// </summary>
/// <returns>The complement expression wrapped in one node per suffix.</returns>
/// <exception cref="ArgumentException">
/// No digits follow <c>{</c>, or the closing <c>}</c> is missing.
/// </exception>
internal RegExp ParseRepeatExp()
{
    RegExp result = ParseComplExp();
    while (Peek("?*+{"))
    {
        if (Match('?'))
        {
            result = MakeOptional(result);
            continue;
        }
        if (Match('*'))
        {
            result = MakeRepeat(result);
            continue;
        }
        if (Match('+'))
        {
            result = MakeRepeat(result, 1);
            continue;
        }
        if (!Match('{'))
        {
            continue;
        }

        // Lower bound: at least one digit is required.
        int digitsStart = pos;
        while (Peek("0123456789"))
        {
            Next();
        }
        if (digitsStart == pos)
        {
            throw new ArgumentException("integer expected at position " + pos);
        }
        // LUCENENET: Optimized so we don't allocate a substring during the parse
        int lower = Integer.Parse(b, digitsStart, pos - digitsStart, radix: 10);

        // Upper bound: -1 means "none given"; "{n}" is shorthand for "{n,n}".
        int upper;
        if (Match(','))
        {
            digitsStart = pos;
            while (Peek("0123456789"))
            {
                Next();
            }
            // LUCENENET: Optimized so we don't allocate a substring during the parse
            upper = digitsStart != pos
                ? Integer.Parse(b, digitsStart, pos - digitsStart, radix: 10)
                : -1;
        }
        else
        {
            upper = lower;
        }

        if (!Match('}'))
        {
            throw new ArgumentException("expected '}' at position " + pos);
        }

        result = upper == -1
            ? MakeRepeat(result, lower)
            : MakeRepeat(result, lower, upper);
    }
    return result;
}
/// <summary>
/// Creates a node of kind <see cref="Kind.REGEXP_ANYSTRING"/>.
/// </summary>
/// <returns>The newly created node.</returns>
internal static RegExp MakeAnyString() =>
    new RegExp { kind = Kind.REGEXP_ANYSTRING };
/// <summary>
/// Creates a node of kind <see cref="Kind.REGEXP_EMPTY"/>.
/// </summary>
/// <returns>The newly created node.</returns>
internal static RegExp MakeEmpty()
{
    return new RegExp
    {
        kind = Kind.REGEXP_EMPTY
    };
}
/// <summary>
/// Creates a node of kind <see cref="Kind.REGEXP_ANYCHAR"/>.
/// </summary>
/// <returns>The newly created node.</returns>
internal static RegExp MakeAnyChar() =>
    new RegExp { kind = Kind.REGEXP_ANYCHAR };