/// <summary>
/// Creates a node of kind <c>REGEXP_REPEAT_MIN</c>.
/// </summary>
/// <param name="exp">Expression stored as the node's first operand.</param>
/// <param name="min">Value stored as the node's minimum.</param>
/// <returns>The newly created node.</returns>
internal static RegExp MakeRepeat(RegExp exp, int min)
{
    return new RegExp
    {
        kind = Kind.REGEXP_REPEAT_MIN,
        Exp1 = exp,
        Min = min
    };
}
/// <summary>
/// Creates a node of kind <c>REGEXP_UNION</c> holding the two given operands.
/// </summary>
/// <param name="exp1">Stored as the node's first operand.</param>
/// <param name="exp2">Stored as the node's second operand.</param>
/// <returns>The newly created node.</returns>
internal static RegExp MakeUnion(RegExp exp1, RegExp exp2)
{
    RegExp node = new RegExp();
    node.kind = Kind.REGEXP_UNION;
    node.exp1 = exp1;
    node.exp2 = exp2;
    return node;
}
/// <summary>
/// Creates a node of kind <c>REGEXP_INTERSECTION</c> holding the two given operands.
/// </summary>
/// <param name="exp1">Stored as the node's first operand.</param>
/// <param name="exp2">Stored as the node's second operand.</param>
/// <returns>The newly created node.</returns>
internal static RegExp MakeIntersection(RegExp exp1, RegExp exp2)
{
    return new RegExp
    {
        kind = Kind.REGEXP_INTERSECTION,
        Exp1 = exp1,
        Exp2 = exp2
    };
}
/// <summary>
/// Creates a node of kind <c>REGEXP_UNION</c> holding the two given operands.
/// </summary>
/// <param name="exp1">Stored as the node's first operand.</param>
/// <param name="exp2">Stored as the node's second operand.</param>
/// <returns>The newly created node.</returns>
internal static RegExp MakeUnion(RegExp exp1, RegExp exp2)
{
    return new RegExp
    {
        kind = Kind.REGEXP_UNION,
        Exp1 = exp1,
        Exp2 = exp2
    };
}
/// <summary>
/// Creates a node of kind <c>REGEXP_REPEAT_MIN</c>.
/// </summary>
/// <param name="exp">Expression stored as the node's first operand.</param>
/// <param name="min">Value stored as the node's minimum.</param>
/// <returns>The newly created node.</returns>
internal static RegExp MakeRepeat(RegExp exp, int min)
{
    RegExp node = new RegExp();
    node.kind = Kind.REGEXP_REPEAT_MIN;
    node.exp1 = exp;
    node.min = min;
    return node;
}
/// <summary>
/// Creates a node of kind <c>REGEXP_INTERSECTION</c> holding the two given operands.
/// </summary>
/// <param name="exp1">Stored as the node's first operand.</param>
/// <param name="exp2">Stored as the node's second operand.</param>
/// <returns>The newly created node.</returns>
internal static RegExp MakeIntersection(RegExp exp1, RegExp exp2)
{
    RegExp node = new RegExp();
    node.kind = Kind.REGEXP_INTERSECTION;
    node.exp1 = exp1;
    node.exp2 = exp2;
    return node;
}
/// <summary>
/// Creates a node of kind <c>REGEXP_REPEAT_MINMAX</c>.
/// </summary>
/// <param name="exp">Expression stored as the node's first operand.</param>
/// <param name="min">Value stored as the node's minimum.</param>
/// <param name="max">Value stored as the node's maximum.</param>
/// <returns>The newly created node.</returns>
internal static RegExp MakeRepeat(RegExp exp, int min, int max)
{
    RegExp node = new RegExp();
    node.kind = Kind.REGEXP_REPEAT_MINMAX;
    node.exp1 = exp;
    node.min = min;
    node.max = max;
    return node;
}
/// <summary>
/// Creates a node of kind <c>REGEXP_INTERVAL</c>.
/// </summary>
/// <param name="min">Value stored as the node's minimum.</param>
/// <param name="max">Value stored as the node's maximum.</param>
/// <param name="digits">Value stored as the node's digit count.</param>
/// <returns>The newly created node.</returns>
internal static RegExp MakeInterval(int min, int max, int digits)
{
    return new RegExp
    {
        kind = Kind.REGEXP_INTERVAL,
        Min = min,
        Max = max,
        Digits = digits
    };
}
/// <summary>
/// Parses a concatenation expression and, when the INTERSECTION syntax flag
/// check passes and '&amp;' is matched, combines it (right-recursively) with a
/// following intersection expression.
/// </summary>
/// <returns>The parsed expression.</returns>
internal RegExp ParseInterExp()
{
    RegExp left = ParseConcatExp();

    // Match is only attempted when the flag check passes (same short-circuit order).
    if (!Check(RegExpSyntax.INTERSECTION) || !Match('&'))
    {
        return left;
    }

    return MakeIntersection(left, ParseInterExp());
}
/// <summary>
/// Parses a repeat expression and, unless the concatenation ends here,
/// concatenates it (right-recursively) with the remainder.
/// </summary>
/// <returns>The parsed expression.</returns>
internal RegExp ParseConcatExp()
{
    RegExp head = ParseRepeatExp();

    // The concatenation stops when there is no more input, when the next
    // character is ')' or '|', or when INTERSECTION is enabled and the next
    // character is '&'.
    bool atEnd = !More()
        || Peek(")|")
        || (Check(RegExpSyntax.INTERSECTION) && Peek("&"));
    if (atEnd)
    {
        return head;
    }

    return MakeConcatenation(head, ParseConcatExp());
}
/// <summary>
/// Parses one character class and keeps folding further character classes
/// into a left-nested union until input runs out or ']' is next.
/// </summary>
/// <returns>The union of all parsed character classes.</returns>
internal RegExp ParseCharClasses()
{
    RegExp combined = ParseCharClass();
    while (true)
    {
        if (!More() || Peek("]"))
        {
            return combined;
        }
        combined = MakeUnion(combined, ParseCharClass());
    }
}
/// <summary>
/// Parses an intersection expression and, when '|' is matched, unions it
/// (right-recursively) with a following union expression.
/// </summary>
/// <returns>The parsed expression.</returns>
internal RegExp ParseUnionExp()
{
    RegExp left = ParseInterExp();
    if (!Match('|'))
    {
        return left;
    }

    return MakeUnion(left, ParseUnionExp());
}
/// <summary>
/// Creates a node of kind <c>REGEXP_REPEAT_MINMAX</c>.
/// </summary>
/// <param name="exp">Expression stored as the node's first operand.</param>
/// <param name="min">Value stored as the node's minimum.</param>
/// <param name="max">Value stored as the node's maximum.</param>
/// <returns>The newly created node.</returns>
internal static RegExp MakeRepeat(RegExp exp, int min, int max)
{
    return new RegExp
    {
        kind = Kind.REGEXP_REPEAT_MINMAX,
        exp1 = exp,
        min = min,
        max = max
    };
}
/// <summary>
/// Walks the expression tree through nodes whose kind equals <paramref name="kind"/>
/// and appends the automaton of every other node reached to <paramref name="list"/>,
/// first operand before second.
/// </summary>
/// <param name="exp">Node to inspect.</param>
/// <param name="kind">Node kind that is descended into rather than converted.</param>
/// <param name="list">Receives the automata of the leaves.</param>
/// <param name="automata">Passed through to <c>ToAutomaton</c>.</param>
/// <param name="automaton_provider">Passed through to <c>ToAutomaton</c>.</param>
private void FindLeaves(RegExp exp, Kind kind, IList<Automaton> list, IDictionary<string, Automaton> automata, IAutomatonProvider automaton_provider)
{
    if (exp.kind != kind)
    {
        // A node of a different kind is a leaf of this traversal.
        list.Add(exp.ToAutomaton(automata, automaton_provider));
        return;
    }

    FindLeaves(exp.exp1, kind, list, automata, automaton_provider);
    FindLeaves(exp.exp2, kind, list, automata, automaton_provider);
}
/// <summary>
/// Builds the automaton for the pattern <c>.+\u0775</c> and asserts that both
/// the character-run and the byte-run automaton accept a fixed input string
/// (the byte-run automaton is fed the input's UTF-8 bytes).
/// </summary>
public void TestSpecialCase2()
{
    RegExp re = new RegExp(".+\u0775");
    string input = "\ufadc\ufffd\ub80b\uda5a\udc68\uf234\u0056\uda5b\udcc1\ufffd\ufffd\u0775";
    Automaton automaton = re.ToAutomaton();
    CharacterRunAutomaton cra = new CharacterRunAutomaton(automaton);
    ByteRunAutomaton bra = new ByteRunAutomaton(automaton);

    Assert.IsTrue(cra.Run(input));

    sbyte[] bytes = input.GetBytes(Encoding.UTF8);
    // Original author's note: "this one fails!" -- presumably the assertion
    // that exposed the problem this test guards against (TODO confirm).
    Assert.IsTrue(bra.Run(bytes, 0, bytes.Length));
}
/// <summary>
/// Creates a node of kind <c>REGEXP_CHAR_RANGE</c> with the given bounds.
/// </summary>
/// <param name="from">Value stored as the node's lower bound.</param>
/// <param name="to">Value stored as the node's upper bound.</param>
/// <returns>The newly created node.</returns>
/// <exception cref="System.ArgumentException">
/// <paramref name="from"/> is greater than <paramref name="to"/>.
/// </exception>
internal static RegExp MakeCharRange(int from, int to)
{
    if (from > to)
    {
        throw new System.ArgumentException("invalid range: from (" + from + ") cannot be > to (" + to + ")");
    }

    return new RegExp
    {
        kind = Kind.REGEXP_CHAR_RANGE,
        From = from,
        To = to
    };
}
/// <summary>
/// Builds the automaton for a fixed pattern and asserts that both the
/// character-run and the byte-run automaton accept a fixed input string
/// (the byte-run automaton is fed the input's UTF-8 bytes).
/// </summary>
public void TestSpecialCase3()
{
    RegExp pattern = new RegExp("(\\鯺)*(.)*\\Ӕ");
    string input = "\u5cfd\ufffd\ub2f7\u0033\ue304\u51d7\u3692\udb50\udfb3\u0576\udae2\udc62\u0053\u0449\u04d4";

    Automaton compiled = pattern.ToAutomaton();
    CharacterRunAutomaton charRunner = new CharacterRunAutomaton(compiled);
    ByteRunAutomaton byteRunner = new ByteRunAutomaton(compiled);

    // Character-level match.
    Assert.IsTrue(charRunner.Run(input));

    // Byte-level match over the UTF-8 encoding of the same input.
    sbyte[] utf8 = input.GetBytes(Encoding.UTF8);
    Assert.IsTrue(byteRunner.Run(utf8, 0, utf8.Length));
}
/// <summary>
/// Builds the automaton for the pattern <c>.?</c> and asserts that both the
/// character-run and the byte-run automaton accept the empty input.
/// </summary>
public void TestSpecialCase()
{
    RegExp re = new RegExp(".?");
    Automaton automaton = re.ToAutomaton();
    CharacterRunAutomaton cra = new CharacterRunAutomaton(automaton);
    ByteRunAutomaton bra = new ByteRunAutomaton(automaton);

    // make sure character dfa accepts empty string
    Assert.IsTrue(cra.IsAccept(cra.InitialState));
    Assert.IsTrue(cra.Run(""));
    Assert.IsTrue(cra.Run(new char[0], 0, 0));

    // make sure byte dfa accepts empty string
    Assert.IsTrue(bra.IsAccept(bra.InitialState));
    Assert.IsTrue(bra.Run(new byte[0], 0, 0));
}
/// <summary>
/// Builds a string node from two nodes: for each operand, in order, its string
/// is appended when its kind is <c>REGEXP_STRING</c>, otherwise its code point
/// is appended.
/// </summary>
/// <param name="exp1">First operand.</param>
/// <param name="exp2">Second operand.</param>
/// <returns>The node produced by <c>MakeString(string)</c> for the combined text.</returns>
private static RegExp MakeString(RegExp exp1, RegExp exp2)
{
    StringBuilder text = new StringBuilder();
    foreach (RegExp part in new[] { exp1, exp2 })
    {
        if (part.kind == Kind.REGEXP_STRING)
        {
            text.Append(part.s);
        }
        else
        {
            text.AppendCodePoint(part.c);
        }
    }

    return MakeString(text.ToString());
}
/// <summary>
/// For a number of random regular expressions, asserts the automaton is not
/// empty, repeatedly draws a random accepted string from it, and asserts the
/// automaton accepts that string. On failure the regexp and the offending code
/// points are written to the console before the exception is rethrown.
/// </summary>
public virtual void TestGetRandomAcceptedString()
{
    int ITER1 = AtLeast(100);
    int ITER2 = AtLeast(100);
    for (int i = 0; i < ITER1; i++)
    {
        RegExp re = new RegExp(AutomatonTestUtil.RandomRegexp(Random()), RegExp.NONE);
        Automaton a = re.ToAutomaton();
        Assert.IsFalse(BasicOperations.IsEmpty(a));

        AutomatonTestUtil.RandomAcceptedStrings rx = new AutomatonTestUtil.RandomAcceptedStrings(a);
        for (int j = 0; j < ITER2; j++)
        {
            int[] acc = null;
            try
            {
                acc = rx.GetRandomAcceptedString(Random());
                string s = UnicodeUtil.NewString(acc, 0, acc.Length);
                Assert.IsTrue(BasicOperations.Run(a, s));
            }
            catch (Exception)
            {
                Console.WriteLine("regexp: " + re);
                if (acc != null)
                {
                    Console.WriteLine("fail acc re=" + re + " count=" + acc.Length);
                    for (int k = 0; k < acc.Length; k++)
                    {
                        Console.WriteLine(" " + acc[k].ToString("x"));
                    }
                }

                // Bare rethrow keeps the original stack trace; "throw t;" would reset it.
                throw;
            }
        }
    }
}
/// <summary>
/// For a number of random regular expressions, asserts the automaton is not
/// empty, repeatedly draws a random accepted string from it, and asserts the
/// automaton accepts that string. On failure the regexp and the offending code
/// points are written to the console before the exception is rethrown.
/// </summary>
public virtual void TestGetRandomAcceptedString()
{
    int ITER1 = AtLeast(100);
    int ITER2 = AtLeast(100);
    for (int i = 0; i < ITER1; i++)
    {
        RegExp re = new RegExp(AutomatonTestUtil.RandomRegexp(Random), RegExpSyntax.NONE);
        Automaton a = re.ToAutomaton();
        Assert.IsFalse(BasicOperations.IsEmpty(a));

        RandomAcceptedStrings rx = new RandomAcceptedStrings(a);
        for (int j = 0; j < ITER2; j++)
        {
            int[] acc = null;
            try
            {
                acc = rx.GetRandomAcceptedString(Random);
                string s = UnicodeUtil.NewString(acc, 0, acc.Length);
                Assert.IsTrue(BasicOperations.Run(a, s));
            }
            catch (Exception /*t*/)
            {
                Console.WriteLine("regexp: " + re);
                if (acc != null)
                {
                    Console.WriteLine("fail acc re=" + re + " count=" + acc.Length);
                    for (int k = 0; k < acc.Length; k++)
                    {
                        Console.WriteLine(" " + acc[k].ToString("x"));
                    }
                }

                // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
                throw;
            }
        }
    }
}
/// <summary>
/// Passes the term's field to the base constructor, parses the term's text as
/// a regular expression with the given syntax flags, and stores the resulting
/// automaton.
/// </summary>
/// <param name="term">Term supplying the field and the regular expression text.</param>
/// <param name="flags">Syntax flags handed to the <c>RegExp</c> constructor.</param>
internal DumbRegexpQuery(Term term, RegExpSyntax flags)
    : base(term.Field)
{
    automaton = new RegExp(term.Text, flags).ToAutomaton();
}
/// <summary>
/// Parses a complement expression followed by any number of postfix repeat
/// operators: <c>?</c>, <c>*</c>, <c>+</c>, <c>{n}</c>, <c>{n,}</c> and <c>{n,m}</c>.
/// </summary>
/// <returns>The parsed expression with all postfix operators applied.</returns>
/// <exception cref="System.ArgumentException">
/// No digits follow '{', or the closing '}' is missing.
/// </exception>
internal RegExp ParseRepeatExp()
{
    RegExp result = ParseComplExp();
    while (Peek("?*+{"))
    {
        if (Match('?'))
        {
            result = MakeOptional(result);
            continue;
        }
        if (Match('*'))
        {
            result = MakeRepeat(result);
            continue;
        }
        if (Match('+'))
        {
            result = MakeRepeat(result, 1);
            continue;
        }
        if (!Match('{'))
        {
            continue;
        }

        // Lower bound: at least one digit is required.
        int digitsStart = Pos;
        while (Peek("0123456789"))
        {
            Next();
        }
        if (digitsStart == Pos)
        {
            throw new System.ArgumentException("integer expected at position " + Pos);
        }
        int lower = Convert.ToInt32(b.Substring(digitsStart, Pos - digitsStart));

        // Upper bound: equals the lower bound for "{n}", stays -1 for "{n,}".
        int upper;
        if (Match(','))
        {
            digitsStart = Pos;
            while (Peek("0123456789"))
            {
                Next();
            }
            upper = (digitsStart != Pos)
                ? Convert.ToInt32(b.Substring(digitsStart, Pos - digitsStart))
                : -1;
        }
        else
        {
            upper = lower;
        }

        if (!Match('}'))
        {
            throw new System.ArgumentException("expected '}' at position " + Pos);
        }

        result = (upper == -1)
            ? MakeRepeat(result, lower)
            : MakeRepeat(result, lower, upper);
    }

    return result;
}
/// <summary>
/// Parses a complement expression followed by any number of postfix repeat
/// operators: <c>?</c>, <c>*</c>, <c>+</c>, <c>{n}</c>, <c>{n,}</c> and <c>{n,m}</c>.
/// </summary>
/// <returns>The parsed expression with all postfix operators applied.</returns>
/// <exception cref="ArgumentException">
/// No digits follow '{', or the closing '}' is missing.
/// </exception>
internal RegExp ParseRepeatExp()
{
    RegExp e = ParseComplExp();
    while (Peek("?*+{"))
    {
        if (Match('?'))
        {
            e = MakeOptional(e);
        }
        else if (Match('*'))
        {
            e = MakeRepeat(e);
        }
        else if (Match('+'))
        {
            e = MakeRepeat(e, 1);
        }
        else if (Match('{'))
        {
            // Lower bound: at least one digit is required.
            int start = pos;
            while (Peek("0123456789"))
            {
                Next();
            }
            if (start == pos)
            {
                throw new ArgumentException("integer expected at position " + pos);
            }
            // LUCENENET: Optimized so we don't allocate a substring during the parse
            int n = Integer.Parse(b, start, pos - start, radix: 10);

            // Upper bound: -1 means "no upper bound" ("{n,}"); "{n}" uses m == n.
            int m = -1;
            if (Match(','))
            {
                start = pos;
                while (Peek("0123456789"))
                {
                    Next();
                }
                if (start != pos)
                {
                    // LUCENENET: Optimized so we don't allocate a substring during the parse
                    m = Integer.Parse(b, start, pos - start, radix: 10);
                }
            }
            else
            {
                m = n;
            }

            if (!Match('}'))
            {
                throw new ArgumentException("expected '}' at position " + pos);
            }

            if (m == -1)
            {
                e = MakeRepeat(e, n);
            }
            else
            {
                e = MakeRepeat(e, n, m);
            }
        }
    }
    return e;
}
/// <summary>
/// Parses a simple expression: '.', '#' (when EMPTY is enabled), '@' (when
/// ANYSTRING is enabled), a double-quoted string, a parenthesized expression,
/// an angle-bracketed automaton identifier or numeric interval (when AUTOMATON
/// or INTERVAL is enabled), or otherwise a single character expression.
/// </summary>
/// <returns>The parsed expression.</returns>
/// <exception cref="System.ArgumentException">
/// A closing '"', ')' or '&gt;' is missing, the angle-bracketed content is not
/// allowed by the enabled syntax flags, or an interval is malformed (including
/// bounds that do not fit in an <see cref="int"/>).
/// </exception>
internal RegExp ParseSimpleExp()
{
    if (Match('.'))
    {
        return MakeAnyChar();
    }
    else if (Check(EMPTY) && Match('#'))
    {
        return MakeEmpty();
    }
    else if (Check(ANYSTRING) && Match('@'))
    {
        return MakeAnyString();
    }
    else if (Match('"'))
    {
        int start = Pos;
        while (More() && !Peek("\""))
        {
            Next();
        }
        if (!Match('"'))
        {
            throw new System.ArgumentException("expected '\"' at position " + Pos);
        }
        // Pos is now past the closing quote; exclude it from the literal.
        return MakeString(b.Substring(start, Pos - 1 - start));
    }
    else if (Match('('))
    {
        if (Match(')'))
        {
            return MakeString("");
        }
        RegExp e = ParseUnionExp();
        if (!Match(')'))
        {
            throw new System.ArgumentException("expected ')' at position " + Pos);
        }
        return e;
    }
    else if ((Check(AUTOMATON) || Check(INTERVAL)) && Match('<'))
    {
        int start = Pos;
        while (More() && !Peek(">"))
        {
            Next();
        }
        if (!Match('>'))
        {
            throw new System.ArgumentException("expected '>' at position " + Pos);
        }
        string s = b.Substring(start, Pos - 1 - start);
        int i = s.IndexOf('-');
        if (i == -1)
        {
            // No '-': the content is treated as an automaton identifier.
            if (!Check(AUTOMATON))
            {
                throw new System.ArgumentException("interval syntax error at position " + (Pos - 1));
            }
            return MakeAutomaton(s);
        }
        else
        {
            if (!Check(INTERVAL))
            {
                throw new System.ArgumentException("illegal identifier at position " + (Pos - 1));
            }

            // Exactly one '-' is allowed, and it may be neither first nor last.
            if (i == 0 || i == s.Length - 1 || i != s.LastIndexOf('-'))
            {
                throw new System.ArgumentException("interval syntax error at position " + (Pos - 1));
            }

            string smin = s.Substring(0, i);
            string smax = s.Substring(i + 1, s.Length - (i + 1));

            // TryParse reports both malformed and out-of-range numbers as a
            // failure, so an overflowing bound surfaces as ArgumentException
            // instead of an uncaught OverflowException.
            int imin;
            int imax;
            if (!int.TryParse(smin, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out imin)
                || !int.TryParse(smax, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out imax))
            {
                throw new System.ArgumentException("interval syntax error at position " + (Pos - 1));
            }

            // The digit count is only recorded when both bounds have the same width.
            int digits;
            if (smin.Length == smax.Length)
            {
                digits = smin.Length;
            }
            else
            {
                digits = 0;
            }

            // Normalize so that imin <= imax.
            if (imin > imax)
            {
                int t = imin;
                imin = imax;
                imax = t;
            }
            return MakeInterval(imin, imax, digits);
        }
    }
    else
    {
        return MakeChar(ParseCharExp());
    }
}
/// <summary>
/// Parses a simple expression: '.', '#' (when EMPTY is enabled), '@' (when
/// ANYSTRING is enabled), a double-quoted string, a parenthesized expression,
/// an angle-bracketed automaton identifier or numeric interval (when AUTOMATON
/// or INTERVAL is enabled), or otherwise a single character expression.
/// </summary>
/// <returns>The parsed expression.</returns>
/// <exception cref="ArgumentException">
/// A closing '"', ')' or '&gt;' is missing, the angle-bracketed content is not
/// allowed by the enabled syntax flags, or an interval is malformed.
/// </exception>
internal RegExp ParseSimpleExp()
{
    if (Match('.'))
    {
        return MakeAnyChar();
    }
    else if (Check(RegExpSyntax.EMPTY) && Match('#'))
    {
        return MakeEmpty();
    }
    else if (Check(RegExpSyntax.ANYSTRING) && Match('@'))
    {
        return MakeAnyString();
    }
    else if (Match('"'))
    {
        int start = pos;
        while (More() && !Peek("\""))
        {
            Next();
        }
        if (!Match('"'))
        {
            throw new ArgumentException("expected '\"' at position " + pos);
        }
        // pos is now past the closing quote; exclude it from the literal.
        return MakeString(b.Substring(start, pos - 1 - start));
    }
    else if (Match('('))
    {
        if (Match(')'))
        {
            return MakeString("");
        }
        RegExp e = ParseUnionExp();
        if (!Match(')'))
        {
            throw new ArgumentException("expected ')' at position " + pos);
        }
        return e;
    }
    else if ((Check(RegExpSyntax.AUTOMATON) || Check(RegExpSyntax.INTERVAL)) && Match('<'))
    {
        int start = pos;
        while (More() && !Peek(">"))
        {
            Next();
        }
        if (!Match('>'))
        {
            throw new ArgumentException("expected '>' at position " + pos);
        }
        string s = b.Substring(start, pos - 1 - start);
        int i = s.IndexOf('-');
        if (i == -1)
        {
            // No '-': the content is treated as an automaton identifier.
            if (!Check(RegExpSyntax.AUTOMATON))
            {
                throw new ArgumentException("interval syntax error at position " + (pos - 1));
            }
            return MakeAutomaton(s);
        }
        else
        {
            if (!Check(RegExpSyntax.INTERVAL))
            {
                throw new ArgumentException("illegal identifier at position " + (pos - 1));
            }

            // LUCENENET: Refactored so we don't throw exceptions in the normal flow
            // Exactly one '-' is allowed, and it may be neither first nor last.
            if (i == 0 || i == s.Length - 1 || i != s.LastIndexOf('-'))
            {
                throw new ArgumentException("interval syntax error at position " + (pos - 1));
            }

            string smin = s.Substring(0, i);
            string smax = s.Substring(i + 1, s.Length - (i + 1));
            if (!int.TryParse(smin, NumberStyles.Integer, CultureInfo.InvariantCulture, out int imin)
                || !int.TryParse(smax, NumberStyles.Integer, CultureInfo.InvariantCulture, out int imax))
            {
                throw new ArgumentException("interval syntax error at position " + (pos - 1));
            }

            // The digit count is only recorded when both bounds have the same width.
            int digits;
            if (smin.Length == smax.Length)
            {
                digits = smin.Length;
            }
            else
            {
                digits = 0;
            }

            // Normalize so that imin <= imax.
            if (imin > imax)
            {
                int t = imin;
                imin = imax;
                imax = t;
            }
            return MakeInterval(imin, imax, digits);
        }
    }
    else
    {
        return MakeChar(ParseCharExp());
    }
}