C# (CSharp) Lucene.Net.Util.Automaton RegExp Examples

Programming Language: C# (CSharp)

Namespace/Package Name: Lucene.Net.Util.Automaton

Class/Type: RegExp

Examples at hotexamples.com: 26

C# (CSharp) Lucene.Net.Util.Automaton RegExp - 26 examples found. These are the top rated real world C# (CSharp) examples of Lucene.Net.Util.Automaton.RegExp extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

ToAutomaton(8)

ToStringBuilder(2)

GetIdentifiers(1)

Regular Expression extension to Automaton.

Regular expressions are built from the following abstract syntax:

regexp	::=	unionexp
	\|
unionexp	::=	interexp `\|` unionexp	(union)
	\|	interexp
interexp	::=	concatexp `&` interexp	(intersection)	[OPTIONAL]
	\|	concatexp
concatexp	::=	repeatexp concatexp	(concatenation)
	\|	repeatexp
repeatexp	::=	repeatexp `?`	(zero or one occurrence)
	\|	repeatexp `*`	(zero or more occurrences)
	\|	repeatexp `+`	(one or more occurrences)
	\|	repeatexp `{n}`	(`n` occurrences)
	\|	repeatexp `{n,}`	(`n` or more occurrences)
	\|	repeatexp `{n,m}`	(`n` to `m` occurrences, including both)
	\|	complexp
complexp	::=	`~` complexp	(complement)	[OPTIONAL]
	\|	charclassexp
charclassexp	::=	`[` charclasses `]`	(character class)
	\|	`[^` charclasses `]`	(negated character class)
	\|	simpleexp
charclasses	::=	charclass charclasses
	\|	charclass
charclass	::=	charexp `-` charexp	(character range, including end-points)
	\|	charexp
simpleexp	::=	charexp
	\|	`.`	(any single character)
	\|	`#`	(the empty language)	[OPTIONAL]
	\|	`@`	(any string)	[OPTIONAL]
	\|	`"` <Unicode string without double-quotes> `"`	(a string)
	\|	`(` `)`	(the empty string)
	\|	`(` unionexp `)`	(precedence override)
	\|	`<` <identifier> `>`	(named automaton)	[OPTIONAL]
	\|	`<n-m>`	(numerical interval)	[OPTIONAL]
charexp	::=	<Unicode character>	(a single non-reserved character)
	\|	`\` <Unicode character>	(a single character)

The productions marked [OPTIONAL] are only allowed if specified by the syntax flags passed to the RegExp constructor. The reserved characters used in the (enabled) syntax must be escaped with a backslash (\) or enclosed in double quotes ("..."). (In contrast to other regexp syntaxes, this is also required inside character classes.) Be aware that the dash (-) has a special meaning in charclass expressions. An identifier is a string that does not contain a right angle bracket (>) or a dash (-). Numerical intervals are specified by non-negative decimal integers and include both end points; if n and m have the same number of digits, then the conforming strings must have that length (i.e. they are left-padded with 0's).

@lucene.experimental

RegExp Class Documentation

Example #1

Show file

File: RegExp.cs Project: freemsly/lucenenet

        /// <summary>
        /// Builds a <c>REGEXP_REPEAT_MIN</c> node that wraps <paramref name="exp"/>
        /// and records <paramref name="min"/> as its minimum repetition count.
        /// </summary>
        /// <param name="exp">The expression being repeated.</param>
        /// <param name="min">The minimum number of occurrences.</param>
        /// <returns>The newly created node.</returns>
        internal static RegExp MakeRepeat(RegExp exp, int min)
        {
            return new RegExp
            {
                kind = Kind.REGEXP_REPEAT_MIN,
                Exp1 = exp,
                Min = min
            };
        }

Example #2

Show file

 /// <summary>
 /// Builds a <c>REGEXP_UNION</c> node whose operands are
 /// <paramref name="exp1"/> and <paramref name="exp2"/>.
 /// </summary>
 /// <param name="exp1">The left operand.</param>
 /// <param name="exp2">The right operand.</param>
 /// <returns>The newly created node.</returns>
 internal static RegExp MakeUnion(RegExp exp1, RegExp exp2)
 {
     RegExp node = new RegExp();

     node.kind = Kind.REGEXP_UNION;
     node.exp1 = exp1;
     node.exp2 = exp2;
     return node;
 }

Example #3

Show file

File: RegExp.cs Project: freemsly/lucenenet

        /// <summary>
        /// Builds a <c>REGEXP_INTERSECTION</c> node whose operands are
        /// <paramref name="exp1"/> and <paramref name="exp2"/>.
        /// </summary>
        /// <param name="exp1">The left operand.</param>
        /// <param name="exp2">The right operand.</param>
        /// <returns>The newly created node.</returns>
        internal static RegExp MakeIntersection(RegExp exp1, RegExp exp2)
        {
            return new RegExp
            {
                kind = Kind.REGEXP_INTERSECTION,
                Exp1 = exp1,
                Exp2 = exp2
            };
        }

Example #4

Show file

File: RegExp.cs Project: freemsly/lucenenet

        /// <summary>
        /// Builds a <c>REGEXP_UNION</c> node whose operands are
        /// <paramref name="exp1"/> and <paramref name="exp2"/>.
        /// </summary>
        /// <param name="exp1">The left operand.</param>
        /// <param name="exp2">The right operand.</param>
        /// <returns>The newly created node.</returns>
        internal static RegExp MakeUnion(RegExp exp1, RegExp exp2)
        {
            return new RegExp
            {
                kind = Kind.REGEXP_UNION,
                Exp1 = exp1,
                Exp2 = exp2
            };
        }

Example #5

Show file

 /// <summary>
 /// Builds a <c>REGEXP_REPEAT_MIN</c> node that wraps <paramref name="exp"/>
 /// and records <paramref name="min"/> as its minimum repetition count.
 /// </summary>
 /// <param name="exp">The expression being repeated.</param>
 /// <param name="min">The minimum number of occurrences.</param>
 /// <returns>The newly created node.</returns>
 internal static RegExp MakeRepeat(RegExp exp, int min)
 {
     RegExp node = new RegExp();

     node.kind = Kind.REGEXP_REPEAT_MIN;
     node.exp1 = exp;
     node.min = min;
     return node;
 }

Example #6

Show file

 /// <summary>
 /// Builds a <c>REGEXP_INTERSECTION</c> node whose operands are
 /// <paramref name="exp1"/> and <paramref name="exp2"/>.
 /// </summary>
 /// <param name="exp1">The left operand.</param>
 /// <param name="exp2">The right operand.</param>
 /// <returns>The newly created node.</returns>
 internal static RegExp MakeIntersection(RegExp exp1, RegExp exp2)
 {
     RegExp node = new RegExp();

     node.kind = Kind.REGEXP_INTERSECTION;
     node.exp1 = exp1;
     node.exp2 = exp2;
     return node;
 }

Example #7

Show file

 /// <summary>
 /// Builds a <c>REGEXP_REPEAT_MINMAX</c> node that wraps <paramref name="exp"/>
 /// and records the repetition bounds <paramref name="min"/> and
 /// <paramref name="max"/>.
 /// </summary>
 /// <param name="exp">The expression being repeated.</param>
 /// <param name="min">The lower repetition bound.</param>
 /// <param name="max">The upper repetition bound.</param>
 /// <returns>The newly created node.</returns>
 internal static RegExp MakeRepeat(RegExp exp, int min, int max)
 {
     RegExp node = new RegExp();

     node.kind = Kind.REGEXP_REPEAT_MINMAX;
     node.exp1 = exp;
     node.min = min;
     node.max = max;
     return node;
 }

Example #8

Show file

        /// <summary>
        /// Builds a <c>REGEXP_INTERVAL</c> node carrying the given bounds and
        /// digit count.
        /// </summary>
        /// <param name="min">Value stored as the interval's lower bound.</param>
        /// <param name="max">Value stored as the interval's upper bound.</param>
        /// <param name="digits">Value stored as the node's digit count.</param>
        /// <returns>The newly created node.</returns>
        internal static RegExp MakeInterval(int min, int max, int digits)
        {
            return new RegExp
            {
                kind = Kind.REGEXP_INTERVAL,
                Min = min,
                Max = max,
                Digits = digits
            };
        }

Example #9

Show file

        /// <summary>
        /// Parses an <c>interexp</c>: a <c>concatexp</c>, optionally followed by
        /// <c>&amp;</c> and another <c>interexp</c>. The <c>&amp;</c> operator is
        /// only consumed when <c>Check(RegExpSyntax.INTERSECTION)</c> passes.
        /// </summary>
        /// <returns>The parsed expression.</returns>
        internal RegExp ParseInterExp()
        {
            RegExp left = ParseConcatExp();

            // No intersection operator here (or the syntax is disabled): done.
            if (!Check(RegExpSyntax.INTERSECTION) || !Match('&'))
            {
                return left;
            }

            // Right-recursive: the remainder is parsed as another interexp.
            return MakeIntersection(left, ParseInterExp());
        }

Example #10

Show file

        /// <summary>
        /// Parses a <c>concatexp</c>: a <c>repeatexp</c>, followed by further
        /// <c>concatexp</c> input for as long as the next character does not end
        /// the concatenation.
        /// </summary>
        /// <returns>The parsed expression.</returns>
        internal RegExp ParseConcatExp()
        {
            RegExp head = ParseRepeatExp();

            // Stop at end of input, at ')' or '|', or at '&' when the
            // intersection syntax is enabled.
            if (!More() || Peek(")|") || (Check(RegExpSyntax.INTERSECTION) && Peek("&")))
            {
                return head;
            }

            // Right-recursive: everything that follows is concatenated to head.
            return MakeConcatenation(head, ParseConcatExp());
        }

Example #11

Show file

        /// <summary>
        /// Parses <c>charclasses</c>: one or more <c>charclass</c> items, combined
        /// left to right with <see cref="MakeUnion"/>, up to (but not including)
        /// the closing <c>]</c> or the end of input.
        /// </summary>
        /// <returns>The union of all parsed character classes.</returns>
        internal RegExp ParseCharClasses()
        {
            RegExp combined = ParseCharClass();

            while (true)
            {
                // The closing bracket is left for the caller to consume.
                if (!More() || Peek("]"))
                {
                    break;
                }
                combined = MakeUnion(combined, ParseCharClass());
            }
            return combined;
        }

Example #12

Show file

        /// <summary>
        /// Parses a <c>unionexp</c>: an <c>interexp</c>, optionally followed by
        /// <c>|</c> and another <c>unionexp</c>.
        /// </summary>
        /// <returns>The parsed expression.</returns>
        internal RegExp ParseUnionExp()
        {
            RegExp left = ParseInterExp();

            if (!Match('|'))
            {
                return left;
            }

            // Right-recursive: the remainder is parsed as another unionexp.
            return MakeUnion(left, ParseUnionExp());
        }

Example #13

Show file

        /// <summary>
        /// Builds a <c>REGEXP_REPEAT_MINMAX</c> node that wraps <paramref name="exp"/>
        /// and records the repetition bounds <paramref name="min"/> and
        /// <paramref name="max"/>.
        /// </summary>
        /// <param name="exp">The expression being repeated.</param>
        /// <param name="min">The lower repetition bound.</param>
        /// <param name="max">The upper repetition bound.</param>
        /// <returns>The newly created node.</returns>
        internal static RegExp MakeRepeat(RegExp exp, int min, int max)
        {
            return new RegExp
            {
                kind = Kind.REGEXP_REPEAT_MINMAX,
                exp1 = exp,
                min = min,
                max = max
            };
        }

Example #14

Show file

 /// <summary>
 /// Walks the tree rooted at <paramref name="exp"/>. Nodes whose kind equals
 /// <paramref name="kind"/> are descended into (first <c>exp1</c>, then
 /// <c>exp2</c>); any other node is converted with <c>ToAutomaton</c> and
 /// appended to <paramref name="list"/>.
 /// </summary>
 /// <param name="exp">Root of the (sub)tree to flatten.</param>
 /// <param name="kind">The node kind that is treated as an inner node.</param>
 /// <param name="list">Receives the automata of the leaves, in left-to-right order.</param>
 /// <param name="automata">Passed through unchanged to <c>ToAutomaton</c>.</param>
 /// <param name="automaton_provider">Passed through unchanged to <c>ToAutomaton</c>.</param>
 private void FindLeaves(RegExp exp, Kind kind, IList <Automaton> list, IDictionary <string, Automaton> automata, IAutomatonProvider automaton_provider)
 {
     // Leaf with respect to 'kind': convert it and stop descending.
     if (exp.kind != kind)
     {
         list.Add(exp.ToAutomaton(automata, automaton_provider));
         return;
     }

     FindLeaves(exp.exp1, kind, list, automata, automaton_provider);
     FindLeaves(exp.exp2, kind, list, automata, automaton_provider);
 }

Example #15

Show file

        /// <summary>
        /// Checks that the automaton for <c>.+\u0775</c> accepts a fixed input
        /// both as a character-run automaton and as a byte-run automaton over
        /// the bytes produced by <c>GetBytes(Encoding.UTF8)</c>.
        /// </summary>
        public void TestSpecialCase2()
        {
            RegExp pattern = new RegExp(".+\u0775");
            string text = "\ufadc\ufffd\ub80b\uda5a\udc68\uf234\u0056\uda5b\udcc1\ufffd\ufffd\u0775";
            Automaton compiled = pattern.ToAutomaton();
            CharacterRunAutomaton charRunner = new CharacterRunAutomaton(compiled);
            ByteRunAutomaton byteRunner = new ByteRunAutomaton(compiled);

            Assert.IsTrue(charRunner.Run(text));

            sbyte[] encoded = text.GetBytes(Encoding.UTF8);
            // The original author flagged this assertion with "this one fails!".
            Assert.IsTrue(byteRunner.Run(encoded, 0, encoded.Length));
        }

Example #16

Show file

        /// <summary>
        /// Builds a <c>REGEXP_CHAR_RANGE</c> node covering
        /// <paramref name="from"/> to <paramref name="to"/>.
        /// </summary>
        /// <param name="from">Value stored as the start of the range.</param>
        /// <param name="to">Value stored as the end of the range.</param>
        /// <returns>The newly created node.</returns>
        /// <exception cref="System.ArgumentException">
        /// <paramref name="from"/> is greater than <paramref name="to"/>.
        /// </exception>
        internal static RegExp MakeCharRange(int from, int to)
        {
            if (from <= to)
            {
                RegExp range = new RegExp();

                range.kind = Kind.REGEXP_CHAR_RANGE;
                range.From = from;
                range.To = to;
                return range;
            }

            throw new System.ArgumentException("invalid range: from (" + from + ") cannot be > to (" + to + ")");
        }

Example #17

Show file

        /// <summary>
        /// Checks that the automaton for a pattern built from two escaped
        /// non-ASCII characters accepts a fixed input both as a character-run
        /// automaton and as a byte-run automaton over the bytes produced by
        /// <c>GetBytes(Encoding.UTF8)</c>.
        /// </summary>
        public void TestSpecialCase3()
        {
            RegExp pattern = new RegExp("(\\鯺)*(.)*\\Ӕ");
            string text = "\u5cfd\ufffd\ub2f7\u0033\ue304\u51d7\u3692\udb50\udfb3\u0576\udae2\udc62\u0053\u0449\u04d4";
            Automaton compiled = pattern.ToAutomaton();
            CharacterRunAutomaton charRunner = new CharacterRunAutomaton(compiled);
            ByteRunAutomaton byteRunner = new ByteRunAutomaton(compiled);

            Assert.IsTrue(charRunner.Run(text));

            sbyte[] encoded = text.GetBytes(Encoding.UTF8);
            Assert.IsTrue(byteRunner.Run(encoded, 0, encoded.Length));
        }

Example #18

Show file

File: TestUTF32ToUTF8.cs Project: freemsly/lucenenet

        /// <summary>
        /// Checks that the automaton for <c>.?</c> accepts the empty input, both
        /// as a character-run automaton and as a byte-run automaton.
        /// </summary>
        public void TestSpecialCase()
        {
            Automaton compiled = new RegExp(".?").ToAutomaton();
            CharacterRunAutomaton charRunner = new CharacterRunAutomaton(compiled);
            ByteRunAutomaton byteRunner = new ByteRunAutomaton(compiled);

            // Character DFA: the initial state accepts, and so do empty inputs
            // given as a string or as a zero-length char range.
            Assert.IsTrue(charRunner.IsAccept(charRunner.InitialState));
            Assert.IsTrue(charRunner.Run(""));
            Assert.IsTrue(charRunner.Run(new char[0], 0, 0));

            // Byte DFA: the initial state accepts, and so does a zero-length
            // byte range.
            Assert.IsTrue(byteRunner.IsAccept(byteRunner.InitialState));
            Assert.IsTrue(byteRunner.Run(new byte[0], 0, 0));
        }

Example #19

Show file

        /// <summary>
        /// Merges two nodes into a single string node. For each operand, in
        /// order, the string <c>s</c> is appended when its kind is
        /// <c>REGEXP_STRING</c>; otherwise its code point <c>c</c> is appended.
        /// </summary>
        /// <param name="exp1">The left operand.</param>
        /// <param name="exp2">The right operand.</param>
        /// <returns>The node produced by <c>MakeString(string)</c> for the joined text.</returns>
        private static RegExp MakeString(RegExp exp1, RegExp exp2)
        {
            StringBuilder joined = new StringBuilder();

            foreach (RegExp part in new RegExp[] { exp1, exp2 })
            {
                if (part.kind != Kind.REGEXP_STRING)
                {
                    joined.AppendCodePoint(part.c);
                }
                else
                {
                    joined.Append(part.s);
                }
            }
            return MakeString(joined.ToString());
        }

Example #20

Show file

        /// <summary>
        /// For a number of random regular expressions, builds the automaton,
        /// asserts it is not empty, then repeatedly draws a random accepted
        /// string from it and asserts that the automaton accepts that string.
        /// On failure the regexp and the offending code points (in hex) are
        /// written to the console before the exception is rethrown.
        /// </summary>
        public virtual void TestGetRandomAcceptedString()
        {
            int ITER1 = AtLeast(100);
            int ITER2 = AtLeast(100);

            for (int i = 0; i < ITER1; i++)
            {
                RegExp    re = new RegExp(AutomatonTestUtil.RandomRegexp(Random()), RegExp.NONE);
                Automaton a  = re.ToAutomaton();
                Assert.IsFalse(BasicOperations.IsEmpty(a));

                AutomatonTestUtil.RandomAcceptedStrings rx = new AutomatonTestUtil.RandomAcceptedStrings(a);
                for (int j = 0; j < ITER2; j++)
                {
                    // Stays null if GetRandomAcceptedString itself throws.
                    int[] acc = null;
                    try
                    {
                        acc = rx.GetRandomAcceptedString(Random());
                        string s = UnicodeUtil.NewString(acc, 0, acc.Length);
                        Assert.IsTrue(BasicOperations.Run(a, s));
                    }
                    catch (Exception)
                    {
                        Console.WriteLine("regexp: " + re);
                        if (acc != null)
                        {
                            Console.WriteLine("fail acc re=" + re + " count=" + acc.Length);
                            for (int k = 0; k < acc.Length; k++)
                            {
                                Console.WriteLine("  " + acc[k].ToString("x"));
                            }
                        }
                        // Bare rethrow keeps the original stack trace (CA2200);
                        // "throw t;" would reset it to this line.
                        throw;
                    }
                }
            }
        }

Example #21

Show file

File: TestBasicOperations.cs Project: zalintyre/lucenenet

        /// <summary>
        /// For a number of random regular expressions, builds the automaton,
        /// asserts it is not empty, then repeatedly draws a random accepted
        /// string from it and asserts that the automaton accepts that string.
        /// On failure the regexp and the offending code points (in hex) are
        /// written to the console before the exception is rethrown.
        /// </summary>
        public virtual void TestGetRandomAcceptedString()
        {
            int regexpRounds = AtLeast(100);
            int stringRounds = AtLeast(100);

            for (int round = 0; round < regexpRounds; round++)
            {
                RegExp regexp = new RegExp(AutomatonTestUtil.RandomRegexp(Random), RegExpSyntax.NONE);
                Automaton automaton = regexp.ToAutomaton();
                Assert.IsFalse(BasicOperations.IsEmpty(automaton));

                RandomAcceptedStrings generator = new RandomAcceptedStrings(automaton);
                for (int attempt = 0; attempt < stringRounds; attempt++)
                {
                    // Stays null if GetRandomAcceptedString itself throws.
                    int[] codePoints = null;
                    try
                    {
                        codePoints = generator.GetRandomAcceptedString(Random);
                        string candidate = UnicodeUtil.NewString(codePoints, 0, codePoints.Length);
                        Assert.IsTrue(BasicOperations.Run(automaton, candidate));
                    }
                    catch (Exception)
                    {
                        Console.WriteLine("regexp: " + regexp);
                        if (codePoints != null)
                        {
                            Console.WriteLine("fail acc re=" + regexp + " count=" + codePoints.Length);
                            foreach (int codePoint in codePoints)
                            {
                                Console.WriteLine("  " + codePoint.ToString("x"));
                            }
                        }
                        throw; // LUCENENET: CA2200: bare rethrow preserves the stack details
                    }
                }
            }
        }

Example #22

Show file

 /// <summary>
 /// Creates the query for <paramref name="term"/>'s field and compiles the
 /// term text, interpreted as a regular expression with the given syntax
 /// <paramref name="flags"/>, into the <c>automaton</c> member.
 /// </summary>
 /// <param name="term">Supplies the field (passed to the base constructor) and the regexp text.</param>
 /// <param name="flags">Syntax flags handed to the <c>RegExp</c> constructor.</param>
 internal DumbRegexpQuery(Term term, RegExpSyntax flags)
     : base(term.Field)
 {
     automaton = new RegExp(term.Text, flags).ToAutomaton();
 }

Example #23

Show file

        /// <summary>
        /// Parses a <c>repeatexp</c>: a <c>complexp</c> followed by any number of
        /// postfix repetition operators (<c>?</c>, <c>*</c>, <c>+</c>,
        /// <c>{n}</c>, <c>{n,}</c>, <c>{n,m}</c>), applied left to right.
        /// </summary>
        /// <returns>The parsed expression.</returns>
        /// <exception cref="System.ArgumentException">
        /// A <c>{</c> is not followed by an integer, or the closing <c>}</c> is missing.
        /// </exception>
        internal RegExp ParseRepeatExp()
        {
            RegExp result = ParseComplExp();

            while (Peek("?*+{"))
            {
                if (Match('?'))
                {
                    result = MakeOptional(result);
                    continue;
                }
                if (Match('*'))
                {
                    result = MakeRepeat(result);
                    continue;
                }
                if (Match('+'))
                {
                    result = MakeRepeat(result, 1);
                    continue;
                }
                if (!Match('{'))
                {
                    continue;
                }

                // Lower bound: at least one digit is mandatory after '{'.
                int digitsStart = Pos;
                while (Peek("0123456789"))
                {
                    Next();
                }
                if (digitsStart == Pos)
                {
                    throw new System.ArgumentException("integer expected at position " + Pos);
                }
                int lower = Convert.ToInt32(b.Substring(digitsStart, Pos - digitsStart));

                // Upper bound: equals the lower bound for "{n}", and is -1
                // (meaning "no upper bound") for "{n,}".
                int upper;
                if (!Match(','))
                {
                    upper = lower;
                }
                else
                {
                    digitsStart = Pos;
                    while (Peek("0123456789"))
                    {
                        Next();
                    }
                    upper = (digitsStart == Pos)
                        ? -1
                        : Convert.ToInt32(b.Substring(digitsStart, Pos - digitsStart));
                }

                if (!Match('}'))
                {
                    throw new System.ArgumentException("expected '}' at position " + Pos);
                }

                result = (upper == -1)
                    ? MakeRepeat(result, lower)
                    : MakeRepeat(result, lower, upper);
            }
            return result;
        }

Example #24

Show file

        /// <summary>
        /// Parses a <c>repeatexp</c>: a <c>complexp</c> followed by any number of
        /// postfix repetition operators (<c>?</c>, <c>*</c>, <c>+</c>,
        /// <c>{n}</c>, <c>{n,}</c>, <c>{n,m}</c>), applied left to right.
        /// </summary>
        /// <returns>The parsed expression.</returns>
        /// <exception cref="ArgumentException">
        /// A <c>{</c> is not followed by an integer, or the closing <c>}</c> is missing.
        /// </exception>
        internal RegExp ParseRepeatExp()
        {
            RegExp result = ParseComplExp();

            while (Peek("?*+{"))
            {
                if (Match('?'))
                {
                    result = MakeOptional(result);
                    continue;
                }
                if (Match('*'))
                {
                    result = MakeRepeat(result);
                    continue;
                }
                if (Match('+'))
                {
                    result = MakeRepeat(result, 1);
                    continue;
                }
                if (!Match('{'))
                {
                    continue;
                }

                // Lower bound: at least one digit is mandatory after '{'.
                int digitsStart = pos;
                while (Peek("0123456789"))
                {
                    Next();
                }
                if (digitsStart == pos)
                {
                    throw new ArgumentException("integer expected at position " + pos);
                }
                // LUCENENET: parse straight from the buffer; no substring is allocated
                int lower = Integer.Parse(b, digitsStart, pos - digitsStart, radix: 10);

                // Upper bound: equals the lower bound for "{n}", and is -1
                // (meaning "no upper bound") for "{n,}".
                int upper;
                if (!Match(','))
                {
                    upper = lower;
                }
                else
                {
                    digitsStart = pos;
                    while (Peek("0123456789"))
                    {
                        Next();
                    }
                    // LUCENENET: parse straight from the buffer; no substring is allocated
                    upper = (digitsStart == pos)
                        ? -1
                        : Integer.Parse(b, digitsStart, pos - digitsStart, radix: 10);
                }

                if (!Match('}'))
                {
                    throw new ArgumentException("expected '}' at position " + pos);
                }

                result = (upper == -1)
                    ? MakeRepeat(result, lower)
                    : MakeRepeat(result, lower, upper);
            }
            return result;
        }

Example #25

Show file

 /// <summary>
 /// Parses a <c>simpleexp</c>: <c>.</c>, <c>#</c>, <c>@</c>, a double-quoted
 /// string, a parenthesized expression (or <c>()</c> for the empty string),
 /// a named automaton <c>&lt;identifier&gt;</c>, a numerical interval
 /// <c>&lt;n-m&gt;</c>, or otherwise a single <c>charexp</c>.
 /// The <c>#</c>, <c>@</c> and <c>&lt;...&gt;</c> forms are only recognized
 /// when the corresponding <c>Check(...)</c> call passes.
 /// </summary>
 /// <returns>The parsed expression.</returns>
 /// <exception cref="System.ArgumentException">
 /// A closing <c>"</c>, <c>)</c> or <c>&gt;</c> is missing, the content of
 /// <c>&lt;...&gt;</c> is not allowed by the enabled syntax, or an interval
 /// is malformed (including bounds that do not fit in an <c>int</c>).
 /// </exception>
 internal RegExp ParseSimpleExp()
 {
     if (Match('.'))
     {
         return(MakeAnyChar());
     }
     else if (Check(EMPTY) && Match('#'))
     {
         return(MakeEmpty());
     }
     else if (Check(ANYSTRING) && Match('@'))
     {
         return(MakeAnyString());
     }
     else if (Match('"'))
     {
         // Everything up to the next double quote is taken literally.
         int start = Pos;
         while (More() && !Peek("\""))
         {
             Next();
         }
         if (!Match('"'))
         {
             throw new System.ArgumentException("expected '\"' at position " + Pos);
         }
         // Pos is now just past the closing quote, hence the "- 1".
         return(MakeString(b.Substring(start, Pos - 1 - start)));
     }
     else if (Match('('))
     {
         if (Match(')'))
         {
             return(MakeString(""));
         }
         RegExp e = ParseUnionExp();
         if (!Match(')'))
         {
             throw new System.ArgumentException("expected ')' at position " + Pos);
         }
         return(e);
     }
     else if ((Check(AUTOMATON) || Check(INTERVAL)) && Match('<'))
     {
         int start = Pos;
         while (More() && !Peek(">"))
         {
             Next();
         }
         if (!Match('>'))
         {
             throw new System.ArgumentException("expected '>' at position " + Pos);
         }
         string s = b.Substring(start, Pos - 1 - start);
         int    i = s.IndexOf('-');
         if (i == -1)
         {
             // No dash: the content is an identifier naming an automaton.
             if (!Check(AUTOMATON))
             {
                 throw new System.ArgumentException("interval syntax error at position " + (Pos - 1));
             }
             return(MakeAutomaton(s));
         }
         else
         {
             if (!Check(INTERVAL))
             {
                 throw new System.ArgumentException("illegal identifier at position " + (Pos - 1));
             }

             // Exactly one dash is allowed, and not at either end.
             if (i == 0 || i == s.Length - 1 || i != s.LastIndexOf('-'))
             {
                 throw new System.ArgumentException("interval syntax error at position " + (Pos - 1));
             }
             string smin = s.Substring(0, i);
             string smax = s.Substring(i + 1, s.Length - (i + 1));

             // TryParse with the invariant culture: the result does not depend
             // on the current thread culture, and both malformed and
             // out-of-range bounds are reported as the same ArgumentException
             // instead of letting an OverflowException escape.
             int imin;
             int imax;
             if (!int.TryParse(smin, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out imin) ||
                 !int.TryParse(smax, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out imax))
             {
                 throw new System.ArgumentException("interval syntax error at position " + (Pos - 1));
             }

             // A digit count is only recorded when both bounds were written
             // with the same number of characters; otherwise it is 0.
             int digits;
             if (smin.Length == smax.Length)
             {
                 digits = smin.Length;
             }
             else
             {
                 digits = 0;
             }

             // Accept the bounds in either order.
             if (imin > imax)
             {
                 int t = imin;
                 imin = imax;
                 imax = t;
             }
             return(MakeInterval(imin, imax, digits));
         }
     }
     else
     {
         return(MakeChar(ParseCharExp()));
     }
 }

Example #26

Show file

        /// <summary>
        /// Parses a <c>simpleexp</c>: <c>.</c>, <c>#</c>, <c>@</c>, a double-quoted
        /// string, a parenthesized expression (or <c>()</c> for the empty string),
        /// a named automaton <c>&lt;identifier&gt;</c>, a numerical interval
        /// <c>&lt;n-m&gt;</c>, or otherwise a single <c>charexp</c>.
        /// The <c>#</c>, <c>@</c> and <c>&lt;...&gt;</c> forms are only recognized
        /// when the corresponding <c>Check(...)</c> call passes.
        /// </summary>
        /// <returns>The parsed expression.</returns>
        /// <exception cref="ArgumentException">
        /// A closing <c>"</c>, <c>)</c> or <c>&gt;</c> is missing, the content of
        /// <c>&lt;...&gt;</c> is not allowed by the enabled syntax, or an interval
        /// is malformed.
        /// </exception>
        internal RegExp ParseSimpleExp()
        {
            if (Match('.'))
            {
                return MakeAnyChar();
            }

            if (Check(RegExpSyntax.EMPTY) && Match('#'))
            {
                return MakeEmpty();
            }

            if (Check(RegExpSyntax.ANYSTRING) && Match('@'))
            {
                return MakeAnyString();
            }

            if (Match('"'))
            {
                // Everything up to the next double quote is taken literally.
                int literalStart = pos;
                while (More() && !Peek("\""))
                {
                    Next();
                }
                if (!Match('"'))
                {
                    throw new ArgumentException("expected '\"' at position " + pos);
                }
                // pos is now just past the closing quote, hence the "- 1".
                return MakeString(b.Substring(literalStart, pos - 1 - literalStart));
            }

            if (Match('('))
            {
                if (Match(')'))
                {
                    return MakeString("");
                }
                RegExp inner = ParseUnionExp();
                if (!Match(')'))
                {
                    throw new ArgumentException("expected ')' at position " + pos);
                }
                return inner;
            }

            // Anything that is not an (enabled) "<...>" form is a plain charexp.
            if (!(Check(RegExpSyntax.AUTOMATON) || Check(RegExpSyntax.INTERVAL)) || !Match('<'))
            {
                return MakeChar(ParseCharExp());
            }

            int bodyStart = pos;
            while (More() && !Peek(">"))
            {
                Next();
            }
            if (!Match('>'))
            {
                throw new ArgumentException("expected '>' at position " + pos);
            }
            string body = b.Substring(bodyStart, pos - 1 - bodyStart);
            int dash = body.IndexOf('-');

            if (dash == -1)
            {
                // No dash: the content is an identifier naming an automaton.
                if (!Check(RegExpSyntax.AUTOMATON))
                {
                    throw new ArgumentException("interval syntax error at position " + (pos - 1));
                }
                return MakeAutomaton(body);
            }

            if (!Check(RegExpSyntax.INTERVAL))
            {
                throw new ArgumentException("illegal identifier at position " + (pos - 1));
            }

            // LUCENENET: validation is done up front so no exception is thrown in the normal flow.
            // Exactly one dash is allowed, and not at either end.
            if (dash == 0 || dash == body.Length - 1 || dash != body.LastIndexOf('-'))
            {
                throw new ArgumentException("interval syntax error at position " + (pos - 1));
            }
            string lowerText = body.Substring(0, dash);
            string upperText = body.Substring(dash + 1, body.Length - (dash + 1));

            if (!int.TryParse(lowerText, NumberStyles.Integer, CultureInfo.InvariantCulture, out int lower) ||
                !int.TryParse(upperText, NumberStyles.Integer, CultureInfo.InvariantCulture, out int upper))
            {
                throw new ArgumentException("interval syntax error at position " + (pos - 1));
            }

            // A digit count is only recorded when both bounds were written
            // with the same number of characters; otherwise it is 0.
            int digits = (lowerText.Length == upperText.Length) ? lowerText.Length : 0;

            // Accept the bounds in either order.
            if (lower > upper)
            {
                int swapped = lower;
                lower = upper;
                upper = swapped;
            }
            return MakeInterval(lower, upper, digits);
        }