Пример #1
0
        /// <summary>
        /// Creates a node of kind <c>RegexpIntersection</c> holding the two given operands.
        /// </summary>
        /// <param name="exp1">Stored as the node's first operand.</param>
        /// <param name="exp2">Stored as the node's second operand.</param>
        /// <returns>The newly created node.</returns>
        private static RegExp MakeIntersection(RegExp exp1, RegExp exp2)
        {
            return new RegExp
            {
                kind = Kind.RegexpIntersection,
                exp1 = exp1,
                exp2 = exp2,
            };
        }
Пример #2
0
        /// <summary>
        /// Creates a node of kind <c>RegexpRepeatMin</c> for the given operand and minimum count.
        /// </summary>
        /// <param name="exp">Stored as the node's operand.</param>
        /// <param name="min">Stored as the node's minimum.</param>
        /// <returns>The newly created node.</returns>
        private static RegExp MakeRepeat(RegExp exp, int min)
        {
            return new RegExp
            {
                kind = Kind.RegexpRepeatMin,
                exp1 = exp,
                min  = min,
            };
        }
Пример #3
0
        /// <summary>
        /// Parses a complement expression. When the Complement syntax option is enabled and
        /// <c>Match('~')</c> succeeds, the operand is parsed recursively and wrapped in a
        /// complement node; otherwise parsing is delegated to <c>ParseCharClassExp</c>.
        /// </summary>
        /// <returns>The parsed expression.</returns>
        private RegExp ParseComplExp()
        {
            // The option check comes first so '~' is only tested when complement syntax is enabled.
            bool isComplement = this.Check(RegExpSyntaxOptions.Complement) && this.Match('~');

            if (!isComplement)
            {
                return this.ParseCharClassExp();
            }

            RegExp operand = this.ParseComplExp();
            return RegExp.MakeComplement(operand);
        }
Пример #4
0
        /// <summary>
        /// Creates a node of kind <c>RegexpCharRange</c> with the given end points.
        /// </summary>
        /// <param name="from">Stored as the range's <c>from</c> character.</param>
        /// <param name="to">Stored as the range's <c>to</c> character.</param>
        /// <returns>The newly created node.</returns>
        private static RegExp MakeCharRange(char from, char to)
        {
            var range = new RegExp();

            range.kind = Kind.RegexpCharRange;
            range.from = from;
            range.to   = to;

            return range;
        }
Пример #5
0
        /// <summary>
        /// Creates a node of kind <c>RegexpRepeatMinMax</c> for the given operand and bounds.
        /// </summary>
        /// <param name="exp">Stored as the node's operand.</param>
        /// <param name="min">Stored as the node's minimum.</param>
        /// <param name="max">Stored as the node's maximum.</param>
        /// <returns>The newly created node.</returns>
        private static RegExp MakeRepeat(RegExp exp, int min, int max)
        {
            var repeat = new RegExp();

            repeat.Kind  = Kind.RegexpRepeatMinMax;
            repeat.Expr1 = exp;
            repeat.Min   = min;
            repeat.Max   = max;

            return repeat;
        }
Пример #6
0
        /// <summary>
        /// Creates a node of kind <c>RegexpInterval</c> with the given bounds and digit count.
        /// </summary>
        /// <param name="min">Stored as the interval's minimum.</param>
        /// <param name="max">Stored as the interval's maximum.</param>
        /// <param name="digits">Stored as the interval's digit count.</param>
        /// <returns>The newly created node.</returns>
        private static RegExp MakeInterval(int min, int max, int digits)
        {
            return new RegExp
            {
                kind   = Kind.RegexpInterval,
                min    = min,
                max    = max,
                digits = digits,
            };
        }
Пример #7
0
        /// <summary>
        /// Parses one or more character classes and combines them into a union.
        /// Parsing continues while <c>More()</c> is true and <c>Peek("]")</c> is false.
        /// </summary>
        /// <returns>The first class alone, or a left-nested union of all parsed classes.</returns>
        private RegExp ParseCharClasses()
        {
            RegExp union = this.ParseCharClass();

            for (;;)
            {
                if (!this.More() || this.Peek("]"))
                {
                    return union;
                }

                RegExp next = this.ParseCharClass();
                union = RegExp.MakeUnion(union, next);
            }
        }
Пример #8
0
        /// <summary>
        /// Parses a concatenation: one repeat expression, optionally followed (recursively)
        /// by further concatenated expressions.
        /// </summary>
        /// <returns>The single operand, or a right-nested concatenation node.</returns>
        private RegExp ParseConcatExp()
        {
            RegExp head = this.ParseRepeatExp();

            // Nothing left, or the next character is ')' or '|': the concatenation ends here.
            if (!this.More() || this.Peek(")|"))
            {
                return head;
            }

            // '&' ends the concatenation only when intersection syntax is enabled.
            if (this.Check(RegExpSyntaxOptions.Intersection) && this.Peek("&"))
            {
                return head;
            }

            return RegExp.MakeConcatenation(head, this.ParseConcatExp());
        }
Пример #9
0
        /// <summary>
        /// Parses a union: an intersection expression, optionally followed by '|' and
        /// another union expression.
        /// </summary>
        /// <returns>The single operand, or a right-nested union node.</returns>
        private RegExp ParseUnionExp()
        {
            RegExp left = this.ParseInterExp();

            return this.Match('|')
                ? RegExp.MakeUnion(left, this.ParseUnionExp())
                : left;
        }
Пример #10
0
        /// <summary>
        /// Parses an intersection: a concatenation expression, optionally followed by '&amp;'
        /// and another intersection expression. '&amp;' is only tested when the Intersection
        /// syntax option is enabled.
        /// </summary>
        /// <returns>The single operand, or a right-nested intersection node.</returns>
        private RegExp ParseInterExp()
        {
            RegExp left = this.ParseConcatExp();

            if (!this.Check(RegExpSyntaxOptions.Intersection) || !this.Match('&'))
            {
                return left;
            }

            RegExp right = this.ParseInterExp();
            return RegExp.MakeIntersection(left, right);
        }
Пример #11
0
        /// <summary>
        /// Diagnostic helper: parses a fixed pattern and writes its string form to the test output.
        /// </summary>
        public void DumpRegEx()
        {
            const string rawRegEx = @"\d{3}\d3}";

            var parsed = new Fare.RegExp(rawRegEx);
            string rendered = parsed.ToString();

            _testOutput.WriteLine(rendered);

            // Deliberately fail the test so the output written above is reported.
            Assert.True(false);
        }
Пример #12
0
        /// <summary>
        /// Diagnostic helper: converts a fixed pattern to an automaton and writes it to the
        /// test output in DOT format, preceded by a comment line containing the pattern.
        /// </summary>
        public void DumpDot()
        {
            // Pattern to render. The earlier placeholder value (\d{3}-\d{2}-(a|q)J) was
            // overwritten before it was ever read, so only the effective pattern is kept.
            string rawRegEx = @"0x[0-9A-Fa-f]{4}";

            Fare.RegExp e = new Fare.RegExp(rawRegEx);
            var         a = e.ToAutomaton();

            _testOutput.WriteLine($"/* {rawRegEx} */");
            _testOutput.WriteLine(Fare.DotFormatter.ToDot(a));

            // Deliberately fail the test so the output written above is reported.
            Assert.True(false);
        }
Пример #13
0
        /// <summary>
        /// Parses one item of a bracketed character class: a single character, a range
        /// <c>a-b</c>, or a character followed by a literal '-' just before the closing ']'.
        /// </summary>
        /// <returns>A char node, a char-range node, or a union of the character and '-'.</returns>
        private RegExp ParseCharClass()
        {
            char first = this.ParseCharExp();

            if (!this.Match('-'))
            {
                return RegExp.MakeChar(first);
            }

            // A '-' directly before ']' is a literal dash, not the start of a range.
            if (this.Peek("]"))
            {
                RegExp dash = RegExp.MakeChar('-');
                return RegExp.MakeUnion(RegExp.MakeChar(first), dash);
            }

            char last = this.ParseCharExp();
            return RegExp.MakeCharRange(first, last);
        }
Пример #14
0
 /// <summary>
 /// Walks a tree of nodes of kind <paramref name="regExpKind"/> and appends the automaton
 /// of every sub-expression of a different kind to <paramref name="list"/>, first operand
 /// before second.
 /// </summary>
 /// <param name="exp">Expression to inspect.</param>
 /// <param name="regExpKind">Kind of node that is descended into rather than converted.</param>
 /// <param name="list">Receives the automata of the leaves, in visiting order.</param>
 /// <param name="automata">Passed through to <c>ToAutomaton</c>.</param>
 /// <param name="automatonProvider">Passed through to <c>ToAutomaton</c>.</param>
 /// <param name="minimize">Passed through to <c>ToAutomaton</c>.</param>
 private void FindLeaves(
     RegExp exp,
     Kind regExpKind,
     IList <Automaton> list,
     IDictionary <String, Automaton> automata,
     IAutomatonProvider automatonProvider,
     bool minimize)
 {
     // A node of any other kind is a leaf: convert it and stop descending.
     if (exp.kind != regExpKind)
     {
         list.Add(exp.ToAutomaton(automata, automatonProvider, minimize));
         return;
     }

     this.FindLeaves(exp.exp1, regExpKind, list, automata, automatonProvider, minimize);
     this.FindLeaves(exp.exp2, regExpKind, list, automata, automatonProvider, minimize);
 }
Пример #15
0
        /// <summary>
        /// Builds the automaton for this expression. When <c>allowMutation</c> is set, the
        /// allow-mutate flag is switched on for the duration of the conversion and then set
        /// back to the value returned by <c>SetAllowMutate(true)</c>.
        /// </summary>
        /// <param name="automata">Passed through to <c>ToAutomaton</c>.</param>
        /// <param name="automatonProvider">Passed through to <c>ToAutomaton</c>.</param>
        /// <param name="minimize">Passed through to <c>ToAutomaton</c>.</param>
        /// <returns>The automaton produced by <c>ToAutomaton</c>.</returns>
        private Automaton ToAutomatonAllowMutate(
            IDictionary <string, Automaton> automata,
            IAutomatonProvider automatonProvider,
            bool minimize)
        {
            bool previous = false;

            if (allowMutation)
            {
                previous = RegExp.SetAllowMutate(true); // This is not thread safe.
            }

            try
            {
                return this.ToAutomaton(automata, automatonProvider, minimize);
            }
            finally
            {
                // Put the flag back even when the conversion throws, so a failed
                // conversion does not leave the setting switched on.
                if (allowMutation)
                {
                    RegExp.SetAllowMutate(previous);
                }
            }
        }
Пример #16
0
        /// <summary>
        /// Diagnostic helper: converts a fixed pattern to an automaton and writes a summary
        /// line followed by every state and its sorted transitions to the test output.
        /// </summary>
        public void DumpAST()
        {
            const string rawRegEx = @"\d{3}\d3}";

            var automaton = new Fare.RegExp(rawRegEx).ToAutomaton();

            _testOutput.WriteLine($"expr: {rawRegEx} has {automaton.NumberOfStates} states and {automaton.NumberOfTransitions} transistions");

            foreach (var node in automaton.GetStates())
            {
                _testOutput.WriteLine($"\tState (Id:{node.Id}, Number:{node.Number}, Accept: {node.Accept}) has {node.Transitions.Count} transitions");

                foreach (var edge in node.GetSortedTransitions(false))
                {
                    _testOutput.WriteLine($"\t\tTransition to: {edge.To.Id} [{edge.Min} - {edge.Max}]");
                }
            }

            // Deliberately fail the test so the output written above is reported.
            Assert.True(false);
        }
Пример #17
0
        /// <summary>
        /// Merges two nodes into one string node. For each operand, in order, the text of a
        /// <c>RegexpString</c> node (<c>SourceRegExpr</c>) or otherwise its <c>Char</c> is appended.
        /// </summary>
        /// <param name="exp1">First operand.</param>
        /// <param name="exp2">Second operand.</param>
        /// <returns>A string node built from the concatenated text.</returns>
        private static RegExp MakeString(RegExp exp1, RegExp exp2)
        {
            var text = new StringBuilder();

            foreach (RegExp operand in new[] { exp1, exp2 })
            {
                if (operand.Kind == Kind.RegexpString)
                {
                    text.Append(operand.SourceRegExpr);
                }
                else
                {
                    text.Append(operand.Char);
                }
            }

            return RegExp.MakeString(text.ToString());
        }
Пример #18
0
        /// <summary>
        /// Creates a <see cref="Xeger"/> instance for the given regular expression.
        /// </summary>
        /// <param name="regex">The regular expression; must be neither null nor empty.</param>
        /// <param name="random">The random source stored on the instance; must not be null.</param>
        /// <param name="anyCharAlphabet">The list of characters used for computing the possible values for classes "." "\s", "\d", "\w" (and "\S", "\D", "\W"). It does not check explicitly defined chars in regexp. When null, the instance field is left untouched; the parameter value itself (possibly null) is what is handed to the <c>RegExp</c> constructor.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="regex"/> is null or empty, or <paramref name="random"/> is null.
        /// </exception>
        public Xeger(string regex, Random random, string anyCharAlphabet = null)
        {
            if (string.IsNullOrEmpty(regex))
            {
                throw new ArgumentNullException("regex");
            }

            if (random == null)
            {
                throw new ArgumentNullException("random");
            }

            if (anyCharAlphabet != null)
            {
                this.anyCharAlphabet = anyCharAlphabet;
            }

            // Parse the pattern after RemoveStartEndMarkers has processed it.
            string pattern = RemoveStartEndMarkers(regex);
            var    parsed  = new RegExp(pattern, anyCharAlphabet, AllExceptAnyString);

            this.UsedAlphabet = parsed.UsedAlphabet();
            this.automaton    = parsed.ToAutomaton();
            this.random       = random;
        }
Пример #19
0
        /// <summary>
        /// Merges two nodes into one string node. Each operand contributes its string
        /// (<c>s</c>) when it is a <c>RegexpString</c> node, otherwise its character (<c>c</c>).
        /// </summary>
        /// <param name="exp1">First operand.</param>
        /// <param name="exp2">Second operand.</param>
        /// <returns>A string node built from the concatenated text.</returns>
        private static RegExp MakeString(RegExp exp1, RegExp exp2)
        {
            var merged = new StringBuilder();

            if (exp1.kind != Kind.RegexpString)
            {
                merged.Append(exp1.c);
            }
            else
            {
                merged.Append(exp1.s);
            }

            if (exp2.kind != Kind.RegexpString)
            {
                merged.Append(exp2.c);
            }
            else
            {
                merged.Append(exp2.s);
            }

            string text = merged.ToString();
            return RegExp.MakeString(text);
        }
Пример #20
0
        /// <summary>
        /// Parses a simple (atomic) expression at the current position.
        /// </summary>
        /// <remarks>
        /// Alternatives are tried in this order: <c>.</c> (any char); <c>#</c> (empty, when the
        /// Empty option is enabled); <c>@</c> (any string, when the Anystring option is enabled);
        /// a double-quoted literal; a parenthesised group; a <c>&lt;...&gt;</c> named automaton
        /// or numeric interval (when the Automaton or Interval option is enabled); and finally
        /// a single character via <c>ParseCharExp</c>.
        /// </remarks>
        /// <returns>The parsed expression node.</returns>
        /// <exception cref="ArgumentException">
        /// A closing <c>"</c>, <c>)</c> or <c>&gt;</c> is missing, or the content of
        /// <c>&lt;...&gt;</c> is not valid for the enabled options (including interval bounds
        /// that do not fit in an <see cref="int"/>).
        /// </exception>
        private RegExp ParseSimpleExp()
        {
            if (this.Match('.'))
            {
                return(RegExp.MakeAnyChar());
            }

            if (this.Check(RegExpSyntaxOptions.Empty) && this.Match('#'))
            {
                return(RegExp.MakeEmpty());
            }

            if (this.Check(RegExpSyntaxOptions.Anystring) && this.Match('@'))
            {
                return(RegExp.MakeAnyString());
            }

            // Quoted literal: everything up to the next '"' is taken verbatim.
            if (this.Match('"'))
            {
                int start = this.pos;
                while (this.More() && !this.Peek("\""))
                {
                    this.Next();
                }

                if (!this.Match('"'))
                {
                    throw new ArgumentException("expected '\"' at position " + this.pos);
                }

                return(RegExp.MakeString(this.b.Substring(start, ((this.pos - 1) - start))));
            }

            // Group: "(?" is handed to SkipNonCapturingSubpatternExp, "()" is the empty string.
            if (this.Match('('))
            {
                if (this.Match('?'))
                {
                    this.SkipNonCapturingSubpatternExp();
                }

                if (this.Match(')'))
                {
                    return(RegExp.MakeString(string.Empty));
                }

                RegExp e = this.ParseUnionExp();
                if (!this.Match(')'))
                {
                    throw new ArgumentException("expected ')' at position " + this.pos);
                }

                return(e);
            }

            // "<name>" (named automaton) or "<min-max>" (numeric interval).
            if ((this.Check(RegExpSyntaxOptions.Automaton) || this.Check(RegExpSyntaxOptions.Interval)) && this.Match('<'))
            {
                int start = this.pos;
                while (this.More() && !this.Peek(">"))
                {
                    this.Next();
                }

                if (!this.Match('>'))
                {
                    throw new ArgumentException("expected '>' at position " + this.pos);
                }

                string str = this.b.Substring(start, ((this.pos - 1) - start));
                int    i   = str.IndexOf('-');
                if (i == -1)
                {
                    // No dash: the content can only be an automaton identifier.
                    if (!this.Check(RegExpSyntaxOptions.Automaton))
                    {
                        throw new ArgumentException("interval syntax error at position " + (this.pos - 1));
                    }

                    return(RegExp.MakeAutomaton(str));
                }

                if (!this.Check(RegExpSyntaxOptions.Interval))
                {
                    throw new ArgumentException("illegal identifier at position " + (this.pos - 1));
                }

                string intervalError = "interval syntax error at position " + (this.pos - 1);

                // Exactly one dash is allowed, with text on both sides of it.
                if (i == 0 || i == str.Length - 1 || i != str.LastIndexOf('-'))
                {
                    throw new ArgumentException(intervalError);
                }

                string smin = str.Substring(0, i);
                string smax = str.Substring(i + 1);
                int    imin;
                int    imax;

                // TryParse reports malformed AND out-of-range bounds alike, so both surface as
                // the parser's ArgumentException. The invariant culture keeps the accepted
                // syntax independent of the machine's regional settings.
                if (!int.TryParse(smin, NumberStyles.Integer, CultureInfo.InvariantCulture, out imin) ||
                    !int.TryParse(smax, NumberStyles.Integer, CultureInfo.InvariantCulture, out imax))
                {
                    throw new ArgumentException(intervalError);
                }

                // Bounds written with the same number of characters pass that width on as the
                // digit count; otherwise 0 is passed.
                int numdigits = smin.Length == smax.Length ? smin.Length : 0;

                // Accept the bounds in either order.
                if (imin > imax)
                {
                    int t = imin;
                    imin = imax;
                    imax = t;
                }

                return(RegExp.MakeInterval(imin, imax, numdigits));
            }

            return(RegExp.MakeChar(this.ParseCharExp()));
        }
Пример #21
0
        /// <summary>
        /// Parses a simple (atomic) expression at the current position.
        /// </summary>
        /// <remarks>
        /// Alternatives are tried in this order: <c>.</c>; <c>#</c> (when the Empty option is
        /// enabled); <c>@</c> (when the Anystring option is enabled); a double-quoted literal;
        /// a parenthesised group; a <c>&lt;...&gt;</c> named automaton or numeric interval (when
        /// the Automaton or Interval option is enabled); the escapes <c>\\</c>, <c>\d \D</c>,
        /// <c>\s \S</c>, <c>\w \W</c>; and finally a single character via <c>ParseCharExp</c>.
        /// When <c>anyCharAlphabet</c> is set, the positive classes are built from the matching
        /// characters of that alphabet instead of the built-in ranges.
        /// </remarks>
        /// <returns>The parsed expression node.</returns>
        /// <exception cref="ArgumentException">
        /// A closing <c>"</c>, <c>)</c> or <c>&gt;</c> is missing, or the content of
        /// <c>&lt;...&gt;</c> is not valid for the enabled options (including interval bounds
        /// that do not fit in an <see cref="int"/>).
        /// </exception>
        private RegExp ParseSimpleExp()
        {
            if (this.Match('.'))
            {
                return(MakeAnyPrintableASCIIChar(this.anyCharAlphabet));
            }

            if (this.Check(RegExpSyntaxOptions.Empty) && this.Match('#'))
            {
                return(RegExp.MakeEmpty());
            }

            if (this.Check(RegExpSyntaxOptions.Anystring) && this.Match('@'))
            {
                return(RegExp.MakeAnyString());
            }

            // Quoted literal: everything up to the next '"' is taken verbatim.
            if (this.Match('"'))
            {
                int start = pos;
                while (this.More() && !this.Peek("\""))
                {
                    this.Next();
                }

                if (!this.Match('"'))
                {
                    throw new ArgumentException("expected '\"' at position " + pos);
                }

                return(RegExp.MakeString(b.Substring(start, ((pos - 1) - start))));
            }

            // Group: "(?" is handed to SkipNonCapturingSubpatternExp, "()" is the empty string.
            if (this.Match('('))
            {
                if (this.Match('?'))
                {
                    this.SkipNonCapturingSubpatternExp();
                }

                if (this.Match(')'))
                {
                    return(RegExp.MakeString(string.Empty));
                }

                RegExp e = this.ParseUnionExp();
                if (!this.Match(')'))
                {
                    throw new ArgumentException("expected ')' at position " + pos);
                }

                return(e);
            }

            // "<name>" (named automaton) or "<min-max>" (numeric interval).
            if ((this.Check(RegExpSyntaxOptions.Automaton) || this.Check(RegExpSyntaxOptions.Interval)) && this.Match('<'))
            {
                int start = pos;
                while (this.More() && !this.Peek(">"))
                {
                    this.Next();
                }

                if (!this.Match('>'))
                {
                    throw new ArgumentException("expected '>' at position " + pos);
                }

                string str = b.Substring(start, ((pos - 1) - start));
                int    i   = str.IndexOf('-');
                if (i == -1)
                {
                    // No dash: the content can only be an automaton identifier.
                    if (!this.Check(RegExpSyntaxOptions.Automaton))
                    {
                        throw new ArgumentException("interval syntax error at position " + (pos - 1));
                    }

                    return(RegExp.MakeAutomaton(str));
                }

                if (!this.Check(RegExpSyntaxOptions.Interval))
                {
                    throw new ArgumentException("illegal identifier at position " + (pos - 1));
                }

                string intervalError = "interval syntax error at position " + (pos - 1);

                // Exactly one dash is allowed, with text on both sides of it.
                if (i == 0 || i == str.Length - 1 || i != str.LastIndexOf('-'))
                {
                    throw new ArgumentException(intervalError);
                }

                string smin = str.Substring(0, i);
                string smax = str.Substring(i + 1);
                int    imin;
                int    imax;

                // TryParse reports malformed AND out-of-range bounds alike, so both surface as
                // the parser's ArgumentException. The invariant culture keeps the accepted
                // syntax independent of the machine's regional settings.
                if (!int.TryParse(smin, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out imin) ||
                    !int.TryParse(smax, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out imax))
                {
                    throw new ArgumentException(intervalError);
                }

                // Bounds written with the same number of characters pass that width on as the
                // digit count; otherwise 0 is passed.
                int numdigits = smin.Length == smax.Length ? smin.Length : 0;

                // Accept the bounds in either order.
                if (imin > imax)
                {
                    int t = imin;
                    imin = imax;
                    imax = t;
                }

                return(RegExp.MakeInterval(imin, imax, numdigits));
            }

            if (this.Match('\\'))
            {
                // Escaped '\' character.
                if (this.Match('\\'))
                {
                    return(MakeChar('\\'));
                }

                // True for the lower-case (positive) form of a class, false for the
                // upper-case (negated) form.
                bool inclusion;

                // Digits.
                if ((inclusion = this.Match('d')) || this.Match('D'))
                {
                    RegExp digitChars = MakeCharRange('0', '9');
                    return(inclusion
                        ? this.anyCharAlphabet == null
                            ? digitChars
                            : MakeAnyPrintableASCIIChar(new string((char[])this.anyCharAlphabet.ToCharArray().Where(char.IsDigit).ToArray()))
                        : ExcludeChars(digitChars, MakeAnyPrintableASCIIChar(this.anyCharAlphabet)));
                }

                // Whitespace chars only.
                if ((inclusion = this.Match('s')) || this.Match('S'))
                {
                    RegExp whitespaceChars = MakeUnion(MakeChar(' '), MakeChar('\t'));
                    return(inclusion
                        ? this.anyCharAlphabet == null
                            ? whitespaceChars
                            : MakeAnyPrintableASCIIChar(new string((char[])this.anyCharAlphabet.ToCharArray().Where(char.IsWhiteSpace).ToArray()))
                        : ExcludeChars(whitespaceChars, MakeAnyPrintableASCIIChar(this.anyCharAlphabet)));
                }

                // Word character. Range is [A-Za-z0-9_]
                if ((inclusion = this.Match('w')) || this.Match('W'))
                {
                    var    ranges    = new[] { MakeCharRange('A', 'Z'), MakeCharRange('a', 'z'), MakeCharRange('0', '9') };
                    RegExp wordChars = ranges.Aggregate(MakeChar('_'), MakeUnion);
                    return(inclusion
                        ? this.anyCharAlphabet == null
                            ? wordChars
                            : MakeAnyPrintableASCIIChar(new string((char[])this.anyCharAlphabet.ToCharArray()
                                                                   .Where(x => char.IsLetter(x) || char.IsDigit(x) || x == '_').ToArray()))
                        : ExcludeChars(wordChars, MakeAnyPrintableASCIIChar(this.anyCharAlphabet)));
                }

                // NOTE(review): any other character after the backslash falls through to the
                // single-character case below — confirm Match has already consumed the '\'.
            }

            return(RegExp.MakeChar(this.ParseCharExp()));
        }
Пример #22
0
        /// <summary>
        /// Builds an expression from a bracketed class containing the given characters,
        /// i.e. parses the text <c>[charset]</c>.
        /// </summary>
        /// <param name="charset">Characters placed verbatim between '[' and ']'.</param>
        /// <returns>The expression parsed from the bracketed text.</returns>
        /// <remarks>
        /// NOTE(review): <paramref name="charset"/> is inserted without escaping, so characters
        /// such as ']' or '\' may change how the class is parsed — confirm callers pass safe input.
        /// </remarks>
        private static RegExp MakeAnyFromCharset(string charset) =>
            new RegExp($"[{charset}]", null);
Пример #23
0
 /// <summary>
 /// Builds the expression "<paramref name="allChars"/> but not <paramref name="exclusion"/>":
 /// the intersection of <paramref name="allChars"/> with the complement of
 /// <paramref name="exclusion"/>.
 /// </summary>
 /// <param name="exclusion">Expression whose matches are removed.</param>
 /// <param name="allChars">Expression the result is restricted to.</param>
 /// <returns>The intersection node.</returns>
 private static RegExp ExcludeChars(RegExp exclusion, RegExp allChars)
 {
     RegExp everythingElse = MakeComplement(exclusion);

     return MakeIntersection(allChars, everythingElse);
 }
Пример #24
0
        /// <summary>
        /// Parses a complement expression followed by any number of repetition operators:
        /// <c>?</c>, <c>*</c>, <c>+</c>, <c>{n}</c>, <c>{n,}</c> or <c>{n,m}</c>.
        /// </summary>
        /// <returns>The operand wrapped in one node per operator, innermost first.</returns>
        /// <exception cref="ArgumentException">
        /// A <c>{</c> is not followed by an integer, a count does not fit in an
        /// <see cref="int"/>, or the closing <c>}</c> is missing.
        /// </exception>
        private RegExp ParseRepeatExp()
        {
            RegExp e = this.ParseComplExp();

            while (this.Peek("?*+{"))
            {
                if (this.Match('?'))
                {
                    e = RegExp.MakeOptional(e);
                }
                else if (this.Match('*'))
                {
                    e = RegExp.MakeRepeat(e);
                }
                else if (this.Match('+'))
                {
                    e = RegExp.MakeRepeat(e, 1);
                }
                else if (this.Match('{'))
                {
                    int start = pos;
                    while (this.Peek("0123456789"))
                    {
                        this.Next();
                    }

                    if (start == pos)
                    {
                        throw new ArgumentException("integer expected at position " + pos);
                    }

                    int n = this.ParseRepeatBound(start);

                    // m == -1 means "no upper bound" ({n,}); {n} is treated as {n,n}.
                    int m = -1;
                    if (this.Match(','))
                    {
                        start = pos;
                        while (this.Peek("0123456789"))
                        {
                            this.Next();
                        }

                        if (start != pos)
                        {
                            m = this.ParseRepeatBound(start);
                        }
                    }
                    else
                    {
                        m = n;
                    }

                    if (!this.Match('}'))
                    {
                        throw new ArgumentException("expected '}' at position " + pos);
                    }

                    e = m == -1 ? RegExp.MakeRepeat(e, n) : RegExp.MakeRepeat(e, n, m);
                }
            }

            return(e);
        }

        /// <summary>
        /// Converts the text between <paramref name="start"/> and the current position into a
        /// repeat count, reporting an unparsable (for example too large) value as a syntax
        /// error instead of letting an <see cref="OverflowException"/> escape.
        /// </summary>
        /// <param name="start">Index in the pattern where the number begins.</param>
        /// <returns>The parsed count.</returns>
        /// <exception cref="ArgumentException">The text cannot be parsed as an <see cref="int"/>.</exception>
        private int ParseRepeatBound(int start)
        {
            string digits = b.Substring(start, pos - start);
            int    value;

            if (!int.TryParse(digits, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out value))
            {
                throw new ArgumentException("integer out of range at position " + start);
            }

            return value;
        }
Пример #25
0
        /// <summary>
        /// Parses a simple (atomic) expression at the current position.
        /// </summary>
        /// <remarks>
        /// Alternatives are tried in this order: <c>.</c>; <c>@</c> (when the Anystring option
        /// is enabled); a double-quoted literal; a parenthesised group; a <c>&lt;...&gt;</c>
        /// named automaton or numeric interval (when the Automaton or Interval option is
        /// enabled); the escapes <c>\\</c>, <c>\d \D</c>, <c>\s \S</c>, <c>\w \W</c>; and finally
        /// a single character via <c>ParseCharExp</c>. The <c>#</c> (Empty) token is
        /// intentionally not handled; see the Issue 32 note in the body.
        /// </remarks>
        /// <returns>The parsed expression node.</returns>
        /// <exception cref="ArgumentException">
        /// A closing <c>"</c>, <c>)</c> or <c>&gt;</c> is missing, or the content of
        /// <c>&lt;...&gt;</c> is not valid for the enabled options (including interval bounds
        /// that do not fit in an <see cref="int"/>).
        /// </exception>
        private RegExp ParseSimpleExp()
        {
            if (this.Match('.'))
            {
                return(MakeAnyPrintableASCIIChar());
            }

            /* Issue 32, https://github.com/moodmosaic/Fare/issues/32
             *   The intent of the original code is a little unclear.  The comment for the Empty value in the
             *   enum is 'Enables empty language.'  Using '#' as token seems non-standard, and caused
             *   unhandled exception in some cases.  The best option at this point is to remove handling of the
             *   Empty option until a proper implementation is proposed.
             */
            // if (this.Check(RegExpSyntaxOptions.Empty) && this.Match('#'))
            // {
            //     return RegExp.MakeEmpty();
            // }

            if (this.Check(RegExpSyntaxOptions.Anystring) && this.Match('@'))
            {
                return(RegExp.MakeAnyString());
            }

            // Quoted literal: everything up to the next '"' is taken verbatim.
            if (this.Match('"'))
            {
                int start = pos;
                while (this.More() && !this.Peek("\""))
                {
                    this.Next();
                }

                if (!this.Match('"'))
                {
                    throw new ArgumentException("expected '\"' at position " + pos);
                }

                return(RegExp.MakeString(b.Substring(start, ((pos - 1) - start))));
            }

            // Group: "(?" is handed to SkipNonCapturingSubpatternExp, "()" is the empty string.
            if (this.Match('('))
            {
                if (this.Match('?'))
                {
                    this.SkipNonCapturingSubpatternExp();
                }

                if (this.Match(')'))
                {
                    return(RegExp.MakeString(string.Empty));
                }

                RegExp e = this.ParseUnionExp();
                if (!this.Match(')'))
                {
                    throw new ArgumentException("expected ')' at position " + pos);
                }

                return(e);
            }

            // "<name>" (named automaton) or "<min-max>" (numeric interval).
            if ((this.Check(RegExpSyntaxOptions.Automaton) || this.Check(RegExpSyntaxOptions.Interval)) && this.Match('<'))
            {
                int start = pos;
                while (this.More() && !this.Peek(">"))
                {
                    this.Next();
                }

                if (!this.Match('>'))
                {
                    throw new ArgumentException("expected '>' at position " + pos);
                }

                string str = b.Substring(start, ((pos - 1) - start));
                int    i   = str.IndexOf('-');
                if (i == -1)
                {
                    // No dash: the content can only be an automaton identifier.
                    if (!this.Check(RegExpSyntaxOptions.Automaton))
                    {
                        throw new ArgumentException("interval syntax error at position " + (pos - 1));
                    }

                    return(RegExp.MakeAutomaton(str));
                }

                if (!this.Check(RegExpSyntaxOptions.Interval))
                {
                    throw new ArgumentException("illegal identifier at position " + (pos - 1));
                }

                string intervalError = "interval syntax error at position " + (pos - 1);

                // Exactly one dash is allowed, with text on both sides of it.
                if (i == 0 || i == str.Length - 1 || i != str.LastIndexOf('-'))
                {
                    throw new ArgumentException(intervalError);
                }

                string smin = str.Substring(0, i);
                string smax = str.Substring(i + 1);
                int    imin;
                int    imax;

                // TryParse reports malformed AND out-of-range bounds alike, so both surface as
                // the parser's ArgumentException. The invariant culture keeps the accepted
                // syntax independent of the machine's regional settings.
                if (!int.TryParse(smin, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out imin) ||
                    !int.TryParse(smax, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out imax))
                {
                    throw new ArgumentException(intervalError);
                }

                // Bounds written with the same number of characters pass that width on as the
                // digit count; otherwise 0 is passed.
                int numdigits = smin.Length == smax.Length ? smin.Length : 0;

                // Accept the bounds in either order.
                if (imin > imax)
                {
                    int t = imin;
                    imin = imax;
                    imax = t;
                }

                return(RegExp.MakeInterval(imin, imax, numdigits));
            }

            if (this.Match('\\'))
            {
                // Escaped '\' character.
                if (this.Match('\\'))
                {
                    return(MakeChar('\\'));
                }

                // True for the lower-case (positive) form of a class, false for the
                // upper-case (negated) form.
                bool inclusion;

                // Digits.
                if ((inclusion = this.Match('d')) || this.Match('D'))
                {
                    RegExp digitChars = MakeCharRange('0', '9');
                    return(inclusion ? digitChars : ExcludeChars(digitChars, MakeAnyPrintableASCIIChar()));
                }

                // Whitespace chars only.
                if ((inclusion = this.Match('s')) || this.Match('S'))
                {
                    // Do not add line breaks, as usually RegExp is single line.
                    RegExp whitespaceChars = MakeUnion(MakeChar(' '), MakeChar('\t'));
                    return(inclusion ? whitespaceChars : ExcludeChars(whitespaceChars, MakeAnyPrintableASCIIChar()));
                }

                // Word character. Range is [A-Za-z0-9_]
                if ((inclusion = this.Match('w')) || this.Match('W'))
                {
                    var    ranges    = new[] { MakeCharRange('A', 'Z'), MakeCharRange('a', 'z'), MakeCharRange('0', '9') };
                    RegExp wordChars = ranges.Aggregate(MakeChar('_'), MakeUnion);

                    return(inclusion ? wordChars : ExcludeChars(wordChars, MakeAnyPrintableASCIIChar()));
                }

                // NOTE(review): any other character after the backslash falls through to the
                // single-character case below — confirm Match has already consumed the '\'.
            }

            return(RegExp.MakeChar(this.ParseCharExp()));
        }