Example #1
0
        private RegExp ParseSimpleExp()
        {
            if (this.Match('.'))
            {
                return(MakeAnyPrintableASCIIChar());
            }

            /* Issue 32, https://github.com/moodmosaic/Fare/issues/32
             *   The intent of the original code is a little unclear.  The comment for the Empty value in the
             *   enum is 'Enables empty language.'  Using '#' as token seems non-standard, and caused
             *   unhandled exception in some cases.  The best option at this point is to remove handling of the
             *   Empty option until a proper implementation is proposed.
             */
            // if (this.Check(RegExpSyntaxOptions.Empty) && this.Match('#'))
            // {
            //     return RegExp.MakeEmpty();
            // }

            if (this.Check(RegExpSyntaxOptions.Anystring) && this.Match('@'))
            {
                return(RegExp.MakeAnyString());
            }

            if (this.Match('"'))
            {
                int start = pos;
                while (this.More() && !this.Peek("\""))
                {
                    this.Next();
                }

                if (!this.Match('"'))
                {
                    throw new ArgumentException("expected '\"' at position " + pos);
                }

                return(RegExp.MakeString(b.Substring(start, ((pos - 1) - start))));
            }

            if (this.Match('('))
            {
                if (this.Match('?'))
                {
                    this.SkipNonCapturingSubpatternExp();
                }

                if (this.Match(')'))
                {
                    return(RegExp.MakeString(string.Empty));
                }

                RegExp e = this.ParseUnionExp();
                if (!this.Match(')'))
                {
                    throw new ArgumentException("expected ')' at position " + pos);
                }

                return(e);
            }

            if ((this.Check(RegExpSyntaxOptions.Automaton) || this.Check(RegExpSyntaxOptions.Interval)) && this.Match('<'))
            {
                int start = pos;
                while (this.More() && !this.Peek(">"))
                {
                    this.Next();
                }

                if (!this.Match('>'))
                {
                    throw new ArgumentException("expected '>' at position " + pos);
                }

                string str = b.Substring(start, ((pos - 1) - start));
                int    i   = str.IndexOf('-');
                if (i == -1)
                {
                    if (!this.Check(RegExpSyntaxOptions.Automaton))
                    {
                        throw new ArgumentException("interval syntax error at position " + (pos - 1));
                    }

                    return(RegExp.MakeAutomaton(str));
                }

                if (!this.Check(RegExpSyntaxOptions.Interval))
                {
                    throw new ArgumentException("illegal identifier at position " + (pos - 1));
                }

                try
                {
                    if (i == 0 || i == str.Length - 1 || i != str.LastIndexOf('-'))
                    {
                        throw new FormatException();
                    }

                    string smin      = str.Substring(0, i - 0);
                    string smax      = str.Substring(i + 1, (str.Length - (i + 1)));
                    int    imin      = int.Parse(smin, System.Globalization.NumberFormatInfo.CurrentInfo);
                    int    imax      = int.Parse(smax, System.Globalization.NumberFormatInfo.CurrentInfo);
                    int    numdigits = smin.Length == smax.Length ? smin.Length : 0;
                    if (imin > imax)
                    {
                        int t = imin;
                        imin = imax;
                        imax = t;
                    }

                    return(RegExp.MakeInterval(imin, imax, numdigits));
                }
                catch (FormatException)
                {
                    throw new ArgumentException("interval syntax error at position " + (pos - 1));
                }
            }

            if (this.Match('\\'))
            {
                // Escaped '\' character.
                if (this.Match('\\'))
                {
                    return(MakeChar('\\'));
                }

                bool inclusion;

                // Digits.
                if ((inclusion = this.Match('d')) || this.Match('D'))
                {
                    RegExp digitChars = MakeCharRange('0', '9');
                    return(inclusion ? digitChars : ExcludeChars(digitChars, MakeAnyPrintableASCIIChar()));
                }

                // Whitespace chars only.
                if ((inclusion = this.Match('s')) || this.Match('S'))
                {
                    // Do not add line breaks, as usually RegExp is single line.
                    RegExp whitespaceChars = MakeUnion(MakeChar(' '), MakeChar('\t'));
                    return(inclusion ? whitespaceChars : ExcludeChars(whitespaceChars, MakeAnyPrintableASCIIChar()));
                }

                // Word character. Range is [A-Za-z0-9_]
                if ((inclusion = this.Match('w')) || this.Match('W'))
                {
                    var    ranges    = new[] { MakeCharRange('A', 'Z'), MakeCharRange('a', 'z'), MakeCharRange('0', '9') };
                    RegExp wordChars = ranges.Aggregate(MakeChar('_'), MakeUnion);

                    return(inclusion ? wordChars : ExcludeChars(wordChars, MakeAnyPrintableASCIIChar()));
                }
            }

            return(RegExp.MakeChar(this.ParseCharExp()));
        }
Example #2
0
        private RegExp ParseSimpleExp()
        {
            if (this.Match('.'))
            {
                return(MakeAnyPrintableASCIIChar(this.anyCharAlphabet));
            }

            if (this.Check(RegExpSyntaxOptions.Empty) && this.Match('#'))
            {
                return(RegExp.MakeEmpty());
            }

            if (this.Check(RegExpSyntaxOptions.Anystring) && this.Match('@'))
            {
                return(RegExp.MakeAnyString());
            }

            if (this.Match('"'))
            {
                int start = pos;
                while (this.More() && !this.Peek("\""))
                {
                    this.Next();
                }

                if (!this.Match('"'))
                {
                    throw new ArgumentException("expected '\"' at position " + pos);
                }

                return(RegExp.MakeString(b.Substring(start, ((pos - 1) - start))));
            }

            if (this.Match('('))
            {
                if (this.Match('?'))
                {
                    this.SkipNonCapturingSubpatternExp();
                }

                if (this.Match(')'))
                {
                    return(RegExp.MakeString(string.Empty));
                }

                RegExp e = this.ParseUnionExp();
                if (!this.Match(')'))
                {
                    throw new ArgumentException("expected ')' at position " + pos);
                }

                return(e);
            }

            if ((this.Check(RegExpSyntaxOptions.Automaton) || this.Check(RegExpSyntaxOptions.Interval)) && this.Match('<'))
            {
                int start = pos;
                while (this.More() && !this.Peek(">"))
                {
                    this.Next();
                }

                if (!this.Match('>'))
                {
                    throw new ArgumentException("expected '>' at position " + pos);
                }

                string str = b.Substring(start, ((pos - 1) - start));
                int    i   = str.IndexOf('-');
                if (i == -1)
                {
                    if (!this.Check(RegExpSyntaxOptions.Automaton))
                    {
                        throw new ArgumentException("interval syntax error at position " + (pos - 1));
                    }

                    return(RegExp.MakeAutomaton(str));
                }

                if (!this.Check(RegExpSyntaxOptions.Interval))
                {
                    throw new ArgumentException("illegal identifier at position " + (pos - 1));
                }

                try
                {
                    if (i == 0 || i == str.Length - 1 || i != str.LastIndexOf('-'))
                    {
                        throw new FormatException();
                    }

                    string smin      = str.Substring(0, i - 0);
                    string smax      = str.Substring(i + 1, (str.Length - (i + 1)));
                    int    imin      = int.Parse(smin);
                    int    imax      = int.Parse(smax);
                    int    numdigits = smin.Length == smax.Length ? smin.Length : 0;
                    if (imin > imax)
                    {
                        int t = imin;
                        imin = imax;
                        imax = t;
                    }

                    return(RegExp.MakeInterval(imin, imax, numdigits));
                }
                catch (FormatException)
                {
                    throw new ArgumentException("interval syntax error at position " + (pos - 1));
                }
            }

            if (this.Match('\\'))
            {
                // Escaped '\' character.
                if (this.Match('\\'))
                {
                    return(MakeChar('\\'));
                }

                bool inclusion;

                // Digits.
                if ((inclusion = this.Match('d')) || this.Match('D'))
                {
                    RegExp digitChars = MakeCharRange('0', '9');
                    return(inclusion
                        ? this.anyCharAlphabet == null
                            ? digitChars
                            : MakeAnyPrintableASCIIChar(new string((char[])this.anyCharAlphabet.ToCharArray().Where(char.IsDigit).ToArray()))
                        : ExcludeChars(digitChars, MakeAnyPrintableASCIIChar(this.anyCharAlphabet)));
                }

                // Whitespace chars only.
                if ((inclusion = this.Match('s')) || this.Match('S'))
                {
                    RegExp whitespaceChars = MakeUnion(MakeChar(' '), MakeChar('\t'));
                    return(inclusion
                        ? this.anyCharAlphabet == null
                            ? whitespaceChars
                            : MakeAnyPrintableASCIIChar(new string((char[])this.anyCharAlphabet.ToCharArray().Where(char.IsWhiteSpace).ToArray()))
                        : ExcludeChars(whitespaceChars, MakeAnyPrintableASCIIChar(this.anyCharAlphabet)));
                }

                // Word character. Range is [A-Za-z0-9_]
                if ((inclusion = this.Match('w')) || this.Match('W'))
                {
                    var    ranges    = new[] { MakeCharRange('A', 'Z'), MakeCharRange('a', 'z'), MakeCharRange('0', '9') };
                    RegExp wordChars = ranges.Aggregate(MakeChar('_'), MakeUnion);
                    return(inclusion
                        ? this.anyCharAlphabet == null
                            ? wordChars
                            : MakeAnyPrintableASCIIChar(new string((char[])this.anyCharAlphabet.ToCharArray()
                                                                   .Where(x => char.IsLetter(x) || char.IsDigit(x) || x == '_').ToArray()))
                        : ExcludeChars(wordChars, MakeAnyPrintableASCIIChar(this.anyCharAlphabet)));
                }
            }

            return(RegExp.MakeChar(this.ParseCharExp()));
        }
Example #3
0
        private RegExp ParseSimpleExp()
        {
            if (this.Match('.'))
            {
                return(RegExp.MakeAnyChar());
            }

            if (this.Check(RegExpSyntaxOptions.Empty) && this.Match('#'))
            {
                return(RegExp.MakeEmpty());
            }

            if (this.Check(RegExpSyntaxOptions.Anystring) && this.Match('@'))
            {
                return(RegExp.MakeAnyString());
            }

            if (this.Match('"'))
            {
                int start = this.pos;
                while (this.More() && !this.Peek("\""))
                {
                    this.Next();
                }

                if (!this.Match('"'))
                {
                    throw new ArgumentException("expected '\"' at position " + this.pos);
                }

                return(RegExp.MakeString(this.b.Substring(start, ((this.pos - 1) - start))));
            }

            if (this.Match('('))
            {
                if (this.Match('?'))
                {
                    this.SkipNonCapturingSubpatternExp();
                }

                if (this.Match(')'))
                {
                    return(RegExp.MakeString(string.Empty));
                }

                RegExp e = this.ParseUnionExp();
                if (!this.Match(')'))
                {
                    throw new ArgumentException("expected ')' at position " + this.pos);
                }

                return(e);
            }

            if ((this.Check(RegExpSyntaxOptions.Automaton) || this.Check(RegExpSyntaxOptions.Interval)) && this.Match('<'))
            {
                int start = this.pos;
                while (this.More() && !this.Peek(">"))
                {
                    this.Next();
                }

                if (!this.Match('>'))
                {
                    throw new ArgumentException("expected '>' at position " + this.pos);
                }

                string str = this.b.Substring(start, ((this.pos - 1) - start));
                int    i   = str.IndexOf('-');
                if (i == -1)
                {
                    if (!this.Check(RegExpSyntaxOptions.Automaton))
                    {
                        throw new ArgumentException("interval syntax error at position " + (this.pos - 1));
                    }

                    return(RegExp.MakeAutomaton(str));
                }

                if (!this.Check(RegExpSyntaxOptions.Interval))
                {
                    throw new ArgumentException("illegal identifier at position " + (this.pos - 1));
                }

                try
                {
                    if (i == 0 || i == str.Length - 1 || i != str.LastIndexOf('-'))
                    {
                        throw new FormatException();
                    }

                    string smin      = str.Substring(0, i - 0);
                    string smax      = str.Substring(i + 1, (str.Length - (i + 1)));
                    int    imin      = int.Parse(smin, CultureInfo.CurrentCulture);
                    int    imax      = int.Parse(smax, CultureInfo.CurrentCulture);
                    int    numdigits = smin.Length == smax.Length ? smin.Length : 0;
                    if (imin > imax)
                    {
                        int t = imin;
                        imin = imax;
                        imax = t;
                    }

                    return(RegExp.MakeInterval(imin, imax, numdigits));
                }
                catch (FormatException)
                {
                    throw new ArgumentException("interval syntax error at position " + (this.pos - 1));
                }
            }

            return(RegExp.MakeChar(this.ParseCharExp()));
        }