示例#1
0
        /// <summary>
        ///   Initializes a new instance of the <see cref = "RegExp" /> class from a string.
        /// </summary>
        /// <param name = "s">A string with the regular expression. An empty string yields the empty-string expression.</param>
        /// <param name = "syntaxFlags">Boolean 'or' of optional syntax constructs to be enabled.</param>
        /// <exception cref = "ArgumentNullException"><paramref name = "s" /> is <c>null</c>.</exception>
        /// <exception cref = "ArgumentException">Parsing stopped before the end of <paramref name = "s" /> was reached.</exception>
        public RegExp(string s, RegExpSyntaxOptions syntaxFlags)
        {
            // Fail with a descriptive argument error rather than a NullReferenceException on s.Length below.
            if (s == null)
            {
                throw new ArgumentNullException("s");
            }

            // b is the parse buffer; it is only needed while the constructor runs.
            this.b     = s;
            this.flags = syntaxFlags;
            RegExp e;

            if (s.Length == 0)
            {
                e = RegExp.MakeString(string.Empty);
            }
            else
            {
                e = this.ParseUnionExp();

                // Anything left over after the top-level expression is a syntax error.
                if (this.pos < b.Length)
                {
                    throw new ArgumentException("end-of-string expected at position " + this.pos);
                }
            }

            // Adopt the parsed root node's state as this instance's own.
            this.Kind          = e.Kind;
            this.Expr1         = e.Expr1;
            this.Expr2         = e.Expr2;
            this.SourceRegExpr = e.SourceRegExpr;
            this.Char          = e.Char;
            this.Min           = e.Min;
            this.Max           = e.Max;
            this.Digits        = e.Digits;
            this.FromChar      = e.FromChar;
            this.ToChar        = e.ToChar;

            // Release the parse buffer now that parsing is complete.
            this.b             = null;
        }
示例#2
0
文件: RegExp.cs 项目: Licho1/Fare
        /// <summary>
        ///   Initializes a new instance of the <see cref = "RegExp" /> class from a string.
        /// </summary>
        /// <param name = "s">A string with the regular expression. An empty string yields the empty-string expression.</param>
        /// <param name = "anyCharAlphabet">
        ///   Optional alphabet string. When not <c>null</c> it is stored on the instance before parsing starts;
        ///   when <c>null</c> the instance's existing alphabet value is left untouched.
        /// </param>
        /// <param name = "syntaxFlags">Boolean 'or' of optional syntax constructs to be enabled.</param>
        /// <exception cref = "ArgumentNullException"><paramref name = "s" /> is <c>null</c>.</exception>
        /// <exception cref = "ArgumentException">Parsing stopped before the end of <paramref name = "s" /> was reached.</exception>
        public RegExp(string s, string anyCharAlphabet, RegExpSyntaxOptions syntaxFlags)
        {
            // Fail with a descriptive argument error rather than a NullReferenceException on s.Length below.
            if (s == null)
            {
                throw new ArgumentNullException("s");
            }

            // b is the parse buffer; it is only needed while the constructor runs.
            this.b     = s;
            this.flags = syntaxFlags;
            RegExp e;

            if (anyCharAlphabet != null)
            {
                this.anyCharAlphabet = anyCharAlphabet;
            }

            if (s.Length == 0)
            {
                e = RegExp.MakeString(string.Empty);
            }
            else
            {
                e = this.ParseUnionExp();

                // Anything left over after the top-level expression is a syntax error.
                if (this.pos < b.Length)
                {
                    throw new ArgumentException("end-of-string expected at position " + this.pos);
                }
            }

            // Adopt the parsed root node's state as this instance's own.
            this.kind   = e.kind;
            this.exp1   = e.exp1;
            this.exp2   = e.exp2;
            this.s      = e.s;
            this.c      = e.c;
            this.min    = e.min;
            this.max    = e.max;
            this.digits = e.digits;
            this.from   = e.from;
            this.to     = e.to;

            // Release the parse buffer now that parsing is complete.
            this.b      = null;
        }
示例#3
0
文件: RegExp.cs 项目: Licho1/Fare
        /// <summary>
        ///   Builds the concatenation of two expressions, merging adjacent char/string
        ///   operands into a single string expression where possible.
        /// </summary>
        /// <param name = "exp1">The left-hand expression.</param>
        /// <param name = "exp2">The right-hand expression.</param>
        /// <returns>
        ///   A string expression when both operands are chars or strings; otherwise a
        ///   concatenation node, with a char/string operand folded into a neighbouring
        ///   char/string child of the other operand when that operand is itself a concatenation.
        /// </returns>
        private static RegExp MakeConcatenation(RegExp exp1, RegExp exp2)
        {
            bool leftIsLiteral = exp1.kind == Kind.RegexpChar || exp1.kind == Kind.RegexpString;

            // Two literals collapse into one string expression.
            if (leftIsLiteral && (exp2.kind == Kind.RegexpChar || exp2.kind == Kind.RegexpString))
            {
                return RegExp.MakeString(exp1, exp2);
            }

            var result = new RegExp();
            result.kind = Kind.RegexpConcatenation;

            // Default: keep both operands as they are.
            RegExp left  = exp1;
            RegExp right = exp2;

            if (exp1.kind == Kind.RegexpConcatenation)
            {
                // (x . literal) . literal  =>  x . (literal + literal)
                RegExp tail = exp1.exp2;
                if ((tail.kind == Kind.RegexpChar || tail.kind == Kind.RegexpString) &&
                    (exp2.kind == Kind.RegexpChar || exp2.kind == Kind.RegexpString))
                {
                    left  = exp1.exp1;
                    right = RegExp.MakeString(tail, exp2);
                }
            }
            else if (leftIsLiteral && exp2.kind == Kind.RegexpConcatenation)
            {
                // literal . (literal . y)  =>  (literal + literal) . y
                RegExp head = exp2.exp1;
                if (head.kind == Kind.RegexpChar || head.kind == Kind.RegexpString)
                {
                    left  = RegExp.MakeString(exp1, head);
                    right = exp2.exp2;
                }
            }

            result.exp1 = left;
            result.exp2 = right;
            return result;
        }
示例#4
0
        /// <summary>
        ///   Builds a <see cref = "RegExp" /> by parsing the supplied pattern text.
        /// </summary>
        /// <param name = "s">
        ///   The regular expression text. It is passed through
        ///   <c>ReplaceShorthandCharacterClasses</c> and the result is what gets parsed.
        /// </param>
        /// <param name = "syntaxFlags">Boolean 'or' of the optional syntax constructs to enable.</param>
        /// <exception cref = "ArgumentException">Parsing stopped before the end of the preprocessed text.</exception>
        internal RegExp(string s, RegExpSyntaxOptions syntaxFlags)
        {
            this.flags = syntaxFlags;

            // b is the parse buffer; it only lives for the duration of the constructor.
            this.b = RegExp.ReplaceShorthandCharacterClasses(s);

            RegExp parsed;
            if (s.Length != 0)
            {
                parsed = this.ParseUnionExp();

                // Leftover input after the top-level expression is a syntax error.
                if (this.b.Length > this.pos)
                {
                    throw new ArgumentException("end-of-string expected at position " + this.pos);
                }
            }
            else
            {
                // An empty pattern is the empty-string expression.
                parsed = RegExp.MakeString(string.Empty);
            }

            // Take over the parsed root node's state.
            this.kind = parsed.kind;
            this.exp1 = parsed.exp1;
            this.exp2 = parsed.exp2;
            this.s = parsed.s;
            this.c = parsed.c;
            this.min = parsed.min;
            this.max = parsed.max;
            this.digits = parsed.digits;
            this.from = parsed.from;
            this.to = parsed.to;

            // Drop the parse buffer.
            this.b = null;
        }
示例#5
0
文件: RegExp.cs 项目: Licho1/Fare
        /// <summary>
        ///   Joins the text of two char/string expressions into one string expression.
        /// </summary>
        /// <param name = "exp1">The expression whose text comes first.</param>
        /// <param name = "exp2">The expression whose text comes second.</param>
        /// <returns>The string expression created from the combined text.</returns>
        private static RegExp MakeString(RegExp exp1, RegExp exp2)
        {
            var text = new StringBuilder();

            foreach (RegExp part in new[] { exp1, exp2 })
            {
                // Anything that is not a string expression contributes its single character.
                if (part.kind != Kind.RegexpString)
                {
                    text.Append(part.c);
                    continue;
                }

                text.Append(part.s);
            }

            return RegExp.MakeString(text.ToString());
        }
示例#6
0
        /// <summary>
        ///   Joins the text of two char/string expressions into one string expression.
        /// </summary>
        /// <param name = "exp1">The expression whose text comes first.</param>
        /// <param name = "exp2">The expression whose text comes second.</param>
        /// <returns>The string expression created from the combined text.</returns>
        private static RegExp MakeString(RegExp exp1, RegExp exp2)
        {
            var text = new StringBuilder();

            foreach (RegExp part in new[] { exp1, exp2 })
            {
                // Anything that is not a string expression contributes its single character.
                if (part.Kind != Kind.RegexpString)
                {
                    text.Append(part.Char);
                    continue;
                }

                text.Append(part.SourceRegExpr);
            }

            return RegExp.MakeString(text.ToString());
        }
示例#7
0
文件: RegExp.cs 项目: Licho1/Fare
        /// <summary>
        ///   Parses a single simple expression at the current position. The alternatives are
        ///   tried in this order: '.', '#' (when the Empty option is enabled), '@' (when the
        ///   Anystring option is enabled), a double-quoted string, a parenthesised
        ///   sub-expression, a '&lt;...&gt;' automaton name or numeric interval (when the
        ///   corresponding option is enabled), a backslash escape (<c>\\</c>, <c>\d</c>,
        ///   <c>\D</c>, <c>\s</c>, <c>\S</c>, <c>\w</c>, <c>\W</c>), and finally a single character.
        /// </summary>
        /// <returns>The expression node for the construct that was recognised.</returns>
        /// <exception cref = "ArgumentException">
        ///   A closing '"', ')' or '&gt;' is missing, or the text between '&lt;' and '&gt;' is
        ///   not acceptable under the enabled options (including interval bounds that are not
        ///   numbers or do not fit in an <see cref = "int" />).
        /// </exception>
        private RegExp ParseSimpleExp()
        {
            if (this.Match('.'))
            {
                return MakeAnyPrintableASCIIChar(this.anyCharAlphabet);
            }

            if (this.Check(RegExpSyntaxOptions.Empty) && this.Match('#'))
            {
                return RegExp.MakeEmpty();
            }

            if (this.Check(RegExpSyntaxOptions.Anystring) && this.Match('@'))
            {
                return RegExp.MakeAnyString();
            }

            if (this.Match('"'))
            {
                // Everything up to the next '"' is taken literally.
                int start = pos;
                while (this.More() && !this.Peek("\""))
                {
                    this.Next();
                }

                if (!this.Match('"'))
                {
                    throw new ArgumentException("expected '\"' at position " + pos);
                }

                // pos is now just past the closing quote, hence the "- 1".
                return RegExp.MakeString(b.Substring(start, (pos - 1) - start));
            }

            if (this.Match('('))
            {
                if (this.Match('?'))
                {
                    this.SkipNonCapturingSubpatternExp();
                }

                // "()" is the empty string.
                if (this.Match(')'))
                {
                    return RegExp.MakeString(string.Empty);
                }

                RegExp e = this.ParseUnionExp();
                if (!this.Match(')'))
                {
                    throw new ArgumentException("expected ')' at position " + pos);
                }

                return e;
            }

            if ((this.Check(RegExpSyntaxOptions.Automaton) || this.Check(RegExpSyntaxOptions.Interval)) && this.Match('<'))
            {
                int start = pos;
                while (this.More() && !this.Peek(">"))
                {
                    this.Next();
                }

                if (!this.Match('>'))
                {
                    throw new ArgumentException("expected '>' at position " + pos);
                }

                // Text between '<' and '>': without a '-' it is an automaton name, with one it is "min-max".
                string str = b.Substring(start, (pos - 1) - start);
                int    i   = str.IndexOf('-');
                if (i == -1)
                {
                    if (!this.Check(RegExpSyntaxOptions.Automaton))
                    {
                        throw new ArgumentException("interval syntax error at position " + (pos - 1));
                    }

                    return RegExp.MakeAutomaton(str);
                }

                if (!this.Check(RegExpSyntaxOptions.Interval))
                {
                    throw new ArgumentException("illegal identifier at position " + (pos - 1));
                }

                try
                {
                    // Exactly one '-' is allowed, and it may be neither the first nor the last character.
                    if (i == 0 || i == str.Length - 1 || i != str.LastIndexOf('-'))
                    {
                        throw new FormatException();
                    }

                    string smin = str.Substring(0, i);
                    string smax = str.Substring(i + 1);

                    // Regex source is machine-readable text, so parse it culture-independently.
                    int imin = int.Parse(smin, System.Globalization.CultureInfo.InvariantCulture);
                    int imax = int.Parse(smax, System.Globalization.CultureInfo.InvariantCulture);

                    // A digit count is only recorded when both bounds are written with the same width.
                    int numdigits = smin.Length == smax.Length ? smin.Length : 0;
                    if (imin > imax)
                    {
                        int t = imin;
                        imin = imax;
                        imax = t;
                    }

                    return RegExp.MakeInterval(imin, imax, numdigits);
                }
                catch (FormatException)
                {
                    throw new ArgumentException("interval syntax error at position " + (pos - 1));
                }
                catch (OverflowException)
                {
                    // A bound outside the int range is a syntax error too, not an unrelated failure.
                    throw new ArgumentException("interval syntax error at position " + (pos - 1));
                }
            }

            if (this.Match('\\'))
            {
                // Escaped '\' character.
                if (this.Match('\\'))
                {
                    return MakeChar('\\');
                }

                bool inclusion;

                // NOTE(review): when an alphabet is configured, the inclusive classes below are
                // built by filtering it with char.IsDigit / char.IsWhiteSpace / char.IsLetter,
                // which accept more characters than the ASCII defaults used when it is null.
                // Confirm that difference is intended.

                // Digits.
                if ((inclusion = this.Match('d')) || this.Match('D'))
                {
                    RegExp digitChars = MakeCharRange('0', '9');
                    return inclusion
                        ? this.anyCharAlphabet == null
                            ? digitChars
                            : MakeAnyPrintableASCIIChar(new string(this.anyCharAlphabet.Where(char.IsDigit).ToArray()))
                        : ExcludeChars(digitChars, MakeAnyPrintableASCIIChar(this.anyCharAlphabet));
                }

                // Whitespace chars only.
                if ((inclusion = this.Match('s')) || this.Match('S'))
                {
                    RegExp whitespaceChars = MakeUnion(MakeChar(' '), MakeChar('\t'));
                    return inclusion
                        ? this.anyCharAlphabet == null
                            ? whitespaceChars
                            : MakeAnyPrintableASCIIChar(new string(this.anyCharAlphabet.Where(char.IsWhiteSpace).ToArray()))
                        : ExcludeChars(whitespaceChars, MakeAnyPrintableASCIIChar(this.anyCharAlphabet));
                }

                // Word character. Default range is [A-Za-z0-9_]
                if ((inclusion = this.Match('w')) || this.Match('W'))
                {
                    var    ranges    = new[] { MakeCharRange('A', 'Z'), MakeCharRange('a', 'z'), MakeCharRange('0', '9') };
                    RegExp wordChars = ranges.Aggregate(MakeChar('_'), MakeUnion);
                    return inclusion
                        ? this.anyCharAlphabet == null
                            ? wordChars
                            : MakeAnyPrintableASCIIChar(new string(this.anyCharAlphabet
                                                                   .Where(x => char.IsLetter(x) || char.IsDigit(x) || x == '_').ToArray()))
                        : ExcludeChars(wordChars, MakeAnyPrintableASCIIChar(this.anyCharAlphabet));
                }
            }

            // Anything else (including an unrecognised escape) is handled as a single character.
            return RegExp.MakeChar(this.ParseCharExp());
        }
示例#8
0
        /// <summary>
        ///   Parses a single simple expression at the current position. The alternatives are
        ///   tried in this order: '.', '@' (when the Anystring option is enabled), a
        ///   double-quoted string, a parenthesised sub-expression, a '&lt;...&gt;' automaton
        ///   name or numeric interval (when the corresponding option is enabled), a backslash
        ///   escape (<c>\\</c>, <c>\d</c>, <c>\D</c>, <c>\s</c>, <c>\S</c>, <c>\w</c>,
        ///   <c>\W</c>), and finally a single character.
        /// </summary>
        /// <returns>The expression node for the construct that was recognised.</returns>
        /// <exception cref = "ArgumentException">
        ///   A closing '"', ')' or '&gt;' is missing, or the text between '&lt;' and '&gt;' is
        ///   not acceptable under the enabled options (including interval bounds that are not
        ///   numbers or do not fit in an <see cref = "int" />).
        /// </exception>
        private RegExp ParseSimpleExp()
        {
            if (this.Match('.'))
            {
                return MakeAnyPrintableASCIIChar();
            }

            /* Issue 32, https://github.com/moodmosaic/Fare/issues/32
             *   The intent of the original code is a little unclear.  The comment for the Empty value in the
             *   enum is 'Enables empty language.'  Using '#' as token seems non-standard, and caused
             *   unhandled exception in some cases.  The best option at this point is to remove handling of the
             *   Empty option until a proper implementation is proposed.
             */
            // if (this.Check(RegExpSyntaxOptions.Empty) && this.Match('#'))
            // {
            //     return RegExp.MakeEmpty();
            // }

            if (this.Check(RegExpSyntaxOptions.Anystring) && this.Match('@'))
            {
                return RegExp.MakeAnyString();
            }

            if (this.Match('"'))
            {
                // Everything up to the next '"' is taken literally.
                int start = pos;
                while (this.More() && !this.Peek("\""))
                {
                    this.Next();
                }

                if (!this.Match('"'))
                {
                    throw new ArgumentException("expected '\"' at position " + pos);
                }

                // pos is now just past the closing quote, hence the "- 1".
                return RegExp.MakeString(b.Substring(start, (pos - 1) - start));
            }

            if (this.Match('('))
            {
                if (this.Match('?'))
                {
                    this.SkipNonCapturingSubpatternExp();
                }

                // "()" is the empty string.
                if (this.Match(')'))
                {
                    return RegExp.MakeString(string.Empty);
                }

                RegExp e = this.ParseUnionExp();
                if (!this.Match(')'))
                {
                    throw new ArgumentException("expected ')' at position " + pos);
                }

                return e;
            }

            if ((this.Check(RegExpSyntaxOptions.Automaton) || this.Check(RegExpSyntaxOptions.Interval)) && this.Match('<'))
            {
                int start = pos;
                while (this.More() && !this.Peek(">"))
                {
                    this.Next();
                }

                if (!this.Match('>'))
                {
                    throw new ArgumentException("expected '>' at position " + pos);
                }

                // Text between '<' and '>': without a '-' it is an automaton name, with one it is "min-max".
                string str = b.Substring(start, (pos - 1) - start);
                int    i   = str.IndexOf('-');
                if (i == -1)
                {
                    if (!this.Check(RegExpSyntaxOptions.Automaton))
                    {
                        throw new ArgumentException("interval syntax error at position " + (pos - 1));
                    }

                    return RegExp.MakeAutomaton(str);
                }

                if (!this.Check(RegExpSyntaxOptions.Interval))
                {
                    throw new ArgumentException("illegal identifier at position " + (pos - 1));
                }

                try
                {
                    // Exactly one '-' is allowed, and it may be neither the first nor the last character.
                    if (i == 0 || i == str.Length - 1 || i != str.LastIndexOf('-'))
                    {
                        throw new FormatException();
                    }

                    string smin = str.Substring(0, i);
                    string smax = str.Substring(i + 1);

                    // Regex source is machine-readable text, so parse it culture-independently.
                    int imin = int.Parse(smin, System.Globalization.NumberFormatInfo.InvariantInfo);
                    int imax = int.Parse(smax, System.Globalization.NumberFormatInfo.InvariantInfo);

                    // A digit count is only recorded when both bounds are written with the same width.
                    int numdigits = smin.Length == smax.Length ? smin.Length : 0;
                    if (imin > imax)
                    {
                        int t = imin;
                        imin = imax;
                        imax = t;
                    }

                    return RegExp.MakeInterval(imin, imax, numdigits);
                }
                catch (FormatException)
                {
                    throw new ArgumentException("interval syntax error at position " + (pos - 1));
                }
                catch (OverflowException)
                {
                    // A bound outside the int range is a syntax error too, not an unrelated failure.
                    throw new ArgumentException("interval syntax error at position " + (pos - 1));
                }
            }

            if (this.Match('\\'))
            {
                // Escaped '\' character.
                if (this.Match('\\'))
                {
                    return MakeChar('\\');
                }

                bool inclusion;

                // Digits.
                if ((inclusion = this.Match('d')) || this.Match('D'))
                {
                    RegExp digitChars = MakeCharRange('0', '9');
                    return inclusion ? digitChars : ExcludeChars(digitChars, MakeAnyPrintableASCIIChar());
                }

                // Whitespace chars only.
                if ((inclusion = this.Match('s')) || this.Match('S'))
                {
                    // Do not add line breaks, as usually RegExp is single line.
                    RegExp whitespaceChars = MakeUnion(MakeChar(' '), MakeChar('\t'));
                    return inclusion ? whitespaceChars : ExcludeChars(whitespaceChars, MakeAnyPrintableASCIIChar());
                }

                // Word character. Range is [A-Za-z0-9_]
                if ((inclusion = this.Match('w')) || this.Match('W'))
                {
                    var    ranges    = new[] { MakeCharRange('A', 'Z'), MakeCharRange('a', 'z'), MakeCharRange('0', '9') };
                    RegExp wordChars = ranges.Aggregate(MakeChar('_'), MakeUnion);

                    return inclusion ? wordChars : ExcludeChars(wordChars, MakeAnyPrintableASCIIChar());
                }
            }

            // Anything else (including an unrecognised escape) is handled as a single character.
            return RegExp.MakeChar(this.ParseCharExp());
        }
示例#9
0
        /// <summary>
        ///   Parses a single simple expression at the current position. The alternatives are
        ///   tried in this order: '.', '#' (when the Empty option is enabled), '@' (when the
        ///   Anystring option is enabled), a double-quoted string, a parenthesised
        ///   sub-expression, a '&lt;...&gt;' automaton name or numeric interval (when the
        ///   corresponding option is enabled), and finally a single character.
        /// </summary>
        /// <returns>The expression node for the construct that was recognised.</returns>
        /// <exception cref = "ArgumentException">
        ///   A closing '"', ')' or '&gt;' is missing, or the text between '&lt;' and '&gt;' is
        ///   not acceptable under the enabled options (including interval bounds that are not
        ///   numbers or do not fit in an <see cref = "int" />).
        /// </exception>
        private RegExp ParseSimpleExp()
        {
            if (this.Match('.'))
            {
                return RegExp.MakeAnyChar();
            }

            if (this.Check(RegExpSyntaxOptions.Empty) && this.Match('#'))
            {
                return RegExp.MakeEmpty();
            }

            if (this.Check(RegExpSyntaxOptions.Anystring) && this.Match('@'))
            {
                return RegExp.MakeAnyString();
            }

            if (this.Match('"'))
            {
                // Everything up to the next '"' is taken literally.
                int start = this.pos;
                while (this.More() && !this.Peek("\""))
                {
                    this.Next();
                }

                if (!this.Match('"'))
                {
                    throw new ArgumentException("expected '\"' at position " + this.pos);
                }

                // pos is now just past the closing quote, hence the "- 1".
                return RegExp.MakeString(this.b.Substring(start, (this.pos - 1) - start));
            }

            if (this.Match('('))
            {
                if (this.Match('?'))
                {
                    this.SkipNonCapturingSubpatternExp();
                }

                // "()" is the empty string.
                if (this.Match(')'))
                {
                    return RegExp.MakeString(string.Empty);
                }

                RegExp e = this.ParseUnionExp();
                if (!this.Match(')'))
                {
                    throw new ArgumentException("expected ')' at position " + this.pos);
                }

                return e;
            }

            if ((this.Check(RegExpSyntaxOptions.Automaton) || this.Check(RegExpSyntaxOptions.Interval)) && this.Match('<'))
            {
                int start = this.pos;
                while (this.More() && !this.Peek(">"))
                {
                    this.Next();
                }

                if (!this.Match('>'))
                {
                    throw new ArgumentException("expected '>' at position " + this.pos);
                }

                // Text between '<' and '>': without a '-' it is an automaton name, with one it is "min-max".
                string str = this.b.Substring(start, (this.pos - 1) - start);
                int    i   = str.IndexOf('-');
                if (i == -1)
                {
                    if (!this.Check(RegExpSyntaxOptions.Automaton))
                    {
                        throw new ArgumentException("interval syntax error at position " + (this.pos - 1));
                    }

                    return RegExp.MakeAutomaton(str);
                }

                if (!this.Check(RegExpSyntaxOptions.Interval))
                {
                    throw new ArgumentException("illegal identifier at position " + (this.pos - 1));
                }

                try
                {
                    // Exactly one '-' is allowed, and it may be neither the first nor the last character.
                    if (i == 0 || i == str.Length - 1 || i != str.LastIndexOf('-'))
                    {
                        throw new FormatException();
                    }

                    string smin = str.Substring(0, i);
                    string smax = str.Substring(i + 1);

                    // Regex source is machine-readable text, so parse it culture-independently.
                    int imin = int.Parse(smin, CultureInfo.InvariantCulture);
                    int imax = int.Parse(smax, CultureInfo.InvariantCulture);

                    // A digit count is only recorded when both bounds are written with the same width.
                    int numdigits = smin.Length == smax.Length ? smin.Length : 0;
                    if (imin > imax)
                    {
                        int t = imin;
                        imin = imax;
                        imax = t;
                    }

                    return RegExp.MakeInterval(imin, imax, numdigits);
                }
                catch (FormatException)
                {
                    throw new ArgumentException("interval syntax error at position " + (this.pos - 1));
                }
                catch (OverflowException)
                {
                    // A bound outside the int range is a syntax error too, not an unrelated failure.
                    throw new ArgumentException("interval syntax error at position " + (this.pos - 1));
                }
            }

            // Anything else is handled as a single character.
            return RegExp.MakeChar(this.ParseCharExp());
        }