Ejemplo n.º 1
0
        /// <summary>
        /// Scans forward from the current position for the next run of one or more ASCII letters.
        /// </summary>
        /// <param name="context">The parse context to read from</param>
        /// <returns>A match describing where the run started and the text captured for it, or null if the input ended first</returns>
        internal static RE.CharFAMatch Match(RE.ParseContext context)
        {
            context.EnsureStarted();

            // Start-of-match bookkeeping; refreshed after every skipped character.
            int  startLine     = context.Line;
            int  startColumn   = context.Column;
            long startPosition = context.Position;
            int  captureStart  = context.CaptureBuffer.Length;
            bool matched       = false;

            while (!matched && context.Current != -1)
            {
                bool startsWithLetter =
                    (context.Current >= 'A' && context.Current <= 'Z') ||
                    (context.Current >= 'a' && context.Current <= 'z');

                if (!startsWithLetter)
                {
                    // Not the start of a match: skip the character (without capturing it)
                    // and restart the bookkeeping at the new position.
                    context.Advance();
                    startLine     = context.Line;
                    startColumn   = context.Column;
                    startPosition = context.Position;
                    captureStart  = context.CaptureBuffer.Length;
                    continue;
                }

                // Consume the whole run of letters.
                do
                {
                    context.CaptureCurrent();
                    context.Advance();
                }
                while ((context.Current >= 'A' && context.Current <= 'Z') ||
                       (context.Current >= 'a' && context.Current <= 'z'));

                matched = true;
            }

            if (!matched)
            {
                return null;
            }
            return new RE.CharFAMatch(startLine, startColumn, startPosition, context.GetCapture(captureStart));
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Reads the next lexeme from the parse context by stepping this machine over the input.
        /// </summary>
        /// <param name="context">The <see cref="ParseContext"/> supplying the input; consumed characters are appended to its capture</param>
        /// <param name="errorSymbol">The symbol returned when no accepting state was reached</param>
        /// <returns>The accept symbol of the states reached, or <paramref name="errorSymbol"/> on failure</returns>
        public TAccept Lex(ParseContext context, TAccept errorSymbol = default(TAccept))
        {
            // begin with the epsilon closure of the start state
            var current = FillEpsilonClosure();

            context.EnsureStarted();
            for (;;)
            {
                TAccept accepted;

                // end of input: report whatever the current states accept, if anything
                if (-1 == context.Current)
                {
                    return TryGetAnyAcceptSymbol(current, out accepted) ? accepted : errorSymbol;
                }

                var next = FillMove(current, (char)context.Current);
                if (0 != next.Count)
                {
                    // a transition exists: consume the character and keep going
                    context.CaptureCurrent();
                    context.Advance();
                    current = next;
                    continue;
                }

                // dead end: accept what we have so far, leaving the character unread
                if (TryGetAnyAcceptSymbol(current, out accepted))
                {
                    return accepted;
                }

                // nothing accepted: consume the offending character and report the error
                context.CaptureCurrent();
                context.Advance();
                return errorSymbol;
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Reads the next lexeme from the parse context using a DFA state table.
        /// </summary>
        /// <param name="dfaTable">The DFA state table; entry 0 is the start state</param>
        /// <param name="context">The <see cref="ParseContext"/> supplying the input; consumed characters are appended to its capture</param>
        /// <param name="errorSymbol">The symbol id returned when no accepting state was reached</param>
        /// <returns>The accept symbol id of the last state reached, or <paramref name="errorSymbol"/> on failure</returns>
        public static int LexDfa(CharDfaEntry[] dfaTable, ParseContext context, int errorSymbol = -1)
        {
            var current = 0;

            context.EnsureStarted();
            for (;;)
            {
                // end of input: the result depends only on the state we stopped in
                if (-1 == context.Current)
                {
                    var finalId = dfaTable[current].AcceptSymbolId;
                    return (-1 != finalId) ? finalId : errorSymbol;
                }

                var next = MoveDfa(dfaTable, current, (char)context.Current);
                if (-1 != next)
                {
                    // a transition exists: consume the character and follow it
                    context.CaptureCurrent();
                    context.Advance();
                    current = next;
                    continue;
                }

                // dead end: accept what we have so far, leaving the character unread
                var acceptId = dfaTable[current].AcceptSymbolId;
                if (-1 != acceptId)
                {
                    return acceptId;
                }

                // nothing accepted: consume the offending character and report the error
                context.CaptureCurrent();
                context.Advance();
                return errorSymbol;
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Reads the next lexeme from the parse context, treating this state as the start of a DFA.
        /// </summary>
        /// <param name="context">The <see cref="ParseContext"/> supplying the input; consumed characters are appended to its capture</param>
        /// <param name="errorSymbol">The symbol returned when no accepting state was reached</param>
        /// <returns>The accept symbol of the last state reached, or <paramref name="errorSymbol"/> on failure</returns>
        /// <remarks>Only meaningful for a DFA. Running it on an NFA does not raise an error but gives unreliable results.</remarks>
        public TAccept LexDfa(ParseContext context, TAccept errorSymbol = default(TAccept))
        {
            var current = this;

            context.EnsureStarted();
            for (;;)
            {
                // end of input: the result depends only on the state we stopped in
                if (-1 == context.Current)
                {
                    return current.IsAccepting ? current.AcceptSymbol : errorSymbol;
                }

                var next = current.MoveDfa((char)context.Current);
                if (null != next)
                {
                    // a transition exists: consume the character and follow it
                    context.CaptureCurrent();
                    context.Advance();
                    current = next;
                    continue;
                }

                // dead end: accept what we have so far, leaving the character unread
                if (current.IsAccepting)
                {
                    return current.AcceptSymbol;
                }

                // nothing accepted: consume the offending character and report the error
                context.CaptureCurrent();
                context.Advance();
                return errorSymbol;
            }
        }
Ejemplo n.º 5
0
 /// <summary>
 /// Handles a character class escape inside a bracketed set: flushes any pending
 /// entry, consumes the escape letter and appends the named class.
 /// </summary>
 /// <param name="pc">The parse context, positioned on the escape letter</param>
 /// <param name="cls">The name of the character class to append</param>
 /// <param name="result">The list of entries collected so far</param>
 /// <param name="next">The pending single-character entry, if any; reset to null on return</param>
 /// <param name="readDash">Whether a dash was read after the pending entry; reset to false on return</param>
 static void _ParseCharClassEscape(ParseContext pc, string cls, List <RegexCharsetEntry> result, ref RegexCharsetEntry next, ref bool readDash)
 {
     if (null != next)
     {
         // A class cannot be the end of a range, so the pending character stands on its own...
         result.Add(next);
         if (readDash)
         {
             // ...and a dash that followed it is a literal dash. A dash is only added
             // when one was actually read from the input.
             result.Add(new RegexCharsetCharEntry('-'));
         }
     }
     pc.Advance();
     result.Add(new RegexCharsetClassEntry(cls));
     next     = null;
     readDash = false;
 }
Ejemplo n.º 6
0
        /// <summary>
        /// Hand-unrolled state machine that lexes one token from the parse context.
        /// Each consumed character is captured into the context before advancing.
        /// </summary>
        /// <param name="context">The parse context to read from</param>
        /// <returns>
        /// 0 for a run of digits, 1 for a run of ASCII letters, 2 for a run of whitespace,
        /// or 3 when the current input starts none of these (one character is still captured and consumed)
        /// </returns>
        internal static int Lex(RE.ParseContext context)
        {
            context.EnsureStarted();
            // q0: start state - dispatch on the first character
            if (((context.Current >= '0') &&
                 (context.Current <= '9')))
            {
                context.CaptureCurrent();
                context.Advance();
                goto q1;
            }
            if ((((context.Current >= 'A') &&
                  (context.Current <= 'Z')) ||
                 ((context.Current >= 'a') &&
                  (context.Current <= 'z'))))
            {
                context.CaptureCurrent();
                context.Advance();
                goto q2;
            }
            // NOTE(review): the upper bound of the '\n'.. range below is an empty char literal,
            // which is not valid C#. Presumably a control character (e.g. vertical tab or form feed)
            // was lost when this text was extracted - confirm against the generator output.
            if (((((context.Current == '\t') ||
                   ((context.Current >= '\n') &&
                    (context.Current <= ''))) ||
                  (context.Current == '\r')) ||
                 (context.Current == ' ')))
            {
                context.CaptureCurrent();
                context.Advance();
                goto q3;
            }
            goto error;
// q1: inside a run of digits; accepts with symbol 0 at the first non-digit
q1:
            if (((context.Current >= '0') &&
                 (context.Current <= '9')))
            {
                context.CaptureCurrent();
                context.Advance();
                goto q1;
            }
            return(0);

// q2: inside a run of letters; accepts with symbol 1 at the first non-letter
q2:
            if ((((context.Current >= 'A') &&
                  (context.Current <= 'Z')) ||
                 ((context.Current >= 'a') &&
                  (context.Current <= 'z'))))
            {
                context.CaptureCurrent();
                context.Advance();
                goto q2;
            }
            return(1);

// q3: inside a run of whitespace; accepts with symbol 2 at the first non-whitespace
q3:
            // NOTE(review): same empty char literal as in q0 - confirm the intended upper bound.
            if (((((context.Current == '\t') ||
                   ((context.Current >= '\n') &&
                    (context.Current <= ''))) ||
                  (context.Current == '\r')) ||
                 (context.Current == ' ')))
            {
                context.CaptureCurrent();
                context.Advance();
                goto q3;
            }
            return(2);

// error: no state matched the first character; consume it and report symbol 3
error:
            context.CaptureCurrent();
            context.Advance();
            return(3);
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Parses the part of an escape sequence that follows the backslash inside a character set
        /// and returns the character it denotes.
        /// </summary>
        /// <param name="pc">The parse context, positioned on the character after the backslash</param>
        /// <returns>
        /// The escaped character value, or -1 if the input has ended. On return the cursor is
        /// always positioned after the last character that belongs to the escape.
        /// </returns>
        /// <remarks>
        /// <c>x</c> reads between one and four hex digits and stops at the first non-hex character;
        /// with no hex digit at all it yields a literal 'x'. <c>u</c> reads up to four characters as
        /// hex digits and only stops early at the end of the input. Any other character stands for itself.
        /// </remarks>
        static int _ParseRangeEscapePart(ParseContext pc)
        {
            if (-1 == pc.Current)
            {
                return(-1);
            }
            switch (pc.Current)
            {
            case 'f':
                pc.Advance();
                return('\f');

            case 'v':
                pc.Advance();
                return('\v');

            case 't':
                pc.Advance();
                return('\t');

            case 'n':
                pc.Advance();
                return('\n');

            case 'r':
                pc.Advance();
                return('\r');

            case 'x':
                if (-1 == pc.Advance() || !_IsHexChar((char)pc.Current))
                {
                    return('x');
                }
                // accumulate in an int: up to four hex digits need 16 bits, which a byte cannot hold
                int x = _FromHexChar((char)pc.Current);
                for (var digits = 1; digits < 4; ++digits)
                {
                    if (-1 == pc.Advance() || !_IsHexChar((char)pc.Current))
                    {
                        // the cursor already sits on the first character after the escape
                        return(unchecked ((char)x));
                    }
                    x = (x << 4) | _FromHexChar((char)pc.Current);
                }
                // step past the fourth digit so the caller does not read it again as a literal
                pc.Advance();
                return(unchecked ((char)x));

            case 'u':
                if (-1 == pc.Advance())
                {
                    return('u');
                }
                int u = _FromHexChar((char)pc.Current);
                for (var digits = 1; digits < 4; ++digits)
                {
                    if (-1 == pc.Advance())
                    {
                        // truncated escape: return the digits read so far, unshifted
                        return(unchecked ((char)u));
                    }
                    // shift only once another digit is known to be available
                    u = (u << 4) | _FromHexChar((char)pc.Current);
                }
                // step past the fourth digit so the caller does not read it again as a literal
                pc.Advance();
                return(unchecked ((char)u));

            default:
                int i = pc.Current;
                pc.Advance();
                return((char)i);
            }
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Wraps <paramref name="expr"/> in a repeat or optional expression if the cursor is on a
        /// modifier (<c>*</c>, <c>+</c>, <c>?</c> or <c>{min,max}</c>), consuming the modifier.
        /// </summary>
        /// <param name="expr">The expression the modifier applies to</param>
        /// <param name="pc">The parse context, positioned just after <paramref name="expr"/></param>
        /// <returns>The wrapped expression, or <paramref name="expr"/> unchanged when no modifier is present</returns>
        static RegexExpression _ParseModifier(RegexExpression expr, ParseContext pc)
        {
            // The wrapper is located at the position where the modifier begins.
            var startLine     = pc.Line;
            var startColumn   = pc.Column;
            var startPosition = pc.Position;

            var modifier = pc.Current;

            if ('{' == modifier)
            {
                pc.Advance();
                pc.TrySkipWhiteSpace();
                pc.Expecting('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ',', '}');

                // -1 means "not specified"
                var lower = -1;
                var upper = -1;

                if (',' != pc.Current && '}' != pc.Current)
                {
                    var mark = pc.CaptureBuffer.Length;
                    pc.TryReadDigits();
                    lower = int.Parse(pc.GetCapture(mark));
                    pc.TrySkipWhiteSpace();
                }

                if (',' != pc.Current)
                {
                    // no comma: a single count serves as both bounds
                    upper = lower;
                }
                else
                {
                    pc.Advance();
                    pc.TrySkipWhiteSpace();
                    pc.Expecting('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '}');
                    if ('}' != pc.Current)
                    {
                        var mark = pc.CaptureBuffer.Length;
                        pc.TryReadDigits();
                        upper = int.Parse(pc.GetCapture(mark));
                        pc.TrySkipWhiteSpace();
                    }
                }

                pc.Expecting('}');
                pc.Advance();
                expr = new RegexRepeatExpression(expr, lower, upper);
                expr.SetLocation(startLine, startColumn, startPosition);
                return expr;
            }

            // single-character modifiers
            RegexExpression wrapped;
            if ('*' == modifier)
            {
                wrapped = new RegexRepeatExpression(expr);
            }
            else if ('+' == modifier)
            {
                wrapped = new RegexRepeatExpression(expr, 1);
            }
            else if ('?' == modifier)
            {
                wrapped = new RegexOptionalExpression(expr);
            }
            else
            {
                // not a modifier: leave the input and the expression untouched
                return expr;
            }

            wrapped.SetLocation(startLine, startColumn, startPosition);
            pc.Advance();
            return wrapped;
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Parses the entries of a bracketed character set: single characters, <c>a-z</c> style
        /// ranges, <c>[:name:]</c> classes and backslash escapes.
        /// </summary>
        /// <param name="pc">The parse context, positioned on the first character inside the set</param>
        /// <returns>The entries read. Parsing stops at the closing <c>]</c> (which is not consumed) or at the end of input</returns>
        static IList <RegexCharsetEntry> _ParseRanges(ParseContext pc)
        {
            pc.EnsureStarted();
            var result             = new List <RegexCharsetEntry>();
            // "next" holds a character that may still turn out to be the start of a range;
            // "readDash" records that a dash has been seen after it.
            RegexCharsetEntry next = null;
            bool readDash          = false;

            while (-1 != pc.Current && ']' != pc.Current)
            {
                switch (pc.Current)
                {
                case '[':                         // char class
                    if (null != next)
                    {
                        // a class cannot end a range: flush the pending character, and the
                        // dash too, but only if one was actually read
                        result.Add(next);
                        if (readDash)
                        {
                            result.Add(new RegexCharsetCharEntry('-'));
                        }
                    }
                    pc.Advance();
                    pc.Expecting(':');
                    pc.Advance();
                    var l = pc.CaptureBuffer.Length;
                    pc.TryReadUntil(':', false);
                    var n = pc.GetCapture(l);
                    pc.Advance();
                    pc.Expecting(']');
                    pc.Advance();
                    result.Add(new RegexCharsetClassEntry(n));
                    readDash = false;
                    next     = null;
                    break;

                case '\\':
                    pc.Advance();
                    pc.Expecting();
                    switch (pc.Current)
                    {
                    case 'h':
                        // NOTE(review): Parse maps \h to "blank" outside of a set - confirm that "space" is intended here
                        _ParseCharClassEscape(pc, "space", result, ref next, ref readDash);
                        break;

                    case 'd':
                        _ParseCharClassEscape(pc, "digit", result, ref next, ref readDash);
                        break;

                    case 'D':
                        _ParseCharClassEscape(pc, "^digit", result, ref next, ref readDash);
                        break;

                    case 'l':
                        _ParseCharClassEscape(pc, "lower", result, ref next, ref readDash);
                        break;

                    case 's':
                        _ParseCharClassEscape(pc, "space", result, ref next, ref readDash);
                        break;

                    case 'S':
                        _ParseCharClassEscape(pc, "^space", result, ref next, ref readDash);
                        break;

                    case 'u':
                        _ParseCharClassEscape(pc, "upper", result, ref next, ref readDash);
                        break;

                    case 'w':
                        _ParseCharClassEscape(pc, "word", result, ref next, ref readDash);
                        break;

                    case 'W':
                        _ParseCharClassEscape(pc, "^word", result, ref next, ref readDash);
                        break;

                    default:
                        // any other escape denotes a single character and is handled like a plain one
                        var ch = (char)_ParseRangeEscapePart(pc);
                        if (null == next)
                        {
                            next = new RegexCharsetCharEntry(ch);
                        }
                        else if (readDash)
                        {
                            result.Add(new RegexCharsetRangeEntry(((RegexCharsetCharEntry)next).Value, ch));
                            next     = null;
                            readDash = false;
                        }
                        else
                        {
                            result.Add(next);
                            next = new RegexCharsetCharEntry(ch);
                        }

                        break;
                    }

                    break;

                case '-':
                    pc.Advance();
                    if (null == next)
                    {
                        // a dash with nothing before it is a literal dash
                        next     = new RegexCharsetCharEntry('-');
                        readDash = false;
                    }
                    else
                    {
                        if (readDash)
                        {
                            result.Add(next);
                        }
                        readDash = true;
                    }
                    break;

                default:
                    if (null == next)
                    {
                        next = new RegexCharsetCharEntry((char)pc.Current);
                    }
                    else
                    {
                        if (readDash)
                        {
                            // pending character + dash + this character form a range
                            result.Add(new RegexCharsetRangeEntry(((RegexCharsetCharEntry)next).Value, (char)pc.Current));
                            next     = null;
                            readDash = false;
                        }
                        else
                        {
                            result.Add(next);
                            next = new RegexCharsetCharEntry((char)pc.Current);
                        }
                    }
                    pc.Advance();
                    break;
                }
            }
            // flush whatever is still pending; a trailing dash is a literal dash
            if (null != next)
            {
                result.Add(next);
                if (readDash)
                {
                    next = new RegexCharsetCharEntry('-');
                    result.Add(next);
                }
            }
            return(result);
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Parses a regular expression from the specified <see cref="ParseContext"/>
        /// </summary>
        /// <param name="pc">The parse context to use</param>
        /// <returns>
        /// A new abstract syntax tree representing the expression, or null if nothing was parsed.
        /// Parsing stops at the end of input or at an unconsumed <c>)</c>, so that a recursive
        /// call made for a group hands control back to its caller.
        /// </returns>
        /// <remarks>
        /// A modifier (<c>*</c>, <c>+</c>, <c>?</c>, <c>{m,n}</c>) always binds to the element
        /// immediately preceding it, never to the expression accumulated so far.
        /// </remarks>
        public static RegexExpression Parse(ParseContext pc)
        {
            RegexExpression result = null, next = null;
            int             ich;

            pc.EnsureStarted();
            // location of the element currently being parsed
            var line     = pc.Line;
            var column   = pc.Column;
            var position = pc.Position;

            while (true)
            {
                // Each case below either returns, handles alternation on its own, or leaves the
                // element just parsed in "next" for the shared modifier/concatenation step.
                switch (pc.Current)
                {
                case -1:
                case ')':
                    return(result);

                case '|':
                    if (-1 != pc.Advance())
                    {
                        next   = Parse(pc);
                        result = new RegexOrExpression(result, next);
                        result.SetLocation(line, column, position);
                    }
                    else
                    {
                        // a trailing '|' yields an alternative with an empty right side
                        result = new RegexOrExpression(result, null);
                        result.SetLocation(line, column, position);
                    }
                    line     = pc.Line;
                    column   = pc.Column;
                    position = pc.Position;
                    continue;

                case '.':
                    next = new RegexCharsetExpression(new RegexCharsetEntry[] { new RegexCharsetRangeEntry(char.MinValue, char.MaxValue) }, false);
                    next.SetLocation(line, column, position);
                    pc.Advance();
                    break;

                case '\\':
                    pc.Advance();
                    pc.Expecting();
                    switch (pc.Current)
                    {
                    case 'd':
                        next = new RegexCharsetExpression(new RegexCharsetEntry[] { new RegexCharsetClassEntry("digit") });
                        pc.Advance();
                        break;

                    case 'D':
                        next = new RegexCharsetExpression(new RegexCharsetEntry[] { new RegexCharsetClassEntry("digit") }, true);
                        pc.Advance();
                        break;

                    case 'h':
                        next = new RegexCharsetExpression(new RegexCharsetEntry[] { new RegexCharsetClassEntry("blank") });
                        pc.Advance();
                        break;

                    case 'l':
                        next = new RegexCharsetExpression(new RegexCharsetEntry[] { new RegexCharsetClassEntry("lower") });
                        pc.Advance();
                        break;

                    case 's':
                        next = new RegexCharsetExpression(new RegexCharsetEntry[] { new RegexCharsetClassEntry("space") });
                        pc.Advance();
                        break;

                    case 'S':
                        next = new RegexCharsetExpression(new RegexCharsetEntry[] { new RegexCharsetClassEntry("space") }, true);
                        pc.Advance();
                        break;

                    case 'u':
                        next = new RegexCharsetExpression(new RegexCharsetEntry[] { new RegexCharsetClassEntry("upper") });
                        pc.Advance();
                        break;

                    case 'w':
                        next = new RegexCharsetExpression(new RegexCharsetEntry[] { new RegexCharsetClassEntry("word") });
                        pc.Advance();
                        break;

                    case 'W':
                        next = new RegexCharsetExpression(new RegexCharsetEntry[] { new RegexCharsetClassEntry("word") }, true);
                        pc.Advance();
                        break;

                    default:
                        if (-1 != (ich = _ParseEscapePart(pc)))
                        {
                            next = new RegexLiteralExpression((char)ich);
                        }
                        else
                        {
                            pc.Expecting();                                          // throw an error
                            return(null);                                            // doesn't execute
                        }
                        break;
                    }
                    next.SetLocation(line, column, position);
                    break;

                case '(':
                    pc.Advance();
                    pc.Expecting();
                    // the nested call stops on the matching ')' without consuming it
                    next = Parse(pc);
                    pc.Expecting(')');
                    pc.Advance();
                    break;

                case '[':
                    pc.ClearCapture();
                    pc.Advance();
                    pc.Expecting();
                    bool not = false;
                    if ('^' == pc.Current)
                    {
                        not = true;
                        pc.Advance();
                        pc.Expecting();
                    }
                    var ranges = _ParseRanges(pc);
                    pc.Expecting(']');
                    pc.Advance();
                    next = new RegexCharsetExpression(ranges, not);
                    next.SetLocation(line, column, position);
                    break;

                default:
                    ich  = pc.Current;
                    next = new RegexLiteralExpression((char)ich);
                    next.SetLocation(line, column, position);
                    pc.Advance();
                    break;
                }

                // Bind a modifier to the element just parsed before it joins the rest
                // of the expression, so that "ab*" repeats only "b".
                next = _ParseModifier(next, pc);
                if (null == result)
                {
                    result = next;
                }
                else
                {
                    result = new RegexConcatExpression(result, next);
                    result.SetLocation(line, column, position);
                }

                // the next element starts wherever the cursor is now
                line     = pc.Line;
                column   = pc.Column;
                position = pc.Position;
            }
        }