Ejemplo n.º 1
0
        /// <summary>
        /// Scans forward through the input looking for text this machine matches
        /// </summary>
        /// <param name="context">The parse context that supplies the input to scan</param>
        /// <returns>A <see cref="CharFAMatch"/> holding the line, column and position recorded just before the successful attempt plus the text captured from that point, or null if the input ends without a match.</returns>
        public CharFAMatch Match(ParseContext context)
        {
            context.EnsureStarted();

            // one attempt per pass; stop as soon as an attempt succeeds or the input runs out
            while (-1 != context.Current)
            {
                // remember where this attempt starts so a match can be reported from here
                var startLine     = context.Line;
                var startColumn   = context.Column;
                var startPosition = context.Position;
                var captureStart  = context.CaptureBuffer.Length;

                if (_DoMatch(context))
                {
                    var captured = context.GetCapture(captureStart);
                    return new CharFAMatch(startLine, startColumn, startPosition, captured);
                }
            }

            // ran out of input without a successful attempt
            return null;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Reads rules from the input until it is exhausted and collects them into a new <see cref="Cfg"/>
        /// </summary>
        /// <param name="pc">The parse context to read the rules from</param>
        /// <returns>The <see cref="Cfg"/> whose Rules list holds every rule parsed, in input order</returns>
        static Cfg _Parse(ParseContext pc)
        {
            var cfg = new Cfg();

            pc.EnsureStarted();
            for (;;)
            {
                // -1 signals the end of the input
                if (-1 == pc.Current)
                {
                    break;
                }
                cfg.Rules.Add(_ParseRule(pc));
            }

            return cfg;
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Skips a run of whitespace on the current line. A newline is never consumed.
 /// </summary>
 /// <param name="pc">The parse context to advance</param>
 /// <returns>true if at least one whitespace character was skipped, otherwise false</returns>
 static bool _SkipWhiteSpace(ParseContext pc)
 {
     pc.EnsureStarted();

     // nothing to skip at end of input, at a newline, or at a non-whitespace character
     var atSkippable = -1 != pc.Current && '\n' != pc.Current && char.IsWhiteSpace((char)pc.Current);
     if (!atSkippable)
     {
         return false;
     }

     do
     {
         // consume the whitespace character; stop if that exhausts the input
         if (-1 == pc.Advance())
         {
             break;
         }
     }
     while ('\n' != pc.Current && char.IsWhiteSpace((char)pc.Current));

     return true;
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Runs the DFA table over the input starting from state 0, capturing each character it consumes
        /// </summary>
        /// <param name="dfaTable">The DFA state table to walk</param>
        /// <param name="context">The parse context supplying the input and receiving the capture</param>
        /// <returns>true if the walk stops in a state whose AcceptSymbolId is not -1, otherwise false</returns>
        static bool _DoMatchDfa(CharDfaEntry[] dfaTable, ParseContext context)
        {
            context.EnsureStarted();

            // index of the state the machine is currently in
            var current = 0;

            while (-1 != context.Current)
            {
                var target = MoveDfa(dfaTable, current, (char)context.Current);
                if (-1 != target)
                {
                    // the character is part of the match: keep it, consume it, follow the transition
                    context.CaptureCurrent();
                    context.Advance();
                    current = target;
                    continue;
                }

                // no transition on this character: succeed if we are sitting on an accepting state
                if (-1 != dfaTable[current].AcceptSymbolId)
                {
                    return true;
                }

                // failure: step over the offending character (without capturing it) before reporting it
                context.Advance();
                return false;
            }

            // input exhausted: the result is whether the final state accepts
            return -1 != dfaTable[current].AcceptSymbolId;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Builds a state machine from the pattern text in the parse context.
        /// </summary>
        /// <remarks>
        /// Recognized syntax: '.' (any char), '\' escapes, '(' ... ')' groups, '|' alternation,
        /// '[' ... ']' sets (optionally negated with '^'), and the postfix modifiers '*', '+' and '?'
        /// which are applied through Kleene, Repeat and Optional respectively.
        /// The method returns when it reaches end of input or a ')' - the ')' itself is not consumed here,
        /// the caller that handled the matching '(' consumes it.
        /// </remarks>
        /// <param name="pc">The parse context positioned at the pattern text</param>
        /// <param name="accept">The accept value passed to every accepting state and combinator created</param>
        /// <returns>The machine built so far when end of input or ')' is reached</returns>
        static CharFA <TAccept> _Parse(ParseContext pc, TAccept accept)
        {
            // start from a single accepting state; pattern elements get chained onto it
            CharFA <TAccept> result = new CharFA <TAccept>(true, accept);

            CharFA <TAccept> f, next;
            int ch;

            pc.EnsureStarted();
            // the fragment most recently worked on; new elements attach to its FirstAcceptingState
            var current = result;

            while (true)
            {
                switch (pc.Current)
                {
                case -1:
                    // end of input
                    return(result);

                case '.':
                    pc.Advance();
                    // capture the attachment point before current is replaced by the new fragment
                    f = current.FirstAcceptingState as CharFA <TAccept>;

                    // '.' is a set covering the whole char range
                    current = Set(new CharRange[] { new CharRange(char.MinValue, char.MaxValue) }, accept);
                    // optional postfix modifier wraps only the new fragment
                    switch (pc.Current)
                    {
                    case '*':
                        current = Kleene(current, accept);
                        pc.Advance();
                        break;

                    case '+':
                        current = Repeat(current, accept);
                        pc.Advance();
                        break;

                    case '?':
                        current = Optional(current, accept);
                        pc.Advance();
                        break;
                    }
                    // the old accepting state stops accepting and falls through to the new fragment
                    f.IsAccepting = false;
                    f.EpsilonTransitions.Add(current);
                    break;

                case '\\':
                    // _ParseEscape yields the escaped character, or -1 on failure
                    if (-1 != (ch = _ParseEscape(pc)))
                    {
                        // next stays null when there is no modifier (plain character transition)
                        next = null;
                        switch (pc.Current)
                        {
                        case '*':
                            next = new CharFA <TAccept>();
                            next.Transitions.Add((char)ch, new CharFA <TAccept>(true, accept));
                            next = Kleene(next, accept);
                            pc.Advance();
                            break;

                        case '+':
                            next = new CharFA <TAccept>();
                            next.Transitions.Add((char)ch, new CharFA <TAccept>(true, accept));
                            next = Repeat(next, accept);
                            pc.Advance();
                            break;

                        case '?':
                            next = new CharFA <TAccept>();
                            next.Transitions.Add((char)ch, new CharFA <TAccept>(true, accept));
                            next = Optional(next, accept);
                            pc.Advance();
                            break;

                        default:
                            // no modifier: add a direct transition on the character to a fresh accepting state
                            current             = current.FirstAcceptingState as CharFA <TAccept>;
                            current.IsAccepting = false;
                            current.Transitions.Add((char)ch, new CharFA <TAccept>(true, accept));
                            break;
                        }
                        if (null != next)
                        {
                            // a modified fragment was built: link it in through an epsilon transition
                            current             = current.FirstAcceptingState as CharFA <TAccept>;
                            current.IsAccepting = false;
                            current.EpsilonTransitions.Add(next);
                            current = next;
                        }
                    }
                    else
                    {
                        pc.Expecting();                              // throw an error
                        return(null);                                // doesn't execute
                    }
                    break;

                case ')':
                    // end of a group: hand control back to the caller that saw the '('
                    return(result);

                case '(':
                    pc.Advance();
                    pc.Expecting();
                    // capture the attachment point, then parse the group body recursively
                    f       = current.FirstAcceptingState as CharFA <TAccept>;
                    current = _Parse(pc, accept);
                    pc.Expecting(')');
                    pc.Advance();
                    // optional postfix modifier applies to the whole group
                    switch (pc.Current)
                    {
                    case '*':
                        current = Kleene(current, accept);
                        pc.Advance();
                        break;

                    case '+':
                        current = Repeat(current, accept);
                        pc.Advance();
                        break;

                    case '?':
                        current = Optional(current, accept);
                        pc.Advance();
                        break;
                    }
                    // link the group in after the previous fragment and clear the old accept flag
                    var ff = f.FirstAcceptingState;
                    ff.EpsilonTransitions.Add(current);
                    ff.IsAccepting = false;
                    break;

                case '|':
                    if (-1 != pc.Advance())
                    {
                        // parse the right-hand side and replace result with the alternation of both sides
                        current = _Parse(pc, accept);
                        result  = Or(new CharFA <TAccept>[] { result as CharFA <TAccept>, current as CharFA <TAccept> }, accept);
                    }
                    else
                    {
                        // '|' was the last character: there is no right-hand side, so result is wrapped with Optional
                        current = current.FirstAcceptingState as CharFA <TAccept>;
                        result  = Optional(result, accept);
                    }
                    break;

                case '[':
                    // the set's contents are gathered in the capture buffer, so start it empty
                    pc.ClearCapture();
                    pc.Advance();
                    pc.Expecting();
                    bool not = false;
                    if ('^' == pc.Current)
                    {
                        // leading '^' negates the set
                        not = true;
                        pc.Advance();
                        pc.Expecting();
                    }
                    // read up to the closing ']' with '\\' passed as the escape character
                    pc.TryReadUntil(']', '\\', false);
                    pc.Expecting(']');
                    pc.Advance();

                    // a non-negated "[.]" is special-cased as the whole char range; anything else goes through _ParseRanges
                    var r = (!not && "." == pc.Capture) ?
                            new CharRange[] { new CharRange(char.MinValue, char.MaxValue) } :
                    _ParseRanges(pc.Capture, true);
                    if (not)
                    {
                        r = CharRange.NotRanges(r);
                    }
                    // capture the attachment point before current is replaced by the new fragment
                    f       = current.FirstAcceptingState as CharFA <TAccept>;
                    current = Set(r, accept);
                    // optional postfix modifier wraps only the new set
                    switch (pc.Current)
                    {
                    case '*':
                        current = Kleene(current, accept);
                        pc.Advance();
                        break;

                    case '+':
                        current = Repeat(current, accept);
                        pc.Advance();
                        break;

                    case '?':
                        current = Optional(current, accept);
                        pc.Advance();
                        break;
                    }
                    // the old accepting state stops accepting and falls through to the set
                    f.IsAccepting = false;
                    f.EpsilonTransitions.Add(current);
                    break;

                default:
                    // any other character is a literal; same handling as the escape case above
                    ch = pc.Current;
                    pc.Advance();
                    // next stays null when there is no modifier (plain character transition)
                    next = null;
                    switch (pc.Current)
                    {
                    case '*':
                        next = new CharFA <TAccept>();
                        next.Transitions.Add((char)ch, new CharFA <TAccept>(true, accept));
                        next = Kleene(next, accept);
                        pc.Advance();
                        break;

                    case '+':
                        next = new CharFA <TAccept>();
                        next.Transitions.Add((char)ch, new CharFA <TAccept>(true, accept));
                        next = Repeat(next, accept);
                        pc.Advance();
                        break;

                    case '?':
                        next = new CharFA <TAccept>();

                        next.Transitions.Add((char)ch, new CharFA <TAccept>(true, accept));
                        next = Optional(next, accept);
                        pc.Advance();
                        break;

                    default:
                        // no modifier: add a direct transition on the character to a fresh accepting state
                        current             = current.FirstAcceptingState as CharFA <TAccept>;
                        current.IsAccepting = false;
                        current.Transitions.Add((char)ch, new CharFA <TAccept>(true, accept));
                        break;
                    }
                    if (null != next)
                    {
                        // a modified fragment was built: link it in through an epsilon transition
                        current             = current.FirstAcceptingState as CharFA <TAccept>;
                        current.IsAccepting = false;
                        current.EpsilonTransitions.Add(next);
                        current = next;
                    }
                    break;
                }
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Parses regular expression text from the parse context into an expression tree.
        /// </summary>
        /// <remarks>
        /// Handles '.' (any char), '\' escapes, '(' ... ')' groups, '|' alternation, '[' ... ']' sets
        /// and literals. After each element <c>_ParseModifier</c> is given the chance to wrap that element
        /// (and only that element) before it is concatenated onto the result.
        /// Parsing stops at end of input or at a ')' which is left for the caller to consume.
        /// </remarks>
        /// <param name="pc">The parse context positioned at the expression text</param>
        /// <returns>The parsed expression, or null if nothing was parsed before end of input or ')'</returns>
        internal static RegexExpression Parse(ParseContext pc)
        {
            RegexExpression result = null, next = null;
            int             ich;

            pc.EnsureStarted();
            // location where the element currently being parsed starts
            var line     = pc.Line;
            var column   = pc.Column;
            var position = pc.Position;

            while (true)
            {
                switch (pc.Current)
                {
                case -1:
                case ')':
                    // end of input, or end of a group (the caller consumes the ')')
                    return(result);

                case '|':
                    // a trailing '|' at end of input has no right-hand side, recorded as null
                    next   = (-1 != pc.Advance()) ? Parse(pc) : null;
                    result = new RegexOrExpression(result, next);
                    result.SetLocation(line, column, position);
                    line     = pc.Line;
                    column   = pc.Column;
                    position = pc.Position;
                    continue;

                case '.':
                    // '.' is a set covering the whole char range
                    next = new RegexCharsetExpression(new RegexCharsetEntry[] { new RegexCharsetRangeEntry(char.MinValue, char.MaxValue) }, false);
                    next.SetLocation(line, column, position);
                    pc.Advance();
                    // the modifier must bind to the dot alone, not to everything parsed so far
                    next = _ParseModifier(next, pc);
                    break;

                case '\\':
                    ich = _ParseEscape(pc);
                    if (-1 == ich)
                    {
                        pc.Expecting();                              // throw an error
                        return(null);                                // doesn't execute
                    }
                    next = new RegexLiteralExpression((char)ich);
                    next.SetLocation(line, column, position);
                    next = _ParseModifier(next, pc);
                    break;

                case '(':
                    pc.Advance();
                    pc.Expecting();
                    // parse the group body recursively, then require and consume the closing ')'
                    next = Parse(pc);
                    pc.Expecting(')');
                    pc.Advance();
                    next = _ParseModifier(next, pc);
                    break;

                case '[':
                    pc.ClearCapture();
                    pc.Advance();
                    pc.Expecting();
                    // a leading '^' negates the set
                    bool not = '^' == pc.Current;
                    if (not)
                    {
                        pc.Advance();
                        pc.Expecting();
                    }
                    // an empty entry list is passed through as-is; no debugger trap in library code
                    var ranges = _ParseRanges(pc);
                    pc.Expecting(']');
                    pc.Advance();
                    next = new RegexCharsetExpression(ranges, not);
                    next.SetLocation(line, column, position);
                    next = _ParseModifier(next, pc);
                    break;

                default:
                    // any other character is a literal
                    ich  = pc.Current;
                    next = new RegexLiteralExpression((char)ich);
                    next.SetLocation(line, column, position);
                    pc.Advance();
                    next = _ParseModifier(next, pc);
                    break;
                }

                // append the element just parsed to whatever came before it
                if (null == result)
                {
                    result = next;
                }
                else
                {
                    result = new RegexConcatExpression(result, next);
                    // concat nodes are stamped with the start location of the element they append
                    result.SetLocation(line, column, position);
                }

                // the next element starts wherever the cursor is now
                line     = pc.Line;
                column   = pc.Column;
                position = pc.Position;
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Parses the contents of a bracket set, up to but not including the closing ']'.
        /// </summary>
        /// <remarks>
        /// Produces single character entries, range entries (<c>a-z</c>) and class entries (<c>[:name:]</c>).
        /// A '-' that does not sit between two characters is kept as a literal '-' entry.
        /// </remarks>
        /// <param name="pc">The parse context positioned just inside the '[' (and after any '^')</param>
        /// <returns>The entries parsed, in input order. Empty if the set has no content.</returns>
        static IList <RegexCharsetEntry> _ParseRanges(ParseContext pc)
        {
            pc.EnsureStarted();
            var result = new List <RegexCharsetEntry>();
            // a character that has been read but not yet emitted, since it may turn out to start a range
            RegexCharsetCharEntry pending = null;
            // true when a '-' has been seen after the pending character
            bool readDash = false;

            while (-1 != pc.Current && ']' != pc.Current)
            {
                switch (pc.Current)
                {
                case '[':                         // char class
                    if (null != pending)
                    {
                        // flush the pending character; a dash after it cannot form a range with a class,
                        // so it is emitted as a literal '-' (and only in that case)
                        result.Add(pending);
                        if (readDash)
                        {
                            result.Add(new RegexCharsetCharEntry('-'));
                        }
                    }
                    pc.Advance();
                    pc.Expecting(':');
                    pc.Advance();
                    // the class name is whatever gets captured between the two ':'
                    var nameStart = pc.CaptureBuffer.Length;
                    pc.TryReadUntil(':', false);
                    var className = pc.GetCapture(nameStart);
                    pc.Advance();
                    pc.Expecting(']');
                    pc.Advance();
                    result.Add(new RegexCharsetClassEntry(className));
                    readDash = false;
                    pending  = null;
                    break;

                case '\\':
                    // _ParseEscape is given the context still positioned at the backslash
                    var ch = (char)_ParseEscape(pc);
                    if (null == pending)
                    {
                        pending = new RegexCharsetCharEntry(ch);
                    }
                    else if (readDash)
                    {
                        // pending '-' ch completes a range
                        result.Add(new RegexCharsetRangeEntry(pending.Value, ch));
                        pending  = null;
                        readDash = false;
                    }
                    else
                    {
                        result.Add(pending);
                        pending = new RegexCharsetCharEntry(ch);
                    }
                    break;

                case '-':
                    pc.Advance();
                    if (null == pending)
                    {
                        // a dash with nothing before it is a literal '-'
                        pending  = new RegexCharsetCharEntry('-');
                        readDash = false;
                    }
                    else
                    {
                        if (readDash)
                        {
                            result.Add(pending);
                        }
                        readDash = true;
                    }
                    break;

                default:
                    if (null == pending)
                    {
                        pending = new RegexCharsetCharEntry((char)pc.Current);
                    }
                    else if (readDash)
                    {
                        // pending '-' current completes a range
                        result.Add(new RegexCharsetRangeEntry(pending.Value, (char)pc.Current));
                        pending  = null;
                        readDash = false;
                    }
                    else
                    {
                        result.Add(pending);
                        pending = new RegexCharsetCharEntry((char)pc.Current);
                    }
                    pc.Advance();
                    break;
                }
            }

            // flush whatever is still pending; a trailing dash is a literal '-'
            if (null != pending)
            {
                result.Add(pending);
                if (readDash)
                {
                    result.Add(new RegexCharsetCharEntry('-'));
                }
            }
            return(result);
        }