/// <summary>
/// Consumes a single line break at the cursor and produces an end-of-statement token.
/// </summary>
/// <param name="cursor">Cursor positioned on the character to examine.</param>
/// <param name="err">Reset to <c>LexicalError.None</c>, then set to <c>UnexpectedChar</c> on failure.</param>
/// <param name="skippedLines">Reset to 0, then set to 1 when a line break was consumed.</param>
/// <returns>An <c>EndOfStatement</c> token with a null value, or <c>null</c> on error.</returns>
public static Token ProcessEndOfStatement(Cursor cursor, ref LexicalError err, ref uint skippedLines)
        {
            err          = LexicalError.None;
            skippedLines = 0;

            char current = cursor.CurrChar();

            if (!Utils.IsNewLine(current))
            {
                err = LexicalError.UnexpectedChar;
                return null;
            }

            // The look-ahead is only consulted when the current char is '\r'.
            bool isCrLf = current == '\r' && cursor.NextChar() == '\n';

            // Anything IsNewLine accepts that is neither "\r\n" nor "\n" is rejected.
            if (!isCrLf && current != '\n')
            {
                err = LexicalError.UnexpectedChar;
                return null;
            }

            if (isCrLf)
            {
                cursor.Move(2);
            }
            else
            {
                cursor.Move();
            }

            skippedLines = 1;

            return new Token(TokenType.EndOfStatement, null);
        }
Exemple #2
0
        /// <summary>
        /// Advances the cursor past any run of spaces, tabs and line breaks,
        /// counting how many line breaks were consumed.
        /// </summary>
        /// <param name="cursor">Cursor to advance.</param>
        /// <param name="err">Reset to <c>LexicalError.None</c>, then set to <c>UnexpectedChar</c> on failure.</param>
        /// <param name="skippedLines">Reset to 0, then incremented once per consumed "\r\n" or "\n".</param>
        public static void SkipEmptyLines(Cursor cursor, ref LexicalError err, ref uint skippedLines)
        {
            err          = LexicalError.None;
            skippedLines = 0;

            while (true)
            {
                char current = cursor.CurrChar();

                if (Utils.IsSpaceOrTab(current))
                {
                    cursor.Move();
                    continue;
                }

                if (!Utils.IsNewLine(current))
                {
                    // First character that is neither blank nor a line break: done.
                    return;
                }

                if (current == '\n')
                {
                    cursor.Move();
                }
                else if (current == '\r' && cursor.NextChar() == '\n')
                {
                    cursor.Move(2);
                }
                else
                {
                    // A newline character that is neither "\n" nor the start of "\r\n".
                    err = LexicalError.UnexpectedChar;
                    return;
                }

                skippedLines++;
            }
        }
Exemple #3
0
        /// <summary>
        /// Splits one line of code into tokens using the named groups of <c>_regex</c>.
        /// </summary>
        /// <param name="codeLine">The text of the line to tokenize.</param>
        /// <param name="lineNumber">Line number recorded on every token and error.</param>
        /// <returns>
        /// The tokens found on the line and the errors raised for tokens whose type
        /// resolved to <c>TokenTypes.UNKNOWN</c>. Unknown tokens are still included in the token list.
        /// </returns>
        public (List <Token> tokens, List <LexicalError> errors) ParseLine(string codeLine, int lineNumber)
        {
            var tokens = new List <Token>();
            var errors = new List <LexicalError>();

            string[] groupNames = _regex.GetGroupNames();

            foreach (Match match in _regex.Matches(codeLine))
            {
                // Index 0 is skipped, as in the loop bounds: only the remaining groups are inspected.
                for (int g = 1; g < groupNames.Length; g++)
                {
                    string groupName = groupNames[g];
                    Group  group     = match.Groups[groupName];

                    if (!group.Success)
                    {
                        continue;
                    }

                    string rawValue = group.Value;
                    string trimmed  = rawValue.Trim(' ');

                    // A match made only of spaces ends processing of this match entirely.
                    if (trimmed.Length == 0)
                    {
                        break;
                    }

                    TokenTypes type = GetTokenType(groupName, trimmed);

                    // Errors are collected, but the offending token is still appended below.
                    if (type == TokenTypes.UNKNOWN)
                    {
                        LexicalError error = new LexicalError()
                        {
                            CodeLineNumber  = lineNumber,
                            Value           = rawValue,
                            IndexInCodeLine = match.Index,
                            Length          = match.Length
                        };
                        error.CreateAndSetDescription(codeLine);
                        errors.Add(error);
                    }

                    Token token = new Token
                    {
                        Value          = rawValue,
                        Group          = groupName,
                        CodeLineNumber = lineNumber,
                        CodeLineIndex  = match.Index,
                        Length         = match.Length,
                        TokenType      = type
                    };
                    tokens.Add(token);
                }
            }

            return (tokens, errors);
        }
Exemple #4
0
        /// <summary>
        /// Reads a quoted string literal starting at the cursor.
        /// </summary>
        /// <param name="cursor">Cursor positioned on the opening quote.</param>
        /// <param name="err">
        /// Reset to <c>LexicalError.None</c>; set to <c>UnexpectedChar</c> when the cursor is not on a quote
        /// or a backslash is followed by a disallowed character, and to <c>MissingClosingQuote</c>
        /// when the zero char is reached before a closing quote.
        /// </param>
        /// <returns>
        /// A <c>StringLiteral</c> token holding the text between the quotes (escape sequences are
        /// kept verbatim, backslash included), or <c>null</c> on error.
        /// </returns>
        public static Token ProcessString(Cursor cursor, ref LexicalError err)
        {
            err = LexicalError.None;

            if (!Utils.IsQuote(cursor.CurrChar()))
            {
                err = LexicalError.UnexpectedChar;
                return null;
            }

            StringBuilder text = new StringBuilder();

            cursor.Move(); // step over the opening quote

            while (true)
            {
                char current = cursor.CurrChar();

                if (Utils.IsZeroChar(current) || Utils.IsQuote(current))
                {
                    break;
                }

                if (current == '\\')
                {
                    if (!Utils.IsAllowedInEscapeSequence(cursor.NextChar()))
                    {
                        err = LexicalError.UnexpectedChar;
                        return null;
                    }

                    // Keep the backslash itself, then fall through to copy the escaped char.
                    text.Append(current);
                    cursor.Move();
                    current = cursor.CurrChar();
                }

                text.Append(current);
                cursor.Move();
            }

            if (!Utils.IsQuote(cursor.CurrChar()))
            {
                err = LexicalError.MissingClosingQuote;
                return null;
            }

            cursor.Move(); // step over the closing quote

            return new Token(TokenType.StringLiteral, text.ToString());
        }
Exemple #5
0
        /// <summary>
        /// Applies the rules in <c>Ruleset</c> to the character, in order, stopping at the first
        /// rule that returns anything other than <c>LexicalError.RuleNotMatch</c>.
        /// </summary>
        /// <param name="collection">Token collection handed to each rule.</param>
        /// <param name="character">Character being tokenized.</param>
        /// <param name="line">Line number handed to each rule.</param>
        /// <param name="column">Column number handed to each rule.</param>
        /// <returns>
        /// The result of the last rule that ran; <c>LexicalError.RuleNotMatch</c> when no rule
        /// matched or when <c>Ruleset</c> is empty.
        /// </returns>
        public LexicalError Tokenize(TokenCollection collection, char character, int line, int column)
        {
            LexicalError error = LexicalError.RuleNotMatch;

            // The bound is checked before indexing, so an empty rule set yields RuleNotMatch
            // instead of an out-of-range access on Ruleset[0].
            for (int i = 0; i < this.Ruleset.Count && error == LexicalError.RuleNotMatch; i++)
            {
                error = this.Ruleset[i](collection, character, line, column);
            }

            return error;
        }
Exemple #6
0
        /// <summary>
        /// Parsing "2 @ 2" must fail with a lexical error pointing at the '@' character.
        /// </summary>
        public void TestLexicalError()
        {
            ExpressionParser expressionParser = new ExpressionParser();
            ParserBuilder <ExpressionToken, int> parserBuilder = new ParserBuilder <ExpressionToken, int>();

            Parser <ExpressionToken, int> parser =
                parserBuilder.BuildParser(expressionParser, ParserType.LL_RECURSIVE_DESCENT, "root").Result;

            ParseResult <ExpressionToken, int> result = parser.Parse("2 @ 2");

            Assert.True(result.IsError);
            Assert.NotNull(result.Errors);
            Assert.True(result.Errors.Count > 0);

            // Assert.IsType returns the value cast to the asserted type.
            LexicalError error = Assert.IsType <LexicalError>(result.Errors[0]);

            Assert.Equal(1, error.Line);
            Assert.Equal(3, error.Column);
            Assert.Equal('@', error.UnexpectedChar);
        }
Exemple #7
0
        /// <summary>
        /// Reads an identifier (a letter followed by letters and digits) starting at the cursor.
        /// A dotted name is accepted only when it spells exactly "fmt.Print" or "fmt.Scan".
        /// </summary>
        /// <param name="cursor">Cursor positioned on the first character of the identifier.</param>
        /// <param name="err">
        /// Reset to <c>LexicalError.None</c>; set to <c>UnexpectedChar</c> when the cursor is not on a letter,
        /// or to <c>InvalidIdentifier</c> for any other dotted name.
        /// </param>
        /// <returns>An <c>Identifier</c> token carrying the lexeme, or <c>null</c> on error.</returns>
        public static Token ProcessIdentifier(Cursor cursor, ref LexicalError err)
        {
            err = LexicalError.None;

            char current = cursor.CurrChar();

            if (!Utils.IsLetter(current))
            {
                err = LexicalError.UnexpectedChar;
                return null;
            }

            StringBuilder name = new StringBuilder();

            while (Utils.IsLetter(current) || Utils.IsDigit(current))
            {
                name.Append(current);
                cursor.Move();
                current = cursor.CurrChar();
            }

            // Plain identifier: no qualifier follows.
            if (current != '.')
            {
                return new Token(TokenType.Identifier, name.ToString());
            }

            // Keep the '.' and read the member part.
            name.Append(current);
            cursor.Move();
            current = cursor.CurrChar();

            while (Utils.IsLetter(current) || Utils.IsDigit(current))
            {
                name.Append(current);
                cursor.Move();
                current = cursor.CurrChar();
            }

            string qualified = name.ToString();

            if (qualified == "fmt.Print" || qualified == "fmt.Scan")
            {
                return new Token(TokenType.Identifier, qualified);
            }

            err = LexicalError.InvalidIdentifier;
            return null;
        }
Exemple #8
0
        /// <summary>
        /// Tokenizes the file at <paramref name="filepath"/> by driving a state machine over a cursor
        /// on the file's contents.
        /// </summary>
        /// <param name="filepath">Path of the source file to read.</param>
        /// <returns>
        /// The tokens in source order, or <c>null</c> when a lexical error was encountered
        /// (the error and the current line number are written to the console).
        /// When the last token is not an <c>EndOfStatement</c>, one with an empty value is appended;
        /// an input that yields no tokens returns an empty list.
        /// </returns>
        public List <Token> Parse(string filepath)
        {
            // The using block closes the reader on every exit path, including exceptions
            // thrown by the cursor or the token processors.
            using (StreamReader input = new StreamReader(filepath))
            {
                Cursor cursor = new Cursor(input);

                List <Token> tokens = new List <Token>();
                Token        token;

                State        state = State.ProceedToNextStatement;
                LexicalError err   = LexicalError.None;

                uint line         = 1;
                uint skippedLines = 0;

                while (true)
                {
                    #if DBG_SHIFTING_STATE
                    Console.WriteLine("dbg: Entering state " + state.ToString());
                    #endif

                    switch (state)
                    {
                    case State.ProceedToNextStatement:
                        // Skip blank lines and leading whitespace before the next statement.
                        Parser.SkipEmptyLines(cursor, ref err, ref skippedLines);

                        line += skippedLines;

                        state = (err != LexicalError.None) ? State.Error : State.ProceedToNextToken;
                        break;

                    case State.ProceedToNextToken:
                        Parser.SkipSpaces(cursor);

                        // Pick the next state from the first character of the upcoming token.
                        if (Utils.IsLetter(cursor.CurrChar()))
                        {
                            state = State.Identifier;
                        }
                        else if (Utils.IsDigit(cursor.CurrChar()))
                        {
                            state = State.Number;
                        }
                        else if (Utils.IsQuote(cursor.CurrChar()))
                        {
                            state = State.String;
                        }
                        else if (Utils.IsSymbol(cursor.CurrChar()))
                        {
                            state = State.Symbol;
                        }
                        else if (Utils.IsNewLine(cursor.CurrChar()))
                        {
                            state = State.EndOfStatement;
                        }
                        else if (Utils.IsZeroChar(cursor.CurrChar()))
                        {
                            state = State.Final;
                        }
                        else
                        {
                            err   = LexicalError.UnexpectedChar;
                            state = State.Error;
                        }
                        break;

                    case State.Identifier:
                        token = Parser.ProcessIdentifier(cursor, ref err);

                        if (err != LexicalError.None)
                        {
                            state = State.Error;
                        }
                        else
                        {
                            tokens.Add(token);
                            state = State.ProceedToNextToken;
                        }
                        break;

                    case State.Number:
                        token = Parser.ProcessNumber(cursor, ref err);

                        if (err != LexicalError.None)
                        {
                            state = State.Error;
                        }
                        else
                        {
                            tokens.Add(token);
                            state = State.ProceedToNextToken;
                        }
                        break;

                    case State.String:
                        token = Parser.ProcessString(cursor, ref err);

                        if (err != LexicalError.None)
                        {
                            state = State.Error;
                        }
                        else
                        {
                            tokens.Add(token);
                            state = State.ProceedToNextToken;
                        }
                        break;

                    case State.Symbol:
                        token = Parser.ProcessSymbol(cursor, ref err);

                        if (err != LexicalError.None)
                        {
                            state = State.Error;
                        }
                        else
                        {
                            tokens.Add(token);
                            state = State.ProceedToNextToken;
                        }
                        break;

                    case State.EndOfStatement:
                        token = Parser.ProcessEndOfStatement(cursor, ref err, ref skippedLines);

                        line += skippedLines;

                        if (err != LexicalError.None)
                        {
                            state = State.Error;
                        }
                        else
                        {
                            tokens.Add(token);
                            state = State.ProceedToNextStatement;
                        }
                        break;

                    case State.Final:
                        // Terminate a last statement that had no trailing line break.
                        // The Count check matters: an empty or blank-only input produces no
                        // tokens, and indexing tokens[Count - 1] would throw in that case.
                        if (tokens.Count > 0 && tokens[tokens.Count - 1].Type != TokenType.EndOfStatement)
                        {
                            tokens.Add(new Token(TokenType.EndOfStatement, ""));
                        }

                        Console.WriteLine("Lexical Parsing success!");

                        return tokens;

                    case State.Error:
                        Console.WriteLine("Error: " + err.ToString() + " at line(" + line + ")");
                        return null;
                    }
                }
            }
        }
Exemple #9
0
        /// <summary>
        /// Reads a numeric literal starting at the cursor: a run of digits, optionally followed by
        /// '.', more digits, and an exponent ('e' or 'E', optional sign, at least one digit).
        /// The exponent is only recognised after a '.' has been read.
        /// </summary>
        /// <param name="cursor">Cursor positioned on the first digit.</param>
        /// <param name="err">
        /// Reset to <c>LexicalError.None</c>; set to <c>UnexpectedChar</c> when the cursor is not on a digit,
        /// or to <c>InvalidFloatLiteral</c> when an exponent has no digits.
        /// </param>
        /// <returns>
        /// An <c>IntLiteral</c> token when no '.' follows the digits, otherwise a <c>FloatLiteral</c> token;
        /// <c>null</c> on error.
        /// </returns>
        public static Token ProcessNumber(Cursor cursor, ref LexicalError err)
        {
            err = LexicalError.None;

            char current = cursor.CurrChar();

            if (!Utils.IsDigit(current))
            {
                err = LexicalError.UnexpectedChar;
                return null;
            }

            StringBuilder literal = new StringBuilder();

            // Integer part.
            while (Utils.IsDigit(current))
            {
                literal.Append(current);
                cursor.Move();
                current = cursor.CurrChar();
            }

            if (current != '.')
            {
                return new Token(TokenType.IntLiteral, literal.ToString());
            }

            // Decimal point, then the (possibly empty) fractional part.
            literal.Append(current);
            cursor.Move();
            current = cursor.CurrChar();

            while (Utils.IsDigit(current))
            {
                literal.Append(current);
                cursor.Move();
                current = cursor.CurrChar();
            }

            if (current == 'e' || current == 'E')
            {
                literal.Append(current);
                cursor.Move();
                current = cursor.CurrChar();

                if (current == '+' || current == '-')
                {
                    literal.Append(current);
                    cursor.Move();
                    current = cursor.CurrChar();
                }

                // An exponent marker must be followed by at least one digit.
                if (!Utils.IsDigit(current))
                {
                    err = LexicalError.InvalidFloatLiteral;
                    return null;
                }

                while (Utils.IsDigit(current))
                {
                    literal.Append(current);
                    cursor.Move();
                    current = cursor.CurrChar();
                }
            }

            return new Token(TokenType.FloatLiteral, literal.ToString());
        }
Exemple #10
0
        /// <summary>
        /// Reads a one- or two-character punctuation or operator token starting at the cursor.
        /// </summary>
        /// <param name="cursor">Cursor positioned on the symbol's first character.</param>
        /// <param name="err">
        /// Reset to <c>LexicalError.None</c>; set to <c>UnexpectedChar</c> when the cursor is not on a symbol,
        /// or when a single pipe or ampersand is not immediately doubled.
        /// </param>
        /// <returns>
        /// A token whose value is the consumed character(s). Its type stays <c>TokenType.Undefined</c>
        /// when the symbol is not one of the characters handled here. Returns <c>null</c> on error.
        /// </returns>
        public static Token ProcessSymbol(Cursor cursor, ref LexicalError err)
        {
            err = LexicalError.None;

            char first = cursor.CurrChar();

            if (!Utils.IsSymbol(first))
            {
                err = LexicalError.UnexpectedChar;
                return null;
            }

            TokenType kind     = TokenType.Undefined;
            char      second   = '\0';
            bool      twoChars = false; // true when the look-ahead char belongs to this token

            switch (first)
            {
                case '(': kind = TokenType.OpenningRoundBracket; break;
                case ')': kind = TokenType.ClosingRoundBracket;  break;
                case '{': kind = TokenType.OpenningCurlyBracket; break;
                case '}': kind = TokenType.ClosingCurlyBracket;  break;
                case ',': kind = TokenType.Comma;                break;
                case ';': kind = TokenType.Semicolon;            break;

                // Additive operators share one token type.
                case '+':
                case '-':
                    kind = TokenType.Addition;
                    break;

                // Multiplicative operators share one token type.
                case '*':
                case '/':
                case '%':
                    kind = TokenType.Multiplication;
                    break;

                case '=':
                    second   = cursor.NextChar();
                    twoChars = second == '=';
                    kind     = twoChars ? TokenType.Comparison : TokenType.Assignment;
                    break;

                case ':':
                    second   = cursor.NextChar();
                    twoChars = second == '=';
                    kind     = twoChars ? TokenType.ShortAssignment : TokenType.Colon;
                    break;

                // Relational operators, with an optional trailing '='.
                case '>':
                case '<':
                    second   = cursor.NextChar();
                    twoChars = second == '=';
                    kind     = TokenType.Comparison;
                    break;

                case '!':
                    second   = cursor.NextChar();
                    twoChars = second == '=';
                    kind     = twoChars ? TokenType.Comparison : TokenType.LogicalNegation;
                    break;

                // Logical or / and exist only in their doubled form.
                case '|':
                case '&':
                    second = cursor.NextChar();
                    if (second != first)
                    {
                        err = LexicalError.UnexpectedChar;
                        return null;
                    }
                    twoChars = true;
                    kind     = (first == '|') ? TokenType.LogicalOr : TokenType.LogicalAnd;
                    break;
            }

            string lexeme = first.ToString();

            if (twoChars)
            {
                lexeme += second;
                cursor.Move(); // step over the first character; the final Move skips the second
            }

            cursor.Move();

            return new Token(kind, lexeme);
        }