public static Token ProcessEndOfStatement(Cursor cursor, ref LexicalError err, ref uint skippedLines)
{
    // Consumes exactly one line terminator ("\r\n" or "\n") and emits an
    // EndOfStatement token. Any other character — including a bare '\r' that
    // IsNewLine accepts but this method cannot consume — reports UnexpectedChar.
    err = LexicalError.None;
    skippedLines = 0;

    if (!Utils.IsNewLine(cursor.CurrChar()))
    {
        err = LexicalError.UnexpectedChar;
        return null;
    }

    char current = cursor.CurrChar();
    if (current == '\r' && cursor.NextChar() == '\n')
    {
        cursor.Move(2); // consume the CRLF pair
        skippedLines++;
    }
    else if (current == '\n')
    {
        cursor.Move();
        skippedLines++;
    }
    else
    {
        // Newline-class character we cannot consume (e.g. a lone '\r').
        err = LexicalError.UnexpectedChar;
        return null;
    }

    return new Token(TokenType.EndOfStatement, null);
}
public static void SkipEmptyLines(Cursor cursor, ref LexicalError err, ref uint skippedLines)
{
    // Advances the cursor past any run of spaces, tabs, and line terminators,
    // counting each consumed newline in skippedLines.
    err = LexicalError.None;
    skippedLines = 0;

    for (char c = cursor.CurrChar();
         Utils.IsNewLine(c) || Utils.IsSpaceOrTab(c);
         c = cursor.CurrChar())
    {
        if (Utils.IsSpaceOrTab(c))
        {
            cursor.Move();
        }
        else if (c == '\r' && cursor.NextChar() == '\n')
        {
            cursor.Move(2); // CRLF counts as one line
            skippedLines++;
        }
        else if (c == '\n')
        {
            cursor.Move();
            skippedLines++;
        }
        else
        {
            // Newline-class character we cannot consume (e.g. a lone '\r').
            err = LexicalError.UnexpectedChar;
            return;
        }
    }
}
public (List<Token> tokens, List<LexicalError> errors) ParseLine(string codeLine, int lineNumber)
{
    // Runs the compiled lexer regex over one line of code and converts every
    // successful named capture group into a Token. Unrecognized lexemes still
    // produce tokens, but a LexicalError is recorded alongside them.
    var tokens = new List<Token>();
    var errors = new List<LexicalError>();

    string[] groupNames = _regex.GetGroupNames();

    foreach (Match match in _regex.Matches(codeLine))
    {
        // Group 0 is the whole match; named groups start at index 1.
        for (int i = 1; i < groupNames.Length; i++)
        {
            string groupName = groupNames[i];
            Group group = match.Groups[groupName];
            if (!group.Success)
            {
                continue;
            }

            string trimmedValue = group.Value.Trim(' ');
            if (trimmedValue.Length == 0)
            {
                break; // blank capture: skip the remaining groups of this match
            }

            TokenTypes type = GetTokenType(groupName, trimmedValue);

            // Possible errors are collected, but tokens are still inserted
            // into the token list.
            if (type == TokenTypes.UNKNOWN)
            {
                var error = new LexicalError
                {
                    CodeLineNumber = lineNumber,
                    Value = group.Value,
                    IndexInCodeLine = match.Index,
                    Length = match.Length
                };
                error.CreateAndSetDescription(codeLine);
                errors.Add(error);
            }

            tokens.Add(new Token
            {
                Value = group.Value,
                Group = groupName,
                CodeLineNumber = lineNumber,
                CodeLineIndex = match.Index,
                Length = match.Length,
                TokenType = type
            });
        }
    }

    return (tokens, errors);
}
public static Token ProcessString(Cursor cursor, ref LexicalError err)
{
    // Lexes a double-quoted string literal. Escape sequences are kept
    // verbatim in the token value ('\' plus the escaped character).
    err = LexicalError.None;

    if (!Utils.IsQuote(cursor.CurrChar()))
    {
        err = LexicalError.UnexpectedChar;
        return null;
    }

    cursor.Move(); // skip the opening quote
    var buffer = new StringBuilder();

    while (true)
    {
        char c = cursor.CurrChar();
        if (Utils.IsZeroChar(c) || Utils.IsQuote(c))
        {
            break; // end of input or closing quote
        }

        if (c == '\\')
        {
            if (!Utils.IsAllowedInEscapeSequence(cursor.NextChar()))
            {
                err = LexicalError.UnexpectedChar;
                return null;
            }
            buffer.Append(c); // keep the backslash itself
            cursor.Move();
            buffer.Append(cursor.CurrChar()); // keep the escaped character
            cursor.Move();
        }
        else
        {
            buffer.Append(c);
            cursor.Move();
        }
    }

    if (!Utils.IsQuote(cursor.CurrChar()))
    {
        // Input ended before a closing quote was seen.
        err = LexicalError.MissingClosingQuote;
        return null;
    }

    cursor.Move(); // skip the closing quote
    return new Token(TokenType.StringLiteral, buffer.ToString());
}
/// <summary>
/// Runs the registered lexer rules in order until one of them matches the
/// character (i.e. returns something other than <c>LexicalError.RuleNotMatch</c>)
/// or the ruleset is exhausted.
/// </summary>
/// <param name="collection">Token collection the matching rule appends to.</param>
/// <param name="character">Character being tokenized.</param>
/// <param name="line">Line of the character, for error reporting.</param>
/// <param name="column">Column of the character, for error reporting.</param>
/// <returns>
/// The result of the first rule that matched, or
/// <c>LexicalError.RuleNotMatch</c> when no rule matched (including the
/// empty-ruleset case).
/// </returns>
public LexicalError Tokenize(TokenCollection collection, char character, int line, int column)
{
    LexicalError error = LexicalError.RuleNotMatch;

    // Fixed: the original indexed Ruleset[0] before checking Ruleset.Count,
    // which threw ArgumentOutOfRangeException on an empty ruleset. The loop
    // condition now guards every access.
    for (int i = 0; i < this.Ruleset.Count && error == LexicalError.RuleNotMatch; i++)
    {
        error = this.Ruleset[i](collection, character, line, column);
    }

    return error;
}
public void TestLexicalError()
{
    // Arrange: build an LL recursive-descent parser from the expression grammar.
    var exprParser = new ExpressionParser();
    var builder = new ParserBuilder<ExpressionToken, int>();
    Parser<ExpressionToken, int> parser =
        builder.BuildParser(exprParser, ParserType.LL_RECURSIVE_DESCENT, "root").Result;

    // Act: '@' is not a valid token, so lexing "2 @ 2" must fail.
    ParseResult<ExpressionToken, int> result = parser.Parse("2 @ 2");

    // Assert: the failure is a LexicalError pointing at '@' (line 1, column 3).
    Assert.True(result.IsError);
    Assert.NotNull(result.Errors);
    Assert.True(result.Errors.Count > 0);
    Assert.IsType<LexicalError>(result.Errors[0]);
    LexicalError error = result.Errors[0] as LexicalError;
    Assert.Equal(1, error.Line);
    Assert.Equal(3, error.Column);
    Assert.Equal('@', error.UnexpectedChar);
}
public static Token ProcessIdentifier(Cursor cursor, ref LexicalError err)
{
    // Lexes an identifier: a letter followed by letters/digits, optionally a
    // single '.' and more letters/digits. Dotted names are restricted to the
    // two built-ins "fmt.Print" and "fmt.Scan".
    err = LexicalError.None;

    if (!Utils.IsLetter(cursor.CurrChar()))
    {
        err = LexicalError.UnexpectedChar;
        return null;
    }

    var buffer = new StringBuilder();
    for (char c = cursor.CurrChar();
         Utils.IsLetter(c) || Utils.IsDigit(c);
         c = cursor.CurrChar())
    {
        buffer.Append(c);
        cursor.Move();
    }

    if (cursor.CurrChar() == '.')
    {
        buffer.Append('.');
        cursor.Move();

        for (char c = cursor.CurrChar();
             Utils.IsLetter(c) || Utils.IsDigit(c);
             c = cursor.CurrChar())
        {
            buffer.Append(c);
            cursor.Move();
        }

        string lexeme = buffer.ToString();
        if (!lexeme.Equals("fmt.Print") && !lexeme.Equals("fmt.Scan"))
        {
            // Only the two whitelisted dotted names are valid identifiers.
            err = LexicalError.InvalidIdentifier;
            return null;
        }
    }

    return new Token(TokenType.Identifier, buffer.ToString());
}
public List<Token> Parse(string filepath)
{
    // Tokenizes the whole file via a state machine. Returns the token list on
    // success, or null after printing a diagnostic on the first lexical error.
    //
    // Fixed: the StreamReader is now wrapped in a using statement so it is
    // disposed on every path (including exceptions); previously only the two
    // explicit Close() calls released it.
    using (StreamReader input = new StreamReader(filepath))
    {
        Cursor cursor = new Cursor(input);
        List<Token> tokens = new List<Token>();
        Token token;
        State state = State.ProceedToNextStatement;
        LexicalError err = LexicalError.None;
        uint line = 1;         // 1-based line counter, used only for error reporting
        uint skippedLines = 0; // newlines consumed by the helpers below

        while (true)
        {
#if DBG_SHIFTING_STATE
            Console.WriteLine("dbg: Entering state " + state.ToString());
#endif
            switch (state)
            {
                case State.ProceedToNextStatement:
                    Parser.SkipEmptyLines(cursor, ref err, ref skippedLines);
                    line += skippedLines;
                    state = (err != LexicalError.None) ? State.Error : State.ProceedToNextToken;
                    break;

                case State.ProceedToNextToken:
                    // Dispatch on the first non-space character of the next token.
                    Parser.SkipSpaces(cursor);
                    if (Utils.IsLetter(cursor.CurrChar()))
                    {
                        state = State.Identifier;
                    }
                    else if (Utils.IsDigit(cursor.CurrChar()))
                    {
                        state = State.Number;
                    }
                    else if (Utils.IsQuote(cursor.CurrChar()))
                    {
                        state = State.String;
                    }
                    else if (Utils.IsSymbol(cursor.CurrChar()))
                    {
                        state = State.Symbol;
                    }
                    else if (Utils.IsNewLine(cursor.CurrChar()))
                    {
                        state = State.EndOfStatement;
                    }
                    else if (Utils.IsZeroChar(cursor.CurrChar()))
                    {
                        state = State.Final;
                    }
                    else
                    {
                        err = LexicalError.UnexpectedChar;
                        state = State.Error;
                    }
                    break;

                case State.Identifier:
                    token = Parser.ProcessIdentifier(cursor, ref err);
                    if (err != LexicalError.None)
                    {
                        state = State.Error;
                    }
                    else
                    {
                        tokens.Add(token);
                        state = State.ProceedToNextToken;
                    }
                    break;

                case State.Number:
                    token = Parser.ProcessNumber(cursor, ref err);
                    if (err != LexicalError.None)
                    {
                        state = State.Error;
                    }
                    else
                    {
                        tokens.Add(token);
                        state = State.ProceedToNextToken;
                    }
                    break;

                case State.String:
                    token = Parser.ProcessString(cursor, ref err);
                    if (err != LexicalError.None)
                    {
                        state = State.Error;
                    }
                    else
                    {
                        tokens.Add(token);
                        state = State.ProceedToNextToken;
                    }
                    break;

                case State.Symbol:
                    token = Parser.ProcessSymbol(cursor, ref err);
                    if (err != LexicalError.None)
                    {
                        state = State.Error;
                    }
                    else
                    {
                        tokens.Add(token);
                        state = State.ProceedToNextToken;
                    }
                    break;

                case State.EndOfStatement:
                    token = Parser.ProcessEndOfStatement(cursor, ref err, ref skippedLines);
                    line += skippedLines;
                    if (err != LexicalError.None)
                    {
                        state = State.Error;
                    }
                    else
                    {
                        tokens.Add(token);
                        state = State.ProceedToNextStatement;
                    }
                    break;

                case State.Final:
                    // Fixed: the original indexed tokens[tokens.Count - 1] without
                    // checking for an empty list, which threw on an empty or
                    // whitespace-only input file.
                    if (tokens.Count == 0 || tokens[tokens.Count - 1].Type != TokenType.EndOfStatement)
                    {
                        tokens.Add(new Token(TokenType.EndOfStatement, ""));
                    }
                    Console.WriteLine("Lexical Parsing success!");
                    return tokens;

                case State.Error:
                    Console.WriteLine("Error: " + err.ToString() + " at line(" + line + ")");
                    return null;
            }
        }
    }
}
public static Token ProcessNumber(Cursor cursor, ref LexicalError err)
{
    // Lexes a numeric literal with the shape:
    //   digits [ '.' digits* [ ('e'|'E') ['+'|'-'] digits+ ] ]
    // No '.' yields an IntLiteral; anything after the '.' yields a FloatLiteral.
    err = LexicalError.None;

    if (!Utils.IsDigit(cursor.CurrChar()))
    {
        err = LexicalError.UnexpectedChar;
        return null;
    }

    var buffer = new StringBuilder();

    // Integer part (at least one digit, guaranteed by the check above).
    do
    {
        buffer.Append(cursor.CurrChar());
        cursor.Move();
    } while (Utils.IsDigit(cursor.CurrChar()));

    if (cursor.CurrChar() != '.')
    {
        return new Token(TokenType.IntLiteral, buffer.ToString());
    }

    // Fractional part; the digits after '.' may be empty, matching the
    // original grammar.
    buffer.Append('.');
    cursor.Move();
    while (Utils.IsDigit(cursor.CurrChar()))
    {
        buffer.Append(cursor.CurrChar());
        cursor.Move();
    }

    // Optional exponent, only recognized after a '.' was seen.
    char marker = cursor.CurrChar();
    if (marker == 'e' || marker == 'E')
    {
        buffer.Append(marker);
        cursor.Move();

        char sign = cursor.CurrChar();
        if (sign == '+' || sign == '-')
        {
            buffer.Append(sign);
            cursor.Move();
        }

        if (!Utils.IsDigit(cursor.CurrChar()))
        {
            // An exponent marker must be followed by at least one digit.
            err = LexicalError.InvalidFloatLiteral;
            return null;
        }

        do
        {
            buffer.Append(cursor.CurrChar());
            cursor.Move();
        } while (Utils.IsDigit(cursor.CurrChar()));
    }

    return new Token(TokenType.FloatLiteral, buffer.ToString());
}
public static Token ProcessSymbol(Cursor cursor, ref LexicalError err)
{
    // Lexes one- and two-character operator/punctuation tokens.
    // NOTE: '+'/'-' share TokenType.Addition and '*','/','%' share
    // TokenType.Multiplication — token types group operators by class.
    err = LexicalError.None;

    char first = cursor.CurrChar();
    if (!Utils.IsSymbol(first))
    {
        err = LexicalError.UnexpectedChar;
        return null;
    }

    var buffer = new StringBuilder();
    buffer.Append(first);
    TokenType tt = TokenType.Undefined; // stays Undefined for unhandled symbols

    switch (first)
    {
        case '(': tt = TokenType.OpenningRoundBracket; break;
        case ')': tt = TokenType.ClosingRoundBracket; break;
        case '{': tt = TokenType.OpenningCurlyBracket; break;
        case '}': tt = TokenType.ClosingCurlyBracket; break;
        case ',': tt = TokenType.Comma; break;
        case ';': tt = TokenType.Semicolon; break;

        case '+':
        case '-':
            tt = TokenType.Addition;
            break;

        case '*':
        case '/':
        case '%':
            tt = TokenType.Multiplication;
            break;

        case '=':
            if (cursor.NextChar() == '=')
            {
                tt = TokenType.Comparison;
                buffer.Append('=');
                cursor.Move();
            }
            else
            {
                tt = TokenType.Assignment;
            }
            break;

        case ':':
            if (cursor.NextChar() == '=')
            {
                tt = TokenType.ShortAssignment;
                buffer.Append('=');
                cursor.Move();
            }
            else
            {
                tt = TokenType.Colon;
            }
            break;

        case '>':
        case '<':
            // '>' '<' '>=' '<=' are all comparisons.
            tt = TokenType.Comparison;
            if (cursor.NextChar() == '=')
            {
                buffer.Append('=');
                cursor.Move();
            }
            break;

        case '!':
            if (cursor.NextChar() == '=')
            {
                tt = TokenType.Comparison;
                buffer.Append('=');
                cursor.Move();
            }
            else
            {
                tt = TokenType.LogicalNegation;
            }
            break;

        case '|':
            // Only the two-character form '||' is valid.
            if (cursor.NextChar() != '|')
            {
                err = LexicalError.UnexpectedChar;
                return null;
            }
            tt = TokenType.LogicalOr;
            buffer.Append('|');
            cursor.Move();
            break;

        case '&':
            // Only the two-character form '&&' is valid.
            if (cursor.NextChar() != '&')
            {
                err = LexicalError.UnexpectedChar;
                return null;
            }
            tt = TokenType.LogicalAnd;
            buffer.Append('&');
            cursor.Move();
            break;
    }

    cursor.Move(); // consume the last character of the token
    return new Token(tt, buffer.ToString());
}