/// <summary>
/// Skips the remainder of a single-line (<c>//</c>) comment, leaving
/// <c>CurrentSymbol</c> on the first character of the following line.
/// </summary>
/// <param name="lexeme">
/// Kept for signature compatibility only. The string is passed by value and the
/// method returns nothing, so text appended to it could never be observed by the caller.
/// </param>
private void GetLineComment(string lexeme)
{
    // Stop at the line break, but also at end of input ('\0'): a comment on the
    // last line of a file without a trailing newline must not loop forever.
    while (CurrentSymbol.CurrentSymbol != '\n' && CurrentSymbol.CurrentSymbol != '\0')
    {
        CurrentSymbol = SourceCode.GetNextSymbol();
    }

    // Consume the line break itself; at end of input there is nothing left to consume.
    if (CurrentSymbol.CurrentSymbol == '\n')
    {
        CurrentSymbol = SourceCode.GetNextSymbol();
    }
}
/// <summary>
/// Scans the rest of a numeric literal whose first character is already in
/// <paramref name="lexeme"/> and classifies it as an integer, decimal or float token.
/// </summary>
/// <param name="lexeme">Characters of the literal collected so far (the current symbol is its last character).</param>
/// <param name="tokenColumn">Column where the literal starts.</param>
/// <param name="tokenRow">Row where the literal starts.</param>
/// <returns>
/// A <c>LiteralNumber</c> (digits only), <c>LiteralDecimal</c> (digits with a dot) or
/// <c>LiteralFloat</c> (scientific notation) token.
/// </returns>
/// <exception cref="LexicalException">The collected text is not a valid numeric literal.</exception>
private Token GetLiteralNumber(string lexeme, int tokenColumn, int tokenRow)
{
    CurrentSymbol = SourceCode.GetNextSymbol();

    while (true)
    {
        var current = CurrentSymbol.CurrentSymbol;

        if (current == '-' || current == '+')
        {
            // A sign is part of the literal only directly after the exponent marker
            // ("1e-5", "1e+5"). Anywhere else it is an operator, e.g. in
            // score=(float)countr/countq*100-difftime(finaltime,initialtime)/3;
            // or in "1e5-3", which must lex as "1e5", "-", "3".
            if (lexeme.Length == 0)
            {
                break;
            }

            var previous = lexeme[lexeme.Length - 1];
            if (previous != 'e' && previous != 'E')
            {
                break;
            }
        }
        else if (!char.IsLetterOrDigit(current) && current != '.')
        {
            break;
        }

        lexeme += current;
        CurrentSymbol = SourceCode.GetNextSymbol();
    }

    // Plain digits, optionally with a decimal point.
    if (Regex.IsMatch(lexeme, @"^[0-9]*(?:\.[0-9]*)?$"))
    {
        if (lexeme.Contains("."))
        {
            return new Token
            {
                TokenType = TokenType.LiteralDecimal,
                Lexeme = lexeme,
                Column = tokenColumn,
                Row = tokenRow
            };
        }

        return new Token
        {
            TokenType = TokenType.LiteralNumber,
            Lexeme = lexeme,
            Column = tokenColumn,
            Row = tokenRow
        };
    }

    // Floating point numbers in scientific notation.
    if (Regex.IsMatch(lexeme, @"^[-]?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][+-]?[0-9]+)?$"))
    {
        return new Token
        {
            TokenType = TokenType.LiteralFloat,
            Lexeme = lexeme,
            Column = tokenColumn,
            Row = tokenRow
        };
    }

    throw new LexicalException($"Symbol {CurrentSymbol.CurrentSymbol} not recognized at Row:{CurrentSymbol.Row} Col: {CurrentSymbol.Column}");
}
/// <summary>
/// Consumes the separator character currently under the cursor and wraps it in a token.
/// </summary>
/// <param name="lexeme">Text of the separator; used as the key into <c>ReservedWords._separators</c>.</param>
/// <param name="tokenColumn">Column where the separator starts.</param>
/// <param name="tokenRow">Row where the separator starts.</param>
/// <returns>A token whose type is the one registered for <paramref name="lexeme"/>.</returns>
private Token GetSeparator(string lexeme, int tokenColumn, int tokenRow)
{
    // Move past the separator first; the type lookup happens afterwards.
    CurrentSymbol = SourceCode.GetNextSymbol();

    var separatorType = ReservedWords._separators[lexeme];

    return new Token
    {
        TokenType = separatorType,
        Lexeme = lexeme,
        Column = tokenColumn,
        Row = tokenRow
    };
}
/// <summary>
/// Advances one symbol and, if that symbol is a double quote or a backslash,
/// consumes it and returns the captured character.
/// </summary>
/// <returns>
/// <c>"</c> when the next symbol is a double quote; the character following the
/// backslash when the next symbol is a backslash; otherwise an empty string
/// (in which case only the initial advance has happened).
/// </returns>
private string ConsumeQuotationMark()
{
    CurrentSymbol = SourceCode.GetNextSymbol();

    var first = CurrentSymbol.CurrentSymbol;
    if (first != '"' && first != '\\')
    {
        return string.Empty;
    }

    if (first == '\\')
    {
        // Drop the backslash; the character after it is the one that gets captured.
        CurrentSymbol = SourceCode.GetNextSymbol();
    }

    var captured = string.Empty + CurrentSymbol.CurrentSymbol;
    CurrentSymbol = SourceCode.GetNextSymbol();
    return captured;
}
/// <summary>
/// Consumes a block comment up to and including its closing <c>*/</c>, leaving
/// <c>CurrentSymbol</c> on the first character after the comment.
/// </summary>
/// <param name="lexeme">Comment text collected so far; the consumed characters are appended to it.</param>
/// <returns><paramref name="lexeme"/> followed by every consumed character, including the closing <c>*/</c>.</returns>
/// <exception cref="LexicalException">End of input was reached before the comment was closed.</exception>
private string GetBlockComment(string lexeme)
{
    // Iterative on purpose: one recursive call per '*' would grow the stack on
    // banner-style comments made of long runs of asterisks.
    var text = new System.Text.StringBuilder(lexeme);

    while (true)
    {
        // Copy everything up to the next '*'.
        while (CurrentSymbol.CurrentSymbol != '*')
        {
            // '\0' marks end of input; without this check an unterminated
            // comment would never finish scanning.
            if (CurrentSymbol.CurrentSymbol == '\0')
            {
                throw new LexicalException($"Unterminated block comment at Row:{CurrentSymbol.Row} Col: {CurrentSymbol.Column}");
            }

            text.Append(CurrentSymbol.CurrentSymbol);
            CurrentSymbol = SourceCode.GetNextSymbol();
        }

        // Add the '*' itself, then look at the character right after it
        // to see whether it is the '/' that closes the comment.
        text.Append(CurrentSymbol.CurrentSymbol);
        CurrentSymbol = SourceCode.GetNextSymbol();

        if (CurrentSymbol.CurrentSymbol == '/')
        {
            text.Append(CurrentSymbol.CurrentSymbol);
            CurrentSymbol = SourceCode.GetNextSymbol();
            return text.ToString();
        }
    }
}
/// <summary>
/// Scans the digits of a hexadecimal literal whose prefix is already in
/// <paramref name="lexeme"/> and returns the corresponding token.
/// </summary>
/// <param name="lexeme">Prefix collected so far (the current symbol is its last character).</param>
/// <param name="tokenColumn">Column where the literal starts.</param>
/// <param name="tokenRow">Row where the literal starts.</param>
/// <returns>A <c>LiteralHexadecimal</c> token.</returns>
/// <exception cref="LexicalException">The collected text is not a valid hexadecimal literal.</exception>
private Token GetLiteralHexadecimal(string lexeme, int tokenColumn, int tokenRow)
{
    // Step past the prefix character, then gather every following letter or digit;
    // validity is decided afterwards by the pattern below.
    for (CurrentSymbol = SourceCode.GetNextSymbol();
         char.IsLetterOrDigit(CurrentSymbol.CurrentSymbol);
         CurrentSymbol = SourceCode.GetNextSymbol())
    {
        lexeme += CurrentSymbol.CurrentSymbol;
    }

    var isHexadecimal = Regex.IsMatch(lexeme, @"\A\b(0[xX])?[0-9a-fA-F]+\b\Z");
    if (!isHexadecimal)
    {
        throw new LexicalException($"Symbol {CurrentSymbol.CurrentSymbol} not recognized at Row:{CurrentSymbol.Row} Col: {CurrentSymbol.Column}");
    }

    return new Token
    {
        TokenType = TokenType.LiteralHexadecimal,
        Lexeme = lexeme,
        Column = tokenColumn,
        Row = tokenRow
    };
}
/// <summary>
/// Scans an operator that starts with the character already in <paramref name="lexeme"/>.
/// Handles two- and three-character operators, the <c>%&gt;</c> tag that switches the
/// lexer to HTML mode, and line/block comments (which are skipped before returning
/// the token that follows them).
/// </summary>
/// <param name="lexeme">First character of the operator (the current symbol).</param>
/// <param name="tokenColumn">Column where the operator starts.</param>
/// <param name="tokenRow">Row where the operator starts.</param>
/// <returns>The operator token, or the next real token when a comment was skipped.</returns>
private Token GetOperator(string lexeme, int tokenColumn, int tokenRow)
{
    CurrentSymbol = SourceCode.GetNextSymbol();
    var lookahead = CurrentSymbol.CurrentSymbol;

    // The next character extends the operator only when it is a special symbol
    // and the pair is neither ">/" nor "**".
    bool formsCompound =
        ReservedWords._specialSymbols.Contains(lookahead.ToString())
        && (!lexeme.Equals(">") || lookahead != '/')
        && (!lexeme.Equals("*") || lookahead != '*');

    if (!formsCompound)
    {
        return new Token
        {
            TokenType = ReservedWords._operators[lexeme],
            Lexeme = lexeme,
            Column = tokenColumn,
            Row = tokenRow
        };
    }

    lexeme += lookahead;
    CurrentSymbol = SourceCode.GetNextSymbol();

    switch (lexeme)
    {
        case "%>":
            // Closing tag: everything after it is HTML content again.
            HtmlMode = true;
            // NOTE(review): the cursor was already moved past '>' above, so this
            // second advance skips one more character - confirm that is intended.
            CurrentSymbol = SourceCode.GetNextSymbol();
            return new Token
            {
                TokenType = ReservedWords._operators[lexeme.Substring(0, 2)],
                Lexeme = lexeme,
                Column = tokenColumn,
                Row = tokenRow
            };

        case "//":
            // Line comment: discard it and hand back whatever token comes next.
            GetLineComment(string.Empty);
            return GetNextToken();

        case "/*":
            // Block comment: same treatment.
            GetBlockComment(string.Empty);
            return GetNextToken();
    }

    // Three-character operators such as >>= and <<=.
    if (ReservedWords._specialSymbols.Contains(lexeme.Substring(0, 2))
        && CurrentSymbol.CurrentSymbol == '=')
    {
        lexeme += CurrentSymbol.CurrentSymbol;

        // Only these two forms consume the '='.
        if (lexeme == ">>=" || lexeme == "<<=")
        {
            CurrentSymbol = SourceCode.GetNextSymbol();
        }

        return new Token
        {
            TokenType = ReservedWords._operators[lexeme.Substring(0, 3)],
            Lexeme = lexeme,
            Column = tokenColumn,
            Row = tokenRow
        };
    }

    return new Token
    {
        TokenType = ReservedWords._operators[lexeme.Substring(0, 2)],
        Lexeme = lexeme,
        Column = tokenColumn,
        Row = tokenRow
    };
}
/// <summary>
/// Scans and returns the next token starting at <c>CurrentSymbol</c>.
/// In HTML mode the raw text up to and including the next <c>&lt;%</c> (or up to end of
/// input) is returned as a single <c>HTMLContent</c> token. Otherwise whitespace is
/// skipped and an identifier, numeric/char/string/date literal, separator or operator
/// is recognized from its first character.
/// </summary>
/// <returns>The next token, or an <c>EndOfFile</c> token when the input is exhausted.</returns>
/// <exception cref="LexicalException">The input at the current position does not form a valid token.</exception>
public Token GetNextToken()
{
    var lexeme = string.Empty;
    var tokenRow = 0;
    var tokenColumn = 0;

    if (HtmlMode)
    {
        // Row/Column are overwritten on every consumed character, so the
        // resulting token reports the position of the last one.
        do
        {
            if (CurrentSymbol.CurrentSymbol == '<')
            {
                lexeme += CurrentSymbol.CurrentSymbol;
                tokenColumn = CurrentSymbol.Column;
                tokenRow = CurrentSymbol.Row;
                CurrentSymbol = SourceCode.GetNextSymbol();

                if (CurrentSymbol.CurrentSymbol == '%')
                {
                    // "<%" opens a code section: leave HTML mode.
                    lexeme += CurrentSymbol.CurrentSymbol;
                    tokenColumn = CurrentSymbol.Column;
                    tokenRow = CurrentSymbol.Row;
                    HtmlMode = false;
                    CurrentSymbol = SourceCode.GetNextSymbol();
                }
            }
            else
            {
                lexeme += CurrentSymbol.CurrentSymbol;
                tokenColumn = CurrentSymbol.Column;
                tokenRow = CurrentSymbol.Row;
                CurrentSymbol = SourceCode.GetNextSymbol();

                // '\0' marks end of input.
                if (CurrentSymbol.CurrentSymbol == '\0')
                {
                    HtmlMode = false;
                }
            }
        } while (HtmlMode);

        return new Token
        {
            TokenType = TokenType.HTMLContent,
            Lexeme = lexeme,
            Row = tokenRow,
            Column = tokenColumn
        };
    }

    while (char.IsWhiteSpace(CurrentSymbol.CurrentSymbol))
    {
        CurrentSymbol = SourceCode.GetNextSymbol();
    }

    if (CurrentSymbol.CurrentSymbol == '\0')
    {
        return new Token
        {
            TokenType = TokenType.EndOfFile,
            Row = tokenRow,
            Column = tokenColumn
        };
    }

    // Identifiers and reserved words.
    if (char.IsLetter(CurrentSymbol.CurrentSymbol) || CurrentSymbol.CurrentSymbol == '_')
    {
        lexeme += CurrentSymbol.CurrentSymbol;
        tokenColumn = CurrentSymbol.Column;
        tokenRow = CurrentSymbol.Row;
        return GetIdentifier(lexeme, tokenColumn, tokenRow);
    }

    // Literals starting with '0': hexadecimal, octal, "0.x"/"0e.." numbers or a lone zero.
    if (CurrentSymbol.CurrentSymbol == '0')
    {
        lexeme += CurrentSymbol.CurrentSymbol;
        tokenColumn = CurrentSymbol.Column;
        tokenRow = CurrentSymbol.Row;
        CurrentSymbol = SourceCode.GetNextSymbol();

        var afterZero = CurrentSymbol.CurrentSymbol;

        // Both prefix spellings are accepted, matching the [xX] that
        // GetLiteralHexadecimal validates against.
        if (afterZero == 'x' || afterZero == 'X')
        {
            lexeme += afterZero;
            return GetLiteralHexadecimal(lexeme, tokenColumn, tokenRow);
        }

        // "0.5" or "0e5": hand over to the general number scanner instead of
        // cutting the literal off after the leading zero.
        if (afterZero == '.' || afterZero == 'e' || afterZero == 'E')
        {
            lexeme += afterZero;
            return GetLiteralNumber(lexeme, tokenColumn, tokenRow);
        }

        // A single zero.
        if (!char.IsDigit(afterZero))
        {
            return new Token
            {
                TokenType = TokenType.LiteralNumber,
                Lexeme = lexeme,
                Column = tokenColumn,
                Row = tokenRow
            };
        }

        lexeme += afterZero;
        return GetLiteralOctal(lexeme, tokenColumn, tokenRow);
    }

    if (char.IsDigit(CurrentSymbol.CurrentSymbol))
    {
        lexeme += CurrentSymbol.CurrentSymbol;
        tokenColumn = CurrentSymbol.Column;
        tokenRow = CurrentSymbol.Row;
        return GetLiteralNumber(lexeme, tokenColumn, tokenRow);
    }

    if (ReservedWords._separators.ContainsKey(CurrentSymbol.CurrentSymbol.ToString()))
    {
        lexeme += CurrentSymbol.CurrentSymbol;
        tokenColumn = CurrentSymbol.Column;
        tokenRow = CurrentSymbol.Row;
        return GetSeparator(lexeme, tokenColumn, tokenRow);
    }

    if (ReservedWords._operators.ContainsKey(CurrentSymbol.CurrentSymbol.ToString()))
    {
        lexeme += CurrentSymbol.CurrentSymbol;
        tokenColumn = CurrentSymbol.Column;
        tokenRow = CurrentSymbol.Row;
        return GetOperator(lexeme, tokenColumn, tokenRow);
    }

    // Char literals.
    if (CurrentSymbol.CurrentSymbol == '\'')
    {
        tokenColumn = CurrentSymbol.Column;
        tokenRow = CurrentSymbol.Row;
        CurrentSymbol = SourceCode.GetNextSymbol();

        // Escape sequence: keep the backslash and the escaped character together.
        if (CurrentSymbol.CurrentSymbol == '\\')
        {
            lexeme += CurrentSymbol.CurrentSymbol;
            CurrentSymbol = SourceCode.GetNextSymbol();
            lexeme += CurrentSymbol.CurrentSymbol;
            CurrentSymbol = SourceCode.GetNextSymbol();
        }

        lexeme = GetLiteralStringOrChar(lexeme, '\'');

        if (lexeme.Length == 1 || (lexeme.Length > 0 && lexeme[0] == '\\'))
        {
            return new Token
            {
                TokenType = TokenType.LiteralChar,
                Lexeme = lexeme,
                Column = tokenColumn,
                Row = tokenRow
            };
        }

        // Reject an invalid char literal right here. Falling through would let the
        // string and date branches below reuse its content as the start of their lexeme.
        throw new LexicalException($"Symbol {CurrentSymbol.CurrentSymbol} not recognized at Row:{CurrentSymbol.Row} Col: {CurrentSymbol.Column}");
    }

    // String literals.
    if (CurrentSymbol.CurrentSymbol == '"')
    {
        tokenColumn = CurrentSymbol.Column;
        tokenRow = CurrentSymbol.Row;
        CurrentSymbol = SourceCode.GetNextSymbol();
        lexeme = GetLiteralStringOrChar(lexeme, '"');

        // A string may span several lines only when every line except the last
        // contains a backslash, as in:
        //   cout << "Line 4 - a is either less than \
        //            or equal to b" << endl;
        string[] lines = lexeme.Split(new[] { "\r\n", "\n" }, StringSplitOptions.None);
        for (int i = 0; i < lines.Length - 1; i++)
        {
            if (!lines[i].Contains("\\"))
            {
                throw new LexicalException($"Symbol {CurrentSymbol.CurrentSymbol} not recognized at Row:{CurrentSymbol.Row} Col: {CurrentSymbol.Column}");
            }
        }

        return new Token
        {
            TokenType = TokenType.LiteralString,
            Lexeme = lexeme,
            Column = tokenColumn,
            Row = tokenRow
        };
    }

    // '#' introduces either a directive such as #include or a date literal #dd-MM-yyyy#.
    if (CurrentSymbol.CurrentSymbol == '#')
    {
        lexeme += CurrentSymbol.CurrentSymbol;
        CurrentSymbol = SourceCode.GetNextSymbol();

        // Directive: scanned as an identifier that keeps the leading '#'.
        if (char.IsLetter(CurrentSymbol.CurrentSymbol))
        {
            lexeme += CurrentSymbol.CurrentSymbol;
            tokenColumn = CurrentSymbol.Column;
            tokenRow = CurrentSymbol.Row;
            return GetIdentifier(lexeme, tokenColumn, tokenRow);
        }

        // Date: the text between the two '#' delimiters.
        if (char.IsDigit(CurrentSymbol.CurrentSymbol))
        {
            tokenColumn = CurrentSymbol.Column;
            tokenRow = CurrentSymbol.Row;
            lexeme = GetLiteralStringOrChar(string.Empty, '#');
        }

        DateTime dt;
        bool isValid = DateTime.TryParseExact(
            lexeme.Replace('#', ' '),
            "dd-MM-yyyy",
            CultureInfo.InvariantCulture,
            DateTimeStyles.None,
            out dt);

        if (isValid)
        {
            return new Token
            {
                TokenType = TokenType.LiteralDate,
                Lexeme = lexeme,
                Column = tokenColumn,
                Row = tokenRow
            };
        }
    }

    throw new LexicalException($"Symbol {CurrentSymbol.CurrentSymbol} not recognized at Row:{CurrentSymbol.Row} Col: {CurrentSymbol.Column}");
}