/// <summary>
/// Writes a one-line report for a token to the console: the lexeme, its numeric
/// token code, the mnemonic returned by <c>CodeTable.LookupCode</c> and, for
/// codes 50, 51 and 52, the value returned by <c>symbolTable.LookupSymbol</c>.
/// </summary>
/// <param name="token">Lexeme text; printed left-aligned in a 10-column field.</param>
/// <param name="tkCode">Numeric token code belonging to the lexeme.</param>
public static void PrintToken(string token, int tkCode)
{
    string lexemeColumn = token.PadRight(10);
    string codeColumn = tkCode.ToString().PadRight(10);
    Console.Write("Lexeme: " + lexemeColumn + " Token Code: " + codeColumn);

    string mnemonic = CodeTable.LookupCode(tkCode);
    Console.Write(" Mnemonic: " + mnemonic.PadRight(10));

    // The code is round-tripped through the code table (code -> mnemonic -> code)
    // before being tested. GetNextToken assigns 50 to identifiers, 51 to integer
    // constants and 52 to float constants, all of which it adds to the symbol table.
    // NOTE(review): GetNextToken also adds strings (code 53) to the symbol table,
    // but they are not reported here - confirm whether that is intentional.
    var resolvedCode = CodeTable.LookupName(mnemonic);
    switch (resolvedCode)
    {
        case 50:
        case 51:
        case 52:
            Console.Write(" Symbol Table Index: " + symbolTable.LookupSymbol(token));
            break;
    }

    Console.WriteLine();
}
/// <summary>
/// Drives the table-based DFA in <c>LEXDFA.storedDFA</c> over characters read from
/// <c>fileToTokenize</c> until the machine leaves its current token, then publishes
/// the result through the <c>nextToken</c> and <c>tokenCode</c> globals.
/// Sets the <c>EOF</c> global when the stream is already exhausted on entry.
/// </summary>
/// <param name="TraceOn">
/// When true, each newline that is consumed causes the line number and one line
/// read from <c>Trace</c> to be written to the console.
/// </param>
/// <returns>
/// The lexeme of the recognised token (an empty string for a comment, which gets
/// token code 999; the offending text for an unrecognised character, code 99).
/// Returns null when the call produced no token: a character was rejected from the
/// start state, an error was reported, or the input ran out.
/// </returns>
public static string GetNextToken(bool TraceOn)
{
    Token newToken = new Token();   // Token being assembled by this call
    int currentState = 0;           // State the DFA is in now
    int previousState = 0;          // State the DFA was in one step ago
    bool continueBuilding = true;   // Cleared once the current token is over
    string lexemeTobuild = "";      // Characters accepted so far

    if (fileToTokenize.EndOfStream)
    {
        EOF = true;
    }

    // NOTE(review): the loop stops as soon as the stream is exhausted, so a lexeme
    // that is still being built when the last character is read - or a character
    // held back with readNextChar == false - is never turned into a token. Input
    // appears to need trailing whitespace after the final token; fixing this would
    // change when EOF is raised, which callers may depend on.
    while (continueBuilding && !fileToTokenize.EndOfStream)
    {
        // readNextChar is false when the previous call ended on a character that
        // still has to be processed as the start of the next token.
        if (readNextChar)
        {
            // Invariant casing keeps keyword matching independent of the machine's
            // culture (e.g. Turkish 'i' would otherwise not become 'I').
            currentChar = char.ToUpperInvariant(Convert.ToChar(fileToTokenize.Read()));

            // Count and trace a newline only when it is freshly read. A newline that
            // terminated the previous token is seen again on the next call and must
            // not be counted (or echoed) a second time.
            if (IsNewline(currentChar))
            {
                currentLine += 1;
                if (TraceOn)
                {
                    Console.WriteLine(currentLine - 1 + ": " + Trace.ReadLine());
                }
            }
        }

        // Remember where we were in case this step takes us out of the machine.
        previousState = currentState;
        currentState = LEXDFA.storedDFA[GetCharacterDFAIndex(currentChar), currentState];

        // NOTE(review): COMMENT_NOT_ENDED is excluded from the "keep building" branch
        // but has no handling branch of its own; if the table ever yields it the loop
        // simply continues with that value as the state - confirm it is unreachable.
        if (currentState != GET_OUT && currentState != COMMENT_NOT_ENDED
            && currentState != STRING_NOT_ENDED && currentState != UNREC_CHAR)
        {
            // Still inside a token: keep the character and move on.
            lexemeTobuild += currentChar;
            readNextChar = true;
        }
        else if (currentState == GET_OUT && previousState == START)
        {
            // Character rejected straight from the start state: drop it.
            continueBuilding = false;
            readNextChar = true;
        }
        else if (currentState == GET_OUT && LEXDFA.Includes(previousState))
        {
            // Left the machine from a state LEXDFA reports as included (accepting).
            // The current character was not part of the token, so it is held back
            // for the next call.
            continueBuilding = false;
            readNextChar = false;

            FillAcceptedToken(ref newToken, previousState, lexemeTobuild);

            nextToken = newToken._lexeme;
            tokenCode = newToken._tokenCode;
            return nextToken;
        }
        else if (currentState == GET_OUT && !LEXDFA.Includes(previousState))
        {
            // Left the machine from a non-accepting state: no token is produced.
            continueBuilding = false;
            if (previousState == COMMENT_BODY || previousState == COMMENT_ENDING_STAR)
            {
                Console.WriteLine("//////////////////ERROR - Comment not closed before EOF - ERROR//////////////////");
            }
        }
        else if (currentState == STRING_NOT_ENDED)
        {
            Console.WriteLine("//////////////////ERROR - Unterminated String Found - ERROR//////////////////");
            continueBuilding = false;
        }
        else if (currentState == UNREC_CHAR)
        {
            // Unrecognised input character: return it (appended to whatever was
            // already collected) with token code 99 and consume it.
            lexemeTobuild += currentChar;
            newToken._lexeme = lexemeTobuild;
            newToken._tokenCode = 99;
            nextToken = newToken._lexeme;
            tokenCode = newToken._tokenCode;
            continueBuilding = false;
            readNextChar = true;
            return nextToken;
        }
    }

    return null;
}

/// <summary>
/// Fills in the lexeme and token code for a token that ended in the given DFA state,
/// and records identifiers, numeric constants and strings in the symbol table
/// (updating the <c>positionInTable</c> global). States not listed leave the token
/// untouched.
/// </summary>
/// <param name="token">Token to populate.</param>
/// <param name="acceptingState">DFA state the machine was in before it exited.</param>
/// <param name="lexeme">Characters collected for the token.</param>
private static void FillAcceptedToken(ref Token token, int acceptingState, string lexeme)
{
    switch (acceptingState)
    {
        case 1: // Identifier or reserved word
            lexeme = TruncateIdentifier(lexeme);
            token._lexeme = lexeme;
            if (IsReserveWord(lexeme))
            {
                // Reserved words are not added to the symbol table.
                token._tokenCode = ReserveWordsTable.LookupName(lexeme);
            }
            else
            {
                token._tokenCode = 50;
                positionInTable = AddToSymbolTable(token, 1);
            }
            break;

        case 2: // Integer constant
            token._lexeme = TruncateNumericConstant(lexeme);
            token._tokenCode = 51;
            positionInTable = AddToSymbolTable(token, 2);
            break;

        case 3: // Float constant
        case 6: // Float constant with exponent
            token._lexeme = TruncateNumericConstant(lexeme);
            token._tokenCode = 52;
            positionInTable = AddToSymbolTable(token, 3);
            break;

        case 9: // String
            token._lexeme = lexeme;
            token._tokenCode = 53;
            positionInTable = AddToSymbolTable(token, 4);
            break;

        case 14: // (* *) comment
        case 17: // ## comment
            // Comments carry no text; code 999 marks them.
            token._lexeme = "";
            token._tokenCode = 999;
            break;

        case 18: // /
            token._lexeme = lexeme;
            token._tokenCode = 30;
            break;
        case 19: // *
            token._lexeme = lexeme;
            token._tokenCode = 31;
            break;
        case 20: // +
            token._lexeme = lexeme;
            token._tokenCode = 32;
            break;
        case 21: // -
            token._lexeme = lexeme;
            token._tokenCode = 33;
            break;
        case 22: // (
            token._lexeme = lexeme;
            token._tokenCode = 34;
            break;
        case 23: // )
            token._lexeme = lexeme;
            token._tokenCode = 35;
            break;
        case 24: // ;
            token._lexeme = lexeme;
            token._tokenCode = 36;
            break;
        case 25: // :
            token._lexeme = lexeme;
            token._tokenCode = 47;
            break;
        case 26: // :=
            token._lexeme = ":=";
            token._tokenCode = 37;
            break;
        case 27: // >
            token._lexeme = lexeme;
            token._tokenCode = 38;
            break;
        case 28: // <
            token._lexeme = lexeme;
            token._tokenCode = 39;
            break;
        case 29: // >=
            token._lexeme = ">=";
            token._tokenCode = 40;
            break;
        case 30: // <=
            token._lexeme = "<=";
            token._tokenCode = 41;
            break;
        case 31: // =
            token._lexeme = lexeme;
            token._tokenCode = 42;
            break;
        case 32: // <>
            token._lexeme = "<>";
            token._tokenCode = 43;
            break;
        case 33: // ,
            token._lexeme = lexeme;
            token._tokenCode = 44;
            break;
        case 34: // [
            token._lexeme = lexeme;
            token._tokenCode = 45;
            break;
        case 35: // ]
            token._lexeme = lexeme;
            token._tokenCode = 46;
            break;
        case 36: // .
            token._lexeme = lexeme;
            token._tokenCode = 48;
            break;
    }
}