/// <name>Parser</name>
/// <type>Constructor</type>
/// <summary>
/// Creates a parser object with empty symbol and literal tables, empty
/// operator, structure and forward-reference stacks, placeholder operator
/// tokens and the next free location set to 1.
/// </summary>
public Parser() : base()
{
    // Lookup tables and work stacks all start out empty
    symbolTable = new Dictionary<string, Symbol>();
    literalTable = new Dictionary<string, Literal>();
    operatorStack = new Stack<Token>();
    structureStack = new Stack<Structure>();
    forwardRefStack = new Stack<ForwardReference>();

    // No operator has been read yet, so both slots hold a NO_TOKEN placeholder
    currentOperator = new Token(TokenType.NO_TOKEN, String.Empty);
    nextOperator = new Token(TokenType.NO_TOKEN, String.Empty);

    // Nothing sits between the operators yet
    operand = null;
    symbol = null;
    literal = null;

    // Parsing state
    inExpression = false;
    nextLocation = 1;
}
/// <name>Scanner</name>
/// <type>Constructor</type>
/// <summary>
/// Creates a scanner object. Builds the lookup table that maps every operator
/// and reserved word to its token type, and creates the token object that the
/// class's public methods reuse during execution.
/// </summary>
public Scanner() : base()
{
    operatorTable = new Dictionary<string, TokenType>
    {
        // Arithmetic operators
        { "+", TokenType.ADD_OP },
        { "-", TokenType.ADD_OP },
        { "*", TokenType.MULT_OP },
        { "/", TokenType.MULT_OP },

        // Grouping and punctuation
        { "(", TokenType.L_PAREN },
        { ")", TokenType.R_PAREN },
        { "[", TokenType.L_BRACKET },
        { "]", TokenType.R_BRACKET },
        { ";", TokenType.SEMICOLON },

        // Relational operators
        { "<", TokenType.REL_OP },
        { "<=", TokenType.REL_OP },
        { "=", TokenType.REL_OP },
        { ">=", TokenType.REL_OP },
        { ">", TokenType.REL_OP },
        { "!=", TokenType.REL_OP },

        { ",", TokenType.COMMA },
        { ":=", TokenType.ASSIGN },

        // Reserved words
        { "call", TokenType.CALL },
        { "else", TokenType.ELSE },
        { "endif", TokenType.END },
        { "endproc", TokenType.END },
        { "endprogram", TokenType.END },
        { "endwhile", TokenType.END },
        { "if", TokenType.IF_WHILE },
        { "while", TokenType.IF_WHILE },
        { "proc", TokenType.PROC },
        { "then", TokenType.THEN_DO },
        { "do", TokenType.THEN_DO },
        { "write", TokenType.WRITE },
        { "program", TokenType.PROGRAM },
        { "const", TokenType.CONST },
        { "var", TokenType.VAR },

        // Braces
        { "{", TokenType.L_BRACE },
        { "}", TokenType.R_BRACE }
    };

    // Placeholder token until the first call to GetNextToken
    currentToken = new Token(TokenType.NO_TOKEN, String.Empty);
}
/// <name>Clone</name>
/// <type>Method</type>
/// <summary>
/// Creates a new token that carries this token's type and lexeme.
/// </summary>
/// <returns>The new Token, typed as object</returns>
public object Clone()
{
    return new Token(this.Type, this.Lexeme);
}
/// <name>GetNextToken</name>
/// <type>Method</type>
/// <summary>
/// Returns the next token in the file and consumes the token. White space and
/// comments (both "//" and "/* */") are skipped, and the compiler's line count
/// is updated for every newline that is passed over. Operator tokens are
/// written into the scanner's reusable token object, so a caller that needs to
/// keep a returned token across calls should clone it.
/// </summary>
/// <returns>
/// Next valid token from the input file, or an END_FILE token once the input
/// is exhausted
/// </returns>
public Token GetNextToken()
{
    if (sourceFile.EndOfStream)
    {
        // If we're at the end of the file stream then return end file token
        currentToken.Type = TokenType.END_FILE;
        currentToken.Lexeme = String.Empty;
        return currentToken;
    }

    int nextChar = sourceFile.Read();

    // Skip over white space but process newline characters by updating
    // the line count (static variable to the compiler class)
    while (Char.IsWhiteSpace((char)nextChar))
    {
        if (sourceFile.EndOfStream)
        {
            currentToken.Type = TokenType.END_FILE;
            currentToken.Lexeme = String.Empty;
            return currentToken;
        }
        else if (nextChar == '\n')
        {
            Compiler.LineNumber++;
        }

        nextChar = sourceFile.Read();
    }

    if (Char.IsLetter((char)nextChar))
    {
        // Symbol token - call ReadSymbol to handle the rest
        currentToken = ReadSymbol(nextChar);
    }
    else if (Char.IsDigit((char)nextChar))
    {
        // Literal token - call ReadLiteral to handle the rest
        currentToken = ReadLiteral(nextChar);
    }
    else if (nextChar == '\'')
    {
        // Literal token in apostrophes - the lexeme is the integer code of the
        // character; the closing apostrophe is then skipped
        nextChar = sourceFile.Read();
        currentToken.Lexeme = nextChar.ToString();
        currentToken.Type = TokenType.LITERAL;
        sourceFile.Read();
    }
    else if (nextChar == '/' && sourceFile.Peek() == '/')
    {
        // Single-line comment - skip the rest of the line. Read() returns -1
        // at end of input, so also stop there: a comment on the last line of
        // a file without a trailing newline must not loop forever.
        while (nextChar != '\n' && nextChar != -1)
        {
            nextChar = sourceFile.Read();
        }

        if (nextChar == '\n')
        {
            Compiler.LineNumber++;
        }

        currentToken = GetNextToken();
    }
    else if (nextChar == '/' && sourceFile.Peek() == '*')
    {
        // Multi-line comment. Consume the opening '*' first so it cannot also
        // be taken as the start of the closing "*/" (as in "/*/").
        sourceFile.Read();
        nextChar = sourceFile.Read();

        // Keep reading until "*/" (counting newlines) or until input runs out
        while (nextChar != -1 && (nextChar != '*' || sourceFile.Peek() != '/'))
        {
            if (nextChar == '\n')
            {
                Compiler.LineNumber++;
            }

            nextChar = sourceFile.Read();
        }

        if (nextChar == -1)
        {
            Compiler.ThrowCompilerException("Unterminated comment");
        }

        // Skip the closing '/' and carry on with the token after the comment
        sourceFile.Read();
        currentToken = GetNextToken();
    }
    else
    {
        // Operator token. Any character other than '/' that is followed by
        // '=' forms a two-character operator; everything else is looked up
        // as a single character.
        string op = ((char)nextChar).ToString();
        if (nextChar != '/' && sourceFile.Peek() == '=')
        {
            op += ((char)sourceFile.Read()).ToString();
        }

        TokenType opType;
        if (operatorTable.TryGetValue(op, out opType))
        {
            currentToken.Lexeme = op;
            currentToken.Type = opType;
        }
        else
        {
            Compiler.ThrowCompilerException("Illegal operator");
        }
    }

    return currentToken;
}
/// <name>ReadSymbol</name>
/// <type>Method</type>
/// <summary>
/// Advances through the input file and tokenizes character sets classified
/// as symbols. The input file is read until a non-letter character is
/// encountered and the entire token is returned. Letters are lowercased with
/// the invariant culture so that reserved words are recognized the same way
/// regardless of the machine's locale.
/// </summary>
/// <param name="firstChar">First character of the token</param>
/// <returns>
/// Token object whose lexeme is the lowercased symbol and whose type is the
/// reserved word's type from the operator table, or SYMBOL otherwise
/// </returns>
private Token ReadSymbol(int firstChar)
{
    Token newToken = new Token();

    // Culture-sensitive lowering would break keyword lookup in some locales
    // (for example 'I' does not become 'i' under Turkish rules)
    string symbol = Char.ToLowerInvariant((char)firstChar).ToString();

    // Keep reading letters until we have the entire symbol (converting each
    // letter to lowercase along the way)
    while (Char.IsLetter((char)sourceFile.Peek()))
    {
        symbol += Char.ToLowerInvariant((char)sourceFile.Read()).ToString();
    }

    // Check to see if we have a reserved word
    TokenType reservedType;
    if (operatorTable.TryGetValue(symbol, out reservedType))
    {
        newToken.Type = reservedType;
    }
    else
    {
        newToken.Type = TokenType.SYMBOL;
    }

    // Return the token
    newToken.Lexeme = symbol;
    return newToken;
}
/// <name>ReadLiteral</name>
/// <type>Method</type>
/// <summary>
/// Advances through the input file and tokenizes literal values. Literals are
/// numbers that can be in base 10, base 8 (leading 0) or base 16 (leading 0x
/// or 0X). The returned lexeme is always written in base 10: a base 10 literal
/// is returned as read, while an octal or hex literal is converted first.
/// A compiler exception ("Invalid literal") is raised when a digit is not
/// valid for the base, when "0x" is not followed by any digit, or when an
/// octal or hex value does not fit in a 32-bit signed integer.
/// </summary>
/// <param name="firstChar">First character of the token</param>
/// <returns>Token object with the type and lexeme of the literal</returns>
private Token ReadLiteral(int firstChar)
{
    Token newToken = new Token();
    string digits = String.Empty;
    string literal;
    int numberBase;

    // If the token starts with a 0 then it is octal, 0x and it is hex
    if (firstChar == '0' && (sourceFile.Peek() == 'x' || sourceFile.Peek() == 'X'))
    {
        sourceFile.Read();
        numberBase = 16;
    }
    else if (firstChar == '0')
    {
        numberBase = 8;
    }
    else
    {
        // First char is part of the literal in base 10 so keep it
        digits += ((char)firstChar).ToString();
        numberBase = 10;
    }

    // Keep reading digits (or hex characters) until we have the entire literal
    while (Char.IsDigit((char)sourceFile.Peek()) || IsHexDigit((char)sourceFile.Peek()))
    {
        digits += ((char)sourceFile.Read()).ToString();
    }

    if (numberBase == 10)
    {
        // Already in base 10 so no conversion is needed, but hex letters that
        // were swept up by the read loop make the literal invalid
        foreach (char c in digits)
        {
            if (Char.IsDigit(c) == false)
            {
                Compiler.ThrowCompilerException("Invalid literal");
            }
        }

        literal = digits;
    }
    else
    {
        // "0x" on its own is not a number (a lone "0" is still octal zero)
        if (numberBase == 16 && digits.Length == 0)
        {
            Compiler.ThrowCompilerException("Invalid literal");
        }

        // Accumulate left to right in a long so that a value too large for an
        // int is detected instead of silently wrapping
        long value = 0;
        foreach (char c in digits)
        {
            char lower = Char.ToLowerInvariant(c);

            // Any character that is not 0-9 or a-f keeps the out-of-range
            // marker and is rejected below
            int digit = numberBase;
            if (lower >= '0' && lower <= '9')
            {
                digit = lower - '0';
            }
            else if (lower >= 'a' && lower <= 'f')
            {
                digit = lower - 'a' + 10;
            }

            if (digit >= numberBase)
            {
                Compiler.ThrowCompilerException("Invalid literal");
                break;
            }

            value = (value * numberBase) + digit;
            if (value > Int32.MaxValue)
            {
                Compiler.ThrowCompilerException("Invalid literal");
                break;
            }
        }

        literal = value.ToString();
    }

    // Return the token
    newToken.Lexeme = literal;
    newToken.Type = TokenType.LITERAL;
    return newToken;
}
/// <name>Parentheses</name>
/// <type>Method</type>
/// <summary>
/// Parentheses code generator. Advances one token, replaces the current
/// operator with the one on top of the operator stack, and sets the status
/// to FREEZE.
/// </summary>
private void Parentheses()
{
    Advance();

    // The operator saved on the stack becomes the current operator again
    Token savedOperator = operatorStack.Pop();
    currentOperator = savedOperator;

    status = Status.FREEZE;
}
/// <name>EqualityForOperators</name>
/// <type>Method</type>
/// <summary>
/// Equality for operators code generator. Emits the integer arithmetic
/// opcode that matches the current operator's lexeme (+, -, *, /), raises a
/// compiler exception for any other lexeme, and then replaces the current
/// operator with the one on top of the operator stack.
/// </summary>
private void EqualityForOperators()
{
    string lexeme = currentOperator.Lexeme;

    if (lexeme == "+")
    {
        jvm.Emit(Opcode.IADD);
    }
    else if (lexeme == "-")
    {
        jvm.Emit(Opcode.ISUB);
    }
    else if (lexeme == "*")
    {
        jvm.Emit(Opcode.IMUL);
    }
    else if (lexeme == "/")
    {
        jvm.Emit(Opcode.IDIV);
    }
    else
    {
        Compiler.ThrowCompilerException("Invalid expression operator");
    }

    // Resume with the operator that was saved before this one was read
    currentOperator = operatorStack.Pop();
}
/// <name>CompileVariables</name>
/// <type>Method</type>
/// <summary>
/// Generates code for all variable declarations in the program. Each declared
/// name is given the next free location. Arrays (1 to 100 elements) are
/// allocated and may be followed by "= { ... }" to initialize their elements
/// in order; plain variables are stored with their initial value, or with
/// zero when none is given. Processing stops at the semicolon that ends the
/// declarations. Malformed declarations raise a compiler exception instead of
/// failing on a missing operand.
/// </summary>
private void CompileVariables()
{
    // Keep processing until we reach the end of the declarations
    while (nextOperator.Type != TokenType.SEMICOLON)
    {
        // Advance to the next variable declaration and record the address
        Advance();

        // Advance only sets symbol when the token it read was a symbol, so
        // anything else here (a literal, an operator, end of file) is an error
        if (operand == null || symbol == null)
        {
            Compiler.ThrowCompilerException("Symbol expected in variable declaration");
        }

        if (symbol.Address == -1)
        {
            symbolTable[operand.Lexeme].Address = nextLocation;
        }
        else
        {
            Compiler.ThrowCompilerException("Symbol has already been defined");
        }

        // Now generate bytecode for the declaration
        if (nextOperator.Type == TokenType.L_BRACKET)
        {
            // Array declaration - remember the array's name, because operand
            // is overwritten by the Advance calls that follow
            Token array = new Token(TokenType.SYMBOL, operand.Lexeme);
            symbolTable[operand.Lexeme].SymbolType = SymbolType.ARRAY_TYPE;
            Advance();

            // Push the number representing the size of the array; the size
            // must be a literal between 1 and 100
            if (literal != null && literal.NumberValue > 0 && literal.NumberValue < 101)
            {
                PushConstant(literal.NumberValue);
            }
            else
            {
                Compiler.ThrowCompilerException("Illegal array declaration");
            }

            // 10 is the JVM newarray type code for int
            jvm.Emit(Opcode.NEWARRAY, (byte)10);
            jvm.ChooseOp(Opcode.ASTORE, Opcode.ASTORE_0, nextLocation);
            Advance();

            // Only an equal sign starts an initialization (the same test the
            // plain variable branch uses), not just any relational operator
            if (nextOperator.Type == TokenType.REL_OP && nextOperator.Lexeme == "=")
            {
                Advance();
                int arrayIndex = 0;

                while (nextOperator.Type != TokenType.R_BRACE)
                {
                    // Keep going until we come to the closing brace
                    Advance();

                    if (operand == null)
                    {
                        Compiler.ThrowCompilerException("Value expected in array initializer");
                    }

                    // array[arrayIndex] = operand
                    PushOperand(array);
                    PushConstant(arrayIndex);
                    PushOperand(operand);
                    jvm.Emit(Opcode.IASTORE);
                    arrayIndex++;
                }

                // Advance past the right brace and comma
                Advance();
            }
        }
        else
        {
            // Variable declaration
            symbolTable[operand.Lexeme].SymbolType = SymbolType.VAR_TYPE;

            if (nextOperator.Lexeme == "=")
            {
                // If the next operator is an equal sign then we have an initialization
                Advance();

                if (operand == null)
                {
                    Compiler.ThrowCompilerException("Initial value expected in variable declaration");
                }

                if (operand.Type == TokenType.LITERAL)
                {
                    // Assignment is a literal
                    PushConstant(literalTable[operand.Lexeme].NumberValue);
                }
                else
                {
                    // Assignment is a previously-defined symbol
                    PushOperand(operand);
                }
            }
            else if (nextOperator.Type == TokenType.COMMA
                || nextOperator.Type == TokenType.SEMICOLON)
            {
                // Variable declaration with no initialization so initialize to zero
                PushConstant(0);
            }
            else
            {
                Compiler.ThrowCompilerException("Invalid operator found in variable declarations");
            }

            // Generate code to store the initial value at the variable's address
            jvm.ChooseOp(Opcode.ISTORE, Opcode.ISTORE_0, nextLocation);
        }

        // Finally increase the location index since we assigned the current address
        nextLocation++;
    }

    Advance();
}
/// <name>CompileExpression</name>
/// <type>Method</type>
/// <summary>
/// Compile arithmetic expression. Generates code for the operand that is
/// already current, then repeatedly calls the code generator for the current
/// and next operators, advancing to a new operand whenever the status is
/// neither FREEZE nor EXIT. The code generator is called one final time on
/// the pass in which the status is found to be EXIT.
/// </summary>
private void CompileExpression()
{
    // Perform initialization and generate code for the first operand
    inExpression = true;
    status = Status.FREEZE;
    PushExpressionOperand();

    // Loop continues until we reach the end of the expression
    while (inExpression == true)
    {
        if (status == Status.EXIT)
        {
            inExpression = false;
        }
        else if (status == Status.FREEZE)
        {
            // Stay on the same operators for this pass only
            status = Status.CONTINUE;
        }
        else
        {
            // Advance but save the current operator on the stack
            operatorStack.Push(currentOperator);
            Advance();

            // Generate code to push operand onto the stack
            PushExpressionOperand();
        }

        // Call the appropriate code generator for the current operators
        CodeGenerator(currentOperator, nextOperator);
    }
}

/// <name>PushExpressionOperand</name>
/// <type>Method</type>
/// <summary>
/// Generates code that pushes the current operand, if there is one. The
/// operand "length" is special: the symbol that follows it is loaded as an
/// array reference and its length is pushed instead, leaving the current
/// operator as it was before "length" was processed.
/// </summary>
private void PushExpressionOperand()
{
    if (operand == null)
    {
        return;
    }

    if (operand.Lexeme != "length")
    {
        PushOperand(operand);
        return;
    }

    // First we need to save the current operator, because Advance replaces it
    operatorStack.Push(currentOperator);

    // Push array length onto the stack
    Advance();

    // Advance leaves symbol null unless the token it read was a symbol, and
    // an address of -1 marks a symbol that was never declared
    if (symbol == null)
    {
        Compiler.ThrowCompilerException("Array name expected after length");
    }
    else if (symbol.Address == -1)
    {
        Compiler.ThrowCompilerException("Reference to undefined symbol");
    }

    jvm.ChooseOp(Opcode.ALOAD, Opcode.ALOAD_0, symbol.Address);
    jvm.Emit(Opcode.ARRAYLENGTH);
    Advance();

    // Now retrieve the current operator that we started with
    currentOperator = operatorStack.Pop();
}
/// <name>CodeGenerator</name>
/// <type>Method</type>
/// <summary>
/// Calls the appropriate code generator function based on the set of
/// two-character codes used in the CONO table. Both operator types are
/// converted to integers that index the table, and the string found there
/// identifies which code generator to call. Before the lookup, a CALL, PROC
/// or END operator that is not followed by a semicolon is reported as an error.
/// </summary>
/// <param name="currentOp">Current operator token</param>
/// <param name="nextOp">Next operator token</param>
private void CodeGenerator(Token currentOp, Token nextOp)
{
    // Procedure calls, procedure definitions and block ends must all be
    // followed by a semicolon
    if (nextOp.Type != TokenType.SEMICOLON)
    {
        switch (currentOp.Type)
        {
            case TokenType.CALL:
                Compiler.ThrowCompilerException("No semicolon found following procedure call");
                break;
            case TokenType.PROC:
                Compiler.ThrowCompilerException("No semicolon found following procedure definition");
                break;
            case TokenType.END:
                Compiler.ThrowCompilerException("No semicolon found following end of control block");
                break;
        }
    }

    // Look up the generator code for this pair of operators
    int row = GetTokenTypeIndex(currentOp.Type);
    int column = GetTokenTypeIndex(nextOp.Type);
    string generatorCode = CONO[row, column];

    switch (generatorCode)
    {
        case "AS": Assignment(); break;
        case "BC": BeginCondition(); break;
        case "CA": CallProcedure(); break;
        case "EB": EndBlock(); break;
        case "EE": EndExpression(); break;
        case "EQ": EqualityForOperators(); break;
        case "GT": GreaterThanForOperators(); break;
        case "LT": LessThanForOperators(); break;
        case "NO": NoOperation(); break;
        case "PA": Parentheses(); break;
        case "PR": ProcedureDefinition(); break;
        case "SU": Subscript(); break;
        case "WR": Write(); break;
        default: Error(); break;
    }
}
/// <name>Advance</name>
/// <type>Method</type>
/// <summary>
/// Advances the current and next operators from the input file and
/// populates the operand token between the operators if one exists. A symbol
/// or literal operand is also looked up in (or added to) its table and made
/// available through the symbol or literal field. Debug output describing
/// the new state is written at the end.
/// </summary>
private void Advance()
{
    // The old next operator becomes the current one
    currentOperator = (Token)nextOperator.Clone();
    Token fetched = source.GetNextToken();

    // Start from a clean slate: no operand, symbol or literal
    operand = null;
    symbol = null;
    literal = null;

    if (fetched.Type == TokenType.SYMBOL)
    {
        // Copy everything needed from the fetched token before asking the
        // scanner for another one
        string name = fetched.Lexeme;
        operand = (Token)fetched.Clone();

        // Reuse the table entry when the symbol is known, otherwise create it
        Symbol entry;
        if (!symbolTable.TryGetValue(name, out entry))
        {
            entry = new Symbol(name);
            symbolTable.Add(name, entry);
        }
        symbol = entry;

        // The token after the operand is the next operator
        nextOperator = source.GetNextToken();
    }
    else if (fetched.Type == TokenType.LITERAL)
    {
        // Copy everything needed from the fetched token before asking the
        // scanner for another one
        string text = fetched.Lexeme;
        operand = (Token)fetched.Clone();

        // Reuse the table entry when the literal is known, otherwise create it
        Literal entry;
        if (!literalTable.TryGetValue(text, out entry))
        {
            entry = new Literal(text);
            literalTable.Add(text, entry);
        }
        literal = entry;

        // The token after the operand is the next operator
        nextOperator = source.GetNextToken();
    }
    else
    {
        // No operand - the fetched token is itself the next operator
        nextOperator = (Token)fetched.Clone();
    }

    // Output debug information
    Compiler.WriteToDebug("Parser - Advance called (line " + Compiler.LineNumber + ")");
    Compiler.WriteToDebug(" current operator:\t" + currentOperator.Lexeme);
    if (operand != null)
    {
        Compiler.WriteToDebug(" operand:\t\t" + operand.Lexeme);
    }
    Compiler.WriteToDebug(" next operator:\t" + nextOperator.Lexeme);
    Compiler.WriteToDebug(String.Empty);
}
/// <name>PushOperand</name>
/// <type>Method</type>
/// <summary>
/// Push an operand onto the runtime stack. A literal is pushed as a constant.
/// A symbol is loaded from its address: as an array reference when it is an
/// array, otherwise as an integer. A compiler exception is raised when the
/// symbol has no address yet, or when a non-array symbol is followed by a
/// left bracket.
/// </summary>
/// <param name="op">Literal or symbol token to push</param>
private void PushOperand(Token op)
{
    if (op.Type == TokenType.LITERAL)
    {
        // Literal value
        PushConstant(literalTable[op.Lexeme].NumberValue);
        return;
    }

    // Anything else is treated as a previously-declared symbol
    Symbol target = symbolTable[op.Lexeme];

    if (target.Address == -1)
    {
        Compiler.ThrowCompilerException("Reference to undefined symbol");
        return;
    }

    if (target.SymbolType == SymbolType.ARRAY_TYPE)
    {
        // Emit code to load the array
        jvm.ChooseOp(Opcode.ALOAD, Opcode.ALOAD_0, target.Address);
        return;
    }

    if (nextOperator.Type == TokenType.L_BRACKET)
    {
        Compiler.ThrowCompilerException("Subscript given for non-array symbol");
        return;
    }

    // Emit code to load the constant or variable
    jvm.ChooseOp(Opcode.ILOAD, Opcode.ILOAD_0, target.Address);
}
/// <name>Clone</name>
/// <type>Method</type>
/// <summary>
/// Builds a separate Token object with the same type and lexeme as this one.
/// </summary>
/// <returns>The copied Token, returned as object</returns>
public object Clone()
{
    Token duplicate = new Token(this.Type, this.Lexeme);
    return duplicate;
}