/// <summary>
/// Records a token in the token list, or aborts when the token is illegal.
/// </summary>
/// <param name="lineNum">Line number handed to the exception when the token is illegal.</param>
/// <param name="token">Token to record; a null token is silently ignored.</param>
/// <exception cref="LexicalException">
/// Thrown with the token's <c>ErrorMsg</c> when its type is <c>TokenType.Illegal</c>.
/// </exception>
private void VerifyAddToken(int lineNum, Token token)
{
    // No token was produced, so there is nothing to record.
    if (token == null)
    {
        return;
    }

    if (token.Type != TokenType.Illegal)
    {
        tokens.Add(token);
        return;
    }

    throw new LexicalException(lineNum, token.ErrorMsg);
}
/// <summary>
/// Consumes one character while inside a single-quoted literal.
/// </summary>
/// <param name="character">The next input character.</param>
/// <returns>
/// <c>true</c> when the closing quote was seen and <c>lastFoundToken</c> now holds either a
/// <c>QuoteString</c> token or an illegal token; <c>false</c> when the character was buffered.
/// </returns>
private bool ProcessSingleQuoteState(Char character)
{
    // Anything other than the closing quote is part of the literal.
    if (character != '\'')
    {
        currentStream += character;
        return false;
    }

    // Closing quote: more than one buffered character is rejected.
    // NOTE(review): an empty literal ('') passes this check and is reported as a
    // QuoteString with empty text - confirm that this is intended.
    bool tooLong = currentStream.Length > 1;
    if (tooLong)
    {
        // The state is deliberately left untouched on this error path.
        lastFoundToken = new Token(TokenType.Illegal, currentStream);
        lastFoundToken.ErrorMsg = "Invalid character token";
        return true;
    }

    lastFoundToken = new Token(TokenType.QuoteString, currentStream);
    currentState = State.InitialState;
    return true;
}
/// <summary>
/// Handles one character while an operator/symbol is buffered in <c>currentStream</c>.
/// </summary>
/// <remarks>
/// Whitespace ends the operator. Another operator character is first checked for the
/// comment delimiters "(*" and "*)", then for a two-character operator. Any other accepted
/// character (quote, letter or digit) ends the operator and is immediately handed to
/// <c>ProcessInitialState</c> so that it is not lost.
/// </remarks>
/// <param name="character">The next input character.</param>
/// <returns>
/// <c>true</c> when <c>lastFoundToken</c> has been set (valid or illegal); <c>false</c> only
/// when a comment start "(*" was recognised and the state switched to <c>State.Comment</c>.
/// </returns>
private bool ProcessOperatorState(Char character)
{
    if (Char.IsWhiteSpace(character))
    {
        // Whitespace terminates the operator: report whatever is buffered.
        TokenType bufferedType;
        if (ExpectedOperatorTokens.TryGetValue(currentStream, out bufferedType))
        {
            lastFoundToken = new Token(bufferedType, currentStream);
            currentState = State.InitialState;
        }
        else
        {
            lastFoundToken = new Token(TokenType.Illegal, currentStream);
            lastFoundToken.ErrorMsg = "Invalid Symbol/Operator!";
        }

        return true;
    }

    if (ExpectedOperatorTokens.ContainsKey(character.ToString()))
    {
        string combined = currentStream + character;

        // Comment start: no token, the comment state takes over.
        if (combined == "(*")
        {
            currentState = State.Comment;
            return false;
        }

        // Comment end while not inside a comment.
        if (combined == "*)")
        {
            lastFoundToken = new Token(TokenType.Illegal, currentStream);
            lastFoundToken.ErrorMsg = "Unmatched end of comment";
            return true;
        }

        // Two-character operator.
        TokenType combinedType;
        if (ExpectedOperatorTokens.TryGetValue(combined, out combinedType))
        {
            lastFoundToken = new Token(combinedType, combined);
            currentState = State.InitialState;
            return true;
        }

        // Already two characters buffered and a third does not form an operator.
        if (currentStream.Length == 2)
        {
            lastFoundToken = new Token(TokenType.Illegal, currentStream);
            lastFoundToken.ErrorMsg = "Illegal Symbol/Operator";
            return true;
        }

        // Two separate operators back to back: report the first, restart on the second.
        EmitBufferedOperatorAndRestart(character);
        return true;
    }

    if (character == '\'' || character == '\"' || Char.IsLetterOrDigit(character))
    {
        // The operator is over; the new character begins the next token.
        EmitBufferedOperatorAndRestart(character);
        return true;
    }

    lastFoundToken = new Token(TokenType.Illegal, currentStream);
    lastFoundToken.ErrorMsg = "State Machine broke processing symbol";
    return true;
}

// Reports the buffered operator, then feeds the terminating character to the initial
// state. An unknown buffered operator is reported as an illegal token instead of letting
// the dictionary indexer throw KeyNotFoundException.
private void EmitBufferedOperatorAndRestart(Char character)
{
    TokenType bufferedType;
    if (!ExpectedOperatorTokens.TryGetValue(currentStream, out bufferedType))
    {
        lastFoundToken = new Token(TokenType.Illegal, currentStream);
        lastFoundToken.ErrorMsg = "Invalid Symbol/Operator";
        return;
    }

    // Build the token before ProcessInitialState clears currentStream.
    lastFoundToken = new Token(bufferedType, currentStream);

    // If the character is itself illegal, ProcessInitialState replaces lastFoundToken
    // with an illegal token; either way the caller reports a token.
    ProcessInitialState(character);
}
/// <summary>
/// Consumes one character while a number is being buffered in <c>currentStream</c>.
/// </summary>
/// <param name="character">The next input character.</param>
/// <returns>
/// <c>false</c> while numeric characters are still being collected; <c>true</c> once
/// <c>lastFoundToken</c> has been set to a <c>Number</c> token or an illegal token.
/// </returns>
private bool ProcessNumberState(Char character)
{
    // Keep collecting numeric characters.
    if (Char.IsNumber(character))
    {
        currentStream += character;
        return false;
    }

    bool endedByWhitespace = Char.IsWhiteSpace(character);
    if (endedByWhitespace || ExpectedOperatorTokens.ContainsKey(character.ToString()))
    {
        lastFoundToken = new Token(TokenType.Number, currentStream);

        if (endedByWhitespace)
        {
            currentState = State.InitialState;
        }
        else
        {
            // The operator character starts the next token; hand it to the initial
            // state so it is not dropped. Its result does not change what we return.
            ProcessInitialState(character);
        }

        return true;
    }

    // Anything else directly after the digits is rejected.
    lastFoundToken = new Token(TokenType.Illegal, currentStream);
    lastFoundToken.ErrorMsg = "Invalid Number expression";
    return true;
}
/// <summary>
/// Initial-state handler: clears the buffer and selects the next state from the first
/// character of a new token.
/// </summary>
/// <remarks>
/// Quotes are consumed without being buffered. Letters, numeric characters and operator
/// characters are buffered as the first character of the new token. Whitespace is skipped
/// and leaves the state unchanged.
/// </remarks>
/// <param name="character">The next input character.</param>
/// <returns>
/// <c>true</c> only when the character is illegal, in which case <c>lastFoundToken</c>
/// holds an illegal token; <c>false</c> otherwise, with no token to report.
/// </returns>
private bool ProcessInitialState(Char character)
{
    currentStream = "";

    // Quotes are checked first so no later test can claim them.
    if (character == '\'')
    {
        // Eat the quote and start a single-quoted literal.
        currentState = State.SingleQuote;
    }
    else if (character == '\"')
    {
        // Eat the quote and start a double-quoted string.
        currentState = State.DoubleQuote;
    }
    else if (Char.IsLetter(character))
    {
        currentStream += character;
        currentState = State.Character;
    }
    else if (Char.IsWhiteSpace(character))
    {
        // Whitespace between tokens is skipped.
    }
    else if (Char.IsNumber(character))
    {
        currentStream += character;
        currentState = State.Number;
    }
    else if (ExpectedOperatorTokens.ContainsKey(character.ToString()))
    {
        // Char.IsSymbol does not cover every operator, so the operator table decides.
        // The operator state then works out whether more characters belong to it.
        currentStream += character;
        currentState = State.Operator;
    }
    else
    {
        // Every other illegal token in this lexer carries an error message; set one
        // here too so the resulting error is not reported with an empty message.
        lastFoundToken = new Token(TokenType.Illegal, character.ToString());
        lastFoundToken.ErrorMsg = "Unrecognized character '" + character + "'";
        return true;
    }

    return false;
}
/// <summary>
/// Consumes one character while inside a double-quoted string.
/// </summary>
/// <param name="character">The next input character.</param>
/// <returns>
/// <c>true</c> when the closing quote was seen and <c>lastFoundToken</c> holds the
/// <c>DoubleQuoteString</c> token; <c>false</c> when the character was buffered.
/// </returns>
private bool ProcessDoubleQuoteState(Char character)
{
    bool closesString = character == '\"';
    if (!closesString)
    {
        // Still inside the string: keep collecting.
        currentStream += character;
        return false;
    }

    lastFoundToken = new Token(TokenType.DoubleQuoteString, currentStream);
    currentState = State.InitialState;
    return true;
}
/// <summary>
/// Consumes one character while a word (identifier or reserved word) is being buffered
/// in <c>currentStream</c>.
/// </summary>
/// <param name="character">The next input character.</param>
/// <returns>
/// <c>false</c> while letters/digits are still being collected; <c>true</c> once
/// <c>lastFoundToken</c> has been set to a reserved-word token, an identifier token, or an
/// illegal token.
/// </returns>
private bool ProcessCharacterState(Char character)
{
    string word;

    if (Char.IsWhiteSpace(character))
    {
        // Whitespace ends the word and is discarded.
        word = currentStream;
        currentState = State.InitialState;
    }
    else if (ExpectedOperatorTokens.ContainsKey(character.ToString()))
    {
        // An operator ends the word. Capture the word first, because handing the
        // operator to the initial state clears currentStream.
        word = currentStream;
        if (ProcessInitialState(character))
        {
            // The initial state already stored an illegal token.
            return true;
        }
    }
    else if (Char.IsLetterOrDigit(character))
    {
        currentStream += character;
        return false;
    }
    else
    {
        lastFoundToken = new Token(TokenType.Illegal, character.ToString());
        lastFoundToken.ErrorMsg = "Expected Letter or Digit";
        return true;
    }

    // A complete word: reserved words get their own type, the rest are identifiers.
    // The state is not touched here; the branches above already set it.
    TokenType reservedType;
    if (ExpectedReserveTokens.TryGetValue(word, out reservedType))
    {
        lastFoundToken = new Token(reservedType, word);
    }
    else
    {
        lastFoundToken = new Token(TokenType.Identifier, word);
    }

    return true;
}
/// <summary>
/// Call at the end of a line to flush whatever the state machine is still holding.
/// </summary>
/// <returns>
/// <c>null</c> when the machine is in its initial state (nothing pending); otherwise
/// <c>lastFoundToken</c>, which is an illegal token when a quote or comment was left open.
/// </returns>
public Token EndOfLine()
{
    // Nothing buffered: the line ended between tokens.
    if (currentState == State.InitialState)
    {
        return null;
    }

    string unterminatedMessage = null;

    switch (currentState)
    {
        // For these states the end of the line behaves like a trailing space,
        // so the regular handler is reused to finish the token.
        case State.Character:
            ProcessCharacterState(' ');
            break;
        case State.Operator:
            ProcessOperatorState(' ');
            break;
        case State.Number:
            ProcessNumberState(' ');
            break;

        // These constructs were still open at the end of the line.
        case State.SingleQuote:
            unterminatedMessage = "Missing Matching Single Quote";
            break;
        case State.DoubleQuote:
            unterminatedMessage = "Missing Matching double Quote";
            break;
        case State.Comment:
            unterminatedMessage = "Missing Matching Comment Symbol!";
            break;
    }

    if (unterminatedMessage != null)
    {
        lastFoundToken = new Token(TokenType.Illegal, currentStream);
        lastFoundToken.ErrorMsg = unterminatedMessage;
    }

    return lastFoundToken;
}