/// <summary>
/// Tokenizes a single source line into typed <see cref="Token"/> instances.
/// </summary>
/// <param name="sLine">The text of the line to tokenize; null or empty yields an empty list.</param>
/// <param name="iLine">The line number recorded on every token produced from this line.</param>
/// <returns>The tokens found on the line, in order of appearance.</returns>
/// <exception cref="SyntaxErrorException">
/// Thrown when a symbol is not a known keyword, number, operator, parenthesis or separator,
/// and is not made of an ASCII letter followed only by ASCII letters or characters in Token.Numbers.
/// </exception>
public List<Token> Tokenize(string sLine, int iLine)
{
    List<Token> lTokens = new List<Token>();
    char[] delimiter = { ' ', ',', ';', '*', '+', '-', '/', '<', '>', '&', '=', '|', '!', '(', ')', '[', ']', '{', '}', '\t' };

    string remaining = sLine;
    int column = 0;

    while (!string.IsNullOrEmpty(remaining))
    {
        // A "//" at the head of the unread text starts a comment: nothing more to tokenize.
        if (remaining.Length > 1 && remaining[0] == '/' && remaining[1] == '/')
        {
            break;
        }

        // NOTE(review): Next is defined elsewhere; it appears to hand back the next symbol in
        // sToken and return the text that is still unread - confirm against its definition.
        string sToken;
        int cChars;
        remaining = Next(remaining, delimiter, out sToken, out cChars);

        bool isBlank = sToken == " " || sToken == "\t";
        if (!isBlank && sToken.Length > 0)
        {
            char head = sToken[0];
            bool headIsLetter = (head >= 'a' && head <= 'z') || (head >= 'A' && head <= 'Z');

            // Stays null when the symbol matches no token category; that is the error case.
            Token recognized = null;

            if (sToken.Length == 1)
            {
                if (Token.Parentheses.Contains(head))
                {
                    recognized = new Parentheses(head, iLine, column);
                }
                else if (Token.Operators.Contains(head))
                {
                    recognized = new Operator(head, iLine, column);
                }
                else if (Token.Separators.Contains(head))
                {
                    recognized = new Separator(head, iLine, column);
                }
                else if (Token.Numbers.Contains(head))
                {
                    recognized = new Number(sToken, iLine, column);
                }
                else if (headIsLetter)
                {
                    recognized = new Identifier(sToken, iLine, column);
                }
            }
            else
            {
                if (Token.Statements.Contains(sToken))
                {
                    recognized = new Statement(sToken, iLine, column);
                }
                else if (Token.VarTypes.Contains(sToken))
                {
                    recognized = new VarType(sToken, iLine, column);
                }
                else if (Token.Constants.Contains(sToken))
                {
                    recognized = new Constant(sToken, iLine, column);
                }
                else if (isNumber(sToken))
                {
                    recognized = new Number(sToken, iLine, column);
                }
                else if (headIsLetter)
                {
                    // Every character after the first must be an ASCII letter or a digit character.
                    bool tailIsValid = true;
                    for (int k = 1; k < sToken.Length && tailIsValid; k++)
                    {
                        char c = sToken[k];
                        bool isLetter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
                        tailIsValid = isLetter || Token.Numbers.Contains(c);
                    }

                    if (tailIsValid)
                    {
                        recognized = new Identifier(sToken, iLine, column);
                    }
                }
            }

            if (recognized == null)
            {
                // Report the location of the offending symbol through a bare token.
                Token offending = new Token();
                offending.Line = iLine;
                offending.Position = column;
                throw new SyntaxErrorException("syntaxError", offending);
            }

            lTokens.Add(recognized);
        }

        // Whitespace symbols produce no token but still advance the column.
        column += sToken.Length;
    }

    return lTokens;
}
/// <summary>
/// Splits each code line on a fixed delimiter set and classifies every resulting symbol
/// as a statement, variable type, constant, operator, parenthesis, separator, number or identifier.
/// </summary>
/// <param name="lCodeLines">The source lines; the index of a line in the list is its line number.</param>
/// <returns>All tokens of all lines, in order of appearance.</returns>
/// <remarks>
/// Everything from the first "//" on a line is treated as a comment and ignored.
/// Any symbol that matches no other category is emitted as an <see cref="Identifier"/>;
/// this method performs no identifier validation.
/// </remarks>
public List<Token> Tokenize(List<string> lCodeLines)
{
    List<Token> lTokens = new List<Token>();
    char[] delemiters = { '(', ')', '{', '}', ',', ';', '*', '+', '<', '>', '=', '-', '\t', ' ', '/', '[', ']', '!', '\n' };
    int line_index = 0;

    foreach (string sLine in lCodeLines)
    {
        // Keep only the code that precedes a "//" comment (ordinal search, no regex needed).
        string stripedLine = sLine ?? "";
        int commentStart = stripedLine.IndexOf("//", System.StringComparison.Ordinal);
        if (commentStart >= 0)
        {
            stripedLine = stripedLine.Substring(0, commentStart);
        }

        if (stripedLine.Length > 0)
        {
            int postion = 0;

            foreach (string symbol in Split(stripedLine, delemiters))
            {
                // Whitespace symbols produce no token; they only advance the position by one.
                if (symbol.Length > 0 && (symbol[0] == '\t' || symbol[0] == ' ' || symbol[0] == '\n'))
                {
                    postion++;
                    continue;
                }

                Token token;
                int parsed;

                if (Token.Statements.Contains(symbol))
                {
                    token = new Statement(symbol, line_index, postion);
                }
                else if (Token.VarTypes.Contains(symbol))
                {
                    token = new VarType(symbol, line_index, postion);
                }
                else if (Token.Constants.Contains(symbol))
                {
                    token = new Constant(symbol, line_index, postion);
                }
                else if (symbol.Length == 1 && Token.Operators.Contains(symbol[0]))
                {
                    token = new Operator(symbol[0], line_index, postion);
                }
                else if (symbol.Length == 1 && Token.Parentheses.Contains(symbol[0]))
                {
                    token = new Parentheses(symbol[0], line_index, postion);
                }
                else if (symbol.Length == 1 && Token.Separators.Contains(symbol[0]))
                {
                    token = new Separator(symbol[0], line_index, postion);
                }
                else if (int.TryParse(symbol, out parsed))
                {
                    token = new Number(symbol, line_index, postion);
                }
                else
                {
                    // Fallback: anything unrecognised is recorded as an identifier.
                    token = new Identifier(symbol, line_index, postion);
                }

                lTokens.Add(token);
                postion += symbol.Length;
            }
        }

        // Blank and comment-only lines still count towards the line number.
        line_index++;
    }

    return lTokens;
}
/// <summary>
/// Main method of the tokenizing assignment: converts a list of code lines into a list of tokens.
/// Each token is instantiated as the subclass matching its type and carries its position
/// in the file (line number and index within the line).
/// </summary>
/// <param name="lCodeLines">The source lines; the index of a line in the list is its line number.</param>
/// <returns>All tokens of all lines, in order of appearance.</returns>
/// <exception cref="SyntaxErrorException">
/// Thrown for an illegal symbol: a single character that is a digit or '#' and matches no other
/// category, or a longer non-keyword symbol whose first character is not a letter.
/// </exception>
/// <remarks>
/// Text from the first "//" on a line is a comment and is ignored; code that precedes the
/// comment on the same line is still tokenized.
/// </remarks>
public List<Token> Tokenize(List<string> lCodeLines)
{
    List<Token> lTokens = new List<Token>();
    int numOfLine = 0;

    foreach (string line in lCodeLines)
    {
        // Drop only the comment part, so "x = 1; // note" still yields the tokens of "x = 1; ".
        string code = line ?? "";
        int commentStart = code.IndexOf("//", System.StringComparison.Ordinal);
        if (commentStart >= 0)
        {
            code = code.Substring(0, commentStart);
        }

        if (code.Length > 0)
        {
            int placeOfToken = 0;

            foreach (string token in Split(code, Delimiters))
            {
                // An empty symbol occupies no characters and cannot be classified.
                if (string.IsNullOrEmpty(token))
                {
                    continue;
                }

                // Stays null for whitespace (no token) and is checked together with isIllegal below.
                Token recognized = null;
                bool isIllegal = false;
                int help;

                if (int.TryParse(token, out help))
                {
                    recognized = new Number(token, numOfLine, placeOfToken);
                }
                else if (token == " " || token == "\t")
                {
                    // Whitespace: no token, but the position still advances below.
                }
                else if (token.Length == 1)
                {
                    char symbol = token[0];
                    if (Token.Operators.Contains(symbol))
                    {
                        recognized = new Operator(symbol, numOfLine, placeOfToken);
                    }
                    else if (Token.Parentheses.Contains(symbol))
                    {
                        recognized = new Parentheses(symbol, numOfLine, placeOfToken);
                    }
                    else if (Token.Separators.Contains(symbol))
                    {
                        recognized = new Separator(symbol, numOfLine, placeOfToken);
                    }
                    else if (!char.IsDigit(symbol) && symbol != '#')
                    {
                        recognized = new Identifier(token, numOfLine, placeOfToken);
                    }
                    else
                    {
                        isIllegal = true;
                    }
                }
                else if (Token.Statements.Contains(token))
                {
                    recognized = new Statement(token, numOfLine, placeOfToken);
                }
                else if (Token.VarTypes.Contains(token))
                {
                    recognized = new VarType(token, numOfLine, placeOfToken);
                }
                else if (Token.Constants.Contains(token))
                {
                    recognized = new Constant(token, numOfLine, placeOfToken);
                }
                else if (char.IsLetter(token[0]))
                {
                    recognized = new Identifier(token, numOfLine, placeOfToken);
                }
                else
                {
                    isIllegal = true;
                }

                if (isIllegal)
                {
                    // Report the location of the offending symbol through a bare token.
                    Token offending = new Token();
                    offending.Line = numOfLine;
                    offending.Position = placeOfToken;
                    throw new SyntaxErrorException("problem", offending);
                }

                if (recognized != null)
                {
                    lTokens.Add(recognized);
                }

                placeOfToken += token.Length;
            }
        }

        numOfLine++;
    }

    return lTokens;
}
/// <summary>
/// Converts a list of code lines into typed tokens, recording for each token its line number
/// and its character position within the line.
/// </summary>
/// <param name="lCodeLines">The source lines; the index of a line in the list is its line number.</param>
/// <returns>All tokens of all lines, in order of appearance.</returns>
/// <exception cref="SyntaxErrorException">
/// Thrown for a single character that is neither a recognised symbol nor a delimiter, and for a
/// longer symbol that is not a keyword and starts with neither a letter nor a digit.
/// </exception>
/// <remarks>
/// Text from the first "//" on a line is a comment and is ignored; code that precedes the
/// comment on the same line is still tokenized.
/// </remarks>
public List<Token> Tokenize(List<string> lCodeLines)
{
    List<Token> lTokens = new List<Token>();
    int lineNum = 0;

    foreach (string Line in lCodeLines)
    {
        // Drop only the comment part, so code before a trailing comment is not lost.
        string code = Line ?? "";
        int commentStart = code.IndexOf("//", System.StringComparison.Ordinal);
        if (commentStart >= 0)
        {
            code = code.Substring(0, commentStart);
        }

        if (code.Length > 0)
        {
            int pos = 0;

            foreach (string lineToken in Split(code, Delimiters))
            {
                // An empty symbol occupies no characters and cannot be classified.
                if (string.IsNullOrEmpty(lineToken))
                {
                    continue;
                }

                char first = lineToken[0];

                // Stays null for symbols that are consumed without producing a token.
                Token thisToken = null;

                if (Contains(Statements, lineToken))
                {
                    thisToken = new Statement(lineToken, lineNum, pos);
                }
                else if (Contains(VarTypes, lineToken))
                {
                    thisToken = new VarType(lineToken, lineNum, pos);
                }
                else if (Contains(Constants, lineToken))
                {
                    thisToken = new Constant(lineToken, lineNum, pos);
                }
                else if (char.IsLetter(first))
                {
                    thisToken = new Identifier(lineToken, lineNum, pos);
                }
                else if (char.IsDigit(first))
                {
                    thisToken = new Number(lineToken, lineNum, pos);
                }
                else if (lineToken.Length == 1)
                {
                    if (first == ' ' || first == '\t')
                    {
                        // Whitespace: no token, position advances below.
                    }
                    else if (Contains(Operators, first))
                    {
                        thisToken = new Operator(first, lineNum, pos);
                    }
                    else if (Contains(Parentheses, first))
                    {
                        thisToken = new Parentheses(first, lineNum, pos);
                    }
                    else if (Contains(Separators, first))
                    {
                        thisToken = new Separator(first, lineNum, pos);
                    }
                    else if (!Contains(Delimiters, first))
                    {
                        // The error token points at the offending character itself, not one past it.
                        throw new SyntaxErrorException("Wrong Syntax", new Separator(first, lineNum, pos));
                    }
                    // Otherwise: a delimiter with no token class. It yields no token, but it still
                    // occupies a column, so the position advances below.
                }
                else
                {
                    // A multi-character symbol starting with neither a letter nor a digit is not a
                    // legal identifier or number; report it instead of silently dropping it.
                    throw new SyntaxErrorException("Wrong Syntax", new Identifier(lineToken, lineNum, pos));
                }

                if (thisToken != null)
                {
                    lTokens.Add(thisToken);
                }

                // Every consumed symbol advances the position, so later tokens stay aligned.
                pos += lineToken.Length;
            }
        }

        lineNum++;
    }

    return lTokens;
}