/// <summary>
/// Tries to read a string constant ("...") or a character constant ('x')
/// from the very beginning of <paramref name="text"/>.
/// </summary>
/// <param name="text">Remaining source text; the candidate literal starts at index 0.</param>
/// <param name="lineNumber">Line number that is stored in the produced token.</param>
/// <returns>
/// A STRING_CONSTANT or CHAR_CONSTANT token for a well-formed literal; an INVALID token
/// for a double-quoted literal that is cut off by a line break or by the end of the text,
/// or for a lone quote character; <c>null</c> when nothing is recognized here so the
/// caller can try its other recognizers.
/// </returns>
private Token IsString(string text, int lineNumber)
{
    if (string.IsNullOrEmpty(text))
    {
        return null;
    }

    // Ordinal character test instead of the culture-sensitive StartsWith(string).
    char opener = text[0];
    if (opener != '"' && opener != '\'')
    {
        return null;
    }

    // A single quote character on its own can never be completed.
    if (text.Length < 2)
    {
        return new Token(ClassPart.INVALID, text, lineNumber);
    }

    if (opener == '"')
    {
        // True when the previous character was an unescaped backslash, i.e. the
        // current character is escaped. Tracking this (rather than only peeking at
        // text[i - 1]) lets a string that ends in an escaped backslash, such as
        // "a\\", close correctly.
        bool escaped = false;

        for (int i = 1; i < text.Length; i++)
        {
            char current = text[i];

            // A raw line break ends the literal unterminated; the line break
            // itself is not included in the token value.
            if (current == '\r' || current == '\n')
            {
                return new Token(ClassPart.INVALID, text.Substring(0, i), lineNumber);
            }

            if (escaped)
            {
                escaped = false;
                continue;
            }

            if (current == '\\')
            {
                escaped = true;
                continue;
            }

            if (current == '"')
            {
                return new Token(ClassPart.STRING_CONSTANT, text.Substring(0, i + 1), lineNumber);
            }
        }

        // Ran out of text without finding the closing quote.
        return new Token(ClassPart.INVALID, text, lineNumber);
    }

    // Character constant: only the plain three-character form 'x' is recognized
    // (closing quote exactly at index 2). The token value must cover all three
    // characters; an empty value would stop the caller from advancing.
    if (text.Length > 2 && text[2] == '\'')
    {
        return new Token(ClassPart.CHAR_CONSTANT, text.Substring(0, 3), lineNumber);
    }

    // Any other text starting with a single quote (including escaped forms such
    // as '\n') is not handled here; the caller falls through to its other checks.
    return null;
}
/// <summary>
/// Tokenizes the source text according to the available grammar.
/// </summary>
/// <param name="rawText">Complete source text to tokenize. A null or empty value yields an empty list.</param>
/// <returns>
/// The tokens in the order they appear in <paramref name="rawText"/>. Text that no
/// recognizer accepts is returned as INVALID tokens.
/// </returns>
public List<Token> Analyze(string rawText)
{
    var tokens = new List<Token>();
    if (string.IsNullOrEmpty(rawText))
    {
        return tokens;
    }

    // Line counting starts at 1; 'remaining' always holds the text not yet consumed.
    int lineCount = 1;
    string remaining = rawText;

    while (remaining.Length > 0)
    {
        // Skip leading whitespace. TrimStart returns three counts whose sum is the
        // number of characters to drop; Item3 is added to the line counter
        // (presumably Item1/Item2/Item3 are spaces/tabs/new lines — confirm against TrimStart).
        var skipped = TrimStart(remaining);
        lineCount += skipped.Item3;
        remaining = remaining.Substring(skipped.Item1 + skipped.Item2 + skipped.Item3);

        // Nothing but whitespace was left.
        if (string.IsNullOrEmpty(remaining))
        {
            break;
        }

        // Recognizers are tried in a fixed order; the first non-null result wins.
        Token token = IsKeyword(remaining, lineCount);

        if (token == null)
        {
            token = IsPunctuator(remaining, lineCount);
        }

        if (token == null)
        {
            token = IsString(remaining, lineCount);
        }

        if (token == null)
        {
            // IsComment also reports a line delta (Item2) that is added to the
            // line counter after the token has been created.
            var comment = IsComment(remaining, lineCount);
            token = comment.Item1;
            lineCount += comment.Item2;
        }

        if (token == null)
        {
            token = IsOperator(remaining, lineCount);
        }

        if (token == null)
        {
            // Break off the next word and classify it; anything left over is invalid.
            var word = BreakWord(remaining);

            token = IsDouble(word, lineCount);

            if (token == null)
            {
                token = IsInt(word, lineCount);
            }

            if (token == null)
            {
                token = IsBool(word, lineCount);
            }

            if (token == null)
            {
                token = IsIdentifier(word, lineCount);
            }

            if (token == null)
            {
                token = new Token(ClassPart.INVALID, word, lineCount);
            }
        }

        // Progress guard: a token without any text would leave 'remaining' unchanged
        // and make this loop run forever. Consume one character as INVALID instead.
        if (string.IsNullOrEmpty(token.Value))
        {
            token = new Token(ClassPart.INVALID, remaining.Substring(0, 1), lineCount);
        }

        // Drop the consumed token text and record the token.
        remaining = remaining.Substring(token.Value.Length);
        tokens.Add(token);
    }

    return tokens;
}