/// <summary>
/// Validate the character at the current lexer position, reporting
/// an error for lone surrogates, tab characters, whitespace other
/// than the permitted kinds, and control/format/surrogate characters
/// other than line feed and carriage return.
/// </summary>
/// <returns>The Unicode category that UnicodeLookup reports for the
/// current position in the source text.</returns>
private UnicodeCategory validateChar()
{
    char c = code[index];
    // Char.IsSurrogatePair checks that index + 1 is inside the string
    // before looking at it, so a leading surrogate that is the very
    // last character is treated as lone instead of reading past the
    // end of the source.
    bool isPair = Char.IsSurrogatePair(code, index);
    if (Char.IsSurrogate(c) && !isPair)
    {
        // Either a leading surrogate with no trailing surrogate after
        // it, or a trailing surrogate on its own.
        reportError("L0007", "Illegal lone surrogate");
    }
    UnicodeCategory cat = UnicodeLookup.GetUnicodeCategory(code, index);
    if (c == '\t')
    {
        reportError("L0001", "Tab characters are not permitted.");
    }
    // Char.ConvertToUtf32 throws ArgumentException when asked to decode
    // a lone surrogate, so in that case fall back to the raw UTF-16
    // code unit for the diagnostics below. For any non-surrogate
    // character the two values are identical.
    int codepoint = isPair || !Char.IsSurrogate(c)
        ? Char.ConvertToUtf32(code, index)
        : (int)c;
    if (cat == UnicodeCategory.ParagraphSeparator
            || cat == UnicodeCategory.SpaceSeparator)
    {
        if (c != ' ' && c != '\u2028')
        {
            reportError("L0002",
                    new Dictionary<string, string>()
                    {
                        { "codepoint", formatCodepoint(codepoint) },
                        { "name", UnicodeLookup.GetCodepointName(codepoint) }
                    },
                    "Illegal whitespace.");
        }
    }
    else if ((cat == UnicodeCategory.Control
                || cat == UnicodeCategory.Format
                || cat == UnicodeCategory.Surrogate)
            && c != '\n' && c != '\r')
    {
        reportError("L0003",
                new Dictionary<string, string>()
                {
                    { "codepoint", formatCodepoint(codepoint) },
                    { "name", UnicodeLookup.GetCodepointName(codepoint) }
                },
                "Illegal control character. ");
    }
    return(cat);
}
/// <summary>Get the next token from the stream and
/// advance the lexer</summary>
/// <returns>The token that was lexed, which is also stored in
/// <c>current</c>. An EndToken is returned once the index has reached
/// the end of the source text.</returns>
public Token NextToken()
{
    previous = current;
    if (index >= code.Length)
    {
        current = new EndToken(moduleName, line, column);
        return(current);
    }
    char c = code[index];
    column = index - lineStart;
    Token ret = null;
    UnicodeCategory cat = validateChar();
    string cStr = StringInfo.GetNextTextElement(code, index);
    // The checks below are deliberately kept as independent ifs in
    // their original order: each one tests the character read above,
    // and a later match overwrites the result of an earlier one.
    if (isIdentifierStartCharacter(c, cat))
    {
        ret = lexIdentifier();
    }
    if (isOperatorCharacter(c, cat))
    {
        ret = lexOperator();
    }
    if (isNumberStartCharacter(c))
    {
        ret = lexNumber();
    }
    if (c == ' ')
    {
        skipSpaces();
        return(NextToken());
    }
    if (c == '#' && allowShebang && column == 1)
    {
        // Eat the rest of the line, ignoring its
        // contents entirely. The bounds check covers a shebang line
        // that ends the file without any line terminator.
        while (index < code.Length
                && code[index] != '\n' && code[index] != '\u2028')
        {
            index++;
        }
        if (index < code.Length)
        {
            // Stopped on a line terminator: consume it and start a
            // new line.
            line++;
            lineStart = index;
            advanceIndex();
        }
        // At end of input the recursive call yields the EndToken.
        return(NextToken());
    }
    else if (column == 1)
    {
        allowShebang = false;
    }
    if (c == '"')
    {
        ret = lexString();
    }
    if (c == '(')
    {
        ret = lexLParen();
    }
    if (c == ')')
    {
        ret = lexRParen();
    }
    if (c == '{')
    {
        ret = lexLBrace();
    }
    if (c == '}')
    {
        ret = lexRBrace();
    }
    if (ret == null && UnicodeLookup.OpenBrackets.Contains(cStr))
    {
        ret = lexOpenBracket();
    }
    if (ret == null && UnicodeLookup.CloseBrackets.Contains(cStr))
    {
        ret = lexCloseBracket();
    }
    if (c == ',')
    {
        ret = lexComma();
    }
    if (c == ';')
    {
        ret = lexSemicolon();
    }
    if (c == '\n' || c == '\u2028' || c == '\r')
    {
        ret = new NewLineToken(moduleName, line, column);
        lineStart = index;
        line++;
        advanceIndex();
        // Treat a CR LF pair as a single line break.
        if (c == '\r' && index < code.Length && code[index] == '\n')
        {
            advanceIndex();
            lineStart++;
        }
    }
    if (ret == null)
    {
        // Char.ConvertToUtf32 throws ArgumentException on a lone
        // surrogate; report the raw UTF-16 code unit in that case so
        // that producing the diagnostic cannot itself fail.
        int codepoint = Char.IsSurrogate(c)
                && !Char.IsSurrogatePair(code, index)
            ? (int)c
            : Char.ConvertToUtf32(code, index);
        reportError("L0000",
                new Dictionary<string, string>()
                {
                    { "codepoint", formatCodepoint(codepoint) },
                    { "name", UnicodeLookup.GetCodepointName(codepoint) }
                },
                "Character '" + c + "' may not appear here");
        // NOTE(review): this passes index - 1 where the other tokens
        // built here pass column, and index is not advanced on this
        // path - confirm both are intended (this may be unreachable
        // if reportError always throws).
        ret = new UnknownToken(moduleName, line, index - 1);
    }
    current = ret;
    return(ret);
}