コード例 #1
0
ファイル: Lexer.cs プロジェクト: mwh/kernan
        /// <summary>Check the character at the current index against the
        /// lexer's character restrictions, reporting an error for lone
        /// surrogates, tab characters, disallowed whitespace, and
        /// disallowed control/format characters.</summary>
        /// <returns>The Unicode category that UnicodeLookup reports for
        /// the current position in the source text.</returns>
        private UnicodeCategory validateChar()
        {
            char c = code[index];

            if (Char.IsHighSurrogate(c))
            {
                // A leading surrogate is only legal when a trailing
                // surrogate follows it. The bounds check makes a leading
                // surrogate at the very end of the input a reported error
                // rather than an out-of-range read.
                bool hasTrailingSurrogate = index + 1 < code.Length
                    && Char.IsLowSurrogate(code[index + 1]);
                if (!hasTrailingSurrogate)
                {
                    reportError("L0007", "Illegal lone surrogate");
                }
            }
            else if (Char.IsLowSurrogate(c))
            {
                // Trailing surrogate with no leading surrogate before it.
                reportError("L0007", "Illegal lone surrogate");
            }
            UnicodeCategory cat = UnicodeLookup.GetUnicodeCategory(code, index);

            if (c == '\t')
            {
                reportError("L0001", "Tab characters are not permitted.");
            }
            if (cat == UnicodeCategory.ParagraphSeparator ||
                cat == UnicodeCategory.SpaceSeparator)
            {
                // Of the separator categories, only the plain space and
                // U+2028 LINE SEPARATOR are accepted.
                if (c != ' ' && c != '\u2028')
                {
                    int codepoint = Char.ConvertToUtf32(code, index);
                    reportError("L0002", new Dictionary<string, string>()
                    {
                        { "codepoint", formatCodepoint(codepoint) },
                        { "name", UnicodeLookup.GetCodepointName(codepoint) }
                    },
                                "Illegal whitespace.");
                }
            }
            else if ((cat == UnicodeCategory.Control ||
                      cat == UnicodeCategory.Format ||
                      cat == UnicodeCategory.Surrogate
                      ) &&
                     c != '\n' && c != '\r')
            {
                // Line feed and carriage return are the only characters
                // from these categories that are let through.
                int codepoint = Char.ConvertToUtf32(code, index);
                reportError("L0003", new Dictionary<string, string>()
                {
                    { "codepoint", formatCodepoint(codepoint) },
                    { "name", UnicodeLookup.GetCodepointName(codepoint) }
                },
                            "Illegal control character. ");
            }
            return cat;
        }
コード例 #2
0
ファイル: Lexer.cs プロジェクト: mwh/kernan
        /// <summary>Get the next token from the stream and
        /// advance the lexer</summary>
        /// <returns>The token that was read, which is also stored as the
        /// current token. An EndToken is returned once the index has
        /// reached the end of the source text.</returns>
        public Token NextToken()
        {
            previous = current;
            if (index >= code.Length)
            {
                current = new EndToken(moduleName, line, column);
                return current;
            }
            char c = code[index];

            column = index - lineStart;
            Token ret = null;
            UnicodeCategory cat = validateChar();
            // The full text element at this position, used for the
            // bracket lookups below.
            string cStr = StringInfo.GetNextTextElement(code, index);

            // NOTE: the checks below are deliberately independent "if"s
            // evaluated in this order; they all test the character that
            // was read before any sub-lexer advanced the index.
            if (isIdentifierStartCharacter(c, cat))
            {
                ret = lexIdentifier();
            }
            if (isOperatorCharacter(c, cat))
            {
                ret = lexOperator();
            }
            if (isNumberStartCharacter(c))
            {
                ret = lexNumber();
            }
            if (c == ' ')
            {
                // Spaces produce no token: skip them and lex what follows.
                skipSpaces();
                return NextToken();
            }
            if (c == '#' && allowShebang && column == 1)
            {
                // Eat the rest of the line, ignoring its
                // contents entirely. The bounds check stops a final line
                // without a terminator from running past the end of the
                // input.
                while (index < code.Length
                       && code[index] != '\n' && code[index] != '\u2028')
                {
                    index++;
                }
                if (index < code.Length)
                {
                    // Stopped on a line terminator: count the new line
                    // and step over the terminator.
                    line++;
                    lineStart = index;
                    advanceIndex();
                }
                return NextToken();
            }
            else if (column == 1)
            {
                // Any other token starting in column 1 ends the region
                // where a shebang line is accepted.
                allowShebang = false;
            }
            if (c == '"')
            {
                ret = lexString();
            }
            if (c == '(')
            {
                ret = lexLParen();
            }
            if (c == ')')
            {
                ret = lexRParen();
            }
            if (c == '{')
            {
                ret = lexLBrace();
            }
            if (c == '}')
            {
                ret = lexRBrace();
            }
            // The generic bracket lookups only apply when nothing above
            // has already claimed the character.
            if (ret == null && UnicodeLookup.OpenBrackets.Contains(cStr))
            {
                ret = lexOpenBracket();
            }
            if (ret == null && UnicodeLookup.CloseBrackets.Contains(cStr))
            {
                ret = lexCloseBracket();
            }
            if (c == ',')
            {
                ret = lexComma();
            }
            if (c == ';')
            {
                ret = lexSemicolon();
            }
            if (c == '\n' || c == '\u2028' || c == '\r')
            {
                ret       = new NewLineToken(moduleName, line, column);
                lineStart = index;
                line++;
                advanceIndex();
                // Treat CR LF as a single line terminator: consume the LF
                // too and move the line start past it.
                if (c == '\r' && index < code.Length && code[index] == '\n')
                {
                    advanceIndex();
                    lineStart++;
                }
            }
            if (ret == null)
            {
                // Nothing recognised this character.
                int codepoint = Char.ConvertToUtf32(code, index);
                reportError("L0000", new Dictionary<string, string>()
                {
                    { "codepoint", formatCodepoint(codepoint) },
                    { "name", UnicodeLookup.GetCodepointName(codepoint) }
                },
                            "Character '" + c + "' may not appear here");
                // NOTE(review): the other tokens built in this method are
                // given `column` as their third argument; `index - 1`
                // looks inconsistent with that - confirm it is intended.
                ret = new UnknownToken(moduleName, line, index - 1);
            }
            current = ret;
            return ret;
        }