예제 #1
0
파일: Tokenizer.cs 프로젝트: cakoose/cks
        /// <summary>
        /// Reads and returns the next token from <c>In</c>, after skipping spaces, tabs,
        /// newlines, "/* ... */" block comments and "// ..." line comments.
        /// </summary>
        /// <remarks>
        /// <c>LastWhitespaceHadEol</c> is cleared on entry; newline handling is delegated to
        /// <c>EatNewline</c>, which is presumably what sets it again (not visible here).
        /// <c>Column</c> is advanced past every character this method accounts for, and each
        /// returned token carries the position of its first character.
        /// </remarks>
        /// <returns>
        /// One of: an end-of-input token (<c>Codes.Eof</c>), a <c>Token.Ident</c>, a
        /// <c>Token.LitInt</c> (optionally negative), a <c>Codes.RightArrow</c> token for "->",
        /// a <c>Token.LitString</c>, or a plain <c>Token</c> whose code is the punctuation
        /// character itself.
        /// </returns>
        /// <remarks>
        /// Malformed input is reported by throwing whatever <c>pex</c> builds; end of input in
        /// the middle of a comment, token or string literal is left to <c>ReadChar</c>.
        /// </remarks>
        public Token Lex()
        {
            LastWhitespaceHadEol = false;

            char Next;

            // Skip over whitespace and comments.  The loop is left with Next holding the
            // first character of the token: already read from In, Column not yet advanced.
            while (true) {
                int NextI = In.Read();
                if (NextI < 0) return new Token(new SourcePos(Line, Column), Codes.Eof);
                Next = (char) NextI;

                // Regular whitespace
                if (Next == ' ' || Next == '\t') {
                    Column++;
                }
                else if (EatNewline(Next)) {
                    // EatNewline returned true, so it presumably consumed the line break
                    // and updated the position itself (not visible here).
                }
                // Comments.
                else if (Next == '/') {
                    Column++;
                    Next = ReadChar("comment");
                    if (Next == '*') {
                        Column++;
                        // Block comment.
                        while (true) {
                            Next = ReadChar("comment");
                            if (Next == '*' && In.Peek() == '/') {
                                In.Read();
                                Column += 2; // the '*' and the '/'
                                break;
                            }
                            else if (EatNewline(Next)) {
                            }
                            else if (Next >= 32 && Next <= 126 || Next == '\t') {
                                // Printable ASCII or tab.
                                Column++;
                            }
                            else {
                                throw pex(Line, Column, "unexpected character: " + StringUtil.ReadableChar(Next));
                            }
                        }
                    }
                    else if (Next == '/') {
                        Column++;
                        // Line comment.  Only peek, so that the terminating character (a line
                        // break, end of input, or anything non-printable) is handled by the
                        // outer loop on its next iteration.
                        while (true) {
                            NextI = In.Peek();
                            if (NextI >= 32 && NextI <= 126 || NextI == '\t') {
                                In.Read();
                                Column++;
                            } else {
                                break;
                            }
                        }
                    }
                    else {
                        // Column was already advanced past the '/', hence the -1.
                        throw pex(Line, Column-1, "unexpected \"/\"");
                    }
                }
                else {
                    // Not a whitespace char.  Break out of whitespace-eating loop.
                    break;
                }
            }

            if (IsIdentStart(Next)) {
                // Identifier: keep appending while the peeked character is an identifier part.
                uint StartColumn = Column;
                Builder.Length = 0;
                while (true) {
                    Column++;
                    Builder.Append(Next);
                    int NextI = In.Peek();
                    if (NextI < 0) break;
                    Next = (char) NextI;
                    if (!IsIdentPart(Next)) break;
                    In.Read();
                }
                return new Token.Ident(new SourcePos(Line, StartColumn), Builder.ToString());
            }
            else if (IsDigit(Next)) {
                // Non-negative decimal integer literal of arbitrary size.
                uint StartColumn = Column;
                BigInteger Value = BigInteger.Zero;
                while (true) {
                    Column++;

                    Value *= 10;
                    Value += (Next - '0');

                    int NextI = In.Peek();
                    if (NextI < 0) break;
                    Next = (char) NextI;
                    if (!IsDigit(Next)) break;
                    In.Read();
                }
                return new Token.LitInt(new SourcePos(Line, StartColumn), Value);
            }
            else if (Next == '-') {
                // Either a negative integer literal or the "->" arrow.
                uint StartColumn = Column;
                Column++;

                char After = ReadChar("token");

                if (IsDigit(After)) {
                    BigInteger Value = BigInteger.Zero;

                    Next = After;

                    while (true) {
                        Value *= 10;
                        Value += (Next - '0');
                        Column++;

                        int NextI = In.Peek();
                        if (NextI < 0) break;
                        Next = (char) NextI;
                        if (!IsDigit(Next)) break;
                        In.Read();
                    }

                    Value = Value * -1;
                    return new Token.LitInt(new SourcePos(Line, StartColumn), Value);
                }
                else if (After == '>') {
                    Column++;
                    return new Token(new SourcePos(Line, StartColumn), Codes.RightArrow);
                }
                else {
                    throw pex(Line, StartColumn, "expecting either a negative integer or \"->\"");
                }
            }
            else {
                switch (Next) {
                    // Single-character punctuation: the character itself is the token code.
                    case '+': case '{': case '}': case ',':
                    case '<': case '>': case '[': case ']': case ':': case '=': {
                        Token t = new Token(new SourcePos(Line, Column), Next);
                        Column++;
                        return t;
                    }

                    case '"': {
                        // String literal.  Only printable ASCII and escape sequences are
                        // accepted between the quotes.
                        uint StartColumn = Column;
                        Builder.Length = 0;
                        Column++;
                        while (true) {
                            Next = ReadChar("a string literal");
                            if (Next == '"') {
                                Column++;
                                break;
                            }
                            else if (Next == '\\') {
                                uint EscapeStart = Column; // column of the backslash
                                Column++;
                                Next = ReadChar("a string literal");
                                switch (Next) {
                                    case '"': case '\\': case '\'': Builder.Append(Next); break;
                                    case 'n': Builder.Append('\n'); break;
                                    case 't': Builder.Append('\t'); break;
                                    case 'r': Builder.Append('\r'); break;
                                    case '0': Builder.Append('\0'); break;
                                    case 'x': {
                                        int Value = ReadHexEscapeValue(2, "\\xHH");
                                        Builder.Append((char) Value);
                                        break;
                                    }
                                    case '+': {
                                        // "u+"
                                        // NOTE(review): this branch is selected by '+' directly after
                                        // the backslash, yet the message names the form "\u+HHHHHH";
                                        // which spelling is intended is not visible here - confirm.
                                        int Value = ReadHexEscapeValue(6, "\\u+HHHHHH");
                                        if (Value <= 0xFFFF) {
                                            if (Value >= 0xD800 && Value <= 0xDFFF) {
                                                throw pex(Line, EscapeStart, "escape sequence refers to a UTF-16 surrogate, which isn't a valid code point");
                                            }
                                            Builder.Append((char) Value);
                                        }
                                        else if (Value <= 0x10FFFF) {
                                            // Encode as surrogate pair.
                                            Value -= 0x10000;
                                            char HighSurrogate = (char) (0xD800 + (Value >> 10));
                                            char LowSurrogate = (char) (0xDC00 + (Value & 0x3FF));
                                            Builder.Append(HighSurrogate);
                                            Builder.Append(LowSurrogate);
                                        }
                                        else {
                                            throw pex(Line, EscapeStart, "escape sequence refers to a code point that is past the Unicode range");
                                        }
                                        break;
                                    }
                                    case 'u': {
                                        // "u"
                                        int Value = ReadHexEscapeValue(4, "\\uHHHH");
                                        if (Value >= 0xD800 && Value <= 0xDFFF) {
                                            throw pex(Line, EscapeStart, "escape sequence refers to a UTF-16 surrogate, which isn't a valid code point");
                                        }
                                        Builder.Append((char) Value);
                                        break;
                                    }

                                    default:
                                        throw pex(Line, EscapeStart, "invalid escape sequence character: " + StringUtil.ReadableChar(Next));
                                }
                                // Accounts for the last character of the escape sequence.
                                Column++;
                            }
                            else if (Next >= 32 && Next <= 126) {
                                Column++;
                                Builder.Append(Next);
                            }
                            else {
                                throw pex(Line, Column, "invalid character in string literal: " + StringUtil.ReadableChar(Next));
                            }
                        }
                        return new Token.LitString(new SourcePos(Line, StartColumn), Builder.ToString());
                    }

                    default:
                        throw pex(Line, Column, "unexpected character: " + StringUtil.ReadableChar(Next));
                }
            }
        }

        /// <summary>
        /// Reads exactly <paramref name="DigitCount"/> hex digits of a string-literal escape
        /// sequence and returns the value they spell, most significant digit first.
        /// </summary>
        /// <param name="DigitCount">Number of hex digits to read.</param>
        /// <param name="Form">Escape form to name in the error message, e.g. <c>\xHH</c>.</param>
        /// <remarks>
        /// On entry <c>Column</c> must be the column of the character just before the first
        /// digit; on return it is the column of the last digit. A non-hex character is
        /// reported at its own column.
        /// </remarks>
        private int ReadHexEscapeValue(uint DigitCount, string Form)
        {
            int Value = 0;
            for (uint j = 0; j < DigitCount; j++) {
                // Advance first, so Column names the digit being read.
                Column++;
                char Digit = ReadChar("a string literal");
                int d = HexValue(Digit);
                if (d < 0) throw pex(Line, Column, "in \"" + Form + "\" escape sequence: expecting hex digit, found " + StringUtil.ReadableChar(Digit));
                Value <<= 4;
                Value |= d;
            }
            return Value;
        }
예제 #2
0
파일: TextParser.cs 프로젝트: cakoose/cks
 /// <summary>
 /// Advances by one token: asks the tokenizer for its next token and stores it in
 /// <c>Peek</c>, replacing the previous value.
 /// </summary>
 private void Next()
 {
     Token Upcoming = this.Tokenizer.Lex();
     this.Peek = Upcoming;
 }
예제 #3
0
파일: TextParser.cs 프로젝트: cakoose/cks
 /// <summary>
 /// Creates a parser that reads from the given tokenizer and immediately lexes the
 /// first token into <c>Peek</c>.
 /// </summary>
 /// <param name="Tokenizer">Token source; must not be null.</param>
 /// <exception cref="System.ArgumentNullException">
 /// <paramref name="Tokenizer"/> is null.
 /// </exception>
 public TextParser(Tokenizer Tokenizer)
 {
     // Fail with a descriptive argument error before touching any state, instead of a
     // bare NullReferenceException from the Lex() call below.
     if (Tokenizer == null) throw new System.ArgumentNullException("Tokenizer");

     this.Tokenizer = Tokenizer;
     this.Peek = Tokenizer.Lex();
 }
예제 #4
0
파일: TextParser.cs 프로젝트: cakoose/cks
 /// <summary>
 /// Builds (but does not throw) a <c>ProblemException</c> wrapping a <c>Problem</c>
 /// located at the source position of <paramref name="t"/>.
 /// </summary>
 /// <param name="t">Token whose <c>SourcePos</c> is the first position of the problem.</param>
 /// <param name="Message">Message passed along with the first position.</param>
 /// <param name="t2">Second source position, passed through to <c>Problem</c>.</param>
 /// <param name="m2">Message passed along with the second position.</param>
 /// <returns>The new exception, for the caller to throw.</returns>
 private static ProblemException pex(Token t, string Message, SourcePos t2, string m2)
 {
     Problem Details = new Problem(t.SourcePos, Message, t2, m2);
     return new ProblemException(Details);
 }
예제 #5
0
파일: TextParser.cs 프로젝트: cakoose/cks
 /// <summary>
 /// Consumes the current lookahead token: returns the token held in <c>Peek</c> and
 /// replaces it with the next token from the tokenizer.
 /// </summary>
 /// <returns>The token that was in <c>Peek</c> when the method was called.</returns>
 private Token Take()
 {
     Token Consumed = this.Peek;
     this.Peek = this.Tokenizer.Lex();
     return Consumed;
 }