/// <summary>
/// Consumes a single line terminator at the cursor and returns it. A CR that is
/// immediately followed by LF is skipped so that the pair yields only the LF. When
/// that CR is skipped and <paramref name="sb"/> is non-null, the CR is appended to
/// <paramref name="sb"/>; the returned character is never appended here.
/// Enqueues a NewLineToken and flags the lexer as being at the start of a line.
/// </summary>
/// <param name="sb">Optional buffer that receives a skipped CR.</param>
/// <returns>The terminator character that was current after any CR was skipped.</returns>
private char LexLineTerm(StringBuilder sb = null)
{
    Contracts.Assert(LexCharUtils.StartKind(ChCur) == LexStartKind.LineTerm);

    int ichStart = _cursor.IchCur;

    bool fCrLf = ChCur == '\xD' && ChPeek(1) == '\xA';
    if (fCrLf)
    {
        // Drop the CR, but let the caller's buffer keep it if one was supplied.
        if (sb != null)
        {
            sb.Append(ChCur);
        }
        ChNext();
    }

    char chTerm = ChCur;
    ChNext();

    // The terminator is "nested" when it did not begin at the current token's start,
    // i.e. it was encountered while lexing some enclosing token.
    bool fNested = _ichMinTok != ichStart;
    NewLineToken tokNewLine = fNested
        ? new NewLineToken(GetTextSpan(ichStart, _cursor.IchCur), true)
        : new NewLineToken(GetSpan(), false);
    _queue.Enqueue(tokNewLine);

    _fLineStart = true;
    return chTerm;
}
/// <summary>
/// Skips over a run of error characters and returns an ErrorToken (ErrId.BadChar)
/// describing them. The current character is always consumed; following characters
/// are consumed as well while their start kind is LexStartKind.None and the end of
/// input has not been reached. Each consumed character is recorded in the message
/// as the character followed by its unicode escape in parentheses.
/// </summary>
private Token LexError()
{
    _sb.Length = 0;
    for (; ; )
    {
        _sb.AppendFormat("{0}({1})", ChCur, LexCharUtils.GetUniEscape(ChCur));

        // Advance; stop as soon as the next character can start a token or input ends.
        if (LexCharUtils.StartKind(ChNext()) != LexStartKind.None || Eof)
        {
            break;
        }
    }

    string badChars = _sb.ToString();
    return new ErrorToken(GetSpan(), ErrId.BadChar, badChars);
}
/// <summary>
/// Consumes a run of spacing characters starting at the cursor.
/// Produces no token: the result is always null.
/// </summary>
private Token LexSpace()
{
    Contracts.Assert(LexCharUtils.StartKind(ChCur) == LexStartKind.Space);

    char chNext;
    do
    {
        chNext = ChNext();
    }
    while (LexCharUtils.IsSpace(chNext));

    return null;
}
/// <summary>
/// Starts a new token at the cursor and dispatches to the lexing routine selected by
/// the start kind of the current character. Must not be called at end of input.
/// May return null when the selected routine produces no token directly.
/// </summary>
private Token FetchToken()
{
    Contracts.Assert(!Eof);
    StartTok();

    LexStartKind kind = LexCharUtils.StartKind(ChCur);

    // Only spacing and preprocessor starts leave the line-start flag untouched.
    bool fKeepLineStart = kind == LexStartKind.Space || kind == LexStartKind.PreProc;
    if (!fKeepLineStart)
    {
        _fLineStart = false;
    }

    switch (kind)
    {
        case LexStartKind.Punc:
            return LexPunc();

        case LexStartKind.NumLit:
            return LexNumLit();

        case LexStartKind.StrLit:
            return LexStrLit();

        case LexStartKind.Verbatim:
            {
                // The verbatim marker must introduce either a string literal or an identifier.
                char chAfter = ChPeek(1);
                if (chAfter == '"')
                {
                    return LexStrLit();
                }
                if (LexCharUtils.StartKind(chAfter) == LexStartKind.Ident)
                {
                    return LexIdent();
                }

                // Neither: consume the marker and report it.
                ChNext();
                ReportError(ErrId.VerbatimLiteralExpected);
                return null;
            }

        case LexStartKind.Ident:
            return LexIdent();

        case LexStartKind.Comment:
            return LexComment();

        case LexStartKind.Space:
            return LexSpace();

        case LexStartKind.LineTerm:
            LexLineTerm();
            return null;

        case LexStartKind.PreProc:
            return LexPreProc();

        default:
            return LexError();
    }
}
/// <summary>
/// Accumulates identifier characters from the cursor into _sb and returns the
/// resulting string from the pool, or null when nothing was accumulated.
/// Backslash escapes are decoded through FLexEscChar (unicode forms only); a
/// successfully decoded identifier escape sets <paramref name="fVerbatim"/> to true.
/// Characters for which LexCharUtils.IsFormat is true are consumed but not stored.
/// </summary>
/// <param name="fVerbatim">Set to true when an escaped identifier character is accepted; never cleared here.</param>
private NormStr LexIdentCore(ref bool fVerbatim)
{
    Contracts.Assert(LexCharUtils.IsIdentStart(ChCur));

    _sb.Length = 0;
    while (true)
    {
        char chIdent;
        if (ChCur != '\\')
        {
            // Plain character: stop at the first one that cannot be part of an identifier.
            if (!LexCharUtils.IsIdent(ChCur))
            {
                break;
            }
            chIdent = ChCur;
            ChNext();
        }
        else
        {
            int ichEsc = _cursor.IchCur;
            uint uEsc;
            if (!FLexEscChar(true, out uEsc))
            {
                break;
            }

            // The escape must fit in a single UTF-16 unit and denote an identifier character.
            bool fFits = uEsc <= 0xFFFF;
            chIdent = fFits ? (char)uEsc : '\0';
            if (!fFits || !LexCharUtils.IsIdent(chIdent))
            {
                ReportError(ichEsc, _cursor.IchCur, ErrId.BadChar, LexCharUtils.GetUniEscape(uEsc));
                break;
            }
            fVerbatim = true;
        }

        Contracts.Assert(LexCharUtils.IsIdent(chIdent));
        if (!LexCharUtils.IsFormat(chIdent))
        {
            _sb.Append(chIdent);
        }
    }

    if (_sb.Length == 0)
    {
        return null;
    }
    return _lex._pool.Add(_sb);
}
/// <summary>
/// Lex a decimal integer literal. The digits must already be in _sb.
/// If the value does not fit in a ulong, ErrId.IntOverflow is reported and the
/// token carries ulong.MaxValue.
/// </summary>
/// <param name="ilk">The literal kind flags to attach to the resulting token.</param>
/// <returns>An IntLitToken spanning the current token.</returns>
private Token LexDecInt(IntLitKind ilk)
{
    // Digits are in _sb.
    Contracts.Assert(_sb.Length > 0);

    ulong u = 0;
    for (int ich = 0; ich < _sb.Length; ich++)
    {
        ulong uDigit = (ulong)LexCharUtils.GetDecVal(_sb[ich]);

        // Detect overflow arithmetically instead of relying on a thrown
        // OverflowException: u * 10 + uDigit fits in a ulong exactly when
        // u <= (ulong.MaxValue - uDigit) / 10.
        if (u > (ulong.MaxValue - uDigit) / 10)
        {
            ReportError(ErrId.IntOverflow);
            u = ulong.MaxValue;
            break;
        }

        u = u * 10 + uDigit;
    }

    return new IntLitToken(GetSpan(), u, ilk);
}
/// <summary>
/// Lexes a punctuator (operator) using longest match. Starting from the current
/// character, the candidate text is extended one punctuation character at a time for
/// as long as the keyword table still recognizes it as a punctuator or punctuator
/// prefix; the longest candidate that maps to a real token kind wins and only that
/// many characters are consumed. If no candidate maps to a token kind, the input is
/// handed to LexError.
/// </summary>
private Token LexPunc()
{
    TokKind tidBest = TokKind.None;
    int cchBest = 0;

    _sb.Length = 0;
    _sb.Append(ChCur);
    while (true)
    {
        NormStr nstrCandidate = _lex._pool.Add(_sb);
        TokKind tidCandidate;
        if (!_lex._kwt.IsPunctuator(nstrCandidate, out tidCandidate))
        {
            break;
        }

        if (tidCandidate != TokKind.None)
        {
            // A complete punctuator, not merely a prefix of one: remember it.
            tidBest = tidCandidate;
            cchBest = _sb.Length;
        }

        // Look ahead (without consuming) for a character that could extend the candidate.
        char chAhead = ChPeek(_sb.Length);
        if (!LexCharUtils.IsPunc(chAhead))
        {
            break;
        }
        _sb.Append(chAhead);
    }

    if (cchBest == 0)
    {
        return LexError();
    }

    // Consume exactly the characters of the winning punctuator.
    for (int cchLeft = cchBest; cchLeft > 0; cchLeft--)
    {
        ChNext();
    }
    return KeyToken.Create(GetSpan(), tidBest);
}
/// <summary>
/// Lex a hex literal optionally followed by an integer suffix. Asserts the current
/// character is a hex digit (the caller has already consumed the "0x" prefix).
/// If the value does not fit in a ulong, ErrId.IntOverflow is reported once and the
/// token carries ulong.MaxValue.
/// </summary>
/// <returns>An IntLitToken flagged with IntLitKind.Hex plus any suffix flags.</returns>
private Token LexHexInt()
{
    Contracts.Assert(LexCharUtils.IsHexDigit(ChCur));

    ulong u = 0;
    bool fOverflow = false;

    do
    {
        // If the top nibble is already occupied, shifting in another digit loses bits.
        if ((u & 0xF000000000000000) != 0 && !fOverflow)
        {
            ReportError(ErrId.IntOverflow);
            fOverflow = true;
        }
        u = (u << 4) + (ulong)LexCharUtils.GetHexVal(ChCur);
    }
    while (LexCharUtils.IsHexDigit(ChNext()));

    if (fOverflow)
    {
        u = ulong.MaxValue;
    }

    // Lex the suffix BEFORE taking the span. Arguments are evaluated left to right,
    // so calling LexIntSuffix() inline as a later constructor argument would take the
    // span before the suffix is lexed. The decimal path lexes the suffix before the
    // token is built; do the same here.
    IntLitKind ilk = LexIntSuffix() | IntLitKind.Hex;
    return new IntLitToken(GetSpan(), u, ilk);
}
/// <summary>
/// Lexes input that starts with a slash. "//" begins a single line comment running
/// up to (not including) the line terminator or end of input; "/*" begins a block
/// comment running through the closing "*/". Any other following character means the
/// slash is a punctuator and is handed to LexPunc. A block comment whose full text is
/// registered as a keyword yields a keyword token instead of a CommentToken.
/// </summary>
private Token LexComment()
{
    Contracts.Assert(ChCur == '/');

    int ichStart = _cursor.IchCur;
    char chSecond = ChPeek(1);

    if (chSecond == '/')
    {
        // Single line comment.
        ChNext();
        _sb.Length = 0;
        _sb.Append("//");
        while (!LexCharUtils.IsLineTerm(ChNext()) && !Eof)
        {
            _sb.Append(ChCur);
        }
        return new CommentToken(GetSpan(), _sb.ToString(), 0);
    }

    if (chSecond != '*')
    {
        // Just a slash (or an operator starting with one).
        return LexPunc();
    }

    // Block comment: step onto the '*', record the opener, then step past the '*'.
    ChNext();
    _sb.Length = 0;
    _sb.Append("/*");
    ChNext();

    int cLines = 0;
    bool fClosed = false;
    while (!fClosed)
    {
        if (Eof)
        {
            ReportError(ichStart, _cursor.IchCur, ErrId.UnterminatedComment);
            break;
        }

        char chBody;
        if (LexCharUtils.IsLineTerm(ChCur))
        {
            // LexLineTerm adds a skipped CR to _sb itself; the returned terminator is added below.
            chBody = LexLineTerm(_sb);
            cLines++;
        }
        else
        {
            chBody = ChCur;
            ChNext();
        }
        _sb.Append(chBody);

        if (chBody == '*' && ChCur == '/')
        {
            _sb.Append('/');
            ChNext();
            fClosed = true;
        }
    }

    // We support comment keywords.
    NormStr nstrText = _lex._pool.Add(_sb);
    KeyWordTable.KeyWordKind kwk;
    if (_lex._kwt.IsKeyWord(nstrText, out kwk))
    {
        return KeyToken.CreateKeyWord(GetSpan(), nstrText.ToString(), kwk.Kind, kwk.IsContextKeyWord);
    }
    return new CommentToken(GetSpan(), _sb.ToString(), cLines);
}
/// <summary>
/// Lex a character escape beginning at the backslash under the cursor.
/// Returns true if successful (<paramref name="u"/> is valid).
/// </summary>
/// <param name="fUniOnly">
/// When true, only the unicode forms (\u and \U) are accepted; every other escape is
/// reported as ErrId.BadEscape. When false, \x / \X and the simple single-character
/// escapes are accepted as well.
/// </param>
/// <param name="u">
/// Receives the decoded code value. Set to 0 when an unrecognized escape is reported.
/// </param>
/// <returns>
/// False when the escape is unrecognized or when a hex form has no hex digits at all.
/// Note that a \u or \U escape with too few (but at least one) hex digits reports
/// ErrId.BadEscape yet still returns true with the partial value.
/// </returns>
private bool FLexEscChar(bool fUniOnly, out uint u)
{
    Contracts.Assert(ChCur == '\\');

    // Start of the escape, used as the start position for any error report.
    int ichErr = _cursor.IchCur;
    bool fUni;
    int cchHex;

    // Step past the backslash and dispatch on the escape selector character.
    switch (ChNext())
    {
        case 'u':
            // \u: four hex digits expected.
            fUni = true;
            cchHex = 4;
            goto LHex;
        case 'U':
            // \U: eight hex digits expected.
            fUni = true;
            cchHex = 8;
            goto LHex;
        default:
            if (!fUniOnly)
            {
                switch (ChCur)
                {
                    default:
                        goto LBad;
                    case 'x':
                    case 'X':
                        // \x: between one and four hex digits; a short run is not an error.
                        fUni = false;
                        cchHex = 4;
                        goto LHex;
                    case '\'':
                        u = 0x0027;
                        break;
                    case '"':
                        u = 0x0022;
                        break;
                    case '\\':
                        u = 0x005C;
                        break;
                    case '0':
                        u = 0x0000;
                        break;
                    case 'a':
                        u = 0x0007;
                        break;
                    case 'b':
                        u = 0x0008;
                        break;
                    case 'f':
                        u = 0x000C;
                        break;
                    case 'n':
                        u = 0x000A;
                        break;
                    case 'r':
                        u = 0x000D;
                        break;
                    case 't':
                        u = 0x0009;
                        break;
                    case 'v':
                        u = 0x000B;
                        break;
                }
                // Simple escape recognized: consume the selector character.
                ChNext();
                return(true);
            }
        LBad:
            // Unrecognized escape. Only the backslash has been consumed at this point;
            // the offending character is left under the cursor.
            ReportError(ichErr, _cursor.IchCur, ErrId.BadEscape);
            u = 0;
            return(false);
    }

LHex:
    bool fRet = true;
    // Consume the selector character (u, U, x or X), then accumulate up to cchHex digits.
    ChNext();
    u = 0;
    for (int ich = 0; ich < cchHex; ich++)
    {
        if (!LexCharUtils.IsHexDigit(ChCur))
        {
            // Ran out of hex digits early. That is a failure only if there were none;
            // it is reported as an error for the unicode forms, or when there were none.
            fRet = (ich > 0);
            if (fUni || !fRet)
            {
                ReportError(ichErr, _cursor.IchCur, ErrId.BadEscape);
            }
            break;
        }
        u = (u << 4) + (uint)LexCharUtils.GetHexVal(ChCur);
        ChNext();
    }
    return(fRet);
}
/// <summary>
/// Lexes a string or character literal starting at the cursor.
/// A leading '@' selects the verbatim string form: a doubled quote stands for one
/// quote, line terminators are allowed, and the literal runs to the closing quote
/// (ErrId.UnterminatedString is reported at end of input).
/// Otherwise the opening quote (double or single) selects a regular string or a
/// character literal: backslash escapes are decoded, and the literal must close
/// before a line terminator or end of input (ErrId.NewlineInConst otherwise).
/// A character literal whose decoded content is not exactly one char is reported as
/// ErrId.CharConstEmpty or ErrId.CharConstTooLong.
/// </summary>
/// <returns>A StrLitToken for double-quoted forms, otherwise a CharLitToken.</returns>
private Token LexStrLit()
{
    char chQuote;

    _sb.Length = 0;
    bool fVerbatimForm = ChCur == '@';
    if (fVerbatimForm)
    {
        chQuote = '"';
        ChNext();
        Contracts.Assert(ChCur == '"');
        ChNext();

        while (true)
        {
            char chCur = ChCur;
            if (chCur == '"')
            {
                // Either the closing quote or the first half of a doubled quote.
                ChNext();
                if (ChCur != '"')
                {
                    break;
                }
                ChNext();
            }
            else if (LexCharUtils.IsLineTerm(chCur))
            {
                chCur = LexLineTerm(_sb);
            }
            else if (Eof)
            {
                ReportError(ErrId.UnterminatedString);
                break;
            }
            else
            {
                ChNext();
            }
            _sb.Append(chCur);
        }
    }
    else
    {
        Contracts.Assert(ChCur == '"' || ChCur == '\'');
        chQuote = ChCur;
        ChNext();

        while (true)
        {
            char chCur = ChCur;
            if (chCur == chQuote || Eof || LexCharUtils.IsLineTerm(chCur))
            {
                break;
            }

            if (chCur != '\\')
            {
                ChNext();
            }
            else
            {
                uint uEsc;
                if (!FLexEscChar(false, out uEsc))
                {
                    // Bad escape: already reported, contributes nothing to the value.
                    continue;
                }

                if (uEsc >= 0x10000)
                {
                    // Needs two UTF-16 units; store the first now, the second below.
                    char chFirst;
                    if (!ConvertToSurrogatePair(uEsc, out chFirst, out chCur))
                    {
                        continue;
                    }
                    _sb.Append(chFirst);
                }
                else
                {
                    chCur = (char)uEsc;
                }
            }
            _sb.Append(chCur);
        }

        if (ChCur == chQuote)
        {
            ChNext();
        }
        else
        {
            ReportError(ErrId.NewlineInConst);
        }
    }

    if (chQuote == '"')
    {
        return new StrLitToken(GetSpan(), _sb.ToString());
    }

    // Character literal: exactly one char is expected.
    if (_sb.Length == 0)
    {
        ReportError(ErrId.CharConstEmpty);
    }
    else if (_sb.Length != 1)
    {
        ReportError(ErrId.CharConstTooLong);
    }
    return new CharLitToken(GetSpan(), _sb.Length > 0 ? _sb[0] : '\0');
}
/// <summary>
/// Lexes a numeric literal, or a Dot token when the current character is a dot that
/// is not followed by a digit. Handles hex literals ("0x"/"0X" followed by a hex
/// digit), decimal integers, and reals with an optional fraction and exponent.
/// Asserts that the current character has start kind LexStartKind.NumLit.
/// </summary>
private Token LexNumLit()
{
    Contracts.Assert(LexCharUtils.StartKind(ChCur) == LexStartKind.NumLit);
    Contracts.Assert(LexCharUtils.IsDigit(ChCur) || ChCur == '.');

    // The most common case first: a dot with no digit after it is just a Dot.
    if (ChCur == '.' && !LexCharUtils.IsDigit(ChPeek(1)))
    {
        return LexPunc();
    }

    // Hex literal. "0x" not followed by a hex digit is really a 0 followed by an identifier.
    if (ChCur == '0')
    {
        char chMarker = ChPeek(1);
        if ((chMarker == 'x' || chMarker == 'X') && LexCharUtils.IsHexDigit(ChPeek(2)))
        {
            // Move onto the first hex digit.
            ChNext();
            ChNext();
            return LexHexInt();
        }
    }

    // Decimal literal (possibly floating point).
    Contracts.Assert(LexCharUtils.IsDigit(ChCur) || ChCur == '.' && LexCharUtils.IsDigit(ChPeek(1)));

    bool fDot = ChCur == '.';
    _sb.Length = 0;
    _sb.Append(ChCur);
    while (true)
    {
        char chAdvanced = ChNext();
        if (chAdvanced == '.')
        {
            // Accept at most one dot, and only when a digit follows it.
            if (fDot || !LexCharUtils.IsDigit(ChPeek(1)))
            {
                break;
            }
            fDot = true;
        }
        else if (!LexCharUtils.IsDigit(ChCur))
        {
            break;
        }
        _sb.Append(ChCur);
    }

    // Optional exponent: e/E, optional sign, at least one digit.
    bool fExp = false;
    if (ChCur == 'e' || ChCur == 'E')
    {
        char chAfterE = ChPeek(1);
        bool fSign = chAfterE == '+' || chAfterE == '-';
        if (LexCharUtils.IsDigit(chAfterE) || fSign && LexCharUtils.IsDigit(ChPeek(2)))
        {
            fExp = true;
            _sb.Append(ChCur);
            _sb.Append(ChNext());
            for (char chDigit = ChNext(); LexCharUtils.IsDigit(chDigit); chDigit = ChNext())
            {
                _sb.Append(chDigit);
            }
        }
    }

    bool fReal = fDot || fExp;
    char chSuf = LexRealSuffix(fReal);
    if (fReal || chSuf != '\0')
    {
        return LexRealNum(chSuf);
    }

    // Integer type.
    return LexDecInt(LexIntSuffix());
}