/// <summary>
/// Runs the table-driven quick scanner over the current character window and, when it
/// recognizes a token, resolves that token through the token cache.
/// </summary>
/// <returns>
/// The token produced by the cache lookup when the state machine ends in <c>Done</c>;
/// otherwise <c>null</c>, after the text window has been reset to the lexeme start.
/// </returns>
private SyntaxToken QuickScanSyntaxToken()
{
    this.Start();
    var state = QuickScanState.Initial;
    int position = TextWindow.Offset;
    int limit = Math.Min(TextWindow.CharacterWindowCount, position + MaxCachedTokenSize);
    int hash = Hash.FnvOffsetBias;

    // Copy the frequently accessed fields into locals before looping.
    var window = TextWindow.CharacterWindow;
    var propertyCount = s_charProperties.Length;

    // Bad > Done, and Bad is the only state above Done, so ">= Done" matches exactly those two.
    // The transition table has no entries for either of them, so the scan has to stop as soon
    // as one is reached; the assert further down validates the exit state.
    while (position < limit)
    {
        int code = unchecked((int)window[position]);

        CharFlags flags = CharFlags.Complex;
        if (code < propertyCount)
        {
            flags = (CharFlags)s_charProperties[code];
        }

        state = (QuickScanState)s_stateTransitions[(int)state, (int)flags];
        if (state >= QuickScanState.Done)
        {
            break;
        }

        hash = unchecked((hash ^ code) * Hash.FnvPrime);
        position++;
    }

    if (position >= limit)
    {
        // Ran out of characters in the window before reaching a terminal state.
        state = QuickScanState.Bad;
    }

    TextWindow.AdvanceChar(position - TextWindow.Offset);
    Debug.Assert(state == QuickScanState.Bad || state == QuickScanState.Done, "can only exit with Bad or Done");

    if (state != QuickScanState.Done)
    {
        TextWindow.Reset(TextWindow.LexemeStartPosition);
        return null;
    }

    // This is a good token: look it up by the characters scanned and their hash.
    return _cache.LookupToken(
        TextWindow.CharacterWindow,
        TextWindow.LexemeRelativeStart,
        position - TextWindow.LexemeRelativeStart,
        hash,
        _createQuickTokenFunction);
}
/// <summary>
/// Skips over a verbatim string literal (<c>@"..."</c>) that appears inside an interpolated
/// string hole. A doubled quote (<c>""</c>) is treated as an escaped quote and does not end
/// the literal.
/// </summary>
/// <param name="error">
/// Diagnostic slot shared with the enclosing scan. This method never assigns it; running out
/// of input is left for the enclosing construct to report.
/// </param>
private void ScanISLNestedVerbatimString(ref SyntaxDiagnosticInfo error)
{
    Debug.Assert(TextWindow.PeekChar() == '@');
    TextWindow.AdvanceChar();
    Debug.Assert(TextWindow.PeekChar() == '\"');
    TextWindow.AdvanceChar(); // move past quote

    while (true)
    {
        if (IsAtEnd())
        {
            // we'll get an error in the enclosing construct
            return;
        }

        char ch = TextWindow.PeekChar();
        TextWindow.AdvanceChar();
        if (ch != '\"')
        {
            continue;
        }

        // The quote in `ch` has already been consumed, so the character that could double it
        // is the current one. Looking one further ahead (PeekChar(1)) ended the literal at the
        // first half of "" and mistook sequences such as "x" for an escaped quote.
        if (TextWindow.PeekChar() != '\"')
        {
            return;
        }

        TextWindow.AdvanceChar(); // move past escaped quote
    }
}
/// <summary>
/// Scans the remainder of a single-line raw string literal whose opening delimiter of
/// <paramref name="startingQuoteCount"/> quotes has already been consumed.
/// </summary>
/// <param name="info">Receives the token kind and, on success, the string value between the delimiters.</param>
/// <param name="startingQuoteCount">Number of quotes in the opening delimiter.</param>
private void ScanSingleLineRawStringLiteral(ref TokenInfo info, int startingQuoteCount)
{
    info.Kind = SyntaxKind.SingleLineRawStringLiteralToken;

    int closingDelimiterStart;
    int closingQuoteCount;
    while (true)
    {
        char next = TextWindow.PeekChar();
        if (next != '"')
        {
            // Hitting the end of the line or the file before a closing delimiter is an error.
            if (SyntaxFacts.IsNewLine(next))
            {
                this.AddError(TextWindow.Position, width: TextWindow.GetNewLineWidth(), ErrorCode.ERR_UnterminatedRawString);
                return;
            }

            if (IsAtEndOfText(next))
            {
                this.AddError(TextWindow.Position, width: 0, ErrorCode.ERR_UnterminatedRawString);
                return;
            }

            // Ordinary content: just step over it.
            TextWindow.AdvanceChar();
            continue;
        }

        closingDelimiterStart = TextWindow.Position;
        closingQuoteCount = ConsumeQuoteSequence();

        // A quote run shorter than the opening delimiter is part of the content.
        if (closingQuoteCount >= startingQuoteCount)
        {
            break;
        }
    }

    // A raw string can never be directly followed by another string, so any quotes beyond the
    // delimiter length are reported as excess.
    int surplusQuotes = closingQuoteCount - startingQuoteCount;
    if (surplusQuotes > 0)
    {
        this.AddError(
            position: TextWindow.Position - surplusQuotes,
            width: surplusQuotes,
            ErrorCode.ERR_TooManyQuotesForRawString);
    }

    // The value is everything between the opening delimiter and the start of the closing run.
    int contentStart = TextWindow.LexemeStartPosition + startingQuoteCount;
    info.StringValue = TextWindow.GetText(
        position: contentStart,
        length: closingDelimiterStart - contentStart,
        intern: true);
}
private void AddMultiLineRawStringLiteralLineContents( StringBuilder indentationWhitespace, StringBuilder currentLineWhitespace, bool firstContentLine) { Debug.Assert(SyntaxFacts.IsNewLine(TextWindow.PeekChar())); var newLineWidth = TextWindow.GetNewLineWidth(); for (var i = 0; i < newLineWidth; i++) { // the initial newline in `""" \r\n` is not added to the contents. if (!firstContentLine) { _builder.Append(TextWindow.PeekChar()); } TextWindow.AdvanceChar(); } var lineStartPosition = TextWindow.Position; currentLineWhitespace.Clear(); ConsumeWhitespace(currentLineWhitespace); if (!StartsWith(currentLineWhitespace, indentationWhitespace)) { // We have a line where the indentation of that line isn't a prefix of indentation // whitespace. // // If we're not on a blank line then this is bad. That's a content line that doesn't start // with the indentation whitespace. If we are on a blank line then it's ok if the whitespace // we do have is a prefix of the indentation whitespace. var isBlankLine = SyntaxFacts.IsNewLine(TextWindow.PeekChar()); var isLegalBlankLine = isBlankLine && StartsWith(indentationWhitespace, currentLineWhitespace); if (!isLegalBlankLine) { // Specialized error message if this is a spacing difference. if (CheckForSpaceDifference( currentLineWhitespace, indentationWhitespace, out var currentLineWhitespaceChar, out var indentationWhitespaceChar)) { this.AddError( lineStartPosition, width: TextWindow.Position - lineStartPosition, ErrorCode.ERR_LineContainsDifferentWhitespace, currentLineWhitespaceChar, indentationWhitespaceChar); } else { this.AddError( lineStartPosition, width: TextWindow.Position - lineStartPosition, ErrorCode.ERR_LineDoesNotStartWithSameWhitespace); } return; } }
/// <summary>
/// Scans the body of an interpolated string up to (but not including) the closing quote,
/// recording the span of every <c>\{ ... }</c> hole it finds.
/// </summary>
/// <param name="interpolations">Optional collector for the holes; may be <c>null</c>.</param>
/// <param name="error">Receives the first diagnostic produced, if none is set yet.</param>
private void ScanISLContents(ArrayBuilder<Interpolation> interpolations, ref SyntaxDiagnosticInfo error)
{
    // Running out of input ends the scan without a closing quote having been seen.
    while (!IsAtEnd())
    {
        char current = TextWindow.PeekChar();
        if (current == '\"')
        {
            // found the end of the string
            return;
        }

        // Consumes either an ordinary character or the backslash that starts an escape.
        TextWindow.AdvanceChar();
        if (current != '\\')
        {
            continue;
        }

        if (IsAtEnd())
        {
            // the caller will complain about unclosed quote
            return;
        }

        if (TextWindow.PeekChar() != '{')
        {
            TextWindow.AdvanceChar(); // skip past a single escaped character
            continue;
        }

        int holeStart = TextWindow.Position;
        TextWindow.AdvanceChar();
        ScanISLHoleBalancedText('}', true, ref error);
        int holeEnd = TextWindow.Position;

        if (TextWindow.PeekChar() == '}')
        {
            TextWindow.AdvanceChar();
        }
        else if (error == null)
        {
            // Span covers the two characters starting at the backslash.
            error = MakeError(holeStart - 1, 2, ErrorCode.ERR_UnclosedExpressionHole);
        }

        interpolations?.Add(new Interpolation(holeStart, holeEnd));
    }
}
/// <summary>
/// Advances the text window past every consecutive occurrence of <paramref name="ch"/>
/// starting at the current position.
/// </summary>
/// <param name="ch">The character to consume.</param>
/// <returns>How far the window position moved, i.e. the length of the run that was consumed.</returns>
private int ConsumeCharSequence(char ch)
{
    int runStart = TextWindow.Position;
    while (true)
    {
        if (TextWindow.PeekChar() != ch)
        {
            return TextWindow.Position - runStart;
        }

        TextWindow.AdvanceChar();
    }
}
/// <summary>
/// Drives the quick-scan state machine over the current character window and, if it accepts,
/// fetches the corresponding token from the cache.
/// </summary>
/// <returns>
/// The cached or newly created token when the machine finishes in <c>Done</c>; <c>null</c>
/// otherwise, with the text window reset to the lexeme start.
/// </returns>
private SyntaxToken QuickScanSyntaxToken()
{
    this.Start();
    var state = QuickScanState.Initial;
    int index = TextWindow.Offset;
    int end = Math.Min(TextWindow.CharacterWindowCount, index + MaxCachedTokenSize);
    int hash = Hash.FnvOffsetBias;

    // Keep the hot fields in locals for the duration of the loop.
    var chars = TextWindow.CharacterWindow;
    var knownCharCount = charProperties.Length;

    bool reachedTerminalState = false;
    while (!reachedTerminalState && index < end)
    {
        int code = unchecked((int)chars[index]);
        var flags = CharFlags.Complex;
        if (code < knownCharCount)
        {
            flags = (CharFlags)charProperties[code];
        }

        state = (QuickScanState)stateTransitions[(int)state, (int)flags];
        if (state == QuickScanState.Done || state == QuickScanState.Bad)
        {
            // Stop on the terminating character without hashing or stepping past it.
            reachedTerminalState = true;
        }
        else
        {
            hash = unchecked((hash ^ code) * Hash.FnvPrime);
            index++;
        }
    }

    if (!reachedTerminalState)
    {
        // ran out of characters in window
        state = QuickScanState.Bad;
    }

    TextWindow.AdvanceChar(index - TextWindow.Offset);
    Debug.Assert(state == QuickScanState.Bad || state == QuickScanState.Done);

    if (state != QuickScanState.Done)
    {
        TextWindow.Reset(TextWindow.LexemeStartPosition);
        return null;
    }

    // this is a good token!
    return this._cache.LookupToken(
        TextWindow.CharacterWindow,
        TextWindow.LexemeRelativeStart,
        index - TextWindow.LexemeRelativeStart,
        hash,
        createQuickTokenFunction);
}
/// <summary>
/// Scans a verbatim string literal (<c>@"..."</c>) and fills in <paramref name="info"/>.
/// </summary>
/// <param name="info">Receives the token kind, the raw text and the unescaped string value.</param>
/// <param name="allowNewlines">
/// When <c>false</c>, the first newline inside the literal produces an error code, but
/// scanning still continues to the closing quote.
/// </param>
/// <returns>The first error encountered while scanning, or <c>null</c> if there was none.</returns>
private ErrorCode? ScanVerbatimStringLiteral(ref TokenInfo info, bool allowNewlines)
{
    _builder.Length = 0;

    Debug.Assert(TextWindow.PeekChar() == '@' && TextWindow.PeekChar(1) == '"');
    TextWindow.AdvanceChar(2);

    ErrorCode? error = null;
    while (true)
    {
        char current = TextWindow.PeekChar();
        if (current != '"')
        {
            if (current == SlidingTextWindow.InvalidCharacter && TextWindow.IsReallyAtEnd())
            {
                // End of source without a closing quote: record the error and stop lexing the string.
                if (error is null)
                {
                    error = ErrorCode.ERR_UnterminatedStringLit;
                }

                break;
            }

            // A disallowed newline is reported once, but the literal is still consumed to its end
            // so its contents are not lexed as code (which would cascade into many errors).
            if (!allowNewlines && SyntaxFacts.IsNewLine(current) && error is null)
            {
                error = ErrorCode.ERR_NewlinesAreNotAllowedInsideANonVerbatimInterpolatedString;
            }

            TextWindow.AdvanceChar();
            _builder.Append(current);
            continue;
        }

        TextWindow.AdvanceChar();
        if (TextWindow.PeekChar() != '"')
        {
            // A lone quote finishes the string.
            break;
        }

        // Doubled quote: consume the second one and keep a single quote in the value.
        TextWindow.AdvanceChar();
        _builder.Append(current);
    }

    info.Kind = SyntaxKind.StringLiteralToken;
    info.Text = TextWindow.GetText(intern: false);
    info.StringValue = _builder.ToString();

    return error;
}
/// <summary>
/// Consumes a two-character backslash escape (for example <c>\n</c> or <c>\"</c>) and returns
/// the character it stands for.
/// </summary>
/// <remarks>
/// Expected to be called with the text window positioned on the backslash (the visible caller
/// does so). On return the window is positioned after the escape character.
/// </remarks>
/// <returns>The character denoted by the escape sequence.</returns>
/// <exception cref="FormatException">The character after the backslash is not a recognized escape.</exception>
private char ScanEscapeSequence()
{
    var start = TextWindow.Position;
    TextWindow.AdvanceChar(); // skip the backslash

    var ch = TextWindow.PeekChar();
    switch (ch)
    {
        case '\'':
        case '\"':
        case '\\':
            // These escapes denote themselves.
            break;
        case 'a':
            ch = '\u0007';
            break;
        case 'b':
            ch = '\u0008';
            break;
        case 'f':
            ch = '\u000c';
            break;
        case 'n':
            ch = '\u000a';
            break;
        case 'r':
            ch = '\u000d';
            break;
        case 't':
            ch = '\u0009';
            break;
        case 'v':
            ch = '\u000b';
            break;
        case '0':
            ch = '\u0000';
            break;
        default:
            throw new FormatException($"Unrecognized escape sequence '\\{ch}' at position {start}.");
    }

    // Consume the escape character itself. Without this the caller's next PeekChar() sees it
    // again: "\n" would yield a newline followed by a literal 'n', and "\"" would end the string.
    TextWindow.AdvanceChar();
    return ch;
}
/// <summary>
/// Advances past every whitespace character at the current position, optionally copying
/// each one into <paramref name="builder"/>.
/// </summary>
/// <param name="builder">Destination for the consumed whitespace, or <c>null</c> to discard it.</param>
private void ConsumeWhitespace(StringBuilder? builder)
{
    for (var ch = TextWindow.PeekChar(); SyntaxFacts.IsWhitespace(ch); ch = TextWindow.PeekChar())
    {
        builder?.Append(ch);
        TextWindow.AdvanceChar();
    }
}
/// <summary>
/// Scans a bracketed region inside an interpolation hole: consumes the opening
/// <paramref name="start"/> character, the balanced text that follows, and the closing
/// <paramref name="end"/> character if it is present.
/// </summary>
/// <param name="start">The opening bracket, which must be the current character.</param>
/// <param name="end">The matching closing bracket.</param>
/// <param name="error">Diagnostic slot passed through to the balanced-text scan.</param>
void ScanISLHoleBracketed(char start, char end, ref SyntaxDiagnosticInfo error)
{
    Debug.Assert(start == TextWindow.PeekChar());
    TextWindow.AdvanceChar();
    ScanISLHoleBalancedText(end, false, ref error);

    if (TextWindow.PeekChar() != end)
    {
        // Nothing is reported here for the missing closer; an error was given by the caller.
        return;
    }

    TextWindow.AdvanceChar();
}
/// <summary>
/// Consumes a run of hexadecimal digits and <c>_</c> separators, appending the digits (but
/// not the separators) to <c>_builder</c>.
/// </summary>
/// <returns>How far the window position moved, separators included.</returns>
private int ConsumeHexDigits()
{
    int begin = TextWindow.Position;
    while (true)
    {
        char current = TextWindow.PeekChar();
        bool isSeparator = current == '_';
        if (!isSeparator && !CharUtils.IsHexadecimal(current))
        {
            break;
        }

        if (!isSeparator)
        {
            _builder.Append(current);
        }

        TextWindow.AdvanceChar();
    }

    return TextWindow.Position - begin;
}
/// <summary>
/// Consumes a run of decimal digits and <c>_</c> separators, appending the digits (but not
/// the separators) to <paramref name="builder"/> when one is supplied.
/// </summary>
/// <param name="builder">Destination for the digits, or <c>null</c> to discard them.</param>
/// <returns>How far the window position moved, separators included.</returns>
private int ConsumeDecimalDigits(StringBuilder? builder)
{
    int begin = TextWindow.Position;
    while (true)
    {
        char current = TextWindow.PeekChar();
        bool isSeparator = current == '_';
        if (!isSeparator && !CharUtils.IsDecimal(current))
        {
            break;
        }

        if (!isSeparator)
        {
            builder?.Append(current);
        }

        TextWindow.AdvanceChar();
    }

    return TextWindow.Position - begin;
}
/// <summary>
/// Scans a run of ASCII decimal digits and stores its integer value in <paramref name="info"/>.
/// </summary>
/// <param name="info">Receives <c>IntValue</c> and the numeric-literal token kind.</param>
private void ScanNumericLiteral(TokenInfo info)
{
    builder.Clear();

    for (var ch = TextWindow.PeekChar(); ch >= '0' && ch <= '9'; ch = TextWindow.PeekChar())
    {
        TextWindow.AdvanceChar();
        builder.Append(ch);
    }

    // NOTE(review): int.Parse throws for digit runs that do not fit in an Int32 — confirm
    // whether callers rely on that or whether a diagnostic is wanted instead.
    info.IntValue = int.Parse(builder.ToString());
    info.Kind = SyntaxKind.NumericLiteralToken;
}
/// <summary>
/// Skips over a string or character literal nested inside an interpolation hole, including
/// any <c>\{ ... }</c> holes nested within a double-quoted string.
/// </summary>
/// <param name="quote">The delimiter of the nested literal; must be the current character.</param>
/// <param name="error">Diagnostic slot passed through to nested hole scanning.</param>
private void ScanISLNestedString(char quote, ref SyntaxDiagnosticInfo error)
{
    Debug.Assert(TextWindow.PeekChar() == quote);
    TextWindow.AdvanceChar(); // move past quote

    // Running out of input is not reported here; we'll get an error in the enclosing construct.
    while (!IsAtEnd())
    {
        char consumed = TextWindow.PeekChar();
        TextWindow.AdvanceChar();

        if (consumed == '\\')
        {
            char escaped = TextWindow.PeekChar();
            if (IsAtEnd())
            {
                return;
            }

            if (escaped != '{' || quote != '"')
            {
                TextWindow.AdvanceChar(); // move past one escaped character
                continue;
            }

            TextWindow.AdvanceChar(); // move past {
            ScanISLHoleBalancedText('}', true, ref error);
            if (TextWindow.PeekChar() == '}')
            {
                TextWindow.AdvanceChar();
            }
        }
        else if ((consumed == '\"' || consumed == '\'') && consumed == quote)
        {
            // Matching closing delimiter.
            return;
        }
    }
}
/// <summary>
/// Scans a verbatim string literal (<c>@"..."</c>) and fills in <paramref name="info"/>.
/// </summary>
/// <param name="info">Receives the token kind, the raw text and the unescaped string value.</param>
/// <returns>
/// <c>ErrorCode.ERR_UnterminatedStringLit</c> if the source ended before the closing quote;
/// otherwise <c>null</c>.
/// </returns>
private ErrorCode? ScanVerbatimStringLiteral(ref TokenInfo info)
{
    _builder.Length = 0;

    Debug.Assert(TextWindow.PeekChar() == '@' && TextWindow.PeekChar(1) == '"');
    TextWindow.AdvanceChar(2);

    ErrorCode? error = null;
    bool finished = false;
    while (!finished)
    {
        char current = TextWindow.PeekChar();
        if (current == '"')
        {
            TextWindow.AdvanceChar();
            if (TextWindow.PeekChar() == '"')
            {
                // Doubled quote: consume the second one and keep a single quote in the value.
                TextWindow.AdvanceChar();
                _builder.Append(current);
            }
            else
            {
                // A lone quote finishes the string.
                finished = true;
            }
        }
        else if (current == SlidingTextWindow.InvalidCharacter && TextWindow.IsReallyAtEnd())
        {
            // End of source without a closing quote: record the error and stop lexing the string.
            error = error ?? ErrorCode.ERR_UnterminatedStringLit;
            finished = true;
        }
        else
        {
            TextWindow.AdvanceChar();
            _builder.Append(current);
        }
    }

    info.Kind = SyntaxKind.StringLiteralToken;
    info.Text = TextWindow.GetText(intern: false);
    info.StringValue = _builder.ToString();

    return error;
}
/// <summary>
/// Scans a complete interpolated string: the opening quote, the contents (collecting any
/// holes), and the closing quote when present.
/// </summary>
/// <param name="interpolations">Optional collector for the holes found in the contents.</param>
/// <param name="info">Receives the interpolated-string token kind.</param>
/// <param name="error">Receives the first diagnostic produced, if none is set yet.</param>
internal void ScanISLTop(ArrayBuilder<Interpolation> interpolations, ref TokenInfo info, ref SyntaxDiagnosticInfo error)
{
    Debug.Assert(TextWindow.PeekChar() == '\"');
    TextWindow.AdvanceChar(); // opening "

    ScanISLContents(interpolations, ref error);

    bool hasClosingQuote = !IsAtEnd() && TextWindow.PeekChar() == '\"';
    if (hasClosingQuote)
    {
        TextWindow.AdvanceChar(); // closing "
    }
    else if (error == null)
    {
        error = MakeError(TextWindow.Position, 1, ErrorCode.ERR_NewlineInConst);
    }

    info.Kind = SyntaxKind.InterpolatedStringToken;
}
/// <summary>
/// Skips over a <c>/* ... */</c> comment nested inside an interpolation hole. If the input
/// ends first, the method simply returns.
/// </summary>
private void ScanISLNestedComment()
{
    Debug.Assert(TextWindow.PeekChar() == '/');
    TextWindow.AdvanceChar();
    Debug.Assert(TextWindow.PeekChar() == '*');
    TextWindow.AdvanceChar();

    // On end of input, let the caller complain about the unterminated quote.
    while (!IsAtEnd())
    {
        var consumed = TextWindow.PeekChar();
        TextWindow.AdvanceChar();

        bool closesComment = consumed == '*' && TextWindow.PeekChar() == '/';
        if (closesComment)
        {
            TextWindow.AdvanceChar(); // skip the '/' of */
            return;
        }
    }
}
/// <summary>
/// Scans a quoted literal. The current character is taken as the delimiter: a single quote
/// yields a character literal token, anything else a string literal token.
/// </summary>
/// <param name="info">Receives the token kind and the character or string value.</param>
private void ScanStringLiteral(TokenInfo info)
{
    builder.Clear();

    var delimiter = TextWindow.PeekChar();
    TextWindow.AdvanceChar();

    // NOTE(review): there is no end-of-input check in this loop — confirm that the window
    // is guaranteed to contain the closing delimiter.
    for (var ch = TextWindow.PeekChar(); ch != delimiter; ch = TextWindow.PeekChar())
    {
        if (ch == '\\')
        {
            builder.Append(ScanEscapeSequence());
        }
        else
        {
            TextWindow.AdvanceChar();
            builder.Append(ch);
        }
    }

    TextWindow.AdvanceChar(); // closing delimiter

    if (delimiter != '\'')
    {
        info.Kind = SyntaxKind.StringLiteralToken;
        info.StringValue = builder.ToString();
        return;
    }

    info.Kind = SyntaxKind.CharacterLiteralToken;
    // NOTE(review): indexing throws for an empty literal ('') — confirm this is intended.
    info.CharValue = builder[0];
}
/// <summary>
/// Parses the digits of a binary numeric literal (with optional <c>_</c> separators and an
/// optional <c>ull</c>/<c>ll</c>/<c>i</c> suffix), reports version and range errors, and
/// stores the text and value in <paramref name="info"/>.
/// </summary>
/// <param name="info">Receives the literal text, value kind and value.</param>
private void ParseBinaryNumber(ref TokenInfo info)
{
    ulong value = 0UL;
    int digitCount = 0;
    bool sawUnderscore = false;
    bool overflowed = false;

    while (true)
    {
        char current = TextWindow.PeekChar();
        bool isSeparator = current == '_';
        if (!isSeparator && !CharUtils.IsBinary(current))
        {
            break;
        }

        TextWindow.AdvanceChar();
        if (isSeparator)
        {
            sawUnderscore = true;
            continue;
        }

        // If the top bit is already set, the shift below pushes it out of the value.
        overflowed |= (value & 0x8000_0000_0000_0000) != 0;
        value = (value << 1) | (ulong)CharUtils.DecimalValue(current);
        digitCount++;
    }

    // At most one suffix is consumed; longer suffixes are tried first.
    bool isUnsignedLong = TextWindow.AdvanceIfMatches("ull", true);
    bool isSignedLong = !isUnsignedLong && TextWindow.AdvanceIfMatches("ll", true);
    bool isComplex = !isUnsignedLong && !isSignedLong && TextWindow.AdvanceIfMatches("i", true);
    bool hasSuffix = isUnsignedLong || isSignedLong || isComplex;

    if (!_options.SyntaxOptions.AcceptBinaryNumbers)
    {
        AddError(ErrorCode.ERR_BinaryNumericLiteralNotSupportedInVersion);
    }

    if (sawUnderscore && !_options.SyntaxOptions.AcceptUnderscoreInNumberLiterals)
    {
        AddError(ErrorCode.ERR_UnderscoreInNumericLiteralNotSupportedInVersion);
    }

    if (hasSuffix && !Options.SyntaxOptions.AcceptLuaJITNumberSuffixes)
    {
        AddError(ErrorCode.ERR_NumberSuffixNotSupportedInVersion);
    }

    if (digitCount < 1)
    {
        value = 0; // Safe default
        AddError(ErrorCode.ERR_InvalidNumber);
    }

    if (overflowed || (!isUnsignedLong && value > long.MaxValue))
    {
        value = 0; // Safe default
        AddError(ErrorCode.ERR_NumericLiteralTooLarge);
    }

    info.Text = TextWindow.GetText(intern: true);

    if (isUnsignedLong)
    {
        info.ValueKind = ValueKind.ULong;
        info.ULongValue = value;
        return;
    }

    if (isSignedLong)
    {
        info.ValueKind = ValueKind.Long;
        info.LongValue = unchecked((long)value);
        return;
    }

    if (isComplex)
    {
        info.ValueKind = ValueKind.Complex;
        info.ComplexValue = new Complex(0, value);
        return;
    }

    // No suffix: the configured integer format decides how the value is represented.
    switch (_options.SyntaxOptions.BinaryIntegerFormat)
    {
        case IntegerFormats.NotSupported:
        case IntegerFormats.Double:
            info.ValueKind = ValueKind.Double;
            info.DoubleValue = value;
            break;

        case IntegerFormats.Int64:
            info.ValueKind = ValueKind.Long;
            info.LongValue = unchecked((long)value);
            break;

        default:
            throw ExceptionUtilities.UnexpectedValue(_options.SyntaxOptions.BinaryIntegerFormat);
    }
}
#pragma warning restore IDE0079 // Remove unnecessary suppression
/// <summary>
/// Parses the body of a hexadecimal literal: hex digits, an optional fractional part, an
/// optional <c>p</c> exponent and an optional <c>ull</c>/<c>ll</c>/<c>i</c> suffix. Reports
/// version and range errors and stores the text and value in <paramref name="info"/>.
/// </summary>
/// <param name="info">Receives the literal text, value kind and value.</param>
private void ParseHexadecimalNumber(ref TokenInfo info)
{
    _builder.Clear();
    ConsumeHexDigits();

    bool isHexFloat = false;

    // Optional fractional part.
    if (TextWindow.PeekChar() == '.')
    {
        TextWindow.AdvanceChar();
        isHexFloat = true;
        _builder.Append('.');
        ConsumeHexDigits();
    }

    // Optional binary exponent, with an optional sign.
    if (CharUtils.AsciiLowerCase(TextWindow.PeekChar()) == 'p')
    {
        TextWindow.AdvanceChar();
        isHexFloat = true;
        _builder.Append('p');

        char sign = TextWindow.PeekChar();
        if (sign == '+' || sign == '-')
        {
            _builder.Append(TextWindow.NextChar());
        }

        ConsumeDecimalDigits(_builder);
    }

    bool isUnsignedLong = false;
    bool isSignedLong = false;
    bool isComplex = false;
    if (TextWindow.AdvanceIfMatches("ull", true))
    {
        // Integer suffixes are consumed but rejected on a hex float.
        if (isHexFloat)
        {
            AddError(ErrorCode.ERR_LuajitSuffixInFloat);
        }
        else
        {
            isUnsignedLong = true;
        }
    }
    else if (TextWindow.AdvanceIfMatches("ll", true))
    {
        if (isHexFloat)
        {
            AddError(ErrorCode.ERR_LuajitSuffixInFloat);
        }
        else
        {
            isSignedLong = true;
        }
    }
    else if (TextWindow.AdvanceIfMatches("i", true))
    {
        isComplex = true;
    }

    info.Text = TextWindow.GetText(intern: true);

    if (!_options.SyntaxOptions.AcceptUnderscoreInNumberLiterals && info.Text.IndexOf('_') >= 0)
    {
        AddError(ErrorCode.ERR_UnderscoreInNumericLiteralNotSupportedInVersion);
    }

    bool hasSuffix = isUnsignedLong || isSignedLong || isComplex;
    if (hasSuffix && !Options.SyntaxOptions.AcceptLuaJITNumberSuffixes)
    {
        AddError(ErrorCode.ERR_NumberSuffixNotSupportedInVersion);
    }

    if (isUnsignedLong)
    {
        if (!ulong.TryParse(TextWindow.Intern(_builder), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out var unsignedValue))
        {
            AddError(ErrorCode.ERR_NumericLiteralTooLarge);
        }

        info.ValueKind = ValueKind.ULong;
        info.ULongValue = unsignedValue;
        return;
    }

    if (isSignedLong)
    {
        if (!long.TryParse(TextWindow.Intern(_builder), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out var signedValue))
        {
            AddError(ErrorCode.ERR_NumericLiteralTooLarge);
        }

        info.ValueKind = ValueKind.Long;
        info.LongValue = signedValue;
        return;
    }

    if (isComplex)
    {
        double imaginary = parseHexDouble();
        info.ValueKind = ValueKind.Complex;
        info.ComplexValue = new Complex(0, imaginary);
        return;
    }

    if (isHexFloat || _options.SyntaxOptions.HexIntegerFormat == IntegerFormats.NotSupported)
    {
        if (!_options.SyntaxOptions.AcceptHexFloatLiterals)
        {
            AddError(ErrorCode.ERR_HexFloatLiteralNotSupportedInVersion);
        }

        double floatValue = parseHexDouble();
        info.ValueKind = ValueKind.Double;
        info.DoubleValue = floatValue;
        return;
    }

    // Plain hexadecimal integer: parse as Int64, then represent it per the configured format.
    if (!long.TryParse(TextWindow.Intern(_builder), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out var integerValue))
    {
        AddError(ErrorCode.ERR_NumericLiteralTooLarge);
    }

    switch (_options.SyntaxOptions.HexIntegerFormat)
    {
        case IntegerFormats.Double:
            info.ValueKind = ValueKind.Double;
            info.DoubleValue = integerValue;
            break;

        case IntegerFormats.Int64:
            info.ValueKind = ValueKind.Long;
            info.LongValue = integerValue;
            break;

        default:
            throw ExceptionUtilities.UnexpectedValue(_options.SyntaxOptions.HexIntegerFormat);
    }

    // Converts the accumulated digits to a double; on overflow reports an error and yields 0.
    double parseHexDouble()
    {
        try
        {
            return HexFloat.DoubleFromHexString(TextWindow.Intern(_builder));
        }
        catch (OverflowException)
        {
            AddError(ErrorCode.ERR_DoubleOverflow);
            return 0d;
        }
    }
}
/// <summary>
/// Parses the digits of an octal numeric literal (with optional <c>_</c> separators), reports
/// version and range errors, and stores the text and value in <paramref name="info"/>.
/// </summary>
/// <param name="info">Receives the literal text, value kind and value.</param>
private void ParseOctalNumber(ref TokenInfo info)
{
    long value = 0L;
    int digitCount = 0;
    bool sawUnderscore = false;
    bool overflowed = false;

    while (true)
    {
        char current = TextWindow.PeekChar();
        bool isSeparator = current == '_';
        if (!isSeparator && !CharUtils.IsOctal(current))
        {
            break;
        }

        TextWindow.AdvanceChar();
        if (isSeparator)
        {
            sawUnderscore = true;
            continue;
        }

        // If any of these bits are set, the three-bit shift below overflows.
        overflowed |= (value & 0x7000_0000_0000_0000) != 0;
        value = (value << 3) | CharUtils.DecimalValue(current);
        digitCount++;
    }

    if (!_options.SyntaxOptions.AcceptOctalNumbers)
    {
        AddError(ErrorCode.ERR_OctalNumericLiteralNotSupportedInVersion);
    }

    if (sawUnderscore && !_options.SyntaxOptions.AcceptUnderscoreInNumberLiterals)
    {
        AddError(ErrorCode.ERR_UnderscoreInNumericLiteralNotSupportedInVersion);
    }

    if (digitCount < 1)
    {
        value = 0; // Safe default
        AddError(ErrorCode.ERR_InvalidNumber);
    }

    if (overflowed)
    {
        value = 0; // Safe default
        AddError(ErrorCode.ERR_NumericLiteralTooLarge);
    }

    info.Text = TextWindow.GetText(intern: true);

    // The configured integer format decides how the value is represented.
    var format = _options.SyntaxOptions.OctalIntegerFormat;
    if (format == IntegerFormats.NotSupported || format == IntegerFormats.Double)
    {
        info.ValueKind = ValueKind.Double;
        info.DoubleValue = value;
    }
    else if (format == IntegerFormats.Int64)
    {
        info.ValueKind = ValueKind.Long;
        info.LongValue = value;
    }
    else
    {
        throw ExceptionUtilities.UnexpectedValue(_options.SyntaxOptions.OctalIntegerFormat);
    }
}
/// <summary>
/// Scans one token starting at the current character and records the result in
/// <paramref name="info"/>. Quotes dispatch to the string scanner, ASCII letters and
/// <c>_</c> to the identifier/keyword scanner, ASCII digits to the numeric scanner, and
/// single punctuation characters are consumed directly.
/// </summary>
/// <remarks>
/// A character that matches none of these categories is left unconsumed and
/// <paramref name="info"/> is not modified.
/// </remarks>
/// <param name="info">Receives the kind (and, via the sub-scanners, the value) of the token.</param>
private void ScanSyntaxToken(TokenInfo info)
{
    var character = TextWindow.PeekChar();

    if (character == '"' || character == '\'')
    {
        this.ScanStringLiteral(info);
        return;
    }

    SyntaxKind punctuationKind;
    if (TryGetPunctuationKind(character, out punctuationKind))
    {
        TextWindow.AdvanceChar();
        info.Kind = punctuationKind;
        return;
    }

    bool isIdentifierStart =
        (character >= 'a' && character <= 'z') ||
        (character >= 'A' && character <= 'Z') ||
        character == '_';
    if (isIdentifierStart)
    {
        ScanIdentifierOrKeyword(info);
        return;
    }

    if (character >= '0' && character <= '9')
    {
        ScanNumericLiteral(info);
    }
}

/// <summary>
/// Maps a single punctuation character to its token kind.
/// </summary>
/// <param name="character">The character to classify.</param>
/// <param name="kind">The matching token kind, or the default value when there is no match.</param>
/// <returns><c>true</c> if <paramref name="character"/> is a known punctuation character.</returns>
private static bool TryGetPunctuationKind(char character, out SyntaxKind kind)
{
    switch (character)
    {
        case '&': kind = SyntaxKind.AmpersandToken; return true;
        case '*': kind = SyntaxKind.AsteriskToken; return true;
        case '\\': kind = SyntaxKind.BackslashToken; return true;
        case '|': kind = SyntaxKind.BarToken; return true;
        case '^': kind = SyntaxKind.CaretToken; return true;
        case '}': kind = SyntaxKind.CloseBraceToken; return true;
        case ']': kind = SyntaxKind.CloseBracketToken; return true;
        // ')' was the one closer missing from the table, so a close paren was never tokenized.
        // NOTE(review): assumes SyntaxKind.CloseParenToken exists alongside OpenParenToken — confirm.
        case ')': kind = SyntaxKind.CloseParenToken; return true;
        case ':': kind = SyntaxKind.ColonToken; return true;
        case ',': kind = SyntaxKind.CommaToken; return true;
        case '$': kind = SyntaxKind.DollarToken; return true;
        case '.': kind = SyntaxKind.DotToken; return true;
        case '=': kind = SyntaxKind.EqualsToken; return true;
        case '!': kind = SyntaxKind.ExclamationToken; return true;
        case '>': kind = SyntaxKind.GreaterThanToken; return true;
        case '#': kind = SyntaxKind.HashToken; return true;
        case '<': kind = SyntaxKind.LessThanToken; return true;
        case '-': kind = SyntaxKind.MinusToken; return true;
        case '{': kind = SyntaxKind.OpenBraceToken; return true;
        case '[': kind = SyntaxKind.OpenBracketToken; return true;
        case '(': kind = SyntaxKind.OpenParenToken; return true;
        case '%': kind = SyntaxKind.PercentToken; return true;
        case '+': kind = SyntaxKind.PlusToken; return true;
        case '?': kind = SyntaxKind.QuestionToken; return true;
        case ';': kind = SyntaxKind.SemicolonToken; return true;
        case '/': kind = SyntaxKind.SlashToken; return true;
        case '~': kind = SyntaxKind.TildeToken; return true;
        default:
            kind = default(SyntaxKind);
            return false;
    }
}
/// <summary>
/// Scans one line of a multi-line raw string literal, starting at the newline that precedes it.
/// </summary>
/// <param name="startingQuoteCount">Number of quotes in the literal's opening delimiter.</param>
/// <param name="indentationWhitespace">Cleared, then filled with the leading whitespace of this line.</param>
/// <returns>
/// <c>true</c> if a content line was consumed up to its newline and more lines should be
/// scanned; <c>false</c> if this was the terminating line or an error ended the scan.
/// </returns>
private bool ScanMultiLineRawStringLiteralLine(int startingQuoteCount, StringBuilder indentationWhitespace)
{
    TextWindow.AdvancePastNewLine();

    indentationWhitespace.Clear();
    ConsumeWhitespace(indentationWhitespace);

    // A quote run right after the leading whitespace may be the closing delimiter.
    int quoteRun = ConsumeQuoteSequence();
    if (quoteRun >= startingQuoteCount)
    {
        // A raw string can never be directly followed by another string, so quotes beyond the
        // delimiter length are reported as excess.
        int surplusQuotes = quoteRun - startingQuoteCount;
        if (surplusQuotes > 0)
        {
            this.AddError(
                position: TextWindow.Position - surplusQuotes,
                width: surplusQuotes,
                ErrorCode.ERR_TooManyQuotesForRawString);
        }

        // Done scanning lines.
        return false;
    }

    // Not the terminating line: eat content up to the end of the line (or of the file, on error).
    for (var next = TextWindow.PeekChar(); ; next = TextWindow.PeekChar())
    {
        if (IsAtEndOfText(next))
        {
            this.AddError(TextWindow.Position, width: 0, ErrorCode.ERR_UnterminatedRawString);
            return false;
        }

        if (SyntaxFacts.IsNewLine(next))
        {
            return true;
        }

        if (next != '"')
        {
            TextWindow.AdvanceChar();
            continue;
        }

        // A content line may not contain a quote run that looks like a delimiter (or longer).
        quoteRun = ConsumeQuoteSequence();
        if (quoteRun >= startingQuoteCount)
        {
            this.AddError(
                position: TextWindow.Position - quoteRun,
                width: quoteRun,
                ErrorCode.ERR_RawStringDelimiterOnOwnLine);
            return false;
        }
    }
}
/// <summary>
/// Scans an identifier one character at a time, handling unicode escapes, non-ASCII
/// identifier characters and formatting characters, and accumulating the result in the
/// identifier buffer.
/// </summary>
/// <param name="info">
/// Receives <c>IsVerbatim</c>, <c>HasIdentifierEscapeSequence</c>, <c>Text</c> and
/// <c>StringValue</c>. <c>Text</c> and <c>StringValue</c> are set to <c>null</c> when no
/// identifier was found.
/// </param>
/// <returns>
/// <c>true</c> if at least one identifier character was collected; otherwise <c>false</c>,
/// after the text window has been reset to where scanning began.
/// </returns>
private bool ScanIdentifier_SlowPath(ref TokenInfo info)
{
    // Remembered so the window can be rewound if nothing turns out to be an identifier.
    int start = TextWindow.Position;
    this.ResetIdentBuffer();

    // A leading '@' is consumed in both cases below; it only counts as "verbatim" when
    // immediately followed by a double quote.
    info.IsVerbatim = TextWindow.PeekChar() == '@' && TextWindow.PeekChar(1) == '"';
    var isAnnoStart = !info.IsVerbatim && TextWindow.PeekChar() == '@';
    if (info.IsVerbatim || isAnnoStart)
    {
        TextWindow.AdvanceChar();
    }

    while (true)
    {
        char surrogateCharacter = SlidingTextWindow.InvalidCharacter;
        bool isEscaped = false;
        char ch = TextWindow.PeekChar();
    top:
        // Classify `ch`. `break` means "accept it as part of the identifier" and falls through
        // to the consume/append code after the switch; `goto LoopExit` ends the identifier.
        switch (ch)
        {
            case '\\':
                if (!isEscaped && TextWindow.IsUnicodeEscape())
                {
                    // The !isEscaped check matters: otherwise \u005Cu1234 looks just like \u1234!
                    // (i.e. escape within escape)
                    info.HasIdentifierEscapeSequence = true;
                    isEscaped = true;
                    ch = TextWindow.PeekUnicodeEscape(out surrogateCharacter);

                    // Re-classify the decoded character.
                    goto top;
                }

                goto default;
            case '$':
                goto LoopExit;
            case SlidingTextWindow.InvalidCharacter:
                // The sentinel value only means end of input if the window confirms it.
                if (!TextWindow.IsReallyAtEnd())
                {
                    goto default;
                }

                goto LoopExit;
            case '_':
            case 'A':
            case 'B':
            case 'C':
            case 'D':
            case 'E':
            case 'F':
            case 'G':
            case 'H':
            case 'I':
            case 'J':
            case 'K':
            case 'L':
            case 'M':
            case 'N':
            case 'O':
            case 'P':
            case 'Q':
            case 'R':
            case 'S':
            case 'T':
            case 'U':
            case 'V':
            case 'W':
            case 'X':
            case 'Y':
            case 'Z':
            case 'a':
            case 'b':
            case 'c':
            case 'd':
            case 'e':
            case 'f':
            case 'g':
            case 'h':
            case 'i':
            case 'j':
            case 'k':
            case 'l':
            case 'm':
            case 'n':
            case 'o':
            case 'p':
            case 'q':
            case 'r':
            case 's':
            case 't':
            case 'u':
            case 'v':
            case 'w':
            case 'x':
            case 'y':
            case 'z':
                {
                    // Again, these are the 'common' identifier characters...
                    break;
                }
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
                {
                    // A digit cannot start an identifier.
                    if (this._identLen == 0)
                    {
                        goto LoopExit;
                    }
                    // Again, these are the 'common' identifier characters...
                    break;
                }
            case ' ':
            case '\t':
            case '.':
            case ';':
            case '(':
            case ')':
            case ',':
                // ...and these are the 'common' stop characters.
                goto LoopExit;
            case '<':
                if (this._identLen == 0 && this.ModeIs(LexerMode.DebuggerSyntax) && TextWindow.PeekChar(1) == '>')
                {
                    // In DebuggerSyntax mode, identifiers are allowed to begin with <>.
                    TextWindow.AdvanceChar(2);
                    this.AddIdentChar('<');
                    this.AddIdentChar('>');
                    continue;
                }

                goto LoopExit;
            default:
                {
                    // This is the 'expensive' call
                    if (this._identLen == 0 && ch > 127 && SyntaxKindFacts.IsIdentifierStartCharacter(ch))
                    {
                        break;
                    }
                    else if (this._identLen > 0 && ch > 127 && SyntaxKindFacts.IsIdentifierPartCharacter(ch))
                    {
                        //// BUG 424819 : Handle identifier chars > 0xFFFF via surrogate pairs
                        if (SyntaxKindFacts.IsFormattingChar(ch))
                        {
                            // Formatting characters are consumed from the input but never
                            // added to the identifier buffer.
                            if (isEscaped)
                            {
                                SyntaxDiagnosticInfo error;
                                TextWindow.NextCharOrUnicodeEscape(out surrogateCharacter, out error);
                                AddError(error);
                            }
                            else
                            {
                                TextWindow.AdvanceChar();
                            }
                            continue; // Ignore formatting characters
                        }
                        break;
                    }
                    else
                    {
                        // Not a valid identifier character, so bail.
                        goto LoopExit;
                    }
                }
        }

        // The character was accepted: consume it from the input (as a whole escape sequence
        // if that is how it was written) and append it to the identifier buffer.
        if (isEscaped)
        {
            SyntaxDiagnosticInfo error;
            TextWindow.NextCharOrUnicodeEscape(out surrogateCharacter, out error);
            AddError(error);
        }
        else
        {
            TextWindow.AdvanceChar();
        }

        this.AddIdentChar(ch);
        if (surrogateCharacter != SlidingTextWindow.InvalidCharacter)
        {
            this.AddIdentChar(surrogateCharacter);
        }
    }

LoopExit:
    var width = TextWindow.Width; // exact size of input characters
    if (this._identLen > 0)
    {
        info.Text = TextWindow.GetInternedText();

        // id buffer is identical to width in input
        if (this._identLen == width)
        {
            info.StringValue = info.Text;
        }
        else
        {
            // The buffer length differs from the input width, so the value is interned
            // from the buffer instead of reusing the raw text.
            info.StringValue = TextWindow.Intern(this._identBuffer, 0, this._identLen);
        }

        return(true);
    }
    else
    {
        // Nothing collected: undo any consumption (including a leading '@').
        info.Text = null;
        info.StringValue = null;
        TextWindow.Reset(start);
        return(false);
    }
}
/// <summary>
/// Scans a single- or double-quoted literal and fills in <paramref name="info"/>. If the
/// current character is not a quote, the kind is set to <c>None</c> and nothing is consumed.
/// </summary>
/// <param name="info">Receives the token kind, raw text and string/character value.</param>
/// <param name="allowEscapes">Whether a backslash starts an escape sequence.</param>
private void ScanStringLiteral(ref TokenInfo info, bool allowEscapes = true)
{
    var delimiter = TextWindow.PeekChar();
    if (delimiter != '\'' && delimiter != '"')
    {
        info.Kind = SyntaxKind.None;
        info.Text = null;
        return;
    }

    TextWindow.AdvanceChar();
    _builder.Length = 0;

    bool finished = false;
    while (!finished)
    {
        char current = TextWindow.PeekChar();
        if (allowEscapes && current == '\\')
        {
            // normal string & char constants can have escapes
            char trailing;
            char decoded = this.ScanEscapeSequence(out trailing);
            _builder.Append(decoded);
            if (trailing != SlidingTextWindow.InvalidCharacter)
            {
                _builder.Append(trailing);
            }
        }
        else if (current == delimiter)
        {
            TextWindow.AdvanceChar();
            finished = true;
        }
        else if (SyntaxFacts.IsNewLine(current) ||
                 (current == SlidingTextWindow.InvalidCharacter && TextWindow.IsReallyAtEnd()))
        {
            // String and character literals can contain any Unicode character; they are not
            // limited to valid UTF-16 characters. So when the window's sentinel value shows up,
            // double check that it was not real user-code contents. This will be rare.
            Debug.Assert(TextWindow.Width > 0);
            this.AddError(ErrorCode.ERR_NewlineInConst);
            finished = true;
        }
        else
        {
            TextWindow.AdvanceChar();
            _builder.Append(current);
        }
    }

    info.Text = TextWindow.GetText(true);

    bool isCharLiteral = delimiter == '\'';
    info.Kind = isCharLiteral ? SyntaxKind.CharacterLiteralToken : SyntaxKind.StringLiteralToken;

    // A character literal must contain exactly one character.
    if (isCharLiteral && _builder.Length != 1)
    {
        this.AddError((_builder.Length != 0) ? ErrorCode.ERR_TooManyCharsInConst : ErrorCode.ERR_EmptyCharConst);
    }

    bool hasContent = _builder.Length > 0;
    info.StringValue = hasContent ? TextWindow.Intern(_builder) : string.Empty;

    if (isCharLiteral)
    {
        info.CharValue = hasContent ? info.StringValue[0] : SlidingTextWindow.InvalidCharacter;
    }
}
/// <summary>
/// Scans a verbatim string literal (<c>@"..."</c>) and fills in <paramref name="info"/>. If
/// the window is not positioned on <c>@"</c>, the kind is set to <c>None</c> and nothing is
/// consumed.
/// </summary>
/// <param name="info">Receives the token kind, raw text and unescaped string value.</param>
/// <param name="allowNewlines">
/// When <c>false</c>, a newline ends the literal with an unterminated-string error.
/// </param>
private void ScanVerbatimStringLiteral(ref TokenInfo info, bool allowNewlines = true)
{
    _builder.Length = 0;

    bool startsVerbatimString = TextWindow.PeekChar() == '@' && TextWindow.PeekChar(1) == '"';
    if (!startsVerbatimString)
    {
        info.Kind = SyntaxKind.None;
        info.Text = null;
        info.StringValue = null;
        return;
    }

    TextWindow.AdvanceChar(2);

    while (true)
    {
        char current = TextWindow.PeekChar();
        if (current == '"')
        {
            TextWindow.AdvanceChar();
            if (TextWindow.PeekChar() != '"')
            {
                // A lone quote closes the literal.
                break;
            }

            // Doubled quote -- skip & put the single quote in the string
            TextWindow.AdvanceChar();
            _builder.Append(current);
            continue;
        }

        // Both running off the end of the source and hitting a disallowed newline stop the
        // scan with the same error, without consuming the offending character.
        bool atEndOfSource = current == SlidingTextWindow.InvalidCharacter && TextWindow.IsReallyAtEnd();
        if (atEndOfSource || (!allowNewlines && SyntaxFacts.IsNewLine(current)))
        {
            this.AddError(ErrorCode.ERR_UnterminatedStringLit);
            break;
        }

        TextWindow.AdvanceChar();
        _builder.Append(current);
    }

    info.Kind = SyntaxKind.StringLiteralToken;
    info.Text = TextWindow.GetText(false);
    info.StringValue = _builder.ToString();
}
/// <summary>
/// Parses a short string literal whose opening delimiter (<c>"</c>, <c>'</c> or
/// <c>`</c>) is the next character in the text window, processing escape sequences
/// as it goes.
/// </summary>
/// <remarks>
/// Problems (invalid or unsupported escapes, unescaped line breaks, a missing closing
/// delimiter) are reported through <c>AddError</c>; parsing continues regardless.
/// </remarks>
/// <returns>The interned contents accumulated for the string.</returns>
private string ParseShortString()
{
    _builder.Clear();
    var delim = TextWindow.NextChar();
    LorettaDebug.Assert(delim is '"' or '\'' or '`');

    char ch;
    while (!IsAtEnd(ch = TextWindow.PeekChar()) && ch != delim)
    {
        var charStart = TextWindow.Position;
        switch (ch)
        {
            #region Escapes

            case '\\':
            {
                var escapeStart = TextWindow.Position;
                TextWindow.AdvanceChar();

                switch (ch = TextWindow.PeekChar())
                {
                    case '\n':
                    case '\r':
                    {
                        // Escaped line break: keep it, and also keep the second half
                        // of a two-character break (\r\n or \n\r).
                        _builder.Append(TextWindow.NextChar());
                        char ch2;
                        if (CharUtils.IsNewLine(ch2 = TextWindow.PeekChar()) && ch != ch2)
                        {
                            _builder.Append(TextWindow.NextChar());
                        }
                        break;
                    }

                    case 'a':
                        TextWindow.AdvanceChar();
                        _builder.Append('\a');
                        break;

                    case 'b':
                        TextWindow.AdvanceChar();
                        _builder.Append('\b');
                        break;

                    case 'f':
                        TextWindow.AdvanceChar();
                        _builder.Append('\f');
                        break;

                    case 'n':
                        TextWindow.AdvanceChar();
                        _builder.Append('\n');
                        break;

                    case 'r':
                        TextWindow.AdvanceChar();
                        _builder.Append('\r');
                        break;

                    case 't':
                        TextWindow.AdvanceChar();
                        _builder.Append('\t');
                        break;

                    case 'v':
                        TextWindow.AdvanceChar();
                        _builder.Append('\v');
                        break;

                    case '\\':
                        TextWindow.AdvanceChar();
                        _builder.Append('\\');
                        break;

                    case '\'':
                        TextWindow.AdvanceChar();
                        _builder.Append('\'');
                        break;

                    case '"':
                        TextWindow.AdvanceChar();
                        _builder.Append('"');
                        break;

                    case 'z':
                        // \z contributes nothing itself and swallows the whitespace after it.
                        TextWindow.AdvanceChar();
                        while (CharUtils.IsWhitespace(TextWindow.PeekChar()))
                        {
                            TextWindow.AdvanceChar();
                        }

                        if (!_options.SyntaxOptions.AcceptWhitespaceEscape)
                        {
                            AddError(escapeStart, TextWindow.Position - escapeStart, ErrorCode.ERR_WhitespaceEscapeNotSupportedInVersion);
                        }
                        break;

                    case '0':
                    case '1':
                    case '2':
                    case '3':
                    case '4':
                    case '5':
                    case '6':
                    case '7':
                    case '8':
                    case '9':
                    {
                        // char.MaxValue is the "invalid escape" sentinel: append nothing.
                        var parsedCharInteger = parseDecimalInteger(escapeStart);
                        if (parsedCharInteger != char.MaxValue)
                        {
                            _builder.Append(parsedCharInteger);
                        }
                        break;
                    }

                    case 'x':
                    {
                        TextWindow.AdvanceChar();

                        // char.MaxValue is the "invalid escape" sentinel: append nothing.
                        var parsedCharInteger = parseHexadecimalEscapeInteger(escapeStart);
                        if (parsedCharInteger != char.MaxValue)
                        {
                            _builder.Append(parsedCharInteger);
                        }

                        if (!_options.SyntaxOptions.AcceptHexEscapesInStrings)
                        {
                            AddError(escapeStart, TextWindow.Position - escapeStart, ErrorCode.ERR_HexStringEscapesNotSupportedInVersion);
                        }
                    }
                    break;

                    case 'u':
                    {
                        TextWindow.AdvanceChar();
                        var parsed = parseUnicodeEscape(escapeStart);
                        _builder.Append(parsed);

                        if (!_options.SyntaxOptions.AcceptUnicodeEscape)
                        {
                            AddError(escapeStart, TextWindow.Position - escapeStart, ErrorCode.ERR_UnicodeEscapesNotSupportedLuaInVersion);
                        }
                    }
                    break;

                    default:
                        if (!_options.SyntaxOptions.AcceptInvalidEscapes)
                        {
                            // Skip the character after the escape.
                            TextWindow.AdvanceChar();
                            AddError(escapeStart, TextWindow.Position - escapeStart, ErrorCode.ERR_InvalidStringEscape);
                        }
                        break;
                }
            }
            break;

            #endregion Escapes

            case '\n':
            case '\r':
            {
                // An unescaped line break is kept in the value but reported as an error.
                _builder.Append(TextWindow.NextChar());
                char ch2;
                if (CharUtils.IsNewLine(ch2 = TextWindow.PeekChar()) && ch != ch2)
                {
                    _builder.Append(TextWindow.NextChar());
                }

                AddError(charStart, TextWindow.Position - charStart, ErrorCode.ERR_UnescapedLineBreakInString);
            }
            break;

            default:
                _builder.Append(TextWindow.NextChar());
                break;
        }
    }

    if (TextWindow.PeekChar() == delim)
    {
        TextWindow.AdvanceChar();
    }
    else
    {
        AddError(ErrorCode.ERR_UnfinishedString);
    }

    return TextWindow.Intern(_builder);

    // Reads up to three decimal digits of a \ddd escape. Returns char.MaxValue
    // (after reporting an error) when no digit was read or the value exceeds 255.
    char parseDecimalInteger(int start)
    {
        var readChars = 0;
        var num = 0;
        char ch;
        while (readChars < 3 && CharUtils.IsDecimal(ch = TextWindow.PeekChar()))
        {
            TextWindow.AdvanceChar();
            num = (num * 10) + (ch - '0');
            readChars++;
        }

        if (readChars < 1 || num > 255)
        {
            AddError(start, TextWindow.Position - start, ErrorCode.ERR_InvalidStringEscape);
            return char.MaxValue;
        }

        return (char) num;
    }

    // Reads up to maxDigits hexadecimal digits into value. When no digit is present,
    // reports noDigitsErrorCode, sets value to 0 and returns false.
    bool tryParseHexadecimalNumber(int start, int maxDigits, ErrorCode noDigitsErrorCode, out ulong value)
    {
        var readChars = 0;
        var num = 0L;
        while (readChars < maxDigits)
        {
            var peek = TextWindow.PeekChar();
            if (CharUtils.IsDecimal(peek))
            {
                TextWindow.AdvanceChar();
                num = (num << 4) | (uint) (peek - '0');
            }
            else if (CharUtils.IsHexadecimal(peek))
            {
                TextWindow.AdvanceChar();
                num = (num << 4) | (uint) (10 + CharUtils.AsciiLowerCase(peek) - 'a');
            }
            else
            {
                break;
            }
            readChars++;
        }

        if (readChars < 1)
        {
            AddError(start, TextWindow.Position - start, noDigitsErrorCode);
            value = 0UL;
            return false;
        }

        value = (ulong) num;
        return true;
    }

    // Reads the (up to two) hex digits of a \x escape. Returns char.MaxValue when no
    // digit was present so the caller appends nothing; a valid value is at most 0xFF,
    // so the sentinel can never collide with a real result.
    char parseHexadecimalEscapeInteger(int start) =>
        tryParseHexadecimalNumber(start, 2, ErrorCode.ERR_InvalidStringEscape, out var value)
            ? (char) value
            : char.MaxValue;

    // Reads the body of a \u escape: optional '{', up to 16 hex digits, optional '}'.
    // Missing braces, missing digits and out-of-range values are reported as errors;
    // the result is still produced (0 when no digit was read, clamped to 0x10FFFF).
    string parseUnicodeEscape(int start)
    {
        var missingOpeningBrace = TextWindow.PeekChar() is not '{';
        if (!missingOpeningBrace)
        {
            TextWindow.AdvanceChar();
        }

        // The success flag is intentionally ignored: on failure the error has already
        // been reported and the codepoint is 0.
        _ = tryParseHexadecimalNumber(start, 16, ErrorCode.ERR_HexDigitExpected, out var codepoint);

        var missingClosingBrace = TextWindow.PeekChar() is not '}';
        if (!missingClosingBrace)
        {
            TextWindow.AdvanceChar();
        }

        if (missingOpeningBrace)
        {
            AddError(start, TextWindow.Position - start, ErrorCode.ERR_UnicodeEscapeMissingOpenBrace);
        }

        if (missingClosingBrace)
        {
            AddError(start, TextWindow.Position - start, ErrorCode.ERR_UnicodeEscapeMissingCloseBrace);
        }

        if (codepoint > 0x10FFFF)
        {
            AddError(start, TextWindow.Position - start, ErrorCode.ERR_EscapeTooLarge, "10FFFF");
            codepoint = 0x10FFFF;
        }

        // Return the codepoint itself if it's in the BMP.
        // NOTE: It *is* technically incorrect to consider a surrogate
        //       a Unicode codepoint but Lua accepts it so we do it as well.
        if (codepoint <= 0xFFFF)
        {
            return char.ToString((char) codepoint);
        }

        return char.ConvertFromUtf32((int) codepoint);
    }
}
/// <summary>
/// Scan past the balanced text of a hole inside an interpolated string literal,
/// leaving the current character on <paramref name="endingChar"/> (if any).
/// </summary>
/// <param name="endingChar">
/// The closing character (<c>}</c>, <c>)</c> or <c>]</c>) that ends the text being scanned.
/// </param>
/// <param name="isHole">Not read by this method.</param>
/// <param name="error">
/// Receives the first error found while scanning; an error that is already set is
/// never overwritten here.
/// </param>
private void ScanISLHoleBalancedText(char endingChar, bool isHole, ref SyntaxDiagnosticInfo error)
{
    while (true)
    {
        if (IsAtEnd())
        {
            // the caller will complain
            return;
        }

        char ch = TextWindow.PeekChar();
        switch (ch)
        {
            case '}':
            case ')':
            case ']':
                if (ch == endingChar)
                {
                    return;
                }

                // A closer that does not match the one we are looking for.
                if (error == null)
                {
                    error = MakeError(TextWindow.Position, 1, ErrorCode.ERR_SyntaxError, endingChar.ToString());
                }

                goto default;

            case '\"':
            case '\'':
                // handle string or character literal inside an expression hole.
                ScanISLNestedString(ch, ref error);
                continue;

            case '@':
                if (TextWindow.PeekChar(1) == '\"')
                {
                    // Verbatim string inside an expression hole. The nested scanner
                    // consumes the whole literal, so resume at the character that
                    // follows it instead of falling into 'default', which would skip
                    // that character (possibly the closing endingChar).
                    ScanISLNestedVerbatimString(ref error);
                    continue;
                }

                goto default;

            case '/':
                switch (TextWindow.PeekChar(1))
                {
                    case '/':
                        // error: single-line comment not allowed in an interpolated string
                        if (error == null)
                        {
                            error = MakeError(TextWindow.Position, 2, ErrorCode.ERR_SingleLineCommentInExpressionHole);
                        }

                        TextWindow.AdvanceChar();
                        TextWindow.AdvanceChar();
                        continue;

                    case '*':
                        // check for and scan /* comment */
                        ScanISLNestedComment();
                        continue;

                    default:
                        TextWindow.AdvanceChar();
                        continue;
                }

            case '{':
                ScanISLHoleBracketed('{', '}', ref error);
                continue;

            case '(':
                ScanISLHoleBracketed('(', ')', ref error);
                continue;

            case '[':
                ScanISLHoleBracketed('[', ']', ref error);
                continue;

            default:
                // part of code in the expression hole
                TextWindow.AdvanceChar();
                continue;
        }
    }
}
// Fast identifier lexer for the common case where:
//
//   a) the identifier is not verbatim
//   b) the identifier contains no unicode characters
//   c) the identifier contains no unicode escapes
//
// Nearly all identifiers consist of [_a-zA-Z0-9] and are terminated by one of a
// small set of known characters (dot, comma, etc.), so we sit in a tight loop
// looking for that pattern and fall back to the slower (but correct) path as
// soon as we see something we can't handle.
//
// This only works when the identifier *and* its terminator are both inside the
// current sliding window of characters. Under that constraint we can index the
// window directly instead of paying for peek/advance/next, and we avoid the
// unnecessary stores/reads of instance state while lexing: only the start and
// current offsets are tracked.
//
// Note: it is critical that this method is only called from a codepath that
// checked for IsIdentifierStartChar or '@' first.
//
// Returns true when an identifier was scanned into 'info'; false when the
// caller must use the slow path (nothing has been consumed in that case).
private bool ScanIdentifier_FastPath(ref TokenInfo info)
{
    if ((_mode & LexerMode.MaskLexMode) == LexerMode.DebuggerSyntax)
    {
        // Debugger syntax is wonky. Can't use the fast path for it.
        return false;
    }

    var window = TextWindow.CharacterWindow;
    var windowEnd = TextWindow.CharacterWindowCount;
    var start = TextWindow.Offset;

    for (var cursor = start; cursor != windowEnd; cursor++)
    {
        switch (window[cursor])
        {
            // Characters that are valid inside an identifier: consume and keep going.
            // NOTE(review): '$' is accepted here as an identifier character --
            // confirm this matches what the slow path accepts.
            case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
            case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
            case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
            case 'V': case 'W': case 'X': case 'Y': case 'Z':
            case '_':
            case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
            case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
            case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
            case 'v': case 'w': case 'x': case 'y': case 'z':
            case '$':
                continue;

            // Digits are fine anywhere except in the first position.
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
                if (cursor == start)
                {
                    return false;
                }

                continue;

            case '&':
                // CONSIDER: This method is performance critical, so it might be
                // safer to kick out at the top (as for LexerMode.DebuggerSyntax).

                // Inside a cref this could be the start of an xml entity that
                // belongs in the identifier, which only the slow path understands.
                if (InXmlCrefOrNameAttributeValue)
                {
                    return false;
                }

                // Otherwise '&' terminates the identifier like any other operator.
                goto case '\0';

            // None of these characters is valid in an identifier, so seeing one
            // means the identifier is complete.
            case '\0':
            case ' ': case '\r': case '\n': case '\t':
            case '!': case '%': case '(': case ')': case '*': case '+': case ',':
            case '-': case '.': case '/': case ':': case ';': case '<': case '=':
            case '>': case '?': case '[': case ']': case '^': case '{': case '|':
            case '}': case '~': case '"': case '\'':
                var length = cursor - start;
                TextWindow.AdvanceChar(length);
                info.Text = info.StringValue = TextWindow.Intern(window, start, length);
                info.IsVerbatim = false;
                return true;

            // case '@':  verbatim identifiers are handled in the slow path
            // case '\\': unicode escapes are handled in the slow path
            default:
                // Anything else (e.g. a unicode char or an escape) is something we
                // cannot handle here. Bail out and let the slow path deal with it.
                return false;
        }
    }

    // No more contiguous characters in the window. Fall back to the slow path.
    return false;
}