/// <summary>
/// Scans an interpolated string literal by handing the work to a nested
/// <c>InterpolatedStringScanner</c>, then copies the scanner's recorded error
/// into <paramref name="error"/> and stores the scanned text on <paramref name="info"/>.
/// </summary>
/// <param name="interpolations">Builder forwarded to the nested scanner.</param>
/// <param name="isVerbatim">Forwarded to the nested scanner's constructor.</param>
/// <param name="info">Token info forwarded to the nested scanner; its <c>Text</c> is set here.</param>
/// <param name="error">Always overwritten with the nested scanner's <c>error</c> field.</param>
/// <param name="closeQuoteMissing">Assigned by the nested scanner.</param>
internal void ScanInterpolatedStringLiteralTop(
    ArrayBuilder<Interpolation> interpolations,
    bool isVerbatim,
    ref TokenInfo info,
    ref SyntaxDiagnosticInfo error,
    out bool closeQuoteMissing)
{
    var nestedScanner = new InterpolatedStringScanner(this, isVerbatim);
    nestedScanner.ScanInterpolatedStringLiteralTop(interpolations, ref info, out closeQuoteMissing);

    // Hand back whatever the nested scanner recorded, replacing the incoming value.
    error = nestedScanner.error;

    // The token text is taken without interning.
    info.Text = TextWindow.GetText(false);
}
/// <summary>
/// Scans the body and closing delimiter of a raw string literal that must end on the
/// current line. Sets <c>info.Kind</c> immediately; sets <c>info.StringValue</c> only when
/// a closing quote sequence of at least <paramref name="startingQuoteCount"/> quotes is found.
/// </summary>
/// <param name="info">Token info to populate.</param>
/// <param name="startingQuoteCount">Number of quotes in the opening delimiter.</param>
private void ScanSingleLineRawStringLiteral(ref TokenInfo info, int startingQuoteCount)
{
    info.Kind = SyntaxKind.SingleLineRawStringLiteralToken;

    for (; ; )
    {
        var next = TextWindow.PeekChar();

        // A newline before the closing delimiter means the literal is unterminated;
        // the error spans the newline itself.
        if (SyntaxFacts.IsNewLine(next))
        {
            this.AddError(TextWindow.Position, width: TextWindow.GetNewLineWidth(), ErrorCode.ERR_UnterminatedRawString);
            return;
        }

        // Same for end of text, except there is nothing to span.
        if (IsAtEndOfText(next))
        {
            this.AddError(TextWindow.Position, width: 0, ErrorCode.ERR_UnterminatedRawString);
            return;
        }

        // Ordinary content: step over it.
        if (next != '"')
        {
            TextWindow.AdvanceChar();
            continue;
        }

        var closingDelimiterStart = TextWindow.Position;
        var quotesSeen = ConsumeQuoteSequence();

        // A shorter run of quotes than the opener is just content of the literal.
        if (quotesSeen < startingQuoteCount)
        {
            continue;
        }

        // Extra quotes past the required count are reported, positioned on the excess only.
        var surplusQuotes = quotesSeen - startingQuoteCount;
        if (surplusQuotes > 0)
        {
            this.AddError(
                position: TextWindow.Position - surplusQuotes,
                width: surplusQuotes,
                ErrorCode.ERR_TooManyQuotesForRawString);
        }

        // The value is everything between the opening quotes and the start of the closing run.
        var contentStart = TextWindow.LexemeStartPosition + startingQuoteCount;
        info.StringValue = TextWindow.GetText(
            position: contentStart,
            length: closingDelimiterStart - contentStart,
            intern: true);
        return;
    }
}
/// <summary>
/// Scans a verbatim string literal (<c>@"..."</c>) starting at the <c>@</c>, filling in
/// <c>Kind</c>, <c>Text</c> and <c>StringValue</c> on <paramref name="info"/>.
/// </summary>
/// <param name="info">Token info to populate.</param>
/// <param name="allowNewlines">
/// When <see langword="false"/>, the first newline encountered is recorded as an error,
/// but scanning still continues to the closing quote.
/// </param>
/// <returns>The first error hit while scanning, or <see langword="null"/> if there was none.</returns>
private ErrorCode? ScanVerbatimStringLiteral(ref TokenInfo info, bool allowNewlines)
{
    _builder.Length = 0;

    Debug.Assert(TextWindow.PeekChar() == '@' && TextWindow.PeekChar(1) == '"');
    TextWindow.AdvanceChar(2);

    ErrorCode? firstError = null;
    for (; ; )
    {
        var current = TextWindow.PeekChar();

        if (current == '"')
        {
            TextWindow.AdvanceChar();

            // A lone quote terminates the literal.
            if (TextWindow.PeekChar() != '"')
            {
                break;
            }

            // A doubled quote stands for one literal quote character.
            TextWindow.AdvanceChar();
            _builder.Append(current);
            continue;
        }

        if (current == SlidingTextWindow.InvalidCharacter && TextWindow.IsReallyAtEnd())
        {
            // Ran out of source before the closing quote.
            firstError ??= ErrorCode.ERR_UnterminatedStringLit;
            break;
        }

        // A disallowed newline is remembered (first error only) but does not stop the scan;
        // consuming through the closing quote keeps the literal's contents from being lexed as code.
        if (!allowNewlines && SyntaxFacts.IsNewLine(current))
        {
            firstError ??= ErrorCode.ERR_NewlinesAreNotAllowedInsideANonVerbatimInterpolatedString;
        }

        TextWindow.AdvanceChar();
        _builder.Append(current);
    }

    info.Kind = SyntaxKind.StringLiteralToken;
    info.Text = TextWindow.GetText(intern: false);
    info.StringValue = _builder.ToString();

    return firstError;
}
/// <summary>
/// Scans a raw string literal starting at its opening quote sequence and dispatches to the
/// single-line or multi-line scanner depending on what follows the opening quotes.
/// Always sets <c>info.Text</c>; when errors were recorded, <c>info.StringValue</c> is
/// replaced by the raw text from just after the opening quotes to the current position.
/// </summary>
/// <param name="info">Token info to populate.</param>
private void ScanRawStringLiteral(ref TokenInfo info)
{
    _builder.Length = 0;

    var openingQuoteCount = ConsumeQuoteSequence();
    Debug.Assert(openingQuoteCount >= 3);

    // Whitespace directly after the opening quotes is consumed before deciding the form.
    ConsumeWhitespace(builder: null);

    // A newline right after that whitespace selects the multi-line form; anything else
    // selects the single-line form.
    var startsWithNewLine = SyntaxFacts.IsNewLine(TextWindow.PeekChar());
    if (startsWithNewLine)
    {
        ScanMultiLineRawStringLiteral(ref info, openingQuoteCount);
    }
    else
    {
        ScanSingleLineRawStringLiteral(ref info, openingQuoteCount);
    }

    if (!this.HasErrors)
    {
        // On success the sub-scanner is expected to have produced the value already.
        Debug.Assert(info.StringValue != null);
    }
    else
    {
        // With errors the true value cannot be determined, so fall back to the raw text
        // between the opening quotes and the current position. Raw strings have no escapes,
        // so the text can be taken as-is; indentation is not stripped in this case.
        // Consumers are expected to check diagnostics before trusting the value.
        var contentStart = TextWindow.LexemeStartPosition + openingQuoteCount;
        info.StringValue = TextWindow.GetText(
            position: contentStart,
            length: TextWindow.Position - contentStart,
            intern: true);
    }

    info.Text = TextWindow.GetText(intern: true);
}
/// <summary>
/// Scans an interpolated string literal via a nested <c>InterpolatedStringScanner</c>,
/// then reports the scanner's error and marks <paramref name="info"/> as an
/// interpolated string token carrying the scanned text.
/// </summary>
/// <param name="info">Token info whose <c>Kind</c> and <c>Text</c> are set here.</param>
/// <param name="error">Receives the nested scanner's <c>Error</c>.</param>
/// <param name="kind">Assigned by the nested scanner.</param>
/// <param name="openQuoteRange">Assigned by the nested scanner.</param>
/// <param name="interpolations">Optional builder forwarded to the nested scanner.</param>
/// <param name="closeQuoteRange">Assigned by the nested scanner.</param>
internal void ScanInterpolatedStringLiteralTop(
    ref TokenInfo info,
    out SyntaxDiagnosticInfo? error,
    out InterpolatedStringKind kind,
    out Range openQuoteRange,
    ArrayBuilder<Interpolation>? interpolations,
    out Range closeQuoteRange)
{
    var nestedScanner = new InterpolatedStringScanner(this);
    nestedScanner.ScanInterpolatedStringLiteralTop(
        out kind,
        out openQuoteRange,
        interpolations,
        out closeQuoteRange);

    error = nestedScanner.Error;

    info.Kind = SyntaxKind.InterpolatedStringToken;
    info.Text = TextWindow.GetText(intern: false);
}
/// <summary>
/// Scans a verbatim string literal (<c>@"..."</c>) starting at the <c>@</c>, filling in
/// <c>Kind</c>, <c>Text</c> and <c>StringValue</c> on <paramref name="info"/>.
/// </summary>
/// <param name="info">Token info to populate.</param>
/// <returns>
/// <c>ERR_UnterminatedStringLit</c> if the source ended before the closing quote;
/// otherwise <see langword="null"/>.
/// </returns>
private ErrorCode? ScanVerbatimStringLiteral(ref TokenInfo info)
{
    _builder.Length = 0;

    Debug.Assert(TextWindow.PeekChar() == '@' && TextWindow.PeekChar(1) == '"');
    TextWindow.AdvanceChar(2);

    ErrorCode? scanError = null;
    var finished = false;
    while (!finished)
    {
        var current = TextWindow.PeekChar();

        if (current == '"')
        {
            TextWindow.AdvanceChar();
            if (TextWindow.PeekChar() == '"')
            {
                // Two quotes in a row encode a single quote in the value.
                TextWindow.AdvanceChar();
                _builder.Append(current);
            }
            else
            {
                // A single quote closes the literal.
                finished = true;
            }
        }
        else if (current == SlidingTextWindow.InvalidCharacter && TextWindow.IsReallyAtEnd())
        {
            // End of source reached with the literal still open.
            scanError ??= ErrorCode.ERR_UnterminatedStringLit;
            finished = true;
        }
        else
        {
            // Everything else, newlines included, is literal content.
            TextWindow.AdvanceChar();
            _builder.Append(current);
        }
    }

    info.Kind = SyntaxKind.StringLiteralToken;
    info.Text = TextWindow.GetText(intern: false);
    info.StringValue = _builder.ToString();

    return scanError;
}
/// <summary>
/// Scans an interpolated string literal as a single token, reporting any error found
/// by <c>ScanISLTop</c> and storing the scanned text on <paramref name="info"/>.
/// </summary>
/// <remarks>
/// The literal has the shape <c>" ... "</c> and may contain expression "holes" delimited
/// by curly braces. Finding the literal's closing quote requires finding the closing
/// brace of every hole, because the expressions inside holes may themselves contain
/// strings. That in turn requires matching up <c>/**/</c> comments, character quotes,
/// parentheses, brackets and nested strings (including their own holes).
/// </remarks>
/// <param name="info">Token info to populate.</param>
private void ScanInterpolatedStringLiteral(ref TokenInfo info)
{
    SyntaxDiagnosticInfo scanError = null;

    // No interpolation list is collected here; only the token extent and any error are needed.
    ScanISLTop(null, ref info, ref scanError);

    this.AddError(scanError);
    info.Text = TextWindow.GetText(false);
}
/// <summary>
/// Scans a verbatim string literal (<c>@"..."</c>) if the window is positioned on one.
/// When it is not, <paramref name="info"/> is reset to <c>SyntaxKind.None</c> with null
/// text and value. Errors are reported through <c>AddError</c>.
/// </summary>
/// <param name="info">Token info to populate.</param>
/// <param name="allowNewlines">
/// When <see langword="false"/>, a newline ends the scan with
/// <c>ERR_UnterminatedStringLit</c> and the newline itself is not consumed.
/// </param>
private void ScanVerbatimStringLiteral(ref TokenInfo info, bool allowNewlines = true)
{
    _builder.Length = 0;

    // Not at @" -- report "no token" and leave the window untouched.
    if (TextWindow.PeekChar() != '@' || TextWindow.PeekChar(1) != '"')
    {
        info.Kind = SyntaxKind.None;
        info.Text = null;
        info.StringValue = null;
        return;
    }

    TextWindow.AdvanceChar(2);

    while (true)
    {
        char current = TextWindow.PeekChar();

        if (current == '"')
        {
            TextWindow.AdvanceChar();
            if (TextWindow.PeekChar() != '"')
            {
                // Single quote: end of literal.
                break;
            }

            // Doubled quote: consume the second one and keep a single quote in the value.
            TextWindow.AdvanceChar();
            _builder.Append(current);
            continue;
        }

        // The sentinel character only means end-of-source if the window confirms it;
        // otherwise it is ordinary content and falls through to the checks below.
        var reachedEnd = current == SlidingTextWindow.InvalidCharacter && TextWindow.IsReallyAtEnd();
        if (reachedEnd || (!allowNewlines && SyntaxFacts.IsNewLine(current)))
        {
            // Either the source ended or a disallowed newline was hit before the end-quote.
            this.AddError(ErrorCode.ERR_UnterminatedStringLit);
            break;
        }

        TextWindow.AdvanceChar();
        _builder.Append(current);
    }

    info.Kind = SyntaxKind.StringLiteralToken;
    info.Text = TextWindow.GetText(false);
    info.StringValue = _builder.ToString();
}
/// <summary>
/// Scans a regular string literal (<c>"..."</c>) or character literal (<c>'...'</c>),
/// depending on the quote character at the current position. If neither quote is
/// present, <c>info.Kind</c> is set to <c>SyntaxKind.None</c> and <c>info.Text</c> to null.
/// </summary>
/// <param name="info">Token info to populate.</param>
/// <param name="allowEscapes">
/// When <see langword="true"/>, a backslash starts an escape sequence; otherwise it is
/// treated as ordinary content.
/// </param>
private void ScanStringLiteral(ref TokenInfo info, bool allowEscapes = true)
{
    var quoteCharacter = TextWindow.PeekChar();
    if (quoteCharacter != '\'' && quoteCharacter != '"')
    {
        info.Kind = SyntaxKind.None;
        info.Text = null;
        return;
    }

    TextWindow.AdvanceChar();
    _builder.Length = 0;

    while (true)
    {
        char current = TextWindow.PeekChar();

        if (current == '\\' && allowEscapes)
        {
            // An escape may yield one or two chars; the second is the sentinel when absent.
            char secondChar;
            current = this.ScanEscapeSequence(out secondChar);
            _builder.Append(current);
            if (secondChar != SlidingTextWindow.InvalidCharacter)
            {
                _builder.Append(secondChar);
            }

            continue;
        }

        if (current == quoteCharacter)
        {
            // Matching close quote: consume it and stop.
            TextWindow.AdvanceChar();
            break;
        }

        if (SyntaxFacts.IsNewLine(current) ||
            (current == SlidingTextWindow.InvalidCharacter && TextWindow.IsReallyAtEnd()))
        {
            // Literals may contain any Unicode character, including the window's sentinel
            // value, so the sentinel only counts as end-of-source after double-checking.
            Debug.Assert(TextWindow.Width > 0);
            this.AddError(ErrorCode.ERR_NewlineInConst);
            break;
        }

        TextWindow.AdvanceChar();
        _builder.Append(current);
    }

    info.Text = TextWindow.GetText(true);

    var hasContent = _builder.Length > 0;

    if (quoteCharacter == '"')
    {
        info.Kind = SyntaxKind.StringLiteralToken;
        info.StringValue = hasContent ? TextWindow.Intern(_builder) : string.Empty;
        return;
    }

    // Character literal: exactly one char is valid; report empty or over-long contents.
    info.Kind = SyntaxKind.CharacterLiteralToken;
    if (_builder.Length != 1)
    {
        this.AddError(hasContent ? ErrorCode.ERR_TooManyCharsInConst : ErrorCode.ERR_EmptyCharConst);
    }

    if (hasContent)
    {
        info.StringValue = TextWindow.Intern(_builder);
        info.CharValue = info.StringValue[0];
    }
    else
    {
        info.StringValue = string.Empty;
        info.CharValue = SlidingTextWindow.InvalidCharacter;
    }
}
/// <summary>
/// Parses the digits (and optional <c>ull</c>/<c>ll</c>/<c>i</c> suffix) of a binary numeric
/// literal from the current position, reports version and validity errors, and stores the
/// text, value kind and value on <paramref name="info"/>.
/// </summary>
/// <param name="info">Token info to populate.</param>
private void ParseBinaryNumber(ref TokenInfo info)
{
    var value = 0UL;
    var digitCount = 0;
    var sawUnderscore = false;
    var overflowed = false;

    while (true)
    {
        var current = TextWindow.PeekChar();

        // Underscores are separators: consumed, remembered, but not part of the value.
        if (current == '_')
        {
            TextWindow.AdvanceChar();
            sawUnderscore = true;
            continue;
        }

        if (!CharUtils.IsBinary(current))
        {
            break;
        }

        TextWindow.AdvanceChar();

        // If the top bit is already set, the shift below drops it.
        overflowed |= (value & 0x8000_0000_0000_0000) != 0;

        value = (value << 1) | (ulong)CharUtils.DecimalValue(current);
        digitCount++;
    }

    // At most one suffix is recognised; the longest candidate is tried first.
    var isUnsignedLong = false;
    var isSignedLong = false;
    var isComplex = false;
    if (TextWindow.AdvanceIfMatches("ull", true))
    {
        isUnsignedLong = true;
    }
    else if (TextWindow.AdvanceIfMatches("ll", true))
    {
        isSignedLong = true;
    }
    else if (TextWindow.AdvanceIfMatches("i", true))
    {
        isComplex = true;
    }

    var hasSuffix = isUnsignedLong || isSignedLong || isComplex;

    if (!_options.SyntaxOptions.AcceptBinaryNumbers)
    {
        AddError(ErrorCode.ERR_BinaryNumericLiteralNotSupportedInVersion);
    }

    if (!_options.SyntaxOptions.AcceptUnderscoreInNumberLiterals && sawUnderscore)
    {
        AddError(ErrorCode.ERR_UnderscoreInNumericLiteralNotSupportedInVersion);
    }

    if (!Options.SyntaxOptions.AcceptLuaJITNumberSuffixes && hasSuffix)
    {
        AddError(ErrorCode.ERR_NumberSuffixNotSupportedInVersion);
    }

    if (digitCount < 1)
    {
        value = 0; // Safe default
        AddError(ErrorCode.ERR_InvalidNumber);
    }

    // Only the unsigned-long suffix may use the full 64 bits; everything else is capped at long.MaxValue.
    if (overflowed || (value > long.MaxValue && !isUnsignedLong))
    {
        value = 0; // Safe default
        AddError(ErrorCode.ERR_NumericLiteralTooLarge);
    }

    info.Text = TextWindow.GetText(intern: true);

    if (isUnsignedLong)
    {
        info.ValueKind = ValueKind.ULong;
        info.ULongValue = value;
        return;
    }

    if (isSignedLong)
    {
        info.ValueKind = ValueKind.Long;
        info.LongValue = unchecked((long)value);
        return;
    }

    if (isComplex)
    {
        // The literal becomes the imaginary part.
        info.ValueKind = ValueKind.Complex;
        info.ComplexValue = new Complex(0, value);
        return;
    }

    // No suffix: the representation is chosen by the configured binary integer format.
    switch (_options.SyntaxOptions.BinaryIntegerFormat)
    {
        case IntegerFormats.NotSupported:
        case IntegerFormats.Double:
            info.ValueKind = ValueKind.Double;
            info.DoubleValue = value;
            break;

        case IntegerFormats.Int64:
            info.ValueKind = ValueKind.Long;
            info.LongValue = unchecked((long)value);
            break;

        default:
            throw ExceptionUtilities.UnexpectedValue(_options.SyntaxOptions.BinaryIntegerFormat);
    }
}
#pragma warning restore IDE0079 // Remove unnecessary suppression
/// <summary>
/// Parses a hexadecimal numeric literal (integer or hex float, with optional
/// <c>ull</c>/<c>ll</c>/<c>i</c> suffix) from the current position, reports version and
/// range errors, and stores the text, value kind and value on <paramref name="info"/>.
/// </summary>
/// <remarks>
/// The numeric text is accumulated in <c>_builder</c>. NOTE(review): <c>ConsumeHexDigits</c>
/// is presumed to append the digits it consumes to <c>_builder</c> -- confirm against its definition.
/// </remarks>
/// <param name="info">Token info to populate.</param>
private void ParseHexadecimalNumber(ref TokenInfo info)
{
    _builder.Clear();
    ConsumeHexDigits();

    var isHexFloat = false;

    // Optional fractional part.
    if (TextWindow.PeekChar() == '.')
    {
        TextWindow.AdvanceChar();
        isHexFloat = true;
        _builder.Append('.');
        ConsumeHexDigits();
    }

    // Optional exponent introduced by 'p' or 'P', with an optional sign.
    if (CharUtils.AsciiLowerCase(TextWindow.PeekChar()) == 'p')
    {
        TextWindow.AdvanceChar();
        isHexFloat = true;
        _builder.Append('p');
        if (TextWindow.PeekChar() is '+' or '-')
        {
            _builder.Append(TextWindow.NextChar());
        }

        ConsumeDecimalDigits(_builder);
    }

    // At most one suffix is recognised; the integer suffixes are rejected on hex floats.
    var isUnsignedLong = false;
    var isSignedLong = false;
    var isComplex = false;
    if (TextWindow.AdvanceIfMatches("ull", true))
    {
        if (isHexFloat)
        {
            AddError(ErrorCode.ERR_LuajitSuffixInFloat);
        }
        else
        {
            isUnsignedLong = true;
        }
    }
    else if (TextWindow.AdvanceIfMatches("ll", true))
    {
        if (isHexFloat)
        {
            AddError(ErrorCode.ERR_LuajitSuffixInFloat);
        }
        else
        {
            isSignedLong = true;
        }
    }
    else if (TextWindow.AdvanceIfMatches("i", true))
    {
        isComplex = true;
    }

    info.Text = TextWindow.GetText(intern: true);

    if (!_options.SyntaxOptions.AcceptUnderscoreInNumberLiterals && info.Text.IndexOf('_') >= 0)
    {
        AddError(ErrorCode.ERR_UnderscoreInNumericLiteralNotSupportedInVersion);
    }

    if (!Options.SyntaxOptions.AcceptLuaJITNumberSuffixes && (isUnsignedLong || isSignedLong || isComplex))
    {
        AddError(ErrorCode.ERR_NumberSuffixNotSupportedInVersion);
    }

    // Converts the accumulated text to a double, reporting overflow and yielding 0 in that case.
    double ReadHexDouble()
    {
        try
        {
            return HexFloat.DoubleFromHexString(TextWindow.Intern(_builder));
        }
        catch (OverflowException)
        {
            AddError(ErrorCode.ERR_DoubleOverflow);
            return 0d;
        }
    }

    if (isUnsignedLong)
    {
        if (!ulong.TryParse(TextWindow.Intern(_builder), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out var unsignedValue))
        {
            AddError(ErrorCode.ERR_NumericLiteralTooLarge);
        }

        info.ValueKind = ValueKind.ULong;
        info.ULongValue = unsignedValue;
        return;
    }

    if (isSignedLong)
    {
        if (!long.TryParse(TextWindow.Intern(_builder), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out var signedValue))
        {
            AddError(ErrorCode.ERR_NumericLiteralTooLarge);
        }

        info.ValueKind = ValueKind.Long;
        info.LongValue = signedValue;
        return;
    }

    if (isComplex)
    {
        // The literal becomes the imaginary part.
        info.ValueKind = ValueKind.Complex;
        info.ComplexValue = new Complex(0, ReadHexDouble());
        return;
    }

    if (isHexFloat || _options.SyntaxOptions.HexIntegerFormat == IntegerFormats.NotSupported)
    {
        if (!_options.SyntaxOptions.AcceptHexFloatLiterals)
        {
            AddError(ErrorCode.ERR_HexFloatLiteralNotSupportedInVersion);
        }

        info.ValueKind = ValueKind.Double;
        info.DoubleValue = ReadHexDouble();
        return;
    }

    // Plain hex integer: parsed as a 64-bit value, then stored per the configured format.
    if (!long.TryParse(TextWindow.Intern(_builder), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out var integerValue))
    {
        AddError(ErrorCode.ERR_NumericLiteralTooLarge);
    }

    switch (_options.SyntaxOptions.HexIntegerFormat)
    {
        case IntegerFormats.Double:
            info.ValueKind = ValueKind.Double;
            info.DoubleValue = integerValue;
            break;

        case IntegerFormats.Int64:
            info.ValueKind = ValueKind.Long;
            info.LongValue = integerValue;
            break;

        default:
            throw ExceptionUtilities.UnexpectedValue(_options.SyntaxOptions.HexIntegerFormat);
    }
}
/// <summary>
/// Parses the digits of an octal numeric literal from the current position, reports
/// version and validity errors, and stores the text, value kind and value on
/// <paramref name="info"/>.
/// </summary>
/// <param name="info">Token info to populate.</param>
private void ParseOctalNumber(ref TokenInfo info)
{
    var value = 0L;
    var digitCount = 0;
    var sawUnderscore = false;
    var overflowed = false;

    while (true)
    {
        var current = TextWindow.PeekChar();

        // Underscores are separators: consumed, remembered, but not part of the value.
        if (current == '_')
        {
            TextWindow.AdvanceChar();
            sawUnderscore = true;
            continue;
        }

        if (!CharUtils.IsOctal(current))
        {
            break;
        }

        TextWindow.AdvanceChar();

        // Any of bits 60-62 being set means the 3-bit shift below would spill
        // into (or past) the sign bit.
        overflowed |= (value & 0x7000_0000_0000_0000) != 0;

        value = (value << 3) | CharUtils.DecimalValue(current);
        digitCount++;
    }

    if (!_options.SyntaxOptions.AcceptOctalNumbers)
    {
        AddError(ErrorCode.ERR_OctalNumericLiteralNotSupportedInVersion);
    }

    if (!_options.SyntaxOptions.AcceptUnderscoreInNumberLiterals && sawUnderscore)
    {
        AddError(ErrorCode.ERR_UnderscoreInNumericLiteralNotSupportedInVersion);
    }

    if (digitCount < 1)
    {
        value = 0; // Safe default
        AddError(ErrorCode.ERR_InvalidNumber);
    }

    if (overflowed)
    {
        value = 0; // Safe default
        AddError(ErrorCode.ERR_NumericLiteralTooLarge);
    }

    info.Text = TextWindow.GetText(intern: true);

    // The representation is chosen by the configured octal integer format.
    switch (_options.SyntaxOptions.OctalIntegerFormat)
    {
        case IntegerFormats.NotSupported:
        case IntegerFormats.Double:
            info.ValueKind = ValueKind.Double;
            info.DoubleValue = value;
            break;

        case IntegerFormats.Int64:
            info.ValueKind = ValueKind.Long;
            info.LongValue = value;
            break;

        default:
            throw ExceptionUtilities.UnexpectedValue(_options.SyntaxOptions.OctalIntegerFormat);
    }
}
/// <summary>
/// Scans an identifier inside an XML cref or name attribute value. Unlike the regular
/// slow path, each character may also be given as an XML entity. It is kept separate
/// because identifier scanning is hot code and this variant does extra work.
/// </summary>
/// <param name="info">
/// Token info to populate. On success <c>Text</c> and <c>StringValue</c> are set (and
/// possibly <c>IsVerbatim</c> / <c>HasIdentifierEscapeSequence</c>); on failure both
/// strings are set to null.
/// </param>
/// <returns>
/// <see langword="true"/> if at least one identifier character was scanned; otherwise
/// <see langword="false"/>, with the text window reset to where scanning began.
/// </returns>
private bool ScanIdentifier_CrefSlowPath(ref TokenInfo info)
{
    Debug.Assert(InXmlCrefOrNameAttributeValue);

    int identifierStart = TextWindow.Position;
    this.ResetIdentBuffer();

    if (AdvanceIfMatches('@'))
    {
        if (InXmlNameAttributeValue)
        {
            // In XML name attribute values the '@' is part of the identifier's value text
            // (to match dev11).
            AddIdentChar('@');
        }
        else
        {
            info.IsVerbatim = true;
        }
    }

    while (true)
    {
        int charStart = TextWindow.Position;
        char consumedChar;
        char consumedSurrogate;

        if (TextWindow.PeekChar() != '&')
        {
            consumedChar = TextWindow.NextChar();
            consumedSurrogate = SlidingTextWindow.InvalidCharacter;
        }
        else if (!TextWindow.TryScanXmlEntity(out consumedChar, out consumedSurrogate))
        {
            // Not a valid entity, so it is not part of the identifier.
            TextWindow.Reset(charStart);
            goto Done;
        }

        // NOTE: If the surrogate is non-zero, then consumedChar won't match any of the
        // cases below (UTF-16 guarantees that members of surrogate pairs aren't
        // separately valid).

        bool alreadyUnescaped = false;

    Classify:
        switch (consumedChar)
        {
            case '\\':
                // NOTE: For completeness, XML entities should be allowed inside unicode
                // escape sequences (DevDiv #16321). As an interim behavior, a unicode escape
                // is only attempted when NONE of its characters are XML entities, including
                // the backslash already consumed -- hence the position check. Once entities
                // are supported here, the position check can go and AdvanceIfMatches can
                // replace PeekChar.
                if (!alreadyUnescaped &&
                    (TextWindow.Position == charStart + 1) &&
                    (TextWindow.PeekChar() == 'u' || TextWindow.PeekChar() == 'U'))
                {
                    Debug.Assert(consumedSurrogate == SlidingTextWindow.InvalidCharacter, "Since consumedChar == '\\'");

                    info.HasIdentifierEscapeSequence = true;

                    TextWindow.Reset(charStart);

                    // Guard against re-entering this branch; otherwise \u005Cu1234 would
                    // look just like \u1234 (an escape within an escape).
                    alreadyUnescaped = true;

                    SyntaxDiagnosticInfo escapeError;
                    consumedChar = TextWindow.NextUnicodeEscape(out consumedSurrogate, out escapeError);
                    AddCrefError(escapeError);

                    // Classify the unescaped character.
                    goto Classify;
                }

                goto default;

            case '_':
            case 'A':
            case 'B':
            case 'C':
            case 'D':
            case 'E':
            case 'F':
            case 'G':
            case 'H':
            case 'I':
            case 'J':
            case 'K':
            case 'L':
            case 'M':
            case 'N':
            case 'O':
            case 'P':
            case 'Q':
            case 'R':
            case 'S':
            case 'T':
            case 'U':
            case 'V':
            case 'W':
            case 'X':
            case 'Y':
            case 'Z':
            case 'a':
            case 'b':
            case 'c':
            case 'd':
            case 'e':
            case 'f':
            case 'g':
            case 'h':
            case 'i':
            case 'j':
            case 'k':
            case 'l':
            case 'm':
            case 'n':
            case 'o':
            case 'p':
            case 'q':
            case 'r':
            case 's':
            case 't':
            case 'u':
            case 'v':
            case 'w':
            case 'x':
            case 'y':
            case 'z':
                {
                    // The common identifier characters: always accepted.
                    break;
                }

            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
                {
                    // Digits are accepted anywhere except as the first character.
                    if (this._identLen == 0)
                    {
                        TextWindow.Reset(charStart);
                        goto Done;
                    }

                    break;
                }

            case ' ':
            case '$':
            case '\t':
            case '.':
            case ';':
            case '(':
            case ')':
            case ',':
            case '<':
                // The common stop characters: un-consume and finish.
                TextWindow.Reset(charStart);
                goto Done;

            case SlidingTextWindow.InvalidCharacter:
                // The sentinel only means end-of-text if the window confirms it.
                if (!TextWindow.IsReallyAtEnd())
                {
                    goto default;
                }

                TextWindow.Reset(charStart);
                goto Done;

            default:
                {
                    // This is the expensive classification, reached only for uncommon characters.
                    if (this._identLen == 0 && consumedChar > 127 && SyntaxKindFacts.IsIdentifierStartCharacter(consumedChar))
                    {
                        break;
                    }
                    else if (this._identLen > 0 && consumedChar > 127 && SyntaxKindFacts.IsIdentifierPartCharacter(consumedChar))
                    {
                        //// BUG 424819 : Handle identifier chars > 0xFFFF via surrogate pairs
                        if (SyntaxKindFacts.IsFormattingChar(consumedChar))
                        {
                            // Formatting characters are consumed but not added to the identifier.
                            continue;
                        }

                        break;
                    }
                    else
                    {
                        // Not a valid identifier character, so bail.
                        TextWindow.Reset(charStart);
                        goto Done;
                    }
                }
        }

        this.AddIdentChar(consumedChar);
        if (consumedSurrogate != SlidingTextWindow.InvalidCharacter)
        {
            this.AddIdentChar(consumedSurrogate);
        }
    }

Done:
    if (this._identLen <= 0)
    {
        // Nothing scanned: clear the outputs and rewind to the starting position.
        info.Text = null;
        info.StringValue = null;
        TextWindow.Reset(identifierStart);
        return false;
    }

    // NOTE: The string value must be interned, otherwise there would be no hit in the
    // keyword dictionary (it searches for a key using identity). The text does not have
    // to be interned, and probably shouldn't be when it contains entities (else-branch).

    var consumedWidth = TextWindow.Width; // exact size of input characters
    if (this._identLen == consumedWidth)
    {
        // The identifier buffer has the same length as the consumed input,
        // so the lexeme text is used directly.
        info.StringValue = TextWindow.GetInternedText();
        info.Text = info.StringValue;
    }
    else
    {
        info.StringValue = TextWindow.Intern(this._identBuffer, 0, this._identLen);
        info.Text = TextWindow.GetText(intern: false);
    }

    return true;
}