/// <summary>
/// Checks that a <c>TextWindow</c> created over "abc" exposes each character, in order,
/// through PeekChar/ConsumeChar and reports end-of-file once every character is consumed.
/// </summary>
public void ReturnCharsInCorrectOrder()
{
    const string text = "abc";
    var sut = new TextWindow(text);

    // Walk the expected characters instead of spelling out each peek/consume pair.
    foreach (var expected in text)
    {
        Assert.Equal(expected, sut.PeekChar());
        sut.ConsumeChar();
    }

    Assert.True(sut.IsEof());
}
/// <summary>
/// Scans past a verbatim string literal (<c>@"..."</c>) nested inside an interpolated string.
/// On entry the window must be positioned on the '@' that is immediately followed by a quote.
/// On exit the window is positioned just after the closing quote, or at the end of input if
/// the literal is unterminated.
/// </summary>
/// <param name="error">Diagnostic slot shared with the enclosing scan; this method never assigns it.</param>
private void ScanISLNestedVerbatimString(ref SyntaxDiagnosticInfo error)
{
    Debug.Assert(TextWindow.PeekChar() == '@');
    TextWindow.AdvanceChar();
    Debug.Assert(TextWindow.PeekChar() == '\"');
    TextWindow.AdvanceChar(); // move past quote

    while (true)
    {
        if (IsAtEnd())
        {
            // we'll get an error in the enclosing construct
            return;
        }

        char ch = TextWindow.PeekChar();
        TextWindow.AdvanceChar();
        if (ch == '\"')
        {
            // The quote was already consumed above, so the character that decides whether
            // this is a doubled (escaped) quote is the CURRENT one. Looking at offset 1
            // skipped a character and ended strings such as @"a""b" at the first "".
            if (TextWindow.PeekChar() == '\"')
            {
                TextWindow.AdvanceChar(); // move past escaped quote
            }
            else
            {
                return;
            }
        }
    }
}
/// <summary>
/// Scans the remainder of a single-line raw string literal whose opening delimiter of
/// <paramref name="startingQuoteCount"/> quotes has already been consumed.
/// Reports <c>ERR_UnterminatedRawString</c> when a newline or the end of text is reached first,
/// and <c>ERR_TooManyQuotesForRawString</c> when the closing quote run is longer than the opening one.
/// </summary>
/// <param name="info">Receives the token kind and, when a closing delimiter is found, the string value.</param>
/// <param name="startingQuoteCount">Number of quotes in the opening delimiter.</param>
private void ScanSingleLineRawStringLiteral(ref TokenInfo info, int startingQuoteCount)
{
    info.Kind = SyntaxKind.SingleLineRawStringLiteralToken;

    while (true)
    {
        var ch = TextWindow.PeekChar();

        // Hitting the end of the line or of the text before a closing delimiter is an error.
        if (SyntaxFacts.IsNewLine(ch))
        {
            this.AddError(TextWindow.Position, width: TextWindow.GetNewLineWidth(), ErrorCode.ERR_UnterminatedRawString);
            return;
        }

        if (IsAtEndOfText(ch))
        {
            this.AddError(TextWindow.Position, width: 0, ErrorCode.ERR_UnterminatedRawString);
            return;
        }

        if (ch == '"')
        {
            var closingStart = TextWindow.Position;
            var quotesSeen = ConsumeQuoteSequence();

            // A shorter quote run is ordinary content; only a run at least as long as the
            // opening delimiter terminates the literal.
            if (quotesSeen >= startingQuoteCount)
            {
                var surplus = quotesSeen - startingQuoteCount;
                if (surplus > 0)
                {
                    // Nothing valid can follow the closing delimiter with more quotes, so flag the extras.
                    this.AddError(
                        position: TextWindow.Position - surplus,
                        width: surplus,
                        ErrorCode.ERR_TooManyQuotesForRawString);
                }

                var contentStart = TextWindow.LexemeStartPosition + startingQuoteCount;
                info.StringValue = TextWindow.GetText(
                    position: contentStart,
                    length: closingStart - contentStart,
                    intern: true);
                return;
            }
        }
        else
        {
            // Any non-quote character is plain content.
            TextWindow.AdvanceChar();
        }
    }
}
private void AddMultiLineRawStringLiteralLineContents( StringBuilder indentationWhitespace, StringBuilder currentLineWhitespace, bool firstContentLine) { Debug.Assert(SyntaxFacts.IsNewLine(TextWindow.PeekChar())); var newLineWidth = TextWindow.GetNewLineWidth(); for (var i = 0; i < newLineWidth; i++) { // the initial newline in `""" \r\n` is not added to the contents. if (!firstContentLine) { _builder.Append(TextWindow.PeekChar()); } TextWindow.AdvanceChar(); } var lineStartPosition = TextWindow.Position; currentLineWhitespace.Clear(); ConsumeWhitespace(currentLineWhitespace); if (!StartsWith(currentLineWhitespace, indentationWhitespace)) { // We have a line where the indentation of that line isn't a prefix of indentation // whitespace. // // If we're not on a blank line then this is bad. That's a content line that doesn't start // with the indentation whitespace. If we are on a blank line then it's ok if the whitespace // we do have is a prefix of the indentation whitespace. var isBlankLine = SyntaxFacts.IsNewLine(TextWindow.PeekChar()); var isLegalBlankLine = isBlankLine && StartsWith(indentationWhitespace, currentLineWhitespace); if (!isLegalBlankLine) { // Specialized error message if this is a spacing difference. if (CheckForSpaceDifference( currentLineWhitespace, indentationWhitespace, out var currentLineWhitespaceChar, out var indentationWhitespaceChar)) { this.AddError( lineStartPosition, width: TextWindow.Position - lineStartPosition, ErrorCode.ERR_LineContainsDifferentWhitespace, currentLineWhitespaceChar, indentationWhitespaceChar); } else { this.AddError( lineStartPosition, width: TextWindow.Position - lineStartPosition, ErrorCode.ERR_LineDoesNotStartWithSameWhitespace); } return; } }
/// <summary>
/// Scans the text portion of an interpolated string literal, stopping on the closing quote
/// (which is left unconsumed) or at the end of input. A backslash followed by '{' opens an
/// expression hole that is scanned with <c>ScanISLHoleBalancedText</c>; any other escaped
/// character is skipped.
/// </summary>
/// <param name="interpolations">Optional collector for the span of each hole; may be null.</param>
/// <param name="error">Receives <c>ERR_UnclosedExpressionHole</c> if a hole is not closed and no error is set yet.</param>
private void ScanISLContents(ArrayBuilder<Interpolation> interpolations, ref SyntaxDiagnosticInfo error)
{
    // Running out of input ends the scan; the caller reports the missing quote.
    while (!IsAtEnd())
    {
        char current = TextWindow.PeekChar();
        if (current == '\"')
        {
            // found the end of the string
            return;
        }

        // Every other character (including the backslash) is consumed.
        TextWindow.AdvanceChar();
        if (current != '\\')
        {
            continue;
        }

        if (IsAtEnd())
        {
            // the caller will complain about unclosed quote
            return;
        }

        if (TextWindow.PeekChar() != '{')
        {
            TextWindow.AdvanceChar(); // skip past a single escaped character
            continue;
        }

        int interpolationStart = TextWindow.Position;
        TextWindow.AdvanceChar();
        ScanISLHoleBalancedText('}', true, ref error);
        int holeEnd = TextWindow.Position;

        if (TextWindow.PeekChar() == '}')
        {
            TextWindow.AdvanceChar();
        }
        else if (error == null)
        {
            // Point at the two characters that opened the hole.
            error = MakeError(interpolationStart - 1, 2, ErrorCode.ERR_UnclosedExpressionHole);
        }

        interpolations?.Add(new Interpolation(interpolationStart, holeEnd));
    }
}
/// <summary>
/// Consumes the run of consecutive <paramref name="ch"/> characters at the current position.
/// </summary>
/// <param name="ch">The character to consume repeatedly.</param>
/// <returns>How far the window position advanced, i.e. the length of the consumed run.</returns>
private int ConsumeCharSequence(char ch)
{
    var runStart = TextWindow.Position;

    while (true)
    {
        if (TextWindow.PeekChar() != ch)
        {
            return TextWindow.Position - runStart;
        }

        TextWindow.AdvanceChar();
    }
}
/// <summary>
/// Scans a verbatim string literal (<c>@"..."</c>) starting at the '@', filling in the token
/// kind, text and value. A doubled quote contributes a single quote to the value.
/// </summary>
/// <param name="info">Receives the kind, raw text and string value of the literal.</param>
/// <param name="allowNewlines">When false, the first newline inside the literal is recorded as an error.</param>
/// <returns>The first error encountered while scanning, or null if there was none.</returns>
private ErrorCode? ScanVerbatimStringLiteral(ref TokenInfo info, bool allowNewlines)
{
    _builder.Length = 0;

    Debug.Assert(TextWindow.PeekChar() == '@' && TextWindow.PeekChar(1) == '"');
    TextWindow.AdvanceChar(2);

    ErrorCode? firstError = null;
    while (true)
    {
        var current = TextWindow.PeekChar();

        if (current == '"')
        {
            TextWindow.AdvanceChar();
            if (TextWindow.PeekChar() != '"')
            {
                // A lone quote closes the literal.
                break;
            }

            // Doubled quote: consume the second one and keep a single quote in the value.
            TextWindow.AdvanceChar();
            _builder.Append('"');
            continue;
        }

        if (current == SlidingTextWindow.InvalidCharacter && TextWindow.IsReallyAtEnd())
        {
            // Ran out of source before the closing quote; record that (unless an earlier
            // error is already held) and stop scanning.
            if (!firstError.HasValue)
            {
                firstError = ErrorCode.ERR_UnterminatedStringLit;
            }

            break;
        }

        // A disallowed newline is recorded once, but scanning continues to the end of the
        // literal so its contents are not lexed as code (which would cascade into more errors).
        if (!allowNewlines && SyntaxFacts.IsNewLine(current) && !firstError.HasValue)
        {
            firstError = ErrorCode.ERR_NewlinesAreNotAllowedInsideANonVerbatimInterpolatedString;
        }

        TextWindow.AdvanceChar();
        _builder.Append(current);
    }

    info.Kind = SyntaxKind.StringLiteralToken;
    info.Text = TextWindow.GetText(intern: false);
    info.StringValue = _builder.ToString();

    return firstError;
}
/// <summary>
/// Scans a two-character escape sequence. The window must be positioned on the introducing
/// character (the caller in this file invokes it after peeking a backslash). On return the
/// window is positioned just after the escaped character.
/// </summary>
/// <returns>The character the escape sequence denotes.</returns>
/// <exception cref="Exception">The character after the introducer is not a recognized escape.</exception>
private char ScanEscapeSequence()
{
    TextWindow.AdvanceChar(); // skip the escape introducer

    var escaped = TextWindow.PeekChar();
    char value;
    switch (escaped)
    {
        case '\'':
        case '\"':
        case '\\':
            value = escaped;
            break;
        case 'a': value = '\u0007'; break;
        case 'b': value = '\u0008'; break;
        case 'f': value = '\u000c'; break;
        case 'n': value = '\u000a'; break;
        case 'r': value = '\u000d'; break;
        case 't': value = '\u0009'; break;
        case 'v': value = '\u000b'; break;
        case '0': value = '\u0000'; break;
        default:
            // The exception type is kept as-is so existing callers that expect it keep working.
            throw new Exception("Unrecognized escape sequence '\\" + escaped + "'.");
    }

    // Consume the escaped character itself. Previously it was only peeked, so the caller
    // saw it again as an ordinary character (or as the closing quote for \").
    TextWindow.AdvanceChar();
    return value;
}
/// <summary>
/// Scans a bracketed region inside an expression hole: consumes the opening bracket, scans the
/// balanced text up to <paramref name="end"/>, and consumes the closing bracket if present.
/// </summary>
/// <param name="start">The opening bracket; must be the current character.</param>
/// <param name="end">The matching closing bracket.</param>
/// <param name="error">Diagnostic slot passed through to the balanced-text scan.</param>
void ScanISLHoleBracketed(char start, char end, ref SyntaxDiagnosticInfo error)
{
    Debug.Assert(start == TextWindow.PeekChar());
    TextWindow.AdvanceChar();

    ScanISLHoleBalancedText(end, false, ref error);

    if (TextWindow.PeekChar() != end)
    {
        // No closing bracket here; nothing is reported at this point
        // (the original note says the error is given by the caller).
        return;
    }

    TextWindow.AdvanceChar();
}
/// <summary>
/// Consumes consecutive whitespace characters (as defined by <c>SyntaxFacts.IsWhitespace</c>)
/// at the current position.
/// </summary>
/// <param name="builder">If not null, each consumed whitespace character is appended to it.</param>
private void ConsumeWhitespace(StringBuilder? builder)
{
    char ch;
    while (SyntaxFacts.IsWhitespace(ch = TextWindow.PeekChar()))
    {
        if (builder != null)
        {
            builder.Append(ch);
        }

        TextWindow.AdvanceChar();
    }
}
/// <summary>
/// Scans a raw string literal starting at its opening quote run. After the opening quotes and
/// any following whitespace, a newline selects the multi-line form and anything else selects
/// the single-line form. Sets the token text and, when scanning produced errors, a fallback value.
/// </summary>
/// <param name="info">Receives the text and string value of the literal.</param>
private void ScanRawStringLiteral(ref TokenInfo info)
{
    _builder.Length = 0;

    var openingQuotes = ConsumeQuoteSequence();
    Debug.Assert(openingQuotes >= 3);

    // Whitespace directly after the opening delimiter does not decide the form.
    ConsumeWhitespace(builder: null);

    var isSingleLine = !SyntaxFacts.IsNewLine(TextWindow.PeekChar());
    if (isSingleLine)
    {
        ScanSingleLineRawStringLiteral(ref info, openingQuotes);
    }
    else
    {
        ScanMultiLineRawStringLiteral(ref info, openingQuotes);
    }

    if (!this.HasErrors)
    {
        // Without errors the form-specific scanner must have produced the value.
        Debug.Assert(info.StringValue != null);
    }
    else
    {
        // With errors the true value cannot be determined, so use the raw text from just
        // after the opening delimiter to the current position. Raw strings have no escapes,
        // so the text can be taken as-is; indentation is not stripped in this case. Consumers
        // should check for diagnostics before trusting the value.
        var contentStart = TextWindow.LexemeStartPosition + openingQuotes;
        info.StringValue = TextWindow.GetText(
            position: contentStart,
            length: TextWindow.Position - contentStart,
            intern: true);
    }

    info.Text = TextWindow.GetText(intern: true);
}
/// <summary>
/// Consumes a run of decimal digits and '_' separators at the current position.
/// </summary>
/// <param name="builder">If not null, receives the consumed digits (separators are not appended).</param>
/// <returns>How far the window position advanced, separators included.</returns>
private int ConsumeDecimalDigits(StringBuilder? builder)
{
    var origin = TextWindow.Position;

    while (true)
    {
        var current = TextWindow.PeekChar();
        if (current == '_')
        {
            // Separators are consumed but never copied to the builder.
            TextWindow.AdvanceChar();
            continue;
        }

        if (!CharUtils.IsDecimal(current))
        {
            break;
        }

        builder?.Append(current);
        TextWindow.AdvanceChar();
    }

    return TextWindow.Position - origin;
}
/// <summary>
/// Consumes a run of hexadecimal digits and '_' separators at the current position,
/// appending the digits (but not the separators) to <c>_builder</c>.
/// </summary>
/// <returns>How far the window position advanced, separators included.</returns>
private int ConsumeHexDigits()
{
    var origin = TextWindow.Position;

    while (true)
    {
        var current = TextWindow.PeekChar();
        if (current == '_')
        {
            // Separators are consumed but never copied to the builder.
            TextWindow.AdvanceChar();
            continue;
        }

        if (!CharUtils.IsHexadecimal(current))
        {
            break;
        }

        _builder.Append(current);
        TextWindow.AdvanceChar();
    }

    return TextWindow.Position - origin;
}
/// <summary>
/// Scans a run of ASCII digits ('0'-'9') at the current position and stores its value and
/// kind on <paramref name="info"/>.
/// </summary>
/// <param name="info">Receives <c>IntValue</c> and <c>Kind</c>.</param>
/// <remarks>
/// The digits are converted with <c>int.Parse</c>, so an empty run or a value that does not
/// fit in an <c>int</c> surfaces as that method's exception; the kind is not set in that case.
/// </remarks>
private void ScanNumericLiteral(TokenInfo info)
{
    builder.Clear();

    for (var ch = TextWindow.PeekChar(); ch >= '0' && ch <= '9'; ch = TextWindow.PeekChar())
    {
        TextWindow.AdvanceChar();
        builder.Append(ch);
    }

    info.IntValue = int.Parse(builder.ToString());
    info.Kind = SyntaxKind.NumericLiteralToken;
}
/// <summary>
/// Scans past a string or character literal nested inside an expression hole. The window must
/// be positioned on the opening <paramref name="quote"/>; scanning stops after the matching
/// closing quote or at the end of input.
/// </summary>
/// <param name="quote">The delimiter of the nested literal.</param>
/// <param name="error">Diagnostic slot passed through when a nested hole is scanned.</param>
private void ScanISLNestedString(char quote, ref SyntaxDiagnosticInfo error)
{
    Debug.Assert(TextWindow.PeekChar() == quote);
    TextWindow.AdvanceChar(); // move past quote

    // Running out of input ends the scan; the enclosing construct reports the error.
    while (!IsAtEnd())
    {
        char consumed = TextWindow.PeekChar();
        TextWindow.AdvanceChar();

        if ((consumed == '\"' || consumed == '\'') && consumed == quote)
        {
            return;
        }

        if (consumed != '\\')
        {
            continue;
        }

        char escaped = TextWindow.PeekChar();
        if (IsAtEnd())
        {
            return;
        }

        if (escaped == '{' && quote == '"')
        {
            // A backslash-brace inside a nested double-quoted string opens another hole.
            TextWindow.AdvanceChar(); // move past {
            ScanISLHoleBalancedText('}', true, ref error);
            if (TextWindow.PeekChar() == '}')
            {
                TextWindow.AdvanceChar();
            }
        }
        else
        {
            TextWindow.AdvanceChar(); // move past one escaped character
        }
    }
}
/// <summary>
/// Scans a verbatim string literal (<c>@"..."</c>) starting at the '@', filling in the token
/// kind, text and value. A doubled quote contributes a single quote to the value.
/// </summary>
/// <param name="info">Receives the kind, raw text and string value of the literal.</param>
/// <returns>
/// <c>ERR_UnterminatedStringLit</c> if the source ended before the closing quote; otherwise null.
/// </returns>
private ErrorCode? ScanVerbatimStringLiteral(ref TokenInfo info)
{
    _builder.Length = 0;

    Debug.Assert(TextWindow.PeekChar() == '@' && TextWindow.PeekChar(1) == '"');
    TextWindow.AdvanceChar(2);

    ErrorCode? firstError = null;
    while (true)
    {
        var current = TextWindow.PeekChar();

        if (current == '"')
        {
            TextWindow.AdvanceChar();
            if (TextWindow.PeekChar() != '"')
            {
                // A lone quote closes the literal.
                break;
            }

            // Doubled quote: consume the second one and keep a single quote in the value.
            TextWindow.AdvanceChar();
            _builder.Append('"');
            continue;
        }

        if (current == SlidingTextWindow.InvalidCharacter && TextWindow.IsReallyAtEnd())
        {
            // Ran out of source before the closing quote; record that and stop scanning.
            if (!firstError.HasValue)
            {
                firstError = ErrorCode.ERR_UnterminatedStringLit;
            }

            break;
        }

        TextWindow.AdvanceChar();
        _builder.Append(current);
    }

    info.Kind = SyntaxKind.StringLiteralToken;
    info.Text = TextWindow.GetText(intern: false);
    info.StringValue = _builder.ToString();

    return firstError;
}
/// <summary>
/// Scans a whole interpolated string literal: the opening quote, the contents (including any
/// expression holes) and the closing quote. The window must be positioned on the opening quote.
/// </summary>
/// <param name="interpolations">Optional collector for hole spans, forwarded to the contents scan.</param>
/// <param name="info">Receives the token kind.</param>
/// <param name="error">Receives <c>ERR_NewlineInConst</c> if the closing quote is missing and no error is set yet.</param>
internal void ScanISLTop(ArrayBuilder<Interpolation> interpolations, ref TokenInfo info, ref SyntaxDiagnosticInfo error)
{
    Debug.Assert(TextWindow.PeekChar() == '\"');
    TextWindow.AdvanceChar(); // "

    ScanISLContents(interpolations, ref error);

    bool foundClosingQuote = !IsAtEnd() && TextWindow.PeekChar() == '\"';
    if (foundClosingQuote)
    {
        TextWindow.AdvanceChar(); // "
    }
    else if (error == null)
    {
        error = MakeError(TextWindow.Position, 1, ErrorCode.ERR_NewlineInConst);
    }

    info.Kind = SyntaxKind.InterpolatedStringToken;
}
/// <summary>
/// Scans past a delimited comment nested inside an expression hole. The window must be
/// positioned on the opening slash-star; scanning stops after the closing star-slash or at
/// the end of input.
/// </summary>
private void ScanISLNestedComment()
{
    Debug.Assert(TextWindow.PeekChar() == '/');
    TextWindow.AdvanceChar();
    Debug.Assert(TextWindow.PeekChar() == '*');
    TextWindow.AdvanceChar();

    // Running out of input ends the scan; the caller complains about the unterminated quote.
    while (!IsAtEnd())
    {
        var consumed = TextWindow.PeekChar();
        TextWindow.AdvanceChar();

        if (consumed != '*' || TextWindow.PeekChar() != '/')
        {
            continue;
        }

        TextWindow.AdvanceChar(); // skip the '/' that completes the terminator
        return;
    }
}
/// <summary>
/// Scans a quoted literal. The character at the current position is taken as the delimiter;
/// a single quote produces a character literal token, anything else a string literal token.
/// Backslash escapes are decoded through <c>ScanEscapeSequence</c>.
/// </summary>
/// <param name="info">Receives the token kind and the character or string value.</param>
private void ScanStringLiteral(TokenInfo info)
{
    builder.Clear();

    var delimiter = TextWindow.PeekChar();
    TextWindow.AdvanceChar();

    // NOTE(review): nothing in this loop checks for end of input, so an unterminated literal
    // relies entirely on what PeekChar/AdvanceChar do past the end of the text - confirm.
    for (var current = TextWindow.PeekChar(); current != delimiter; current = TextWindow.PeekChar())
    {
        if (current == '\\')
        {
            builder.Append(ScanEscapeSequence());
        }
        else
        {
            TextWindow.AdvanceChar();
            builder.Append(current);
        }
    }

    TextWindow.AdvanceChar(); // closing delimiter

    if (quoteIsDouble(delimiter))
    {
        info.Kind = SyntaxKind.StringLiteralToken;
        info.StringValue = builder.ToString();
    }
    else
    {
        info.Kind = SyntaxKind.CharacterLiteralToken;
        // NOTE(review): indexing assumes at least one character was scanned; an empty
        // character literal would make this throw - confirm that is intended.
        info.CharValue = builder[0];
    }

    // True for every delimiter other than the single quote.
    bool quoteIsDouble(char quote) => quote != '\'';
}
/// <summary>
/// Parses the digits (and optional suffix) of a binary numeric literal at the current window
/// position and stores the text, value kind and value on <paramref name="info"/>.
/// Version-dependent features and malformed or too-large literals are reported via <c>AddError</c>.
/// </summary>
/// <param name="info">Receives <c>Text</c>, <c>ValueKind</c> and the matching value field.</param>
private void ParseBinaryNumber(ref TokenInfo info)
{
    var num = 0UL;
    var digits = 0;
    var hasUnderscores = false;
    var hasOverflown = false;
    char digit;
    // Accumulate binary digits; '_' separators are consumed but do not count as digits.
    while (CharUtils.IsBinary(digit = TextWindow.PeekChar()) || digit == '_')
    {
        TextWindow.AdvanceChar();
        if (digit == '_')
        {
            hasUnderscores = true;
            continue;
        }
        // Next shift will overflow if 63rd bit is set
        if ((num & 0x8000_0000_0000_0000) != 0)
        {
            hasOverflown = true;
        }
        num = (num << 1) | (ulong)CharUtils.DecimalValue(digit);
        digits++;
    }

    // Optional suffix: "ull", "ll" or "i". Longer suffixes are tried first.
    // NOTE(review): the second argument is presumably a case-insensitivity flag - confirm.
    var (isUnsignedLong, isSignedLong, isComplex) = (false, false, false);
    if (TextWindow.AdvanceIfMatches("ull", true))
    {
        isUnsignedLong = true;
    }
    else if (TextWindow.AdvanceIfMatches("ll", true))
    {
        isSignedLong = true;
    }
    else if (TextWindow.AdvanceIfMatches("i", true))
    {
        isComplex = true;
    }

    // Feature checks against the configured syntax options.
    if (!_options.SyntaxOptions.AcceptBinaryNumbers)
    {
        AddError(ErrorCode.ERR_BinaryNumericLiteralNotSupportedInVersion);
    }
    if (!_options.SyntaxOptions.AcceptUnderscoreInNumberLiterals && hasUnderscores)
    {
        AddError(ErrorCode.ERR_UnderscoreInNumericLiteralNotSupportedInVersion);
    }
    if (!Options.SyntaxOptions.AcceptLuaJITNumberSuffixes && (isUnsignedLong || isSignedLong || isComplex))
    {
        AddError(ErrorCode.ERR_NumberSuffixNotSupportedInVersion);
    }

    // Validity checks: no digits at all, or a value that does not fit.
    if (digits < 1)
    {
        num = 0; // Safe default
        AddError(ErrorCode.ERR_InvalidNumber);
    }
    // Without the "ull" suffix the value must also fit in a signed 64-bit integer.
    if (hasOverflown || (num > long.MaxValue && !isUnsignedLong))
    {
        num = 0; // Safe default
        AddError(ErrorCode.ERR_NumericLiteralTooLarge);
    }

    info.Text = TextWindow.GetText(intern: true);
    if (isUnsignedLong)
    {
        info.ValueKind = ValueKind.ULong;
        info.ULongValue = num;
    }
    else if (isSignedLong)
    {
        info.ValueKind = ValueKind.Long;
        info.LongValue = unchecked((long)num);
    }
    else if (isComplex)
    {
        // The literal becomes the imaginary part; the real part is 0.
        info.ValueKind = ValueKind.Complex;
        info.ComplexValue = new Complex(0, num);
    }
    else
    {
        // No suffix: the representation is chosen by the configured binary integer format.
        switch (_options.SyntaxOptions.BinaryIntegerFormat)
        {
            case IntegerFormats.NotSupported:
            case IntegerFormats.Double:
                info.ValueKind = ValueKind.Double;
                info.DoubleValue = num;
                break;
            case IntegerFormats.Int64:
                info.ValueKind = ValueKind.Long;
                info.LongValue = unchecked((long)num);
                break;
            default:
                throw ExceptionUtilities.UnexpectedValue(_options.SyntaxOptions.BinaryIntegerFormat);
        }
    }
}
/// <summary>
/// Scans one line of a multi-line raw string literal, starting at the newline that precedes it.
/// The line's leading whitespace is captured in <paramref name="indentationWhitespace"/>.
/// </summary>
/// <param name="startingQuoteCount">Number of quotes in the literal's opening delimiter.</param>
/// <param name="indentationWhitespace">Cleared, then filled with this line's leading whitespace.</param>
/// <returns>
/// True if the line was a content line that ended at a newline (so another line follows);
/// false if this was the terminating line or an error was reported.
/// </returns>
private bool ScanMultiLineRawStringLiteralLine(
    int startingQuoteCount,
    StringBuilder indentationWhitespace)
{
    TextWindow.AdvancePastNewLine();

    indentationWhitespace.Clear();
    ConsumeWhitespace(indentationWhitespace);

    // A quote run right after the leading whitespace may be the closing delimiter.
    var quotesSeen = ConsumeQuoteSequence();
    if (quotesSeen >= startingQuoteCount)
    {
        var surplus = quotesSeen - startingQuoteCount;
        if (surplus > 0)
        {
            // Nothing valid can follow the closing delimiter with more quotes, so flag the extras.
            this.AddError(
                position: TextWindow.Position - surplus,
                width: surplus,
                ErrorCode.ERR_TooManyQuotesForRawString);
        }

        // Done scanning lines.
        return false;
    }

    // Ordinary content line: consume to the end of the line (or of the text, which is an error).
    while (true)
    {
        var ch = TextWindow.PeekChar();

        if (IsAtEndOfText(ch))
        {
            this.AddError(TextWindow.Position, width: 0, ErrorCode.ERR_UnterminatedRawString);
            return false;
        }

        if (SyntaxFacts.IsNewLine(ch))
        {
            return true;
        }

        if (ch != '"')
        {
            TextWindow.AdvanceChar();
            continue;
        }

        // A content line may not contain a quote run as long as the delimiter (or longer).
        quotesSeen = ConsumeQuoteSequence();
        if (quotesSeen >= startingQuoteCount)
        {
            this.AddError(
                position: TextWindow.Position - quotesSeen,
                width: quotesSeen,
                ErrorCode.ERR_RawStringDelimiterOnOwnLine);
            return false;
        }
    }
}
/// <summary>
/// This method is essentially the same as ScanIdentifier_SlowPath,
/// except that it can handle XML entities. Since ScanIdentifier
/// is hot code and since this method does extra work, it seems
/// worthwhile to separate it from the common case.
/// </summary>
/// <param name="info">
/// Receives the identifier's <c>Text</c> and <c>StringValue</c> (both null on failure), and
/// the <c>IsVerbatim</c> / <c>HasIdentifierEscapeSequence</c> flags when applicable.
/// </param>
/// <returns>
/// True if at least one identifier character was scanned; otherwise false, in which case the
/// window is reset to the position it had on entry.
/// </returns>
private bool ScanIdentifier_CrefSlowPath(ref TokenInfo info)
{
    Debug.Assert(InXmlCrefOrNameAttributeValue);
    int start = TextWindow.Position;
    this.ResetIdentBuffer();

    if (AdvanceIfMatches('@'))
    {
        // In xml name attribute values, the '@' is part of the value text of the identifier
        // (to match dev11).
        if (InXmlNameAttributeValue)
        {
            AddIdentChar('@');
        }
        else
        {
            info.IsVerbatim = true;
        }
    }

    while (true)
    {
        // Remember where this character started so it can be "un-consumed" if it turns out
        // not to belong to the identifier.
        int beforeConsumed = TextWindow.Position;
        char consumedChar;
        char consumedSurrogate;
        if (TextWindow.PeekChar() == '&')
        {
            if (!TextWindow.TryScanXmlEntity(out consumedChar, out consumedSurrogate))
            {
                // If it's not a valid entity, then it's not part of the identifier.
                TextWindow.Reset(beforeConsumed);
                goto LoopExit;
            }
        }
        else
        {
            consumedChar = TextWindow.NextChar();
            consumedSurrogate = SlidingTextWindow.InvalidCharacter;
        }

        // NOTE: If the surrogate is non-zero, then consumedChar won't match
        // any of the cases below (UTF-16 guarantees that members of surrogate
        // pairs aren't separately valid).
        bool isEscaped = false;
    top:
        switch (consumedChar)
        {
            case '\\':
                // NOTE: For completeness, we should allow xml entities in unicode escape
                // sequences (DevDiv #16321). Since it is not currently a priority, we will
                // try to make the interim behavior sensible: we will only attempt to scan
                // a unicode escape if NONE of the characters are XML entities (including
                // the backslash, which we have already consumed).
                // When we're ready to implement this behavior, we can drop the position
                // check and use AdvanceIfMatches instead of PeekChar.
                if (!isEscaped && (TextWindow.Position == beforeConsumed + 1) && (TextWindow.PeekChar() == 'u' || TextWindow.PeekChar() == 'U'))
                {
                    Debug.Assert(consumedSurrogate == SlidingTextWindow.InvalidCharacter, "Since consumedChar == '\\'");
                    info.HasIdentifierEscapeSequence = true;
                    TextWindow.Reset(beforeConsumed);
                    // ^^^^^^^ otherwise \u005Cu1234 looks just like \u1234! (i.e. escape within escape)
                    isEscaped = true;
                    SyntaxDiagnosticInfo error;
                    consumedChar = TextWindow.NextUnicodeEscape(out consumedSurrogate, out error);
                    AddCrefError(error);
                    // Re-run the classification on the decoded character.
                    goto top;
                }
                goto default;
            case '_':
            case 'A':
            case 'B':
            case 'C':
            case 'D':
            case 'E':
            case 'F':
            case 'G':
            case 'H':
            case 'I':
            case 'J':
            case 'K':
            case 'L':
            case 'M':
            case 'N':
            case 'O':
            case 'P':
            case 'Q':
            case 'R':
            case 'S':
            case 'T':
            case 'U':
            case 'V':
            case 'W':
            case 'X':
            case 'Y':
            case 'Z':
            case 'a':
            case 'b':
            case 'c':
            case 'd':
            case 'e':
            case 'f':
            case 'g':
            case 'h':
            case 'i':
            case 'j':
            case 'k':
            case 'l':
            case 'm':
            case 'n':
            case 'o':
            case 'p':
            case 'q':
            case 'r':
            case 's':
            case 't':
            case 'u':
            case 'v':
            case 'w':
            case 'x':
            case 'y':
            case 'z':
                {
                    // Again, these are the 'common' identifier characters...
                    break;
                }
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
                {
                    // A digit cannot start an identifier.
                    if (this._identLen == 0)
                    {
                        TextWindow.Reset(beforeConsumed);
                        goto LoopExit;
                    }
                    // Again, these are the 'common' identifier characters...
                    break;
                }
            case ' ':
            case '$':
            case '\t':
            case '.':
            case ';':
            case '(':
            case ')':
            case ',':
            case '<':
                // ...and these are the 'common' stop characters.
                TextWindow.Reset(beforeConsumed);
                goto LoopExit;
            case SlidingTextWindow.InvalidCharacter:
                // The sentinel only means "end of text" if the window really is at the end.
                if (!TextWindow.IsReallyAtEnd())
                {
                    goto default;
                }
                TextWindow.Reset(beforeConsumed);
                goto LoopExit;
            default:
                {
                    // This is the 'expensive' call
                    if (this._identLen == 0 && consumedChar > 127 && SyntaxKindFacts.IsIdentifierStartCharacter(consumedChar))
                    {
                        break;
                    }
                    else if (this._identLen > 0 && consumedChar > 127 && SyntaxKindFacts.IsIdentifierPartCharacter(consumedChar))
                    {
                        //// BUG 424819 : Handle identifier chars > 0xFFFF via surrogate pairs
                        if (SyntaxKindFacts.IsFormattingChar(consumedChar))
                        {
                            continue; // Ignore formatting characters
                        }
                        break;
                    }
                    else
                    {
                        // Not a valid identifier character, so bail.
                        TextWindow.Reset(beforeConsumed);
                        goto LoopExit;
                    }
                }
        }

        // The character was accepted: add it (and its surrogate partner, if any) to the buffer.
        this.AddIdentChar(consumedChar);
        if (consumedSurrogate != SlidingTextWindow.InvalidCharacter)
        {
            this.AddIdentChar(consumedSurrogate);
        }
    }

LoopExit:
    if (this._identLen > 0)
    {
        // NOTE: If we don't intern the string value, then we won't get a hit
        // in the keyword dictionary! (It searches for a key using identity.)
        // The text does not have to be interned (and probably shouldn't be
        // if it contains entities (else-case).
        var width = TextWindow.Width; // exact size of input characters
        // id buffer is identical to width in input
        if (this._identLen == width)
        {
            info.StringValue = TextWindow.GetInternedText();
            info.Text = info.StringValue;
        }
        else
        {
            info.StringValue = TextWindow.Intern(this._identBuffer, 0, this._identLen);
            info.Text = TextWindow.GetText(intern: false);
        }
        return (true);
    }
    else
    {
        info.Text = null;
        info.StringValue = null;
        TextWindow.Reset(start);
        return (false);
    }
}
#pragma warning restore IDE0079 // Remove unnecessary suppression
/// <summary>
/// Parses the digits, optional fraction/exponent and optional suffix of a hexadecimal numeric
/// literal at the current window position and stores the text, value kind and value on
/// <paramref name="info"/>. Unsupported features and out-of-range values are reported via
/// <c>AddError</c>.
/// </summary>
/// <param name="info">Receives <c>Text</c>, <c>ValueKind</c> and the matching value field.</param>
private void ParseHexadecimalNumber(ref TokenInfo info)
{
    // _builder accumulates the literal without '_' separators: digits, then optionally
    // '.' + digits and 'p' + sign + decimal exponent.
    _builder.Clear();
    ConsumeHexDigits();
    var isHexFloat = false;
    if (TextWindow.PeekChar() == '.')
    {
        TextWindow.AdvanceChar();
        isHexFloat = true;
        _builder.Append('.');
        ConsumeHexDigits();
    }
    // Binary exponent marker ('p' or 'P'); it is always appended in lower case.
    if (CharUtils.AsciiLowerCase(TextWindow.PeekChar()) == 'p')
    {
        TextWindow.AdvanceChar();
        isHexFloat = true;
        _builder.Append('p');
        if (TextWindow.PeekChar() is '+' or '-')
        {
            _builder.Append(TextWindow.NextChar());
        }
        ConsumeDecimalDigits(_builder);
    }

    // Optional suffix: "ull", "ll" or "i". The integer suffixes are consumed but rejected on
    // hex floats, in which case the literal is still treated as a float below.
    var (isUnsignedLong, isSignedLong, isComplex) = (false, false, false);
    if (TextWindow.AdvanceIfMatches("ull", true))
    {
        if (isHexFloat)
        {
            AddError(ErrorCode.ERR_LuajitSuffixInFloat);
        }
        else
        {
            isUnsignedLong = true;
        }
    }
    else if (TextWindow.AdvanceIfMatches("ll", true))
    {
        if (isHexFloat)
        {
            AddError(ErrorCode.ERR_LuajitSuffixInFloat);
        }
        else
        {
            isSignedLong = true;
        }
    }
    else if (TextWindow.AdvanceIfMatches("i", true))
    {
        isComplex = true;
    }

    info.Text = TextWindow.GetText(intern: true);
    // Underscores are detected from the token text because _builder never contains them.
    if (!_options.SyntaxOptions.AcceptUnderscoreInNumberLiterals && info.Text.IndexOf('_') >= 0)
    {
        AddError(ErrorCode.ERR_UnderscoreInNumericLiteralNotSupportedInVersion);
    }
    if (!Options.SyntaxOptions.AcceptLuaJITNumberSuffixes && (isUnsignedLong || isSignedLong || isComplex))
    {
        AddError(ErrorCode.ERR_NumberSuffixNotSupportedInVersion);
    }

    if (isUnsignedLong)
    {
        // On a failed parse 'result' is left at its TryParse default and an error is reported.
        if (!ulong.TryParse(TextWindow.Intern(_builder), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out var result))
        {
            AddError(ErrorCode.ERR_NumericLiteralTooLarge);
        }
        info.ValueKind = ValueKind.ULong;
        info.ULongValue = result;
    }
    else if (isSignedLong)
    {
        if (!long.TryParse(TextWindow.Intern(_builder), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out var result))
        {
            AddError(ErrorCode.ERR_NumericLiteralTooLarge);
        }
        info.ValueKind = ValueKind.Long;
        info.LongValue = result;
    }
    else if (isComplex)
    {
        // The literal becomes the imaginary part; on overflow it stays 0 and an error is reported.
        var result = 0d;
        try
        {
            result = HexFloat.DoubleFromHexString(TextWindow.Intern(_builder));
        }
        catch (OverflowException)
        {
            AddError(ErrorCode.ERR_DoubleOverflow);
        }
        info.ValueKind = ValueKind.Complex;
        info.ComplexValue = new Complex(0, result);
    }
    else if (isHexFloat || _options.SyntaxOptions.HexIntegerFormat == IntegerFormats.NotSupported)
    {
        // This branch is also taken for plain hex integers when hex integers are configured
        // as not supported.
        if (!_options.SyntaxOptions.AcceptHexFloatLiterals)
        {
            AddError(ErrorCode.ERR_HexFloatLiteralNotSupportedInVersion);
        }
        var result = 0d;
        try
        {
            result = HexFloat.DoubleFromHexString(TextWindow.Intern(_builder));
        }
        catch (OverflowException)
        {
            AddError(ErrorCode.ERR_DoubleOverflow);
        }
        info.ValueKind = ValueKind.Double;
        info.DoubleValue = result;
    }
    else
    {
        // Plain hex integer: parsed as a 64-bit value, then stored per the configured format.
        if (!long.TryParse(TextWindow.Intern(_builder), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out var result))
        {
            AddError(ErrorCode.ERR_NumericLiteralTooLarge);
        }
        switch (_options.SyntaxOptions.HexIntegerFormat)
        {
            case IntegerFormats.Double:
                info.ValueKind = ValueKind.Double;
                info.DoubleValue = result;
                break;
            case IntegerFormats.Int64:
                info.ValueKind = ValueKind.Long;
                info.LongValue = result;
                break;
            default:
                throw ExceptionUtilities.UnexpectedValue(_options.SyntaxOptions.HexIntegerFormat);
        }
    }
}
/// <summary>
/// Parses the digits of an octal numeric literal at the current window position and stores
/// the text, value kind and value on <paramref name="info"/>. Unsupported features, a missing
/// digit sequence and overflow are reported via <c>AddError</c>.
/// </summary>
/// <param name="info">Receives <c>Text</c>, <c>ValueKind</c> and the matching value field.</param>
private void ParseOctalNumber(ref TokenInfo info)
{
    long value = 0;
    int digitCount = 0;
    bool sawSeparator = false;
    bool overflowed = false;

    while (true)
    {
        char current = TextWindow.PeekChar();
        bool isSeparator = current == '_';
        if (!CharUtils.IsOctal(current) && !isSeparator)
        {
            break;
        }

        TextWindow.AdvanceChar();
        if (isSeparator)
        {
            sawSeparator = true;
            continue;
        }

        // If any of these bits are set, the 3-bit shift below overflows.
        if ((value & 0x7000_0000_0000_0000) != 0)
        {
            overflowed = true;
        }

        value = (value << 3) | CharUtils.DecimalValue(current);
        digitCount++;
    }

    var syntaxOptions = _options.SyntaxOptions;
    if (!syntaxOptions.AcceptOctalNumbers)
    {
        AddError(ErrorCode.ERR_OctalNumericLiteralNotSupportedInVersion);
    }

    if (sawSeparator && !syntaxOptions.AcceptUnderscoreInNumberLiterals)
    {
        AddError(ErrorCode.ERR_UnderscoreInNumericLiteralNotSupportedInVersion);
    }

    if (digitCount < 1)
    {
        value = 0; // Safe default
        AddError(ErrorCode.ERR_InvalidNumber);
    }

    if (overflowed)
    {
        value = 0; // Safe default
        AddError(ErrorCode.ERR_NumericLiteralTooLarge);
    }

    info.Text = TextWindow.GetText(intern: true);

    // The representation is chosen by the configured octal integer format.
    switch (_options.SyntaxOptions.OctalIntegerFormat)
    {
        case IntegerFormats.Int64:
            info.ValueKind = ValueKind.Long;
            info.LongValue = value;
            break;

        case IntegerFormats.NotSupported:
        case IntegerFormats.Double:
            info.ValueKind = ValueKind.Double;
            info.DoubleValue = value;
            break;

        default:
            throw ExceptionUtilities.UnexpectedValue(_options.SyntaxOptions.OctalIntegerFormat);
    }
}
/// <summary>
/// Scans a multi-line raw string literal whose opening delimiter (and any trailing whitespace
/// on that line) has already been consumed. Works in two passes: the first finds the end of
/// the literal and the indentation whitespace of the closing line; the second rewinds and
/// builds the literal's value line by line.
/// </summary>
/// <param name="info">Receives the token kind and, if the first pass succeeds, the string value.</param>
/// <param name="startingQuoteCount">Number of quotes in the opening delimiter.</param>
private void ScanMultiLineRawStringLiteral(ref TokenInfo info, int startingQuoteCount)
{
    info.Kind = SyntaxKind.MultiLineRawStringLiteralToken;

    // The indentation-whitespace computed from the very last line of the raw string literal
    var indentationWhitespace = PooledStringBuilder.GetInstance();

    // The leading whitespace of whatever line we are currently on.
    var currentLineWhitespace = PooledStringBuilder.GetInstance();
    try
    {
        // Do the first pass, finding the end of the raw string, and determining the 'indentation whitespace'
        // that must be complementary with all content lines of the raw string literal.
        var afterStartDelimiter = TextWindow.Position;
        Debug.Assert(SyntaxFacts.IsNewLine(TextWindow.PeekChar()));

        var contentLineCount = 0;
        while (ScanMultiLineRawStringLiteralLine(startingQuoteCount, indentationWhitespace.Builder))
        {
            contentLineCount++;
        }

        // If the initial scan failed then just bail out without a constant value.
        if (this.HasErrors)
        {
            return;
        }

        // The trivial raw string literal is not legal in the language.
        if (contentLineCount == 0)
        {
            this.AddError(
                position: TextWindow.Position - startingQuoteCount,
                width: startingQuoteCount,
                ErrorCode.ERR_RawStringMustContainContent);
            return;
        }

        // Now, do the second pass, building up the literal value. This may produce an error as well if the
        // indentation whitespace of the lines isn't complementary.

        // Reset us to right after the starting delimiter. Note: if we fail to generate a constant value we'll
        // ensure that we reset back to the original end we scanned to above.
        var tokenEnd = TextWindow.Position;
        TextWindow.Reset(afterStartDelimiter);
        Debug.Assert(SyntaxFacts.IsNewLine(TextWindow.PeekChar()));

        for (var currentLine = 0; currentLine < contentLineCount; currentLine++)
        {
            AddMultiLineRawStringLiteralLineContents(
                indentationWhitespace.Builder,
                currentLineWhitespace.Builder,
                firstContentLine: currentLine == 0);

            // If processing the line produced errors, then bail out from continued processing.
            if (this.HasErrors)
            {
                break;
            }
        }

        // On error the value is the empty string rather than a partially built one.
        info.StringValue = this.HasErrors ? "" : TextWindow.Intern(_builder);

        // Make sure that even if we fail to determine the constant content value of the string that
        // we still consume all the way to original end that we computed.
        TextWindow.Reset(tokenEnd);
    }
    finally
    {
        // Both pooled builders are released on every exit path, including the early returns above.
        indentationWhitespace.Free();
        currentLineWhitespace.Free();
    }
}
/// <summary>
/// Parses a short (quoted) string literal starting at its opening delimiter and returns the
/// decoded contents. Escape sequences are decoded into <c>_builder</c>; unsupported or
/// malformed escapes, unescaped line breaks and a missing closing delimiter are reported via
/// <c>AddError</c>.
/// </summary>
/// <returns>The interned decoded contents of the string, without the delimiters.</returns>
private string ParseShortString()
{
    _builder.Clear();
    var delim = TextWindow.NextChar();
    LorettaDebug.Assert(delim is '"' or '\'' or '`');

    char ch;
    // Scan until the closing delimiter or the end of input.
    while (!IsAtEnd(ch = TextWindow.PeekChar()) && ch != delim)
    {
        var charStart = TextWindow.Position;
        switch (ch)
        {
            #region Escapes
            case '\\':
                {
                    // escapeStart points at the backslash so diagnostics cover the whole escape.
                    var escapeStart = TextWindow.Position;
                    TextWindow.AdvanceChar();

                    switch (ch = TextWindow.PeekChar())
                    {
                        // Escaped line break: kept in the value. A following, different
                        // newline character is treated as part of the same break.
                        case '\n':
                        case '\r':
                            {
                                _builder.Append(TextWindow.NextChar());
                                char ch2;
                                if (CharUtils.IsNewLine(ch2 = TextWindow.PeekChar()) && ch != ch2)
                                {
                                    _builder.Append(TextWindow.NextChar());
                                }
                                break;
                            }
                        case 'a':
                            TextWindow.AdvanceChar();
                            _builder.Append('\a');
                            break;
                        case 'b':
                            TextWindow.AdvanceChar();
                            _builder.Append('\b');
                            break;
                        case 'f':
                            TextWindow.AdvanceChar();
                            _builder.Append('\f');
                            break;
                        case 'n':
                            TextWindow.AdvanceChar();
                            _builder.Append('\n');
                            break;
                        case 'r':
                            TextWindow.AdvanceChar();
                            _builder.Append('\r');
                            break;
                        case 't':
                            TextWindow.AdvanceChar();
                            _builder.Append('\t');
                            break;
                        case 'v':
                            TextWindow.AdvanceChar();
                            _builder.Append('\v');
                            break;
                        case '\\':
                            TextWindow.AdvanceChar();
                            _builder.Append('\\');
                            break;
                        case '\'':
                            TextWindow.AdvanceChar();
                            _builder.Append('\'');
                            break;
                        case '"':
                            TextWindow.AdvanceChar();
                            _builder.Append('"');
                            break;
                        // \z skips all following whitespace and contributes nothing to the value.
                        case 'z':
                            TextWindow.AdvanceChar();
                            while (CharUtils.IsWhitespace(TextWindow.PeekChar()))
                            {
                                TextWindow.AdvanceChar();
                            }
                            if (!_options.SyntaxOptions.AcceptWhitespaceEscape)
                            {
                                AddError(escapeStart, TextWindow.Position - escapeStart, ErrorCode.ERR_WhitespaceEscapeNotSupportedInVersion);
                            }
                            break;
                        // Decimal escape: up to three digits, value at most 255.
                        case '0':
                        case '1':
                        case '2':
                        case '3':
                        case '4':
                        case '5':
                        case '6':
                        case '7':
                        case '8':
                        case '9':
                            {
                                var parsedCharInteger = parseDecimalInteger(escapeStart);
                                // char.MaxValue is the helper's "invalid escape" sentinel.
                                if (parsedCharInteger != char.MaxValue)
                                {
                                    _builder.Append(parsedCharInteger);
                                }
                                break;
                            }
                        // Hexadecimal escape: up to two hex digits.
                        case 'x':
                            {
                                TextWindow.AdvanceChar();
                                var parsedCharInteger = parseHexadecimalEscapeInteger(escapeStart);
                                // NOTE(review): the hex helper yields 0 (not char.MaxValue) when no
                                // digit is read, so this check does not filter that failure - confirm.
                                if (parsedCharInteger != char.MaxValue)
                                {
                                    _builder.Append(parsedCharInteger);
                                }
                                if (!_options.SyntaxOptions.AcceptHexEscapesInStrings)
                                {
                                    AddError(escapeStart, TextWindow.Position - escapeStart, ErrorCode.ERR_HexStringEscapesNotSupportedInVersion);
                                }
                            }
                            break;
                        // Unicode escape: hex code point, expected between braces.
                        case 'u':
                            {
                                TextWindow.AdvanceChar();
                                var parsed = parseUnicodeEscape(escapeStart);
                                _builder.Append(parsed);
                                if (!_options.SyntaxOptions.AcceptUnicodeEscape)
                                {
                                    AddError(escapeStart, TextWindow.Position - escapeStart, ErrorCode.ERR_UnicodeEscapesNotSupportedLuaInVersion);
                                }
                            }
                            break;
                        default:
                            // When invalid escapes are accepted, only the backslash has been
                            // consumed and the next character is handled by the outer loop.
                            if (!_options.SyntaxOptions.AcceptInvalidEscapes)
                            {
                                // Skip the character after the escape.
                                TextWindow.AdvanceChar();
                                AddError(escapeStart, TextWindow.Position - escapeStart, ErrorCode.ERR_InvalidStringEscape);
                            }
                            break;
                    }
                }
                break;
            #endregion Escapes

            // Unescaped line break: kept in the value but reported as an error.
            case '\n':
            case '\r':
                {
                    _builder.Append(TextWindow.NextChar());
                    char ch2;
                    if (CharUtils.IsNewLine(ch2 = TextWindow.PeekChar()) && ch != ch2)
                    {
                        _builder.Append(TextWindow.NextChar());
                    }
                    AddError(charStart, TextWindow.Position - charStart, ErrorCode.ERR_UnescapedLineBreakInString);
                }
                break;
            default:
                _builder.Append(TextWindow.NextChar());
                break;
        }
    }

    // Consume the closing delimiter, or report the string as unfinished.
    if (TextWindow.PeekChar() == delim)
    {
        TextWindow.AdvanceChar();
    }
    else
    {
        AddError(ErrorCode.ERR_UnfinishedString);
    }

    return (TextWindow.Intern(_builder));

    // Reads up to three decimal digits. Returns the character with that code, or
    // char.MaxValue (after reporting an error) if no digit was read or the value exceeds 255.
    char parseDecimalInteger(int start)
    {
        var readChars = 0;
        var num = 0;
        char ch;
        while (readChars < 3 && CharUtils.IsDecimal(ch = TextWindow.PeekChar()))
        {
            TextWindow.AdvanceChar();
            num = (num * 10) + (ch - '0');
            readChars++;
        }

        if (readChars < 1 || num > 255)
        {
            AddError(start, TextWindow.Position - start, ErrorCode.ERR_InvalidStringEscape);
            return (char.MaxValue);
        }

        return ((char)num);
    }

    // Reads up to maxDigits hexadecimal digits and returns their value. If no digit is read,
    // reports lessThanZeroErrorCode and returns 0.
    ulong parseHexadecimalNumber(int start, int maxDigits, ErrorCode lessThanZeroErrorCode)
    {
        var readChars = 0;
        var num = 0L;
        while (readChars < maxDigits)
        {
            var peek = TextWindow.PeekChar();
            if (CharUtils.IsDecimal(peek))
            {
                TextWindow.AdvanceChar();
                num = (num << 4) | (uint)(peek - '0');
            }
            else if (CharUtils.IsHexadecimal(peek))
            {
                // Letter digit: lower-cased first, so both 'a'-'f' and 'A'-'F' map to 10-15.
                TextWindow.AdvanceChar();
                num = (num << 4) | (uint)(10 + CharUtils.AsciiLowerCase(peek) - 'a');
            }
            else
            {
                break;
            }
            readChars++;
        }

        if (readChars < 1)
        {
            AddError(start, TextWindow.Position - start, lessThanZeroErrorCode);
            return (0UL);
        }

        return ((ulong)num);
    }

    // Reads the (up to two) hex digits of a \x escape and returns them as a character.
    char parseHexadecimalEscapeInteger(int start) =>
        (char)parseHexadecimalNumber(start, 2, ErrorCode.ERR_InvalidStringEscape);

    // Reads the body of a \u escape: optional '{', up to 16 hex digits, optional '}'.
    // Missing braces and code points above 0x10FFFF are reported; the code point is then
    // clamped to 0x10FFFF. Returns the code point as a string (one or two UTF-16 units).
    string parseUnicodeEscape(int start)
    {
        var missingOpeningBrace = TextWindow.PeekChar() is not '{';
        if (!missingOpeningBrace)
        {
            TextWindow.AdvanceChar();
        }

        var codepoint = parseHexadecimalNumber(start, 16, ErrorCode.ERR_HexDigitExpected);

        var missingClosingBrace = TextWindow.PeekChar() is not '}';
        if (!missingClosingBrace)
        {
            TextWindow.AdvanceChar();
        }

        // Brace errors are reported only after the whole escape has been consumed, so their
        // spans cover everything that was read.
        if (missingOpeningBrace)
        {
            AddError(start, TextWindow.Position - start, ErrorCode.ERR_UnicodeEscapeMissingOpenBrace);
        }
        if (missingClosingBrace)
        {
            AddError(start, TextWindow.Position - start, ErrorCode.ERR_UnicodeEscapeMissingCloseBrace);
        }
        if (codepoint > 0x10FFFF)
        {
            AddError(start, TextWindow.Position - start, ErrorCode.ERR_EscapeTooLarge, "10FFFF");
            codepoint = 0x10FFFF;
        }

        // Return the codepoint itself if it's in the BMP.
        // NOTE: It *is* technically incorrect to consider a surrogate
        // an Unicode codepoint but Lua accepts it so we do it as well.
        if (codepoint <= 0xFFFF)
        {
            return (char.ToString((char)codepoint));
        }

        return (char.ConvertFromUtf32((int)codepoint));
    }
}
/// <summary>
/// Scans a single- or double-quoted literal that starts at the current window position
/// and records its kind, text and value in <paramref name="info"/>.
/// </summary>
/// <param name="info">Token record to fill in. When the current character is not a quote,
/// its kind is set to <see cref="SyntaxKind.None"/> and its text to null.</param>
/// <param name="allowEscapes">When true, a backslash starts an escape sequence; when false,
/// a backslash is treated like any other character.</param>
private void ScanStringLiteral(ref TokenInfo info, bool allowEscapes = true)
{
    var delimiter = TextWindow.PeekChar();
    if (delimiter != '\'' && delimiter != '"')
    {
        // Not positioned on a literal at all.
        info.Kind = SyntaxKind.None;
        info.Text = null;
        return;
    }

    TextWindow.AdvanceChar();
    _builder.Length = 0;

    for (;;)
    {
        char current = TextWindow.PeekChar();

        if (current == '\\' && allowEscapes)
        {
            // An escape yields one character, plus a second one unless the
            // out value is the InvalidCharacter sentinel.
            char first = this.ScanEscapeSequence(out char second);
            _builder.Append(first);
            if (second != SlidingTextWindow.InvalidCharacter)
            {
                _builder.Append(second);
            }

            continue;
        }

        if (current == delimiter)
        {
            // Closing quote: consume it and stop.
            TextWindow.AdvanceChar();
            break;
        }

        // The sentinel value may also be genuine source content, so it only counts
        // as end-of-input when the window confirms it really is at the end.
        bool hitLineOrInputEnd =
            SyntaxFacts.IsNewLine(current) ||
            (current == SlidingTextWindow.InvalidCharacter && TextWindow.IsReallyAtEnd());
        if (hitLineOrInputEnd)
        {
            Debug.Assert(TextWindow.Width > 0);
            this.AddError(ErrorCode.ERR_NewlineInConst);
            break;
        }

        TextWindow.AdvanceChar();
        _builder.Append(current);
    }

    info.Text = TextWindow.GetText(true);

    if (delimiter != '\'')
    {
        info.Kind = SyntaxKind.StringLiteralToken;
        info.StringValue = _builder.Length > 0 ? TextWindow.Intern(_builder) : string.Empty;
        return;
    }

    info.Kind = SyntaxKind.CharacterLiteralToken;

    int scannedLength = _builder.Length;
    if (scannedLength == 0)
    {
        this.AddError(ErrorCode.ERR_EmptyCharConst);
        info.StringValue = string.Empty;
        info.CharValue = SlidingTextWindow.InvalidCharacter;
        return;
    }

    if (scannedLength != 1)
    {
        this.AddError(ErrorCode.ERR_TooManyCharsInConst);
    }

    // Even when too many characters were scanned, the first one is reported as the value.
    info.StringValue = TextWindow.Intern(_builder);
    info.CharValue = info.StringValue[0];
}
/// <summary>
/// Scan past the hole inside an interpolated string literal, leaving the current character
/// on the closing <paramref name="endingChar"/> (if any).
/// </summary>
/// <param name="endingChar">The closing delimiter that ends the scan; the window is left positioned on it.</param>
/// <param name="isHole">Not consulted by this method's body.</param>
/// <param name="error">Receives the first diagnostic found. This method itself only assigns it while
/// it is still null; it is also passed by reference to the nested scanners.</param>
private void ScanISLHoleBalancedText(char endingChar, bool isHole, ref SyntaxDiagnosticInfo error)
{
    while (true)
    {
        if (IsAtEnd())
        {
            // the caller will complain
            return;
        }

        char ch = TextWindow.PeekChar();
        switch (ch)
        {
            case '}':
            case ')':
            case ']':
                if (ch == endingChar)
                {
                    return;
                }

                // A closer that does not match the expected one: report it once, then skip it.
                if (error == null)
                {
                    error = MakeError(TextWindow.Position, 1, ErrorCode.ERR_SyntaxError, endingChar.ToString());
                }

                goto default;

            case '\"':
            case '\'':
                // handle string or character literal inside an expression hole.
                ScanISLNestedString(ch, ref error);
                continue;

            case '@':
                if (TextWindow.PeekChar(1) == '\"')
                {
                    // check for verbatim string inside an expression hole.
                    ScanISLNestedVerbatimString(ref error);

                    // The nested scanner advances the window itself, so the character it
                    // stopped on must be examined by the next iteration rather than skipped.
                    // Falling through to the default case here would step over it, for
                    // example over the hole's closing delimiter.
                    continue;
                }

                // A lone '@' is ordinary expression text.
                goto default;

            case '/':
                switch (TextWindow.PeekChar(1))
                {
                    case '/':
                        // error: single-line comment not allowed in an interpolated string
                        if (error == null)
                        {
                            error = MakeError(TextWindow.Position, 2, ErrorCode.ERR_SingleLineCommentInExpressionHole);
                        }

                        TextWindow.AdvanceChar();
                        TextWindow.AdvanceChar();
                        continue;

                    case '*':
                        // check for and scan /* comment */
                        ScanISLNestedComment();
                        continue;

                    default:
                        TextWindow.AdvanceChar();
                        continue;
                }

            case '{':
                ScanISLHoleBracketed('{', '}', ref error);
                continue;

            case '(':
                ScanISLHoleBracketed('(', ')', ref error);
                continue;

            case '[':
                ScanISLHoleBracketed('[', ']', ref error);
                continue;

            default:
                // part of code in the expression hole
                TextWindow.AdvanceChar();
                continue;
        }
    }
}
/// <summary>
/// Scans a verbatim string literal (<c>@"..."</c>) that starts at the current window position
/// and records its kind, text and value in <paramref name="info"/>.
/// </summary>
/// <param name="info">Token record to fill in. When the window is not positioned on <c>@"</c>,
/// its kind is set to <see cref="SyntaxKind.None"/> and its text and string value to null.</param>
/// <param name="allowNewlines">When false, a newline character ends the scan with an
/// unterminated-string error instead of becoming part of the value.</param>
private void ScanVerbatimStringLiteral(ref TokenInfo info, bool allowNewlines = true)
{
    _builder.Length = 0;

    bool startsVerbatim = TextWindow.PeekChar() == '@' && TextWindow.PeekChar(1) == '"';
    if (!startsVerbatim)
    {
        info.Kind = SyntaxKind.None;
        info.Text = null;
        info.StringValue = null;
        return;
    }

    // Step over the leading @".
    TextWindow.AdvanceChar(2);

    for (;;)
    {
        char current = TextWindow.PeekChar();

        if (current == '"')
        {
            TextWindow.AdvanceChar();
            if (TextWindow.PeekChar() != '"')
            {
                // A lone quote closes the literal.
                break;
            }

            // Doubled quote -- skip the second one and keep a single quote in the value.
            TextWindow.AdvanceChar();
            _builder.Append(current);
            continue;
        }

        if (current == SlidingTextWindow.InvalidCharacter && TextWindow.IsReallyAtEnd())
        {
            // Ran out of source before finding the closing quote.
            this.AddError(ErrorCode.ERR_UnterminatedStringLit);
            break;
        }

        if (!allowNewlines && SyntaxFacts.IsNewLine(current))
        {
            // The newline is left unconsumed; the literal is reported as unterminated.
            this.AddError(ErrorCode.ERR_UnterminatedStringLit);
            break;
        }

        TextWindow.AdvanceChar();
        _builder.Append(current);
    }

    info.Kind = SyntaxKind.StringLiteralToken;
    info.Text = TextWindow.GetText(false);
    info.StringValue = _builder.ToString();
}
/// <summary>
/// Reports whether the current character is a newline, or is the sliding window's
/// invalid-character sentinel while the window confirms it really is at the end.
/// </summary>
/// <returns>True when scanning should stop at the current position.</returns>
private bool IsAtEnd()
{
    char current = TextWindow.PeekChar();

    if (SyntaxFacts.IsNewLine(current))
    {
        return true;
    }

    if (current != SlidingTextWindow.InvalidCharacter)
    {
        return false;
    }

    // The sentinel alone is not conclusive; ask the window to confirm.
    return TextWindow.IsReallyAtEnd();
}