#pragma warning restore IDE0079 // Remove unnecessary suppression

/// <summary>
/// Lexes the body of a hexadecimal numeric literal (digits, optional fraction,
/// optional binary exponent and optional <c>ull</c>/<c>ll</c>/<c>i</c> suffix) and
/// stores the token text, value kind and value in <paramref name="info"/>.
/// </summary>
/// <remarks>
/// NOTE(review): scanning starts directly at the hex digits, so presumably the
/// caller has already consumed the <c>0x</c> prefix - confirm at the call site.
/// </remarks>
/// <param name="info">Token record that receives the text and the parsed value.</param>
private void ParseHexadecimalNumber(ref TokenInfo info)
{
    _builder.Clear();
    ConsumeHexDigits();

    // A fractional part turns the literal into a hex float.
    var hasFraction = TextWindow.PeekChar() == '.';
    if (hasFraction)
    {
        TextWindow.AdvanceChar();
        _builder.Append('.');
        ConsumeHexDigits();
    }

    // So does a 'p'/'P' exponent, which may carry an explicit sign.
    var hasExponent = CharUtils.AsciiLowerCase(TextWindow.PeekChar()) == 'p';
    if (hasExponent)
    {
        TextWindow.AdvanceChar();
        _builder.Append('p');
        if (TextWindow.PeekChar() is '+' or '-')
        {
            _builder.Append(TextWindow.NextChar());
        }

        ConsumeDecimalDigits(_builder);
    }

    var isHexFloat = hasFraction || hasExponent;

    // At most one suffix is consumed. The integer suffixes are rejected on hex
    // floats: an error is reported and the literal is then valued as a plain float.
    var isUnsignedLong = false;
    var isSignedLong = false;
    var isComplex = false;
    if (TextWindow.AdvanceIfMatches("ull", true))
    {
        if (isHexFloat)
        {
            AddError(ErrorCode.ERR_LuajitSuffixInFloat);
        }
        else
        {
            isUnsignedLong = true;
        }
    }
    else if (TextWindow.AdvanceIfMatches("ll", true))
    {
        if (isHexFloat)
        {
            AddError(ErrorCode.ERR_LuajitSuffixInFloat);
        }
        else
        {
            isSignedLong = true;
        }
    }
    else if (TextWindow.AdvanceIfMatches("i", true))
    {
        isComplex = true;
    }

    info.Text = TextWindow.GetText(intern: true);

    // Version checks that only need the raw text and the suffix that was seen.
    if (!_options.SyntaxOptions.AcceptUnderscoreInNumberLiterals && info.Text.IndexOf('_') >= 0)
    {
        AddError(ErrorCode.ERR_UnderscoreInNumericLiteralNotSupportedInVersion);
    }

    var hasSuffix = isUnsignedLong || isSignedLong || isComplex;
    if (!Options.SyntaxOptions.AcceptLuaJITNumberSuffixes && hasSuffix)
    {
        AddError(ErrorCode.ERR_NumberSuffixNotSupportedInVersion);
    }

    if (isUnsignedLong)
    {
        if (!ulong.TryParse(TextWindow.Intern(_builder), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out var unsignedValue))
        {
            AddError(ErrorCode.ERR_NumericLiteralTooLarge);
        }

        info.ValueKind = ValueKind.ULong;
        info.ULongValue = unsignedValue;
        return;
    }

    if (isSignedLong)
    {
        var signedValue = readHexInt64();
        info.ValueKind = ValueKind.Long;
        info.LongValue = signedValue;
        return;
    }

    if (isComplex)
    {
        // The literal becomes the imaginary component; the real component is zero.
        var imaginary = readHexDouble();
        info.ValueKind = ValueKind.Complex;
        info.ComplexValue = new Complex(0, imaginary);
        return;
    }

    if (isHexFloat || _options.SyntaxOptions.HexIntegerFormat == IntegerFormats.NotSupported)
    {
        // Reported before parsing so it precedes any overflow diagnostic.
        if (!_options.SyntaxOptions.AcceptHexFloatLiterals)
        {
            AddError(ErrorCode.ERR_HexFloatLiteralNotSupportedInVersion);
        }

        var floatValue = readHexDouble();
        info.ValueKind = ValueKind.Double;
        info.DoubleValue = floatValue;
        return;
    }

    // Plain hex integer: parsed as a 64-bit value, then stored in whichever
    // representation the syntax options ask for.
    var integerValue = readHexInt64();
    switch (_options.SyntaxOptions.HexIntegerFormat)
    {
        case IntegerFormats.Double:
            info.ValueKind = ValueKind.Double;
            info.DoubleValue = integerValue;
            break;

        case IntegerFormats.Int64:
            info.ValueKind = ValueKind.Long;
            info.LongValue = integerValue;
            break;

        default:
            throw ExceptionUtilities.UnexpectedValue(_options.SyntaxOptions.HexIntegerFormat);
    }

    // Parses the collected characters as a hex float; on overflow reports an
    // error and yields 0.
    double readHexDouble()
    {
        var parsed = 0d;
        try
        {
            parsed = HexFloat.DoubleFromHexString(TextWindow.Intern(_builder));
        }
        catch (OverflowException)
        {
            AddError(ErrorCode.ERR_DoubleOverflow);
        }

        return parsed;
    }

    // Parses the collected characters as a hex Int64; reports an error when
    // TryParse fails (in which case the value is 0).
    long readHexInt64()
    {
        if (!long.TryParse(TextWindow.Intern(_builder), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out var parsed))
        {
            AddError(ErrorCode.ERR_NumericLiteralTooLarge);
        }

        return parsed;
    }
}
/// <summary>
/// Scans a single- or double-quoted literal at the current position and records
/// its kind, text and value in <paramref name="info"/>. If the current character
/// is not a quote, nothing is consumed and the kind is set to
/// <see cref="SyntaxKind.None"/>.
/// </summary>
/// <param name="info">Token record to fill in.</param>
/// <param name="allowEscapes">
/// When <c>true</c>, a backslash starts an escape sequence; when <c>false</c> it is
/// taken as an ordinary character.
/// </param>
private void ScanStringLiteral(ref TokenInfo info, bool allowEscapes = true)
{
    var quoteCharacter = TextWindow.PeekChar();
    if (quoteCharacter != '\'' && quoteCharacter != '"')
    {
        info.Kind = SyntaxKind.None;
        info.Text = null;
        return;
    }

    TextWindow.AdvanceChar();
    _builder.Clear();

    for (; ; )
    {
        var current = TextWindow.PeekChar();

        if (allowEscapes && current == '\\')
        {
            // An escape can decode to one or two UTF-16 code units.
            var first = this.ScanEscapeSequence(out var second);
            _builder.Append(first);
            if (second != SlidingTextWindow.InvalidCharacter)
            {
                _builder.Append(second);
            }

            continue;
        }

        if (current == quoteCharacter)
        {
            TextWindow.AdvanceChar();
            break;
        }

        // Literals may contain any Unicode character, including the window's
        // sentinel value, so the sentinel only ends the literal when the window
        // really is at the end of the input.
        var hitEndOfInput = current == SlidingTextWindow.InvalidCharacter && TextWindow.IsReallyAtEnd();
        if (SyntaxFacts.IsNewLine(current) || hitEndOfInput)
        {
            Debug.Assert(TextWindow.Width > 0);
            this.AddError(ErrorCode.ERR_NewlineInConst);
            break;
        }

        TextWindow.AdvanceChar();
        _builder.Append(current);
    }

    info.Text = TextWindow.GetText(true);

    if (quoteCharacter == '"')
    {
        info.Kind = SyntaxKind.StringLiteralToken;
        info.StringValue = _builder.Length > 0 ? TextWindow.Intern(_builder) : string.Empty;
        return;
    }

    // Character literal: exactly one code unit is expected.
    info.Kind = SyntaxKind.CharacterLiteralToken;
    if (_builder.Length != 1)
    {
        var code = _builder.Length == 0
            ? ErrorCode.ERR_EmptyCharConst
            : ErrorCode.ERR_TooManyCharsInConst;
        this.AddError(code);
    }

    if (_builder.Length > 0)
    {
        info.StringValue = TextWindow.Intern(_builder);
        info.CharValue = info.StringValue[0];
    }
    else
    {
        info.StringValue = string.Empty;
        info.CharValue = SlidingTextWindow.InvalidCharacter;
    }
}
/// <summary>
/// Lexes a delimited short string starting at its opening delimiter and returns
/// the decoded contents.
/// </summary>
/// <remarks>
/// Escape sequences are decoded into the result. Unescaped line breaks are kept
/// in the value but reported as errors, and a missing closing delimiter is
/// reported as an unfinished string.
/// </remarks>
/// <returns>The interned, decoded string value.</returns>
private string ParseShortString()
{
    _builder.Clear();
    var delim = TextWindow.NextChar();
    LorettaDebug.Assert(delim is '"' or '\'' or '`');

    while (true)
    {
        var current = TextWindow.PeekChar();
        if (IsAtEnd(current) || current == delim)
        {
            break;
        }

        var charStart = TextWindow.Position;
        if (current == '\\')
        {
            readEscape(charStart);
        }
        else if (current is '\n' or '\r')
        {
            appendLineBreak(current);
            AddError(charStart, TextWindow.Position - charStart, ErrorCode.ERR_UnescapedLineBreakInString);
        }
        else
        {
            _builder.Append(TextWindow.NextChar());
        }
    }

    if (TextWindow.PeekChar() == delim)
    {
        TextWindow.AdvanceChar();
    }
    else
    {
        AddError(ErrorCode.ERR_UnfinishedString);
    }

    return TextWindow.Intern(_builder);

    // Appends the line-break character at the cursor and, if it is followed by
    // a *different* line-break character (\r\n or \n\r), appends that one too.
    void appendLineBreak(char first)
    {
        _builder.Append(TextWindow.NextChar());
        var second = TextWindow.PeekChar();
        if (CharUtils.IsNewLine(second) && first != second)
        {
            _builder.Append(TextWindow.NextChar());
        }
    }

    // Maps the letter of a single-character escape to the character it denotes.
    static bool tryGetSimpleEscape(char letter, out char value)
    {
        switch (letter)
        {
            case 'a': value = '\a'; return true;
            case 'b': value = '\b'; return true;
            case 'f': value = '\f'; return true;
            case 'n': value = '\n'; return true;
            case 'r': value = '\r'; return true;
            case 't': value = '\t'; return true;
            case 'v': value = '\v'; return true;
            case '\\': value = '\\'; return true;
            case '\'': value = '\''; return true;
            case '"': value = '"'; return true;
            default: value = default; return false;
        }
    }

    // Decodes one escape sequence; the cursor is on the backslash at escapeStart.
    void readEscape(int escapeStart)
    {
        TextWindow.AdvanceChar();
        var selector = TextWindow.PeekChar();

        if (tryGetSimpleEscape(selector, out var literal))
        {
            TextWindow.AdvanceChar();
            _builder.Append(literal);
            return;
        }

        switch (selector)
        {
            case '\n':
            case '\r':
                // Backslash followed by a line break keeps the line break.
                appendLineBreak(selector);
                break;

            case 'z':
                // \z contributes nothing and swallows the whitespace after it.
                TextWindow.AdvanceChar();
                while (CharUtils.IsWhitespace(TextWindow.PeekChar()))
                {
                    TextWindow.AdvanceChar();
                }

                if (!_options.SyntaxOptions.AcceptWhitespaceEscape)
                {
                    reportEscape(ErrorCode.ERR_WhitespaceEscapeNotSupportedInVersion);
                }

                break;

            case >= '0' and <= '9':
            {
                var decoded = readDecimalEscape(escapeStart);
                if (decoded != char.MaxValue)
                {
                    _builder.Append(decoded);
                }

                break;
            }

            case 'x':
            {
                TextWindow.AdvanceChar();
                var decoded = readHexEscape(escapeStart);

                // NOTE(review): readHexEscape yields at most 0xFF, and 0 when no
                // digit was found, so this comparison looks like it can never be
                // false and a NUL is appended for an empty \x - confirm intent.
                if (decoded != char.MaxValue)
                {
                    _builder.Append(decoded);
                }

                if (!_options.SyntaxOptions.AcceptHexEscapesInStrings)
                {
                    reportEscape(ErrorCode.ERR_HexStringEscapesNotSupportedInVersion);
                }

                break;
            }

            case 'u':
            {
                TextWindow.AdvanceChar();
                var decoded = readUnicodeEscape(escapeStart);
                _builder.Append(decoded);
                if (!_options.SyntaxOptions.AcceptUnicodeEscape)
                {
                    reportEscape(ErrorCode.ERR_UnicodeEscapesNotSupportedLuaInVersion);
                }

                break;
            }

            default:
                // When invalid escapes are accepted only the backslash has been
                // consumed; the following character is lexed by the main loop.
                if (!_options.SyntaxOptions.AcceptInvalidEscapes)
                {
                    // Skip the character after the escape.
                    TextWindow.AdvanceChar();
                    reportEscape(ErrorCode.ERR_InvalidStringEscape);
                }

                break;
        }

        // Reports an error spanning from the backslash to the current position.
        void reportEscape(ErrorCode code) =>
            AddError(escapeStart, TextWindow.Position - escapeStart, code);
    }

    // Reads up to three decimal digits. Returns char.MaxValue (after reporting
    // an error) when there is no digit or the value exceeds 255.
    char readDecimalEscape(int start)
    {
        var value = 0;
        var digits = 0;
        for (; digits < 3; digits++)
        {
            var peeked = TextWindow.PeekChar();
            if (!CharUtils.IsDecimal(peeked))
            {
                break;
            }

            TextWindow.AdvanceChar();
            value = (value * 10) + (peeked - '0');
        }

        if (digits < 1 || value > 255)
        {
            AddError(start, TextWindow.Position - start, ErrorCode.ERR_InvalidStringEscape);
            return char.MaxValue;
        }

        return (char) value;
    }

    // Reads up to maxDigits hexadecimal digits. Reports noDigitsErrorCode and
    // returns 0 when not a single digit is present.
    ulong readHexDigits(int start, int maxDigits, ErrorCode noDigitsErrorCode)
    {
        var accumulator = 0L;
        var digits = 0;
        while (digits < maxDigits)
        {
            var peeked = TextWindow.PeekChar();
            uint nibble;
            if (CharUtils.IsDecimal(peeked))
            {
                nibble = (uint) (peeked - '0');
            }
            else if (CharUtils.IsHexadecimal(peeked))
            {
                nibble = (uint) (10 + CharUtils.AsciiLowerCase(peeked) - 'a');
            }
            else
            {
                break;
            }

            TextWindow.AdvanceChar();
            accumulator = (accumulator << 4) | nibble;
            digits++;
        }

        if (digits < 1)
        {
            AddError(start, TextWindow.Position - start, noDigitsErrorCode);
            return 0UL;
        }

        return (ulong) accumulator;
    }

    // \xXX: at most two hex digits.
    char readHexEscape(int start) =>
        (char) readHexDigits(start, 2, ErrorCode.ERR_InvalidStringEscape);

    // \u{XXX}: braces are optional for recovery purposes but their absence is
    // reported; code points above 10FFFF are reported and clamped.
    string readUnicodeEscape(int start)
    {
        var hasOpeningBrace = TextWindow.PeekChar() is '{';
        if (hasOpeningBrace)
        {
            TextWindow.AdvanceChar();
        }

        var codepoint = readHexDigits(start, 16, ErrorCode.ERR_HexDigitExpected);

        var hasClosingBrace = TextWindow.PeekChar() is '}';
        if (hasClosingBrace)
        {
            TextWindow.AdvanceChar();
        }

        if (!hasOpeningBrace)
        {
            AddError(start, TextWindow.Position - start, ErrorCode.ERR_UnicodeEscapeMissingOpenBrace);
        }

        if (!hasClosingBrace)
        {
            AddError(start, TextWindow.Position - start, ErrorCode.ERR_UnicodeEscapeMissingCloseBrace);
        }

        if (codepoint > 0x10FFFF)
        {
            AddError(start, TextWindow.Position - start, ErrorCode.ERR_EscapeTooLarge, "10FFFF");
            codepoint = 0x10FFFF;
        }

        // Anything in the BMP is returned as a single code unit.
        // NOTE: It *is* technically incorrect to treat a surrogate as a Unicode
        // codepoint, but Lua accepts it so we do as well.
        return codepoint <= 0xFFFF
            ? char.ToString((char) codepoint)
            : char.ConvertFromUtf32((int) codepoint);
    }
}
/// <summary>
/// Scans a multi-line raw string literal in two passes: the first finds the end
/// of the literal and its indentation whitespace, the second builds the value.
/// </summary>
/// <param name="info">Token record; its kind is always set, its value only when the first pass succeeds.</param>
/// <param name="startingQuoteCount">Number of quote characters in the opening delimiter.</param>
private void ScanMultiLineRawStringLiteral(ref TokenInfo info, int startingQuoteCount)
{
    info.Kind = SyntaxKind.MultiLineRawStringLiteralToken;

    // Indentation whitespace, computed from the very last line of the literal.
    var indentation = PooledStringBuilder.GetInstance();

    // Leading whitespace of whichever line is currently being processed.
    var lineWhitespace = PooledStringBuilder.GetInstance();

    try
    {
        // Pass 1: find the end of the literal and the indentation whitespace
        // that every content line has to be compatible with.
        var contentStart = TextWindow.Position;
        Debug.Assert(SyntaxFacts.IsNewLine(TextWindow.PeekChar()));

        var lineCount = 0;
        while (ScanMultiLineRawStringLiteralLine(startingQuoteCount, indentation.Builder))
        {
            lineCount += 1;
        }

        // A failed first pass means there is no constant value to compute.
        if (this.HasErrors)
        {
            return;
        }

        // A raw string literal without any content line is not legal.
        if (lineCount == 0)
        {
            this.AddError(
                position: TextWindow.Position - startingQuoteCount,
                width: startingQuoteCount,
                ErrorCode.ERR_RawStringMustContainContent);
            return;
        }

        // Pass 2: rewind to just after the opening delimiter and build the
        // value line by line. This can still fail if a line's whitespace does
        // not agree with the indentation whitespace. No error has been reported
        // at this point, so checking HasErrors in the loop condition stops right
        // after the first line that produces one.
        var literalEnd = TextWindow.Position;
        TextWindow.Reset(contentStart);
        Debug.Assert(SyntaxFacts.IsNewLine(TextWindow.PeekChar()));

        for (var line = 0; line < lineCount && !this.HasErrors; line++)
        {
            AddMultiLineRawStringLiteralLineContents(
                indentation.Builder,
                lineWhitespace.Builder,
                firstContentLine: line == 0);
        }

        if (this.HasErrors)
        {
            info.StringValue = "";
        }
        else
        {
            info.StringValue = TextWindow.Intern(_builder);
        }

        // Whether or not a value could be produced, consume everything up to
        // the end found by the first pass.
        TextWindow.Reset(literalEnd);
    }
    finally
    {
        indentation.Free();
        lineWhitespace.Free();
    }
}
// Fast identifier lexer for the common case in the language where:
//
//   a) the identifier is not verbatim,
//   b) it contains no unicode characters, and
//   c) it contains no unicode escapes.
//
// Nearly every identifier consists of [_a-zA-Z0-9] and is followed by one of a
// small set of known characters (dot, comma, ...), so we sit in a tight loop
// looking for exactly that shape and return false - sending the caller to the
// slower but fully general path - as soon as we see anything else.
//
// This only works while the identifier *and* its terminator are inside the
// characters currently held by the sliding window. In exchange we avoid the
// peek/advance/next overhead and all stores to the identifier buffer: we just
// walk the raw window between a start and an end offset.
//
// NOTE(review): '$' is accepted as an identifier character here, while the two
// slow paths in this file treat '$' as a stop character. Confirm which is intended.
//
// Note: it is critical that this method is only called from a code path that
// has already checked for IsIdentifierStartChar or '@'.
private bool ScanIdentifier_FastPath(ref TokenInfo info)
{
    // Debugger syntax is wonky. Can't use the fast path for it.
    if ((_mode & LexerMode.MaskLexMode) == LexerMode.DebuggerSyntax)
    {
        return false;
    }

    var startOffset = TextWindow.Offset;
    var window = TextWindow.CharacterWindow;
    var windowCount = TextWindow.CharacterWindowCount;

    for (var offset = startOffset; offset != windowCount; offset++)
    {
        switch (window[offset])
        {
            case '_':
            case '$':
            case >= 'A' and <= 'Z':
            case >= 'a' and <= 'z':
                // Valid anywhere inside an identifier: keep going.
                continue;

            case >= '0' and <= '9':
                // Valid inside an identifier, but not as its first character.
                if (offset == startOffset)
                {
                    return false;
                }

                continue;

            case '&':
                // CONSIDER: this method is performance critical, so it might be
                // safer to kick out at the top (as for LexerMode.DebuggerSyntax).
                // Inside a cref this could be the start of an XML entity that
                // belongs to the identifier, which only the slow path understands.
                if (InXmlCrefOrNameAttributeValue)
                {
                    return false;
                }

                // Otherwise it simply ends the identifier.
                break;

            case '\0':
            case ' ':
            case '\r':
            case '\n':
            case '\t':
            case '!':
            case '%':
            case '(':
            case ')':
            case '*':
            case '+':
            case ',':
            case '-':
            case '.':
            case '/':
            case ':':
            case ';':
            case '<':
            case '=':
            case '>':
            case '?':
            case '[':
            case ']':
            case '^':
            case '{':
            case '|':
            case '}':
            case '~':
            case '"':
            case '\'':
                // None of these can appear in an identifier, so we are done.
                break;

            // case '@':  verbatim identifiers are handled in the slow path
            // case '\\': unicode escapes are handled in the slow path
            default:
                // Anything else (unicode characters, escapes, ...) is something
                // we cannot handle here: fall back to the slow path.
                return false;
        }

        // A terminator was found: publish the identifier.
        var length = offset - startOffset;
        TextWindow.AdvanceChar(length);
        info.Text = info.StringValue = TextWindow.Intern(window, startOffset, length);
        info.IsVerbatim = false;
        return true;
    }

    // Ran out of contiguous characters before seeing a terminator: slow path.
    return false;
}
/// <summary>
/// Essentially the same as <see cref="ScanIdentifier_SlowPath"/>, except that it
/// can also handle XML entities. Identifier scanning is hot code and this method
/// does extra work, so it is kept separate from the common case.
/// </summary>
/// <param name="info">Token record that receives the identifier text and value.</param>
/// <returns>
/// <c>true</c> if at least one identifier character was collected; otherwise
/// <c>false</c>, with the text window reset to where scanning started.
/// </returns>
private bool ScanIdentifier_CrefSlowPath(ref TokenInfo info)
{
    Debug.Assert(InXmlCrefOrNameAttributeValue);

    var scanStart = TextWindow.Position;
    ResetIdentBuffer();

    if (AdvanceIfMatches('@'))
    {
        if (InXmlNameAttributeValue)
        {
            // In xml name attribute values, the '@' is part of the value text
            // of the identifier (to match dev11).
            AddIdentChar('@');
        }
        else
        {
            info.IsVerbatim = true;
        }
    }

    while (true)
    {
        var rewindTo = TextWindow.Position;
        char current;
        char lowSurrogate;

        if (TextWindow.PeekChar() != '&')
        {
            current = TextWindow.NextChar();
            lowSurrogate = SlidingTextWindow.InvalidCharacter;
        }
        else if (!TextWindow.TryScanXmlEntity(out current, out lowSurrogate))
        {
            // Not a valid entity, so it is not part of the identifier.
            TextWindow.Reset(rewindTo);
            goto LoopExit;
        }

        // NOTE: If the surrogate is non-zero, then 'current' won't match any of
        // the explicit cases below (UTF-16 guarantees that members of surrogate
        // pairs aren't separately valid).

        // NOTE: For completeness, we should allow xml entities in unicode escape
        // sequences (DevDiv #16321). Since it is not currently a priority, the
        // interim behavior is: only attempt to scan a unicode escape if NONE of
        // its characters are XML entities (including the backslash, which has
        // already been consumed) - hence the position check. When that behavior
        // is implemented, the position check can go and AdvanceIfMatches can
        // replace PeekChar.
        //
        // This happens at most once per character, so a backslash *produced* by
        // an escape is never treated as the start of another one; otherwise
        // \u005Cu1234 would look just like \u1234 (escape within escape).
        if (current == '\\'
            && TextWindow.Position == rewindTo + 1
            && TextWindow.PeekChar() is 'u' or 'U')
        {
            Debug.Assert(lowSurrogate == SlidingTextWindow.InvalidCharacter, "Since consumedChar == '\\'");

            info.HasIdentifierEscapeSequence = true;
            TextWindow.Reset(rewindTo);
            current = TextWindow.NextUnicodeEscape(out lowSurrogate, out var escapeError);
            AddCrefError(escapeError);
        }

        switch (current)
        {
            case '_':
            case >= 'A' and <= 'Z':
            case >= 'a' and <= 'z':
                // The 'common' identifier characters.
                break;

            case >= '0' and <= '9':
                // Digits are fine anywhere but in first position.
                if (_identLen == 0)
                {
                    TextWindow.Reset(rewindTo);
                    goto LoopExit;
                }

                break;

            case ' ':
            case '$':
            case '\t':
            case '.':
            case ';':
            case '(':
            case ')':
            case ',':
            case '<':
                // The 'common' stop characters.
                TextWindow.Reset(rewindTo);
                goto LoopExit;

            case SlidingTextWindow.InvalidCharacter when TextWindow.IsReallyAtEnd():
                TextWindow.Reset(rewindTo);
                goto LoopExit;

            default:
            {
                // This is the 'expensive' part; ASCII never gets this far into it.
                var isStart = _identLen == 0
                    && current > 127
                    && SyntaxKindFacts.IsIdentifierStartCharacter(current);
                if (isStart)
                {
                    break;
                }

                var isPart = _identLen > 0
                    && current > 127
                    && SyntaxKindFacts.IsIdentifierPartCharacter(current);
                if (!isPart)
                {
                    // Not a valid identifier character, so bail.
                    TextWindow.Reset(rewindTo);
                    goto LoopExit;
                }

                //// BUG 424819 : Handle identifier chars > 0xFFFF via surrogate pairs
                if (SyntaxKindFacts.IsFormattingChar(current))
                {
                    // Formatting characters are consumed but not recorded.
                    continue;
                }

                break;
            }
        }

        AddIdentChar(current);
        if (lowSurrogate != SlidingTextWindow.InvalidCharacter)
        {
            AddIdentChar(lowSurrogate);
        }
    }

LoopExit:
    if (_identLen <= 0)
    {
        info.Text = null;
        info.StringValue = null;
        TextWindow.Reset(scanStart);
        return false;
    }

    // NOTE: If we don't intern the string value, then we won't get a hit in the
    // keyword dictionary! (It searches for a key using identity.) The text does
    // not have to be interned, and probably shouldn't be if it contains
    // entities (the else-case below).
    var width = TextWindow.Width; // exact size of input characters

    if (_identLen == width)
    {
        // The identifier buffer has the same length as the input text.
        info.StringValue = TextWindow.GetInternedText();
        info.Text = info.StringValue;
    }
    else
    {
        info.StringValue = TextWindow.Intern(_identBuffer, 0, _identLen);
        info.Text = TextWindow.GetText(intern: false);
    }

    return true;
}
/// <summary>
/// General identifier scanner: handles a leading <c>@</c>, unicode escapes,
/// non-ASCII identifier characters and, in debugger-syntax mode, identifiers
/// that begin with the two characters "less-than, greater-than".
/// </summary>
/// <param name="info">Token record that receives the identifier text and value.</param>
/// <returns>
/// <c>true</c> if at least one identifier character was collected; otherwise
/// <c>false</c>, with the text window reset to where scanning started.
/// </returns>
private bool ScanIdentifier_SlowPath(ref TokenInfo info)
{
    var scanStart = TextWindow.Position;
    ResetIdentBuffer();

    // A leading '@' is always skipped. The token is flagged verbatim only when
    // the '@' is directly followed by a double quote.
    var startsWithAt = TextWindow.PeekChar() == '@';
    info.IsVerbatim = startsWithAt && TextWindow.PeekChar(1) == '"';
    if (startsWithAt)
    {
        TextWindow.AdvanceChar();
    }

    while (true)
    {
        var lowSurrogate = SlidingTextWindow.InvalidCharacter;
        var fromEscape = false;
        var current = TextWindow.PeekChar();

        // Decode a unicode escape, at most once per character: a backslash that
        // is itself the result of an escape must not start another one,
        // otherwise \u005Cu1234 would look just like \u1234.
        if (current == '\\' && TextWindow.IsUnicodeEscape())
        {
            info.HasIdentifierEscapeSequence = true;
            fromEscape = true;
            current = TextWindow.PeekUnicodeEscape(out lowSurrogate);
        }

        switch (current)
        {
            case '_':
            case >= 'A' and <= 'Z':
            case >= 'a' and <= 'z':
                // The 'common' identifier characters.
                break;

            case >= '0' and <= '9':
                // Digits are fine anywhere but in first position.
                if (_identLen == 0)
                {
                    goto LoopExit;
                }

                break;

            case '$':
            case ' ':
            case '\t':
            case '.':
            case ';':
            case '(':
            case ')':
            case ',':
                // The 'common' stop characters.
                goto LoopExit;

            case SlidingTextWindow.InvalidCharacter when TextWindow.IsReallyAtEnd():
                goto LoopExit;

            case '<':
                // In DebuggerSyntax mode, identifiers are allowed to begin with <>.
                if (_identLen == 0 && this.ModeIs(LexerMode.DebuggerSyntax) && TextWindow.PeekChar(1) == '>')
                {
                    TextWindow.AdvanceChar(2);
                    AddIdentChar('<');
                    AddIdentChar('>');
                    continue;
                }

                goto LoopExit;

            default:
            {
                // This is the 'expensive' part; ASCII never gets this far into it.
                var isStart = _identLen == 0
                    && current > 127
                    && SyntaxKindFacts.IsIdentifierStartCharacter(current);
                if (isStart)
                {
                    break;
                }

                var isPart = _identLen > 0
                    && current > 127
                    && SyntaxKindFacts.IsIdentifierPartCharacter(current);
                if (!isPart)
                {
                    // Not a valid identifier character, so bail.
                    goto LoopExit;
                }

                //// BUG 424819 : Handle identifier chars > 0xFFFF via surrogate pairs
                if (SyntaxKindFacts.IsFormattingChar(current))
                {
                    // Formatting characters are consumed but not recorded.
                    ConsumePeeked(fromEscape, ref lowSurrogate);
                    continue;
                }

                break;
            }
        }

        ConsumePeeked(fromEscape, ref lowSurrogate);
        AddIdentChar(current);
        if (lowSurrogate != SlidingTextWindow.InvalidCharacter)
        {
            AddIdentChar(lowSurrogate);
        }
    }

LoopExit:
    var width = TextWindow.Width; // exact size of input characters
    if (_identLen <= 0)
    {
        info.Text = null;
        info.StringValue = null;
        TextWindow.Reset(scanStart);
        return false;
    }

    info.Text = TextWindow.GetInternedText();

    // When the identifier buffer is exactly as long as the input text, the text
    // itself is used as the value; otherwise the buffer contents are interned.
    info.StringValue = _identLen == width
        ? info.Text
        : TextWindow.Intern(_identBuffer, 0, _identLen);

    return true;

    // Consumes the character that has only been peeked so far. An escaped
    // character is re-read through the window so that any diagnostic for the
    // escape is reported; a plain character is a single advance.
    void ConsumePeeked(bool wasEscaped, ref char surrogate)
    {
        if (wasEscaped)
        {
            TextWindow.NextCharOrUnicodeEscape(out surrogate, out var escapeError);
            AddError(escapeError);
        }
        else
        {
            TextWindow.AdvanceChar();
        }
    }
}