コード例 #1
0
        /// <summary>
        /// Consumes one backslash escape sequence from the text window and returns the
        /// character it stands for.
        /// </summary>
        /// <param name="surrogateCharacter">
        /// Set to <c>SlidingTextWindow.InvalidCharacter</c> unless the escape is handed to
        /// <c>TextWindow.NextUnicodeEscape</c>, in which case that call assigns it.
        /// </param>
        /// <returns>
        /// The translated character. For an unrecognized escape an
        /// <c>ERR_IllegalEscape</c> diagnostic is added and the character that followed
        /// the backslash is returned as-is.
        /// </returns>
        private char ScanEscapeSequence(out char surrogateCharacter)
        {
            surrogateCharacter = SlidingTextWindow.InvalidCharacter;

            // Remember where the escape begins: it is both the rewind target for
            // \x, \u and \U and the start of the span reported for illegal escapes.
            var escapeStart = TextWindow.Position;

            char backslash = TextWindow.NextChar();
            Debug.Assert(backslash == '\\');

            char escaped = TextWindow.NextChar();
            switch (escaped)
            {
                // escaped characters that translate to themselves
                case '\'':
                case '"':
                case '\\':
                    return escaped;

                // translate escapes as per C# spec 2.4.4.4
                case '0':
                    return '\0';

                case 'a':
                    return '\a';

                case 'b':
                    return '\b';

                case 'f':
                    return '\f';

                case 'n':
                    return '\n';

                case 'r':
                    return '\r';

                case 't':
                    return '\t';

                case 'v':
                    return '\v';

                case 'x':
                case 'u':
                case 'U':
                {
                    // Rewind to the backslash and let the text window decode the whole escape.
                    TextWindow.Reset(escapeStart);
                    SyntaxDiagnosticInfo escapeError;
                    char decoded = TextWindow.NextUnicodeEscape(surrogateCharacter: out surrogateCharacter, info: out escapeError);
                    AddError(escapeError);
                    return decoded;
                }

                default:
                    // Unknown escape: report the span consumed so far and hand back the raw character.
                    this.AddError(escapeStart, TextWindow.Position - escapeStart, ErrorCode.ERR_IllegalEscape);
                    return escaped;
            }
        }
コード例 #2
0
        /// <summary>
        /// Scans a delimited (single-line) string literal starting at its opening delimiter
        /// and returns the decoded string value.
        /// </summary>
        /// <remarks>
        /// The opening delimiter must be <c>"</c>, <c>'</c> or <c>`</c>. Escape sequences are
        /// decoded into the returned value; diagnostics are added for invalid escapes, escapes
        /// that are disabled by the current syntax options, unescaped line breaks and a missing
        /// closing delimiter. Scanning never throws for malformed input: it reports and continues.
        /// </remarks>
        /// <returns>The interned, decoded contents of the string (without delimiters).</returns>
        private string ParseShortString()
        {
            _builder.Clear();
            var delim = TextWindow.NextChar();

            LorettaDebug.Assert(delim is '"' or '\'' or '`');

            char ch;

            // Consume characters until the matching delimiter or the end of the input.
            while (!IsAtEnd(ch = TextWindow.PeekChar()) && ch != delim)
            {
                var charStart = TextWindow.Position;
                switch (ch)
                {
                    #region Escapes

                case '\\':
                {
                    // escapeStart points at the backslash so diagnostics cover the whole escape.
                    var escapeStart = TextWindow.Position;
                    TextWindow.AdvanceChar();

                    switch (ch = TextWindow.PeekChar())
                    {
                    // Escaped line break: kept in the value. A second, *different* line break
                    // character (i.e. "\r\n" or "\n\r") is consumed as part of the same break.
                    case '\n':
                    case '\r':
                    {
                        _builder.Append(TextWindow.NextChar());
                        char ch2;
                        if (CharUtils.IsNewLine(ch2 = TextWindow.PeekChar()) &&
                            ch != ch2)
                        {
                            _builder.Append(TextWindow.NextChar());
                        }
                        break;
                    }

                    case 'a':
                        TextWindow.AdvanceChar();
                        _builder.Append('\a');
                        break;

                    case 'b':
                        TextWindow.AdvanceChar();
                        _builder.Append('\b');
                        break;

                    case 'f':
                        TextWindow.AdvanceChar();
                        _builder.Append('\f');
                        break;

                    case 'n':
                        TextWindow.AdvanceChar();
                        _builder.Append('\n');
                        break;

                    case 'r':
                        TextWindow.AdvanceChar();
                        _builder.Append('\r');
                        break;

                    case 't':
                        TextWindow.AdvanceChar();
                        _builder.Append('\t');
                        break;

                    case 'v':
                        TextWindow.AdvanceChar();
                        _builder.Append('\v');
                        break;

                    case '\\':
                        TextWindow.AdvanceChar();
                        _builder.Append('\\');
                        break;

                    case '\'':
                        TextWindow.AdvanceChar();
                        _builder.Append('\'');
                        break;

                    case '"':
                        TextWindow.AdvanceChar();
                        _builder.Append('"');
                        break;

                    // "\z" skips all following whitespace and contributes nothing to the value.
                    case 'z':
                        TextWindow.AdvanceChar();

                        while (CharUtils.IsWhitespace(TextWindow.PeekChar()))
                        {
                            TextWindow.AdvanceChar();
                        }

                        if (!_options.SyntaxOptions.AcceptWhitespaceEscape)
                        {
                            AddError(escapeStart, TextWindow.Position - escapeStart, ErrorCode.ERR_WhitespaceEscapeNotSupportedInVersion);
                        }
                        break;

                    // Decimal escape: up to three decimal digits with a value of at most 255.
                    case '0':
                    case '1':
                    case '2':
                    case '3':
                    case '4':
                    case '5':
                    case '6':
                    case '7':
                    case '8':
                    case '9':
                    {
                        var parsedCharInteger = parseDecimalInteger(escapeStart);
                        // char.MaxValue is the "invalid escape" sentinel; nothing is appended then.
                        if (parsedCharInteger != char.MaxValue)
                        {
                            _builder.Append(parsedCharInteger);
                        }
                        break;
                    }

                    // Hexadecimal escape: "\x" followed by one or two hexadecimal digits.
                    case 'x':
                    {
                        TextWindow.AdvanceChar();
                        var parsedCharInteger = parseHexadecimalEscapeInteger(escapeStart);
                        // char.MaxValue is the "invalid escape" sentinel; nothing is appended then.
                        if (parsedCharInteger != char.MaxValue)
                        {
                            _builder.Append(parsedCharInteger);
                        }

                        if (!_options.SyntaxOptions.AcceptHexEscapesInStrings)
                        {
                            AddError(escapeStart, TextWindow.Position - escapeStart, ErrorCode.ERR_HexStringEscapesNotSupportedInVersion);
                        }
                    }
                    break;

                    // Unicode escape: "\u{XXX}".
                    case 'u':
                    {
                        TextWindow.AdvanceChar();
                        var parsed = parseUnicodeEscape(escapeStart);
                        _builder.Append(parsed);

                        if (!_options.SyntaxOptions.AcceptUnicodeEscape)
                        {
                            AddError(escapeStart, TextWindow.Position - escapeStart, ErrorCode.ERR_UnicodeEscapesNotSupportedLuaInVersion);
                        }
                    }
                    break;

                    default:
                        // When invalid escapes are accepted only the backslash is dropped: the
                        // following character is left in the window and is handled by the next
                        // iteration of the outer loop as a regular character.
                        if (!_options.SyntaxOptions.AcceptInvalidEscapes)
                        {
                            // Skip the character after the escape.
                            TextWindow.AdvanceChar();
                            AddError(escapeStart, TextWindow.Position - escapeStart, ErrorCode.ERR_InvalidStringEscape);
                        }
                        break;
                    }
                }
                break;

                    #endregion Escapes

                // Unescaped line break: kept in the value (same pairing rule as the escaped
                // form above) but reported as an error.
                case '\n':
                case '\r':
                {
                    _builder.Append(TextWindow.NextChar());
                    char ch2;
                    if (CharUtils.IsNewLine(ch2 = TextWindow.PeekChar()) &&
                        ch != ch2)
                    {
                        _builder.Append(TextWindow.NextChar());
                    }

                    AddError(charStart, TextWindow.Position - charStart, ErrorCode.ERR_UnescapedLineBreakInString);
                }
                break;

                default:
                    _builder.Append(TextWindow.NextChar());
                    break;
                }
            }

            // Consume the closing delimiter, or report that the string was never terminated.
            if (TextWindow.PeekChar() == delim)
            {
                TextWindow.AdvanceChar();
            }
            else
            {
                AddError(ErrorCode.ERR_UnfinishedString);
            }

            return(TextWindow.Intern(_builder));

            // Reads up to three decimal digits of a "\ddd" escape. Returns char.MaxValue (after
            // reporting ERR_InvalidStringEscape) when no digit was read or the value exceeds 255.
            char parseDecimalInteger(int start)
            {
                var  readChars = 0;
                var  num       = 0;
                char ch;

                while (readChars < 3 && CharUtils.IsDecimal(ch = TextWindow.PeekChar()))
                {
                    TextWindow.AdvanceChar();
                    num = (num * 10) + (ch - '0');
                    readChars++;
                }

                if (readChars < 1 || num > 255)
                {
                    AddError(start, TextWindow.Position - start, ErrorCode.ERR_InvalidStringEscape);
                    return(char.MaxValue);
                }

                return((char)num);
            }

            // Reads up to maxDigits hexadecimal digits and returns their value. When no digit is
            // present, lessThanZeroErrorCode is reported for the span [start, current position)
            // and 0 is returned; the window position is left unchanged in that case.
            ulong parseHexadecimalNumber(int start, int maxDigits, ErrorCode lessThanZeroErrorCode)
            {
                var readChars = 0;
                var num       = 0L;

                while (readChars < maxDigits)
                {
                    var peek = TextWindow.PeekChar();
                    if (CharUtils.IsDecimal(peek))
                    {
                        TextWindow.AdvanceChar();
                        num = (num << 4) | (uint)(peek - '0');
                    }
                    else if (CharUtils.IsHexadecimal(peek))
                    {
                        // Not a decimal digit, so this is one of a-f / A-F.
                        TextWindow.AdvanceChar();
                        num = (num << 4) | (uint)(10 + CharUtils.AsciiLowerCase(peek) - 'a');
                    }
                    else
                    {
                        break;
                    }
                    readChars++;
                }

                if (readChars < 1)
                {
                    AddError(start, TextWindow.Position - start, lessThanZeroErrorCode);
                    return(0UL);
                }

                return((ulong)num);
            }

            // Reads the one or two hexadecimal digits of a "\xXX" escape. Returns char.MaxValue
            // (the same sentinel parseDecimalInteger uses) when no digit was present.
            char parseHexadecimalEscapeInteger(int start)
            {
                // parseHexadecimalNumber returns 0 on failure, which cannot be told apart from a
                // valid "\x00". It only advances the window when it reads a digit, so an unchanged
                // position means the escape was invalid (the error has already been reported).
                var digitsStart = TextWindow.Position;
                var value       = parseHexadecimalNumber(start, 2, ErrorCode.ERR_InvalidStringEscape);

                return(TextWindow.Position == digitsStart ? char.MaxValue : (char)value);
            }

            // Reads the "{XXX}" part of a "\u{XXX}" escape and returns the text it denotes.
            // Missing braces, missing digits and code points above 10FFFF are reported, but a
            // string is always returned so scanning can continue.
            string parseUnicodeEscape(int start)
            {
                var missingOpeningBrace = TextWindow.PeekChar() is not '{';

                if (!missingOpeningBrace)
                {
                    TextWindow.AdvanceChar();
                }

                var codepoint = parseHexadecimalNumber(start, 16, ErrorCode.ERR_HexDigitExpected);

                var missingClosingBrace = TextWindow.PeekChar() is not '}';

                if (!missingClosingBrace)
                {
                    TextWindow.AdvanceChar();
                }

                // Errors are added only after everything was consumed so that their spans cover
                // the entire escape sequence.
                if (missingOpeningBrace)
                {
                    AddError(start, TextWindow.Position - start, ErrorCode.ERR_UnicodeEscapeMissingOpenBrace);
                }
                if (missingClosingBrace)
                {
                    AddError(start, TextWindow.Position - start, ErrorCode.ERR_UnicodeEscapeMissingCloseBrace);
                }
                if (codepoint > 0x10FFFF)
                {
                    AddError(start, TextWindow.Position - start, ErrorCode.ERR_EscapeTooLarge, "10FFFF");
                    // Clamp so that the conversion below cannot fail.
                    codepoint = 0x10FFFF;
                }

                // Return the codepoint itself if it's in the BMP.
                // NOTE: It *is* technically incorrect to consider a surrogate
                // an Unicode codepoint but Lua accepts it so we do it as well.
                if (codepoint <= 0xFFFF)
                {
                    return(char.ToString((char)codepoint));
                }

                return(char.ConvertFromUtf32((int)codepoint));
            }
        }
コード例 #3
0
#pragma warning restore IDE0079 // Remove unnecessary suppression
        /// <summary>
        /// Scans the body of a hexadecimal numeric literal (digits, optional fraction, optional
        /// <c>p</c> exponent and optional <c>ull</c>/<c>ll</c>/<c>i</c> suffix) and stores the
        /// literal's text, value kind and value in <paramref name="info"/>.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this presumably runs after the caller has consumed the <c>0x</c> prefix
        /// and relies on <c>ConsumeHexDigits</c> appending the digits to <c>_builder</c> - confirm
        /// against the caller and that helper.
        /// Malformed or unsupported literals never throw; a diagnostic is added and the value
        /// falls back to zero.
        /// </remarks>
        /// <param name="info">Token information that receives the text and the parsed value.</param>
        private void ParseHexadecimalNumber(ref TokenInfo info)
        {
            _builder.Clear();
            ConsumeHexDigits();
            var isHexFloat = false;

            // Optional fractional part.
            if (TextWindow.PeekChar() == '.')
            {
                TextWindow.AdvanceChar();
                isHexFloat = true;
                _builder.Append('.');
                ConsumeHexDigits();
            }

            // Optional binary exponent ('p' or 'P', optional sign, decimal digits).
            if (CharUtils.AsciiLowerCase(TextWindow.PeekChar()) == 'p')
            {
                TextWindow.AdvanceChar();
                isHexFloat = true;
                _builder.Append('p');
                if (TextWindow.PeekChar() is '+' or '-')
                {
                    _builder.Append(TextWindow.NextChar());
                }
                ConsumeDecimalDigits(_builder);
            }

            var(isUnsignedLong, isSignedLong, isComplex) = (false, false, false);

            // Optional LuaJIT suffix. "ull" must be tried before "ll"; both are rejected on
            // floating point literals, in which case the literal is still parsed as a float.
            if (TextWindow.AdvanceIfMatches("ull", true))
            {
                if (isHexFloat)
                {
                    AddError(ErrorCode.ERR_LuajitSuffixInFloat);
                }
                else
                {
                    isUnsignedLong = true;
                }
            }
            else if (TextWindow.AdvanceIfMatches("ll", true))
            {
                if (isHexFloat)
                {
                    AddError(ErrorCode.ERR_LuajitSuffixInFloat);
                }
                else
                {
                    isSignedLong = true;
                }
            }
            else if (TextWindow.AdvanceIfMatches("i", true))
            {
                isComplex = true;
            }

            info.Text = TextWindow.GetText(intern: true);
            if (!_options.SyntaxOptions.AcceptUnderscoreInNumberLiterals && info.Text.IndexOf('_') >= 0)
            {
                AddError(ErrorCode.ERR_UnderscoreInNumericLiteralNotSupportedInVersion);
            }
            if (!Options.SyntaxOptions.AcceptLuaJITNumberSuffixes && (isUnsignedLong || isSignedLong || isComplex))
            {
                AddError(ErrorCode.ERR_NumberSuffixNotSupportedInVersion);
            }

            if (isUnsignedLong)
            {
                if (!ulong.TryParse(TextWindow.Intern(_builder), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out var result))
                {
                    AddError(ErrorCode.ERR_NumericLiteralTooLarge);
                }

                info.ValueKind  = ValueKind.ULong;
                info.ULongValue = result;
            }
            else if (isSignedLong)
            {
                // Hexadecimal parsing into a signed type is two's complement, so literals with
                // the highest bit set yield negative values here.
                if (!long.TryParse(TextWindow.Intern(_builder), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out var result))
                {
                    AddError(ErrorCode.ERR_NumericLiteralTooLarge);
                }

                info.ValueKind = ValueKind.Long;
                info.LongValue = result;
            }
            else if (isComplex)
            {
                var result = 0d;
                try
                {
                    result = HexFloat.DoubleFromHexString(TextWindow.Intern(_builder));
                }
                catch (OverflowException)
                {
                    AddError(ErrorCode.ERR_DoubleOverflow);
                }

                // The literal is the imaginary part of the complex number.
                info.ValueKind    = ValueKind.Complex;
                info.ComplexValue = new Complex(0, result);
            }
            else if (isHexFloat || _options.SyntaxOptions.HexIntegerFormat == IntegerFormats.NotSupported)
            {
                if (!_options.SyntaxOptions.AcceptHexFloatLiterals)
                {
                    AddError(ErrorCode.ERR_HexFloatLiteralNotSupportedInVersion);
                }

                var result = 0d;
                try
                {
                    result = HexFloat.DoubleFromHexString(TextWindow.Intern(_builder));
                }
                catch (OverflowException)
                {
                    AddError(ErrorCode.ERR_DoubleOverflow);
                }
                info.ValueKind   = ValueKind.Double;
                info.DoubleValue = result;
            }
            else
            {
                // Parse the digits as an unsigned 64-bit magnitude. Parsing straight into a
                // signed long would read literals with the highest bit set (for example
                // 0xFFFFFFFFFFFFFFFF) as negative numbers, which is only correct for the Int64
                // format and would give a negative double for the Double format.
                if (!ulong.TryParse(TextWindow.Intern(_builder), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out var result))
                {
                    AddError(ErrorCode.ERR_NumericLiteralTooLarge);
                }
                switch (_options.SyntaxOptions.HexIntegerFormat)
                {
                case IntegerFormats.Double:
                    info.ValueKind   = ValueKind.Double;
                    info.DoubleValue = result;
                    break;

                case IntegerFormats.Int64:
                    // Reinterpret the bits as two's complement, matching signed hex parsing.
                    info.ValueKind = ValueKind.Long;
                    info.LongValue = unchecked((long)result);
                    break;

                default:
                    throw ExceptionUtilities.UnexpectedValue(_options.SyntaxOptions.HexIntegerFormat);
                }
            }
        }
コード例 #4
0
        /// <summary>
        /// This method is essentially the same as ScanIdentifier_SlowPath,
        /// except that it can handle XML entities.  Since ScanIdentifier
        /// is hot code and since this method does extra work, it seems
        /// worthwhile to separate it from the common case.
        /// </summary>
        /// <param name="info">
        /// Token information to fill in. On success its <c>Text</c> and <c>StringValue</c> are set
        /// (and <c>IsVerbatim</c> / <c>HasIdentifierEscapeSequence</c> where applicable); on failure
        /// <c>Text</c> and <c>StringValue</c> are set to null.
        /// </param>
        /// <returns>
        /// True if at least one identifier character was scanned; otherwise false, in which case
        /// the text window is reset to the position it had on entry.
        /// </returns>
        private bool ScanIdentifier_CrefSlowPath(ref TokenInfo info)
        {
            Debug.Assert(InXmlCrefOrNameAttributeValue);

            // Remembered so the window can be rewound if no identifier is found.
            int start = TextWindow.Position;

            this.ResetIdentBuffer();

            if (AdvanceIfMatches('@'))
            {
                // In xml name attribute values, the '@' is part of the value text of the identifier
                // (to match dev11).
                if (InXmlNameAttributeValue)
                {
                    AddIdentChar('@');
                }
                else
                {
                    info.IsVerbatim = true;
                }
            }

            // Each iteration consumes one logical character (a raw character, an XML entity or a
            // unicode escape) and either appends it to the identifier buffer or rewinds to
            // beforeConsumed and leaves the loop.
            while (true)
            {
                int  beforeConsumed = TextWindow.Position;
                char consumedChar;
                char consumedSurrogate;

                if (TextWindow.PeekChar() == '&')
                {
                    if (!TextWindow.TryScanXmlEntity(out consumedChar, out consumedSurrogate))
                    {
                        // If it's not a valid entity, then it's not part of the identifier.
                        TextWindow.Reset(beforeConsumed);
                        goto LoopExit;
                    }
                }
                else
                {
                    consumedChar      = TextWindow.NextChar();
                    consumedSurrogate = SlidingTextWindow.InvalidCharacter;
                }

                // NOTE: If the surrogate is non-zero, then consumedChar won't match
                // any of the cases below (UTF-16 guarantees that members of surrogate
                // pairs aren't separately valid).

                // Set once a unicode escape has been decoded so that a decoded backslash is not
                // treated as the start of another escape.
                bool isEscaped = false;
top:
                switch (consumedChar)
                {
                case '\\':
                    // NOTE: For completeness, we should allow xml entities in unicode escape
                    // sequences (DevDiv #16321).  Since it is not currently a priority, we will
                    // try to make the interim behavior sensible: we will only attempt to scan
                    // a unicode escape if NONE of the characters are XML entities (including
                    // the backslash, which we have already consumed).
                    // When we're ready to implement this behavior, we can drop the position
                    // check and use AdvanceIfMatches instead of PeekChar.
                    if (!isEscaped && (TextWindow.Position == beforeConsumed + 1) &&
                        (TextWindow.PeekChar() == 'u' || TextWindow.PeekChar() == 'U'))
                    {
                        Debug.Assert(consumedSurrogate == SlidingTextWindow.InvalidCharacter, "Since consumedChar == '\\'");

                        info.HasIdentifierEscapeSequence = true;

                        TextWindow.Reset(beforeConsumed);
                        // ^^^^^^^ otherwise \u005Cu1234 looks just like \u1234! (i.e. escape within escape)
                        isEscaped = true;
                        SyntaxDiagnosticInfo error;
                        consumedChar = TextWindow.NextUnicodeEscape(out consumedSurrogate, out error);
                        AddCrefError(error);
                        // Classify the decoded character with the same switch.
                        goto top;
                    }

                    goto default;

                case '_':
                case 'A':
                case 'B':
                case 'C':
                case 'D':
                case 'E':
                case 'F':
                case 'G':
                case 'H':
                case 'I':
                case 'J':
                case 'K':
                case 'L':
                case 'M':
                case 'N':
                case 'O':
                case 'P':
                case 'Q':
                case 'R':
                case 'S':
                case 'T':
                case 'U':
                case 'V':
                case 'W':
                case 'X':
                case 'Y':
                case 'Z':
                case 'a':
                case 'b':
                case 'c':
                case 'd':
                case 'e':
                case 'f':
                case 'g':
                case 'h':
                case 'i':
                case 'j':
                case 'k':
                case 'l':
                case 'm':
                case 'n':
                case 'o':
                case 'p':
                case 'q':
                case 'r':
                case 's':
                case 't':
                case 'u':
                case 'v':
                case 'w':
                case 'x':
                case 'y':
                case 'z':
                {
                    // Again, these are the 'common' identifier characters...
                    break;
                }

                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9':
                {
                    // A digit cannot start an identifier.
                    if (this._identLen == 0)
                    {
                        TextWindow.Reset(beforeConsumed);
                        goto LoopExit;
                    }

                    // Again, these are the 'common' identifier characters...
                    break;
                }

                case ' ':
                case '$':
                case '\t':
                case '.':
                case ';':
                case '(':
                case ')':
                case ',':
                case '<':
                    // ...and these are the 'common' stop characters.
                    TextWindow.Reset(beforeConsumed);
                    goto LoopExit;

                case SlidingTextWindow.InvalidCharacter:
                    // InvalidCharacter only terminates the identifier at the real end of the
                    // input; otherwise it is classified like any other character.
                    if (!TextWindow.IsReallyAtEnd())
                    {
                        goto default;
                    }

                    TextWindow.Reset(beforeConsumed);
                    goto LoopExit;

                default:
                {
                    // This is the 'expensive' call
                    if (this._identLen == 0 && consumedChar > 127 && SyntaxKindFacts.IsIdentifierStartCharacter(consumedChar))
                    {
                        break;
                    }
                    else if (this._identLen > 0 && consumedChar > 127 && SyntaxKindFacts.IsIdentifierPartCharacter(consumedChar))
                    {
                        //// BUG 424819 : Handle identifier chars > 0xFFFF via surrogate pairs
                        if (SyntaxKindFacts.IsFormattingChar(consumedChar))
                        {
                            continue;                                             // Formatting characters are consumed but not added to the identifier buffer
                        }

                        break;
                    }
                    else
                    {
                        // Not a valid identifier character, so bail.
                        TextWindow.Reset(beforeConsumed);
                        goto LoopExit;
                    }
                }
                }

                // Reached only via 'break' above: the character belongs to the identifier.
                this.AddIdentChar(consumedChar);
                if (consumedSurrogate != SlidingTextWindow.InvalidCharacter)
                {
                    this.AddIdentChar(consumedSurrogate);
                }
            }

LoopExit:
            if (this._identLen > 0)
            {
                // NOTE: If we don't intern the string value, then we won't get a hit
                // in the keyword dictionary!  (It searches for a key using identity.)
                // The text does not have to be interned (and probably shouldn't be)
                // if it contains entities (else-case).

                var width = TextWindow.Width;                 // exact size of input characters

                // id buffer is identical to width in input
                if (this._identLen == width)
                {
                    info.StringValue = TextWindow.GetInternedText();
                    info.Text        = info.StringValue;
                }
                else
                {
                    // The source text differs in length from the identifier value (the code
                    // above can decode entities and escapes, skip formatting characters and
                    // drop a verbatim '@'), so the value comes from the identifier buffer and
                    // the text from the window.
                    info.StringValue = TextWindow.Intern(this._identBuffer, 0, this._identLen);
                    info.Text        = TextWindow.GetText(intern: false);
                }

                return(true);
            }
            else
            {
                // Nothing was scanned: clear the outputs and rewind to where scanning began.
                info.Text        = null;
                info.StringValue = null;
                TextWindow.Reset(start);
                return(false);
            }
        }