Exemplo n.º 1
0
        /// <summary>
        /// Creates the symbol for the value text that starts at <c>SymbolStartIndex</c>
        /// and ends just before <paramref name="currentIndex"/>.
        /// </summary>
        /// <param name="currentIndex">Index of the first character in <c>Json</c> that is not part of the value.</param>
        /// <returns>
        /// The symbol returned by <c>JsonValue.TryCreate()</c>, or, when that returns null,
        /// a <c>GreenJsonUndefinedValueSyntax</c> of the same length after reporting an unrecognized value error.
        /// </returns>
        private IGreenJsonSymbol CreateValue(int currentIndex)
        {
            int valueLength = currentIndex - SymbolStartIndex;

            // Try to create the value from a span over the value text, so no substring is needed on this path.
            IGreenJsonSymbol recognized = JsonValue.TryCreate(Json.AsSpan().Slice(SymbolStartIndex, valueLength));
            if (recognized != null)
            {
                return recognized;
            }

            // Only the error path needs a copy of the value text, for the error that is reported.
            string unrecognizedText = Json.Substring(SymbolStartIndex, valueLength);
            Report(JsonParseErrors.UnrecognizedValue(unrecognizedText, SymbolStartIndex, valueLength));

            return new GreenJsonUndefinedValueSyntax(valueLength);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Lazily tokenizes <c>Json</c>, starting at <c>SymbolStartIndex</c>, into a sequence of green JSON symbols.
        /// </summary>
        /// <remarks>
        /// The method is a state machine with five modes, implemented as labels that are switched with goto:
        /// whitespace, value, string, single line comment and multi line comment.
        /// <c>SymbolStartIndex</c> is the index at which the symbol that has not been yielded yet starts,
        /// the local <c>currentIndex</c> is the index of the character being examined.
        /// Errors are passed to <c>Report()</c>; tokenization only stops when the end of <c>Json</c> is reached.
        /// </remarks>
        /// <returns>
        /// The symbols in source order. Symbols of variable length are created with their length in characters.
        /// </returns>
        private IEnumerable <IGreenJsonSymbol> _TokenizeAll()
        {
            // This tokenizer uses labels with goto to switch between modes of tokenization.

            int           length       = Json.Length;
            int           currentIndex = SymbolStartIndex;
            StringBuilder valueBuilder = new StringBuilder();

inWhitespace:

            while (currentIndex < length)
            {
                char c           = Json[currentIndex];
                int  symbolClass = GetSymbolClass(c);

                // Possibly yield a text element, or choose a different tokenization mode if the symbol class changed.
                if (symbolClass != symbolClassWhitespace)
                {
                    // Flush the whitespace that was gathered before this character, if any.
                    if (SymbolStartIndex < currentIndex)
                    {
                        yield return(GreenJsonWhitespaceSyntax.Create(currentIndex - SymbolStartIndex));

                        SymbolStartIndex = currentIndex;
                    }

                    if (symbolClass == symbolClassSymbol)
                    {
                        switch (c)
                        {
                        case JsonSpecialCharacter.CurlyOpenCharacter:
                            yield return(GreenJsonCurlyOpenSyntax.Value);

                            break;

                        case JsonSpecialCharacter.CurlyCloseCharacter:
                            yield return(GreenJsonCurlyCloseSyntax.Value);

                            break;

                        case JsonSpecialCharacter.SquareBracketOpenCharacter:
                            yield return(GreenJsonSquareBracketOpenSyntax.Value);

                            break;

                        case JsonSpecialCharacter.SquareBracketCloseCharacter:
                            yield return(GreenJsonSquareBracketCloseSyntax.Value);

                            break;

                        case JsonSpecialCharacter.ColonCharacter:
                            yield return(GreenJsonColonSyntax.Value);

                            break;

                        case JsonSpecialCharacter.CommaCharacter:
                            yield return(GreenJsonCommaSyntax.Value);

                            break;

                        case StringLiteral.QuoteCharacter:
                            goto inString;

                        case JsonSpecialCharacter.CommentStartFirstCharacter:
                            // Look ahead 1 character to see if this is the start of a comment.
                            // In all other cases, treat as an unexpected symbol.
                            if (currentIndex + 1 < length)
                            {
                                char secondChar = Json[currentIndex + 1];
                                if (secondChar == JsonSpecialCharacter.SingleLineCommentStartSecondCharacter)
                                {
                                    goto inSingleLineComment;
                                }
                                else if (secondChar == JsonSpecialCharacter.MultiLineCommentStartSecondCharacter)
                                {
                                    goto inMultiLineComment;
                                }
                            }
                            goto default;

                        default:
                            Report(JsonParseErrors.UnexpectedSymbol(c, currentIndex));
                            yield return(GreenJsonUnknownSymbolSyntax.Value);

                            break;
                        }

                        // Increment to indicate the current character has been yielded.
                        SymbolStartIndex++;
                    }
                    else
                    {
                        goto inValue;
                    }
                }

                currentIndex++;
            }

            // End of input: flush trailing whitespace, if any.
            if (SymbolStartIndex < currentIndex)
            {
                yield return(GreenJsonWhitespaceSyntax.Create(currentIndex - SymbolStartIndex));
            }

            yield break;

inValue:

            // Eat the first symbol character, but leave SymbolStartIndex unchanged.
            currentIndex++;

            while (currentIndex < length)
            {
                char c           = Json[currentIndex];
                int  symbolClass = GetSymbolClass(c);

                // Possibly yield a text element, or choose a different tokenization mode if the symbol class changed.
                if (symbolClass != symbolClassValueChar)
                {
                    // The value ends just before the current character.
                    yield return(CreateValue(currentIndex));

                    SymbolStartIndex = currentIndex;

                    if (symbolClass == symbolClassSymbol)
                    {
                        switch (c)
                        {
                        case JsonSpecialCharacter.CurlyOpenCharacter:
                            yield return(GreenJsonCurlyOpenSyntax.Value);

                            break;

                        case JsonSpecialCharacter.CurlyCloseCharacter:
                            yield return(GreenJsonCurlyCloseSyntax.Value);

                            break;

                        case JsonSpecialCharacter.SquareBracketOpenCharacter:
                            yield return(GreenJsonSquareBracketOpenSyntax.Value);

                            break;

                        case JsonSpecialCharacter.SquareBracketCloseCharacter:
                            yield return(GreenJsonSquareBracketCloseSyntax.Value);

                            break;

                        case JsonSpecialCharacter.ColonCharacter:
                            yield return(GreenJsonColonSyntax.Value);

                            break;

                        case JsonSpecialCharacter.CommaCharacter:
                            yield return(GreenJsonCommaSyntax.Value);

                            break;

                        case StringLiteral.QuoteCharacter:
                            goto inString;

                        case JsonSpecialCharacter.CommentStartFirstCharacter:
                            // Look ahead 1 character to see if this is the start of a comment.
                            // In all other cases, treat as an unexpected symbol.
                            if (currentIndex + 1 < length)
                            {
                                char secondChar = Json[currentIndex + 1];
                                if (secondChar == JsonSpecialCharacter.SingleLineCommentStartSecondCharacter)
                                {
                                    goto inSingleLineComment;
                                }
                                else if (secondChar == JsonSpecialCharacter.MultiLineCommentStartSecondCharacter)
                                {
                                    goto inMultiLineComment;
                                }
                            }
                            goto default;

                        default:
                            Report(JsonParseErrors.UnexpectedSymbol(c, currentIndex));
                            yield return(GreenJsonUnknownSymbolSyntax.Value);

                            break;
                        }

                        // Increment to indicate the current character has been yielded.
                        SymbolStartIndex++;
                    }

                    // Either a symbol was yielded, or the current character is the first of a run of whitespace
                    // that starts at SymbolStartIndex. In both cases continue in whitespace mode after it.
                    currentIndex++;
                    goto inWhitespace;
                }

                currentIndex++;
            }

            // End of input: the value runs up to the end of the text.
            if (SymbolStartIndex < currentIndex)
            {
                yield return(CreateValue(currentIndex));
            }

            yield break;

inString:

            // Detect errors.
            bool hasStringErrors = false;

            // Eat " character, but leave SymbolStartIndex unchanged.
            currentIndex++;

            // Prepare for use.
            valueBuilder.Clear();

            while (currentIndex < length)
            {
                char c = Json[currentIndex];

                switch (c)
                {
                case StringLiteral.QuoteCharacter:
                    // Closing quote character.
                    currentIndex++;
                    if (hasStringErrors)
                    {
                        yield return(new GreenJsonErrorStringSyntax(currentIndex - SymbolStartIndex));
                    }
                    else
                    {
                        yield return(new GreenJsonStringLiteralSyntax(valueBuilder.ToString(), currentIndex - SymbolStartIndex));
                    }
                    SymbolStartIndex = currentIndex;
                    goto inWhitespace;

                case StringLiteral.EscapeCharacter:
                    // Escape sequence.
                    // Look ahead one character.
                    int escapeSequenceStart = currentIndex;
                    currentIndex++;

                    // If the escape character is the last character of the text, nothing is appended here,
                    // and the loop ends with currentIndex beyond length.
                    if (currentIndex < length)
                    {
                        char escapedChar = Json[currentIndex];

                        switch (escapedChar)
                        {
                        case StringLiteral.QuoteCharacter:
                        case StringLiteral.EscapeCharacter:
                        case '/':          // Weird one, but it's in the specification.
                            valueBuilder.Append(escapedChar);
                            break;

                        case 'b':
                            valueBuilder.Append('\b');
                            break;

                        case 'f':
                            valueBuilder.Append('\f');
                            break;

                        case 'n':
                            valueBuilder.Append('\n');
                            break;

                        case 'r':
                            valueBuilder.Append('\r');
                            break;

                        case 't':
                            valueBuilder.Append('\t');
                            break;

                        case 'v':
                            valueBuilder.Append('\v');
                            break;

                        case 'u':
                            bool validUnicodeSequence = true;
                            int  unicodeValue         = 0;

                            // Expect exactly 4 hex characters.
                            const int expectedHexLength = 4;
                            for (int i = 0; i < expectedHexLength; i++)
                            {
                                currentIndex++;
                                if (currentIndex < length)
                                {
                                    // 1 hex character = 4 bits.
                                    unicodeValue <<= 4;
                                    char hexChar = Json[currentIndex];
                                    if ('0' <= hexChar && hexChar <= '9')
                                    {
                                        unicodeValue = unicodeValue + hexChar - '0';
                                    }
                                    else if ('a' <= hexChar && hexChar <= 'f')
                                    {
                                        const int aValue = 'a' - 10;
                                        unicodeValue = unicodeValue + hexChar - aValue;
                                    }
                                    else if ('A' <= hexChar && hexChar <= 'F')
                                    {
                                        const int aValue = 'A' - 10;
                                        unicodeValue = unicodeValue + hexChar - aValue;
                                    }
                                    else
                                    {
                                        // Step back, so the non-hex character is examined again as a regular string character.
                                        currentIndex--;
                                        validUnicodeSequence = false;
                                        break;
                                    }
                                }
                                else
                                {
                                    // Ran out of text; step back to the last character of the text.
                                    currentIndex--;
                                    validUnicodeSequence = false;
                                    break;
                                }
                            }

                            if (validUnicodeSequence)
                            {
                                valueBuilder.Append(Convert.ToChar(unicodeValue));
                            }
                            else
                            {
                                // Report the escape sequence up to and including the last hex character that was accepted.
                                hasStringErrors = true;
                                int escapeSequenceLength = currentIndex - escapeSequenceStart + 1;
                                Report(JsonParseErrors.UnrecognizedEscapeSequence(
                                           Json.Substring(escapeSequenceStart, escapeSequenceLength),
                                           escapeSequenceStart, escapeSequenceLength));
                            }
                            break;

                        default:
                            hasStringErrors = true;
                            Report(JsonParseErrors.UnrecognizedEscapeSequence(
                                       Json.Substring(escapeSequenceStart, 2),
                                       escapeSequenceStart, 2));
                            break;
                        }
                    }
                    break;

                default:
                    if (StringLiteral.CharacterMustBeEscaped(c))
                    {
                        // Generate user friendly representation of the illegal character in error message.
                        hasStringErrors = true;
                        Report(JsonParseErrors.IllegalControlCharacterInString(c, currentIndex));
                    }
                    else
                    {
                        valueBuilder.Append(c);
                    }
                    break;
                }

                currentIndex++;
            }

            // Use length rather than currentIndex; currentIndex is bigger after a '\'.
            int unterminatedStringLength = length - SymbolStartIndex;

            Report(JsonParseErrors.UnterminatedString(SymbolStartIndex, unterminatedStringLength));
            yield return(new GreenJsonErrorStringSyntax(unterminatedStringLength));

            yield break;

inSingleLineComment:

            // Eat both / characters, but leave SymbolStartIndex unchanged.
            currentIndex += 2;

            while (currentIndex < length)
            {
                char c = Json[currentIndex];

                switch (c)
                {
                case '\r':
                    // Look ahead to see if the next character is a linefeed.
                    // Otherwise, the '\r' just becomes part of the comment.
                    // Peek without advancing: the shared increment below the switch moves past a lone '\r',
                    // so no character is skipped and currentIndex never ends up beyond length.
                    if (currentIndex + 1 < length && Json[currentIndex + 1] == '\n')
                    {
                        yield return(GreenJsonCommentSyntax.Create(currentIndex - SymbolStartIndex));

                        // The "\r\n" is whitespace which starts at the '\r'; eat both characters.
                        SymbolStartIndex = currentIndex;
                        currentIndex += 2;
                        goto inWhitespace;
                    }
                    break;

                case '\n':
                    yield return(GreenJsonCommentSyntax.Create(currentIndex - SymbolStartIndex));

                    // Eat the '\n'.
                    SymbolStartIndex = currentIndex;
                    currentIndex++;
                    goto inWhitespace;
                }

                currentIndex++;
            }

            // End of input: the comment runs up to the end of the text.
            yield return(GreenJsonCommentSyntax.Create(currentIndex - SymbolStartIndex));

            yield break;

inMultiLineComment:

            // Eat /* characters, but leave SymbolStartIndex unchanged.
            currentIndex += 2;

            while (currentIndex < length)
            {
                if (Json[currentIndex] == '*')
                {
                    // Look ahead to see if the next character is a slash.
                    if (currentIndex + 1 < length)
                    {
                        char secondChar = Json[currentIndex + 1];

                        if (secondChar == '/')
                        {
                            // Increment so the closing '*/' is regarded as part of the comment.
                            currentIndex += 2;

                            yield return(GreenJsonCommentSyntax.Create(currentIndex - SymbolStartIndex));

                            SymbolStartIndex = currentIndex;
                            goto inWhitespace;
                        }
                    }
                }

                currentIndex++;
            }

            // End of input without a closing '*/': report it, and yield the remainder of the text as an unterminated comment.
            int unterminatedCommentLength = length - SymbolStartIndex;

            Report(JsonParseErrors.UnterminatedMultiLineComment(SymbolStartIndex, unterminatedCommentLength));
            yield return(new GreenJsonUnterminatedMultiLineCommentSyntax(unterminatedCommentLength));
        }