Example #1
0
        /// <summary>
        /// Reads a literal string (delimited by <c>'</c>) or, when allowed, a multi-line literal string
        /// (delimited by <c>'''</c>) and stores the resulting token in <c>_token</c>.
        /// </summary>
        /// <param name="start">Position used as the start of the produced token (the opening quote).</param>
        /// <param name="allowMultiline">
        /// When <c>true</c>, an opening <c>'''</c> starts a multi-line literal string; when <c>false</c>
        /// only the single-quote form is recognized.
        /// </param>
        /// <remarks>
        /// Characters are copied verbatim into the token value (no escape processing happens here).
        /// Invalid newlines, control characters and a premature end-of-file are reported through
        /// <c>AddError</c>, but a token is always produced so that lexing can continue.
        /// </remarks>
        private void ReadStringLiteral(TextPosition start, bool allowMultiline)
        {
            var end = _position;

            bool isMultiLine = false;

            NextChar(); // Skip the opening '
            if (allowMultiline && _c == '\'')
            {
                end = _position;
                NextChar();

                if (_c == '\'')
                {
                    end = _position;
                    NextChar();
                    // we have an opening ''' -> this is a multi-line literal string
                    isMultiLine = true;

                    // NOTE(review): presumably drops a newline directly following the opening ''' — helper not visible here
                    SkipImmediateNextLine();
                }
                else
                {
                    // Only two quotes ('') -> this is an empty literal string
                    _token = new SyntaxTokenValue(TokenKind.StringLiteral, start, end, string.Empty);
                    return;
                }
            }

            _textBuilder.Length = 0;
continue_parsing_string:
            // Copy characters until the next quote (a candidate terminator) or the end of the input
            while (_c != '\'' && _c != Eof)
            {
                if (!isMultiLine && CharHelper.IsNewLine(_c))
                {
                    AddError("Invalid newline in a string", _position, _position);
                }
                else if (CharHelper.IsControlCharacter(_c) && (!isMultiLine || !CharHelper.IsNewLine(_c)))
                {
                    // Report at the offending character (not at the start of the string) so that the
                    // diagnostic is usable on long/multi-line literals, consistently with the newline error above.
                    AddError($"Invalid control character found {((char)_c).ToPrintableString()}", _position, _position);
                }
                // The character is kept even when an error was reported (error recovery)
                _textBuilder.AppendUtf32(_c);
                end = _position;
                NextChar();
            }

            if (isMultiLine)
            {
                if (_c == '\'')
                {
                    // A multi-line literal is only closed by three consecutive quotes.
                    // One or two quotes not followed by a third one are regular content:
                    // append them and resume scanning.
                    end = _position;
                    NextChar();
                    if (_c == '\'')
                    {
                        end = _position;
                        NextChar();
                        if (_c == '\'')
                        {
                            // Closing ''' found
                            end = _position;
                            NextChar();
                        }
                        else
                        {
                            _textBuilder.Append('\'');
                            _textBuilder.Append('\'');
                            goto continue_parsing_string;
                        }
                    }
                    else
                    {
                        _textBuilder.Append('\'');
                        goto continue_parsing_string;
                    }
                }
                else
                {
                    // The scanning loop only stops on a quote or Eof, so this is Eof
                    AddError("Invalid End-Of-File found for multi-line literal string", end, end);
                }
                _token = new SyntaxTokenValue(TokenKind.StringLiteralMulti, start, end, _textBuilder.ToString());
            }
            else
            {
                if (_c == '\'')
                {
                    // Closing quote
                    end = _position;
                    NextChar();
                }
                else
                {
                    // The scanning loop only stops on a quote or Eof, so this is Eof
                    AddError("Invalid End-Of-File found on string literal", end, end);
                }
                _token = new SyntaxTokenValue(TokenKind.StringLiteral, start, end, _textBuilder.ToString());
            }
        }
Example #2
0
        /// <summary>
        /// Lexes the next token while in a key context (left-hand side of <c>=</c>, table headers)
        /// and stores it in <c>_token</c>, dispatching on the current character <c>_c</c>.
        /// </summary>
        private void NextTokenForKey()
        {
            var tokenStart = _position;

            switch (_c)
            {
            case Eof:
                // Nothing is consumed at the end of the input
                _token = new SyntaxTokenValue(TokenKind.Eof, tokenStart, tokenStart);
                break;

            case '\n':
                _token = new SyntaxTokenValue(TokenKind.NewLine, tokenStart, tokenStart);
                NextChar();
                break;

            case '\r':
                NextChar();
                if (_c != '\n')
                {
                    // A lone \r is a newline on its own
                    _token = new SyntaxTokenValue(TokenKind.NewLine, tokenStart, tokenStart);
                }
                else
                {
                    // \r\n: the token spans both characters
                    _token = new SyntaxTokenValue(TokenKind.NewLine, tokenStart, _position);
                    NextChar();
                }
                break;

            case '#':
                NextChar();
                ReadComment(tokenStart);
                break;

            case '"':
                // Multi-line strings are not allowed for keys
                ReadString(tokenStart, false);
                break;

            case '\'':
                // Multi-line literal strings are not allowed for keys
                ReadStringLiteral(tokenStart, false);
                break;

            case '.':
                NextChar();
                _token = new SyntaxTokenValue(TokenKind.Dot, tokenStart, tokenStart);
                break;

            case '=':
                // In the context of a key, we lex up to (and including) the `=`
                NextChar();
                _token = new SyntaxTokenValue(TokenKind.Equal, tokenStart, tokenStart);
                break;

            case '{':
                _token = new SyntaxTokenValue(TokenKind.OpenBrace, tokenStart, tokenStart);
                NextChar();
                break;

            case '}':
                _token = new SyntaxTokenValue(TokenKind.CloseBrace, tokenStart, tokenStart);
                NextChar();
                break;

            case '[':
                NextChar();
                if (_c != '[')
                {
                    // Single [
                    _token = new SyntaxTokenValue(TokenKind.OpenBracket, tokenStart, tokenStart);
                }
                else
                {
                    // Double [[: the token spans both brackets
                    _token = new SyntaxTokenValue(TokenKind.OpenBracketDouble, tokenStart, _position);
                    NextChar();
                }
                break;

            case ']':
                NextChar();
                if (_c != ']')
                {
                    // Single ]
                    _token = new SyntaxTokenValue(TokenKind.CloseBracket, tokenStart, tokenStart);
                }
                else
                {
                    // Double ]]: the token spans both brackets
                    _token = new SyntaxTokenValue(TokenKind.CloseBracketDouble, tokenStart, _position);
                    NextChar();
                }
                break;

            default:
                // Whitespace first; when ConsumeWhitespace() returns true nothing else is done here
                // (presumably it produced the token itself — helper not visible here)
                if (!ConsumeWhitespace())
                {
                    if (CharHelper.IsKeyStart(_c))
                    {
                        ReadKey();
                    }
                    else
                    {
                        // Invalid character: emit a one-character Invalid token and skip it
                        _token = new SyntaxTokenValue(TokenKind.Invalid, _position, _position);
                        NextChar();
                    }
                }
                break;
            }
        }
Example #3
0
        /// <summary>
        /// Reads a token starting with <c>+</c>, <c>-</c> or an identifier character and stores it in <c>_token</c>.
        /// A sign directly followed by a digit is delegated to <c>ReadNumberOrDate</c>; otherwise the characters
        /// are collected and matched against the keywords <c>true</c>, <c>false</c>, <c>inf</c> and <c>nan</c>
        /// (the last two optionally signed). Anything else yields an <c>Invalid</c> token.
        /// </summary>
        private void ReadSpecialToken()
        {
            var tokenStart = _position;
            var tokenEnd   = tokenStart;

            // The collected characters are only used to recognize the keywords below
            _currentIdentifierChars.Clear();

            var leadingChar = _c;
            _currentIdentifierChars.Add(leadingChar);
            NextChar();

            // A sign immediately followed by a digit is a number (e.g. -1 or +2)
            var isSign = leadingChar == '+' || leadingChar == '-';
            if (isSign && CharHelper.IsDigit(_c))
            {
                _currentIdentifierChars.Clear();
                ReadNumberOrDate(leadingChar, tokenStart);
                return;
            }

            // Collect the remaining identifier characters
            for (; CharHelper.IsIdentifierContinue(_c); NextChar())
            {
                _currentIdentifierChars.Add(_c);
                tokenEnd = _position;
            }

            // Keywords are tested in order; the first match wins
            _token =
                MatchCurrentIdentifier("true")  ? new SyntaxTokenValue(TokenKind.True, tokenStart, tokenEnd, BoxedValues.True) :
                MatchCurrentIdentifier("false") ? new SyntaxTokenValue(TokenKind.False, tokenStart, tokenEnd, BoxedValues.False) :
                MatchCurrentIdentifier("inf")   ? new SyntaxTokenValue(TokenKind.Infinite, tokenStart, tokenEnd, BoxedValues.FloatPositiveInfinity) :
                MatchCurrentIdentifier("+inf")  ? new SyntaxTokenValue(TokenKind.PositiveInfinite, tokenStart, tokenEnd, BoxedValues.FloatPositiveInfinity) :
                MatchCurrentIdentifier("-inf")  ? new SyntaxTokenValue(TokenKind.NegativeInfinite, tokenStart, tokenEnd, BoxedValues.FloatNegativeInfinity) :
                MatchCurrentIdentifier("nan")   ? new SyntaxTokenValue(TokenKind.Nan, tokenStart, tokenEnd, BoxedValues.FloatNan) :
                MatchCurrentIdentifier("+nan")  ? new SyntaxTokenValue(TokenKind.PositiveNan, tokenStart, tokenEnd, BoxedValues.FloatPositiveNaN) :
                MatchCurrentIdentifier("-nan")  ? new SyntaxTokenValue(TokenKind.NegativeNan, tokenStart, tokenEnd, BoxedValues.FloatNegativeNaN) :
                                                  new SyntaxTokenValue(TokenKind.Invalid, tokenStart, tokenEnd);

            _currentIdentifierChars.Clear();
        }
Example #4
0
        /// <summary>
        /// Reads a numeric or date/time literal starting at the current character and stores the resulting
        /// token in <c>_token</c>. Handles prefixed integers (<c>0x</c>, <c>0o</c>, <c>0b</c>), decimal integers,
        /// floats (fraction and/or exponent) and offset/local dates and times.
        /// </summary>
        /// <param name="signPrefix">
        /// The sign character already consumed by the caller, or <c>null</c> when the literal has no sign.
        /// </param>
        /// <param name="signPrefixPos">
        /// Position of <paramref name="signPrefix"/>; used as the start of the token when provided.
        /// </param>
        /// <remarks>
        /// Problems are reported through <c>AddError</c>. An <c>Invalid</c> token is produced when a prefixed
        /// integer has no digit or ends with an underscore, or when a digit is missing after the float dot or
        /// the exponent; in the other error cases a best-effort token is still produced.
        /// </remarks>
        private void ReadNumberOrDate(char32? signPrefix = null, TextPosition? signPrefixPos = null)
        {
            var start   = signPrefixPos ?? _position;
            var end     = _position;
            var isFloat = false;

            var positionFirstDigit = _position;

            var hasLeadingSign = signPrefix != null;
            var hasLeadingZero = _c == '0';

            // Reset parsing of integer
            _textBuilder.Length = 0;
            if (hasLeadingSign)
            {
                _textBuilder.AppendUtf32(signPrefix.Value);
            }

            // If we start with 0, it might be an hexa, octal or binary literal
            // (a signed literal is never considered for a prefixed integer)
            if (!hasLeadingSign && hasLeadingZero)
            {
                NextChar(); // Skip first digit character
                if (_c == 'x' || _c == 'X' || _c == 'o' || _c == 'O' || _c == 'b' || _c == 'B')
                {
                    string name;
                    Func<char32, bool> match;
                    Func<char32, int>  convert;
                    string    range;
                    string    prefix;
                    int       shift;
                    TokenKind tokenKind;
                    if (_c == 'x' || _c == 'X')
                    {
                        name      = "hexadecimal";
                        range     = "[0-9a-fA-F]";
                        prefix    = "0x";
                        match     = CharHelper.IsHexFunc;
                        convert   = CharHelper.HexToDecFunc;
                        shift     = 4;
                        tokenKind = TokenKind.IntegerHexa;
                    }
                    else if (_c == 'o' || _c == 'O')
                    {
                        name      = "octal";
                        range     = "[0-7]";
                        prefix    = "0o";
                        match     = CharHelper.IsOctalFunc;
                        convert   = CharHelper.OctalToDecFunc;
                        shift     = 3;
                        tokenKind = TokenKind.IntegerOctal;
                    }
                    else
                    {
                        name      = "binary";
                        range     = "0 or 1";
                        prefix    = "0b";
                        match     = CharHelper.IsBinaryFunc;
                        convert   = CharHelper.BinaryToDecFunc;
                        shift     = 1;
                        tokenKind = TokenKind.IntegerBinary;
                    }

                    end = _position;
                    NextChar(); // skip x,X,o,O,b,B

                    // Maximum number of digits that fit in 64 bits for this base
                    int   originalMaxShift = 64 / shift;
                    int   maxShift         = originalMaxShift;
                    bool  hasCharInRange   = false;
                    bool  lastWasDigit     = false;
                    ulong value            = 0;
                    while (true)
                    {
                        bool hasLocalCharInRange = false;
                        if (_c == '_' || (hasLocalCharInRange = match(_c)))
                        {
                            var nextIsDigit = _c != '_';
                            if (!lastWasDigit && !nextIsDigit)
                            {
                                // toml-specs: each underscore must be surrounded by at least one digit on each side.
                                AddError($"An underscore must be surrounded by at least one {name} digit on each side", start, start);
                            }
                            else if (nextIsDigit)
                            {
                                value = (value << shift) + (ulong)convert(_c);
                                maxShift--;
                                // Log only once the error that the value is beyond
                                if (maxShift == -1)
                                {
                                    AddError($"Invalid size of {name} integer. Expecting less than or equal {originalMaxShift} {name} digits", start, start);
                                }
                            }

                            lastWasDigit = nextIsDigit;

                            if (hasLocalCharInRange)
                            {
                                hasCharInRange = true;
                            }
                            end = _position;
                            NextChar();
                        }
                        else
                        {
                            break;
                        }
                    }

                    if (!hasCharInRange)
                    {
                        AddError($"Invalid {name} integer. Expecting at least one {range} after {prefix}", start, start);
                        _token = new SyntaxTokenValue(TokenKind.Invalid, start, end);
                    }
                    else if (!lastWasDigit)
                    {
                        // The literal ends with an underscore
                        AddError($"Invalid {name} integer. Expecting a {range} after the last character", start, start);
                        _token = new SyntaxTokenValue(TokenKind.Invalid, start, end);
                    }
                    else
                    {
                        // toml-specs: 64 bit (signed long) range expected (−9,223,372,036,854,775,808 to 9,223,372,036,854,775,807).
                        // NOTE(review): values above long.MaxValue are reinterpreted by this cast (negative in an unchecked context) — confirm this is intended
                        _token = new SyntaxTokenValue(tokenKind, start, end, (long)value);
                    }
                    return;
                }
                else
                {
                    // Append the leading 0
                    _textBuilder.Append('0');
                }
            }

            // Parse leading digits
            ReadDigits(ref end, hasLeadingZero);

            // We are in the case of a date
            if (_c == '-' || _c == ':')
            {
                // Offset Date-Time
                // odt1 = 1979-05-27T07:32:00Z
                // odt2 = 1979-05-27T00:32:00-07:00
                // odt3 = 1979-05-27T00:32:00.999999-07:00
                //
                // For the sake of readability, you may replace the T delimiter between date and time with a space (as permitted by RFC 3339 section 5.6).
                //  NOTE: ISO 8601 defines date and time separated by "T".
                //      Applications using this syntax may choose, for the sake of
                //      readability, to specify a full-date and full-time separated by
                //      (say) a space character.
                // odt4 = 1979-05-27 07:32:00Z
                //
                // Local Date-Time
                //
                // ldt1 = 1979-05-27T07:32:00
                //
                // Local Date
                //
                // ld1 = 1979-05-27
                //
                // Local Time
                //
                // lt1 = 07:32:00
                // lt2 = 00:32:00.999999

                // Parse the date/time
                while (CharHelper.IsDateTime(_c))
                {
                    _textBuilder.AppendUtf32(_c);
                    end = _position;
                    NextChar();
                }

                // If we have a whitespace, followed by a date/time character, try to parse the following
                if (CharHelper.IsWhiteSpace(_c) && CharHelper.IsDateTime(PeekChar()))
                {
                    _textBuilder.AppendUtf32(_c); // Append the space
                    NextChar();                   // skip the space
                    while (CharHelper.IsDateTime(_c))
                    {
                        _textBuilder.AppendUtf32(_c);
                        end = _position;
                        NextChar();
                    }
                }

                var dateTimeAsString = _textBuilder.ToString();

                if (hasLeadingSign)
                {
                    AddError($"Invalid prefix `{signPrefix.Value}` for the following offset/local date/time `{dateTimeAsString}`", start, end);
                    // Still try to recover by dropping the sign character
                    dateTimeAsString = dateTimeAsString.Substring(1);
                }

                // Try the formats from the most specific to the least specific
                DateTime datetime;
                if (DateTimeRFC3339.TryParseOffsetDateTime(dateTimeAsString, out datetime))
                {
                    _token = new SyntaxTokenValue(TokenKind.OffsetDateTime, start, end, datetime);
                }
                else if (DateTimeRFC3339.TryParseLocalDateTime(dateTimeAsString, out datetime))
                {
                    _token = new SyntaxTokenValue(TokenKind.LocalDateTime, start, end, datetime);
                }
                else if (DateTimeRFC3339.TryParseLocalDate(dateTimeAsString, out datetime))
                {
                    _token = new SyntaxTokenValue(TokenKind.LocalDate, start, end, datetime);
                }
                else if (DateTimeRFC3339.TryParseLocalTime(dateTimeAsString, out datetime))
                {
                    _token = new SyntaxTokenValue(TokenKind.LocalTime, start, end, datetime);
                }
                else
                {
                    // Try to recover the date using the standard C# (not necessarily RFC3339)
                    if (DateTime.TryParse(dateTimeAsString, CultureInfo.InvariantCulture, DateTimeStyles.AllowInnerWhite, out datetime))
                    {
                        _token = new SyntaxTokenValue(TokenKind.LocalDateTime, start, end, datetime);

                        // But we produce an error anyway
                        AddError($"Invalid format of date time/offset `{dateTimeAsString}` not following RFC3339", start, end);
                    }
                    else
                    {
                        _token = new SyntaxTokenValue(TokenKind.LocalDateTime, start, end, new DateTime());
                        // But we produce an error anyway
                        AddError($"Unable to parse the date time/offset `{dateTimeAsString}`", start, end);
                    }
                }

                return;
            }

            // Read any number following
            if (_c == '.')
            {
                _textBuilder.Append('.');
                end = _position;
                NextChar(); // Skip the dot .

                // We expect at least a digit after .
                if (!CharHelper.IsDigit(_c))
                {
                    AddError("Expecting at least one digit after the float dot .", _position, _position);
                    _token = new SyntaxTokenValue(TokenKind.Invalid, start, end);
                    return;
                }

                isFloat = true;
                ReadDigits(ref end, false);
            }

            // Parse the exponent, if any
            if (_c == 'e' || _c == 'E')
            {
                isFloat = true;

                _textBuilder.AppendUtf32(_c);
                end = _position;
                NextChar();
                if (_c == '+' || _c == '-')
                {
                    _textBuilder.AppendUtf32(_c);
                    end = _position;
                    NextChar();
                }

                if (!CharHelper.IsDigit(_c))
                {
                    AddError("Expecting at least one digit after the exponent", _position, _position);
                    _token = new SyntaxTokenValue(TokenKind.Invalid, start, end);
                    return;
                }
                ReadDigits(ref end, false);
            }

            var    numberAsText = _textBuilder.ToString();
            object resolvedValue;

            // toml-specs: leading zeros are not allowed, i.e. a `0` starting the integer part must not be
            // followed by another digit. The check is done on the text and not on the parsed value, so that
            // valid floats such as `0.5e1` are accepted while `00` or `00.5` are rejected.
            var firstDigitIndex = hasLeadingSign ? 1 : 0; // skip the sign character stored in the text
            var hasInvalidLeadingZero = hasLeadingZero
                                        && numberAsText.Length > firstDigitIndex + 1
                                        && numberAsText[firstDigitIndex] == '0'
                                        && numberAsText[firstDigitIndex + 1] >= '0'
                                        && numberAsText[firstDigitIndex + 1] <= '9';

            if (isFloat)
            {
                // On failure doubleValue is 0.0 and is still used for the token (error recovery)
                if (!double.TryParse(numberAsText, NumberStyles.Float, CultureInfo.InvariantCulture, out var doubleValue))
                {
                    AddError($"Unable to parse floating point `{numberAsText}`", start, end);
                }

                if (hasInvalidLeadingZero)
                {
                    AddError($"Unexpected leading zero (`0`) for float `{numberAsText}`", positionFirstDigit, positionFirstDigit);
                }

                // If value is 0.0 or 1.0, use box cached otherwise box
                resolvedValue = doubleValue == 0.0 ? BoxedValues.FloatZero : doubleValue == 1.0 ? BoxedValues.FloatOne : doubleValue;
            }
            else
            {
                // On failure longValue is 0 and is still used for the token (error recovery)
                if (!long.TryParse(numberAsText, NumberStyles.Integer, CultureInfo.InvariantCulture, out var longValue))
                {
                    AddError($"Unable to parse integer `{numberAsText}`", start, end);
                }

                if (hasInvalidLeadingZero)
                {
                    AddError($"Unexpected leading zero (`0`) for integer `{numberAsText}`", positionFirstDigit, positionFirstDigit);
                }

                // If value is 0 or 1, use box cached otherwise box
                resolvedValue = longValue == 0 ? BoxedValues.IntegerZero : longValue == 1 ? BoxedValues.IntegerOne : longValue;
            }

            _token = new SyntaxTokenValue(isFloat ? TokenKind.Float : TokenKind.Integer, start, end, resolvedValue);
        }
Example #5
0
        /// <summary>
        /// Lexes the next token while in a value context (right-hand side of <c>=</c>, arrays, inline tables)
        /// and stores it in <c>_token</c>, dispatching on the current character <c>_c</c>.
        /// </summary>
        private void NextTokenForValue()
        {
            var tokenStart = _position;

            switch (_c)
            {
            case Eof:
                // Nothing is consumed at the end of the input
                _token = new SyntaxTokenValue(TokenKind.Eof, tokenStart, tokenStart);
                break;

            case '\n':
                _token = new SyntaxTokenValue(TokenKind.NewLine, tokenStart, tokenStart);
                NextChar();
                break;

            case '\r':
                NextChar();
                if (_c != '\n')
                {
                    // A lone \r is a newline on its own
                    _token = new SyntaxTokenValue(TokenKind.NewLine, tokenStart, tokenStart);
                }
                else
                {
                    // \r\n: the token spans both characters
                    _token = new SyntaxTokenValue(TokenKind.NewLine, tokenStart, _position);
                    NextChar();
                }
                break;

            case '#':
                NextChar();
                ReadComment(tokenStart);
                break;

            case '"':
                // Multi-line strings are allowed for values
                ReadString(tokenStart, true);
                break;

            case '\'':
                // Multi-line literal strings are allowed for values
                ReadStringLiteral(tokenStart, true);
                break;

            case ',':
                _token = new SyntaxTokenValue(TokenKind.Comma, tokenStart, tokenStart);
                NextChar();
                break;

            case '[':
                NextChar();
                _token = new SyntaxTokenValue(TokenKind.OpenBracket, tokenStart, tokenStart);
                break;

            case ']':
                NextChar();
                _token = new SyntaxTokenValue(TokenKind.CloseBracket, tokenStart, tokenStart);
                break;

            case '{':
                _token = new SyntaxTokenValue(TokenKind.OpenBrace, tokenStart, tokenStart);
                NextChar();
                break;

            case '}':
                _token = new SyntaxTokenValue(TokenKind.CloseBrace, tokenStart, tokenStart);
                NextChar();
                break;

            default:
                // Whitespace first; when ConsumeWhitespace() returns true nothing else is done here
                // (presumably it produced the token itself — helper not visible here)
                if (!ConsumeWhitespace())
                {
                    var startsSpecialToken = _c == '+' || _c == '-' || CharHelper.IsIdentifierStart(_c);
                    if (startsSpecialToken)
                    {
                        // Signed numbers and keywords such as inf, +inf, -inf, nan, true, false
                        ReadSpecialToken();
                    }
                    else if (CharHelper.IsDigit(_c))
                    {
                        ReadNumberOrDate();
                    }
                    else
                    {
                        // Invalid character: emit a one-character Invalid token and skip it
                        _token = new SyntaxTokenValue(TokenKind.Invalid, _position, _position);
                        NextChar();
                    }
                }
                break;
            }
        }