/// <summary>
/// Consumes a run of decimal digits and `_` separators starting at the current character.
/// Digits are appended to <c>_textBuilder</c>; underscores are dropped but validated.
/// </summary>
/// <param name="end">Updated to the position of the last character consumed by this method.</param>
/// <param name="isPreviousDigit">
/// Whether the character immediately before the current one counts as a digit. It seeds the
/// underscore validation and is updated while scanning.
/// </param>
private void ReadDigits(ref TextPosition end, bool isPreviousDigit)
{
    while (true)
    {
        if (CharHelper.IsDigit(_c))
        {
            _textBuilder.AppendUtf32(_c);
            isPreviousDigit = true;
        }
        else if (_c != '_')
        {
            // Neither a digit nor a separator: the run is over.
            break;
        }
        else if (isPreviousDigit)
        {
            // A well-placed underscore: remember that a digit must come next.
            isPreviousDigit = false;
        }
        else
        {
            AddError("An underscore `_` must follow a digit and not another `_`", _position, _position);
        }

        end = _position;
        NextChar();
    }

    // The run ended right after an underscore (or without any digit at all).
    if (!isPreviousDigit)
    {
        AddError("Missing a digit after a trailing underscore `_`", _position, _position);
    }
}
/// <summary>
/// Lexes a token that starts with `+`, `-` or an identifier-start character: either a signed
/// number/date (delegated to <c>ReadNumberOrDate</c>) or one of the keywords
/// true, false, inf, +inf, -inf, nan, +nan, -nan. Anything else yields an Invalid token.
/// </summary>
private void ReadSpecialToken()
{
    var tokenStart = _position;
    var tokenEnd = _position;

    // The characters are collected so they can be compared against the keywords below.
    _currentIdentifierChars.Clear();
    var leadingChar = _c;
    _currentIdentifierChars.Add(_c);
    NextChar();

    // A sign immediately followed by a digit is a signed number such as -1 or +2.
    var startsWithSign = leadingChar == '+' || leadingChar == '-';
    if (startsWithSign && CharHelper.IsDigit(_c))
    {
        _currentIdentifierChars.Clear();
        ReadNumberOrDate(leadingChar, tokenStart);
        return;
    }

    // Collect the rest of the identifier.
    for (; CharHelper.IsIdentifierContinue(_c); NextChar())
    {
        _currentIdentifierChars.Add(_c);
        tokenEnd = _position;
    }

    // Resolve the keyword, if any. TokenKind.Invalid means that nothing matched.
    var kind = TokenKind.Invalid;
    object keywordValue = null;
    if (MatchCurrentIdentifier("true"))
    {
        kind = TokenKind.True;
        keywordValue = BoxedValues.True;
    }
    else if (MatchCurrentIdentifier("false"))
    {
        kind = TokenKind.False;
        keywordValue = BoxedValues.False;
    }
    else if (MatchCurrentIdentifier("inf"))
    {
        kind = TokenKind.Infinite;
        keywordValue = BoxedValues.FloatPositiveInfinity;
    }
    else if (MatchCurrentIdentifier("+inf"))
    {
        kind = TokenKind.PositiveInfinite;
        keywordValue = BoxedValues.FloatPositiveInfinity;
    }
    else if (MatchCurrentIdentifier("-inf"))
    {
        kind = TokenKind.NegativeInfinite;
        keywordValue = BoxedValues.FloatNegativeInfinity;
    }
    else if (MatchCurrentIdentifier("nan"))
    {
        kind = TokenKind.Nan;
        keywordValue = BoxedValues.FloatNan;
    }
    else if (MatchCurrentIdentifier("+nan"))
    {
        kind = TokenKind.PositiveNan;
        keywordValue = BoxedValues.FloatPositiveNaN;
    }
    else if (MatchCurrentIdentifier("-nan"))
    {
        kind = TokenKind.NegativeNan;
        keywordValue = BoxedValues.FloatNegativeNaN;
    }

    _token = kind == TokenKind.Invalid
        ? new SyntaxTokenValue(TokenKind.Invalid, tokenStart, tokenEnd)
        : new SyntaxTokenValue(kind, tokenStart, tokenEnd, keywordValue);

    _currentIdentifierChars.Clear();
}
/// <summary>
/// Lexes a numeric literal (decimal integer, float, or 0x/0o/0b prefixed integer) or a
/// date/time starting at the current character, and stores the result in <c>_token</c>.
/// Lexing problems are reported through <c>AddError</c>; a token is always produced.
/// </summary>
/// <param name="signPrefix">
/// The sign character already consumed by the caller, or <c>null</c> when the literal has no sign.
/// </param>
/// <param name="signPrefixPos">
/// Position of the sign; when provided it is used as the start position of the token.
/// </param>
private void ReadNumberOrDate(char32? signPrefix = null, TextPosition? signPrefixPos = null)
{
    var start = signPrefixPos ?? _position;
    var end = _position;
    var isFloat = false;
    var positionFirstDigit = _position;
    var hasLeadingSign = signPrefix != null;
    var hasLeadingZero = _c == '0';

    // Reset parsing of integer
    _textBuilder.Length = 0;
    if (hasLeadingSign)
    {
        _textBuilder.AppendUtf32(signPrefix.Value);
    }

    // Everything appended after this offset belongs to the integer part of the literal.
    var integerPartOffset = _textBuilder.Length;

    // If we start with 0, it might be an hexa, octal or binary literal
    if (!hasLeadingSign && hasLeadingZero)
    {
        NextChar(); // Skip first digit character
        if (_c == 'x' || _c == 'X' || _c == 'o' || _c == 'O' || _c == 'b' || _c == 'B')
        {
            string name;
            Func<char32, bool> match;
            Func<char32, int> convert;
            string range;
            string prefix;
            int shift;
            TokenKind tokenKind;
            if (_c == 'x' || _c == 'X')
            {
                name = "hexadecimal";
                range = "[0-9a-fA-F]";
                prefix = "0x";
                match = CharHelper.IsHexFunc;
                convert = CharHelper.HexToDecFunc;
                shift = 4;
                tokenKind = TokenKind.IntegerHexa;
            }
            else if (_c == 'o' || _c == 'O')
            {
                name = "octal";
                range = "[0-7]";
                prefix = "0o";
                match = CharHelper.IsOctalFunc;
                convert = CharHelper.OctalToDecFunc;
                shift = 3;
                tokenKind = TokenKind.IntegerOctal;
            }
            else
            {
                name = "binary";
                range = "0 or 1";
                prefix = "0b";
                match = CharHelper.IsBinaryFunc;
                convert = CharHelper.BinaryToDecFunc;
                shift = 1;
                tokenKind = TokenKind.IntegerBinary;
            }

            end = _position;
            NextChar(); // skip x,X,o,O,b,B

            // Maximum number of digits that fit in 64 bits for this radix.
            int originalMaxShift = 64 / shift;
            int maxShift = originalMaxShift;
            bool hasCharInRange = false;
            bool lastWasDigit = false;
            ulong value = 0;

            while (true)
            {
                bool hasLocalCharInRange = false;
                if (_c == '_' || (hasLocalCharInRange = match(_c)))
                {
                    var nextIsDigit = _c != '_';
                    if (!lastWasDigit && !nextIsDigit)
                    {
                        // toml-specs: each underscore must be surrounded by at least one digit on each side.
                        AddError($"An underscore must be surrounded by at least one {name} digit on each side", start, start);
                    }
                    else if (nextIsDigit)
                    {
                        value = (value << shift) + (ulong)convert(_c);
                        maxShift--;
                        // Log only once the error that the value is beyond
                        if (maxShift == -1)
                        {
                            AddError($"Invalid size of {name} integer. Expecting less than or equal {originalMaxShift} {name} digits", start, start);
                        }
                    }

                    lastWasDigit = nextIsDigit;
                    if (hasLocalCharInRange)
                    {
                        hasCharInRange = true;
                    }

                    end = _position;
                    NextChar();
                }
                else
                {
                    break;
                }
            }

            if (!hasCharInRange)
            {
                AddError($"Invalid {name} integer. Expecting at least one {range} after {prefix}", start, start);
                _token = new SyntaxTokenValue(TokenKind.Invalid, start, end);
            }
            else if (!lastWasDigit)
            {
                AddError($"Invalid {name} integer. Expecting a {range} after the last character", start, start);
                _token = new SyntaxTokenValue(TokenKind.Invalid, start, end);
            }
            else
            {
                // toml-specs: 64 bit (signed long) range expected (−9,223,372,036,854,775,808 to 9,223,372,036,854,775,807).
                _token = new SyntaxTokenValue(tokenKind, start, end, (long)value);
            }

            return;
        }
        else
        {
            // Append the leading 0
            _textBuilder.Append('0');
        }
    }

    // Parse leading digits
    ReadDigits(ref end, hasLeadingZero);

    // A leading zero is only legal when it is the single digit of the integer part
    // (`0`, `+0`, `0.5`, `0e3`). The decision is made on the digits actually read rather than
    // on the parsed value, so that `0.5e1` is accepted while `00`, `0_0`, `00.5` and `01e-5`
    // are rejected.
    // NOTE(review): assumes each digit appended to _textBuilder occupies one char - confirm
    // against CharHelper.IsDigit / AppendUtf32.
    var integerDigitCount = _textBuilder.Length - integerPartOffset;
    var hasInvalidLeadingZero = hasLeadingZero && integerDigitCount > 1;

    // We are in the case of a date
    if (_c == '-' || _c == ':')
    {
        // Offset Date-Time
        //   odt1 = 1979-05-27T07:32:00Z
        //   odt2 = 1979-05-27T00:32:00-07:00
        //   odt3 = 1979-05-27T00:32:00.999999-07:00
        //
        // For the sake of readability, the T delimiter between date and time may be replaced
        // with a space (as permitted by RFC 3339 section 5.6):
        //   odt4 = 1979-05-27 07:32:00Z
        //
        // Local Date-Time
        //   ldt1 = 1979-05-27T07:32:00
        //
        // Local Date
        //   ld1 = 1979-05-27
        //
        // Local Time
        //   lt1 = 07:32:00
        //   lt2 = 00:32:00.999999

        // Parse the date/time
        while (CharHelper.IsDateTime(_c))
        {
            _textBuilder.AppendUtf32(_c);
            end = _position;
            NextChar();
        }

        // If we have a whitespace followed by a date/time character, try to parse the following
        if (CharHelper.IsWhiteSpace(_c) && CharHelper.IsDateTime(PeekChar()))
        {
            _textBuilder.AppendUtf32(_c); // Append the space
            NextChar(); // skip the space
            while (CharHelper.IsDateTime(_c))
            {
                _textBuilder.AppendUtf32(_c);
                end = _position;
                NextChar();
            }
        }

        var dateTimeAsString = _textBuilder.ToString();

        if (hasLeadingSign)
        {
            AddError($"Invalid prefix `{signPrefix.Value}` for the following offset/local date/time `{dateTimeAsString}`", start, end);
            // Still try to recover
            dateTimeAsString = dateTimeAsString.Substring(1);
        }

        DateTime datetime;
        if (DateTimeRFC3339.TryParseOffsetDateTime(dateTimeAsString, out datetime))
        {
            _token = new SyntaxTokenValue(TokenKind.OffsetDateTime, start, end, datetime);
        }
        else if (DateTimeRFC3339.TryParseLocalDateTime(dateTimeAsString, out datetime))
        {
            _token = new SyntaxTokenValue(TokenKind.LocalDateTime, start, end, datetime);
        }
        else if (DateTimeRFC3339.TryParseLocalDate(dateTimeAsString, out datetime))
        {
            _token = new SyntaxTokenValue(TokenKind.LocalDate, start, end, datetime);
        }
        else if (DateTimeRFC3339.TryParseLocalTime(dateTimeAsString, out datetime))
        {
            _token = new SyntaxTokenValue(TokenKind.LocalTime, start, end, datetime);
        }
        else
        {
            // Try to recover the date using the standard C# (not necessarily RFC3339)
            if (DateTime.TryParse(dateTimeAsString, CultureInfo.InvariantCulture, DateTimeStyles.AllowInnerWhite, out datetime))
            {
                _token = new SyntaxTokenValue(TokenKind.LocalDateTime, start, end, datetime);

                // But we produce an error anyway
                AddError($"Invalid format of date time/offset `{dateTimeAsString}` not following RFC3339", start, end);
            }
            else
            {
                _token = new SyntaxTokenValue(TokenKind.LocalDateTime, start, end, new DateTime());

                // But we produce an error anyway
                AddError($"Unable to parse the date time/offset `{dateTimeAsString}`", start, end);
            }
        }

        return;
    }

    // Read the fractional part, if any
    if (_c == '.')
    {
        _textBuilder.Append('.');
        end = _position;
        NextChar(); // Skip the dot .

        // We expect at least a digit after .
        if (!CharHelper.IsDigit(_c))
        {
            AddError("Expecting at least one digit after the float dot .", _position, _position);
            _token = new SyntaxTokenValue(TokenKind.Invalid, start, end);
            return;
        }

        isFloat = true;
        ReadDigits(ref end, false);
    }

    // Read the exponent, if any: e or E, an optional sign, then at least one digit
    if (_c == 'e' || _c == 'E')
    {
        isFloat = true;

        _textBuilder.AppendUtf32(_c);
        end = _position;
        NextChar();
        if (_c == '+' || _c == '-')
        {
            _textBuilder.AppendUtf32(_c);
            end = _position;
            NextChar();
        }

        if (!CharHelper.IsDigit(_c))
        {
            AddError("Expecting at least one digit after the exponent", _position, _position);
            _token = new SyntaxTokenValue(TokenKind.Invalid, start, end);
            return;
        }

        ReadDigits(ref end, false);
    }

    var numberAsText = _textBuilder.ToString();
    object resolvedValue;
    if (isFloat)
    {
        if (!double.TryParse(numberAsText, NumberStyles.Float, CultureInfo.InvariantCulture, out var doubleValue))
        {
            AddError($"Unable to parse floating point `{numberAsText}`", start, end);
        }

        if (hasInvalidLeadingZero)
        {
            AddError($"Unexpected leading zero (`0`) for float `{numberAsText}`", positionFirstDigit, positionFirstDigit);
        }

        // If value is 0.0 or 1.0, use box cached otherwise box
        resolvedValue = doubleValue == 0.0 ? BoxedValues.FloatZero : doubleValue == 1.0 ? BoxedValues.FloatOne : doubleValue;
    }
    else
    {
        if (!long.TryParse(numberAsText, NumberStyles.Integer, CultureInfo.InvariantCulture, out var longValue))
        {
            AddError($"Unable to parse integer `{numberAsText}`", start, end);
        }

        if (hasInvalidLeadingZero)
        {
            AddError($"Unexpected leading zero (`0`) for integer `{numberAsText}`", positionFirstDigit, positionFirstDigit);
        }

        // If value is 0 or 1, use box cached otherwise box
        resolvedValue = longValue == 0 ? BoxedValues.IntegerZero : longValue == 1 ? BoxedValues.IntegerOne : longValue;
    }

    _token = new SyntaxTokenValue(isFloat ? TokenKind.Float : TokenKind.Integer, start, end, resolvedValue);
}
/// <summary>
/// Produces the next token while lexing in value context and stores it in <c>_token</c>.
/// Dispatches on the current character: line breaks, comments, punctuation, strings,
/// end of input, whitespace, keywords/signed numbers, and plain numbers or dates.
/// Any other character yields a single-character Invalid token.
/// </summary>
private void NextTokenForValue()
{
    var tokenStart = _position;
    switch (_c)
    {
        case '\n':
            _token = new SyntaxTokenValue(TokenKind.NewLine, tokenStart, tokenStart);
            NextChar();
            break;

        case '\r':
            NextChar();
            if (_c != '\n')
            {
                // A lone \r is a line break on its own.
                _token = new SyntaxTokenValue(TokenKind.NewLine, tokenStart, tokenStart);
                break;
            }

            // \r\n: the token spans both characters.
            _token = new SyntaxTokenValue(TokenKind.NewLine, tokenStart, _position);
            NextChar();
            break;

        case '#':
            NextChar();
            ReadComment(tokenStart);
            break;

        case ',':
            _token = new SyntaxTokenValue(TokenKind.Comma, tokenStart, tokenStart);
            NextChar();
            break;

        case '[':
            NextChar();
            _token = new SyntaxTokenValue(TokenKind.OpenBracket, tokenStart, tokenStart);
            break;

        case ']':
            NextChar();
            _token = new SyntaxTokenValue(TokenKind.CloseBracket, tokenStart, tokenStart);
            break;

        case '{':
            _token = new SyntaxTokenValue(TokenKind.OpenBrace, tokenStart, tokenStart);
            NextChar();
            break;

        case '}':
            _token = new SyntaxTokenValue(TokenKind.CloseBrace, tokenStart, tokenStart);
            NextChar();
            break;

        case '"':
            ReadString(tokenStart, true);
            break;

        case '\'':
            ReadStringLiteral(tokenStart, true);
            break;

        case Eof:
            _token = new SyntaxTokenValue(TokenKind.Eof, tokenStart, tokenStart);
            break;

        default:
            // Whitespace is consumed first; when some was eaten there is nothing more to do here.
            if (!ConsumeWhitespace())
            {
                if (_c == '+' || _c == '-' || CharHelper.IsIdentifierStart(_c))
                {
                    // inf, +inf, -inf, nan, true, false, or a signed number
                    ReadSpecialToken();
                }
                else if (CharHelper.IsDigit(_c))
                {
                    ReadNumberOrDate();
                }
                else
                {
                    // Unrecognized character: emit it as an invalid token and move on.
                    _token = new SyntaxTokenValue(TokenKind.Invalid, _position, _position);
                    NextChar();
                }
            }

            break;
    }
}