// Provides a decimal representation of v.
// Returns true if it succeeds, otherwise the result cannot be trusted.
// There will be `length` digits inside the buffer (not null-terminated).
// If the function returns true then
//     v == (double) (buffer * 10^decimalExponent).
// The digits in the buffer are the shortest representation possible: no
// 0.09999999999999999 instead of 0.1. The shorter representation will be
// chosen even if the longer one would be closer to v.
// The last digit will be closest to the actual v. That is, even if several
// digits might correctly yield 'v' when read again, the closest will be
// computed.
//
// Throws ArgumentOutOfRangeException when mode is neither FastDtoaShortest
// nor FastDtoaShortestSingle.
private static bool Grisu3(double v, FastDtoaMode mode, Span<byte> buffer, out int length, out int decimalExponent)
{
    var w = new IeeeDouble(v).AsNormalizedDiyFp();

    // boundaryMinus and boundaryPlus are the boundaries between v and its
    // closest floating-point neighbors. Any number strictly between
    // boundaryMinus and boundaryPlus will round to v when converted to a double.
    // Grisu3 will never output representations that lie exactly on a boundary.
    DiyFp boundaryMinus, boundaryPlus;
    switch (mode)
    {
        case FastDtoaMode.FastDtoaShortest:
            new IeeeDouble(v).NormalizedBoundaries(out boundaryMinus, out boundaryPlus);
            break;

        case FastDtoaMode.FastDtoaShortestSingle:
        {
            // In single mode the boundaries are taken from the float nearest to v.
            var singleV = (float)v;
            new IeeeSingle(singleV).NormalizedBoundaries(out boundaryMinus, out boundaryPlus);
            break;
        }

        default:
            // A specific argument exception (still an Exception subclass) instead of
            // the reserved base type, so callers can tell which argument was bad.
            throw new ArgumentOutOfRangeException(nameof(mode), mode, "Invalid Mode.");
    }

    var tenMkMinimalBinaryExponent = KMinimalTargetExponent - (w.e + DiyFp.kSignificandSize);
    PowersOfTenCache.GetCachedPowerForBinaryExponentRange(tenMkMinimalBinaryExponent, out var tenMk, out var mk);

    // Note that tenMk is only an approximation of 10^-k. A DiyFp only contains a
    // 64 bit significand and tenMk is thus only precise up to 64 bits.

    // The DiyFp.Times procedure rounds its result, and tenMk is approximated
    // too. The variable scaledW (as well as scaledBoundaryMinus/Plus) are now
    // off by a small amount.
    // In fact: scaledW - w*10^k < 1ulp (unit in the last place) of scaledW.
    // In other words: let f = scaledW.f and e = scaledW.e, then
    //     (f-1) * 2^e < w*10^k < (f+1) * 2^e
    var scaledW = DiyFp.Times(ref w, ref tenMk);

    // In theory it would be possible to avoid some recomputations by computing
    // the difference between w and boundaryMinus/Plus (a power of 2) and to
    // compute scaledBoundaryMinus/Plus by subtracting/adding from
    // scaledW. However the code becomes much less readable and the speed
    // enhancements are not terrific.
    var scaledBoundaryMinus = DiyFp.Times(ref boundaryMinus, ref tenMk);
    var scaledBoundaryPlus = DiyFp.Times(ref boundaryPlus, ref tenMk);

    // DigitGen will generate the digits of scaledW. Therefore we have
    //     v == (double) (scaledW * 10^-mk).
    // DigitGen reports, through kappa, the exponent correction for the digits
    // it produced; the final decimal exponent is therefore -mk + kappa.
    // For instance if scaledW == 1.23 then the buffer will be filled with
    // "123" and the decimal exponent will be decreased by 2.
    var result = DigitGen(scaledBoundaryMinus, scaledW, scaledBoundaryPlus, buffer, out length, out var kappa);
    decimalExponent = -mk + kappa;
    return result;
}
// Computes a double from the value read out of `buffer` by ReadDiyFp, scaled
// by 10^exponent, using DiyFp arithmetic while tracking an upper bound on the
// accumulated error.
//
// If the function returns true then the result is the correct double.
// Otherwise it is either the correct double or the double that is just below
// the correct double.
//
// NOTE(review): this variant is written against C++-style names (uint64_t,
// Vector, Double) that are declared outside this view; DiyFpStrToDouble in the
// same file performs the same sequence of steps on a ReadOnlySpan<byte>.
static bool DiyFpStrtod(Vector buffer, int exponent, out double result)
{
    DiyFp input;
    int remaining_decimals;
    ReadDiyFp(buffer, out input, out remaining_decimals);
    // Since we may have dropped some digits the input is not accurate.
    // If remaining_decimals is different from 0 then the error is at most
    // .5 ulp (unit in the last place).
    // We don't want to deal with fractions and therefore keep a common
    // denominator: `error` is counted in units of 1/kDenominator ulp, so
    // kDenominator / 2 stands for 0.5 ulp.
    const int kDenominatorLog = 3;
    const int kDenominator = 1 << kDenominatorLog;
    // Move the remaining decimals into the exponent.
    exponent += remaining_decimals;
    uint64_t error = (ulong)(remaining_decimals == 0 ? 0 : kDenominator / 2);

    // Normalizing changes input.e; scale the error by the same power of two
    // so that it stays expressed relative to the significand's last place.
    int old_e = input.e;
    input.Normalize();
    error <<= old_e - input.e;

    // Exponents below the cache's minimum are reported as exactly 0.0.
    if (exponent < PowersOfTenCache.kMinDecimalExponent)
    {
        result = 0.0;
        return(true);
    }
    DiyFp cached_power;
    int cached_decimal_exponent;
    PowersOfTenCache.GetCachedPowerForDecimalExponent(exponent, out cached_power, out cached_decimal_exponent);

    // The cache may hand back a power for a different exponent than requested;
    // bridge the gap with an additional multiplication.
    if (cached_decimal_exponent != exponent)
    {
        int adjustment_exponent = exponent - cached_decimal_exponent;
        DiyFp adjustment_power = AdjustmentPowerOfTen(adjustment_exponent);
        input.Multiply(ref adjustment_power);
        if (kMaxUint64DecimalDigits - buffer.length() >= adjustment_exponent)
        {
            // The product of input with the adjustment power fits into a 64 bit
            // integer, so no error is added for this multiplication.
        }
        else
        {
            // The adjustment power is exact. There is hence only an error of 0.5.
            error += kDenominator / 2;
        }
    }

    input.Multiply(ref cached_power);
    // The error introduced by a multiplication of a*b equals
    //   error_a + error_b + error_a*error_b/2^64 + 0.5
    // Substituting a with 'input' and b with 'cached_power' we have
    //   error_b = 0.5  (all cached powers have an error of less than 0.5 ulp),
    //   error_ab = 0 or 1 / kDenominator > error_a*error_b/ 2^64
    int error_b = kDenominator / 2;
    int error_ab = (error == 0 ? 0 : 1);  // We round up to 1.
    int fixed_error = kDenominator / 2;
    error += (ulong)(error_b + error_ab + fixed_error);

    // Re-normalize after the multiplication and rescale the error again.
    old_e = input.e;
    input.Normalize();
    error <<= old_e - input.e;

    // See if the double's significand changes if we add/subtract the error.
    int order_of_magnitude = DiyFp.kSignificandSize + input.e;
    int effective_significand_size = Double.SignificandSizeForOrderOfMagnitude(order_of_magnitude);
    // Number of low bits of input.f that do not fit into the double's significand.
    int precision_digits_count = DiyFp.kSignificandSize - effective_significand_size;
    if (precision_digits_count + kDenominatorLog >= DiyFp.kSignificandSize)
    {
        // This can only happen for very small denormals. In this case the
        // half-way multiplied by the denominator exceeds the range of an uint64.
        // Simply shift everything to the right.
        int shift_amount = (precision_digits_count + kDenominatorLog) - DiyFp.kSignificandSize + 1;
        input.f = (input.f >> shift_amount);
        input.e = (input.e + shift_amount);
        // We add 1 for the lost precision of error, and kDenominator for
        // the lost precision of input.f.
        error = (error >> shift_amount) + 1 + kDenominator;
        precision_digits_count -= shift_amount;
    }
    // We use uint64_ts now. This only works if the DiyFp uses uint64_ts too.
    uint64_t one64 = 1;
    uint64_t precision_bits_mask = (one64 << precision_digits_count) - 1;
    uint64_t precision_bits = input.f & precision_bits_mask;
    uint64_t half_way = one64 << (precision_digits_count - 1);
    // Bring both quantities into the same 1/kDenominator units as `error`.
    precision_bits *= kDenominator;
    half_way *= kDenominator;
    DiyFp rounded_input = new DiyFp(input.f >> precision_digits_count, input.e + precision_digits_count);
    // Round up only when the discarded bits are at or above half-way even
    // after allowing for the full error.
    if (precision_bits >= half_way + error)
    {
        rounded_input.f = (rounded_input.f + 1);
    }
    // If the last bits are too close to the half-way case then we are too
    // inaccurate and round down. In this case we return false so that we can
    // fall back to a more precise algorithm.
    result = new Double(rounded_input).value();
    if (half_way - error < precision_bits && precision_bits < half_way + error)
    {
        // Too imprecise. The caller will have to fall back to a slower version.
        // However the returned number is guaranteed to be either the correct
        // double, or the next-lower double.
        return(false);
    }
    else
    {
        return(true);
    }
}
// Computes a double from the value read out of `buffer` by ReadDiyFp, scaled
// by 10^exponent, using DiyFp arithmetic with an explicit error bound.
//
// Returns true when `result` is the correct double. When it returns false,
// `result` is either the correct double or the double just below it, and the
// caller has to fall back to a slower, more precise algorithm.
private static bool DiyFpStrToDouble(ReadOnlySpan<byte> buffer, int exponent, out double result)
{
    ReadDiyFp(buffer, out var input, out var remainingDecimals);

    // Digits may have been dropped while reading, so the input is inexact:
    // whenever remainingDecimals is non-zero the error is at most 0.5 ulp.
    // To stay in integer arithmetic, the error is counted in units of
    // 1/kDenominator ulp (so kDenominator / 2 stands for 0.5 ulp).
    const int kDenominatorLog = 3;
    const int kDenominator = 1 << kDenominatorLog;

    // Fold the digits that were not consumed into the decimal exponent.
    exponent += remainingDecimals;

    ulong error = 0;
    if (remainingDecimals != 0)
    {
        error = kDenominator / 2;
    }

    // Normalization changes the binary exponent; rescale the error to match.
    var exponentBeforeFirstNormalize = input.e;
    input.Normalize();
    error <<= exponentBeforeFirstNormalize - input.e;

    if (exponent < PowersOfTenCache.kMinDecimalExponent)
    {
        result = 0.0;
        return true;
    }

    PowersOfTenCache.GetCachedPowerForDecimalExponent(exponent, out var cachedPower, out var cachedDecimalExponent);

    if (exponent != cachedDecimalExponent)
    {
        // The cache returned a power for a different exponent; bridge the gap
        // with one extra multiplication.
        var adjustmentExponent = exponent - cachedDecimalExponent;
        var adjustmentPower = AdjustmentPowerOfTen(adjustmentExponent);
        input.Multiply(ref adjustmentPower);

        // When the product still fits into a 64 bit integer the multiplication
        // added no error. Otherwise, since the adjustment power itself is
        // exact, it added only a rounding error of 0.5.
        var productFitsIn64Bits = KMaxUint64DecimalDigits - buffer.Length >= adjustmentExponent;
        if (!productFitsIn64Bits)
        {
            error += kDenominator / 2;
        }
    }

    input.Multiply(ref cachedPower);

    // A multiplication a*b introduces an error of
    //   error_a + error_b + error_a*error_b/2^64 + 0.5
    // With a = input and b = cachedPower:
    //   error_b  = 0.5 (every cached power is accurate to better than 0.5 ulp),
    //   error_ab = 0 or 1 / kDenominator > error_a*error_b / 2^64
    const int cachedPowerError = kDenominator / 2;
    const int roundingError = kDenominator / 2;
    var crossTermError = error == 0 ? 0 : 1; // Rounded up to 1.
    error += (ulong)(cachedPowerError + crossTermError + roundingError);

    var exponentBeforeSecondNormalize = input.e;
    input.Normalize();
    error <<= exponentBeforeSecondNormalize - input.e;

    // Determine whether adding or subtracting the error could change the
    // double's significand.
    var orderOfMagnitude = DiyFp.kSignificandSize + input.e;
    var effectiveSignificandSize = IeeeDouble.SignificandSizeForOrderOfMagnitude(orderOfMagnitude);
    var precisionDigitsCount = DiyFp.kSignificandSize - effectiveSignificandSize;

    if (DiyFp.kSignificandSize <= precisionDigitsCount + kDenominatorLog)
    {
        // Only reachable for very small denormals: half-way times the
        // denominator would not fit into a uint64, so shift everything right.
        var shiftAmount = precisionDigitsCount + kDenominatorLog - DiyFp.kSignificandSize + 1;
        input.f = input.f >> shiftAmount;
        input.e = input.e + shiftAmount;

        // +1 accounts for the precision lost in error, +kDenominator for the
        // precision lost in input.f.
        error = (error >> shiftAmount) + 1 + kDenominator;
        precisionDigitsCount -= shiftAmount;
    }

    // From here on plain 64 bit unsigned arithmetic is used; this relies on
    // DiyFp storing its significand as a 64 bit unsigned value as well.
    var discardedBitsMask = (1UL << precisionDigitsCount) - 1;
    var precisionBits = (input.f & discardedBitsMask) * kDenominator;
    var halfWay = (1UL << (precisionDigitsCount - 1)) * kDenominator;
    var upperBound = halfWay + error;

    var roundedInput = new DiyFp(input.f >> precisionDigitsCount, input.e + precisionDigitsCount);
    if (precisionBits >= upperBound)
    {
        roundedInput.f = roundedInput.f + 1;
    }

    // When the discarded bits are too close to the half-way case the
    // computation is too inaccurate and rounds down. The value stored in
    // result is then still either the correct double or the next-lower one.
    result = new IeeeDouble(roundedInput).Value();

    var tooCloseToHalfWay = halfWay - error < precisionBits && precisionBits < upperBound;
    return !tooCloseToHalfWay;
}