// Returns a * b.
// a is assigned to a local, which is then multiplied in place by b via
// Multiply(); that local is the return value.
public static DiyFp Times(ref DiyFp a, ref DiyFp b)
{
    DiyFp product = a;
    product.Multiply(ref b);
    return product;
}
// Returns a - b.
// Both operands must have the same exponent and a must be bigger than b.
// The difference is not normalized.
public static DiyFp Minus(ref DiyFp a, ref DiyFp b)
{
    DiyFp difference = a;
    difference.Subtract(ref b);
    return difference;
}
// this = this * other.
public void Multiply(ref DiyFp other)
{
    // Emulates a 64 x 64 -> 128 bit multiplication with four 32 x 32 partial
    // products. Only the most significant 64 bits are kept; the least
    // significant 64 bits are used solely to round the kept half.
    const ulong low32Mask = 0xFFFFFFFFUL;

    ulong thisHigh = f >> 32;
    ulong thisLow = f & low32Mask;
    ulong otherHigh = other.f >> 32;
    ulong otherLow = other.f & low32Mask;

    ulong highHigh = thisHigh * otherHigh;
    ulong lowHigh = thisLow * otherHigh;
    ulong highLow = thisHigh * otherLow;
    ulong lowLow = thisLow * otherLow;

    // Everything that lands in bits 32..63 of the 128 bit product.
    ulong middle = (lowLow >> 32) + (highLow & low32Mask) + (lowHigh & low32Mask);

    // Adding 2^31 before taking the carry rounds the final result.
    // Halfway cases are rounded up.
    middle += 1UL << 31;

    ulong upperHalf = highHigh + (highLow >> 32) + (lowHigh >> 32) + (middle >> 32);

    // Dropping the low 64 bits is compensated by adding 64 to the exponent.
    e += other.e + 64;
    f = upperHalf;
}
// Assigns a to a local, calls the instance Normalize() on that local and
// returns it.
public static DiyFp Normalize(ref DiyFp a)
{
    DiyFp normalized = a;
    normalized.Normalize();
    return normalized;
}
// Reads a DiyFp from the digits in the buffer.
// The returned DiyFp is not necessarily normalized.
// When remaining_decimals comes back as zero the returned DiyFp is accurate;
// otherwise it has been rounded and carries an error of at most 1/2 ulp.
static void ReadDiyFp(Vector buffer, out DiyFp result, out int remaining_decimals)
{
    int digits_consumed;
    uint64_t value = ReadUint64(buffer, out digits_consumed);

    if (buffer.length() == digits_consumed)
    {
        // Every digit was consumed by ReadUint64.
        result = new DiyFp(value, 0);
        remaining_decimals = 0;
        return;
    }

    // Some digits were left over: round the significand using the first
    // digit that was not consumed.
    if (buffer[digits_consumed] >= '5')
    {
        value++;
    }

    // The binary exponent of the value that was read is 0.
    result = new DiyFp(value, 0);
    remaining_decimals = buffer.length() - digits_consumed;
}
// Looks up the kCachedPowers entry at index
// (requested_exponent + kCachedPowersOffset) / kDecimalExponentDistance.
// power receives the entry's significand and binary exponent as a DiyFp;
// found_exponent receives the entry's decimal exponent.
public static void GetCachedPowerForDecimalExponent(int requested_exponent, out DiyFp power, out int found_exponent)
{
    int slot = (requested_exponent + kCachedPowersOffset) / kDecimalExponentDistance;
    CachedPower entry = kCachedPowers[slot];

    found_exponent = entry.decimal_exponent;
    power = new DiyFp(entry.significand, entry.binary_exponent);
}
// Selects a kCachedPowers entry from min_exponent.
// The table index is derived from
//   k = ceil((min_exponent + kSignificandSize - 1) * kD_1_LOG2_10).
// power receives the entry's significand and binary exponent as a DiyFp;
// decimal_exponent receives the entry's decimal exponent.
public static void GetCachedPowerForBinaryExponentRange(int min_exponent, out DiyFp power, out int decimal_exponent)
{
    const int significandBits = DiyFp.kSignificandSize;

    var k = Math.Ceiling((min_exponent + significandBits - 1) * kD_1_LOG2_10);
    var slot = (kCachedPowersOffset + (int)k - 1) / kDecimalExponentDistance + 1;
    var entry = kCachedPowers[slot];

    decimal_exponent = entry.decimal_exponent;
    power = new DiyFp(entry.significand, entry.binary_exponent);
}
// Computes the two boundaries of this.
// The upper boundary (out_m_plus) is normalized. The lower boundary
// (out_m_minus) is shifted so that it has the same exponent as the upper one.
// Precondition: the value encoded by this instance must be greater than 0.
public void NormalizedBoundaries(out DiyFp out_m_minus, out DiyFp out_m_plus)
{
    DiyFp value = this.AsDiyFp();

    // Upper boundary: (2f + 1) * 2^(e - 1), normalized.
    DiyFp rawUpper = new DiyFp((value.f << 1) + 1, value.e - 1);
    DiyFp upper = DiyFp.Normalize(ref rawUpper);

    // Lower boundary: (4f - 1) * 2^(e - 2) when the lower boundary is closer,
    // otherwise (2f - 1) * 2^(e - 1).
    DiyFp lower = LowerBoundaryIsCloser()
        ? new DiyFp((value.f << 2) - 1, value.e - 2)
        : new DiyFp((value.f << 1) - 1, value.e - 1);

    // Bring the lower boundary onto the exponent of the upper boundary.
    lower.f = lower.f << (lower.e - upper.e);
    lower.e = upper.e;

    out_m_plus = upper;
    out_m_minus = lower;
}
// Reads a DiyFp from the digits in the buffer.
// The returned DiyFp is not necessarily normalized.
// When remainingDecimals comes back as zero the returned DiyFp is accurate;
// otherwise it has been rounded and carries an error of at most 1/2 ulp.
private static void ReadDiyFp(ReadOnlySpan<byte> buffer, out DiyFp result, out int remainingDecimals)
{
    var value = ReadUint64(buffer, out var digitsRead);

    // Number of digits that ReadUint64 did not consume (0 when all were read).
    remainingDecimals = buffer.Length - digitsRead;

    // Round the significand on the first digit that was not consumed.
    if (remainingDecimals != 0 && buffer[digitsRead] >= '5')
    {
        value++;
    }

    // The binary exponent of the value that was read is 0 in both cases.
    result = new DiyFp(value, 0);
}
// Packs a DiyFp into a 64-bit pattern: the significand masked with
// kSignificandMask, combined with the biased exponent shifted left by
// kPhysicalSignificandSize.
// Returns kInfinity when the exponent reaches kMaxExponent and 0 when it is
// below kDenormalExponent.
public static ulong DiyFpToUint64(DiyFp diy_fp)
{
    ulong bits = diy_fp.f;
    int exp = diy_fp.e;

    // Shift right until the significand no longer exceeds
    // kHiddenBit + kSignificandMask.
    while (bits > kHiddenBit + kSignificandMask)
    {
        bits >>= 1;
        exp++;
    }

    if (exp >= kMaxExponent)
    {
        return kInfinity;
    }

    if (exp < kDenormalExponent)
    {
        return 0;
    }

    // Shift left until the hidden bit is set or the denormal exponent is hit.
    while (exp > kDenormalExponent && (bits & kHiddenBit) == 0)
    {
        bits <<= 1;
        exp--;
    }

    // At the denormal exponent without a hidden bit the exponent field is 0.
    bool hiddenBitMissing = (bits & kHiddenBit) == 0;
    ulong exponentField = (exp == kDenormalExponent && hiddenBitMissing)
        ? 0UL
        : (ulong)(exp + kExponentBias);

    return (bits & kSignificandMask) | (exponentField << kPhysicalSignificandSize);
}
// Converts the decimal digits in buffer, scaled by 10^exponent, to a double
// using DiyFp arithmetic with explicit error tracking.
// If the function returns true then the result is the correct double.
// Otherwise it is either the correct double or the double that is just below
// the correct double.
static bool DiyFpStrtod(Vector buffer, int exponent, out double result)
{
    // We don't want to deal with fractions and therefore keep a common
    // denominator: all errors are counted in units of 1/kDenominator ulp.
    const int kDenominatorLog = 3;
    const int kDenominator = 1 << kDenominatorLog;
    const int kHalfUlp = kDenominator / 2;

    DiyFp input;
    int remaining_decimals;
    ReadDiyFp(buffer, out input, out remaining_decimals);

    // Since we may have dropped some digits the input is not accurate.
    // If remaining_decimals is different from 0 then the error is at most
    // .5 ulp (unit in the last place).
    // Move the remaining decimals into the exponent.
    exponent += remaining_decimals;
    uint64_t error = (remaining_decimals == 0) ? 0UL : (ulong)kHalfUlp;

    // Normalizing shifts the significand; scale the error by the same amount.
    int exponent_before = input.e;
    input.Normalize();
    error <<= exponent_before - input.e;

    if (exponent < PowersOfTenCache.kMinDecimalExponent)
    {
        result = 0.0;
        return true;
    }

    DiyFp cached_power;
    int cached_decimal_exponent;
    PowersOfTenCache.GetCachedPowerForDecimalExponent(exponent, out cached_power, out cached_decimal_exponent);

    if (cached_decimal_exponent != exponent)
    {
        int adjustment_exponent = exponent - cached_decimal_exponent;
        DiyFp adjustment_power = AdjustmentPowerOfTen(adjustment_exponent);
        input.Multiply(ref adjustment_power);

        // If the product of input with the adjustment power fits into a
        // 64 bit integer no error is added. Otherwise: the adjustment power
        // is exact, there is hence only an error of 0.5.
        if (kMaxUint64DecimalDigits - buffer.length() < adjustment_exponent)
        {
            error += kHalfUlp;
        }
    }

    input.Multiply(ref cached_power);

    // The error introduced by a multiplication of a*b equals
    //   error_a + error_b + error_a*error_b/2^64 + 0.5
    // Substituting a with 'input' and b with 'cached_power' we have
    //   error_b = 0.5  (all cached powers have an error of less than 0.5 ulp),
    //   error_ab = 0 or 1 / kDenominator > error_a*error_b / 2^64
    int cached_power_error = kHalfUlp;
    int cross_term_error = (error == 0) ? 0 : 1;  // We round up to 1.
    int rounding_error = kHalfUlp;
    error += (ulong)(cached_power_error + cross_term_error + rounding_error);

    exponent_before = input.e;
    input.Normalize();
    error <<= exponent_before - input.e;

    // See if the double's significand changes if we add/subtract the error.
    int order_of_magnitude = DiyFp.kSignificandSize + input.e;
    int effective_significand_size = Double.SignificandSizeForOrderOfMagnitude(order_of_magnitude);
    int precision_digits_count = DiyFp.kSignificandSize - effective_significand_size;

    if (precision_digits_count + kDenominatorLog >= DiyFp.kSignificandSize)
    {
        // This can only happen for very small denormals. In this case the
        // half-way multiplied by the denominator exceeds the range of an uint64.
        // Simply shift everything to the right.
        int shift_amount = (precision_digits_count + kDenominatorLog) - DiyFp.kSignificandSize + 1;
        input.f = input.f >> shift_amount;
        input.e = input.e + shift_amount;

        // We add 1 for the lost precision of error, and kDenominator for
        // the lost precision of input.f.
        error = (error >> shift_amount) + 1 + kDenominator;
        precision_digits_count -= shift_amount;
    }

    // We use 64 bit unsigned integers now. This only works if the DiyFp uses
    // 64 bit unsigned integers too.
    uint64_t precision_bits_mask = (1UL << precision_digits_count) - 1;
    uint64_t precision_bits = (input.f & precision_bits_mask) * kDenominator;
    uint64_t half_way = (1UL << (precision_digits_count - 1)) * kDenominator;

    DiyFp rounded_input = new DiyFp(input.f >> precision_digits_count, input.e + precision_digits_count);
    if (precision_bits >= half_way + error)
    {
        rounded_input.f = rounded_input.f + 1;
    }

    result = new Double(rounded_input).value();

    // If the precision bits are too close to the half-way case then we are
    // too inaccurate and have rounded down. In this case we return false so
    // that the caller can fall back to a more precise algorithm. The returned
    // number is still guaranteed to be either the correct double or the
    // next-lower double.
    bool too_close_to_half_way = half_way - error < precision_bits && precision_bits < half_way + error;
    return !too_close_to_half_way;
}
// this = this - other.
// Only the significands are subtracted: both numbers must have the same
// exponent, and the significand of this must be bigger than the significand
// of other.
// The result is not normalized.
public void Subtract(ref DiyFp other)
{
    f = f - other.f;
}
// Provides a decimal representation of v.
// Returns true if it succeeds, otherwise the result cannot be trusted.
// On success there are 'length' digits inside the buffer (not null-terminated)
// and
//   v == (double) (buffer * 10^decimalExponent).
// The digits in the buffer are the shortest representation possible: no
// 0.09999999999999999 instead of 0.1. The shorter representation is chosen
// even if the longer one would be closer to v.
// The last digit will be closest to the actual v. That is, even if several
// digits might correctly yield 'v' when read again, the closest will be
// computed.
// Throws ArgumentOutOfRangeException when mode is neither FastDtoaShortest
// nor FastDtoaShortestSingle.
private static bool Grisu3(double v, FastDtoaMode mode, Span<byte> buffer, out int length, out int decimalExponent)
{
    var w = new IeeeDouble(v).AsNormalizedDiyFp();

    // boundaryMinus and boundaryPlus are the boundaries between v and its
    // closest floating-point neighbors. Any number strictly between
    // boundaryMinus and boundaryPlus will round to v when converted to a
    // double. Grisu3 will never output representations that lie exactly on a
    // boundary.
    DiyFp boundaryMinus, boundaryPlus;
    switch (mode)
    {
        case FastDtoaMode.FastDtoaShortest:
            new IeeeDouble(v).NormalizedBoundaries(out boundaryMinus, out boundaryPlus);
            break;

        case FastDtoaMode.FastDtoaShortestSingle:
        {
            var singleV = (float)v;
            new IeeeSingle(singleV).NormalizedBoundaries(out boundaryMinus, out boundaryPlus);
            break;
        }

        default:
            // An unsupported mode is a caller error: report which argument
            // was wrong instead of throwing the bare Exception type.
            throw new ArgumentOutOfRangeException(nameof(mode), mode, "Invalid Mode.");
    }

    var tenMkMinimalBinaryExponent = KMinimalTargetExponent - (w.e + DiyFp.kSignificandSize);
    PowersOfTenCache.GetCachedPowerForBinaryExponentRange(tenMkMinimalBinaryExponent, out var tenMk, out var mk);

    // Note that tenMk is only an approximation of 10^-k. A DiyFp only contains
    // a 64 bit significand and tenMk is thus only precise up to 64 bits.

    // The DiyFp.Times procedure rounds its result, and tenMk is approximated
    // too. The variable scaledW (as well as scaledBoundaryMinus/Plus) are now
    // off by a small amount.
    // In fact: scaledW - w*10^k < 1ulp (unit in the last place) of scaledW.
    // In other words: let f = scaledW.f and e = scaledW.e, then
    //   (f-1) * 2^e < w*10^k < (f+1) * 2^e
    var scaledW = DiyFp.Times(ref w, ref tenMk);

    // In theory it would be possible to avoid some recomputations by computing
    // the difference between w and boundaryMinus/Plus (a power of 2) and to
    // compute scaledBoundaryMinus/Plus by subtracting/adding from scaledW.
    // However the code becomes much less readable and the speed enhancements
    // are not terrific.
    var scaledBoundaryMinus = DiyFp.Times(ref boundaryMinus, ref tenMk);
    var scaledBoundaryPlus = DiyFp.Times(ref boundaryPlus, ref tenMk);

    // DigitGen will generate the digits of scaledW. Therefore we have
    //   v == (double) (scaledW * 10^-mk).
    // Set decimalExponent == -mk and pass it to DigitGen. If scaledW is not an
    // integer then it will be updated. For instance if scaledW == 1.23 then
    // the buffer will be filled with "123" and the decimal exponent will be
    // decreased by 2.
    var result = DigitGen(scaledBoundaryMinus, scaledW, scaledBoundaryPlus, buffer, out length, out var kappa);
    decimalExponent = -mk + kappa;
    return result;
}
// Generates the digits of input number w.
// w is a floating-point number (DiyFp), consisting of a significand and an
// exponent. Its exponent is bounded by kMinimalTargetExponent and
// kMaximalTargetExponent.
//       Hence -60 <= w.e <= -32.
//
// Returns false if it fails, in which case the generated digits in the buffer
// should not be used.
// Preconditions:
//  * low, w and high are correct up to 1 ulp (unit in the last place). That
//    is, their error must be less than a unit of their last digits.
//  * low.e == w.e == high.e
//  * low < w < high, and taking into account their error: low~ <= high~
//  * kMinimalTargetExponent <= w.e <= kMaximalTargetExponent
// Postconditions: returns false if the procedure fails; otherwise:
//  * buffer is not null-terminated, but length contains the number of digits.
//  * buffer contains the shortest possible decimal digit-sequence
//    such that LOW < buffer * 10^kappa < HIGH, where LOW and HIGH are the
//    correct values of low and high (without their error).
//  * if more than one decimal representation gives the minimal number of
//    decimal digits then the one closest to W (where W is the correct value
//    of w) is chosen.
// Remark: this procedure takes into account the imprecision of its input
//   numbers. If the precision is not enough to guarantee all the
//   postconditions then false is returned. This usually happens rarely
//   (~0.5%).
//
// Say, for the sake of example, that
//   w.e == -48, and w.f == 0x1234567890abcdef
// w's value can be computed by w.f * 2^w.e.
// We can obtain w's integral digits by simply shifting w.f by -w.e.
//  -> w's integral part is 0x1234
//  w's fractional part is therefore 0x567890abcdef.
// Printing w's integral part is easy (simply print 0x1234 in decimal).
// In order to print its fraction we repeatedly multiply the fraction by 10 and
// get each digit. For example the first digit after the point would be
// computed by
//   (0x567890abcdef * 10) >> 48. -> 3
// The whole thing becomes slightly more complicated because we want to stop
// once we have enough digits. That is, once the digits inside the buffer
// represent 'w' we can stop. Everything inside the interval low - high
// represents w. However we have to pay attention to low, high and w's
// imprecision.
private static bool DigitGen(DiyFp low, DiyFp w, DiyFp high, Span<byte> buffer, out int length, out int kappa)
{
    // low, w and high are imprecise, but by less than one ulp (unit in the
    // last place).
    // If we remove (resp. add) 1 ulp from low (resp. high) we are certain that
    // the new numbers are outside of the interval we want the final
    // representation to lie in.
    // Inversely adding (resp. removing) 1 ulp from low (resp. high) would
    // yield numbers that are certain to lie in the interval. We will use this
    // fact later on.
    // We start by generating the digits within the uncertain interval. Later
    // we weed out representations that lie outside the safe interval and thus
    // _might_ lie outside the correct interval.
    ulong unit = 1;
    var tooLow = new DiyFp(low.f - unit, low.e);
    var tooHigh = new DiyFp(high.f + unit, high.e);

    // tooLow and tooHigh are guaranteed to lie outside the interval we want
    // the generated number in.
    var unsafeInterval = DiyFp.Minus(ref tooHigh, ref tooLow);

    // We now cut the input number into two parts: the integral digits and the
    // fractionals. We will not write any decimal separator though, but adapt
    // kappa instead.
    // Reminder: we are currently computing the digits (stored inside the
    // buffer) such that:   tooLow < buffer * 10^kappa < tooHigh
    // We use tooHigh for the digit generation and stop as soon as possible.
    // If we stop early we effectively round down.
    var one = new DiyFp((ulong)1 << -w.e, w.e);
    var oneShift = -one.e;          // Division by one is a shift by this amount.
    var fractionMask = one.f - 1;   // Modulo by one is an and with this mask.

    var integrals = (uint)(tooHigh.f >> oneShift);
    var fractionals = tooHigh.f & fractionMask;

    BiggestPowerTen(integrals, DiyFp.kSignificandSize - oneShift, out var divisor, out var divisorExponentPlusOne);
    kappa = divisorExponentPlusOne;
    length = 0;

    // Loop invariant: buffer = tooHigh / 10^kappa  (integer division)
    // The invariant holds for the first iteration: kappa has been initialized
    // with the divisor exponent + 1. And the divisor is the biggest power of
    // ten that is smaller than integrals.
    for (; kappa > 0; divisor /= 10)
    {
        var digit = unchecked((int)(integrals / divisor));
        buffer[length] = (byte)('0' + digit);
        length++;
        integrals %= divisor;
        kappa--;

        // Note that kappa now equals the exponent of the divisor and that the
        // invariant thus holds again.
        var rest = ((ulong)integrals << oneShift) + fractionals;

        // Invariant: tooHigh = buffer * 10^kappa + DiyFp(rest, one.e)
        // Reminder: unsafeInterval.e == one.e
        if (rest < unsafeInterval.f)
        {
            // Rounding down (by not emitting the remaining digits) yields a
            // number that lies within the unsafe interval.
            return RoundWeed(
                buffer,
                length,
                DiyFp.Minus(ref tooHigh, ref w).f,
                unsafeInterval.f,
                rest,
                (ulong)divisor << oneShift,
                unit);
        }
    }

    // The integrals have been generated. We are at the point of the decimal
    // separator. In the following loop we simply multiply the remaining
    // digits by 10 and divide by one. We just need to pay attention to
    // multiply associated data (like the interval or 'unit'), too.
    // Note that the multiplication by 10 does not overflow, because
    // w.e >= -60 and thus one.e >= -60.
    while (true)
    {
        fractionals *= 10;
        unit *= 10;
        unsafeInterval.f *= 10;

        // Integer division by one.
        var digit = (int)(fractionals >> oneShift);
        buffer[length] = (byte)('0' + digit);
        length++;

        // Modulo by one.
        fractionals &= fractionMask;
        kappa--;

        if (fractionals < unsafeInterval.f)
        {
            return RoundWeed(
                buffer,
                length,
                DiyFp.Minus(ref tooHigh, ref w).f * unit,
                unsafeInterval.f,
                fractionals,
                one.f,
                unit);
        }
    }
}
// Initializes d64_ with the 64-bit value that DiyFpToUint64 produces for d.
public Double(DiyFp d)
{
    this.d64_ = DiyFpToUint64(d);
}
// Converts the decimal digits in buffer, scaled by 10^exponent, to a double
// using DiyFp arithmetic with explicit error tracking.
// If the function returns true then the result is the correct double.
// Otherwise it is either the correct double or the double that is just below
// the correct double.
private static bool DiyFpStrToDouble(ReadOnlySpan<byte> buffer, int exponent, out double result)
{
    // We don't want to deal with fractions and therefore keep a common
    // denominator: all errors are counted in units of 1/kDenominator ulp.
    const int kDenominatorLog = 3;
    const int kDenominator = 1 << kDenominatorLog;
    const int kHalfUlp = kDenominator / 2;

    ReadDiyFp(buffer, out var input, out var remainingDecimals);

    // Since we may have dropped some digits the input is not accurate.
    // If remainingDecimals is different from 0 then the error is at most
    // .5 ulp (unit in the last place).
    // Move the remaining decimals into the exponent.
    exponent += remainingDecimals;
    ulong error = remainingDecimals == 0 ? 0UL : (ulong)kHalfUlp;

    // Normalizing shifts the significand; scale the error by the same amount.
    var exponentBefore = input.e;
    input.Normalize();
    error <<= exponentBefore - input.e;

    if (exponent < PowersOfTenCache.kMinDecimalExponent)
    {
        result = 0.0;
        return true;
    }

    PowersOfTenCache.GetCachedPowerForDecimalExponent(exponent, out var cachedPower, out var cachedDecimalExponent);

    if (cachedDecimalExponent != exponent)
    {
        var adjustmentExponent = exponent - cachedDecimalExponent;
        var adjustmentPower = AdjustmentPowerOfTen(adjustmentExponent);
        input.Multiply(ref adjustmentPower);

        // If the product of input with the adjustment power fits into a
        // 64 bit integer no error is added. Otherwise: the adjustment power
        // is exact, there is hence only an error of 0.5.
        if (KMaxUint64DecimalDigits - buffer.Length < adjustmentExponent)
        {
            error += kHalfUlp;
        }
    }

    input.Multiply(ref cachedPower);

    // The error introduced by a multiplication of a*b equals
    //   error_a + error_b + error_a*error_b/2^64 + 0.5
    // Substituting a with 'input' and b with 'cachedPower' we have
    //   error_b = 0.5  (all cached powers have an error of less than 0.5 ulp),
    //   error_ab = 0 or 1 / kDenominator > error_a*error_b / 2^64
    const int cachedPowerError = kHalfUlp;
    const int roundingError = kHalfUlp;
    var crossTermError = error == 0 ? 0 : 1;  // We round up to 1.
    error += (ulong)(cachedPowerError + crossTermError + roundingError);

    exponentBefore = input.e;
    input.Normalize();
    error <<= exponentBefore - input.e;

    // See if the double's significand changes if we add/subtract the error.
    var orderOfMagnitude = DiyFp.kSignificandSize + input.e;
    var effectiveSignificandSize = IeeeDouble.SignificandSizeForOrderOfMagnitude(orderOfMagnitude);
    var precisionDigitsCount = DiyFp.kSignificandSize - effectiveSignificandSize;

    if (precisionDigitsCount + kDenominatorLog >= DiyFp.kSignificandSize)
    {
        // This can only happen for very small denormals. In this case the
        // half-way multiplied by the denominator exceeds the range of an uint64.
        // Simply shift everything to the right.
        var shiftAmount = precisionDigitsCount + kDenominatorLog - DiyFp.kSignificandSize + 1;
        input.f >>= shiftAmount;
        input.e += shiftAmount;

        // We add 1 for the lost precision of error, and kDenominator for
        // the lost precision of input.f.
        error = (error >> shiftAmount) + 1 + kDenominator;
        precisionDigitsCount -= shiftAmount;
    }

    // We use 64 bit unsigned integers now. This only works if the DiyFp uses
    // 64 bit unsigned integers too.
    var precisionBitsMask = (1UL << precisionDigitsCount) - 1;
    var precisionBits = (input.f & precisionBitsMask) * kDenominator;
    var halfWay = (1UL << (precisionDigitsCount - 1)) * kDenominator;

    var roundedInput = new DiyFp(input.f >> precisionDigitsCount, input.e + precisionDigitsCount);
    if (precisionBits >= halfWay + error)
    {
        roundedInput.f++;
    }

    result = new IeeeDouble(roundedInput).Value();

    // If the precision bits are too close to the half-way case then we are
    // too inaccurate and have rounded down. In this case we return false so
    // that the caller can fall back to a more precise algorithm. The returned
    // number is still guaranteed to be either the correct double or the
    // next-lower double.
    var tooCloseToHalfWay = halfWay - error < precisionBits && precisionBits < halfWay + error;
    return !tooCloseToHalfWay;
}