// Parses the leading digits of buffer into a DiyFp whose binary exponent is 0.
// The returned DiyFp is not normalized by this method.
// ReadUint64 reports how many digits it consumed. If that is the whole buffer,
// result holds the significand as read and remaining_decimals is 0.
// Otherwise the significand is incremented when the first unread character is
// '5' or greater, and remaining_decimals is the number of unread characters.
static void ReadDiyFp(Vector buffer, out DiyFp result, out int remaining_decimals)
{
    int digits_consumed;
    uint64_t significand = ReadUint64(buffer, out digits_consumed);

    if (buffer.length() == digits_consumed)
    {
        // Nothing was left unread, so no rounding is needed.
        result = new DiyFp(significand, 0);
        remaining_decimals = 0;
        return;
    }

    // Some digits were dropped: round on the first one that was not read.
    if (buffer[digits_consumed] >= '5')
    {
        significand++;
    }

    // The binary exponent is always zero here; the dropped digits are
    // reported through remaining_decimals instead.
    result = new DiyFp(significand, 0);
    remaining_decimals = buffer.length() - digits_consumed;
}
// Fetches the cached power-of-ten entry for requestedExponent.
// The table slot is (requestedExponent + KCachedPowersOffset) /
// KDecimalExponentDistance. power receives a new DiyFp built from the entry's
// significand and binary exponent; foundExponent receives the entry's decimal
// exponent. No range check is performed on the computed slot.
public static void GetCachedPowerForDecimalExponent(int requestedExponent, out DiyFp power, out int foundExponent)
{
    var entry = KCachedPowers[(requestedExponent + KCachedPowersOffset) / KDecimalExponentDistance];

    power = new DiyFp(entry.Significand, entry.BinaryExponent);
    foundExponent = entry.DecimalExponent;
}
// Fetches the cached power-of-ten entry for requested_exponent.
// The table slot is (requested_exponent + kCachedPowersOffset) /
// kDecimalExponentDistance. power receives a new DiyFp built from the entry's
// significand and binary exponent; found_exponent receives the entry's decimal
// exponent. No range check is performed on the computed slot.
public static void GetCachedPowerForDecimalExponent(int requested_exponent, out DiyFp power, out int found_exponent)
{
    int slot = (requested_exponent + kCachedPowersOffset) / kDecimalExponentDistance;
    CachedPower entry = kCachedPowers[slot];

    power = new DiyFp(entry.significand, entry.binary_exponent);
    found_exponent = entry.decimal_exponent;
}
// Selects a cached power of ten from a binary exponent.
// A decimal estimate is computed as
// ceil((minExponent + DiyFp.KSignificandSize - 1) * KD1Log210) and mapped to a
// table slot via KCachedPowersOffset and KDecimalExponentDistance.
// decimalExponent receives the entry's decimal exponent; power receives a new
// DiyFp built from the entry's significand and binary exponent.
// maxExponent is accepted but not used by this method.
public static void GetCachedPowerForBinaryExponentRange(
    int minExponent, int maxExponent, out DiyFp power, out int decimalExponent)
{
    var scaled = Math.Ceiling((minExponent + DiyFp.KSignificandSize - 1) * KD1Log210);
    var slot = (KCachedPowersOffset + (int)scaled - 1) / KDecimalExponentDistance + 1;
    var entry = KCachedPowers[slot];

    decimalExponent = entry.DecimalExponent;
    power = new DiyFp(entry.Significand, entry.BinaryExponent);
}
// Selects a cached power of ten from a binary exponent.
// A decimal estimate is computed as
// ceil((min_exponent + DiyFp.kSignificandSize - 1) * kD_1_LOG2_10) and mapped
// to a table slot via kCachedPowersOffset and kDecimalExponentDistance.
// decimal_exponent receives the entry's decimal exponent; power receives a new
// DiyFp built from the entry's significand and binary exponent.
// max_exponent is accepted but not used by this method.
public static void GetCachedPowerForBinaryExponentRange(
    int min_exponent, int max_exponent, out DiyFp power, out int decimal_exponent)
{
    int significand_bits = DiyFp.kSignificandSize;
    double scaled = Math.Ceiling((min_exponent + significand_bits - 1) * kD_1_LOG2_10);

    // Offset the estimate into table coordinates, then step to the next slot.
    int offset_estimate = kCachedPowersOffset + (int)scaled - 1;
    int slot = offset_estimate / kDecimalExponentDistance + 1;

    CachedPower entry = kCachedPowers[slot];
    decimal_exponent = entry.decimal_exponent;
    power = new DiyFp(entry.significand, entry.binary_exponent);
}
// Computes the lower (out_m_minus) and upper (out_m_plus) boundaries of the
// value returned by AsDiyFp().
// The upper boundary is (2f + 1, e - 1) passed through DiyFp.Normalize. The
// lower boundary is (4f - 1, e - 2) when LowerBoundaryIsCloser() is true and
// (2f - 1, e - 1) otherwise; it is then shifted so that it carries the same
// exponent as the upper boundary.
// Precondition (as stated by the original contract): the encoded value must
// be greater than 0.
public void NormalizedBoundaries(out DiyFp out_m_minus, out DiyFp out_m_plus)
{
    DiyFp value = this.AsDiyFp();

    DiyFp upper_unnormalized = new DiyFp((value.f << 1) + 1, value.e - 1);
    DiyFp upper = DiyFp.Normalize(ref upper_unnormalized);

    DiyFp lower = LowerBoundaryIsCloser()
        ? new DiyFp((value.f << 2) - 1, value.e - 2)
        : new DiyFp((value.f << 1) - 1, value.e - 1);

    // Bring the lower boundary onto the upper boundary's exponent.
    lower.f = lower.f << (lower.e - upper.e);
    lower.e = upper.e;

    out_m_plus = upper;
    out_m_minus = lower;
}
// Packs a DiyFp into a 64-bit pattern: the significand masked with
// kSignificandMask, OR'ed with the biased exponent shifted left by
// kPhysicalSignificandSize.
// Returns kInfinity when the adjusted exponent is at least kMaxExponent, and 0
// when it is below kDenormalExponent.
public static ulong DiyFpToUint64(DiyFp diy_fp)
{
    ulong bits = diy_fp.f;
    int exp = diy_fp.e;

    // Shrink the significand until it fits below kHiddenBit + kSignificandMask.
    while (bits > kHiddenBit + kSignificandMask)
    {
        exp++;
        bits >>= 1;
    }

    if (exp >= kMaxExponent)
    {
        return kInfinity;
    }
    if (exp < kDenormalExponent)
    {
        return 0;
    }

    // Grow the significand until the hidden bit is set or the denormal
    // exponent is reached.
    while (exp > kDenormalExponent && (bits & kHiddenBit) == 0)
    {
        exp--;
        bits <<= 1;
    }

    // At the denormal exponent without a hidden bit the exponent field is 0.
    bool hidden_bit_missing = exp == kDenormalExponent && (bits & kHiddenBit) == 0;
    ulong biased_exponent = hidden_bit_missing ? 0UL : (ulong)(exp + kExponentBias);

    return (bits & kSignificandMask) | (biased_exponent << kPhysicalSignificandSize);
}
// Builds an instance from a DiyFp by storing the 64-bit pattern that
// DiyFpToUint64 produces for it.
public Double(DiyFp d)
{
    this.d64_ = DiyFpToUint64(d);
}
// Converts the decimal digits in buffer, scaled by 10^exponent, to a double
// using DiyFp arithmetic while tracking the accumulated error.
// If the function returns true then result is the correct double.
// Otherwise result is either the correct double or the double that is just
// below the correct double, and the caller has to fall back to a more precise
// algorithm.
static bool DiyFpStrtod(Vector buffer, int exponent, out double result)
{
    DiyFp input;
    int remaining_decimals;
    ReadDiyFp(buffer, out input, out remaining_decimals);

    // Since some digits may have been dropped the input is not accurate.
    // If remaining_decimals is different from 0 then the error is at most
    // .5 ulp (unit in the last place).
    // To avoid fractions, all errors are kept over a common denominator.
    const int kDenominatorLog = 3;
    const int kDenominator = 1 << kDenominatorLog;
    const int kHalfUlp = kDenominator / 2;

    // Move the remaining decimals into the exponent.
    exponent += remaining_decimals;
    uint64_t error = (ulong)(remaining_decimals == 0 ? 0 : kHalfUlp);

    // Normalizing changes the exponent; scale the error by the same shift.
    int exponent_before = input.e;
    input.Normalize();
    error <<= exponent_before - input.e;

    if (exponent < PowersOfTenCache.kMinDecimalExponent)
    {
        result = 0.0;
        return true;
    }

    DiyFp cached_power;
    int cached_decimal_exponent;
    PowersOfTenCache.GetCachedPowerForDecimalExponent(exponent, out cached_power, out cached_decimal_exponent);

    if (cached_decimal_exponent != exponent)
    {
        int adjustment_exponent = exponent - cached_decimal_exponent;
        DiyFp adjustment_power = AdjustmentPowerOfTen(adjustment_exponent);
        input.Multiply(ref adjustment_power);

        // When this holds, the product of input with the adjustment power
        // fits into a 64 bit integer and no error is added.
        bool product_fits = kMaxUint64DecimalDigits - buffer.length() >= adjustment_exponent;
        if (!product_fits)
        {
            // The adjustment power is exact. There is hence only an error of 0.5.
            error += kHalfUlp;
        }
    }

    input.Multiply(ref cached_power);
    // The error introduced by a multiplication of a*b equals
    //   error_a + error_b + error_a*error_b/2^64 + 0.5
    // Substituting a with 'input' and b with 'cached_power' we have
    //   error_b = 0.5  (all cached powers have an error of less than 0.5 ulp),
    //   error_ab = 0 or 1 / kDenominator > error_a*error_b/ 2^64
    int cached_power_error = kHalfUlp;
    int cross_term_error = (error == 0 ? 0 : 1);  // We round up to 1.
    int multiplication_error = kHalfUlp;
    error += (ulong)(cached_power_error + cross_term_error + multiplication_error);

    exponent_before = input.e;
    input.Normalize();
    error <<= exponent_before - input.e;

    // See if the double's significand changes if we add/subtract the error.
    int order_of_magnitude = DiyFp.kSignificandSize + input.e;
    int effective_significand_size = Double.SignificandSizeForOrderOfMagnitude(order_of_magnitude);
    int precision_digits_count = DiyFp.kSignificandSize - effective_significand_size;
    int scaled_precision_bits = precision_digits_count + kDenominatorLog;
    if (scaled_precision_bits >= DiyFp.kSignificandSize)
    {
        // This can only happen for very small denormals. In this case the
        // half-way multiplied by the denominator exceeds the range of an uint64.
        // Simply shift everything to the right.
        int shift_amount = scaled_precision_bits - DiyFp.kSignificandSize + 1;
        input.f >>= shift_amount;
        input.e += shift_amount;
        // We add 1 for the lost precision of error, and kDenominator for
        // the lost precision of input.f.
        error = (error >> shift_amount) + 1 + kDenominator;
        precision_digits_count -= shift_amount;
    }

    // 64-bit unsigned arithmetic from here on. This only works if the DiyFp
    // significand is a 64-bit unsigned integer too.
    uint64_t precision_bits_mask = (1UL << precision_digits_count) - 1;
    uint64_t precision_bits = (input.f & precision_bits_mask) * kDenominator;
    uint64_t half_way = (1UL << (precision_digits_count - 1)) * kDenominator;
    uint64_t round_up_threshold = half_way + error;

    DiyFp rounded_input = new DiyFp(input.f >> precision_digits_count, input.e + precision_digits_count);
    if (precision_bits >= round_up_threshold)
    {
        rounded_input.f += 1;
    }

    result = new Double(rounded_input).value();

    // If the precision bits are too close to the half-way case then we are too
    // inaccurate and have rounded down. The returned number is still either
    // the correct double or the next-lower double, but the caller has to fall
    // back to a slower, more precise version.
    bool too_imprecise = half_way - error < precision_bits && precision_bits < round_up_threshold;
    return !too_imprecise;
}