Ejemplo n.º 1
0
        // If the function returns true then the result is the correct double.
        // Otherwise it is either the correct double or the double that is just below
        // the correct double.
        static bool DiyFpStrtod(Vector buffer,
                                int exponent,
                                out double result)
        {
            DiyFp input;
            int   remaining_decimals;

            ReadDiyFp(buffer, out input, out remaining_decimals);
            // Since we may have dropped some digits the input is not accurate.
            // If remaining_decimals is different than 0 than the error is at most
            // .5 ulp (unit in the last place).
            // We don't want to deal with fractions and therefore keep a common
            // denominator.
            const int kDenominatorLog = 3;
            const int kDenominator    = 1 << kDenominatorLog;

            // Move the remaining decimals into the exponent.
            exponent += remaining_decimals;
            uint64_t error = (ulong)(remaining_decimals == 0 ? 0 : kDenominator / 2);

            int old_e = input.e;

            input.Normalize();
            error <<= old_e - input.e;

            if (exponent < PowersOfTenCache.kMinDecimalExponent)
            {
                result = 0.0;
                return(true);
            }
            DiyFp cached_power;
            int   cached_decimal_exponent;

            PowersOfTenCache.GetCachedPowerForDecimalExponent(exponent,
                                                              out cached_power,
                                                              out cached_decimal_exponent);

            if (cached_decimal_exponent != exponent)
            {
                int   adjustment_exponent = exponent - cached_decimal_exponent;
                DiyFp adjustment_power    = AdjustmentPowerOfTen(adjustment_exponent);
                input.Multiply(ref adjustment_power);
                if (kMaxUint64DecimalDigits - buffer.length() >= adjustment_exponent)
                {
                    // The product of input with the adjustment power fits into a 64 bit
                    // integer.
                }
                else
                {
                    // The adjustment power is exact. There is hence only an error of 0.5.
                    error += kDenominator / 2;
                }
            }

            input.Multiply(ref cached_power);
            // The error introduced by a multiplication of a*b equals
            //   error_a + error_b + error_a*error_b/2^64 + 0.5
            // Substituting a with 'input' and b with 'cached_power' we have
            //   error_b = 0.5  (all cached powers have an error of less than 0.5 ulp),
            //   error_ab = 0 or 1 / kDenominator > error_a*error_b/ 2^64
            int error_b     = kDenominator / 2;
            int error_ab    = (error == 0 ? 0 : 1); // We round up to 1.
            int fixed_error = kDenominator / 2;

            error += (ulong)(error_b + error_ab + fixed_error);

            old_e = input.e;
            input.Normalize();
            error <<= old_e - input.e;

            // See if the double's significand changes if we add/subtract the error.
            int order_of_magnitude         = DiyFp.kSignificandSize + input.e;
            int effective_significand_size = Double.SignificandSizeForOrderOfMagnitude(order_of_magnitude);
            int precision_digits_count     = DiyFp.kSignificandSize - effective_significand_size;

            if (precision_digits_count + kDenominatorLog >= DiyFp.kSignificandSize)
            {
                // This can only happen for very small denormals. In this case the
                // half-way multiplied by the denominator exceeds the range of an uint64.
                // Simply shift everything to the right.
                int shift_amount = (precision_digits_count + kDenominatorLog) -
                                   DiyFp.kSignificandSize + 1;
                input.f = (input.f >> shift_amount);
                input.e = (input.e + shift_amount);
                // We add 1 for the lost precision of error, and kDenominator for
                // the lost precision of input.f().
                error = (error >> shift_amount) + 1 + kDenominator;
                precision_digits_count -= shift_amount;
            }
            // We use uint64_ts now. This only works if the DiyFp uses uint64_ts too.
            uint64_t one64 = 1;
            uint64_t precision_bits_mask = (one64 << precision_digits_count) - 1;
            uint64_t precision_bits      = input.f & precision_bits_mask;
            uint64_t half_way            = one64 << (precision_digits_count - 1);

            precision_bits *= kDenominator;
            half_way       *= kDenominator;
            DiyFp rounded_input = new DiyFp(input.f >> precision_digits_count, input.e + precision_digits_count);

            if (precision_bits >= half_way + error)
            {
                rounded_input.f = (rounded_input.f + 1);
            }
            // If the last_bits are too close to the half-way case than we are too
            // inaccurate and round down. In this case we return false so that we can
            // fall back to a more precise algorithm.

            result = new Double(rounded_input).value();
            if (half_way - error < precision_bits && precision_bits < half_way + error)
            {
                // Too imprecise. The caller will have to fall back to a slower version.
                // However the returned number is guaranteed to be either the correct
                // double, or the next-lower double.
                return(false);
            }
            else
            {
                return(true);
            }
        }
Ejemplo n.º 2
0
        // If the function returns true then the result is the correct double.
        // Otherwise it is either the correct double or the double that is just below
        // the correct double.
        private static bool DiyFpStrToDouble(ReadOnlySpan <byte> buffer, int exponent, out double result)
        {
            ReadDiyFp(buffer, out var input, out var remainingDecimals);
            // Since we may have dropped some digits the input is not accurate.
            // If remaining_decimals is different than 0 than the error is at most
            // .5 ulp (unit in the last place).
            // We don't want to deal with fractions and therefore keep a common
            // denominator.
            const int kDenominatorLog = 3;
            const int kDenominator    = 1 << kDenominatorLog;

            // Move the remaining decimals into the exponent.
            exponent += remainingDecimals;
            var error = (ulong)(remainingDecimals == 0 ? 0 : kDenominator / 2);

            var oldE = input.e;

            input.Normalize();
            error <<= oldE - input.e;

            if (exponent < PowersOfTenCache.kMinDecimalExponent)
            {
                result = 0.0;
                return(true);
            }

            PowersOfTenCache.GetCachedPowerForDecimalExponent(exponent, out var cachedPower, out var cachedDecimalExponent);

            if (cachedDecimalExponent != exponent)
            {
                var adjustmentExponent = exponent - cachedDecimalExponent;
                var adjustmentPower    = AdjustmentPowerOfTen(adjustmentExponent);
                input.Multiply(ref adjustmentPower);
                if (KMaxUint64DecimalDigits - buffer.Length >= adjustmentExponent)
                {
                    // The product of input with the adjustment power fits into a 64 bit
                    // integer.
                }
                else
                {
                    // The adjustment power is exact. There is hence only an error of 0.5.
                    error += kDenominator / 2;
                }
            }

            input.Multiply(ref cachedPower);
            // The error introduced by a multiplication of a*b equals
            //   error_a + error_b + error_a*error_b/2^64 + 0.5
            // Substituting a with 'input' and b with 'cached_power' we have
            //   error_b = 0.5  (all cached powers have an error of less than 0.5 ulp),
            //   error_ab = 0 or 1 / kDenominator > error_a*error_b/ 2^64
            const int errorB     = kDenominator / 2;
            var       errorAb    = error == 0 ? 0 : 1; // We round up to 1.
            const int fixedError = kDenominator / 2;

            error += (ulong)(errorB + errorAb + fixedError);

            oldE = input.e;
            input.Normalize();
            error <<= oldE - input.e;

            // See if the double's significand changes if we add/subtract the error.
            var orderOfMagnitude         = DiyFp.kSignificandSize + input.e;
            var effectiveSignificandSize = IeeeDouble.SignificandSizeForOrderOfMagnitude(orderOfMagnitude);
            var precisionDigitsCount     = DiyFp.kSignificandSize - effectiveSignificandSize;

            if (precisionDigitsCount + kDenominatorLog >= DiyFp.kSignificandSize)
            {
                // This can only happen for very small denormals. In this case the
                // half-way multiplied by the denominator exceeds the range of an uint64.
                // Simply shift everything to the right.
                var shiftAmount = precisionDigitsCount + kDenominatorLog -
                                  DiyFp.kSignificandSize + 1;
                input.f >>= shiftAmount;
                input.e  += shiftAmount;
                // We add 1 for the lost precision of error, and kDenominator for
                // the lost precision of input.f().
                error = (error >> shiftAmount) + 1 + kDenominator;
                precisionDigitsCount -= shiftAmount;
            }

            // We use uint64_ts now. This only works if the DiyFp uses uint64_ts too.
            const ulong one64             = 1;
            var         precisionBitsMask = (one64 << precisionDigitsCount) - 1;
            var         precisionBits     = input.f & precisionBitsMask;
            var         halfWay           = one64 << (precisionDigitsCount - 1);

            precisionBits *= kDenominator;
            halfWay       *= kDenominator;
            var roundedInput = new DiyFp(input.f >> precisionDigitsCount, input.e + precisionDigitsCount);

            if (precisionBits >= halfWay + error)
            {
                roundedInput.f++;
            }
            // If the last_bits are too close to the half-way case than we are too
            // inaccurate and round down. In this case we return false so that we can
            // fall back to a more precise algorithm.

            result = new IeeeDouble(roundedInput).Value();
            // Too imprecise. The caller will have to fall back to a slower version.
            // However the returned number is guaranteed to be either the correct
            // double, or the next-lower double.
            return(halfWay - error >= precisionBits || precisionBits >= halfWay + error);
        }