/// <summary>
/// Walks from an initial estimate, one ULP (unit in the last place) at a time, to the
/// double-precision value that lies closest to the desired value.
/// </summary>
/// <param name="initialEstimate"> The starting guess; expected to already be within a few
/// ULPs of the answer. </param>
/// <param name="base10Exponent"> The power-of-ten scale factor. </param>
/// <param name="desiredValue"> The target value, already scaled using the power-of-ten
/// scale factor. </param>
/// <returns> The double-precision number nearest to the desired value. When two doubles are
/// equally near, the one whose least significant bit is zero is returned. </returns>
private static double RefineEstimate(double initialEstimate, int base10Exponent, BigInteger desiredValue)
{
    // Rounding error in the initial estimate can leave it one or more ULPs away from the
    // correctly rounded result, so the estimate is verified with big-integer arithmetic:
    //   - Keep two neighbouring doubles: the current candidate and the one a single ULP
    //     away from it in the current search direction.
    //   - Scale both to big integers at the current precision and compare them with the
    //     desired value scaled to the same precision.
    //   - If both lie on the same side of the desired value, either reverse the search
    //     direction or step one ULP further, then compare again.
    //   - Once the pair brackets the desired value, return whichever is clearly closer.
    //   - If neither is clearly closer, add 32 bits of precision and compare again, up to
    //     and including 160 bits.
    //   - If there is still no winner, treat it as a tie and return the candidate whose
    //     least significant bit is zero.

    // Probe one ULP downwards when the estimate is +Infinity, otherwise one ULP upwards.
    int step = double.IsPositiveInfinity(initialEstimate) ? -1 : 1;

    double current = initialEstimate;
    double neighbour = AddUlps(current, step);

    // Apply the power of ten to whichever side keeps everything an integer: a negative
    // exponent scales the candidates (via the multiplier handed to ScaleToInteger), a
    // positive exponent scales the desired value instead.
    BigInteger candidateScale = BigInteger.One;
    if (base10Exponent < 0)
    {
        candidateScale = BigInteger.Pow(10, -base10Exponent);
    }
    else if (base10Exponent > 0)
    {
        desiredValue = BigInteger.Multiply(desiredValue, BigInteger.Pow(10, base10Exponent));
    }

    // The precision only grows in the bracketing branch below, hence no loop increment.
    for (int bits = 32; bits <= 160; )
    {
        // Big-integer images of both candidates at the current precision.
        var currentScaled = ScaleToInteger(current, candidateScale, bits);
        var neighbourScaled = ScaleToInteger(neighbour, candidateScale, bits);

        // Signed errors of both candidates relative to the desired value.
        var target = BigInteger.LeftShift(desiredValue, bits);
        var currentError = BigInteger.Subtract(currentScaled, target);
        var neighbourError = BigInteger.Subtract(neighbourScaled, target);

        var currentSign = currentError.Sign;
        var neighbourSign = neighbourError.Sign;
        bool movingAway = currentSign == step && neighbourSign == step;
        bool notFarEnough = currentSign == -step && neighbourSign == -step;

        if (movingAway)
        {
            // Both candidates are past the target in the direction of travel: turn around.
            step = -step;
            neighbour = AddUlps(current, step);
        }
        else if (notFarEnough)
        {
            // Heading towards the target but not there yet: slide the pair along one ULP.
            current = neighbour;
            neighbour = AddUlps(current, step);
        }
        else
        {
            // The pair brackets the target (or one candidate hits it exactly). A candidate
            // only wins if it is closer by at least 2; a margin of 1 is not trusted
            // because of the integer division involved in scaling.
            var currentDistance = BigInteger.Abs(currentError);
            var neighbourDistance = BigInteger.Abs(neighbourError);

            if (BigInteger.Compare(currentDistance, BigInteger.Subtract(neighbourDistance, BigInteger.One)) < 0)
            {
                return current;
            }

            if (BigInteger.Compare(neighbourDistance, BigInteger.Subtract(currentDistance, BigInteger.One)) < 0)
            {
                return neighbour;
            }

            // Too close to call at this precision (or a genuine tie): look more closely.
            bits += 32;
        }

        // A NaN neighbour means the search has stepped too far; settle for the current candidate.
        if (double.IsNaN(neighbour))
        {
            return current;
        }
    }

    // No clear winner even at maximum precision: assume an exact halfway case and pick the
    // candidate whose least significant bit is zero.
    bool currentIsEven = (BitConverter.DoubleToInt64Bits(current) & 1) == 0;
    if (currentIsEven)
    {
        return current;
    }

    return neighbour;
}