/// <summary>
/// Demonstrates double rounding with operands spelled out as explicit binary strings:
/// the same sum <c>a + b</c> yields different doubles depending on whether the FPU
/// rounds intermediates to 53 or to 64 significand bits.
/// </summary>
/// <remarks>
/// With a = 2^52 + 1 and b = 0.5 - 2^-54 the exact sum lies just below the midpoint
/// between a and a + 1. Rounding once to 53 bits gives a. Rounding first to 64 bits
/// gives exactly a + 0.5, and the second rounding to 53 bits then ties to even,
/// producing a + 1.
/// The FPU precision-control setting found on entry is restored before returning,
/// even when an assertion fails, so the mode changes do not leak into other tests.
/// NOTE(review): presumably only meaningful where the JIT evaluates doubles on the
/// x87 FPU — confirm for the target platform.
/// </remarks>
public void ShowDoubleRoundingPriestExplicit()
{
    double a = DoubleConverter.FromFloatingPointBinaryString("1" + '0'.Repeat(51) + "1"); // 100....00001 (53-bits wide)
    double b = DoubleConverter.FromFloatingPointBinaryString("0.0" + '1'.Repeat(53));     // 0.0111...111 (53 1's)
    double expected53 = a;
    double expected64 = a + 1.0; // The point is that this is different to expected53.

    Debug.Print(DoubleConverter.ToFloatingPointBinaryString(a));
    Debug.Print(DoubleConverter.ToFloatingPointBinaryString(b));
    Debug.Print(DoubleConverter.ToFloatingPointBinaryString(expected53));
    Debug.Print(DoubleConverter.ToFloatingPointBinaryString(expected64));

    // Remember the precision control in effect so it can be put back afterwards.
    var savedState = new FpuControl.State(FpuControl.GetState());
    try
    {
        // Set Fpu to 53-bit precision (the default)
        FpuControl.SetState((uint)FpuControl.PrecisionControl.Double53Bits, FpuControl.Mask.PrecisionControl);
        double result53 = a + b;
        Debug.Print(DoubleConverter.ToFloatingPointBinaryString(result53));
        Assert.AreEqual(expected53, result53);

        // Explicit rounding makes no difference here (since we're in Double53Bits precision FPU mode)
        result53 = (double)(a + b);
        Debug.Print(DoubleConverter.ToFloatingPointBinaryString(result53));
        Assert.AreEqual(expected53, result53);

        // Set Fpu to 64-bit precision (extended precision)
        FpuControl.SetState((uint)FpuControl.PrecisionControl.Extended64Bits, FpuControl.Mask.PrecisionControl);
        // The explicit cast forces the extended-precision sum to be rounded a second time, to double.
        double result64 = (double)(a + b);
        Debug.Print(DoubleConverter.ToFloatingPointBinaryString(result64));
        Assert.AreEqual(expected64, result64);
    }
    finally
    {
        // Undo the precision changes regardless of assertion outcome.
        FpuControl.SetState((uint)savedState.PrecisionControl, FpuControl.Mask.PrecisionControl);
    }
}
/// <summary>
/// Demonstrates double rounding with operands computed arithmetically:
/// a = 2^52 + 1 and b = 0.5 - 2^-54.
/// </summary>
/// <remarks>
/// In 53-bit mode <c>a + b</c> is expected to round to a. In 64-bit extended mode
/// the sum is expected to round to a + 0.5 first and then, when cast to double,
/// to a + 1 — a different result caused by rounding twice.
/// The FPU precision-control setting found on entry is restored before returning,
/// even when an assertion fails, so the mode changes do not leak into other tests.
/// </remarks>
public void ShowDoubleRoundingPriest()
{
    double a = Math.Pow(2.0, 52.0) + 1.0;
    double b = 0.5 - Math.Pow(2.0, -54.0);
    double expected53 = a;
    double expected64 = a + 1.0; // Math.Pow(2.0, 52.0) + 2.0;

    Debug.Print(DoubleConverter.ToFloatingPointBinaryString(a));
    Debug.Print(DoubleConverter.ToFloatingPointBinaryString(b));
    Debug.Print(DoubleConverter.ToFloatingPointBinaryString(expected53));
    Debug.Print(DoubleConverter.ToFloatingPointBinaryString(expected64));

    // Remember the precision control in effect so it can be put back afterwards.
    var savedState = new FpuControl.State(FpuControl.GetState());
    try
    {
        // Set Fpu to 53-bit precision (the default)
        FpuControl.SetState((uint)FpuControl.PrecisionControl.Double53Bits, FpuControl.Mask.PrecisionControl);
        double result53 = a + b;
        Debug.Print(DoubleConverter.ToFloatingPointBinaryString(result53));
        Assert.AreEqual(expected53, result53);

        // Set Fpu to 64-bit precision (extended precision)
        FpuControl.SetState((uint)FpuControl.PrecisionControl.Extended64Bits, FpuControl.Mask.PrecisionControl);
        // The explicit cast forces the extended-precision sum to be rounded a second time, to double.
        double result64 = (double)(a + b);
        Debug.Print(DoubleConverter.ToFloatingPointBinaryString(result64));
        Assert.AreEqual(expected64, result64);
    }
    finally
    {
        // Undo the precision changes regardless of assertion outcome.
        FpuControl.SetState((uint)savedState.PrecisionControl, FpuControl.Mask.PrecisionControl);
    }
}
/// <summary>
/// Demonstrates that <c>a + b</c> rounds up in 53-bit mode but, after passing through
/// a 64-bit intermediate, ends up rounded down when cast back to double.
/// </summary>
/// <remarks>
/// a is 52 one-bits followed by a zero; b is slightly more than one half (binary 0.100000000001).
/// Rounded directly to 53 bits the sum is a + 1. In extended mode the low bit of b is
/// expected to be lost first, leaving a + 0.5, which then ties to even (a) when cast to double.
/// The FPU precision-control setting found on entry is restored before returning,
/// even when an assertion fails, so the mode changes do not leak into other tests.
/// </remarks>
public void ShowDoubleRounding()
{
    double a = DoubleConverter.FromFloatingPointBinaryString('1'.Repeat(52) + "0");          // 111....11110
    double b = DoubleConverter.FromFloatingPointBinaryString("0.100000000001");              // 000....00000.100000000001
    double expected53 = DoubleConverter.FromFloatingPointBinaryString('1'.Repeat(52) + "1"); // 111....11111
    double expected64 = a;

    // Remember the precision control in effect so it can be put back afterwards.
    var savedState = new FpuControl.State(FpuControl.GetState());
    try
    {
        // Set Fpu to 53-bit precision (the default)
        FpuControl.SetState((uint)FpuControl.PrecisionControl.Double53Bits, FpuControl.Mask.PrecisionControl);
        double result53 = a + b;
        Assert.AreEqual(expected53, result53);

        // Set Fpu to 64-bit precision (extended precision)
        FpuControl.SetState((uint)FpuControl.PrecisionControl.Extended64Bits, FpuControl.Mask.PrecisionControl);
        double result64 = (double)(a + b);
        Assert.AreEqual(expected64, result64);

        // Without a cast the intermediate sum is not narrowed to double before the subtraction.
        double result64_0 = (a + b) - a;
        Assert.AreNotEqual(0.0, result64_0);
        Assert.AreNotEqual(b, result64_0);
        Assert.AreEqual(0.5, result64_0); // 000....00000.1
    }
    finally
    {
        // Undo the precision changes regardless of assertion outcome.
        FpuControl.SetState((uint)savedState.PrecisionControl, FpuControl.Mask.PrecisionControl);
    }
}
/// <summary>
/// Checks that switching the FPU to 64-bit extended precision changes the result of
/// <c>TestCalc</c> (0.0 becomes 0.5) while <c>TestCalcSafe</c> still returns 0.0, and that
/// restoring the original precision control brings <c>TestCalc</c> back to 0.0.
/// </summary>
/// <remarks>
/// The original precision control is restored in a <c>finally</c> block so that a failing
/// assertion cannot leave the FPU in extended-precision mode for subsequent tests.
/// </remarks>
public void TestMethod1()
{
    double before = TestCalc();
    Assert.AreEqual(0.0, before);

    // Capture the current FPU state so the precision control can be put back afterwards.
    var oldState = new FpuControl.State(FpuControl.GetState());
    try
    {
        FpuControl.SetState((uint)FpuControl.PrecisionControl.Extended64Bits, FpuControl.Mask.PrecisionControl);

        double after = TestCalc();
        Assert.AreEqual(0.5, after);

        double afterSafe = TestCalcSafe();
        Assert.AreEqual(0.0, afterSafe);
    }
    finally
    {
        // Undo the precision change regardless of assertion outcome.
        FpuControl.SetState((uint)oldState.PrecisionControl, FpuControl.Mask.PrecisionControl);
    }

    // With the original setting back in place the calculation must behave as it did at the start.
    double reset = TestCalc();
    Assert.AreEqual(0.0, reset);
}
/// <summary>
/// Verifies that in 53-bit precision mode adding one half to <c>a</c> (52 one-bits followed
/// by a zero) yields <c>a</c> again, with or without an explicit cast of the intermediate sum.
/// </summary>
/// <remarks>
/// The FPU precision-control setting found on entry is restored before returning,
/// even when an assertion fails, so the mode change does not leak into other tests.
/// </remarks>
public void TestPrecision53()
{
    double a = DoubleConverter.FromFloatingPointBinaryString('1'.Repeat(52) + "0"); // 111....11110
    double b = DoubleConverter.FromFloatingPointBinaryString("0.1");                // 000....00000.1
    double expected53 = a;

    // Remember the precision control in effect so it can be put back afterwards.
    var savedState = new FpuControl.State(FpuControl.GetState());
    try
    {
        FpuControl.SetState((uint)FpuControl.PrecisionControl.Double53Bits, FpuControl.Mask.PrecisionControl);

        double result53 = (double)(a + b);
        Assert.AreEqual(expected53, result53);

        // No cast: the intermediate is still expected to be rounded to 53 bits in this mode.
        double result53_0 = (a + b) - a;
        Assert.AreEqual(0.0, result53_0);

        // Explicit cast of the intermediate: same outcome.
        double result53_X = ((double)(a + b)) - a;
        Assert.AreEqual(0.0, result53_X);
    }
    finally
    {
        // Undo the precision change regardless of assertion outcome.
        FpuControl.SetState((uint)savedState.PrecisionControl, FpuControl.Mask.PrecisionControl);
    }
}
/// <summary>
/// Verifies that in 64-bit extended precision mode the un-cast intermediate <c>a + b</c>
/// retains the added one half, whereas an explicit cast to double discards it.
/// </summary>
/// <remarks>
/// The FPU precision-control setting found on entry is restored before returning,
/// even when an assertion fails, so extended precision does not leak into other tests.
/// </remarks>
public void TestPrecision64()
{
    double a = DoubleConverter.FromFloatingPointBinaryString('1'.Repeat(52) + "0"); // 111....11110
    double b = DoubleConverter.FromFloatingPointBinaryString("0.1");                // 000....00000.1
    double expected64 = a;

    // Remember the precision control in effect so it can be put back afterwards.
    var savedState = new FpuControl.State(FpuControl.GetState());
    try
    {
        FpuControl.SetState((uint)FpuControl.PrecisionControl.Extended64Bits, FpuControl.Mask.PrecisionControl);

        // Cast result: the sum is narrowed to double and the half is rounded away.
        double result64 = (double)(a + b);
        Assert.AreEqual(expected64, result64);

        // No cast: the intermediate keeps the extra bits, so subtracting a gives b back.
        double result64_b = (a + b) - a;
        Assert.AreEqual(b, result64_b);

        // Explicit cast of the intermediate: the half is lost before the subtraction.
        double result64_0 = ((double)(a + b)) - a;
        Assert.AreNotEqual(b, result64_0);
        Assert.AreEqual(0.0, result64_0);
    }
    finally
    {
        // Undo the precision change regardless of assertion outcome.
        FpuControl.SetState((uint)savedState.PrecisionControl, FpuControl.Mask.PrecisionControl);
    }
}