Beispiel #1
0
        /// <summary>
        /// Shows that the FPU precision-control setting changes the rounded value of <c>a + b</c>,
        /// using operands that are spelled out as explicit binary strings.
        /// </summary>
        /// <remarks>
        /// Per the inline comments, <c>a</c> is the 53-bit pattern 100...001 and <c>b</c> is
        /// 0.0111...1 (53 ones); assuming <c>FromFloatingPointBinaryString</c> parses them that way,
        /// these are 2^52 + 1 and 0.5 - 2^-54. With 53-bit precision the sum is expected to round
        /// straight to <c>a</c>. With 64-bit precision plus an explicit cast the sum is rounded twice
        /// (first to the extended format, then to double) and is expected to end up at <c>a + 1.0</c>.
        /// The precision control that was active on entry is restored on exit, even when an
        /// assertion throws, so the changed FPU state does not leak into other code on this thread.
        /// NOTE(review): these expectations presumably hold only where the JIT evaluates double
        /// arithmetic on an FPU that honours the precision control - confirm for the target platform.
        /// </remarks>
        public void ShowDoubleRoundingPriestExplicit()
        {
            double a          = DoubleConverter.FromFloatingPointBinaryString("1" + '0'.Repeat(51) + "1"); // 100....00001 (53-bits wide)
            double b          = DoubleConverter.FromFloatingPointBinaryString("0.0" + '1'.Repeat(53));     //            0.0111...111 (53 1's)
            double expected53 = a;
            double expected64 = a + 1.0;                                                                   // The point is that this is different from expected53.

            Debug.Print(DoubleConverter.ToFloatingPointBinaryString(a));
            Debug.Print(DoubleConverter.ToFloatingPointBinaryString(b));
            Debug.Print(DoubleConverter.ToFloatingPointBinaryString(expected53));
            Debug.Print(DoubleConverter.ToFloatingPointBinaryString(expected64));

            // Remember the ambient FPU state so it can be put back no matter how the test ends.
            var savedState = new FpuControl.State(FpuControl.GetState());

            try
            {
                // Set Fpu to 53-bit precision (the default)
                FpuControl.SetState((uint)FpuControl.PrecisionControl.Double53Bits, FpuControl.Mask.PrecisionControl);

                double result53 = a + b;

                Debug.Print(DoubleConverter.ToFloatingPointBinaryString(result53));
                Assert.AreEqual(expected53, result53);

                // Explicit rounding makes no difference here (since we're in Double53bits precision FPU mode)
                result53 = (double)(a + b);
                Debug.Print(DoubleConverter.ToFloatingPointBinaryString(result53));
                Assert.AreEqual(expected53, result53);

                // Set Fpu to 64-bit precision (extended precision)
                FpuControl.SetState((uint)FpuControl.PrecisionControl.Extended64Bits, FpuControl.Mask.PrecisionControl);
                double result64 = (double)(a + b);

                Debug.Print(DoubleConverter.ToFloatingPointBinaryString(result64));
                Assert.AreEqual(expected64, result64);
            }
            finally
            {
                // Undo the precision-control change made above.
                FpuControl.SetState((uint)savedState.PrecisionControl, FpuControl.Mask.PrecisionControl);
            }
        }
Beispiel #2
0
        /// <summary>
        /// Shows that the FPU precision-control setting changes the rounded value of <c>a + b</c>
        /// for <c>a = 2^52 + 1</c> and <c>b = 0.5 - 2^-54</c>.
        /// </summary>
        /// <remarks>
        /// The exact sum lies just below the midpoint between <c>a</c> and <c>a + 1</c>. With 53-bit
        /// precision it is expected to round straight to <c>a</c>. With 64-bit precision plus an
        /// explicit cast it is rounded twice (first to the extended format, then to double) and is
        /// expected to end up at <c>a + 1.0</c>.
        /// The precision control that was active on entry is restored on exit, even when an
        /// assertion throws, so the changed FPU state does not leak into other code on this thread.
        /// NOTE(review): these expectations presumably hold only where the JIT evaluates double
        /// arithmetic on an FPU that honours the precision control - confirm for the target platform.
        /// </remarks>
        public void ShowDoubleRoundingPriest()
        {
            double a          = Math.Pow(2.0, 52.0) + 1.0;
            double b          = 0.5 - Math.Pow(2.0, -54.0);
            double expected53 = a;
            double expected64 = a + 1.0; // Math.Pow(2.0, 52.0) + 2.0;

            Debug.Print(DoubleConverter.ToFloatingPointBinaryString(a));
            Debug.Print(DoubleConverter.ToFloatingPointBinaryString(b));
            Debug.Print(DoubleConverter.ToFloatingPointBinaryString(expected53));
            Debug.Print(DoubleConverter.ToFloatingPointBinaryString(expected64));

            // Remember the ambient FPU state so it can be put back no matter how the test ends.
            var savedState = new FpuControl.State(FpuControl.GetState());

            try
            {
                // Set Fpu to 53-bit precision (the default)
                FpuControl.SetState((uint)FpuControl.PrecisionControl.Double53Bits, FpuControl.Mask.PrecisionControl);

                double result53 = a + b;

                Debug.Print(DoubleConverter.ToFloatingPointBinaryString(result53));
                Assert.AreEqual(expected53, result53);

                // Set Fpu to 64-bit precision (extended precision)
                FpuControl.SetState((uint)FpuControl.PrecisionControl.Extended64Bits, FpuControl.Mask.PrecisionControl);
                double result64 = (double)(a + b);

                Debug.Print(DoubleConverter.ToFloatingPointBinaryString(result64));
                Assert.AreEqual(expected64, result64);
            }
            finally
            {
                // Undo the precision-control change made above.
                FpuControl.SetState((uint)savedState.PrecisionControl, FpuControl.Mask.PrecisionControl);
            }
        }
Beispiel #3
0
        /// <summary>
        /// Shows that <c>a + b</c> rounds to different doubles under 53-bit and 64-bit FPU precision,
        /// and that the unrounded 64-bit intermediate keeps part of <c>b</c>.
        /// </summary>
        /// <remarks>
        /// Per the inline comments, <c>a</c> is 52 ones followed by a zero and <c>b</c> is
        /// 0.100000000001 in binary (assuming <c>FromFloatingPointBinaryString</c> parses them that
        /// way). With 53-bit precision the sum is expected to round up to 52 ones followed by a one;
        /// with 64-bit precision plus an explicit cast it is expected to round back to <c>a</c>.
        /// Without the cast, <c>(a + b) - a</c> is expected to yield exactly 0.5.
        /// The precision control that was active on entry is restored on exit, even when an
        /// assertion throws, so the changed FPU state does not leak into other code on this thread.
        /// NOTE(review): these expectations presumably hold only where the JIT evaluates double
        /// arithmetic on an FPU that honours the precision control - confirm for the target platform.
        /// </remarks>
        public void ShowDoubleRounding()
        {
            double a          = DoubleConverter.FromFloatingPointBinaryString('1'.Repeat(52) + "0"); // 111....11110
            double b          = DoubleConverter.FromFloatingPointBinaryString("0.100000000001");     // 000....00000.100000000001
            double expected53 = DoubleConverter.FromFloatingPointBinaryString('1'.Repeat(52) + "1"); // 111....11111
            double expected64 = a;

            // Remember the ambient FPU state so it can be put back no matter how the test ends.
            var savedState = new FpuControl.State(FpuControl.GetState());

            try
            {
                // Set Fpu to 53-bit precision (the default)
                FpuControl.SetState((uint)FpuControl.PrecisionControl.Double53Bits, FpuControl.Mask.PrecisionControl);

                double result53 = a + b;

                Assert.AreEqual(expected53, result53);

                // Set Fpu to 64-bit precision (extended precision)
                FpuControl.SetState((uint)FpuControl.PrecisionControl.Extended64Bits, FpuControl.Mask.PrecisionControl);
                double result64 = (double)(a + b);

                Assert.AreEqual(expected64, result64);

                // No cast here: the intermediate sum is deliberately left unrounded before subtracting.
                double result64_0 = (a + b) - a;

                Assert.AreNotEqual(0.0, result64_0);
                Assert.AreNotEqual(b, result64_0);
                Assert.AreEqual(0.5, result64_0);       // 000....00000.1
            }
            finally
            {
                // Undo the precision-control change made above.
                FpuControl.SetState((uint)savedState.PrecisionControl, FpuControl.Mask.PrecisionControl);
            }
        }
        /// <summary>
        /// Checks that <c>TestCalc</c> changes its result when the FPU is switched to 64-bit
        /// precision, that <c>TestCalcSafe</c> does not, and that restoring the previous
        /// precision control brings <c>TestCalc</c> back to its original result.
        /// </summary>
        /// <remarks>
        /// <c>TestCalc</c> and <c>TestCalcSafe</c> are defined outside this block; the expected
        /// values (0.0 / 0.5 / 0.0 / 0.0) are taken from the assertions below.
        /// The previous precision control is restored in a <c>finally</c> block so that a failing
        /// assertion cannot leave the thread in extended precision.
        /// </remarks>
        public void TestMethod1()
        {
            double before = TestCalc();

            Assert.AreEqual(0.0, before);

            // Remember the ambient FPU state so it can be put back no matter how the test ends.
            var oldState = new FpuControl.State(FpuControl.GetState());

            try
            {
                FpuControl.SetState((uint)FpuControl.PrecisionControl.Extended64Bits, FpuControl.Mask.PrecisionControl);

                double after = TestCalc();

                Assert.AreEqual(0.5, after);

                double afterSafe = TestCalcSafe();

                Assert.AreEqual(0.0, afterSafe);
            }
            finally
            {
                FpuControl.SetState((uint)oldState.PrecisionControl, FpuControl.Mask.PrecisionControl);
            }

            // With the original precision control back in place the calculation must match the first run.
            double reset = TestCalc();

            Assert.AreEqual(0.0, reset);
        }
        /// <summary>
        /// Checks that under 53-bit FPU precision <c>a + b</c> rounds to <c>a</c> whether or not
        /// the sum is explicitly cast to double before <c>a</c> is subtracted again.
        /// </summary>
        /// <remarks>
        /// Per the inline comments, <c>a</c> is 52 ones followed by a zero and <c>b</c> is binary 0.1
        /// (assuming <c>FromFloatingPointBinaryString</c> parses them that way).
        /// The precision control that was active on entry is restored on exit, even when an
        /// assertion throws, so this test does not silently force a precision on later code.
        /// </remarks>
        public void TestPrecision53()
        {
            double a          = DoubleConverter.FromFloatingPointBinaryString('1'.Repeat(52) + "0"); // 111....11110
            double b          = DoubleConverter.FromFloatingPointBinaryString("0.1");                // 000....00000.1
            double expected53 = a;

            // Remember the ambient FPU state so it can be put back no matter how the test ends.
            var savedState = new FpuControl.State(FpuControl.GetState());

            try
            {
                FpuControl.SetState((uint)FpuControl.PrecisionControl.Double53Bits, FpuControl.Mask.PrecisionControl);
                double result53 = (double)(a + b);

                Assert.AreEqual(expected53, result53);

                // Without the cast: the intermediate sum is expected to have lost b already.
                double result53_0 = (a + b) - a;

                Assert.AreEqual(0.0, result53_0);

                // With the cast: same expectation.
                double result53_X = ((double)(a + b)) - a;

                Assert.AreEqual(0.0, result53_X);
            }
            finally
            {
                // Undo the precision-control change made above.
                FpuControl.SetState((uint)savedState.PrecisionControl, FpuControl.Mask.PrecisionControl);
            }
        }
        /// <summary>
        /// Checks that under 64-bit FPU precision the unrounded intermediate <c>a + b</c> still
        /// carries <c>b</c>, while an explicit cast to double rounds it back to <c>a</c>.
        /// </summary>
        /// <remarks>
        /// Per the inline comments, <c>a</c> is 52 ones followed by a zero and <c>b</c> is binary 0.1
        /// (assuming <c>FromFloatingPointBinaryString</c> parses them that way).
        /// The precision control that was active on entry is restored on exit, even when an
        /// assertion throws, so the extended-precision setting does not leak into other code on
        /// this thread.
        /// NOTE(review): these expectations presumably hold only where the JIT evaluates double
        /// arithmetic on an FPU that honours the precision control - confirm for the target platform.
        /// </remarks>
        public void TestPrecision64()
        {
            double a          = DoubleConverter.FromFloatingPointBinaryString('1'.Repeat(52) + "0"); // 111....11110
            double b          = DoubleConverter.FromFloatingPointBinaryString("0.1");                // 000....00000.1
            double expected64 = a;

            // Remember the ambient FPU state so it can be put back no matter how the test ends.
            var savedState = new FpuControl.State(FpuControl.GetState());

            try
            {
                FpuControl.SetState((uint)FpuControl.PrecisionControl.Extended64Bits, FpuControl.Mask.PrecisionControl);
                double result64 = (double)(a + b);

                Assert.AreEqual(expected64, result64);

                // Without the cast: the intermediate sum is expected to still contain b.
                double result64_b = (a + b) - a;

                Assert.AreEqual(b, result64_b);

                // With the cast: the sum is rounded to double first, so b is expected to be gone.
                double result64_0 = ((double)(a + b)) - a;

                Assert.AreNotEqual(b, result64_0);
                Assert.AreEqual(0.0, result64_0);
            }
            finally
            {
                // Undo the precision-control change made above.
                FpuControl.SetState((uint)savedState.PrecisionControl, FpuControl.Mask.PrecisionControl);
            }
        }