예제 #1
0
        /// <summary>
        /// Computes a multiply-add of three 256-bit double vectors, preferring the
        /// hardware FMA intrinsic and otherwise delegating to <c>SoftwareFallback</c>.
        /// </summary>
        /// <param name="x">First operand passed to the multiply-add.</param>
        /// <param name="y">Second operand passed to the multiply-add.</param>
        /// <param name="z">Third operand passed to the multiply-add.</param>
        /// <returns>
        /// The result of <c>Fma.MultiplyAdd(x, y, z)</c> when <c>Fma.IsSupported</c> is true,
        /// otherwise the result of <c>SoftwareFallback(x, y, z)</c>.
        /// </returns>
        public static Vector256 <double> FusedMultiplyAdd(Vector256 <double> x, Vector256 <double> y, Vector256 <double> z)
        {
            // Hardware path: taken only when the FMA instruction set is reported as available.
            if (Fma.IsSupported)
            {
                return(Fma.MultiplyAdd(x, y, z));
            }

            // Portable path for hardware without FMA support.
            return(SoftwareFallback(x, y, z));
예제 #2
0
        /// <summary>
        /// Passes the three fields of a newly constructed
        /// <c>SimpleTernaryOpTest__MultiplyAddDouble</c> local to <c>Fma.MultiplyAdd</c>,
        /// writes the result to <c>_dataTable.outArrayPtr</c> and hands the operands
        /// plus that pointer to <c>ValidateResult</c>.
        /// </summary>
        public void RunLclFldScenario()
        {
            var instance = new SimpleTernaryOpTest__MultiplyAddDouble();
            var fused    = Fma.MultiplyAdd(instance._fld1, instance._fld2, instance._fld3);

            // Store first; ValidateResult receives the same output pointer.
            Unsafe.Write(_dataTable.outArrayPtr, fused);
            ValidateResult(instance._fld1, instance._fld2, instance._fld3, _dataTable.outArrayPtr);
        }
예제 #3
0
        /// <summary>
        /// Passes the three fields of a local obtained from <c>TestStruct.Create()</c>
        /// to <c>Fma.MultiplyAdd</c>, writes the result to <c>_dataTable.outArrayPtr</c>
        /// and forwards the operands plus that pointer to <c>ValidateResult</c>.
        /// </summary>
        public void RunStructLclFldScenario()
        {
            var local = TestStruct.Create();
            var fused = Fma.MultiplyAdd(local._fld1, local._fld2, local._fld3);

            // Store first; ValidateResult receives the same output pointer.
            Unsafe.Write(_dataTable.outArrayPtr, fused);
            ValidateResult(local._fld1, local._fld2, local._fld3, _dataTable.outArrayPtr);
        }
예제 #4
0
        /// <summary>
        /// Announces the scenario, then passes this instance's <c>_fld1</c>, <c>_fld2</c>
        /// and <c>_fld3</c> to <c>Fma.MultiplyAdd</c>, writes the result to
        /// <c>_dataTable.outArrayPtr</c> and calls <c>ValidateResult</c>.
        /// </summary>
        public void RunClassFldScenario()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));

            // Operands are read directly from the instance fields.
            var fused = Fma.MultiplyAdd(_fld1, _fld2, _fld3);

            Unsafe.Write(_dataTable.outArrayPtr, fused);
            ValidateResult(_fld1, _fld2, _fld3, _dataTable.outArrayPtr);
        }
예제 #5
0
        /// <summary>
        /// Announces the scenario, then passes the three fields of a newly constructed
        /// <c>SimpleTernaryOpTest__MultiplyAddSingle</c> local to <c>Fma.MultiplyAdd</c>,
        /// writes the result to <c>_dataTable.outArrayPtr</c> and calls <c>ValidateResult</c>.
        /// </summary>
        public void RunClassLclFldScenario()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));

            var instance = new SimpleTernaryOpTest__MultiplyAddSingle();
            var fused    = Fma.MultiplyAdd(instance._fld1, instance._fld2, instance._fld3);

            // Store first; ValidateResult receives the same output pointer.
            Unsafe.Write(_dataTable.outArrayPtr, fused);
            ValidateResult(instance._fld1, instance._fld2, instance._fld3, _dataTable.outArrayPtr);
        }
예제 #6
0
        /// <summary>
        /// Announces the scenario, then passes the three fields of a local obtained from
        /// <c>TestStruct.Create()</c> to <c>Fma.MultiplyAdd</c>, writes the result to
        /// <c>_dataTable.outArrayPtr</c> and calls <c>ValidateResult</c>.
        /// </summary>
        public void RunStructLclFldScenario()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));

            var local = TestStruct.Create();
            var fused = Fma.MultiplyAdd(local._fld1, local._fld2, local._fld3);

            // Store first; ValidateResult receives the same output pointer.
            Unsafe.Write(_dataTable.outArrayPtr, fused);
            ValidateResult(local._fld1, local._fld2, local._fld3, _dataTable.outArrayPtr);
        }
예제 #7
0
        /// <summary>
        /// Reads three <c>Vector256&lt;Single&gt;</c> operands into locals with <c>Unsafe.Read</c>
        /// from the data table's input pointers, combines them with <c>Fma.MultiplyAdd</c>,
        /// writes the result to <c>_dataTable.outArrayPtr</c> and calls <c>ValidateResult</c>.
        /// </summary>
        public void RunLclVarScenario_UnsafeRead()
        {
            Vector256<Single> left   = Unsafe.Read<Vector256<Single>>(_dataTable.inArray1Ptr);
            Vector256<Single> right  = Unsafe.Read<Vector256<Single>>(_dataTable.inArray2Ptr);
            Vector256<Single> addend = Unsafe.Read<Vector256<Single>>(_dataTable.inArray3Ptr);

            // All three operands are locals at the point of the intrinsic call.
            Vector256<Single> fused = Fma.MultiplyAdd(left, right, addend);

            Unsafe.Write(_dataTable.outArrayPtr, fused);
            ValidateResult(left, right, addend, _dataTable.outArrayPtr);
        }
예제 #8
0
        /// <summary>
        /// Loads three operands into locals via <c>Avx.LoadAlignedVector256</c> from the
        /// data table's input pointers, combines them with <c>Fma.MultiplyAdd</c>,
        /// writes the result to <c>_dataTable.outArrayPtr</c> and calls <c>ValidateResult</c>.
        /// </summary>
        public void RunLclVarScenario_LoadAligned()
        {
            Vector256<Single> left   = Avx.LoadAlignedVector256((Single*)_dataTable.inArray1Ptr);
            Vector256<Single> right  = Avx.LoadAlignedVector256((Single*)_dataTable.inArray2Ptr);
            Vector256<Single> addend = Avx.LoadAlignedVector256((Single*)_dataTable.inArray3Ptr);

            // All three operands are locals at the point of the intrinsic call.
            Vector256<Single> fused = Fma.MultiplyAdd(left, right, addend);

            Unsafe.Write(_dataTable.outArrayPtr, fused);
            ValidateResult(left, right, addend, _dataTable.outArrayPtr);
        }
예제 #9
0
 /// <summary>
 /// Multiply-add of three <c>f32</c> values: uses <c>Fma.MultiplyAdd</c> when
 /// <c>Fma.IsSupported</c> is true, otherwise composes <c>Mul</c> and <c>Add</c>.
 /// </summary>
 /// <param name="a">First operand of the multiplication.</param>
 /// <param name="b">Second operand of the multiplication.</param>
 /// <param name="c">Value added to the product.</param>
 /// <returns>The result of the selected multiply-add path.</returns>
 public static f32 FMulAdd_f32(f32 a, f32 b, f32 c)
 {
     // Without FMA support, fall back to a separate multiply followed by an add.
     if (!Fma.IsSupported)
     {
         return Add(Mul(a, b), c);
     }

     return Fma.MultiplyAdd(a, b, c);
 }
예제 #10
0
        /// <summary>
        /// Calls <c>Fma.MultiplyAdd</c> with three operands loaded via
        /// <c>Sse2.LoadVector128</c> from the data table's input pointers, writes the
        /// result to <c>_dataTable.outArrayPtr</c> and calls <c>ValidateResult</c> with
        /// the input and output pointers.
        /// </summary>
        public void RunBasicScenario_Load()
        {
            // Each operand is loaded inline as a call argument rather than through a local.
            Vector128<Double> fused = Fma.MultiplyAdd(Sse2.LoadVector128((Double*)_dataTable.inArray1Ptr),
                                                      Sse2.LoadVector128((Double*)_dataTable.inArray2Ptr),
                                                      Sse2.LoadVector128((Double*)_dataTable.inArray3Ptr));

            Unsafe.Write(_dataTable.outArrayPtr, fused);
            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr);
        }
예제 #11
0
        /// <summary>
        /// Loads a vector from the address of a local <c>vec</c> that is declared without an
        /// initializer, fuses it with broadcast constants 1 and 2 via <c>Fma.MultiplyAdd</c>,
        /// adds a broadcast 3 and returns the first element of the sum.
        /// </summary>
        /// <returns>Element 0 of <c>Sse.Add</c> applied to the fused result and the broadcast 3.</returns>
        static unsafe float fmaTest()
        {
            vec source;
            var ones        = Vector128.Create(1f);
            var accumulator = Vector128.Create(2f);
            var threes      = Vector128.Create(3f);

            // The accumulator is both the addend of the multiply-add and the destination of its result.
            accumulator = Fma.MultiplyAdd(Sse.LoadVector128((float*)&source), ones, accumulator);

            return Sse.Add(accumulator, threes).ToScalar();
        }
예제 #12
0
        /// <summary>
        /// Calls <c>Fma.MultiplyAdd</c> with three operands loaded via
        /// <c>Avx.LoadAlignedVector256</c> from the data table's input pointers, writes the
        /// result to <c>_dataTable.outArrayPtr</c> and calls <c>ValidateResult</c> with
        /// the input and output pointers.
        /// </summary>
        public void RunBasicScenario_LoadAligned()
        {
            // Each operand is loaded inline as a call argument rather than through a local.
            Vector256<Single> fused = Fma.MultiplyAdd(Avx.LoadAlignedVector256((Single*)_dataTable.inArray1Ptr),
                                                      Avx.LoadAlignedVector256((Single*)_dataTable.inArray2Ptr),
                                                      Avx.LoadAlignedVector256((Single*)_dataTable.inArray3Ptr));

            Unsafe.Write(_dataTable.outArrayPtr, fused);
            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr);
        }
예제 #13
0
        /// <summary>
        /// Calls <c>Fma.MultiplyAdd</c> with three <c>Vector256&lt;Single&gt;</c> operands read
        /// via <c>Unsafe.Read</c> from the data table's input pointers, writes the result to
        /// <c>_dataTable.outArrayPtr</c> and calls <c>ValidateResult</c> with the input and
        /// output pointers.
        /// </summary>
        public void RunBasicScenario_UnsafeRead()
        {
            // Each operand is read inline as a call argument rather than through a local.
            Vector256<Single> fused = Fma.MultiplyAdd(Unsafe.Read<Vector256<Single>>(_dataTable.inArray1Ptr),
                                                      Unsafe.Read<Vector256<Single>>(_dataTable.inArray2Ptr),
                                                      Unsafe.Read<Vector256<Single>>(_dataTable.inArray3Ptr));

            Unsafe.Write(_dataTable.outArrayPtr, fused);
            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr);
        }
예제 #14
0
        /// <summary>
        /// Passes <c>_clsVar1</c>, <c>_clsVar2</c> and <c>_clsVar3</c> to
        /// <c>Fma.MultiplyAdd</c>, writes the result to <c>_dataTable.outArrayPtr</c>
        /// and calls <c>ValidateResult</c> with the same three values and that pointer.
        /// </summary>
        public void RunClsVarScenario()
        {
            // Operands are passed directly, without intermediate locals.
            var fused = Fma.MultiplyAdd(_clsVar1, _clsVar2, _clsVar3);

            Unsafe.Write(_dataTable.outArrayPtr, fused);
            ValidateResult(_clsVar1, _clsVar2, _clsVar3, _dataTable.outArrayPtr);
        }
예제 #15
0
        /// <summary>
        /// Announces the scenario, loads three operands into locals via
        /// <c>Avx.LoadVector256</c> from the data table's input pointers, combines them with
        /// <c>Fma.MultiplyAdd</c>, writes the result to <c>_dataTable.outArrayPtr</c> and
        /// calls <c>ValidateResult</c>.
        /// </summary>
        public void RunLclVarScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));

            Vector256<Single> left   = Avx.LoadVector256((Single*)_dataTable.inArray1Ptr);
            Vector256<Single> right  = Avx.LoadVector256((Single*)_dataTable.inArray2Ptr);
            Vector256<Single> addend = Avx.LoadVector256((Single*)_dataTable.inArray3Ptr);

            // All three operands are locals at the point of the intrinsic call.
            Vector256<Single> fused = Fma.MultiplyAdd(left, right, addend);

            Unsafe.Write(_dataTable.outArrayPtr, fused);
            ValidateResult(left, right, addend, _dataTable.outArrayPtr);
        }
예제 #16
0
        /// <summary>
        /// Announces the scenario, reads three <c>Vector128&lt;Double&gt;</c> operands into
        /// locals with <c>Unsafe.Read</c> from the data table's input pointers, combines them
        /// with <c>Fma.MultiplyAdd</c>, writes the result to <c>_dataTable.outArrayPtr</c>
        /// and calls <c>ValidateResult</c>.
        /// </summary>
        public void RunLclVarScenario_UnsafeRead()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));

            Vector128<Double> left   = Unsafe.Read<Vector128<Double>>(_dataTable.inArray1Ptr);
            Vector128<Double> right  = Unsafe.Read<Vector128<Double>>(_dataTable.inArray2Ptr);
            Vector128<Double> addend = Unsafe.Read<Vector128<Double>>(_dataTable.inArray3Ptr);

            // All three operands are locals at the point of the intrinsic call.
            Vector128<Double> fused = Fma.MultiplyAdd(left, right, addend);

            Unsafe.Write(_dataTable.outArrayPtr, fused);
            ValidateResult(left, right, addend, _dataTable.outArrayPtr);
        }
예제 #17
0
        /// <summary>
        /// Announces the scenario, loads three operands into locals via
        /// <c>Sse.LoadAlignedVector128</c> from the data table's input pointers, combines
        /// them with <c>Fma.MultiplyAdd</c>, writes the result to
        /// <c>_dataTable.outArrayPtr</c> and calls <c>ValidateResult</c>.
        /// </summary>
        public void RunLclVarScenario_LoadAligned()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));

            Vector128<Single> left   = Sse.LoadAlignedVector128((Single*)_dataTable.inArray1Ptr);
            Vector128<Single> right  = Sse.LoadAlignedVector128((Single*)_dataTable.inArray2Ptr);
            Vector128<Single> addend = Sse.LoadAlignedVector128((Single*)_dataTable.inArray3Ptr);

            // All three operands are locals at the point of the intrinsic call.
            Vector128<Single> fused = Fma.MultiplyAdd(left, right, addend);

            Unsafe.Write(_dataTable.outArrayPtr, fused);
            ValidateResult(left, right, addend, _dataTable.outArrayPtr);
        }
예제 #18
0
        /// <summary>
        /// Announces the scenario, reads three <c>Vector128&lt;Single&gt;</c> operands into
        /// locals with <c>Unsafe.Read</c> from the data table's input pointers, combines them
        /// with <c>Fma.MultiplyAdd</c>, writes the result to <c>_dataTable.outArrayPtr</c>
        /// and calls <c>ValidateResult</c>.
        /// </summary>
        public void RunLclVarScenario_UnsafeRead()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));

            Vector128<Single> left   = Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr);
            Vector128<Single> right  = Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr);
            Vector128<Single> addend = Unsafe.Read<Vector128<Single>>(_dataTable.inArray3Ptr);

            // All three operands are locals at the point of the intrinsic call.
            Vector128<Single> fused = Fma.MultiplyAdd(left, right, addend);

            Unsafe.Write(_dataTable.outArrayPtr, fused);
            ValidateResult(left, right, addend, _dataTable.outArrayPtr);
        }
 /// <summary>
 /// Computes <c>src1 * src2 + src3</c> per element, using <c>Fma.MultiplyAdd</c> when
 /// <c>Fma.IsSupported</c> is true and <c>Avx.Multiply</c> followed by <c>Avx.Add</c> otherwise.
 /// </summary>
 /// <param name="src1">First factor.</param>
 /// <param name="src2">Second factor.</param>
 /// <param name="src3">Addend.</param>
 /// <returns>The multiply-add result from whichever path was selected.</returns>
 private static Vector256 <float> MultiplyAdd(Vector256 <float> src1, Vector256 <float> src2, Vector256 <float> src3)
 {
     return Fma.IsSupported
         ? Fma.MultiplyAdd(src1, src2, src3)
         : Avx.Add(Avx.Multiply(src1, src2), src3);
 }
예제 #20
0
        /// <summary>
        /// Computes the dot product of two float buffers over their common length, using
        /// 256-bit fused multiply-add for whole vectors and scalar code for the remainder.
        /// </summary>
        /// <param name="mem1">First operand buffer.</param>
        /// <param name="mem2">Second operand buffer.</param>
        /// <returns>
        /// The sum of <c>mem1[i] * mem2[i]</c> over the common length, plus the raw values of
        /// any surplus elements when one buffer is longer than the other.
        /// </returns>
        public static float DotMultiplyIntrinsicWFma(ref Memory <float> mem1, ref Memory <float> mem2)
        {
            var span1   = mem1.Span;
            var span2   = mem2.Span;
            var cnt     = Math.Min(span1.Length, span2.Length);
            var vectLen = Vector256 <float> .Count;

            // Run the vector loop only when FMA is available; otherwise every element is
            // handled by the scalar remainder loop below instead of throwing.
            var vectCnt = Fma.IsSupported ? cnt / vectLen : 0;

            // The accumulator must start fully zeroed: CreateScalarUnsafe only defines
            // element 0, yet every lane is summed once the loop finishes.
            var v3 = Vector256 <float> .Zero;

#if TEST
            var file = Path.GetTempFileName();
            using var writer = new StreamWriter(file);
            Console.WriteLine($"Intrinsic with Fma Mult. results will be written into {file}");
#endif

            int i;
            unsafe
            {
                // Pin both buffers so the raw pointers stay valid even if the GC would
                // otherwise relocate the backing storage during the loop.
                fixed (float* p1 = span1)
                fixed (float* p2 = span2)
                {
                    for (i = 0; i < vectCnt; i++)
                    {
                        var index = i * vectLen;
                        var v1    = Avx.LoadVector256(p1 + index);
                        var v2    = Avx.LoadVector256(p2 + index);
                        v3 = Fma.MultiplyAdd(v1, v2, v3);
#if TEST
                        writer.WriteLine($"{v1.ToString()}\t{v2.ToString()}\t{v3.ToString()}");
#endif
                    }
                }
            }

            // Horizontal sum of the vector accumulator.
            var total = 0f;
            for (i = 0; i < vectLen; i++)
            {
                total += v3.GetElement(i);
            }

            // Scalar remainder: elements past the last whole vector.
            for (i = vectCnt * vectLen; i < cnt; i++)
            {
                total += span1[i] * span2[i];
            }

            // NOTE(review): surplus elements of the longer buffer are added unmultiplied;
            // kept as-is because callers may rely on it - confirm this is intended.
            if (span1.Length != span2.Length)
            {
                var h = span1.Length > span2.Length ? span1 : span2;
                for (var j = cnt; j < h.Length; j++)
                {
                    total += h[j];
                }
            }

            return total;
        }
예제 #21
0
        /// <summary>
        /// Announces the scenario, calls <c>Fma.MultiplyAdd</c> with three
        /// <c>Vector128&lt;Single&gt;</c> operands read via <c>Unsafe.Read</c> from the data
        /// table's input pointers, writes the result to <c>_dataTable.outArrayPtr</c> and
        /// calls <c>ValidateResult</c> with the input and output pointers.
        /// </summary>
        public void RunBasicScenario_UnsafeRead()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));

            // Each operand is read inline as a call argument rather than through a local.
            Vector128<Single> fused = Fma.MultiplyAdd(Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr),
                                                      Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr),
                                                      Unsafe.Read<Vector128<Single>>(_dataTable.inArray3Ptr));

            Unsafe.Write(_dataTable.outArrayPtr, fused);
            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr);
        }
예제 #22
0
 /// <summary>
 /// Builds three constant <c>Vector256&lt;double&gt;</c> operands and evaluates
 /// <c>Fma.MultiplyAdd</c> on them inside two nested single-iteration loops.
 /// The result is not used or returned.
 /// </summary>
 public void FMA()
 {
     for (var outer = 0; outer < 1; outer++)
     {
         var ascending  = Vector256.Create(0.0, 1.0, 2.0, 3.0);
         var descending = Vector256.Create(3.0, 2.0, 1.0, 0.0);
         var ones       = Vector256.Create(1.0, 1.0, 1.0, 1.0);

         // Operands are created once per outer iteration and reused by the inner loop.
         for (var inner = 0; inner < 1; inner++)
         {
             var fused = Fma.MultiplyAdd(ascending, descending, ones);
         }
     }
 }
예제 #23
0
        /// <summary>
        /// Announces the scenario, calls <c>Fma.MultiplyAdd</c> with three operands loaded
        /// via <c>Sse.LoadVector128</c> from the data table's input pointers, writes the
        /// result to <c>_dataTable.outArrayPtr</c> and calls <c>ValidateResult</c> with
        /// the input and output pointers.
        /// </summary>
        public void RunBasicScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));

            // Each operand is loaded inline as a call argument rather than through a local.
            Vector128<Single> fused = Fma.MultiplyAdd(Sse.LoadVector128((Single*)_dataTable.inArray1Ptr),
                                                      Sse.LoadVector128((Single*)_dataTable.inArray2Ptr),
                                                      Sse.LoadVector128((Single*)_dataTable.inArray3Ptr));

            Unsafe.Write(_dataTable.outArrayPtr, fused);
            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr);
        }
예제 #24
0
        /// <summary>
        /// Announces the scenario, calls <c>Fma.MultiplyAdd</c> with three operands loaded
        /// via <c>Avx.LoadAlignedVector256</c> from the data table's input pointers, writes
        /// the result to <c>_dataTable.outArrayPtr</c> and calls <c>ValidateResult</c> with
        /// the input and output pointers.
        /// </summary>
        public void RunBasicScenario_LoadAligned()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));

            // Each operand is loaded inline as a call argument rather than through a local.
            Vector256<Double> fused = Fma.MultiplyAdd(Avx.LoadAlignedVector256((Double*)_dataTable.inArray1Ptr),
                                                      Avx.LoadAlignedVector256((Double*)_dataTable.inArray2Ptr),
                                                      Avx.LoadAlignedVector256((Double*)_dataTable.inArray3Ptr));

            Unsafe.Write(_dataTable.outArrayPtr, fused);
            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr);
        }
예제 #25
0
        /// <summary>
        /// Reinterprets the first <c>numberOfFloatItems</c> floats of <c>data</c>,
        /// <c>data2</c>, <c>data3</c> and <c>result</c> as <c>Vector256&lt;float&gt;</c> spans
        /// and, for each vector position, chains three <c>Fma.MultiplyAdd</c> calls whose
        /// output is stored into the result span after every step.
        /// </summary>
        public void Vector256FloatMultipleOps()
        {
            // View the leading numberOfFloatItems floats of an array as 256-bit vectors.
            Span<Vector256<float>> AsVectors(float[] source)
            {
                return MemoryMarshal.Cast<float, Vector256<float>>(new Span<float>(source, 0, numberOfFloatItems));
            }

            ReadOnlySpan<Vector256<float>> first  = AsVectors(data);
            ReadOnlySpan<Vector256<float>> second = AsVectors(data2);
            ReadOnlySpan<Vector256<float>> third  = AsVectors(data3);
            Span<Vector256<float>>         output = AsVectors(result);

            for (int slot = 0; slot < first.Length; ++slot)
            {
                // Each step stores into the output span before the next step reads it back.
                output[slot] = Fma.MultiplyAdd(first[slot], second[slot], third[slot]);
                output[slot] = Fma.MultiplyAdd(output[slot], first[slot], first[slot]);
                output[slot] = Fma.MultiplyAdd(first[slot], second[slot], output[slot]);
            }
        }
예제 #26
0
        /// <summary>
        /// Scratch method that runs several SSE/AVX/AVX2/FMA load, convert and multiply
        /// intrinsics over the leading bytes of <paramref name="vs"/>. Every intermediate
        /// value is a local; nothing is returned or stored.
        /// </summary>
        /// <param name="vs">Input buffer; must hold at least 64 bytes because two 32-byte vectors are loaded.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="vs"/> is null.</exception>
        /// <exception cref="System.ArgumentException"><paramref name="vs"/> is shorter than 64 bytes.</exception>
        private unsafe void TestAddSum(byte[] vs)
        {
            // Two consecutive 256-bit loads (p and p + 32) read bytes 0..63.
            const int requiredBytes = 64;

            if (vs == null)
            {
                throw new System.ArgumentNullException(nameof(vs));
            }

            // Without this check the loads below would read past the end of the array
            // (or dereference a null pointer for an empty array).
            if (vs.Length < requiredBytes)
            {
                throw new System.ArgumentException("At least 64 bytes are required.", nameof(vs));
            }

            fixed(byte *p = vs)
            {
                var v  = Avx.LoadVector256(p);
                var v2 = Avx.LoadVector256(p + 32);
                //Avx.MultipleSumAbsoluteDifferences;

                // bytes -> int32 -> float, then an element-wise square.
                Vector256 <int>   i1 = Avx2.ConvertToVector256Int32(p);
                Vector256 <float> f1 = Avx.ConvertToVector256Single(i1);
                Vector256 <float> m1 = Avx.Multiply(f1, f1);

                // bytes -> int32 -> double, squared through FMA with a zero addend.
                Vector128 <int>    i128 = Sse41.ConvertToVector128Int32(p);
                Vector256 <double> d256 = Avx.ConvertToVector256Double(i128);
                var dZero = Vector256 <double> .Zero;
                Vector256 <double> ma1 = Fma.MultiplyAdd(d256, d256, dZero);

                // Same square through FMA, in single precision.
                var i256  = Avx2.ConvertToVector256Int32(p);
                var f256  = Avx.ConvertToVector256Single(i256);
                var fZero = Vector256 <float> .Zero;
                var ma2   = Fma.MultiplyAdd(f256, f256, fZero);

                Vector128 <float> s128 = Sse2.ConvertToVector128Single(i128);
                Vector128 <float> ms   = Sse.MultiplyScalar(s128, s128);

                // x86 / x64 SIMD instruction reference table (SSE to AVX2)
                // https://www.officedaytime.com/tips/simd.html
                // pmaddwd
                // https://www.officedaytime.com/tips/simdimg/si.php?f=pmaddwd

                Vector128 <short> sh128 = Sse41.ConvertToVector128Int16(p);
                Vector128 <int>   vv3   = Avx.MultiplyAddAdjacent(sh128, sh128);

                var neko = 0;
                // Other intrinsic names noted for reference:
                //Avx.MultiplyAddAdjacent;
                //Avx.MultiplyHigh;
                //Avx.MultiplyHighRoundScale;
                //Avx.MultiplyLow;
                //Avx.MultiplyScalar;
                //Fma.MultiplyAdd;
                //Fma.MultiplyAddNegated;
                //Fma.MultiplyAddNegatedScalar;
                //Fma.MultiplyAddScalar;
                //Fma.MultiplyAddSubtract;
                //Fma.MultiplySubtract;
                //Fma.MultiplySubtractAdd;
                //Fma.MultiplySubtractNegated;
                //Fma.MultiplySubtractNegatedScalar;
                //Fma.MultiplySubtractScalar;
            }
        }
예제 #27
0
        /// <summary>
        /// Announces the scenario, obtains a local from <c>TestStruct.Create()</c>, loads its
        /// three fields through their addresses with <c>Sse.LoadVector128</c> as arguments to
        /// <c>Fma.MultiplyAdd</c>, writes the result to <c>_dataTable.outArrayPtr</c> and
        /// calls <c>ValidateResult</c>.
        /// </summary>
        public void RunStructLclFldScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario_Load));

            var local = TestStruct.Create();

            // Each operand is loaded inline from the address of a field of the local.
            Vector128<Single> fused = Fma.MultiplyAdd(Sse.LoadVector128((Single*)(&local._fld1)),
                                                      Sse.LoadVector128((Single*)(&local._fld2)),
                                                      Sse.LoadVector128((Single*)(&local._fld3)));

            Unsafe.Write(_dataTable.outArrayPtr, fused);
            ValidateResult(local._fld1, local._fld2, local._fld3, _dataTable.outArrayPtr);
        }
예제 #28
0
            /// <summary>
            /// Pins this instance's <c>_fld1</c>, <c>_fld2</c> and <c>_fld3</c>, loads them with
            /// <c>Sse.LoadVector128</c> as arguments to <c>Fma.MultiplyAdd</c>, writes the result
            /// to the output pointer of <paramref name="testClass"/> and validates through it.
            /// </summary>
            /// <param name="testClass">Test object supplying <c>_dataTable</c> and <c>ValidateResult</c>.</param>
            public void RunStructFldScenario_Load(SimpleTernaryOpTest__MultiplyAddSingle testClass)
            {
                fixed (Vector128<Single>* first = &_fld1)
                fixed (Vector128<Single>* second = &_fld2)
                fixed (Vector128<Single>* third = &_fld3)
                {
                    // Each operand is loaded inline through its pinned field pointer.
                    Vector128<Single> fused = Fma.MultiplyAdd(Sse.LoadVector128((Single*)first),
                                                              Sse.LoadVector128((Single*)second),
                                                              Sse.LoadVector128((Single*)third));

                    Unsafe.Write(testClass._dataTable.outArrayPtr, fused);
                    testClass.ValidateResult(_fld1, _fld2, _fld3, testClass._dataTable.outArrayPtr);
                }
            }
예제 #29
0
        /// <summary>
        /// Announces the scenario, pins <c>_clsVar1</c>, <c>_clsVar2</c> and <c>_clsVar3</c>,
        /// loads them with <c>Sse.LoadVector128</c> as arguments to <c>Fma.MultiplyAdd</c>,
        /// writes the result to <c>_dataTable.outArrayPtr</c> and calls <c>ValidateResult</c>.
        /// </summary>
        public void RunClsVarScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario_Load));

            fixed (Vector128<Single>* first = &_clsVar1)
            fixed (Vector128<Single>* second = &_clsVar2)
            fixed (Vector128<Single>* third = &_clsVar3)
            {
                // Each operand is loaded inline through its pinned pointer.
                Vector128<Single> fused = Fma.MultiplyAdd(Sse.LoadVector128((Single*)first),
                                                          Sse.LoadVector128((Single*)second),
                                                          Sse.LoadVector128((Single*)third));

                Unsafe.Write(_dataTable.outArrayPtr, fused);
                ValidateResult(_clsVar1, _clsVar2, _clsVar3, _dataTable.outArrayPtr);
            }
        }
예제 #30
0
        /// <summary>
        /// Announces the scenario, pins this instance's <c>_fld1</c>, <c>_fld2</c> and
        /// <c>_fld3</c>, loads them with <c>Sse.LoadVector128</c> as arguments to
        /// <c>Fma.MultiplyAdd</c>, writes the result to <c>_dataTable.outArrayPtr</c> and
        /// calls <c>ValidateResult</c>.
        /// </summary>
        public void RunClassFldScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario_Load));

            fixed (Vector128<Single>* first = &_fld1)
            fixed (Vector128<Single>* second = &_fld2)
            fixed (Vector128<Single>* third = &_fld3)
            {
                // Each operand is loaded inline through its pinned field pointer.
                Vector128<Single> fused = Fma.MultiplyAdd(Sse.LoadVector128((Single*)first),
                                                          Sse.LoadVector128((Single*)second),
                                                          Sse.LoadVector128((Single*)third));

                Unsafe.Write(_dataTable.outArrayPtr, fused);
                ValidateResult(_fld1, _fld2, _fld3, _dataTable.outArrayPtr);
            }
        }