public static Vector256 <double> FusedMultiplyAdd(Vector256 <double> x, Vector256 <double> y, Vector256 <double> z) { if (Fma.IsSupported) { return(Fma.MultiplyAdd(x, y, z)); } return(SoftwareFallback(x, y, z));
/// <summary>
/// Passes the three fields of a freshly constructed
/// <c>SimpleTernaryOpTest__MultiplyAddDouble</c> to <c>Fma.MultiplyAdd</c>,
/// writes the result to the data table's output buffer and validates it.
/// </summary>
public void RunLclFldScenario()
{
    var instance = new SimpleTernaryOpTest__MultiplyAddDouble();

    var fused = Fma.MultiplyAdd(instance._fld1, instance._fld2, instance._fld3);
    Unsafe.Write(_dataTable.outArrayPtr, fused);

    ValidateResult(instance._fld1, instance._fld2, instance._fld3, _dataTable.outArrayPtr);
}
/// <summary>
/// Passes the three fields of a local <c>TestStruct</c> (obtained from
/// <c>TestStruct.Create()</c>) to <c>Fma.MultiplyAdd</c>, writes the result
/// to the data table's output buffer and validates it.
/// </summary>
public void RunStructLclFldScenario()
{
    var localStruct = TestStruct.Create();

    var fused = Fma.MultiplyAdd(localStruct._fld1, localStruct._fld2, localStruct._fld3);
    Unsafe.Write(_dataTable.outArrayPtr, fused);

    ValidateResult(localStruct._fld1, localStruct._fld2, localStruct._fld3, _dataTable.outArrayPtr);
}
/// <summary>
/// Announces the scenario, then passes this instance's <c>_fld1</c>, <c>_fld2</c>
/// and <c>_fld3</c> to <c>Fma.MultiplyAdd</c>, writes the result to the data
/// table's output buffer and validates it.
/// </summary>
public void RunClassFldScenario()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));

    var fused = Fma.MultiplyAdd(_fld1, _fld2, _fld3);
    Unsafe.Write(_dataTable.outArrayPtr, fused);

    ValidateResult(_fld1, _fld2, _fld3, _dataTable.outArrayPtr);
}
/// <summary>
/// Announces the scenario, then passes the three fields of a freshly constructed
/// <c>SimpleTernaryOpTest__MultiplyAddSingle</c> to <c>Fma.MultiplyAdd</c>,
/// writes the result to the data table's output buffer and validates it.
/// </summary>
public void RunClassLclFldScenario()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));

    var instance = new SimpleTernaryOpTest__MultiplyAddSingle();

    var fused = Fma.MultiplyAdd(instance._fld1, instance._fld2, instance._fld3);
    Unsafe.Write(_dataTable.outArrayPtr, fused);

    ValidateResult(instance._fld1, instance._fld2, instance._fld3, _dataTable.outArrayPtr);
}
/// <summary>
/// Announces the scenario, then passes the three fields of a local
/// <c>TestStruct</c> (obtained from <c>TestStruct.Create()</c>) to
/// <c>Fma.MultiplyAdd</c>, writes the result to the data table's output
/// buffer and validates it.
/// </summary>
public void RunStructLclFldScenario()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));

    var localStruct = TestStruct.Create();

    var fused = Fma.MultiplyAdd(localStruct._fld1, localStruct._fld2, localStruct._fld3);
    Unsafe.Write(_dataTable.outArrayPtr, fused);

    ValidateResult(localStruct._fld1, localStruct._fld2, localStruct._fld3, _dataTable.outArrayPtr);
}
/// <summary>
/// Reads the three operands into locals with <c>Unsafe.Read</c>, passes them to
/// <c>Fma.MultiplyAdd</c>, writes the result to the data table's output buffer
/// and validates it against the same locals.
/// </summary>
public void RunLclVarScenario_UnsafeRead()
{
    var multiplicand = Unsafe.Read<Vector256<Single>>(_dataTable.inArray1Ptr);
    var multiplier = Unsafe.Read<Vector256<Single>>(_dataTable.inArray2Ptr);
    var addend = Unsafe.Read<Vector256<Single>>(_dataTable.inArray3Ptr);

    var fused = Fma.MultiplyAdd(multiplicand, multiplier, addend);
    Unsafe.Write(_dataTable.outArrayPtr, fused);

    ValidateResult(multiplicand, multiplier, addend, _dataTable.outArrayPtr);
}
/// <summary>
/// Loads the three operands into locals with <c>Avx.LoadAlignedVector256</c>,
/// passes them to <c>Fma.MultiplyAdd</c>, writes the result to the data table's
/// output buffer and validates it against the same locals.
/// </summary>
public void RunLclVarScenario_LoadAligned()
{
    var multiplicand = Avx.LoadAlignedVector256((Single*)(_dataTable.inArray1Ptr));
    var multiplier = Avx.LoadAlignedVector256((Single*)(_dataTable.inArray2Ptr));
    var addend = Avx.LoadAlignedVector256((Single*)(_dataTable.inArray3Ptr));

    var fused = Fma.MultiplyAdd(multiplicand, multiplier, addend);
    Unsafe.Write(_dataTable.outArrayPtr, fused);

    ValidateResult(multiplicand, multiplier, addend, _dataTable.outArrayPtr);
}
/// <summary>
/// Computes <c>a * b + c</c>. Uses <c>Fma.MultiplyAdd</c> when FMA is supported;
/// otherwise falls back to <c>Add(Mul(a, b), c)</c>.
/// </summary>
/// <param name="a">First factor.</param>
/// <param name="b">Second factor.</param>
/// <param name="c">Value added to the product.</param>
/// <returns>The multiply-add result.</returns>
public static f32 FMulAdd_f32(f32 a, f32 b, f32 c)
{
    // No FMA hardware support: multiply and add as two separate operations.
    if (!Fma.IsSupported)
    {
        return Add(Mul(a, b), c);
    }

    return Fma.MultiplyAdd(a, b, c);
}
/// <summary>
/// Calls <c>Fma.MultiplyAdd</c> with operands loaded inline through
/// <c>Sse2.LoadVector128</c>, writes the result to the data table's output
/// buffer and validates it against the input buffers.
/// </summary>
public void RunBasicScenario_Load()
{
    // The loads are kept inline as call arguments rather than stored in locals.
    var fused = Fma.MultiplyAdd(
        Sse2.LoadVector128((Double*)(_dataTable.inArray1Ptr)),
        Sse2.LoadVector128((Double*)(_dataTable.inArray2Ptr)),
        Sse2.LoadVector128((Double*)(_dataTable.inArray3Ptr)));

    Unsafe.Write(_dataTable.outArrayPtr, fused);

    ValidateResult(
        _dataTable.inArray1Ptr,
        _dataTable.inArray2Ptr,
        _dataTable.inArray3Ptr,
        _dataTable.outArrayPtr);
}
/// <summary>
/// Computes <c>a * b + c</c> with <c>Fma.MultiplyAdd</c>, where the first operand
/// is loaded from the address of the local <c>a</c>, then adds <c>d</c> and
/// returns element 0 of the sum.
/// </summary>
/// <returns>The first element of <c>(a * 1 + 2) + 3</c>.</returns>
static unsafe float fmaTest()
{
    // NOTE(review): 'a' is never assigned in this method; its contents depend on
    // how 'vec' is declared and on local zero-initialization — confirm this is intended.
    vec a;
    var b = Vector128.Create(1f);
    var c = Vector128.Create(2f);
    var d = Vector128.Create(3f);
    // The first operand comes straight from memory at &a, reinterpreted as float*.
    c = Fma.MultiplyAdd(Sse.LoadVector128((float *)&a), b, c);
    return(Sse.Add(c, d).ToScalar());
}
/// <summary>
/// Calls <c>Fma.MultiplyAdd</c> with operands loaded inline through
/// <c>Avx.LoadAlignedVector256</c>, writes the result to the data table's output
/// buffer and validates it against the input buffers.
/// </summary>
public void RunBasicScenario_LoadAligned()
{
    // The loads are kept inline as call arguments rather than stored in locals.
    var fused = Fma.MultiplyAdd(
        Avx.LoadAlignedVector256((Single*)(_dataTable.inArray1Ptr)),
        Avx.LoadAlignedVector256((Single*)(_dataTable.inArray2Ptr)),
        Avx.LoadAlignedVector256((Single*)(_dataTable.inArray3Ptr)));

    Unsafe.Write(_dataTable.outArrayPtr, fused);

    ValidateResult(
        _dataTable.inArray1Ptr,
        _dataTable.inArray2Ptr,
        _dataTable.inArray3Ptr,
        _dataTable.outArrayPtr);
}
/// <summary>
/// Calls <c>Fma.MultiplyAdd</c> with operands read inline through
/// <c>Unsafe.Read</c>, writes the result to the data table's output buffer and
/// validates it against the input buffers.
/// </summary>
public void RunBasicScenario_UnsafeRead()
{
    // The reads are kept inline as call arguments rather than stored in locals.
    var fused = Fma.MultiplyAdd(
        Unsafe.Read<Vector256<Single>>(_dataTable.inArray1Ptr),
        Unsafe.Read<Vector256<Single>>(_dataTable.inArray2Ptr),
        Unsafe.Read<Vector256<Single>>(_dataTable.inArray3Ptr));

    Unsafe.Write(_dataTable.outArrayPtr, fused);

    ValidateResult(
        _dataTable.inArray1Ptr,
        _dataTable.inArray2Ptr,
        _dataTable.inArray3Ptr,
        _dataTable.outArrayPtr);
}
/// <summary>
/// Passes <c>_clsVar1</c>, <c>_clsVar2</c> and <c>_clsVar3</c> to
/// <c>Fma.MultiplyAdd</c>, writes the result to the data table's output buffer
/// and validates it.
/// </summary>
public void RunClsVarScenario()
{
    var fused = Fma.MultiplyAdd(_clsVar1, _clsVar2, _clsVar3);

    Unsafe.Write(_dataTable.outArrayPtr, fused);

    ValidateResult(_clsVar1, _clsVar2, _clsVar3, _dataTable.outArrayPtr);
}
/// <summary>
/// Announces the scenario, loads the three operands into locals with
/// <c>Avx.LoadVector256</c>, passes them to <c>Fma.MultiplyAdd</c>, writes the
/// result to the data table's output buffer and validates it against the locals.
/// </summary>
public void RunLclVarScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));

    var multiplicand = Avx.LoadVector256((Single*)(_dataTable.inArray1Ptr));
    var multiplier = Avx.LoadVector256((Single*)(_dataTable.inArray2Ptr));
    var addend = Avx.LoadVector256((Single*)(_dataTable.inArray3Ptr));

    var fused = Fma.MultiplyAdd(multiplicand, multiplier, addend);
    Unsafe.Write(_dataTable.outArrayPtr, fused);

    ValidateResult(multiplicand, multiplier, addend, _dataTable.outArrayPtr);
}
/// <summary>
/// Announces the scenario, reads the three <c>Vector128&lt;Double&gt;</c> operands
/// into locals with <c>Unsafe.Read</c>, passes them to <c>Fma.MultiplyAdd</c>,
/// writes the result to the data table's output buffer and validates it against
/// the locals.
/// </summary>
public void RunLclVarScenario_UnsafeRead()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));

    var multiplicand = Unsafe.Read<Vector128<Double>>(_dataTable.inArray1Ptr);
    var multiplier = Unsafe.Read<Vector128<Double>>(_dataTable.inArray2Ptr);
    var addend = Unsafe.Read<Vector128<Double>>(_dataTable.inArray3Ptr);

    var fused = Fma.MultiplyAdd(multiplicand, multiplier, addend);
    Unsafe.Write(_dataTable.outArrayPtr, fused);

    ValidateResult(multiplicand, multiplier, addend, _dataTable.outArrayPtr);
}
/// <summary>
/// Announces the scenario, loads the three operands into locals with
/// <c>Sse.LoadAlignedVector128</c>, passes them to <c>Fma.MultiplyAdd</c>, writes
/// the result to the data table's output buffer and validates it against the locals.
/// </summary>
public void RunLclVarScenario_LoadAligned()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));

    var multiplicand = Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr));
    var multiplier = Sse.LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr));
    var addend = Sse.LoadAlignedVector128((Single*)(_dataTable.inArray3Ptr));

    var fused = Fma.MultiplyAdd(multiplicand, multiplier, addend);
    Unsafe.Write(_dataTable.outArrayPtr, fused);

    ValidateResult(multiplicand, multiplier, addend, _dataTable.outArrayPtr);
}
/// <summary>
/// Announces the scenario, reads the three <c>Vector128&lt;Single&gt;</c> operands
/// into locals with <c>Unsafe.Read</c>, passes them to <c>Fma.MultiplyAdd</c>,
/// writes the result to the data table's output buffer and validates it against
/// the locals.
/// </summary>
public void RunLclVarScenario_UnsafeRead()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));

    var multiplicand = Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr);
    var multiplier = Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr);
    var addend = Unsafe.Read<Vector128<Single>>(_dataTable.inArray3Ptr);

    var fused = Fma.MultiplyAdd(multiplicand, multiplier, addend);
    Unsafe.Write(_dataTable.outArrayPtr, fused);

    ValidateResult(multiplicand, multiplier, addend, _dataTable.outArrayPtr);
}
/// <summary>
/// Computes <c>src1 * src2 + src3</c> element-wise. Uses <c>Fma.MultiplyAdd</c>
/// when FMA is supported; otherwise performs a separate <c>Avx.Multiply</c>
/// followed by <c>Avx.Add</c>.
/// </summary>
/// <param name="src1">First factor.</param>
/// <param name="src2">Second factor.</param>
/// <param name="src3">Vector added to the product.</param>
/// <returns>The element-wise multiply-add result.</returns>
private static Vector256<float> MultiplyAdd(Vector256<float> src1, Vector256<float> src2, Vector256<float> src3)
{
    // Without FMA the product is computed first and then added to src3.
    if (!Fma.IsSupported)
    {
        return Avx.Add(Avx.Multiply(src1, src2), src3);
    }

    return Fma.MultiplyAdd(src1, src2, src3);
}
/// <summary>
/// Computes the dot product of two float buffers, accumulating eight products at
/// a time with <c>Fma.MultiplyAdd</c> and finishing the remainder with scalar code.
/// </summary>
/// <remarks>
/// Only the first <c>min(length1, length2)</c> elements are multiplied pairwise.
/// When the lengths differ, the surplus elements of the longer buffer are added
/// to the total unchanged. The vector path calls AVX/FMA intrinsics without an
/// <c>IsSupported</c> check, so it is only entered when at least eight element
/// pairs are available.
/// </remarks>
/// <param name="mem1">First operand buffer.</param>
/// <param name="mem2">Second operand buffer.</param>
/// <returns>The accumulated sum described above.</returns>
public static float DotMultiplyIntrinsicWFma(ref Memory<float> mem1, ref Memory<float> mem2)
{
    var span1 = mem1.Span;
    var span2 = mem2.Span;
    var cnt = Math.Min(span1.Length, span2.Length);

    // Every lane of the accumulator is summed below, so all of them must start at
    // zero. Vector256.CreateScalarUnsafe(0f) only defines lane 0 and leaves the
    // other seven lanes uninitialized.
    var v3 = Vector256<float>.Zero;
    var vectLen = Vector256<float>.Count;
    var vectCnt = cnt / vectLen;

#if TEST
    var file = Path.GetTempFileName();
    using var writer = new StreamWriter(file);
    Console.WriteLine($"Intrinsic with Fma Mult. results will be written into {file}");
#endif

    int i;
    unsafe
    {
        // Pin both buffers for the duration of the raw-pointer loads; an unpinned
        // pointer obtained through Unsafe.AsPointer can be invalidated if the GC
        // relocates the backing array.
        fixed (float* p1 = span1)
        fixed (float* p2 = span2)
        {
            for (i = 0; i < vectCnt; i++)
            {
                var index = i * vectLen;
                var v1 = Avx.LoadVector256(p1 + index);
                var v2 = Avx.LoadVector256(p2 + index);
                v3 = Fma.MultiplyAdd(v1, v2, v3);
#if TEST
                writer.WriteLine($"{v1.ToString()}\t{v2.ToString()}\t{v3.ToString()}");
#endif
            }
        }
    }

    // Horizontal sum of the eight accumulator lanes.
    var total = 0f;
    for (i = 0; i < vectLen; i++)
    {
        total += v3.GetElement(i);
    }

    // Scalar tail: element pairs that did not fill a whole vector.
    for (i = vectCnt * vectLen; i < cnt; i++)
    {
        total += span1[i] * span2[i];
    }

    // Surplus elements of the longer buffer are added as-is.
    if (span1.Length != span2.Length)
    {
        var h = span1.Length > span2.Length ? span1 : span2;
        for (var j = cnt; j < h.Length; j++)
        {
            total += h[j];
        }
    }

    return total;
}
/// <summary>
/// Announces the scenario, calls <c>Fma.MultiplyAdd</c> with
/// <c>Vector128&lt;Single&gt;</c> operands read inline through <c>Unsafe.Read</c>,
/// writes the result to the data table's output buffer and validates it against
/// the input buffers.
/// </summary>
public void RunBasicScenario_UnsafeRead()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));

    // The reads are kept inline as call arguments rather than stored in locals.
    var fused = Fma.MultiplyAdd(
        Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr),
        Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr),
        Unsafe.Read<Vector128<Single>>(_dataTable.inArray3Ptr));

    Unsafe.Write(_dataTable.outArrayPtr, fused);

    ValidateResult(
        _dataTable.inArray1Ptr,
        _dataTable.inArray2Ptr,
        _dataTable.inArray3Ptr,
        _dataTable.outArrayPtr);
}
/// <summary>
/// Builds three constant <c>Vector256&lt;double&gt;</c> operands and evaluates
/// <c>Fma.MultiplyAdd</c> on them inside two nested single-iteration loops.
/// The result is not stored or returned.
/// </summary>
public void FMA()
{
    for (var outer = 0; outer < 1; outer++)
    {
        var multiplicand = Vector256.Create(0.0, 1.0, 2.0, 3.0);
        var multiplier = Vector256.Create(3.0, 2.0, 1.0, 0.0);
        var addend = Vector256.Create(1.0, 1.0, 1.0, 1.0);

        for (var inner = 0; inner < 1; inner++)
        {
            var fused = Fma.MultiplyAdd(multiplicand, multiplier, addend);
        }
    }
}
/// <summary>
/// Announces the scenario, calls <c>Fma.MultiplyAdd</c> with operands loaded
/// inline through <c>Sse.LoadVector128</c>, writes the result to the data table's
/// output buffer and validates it against the input buffers.
/// </summary>
public void RunBasicScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));

    // The loads are kept inline as call arguments rather than stored in locals.
    var fused = Fma.MultiplyAdd(
        Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr)),
        Sse.LoadVector128((Single*)(_dataTable.inArray2Ptr)),
        Sse.LoadVector128((Single*)(_dataTable.inArray3Ptr)));

    Unsafe.Write(_dataTable.outArrayPtr, fused);

    ValidateResult(
        _dataTable.inArray1Ptr,
        _dataTable.inArray2Ptr,
        _dataTable.inArray3Ptr,
        _dataTable.outArrayPtr);
}
/// <summary>
/// Announces the scenario, calls <c>Fma.MultiplyAdd</c> with <c>Double</c>
/// operands loaded inline through <c>Avx.LoadAlignedVector256</c>, writes the
/// result to the data table's output buffer and validates it against the input
/// buffers.
/// </summary>
public void RunBasicScenario_LoadAligned()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));

    // The loads are kept inline as call arguments rather than stored in locals.
    var fused = Fma.MultiplyAdd(
        Avx.LoadAlignedVector256((Double*)(_dataTable.inArray1Ptr)),
        Avx.LoadAlignedVector256((Double*)(_dataTable.inArray2Ptr)),
        Avx.LoadAlignedVector256((Double*)(_dataTable.inArray3Ptr)));

    Unsafe.Write(_dataTable.outArrayPtr, fused);

    ValidateResult(
        _dataTable.inArray1Ptr,
        _dataTable.inArray2Ptr,
        _dataTable.inArray3Ptr,
        _dataTable.outArrayPtr);
}
/// <summary>
/// Reinterprets the first <c>numberOfFloatItems</c> floats of <c>data</c>,
/// <c>data2</c>, <c>data3</c> and <c>result</c> as <c>Vector256&lt;float&gt;</c>
/// spans and, for each vector index, chains three <c>Fma.MultiplyAdd</c> calls,
/// storing every intermediate value back into the result span.
/// </summary>
public void Vector256FloatMultipleOps()
{
    ReadOnlySpan<Vector256<float>> first =
        MemoryMarshal.Cast<float, Vector256<float>>(new Span<float>(data, 0, numberOfFloatItems));
    ReadOnlySpan<Vector256<float>> second =
        MemoryMarshal.Cast<float, Vector256<float>>(new Span<float>(data2, 0, numberOfFloatItems));
    ReadOnlySpan<Vector256<float>> third =
        MemoryMarshal.Cast<float, Vector256<float>>(new Span<float>(data3, 0, numberOfFloatItems));
    Span<Vector256<float>> output =
        MemoryMarshal.Cast<float, Vector256<float>>(new Span<float>(result, 0, numberOfFloatItems));

    for (int idx = 0; idx < first.Length; idx++)
    {
        // Each step writes to and re-reads from the output slot, exactly as three
        // separate store/load round trips.
        output[idx] = Fma.MultiplyAdd(first[idx], second[idx], third[idx]);
        output[idx] = Fma.MultiplyAdd(output[idx], first[idx], first[idx]);
        output[idx] = Fma.MultiplyAdd(first[idx], second[idx], output[idx]);
    }
}
/// <summary>
/// Scratch method that exercises a range of conversion, multiply and fused
/// multiply-add intrinsics on the start of <paramref name="vs"/>. Every result is
/// kept only in a local, so the method has no observable output.
/// </summary>
/// <param name="vs">Source bytes. Must contain at least 64 bytes.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="vs"/> is null.</exception>
/// <exception cref="System.ArgumentException"><paramref name="vs"/> is shorter than 64 bytes.</exception>
private unsafe void TestAddSum(byte[] vs)
{
    // The two 32-byte loads below read bytes 0..63. Without this check a shorter
    // array would be read past its end through the raw pointer.
    const int RequiredBytes = 64;
    if (vs == null)
    {
        throw new System.ArgumentNullException(nameof(vs));
    }
    if (vs.Length < RequiredBytes)
    {
        throw new System.ArgumentException("At least 64 bytes are required.", nameof(vs));
    }

    fixed (byte* p = vs)
    {
        var v = Avx.LoadVector256(p);
        var v2 = Avx.LoadVector256(p + 32);
        // Candidate intrinsic: Avx.MultipleSumAbsoluteDifferences

        // bytes -> int32 -> float, then squared with a plain multiply
        Vector256<int> i1 = Avx2.ConvertToVector256Int32(p);
        Vector256<float> f1 = Avx.ConvertToVector256Single(i1);
        Vector256<float> m1 = Avx.Multiply(f1, f1);

        // bytes -> int32 -> double, then squared via FMA with a zero addend
        Vector128<int> i128 = Sse41.ConvertToVector128Int32(p);
        Vector256<double> d256 = Avx.ConvertToVector256Double(i128);
        var dZero = Vector256<double>.Zero;
        Vector256<double> ma1 = Fma.MultiplyAdd(d256, d256, dZero);

        // bytes -> int32 -> float, then squared via FMA with a zero addend
        var i256 = Avx2.ConvertToVector256Int32(p);
        var f256 = Avx.ConvertToVector256Single(i256);
        var fZero = Vector256<float>.Zero;
        var ma2 = Fma.MultiplyAdd(f256, f256, fZero);

        Vector128<float> s128 = Sse2.ConvertToVector128Single(i128);
        Vector128<float> ms = Sse.MultiplyScalar(s128, s128);

        // x86 / x64 SIMD instruction reference table (SSE through AVX2)
        // https://www.officedaytime.com/tips/simd.html
        // pmaddwd
        // https://www.officedaytime.com/tips/simdimg/si.php?f=pmaddwd
        Vector128<short> sh128 = Sse41.ConvertToVector128Int16(p);
        Vector128<int> vv3 = Avx.MultiplyAddAdjacent(sh128, sh128);

        var neko = 0;

        // Other multiply-related intrinsics noted for reference:
        //   Avx.MultiplyAddAdjacent, Avx.MultiplyHigh, Avx.MultiplyHighRoundScale,
        //   Avx.MultiplyLow, Avx.MultiplyScalar,
        //   Fma.MultiplyAdd, Fma.MultiplyAddNegated, Fma.MultiplyAddNegatedScalar,
        //   Fma.MultiplyAddScalar, Fma.MultiplyAddSubtract, Fma.MultiplySubtract,
        //   Fma.MultiplySubtractAdd, Fma.MultiplySubtractNegated,
        //   Fma.MultiplySubtractNegatedScalar, Fma.MultiplySubtractScalar
    }
}
/// <summary>
/// Announces the scenario, creates a local <c>TestStruct</c>, loads its three
/// fields through <c>Sse.LoadVector128</c> from their addresses, passes them to
/// <c>Fma.MultiplyAdd</c>, writes the result to the data table's output buffer
/// and validates it.
/// </summary>
public void RunStructLclFldScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario_Load));

    var localStruct = TestStruct.Create();

    // Operands are loaded directly from the addresses of the local struct's fields.
    var fused = Fma.MultiplyAdd(
        Sse.LoadVector128((Single*)(&localStruct._fld1)),
        Sse.LoadVector128((Single*)(&localStruct._fld2)),
        Sse.LoadVector128((Single*)(&localStruct._fld3)));

    Unsafe.Write(_dataTable.outArrayPtr, fused);

    ValidateResult(localStruct._fld1, localStruct._fld2, localStruct._fld3, _dataTable.outArrayPtr);
}
/// <summary>
/// Pins this instance's <c>_fld1</c>, <c>_fld2</c> and <c>_fld3</c>, loads them
/// through <c>Sse.LoadVector128</c>, passes them to <c>Fma.MultiplyAdd</c>, writes
/// the result to <paramref name="testClass"/>'s output buffer and asks
/// <paramref name="testClass"/> to validate it.
/// </summary>
/// <param name="testClass">Test object that supplies the output buffer and the validation routine.</param>
public void RunStructFldScenario_Load(SimpleTernaryOpTest__MultiplyAddSingle testClass)
{
    fixed (Vector128<Single>* first = &_fld1)
    fixed (Vector128<Single>* second = &_fld2)
    fixed (Vector128<Single>* third = &_fld3)
    {
        var fused = Fma.MultiplyAdd(
            Sse.LoadVector128((Single*)(first)),
            Sse.LoadVector128((Single*)(second)),
            Sse.LoadVector128((Single*)(third)));

        Unsafe.Write(testClass._dataTable.outArrayPtr, fused);

        testClass.ValidateResult(_fld1, _fld2, _fld3, testClass._dataTable.outArrayPtr);
    }
}
/// <summary>
/// Announces the scenario, pins <c>_clsVar1</c>, <c>_clsVar2</c> and
/// <c>_clsVar3</c>, loads them through <c>Sse.LoadVector128</c>, passes them to
/// <c>Fma.MultiplyAdd</c>, writes the result to the data table's output buffer
/// and validates it.
/// </summary>
public void RunClsVarScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario_Load));

    fixed (Vector128<Single>* first = &_clsVar1)
    fixed (Vector128<Single>* second = &_clsVar2)
    fixed (Vector128<Single>* third = &_clsVar3)
    {
        var fused = Fma.MultiplyAdd(
            Sse.LoadVector128((Single*)(first)),
            Sse.LoadVector128((Single*)(second)),
            Sse.LoadVector128((Single*)(third)));

        Unsafe.Write(_dataTable.outArrayPtr, fused);

        ValidateResult(_clsVar1, _clsVar2, _clsVar3, _dataTable.outArrayPtr);
    }
}
/// <summary>
/// Announces the scenario, pins this instance's <c>_fld1</c>, <c>_fld2</c> and
/// <c>_fld3</c>, loads them through <c>Sse.LoadVector128</c>, passes them to
/// <c>Fma.MultiplyAdd</c>, writes the result to the data table's output buffer
/// and validates it.
/// </summary>
public void RunClassFldScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario_Load));

    fixed (Vector128<Single>* first = &_fld1)
    fixed (Vector128<Single>* second = &_fld2)
    fixed (Vector128<Single>* third = &_fld3)
    {
        var fused = Fma.MultiplyAdd(
            Sse.LoadVector128((Single*)(first)),
            Sse.LoadVector128((Single*)(second)),
            Sse.LoadVector128((Single*)(third)));

        Unsafe.Write(_dataTable.outArrayPtr, fused);

        ValidateResult(_fld1, _fld2, _fld3, _dataTable.outArrayPtr);
    }
}