/// <summary>
/// Fetches three <c>UInt32</c> vectors from the data table's input buffers with
/// <c>Sse2.LoadAlignedVector128</c>, feeds them to <c>Sse41.BlendVariable</c>, stores the
/// result in the output buffer and passes all four buffers to <c>ValidateResult</c>.
/// </summary>
public void RunBasicScenario_LoadAligned()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));

    // Operands are materialised left-to-right, matching argument evaluation order.
    var left = Sse2.LoadAlignedVector128((UInt32*)(_dataTable.inArray1Ptr));
    var right = Sse2.LoadAlignedVector128((UInt32*)(_dataTable.inArray2Ptr));
    var mask = Sse2.LoadAlignedVector128((UInt32*)(_dataTable.inArray3Ptr));
    var blended = Sse41.BlendVariable(left, right, mask);

    Unsafe.Write(_dataTable.outArrayPtr, blended);
    ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr);
}
/// <summary>
/// Calls <c>Sse41.BlendVariable</c> on the three class-level vectors
/// (<c>_clsVar1</c>, <c>_clsVar2</c>, <c>_clsVar3</c>), writes the result to the
/// data table's output buffer and validates it against the same three inputs.
/// </summary>
public void RunClsVarScenario()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));

    var blended = Sse41.BlendVariable(_clsVar1, _clsVar2, _clsVar3);
    Unsafe.Write(_dataTable.outArrayPtr, blended);

    ValidateResult(_clsVar1, _clsVar2, _clsVar3, _dataTable.outArrayPtr);
}
/// <summary>
/// Calls <c>Sse41.Insert</c> with the class-level vector <c>_clsVar</c>, the class-level
/// scalar <c>_scalarClsData</c> and the immediate 129, writes the result to the data
/// table's output buffer and validates it against the same inputs.
/// </summary>
public void RunClsVarScenario()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));

    var inserted = Sse41.Insert(_clsVar, _scalarClsData, 129);
    Unsafe.Write(_dataTable.outArrayPtr, inserted);

    ValidateResult(_clsVar, _scalarClsData, _dataTable.outArrayPtr);
}
/// <summary>
/// Reads three <c>UInt32</c> vectors from the data table's input buffers via
/// <c>Unsafe.Read</c>, feeds them to <c>Sse41.BlendVariable</c>, stores the result in
/// the output buffer and passes all four buffers to <c>ValidateResult</c>.
/// </summary>
public void RunBasicScenario_UnsafeRead()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));

    // Operands are read left-to-right, matching argument evaluation order.
    var left = Unsafe.Read<Vector128<UInt32>>(_dataTable.inArray1Ptr);
    var right = Unsafe.Read<Vector128<UInt32>>(_dataTable.inArray2Ptr);
    var mask = Unsafe.Read<Vector128<UInt32>>(_dataTable.inArray3Ptr);
    var blended = Sse41.BlendVariable(left, right, mask);

    Unsafe.Write(_dataTable.outArrayPtr, blended);
    ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr);
}
/// <summary>
/// Loads a <c>UInt64</c> vector from the data table's input buffer with
/// <c>Sse2.LoadAlignedVector128</c>, calls <c>Sse41.Insert</c> with the scalar 2 and the
/// immediate 129, stores the result in the output buffer and validates input against output.
/// </summary>
public void RunBasicScenario_LoadAligned()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));

    var source = Sse2.LoadAlignedVector128((UInt64*)(_dataTable.inArrayPtr));
    var inserted = Sse41.Insert(source, (ulong)2, 129);

    Unsafe.Write(_dataTable.outArrayPtr, inserted);
    ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
}
/// <summary>
/// Reads an <c>SByte</c> vector from the data table's input buffer via <c>Unsafe.Read</c>,
/// calls <c>Sse41.Insert</c> with the scalar 2 and the immediate 129, stores the result in
/// the output buffer and validates it against the input buffer and the same scalar.
/// </summary>
public void RunBasicScenario()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario));

    var source = Unsafe.Read<Vector128<SByte>>(_dataTable.inArrayPtr);
    var inserted = Sse41.Insert(source, (sbyte)2, 129);

    Unsafe.Write(_dataTable.outArrayPtr, inserted);
    ValidateResult(_dataTable.inArrayPtr, (sbyte)2, _dataTable.outArrayPtr);
}
/// <summary>
/// Reads an <c>Int64</c> vector from the data table's input buffer via <c>Unsafe.Read</c>,
/// calls <c>Sse41.Insert</c> with the scalar 2 and the immediate 1, stores the result in
/// the output buffer and validates input against output.
/// </summary>
public void RunBasicScenario_UnsafeRead()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));

    var source = Unsafe.Read<Vector128<Int64>>(_dataTable.inArrayPtr);
    var inserted = Sse41.Insert(source, (long)2, 1);

    Unsafe.Write(_dataTable.outArrayPtr, inserted);
    ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
}
/// <summary>
/// Extracts one 64-bit element of <paramref name="Vector"/> and reinterprets its bits as a
/// <see cref="double"/>.
/// </summary>
/// <param name="Vector">Source vector, reinterpreted as 64-bit integer lanes.</param>
/// <param name="Index">Lane index forwarded to the extract operation.</param>
/// <returns>The selected 64 bits converted with <c>BitConverter.Int64BitsToDouble</c>.</returns>
/// <exception cref="PlatformNotSupportedException">Neither SSE4.1 nor SSE2 is supported.</exception>
public static double VectorExtractDouble(Vector128<float> Vector, byte Index)
{
    long bits;

    if (Sse41.IsSupported)
    {
        bits = Sse41.Extract(Sse.StaticCast<float, long>(Vector), Index);
    }
    else if (Sse2.IsSupported)
    {
        // Size code 3 is passed to the zero-extending helper for the 64-bit case.
        bits = (long)VectorExtractIntZx(Vector, Index, 3);
    }
    else
    {
        throw new PlatformNotSupportedException();
    }

    return BitConverter.Int64BitsToDouble(bits);
}
/// <summary>
/// Creates a local <c>TestStruct</c>, loads its two fields as <c>UInt32</c> vectors with
/// <c>Sse2.LoadVector128</c>, multiplies them with <c>Sse41.MultiplyLow</c>, stores the
/// result in the data table's output buffer and validates it against the two fields.
/// </summary>
public void RunStructLclFldScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario_Load));

    var localStruct = TestStruct.Create();

    var left = Sse2.LoadVector128((UInt32*)(&localStruct._fld1));
    var right = Sse2.LoadVector128((UInt32*)(&localStruct._fld2));
    var product = Sse41.MultiplyLow(left, right);

    Unsafe.Write(_dataTable.outArrayPtr, product);
    ValidateResult(localStruct._fld1, localStruct._fld2, _dataTable.outArrayPtr);
}
/// <summary>
/// Loads two <c>Single</c> vectors from the data table's input buffers, calls
/// <c>Sse41.Insert</c> on them with the immediate 128, stores the result in the output
/// buffer and passes both inputs and the output to <c>ValidateResult</c>.
/// </summary>
public void RunBasicScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));

    var target = Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr));
    // The second load is deliberately left unqualified, exactly as the call was written originally.
    var source = LoadVector128((Single*)(_dataTable.inArray2Ptr));
    var inserted = Sse41.Insert(target, source, 128);

    Unsafe.Write(_dataTable.outArrayPtr, inserted);
    ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
}
/// <summary>
/// Rounds every lane of <paramref name="a"/> up to the nearest integral value (ceiling).
/// </summary>
/// <param name="a">Packed single-precision input lanes.</param>
/// <returns>
/// A vector whose lanes are the ceiling of the matching input lanes. NaN, infinities and
/// values that are already integral (including every |a| &gt;= 2^23) are returned unchanged,
/// and the sign of the input is kept on zero results (for example -0.5 becomes -0).
/// </returns>
public static f32 Ceil_f32(f32 a)
{
    if (Sse41.IsSupported)
    {
        return Sse41.RoundToPositiveInfinity(a);
    }

    // Software path built from SSE/SSE2 only.
    f32 signMask = Vector128.Create(-0.0f);
    f32 magnitude = Sse.AndNot(signMask, a);

    // The int32 round-trip below is only valid for |a| < 2^23: larger magnitudes have no
    // fractional bits, and out-of-range or NaN lanes would truncate to 0x80000000. The
    // comparison is false for NaN, so NaN lanes take the pass-through side as well.
    f32 convertible = Sse.CompareLessThan(magnitude, Vector128.Create(8388608f));

    f32 truncated = Sse2.ConvertToVector128Single(Sse2.ConvertToVector128Int32WithTruncation(a));

    // Truncation rounds toward zero; add 1 where that landed below the input.
    f32 needsBump = Sse.CompareLessThan(truncated, a);
    f32 rounded = Sse.Add(truncated, Sse.And(needsBump, Vector128.Create(1f)));

    // Re-apply the input's sign bit so a negative input never yields +0.
    rounded = Sse.Or(rounded, Sse.And(signMask, a));

    // Select the rounded value for convertible lanes and the original value elsewhere.
    return Sse.Or(Sse.And(convertible, rounded), Sse.AndNot(convertible, a));
}
/// <summary>
/// Pins this struct's two <c>Byte</c> vector fields, loads them with
/// <c>Sse2.LoadVector128</c>, evaluates <c>Sse41.TestZ</c> on the pair and hands the
/// fields and the outcome to <paramref name="testClass"/> for validation.
/// </summary>
/// <param name="testClass">Owning test instance whose <c>ValidateResult</c> is called.</param>
public void RunStructFldScenario_Load(BooleanBinaryOpTest__TestZByte testClass)
{
    fixed (Vector128<Byte>* leftPtr = &_fld1, rightPtr = &_fld2)
    {
        var left = Sse2.LoadVector128((Byte*)(leftPtr));
        var right = Sse2.LoadVector128((Byte*)(rightPtr));
        var outcome = Sse41.TestZ(left, right);

        testClass.ValidateResult(_fld1, _fld2, outcome);
    }
}
/// <summary>
/// Pins this struct's two <c>Int32</c> vector fields, loads them with
/// <c>Sse2.LoadVector128</c>, evaluates <c>Sse41.TestMixOnesZeros</c> on the pair and
/// hands the fields and the outcome to <paramref name="testClass"/> for validation.
/// </summary>
/// <param name="testClass">Owning test instance whose <c>ValidateResult</c> is called.</param>
public void RunStructFldScenario_Load(BooleanBinaryOpTest__TestMixOnesZerosInt32 testClass)
{
    fixed (Vector128<Int32>* leftPtr = &_fld1, rightPtr = &_fld2)
    {
        var left = Sse2.LoadVector128((Int32*)(leftPtr));
        var right = Sse2.LoadVector128((Int32*)(rightPtr));
        var outcome = Sse41.TestMixOnesZeros(left, right);

        testClass.ValidateResult(_fld1, _fld2, outcome);
    }
}
/// <summary>
/// Pins the instance field <c>_fld1</c>, loads it as a <c>UInt16</c> vector with
/// <c>Sse2.LoadVector128</c>, evaluates <c>Sse41.TestAllOnes</c> on it and validates the
/// outcome against the field.
/// </summary>
public void RunClassFldScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario_Load));

    fixed (Vector128<UInt16>* fieldPtr = &_fld1)
    {
        var value = Sse2.LoadVector128((UInt16*)(fieldPtr));
        var outcome = Sse41.TestAllOnes(value);

        ValidateResult(_fld1, outcome);
    }
}
/// <summary>
/// Creates a local <c>TestStruct</c>, loads its two fields as <c>Double</c> vectors with
/// <c>Sse2.LoadVector128</c>, calls <c>Sse41.RoundToNegativeInfinityScalar</c> on the pair,
/// stores the result in the data table's output buffer and validates it against the fields.
/// </summary>
public void RunStructLclFldScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario_Load));

    var localStruct = TestStruct.Create();

    var first = Sse2.LoadVector128((Double*)(&localStruct._fld1));
    var second = Sse2.LoadVector128((Double*)(&localStruct._fld2));
    var rounded = Sse41.RoundToNegativeInfinityScalar(first, second);

    Unsafe.Write(_dataTable.outArrayPtr, rounded);
    ValidateResult(localStruct._fld1, localStruct._fld2, _dataTable.outArrayPtr);
}
/// <summary>
/// Rounds every lane of <paramref name="a"/> down to the nearest integral value (floor).
/// </summary>
/// <param name="a">Packed single-precision input lanes.</param>
/// <returns>
/// A vector whose lanes are the floor of the matching input lanes. NaN, infinities and
/// values that are already integral (including every |a| &gt;= 2^23) are returned unchanged,
/// and the sign of the input is kept on zero results (for example -0 stays -0).
/// </returns>
public static f32 Floor_f32(f32 a)
{
    if (Sse41.IsSupported)
    {
        return Sse41.RoundToNegativeInfinity(a);
    }

    // Software path built from SSE/SSE2 only.
    f32 signMask = Vector128.Create(-0.0f);
    f32 magnitude = Sse.AndNot(signMask, a);

    // The int32 round-trip below is only valid for |a| < 2^23: larger magnitudes have no
    // fractional bits, and out-of-range or NaN lanes would truncate to 0x80000000. The
    // comparison is false for NaN, so NaN lanes take the pass-through side as well.
    f32 convertible = Sse.CompareLessThan(magnitude, Vector128.Create(8388608f));

    f32 truncated = Sse2.ConvertToVector128Single(Sse2.ConvertToVector128Int32WithTruncation(a));

    // Truncation rounds toward zero; subtract 1 where that landed above the input.
    f32 needsDrop = Sse.CompareLessThan(a, truncated);
    f32 rounded = Sse.Subtract(truncated, Sse.And(needsDrop, Vector128.Create(1f)));

    // Re-apply the input's sign bit so a negative-zero input is not turned into +0.
    rounded = Sse.Or(rounded, Sse.And(signMask, a));

    // Select the rounded value for convertible lanes and the original value elsewhere.
    return Sse.Or(Sse.And(convertible, rounded), Sse.AndNot(convertible, a));
}
/// <summary>
/// Extracts one integer element of <paramref name="vector"/> and returns it as a
/// <see cref="long"/> after casting through the signed type selected by
/// <paramref name="size"/>.
/// </summary>
/// <param name="vector">Source vector, reinterpreted according to <paramref name="size"/>.</param>
/// <param name="index">Element index forwarded to the extract operation.</param>
/// <param name="size">
/// Element size code: 0 casts through <c>sbyte</c>, 1 through <c>short</c>, 2 uses
/// <c>int</c> lanes and 3 uses <c>long</c> lanes.
/// </param>
/// <returns>The selected element as a <see cref="long"/>.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="size"/> is not 0, 1, 2 or 3 (checked only when SSE4.1 or SSE2 is supported).
/// </exception>
/// <exception cref="PlatformNotSupportedException">Neither SSE4.1 nor SSE2 is supported.</exception>
public static long VectorExtractIntSx(Vector128<float> vector, byte index, int size)
{
    if (Sse41.IsSupported)
    {
        switch (size)
        {
            case 0:
                return (sbyte)Sse41.Extract(Sse.StaticCast<float, byte>(vector), index);
            case 1:
                return (short)Sse2.Extract(Sse.StaticCast<float, ushort>(vector), index);
            case 2:
                return Sse41.Extract(Sse.StaticCast<float, int>(vector), index);
            case 3:
                return Sse41.Extract(Sse.StaticCast<float, long>(vector), index);
            default:
                throw new ArgumentOutOfRangeException(nameof(size));
        }
    }

    if (Sse2.IsSupported)
    {
        // The helper is invoked inside each case so an invalid size is rejected before
        // any extraction is attempted.
        switch (size)
        {
            case 0:
                return (sbyte)VectorExtractIntZx(vector, index, size);
            case 1:
                return (short)VectorExtractIntZx(vector, index, size);
            case 2:
                return (int)VectorExtractIntZx(vector, index, size);
            case 3:
                return (long)VectorExtractIntZx(vector, index, size);
            default:
                throw new ArgumentOutOfRangeException(nameof(size));
        }
    }

    throw new PlatformNotSupportedException();
}
/// <summary>
/// Pins this struct's two <c>Double</c> vector fields, loads them with
/// <c>Sse2.LoadVector128</c>, calls <c>Sse41.RoundCurrentDirectionScalar</c> on the pair,
/// writes the result to the output buffer of <paramref name="testClass"/> and asks it to
/// validate the result against the two fields.
/// </summary>
/// <param name="testClass">Owning test instance providing the data table and <c>ValidateResult</c>.</param>
public void RunStructFldScenario_Load(SimpleBinaryOpTest__RoundCurrentDirectionScalarDouble testClass)
{
    fixed (Vector128<Double>* firstPtr = &_fld1, secondPtr = &_fld2)
    {
        var first = Sse2.LoadVector128((Double*)(firstPtr));
        var second = Sse2.LoadVector128((Double*)(secondPtr));
        var rounded = Sse41.RoundCurrentDirectionScalar(first, second);

        Unsafe.Write(testClass._dataTable.outArrayPtr, rounded);
        testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
    }
}
/// <summary>
/// Pins this struct's two <c>Single</c> vector fields, loads them with
/// <c>Sse.LoadVector128</c>, calls <c>Sse41.CeilingScalar</c> on the pair, writes the
/// result to the output buffer of <paramref name="testClass"/> and asks it to validate
/// the result against the two fields.
/// </summary>
/// <param name="testClass">Owning test instance providing the data table and <c>ValidateResult</c>.</param>
public void RunStructFldScenario_Load(SimpleBinaryOpTest__CeilingScalarSingle testClass)
{
    fixed (Vector128<Single>* firstPtr = &_fld1, secondPtr = &_fld2)
    {
        var first = Sse.LoadVector128((Single*)(firstPtr));
        var second = Sse.LoadVector128((Single*)(secondPtr));
        var ceiling = Sse41.CeilingScalar(first, second);

        Unsafe.Write(testClass._dataTable.outArrayPtr, ceiling);
        testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
    }
}
/// <summary>
/// Scratch routine that pins <paramref name="vs"/> and runs a series of load, widening
/// conversion, multiply and fused multiply-add intrinsics over its leading bytes. Every
/// result is kept only in a local; nothing is returned or written back.
/// </summary>
/// <param name="vs">Byte array that is pinned and read through a raw pointer.</param>
private unsafe void TestAddSum(byte[] vs)
{
    // NOTE(review): the second load below starts at p + 32, so this presumably expects vs
    // to hold at least 64 bytes; no length or IsSupported check is visible here - confirm
    // against the callers.
    fixed(byte *p = vs)
    {
        // Two raw 256-bit loads from the pinned array.
        var v = Avx.LoadVector256(p);
        var v2 = Avx.LoadVector256(p + 32);
        //Avx.MultipleSumAbsoluteDifferences;

        // bytes -> int32 lanes -> float lanes, then squared lane-wise.
        Vector256 <int> i1 = Avx2.ConvertToVector256Int32(p);
        Vector256 <float> f1 = Avx.ConvertToVector256Single(i1);
        Vector256 <float> m1 = Avx.Multiply(f1, f1);

        // bytes -> int32 lanes (128-bit) -> double lanes, squared via FMA with a zero addend.
        Vector128 <int> i128 = Sse41.ConvertToVector128Int32(p);
        Vector256 <double> d256 = Avx.ConvertToVector256Double(i128);
        var dZero = Vector256 <double> .Zero;
        Vector256 <double> ma1 = Fma.MultiplyAdd(d256, d256, dZero);

        // Same idea in single precision: bytes -> int32 -> float, squared via FMA with a zero addend.
        var i256 = Avx2.ConvertToVector256Int32(p);
        var f256 = Avx.ConvertToVector256Single(i256);
        var fZero = Vector256 <float> .Zero;
        var ma2 = Fma.MultiplyAdd(f256, f256, fZero);

        // 128-bit int32 lanes -> float lanes, then a scalar multiply.
        Vector128 <float> s128 = Sse2.ConvertToVector128Single(i128);
        Vector128 <float> ms = Sse.MultiplyScalar(s128, s128);

        // x86 / x64 SIMD instruction reference table (SSE through AVX2)
        //https://www.officedaytime.com/tips/simd.html
        // pmaddwd
        //https://www.officedaytime.com/tips/simdimg/si.php?f=pmaddwd
        // bytes -> int16 lanes, then multiply-add of adjacent pairs into int32 lanes.
        Vector128 <short> sh128 = Sse41.ConvertToVector128Int16(p);
        Vector128 <int> vv3 = Avx.MultiplyAddAdjacent(sh128, sh128);

        // Unused placeholder local - presumably a convenient breakpoint anchor; confirm before removing.
        var neko = 0;

        // Intrinsic names noted by the author; none of them is called here.
        //Avx.MultiplyAddAdjacent;
        //Avx.MultiplyHigh;
        //Avx.MultiplyHighRoundScale;
        //Avx.MultiplyLow;
        //Avx.MultiplyScalar;
        //Fma.MultiplyAdd;
        //Fma.MultiplyAddNegated;
        //Fma.MultiplyAddNegatedScalar;
        //Fma.MultiplyAddScalar;
        //Fma.MultiplyAddSubtract;
        //Fma.MultiplySubtract;
        //Fma.MultiplySubtractAdd;
        //Fma.MultiplySubtractNegated;
        //Fma.MultiplySubtractNegatedScalar;
        //Fma.MultiplySubtractScalar;
    }
}
/// <summary>
/// Pins this struct's two <c>UInt16</c> vector fields, loads them with
/// <c>Sse2.LoadVector128</c>, calls <c>Sse41.Min</c> on the pair, writes the result to the
/// output buffer of <paramref name="testClass"/> and asks it to validate the result
/// against the two fields.
/// </summary>
/// <param name="testClass">Owning test instance providing the data table and <c>ValidateResult</c>.</param>
public void RunStructFldScenario_Load(SimpleBinaryOpTest__MinUInt16 testClass)
{
    fixed (Vector128<UInt16>* leftPtr = &_fld1, rightPtr = &_fld2)
    {
        var left = Sse2.LoadVector128((UInt16*)(leftPtr));
        var right = Sse2.LoadVector128((UInt16*)(rightPtr));
        var minimum = Sse41.Min(left, right);

        Unsafe.Write(testClass._dataTable.outArrayPtr, minimum);
        testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
    }
}
/// <summary>
/// Pins the instance field <c>_fld1</c>, loads it as a <c>Single</c> vector with
/// <c>Sse.LoadVector128</c>, calls <c>Sse41.RoundToNegativeInfinityScalar</c> on it, stores
/// the result in the data table's output buffer and validates it against the field.
/// </summary>
public void RunClassFldScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario_Load));

    fixed (Vector128<Single>* fieldPtr = &_fld1)
    {
        var value = Sse.LoadVector128((Single*)(fieldPtr));
        var rounded = Sse41.RoundToNegativeInfinityScalar(value);

        Unsafe.Write(_dataTable.outArrayPtr, rounded);
        ValidateResult(_fld1, _dataTable.outArrayPtr);
    }
}
/// <summary>
/// Creates a local <c>TestStruct</c>, loads its three fields as <c>Single</c> vectors with
/// <c>Sse.LoadVector128</c>, feeds them to <c>Sse41.BlendVariable</c>, stores the result in
/// the data table's output buffer and validates it against the three fields.
/// </summary>
public void RunStructLclFldScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario_Load));

    var localStruct = TestStruct.Create();

    // Operands are loaded left-to-right, matching argument evaluation order.
    var left = Sse.LoadVector128((Single*)(&localStruct._fld1));
    var right = Sse.LoadVector128((Single*)(&localStruct._fld2));
    var mask = Sse.LoadVector128((Single*)(&localStruct._fld3));
    var blended = Sse41.BlendVariable(left, right, mask);

    Unsafe.Write(_dataTable.outArrayPtr, blended);
    ValidateResult(localStruct._fld1, localStruct._fld2, localStruct._fld3, _dataTable.outArrayPtr);
}
/// <summary>
/// Pins the instance field <c>_fld1</c>, loads it as a <c>Double</c> vector with
/// <c>Sse2.LoadVector128</c>, calls <c>Sse41.RoundCurrentDirectionScalar</c> on it, stores
/// the result in the data table's output buffer and validates it against the field.
/// </summary>
public void RunClassFldScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario_Load));

    fixed (Vector128<Double>* fieldPtr = &_fld1)
    {
        var value = Sse2.LoadVector128((Double*)(fieldPtr));
        var rounded = Sse41.RoundCurrentDirectionScalar(value);

        Unsafe.Write(_dataTable.outArrayPtr, rounded);
        ValidateResult(_fld1, _dataTable.outArrayPtr);
    }
}
/// <summary>
/// Pins this struct's two <c>Int32</c> vector fields, loads them with
/// <c>Sse2.LoadVector128</c>, calls <c>Sse41.PackUnsignedSaturate</c> on the pair, writes
/// the result to the output buffer of <paramref name="testClass"/> and asks it to
/// validate the result against the two fields.
/// </summary>
/// <param name="testClass">Owning test instance providing the data table and <c>ValidateResult</c>.</param>
public void RunStructFldScenario_Load(HorizontalBinaryOpTest__PackUnsignedSaturateUInt16 testClass)
{
    fixed (Vector128<Int32>* leftPtr = &_fld1, rightPtr = &_fld2)
    {
        var left = Sse2.LoadVector128((Int32*)(leftPtr));
        var right = Sse2.LoadVector128((Int32*)(rightPtr));
        var packed = Sse41.PackUnsignedSaturate(left, right);

        Unsafe.Write(testClass._dataTable.outArrayPtr, packed);
        testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
    }
}
/// <summary>
/// Multiplies <paramref name="left"/> by <paramref name="right"/>, using
/// <c>Sse41.MultiplyLow</c> when SSE4.1 is supported and <c>Multiply_Software</c> otherwise.
/// </summary>
/// <param name="left">Left operand.</param>
/// <param name="right">Right operand.</param>
/// <returns>The result of whichever multiply path was taken.</returns>
public static Vector4Int32 Multiply(Vector4Int32Param1_3 left, Vector4Int32Param1_3 right)
{
    if (!Sse41.IsSupported)
    {
        // TODO: try to accelerate this path on hardware below SSE4.1. The earlier sketch
        // started from: Vector128<ulong> elem2And0 = Sse2.Multiply(left.AsUInt32(), right.AsUInt32());
        return Multiply_Software(left, right);
    }

    return Sse41.MultiplyLow(left, right);
}
/// <summary>
/// Pins the class-level vector <c>_clsVar1</c>, loads it as a <c>Double</c> vector with
/// <c>Sse2.LoadVector128</c>, calls <c>Sse41.RoundToNearestIntegerScalar</c> on it, stores
/// the result in the data table's output buffer and validates it against <c>_clsVar1</c>.
/// </summary>
public void RunClsVarScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario_Load));

    fixed (Vector128<Double>* clsVarPtr = &_clsVar1)
    {
        var value = Sse2.LoadVector128((Double*)(clsVarPtr));
        var rounded = Sse41.RoundToNearestIntegerScalar(value);

        Unsafe.Write(_dataTable.outArrayPtr, rounded);
        ValidateResult(_clsVar1, _dataTable.outArrayPtr);
    }
}
/// <summary>
/// Pins the class-level vector <c>_clsVar1</c>, loads it as a <c>Single</c> vector with
/// <c>Sse.LoadVector128</c>, calls <c>Sse41.RoundToPositiveInfinity</c> on it, stores the
/// result in the data table's output buffer and validates it against <c>_clsVar1</c>.
/// </summary>
public void RunClsVarScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario_Load));

    fixed (Vector128<Single>* clsVarPtr = &_clsVar1)
    {
        var value = Sse.LoadVector128((Single*)(clsVarPtr));
        var rounded = Sse41.RoundToPositiveInfinity(value);

        Unsafe.Write(_dataTable.outArrayPtr, rounded);
        ValidateResult(_clsVar1, _dataTable.outArrayPtr);
    }
}
/// <summary>
/// Executes the single instruction word 0x4E826820 with V1 built from A0..A3 and V2 built
/// from B0..B3, then asserts that the four 32-bit lanes of the resulting V0 are
/// A1, B1, A3 and B3 (lane 0 first).
/// </summary>
public void Trn2_V_4S([Random(2)] uint A0, [Random(2)] uint A1, [Random(2)] uint A2, [Random(2)] uint A3,
                      [Random(2)] uint B0, [Random(2)] uint B1, [Random(2)] uint B2, [Random(2)] uint B3)
{
    // Instruction word under test; presumably TRN2 V0.4S, V1.4S, V2.4S, going by the test name.
    uint Instruction = 0x4E826820;

    // SetVector128 takes its arguments highest element first, hence the reversed order.
    Vector128<float> FirstOperand = Sse.StaticCast<uint, float>(Sse2.SetVector128(A3, A2, A1, A0));
    Vector128<float> SecondOperand = Sse.StaticCast<uint, float>(Sse2.SetVector128(B3, B2, B1, B0));

    AThreadState State = SingleOpcode(Instruction, V1: FirstOperand, V2: SecondOperand);

    Assert.That(
        Sse41.Extract(Sse.StaticCast<float, uint>(State.V0), (byte)0),
        Is.EqualTo(A1));
    Assert.That(
        Sse41.Extract(Sse.StaticCast<float, uint>(State.V0), (byte)1),
        Is.EqualTo(B1));
    Assert.That(
        Sse41.Extract(Sse.StaticCast<float, uint>(State.V0), (byte)2),
        Is.EqualTo(A3));
    Assert.That(
        Sse41.Extract(Sse.StaticCast<float, uint>(State.V0), (byte)3),
        Is.EqualTo(B3));
}
/// <summary>
/// Computes one 128-bit-wide section of a cost diagonal: combines values read from the
/// two diagonal buffers with a per-character equality test between the source and target
/// text, and stores the resulting minimum cost back into the first diagonal buffer.
/// Only <c>T == int</c> and <c>T == ushort</c> are handled; any other <typeparamref name="T"/>
/// falls through both branches and does nothing.
/// </summary>
/// <typeparam name="T">Element type of the diagonal buffers (<c>int</c> or <c>ushort</c>).</typeparam>
/// <param name="refDiag1Ptr">Diagonal buffer that is read for the substitution cost and then written.</param>
/// <param name="refDiag2Ptr">Diagonal buffer that is read for the insert and delete costs.</param>
/// <param name="sourcePtr">Source characters, read as <c>ushort</c> values.</param>
/// <param name="targetPtr">Target characters, read as <c>ushort</c> values.</param>
/// <param name="rowIndex">Row position used for all source/diagonal offsets; passed by reference but not modified here.</param>
/// <param name="columnIndex">Column position used for the target offset.</param>
// NOTE(review): the cost names below suggest an edit-distance (Levenshtein-style)
// computation - confirm against the caller.
public static unsafe void CalculateDiagonalSection_Sse41 <T>(void *refDiag1Ptr, void *refDiag2Ptr, char *sourcePtr, char *targetPtr, ref int rowIndex, int columnIndex) where T : struct
{
    if (typeof(T) == typeof(int))
    {
        var diag1Ptr = (int *)refDiag1Ptr;
        var diag2Ptr = (int *)refDiag2Ptr;

        // Widen four 16-bit characters from each string into 32-bit lanes.
        var sourceVector = Sse41.ConvertToVector128Int32((ushort *)sourcePtr + rowIndex - Vector128 <T> .Count);
        var targetVector = Sse41.ConvertToVector128Int32((ushort *)targetPtr + columnIndex - 1);
        // Control 0x1b reverses the order of the four 32-bit lanes.
        targetVector = Sse2.Shuffle(targetVector, 0x1b);
        // CompareEqual yields all-ones (-1) in equal lanes and 0 elsewhere, so adding it
        // lowers the cost by one where the characters match; 1 is added to every lane below.
        var substitutionCostAdjustment = Sse2.CompareEqual(sourceVector, targetVector);
        var substitutionCost = Sse2.Add( Sse3.LoadDquVector128(diag1Ptr + rowIndex - Vector128 <T> .Count), substitutionCostAdjustment );
        // The two diag2 loads start one element apart.
        var deleteCost = Sse3.LoadDquVector128(diag2Ptr + rowIndex - (Vector128 <T> .Count - 1));
        var insertCost = Sse3.LoadDquVector128(diag2Ptr + rowIndex - Vector128 <T> .Count);
        var localCost = Sse41.Min(Sse41.Min(insertCost, deleteCost), substitutionCost);
        localCost = Sse2.Add(localCost, Vector128.Create(1));
        // The store lands one element past where diag1 was loaded, so it must stay after that load.
        Sse2.Store(diag1Ptr + rowIndex - (Vector128 <T> .Count - 1), localCost);
    }
    else if (typeof(T) == typeof(ushort))
    {
        var diag1Ptr = (ushort *)refDiag1Ptr;
        var diag2Ptr = (ushort *)refDiag2Ptr;

        // Characters are compared directly as 16-bit lanes; no widening is needed.
        var sourceVector = Sse3.LoadDquVector128((ushort *)sourcePtr + rowIndex - Vector128 <T> .Count);
        var targetVector = Sse3.LoadDquVector128((ushort *)targetPtr + columnIndex - 1);
        // Byte shuffle driven by REVERSE_USHORT_AS_BYTE_128 (defined elsewhere); presumably
        // reverses the order of the 16-bit lanes, mirroring the int branch - confirm.
        targetVector = Ssse3.Shuffle(targetVector.AsByte(), REVERSE_USHORT_AS_BYTE_128).AsUInt16();
        // Equal lanes are all-ones, so the add below wraps to "cost - 1" for matching characters.
        var substitutionCostAdjustment = Sse2.CompareEqual(sourceVector, targetVector);
        var substitutionCost = Sse2.Add( Sse3.LoadDquVector128(diag1Ptr + rowIndex - Vector128 <T> .Count), substitutionCostAdjustment );
        // The two diag2 loads start one element apart.
        var deleteCost = Sse3.LoadDquVector128(diag2Ptr + rowIndex - (Vector128 <T> .Count - 1));
        var insertCost = Sse3.LoadDquVector128(diag2Ptr + rowIndex - Vector128 <T> .Count);
        var localCost = Sse41.Min(Sse41.Min(insertCost, deleteCost), substitutionCost);
        localCost = Sse2.Add(localCost, Vector128.Create((ushort)1));
        // The store lands one element past where diag1 was loaded, so it must stay after that load.
        Sse2.Store(diag1Ptr + rowIndex - (Vector128 <T> .Count - 1), localCost);
    }
}