/// <summary>
/// Packs <c>state[0]</c> through <c>state[15]</c> into a 128-bit vector and stores it,
/// cast to <c>Vector128&lt;float&gt;</c>, in <paramref name="op"/>.
/// </summary>
/// <param name="state">Source bytes; indices 0 through 15 are read.</param>
/// <param name="op">Receives the packed vector.</param>
/// <exception cref="PlatformNotSupportedException">SSE2 is not supported.</exception>
private static void FromByteArrayToVector(byte[] state, ref Vector128<float> op)
{
    if (Sse2.IsSupported)
    {
        // The bytes are handed to SetVector128 from index 15 down to index 0.
        Vector128<byte> packed = Sse2.SetVector128(
            state[15], state[14], state[13], state[12],
            state[11], state[10], state[9], state[8],
            state[7], state[6], state[5], state[4],
            state[3], state[2], state[1], state[0]);

        op = Sse.StaticCast<byte, float>(packed);
        return;
    }

    throw new PlatformNotSupportedException();
}
/// <summary>
/// Rounds every lane of <paramref name="a"/> toward negative infinity.
/// </summary>
/// <param name="a">Vector of single-precision values.</param>
/// <returns>The per-lane floor of <paramref name="a"/>.</returns>
/// <remarks>
/// With SSE4.1 the hardware rounding instruction is used. Otherwise each lane is truncated
/// through a 32-bit integer conversion and lowered by one where truncation rounded up.
/// Lanes whose magnitude is at least 2^23, and NaN lanes, are returned unchanged by the
/// fallback: such floats have no fractional part, and the integer conversion cannot
/// represent all of them. The fallback returns +0 for an input of -0.
/// </remarks>
public static f32 Floor_f32(f32 a)
{
    if (Sse41.IsSupported)
    {
        return Sse41.RoundToNegativeInfinity(a);
    }

    f32 truncated = Sse2.ConvertToVector128Single(Sse2.ConvertToVector128Int32WithTruncation(a));

    // Truncation rounds negative non-integers up, so step those lanes down by one.
    f32 roundedUp = Sse.CompareLessThan(a, truncated);
    f32 floored = Sse.Subtract(truncated, Sse.And(roundedUp, Vector128.Create(1f)));

    // Only lanes with |a| < 2^23 take the int32 round-trip result. The comparison is false
    // for NaN, so NaN and out-of-range lanes fall through to the original value.
    f32 magnitude = Sse.And(a, Vector128.Create(0x7FFFFFFF).AsSingle());
    f32 useFloored = Sse.CompareLessThan(magnitude, Vector128.Create(8388608f));

    // Sse.AndNot(mask, a) computes (~mask) & a.
    return Sse.Or(Sse.And(useFloored, floored), Sse.AndNot(useFloored, a));
}
/// <summary>
/// Basic scenario: loads both operands with <c>Sse.LoadVector128</c> from the data table's
/// input arrays, applies <c>Sse41.Insert</c> with immediate 4, writes the result to the
/// output array and validates it.
/// </summary>
public void RunBasicScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));

    // Both loads are qualified with Sse so the call does not rely on a using-static
    // directive being in scope and matches the first operand.
    var result = Sse41.Insert(
        Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr)),
        Sse.LoadVector128((Single*)(_dataTable.inArray2Ptr)),
        4
    );

    Unsafe.Write(_dataTable.outArrayPtr, result);
    ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
}
/// <summary>
/// Basic scenario: loads three operands with <c>Sse.LoadAlignedVector128</c>, applies
/// <c>Fma.MultiplyAddScalar</c>, writes the result to the output array and validates it.
/// </summary>
public void RunBasicScenario_LoadAligned()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));

    Vector128<Single> op1 = Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr));
    Vector128<Single> op2 = Sse.LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr));
    Vector128<Single> op3 = Sse.LoadAlignedVector128((Single*)(_dataTable.inArray3Ptr));

    var fused = Fma.MultiplyAddScalar(op1, op2, op3);

    Unsafe.Write(_dataTable.outArrayPtr, fused);
    ValidateResult(
        _dataTable.inArray1Ptr,
        _dataTable.inArray2Ptr,
        _dataTable.inArray3Ptr,
        _dataTable.outArrayPtr);
}
/// <summary>
/// Returns the dot product of two eight-lane float vectors.
/// </summary>
/// <param name="x">First operand.</param>
/// <param name="y">Second operand.</param>
/// <returns>
/// With AVX: lane 0 of the sum of the lower and upper 128-bit halves of
/// <c>mm256_dp_ps(x, y, 255)</c>. Otherwise the sum of <c>math.dot</c> over the
/// <c>v4_0</c> and <c>v4_4</c> halves.
/// </returns>
public static float dot(float8 x, float8 y)
{
    if (Avx.IsAvxSupported)
    {
        float8 products = Avx.mm256_dp_ps(x, y, 255);

        var lowerHalf = Avx.mm256_castps256_ps128(products);
        var upperHalf = Avx.mm256_extractf128_ps(products, 1);

        return Sse.add_ss(lowerHalf, upperHalf).Float0;
    }

    return math.dot(x.v4_0, y.v4_0) + math.dot(x.v4_4, y.v4_4);
}
/// <summary>
/// Reflection scenario: looks up <c>Sse.CompareGreaterThan(Vector128&lt;Single&gt;, Vector128&lt;Single&gt;)</c>,
/// invokes it on two aligned loads from the input arrays, writes the result to the output
/// array and validates it.
/// </summary>
public void RunReflectionScenario_LoadAligned()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned));

    var parameterTypes = new Type[] { typeof(Vector128<Single>), typeof(Vector128<Single>) };
    var method = typeof(Sse).GetMethod(nameof(Sse.CompareGreaterThan), parameterTypes);

    var arguments = new object[]
    {
        Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr)),
        Sse.LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr))
    };

    object boxed = method.Invoke(null, arguments);

    Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(boxed));
    ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
}
/// <summary>
/// Combines <paramref name="a"/> and <paramref name="b"/> as the difference of two
/// shuffled lane-wise products and returns the result stored back into <paramref name="a"/>.
/// </summary>
/// <param name="a">Left operand; overwritten locally with the result.</param>
/// <param name="b">Right operand.</param>
/// <returns>The local copy of <paramref name="a"/> after the result is stored at its <c>X</c> field.</returns>
public static unsafe Float3 Cross(Float3 a, Float3 b)
{
    // NOTE(review): every load/store below touches 16 bytes starting at X. If Float3 holds
    // only three floats this reads and writes past the struct — confirm Float3's layout.
    Vector128<float> va = Sse.LoadVector128(&a.X);
    Vector128<float> vb = Sse.LoadVector128(&b.X);

    // First product: a shuffled by (3, 0, 2, 1) times b shuffled by (3, 1, 0, 2).
    Vector128<float> aFirst = Sse.Shuffle(va, va, _MM_SHUFFLE(3, 0, 2, 1));
    Vector128<float> bFirst = Sse.Shuffle(vb, vb, _MM_SHUFFLE(3, 1, 0, 2));
    Vector128<float> firstProduct = Sse.Multiply(aFirst, bFirst);

    // Second product: the same two shuffle patterns with the operands swapped.
    Vector128<float> aSecond = Sse.Shuffle(va, va, _MM_SHUFFLE(3, 1, 0, 2));
    Vector128<float> bSecond = Sse.Shuffle(vb, vb, _MM_SHUFFLE(3, 0, 2, 1));
    Vector128<float> secondProduct = Sse.Multiply(aSecond, bSecond);

    Vector128<float> cross = Sse.Subtract(firstProduct, secondProduct);

    Sse.Store(&a.X, cross);
    return a;
}
/// <summary>
/// Struct-local-field scenario: creates a <c>TestStruct</c> local, loads its two fields
/// with <c>Sse.LoadVector128</c>, applies <c>Sse.MaxScalar</c>, writes the result to the
/// output array and validates it against the fields.
/// </summary>
public void RunStructLclFldScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario_Load));

    var test = TestStruct.Create();

    Vector128<Single> left = Sse.LoadVector128((Single*)(&test._fld1));
    Vector128<Single> right = Sse.LoadVector128((Single*)(&test._fld2));
    var maximum = Sse.MaxScalar(left, right);

    Unsafe.Write(_dataTable.outArrayPtr, maximum);
    ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr);
}
/// <summary>
/// Struct-field scenario: pins both fields, loads them with <c>Sse.LoadVector128</c>,
/// applies <c>Sse.CompareScalarUnorderedEqual</c> and passes the outcome to the test
/// class for validation.
/// </summary>
/// <param name="testClass">Test instance whose <c>ValidateResult</c> checks the outcome.</param>
public void RunStructFldScenario_Load(BooleanBinaryOpTest__CompareScalarUnorderedEqualBoolean testClass)
{
    fixed (Vector128<Single>* pFld1 = &_fld1)
    fixed (Vector128<Single>* pFld2 = &_fld2)
    {
        Vector128<Single> left = Sse.LoadVector128((Single*)(pFld1));
        Vector128<Single> right = Sse.LoadVector128((Single*)(pFld2));

        var comparison = Sse.CompareScalarUnorderedEqual(left, right);

        testClass.ValidateResult(_fld1, _fld2, comparison);
    }
}
/// <summary>
/// Rounds every lane of <paramref name="a"/> toward positive infinity.
/// </summary>
/// <param name="a">Vector of single-precision values.</param>
/// <returns>The per-lane ceiling of <paramref name="a"/>.</returns>
/// <remarks>
/// With SSE4.1 the hardware rounding instruction is used. Otherwise each lane is truncated
/// through a 32-bit integer conversion and raised by one where truncation rounded down.
/// Lanes whose magnitude is at least 2^23, and NaN lanes, are returned unchanged by the
/// fallback: such floats have no fractional part, and the integer conversion cannot
/// represent all of them. The fallback returns +0 for inputs that truncate to zero,
/// including negative ones.
/// </remarks>
public static f32 Ceil_f32(f32 a)
{
    if (Sse41.IsSupported)
    {
        return Sse41.RoundToPositiveInfinity(a);
    }

    f32 truncated = Sse2.ConvertToVector128Single(Sse2.ConvertToVector128Int32WithTruncation(a));

    // Truncation rounds positive non-integers down, so step those lanes up by one.
    f32 roundedDown = Sse.CompareLessThan(truncated, a);
    f32 ceiled = Sse.Add(truncated, Sse.And(roundedDown, Vector128.Create(1f)));

    // Only lanes with |a| < 2^23 take the int32 round-trip result. The comparison is false
    // for NaN, so NaN and out-of-range lanes fall through to the original value.
    f32 magnitude = Sse.And(a, Vector128.Create(0x7FFFFFFF).AsSingle());
    f32 useCeiled = Sse.CompareLessThan(magnitude, Vector128.Create(8388608f));

    // Sse.AndNot(mask, a) computes (~mask) & a.
    return Sse.Or(Sse.And(useCeiled, ceiled), Sse.AndNot(useCeiled, a));
}
/// <summary>
/// Extracts the 64-bit element at <paramref name="index"/> from <paramref name="vector"/>
/// and returns its bits as a <see cref="double"/>.
/// </summary>
/// <param name="vector">Source vector.</param>
/// <param name="index">Element index, forwarded to the extraction call.</param>
/// <returns>The extracted 64 bits converted with <c>BitConverter.Int64BitsToDouble</c>.</returns>
/// <exception cref="PlatformNotSupportedException">Neither SSE4.1 nor SSE2 is supported.</exception>
public static double VectorExtractDouble(Vector128<float> vector, byte index)
{
    long bits;

    if (Sse41.IsSupported)
    {
        bits = Sse41.Extract(Sse.StaticCast<float, long>(vector), index);
    }
    else if (Sse2.IsSupported)
    {
        // Size code 3 is passed for the 64-bit element.
        bits = (long)VectorExtractIntZx(vector, index, 3);
    }
    else
    {
        throw new PlatformNotSupportedException();
    }

    return BitConverter.Int64BitsToDouble(bits);
}
/// <summary>
/// Returns the dot product of two four-component short vectors as an <see cref="int"/>.
/// </summary>
/// <param name="a">First operand.</param>
/// <param name="b">Second operand.</param>
/// <returns>
/// With SSE2: the <c>SInt0</c> lane after <c>madd_epi16</c> and a shuffled
/// <c>add_epi32</c>. Otherwise the sum of the four component products.
/// </returns>
public static int dot(short4 a, short4 b)
{
    if (Sse2.IsSse2Supported)
    {
        short4 pairSums = Sse2.madd_epi16(a, b);
        var shuffled = Sse2.shufflelo_epi16(pairSums, Sse.SHUFFLE(0, 0, 3, 2));

        return Sse2.add_epi32(pairSums, shuffled).SInt0;
    }

    int xy = (a.x * b.x) + (a.y * b.y);
    int zw = (a.z * b.z) + (a.w * b.w);

    return xy + zw;
}
/// <summary>
/// Reflection scenario: looks up <c>Sse41.Extract(Vector128&lt;Single&gt;, byte)</c>, invokes it
/// on a vector loaded from the input array with index 1, writes the scalar result to the
/// output array and validates it.
/// </summary>
public void RunReflectionScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));

    var parameterTypes = new Type[] { typeof(Vector128<Single>), typeof(byte) };
    var method = typeof(Sse41).GetMethod(nameof(Sse41.Extract), parameterTypes);

    var arguments = new object[]
    {
        Sse.LoadVector128((Single*)(_dataTable.inArrayPtr)),
        (byte)1
    };

    object boxed = method.Invoke(null, arguments);

    Unsafe.Write(_dataTable.outArrayPtr, (Single)(boxed));
    ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
}
/// <summary>
/// Returns the dot product of two three-component short vectors as an <see cref="int"/>.
/// </summary>
/// <param name="a">First operand.</param>
/// <param name="b">Second operand.</param>
/// <returns>
/// With SSE2: the <c>SInt0</c> lane after <c>madd_epi16</c> and a shuffled
/// <c>add_epi32</c>. Otherwise the sum of the three component products.
/// </returns>
public static int dot(short3 a, short3 b)
{
    if (Sse2.IsSse2Supported)
    {
        // Element 3 of the first operand is set to zero before the multiply-add.
        short4 pairSums = Sse2.madd_epi16(Sse2.insert_epi16(a, 0, 3), b);
        var shuffled = Sse2.shufflelo_epi16(pairSums, Sse.SHUFFLE(0, 0, 3, 2));

        return Sse2.add_epi32(pairSums, shuffled).SInt0;
    }

    return (a.x * b.x) + (a.y * b.y) + (a.z * b.z);
}
/// <summary>
/// Extracts the element at <paramref name="index"/> from <paramref name="vector"/> and
/// returns it sign-extended to <see cref="long"/>.
/// </summary>
/// <param name="vector">Source vector.</param>
/// <param name="index">Element index, forwarded to the extraction call.</param>
/// <param name="size">Element size code: 0 = 8-bit, 1 = 16-bit, 2 = 32-bit, 3 = 64-bit.</param>
/// <returns>The selected element as a signed 64-bit value.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="size"/> is not 0, 1, 2 or 3 and SSE2 or SSE4.1 is supported.
/// </exception>
/// <exception cref="PlatformNotSupportedException">Neither SSE4.1 nor SSE2 is supported.</exception>
public static long VectorExtractIntSx(Vector128<float> vector, byte index, int size)
{
    if (Sse41.IsSupported)
    {
        switch (size)
        {
            case 0:
                return (sbyte)Sse41.Extract(Sse.StaticCast<float, byte>(vector), index);

            case 1:
                return (short)Sse2.Extract(Sse.StaticCast<float, ushort>(vector), index);

            case 2:
                return Sse41.Extract(Sse.StaticCast<float, int>(vector), index);

            case 3:
                return Sse41.Extract(Sse.StaticCast<float, long>(vector), index);

            default:
                throw new ArgumentOutOfRangeException(nameof(size));
        }
    }

    if (Sse2.IsSupported)
    {
        // The zero-extending helper does the extraction; the cast narrows the value to the
        // signed element type before it widens to long.
        switch (size)
        {
            case 0:
                return (sbyte)VectorExtractIntZx(vector, index, size);

            case 1:
                return (short)VectorExtractIntZx(vector, index, size);

            case 2:
                return (int)VectorExtractIntZx(vector, index, size);

            case 3:
                return (long)VectorExtractIntZx(vector, index, size);

            default:
                throw new ArgumentOutOfRangeException(nameof(size));
        }
    }

    throw new PlatformNotSupportedException();
}
/// <summary>
/// Reflection scenario: looks up the three-operand <c>Fma.MultiplySubtractAdd</c> overload
/// for <c>Vector128&lt;Single&gt;</c>, invokes it on vectors loaded from the three input arrays,
/// writes the result to the output array and validates it.
/// </summary>
public void RunReflectionScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));

    var parameterTypes = new Type[]
    {
        typeof(Vector128<Single>),
        typeof(Vector128<Single>),
        typeof(Vector128<Single>)
    };
    var method = typeof(Fma).GetMethod(nameof(Fma.MultiplySubtractAdd), parameterTypes);

    var arguments = new object[]
    {
        Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr)),
        Sse.LoadVector128((Single*)(_dataTable.inArray2Ptr)),
        Sse.LoadVector128((Single*)(_dataTable.inArray3Ptr))
    };

    object boxed = method.Invoke(null, arguments);

    Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(boxed));
    ValidateResult(
        _dataTable.inArray1Ptr,
        _dataTable.inArray2Ptr,
        _dataTable.inArray3Ptr,
        _dataTable.outArrayPtr);
}
/// <summary>
/// Scratch routine that runs a selection of AVX, AVX2, SSE4.1, SSE2 and FMA conversion and
/// multiply intrinsics over the start of <paramref name="vs"/>. All results are kept in
/// locals only; nothing is returned or stored.
/// </summary>
/// <param name="vs">
/// Input bytes. At least 64 are required because two 32-byte vectors are loaded from the
/// pinned array (offsets 0 and 32).
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="vs"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="vs"/> has fewer than 64 elements.</exception>
private unsafe void TestAddSum(byte[] vs)
{
    // The widest access below is LoadVector256(p + 32), which reads bytes 32..63.
    const int RequiredLength = 64;

    if (vs == null)
    {
        throw new ArgumentNullException(nameof(vs));
    }

    if (vs.Length < RequiredLength)
    {
        throw new ArgumentException("At least 64 bytes are required.", nameof(vs));
    }

    fixed (byte* p = vs)
    {
        var v = Avx.LoadVector256(p);
        var v2 = Avx.LoadVector256(p + 32);

        // byte -> int32 -> float, then an element-wise square.
        Vector256<int> i1 = Avx2.ConvertToVector256Int32(p);
        Vector256<float> f1 = Avx.ConvertToVector256Single(i1);
        Vector256<float> m1 = Avx.Multiply(f1, f1);

        // byte -> int32 -> double, squared through a fused multiply-add with a zero addend.
        Vector128<int> i128 = Sse41.ConvertToVector128Int32(p);
        Vector256<double> d256 = Avx.ConvertToVector256Double(i128);
        var dZero = Vector256<double>.Zero;
        Vector256<double> ma1 = Fma.MultiplyAdd(d256, d256, dZero);

        // The same fused multiply-add on single-precision lanes.
        var i256 = Avx2.ConvertToVector256Int32(p);
        var f256 = Avx.ConvertToVector256Single(i256);
        var fZero = Vector256<float>.Zero;
        var ma2 = Fma.MultiplyAdd(f256, f256, fZero);

        Vector128<float> s128 = Sse2.ConvertToVector128Single(i128);
        Vector128<float> ms = Sse.MultiplyScalar(s128, s128);

        // x86 / x64 SIMD instruction list (SSE to AVX2):
        //   https://www.officedaytime.com/tips/simd.html
        // pmaddwd:
        //   https://www.officedaytime.com/tips/simdimg/si.php?f=pmaddwd
        Vector128<short> sh128 = Sse41.ConvertToVector128Int16(p);
        Vector128<int> vv3 = Avx.MultiplyAddAdjacent(sh128, sh128);

        var neko = 0;

        // Related intrinsics noted for later experiments:
        //   Avx.MultipleSumAbsoluteDifferences, Avx.MultiplyAddAdjacent, Avx.MultiplyHigh,
        //   Avx.MultiplyHighRoundScale, Avx.MultiplyLow, Avx.MultiplyScalar,
        //   Fma.MultiplyAdd, Fma.MultiplyAddNegated, Fma.MultiplyAddNegatedScalar,
        //   Fma.MultiplyAddScalar, Fma.MultiplyAddSubtract, Fma.MultiplySubtract,
        //   Fma.MultiplySubtractAdd, Fma.MultiplySubtractNegated,
        //   Fma.MultiplySubtractNegatedScalar, Fma.MultiplySubtractScalar
    }
}
/// <summary>
/// Stores all 128 bits of <paramref name="Value"/> at <paramref name="Position"/>.
/// </summary>
/// <param name="Position">Target address; write access is checked at offsets 0 and 15.</param>
/// <param name="Value">Vector to store.</param>
/// <exception cref="PlatformNotSupportedException">SSE is not supported.</exception>
public void WriteVector128(long Position, Vector128<float> Value)
{
    EnsureAccessIsValid(Position + 0, AMemoryPerm.Write);
    EnsureAccessIsValid(Position + 15, AMemoryPerm.Write);

    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    // The offset from RamPtr is Position narrowed by the uint cast.
    float* destination = (float*)(RamPtr + (uint)Position);
    Sse.Store(destination, Value);
}
/// <summary>
/// Stores the scalar double view of <paramref name="Value"/> at <paramref name="Position"/>
/// using <c>Sse2.StoreScalar</c>.
/// </summary>
/// <param name="Position">Target address; write access is checked at offsets 0 and 7.</param>
/// <param name="Value">Vector whose scalar double element is stored.</param>
/// <exception cref="PlatformNotSupportedException">SSE2 is not supported.</exception>
public void WriteVector64(long Position, Vector128<float> Value)
{
    EnsureAccessIsValid(Position + 0, AMemoryPerm.Write);
    EnsureAccessIsValid(Position + 7, AMemoryPerm.Write);

    if (!Sse2.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    // The offset from RamPtr is Position narrowed by the uint cast.
    double* destination = (double*)(RamPtr + (uint)Position);
    Vector128<double> asDouble = Sse.StaticCast<float, double>(Value);

    Sse2.StoreScalar(destination, asDouble);
}
/// <summary>
/// Loads a 128-bit vector from <paramref name="Position"/>.
/// </summary>
/// <param name="Position">Source address; read access is checked at offsets 0 and 15.</param>
/// <returns>The vector loaded with <c>Sse.LoadVector128</c>.</returns>
/// <exception cref="PlatformNotSupportedException">SSE is not supported.</exception>
public Vector128<float> ReadVector128(long Position)
{
    EnsureAccessIsValid(Position + 0, AMemoryPerm.Read);
    EnsureAccessIsValid(Position + 15, AMemoryPerm.Read);

    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    // The offset from RamPtr is Position narrowed by the uint cast.
    float* source = (float*)(RamPtr + (uint)Position);
    return Sse.LoadVector128(source);
}
/// <summary>
/// Loads a scalar double from <paramref name="Position"/> with
/// <c>Sse2.LoadScalarVector128</c> and returns it cast to <c>Vector128&lt;float&gt;</c>.
/// </summary>
/// <param name="Position">Source address; read access is checked at offsets 0 and 7.</param>
/// <returns>The loaded vector cast to single-precision lanes.</returns>
/// <exception cref="PlatformNotSupportedException">SSE2 is not supported.</exception>
public Vector128<float> ReadVector64(long Position)
{
    EnsureAccessIsValid(Position + 0, AMemoryPerm.Read);
    EnsureAccessIsValid(Position + 7, AMemoryPerm.Read);

    if (!Sse2.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    // The offset from RamPtr is Position narrowed by the uint cast.
    double* source = (double*)(RamPtr + (uint)Position);
    Vector128<double> loaded = Sse2.LoadScalarVector128(source);

    return Sse.StaticCast<double, float>(loaded);
}
/// <summary>
/// Struct-field scenario: pins both fields, loads them with <c>Sse.LoadVector128</c>,
/// applies <c>Sse41.RoundToNegativeInfinityScalar</c>, writes the result to the test
/// class's output array and asks the test class to validate it.
/// </summary>
/// <param name="testClass">Test instance providing the output array and validation.</param>
public void RunStructFldScenario_Load(SimpleBinaryOpTest__RoundToNegativeInfinityScalarSingle testClass)
{
    fixed (Vector128<Single>* pFld1 = &_fld1)
    fixed (Vector128<Single>* pFld2 = &_fld2)
    {
        Vector128<Single> upper = Sse.LoadVector128((Single*)(pFld1));
        Vector128<Single> value = Sse.LoadVector128((Single*)(pFld2));

        var rounded = Sse41.RoundToNegativeInfinityScalar(upper, value);

        Unsafe.Write(testClass._dataTable.outArrayPtr, rounded);
        testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
    }
}
/// <summary>
/// Class-variable scenario: pins <c>_clsVar1</c>, loads it with <c>Sse.LoadVector128</c>,
/// applies <c>Sse41.RoundToNearestIntegerScalar</c>, writes the result to the output
/// array and validates it.
/// </summary>
public void RunClsVarScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario_Load));

    fixed (Vector128<Single>* pClsVar1 = &_clsVar1)
    {
        Vector128<Single> operand = Sse.LoadVector128((Single*)(pClsVar1));
        var rounded = Sse41.RoundToNearestIntegerScalar(operand);

        Unsafe.Write(_dataTable.outArrayPtr, rounded);
        ValidateResult(_clsVar1, _dataTable.outArrayPtr);
    }
}
/// <summary>
/// Class-field scenario: pins <c>_fld1</c>, loads it with <c>Sse.LoadVector128</c>,
/// applies <c>Avx2.BroadcastScalarToVector128</c>, writes the result to the output array
/// and validates it.
/// </summary>
public void RunClassFldScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario_Load));

    fixed (Vector128<Single>* pFld1 = &_fld1)
    {
        Vector128<Single> operand = Sse.LoadVector128((Single*)(pFld1));
        var broadcast = Avx2.BroadcastScalarToVector128(operand);

        Unsafe.Write(_dataTable.outArrayPtr, broadcast);
        ValidateResult(_fld1, _dataTable.outArrayPtr);
    }
}
/// <summary>
/// Class-field scenario: pins <c>_fld1</c>, loads it with <c>Sse.LoadVector128</c>,
/// applies <c>Sse41.RoundToPositiveInfinity</c>, writes the result to the output array
/// and validates it.
/// </summary>
public void RunClassFldScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario_Load));

    fixed (Vector128<Single>* pFld1 = &_fld1)
    {
        Vector128<Single> operand = Sse.LoadVector128((Single*)(pFld1));
        var rounded = Sse41.RoundToPositiveInfinity(operand);

        Unsafe.Write(_dataTable.outArrayPtr, rounded);
        ValidateResult(_fld1, _dataTable.outArrayPtr);
    }
}
/// <summary>
/// Struct-field scenario: pins both fields, loads them with <c>Sse.LoadVector128</c>,
/// applies <c>Sse.CompareLessThanOrEqual</c>, writes the result to the test class's
/// output array and asks the test class to validate it.
/// </summary>
/// <param name="testClass">Test instance providing the output array and validation.</param>
public void RunStructFldScenario_Load(SimpleBinaryOpTest__CompareLessThanOrEqualSingle testClass)
{
    fixed (Vector128<Single>* pFld1 = &_fld1)
    fixed (Vector128<Single>* pFld2 = &_fld2)
    {
        Vector128<Single> left = Sse.LoadVector128((Single*)(pFld1));
        Vector128<Single> right = Sse.LoadVector128((Single*)(pFld2));

        var mask = Sse.CompareLessThanOrEqual(left, right);

        Unsafe.Write(testClass._dataTable.outArrayPtr, mask);
        testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
    }
}
/// <summary>
/// Executes opcode <c>0x4E826820</c> with V1 built from A0..A3 and V2 built from B0..B3,
/// then asserts that elements 0..3 of V0 are A1, B1, A3 and B3 respectively.
/// </summary>
public void Trn2_V_4S([Random(2)] uint A0, [Random(2)] uint A1, [Random(2)] uint A2, [Random(2)] uint A3,
                      [Random(2)] uint B0, [Random(2)] uint B1, [Random(2)] uint B2, [Random(2)] uint B3)
{
    uint Opcode = 0x4E826820;

    // SetVector128 receives the elements from index 3 down to index 0.
    Vector128<float> V1 = Sse.StaticCast<uint, float>(Sse2.SetVector128(A3, A2, A1, A0));
    Vector128<float> V2 = Sse.StaticCast<uint, float>(Sse2.SetVector128(B3, B2, B1, B0));

    AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);

    Vector128<uint> lanes = Sse.StaticCast<float, uint>(ThreadState.V0);

    Assert.That(Sse41.Extract(lanes, (byte)0), Is.EqualTo(A1));
    Assert.That(Sse41.Extract(lanes, (byte)1), Is.EqualTo(B1));
    Assert.That(Sse41.Extract(lanes, (byte)2), Is.EqualTo(A3));
    Assert.That(Sse41.Extract(lanes, (byte)3), Is.EqualTo(B3));
}
/// <summary>
/// Struct-field scenario: pins both fields, loads them with <c>Sse.LoadVector128</c>,
/// applies <c>Sse3.HorizontalAdd</c>, writes the result to the test class's output array
/// and asks the test class to validate it.
/// </summary>
/// <param name="testClass">Test instance providing the output array and validation.</param>
public void RunStructFldScenario_Load(HorizontalBinaryOpTest__HorizontalAddSingle testClass)
{
    fixed (Vector128<Single>* pFld1 = &_fld1)
    fixed (Vector128<Single>* pFld2 = &_fld2)
    {
        Vector128<Single> left = Sse.LoadVector128((Single*)(pFld1));
        Vector128<Single> right = Sse.LoadVector128((Single*)(pFld2));

        var sums = Sse3.HorizontalAdd(left, right);

        Unsafe.Write(testClass._dataTable.outArrayPtr, sums);
        testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
    }
}
/// <summary>
/// Writes element 0 of <paramref name="value"/>, narrowed to a byte, at
/// <paramref name="position"/> through <c>WriteByte</c>.
/// </summary>
/// <param name="position">Target address passed to <c>WriteByte</c>.</param>
/// <param name="value">Vector whose lowest element supplies the byte.</param>
public void WriteVector8(long position, Vector128<float> value)
{
    byte lowest;

    if (Sse41.IsSupported)
    {
        lowest = Sse41.Extract(Sse.StaticCast<float, byte>(value), 0);
    }
    else if (Sse2.IsSupported)
    {
        // Here the vector is viewed as 16-bit elements and element 0 is cast down to a byte.
        lowest = (byte)Sse2.Extract(Sse.StaticCast<float, ushort>(value), 0);
    }
    else
    {
        lowest = (byte)VectorHelper.VectorExtractIntZx(value, 0, 0);
    }

    WriteByte(position, lowest);
}
/// <summary>
/// Reads 64 bits at <paramref name="position"/> into a <c>Vector128&lt;float&gt;</c>.
/// </summary>
/// <param name="position">Source address.</param>
/// <returns>
/// When SSE2 is supported and <paramref name="position"/> is a multiple of 8, the result of
/// <c>Sse2.LoadScalarVector128</c> on the translated address. Otherwise a zero vector with
/// the value from <c>ReadUInt64</c> inserted at element 0 with size code 3.
/// </returns>
public Vector128<float> ReadVector64(long position)
{
    bool canLoadDirectly = Sse2.IsSupported && (position & 7) == 0;

    if (!canLoadDirectly)
    {
        Vector128<float> zero = VectorHelper.VectorSingleZero();
        return VectorHelper.VectorInsertInt(ReadUInt64(position), zero, 0, 3);
    }

    double* source = (double*)Translate(position);
    Vector128<double> loaded = Sse2.LoadScalarVector128(source);

    return Sse.StaticCast<double, float>(loaded);
}