// Basic scenario: the vector is loaded from the input buffer (viewed as Single*)
// directly in the argument position of Sse41.Extract, the extracted value is
// written to the output buffer, and both buffers are handed to ValidateResult.
public void RunBasicScenario_Load()
{
    // The immediate is 129, which is larger than the number of Single lanes.
    // NOTE(review): presumably this exercises out-of-range immediates - confirm
    // against ValidateResult.
    var extracted = Sse41.Extract(Sse.LoadVector128((Single*)_dataTable.inArrayPtr), 129);

    Unsafe.Write(_dataTable.outArrayPtr, extracted);

    ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
}
// Returns element 0 of `value` reinterpreted as a vector of 64-bit integers.
//
// value:   vector whose bits are reinterpreted; only element 0 of the Int64 view
//          (elements 0 and 1 of the Int32 view) is read.
// returns: that element as a signed 64-bit integer.
//
// When the 64-bit extract is unavailable the value is rebuilt from two 32-bit
// extracts. That path is wrapped in `unchecked` and zero-extends both halves, so
// it yields the same bits even when the assembly is compiled with overflow
// checking enabled (negative halves would otherwise throw OverflowException).
internal static unsafe long Extract64(Vector128<sbyte> value)
{
    // This check is resolved statically at JIT time.
    if (Sse41.X64.IsSupported)
    {
        return Sse41.X64.Extract(value.AsInt64(), 0);
    }

    var lanes = value.AsInt32();

    unchecked
    {
        ulong low = (uint)Sse41.Extract(lanes, 0);
        ulong high = (uint)Sse41.Extract(lanes, 1);

        return (long)(low | (high << 32));
    }
}
// Local-variable scenario: the operand is first read into a local with Unsafe.Read,
// then passed to Sse41.Extract with immediate 1. The extracted value is written to
// the output buffer and validated against the local operand.
public void RunLclVarScenario_UnsafeRead()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));

    var operand = Unsafe.Read<Vector128<Single>>(_dataTable.inArrayPtr);
    var extracted = Sse41.Extract(operand, 1);

    Unsafe.Write(_dataTable.outArrayPtr, extracted);

    ValidateResult(operand, _dataTable.outArrayPtr);
}
// Basic scenario: an aligned load of the input buffer (viewed as UInt64*) is passed
// inline to Sse41.Extract with immediate 1; the extracted value is written to the
// output buffer and both buffers are validated.
public void RunBasicScenario_LoadAligned()
{
    var extracted = Sse41.Extract(Sse2.LoadAlignedVector128((UInt64*)_dataTable.inArrayPtr), 1);

    Unsafe.Write(_dataTable.outArrayPtr, extracted);

    ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
}
// Local-variable scenario: the operand is loaded with an aligned load (input buffer
// viewed as Single*) into a local, then passed to Sse41.Extract with immediate 1.
// The extracted value is written to the output buffer and validated against the local.
public void RunLclVarScenario_LoadAligned()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));

    var operand = Sse.LoadAlignedVector128((Single*)_dataTable.inArrayPtr);
    var extracted = Sse41.Extract(operand, 1);

    Unsafe.Write(_dataTable.outArrayPtr, extracted);

    ValidateResult(operand, _dataTable.outArrayPtr);
}
// Class-local-field scenario: a fresh ExtractScalarTest__ExtractSingle1 instance is
// created and its _fld is passed to Sse41.Extract with immediate 1. The extracted
// value is written to the output buffer and validated against that field.
public void RunClassLclFldScenario()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));

    var instance = new ExtractScalarTest__ExtractSingle1();
    var extracted = Sse41.Extract(instance._fld, 1);

    Unsafe.Write(_dataTable.outArrayPtr, extracted);

    ValidateResult(instance._fld, _dataTable.outArrayPtr);
}
// Basic scenario: the operand is read with Unsafe.Read directly in the argument
// position of Sse41.Extract; the extracted value is written to the output buffer
// and both buffers are validated.
public void RunBasicScenario_UnsafeRead()
{
    // The immediate is 129, which is larger than the number of Single lanes.
    // NOTE(review): presumably this exercises out-of-range immediates - confirm
    // against ValidateResult.
    var extracted = Sse41.Extract(Unsafe.Read<Vector128<Single>>(_dataTable.inArrayPtr), 129);

    Unsafe.Write(_dataTable.outArrayPtr, extracted);

    ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
}
// Class-variable scenario: _clsVar is passed to Sse41.Extract, the extracted value
// is written to the output buffer, and the result is validated against _clsVar.
public void RunClsVarScenario()
{
    // The immediate is 129.
    // NOTE(review): presumably this exercises out-of-range immediates - confirm
    // against ValidateResult.
    var extracted = Sse41.Extract(_clsVar, 129);

    Unsafe.Write(_dataTable.outArrayPtr, extracted);

    ValidateResult(_clsVar, _dataTable.outArrayPtr);
}
// Struct-local-field scenario: a TestStruct is obtained from TestStruct.Create() and
// its _fld is passed to Sse41.Extract with immediate 1. The extracted value is
// written to the output buffer and validated against that field.
public void RunStructLclFldScenario()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));

    var holder = TestStruct.Create();
    var extracted = Sse41.Extract(holder._fld, 1);

    Unsafe.Write(_dataTable.outArrayPtr, extracted);

    ValidateResult(holder._fld, _dataTable.outArrayPtr);
}
// Reads one 64-bit element of `Vector` and returns its bits as a double.
//
// Vector:  source vector, reinterpreted as 64-bit integers for the extract.
// Index:   element index forwarded to Sse41.Extract.
// returns: the extracted 64-bit pattern converted with BitConverter.Int64BitsToDouble.
// throws:  PlatformNotSupportedException when Sse41 is not supported.
protected static double VectorExtractDouble(Vector128<float> Vector, byte Index)
{
    if (Sse41.IsSupported)
    {
        Vector128<long> Lanes = Sse.StaticCast<float, long>(Vector);

        return BitConverter.Int64BitsToDouble(Sse41.Extract(Lanes, Index));
    }

    throw new PlatformNotSupportedException();
}
public void Frecps_V([Random(10)] float A, [Random(10)] float B)
{
    // Runs opcode 0x4E20FC44 with V2 filled with A and V0 filled with B, then
    // checks that every 32-bit lane of V4 equals 2 - (A * B).
    AThreadState ThreadState = SingleOpcode(0x4E20FC44,
        V2: Sse.SetAllVector128(A),
        V0: Sse.SetAllVector128(B));

    // The expected value is computed in double precision and rounded to single
    // once. Computing it as `2 - (A * B)` in float rounds the product before the
    // subtraction, which can differ by an ulp and make the exact comparison fail.
    float Result = (float)(2 - ((double)A * (double)B));

    Assert.That(Sse41.Extract(ThreadState.V4, (byte)0), Is.EqualTo(Result));
    Assert.That(Sse41.Extract(ThreadState.V4, (byte)1), Is.EqualTo(Result));
    Assert.That(Sse41.Extract(ThreadState.V4, (byte)2), Is.EqualTo(Result));
    Assert.That(Sse41.Extract(ThreadState.V4, (byte)3), Is.EqualTo(Result));
}
// Reads one 32-bit element of `Vector` and returns its bits as a float.
//
// Vector:  source vector, reinterpreted as 32-bit integers for the extract.
// Index:   element index forwarded to Sse41.Extract.
// returns: the extracted 32-bit pattern converted with BitConverter.Int32BitsToSingle.
// throws:  PlatformNotSupportedException when Sse41 is not supported.
protected static float VectorExtractSingle(Vector128<float> Vector, byte Index)
{
    if (Sse41.IsSupported)
    {
        Vector128<int> Lanes = Sse.StaticCast<float, int>(Vector);

        return BitConverter.Int32BitsToSingle(Sse41.Extract(Lanes, Index));
    }

    throw new PlatformNotSupportedException();
}
// Class-variable scenario: _clsVar is passed to Sse41.Extract with immediate 1, the
// extracted value is written to the output buffer, and the result is validated
// against _clsVar.
public void RunClsVarScenario()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));

    var extracted = Sse41.Extract(_clsVar, 1);

    Unsafe.Write(_dataTable.outArrayPtr, extracted);

    ValidateResult(_clsVar, _dataTable.outArrayPtr);
}
// Basic scenario: a Vector128 of UInt64 is read with Unsafe.Read directly in the
// argument position of Sse41.Extract (immediate 1); the extracted value is written
// to the output buffer and both buffers are validated.
public void RunBasicScenario_UnsafeRead()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));

    var extracted = Sse41.Extract(Unsafe.Read<Vector128<UInt64>>(_dataTable.inArrayPtr), 1);

    Unsafe.Write(_dataTable.outArrayPtr, extracted);

    ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
}
// Basic scenario: an aligned load of the input buffer (viewed as UInt64*) is passed
// inline to Sse41.Extract with immediate 1; the extracted value is written to the
// output buffer and both buffers are validated.
public void RunBasicScenario_LoadAligned()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));

    var extracted = Sse41.Extract(Sse2.LoadAlignedVector128((UInt64*)_dataTable.inArrayPtr), 1);

    Unsafe.Write(_dataTable.outArrayPtr, extracted);

    ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
}
// Reads one 64-bit element of `vector` and returns its bits as a double.
//
// vector:  source vector.
// index:   element index forwarded to the extract.
// returns: the extracted 64-bit pattern converted with BitConverter.Int64BitsToDouble.
// throws:  PlatformNotSupportedException when neither Sse41 nor Sse2 is supported.
public static double VectorExtractDouble(Vector128<float> vector, byte index)
{
    if (Sse41.IsSupported)
    {
        long sse41Bits = Sse41.Extract(Sse.StaticCast<float, long>(vector), index);

        return BitConverter.Int64BitsToDouble(sse41Bits);
    }

    if (Sse2.IsSupported)
    {
        // Falls back to VectorExtractIntZx with size 3, the same size value that
        // VectorExtractIntSx-style helpers use for 64-bit elements.
        long sse2Bits = (long)VectorExtractIntZx(vector, index, 3);

        return BitConverter.Int64BitsToDouble(sse2Bits);
    }

    throw new PlatformNotSupportedException();
}
// Extracts one integer element of `vector` and returns it sign-extended to 64 bits.
//
// vector:  source vector, reinterpreted according to `size`.
// index:   element index forwarded to the extract.
// size:    0 = 8-bit, 1 = 16-bit, 2 = 32-bit, 3 = 64-bit element.
// returns: the element converted to its signed type and widened to long.
// throws:  ArgumentOutOfRangeException when `size` is not 0..3 (on a supported platform);
//          PlatformNotSupportedException when neither Sse41 nor Sse2 is supported.
public static long VectorExtractIntSx(Vector128<float> vector, byte index, int size)
{
    if (Sse41.IsSupported)
    {
        switch (size)
        {
            case 0:
                return (sbyte)Sse41.Extract(Sse.StaticCast<float, byte>(vector), index);

            case 1:
                // 16-bit extraction goes through Sse2.Extract.
                return (short)Sse2.Extract(Sse.StaticCast<float, ushort>(vector), index);

            case 2:
                return Sse41.Extract(Sse.StaticCast<float, int>(vector), index);

            case 3:
                return Sse41.Extract(Sse.StaticCast<float, long>(vector), index);

            default:
                throw new ArgumentOutOfRangeException(nameof(size));
        }
    }

    if (Sse2.IsSupported)
    {
        // Reuse the zero-extending helper and reinterpret its result at the
        // requested signed width.
        switch (size)
        {
            case 0:
                return (sbyte)VectorExtractIntZx(vector, index, size);

            case 1:
                return (short)VectorExtractIntZx(vector, index, size);

            case 2:
                return (int)VectorExtractIntZx(vector, index, size);

            case 3:
                return (long)VectorExtractIntZx(vector, index, size);

            default:
                throw new ArgumentOutOfRangeException(nameof(size));
        }
    }

    throw new PlatformNotSupportedException();
}
public void Trn2_V_4S([Random(2)] uint A0,
                      [Random(2)] uint A1,
                      [Random(2)] uint A2,
                      [Random(2)] uint A3,
                      [Random(2)] uint B0,
                      [Random(2)] uint B1,
                      [Random(2)] uint B2,
                      [Random(2)] uint B3)
{
    // Runs opcode 0x4E826820 with V1 = (A0..A3) and V2 = (B0..B3), lane 0 first,
    // and expects V0 to hold (A1, B1, A3, B3).
    uint Opcode = 0x4E826820;

    Vector128<float> V1 = Sse.StaticCast<uint, float>(Sse2.SetVector128(A3, A2, A1, A0));
    Vector128<float> V2 = Sse.StaticCast<uint, float>(Sse2.SetVector128(B3, B2, B1, B0));

    AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);

    // View the result register as four unsigned 32-bit lanes once.
    Vector128<uint> Actual = Sse.StaticCast<float, uint>(ThreadState.V0);

    Assert.That(Sse41.Extract(Actual, (byte)0), Is.EqualTo(A1));
    Assert.That(Sse41.Extract(Actual, (byte)1), Is.EqualTo(B1));
    Assert.That(Sse41.Extract(Actual, (byte)2), Is.EqualTo(A3));
    Assert.That(Sse41.Extract(Actual, (byte)3), Is.EqualTo(B3));
}
[TestCase((ushort)0x0001, 0x33800000u)] // 5.96046448e-8 (Smallest Subnormal)
public void Fcvtl_V_f16(ushort Value, uint Result)
{
    // Runs opcode 0x0E217801 with every 16-bit lane of V0 set to Value and expects
    // every 32-bit lane of V1 to carry the bit pattern Result.
    uint Opcode = 0x0E217801;

    Vector128<float> V0 = Sse.StaticCast<ushort, float>(Sse2.SetAllVector128(Value));

    AThreadState ThreadState = SingleOpcode(Opcode, V0: V0);

    // View the result register as four unsigned 32-bit lanes once.
    Vector128<uint> Actual = Sse.StaticCast<float, uint>(ThreadState.V1);

    Assert.Multiple(() =>
    {
        Assert.That(Sse41.Extract(Actual, (byte)0), Is.EqualTo(Result));
        Assert.That(Sse41.Extract(Actual, (byte)1), Is.EqualTo(Result));
        Assert.That(Sse41.Extract(Actual, (byte)2), Is.EqualTo(Result));
        Assert.That(Sse41.Extract(Actual, (byte)3), Is.EqualTo(Result));
    });
}
// Writes the lowest byte of `value` at `position` via WriteByte.
//
// position: destination forwarded to WriteByte.
// value:    source vector; only byte element 0 is used.
// throws:   PlatformNotSupportedException when neither Sse41 nor Sse2 is supported.
public void WriteVector8(long position, Vector128<float> value)
{
    byte lowest;

    if (Sse41.IsSupported)
    {
        lowest = Sse41.Extract(Sse.StaticCast<float, byte>(value), 0);
    }
    else if (Sse2.IsSupported)
    {
        // Take 16-bit element 0 and keep only its low byte.
        lowest = (byte)Sse2.Extract(Sse.StaticCast<float, ushort>(value), 0);
    }
    else
    {
        throw new PlatformNotSupportedException();
    }

    WriteByte(position, lowest);
}
// Writes the lowest byte of `value` at `position` via WriteByte.
//
// position: destination forwarded to WriteByte.
// value:    source vector; only byte element 0 is used.
//
// Without Sse41 or Sse2 the byte comes from VectorHelper.VectorExtractIntZx(value, 0, 0).
public void WriteVector8(long position, Vector128<float> value)
{
    byte lowest;

    if (Sse41.IsSupported)
    {
        lowest = Sse41.Extract(Sse.StaticCast<float, byte>(value), 0);
    }
    else if (Sse2.IsSupported)
    {
        // Take 16-bit element 0 and keep only its low byte.
        lowest = (byte)Sse2.Extract(Sse.StaticCast<float, ushort>(value), 0);
    }
    else
    {
        lowest = (byte)VectorHelper.VectorExtractIntZx(value, 0, 0);
    }

    WriteByte(position, lowest);
}
public void Frecps_V([Random(10)] float A, [Random(10)] float B)
{
    // Runs opcode 0x4E20FC44 with V2 filled with A and V0 filled with B, then
    // checks that every 32-bit lane of V4 equals 2 - (A * B).
    AThreadState ThreadState = SingleOpcode(0x4E20FC44,
        V2: Sse.SetAllVector128(A),
        V0: Sse.SetAllVector128(B));

    // The product and subtraction are carried out in double precision, then
    // narrowed to single once.
    double Product = (double)A * (double)B;
    float Result = (float)(2 - Product);

    var Actual = ThreadState.V4;

    Assert.Multiple(() =>
    {
        Assert.That(Sse41.Extract(Actual, (byte)0), Is.EqualTo(Result));
        Assert.That(Sse41.Extract(Actual, (byte)1), Is.EqualTo(Result));
        Assert.That(Sse41.Extract(Actual, (byte)2), Is.EqualTo(Result));
        Assert.That(Sse41.Extract(Actual, (byte)3), Is.EqualTo(Result));
    });
}
// Updates a 64-bit CRC over `data` using carry-less multiplication (Pclmulqdq).
//
// crc:     incoming CRC value; it is bitwise-complemented before being mixed in.
// data:    input bytes, consumed as little pairs of UInt64 (16 bytes per step).
// length:  number of bytes to process.
// returns: the bitwise complement of the 64-bit value formed from 32-bit lanes
//          3 (high half) and 2 (low half) of the final reduction vector.
//
// The code reads data[0..15] up front and another 16 bytes at bufPos (>= 16) after
// the loop, so `data` must hold at least 32 bytes.
// NOTE(review): `length` is unsigned, so `length -= 16` wraps when length < 16
// (in an unchecked context) - callers presumably guarantee length >= 32 and a
// multiple of 16; confirm.
internal static ulong Step(ulong crc, byte[] data, uint length)
{
    // Offset of the next 16-byte chunk; the first chunk (offset 0) is consumed below.
    int bufPos = 16;
    // k1/k2 are passed to Fold as the folding constants; mu/pol are used by the
    // final reduction. NOTE(review): presumably derived from the CRC polynomial - confirm.
    const ulong k1 = 0xe05dd497ca393ae4;
    const ulong k2 = 0xdabe95afc7875f40;
    const ulong mu = 0x9c3e466c172963d5;
    const ulong pol = 0x92d8af2baf0e1e85;
    Vector128 <ulong> foldConstants1 = Vector128.Create(k1, k2);
    Vector128 <ulong> foldConstants2 = Vector128.Create(mu, pol);
    // Complemented input CRC in element 0, zero in element 1.
    Vector128 <ulong> initialCrc = Vector128.Create(~crc, 0);
    // Account for the first 16 bytes, which are consumed before the loop.
    length -= 16;
    // Initial CRC can simply be added to data
    ShiftRight128(initialCrc, 0, out Vector128 <ulong> crc0, out Vector128 <ulong> crc1);
    // XOR crc0 into the first 16 data bytes, fold that, then XOR in crc1.
    Vector128 <ulong> accumulator = Sse2.Xor(Fold(Sse2.Xor(crc0, Vector128.Create(BitConverter.ToUInt64(data, 0), BitConverter.ToUInt64(data, 8))), foldConstants1), crc1);
    // Fold one 16-byte chunk per iteration while at least two chunks remain; the
    // last chunk is handled separately after the loop.
    while (length >= 32)
    {
        accumulator = Fold(Sse2.Xor(Vector128.Create(BitConverter.ToUInt64(data, bufPos), BitConverter.ToUInt64(data, bufPos + 8)), accumulator), foldConstants1);
        length -= 16;
        bufPos += 16;
    }
    // Mix in the final 16-byte chunk at bufPos.
    Vector128 <ulong> p = Sse2.Xor(accumulator, Vector128.Create(BitConverter.ToUInt64(data, bufPos), BitConverter.ToUInt64(data, bufPos + 8)));
    // Carry-less multiply of p with foldConstants1, XORed with p shifted right by 8 bytes.
    Vector128 <ulong> r = Sse2.Xor(Pclmulqdq.CarrylessMultiply(p, foldConstants1, 0x10), Sse2.ShiftRightLogical128BitLane(p, 8));
    // Final Barrett reduction
    Vector128 <ulong> t1 = Pclmulqdq.CarrylessMultiply(r, foldConstants2, 0x00);
    Vector128 <ulong> t2 = Sse2.Xor(Sse2.Xor(Pclmulqdq.CarrylessMultiply(t1, foldConstants2, 0x10), Sse2.ShiftLeftLogical128BitLane(t1, 8)), r);
    // Reassemble the upper 64 bits of t2 from its two 32-bit lanes and complement them.
    return(~(((ulong)Sse41.Extract(t2.AsUInt32(), 3) << 32) | Sse41.Extract(t2.AsUInt32(), 2)));
}
// Computes the dot product of the first 32 bytes of `input` (read as unsigned bytes)
// with the first 32 bytes of `weights` (read as signed bytes) and adds biases[0].
//
// input:   at least 32 elements are read through a Vector256 pointer; no length check is done.
// biases:  only element 0 is used.
// weights: at least 32 elements are read through a Vector256 pointer; no length check is done.
// returns: horizontal sum of the products plus biases[0].
private unsafe int AffinePropagateAvx2(Span<sbyte> input, int[] biases, sbyte[] weights)
{
    fixed (sbyte* pInput = input)
    fixed (sbyte* pWeights = weights)
    {
        Vector256<byte> activations = *(Vector256<byte>*)pInput;
        Vector256<sbyte> weightRow = *(Vector256<sbyte>*)pWeights;

        // byte x sbyte products, adjacent pairs summed into 16-bit lanes.
        Vector256<short> pairSums = Avx2.MultiplyAddAdjacent(activations, weightRow);

        // Multiplying by 1 and adding adjacent pairs widens to eight 32-bit sums.
        Vector256<int> quadSums = Avx2.MultiplyAddAdjacent(pairSums, Vector256.Create((short)1));

        // Fold 256 -> 128 bits, then add the lane-reversed copy (shuffle 0x1b) so that
        // lanes 0 and 1 together contain every partial sum exactly once.
        Vector128<int> folded = Sse2.Add(quadSums.GetLower(), Avx2.ExtractVector128(quadSums, 1));
        folded = Sse2.Add(folded, Sse2.Shuffle(folded, 0x1b));

        return Sse2.ConvertToInt32(folded) + Sse41.Extract(folded, 1) + biases[0];
    }
}
// Loads 16 bytes from `ptr`, subtracts `subtmp`, gathers two groups of elements with
// `mask0`/`mask1`, converts each group to floats, takes the dot product of each group
// with `mul0`/`mul1`, and returns the sum of the two dot products as an int.
public unsafe int ParseSIMD()
{
    var raw = Sse2.LoadVector128(ptr);
    var adjusted = Sse2.Subtract(Sse.StaticCast<byte, sbyte>(raw), subtmp);

    var firstGroup = Sse2.ConvertToVector128Single(
        Sse.StaticCast<sbyte, int>(Ssse3.Shuffle(adjusted, mask0)));
    var secondGroup = Sse2.ConvertToVector128Single(
        Sse.StaticCast<sbyte, int>(Ssse3.Shuffle(adjusted, mask1)));

    // Control byte 0b11111000: the result is read back from element 3 below.
    var firstDot = Sse2.ConvertToVector128Int32(Sse41.DotProduct(firstGroup, mul0, 0b11111000));
    var secondDot = Sse2.ConvertToVector128Int32(Sse41.DotProduct(secondGroup, mul1, 0b11111000));

    return Sse41.Extract(Sse2.Add(firstDot, secondDot), 3);
}
public void Fcmgt_Fcmge_Fcmeq_Fcmle_Fcmlt_Zero_V_2S([ValueSource("_floats_")] [Random(RndCnt)] float A,
                                                    [Values(0u, 1u, 2u, 3u)] uint opU, // GT, GE, EQ, LE
                                                    [Values(0u, 1u)] uint bit13) // "LT"
{
    // Builds the opcode from opU/bit13, runs it with V1 = (A, A, 0, 0) and a random
    // V0, and expects lanes 0 and 1 of V0 to be all-ones or all-zeros according to
    // the selected comparison of A against zero, with lanes 2 and 3 cleared.
    uint Opcode = 0x0EA0C820 | (((opU & 1) & ~bit13) << 29) | (bit13 << 13) | (((opU >> 1) & ~bit13) << 12);

    Vector128<float> V0 = Sse.SetAllVector128(TestContext.CurrentContext.Random.NextFloat());
    Vector128<float> V1 = Sse.SetVector128(0, 0, A, A);

    AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);

    byte[] Ones = new byte[] { 0xFF, 0xFF, 0xFF, 0xFF };
    byte[] Zeros = new byte[] { 0x00, 0x00, 0x00, 0x00 };

    float Zero = +0f;

    // Stays null if opU is outside 0..3 while bit13 is clear.
    byte[] Exp = null;

    if (bit13 != 0)
    {
        // LT
        Exp = (Zero > A ? Ones : Zeros);
    }
    else if (opU == 0)
    {
        // GT
        Exp = (A > Zero ? Ones : Zeros);
    }
    else if (opU == 1)
    {
        // GE
        Exp = (A >= Zero ? Ones : Zeros);
    }
    else if (opU == 2)
    {
        // EQ
        Exp = (A == Zero ? Ones : Zeros);
    }
    else if (opU == 3)
    {
        // LE
        Exp = (Zero >= A ? Ones : Zeros);
    }

    Assert.Multiple(() =>
    {
        Assert.That(BitConverter.GetBytes(Sse41.Extract(ThreadState.V0, (byte)0)), Is.EquivalentTo(Exp));
        Assert.That(BitConverter.GetBytes(Sse41.Extract(ThreadState.V0, (byte)1)), Is.EquivalentTo(Exp));
        Assert.That(Sse41.Extract(ThreadState.V0, (byte)2), Is.Zero);
        Assert.That(Sse41.Extract(ThreadState.V0, (byte)3), Is.Zero);
    });

    CompareAgainstUnicorn();
}
// Returns the 32-bit float element of `Vector` at position `Index`.
//
// Vector:  source vector.
// Index:   element index.
// returns: the selected element.
// throws:  PlatformNotSupportedException when neither Sse41 nor Sse2 is supported.
public static float VectorExtractSingle(Vector128<float> Vector, byte Index)
{
    if (Sse41.IsSupported)
    {
        return Sse41.Extract(Vector, Index);
    }

    if (!Sse2.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    // SSE2 path: rebuild the 32-bit pattern from the two 16-bit halves of the element.
    Vector128<ushort> Halves = Sse.StaticCast<float, ushort>(Vector);

    int LowHalf = Sse2.Extract(Halves, (byte)(Index * 2));
    int HighHalf = Sse2.Extract(Halves, (byte)(Index * 2 + 1));

    int Bits = (HighHalf << 16) | LowHalf;

    return BitConverter.Int32BitsToSingle(Bits);
}
// Element-wise signed division of two 16 x sbyte vectors.
//
// Each element is pulled out with Sse41.Extract (through a byte view), divided as
// sbyte, narrowed back to sbyte, and the 16 quotients are packed into a new vector.
// Elements are processed in index order, so a zero divisor raises
// DivideByZeroException at the first such element.
public static Vector128<sbyte> op_Division(Vector128<sbyte> left, Vector128<sbyte> right)
{
    var dividends = left.As<byte>();
    var divisors = right.As<byte>();

    // Reinterprets both raw bytes as signed and divides them.
    sbyte Quotient(byte dividend, byte divisor) => (sbyte)((sbyte)dividend / (sbyte)divisor);

    return Vector128.Create(
        Quotient(Sse41.Extract(dividends, 0), Sse41.Extract(divisors, 0)),
        Quotient(Sse41.Extract(dividends, 1), Sse41.Extract(divisors, 1)),
        Quotient(Sse41.Extract(dividends, 2), Sse41.Extract(divisors, 2)),
        Quotient(Sse41.Extract(dividends, 3), Sse41.Extract(divisors, 3)),
        Quotient(Sse41.Extract(dividends, 4), Sse41.Extract(divisors, 4)),
        Quotient(Sse41.Extract(dividends, 5), Sse41.Extract(divisors, 5)),
        Quotient(Sse41.Extract(dividends, 6), Sse41.Extract(divisors, 6)),
        Quotient(Sse41.Extract(dividends, 7), Sse41.Extract(divisors, 7)),
        Quotient(Sse41.Extract(dividends, 8), Sse41.Extract(divisors, 8)),
        Quotient(Sse41.Extract(dividends, 9), Sse41.Extract(divisors, 9)),
        Quotient(Sse41.Extract(dividends, 10), Sse41.Extract(divisors, 10)),
        Quotient(Sse41.Extract(dividends, 11), Sse41.Extract(divisors, 11)),
        Quotient(Sse41.Extract(dividends, 12), Sse41.Extract(divisors, 12)),
        Quotient(Sse41.Extract(dividends, 13), Sse41.Extract(divisors, 13)),
        Quotient(Sse41.Extract(dividends, 14), Sse41.Extract(divisors, 14)),
        Quotient(Sse41.Extract(dividends, 15), Sse41.Extract(divisors, 15)));
}
// Element-wise signed remainder of two 16 x sbyte vectors.
//
// Each element is pulled out with Sse41.Extract (through a byte view), the remainder
// is taken as sbyte, narrowed back to sbyte, and the 16 results are packed into a
// new vector. Elements are processed in index order, so a zero divisor raises
// DivideByZeroException at the first such element.
public static Vector128<sbyte> op_Modulus(Vector128<sbyte> left, Vector128<sbyte> right)
{
    var dividends = left.As<byte>();
    var divisors = right.As<byte>();

    // Reinterprets both raw bytes as signed and takes the remainder.
    sbyte Remainder(byte dividend, byte divisor) => (sbyte)((sbyte)dividend % (sbyte)divisor);

    return Vector128.Create(
        Remainder(Sse41.Extract(dividends, 0), Sse41.Extract(divisors, 0)),
        Remainder(Sse41.Extract(dividends, 1), Sse41.Extract(divisors, 1)),
        Remainder(Sse41.Extract(dividends, 2), Sse41.Extract(divisors, 2)),
        Remainder(Sse41.Extract(dividends, 3), Sse41.Extract(divisors, 3)),
        Remainder(Sse41.Extract(dividends, 4), Sse41.Extract(divisors, 4)),
        Remainder(Sse41.Extract(dividends, 5), Sse41.Extract(divisors, 5)),
        Remainder(Sse41.Extract(dividends, 6), Sse41.Extract(divisors, 6)),
        Remainder(Sse41.Extract(dividends, 7), Sse41.Extract(divisors, 7)),
        Remainder(Sse41.Extract(dividends, 8), Sse41.Extract(divisors, 8)),
        Remainder(Sse41.Extract(dividends, 9), Sse41.Extract(divisors, 9)),
        Remainder(Sse41.Extract(dividends, 10), Sse41.Extract(divisors, 10)),
        Remainder(Sse41.Extract(dividends, 11), Sse41.Extract(divisors, 11)),
        Remainder(Sse41.Extract(dividends, 12), Sse41.Extract(divisors, 12)),
        Remainder(Sse41.Extract(dividends, 13), Sse41.Extract(divisors, 13)),
        Remainder(Sse41.Extract(dividends, 14), Sse41.Extract(divisors, 14)),
        Remainder(Sse41.Extract(dividends, 15), Sse41.Extract(divisors, 15)));
}
// Divides every element of a 16 x sbyte vector by the same signed scalar.
//
// Each element is pulled out with Sse41.Extract (through a byte view), divided as
// sbyte by `scalar`, narrowed back to sbyte, and the 16 quotients are packed into a
// new vector. A zero `scalar` raises DivideByZeroException.
public static Vector128<sbyte> op_Division(Vector128<sbyte> vector, sbyte scalar)
{
    var dividends = vector.As<byte>();

    // Reinterprets the raw byte as signed and divides it by the scalar.
    sbyte Quotient(byte dividend) => (sbyte)((sbyte)dividend / scalar);

    return Vector128.Create(
        Quotient(Sse41.Extract(dividends, 0)),
        Quotient(Sse41.Extract(dividends, 1)),
        Quotient(Sse41.Extract(dividends, 2)),
        Quotient(Sse41.Extract(dividends, 3)),
        Quotient(Sse41.Extract(dividends, 4)),
        Quotient(Sse41.Extract(dividends, 5)),
        Quotient(Sse41.Extract(dividends, 6)),
        Quotient(Sse41.Extract(dividends, 7)),
        Quotient(Sse41.Extract(dividends, 8)),
        Quotient(Sse41.Extract(dividends, 9)),
        Quotient(Sse41.Extract(dividends, 10)),
        Quotient(Sse41.Extract(dividends, 11)),
        Quotient(Sse41.Extract(dividends, 12)),
        Quotient(Sse41.Extract(dividends, 13)),
        Quotient(Sse41.Extract(dividends, 14)),
        Quotient(Sse41.Extract(dividends, 15)));
}