public void RunClassFldScenario() { var result = Sse41.Max(_fld1, _fld2); Unsafe.Write(_dataTable.outArrayPtr, result); ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr); }
public void RunStructFldScenario(SimpleBinaryOpTest__MaxSByte testClass) { var result = Sse41.Max(_fld1, _fld2); Unsafe.Write(testClass._dataTable.outArrayPtr, result); testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr); }
public void RunClassLclFldScenario() { var test = new SimpleBinaryOpTest__MaxSByte(); var result = Sse41.Max(test._fld1, test._fld2); Unsafe.Write(_dataTable.outArrayPtr, result); ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr); }
public void RunStructLclFldScenario() { var test = TestStruct.Create(); var result = Sse41.Max(test._fld1, test._fld2); Unsafe.Write(_dataTable.outArrayPtr, result); ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr); }
public static Vector4Int32 Max(Vector4Int32Param1_3 left, Vector4Int32Param1_3 right) { if (Sse41.IsSupported) { return(Sse41.Max(left, right)); } return(Max_Software(left, right)); }
public void RunClassFldScenario() { TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); var result = Sse41.Max(_fld1, _fld2); Unsafe.Write(_dataTable.outArrayPtr, result); ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr); }
public void RunLclVarScenario_LoadAligned() { var left = Sse2.LoadAlignedVector128((SByte *)(_dataTable.inArray1Ptr)); var right = Sse2.LoadAlignedVector128((SByte *)(_dataTable.inArray2Ptr)); var result = Sse41.Max(left, right); Unsafe.Write(_dataTable.outArrayPtr, result); ValidateResult(left, right, _dataTable.outArrayPtr); }
public void RunLclVarScenario_UnsafeRead() { var left = Unsafe.Read <Vector128 <SByte> >(_dataTable.inArray1Ptr); var right = Unsafe.Read <Vector128 <SByte> >(_dataTable.inArray2Ptr); var result = Sse41.Max(left, right); Unsafe.Write(_dataTable.outArrayPtr, result); ValidateResult(left, right, _dataTable.outArrayPtr); }
public void RunClsVarScenario() { var result = Sse41.Max( _clsVar1, _clsVar2 ); Unsafe.Write(_dataTable.outArrayPtr, result); ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr); }
public void RunStructLclFldScenario() { TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); var test = TestStruct.Create(); var result = Sse41.Max(test._fld1, test._fld2); Unsafe.Write(_dataTable.outArrayPtr, result); ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr); }
public void RunBasicScenario_UnsafeRead() { var result = Sse41.Max( Unsafe.Read <Vector128 <SByte> >(_dataTable.inArray1Ptr), Unsafe.Read <Vector128 <SByte> >(_dataTable.inArray2Ptr) ); Unsafe.Write(_dataTable.outArrayPtr, result); ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); }
public void RunBasicScenario_LoadAligned() { var result = Sse41.Max( Sse2.LoadAlignedVector128((SByte *)(_dataTable.inArray1Ptr)), Sse2.LoadAlignedVector128((SByte *)(_dataTable.inArray2Ptr)) ); Unsafe.Write(_dataTable.outArrayPtr, result); ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); }
public static Vector4Int32 Clamp(Vector4Int32Param1_3 vector, Vector4Int32Param1_3 low, Vector4Int32Param1_3 high) { if (Sse41.IsSupported) { Vector4Int32 temp = Sse41.Min(vector, high); return(Sse41.Max(temp, low)); } return(Clamp_Software(vector, low, high)); }
public void RunClassLclFldScenario() { TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario)); var test = new SimpleBinaryOpTest__MaxSByte(); var result = Sse41.Max(test._fld1, test._fld2); Unsafe.Write(_dataTable.outArrayPtr, result); ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr); }
public void RunLclVarScenario_LoadAligned() { TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned)); var left = Sse2.LoadAlignedVector128((UInt16 *)(_dataTable.inArray1Ptr)); var right = Sse2.LoadAlignedVector128((UInt16 *)(_dataTable.inArray2Ptr)); var result = Sse41.Max(left, right); Unsafe.Write(_dataTable.outArrayPtr, result); ValidateResult(left, right, _dataTable.outArrayPtr); }
public void RunLclVarScenario_UnsafeRead() { TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); var left = Unsafe.Read <Vector128 <UInt16> >(_dataTable.inArray1Ptr); var right = Unsafe.Read <Vector128 <UInt16> >(_dataTable.inArray2Ptr); var result = Sse41.Max(left, right); Unsafe.Write(_dataTable.outArrayPtr, result); ValidateResult(left, right, _dataTable.outArrayPtr); }
public static Vector4Int32 Abs(Vector4Int32Param1_3 vector) { if (Sse41.IsSupported) { Vector4Int32 zero = Vector4Int32.Zero; zero = Sse2.Subtract(zero, vector); // This gets the inverted results of all elements return(Sse41.Max(zero, vector)); // This selects the positive values of the 2 vectors } return(Abs_Software(vector)); }
public static i32 Max_i32(i32 a, i32 b) { if (Sse41.IsSupported) { return(Sse41.Max(a, b)); } else { return(Select_i32(GreaterThan(a, b), a, b)); } }
public void RunLclVarScenario_UnsafeRead() { TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); var op1 = Unsafe.Read <Vector128 <SByte> >(_dataTable.inArray1Ptr); var op2 = Unsafe.Read <Vector128 <SByte> >(_dataTable.inArray2Ptr); var result = Sse41.Max(op1, op2); Unsafe.Write(_dataTable.outArrayPtr, result); ValidateResult(op1, op2, _dataTable.outArrayPtr); }
public void RunLclVarScenario_LoadAligned() { TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned)); var op1 = Sse2.LoadAlignedVector128((SByte *)(_dataTable.inArray1Ptr)); var op2 = Sse2.LoadAlignedVector128((SByte *)(_dataTable.inArray2Ptr)); var result = Sse41.Max(op1, op2); Unsafe.Write(_dataTable.outArrayPtr, result); ValidateResult(op1, op2, _dataTable.outArrayPtr); }
public void RunBasicScenario_UnsafeRead() { TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); var result = Sse41.Max( Unsafe.Read <Vector128 <UInt32> >(_dataTable.inArray1Ptr), Unsafe.Read <Vector128 <UInt32> >(_dataTable.inArray2Ptr) ); Unsafe.Write(_dataTable.outArrayPtr, result); ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); }
public void RunBasicScenario_Load() { TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load)); var result = Sse41.Max( Sse2.LoadVector128((SByte *)(_dataTable.inArray1Ptr)), Sse2.LoadVector128((SByte *)(_dataTable.inArray2Ptr)) ); Unsafe.Write(_dataTable.outArrayPtr, result); ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); }
public void RunStructLclFldScenario_Load() { TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario_Load)); var test = TestStruct.Create(); var result = Sse41.Max( Sse2.LoadVector128((SByte *)(&test._fld1)), Sse2.LoadVector128((SByte *)(&test._fld2)) ); Unsafe.Write(_dataTable.outArrayPtr, result); ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr); }
public void RunStructFldScenario_Load(SimpleBinaryOpTest__MaxSByte testClass) { fixed(Vector128 <SByte> *pFld1 = &_fld1) fixed(Vector128 <SByte> *pFld2 = &_fld2) { var result = Sse41.Max( Sse2.LoadVector128((SByte *)(pFld1)), Sse2.LoadVector128((SByte *)(pFld2)) ); Unsafe.Write(testClass._dataTable.outArrayPtr, result); testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr); } }
public static unsafe int Max(this Matrix <int> matrix) { var i = 0; fixed(int *ptr = matrix.GetArray()) { var span = new Span <int>(ptr, matrix.Length); var maxScalar = span[0]; if (Sse41.IsSupported) { var maxValues = stackalloc int[4] { span[0], span[0], span[0], span[0] }; var max = Sse2.LoadVector128(maxValues); while (i < span.Length - 4) { var vector128 = Sse2.LoadVector128(ptr + i); max = Sse41.Max(vector128, max); i += 4; } var j = 0; var x = max.GetElement(0); while (j < 4) { var y = max.GetElement(j); x = (y & ((x - y) >> 31)) | (x & (~(x - y) >> 31)); j++; } maxScalar = x; } while (i < span.Length) { var y = span[i]; maxScalar = (y & ((maxScalar - y) >> 31)) | (maxScalar & (~(maxScalar - y) >> 31)); i++; } return(maxScalar); } }
public void RunClassFldScenario_Load() { TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario_Load)); fixed(Vector128 <SByte> *pFld1 = &_fld1) fixed(Vector128 <SByte> *pFld2 = &_fld2) { var result = Sse41.Max( Sse2.LoadVector128((SByte *)(pFld1)), Sse2.LoadVector128((SByte *)(pFld2)) ); Unsafe.Write(_dataTable.outArrayPtr, result); ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr); } }
public void RunClsVarScenario_Load() { TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario_Load)); fixed(Vector128 <UInt16> *pClsVar1 = &_clsVar1) fixed(Vector128 <UInt16> *pClsVar2 = &_clsVar2) { var result = Sse41.Max( Sse2.LoadVector128((UInt16 *)(pClsVar1)), Sse2.LoadVector128((UInt16 *)(pClsVar2)) ); Unsafe.Write(_dataTable.outArrayPtr, result); ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr); } }
public void RunClassLclFldScenario_Load() { TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario_Load)); var test = new SimpleBinaryOpTest__MaxUInt16(); fixed(Vector128 <UInt16> *pFld1 = &test._fld1) fixed(Vector128 <UInt16> *pFld2 = &test._fld2) { var result = Sse41.Max( Sse2.LoadVector128((UInt16 *)(pFld1)), Sse2.LoadVector128((UInt16 *)(pFld2)) ); Unsafe.Write(_dataTable.outArrayPtr, result); ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr); } }
public unsafe int FastMax(Span <int> span) { int max = span[0]; int i = 0; int vectorSize = 4; if (span.Length >= (vectorSize * 2)) { fixed(int *ptr = &span[0]) { var maxVector = Sse41.LoadVector128(ptr + i); for (i = vectorSize; i < span.Length - vectorSize; i += vectorSize) { var current = Sse41.LoadVector128(ptr + i); maxVector = Sse41.Max(maxVector, current); } Span <int> buffer = stackalloc int[vectorSize]; maxVector.AsVector().CopyTo(buffer); for (int k = 0; k < buffer.Length; k++) { if (buffer[k] > max) { max = buffer[k]; } } } } for (; i < span.Length; i++) { if (span[i] > max) { max = span[i]; } } return(max); }
static int Main() { s_success = true; // We expect the AOT compiler generated HW intrinsics with the following characteristics: // // * TRUE = IsSupported assumed to be true, no runtime check // * NULL = IsSupported is a runtime check, code should be behind the check or bad things happen // * FALSE = IsSupported assumed to be false, no runtime check, PlatformNotSupportedException if used // // The test is compiled with multiple defines to test this. #if BASELINE_INTRINSICS bool vectorsAccelerated = true; int byteVectorLength = 16; bool?Sse2AndBelow = true; bool?Sse3Group = null; bool?AesLzPcl = null; bool?Sse4142 = null; bool?PopCnt = null; bool?Avx12 = false; bool?FmaBmi12 = false; bool?Avxvnni = false; #elif NON_VEX_INTRINSICS bool vectorsAccelerated = true; int byteVectorLength = 16; bool?Sse2AndBelow = true; bool?Sse3Group = true; bool?AesLzPcl = null; bool?Sse4142 = true; bool?PopCnt = null; bool?Avx12 = false; bool?FmaBmi12 = false; bool?Avxvnni = false; #elif VEX_INTRINSICS bool vectorsAccelerated = true; int byteVectorLength = 32; bool?Sse2AndBelow = true; bool?Sse3Group = true; bool?AesLzPcl = null; bool?Sse4142 = true; bool?PopCnt = null; bool?Avx12 = true; bool?FmaBmi12 = null; bool?Avxvnni = null; #else #error Who dis? #endif if (vectorsAccelerated != Vector.IsHardwareAccelerated) { throw new Exception($"Vectors HW acceleration state unexpected - expected {vectorsAccelerated}, got {Vector.IsHardwareAccelerated}"); } if (byteVectorLength != Vector <byte> .Count) { throw new Exception($"Unexpected vector length - expected {byteVectorLength}, got {Vector<byte>.Count}"); } Check("Sse", Sse2AndBelow, &SseIsSupported, Sse.IsSupported, () => Sse.Subtract(Vector128 <float> .Zero, Vector128 <float> .Zero).Equals(Vector128 <float> .Zero)); Check("Sse.X64", Sse2AndBelow, &SseX64IsSupported, Sse.X64.IsSupported, () => Sse.X64.ConvertToInt64WithTruncation(Vector128 <float> .Zero) == 0); Check("Sse2", Sse2AndBelow, &Sse2IsSupported, Sse2.IsSupported, () => Sse2.Extract(Vector128 <ushort> .Zero, 0) == 0); Check("Sse2.X64", Sse2AndBelow, &Sse2X64IsSupported, Sse2.X64.IsSupported, () => Sse2.X64.ConvertToInt64(Vector128 <double> .Zero) == 0); Check("Sse3", Sse3Group, &Sse3IsSupported, Sse3.IsSupported, () => Sse3.MoveHighAndDuplicate(Vector128 <float> .Zero).Equals(Vector128 <float> .Zero)); Check("Sse3.X64", Sse3Group, &Sse3X64IsSupported, Sse3.X64.IsSupported, null); Check("Ssse3", Sse3Group, &Ssse3IsSupported, Ssse3.IsSupported, () => Ssse3.Abs(Vector128 <short> .Zero).Equals(Vector128 <ushort> .Zero)); Check("Ssse3.X64", Sse3Group, &Ssse3X64IsSupported, Ssse3.X64.IsSupported, null); Check("Sse41", Sse4142, &Sse41IsSupported, Sse41.IsSupported, () => Sse41.Max(Vector128 <int> .Zero, Vector128 <int> .Zero).Equals(Vector128 <int> .Zero)); Check("Sse41.X64", Sse4142, &Sse41X64IsSupported, Sse41.X64.IsSupported, () => Sse41.X64.Extract(Vector128 <long> .Zero, 0) == 0); Check("Sse42", Sse4142, &Sse42IsSupported, Sse42.IsSupported, () => Sse42.Crc32(0, 0) == 0); Check("Sse42.X64", Sse4142, &Sse42X64IsSupported, Sse42.X64.IsSupported, () => Sse42.X64.Crc32(0, 0) == 0); Check("Aes", AesLzPcl, &AesIsSupported, Aes.IsSupported, () => Aes.KeygenAssist(Vector128 <byte> .Zero, 0).Equals(Vector128.Create((byte)99))); Check("Aes.X64", AesLzPcl, &AesX64IsSupported, Aes.X64.IsSupported, null); Check("Avx", Avx12, &AvxIsSupported, Avx.IsSupported, () => Avx.Add(Vector256 <double> .Zero, Vector256 <double> .Zero).Equals(Vector256 <double> .Zero)); Check("Avx.X64", Avx12, &AvxX64IsSupported, Avx.X64.IsSupported, null); Check("Avx2", Avx12, &Avx2IsSupported, Avx2.IsSupported, () => Avx2.Abs(Vector256 <int> .Zero).Equals(Vector256 <uint> .Zero)); Check("Avx2.X64", Avx12, &Avx2X64IsSupported, Avx2.X64.IsSupported, null); Check("Bmi1", FmaBmi12, &Bmi1IsSupported, Bmi1.IsSupported, () => Bmi1.AndNot(0, 0) == 0); Check("Bmi1.X64", FmaBmi12, &Bmi1X64IsSupported, Bmi1.X64.IsSupported, () => Bmi1.X64.AndNot(0, 0) == 0); Check("Bmi2", FmaBmi12, &Bmi2IsSupported, Bmi2.IsSupported, () => Bmi2.MultiplyNoFlags(0, 0) == 0); Check("Bmi2.X64", FmaBmi12, &Bmi2X64IsSupported, Bmi2.X64.IsSupported, () => Bmi2.X64.MultiplyNoFlags(0, 0) == 0); Check("Fma", FmaBmi12, &FmaIsSupported, Fma.IsSupported, () => Fma.MultiplyAdd(Vector128 <float> .Zero, Vector128 <float> .Zero, Vector128 <float> .Zero).Equals(Vector128 <float> .Zero)); Check("Fma.X64", FmaBmi12, &FmaX64IsSupported, Fma.X64.IsSupported, null); Check("Lzcnt", AesLzPcl, &LzcntIsSupported, Lzcnt.IsSupported, () => Lzcnt.LeadingZeroCount(0) == 32); Check("Lzcnt.X64", AesLzPcl, &LzcntX64IsSupported, Lzcnt.X64.IsSupported, () => Lzcnt.X64.LeadingZeroCount(0) == 64); Check("Pclmulqdq", AesLzPcl, &PclmulqdqIsSupported, Pclmulqdq.IsSupported, () => Pclmulqdq.CarrylessMultiply(Vector128 <long> .Zero, Vector128 <long> .Zero, 0).Equals(Vector128 <long> .Zero)); Check("Pclmulqdq.X64", AesLzPcl, &PclmulqdqX64IsSupported, Pclmulqdq.X64.IsSupported, null); Check("Popcnt", PopCnt, &PopcntIsSupported, Popcnt.IsSupported, () => Popcnt.PopCount(0) == 0); Check("Popcnt.X64", PopCnt, &PopcntX64IsSupported, Popcnt.X64.IsSupported, () => Popcnt.X64.PopCount(0) == 0); Check("AvxVnni", Avxvnni, &AvxVnniIsSupported, AvxVnni.IsSupported, () => AvxVnni.MultiplyWideningAndAdd(Vector128 <int> .Zero, Vector128 <byte> .Zero, Vector128 <sbyte> .Zero).Equals(Vector128 <int> .Zero)); Check("AvxVnni.X64", Avxvnni, &AvxVnniX64IsSupported, AvxVnni.X64.IsSupported, null); return(s_success ? 100 : 1); }