/// <summary>
/// Scalar convenience overload: replicates <paramref name="scalar"/> with
/// <c>Vector128.Create</c> and forwards to the Add overload that accepts the replicated vector.
/// </summary>
/// <param name="vector">Left-hand operand, passed through unchanged.</param>
/// <param name="scalar">Value replicated to form the right-hand operand.</param>
/// <returns>The result of the forwarded Add call.</returns>
public static Vector4F Add(Vector4FParam1_3 vector, float scalar)
{
    var broadcast = Vector128.Create(scalar);
    return Add(vector, broadcast);
}
/// <summary>
/// C-intrinsic-style alias that forwards to <c>Sse.X64.ConvertToInt64</c>.
/// </summary>
/// <param name="value">Vector handed to the conversion.</param>
/// <returns>The 64-bit integer produced by <c>Sse.X64.ConvertToInt64</c>.</returns>
public static long _mm_cvtss_si64(Vector128<float> value) => Sse.X64.ConvertToInt64(value);
/// <summary>
/// C-intrinsic-style alias that forwards to <c>Sse.CompareGreaterThan</c>.
/// </summary>
/// <param name="left">First operand of the comparison.</param>
/// <param name="right">Second operand of the comparison.</param>
/// <returns>The comparison result vector returned by <c>Sse.CompareGreaterThan</c>.</returns>
public static Vector128<float> _mm_cmpgt_ps(Vector128<float> left, Vector128<float> right)
    => Sse.CompareGreaterThan(left, right);
/// <summary>
/// Performs SHA1 schedule update 0.
/// vsha1su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7, uint32x4_t w8_11)
/// </summary>
/// <exception cref="PlatformNotSupportedException">Always thrown by this implementation.</exception>
public static Vector128<uint> SchedulePart1(Vector128<uint> w0_3, Vector128<uint> w4_7, Vector128<uint> w8_11)
    => throw new PlatformNotSupportedException();
/// <summary>
/// Runs <c>Vector128.LessThanAll</c> on this instance's <c>_fld1</c> and <c>_fld2</c> fields
/// and passes both inputs together with the outcome to <paramref name="testClass"/> for validation.
/// </summary>
/// <param name="testClass">Test object whose <c>ValidateResult</c> receives the inputs and the outcome.</param>
public void RunStructFldScenario(VectorBooleanBinaryOpTest__LessThanAllByte testClass)
{
    var outcome = Vector128.LessThanAll(_fld1, _fld2);

    testClass.ValidateResult(_fld1, _fld2, outcome);
}
/// <summary>
/// Initializes <c>vector1</c> and <c>vector2</c> with vectors whose elements are all
/// 0x12345678 and 0x23456789 respectively.
/// </summary>
public Add()
{
    const int FirstSeed = 0x12345678;
    const int SecondSeed = 0x23456789;

    vector1 = Vector128.Create(FirstSeed);
    vector2 = Vector128.Create(SecondSeed);
}
/// <summary>
/// Copies <paramref name="firstOp"/> into a freshly allocated array of <c>Op1ElementCount</c>
/// doubles and forwards it to the array-based <c>ValidateResult</c> overload.
/// </summary>
/// <param name="firstOp">Operand vector to unpack.</param>
/// <param name="result">Result value passed through to the array-based overload.</param>
/// <param name="method">Caller name, filled in by the compiler when omitted.</param>
private void ValidateResult(Vector128<Double> firstOp, Int32 result, [CallerMemberName] string method = "")
{
    var operandElements = new Double[Op1ElementCount];

    // Reinterpret the start of the array as bytes so the vector can be written in one unaligned store.
    ref byte destination = ref Unsafe.As<Double, byte>(ref operandElements[0]);
    Unsafe.WriteUnaligned(ref destination, firstOp);

    ValidateResult(operandElements, result, method);
}
/// <summary>
/// Creates a new <c>VectorArg128</c> from the sum of a vector built from <paramref name="f"/>
/// (via <c>Sse.SetAllVector128</c>) and this instance's <c>_rgb</c> field.
/// </summary>
/// <param name="f">Value handed to <c>Sse.SetAllVector128</c>.</param>
/// <returns>A new instance wrapping the sum; this instance is not modified here.</returns>
public VectorArg128 Change(float f)
{
    var broadcast = Sse.SetAllVector128(f);
    var shifted = Sse.Add(broadcast, _rgb);
    return new VectorArg128(shifted);
}
/// <summary>
/// When <c>Sse.IsSupported</c> or <c>AdvSimd.IsSupported</c> is true, returns the scalar result of
/// <c>VectorMath.ConditionalSelectBitwise</c> applied to a -0.0f mask, <paramref name="y"/> and
/// <paramref name="x"/>; otherwise returns <c>SoftwareFallback(x, y)</c>.
/// </summary>
/// <remarks>
/// The mask -0.0f has only the sign bit set, so presumably the result takes its sign bit from y and all
/// other bits from x - confirm against VectorMath.ConditionalSelectBitwise, which is not visible here.
/// NOTE(review): the closing brace of this method is not present on this line; the definition appears to be
/// truncated in this view (SoftwareFallback is not visible either).
/// </remarks>
public static float CopySign(float x, float y) { if (Sse.IsSupported || AdvSimd.IsSupported) { return(VectorMath.ConditionalSelectBitwise(Vector128.CreateScalarUnsafe(-0.0f), Vector128.CreateScalarUnsafe(y), Vector128.CreateScalarUnsafe(x)).ToScalar()); } else { return(SoftwareFallback(x, y)); }
/// <summary>
/// Assembles two 256-bit Int16 vectors, each from four broadcast 64-bit pieces, and adds them
/// with <c>Avx2.Add</c>. Every loop runs exactly once, with the value 0.
/// </summary>
/// <remarks>
/// The sum is computed but never compared against an expected value.
/// </remarks>
public void Add2_Int16()
{
    for (int l0 = 0; l0 < 1; l0++)
    {
        Vector64<short> leftPart0 = Vector64.Create((short)l0);
        for (int l1 = 0; l1 < 1; l1++)
        {
            Vector64<short> leftPart1 = Vector64.Create((short)l1);
            Vector128<short> leftLower = Vector128.Create(leftPart0, leftPart1);
            for (int l2 = 0; l2 < 1; l2++)
            {
                Vector64<short> leftPart2 = Vector64.Create((short)l2);
                for (int l3 = 0; l3 < 1; l3++)
                {
                    Vector64<short> leftPart3 = Vector64.Create((short)l3);
                    Vector128<short> leftUpper = Vector128.Create(leftPart2, leftPart3);
                    Vector256<short> left = Vector256.Create(leftLower, leftUpper);
                    for (int r0 = 0; r0 < 1; r0++)
                    {
                        Vector64<short> rightPart0 = Vector64.Create((short)r0);
                        for (int r1 = 0; r1 < 1; r1++)
                        {
                            Vector64<short> rightPart1 = Vector64.Create((short)r1);
                            Vector128<short> rightLower = Vector128.Create(rightPart0, rightPart1);
                            for (int r2 = 0; r2 < 1; r2++)
                            {
                                Vector64<short> rightPart2 = Vector64.Create((short)r2);
                                for (int r3 = 0; r3 < 1; r3++)
                                {
                                    Vector64<short> rightPart3 = Vector64.Create((short)r3);
                                    Vector128<short> rightUpper = Vector128.Create(rightPart2, rightPart3);
                                    Vector256<short> right = Vector256.Create(rightLower, rightUpper);

                                    // Result is intentionally left unchecked, as in the original scenario.
                                    Vector256<short> sum = Avx2.Add(left, right);
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}
/// <summary>
/// Creates an instance that stores the supplied vector in its <c>_rgb</c> field.
/// </summary>
/// <param name="_rgb">Vector to store.</param>
public VectorArg128(Vector128<float> _rgb) => this._rgb = _rgb;
/// <summary>
/// Scalar convenience overload: replicates <paramref name="scalarDivisor"/> with
/// <c>Vector128.Create</c> and forwards to the Divide overload that accepts the replicated vector.
/// </summary>
/// <remarks>
/// Previously this forwarded to Multiply, which multiplied the dividend by the divisor instead of
/// dividing by it. It now follows the same forwarding pattern as the scalar Add, Multiply and
/// Subtract overloads.
/// </remarks>
/// <param name="dividend">Vector to be divided, passed through unchanged.</param>
/// <param name="scalarDivisor">Value replicated to form the divisor vector.</param>
/// <returns>The result of the forwarded Divide call.</returns>
public static Vector4F Divide(Vector4FParam1_3 dividend, float scalarDivisor)
    => Divide(dividend, Vector128.Create(scalarDivisor));
/// <summary>
/// Scalar convenience overload: replicates <paramref name="scalar"/> with
/// <c>Vector128.Create</c> and forwards to the Multiply overload that accepts the replicated vector.
/// </summary>
/// <param name="vector">Left-hand operand, passed through unchanged.</param>
/// <param name="scalar">Value replicated to form the right-hand operand.</param>
/// <returns>The result of the forwarded Multiply call.</returns>
public static Vector4F Multiply(Vector4FParam1_3 vector, float scalar)
{
    var broadcast = Vector128.Create(scalar);
    return Multiply(vector, broadcast);
}
/// <summary>
/// Scalar convenience overload: replicates <paramref name="scalar"/> with
/// <c>Vector128.Create</c> and forwards to the Subtract overload that accepts the replicated vector.
/// </summary>
/// <param name="vector">Left-hand operand, passed through unchanged.</param>
/// <param name="scalar">Value replicated to form the right-hand operand.</param>
/// <returns>The result of the forwarded Subtract call.</returns>
public static Vector4F Subtract(Vector4FParam1_3 vector, float scalar)
{
    var broadcast = Vector128.Create(scalar);
    return Subtract(vector, broadcast);
}
/// <summary>
/// Copies <paramref name="result"/> into a freshly allocated array of <c>ElementCount</c>
/// UInt64 values and forwards it to the array-based <c>ValidateResult</c> overload.
/// </summary>
/// <param name="result">Vector to unpack.</param>
/// <param name="expectedLowerValue">Passed through to the array-based overload.</param>
/// <param name="expectedUpperValue">Passed through to the array-based overload.</param>
/// <param name="method">Caller name, filled in by the compiler when omitted.</param>
private void ValidateResult(Vector128<UInt64> result, UInt64 expectedLowerValue, UInt64 expectedUpperValue, [CallerMemberName] string method = "")
{
    var unpacked = new UInt64[ElementCount];

    // Reinterpret the start of the array as bytes so the vector can be written in one unaligned store.
    ref byte destination = ref Unsafe.As<UInt64, byte>(ref unpacked[0]);
    Unsafe.WriteUnaligned(ref destination, result);

    ValidateResult(unpacked, expectedLowerValue, expectedUpperValue, method);
}
/// <summary>
/// Places each argument in a vector with <c>Vector128.CreateScalarUnsafe</c>, applies
/// <c>Sse2.MaxScalar</c> and returns the scalar element of the result.
/// </summary>
/// <param name="a">First operand.</param>
/// <param name="b">Second operand.</param>
/// <returns>The scalar element produced by <c>Sse2.MaxScalar</c>.</returns>
private static double MaxSse2(double a, double b)
{
    Vector128<double> first = Vector128.CreateScalarUnsafe(a);
    Vector128<double> second = Vector128.CreateScalarUnsafe(b);
    return Sse2.MaxScalar(first, second).ToScalar();
}
/// <summary>
/// Assigns <c>vector</c> a Vector128 created from the constant 0x123456.
/// </summary>
public override void RunStep()
{
    vector = Vector128.Create(0x123456);
}
/// <summary>
/// Writes an sbyte array to <paramref name="writer"/>: nil for null, otherwise an array header with
/// the element count followed by the elements. An empty array returns right after the header.
/// </summary>
/// <remarks>
/// Vector path (taken when Popcnt.IsSupported and at least 2 * Stride = 32 elements are present):
/// 1. Leading elements are written one by one for the count returned by
///    UnsafeMemoryAlignmentUtility.CalculateDifferenceAlign16 (presumably the distance to a 16-byte boundary - confirm).
/// 2. Each iteration loads 16 values and builds a bit mask of the lanes that are below MessagePackRange.MinFixNegativeInt.
///    If the mask is zero the 16 bytes are stored to the writer unchanged.
/// 3. Otherwise the low and high 8 values are handled separately: the half's mask byte selects a 16-byte entry of
///    ShuffleAndMaskTable that is used both as the shuffle control and as the blend mask for MessagePackCode.Int8;
///    PopCount(mask) + 8 bytes are emitted per half through Sse2.MaskMove with the StoreMaskTable entry for that count.
/// Elements not consumed by the vector loop (or all elements when the vector path is skipped) are written with
/// writer.Write one at a time.
/// The options parameter is not read by this method.
/// </remarks>
public unsafe void Serialize(ref MessagePackWriter writer, sbyte[]?value, MessagePackSerializerOptions options) { if (value == null) { writer.WriteNil(); return; } var inputLength = value.Length; writer.WriteArrayHeader(inputLength); if (inputLength == 0) { return; } fixed(sbyte *pSource = &value[0]) { var inputEnd = pSource + inputLength; var inputIterator = pSource; if (Popcnt.IsSupported) { const int ShiftCount = 4; const int Stride = 1 << ShiftCount; // We enter the SIMD mode when there are more than the Stride after alignment adjustment. if (inputLength < Stride << 1) { goto ProcessEach; } { // Make InputIterator Aligned var offset = UnsafeMemoryAlignmentUtility.CalculateDifferenceAlign16(inputIterator); inputLength -= offset; var offsetEnd = inputIterator + offset; while (inputIterator != offsetEnd) { writer.Write(*inputIterator++); } } fixed(byte *tablePointer = &ShuffleAndMaskTable[0]) { fixed(byte *maskTablePointer = &SingleInstructionMultipleDataPrimitiveArrayFormatterHelper.StoreMaskTable[0]) { var vectorMinFixNegInt = Vector128.Create((sbyte)MessagePackRange.MinFixNegativeInt); var vectorMessagePackCodeInt8 = Vector128.Create(MessagePackCode.Int8); for (var vectorizedEnd = inputIterator + ((inputLength >> ShiftCount) << ShiftCount); inputIterator != vectorizedEnd; inputIterator += Stride) { var current = Sse2.LoadVector128(inputIterator); var index = unchecked ((uint)Sse2.MoveMask(Sse2.CompareGreaterThan(vectorMinFixNegInt, current))); if (index == 0) { // When all 16 input values are in the FixNum range. 
var span = writer.GetSpan(Stride); Sse2.Store((sbyte *)Unsafe.AsPointer(ref span[0]), current); writer.Advance(Stride); continue; } unchecked { var index0 = (byte)index; var index1 = (byte)(index >> 8); var count0 = (int)(Popcnt.PopCount(index0) + 8); var count1 = (int)(Popcnt.PopCount(index1) + 8); var countTotal = count0 + count1; var destination = writer.GetSpan(countTotal); fixed(byte *pDestination = &destination[0]) { var tempDestination = pDestination; var shuffle0 = Sse2.LoadVector128(tablePointer + (index0 << 4)); var shuffled0 = Ssse3.Shuffle(current.AsByte(), shuffle0); var answer0 = Sse41.BlendVariable(shuffled0, vectorMessagePackCodeInt8, shuffle0); Sse2.MaskMove(answer0, Sse2.LoadVector128(maskTablePointer + (count0 << 4)), tempDestination); tempDestination += count0; var shuffle1 = Sse2.LoadVector128(tablePointer + (index1 << 4)); var shift1 = Sse2.ShiftRightLogical128BitLane(current.AsByte(), 8); var shuffled1 = Ssse3.Shuffle(shift1, shuffle1); var answer1 = Sse41.BlendVariable(shuffled1, vectorMessagePackCodeInt8, shuffle1); Sse2.MaskMove(answer1, Sse2.LoadVector128(maskTablePointer + (count1 << 4)), tempDestination); } writer.Advance(countTotal); } } } } } ProcessEach: while (inputIterator != inputEnd) { writer.Write(*inputIterator++); } } }
/// <summary>
/// Initializes <c>vector1</c> and <c>vector2</c> with vectors whose elements are all
/// 0x12345678 and 0x23456789 respectively.
/// </summary>
public Multiply()
{
    const int FirstSeed = 0x12345678;
    const int SecondSeed = 0x23456789;

    vector1 = Vector128.Create(FirstSeed);
    vector2 = Vector128.Create(SecondSeed);
}
/// <summary>
/// Writes an int array to <paramref name="writer"/>: nil for null, otherwise an array header with
/// the element count followed by the elements. An empty array returns right after the header.
/// </summary>
/// <remarks>
/// Vector path (taken when Sse41.IsSupported and at least 2 * Stride = 8 elements are present):
/// 1. If the value returned by UnsafeMemoryAlignmentUtility.CalculateDifferenceAlign16 is a multiple of 4, that many
///    bytes / 4 leading ints are written one by one first; otherwise no leading adjustment is made.
/// 2. Each iteration loads 4 ints. When every lane is greater than MinFixNegativeInt - 1 and not greater than
///    sbyte.MaxValue, the low byte of each lane is gathered with a shuffle and the 4 bytes are written directly.
/// 3. Otherwise six comparison results (each -1 when true) are summed per lane, multiplied by (-1, -7, -1, -7) and
///    horizontally added, so each pair of lanes yields one index (first lane + 7 * second lane). That index selects
///    the output byte count from the count table located at CountTableOffset inside ShuffleAndMaskTable and a
///    32-byte table entry (16-byte shuffle control followed by a 16-byte constant that is OR-ed in). Each pair is
///    stored with Sse2.MaskMove using the StoreMaskTable entry for its count.
/// Elements not consumed by the vector loop (or all elements when the vector path is skipped) are written with
/// writer.Write one at a time.
/// The options parameter is not read by this method.
/// </remarks>
public unsafe void Serialize(ref MessagePackWriter writer, int[]?value, MessagePackSerializerOptions options) { if (value == null) { writer.WriteNil(); return; } var inputLength = value.Length; writer.WriteArrayHeader(inputLength); if (inputLength == 0) { return; } fixed(int *pSource = &value[0]) { var inputEnd = pSource + inputLength; var inputIterator = pSource; if (Sse41.IsSupported) { const int ShiftCount = 2; const int Stride = 1 << ShiftCount; if (inputLength < Stride << 1) { goto ProcessEach; } { // Make InputIterator Aligned var offset = UnsafeMemoryAlignmentUtility.CalculateDifferenceAlign16(inputIterator); // When offset is times of 4, you can adjust memory address. if ((offset & 3) == 0) { offset >>= 2; inputLength -= offset; var offsetEnd = inputIterator + offset; while (inputIterator != offsetEnd) { writer.Write(*inputIterator++); } } } fixed(byte *tablePointer = &ShuffleAndMaskTable[0]) { var countPointer = (int *)(tablePointer + CountTableOffset); fixed(byte *maskTablePointer = &SingleInstructionMultipleDataPrimitiveArrayFormatterHelper.StoreMaskTable[0]) { var vectorShortMinValueM1 = Vector128.Create(short.MinValue - 1); var vectorSByteMinValueM1 = Vector128.Create(sbyte.MinValue - 1); var vectorMinFixNegIntM1 = Vector128.Create(MessagePackRange.MinFixNegativeInt - 1); var vectorSByteMaxValue = Vector128.Create((int)sbyte.MaxValue); var vectorByteMaxValue = Vector128.Create((int)byte.MaxValue); var vectorUShortMaxValue = Vector128.Create((int)ushort.MaxValue); var vectorM1M7 = Vector128.Create(-1, -7, -1, -7); var vectorIn1Range = Vector128.Create(0, 4, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); for (var vectorizedEnd = inputIterator + ((inputLength >> ShiftCount) << ShiftCount); inputIterator != vectorizedEnd; inputIterator += Stride) { var current = Sse2.LoadVector128(inputIterator); var isGreaterThanMinFixNegIntM1 = Sse2.CompareGreaterThan(current, vectorMinFixNegIntM1); var isGreaterThanSByteMaxValue = 
Sse2.CompareGreaterThan(current, vectorSByteMaxValue); if (Sse2.MoveMask(Sse2.AndNot(isGreaterThanSByteMaxValue, isGreaterThanMinFixNegIntM1).AsByte()) == 0xFFFF) { var answer = Ssse3.Shuffle(current.AsByte(), vectorIn1Range).AsUInt32(); var span = writer.GetSpan(Stride); Unsafe.As <byte, uint>(ref span[0]) = answer.GetElement(0); writer.Advance(Stride); continue; } var indexVector = Sse2.Add(isGreaterThanSByteMaxValue, isGreaterThanMinFixNegIntM1); indexVector = Sse2.Add(indexVector, Sse2.CompareGreaterThan(current, vectorUShortMaxValue)); indexVector = Sse2.Add(indexVector, Sse2.CompareGreaterThan(current, vectorByteMaxValue)); indexVector = Sse2.Add(indexVector, Sse2.CompareGreaterThan(current, vectorShortMinValueM1)); indexVector = Sse2.Add(indexVector, Sse2.CompareGreaterThan(current, vectorSByteMinValueM1)); indexVector = Sse41.MultiplyLow(indexVector, vectorM1M7); indexVector = Ssse3.HorizontalAdd(indexVector, indexVector); var index0 = indexVector.GetElement(0); var index1 = indexVector.GetElement(1); var count0 = countPointer[index0]; var count1 = countPointer[index1]; var countTotal = count0 + count1; var destination = writer.GetSpan(countTotal); fixed(byte *pDestination = &destination[0]) { var tmpDestination = pDestination; var item0 = tablePointer + (index0 << 5); var shuffle0 = Sse2.LoadVector128(item0); var shuffled0 = Ssse3.Shuffle(current.AsByte(), shuffle0); var constant0 = Sse2.LoadVector128(item0 + 16); var answer0 = Sse2.Or(shuffled0, constant0); Sse2.MaskMove(answer0, Sse2.LoadVector128(maskTablePointer + (count0 << 4)), pDestination); tmpDestination += count0; var shift1 = Sse2.ShiftRightLogical128BitLane(current, 8).AsByte(); var item1 = tablePointer + (index1 << 5); var shuffle1 = Sse2.LoadVector128(item1); var shuffled1 = Ssse3.Shuffle(shift1, shuffle1); var constant1 = Sse2.LoadVector128(item1 + 16); var answer1 = Sse2.Or(shuffled1, constant1); Sse2.MaskMove(answer1, Sse2.LoadVector128(maskTablePointer + (count1 << 4)), tmpDestination); } 
writer.Advance(countTotal); } } } } ProcessEach: while (inputIterator != inputEnd) { writer.Write(*inputIterator++); } } }
/// <summary>
/// Performs SHA1 hash update, parity form.
/// vsha1pq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
/// </summary>
/// <exception cref="PlatformNotSupportedException">Always thrown by this implementation.</exception>
public static Vector128<uint> HashParity(Vector128<uint> hash_abcd, uint hash_e, Vector128<uint> wk)
    => throw new PlatformNotSupportedException();
/// <summary>
/// Writes a float array to <paramref name="writer"/>: nil for null, otherwise an array header with
/// the element count followed by 5 bytes per element (MessagePackCode.Float32 plus the 4 value bytes,
/// most significant byte first). An empty array returns right after the header.
/// </summary>
/// <remarks>
/// The full output span (inputLength * 5 bytes) is requested once up front and the writer is advanced by that
/// amount at the end.
/// Vector path (taken when Sse42.IsSupported and at least 6 elements are present): each iteration consumes 3 floats
/// (12 bytes) and produces 15 bytes. The shuffle reverses the bytes of each float and leaves zeroed slots (0x80
/// control bytes) that are OR-ed with a constant holding MessagePackCode.Float32. Each iteration loads and stores a
/// full 16 bytes, i.e. more than it consumes or produces, and the loop covers only ((inputLength / 3) - 1) * 3
/// elements; the remaining elements go through the scalar loop.
/// The scalar loop writes the code byte followed by the value shifted right by 24, 16, 8 and 0 bits.
/// The options parameter is not read by this method.
/// </remarks>
public unsafe void Serialize(ref MessagePackWriter writer, float[]?value, MessagePackSerializerOptions options) { if (value == null) { writer.WriteNil(); return; } var inputLength = value.Length; writer.WriteArrayHeader(inputLength); if (inputLength == 0) { return; } // output byte[] length can be calculated from input float[] length. var outputLength = inputLength * 5; var destination = writer.GetSpan(outputLength); fixed(byte *pDestination = &destination[0]) { var outputIterator = pDestination; fixed(float *pSource = &value[0]) { var inputEnd = pSource + inputLength; var inputIterator = (uint *)pSource; if (Sse42.IsSupported) { if (inputLength < 6) { goto ProcessEach; } // Process 3 floats at once. // From 12 bytes to 15 bytes. var vectorConstant = Vector128.Create(MessagePackCode.Float32, 0, 0, 0, 0, MessagePackCode.Float32, 0, 0, 0, 0, MessagePackCode.Float32, 0, 0, 0, 0, 0); var vectorShuffle = Vector128.Create(0x80, 3, 2, 1, 0, 0x80, 7, 6, 5, 4, 0x80, 11, 10, 9, 8, 0x80); var vectorLoopLength = ((inputLength / 3) - 1) * 3; for (var vectorizedEnd = inputIterator + vectorLoopLength; inputIterator != vectorizedEnd; inputIterator += 3, outputIterator += 15) { // new float[] { 1.0, -2.0, 3.5, } is byte[12] { 00, 00, 80, 3f, 00, 00, 00, c0, 00, 00, 60, 40 } in binary expression; var current = Sse2.LoadVector128((byte *)inputIterator); // Output binary should be byte[15] { ca, 3f, 80, 00, 00, ca, c0, 00, 00, 00, ca, 40, 60, 00, 00 }; Sse2.Store(outputIterator, Sse2.Or(Ssse3.Shuffle(current, vectorShuffle), vectorConstant)); } } ProcessEach: while (inputIterator != inputEnd) { // Encode float as Big Endian * outputIterator++ = MessagePackCode.Float32; var current = *inputIterator++; * outputIterator++ = (byte)(current >> 24); * outputIterator++ = (byte)(current >> 16); * outputIterator++ = (byte)(current >> 8); * outputIterator++ = (byte)current; } } } writer.Advance(outputLength); }
/// <summary>
/// Performs SHA1 schedule update 1.
/// vsha1su1q_u32 (uint32x4_t tw0_3, uint32x4_t w12_15)
/// </summary>
/// <exception cref="PlatformNotSupportedException">Always thrown by this implementation.</exception>
public static Vector128<uint> SchedulePart2(Vector128<uint> tw0_3, Vector128<uint> w12_15)
    => throw new PlatformNotSupportedException();
/// <summary>
/// Writes a double array to <paramref name="writer"/>: nil for null, otherwise an array header with
/// the element count followed by 9 bytes per element (MessagePackCode.Float64 plus the 8 value bytes,
/// most significant byte first). An empty array returns right after the header.
/// </summary>
/// <remarks>
/// The full output span (inputLength * 9 bytes) is requested once up front and the writer is advanced by that
/// amount at the end.
/// Avx2 path (at least 8 elements): 4 doubles per iteration; Avx2.Shuffle reverses the bytes of each 8-byte group,
/// then each element is written as a code byte followed by an 8-byte store.
/// Ssse3 path (used only when Avx2 is not supported, at least 4 elements): the same scheme with 2 doubles per iteration.
/// Elements not consumed by a vector loop are written by the scalar loop, which emits the code byte followed by the
/// value shifted right by 56, 48, ..., 8 and 0 bits.
/// The options parameter is not read by this method.
/// </remarks>
public unsafe void Serialize(ref MessagePackWriter writer, double[]?value, MessagePackSerializerOptions options) { if (value == null) { writer.WriteNil(); return; } var inputLength = value.Length; writer.WriteArrayHeader(inputLength); if (inputLength == 0) { return; } var outputLength = inputLength * 9; var destination = writer.GetSpan(outputLength); fixed(byte *pDestination = &destination[0]) { var outputIterator = pDestination; fixed(double *pSource = &value[0]) { var inputEnd = pSource + inputLength; var inputIterator = (ulong *)pSource; if (Avx2.IsSupported) { const int ShiftCount = 2; const int Stride = 1 << ShiftCount; if (inputLength < Stride << 1) { goto ProcessEach; } var vectorShuffle = Vector256.Create((byte)7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); for (var vectorizedEnd = inputIterator + ((inputLength >> ShiftCount) << ShiftCount); inputIterator != vectorizedEnd; inputIterator += Stride) { // Fetch 4 doubles. var current = Avx.LoadVector256((byte *)inputIterator); // Reorder Little Endian bytes to Big Endian. var answer = Avx2.Shuffle(current, vectorShuffle).AsUInt64(); // Write 4 Big-Endian doubles. 
*outputIterator++ = MessagePackCode.Float64; *(ulong *)outputIterator = answer.GetElement(0); outputIterator += 8; *outputIterator++ = MessagePackCode.Float64; *(ulong *)outputIterator = answer.GetElement(1); outputIterator += 8; *outputIterator++ = MessagePackCode.Float64; *(ulong *)outputIterator = answer.GetElement(2); outputIterator += 8; *outputIterator++ = MessagePackCode.Float64; *(ulong *)outputIterator = answer.GetElement(3); outputIterator += 8; } } else if (Ssse3.IsSupported) { const int ShiftCount = 1; const int Stride = 1 << ShiftCount; if (inputLength < Stride << 1) { goto ProcessEach; } var vectorShuffle = Vector128.Create((byte)7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); for (var vectorizedEnd = inputIterator + ((inputLength >> ShiftCount) << ShiftCount); inputIterator != vectorizedEnd; inputIterator += Stride) { var current = Sse2.LoadVector128((byte *)inputIterator); var answer = Ssse3.Shuffle(current, vectorShuffle).AsUInt64(); * outputIterator++ = MessagePackCode.Float64; *(ulong *)outputIterator = answer.GetElement(0); outputIterator += 8; *outputIterator++ = MessagePackCode.Float64; *(ulong *)outputIterator = answer.GetElement(1); outputIterator += 8; } } ProcessEach: while (inputIterator != inputEnd) { * outputIterator++ = MessagePackCode.Float64; var current = *inputIterator++; * outputIterator++ = (byte)(current >> 56); * outputIterator++ = (byte)(current >> 48); * outputIterator++ = (byte)(current >> 40); * outputIterator++ = (byte)(current >> 32); * outputIterator++ = (byte)(current >> 24); * outputIterator++ = (byte)(current >> 16); * outputIterator++ = (byte)(current >> 8); * outputIterator++ = (byte)current; } } } writer.Advance(outputLength); }
/// <summary>
/// C-intrinsic-style alias that forwards to <c>Sse.CompareScalarUnorderedEqual</c>.
/// </summary>
/// <param name="left">First operand of the comparison.</param>
/// <param name="right">Second operand of the comparison.</param>
/// <returns>The boolean returned by <c>Sse.CompareScalarUnorderedEqual</c>.</returns>
public static bool _mm_ucomieq_ss(Vector128<float> left, Vector128<float> right)
    => Sse.CompareScalarUnorderedEqual(left, right);
/// <summary>
/// Writes a bool array to <paramref name="writer"/>: nil for null, otherwise an array header with
/// the element count followed by one byte per element (MessagePackCode.True or MessagePackCode.False).
/// An empty array returns right after the header.
/// </summary>
/// <remarks>
/// The output span (one byte per element) is requested once and the writer is advanced by the original element
/// count at the end; inputLength itself is reduced during the alignment step, which is why outputLength is kept.
/// Avx2 path (at least 64 elements): leading elements are converted one by one for the count returned by
/// UnsafeMemoryAlignmentUtility.CalculateDifferenceAlign32 on the output pointer, then 32 elements are processed per
/// iteration. Comparing the input bytes with zero yields -1 in the lanes that hold false; adding that to a vector
/// of MessagePackCode.True leaves True for non-zero inputs and True - 1 for zero inputs.
/// NOTE(review): the local named isTrue therefore actually marks the false (zero) lanes.
/// Sse2 path (used only when Avx2 is not supported, at least 32 elements): the same scheme with 16 elements per
/// iteration and CalculateDifferenceAlign16.
/// Remaining elements are converted by the scalar loop.
/// NOTE(review): this method uses fixed statements and pointers but carries no unsafe modifier; presumably the
/// enclosing type is declared unsafe - confirm.
/// The options parameter is not read by this method.
/// </remarks>
public void Serialize(ref MessagePackWriter writer, bool[]?value, MessagePackSerializerOptions options) { if (value == null) { writer.WriteNil(); return; } var inputLength = value.Length; writer.WriteArrayHeader(inputLength); if (inputLength == 0) { return; } var outputLength = inputLength; fixed(bool *pSource = &value[0]) { var inputEnd = pSource + inputLength; var inputIterator = pSource; var destination = writer.GetSpan(inputLength); fixed(byte *pDestination = &destination[0]) { var outputIterator = pDestination; if (Avx2.IsSupported) { const int ShiftCount = 5; const int Stride = 1 << ShiftCount; if (inputLength < Stride << 1) { goto ProcessEach; } { // make output span align 32 var offset = UnsafeMemoryAlignmentUtility.CalculateDifferenceAlign32(outputIterator); inputLength -= offset; var offsetEnd = inputIterator + offset; while (inputIterator != offsetEnd) { *outputIterator++ = *inputIterator++ ? MessagePackCode.True : MessagePackCode.False; } } var vectorTrue = Vector256.Create(MessagePackCode.True).AsSByte(); var vectorLoopLength = (inputLength >> ShiftCount) << ShiftCount; for (var vectorizedEnd = inputIterator + vectorLoopLength; inputIterator != vectorizedEnd; inputIterator += Stride, outputIterator += Stride) { // Load 32 bool values. var current = Avx.LoadVector256((sbyte *)inputIterator); // A value of false for the type bool is 0 for the sbyte representation. var isTrue = Avx2.CompareEqual(current, Vector256 <sbyte> .Zero); // A value of true in the SIMD context is -1 for the sbyte representation. // True is 0xc3 as MessagePackCode and false is 0xc2. // Reinterpreted as sbyte values, they are -61 and -62, respectively. // For each of the 32 true Vectors, we can add -1 to the false ones to get the answer. 
var answer = Avx2.Add(vectorTrue, isTrue); Avx.Store((sbyte *)outputIterator, answer); } } else if (Sse2.IsSupported) { // for older x86 cpu const int ShiftCount = 4; const int Stride = 1 << ShiftCount; if (inputLength < Stride << 1) { goto ProcessEach; } { // make output span align 16 var offset = UnsafeMemoryAlignmentUtility.CalculateDifferenceAlign16(outputIterator); inputLength -= offset; var offsetEnd = inputIterator + offset; while (inputIterator != offsetEnd) { *outputIterator++ = *inputIterator++ ? MessagePackCode.True : MessagePackCode.False; } } var vectorTrue = Vector128.Create(MessagePackCode.True).AsSByte(); var vectorLoopLength = (inputLength >> ShiftCount) << ShiftCount; for (var vectorizedEnd = inputIterator + vectorLoopLength; inputIterator != vectorizedEnd; inputIterator += Stride, outputIterator += Stride) { // Load 16 bool values. var current = Sse2.LoadVector128((sbyte *)inputIterator); // A value of false for the type bool is 0 for the sbyte representation. var isTrue = Sse2.CompareEqual(current, Vector128 <sbyte> .Zero); // A value of true in the SIMD context is -1 for the sbyte representation. // True is 0xc3 as MessagePackCode and false is 0xc2. // Reinterpreted as sbyte values, they are -61 and -62, respectively. // For each of the 16 true Vectors, we can add -1 to the false ones to get the answer. var answer = Sse2.Add(vectorTrue, isTrue); Sse2.Store((sbyte *)outputIterator, answer); } } ProcessEach: while (inputIterator != inputEnd) { *outputIterator++ = *inputIterator++ ? MessagePackCode.True : MessagePackCode.False; } } writer.Advance(outputLength); } }
/// <summary>
/// C-intrinsic-style alias that forwards to <c>Sse.CompareScalarEqual</c>.
/// </summary>
/// <param name="left">First operand of the comparison.</param>
/// <param name="right">Second operand of the comparison.</param>
/// <returns>The vector returned by <c>Sse.CompareScalarEqual</c>.</returns>
public static Vector128<float> _mm_cmpeq_ss(Vector128<float> left, Vector128<float> right)
    => Sse.CompareScalarEqual(left, right);
/// <summary>
/// Unpacks the three operand vectors and the raw result buffer into Single arrays and forwards them
/// to the array-based <c>ValidateResult</c> overload.
/// </summary>
/// <param name="firstOp">First operand vector.</param>
/// <param name="secondOp">Second operand vector.</param>
/// <param name="thirdOp">Third operand vector.</param>
/// <param name="result">Pointer from which one Vector128 worth of bytes is copied as the result.</param>
/// <param name="method">Caller name, filled in by the compiler when omitted.</param>
private void ValidateResult(Vector128<Single> firstOp, Vector128<Single> secondOp, Vector128<Single> thirdOp, void* result, [CallerMemberName] string method = "")
{
    var firstElements = new Single[Op1ElementCount];
    var secondElements = new Single[Op2ElementCount];
    var thirdElements = new Single[Op3ElementCount];
    var resultElements = new Single[RetElementCount];

    // Each operand is written into its array with a single unaligned store.
    Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref firstElements[0]), firstOp);
    Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref secondElements[0]), secondOp);
    Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref thirdElements[0]), thirdOp);

    // The result arrives as a raw pointer, so copy exactly one vector's worth of bytes out of it.
    ref byte resultDestination = ref Unsafe.As<Single, byte>(ref resultElements[0]);
    ref byte resultSource = ref Unsafe.AsRef<byte>(result);
    uint vectorByteCount = (uint)Unsafe.SizeOf<Vector128<Single>>();
    Unsafe.CopyBlockUnaligned(ref resultDestination, ref resultSource, vectorByteCount);

    ValidateResult(firstElements, secondElements, thirdElements, resultElements, method);
}
/// <summary>
/// C-intrinsic-style alias that forwards to <c>Sse.CompareScalarUnorderedGreaterThan</c>.
/// </summary>
/// <param name="left">First operand of the comparison.</param>
/// <param name="right">Second operand of the comparison.</param>
/// <returns>The boolean returned by <c>Sse.CompareScalarUnorderedGreaterThan</c>.</returns>
public static bool _mm_ucomigt_ss(Vector128<float> left, Vector128<float> right)
    => Sse.CompareScalarUnorderedGreaterThan(left, right);
/// <summary>
/// For each x[i], computes (1 - weightB) * interpA + weightB * interpB, where interpA and interpB are
/// linear interpolations into the tables A and B, processing four doubles per iteration.
/// </summary>
/// <remarks>
/// A is treated as samples evenly spaced over [minXA, maxXA] with step (maxXA - minXA) / (A.Length - 1); B likewise
/// over [minXB, maxXB].
/// Per table: the index is (x - min) / delta truncated to int and clamped to [0, Length - 1]; the neighbour index is
/// index + 1 clamped to Length - 1; x is clamped to [min, max] before the interpolation fraction
/// (xClamped - xAtIndex) / delta is computed; table values are fetched with Avx2.GatherVector256 using scale 8.
/// The returned array is allocated with length outputVectorSize, which is defined outside this method.
/// NOTE(review): the loop steps by the Vector256 double count with full-width loads and stores and no scalar tail,
/// so this presumably expects x.Length to be a multiple of that count and outputVectorSize to cover every stored
/// index - confirm with callers.
/// NOTE(review): no IsSupported check is visible here although Avx, Avx2 and Sse41 instructions are used -
/// presumably the caller checks - confirm.
/// </remarks>
private static unsafe double[] BilinearInterpol_AVX( double[] x, double[] A, double minXA, double maxXA, double[] B, double minXB, double maxXB, double weightB) { double[] z = new double[outputVectorSize]; fixed(double *pX = &x[0], pA = &A[0], pB = &B[0], pZ = &z[0]) { Vector256 <double> vWeightB = Vector256.Create(weightB); Vector256 <double> vWeightA = Vector256.Create(1 - weightB); Vector256 <double> vMinXA = Vector256.Create(minXA); Vector256 <double> vMaxXA = Vector256.Create(maxXA); Vector256 <double> vMinXB = Vector256.Create(minXB); Vector256 <double> vMaxXB = Vector256.Create(maxXB); double deltaA = (maxXA - minXA) / (double)(A.Length - 1); double deltaB = (maxXB - minXB) / (double)(B.Length - 1); Vector256 <double> vDeltaA = Vector256.Create(deltaA); Vector256 <double> vDeltaB = Vector256.Create(deltaB); double invDeltaA = 1.0 / deltaA; double invDeltaB = 1.0 / deltaB; Vector256 <double> vInvDeltaA = Vector256.Create(invDeltaA); Vector256 <double> vInvDeltaB = Vector256.Create(invDeltaB); Vector128 <int> ALengthMinusOne = Vector128.Create(A.Length - 1); Vector128 <int> BLengthMinusOne = Vector128.Create(B.Length - 1); Vector128 <int> One = Vector128.Create(1); for (var i = 0; i < x.Length; i += Vector256 <double> .Count) { Vector256 <double> currentX = Avx.LoadVector256(pX + i); // Determine the largest a, such that A[i] = f(xA) and xA <= x[i]. // This involves casting from double to int; here we use a Vector conversion. Vector256 <double> aDouble = Avx.Multiply(Avx.Subtract(currentX, vMinXA), vInvDeltaA); Vector128 <int> a = Avx.ConvertToVector128Int32WithTruncation(aDouble); a = Sse41.Min(Sse41.Max(a, Vector128 <int> .Zero), ALengthMinusOne); Vector128 <int> aPlusOne = Sse41.Min(Sse2.Add(a, One), ALengthMinusOne); // Now, get the reference input, xA, for our index a. // This involves casting from int to double. 
Vector256 <double> xA = Avx.Add(Avx.Multiply(Avx.ConvertToVector256Double(a), vDeltaA), vMinXA); // Now, compute the lambda for our A reference point. Vector256 <double> currentXNormA = Avx.Max(vMinXA, Avx.Min(currentX, vMaxXA)); Vector256 <double> lambdaA = Avx.Multiply(Avx.Subtract(currentXNormA, xA), vInvDeltaA); // Now, we need to load up our reference points using Vector Gather operations. Vector256 <double> AVector = Avx2.GatherVector256(pA, a, 8); Vector256 <double> AVectorPlusOne = Avx2.GatherVector256(pA, aPlusOne, 8); // Now, do the all of the above for our B reference point. Vector256 <double> bDouble = Avx.Multiply(Avx.Subtract(currentX, vMinXB), vInvDeltaB); Vector128 <int> b = Avx.ConvertToVector128Int32WithTruncation(bDouble); b = Sse41.Min(Sse41.Max(b, Vector128 <int> .Zero), BLengthMinusOne); Vector128 <int> bPlusOne = Sse41.Min(Sse2.Add(b, One), BLengthMinusOne); Vector256 <double> xB = Avx.Add(Avx.Multiply(Avx.ConvertToVector256Double(b), vDeltaB), vMinXB); Vector256 <double> currentXNormB = Avx.Max(vMinXB, Avx.Min(currentX, vMaxXB)); Vector256 <double> lambdaB = Avx.Multiply(Avx.Subtract(currentXNormB, xB), vInvDeltaB); Vector256 <double> BVector = Avx2.GatherVector256(pB, b, 8); Vector256 <double> BVectorPlusOne = Avx2.GatherVector256(pB, bPlusOne, 8); Vector256 <double> newZ = Avx.Add(Avx.Multiply(vWeightA, Avx.Add(AVector, Avx.Multiply(lambdaA, Avx.Subtract(AVectorPlusOne, AVector)))), Avx.Multiply(vWeightB, Avx.Add(BVector, Avx.Multiply(lambdaB, Avx.Subtract(BVectorPlusOne, BVector))))); Avx.Store(pZ + i, newZ); } } return(z); }