/// <summary>
/// Scenario: rounds the <c>_fld1</c> field of a locally created <c>TestStruct</c>,
/// writes the result to the output buffer and validates it against the field.
/// </summary>
public void RunStructLclFldScenario()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));

    var localStruct = TestStruct.Create();
    var rounded = Sse41.RoundToNearestInteger(localStruct._fld1);

    Unsafe.Write(_dataTable.outArrayPtr, rounded);
    ValidateResult(localStruct._fld1, _dataTable.outArrayPtr);
}
/// <summary>
/// Scenario: reads the operand from the input buffer with <c>Unsafe.Read</c> into a local,
/// rounds that local, writes the result to the output buffer and validates it.
/// </summary>
public void RunLclVarScenario_UnsafeRead()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));

    Vector128<Single> operand = Unsafe.Read<Vector128<Single>>(_dataTable.inArrayPtr);
    Vector128<Single> rounded = Sse41.RoundToNearestInteger(operand);

    Unsafe.Write(_dataTable.outArrayPtr, rounded);
    ValidateResult(operand, _dataTable.outArrayPtr);
}
/// <summary>
/// Scenario: loads the operand from the input buffer with <c>Sse.LoadAlignedVector128</c>
/// into a local, rounds that local, writes the result to the output buffer and validates it.
/// </summary>
public void RunLclVarScenario_LoadAligned()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));

    Vector128<Single> operand = Sse.LoadAlignedVector128((Single*)(_dataTable.inArrayPtr));
    Vector128<Single> rounded = Sse41.RoundToNearestInteger(operand);

    Unsafe.Write(_dataTable.outArrayPtr, rounded);
    ValidateResult(operand, _dataTable.outArrayPtr);
}
/// <summary>
/// Scenario: rounds the <c>_fld</c> field of a freshly constructed test-class instance,
/// writes the result to the output buffer and validates it against that field.
/// </summary>
public void RunClassLclFldScenario()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));

    var instance = new SimpleUnaryOpTest__RoundToNearestIntegerSingle();
    var rounded = Sse41.RoundToNearestInteger(instance._fld);

    Unsafe.Write(_dataTable.outArrayPtr, rounded);
    ValidateResult(instance._fld, _dataTable.outArrayPtr);
}
/// <summary>
/// Scenario: loads the operand from the first input buffer with <c>Sse2.LoadVector128</c>
/// into a local, rounds that local, writes the result to the output buffer and validates it.
/// </summary>
public void RunLclVarScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));

    Vector128<Double> operand = Sse2.LoadVector128((Double*)(_dataTable.inArray1Ptr));
    Vector128<Double> rounded = Sse41.RoundToNearestInteger(operand);

    Unsafe.Write(_dataTable.outArrayPtr, rounded);
    ValidateResult(operand, _dataTable.outArrayPtr);
}
/// <summary>
/// Scenario: rounds a vector loaded directly (no intermediate local) from the input buffer
/// with <c>Sse.LoadVector128</c>, writes the result to the output buffer and validates it
/// against the input buffer.
/// </summary>
public void RunBasicScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));

    // The load is deliberately kept as the direct argument of the intrinsic.
    Vector128<Single> rounded = Sse41.RoundToNearestInteger(
        Sse.LoadVector128((Single*)(_dataTable.inArrayPtr)));

    Unsafe.Write(_dataTable.outArrayPtr, rounded);
    ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
}
/// <summary>
/// Scenario: rounds a vector read directly (no intermediate local) from the first input
/// buffer with <c>Unsafe.Read</c>, writes the result to the output buffer and validates it
/// against the input buffer.
/// </summary>
public void RunBasicScenario_UnsafeRead()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));

    // The read is deliberately kept as the direct argument of the intrinsic.
    Vector128<Double> rounded = Sse41.RoundToNearestInteger(
        Unsafe.Read<Vector128<Double>>(_dataTable.inArray1Ptr));

    Unsafe.Write(_dataTable.outArrayPtr, rounded);
    ValidateResult(_dataTable.inArray1Ptr, _dataTable.outArrayPtr);
}
/// <summary>
/// Scenario: rounds the <c>_clsVar</c> member, writes the result to the output buffer
/// and validates it against <c>_clsVar</c>.
/// </summary>
public void RunClsVarScenario()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));

    var rounded = Sse41.RoundToNearestInteger(_clsVar);

    Unsafe.Write(_dataTable.outArrayPtr, rounded);
    ValidateResult(_clsVar, _dataTable.outArrayPtr);
}
/// <summary>
/// Scenario: pins this instance's <c>_fld1</c>, loads it with <c>Sse2.LoadVector128</c>,
/// rounds it, writes the result to the output buffer of <paramref name="testClass"/>
/// and validates it through <paramref name="testClass"/>.
/// </summary>
/// <param name="testClass">Test instance that supplies the output buffer and the validator.</param>
public void RunStructFldScenario_Load(SimpleUnaryOpTest__RoundToNearestIntegerDouble testClass)
{
    fixed (Vector128<Double>* fieldPtr = &_fld1)
    {
        Vector128<Double> rounded = Sse41.RoundToNearestInteger(
            Sse2.LoadVector128((Double*)(fieldPtr)));

        Unsafe.Write(testClass._dataTable.outArrayPtr, rounded);
        testClass.ValidateResult(_fld1, testClass._dataTable.outArrayPtr);
    }
}
/// <summary>
/// Scenario: pins this instance's <c>_fld1</c>, loads it with <c>Sse2.LoadVector128</c>,
/// rounds it, writes the result to the output buffer and validates it against the field.
/// </summary>
public void RunClassFldScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario_Load));

    fixed (Vector128<Double>* fieldPtr = &_fld1)
    {
        Vector128<Double> rounded = Sse41.RoundToNearestInteger(
            Sse2.LoadVector128((Double*)(fieldPtr)));

        Unsafe.Write(_dataTable.outArrayPtr, rounded);
        ValidateResult(_fld1, _dataTable.outArrayPtr);
    }
}
/// <summary>
/// Rounds <paramref name="vector"/> by dispatching on hardware support:
/// <c>Avx.RoundToNearestInteger</c> when AVX is supported; otherwise
/// <c>Sse41.RoundToNearestInteger</c> applied separately to the two parts produced by
/// <c>GetLowHigh</c> and recombined with <c>FromLowHigh</c>; otherwise <c>SoftwareFallback</c>.
/// </summary>
/// <param name="vector">The vector to round.</param>
/// <returns>The result of whichever of the three paths was taken.</returns>
// NOTE(review): GetLowHigh/FromLowHigh are not visible here; presumably they split and rejoin the 128-bit halves - confirm.
// NOTE(review): the closing brace of this method is not present in this excerpt.
public static Vector256 <double> Round(Vector256 <double> vector) { if (Avx.IsSupported) { return(Avx.RoundToNearestInteger(vector)); } if (Sse41.IsSupported) { GetLowHigh(vector, out var low, out var high); return(FromLowHigh(Sse41.RoundToNearestInteger(low), Sse41.RoundToNearestInteger(high))); } return(SoftwareFallback(vector));
/// <summary>
/// Scenario: pins <c>_clsVar1</c>, loads it with <c>Sse.LoadVector128</c>, rounds it,
/// writes the result to the output buffer and validates it against <c>_clsVar1</c>.
/// </summary>
public void RunClsVarScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario_Load));

    fixed (Vector128<Single>* clsVarPtr = &_clsVar1)
    {
        Vector128<Single> rounded = Sse41.RoundToNearestInteger(
            Sse.LoadVector128((Single*)(clsVarPtr)));

        Unsafe.Write(_dataTable.outArrayPtr, rounded);
        ValidateResult(_clsVar1, _dataTable.outArrayPtr);
    }
}
/// <summary>
/// Scenario: constructs a fresh test-class instance, pins its <c>_fld1</c>, loads it with
/// <c>Sse.LoadVector128</c>, rounds it, writes the result to the output buffer and
/// validates it against that field.
/// </summary>
public void RunClassLclFldScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario_Load));

    var instance = new SimpleUnaryOpTest__RoundToNearestIntegerSingle();

    fixed (Vector128<Single>* fieldPtr = &instance._fld1)
    {
        Vector128<Single> rounded = Sse41.RoundToNearestInteger(
            Sse.LoadVector128((Single*)(fieldPtr)));

        Unsafe.Write(_dataTable.outArrayPtr, rounded);
        ValidateResult(instance._fld1, _dataTable.outArrayPtr);
    }
}
/// <summary>
/// Rounds every lane of <paramref name="a"/> to the nearest integral value, with ties going
/// to the even integer, on both the SSE4.1 path and the SSE2 fallback.
/// </summary>
/// <remarks>
/// The fallback used to add a signed 0.5 and truncate. That disagreed with the SSE4.1 path:
/// ties were rounded away from zero, inputs just below .5 (e.g. 0.49999997f) were rounded up
/// because the addition itself rounds, lanes with |a| &gt;= 2^31, infinities and NaN collapsed
/// to -2147483648f through the int32 conversion, and the sign of a zero result was lost.
/// </remarks>
/// <param name="a">Vector of single-precision values to round.</param>
/// <returns>The rounded vector.</returns>
public static f32 Round_f32(f32 a)
{
    if (Sse41.IsSupported)
    {
        return Sse41.RoundToNearestInteger(a);
    }

    f32 signMask = Broad_i32(unchecked((int)0x80000000)).AsSingle();

    // cvtps2dq rounds according to MXCSR, which is round-to-nearest-even by default.
    f32 rounded = Sse2.ConvertToVector128Single(Sse2.ConvertToVector128Int32(a));

    // The int32 round trip turns small negatives into +0; put the input's sign bit back.
    rounded = Or(rounded, And(a, signMask));

    // Floats with |a| >= 2^23 are already integral (and may not fit in int32), and NaN compares
    // false here, so those lanes keep the input unchanged.
    f32 magnitude = Sse.AndNot(signMask, a);
    f32 useRounded = Sse.CompareLessThan(magnitude, Broad_f32(8388608f));

    return Or(And(useRounded, rounded), Sse.AndNot(useRounded, a));
}
/// <summary>
/// Computes the "distance" of a scan cloud placed at the given pose against the hole map.
/// TODO - It's actually slower than SISD. Need more parallelism.
/// </summary>
/// <param name="cloud">Cloud of points</param>
/// <param name="pose">Pose of cloud (X, Y position and Z rotation angle)</param>
/// <returns>
/// Sum of the map pixels hit by in-bounds points, times 1024, divided by the total point count;
/// <see cref="int.MaxValue"/> when no point lands inside the map.
/// </returns>
private int CalculateDistanceSSE41(ScanCloud cloud, Vector3 pose)
{
    float scaledCos = MathF.Cos(pose.Z) * HoleMap.Scale;
    float scaledSin = MathF.Sin(pose.Z) * HoleMap.Scale;
    float offsetX = pose.X * HoleMap.Scale;
    float offsetY = pose.Y * HoleMap.Scale;

    // Rotation terms laid out so that one multiply plus one horizontal add yields (x', y').
    Vector128<float> rotation = Vector128.Create(scaledCos, -scaledSin, scaledSin, scaledCos);
    Vector128<float> translation = Vector128.Create(offsetX, offsetY, offsetX, offsetY);

    long pixelSum = 0;
    int hits = 0;

    // Translate and rotate scan to robot position and compute the "distance"
    for (int index = 0; index < cloud.Points.Count; index++)
    {
        float pointX = cloud.Points[index].X;
        float pointY = cloud.Points[index].Y;

        Vector128<float> transformed = Vector128.Create(pointX, pointY, pointX, pointY);
        transformed = Sse41.Multiply(rotation, transformed);
        transformed = Sse41.HorizontalAdd(transformed, transformed);
        transformed = Sse41.Add(transformed, translation);
        transformed = Sse41.RoundToNearestInteger(transformed);

        int mapX = (int)transformed.GetElement(0);
        int mapY = (int)transformed.GetElement(1);

        // Skip points that fall outside the map
        if (mapX < 0 || mapX >= HoleMap.Size || mapY < 0 || mapY >= HoleMap.Size)
        {
            continue;
        }

        pixelSum += HoleMap.Pixels[mapY * HoleMap.Size + mapX];
        hits++;
    }

    return hits > 0
        ? (int)((pixelSum * 1024) / cloud.Points.Count)
        : int.MaxValue;
}
/// <summary>
/// Resamples <paramref name="sampleCount"/> samples from <paramref name="inputBuffer"/> into
/// <paramref name="outputBuffer"/>. Each output sample is the rounded sum of four consecutive
/// input samples multiplied by four coefficients taken from the table returned by
/// <c>GetDefaultParameter(ratio)</c>.
/// </summary>
/// <param name="outputBuffer">Destination; indices 0 .. sampleCount - 1 are written.</param>
/// <param name="inputBuffer">Source samples; four consecutive samples are read per output sample.</param>
/// <param name="ratio">Amount added to <paramref name="fraction"/> for every output sample.</param>
/// <param name="fraction">
/// Position between input samples, carried across calls. Its integer part advances the input
/// index and is then removed; fraction * 128 selects the coefficient group.
/// </param>
/// <param name="sampleCount">Number of output samples to produce.</param>
/// <param name="needPitch">Not read by this method (see the TODO below).</param>
private unsafe static void ResampleDefaultQuality(Span <float> outputBuffer, ReadOnlySpan <short> inputBuffer, float ratio, ref float fraction, int sampleCount, bool needPitch)
{
    ReadOnlySpan <float> parameters = GetDefaultParameter(ratio);
    int inputBufferIndex = 0, i = 0;

    // TODO: REV8 fast path (when needPitch == false the input index progression is constant + we need SIMD)
    if (Sse41.IsSupported)
    {
        if (ratio == 1f)
        {
            // This path never reads or updates fraction: the same four coefficients (the first
            // group of the table) are used for every sample and the input advances one sample
            // per output.
            fixed(short *pInput = inputBuffer)
            {
                fixed(float *pOutput = outputBuffer, pParameters = parameters)
                {
                    Vector128 <float> parameter = Sse.LoadVector128(pParameters);
                    // sampleCount & ~3 rounds down to a multiple of 4; the scalar loop at the end handles the rest.
                    for (; i < (sampleCount & ~3); i += 4)
                    {
                        // Each load widens four consecutive 16-bit samples to 32-bit integers.
                        Vector128 <int> intInput0 = Sse41.ConvertToVector128Int32(pInput + (uint)i);
                        Vector128 <int> intInput1 = Sse41.ConvertToVector128Int32(pInput + (uint)i + 1);
                        Vector128 <int> intInput2 = Sse41.ConvertToVector128Int32(pInput + (uint)i + 2);
                        Vector128 <int> intInput3 = Sse41.ConvertToVector128Int32(pInput + (uint)i + 3);
                        Vector128 <float> input0 = Sse2.ConvertToVector128Single(intInput0);
                        Vector128 <float> input1 = Sse2.ConvertToVector128Single(intInput1);
                        Vector128 <float> input2 = Sse2.ConvertToVector128Single(intInput2);
                        Vector128 <float> input3 = Sse2.ConvertToVector128Single(intInput3);
                        Vector128 <float> mix0 = Sse.Multiply(input0, parameter);
                        Vector128 <float> mix1 = Sse.Multiply(input1, parameter);
                        Vector128 <float> mix2 = Sse.Multiply(input2, parameter);
                        Vector128 <float> mix3 = Sse.Multiply(input3, parameter);
                        // Three horizontal adds reduce the four product vectors to one vector
                        // holding the four per-sample sums in output order.
                        Vector128 <float> mix01 = Sse3.HorizontalAdd(mix0, mix1);
                        Vector128 <float> mix23 = Sse3.HorizontalAdd(mix2, mix3);
                        Vector128 <float> mix0123 = Sse3.HorizontalAdd(mix01, mix23);
                        Sse.Store(pOutput + (uint)i, Sse41.RoundToNearestInteger(mix0123));
                    }
                }
            }
            // Keep the scalar tail loop reading from where the vector loop stopped.
            inputBufferIndex = i;
        }
        else
        {
            fixed(short *pInput = inputBuffer)
            {
                fixed(float *pOutput = outputBuffer, pParameters = parameters)
                {
                    for (; i < (sampleCount & ~3); i += 4)
                    {
                        // For each of the four samples: baseIndexN is the offset of a 4-float
                        // coefficient group (fraction * 128, times 4 floats per group) and
                        // inputIndexN is the first input sample to read. fraction accumulates
                        // across the four samples, so "& 127" drops the whole-sample part from
                        // the group index while (uint)fraction adds it to the input index.
                        uint baseIndex0 = (uint)(fraction * 128) * 4;
                        uint inputIndex0 = (uint)inputBufferIndex;
                        fraction += ratio;
                        uint baseIndex1 = ((uint)(fraction * 128) & 127) * 4;
                        uint inputIndex1 = (uint)inputBufferIndex + (uint)fraction;
                        fraction += ratio;
                        uint baseIndex2 = ((uint)(fraction * 128) & 127) * 4;
                        uint inputIndex2 = (uint)inputBufferIndex + (uint)fraction;
                        fraction += ratio;
                        uint baseIndex3 = ((uint)(fraction * 128) & 127) * 4;
                        uint inputIndex3 = (uint)inputBufferIndex + (uint)fraction;
                        fraction += ratio;
                        inputBufferIndex += (int)fraction;
                        // Only keep lower part (safe as fraction isn't supposed to be negative)
                        fraction -= (int)fraction;
                        Vector128 <float> parameter0 = Sse.LoadVector128(pParameters + baseIndex0);
                        Vector128 <float> parameter1 = Sse.LoadVector128(pParameters + baseIndex1);
                        Vector128 <float> parameter2 = Sse.LoadVector128(pParameters + baseIndex2);
                        Vector128 <float> parameter3 = Sse.LoadVector128(pParameters + baseIndex3);
                        Vector128 <int> intInput0 = Sse41.ConvertToVector128Int32(pInput + inputIndex0);
                        Vector128 <int> intInput1 = Sse41.ConvertToVector128Int32(pInput + inputIndex1);
                        Vector128 <int> intInput2 = Sse41.ConvertToVector128Int32(pInput + inputIndex2);
                        Vector128 <int> intInput3 = Sse41.ConvertToVector128Int32(pInput + inputIndex3);
                        Vector128 <float> input0 = Sse2.ConvertToVector128Single(intInput0);
                        Vector128 <float> input1 = Sse2.ConvertToVector128Single(intInput1);
                        Vector128 <float> input2 = Sse2.ConvertToVector128Single(intInput2);
                        Vector128 <float> input3 = Sse2.ConvertToVector128Single(intInput3);
                        Vector128 <float> mix0 = Sse.Multiply(input0, parameter0);
                        Vector128 <float> mix1 = Sse.Multiply(input1, parameter1);
                        Vector128 <float> mix2 = Sse.Multiply(input2, parameter2);
                        Vector128 <float> mix3 = Sse.Multiply(input3, parameter3);
                        // Same reduction as above: one vector with the four per-sample sums.
                        Vector128 <float> mix01 = Sse3.HorizontalAdd(mix0, mix1);
                        Vector128 <float> mix23 = Sse3.HorizontalAdd(mix2, mix3);
                        Vector128 <float> mix0123 = Sse3.HorizontalAdd(mix01, mix23);
                        Sse.Store(pOutput + (uint)i, Sse41.RoundToNearestInteger(mix0123));
                    }
                }
            }
        }
    }
    // Scalar path: handles every sample when SSE4.1 is unavailable, otherwise only the
    // samples left over after the vector loops.
    for (; i < sampleCount; i++)
    {
        int baseIndex = (int)(fraction * 128) * 4;
        ReadOnlySpan <float> parameter = parameters.Slice(baseIndex, 4);
        ReadOnlySpan <short> currentInput = inputBuffer.Slice(inputBufferIndex, 4);
        outputBuffer[i] = (float)Math.Round(currentInput[0] * parameter[0] + currentInput[1] * parameter[1] + currentInput[2] * parameter[2] + currentInput[3] * parameter[3]);
        fraction += ratio;
        inputBufferIndex += (int)fraction;
        // Only keep lower part (safe as fraction isn't supposed to be negative)
        fraction -= (int)fraction;
    }
}
/// <summary>
/// Forwards to <c>Sse41.RoundToNearestInteger</c> for a vector of four single-precision values.
/// </summary>
/// <param name="value">Vector to round.</param>
/// <returns>The vector returned by <c>Sse41.RoundToNearestInteger</c>.</returns>
public static Vector128<float> _mm_round_ps(Vector128<float> value)
    => Sse41.RoundToNearestInteger(value);
/// <summary>
/// Forwards to <c>Sse41.RoundToNearestInteger</c> for a vector of two double-precision values.
/// </summary>
/// <param name="value">Vector to round.</param>
/// <returns>The vector returned by <c>Sse41.RoundToNearestInteger</c>.</returns>
public static Vector128<double> _mm_round_pd(Vector128<double> value)
    => Sse41.RoundToNearestInteger(value);