/// <summary>
/// Field scenario: passes the instance field <c>_fld</c> directly to
/// <c>Avx.ConvertToVector256Int32WithTruncation</c>, writes the converted vector to the
/// data table's output buffer, and validates that buffer against the field value.
/// </summary>
public void RunFldScenario()
{
    var converted = Avx.ConvertToVector256Int32WithTruncation(_fld);

    Unsafe.Write(_dataTable.outArrayPtr, converted);
    ValidateResult(_fld, _dataTable.outArrayPtr);
}
/// <summary>
/// Local-variable scenario using an aligned load: the operand is read from the data
/// table's input buffer with <c>Avx.LoadAlignedVector256</c> into a local, converted with
/// <c>Avx.ConvertToVector256Int32WithTruncation</c>, written to the output buffer, and
/// validated against the loaded operand.
/// </summary>
public void RunLclVarScenario_LoadAligned()
{
    // NOTE(review): an aligned load presumably requires inArrayPtr to be suitably aligned
    // by the data table - confirm against its definition.
    var operand = Avx.LoadAlignedVector256((float*)(_dataTable.inArrayPtr));
    var converted = Avx.ConvertToVector256Int32WithTruncation(operand);

    Unsafe.Write(_dataTable.outArrayPtr, converted);
    ValidateResult(operand, _dataTable.outArrayPtr);
}
/// <summary>
/// Local-field scenario: constructs a fresh test object, converts its <c>_fld</c> field with
/// <c>Avx.ConvertToVector256Int32WithTruncation</c>, writes the result to this instance's
/// output buffer, and validates it against the new object's field.
/// </summary>
public void RunLclFldScenario()
{
    var instance = new SimpleUnaryOpTest__ConvertToVector256Int32WithTruncationSingle();
    var converted = Avx.ConvertToVector256Int32WithTruncation(instance._fld);

    // The output buffer belongs to this object, not to the freshly created one.
    Unsafe.Write(_dataTable.outArrayPtr, converted);
    ValidateResult(instance._fld, _dataTable.outArrayPtr);
}
/// <summary>
/// Local-variable scenario using <c>Unsafe.Read</c>: the operand is read from the data
/// table's input buffer into a local, converted with
/// <c>Avx.ConvertToVector256Int32WithTruncation</c>, written to the output buffer, and
/// validated against the operand that was read.
/// </summary>
public void RunLclVarScenario_UnsafeRead()
{
    var operand = Unsafe.Read<Vector256<float>>(_dataTable.inArrayPtr);
    var converted = Avx.ConvertToVector256Int32WithTruncation(operand);

    Unsafe.Write(_dataTable.outArrayPtr, converted);
    ValidateResult(operand, _dataTable.outArrayPtr);
}
/// <summary>
/// Basic scenario using an unaligned load: the result of <c>Avx.LoadVector256</c> on the
/// input buffer is passed directly as the argument of
/// <c>Avx.ConvertToVector256Int32WithTruncation</c>. The converted vector is written to
/// the output buffer and validated against the raw input buffer.
/// </summary>
public void RunBasicScenario_Load()
{
    // The load is deliberately left nested in the call (no intermediate local),
    // matching the original expression shape.
    var converted = Avx.ConvertToVector256Int32WithTruncation(
        Avx.LoadVector256((float*)(_dataTable.inArrayPtr)));

    Unsafe.Write(_dataTable.outArrayPtr, converted);
    ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
}
/// <summary>
/// Basic scenario using <c>Unsafe.Read</c>: the vector read from the input buffer is passed
/// directly as the argument of <c>Avx.ConvertToVector256Int32WithTruncation</c>. The
/// converted vector is written to the output buffer and validated against the raw input
/// buffer.
/// </summary>
public void RunBasicScenario_UnsafeRead()
{
    // The read is deliberately left nested in the call (no intermediate local),
    // matching the original expression shape.
    var converted = Avx.ConvertToVector256Int32WithTruncation(
        Unsafe.Read<Vector256<float>>(_dataTable.inArrayPtr));

    Unsafe.Write(_dataTable.outArrayPtr, converted);
    ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
}
/// <summary>
/// Applies a lookup table, in place, to a buffer of floats processed in groups of four.
/// In the scalar path the first three floats of each group are divided by the fourth,
/// scaled by <paramref name="lutmax"/>, clamped to [0, lutmax], mapped through the table
/// via <c>Lerp</c> on adjacent entries, and multiplied by the fourth float again. The
/// fourth float itself is not rewritten unless it is below 1/1024, in which case the whole
/// group is set to zero.
/// </summary>
/// <param name="ipstart">First byte of the float data; read and overwritten.</param>
/// <param name="opstart">Must equal <paramref name="ipstart"/> (checked by a debug assertion); otherwise unused.</param>
/// <param name="lutstart">First entry of the lookup table. Entries up to index lutmax + 1 can be read.</param>
/// <param name="lutmax">Scale factor and upper clamp applied before indexing the table.</param>
/// <param name="cb">Length of the data in bytes.</param>
/// <remarks>
/// NOTE(review): the loops step four floats at a time with no partial-group handling, so
/// <paramref name="cb"/> is presumably a multiple of 16 - confirm with callers.
/// NOTE(review): the fourth float of each group looks like an alpha channel (the shuffle
/// mask is named ShuffleMaskAlpha) - confirm.
/// </remarks>
unsafe public static void ConvertFloat3A(byte* ipstart, byte* opstart, float* lutstart, int lutmax, int cb)
{
    // The conversion only works in place; opstart is not used beyond this check.
    Debug.Assert(ipstart == opstart);

    float* px = (float*)ipstart;
    float* pxEnd = (float*)(ipstart + cb);
    float* lut = lutstart;

#if HWINTRINSICS
    if (Avx2.IsSupported)
    {
        var gatherMask = Avx.BroadcastVector128ToVector256((float*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(HWIntrinsics.GatherMask3x)));
        var lutMaxV = Vector256.Create((float)lutmax);
        var zeroV = Vector256<float>.Zero;
        var onesF = Vector256.Create(1f);
        var onesI = Vector256.Create(1);

        // Pull the end pointer back by one vector so a full-width load never starts
        // closer than one vector to the end of the data.
        pxEnd -= Vector256<float>.Count;
        for (; px <= pxEnd; px += Vector256<float>.Count)
        {
            // Negative inputs are raised to zero before anything else.
            var val = Avx.Max(zeroV, Avx.LoadVector256(px));
            var alpha = Avx.Shuffle(val, val, HWIntrinsics.ShuffleMaskAlpha);

            // value * (lutmax / alpha), using the hardware reciprocal, then capped at lutmax.
            var scale = Avx.Multiply(lutMaxV, Avx.Reciprocal(alpha));
            val = Avx.Multiply(val, scale);
            val = Avx.Min(val, lutMaxV);

            // Integer part selects the table entry; the fractional part is the blend weight.
            var idx = Avx.ConvertToVector256Int32WithTruncation(val);
            var idxF = Avx.ConvertToVector256Single(idx);
            var idxNext = Avx2.Add(idx, onesI);

            // Lanes not selected by the gather mask take their value from onesF.
            // NOTE(review): presumably that is the fourth lane of each group, so it ends
            // up as 1 * alpha after the final multiply - confirm against GatherMask3x.
            var lo = Avx2.GatherMaskVector256(onesF, lut, idx, gatherMask, sizeof(float));
            var hi = Avx2.GatherMaskVector256(onesF, lut, idxNext, gatherMask, sizeof(float));

            val = HWIntrinsics.Lerp(lo, hi, Avx.Subtract(val, idxF));
            val = Avx.Multiply(val, alpha);
            Avx.Store(px, val);
        }

        // Restore the true end pointer so the scalar loop below handles whatever is left.
        pxEnd += Vector256<float>.Count;
    }
#endif
    {
        var lutMax4 = new Vector4(lutmax);
        var zero4 = Vector4.Zero;
        float alphaMin = new Vector4(1 / 1024f).X;

        for (; px < pxEnd; px += 4)
        {
            var val = Unsafe.ReadUnaligned<Vector4>(px);
            float a = val.W;

            // Groups whose fourth component is below the threshold are zeroed outright.
            if (a < alphaMin)
            {
                Unsafe.WriteUnaligned(px, zero4);
                continue;
            }

            val = (val * lutMax4 / a).Clamp(zero4, lutMax4);

            float c0 = val.X;
            float c1 = val.Y;
            float c2 = val.Z;

            uint n0 = (uint)c0;
            uint n1 = (uint)c1;
            uint n2 = (uint)c2;

            // Only the first three components are rewritten; px[3] keeps its value.
            px[0] = Lerp(lut[n0], lut[n0 + 1], c0 - (int)n0) * a;
            px[1] = Lerp(lut[n1], lut[n1 + 1], c1 - (int)n1) * a;
            px[2] = Lerp(lut[n2], lut[n2 + 1], c2 - (int)n2) * a;
        }
    }
}
/// <summary>
/// Applies a lookup table, in place, to every float in a buffer. Each value is multiplied
/// by <paramref name="lutmax"/>, clamped to [0, lutmax], and replaced by the result of
/// <c>Lerp</c> over the two adjacent table entries selected by its integer part, weighted
/// by its fractional part.
/// </summary>
/// <param name="ipstart">First byte of the float data; read and overwritten.</param>
/// <param name="opstart">Must equal <paramref name="ipstart"/> (checked by a debug assertion); otherwise unused.</param>
/// <param name="lutstart">First entry of the lookup table. Entries up to index lutmax + 1 can be read.</param>
/// <param name="lutmax">Scale factor and upper clamp applied before indexing the table.</param>
/// <param name="cb">Length of the data in bytes.</param>
unsafe public static void ConvertFloat(byte* ipstart, byte* opstart, float* lutstart, int lutmax, int cb)
{
    // The conversion only works in place; opstart is not used beyond this check.
    Debug.Assert(ipstart == opstart);

    float* px = (float*)ipstart;
    float* pxEnd = (float*)(ipstart + cb);
    float* lut = lutstart;

#if HWINTRINSICS
    if (Avx2.IsSupported)
    {
        var lutMaxV = Vector256.Create((float)lutmax);
        var zeroV = Vector256<float>.Zero;
        var onesI = Vector256.Create(1);

        // Main loop: one full vector per iteration. The end pointer is pulled back by one
        // vector for the duration of the loop and restored afterwards.
        pxEnd -= Vector256<float>.Count;
        for (; px <= pxEnd; px += Vector256<float>.Count)
        {
            var val = Avx.Multiply(lutMaxV, Avx.LoadVector256(px));
            val = Avx.Min(Avx.Max(zeroV, val), lutMaxV);

            // Integer part selects the table entry; the fractional part is the blend weight.
            var idx = Avx.ConvertToVector256Int32WithTruncation(val);
            var idxF = Avx.ConvertToVector256Single(idx);
            var idxNext = Avx2.Add(idx, onesI);

            var lo = Avx2.GatherVector256(lut, idx, sizeof(float));
            var hi = Avx2.GatherVector256(lut, idxNext, sizeof(float));

            val = HWIntrinsics.Lerp(lo, hi, Avx.Subtract(val, idxF));
            Avx.Store(px, val);
        }
        pxEnd += Vector256<float>.Count;

        // Tail: remaining floats, one at a time.
        float lower = zeroV.ToScalar();
        float upper = lutMaxV.ToScalar();
        for (; px < pxEnd; px++)
        {
            float f = (*px * upper).Clamp(lower, upper);
            uint i = (uint)f;
            *px = Lerp(lut[i], lut[i + 1], f - i);
        }
    }
    else
#endif
    {
        var lutMax4 = new Vector4(lutmax);
        var zero4 = Vector4.Zero;

        // Main loop: four floats per iteration, scaled and clamped as a Vector4 and then
        // looked up component by component.
        pxEnd -= 4;
        for (; px <= pxEnd; px += 4)
        {
            var val = (Unsafe.ReadUnaligned<Vector4>(px) * lutMax4).Clamp(zero4, lutMax4);

            float c0 = val.X;
            float c1 = val.Y;
            float c2 = val.Z;
            float c3 = val.W;

            uint n0 = (uint)c0;
            uint n1 = (uint)c1;
            uint n2 = (uint)c2;
            uint n3 = (uint)c3;

            px[0] = Lerp(lut[n0], lut[n0 + 1], c0 - (int)n0);
            px[1] = Lerp(lut[n1], lut[n1 + 1], c1 - (int)n1);
            px[2] = Lerp(lut[n2], lut[n2 + 1], c2 - (int)n2);
            px[3] = Lerp(lut[n3], lut[n3 + 1], c3 - (int)n3);
        }
        pxEnd += 4;

        // Tail: remaining floats, one at a time.
        float lower = zero4.X;
        float upper = lutMax4.X;
        for (; px < pxEnd; px++)
        {
            float f = (*px * upper).Clamp(lower, upper);
            uint i = (uint)f;
            *px = Lerp(lut[i], lut[i + 1], f - i);
        }
    }
}