/// <summary>
/// Temporally smooths one line of bytes in place, 16 bytes at a time, using the
/// same line from the previous and next buffers as references.
/// </summary>
/// <param name="pcurr">Line to filter; results are written back over it.</param>
/// <param name="pprev">Corresponding line of the previous buffer (read only).</param>
/// <param name="pnext">Corresponding line of the next buffer (read only).</param>
/// <param name="cb">
/// Line length in bytes. The last vector offset is computed as <c>cb - 16</c> in
/// unsigned arithmetic, so values below 16 wrap around and are not supported here.
/// </param>
unsafe private static void denoiseLineSse2(byte* pcurr, byte* pprev, byte* pnext, int cb)
{
    nuint step = (nuint)Vector128<byte>.Count;
    nuint pos = 0, last = (nuint)cb - step;

    var vbias = Vector128.Create((byte)0x80);
    var vlimit = Vector128.Create(denoiseThreshold);

    while (true)
    {
        var vc = Sse2.LoadVector128(pcurr + pos);
        var vp = Sse2.LoadVector128(pprev + pos);
        var vn = Sse2.LoadVector128(pnext + pos);

        // |curr - prev| via two saturating subtractions; max(d, limit) == limit  <=>  d <= limit.
        var vabsPrev = Sse2.Or(Sse2.SubtractSaturate(vc, vp), Sse2.SubtractSaturate(vp, vc));
        var vnearPrev = Sse2.CompareEqual(Sse2.Max(vabsPrev, vlimit), vlimit);

        // Same closeness test against the next buffer.
        var vabsNext = Sse2.Or(Sse2.SubtractSaturate(vc, vn), Sse2.SubtractSaturate(vn, vc));
        var vnearNext = Sse2.CompareEqual(Sse2.Max(vabsNext, vlimit), vlimit);

        var vmixPrev = Sse2.Average(vc, vp);
        var vmixNext = Sse2.Average(vc, vn);

        // NOTE(review): HWIntrinsics.BlendVariable is assumed to select its second operand
        // where the mask is set (as Sse41.BlendVariable does) -- confirm against the helper.
        var vpickPrev = HWIntrinsics.BlendVariable(vmixNext, vmixPrev, vnearPrev);
        var vpickNext = HWIntrinsics.BlendVariable(vmixPrev, vmixNext, vnearNext);
        var vresult = Sse2.Average(vpickPrev, vpickNext);

        var vapply = Sse2.Or(vnearPrev, vnearNext);

        // Where both neighbours are within the threshold, additionally mix in avg(prev, next).
        var vnearBoth = Sse2.And(vnearPrev, vnearNext);
        var vmixBoth = Sse2.Average(vp, vn);
        vresult = Sse2.Average(vresult, HWIntrinsics.BlendVariable(vresult, vmixBoth, vnearBoth));

        // SSE2 only has signed byte compares, so flip the top bit to order unsigned values.
        var vcs = Sse2.Xor(vc, vbias).AsSByte();
        var vps = Sse2.Xor(vp, vbias).AsSByte();
        var vns = Sse2.Xor(vn, vbias).AsSByte();

        // Current value strictly above both neighbours, or strictly below both.
        var vabove = Sse2.And(Sse2.CompareGreaterThan(vcs, vps), Sse2.CompareGreaterThan(vcs, vns));
        var vbelow = Sse2.And(Sse2.CompareGreaterThan(vps, vcs), Sse2.CompareGreaterThan(vns, vcs));

        // Only replace bytes that are near a neighbour AND are such an outlier.
        vapply = Sse2.And(vapply, Sse2.Or(vabove, vbelow).AsByte());
        vresult = HWIntrinsics.BlendVariable(vc, vresult, vapply);

        Sse2.Store(pcurr + pos, vresult);
        pos += step;

        if (pos <= last)
        {
            continue;
        }

        if (pos >= last + step)
        {
            break;
        }

        // Leftover bytes: run one more vector ending exactly at the end of the line.
        // It overlaps bytes already stored by the previous iteration, which are re-read here.
        pos = last;
    }
}
/// <summary>
/// Compares one line against a reference, 16 bytes (four 32-bit elements) at a time,
/// writes the de-duplicated line to <paramref name="penc"/>, and reports how many
/// elements match at each end of the line.
/// </summary>
/// <param name="pcurr">Line to examine (read only).</param>
/// <param name="pprev">
/// Reference line to compare against. When null, every element is compared against
/// <paramref name="bg"/> instead.
/// </param>
/// <param name="penc">
/// Output line. Elements that differ from the reference are copied through; elements
/// that match are written as zero, or as <paramref name="bg"/> when
/// <paramref name="pprev"/> is null.
/// </param>
/// <param name="cb">
/// Line length in bytes. The last vector offset is computed as <c>cb - 16</c> in
/// unsigned arithmetic, so values below 16 wrap around and are not supported here.
/// </param>
/// <param name="bg">32-bit value used as the reference when no previous line is given.</param>
/// <returns>
/// <c>eql</c>: number of matching 32-bit elements at the start of the line before the first
/// difference; <c>eqr</c>: number of matching elements at the end of the line after the last
/// difference. Both equal the full element count when the whole line matches.
/// </returns>
unsafe private static (uint eql, uint eqr) dedupeLineSse2(byte* pcurr, byte* pprev, byte* penc, int cb, uint bg)
{
    byte* ip = pcurr, pp = pprev, op = penc;
    nuint cnt = 0, end = (nuint)cb - (nuint)Vector128<byte>.Count;

    bool lfound = false;
    uint eql = 0u, eqr = 0u;
    var vbg = pp == (byte*)0 ? Vector128.Create(bg) : Vector128<uint>.Zero;

    LoopTop:
    do
    {
        var vprev = pp != (byte*)0 ? Sse2.LoadVector128(pp + cnt).AsUInt32() : vbg;
        var vcurr = Sse2.LoadVector128(ip + cnt).AsUInt32();

        // NOTE(review): HWIntrinsics.BlendVariable is assumed to select its second operand
        // where the mask is set (as Sse41.BlendVariable does) -- confirm against the helper.
        var veq = Sse2.CompareEqual(vcurr, vprev);
        vcurr = HWIntrinsics.BlendVariable(vcurr, vbg, veq);

        Sse2.Store(op + cnt, vcurr.AsByte());
        cnt += (nuint)Vector128<byte>.Count;

        // One bit per byte, so each 32-bit element contributes a nibble of 0x0 or 0xF.
        uint msk = (uint)Sse2.MoveMask(veq.AsByte());
        if (msk == ushort.MinValue)
        {
            // Nothing matched: the left run is over and the right run restarts.
            lfound = true;
            eqr = 0u;
        }
        else if (msk == ushort.MaxValue)
        {
            // Everything matched: extend whichever runs are still open.
            if (!lfound)
            {
                eql += (uint)Vector128<uint>.Count;
            }

            eqr += (uint)Vector128<uint>.Count;
        }
        else
        {
            // Invert so set bits mark differing bytes. MoveMask only fills the low 16 bits,
            // so the inverted value must be clipped to 16 bits; otherwise the high half is
            // all ones and the leading-zero count below is always 0.
            msk = ~msk & (uint)ushort.MaxValue;
            if (!lfound)
            {
                eql += (uint)BitOperations.TrailingZeroCount(msk) / sizeof(uint);
                lfound = true;
            }

            // Shift the 16-bit mask to the top of the word so leading zeros count the
            // matching bytes at the high-address end of this vector.
            eqr = (uint)BitOperations.LeadingZeroCount(msk << 16) / sizeof(uint);
        }
    }
    while (cnt <= end);

    if (cnt < end + (nuint)Vector128<byte>.Count)
    {
        // Leftover bytes: run one more vector ending exactly at the end of the line.
        // It overlaps the previous vector by offs elements, so remove those from the
        // running counts before they are counted a second time. If eqr is smaller than
        // offs the unsigned subtraction wraps, but then the overlap contains a differing
        // element, the final vector cannot be all-equal, and eqr is assigned afresh.
        uint offs = (uint)(cnt - end) / sizeof(uint);
        if (!lfound)
        {
            eql -= offs;
        }

        eqr -= offs;

        cnt = end;
        goto LoopTop;
    }

    return (eql, eqr);
}