/// <summary>
/// Converts one line of 16-bit input samples into byte output, choosing between the
/// vectorized and the scalar implementation.
/// </summary>
/// <param name="istart">Start of the input line; read as <c>ushort</c> values.</param>
/// <param name="ostart">Start of the output line; written as bytes.</param>
/// <param name="cb">Length of the input line in bytes (the input ends at <c>istart + cb</c>).</param>
unsafe void IConversionProcessor.ConvertLine(byte* istart, byte* ostart, int cb)
{
    ushort* src = (ushort*)istart;
    ushort* srcEnd = (ushort*)(istart + cb);
    byte* dst = ostart;

#if HWINTRINSICS
    // The vector path is only taken when the line holds at least two byte-vectors' worth of input.
    if (HWIntrinsics.IsSupported && cb >= HWIntrinsics.VectorCount<byte>() * 2)
    {
        convertIntrinsic(src, srcEnd, dst);
        return;
    }
#endif

    convertScalar(src, srcEnd, dst);
}
/// <summary>
/// Creates a transform that combines separate Y, Cb and Cr planes using the inverse of the
/// supplied matrix.
/// </summary>
/// <param name="srcY">Luma plane; also passed to the base constructor.</param>
/// <param name="srcCb">Cb plane; must match <paramref name="srcY"/> in width, height and bits per pixel.</param>
/// <param name="srcCr">Cr plane; must match <paramref name="srcY"/> in width, height and bits per pixel.</param>
/// <param name="matrix">Matrix that is inverted (via <c>InvertPrecise</c>) before its coefficients are read.</param>
/// <param name="videoLevels">When true, the inverted matrix is multiplied by <c>byte.MaxValue / videoChromaScale</c>.</param>
/// <exception cref="ArgumentException">
/// A chroma plane differs from the luma plane in size or bit depth, or the inverted matrix contains NaN.
/// </exception>
public PlanarConversionTransform(PixelSource srcY, PixelSource srcCb, PixelSource srcCr, Matrix4x4 matrix, bool videoLevels) : base(srcY)
{
    // Check order matters for which exception a caller sees: both sizes first (Cb, then Cr), then both formats.
    bool cbSizeMatches = srcCb.Width == srcY.Width && srcCb.Height == srcY.Height;
    if (!cbSizeMatches)
    {
        throw new ArgumentException("Chroma plane incorrect size", nameof(srcCb));
    }

    bool crSizeMatches = srcCr.Width == srcY.Width && srcCr.Height == srcY.Height;
    if (!crSizeMatches)
    {
        throw new ArgumentException("Chroma plane incorrect size", nameof(srcCr));
    }

    bool cbDepthMatches = srcCb.Format.BitsPerPixel == srcY.Format.BitsPerPixel;
    if (!cbDepthMatches)
    {
        throw new ArgumentException("Chroma plane incorrect format", nameof(srcCb));
    }

    bool crDepthMatches = srcCr.Format.BitsPerPixel == srcY.Format.BitsPerPixel;
    if (!crDepthMatches)
    {
        throw new ArgumentException("Chroma plane incorrect format", nameof(srcCr));
    }

    var inverse = matrix.InvertPrecise();
    if (inverse.IsNaN())
    {
        throw new ArgumentException("Invalid YCC matrix", nameof(matrix));
    }

    sourceCb = srcCb;
    sourceCr = srcCr;

    if (videoLevels)
    {
        // The whole matrix is scaled here, but only the four chroma coefficients below are read afterwards.
        inverse = inverse * (byte.MaxValue / videoChromaScale);
    }

    coeffCb0 = inverse.M23;
    coeffCb1 = inverse.M22;
    coeffCr0 = inverse.M32;
    coeffCr1 = inverse.M31;

    // 8-bit luma input produces 24bpp BGR output; any other luma format produces 128bpp float BGRX.
    if (srcY.Format == PixelFormat.Y8Bpp)
    {
        Format = PixelFormat.Bgr24Bpp;
    }
    else
    {
        Format = PixelFormat.Bgrx128BppFloat;
    }

    // With AVX available the per-plane stride is adjusted through PowerOfTwoCeiling using the byte vector width.
    int planeStride = BufferStride;
    if (HWIntrinsics.IsAvxSupported)
    {
        var vectorBytes = HWIntrinsics.VectorCount<byte>();
        planeStride = PowerOfTwoCeiling(planeStride, vectorBytes);
    }

    // One rented buffer sized for three strides.
    lineBuff = BufferPool.Rent(planeStride * 3, true);
}
/// <summary>
/// Creates a transform that combines separate Y, Cb and Cr planes using the inverse of the
/// supplied matrix, stored as three coefficient vectors.
/// </summary>
/// <param name="srcY">Luma plane; also passed to the base constructor.</param>
/// <param name="srcCb">Cb plane; must match <paramref name="srcY"/> in width, height and bits per pixel.</param>
/// <param name="srcCr">Cr plane; must match <paramref name="srcY"/> in width, height and bits per pixel.</param>
/// <param name="matrix">Matrix that is inverted (via <c>InvertPrecise</c>) before its coefficients are read.</param>
/// <param name="videoLevels">
/// When true, M22, M23, M31 and M32 of the inverted matrix are multiplied by
/// <c>byte.MaxValue / videoChromaScale</c>.
/// </param>
/// <exception cref="ArgumentException">
/// A chroma plane differs from the luma plane in size or bit depth, or the inverted matrix contains NaN.
/// </exception>
public PlanarConversionTransform(PixelSource srcY, PixelSource srcCb, PixelSource srcCr, Matrix4x4 matrix, bool videoLevels) : base(srcY)
{
    // Check order matters for which exception a caller sees: both sizes first (Cb, then Cr), then both formats.
    bool cbSizeMatches = srcCb.Width == srcY.Width && srcCb.Height == srcY.Height;
    if (!cbSizeMatches)
    {
        throw new ArgumentException("Chroma plane incorrect size", nameof(srcCb));
    }

    bool crSizeMatches = srcCr.Width == srcY.Width && srcCr.Height == srcY.Height;
    if (!crSizeMatches)
    {
        throw new ArgumentException("Chroma plane incorrect size", nameof(srcCr));
    }

    bool cbDepthMatches = srcCb.Format.BitsPerPixel == srcY.Format.BitsPerPixel;
    if (!cbDepthMatches)
    {
        throw new ArgumentException("Chroma plane incorrect format", nameof(srcCb));
    }

    bool crDepthMatches = srcCr.Format.BitsPerPixel == srcY.Format.BitsPerPixel;
    if (!crDepthMatches)
    {
        throw new ArgumentException("Chroma plane incorrect format", nameof(srcCr));
    }

    var inverse = matrix.InvertPrecise();
    if (inverse.IsNaN())
    {
        throw new ArgumentException("Invalid YCC matrix", nameof(matrix));
    }

    sourceCb = srcCb;
    sourceCr = srcCr;

    if (videoLevels)
    {
        // Only these four entries are rescaled; every other entry is used as inverted.
        var chromaGain = byte.MaxValue / videoChromaScale;
        inverse.M22 *= chromaGain;
        inverse.M23 *= chromaGain;
        inverse.M31 *= chromaGain;
        inverse.M32 *= chromaGain;
    }

    // The coefficient vectors are the matrix columns taken in reverse order (3, 2, 1), padded with 0.
    vec0 = new Vector4(inverse.M13, inverse.M23, inverse.M33, 0f);
    vec1 = new Vector4(inverse.M12, inverse.M22, inverse.M32, 0f);
    vec2 = new Vector4(inverse.M11, inverse.M21, inverse.M31, 0f);

    // 8bpp Y input produces 24bpp BGR output; any other luma format produces 128bpp float BGRX.
    if (srcY.Format.FormatGuid == Consts.GUID_WICPixelFormat8bppY)
    {
        Format = PixelFormat.FromGuid(Consts.GUID_WICPixelFormat24bppBGR);
    }
    else
    {
        Format = PixelFormat.Bgrx128BppFloat;
    }

    // With AVX available the stride itself is replaced by the PowerOfTwoCeiling result.
    if (HWIntrinsics.IsAvxSupported)
    {
        var vectorBytes = HWIntrinsics.VectorCount<byte>();
        BufferStride = PowerOfTwoCeiling(BufferStride, vectorBytes);
    }

    // One rented buffer sized for three strides.
    lineBuff = BufferPool.Rent(BufferStride * 3, true);
}
/// <summary>
/// Produces one float per output column by accumulating a column of the intermediate buffer
/// against the weights at <paramref name="pmapy"/> and summing the accumulator horizontally.
/// </summary>
/// <param name="tstart">Intermediate buffer; column <c>x</c> starts at float offset <c>x * smapy * channels</c>.</param>
/// <param name="ostart">Output line; receives <paramref name="ow"/> consecutive floats.</param>
/// <param name="ox">First column to process.</param>
/// <param name="ow">Number of columns to process.</param>
/// <param name="pmapy">Weights, read as floats; the same weights are used for every column.</param>
/// <param name="smapy">Sample count per column; multiplied by <c>channels</c> to get the float count.</param>
/// <remarks>
/// Only whole 128-bit vectors are processed (<c>smapy * channels / VectorSse.Count</c>); presumably the
/// caller guarantees that length is a multiple of the vector width. <c>HWIntrinsics.MultiplyAdd</c> is
/// assumed to add the product of its vector argument and the floats at the given pointer to the accumulator.
/// </remarks>
unsafe void IConvolver.WriteDestLine(byte* tstart, byte* ostart, int ox, int ow, byte* pmapy, int smapy)
{
    float* dst = (float*)ostart;
    uint columnFloats = (uint)smapy * channels;
    uint columnVectors = columnFloats / (uint)VectorSse.Count;

    int xEnd = ox + ow;
    while (ox < xEnd)
    {
        // 'remaining' counts 128-bit vectors still to be consumed for this column.
        uint remaining = columnVectors;
        float* src = (float*)tstart + (uint)ox * columnFloats;
        float* weights = (float*)pmapy;

        VectorSse sum;

        if (Avx.IsSupported && remaining >= 2)
        {
            var wide = VectorAvx.Zero;

            // Two 256-bit vectors (four 128-bit units) per iteration.
            while (remaining >= 4)
            {
                var lo = Avx.LoadVector256(src);
                var hi = Avx.LoadVector256(src + VectorAvx.Count);
                wide = HWIntrinsics.MultiplyAdd(wide, lo, weights);
                wide = HWIntrinsics.MultiplyAdd(wide, hi, weights + VectorAvx.Count);

                src += VectorAvx.Count * 2;
                weights += VectorAvx.Count * 2;
                remaining -= 4;
            }

            if (remaining >= 2)
            {
                var last = Avx.LoadVector256(src);
                wide = HWIntrinsics.MultiplyAdd(wide, last, weights);

                src += VectorAvx.Count;
                weights += VectorAvx.Count;
                remaining -= 2;
            }

            // Fold the 256-bit accumulator into 128 bits.
            sum = Sse.Add(wide.GetLower(), wide.GetUpper());
        }
        else
        {
            sum = VectorSse.Zero;

            while (remaining >= 2)
            {
                var lo = Sse.LoadVector128(src);
                var hi = Sse.LoadVector128(src + VectorSse.Count);
                sum = HWIntrinsics.MultiplyAdd(sum, lo, weights);
                sum = HWIntrinsics.MultiplyAdd(sum, hi, weights + VectorSse.Count);

                src += VectorSse.Count * 2;
                weights += VectorSse.Count * 2;
                remaining -= 2;
            }
        }

        // At most one 128-bit vector can be left over from either path.
        if (remaining != 0)
        {
            var tail = Sse.LoadVector128(src);
            sum = HWIntrinsics.MultiplyAdd(sum, tail, weights);
        }

        *dst++ = sum.HorizontalAdd();
        ox++;
    }
}
/// <summary>
/// Convolves one source line: for each map entry, accumulates a run of input floats against that
/// entry's weights and writes the horizontal sum as a single float into the intermediate buffer.
/// </summary>
/// <param name="istart">Input line, read as floats.</param>
/// <param name="tstart">Intermediate buffer; one float is written per map entry, <c>smapy * channels</c> floats apart.</param>
/// <param name="cb">Byte length of the region of <paramref name="tstart"/> to fill; the loop ends at <c>tstart + cb</c>.</param>
/// <param name="mapxstart">
/// Map of <c>uint</c> pairs: the first value is an input index (multiplied by <c>channels</c>), the second
/// is a byte offset from <paramref name="mapxstart"/> to that entry's float weights.
/// </param>
/// <param name="smapx">Sample count per entry on the input side; multiplied by <c>channels</c> to get the float count.</param>
/// <param name="smapy">Used only to compute the distance between successive outputs.</param>
/// <remarks>
/// Only whole 128-bit vectors are processed; presumably the caller pads kernels to the vector width.
/// <c>HWIntrinsics.MultiplyAdd</c> is assumed to add the product of its vector argument and the floats
/// at the given pointer to the accumulator.
/// </remarks>
unsafe void IConvolver.ConvolveSourceLine(byte* istart, byte* tstart, int cb, byte* mapxstart, int smapx, int smapy)
{
    float* dst = (float*)tstart;
    float* dstEnd = (float*)(tstart + (uint)cb);
    uint* map = (uint*)mapxstart;
    uint kernelFloats = (uint)smapx * channels;
    uint dstStride = (uint)smapy * channels;
    uint kernelVectors = kernelFloats / (uint)VectorSse.Count;

    while (dst < dstEnd)
    {
        uint srcIndex = map[0];
        uint weightOffset = map[1];
        map += 2;

        // 'remaining' counts 128-bit vectors still to be consumed for this output.
        uint remaining = kernelVectors;
        float* src = (float*)istart + srcIndex * channels;
        float* weights = (float*)(mapxstart + weightOffset);

        VectorSse sum;

        if (Avx.IsSupported && remaining >= 2)
        {
            var wide = VectorAvx.Zero;

            // Four 256-bit vectors (eight 128-bit units) per iteration.
            while (remaining >= 8)
            {
                var v0 = Avx.LoadVector256(src);
                var v1 = Avx.LoadVector256(src + VectorAvx.Count);
                var v2 = Avx.LoadVector256(src + VectorAvx.Count * 2);
                var v3 = Avx.LoadVector256(src + VectorAvx.Count * 3);
                wide = HWIntrinsics.MultiplyAdd(wide, v0, weights);
                wide = HWIntrinsics.MultiplyAdd(wide, v1, weights + VectorAvx.Count);
                wide = HWIntrinsics.MultiplyAdd(wide, v2, weights + VectorAvx.Count * 2);
                wide = HWIntrinsics.MultiplyAdd(wide, v3, weights + VectorAvx.Count * 3);

                src += VectorAvx.Count * 4;
                weights += VectorAvx.Count * 4;
                remaining -= 8;
            }

            // Unrolled remainder: three, two or one 256-bit vector.
            if (remaining >= 6)
            {
                var v0 = Avx.LoadVector256(src);
                var v1 = Avx.LoadVector256(src + VectorAvx.Count);
                var v2 = Avx.LoadVector256(src + VectorAvx.Count * 2);
                wide = HWIntrinsics.MultiplyAdd(wide, v0, weights);
                wide = HWIntrinsics.MultiplyAdd(wide, v1, weights + VectorAvx.Count);
                wide = HWIntrinsics.MultiplyAdd(wide, v2, weights + VectorAvx.Count * 2);

                src += VectorAvx.Count * 3;
                weights += VectorAvx.Count * 3;
                remaining -= 6;
            }
            else if (remaining >= 4)
            {
                var v0 = Avx.LoadVector256(src);
                var v1 = Avx.LoadVector256(src + VectorAvx.Count);
                wide = HWIntrinsics.MultiplyAdd(wide, v0, weights);
                wide = HWIntrinsics.MultiplyAdd(wide, v1, weights + VectorAvx.Count);

                src += VectorAvx.Count * 2;
                weights += VectorAvx.Count * 2;
                remaining -= 4;
            }
            else if (remaining >= 2)
            {
                var v0 = Avx.LoadVector256(src);
                wide = HWIntrinsics.MultiplyAdd(wide, v0, weights);

                src += VectorAvx.Count;
                weights += VectorAvx.Count;
                remaining -= 2;
            }

            // Fold the 256-bit accumulator into 128 bits.
            sum = Sse.Add(wide.GetLower(), wide.GetUpper());
        }
        else
        {
            sum = VectorSse.Zero;

            while (remaining >= 4)
            {
                var v0 = Sse.LoadVector128(src);
                var v1 = Sse.LoadVector128(src + VectorSse.Count);
                var v2 = Sse.LoadVector128(src + VectorSse.Count * 2);
                var v3 = Sse.LoadVector128(src + VectorSse.Count * 3);
                sum = HWIntrinsics.MultiplyAdd(sum, v0, weights);
                sum = HWIntrinsics.MultiplyAdd(sum, v1, weights + VectorSse.Count);
                sum = HWIntrinsics.MultiplyAdd(sum, v2, weights + VectorSse.Count * 2);
                sum = HWIntrinsics.MultiplyAdd(sum, v3, weights + VectorSse.Count * 3);

                src += VectorSse.Count * 4;
                weights += VectorSse.Count * 4;
                remaining -= 4;
            }

            if (remaining >= 2)
            {
                var v0 = Sse.LoadVector128(src);
                var v1 = Sse.LoadVector128(src + VectorSse.Count);
                sum = HWIntrinsics.MultiplyAdd(sum, v0, weights);
                sum = HWIntrinsics.MultiplyAdd(sum, v1, weights + VectorSse.Count);

                src += VectorSse.Count * 2;
                weights += VectorSse.Count * 2;
                remaining -= 2;
            }
        }

        // At most one 128-bit vector can be left over from either path.
        if (remaining != 0)
        {
            var tail = Sse.LoadVector128(src);
            sum = HWIntrinsics.MultiplyAdd(sum, tail, weights);
        }

        *dst = sum.HorizontalAdd();
        dst += dstStride;
    }
}
/// <summary>
/// Convolves one source line using three rotating accumulators and stores one 4-float vector per
/// map entry into the intermediate buffer.
/// </summary>
/// <param name="istart">Input line, read as floats.</param>
/// <param name="tstart">Intermediate buffer; a 4-float vector is stored per map entry, <c>smapy * 4</c> floats apart.</param>
/// <param name="cb">Byte length of the region of <paramref name="tstart"/> to fill; the loop ends at <c>tstart + cb</c>.</param>
/// <param name="mapxstart">
/// Map of <c>uint</c> pairs: the first value is an input index (multiplied by <c>channels</c>), the second
/// is a byte offset from <paramref name="mapxstart"/> to that entry's float weights.
/// </param>
/// <param name="smapx">Sample count per entry on the input side; multiplied by <c>channels</c> to get the float count.</param>
/// <param name="smapy">Used only to compute the distance between successive outputs.</param>
/// <remarks>
/// NOTE(review): the reduction looks written for interleaved 3-channel data (channels == 3, not visible
/// here), where the channel phase repeats every three 128-bit vectors — confirm against the class.
/// The SSE loop subtracts 3 until the count reaches exactly 0, so the vector count must be a multiple
/// of 3; presumably the caller guarantees this through kernel padding.
/// </remarks>
unsafe void IConvolver.ConvolveSourceLine(byte* istart, byte* tstart, int cb, byte* mapxstart, int smapx, int smapy)
{
    float* dst = (float*)tstart;
    float* dstEnd = (float*)(tstart + (uint)cb);
    uint* map = (uint*)mapxstart;
    uint kernelFloats = (uint)smapx * channels;
    uint dstStride = (uint)smapy * 4;
    uint kernelVectors = kernelFloats / (uint)VectorSse.Count;

    while (dst < dstEnd)
    {
        uint srcIndex = map[0];
        uint weightOffset = map[1];
        map += 2;

        // 'remaining' counts 128-bit vectors still to be consumed for this output.
        uint remaining = kernelVectors;
        float* src = (float*)istart + srcIndex * channels;
        float* weights = (float*)(mapxstart + weightOffset);

        var acc0 = VectorSse.Zero;
        var acc1 = VectorSse.Zero;
        var acc2 = VectorSse.Zero;

        if (Avx.IsSupported && remaining >= 6)
        {
            var wide0 = VectorAvx.Zero;
            var wide1 = VectorAvx.Zero;
            var wide2 = VectorAvx.Zero;

            // Three 256-bit vectors (six 128-bit units) per iteration.
            while (remaining >= 6)
            {
                var v0 = Avx.LoadVector256(src);
                var v1 = Avx.LoadVector256(src + VectorAvx.Count);
                var v2 = Avx.LoadVector256(src + VectorAvx.Count * 2);
                wide0 = HWIntrinsics.MultiplyAdd(wide0, v0, weights);
                wide1 = HWIntrinsics.MultiplyAdd(wide1, v1, weights + VectorAvx.Count);
                wide2 = HWIntrinsics.MultiplyAdd(wide2, v2, weights + VectorAvx.Count * 2);

                src += VectorAvx.Count * 3;
                weights += VectorAvx.Count * 3;
                remaining -= 6;
            }

            // The six 128-bit halves sit at positions 0..5; halves three positions apart are added
            // together (0+3, 1+4, 2+5) to form the three 128-bit accumulators.
            acc0 = Sse.Add(wide0.GetLower(), wide1.GetUpper());
            acc1 = Sse.Add(wide0.GetUpper(), wide2.GetLower());
            acc2 = Sse.Add(wide1.GetLower(), wide2.GetUpper());
        }

        // Remaining work, three 128-bit vectors at a time.
        while (remaining != 0)
        {
            var v0 = Sse.LoadVector128(src);
            var v1 = Sse.LoadVector128(src + VectorSse.Count);
            var v2 = Sse.LoadVector128(src + VectorSse.Count * 2);
            acc0 = HWIntrinsics.MultiplyAdd(acc0, v0, weights);
            acc1 = HWIntrinsics.MultiplyAdd(acc1, v1, weights + VectorSse.Count);
            acc2 = HWIntrinsics.MultiplyAdd(acc2, v2, weights + VectorSse.Count * 2);

            src += VectorSse.Count * 3;
            weights += VectorSse.Count * 3;
            remaining -= 3;
        }

        // Each shuffle reorders one accumulator's lanes; the three reordered vectors are summed, then
        // added to a vector built from element 0 of acc0 and acc1 and the low half of acc2.
        var rotated = Sse.Add(
            Sse.Add(
                Sse.Shuffle(acc0, acc0, 0b_00_10_01_11),
                Sse.Shuffle(acc1, acc1, 0b_00_01_11_10)),
            Sse.Shuffle(acc2, acc2, 0b_00_11_10_01));

        var result = Sse.MoveLowToHigh(Sse.UnpackLow(acc0, acc1), acc2);
        result = Sse.Add(result, rotated);

        Sse.Store(dst, result);
        dst += dstStride;
    }
}
/// <summary>
/// Adds a scaled difference of two lines (<paramref name="ystart"/> minus <paramref name="bstart"/>)
/// to a third line (<paramref name="cstart"/>) and writes the result to <paramref name="ostart"/>.
/// </summary>
/// <param name="cstart">Line the difference is added to; read as floats starting at index <c>ox * channels</c>.</param>
/// <param name="ystart">Minuend line; read as floats starting at index <c>ox</c>.</param>
/// <param name="bstart">Subtrahend line; read as floats from its start.</param>
/// <param name="ostart">Output line; written as floats from its start.</param>
/// <param name="ox">First element offset to process.</param>
/// <param name="ow">Number of elements to process (multiplied by <c>channels</c>).</param>
/// <param name="amt">Factor applied to the difference before it is added.</param>
/// <param name="thresh">
/// When greater than 0, differences whose magnitude is not greater than this value are not applied.
/// </param>
/// <param name="gamma">
/// When true, the value is clamped to at least 0 and square-rooted before the difference is added,
/// then clamped again and squared.
/// </param>
/// <remarks>
/// The vector paths compute the square root as <c>v * ReciprocalSqrt(v)</c>, an approximation, while
/// the scalar tail calls <c>Sqrt()</c>. Below the threshold the scalar tail leaves the value untouched,
/// whereas the vector paths still run the gamma round trip with a zero difference.
/// NOTE(review): for an input of exactly 0 the vector gamma path computes 0 * rsqrt(0), which is
/// 0 * +inf = NaN; the following Max with zero then appears to return 0 regardless of the difference
/// term, unlike the scalar tail — confirm whether this is intended.
/// </remarks>
unsafe void IConvolver.SharpenLine(byte* cstart, byte* ystart, byte* bstart, byte* ostart, int ox, int ow, float amt, float thresh, bool gamma)
{
    float* src = (float*)cstart + (uint)ox * channels;
    float* srcEnd = src + (uint)ow * channels;
    float* minuend = (float*)ystart + (uint)ox;
    float* subtrahend = (float*)bstart;
    float* dst = (float*)ostart;

    bool useThreshold = thresh > 0f;

    if (Avx.IsSupported && src <= srcEnd - VectorAvx.Count)
    {
        var threshVec = Vector256.Create(useThreshold ? thresh : -1f);
        var absMask = Vector256.Create(0x7fffffff).AsSingle();
        var amtVec = Vector256.Create(amt);
        var zero = VectorAvx.Zero;

        // Last position from which a full 256-bit vector can still be read.
        float* lastBlock = srcEnd - VectorAvx.Count;

        do
        {
            var diff = Avx.Subtract(Avx.LoadVector256(minuend), Avx.LoadVector256(subtrahend));
            minuend += VectorAvx.Count;
            subtrahend += VectorAvx.Count;

            if (useThreshold)
            {
                // Clear the sign bit to get |diff|, then zero every lane that is not above the threshold.
                var keep = HWIntrinsics.AvxCompareGreaterThan(Avx.And(diff, absMask), threshVec);
                diff = Avx.And(diff, keep);
            }

            diff = Avx.Multiply(diff, amtVec);

            var px = Avx.LoadVector256(src);
            src += VectorAvx.Count;

            if (gamma)
            {
                px = Avx.Max(px, zero);
                px = Avx.Multiply(px, Avx.ReciprocalSqrt(px));
                px = Avx.Add(px, diff);
                px = Avx.Max(px, zero);
                px = Avx.Multiply(px, px);
            }
            else
            {
                px = Avx.Add(px, diff);
            }

            Avx.Store(dst, px);
            dst += VectorAvx.Count;
        }
        while (src <= lastBlock);
    }
    else if (src <= srcEnd - VectorSse.Count)
    {
        var threshVec = Vector128.Create(useThreshold ? thresh : -1f);
        var absMask = Vector128.Create(0x7fffffff).AsSingle();
        var amtVec = Vector128.Create(amt);
        var zero = VectorSse.Zero;

        // Last position from which a full 128-bit vector can still be read.
        float* lastBlock = srcEnd - VectorSse.Count;

        do
        {
            var diff = Sse.Subtract(Sse.LoadVector128(minuend), Sse.LoadVector128(subtrahend));
            minuend += VectorSse.Count;
            subtrahend += VectorSse.Count;

            if (useThreshold)
            {
                var keep = Sse.CompareGreaterThan(Sse.And(diff, absMask), threshVec);
                diff = Sse.And(diff, keep);
            }

            diff = Sse.Multiply(diff, amtVec);

            var px = Sse.LoadVector128(src);
            src += VectorSse.Count;

            if (gamma)
            {
                px = Sse.Max(px, zero);
                px = Sse.Multiply(px, Sse.ReciprocalSqrt(px));
                px = Sse.Add(px, diff);
                px = Sse.Max(px, zero);
                px = Sse.Multiply(px, px);
            }
            else
            {
                px = Sse.Add(px, diff);
            }

            Sse.Store(dst, px);
            dst += VectorSse.Count;
        }
        while (src <= lastBlock);
    }

    // Scalar tail: handles whatever the vector loop left over, or the whole line if no vector path ran.
    float floor = VectorSse.Zero.ToScalar();
    for (; src < srcEnd; src++, minuend++, subtrahend++, dst++)
    {
        float delta = *minuend - *subtrahend;
        float value = *src;

        bool apply = !useThreshold || Math.Abs(delta) > thresh;
        if (apply)
        {
            delta *= amt;

            if (gamma)
            {
                value = MathUtil.MaxF(value, floor).Sqrt();
                value = MathUtil.MaxF(value + delta, floor);
                value *= value;
            }
            else
            {
                value += delta;
            }
        }

        *dst = value;
    }
}