예제 #1
0
        /// <summary>
        /// Dispatches the conversion of one line to either the vectorized or the scalar implementation.
        /// </summary>
        /// <param name="istart">Start of the input line; it is reinterpreted as a sequence of <c>ushort</c> values.</param>
        /// <param name="ostart">Start of the output line, passed on as a <c>byte</c> pointer.</param>
        /// <param name="cb">Length of the input line in bytes; the input end pointer is <c>istart + cb</c>.</param>
        unsafe void IConversionProcessor.ConvertLine(byte *istart, byte *ostart, int cb)
        {
            ushort *src    = (ushort *)istart;
            ushort *srcEnd = (ushort *)(istart + cb);
            byte *  dst    = ostart;

#if HWINTRINSICS
            // The vector path is only taken when the line holds at least two byte-vectors' worth of input.
            if (HWIntrinsics.IsSupported && cb >= HWIntrinsics.VectorCount<byte>() * 2)
            {
                convertIntrinsic(src, srcEnd, dst);
                return;
            }
#endif

            convertScalar(src, srcEnd, dst);
        }
        /// <summary>
        /// Builds a transform over three planar sources, deriving its chroma coefficients from the
        /// inverse of <paramref name="matrix"/> and renting a line buffer sized from the buffer stride.
        /// </summary>
        /// <param name="srcY">First plane; forwarded to the base constructor and used as the size/format reference.</param>
        /// <param name="srcCb">Second plane; must match <paramref name="srcY"/> in width, height and bits per pixel.</param>
        /// <param name="srcCr">Third plane; must match <paramref name="srcY"/> in width, height and bits per pixel.</param>
        /// <param name="matrix">Matrix that is inverted before its elements are read.</param>
        /// <param name="videoLevels">When true, the inverted matrix is multiplied by <c>byte.MaxValue / videoChromaScale</c>.</param>
        /// <exception cref="ArgumentException">
        /// A chroma plane differs from <paramref name="srcY"/> in size or bit depth, or the inverted matrix reports NaN.
        /// </exception>
        public PlanarConversionTransform(PixelSource srcY, PixelSource srcCb, PixelSource srcCr, Matrix4x4 matrix, bool videoLevels) : base(srcY)
        {
            bool matchesLumaSize(PixelSource plane) => plane.Width == srcY.Width && plane.Height == srcY.Height;
            bool matchesLumaDepth(PixelSource plane) => plane.Format.BitsPerPixel == srcY.Format.BitsPerPixel;

            // Checks run in a fixed order: Cb size, Cr size, Cb depth, Cr depth.
            if (!matchesLumaSize(srcCb))
            {
                throw new ArgumentException("Chroma plane incorrect size", nameof(srcCb));
            }
            if (!matchesLumaSize(srcCr))
            {
                throw new ArgumentException("Chroma plane incorrect size", nameof(srcCr));
            }
            if (!matchesLumaDepth(srcCb))
            {
                throw new ArgumentException("Chroma plane incorrect format", nameof(srcCb));
            }
            if (!matchesLumaDepth(srcCr))
            {
                throw new ArgumentException("Chroma plane incorrect format", nameof(srcCr));
            }

            Matrix4x4 inverse = matrix.InvertPrecise();
            if (inverse.IsNaN())
            {
                throw new ArgumentException("Invalid YCC matrix", nameof(matrix));
            }

            if (videoLevels)
            {
                inverse *= byte.MaxValue / videoChromaScale;
            }

            sourceCb = srcCb;
            sourceCr = srcCr;

            coeffCb0 = inverse.M23;
            coeffCb1 = inverse.M22;
            coeffCr0 = inverse.M32;
            coeffCr1 = inverse.M31;

            // The output format is assigned before BufferStride is read.
            if (srcY.Format == PixelFormat.Y8Bpp)
            {
                Format = PixelFormat.Bgr24Bpp;
            }
            else
            {
                Format = PixelFormat.Bgrx128BppFloat;
            }

            int rawStride = BufferStride;
            int stride    = HWIntrinsics.IsAvxSupported
                ? PowerOfTwoCeiling(rawStride, HWIntrinsics.VectorCount<byte>())
                : rawStride;

            lineBuff = BufferPool.Rent(stride * 3, true);
        }
        /// <summary>
        /// Builds a transform over three planar sources, storing three coefficient vectors taken from
        /// the inverse of <paramref name="matrix"/> and renting a line buffer of three buffer strides.
        /// </summary>
        /// <param name="srcY">First plane; forwarded to the base constructor and used as the size/format reference.</param>
        /// <param name="srcCb">Second plane; must match <paramref name="srcY"/> in width, height and bits per pixel.</param>
        /// <param name="srcCr">Third plane; must match <paramref name="srcY"/> in width, height and bits per pixel.</param>
        /// <param name="matrix">Matrix that is inverted before its elements are read.</param>
        /// <param name="videoLevels">
        /// When true, elements M22, M23, M31 and M32 of the inverse are multiplied by
        /// <c>byte.MaxValue / videoChromaScale</c>.
        /// </param>
        /// <exception cref="ArgumentException">
        /// A chroma plane differs from <paramref name="srcY"/> in size or bit depth, or the inverted matrix reports NaN.
        /// </exception>
        public PlanarConversionTransform(PixelSource srcY, PixelSource srcCb, PixelSource srcCr, Matrix4x4 matrix, bool videoLevels) : base(srcY)
        {
            // Checks run in a fixed order: Cb size, Cr size, Cb depth, Cr depth.
            if (!(srcCb.Width == srcY.Width && srcCb.Height == srcY.Height))
            {
                throw new ArgumentException("Chroma plane incorrect size", nameof(srcCb));
            }
            if (!(srcCr.Width == srcY.Width && srcCr.Height == srcY.Height))
            {
                throw new ArgumentException("Chroma plane incorrect size", nameof(srcCr));
            }
            if (!(srcCb.Format.BitsPerPixel == srcY.Format.BitsPerPixel))
            {
                throw new ArgumentException("Chroma plane incorrect format", nameof(srcCb));
            }
            if (!(srcCr.Format.BitsPerPixel == srcY.Format.BitsPerPixel))
            {
                throw new ArgumentException("Chroma plane incorrect format", nameof(srcCr));
            }

            Matrix4x4 inverse = matrix.InvertPrecise();
            if (inverse.IsNaN())
            {
                throw new ArgumentException("Invalid YCC matrix", nameof(matrix));
            }

            if (videoLevels)
            {
                // Only these four elements are rescaled; the rest of the inverse is left untouched.
                var chromaScale = byte.MaxValue / videoChromaScale;

                inverse.M22 *= chromaScale;
                inverse.M23 *= chromaScale;
                inverse.M31 *= chromaScale;
                inverse.M32 *= chromaScale;
            }

            sourceCb = srcCb;
            sourceCr = srcCr;

            // Each vector holds one column of the inverse (third, second, first), padded with zero.
            vec0 = new Vector4(inverse.M13, inverse.M23, inverse.M33, 0f);
            vec1 = new Vector4(inverse.M12, inverse.M22, inverse.M32, 0f);
            vec2 = new Vector4(inverse.M11, inverse.M21, inverse.M31, 0f);

            // The output format is assigned before BufferStride is read.
            if (srcY.Format.FormatGuid == Consts.GUID_WICPixelFormat8bppY)
            {
                Format = PixelFormat.FromGuid(Consts.GUID_WICPixelFormat24bppBGR);
            }
            else
            {
                Format = PixelFormat.Bgrx128BppFloat;
            }

            if (HWIntrinsics.IsAvxSupported)
            {
                BufferStride = PowerOfTwoCeiling(BufferStride, HWIntrinsics.VectorCount<byte>());
            }

            lineBuff = BufferPool.Rent(BufferStride * 3, true);
        }
예제 #4
0
        /// <summary>
        /// Writes <paramref name="ow"/> output floats, each being the sum of products of one run of
        /// intermediate floats with the weights at <paramref name="pmapy"/>.
        /// </summary>
        /// <param name="tstart">Start of the intermediate buffer, read as floats in runs of <c>smapy * channels</c>.</param>
        /// <param name="ostart">Start of the output buffer, written as consecutive floats.</param>
        /// <param name="ox">Index of the first run to process.</param>
        /// <param name="ow">Number of runs to process; one float is written per run.</param>
        /// <param name="pmapy">Weights, read as floats; the same weights are applied to every run.</param>
        /// <param name="smapy">Multiplied by <c>channels</c> to give the run length in floats.</param>
        unsafe void IConvolver.WriteDestLine(byte *tstart, byte *ostart, int ox, int ow, byte *pmapy, int smapy)
        {
            float *dst        = (float *)ostart;
            uint   runLength  = (uint)smapy * channels;
            uint   vecsPerRun = runLength / (uint)VectorSse.Count;
            int    xEnd       = ox + ow;

            while (ox < xEnd)
            {
                // 'remaining' counts 128-bit vectors still to be consumed from this run.
                uint   remaining = vecsPerRun;
                float *src       = (float *)tstart + (uint)ox * runLength;
                float *weights   = (float *)pmapy;

                VectorSse sum;

                if (Avx.IsSupported && remaining >= 2)
                {
                    var wideSum = VectorAvx.Zero;

                    // Two 256-bit vectors (four 128-bit units) per iteration.
                    while (remaining >= 4)
                    {
                        var first  = Avx.LoadVector256(src);
                        var second = Avx.LoadVector256(src + VectorAvx.Count);

                        wideSum = HWIntrinsics.MultiplyAdd(wideSum, first, weights);
                        wideSum = HWIntrinsics.MultiplyAdd(wideSum, second, weights + VectorAvx.Count);

                        src       += VectorAvx.Count * 2;
                        weights   += VectorAvx.Count * 2;
                        remaining -= 4;
                    }

                    // One more 256-bit vector when at least two 128-bit units are left.
                    if (remaining >= 2)
                    {
                        var single = Avx.LoadVector256(src);

                        wideSum = HWIntrinsics.MultiplyAdd(wideSum, single, weights);

                        src       += VectorAvx.Count;
                        weights   += VectorAvx.Count;
                        remaining -= 2;
                    }

                    // Fold the 256-bit accumulator into 128 bits.
                    sum = Sse.Add(wideSum.GetLower(), wideSum.GetUpper());
                }
                else
                {
                    sum = VectorSse.Zero;

                    // Two 128-bit vectors per iteration.
                    while (remaining >= 2)
                    {
                        var first  = Sse.LoadVector128(src);
                        var second = Sse.LoadVector128(src + VectorSse.Count);

                        sum = HWIntrinsics.MultiplyAdd(sum, first, weights);
                        sum = HWIntrinsics.MultiplyAdd(sum, second, weights + VectorSse.Count);

                        src       += VectorSse.Count * 2;
                        weights   += VectorSse.Count * 2;
                        remaining -= 2;
                    }
                }

                // At most one 128-bit vector is left over after either path.
                if (remaining != 0)
                {
                    sum = HWIntrinsics.MultiplyAdd(sum, Sse.LoadVector128(src), weights);
                }

                *dst = sum.HorizontalAdd();
                dst++;
                ox++;
            }
        }
예제 #5
0
        /// <summary>
        /// For each entry of the map at <paramref name="mapxstart"/>, accumulates the products of a run
        /// of source floats with that entry's weights and writes the horizontal sum to the intermediate
        /// buffer, stepping the write position by <c>smapy * channels</c> floats per result.
        /// </summary>
        /// <param name="istart">Start of the source line, read as floats.</param>
        /// <param name="tstart">Start of the intermediate buffer, written as floats.</param>
        /// <param name="cb">Byte offset from <paramref name="tstart"/> at which writing stops.</param>
        /// <param name="mapxstart">
        /// Map read as pairs of <c>uint</c>: a source index (scaled by <c>channels</c>) followed by a byte
        /// offset, relative to <paramref name="mapxstart"/>, of the weights for that entry.
        /// </param>
        /// <param name="smapx">Multiplied by <c>channels</c> to give the number of floats accumulated per result.</param>
        /// <param name="smapy">Multiplied by <c>channels</c> to give the write stride in floats.</param>
        unsafe void IConvolver.ConvolveSourceLine(byte *istart, byte *tstart, int cb, byte *mapxstart, int smapx, int smapy)
        {
            float *dst    = (float *)tstart;
            float *dstEnd = (float *)(tstart + (uint)cb);
            uint * map    = (uint *)mapxstart;

            uint kernelFloats  = (uint)smapx * channels;
            uint dstStride     = (uint)smapy * channels;
            uint vecsPerKernel = kernelFloats / (uint)VectorSse.Count;

            while (dst < dstEnd)
            {
                uint srcIndex     = map[0];
                uint weightOffset = map[1];
                map += 2;

                // 'remaining' counts 128-bit vectors still to be consumed for this result.
                uint   remaining = vecsPerKernel;
                float *src       = (float *)istart + srcIndex * channels;
                float *weights   = (float *)(mapxstart + weightOffset);

                VectorSse sum;

                if (Avx.IsSupported && remaining >= 2)
                {
                    var wideSum = VectorAvx.Zero;

                    // Four 256-bit vectors (eight 128-bit units) per iteration.
                    while (remaining >= 8)
                    {
                        var v0 = Avx.LoadVector256(src);
                        var v1 = Avx.LoadVector256(src + VectorAvx.Count);
                        var v2 = Avx.LoadVector256(src + VectorAvx.Count * 2);
                        var v3 = Avx.LoadVector256(src + VectorAvx.Count * 3);

                        wideSum = HWIntrinsics.MultiplyAdd(wideSum, v0, weights);
                        wideSum = HWIntrinsics.MultiplyAdd(wideSum, v1, weights + VectorAvx.Count);
                        wideSum = HWIntrinsics.MultiplyAdd(wideSum, v2, weights + VectorAvx.Count * 2);
                        wideSum = HWIntrinsics.MultiplyAdd(wideSum, v3, weights + VectorAvx.Count * 3);

                        src       += VectorAvx.Count * 4;
                        weights   += VectorAvx.Count * 4;
                        remaining -= 8;
                    }

                    // Fewer than eight units remain: take three, two or one 256-bit vectors as they fit.
                    if (remaining >= 6)
                    {
                        var v0 = Avx.LoadVector256(src);
                        var v1 = Avx.LoadVector256(src + VectorAvx.Count);
                        var v2 = Avx.LoadVector256(src + VectorAvx.Count * 2);

                        wideSum = HWIntrinsics.MultiplyAdd(wideSum, v0, weights);
                        wideSum = HWIntrinsics.MultiplyAdd(wideSum, v1, weights + VectorAvx.Count);
                        wideSum = HWIntrinsics.MultiplyAdd(wideSum, v2, weights + VectorAvx.Count * 2);

                        src       += VectorAvx.Count * 3;
                        weights   += VectorAvx.Count * 3;
                        remaining -= 6;
                    }
                    else if (remaining >= 4)
                    {
                        var v0 = Avx.LoadVector256(src);
                        var v1 = Avx.LoadVector256(src + VectorAvx.Count);

                        wideSum = HWIntrinsics.MultiplyAdd(wideSum, v0, weights);
                        wideSum = HWIntrinsics.MultiplyAdd(wideSum, v1, weights + VectorAvx.Count);

                        src       += VectorAvx.Count * 2;
                        weights   += VectorAvx.Count * 2;
                        remaining -= 4;
                    }
                    else if (remaining >= 2)
                    {
                        var v0 = Avx.LoadVector256(src);

                        wideSum = HWIntrinsics.MultiplyAdd(wideSum, v0, weights);

                        src       += VectorAvx.Count;
                        weights   += VectorAvx.Count;
                        remaining -= 2;
                    }

                    // Fold the 256-bit accumulator into 128 bits.
                    sum = Sse.Add(wideSum.GetLower(), wideSum.GetUpper());
                }
                else
                {
                    sum = VectorSse.Zero;

                    // Four 128-bit vectors per iteration.
                    while (remaining >= 4)
                    {
                        var v0 = Sse.LoadVector128(src);
                        var v1 = Sse.LoadVector128(src + VectorSse.Count);
                        var v2 = Sse.LoadVector128(src + VectorSse.Count * 2);
                        var v3 = Sse.LoadVector128(src + VectorSse.Count * 3);

                        sum = HWIntrinsics.MultiplyAdd(sum, v0, weights);
                        sum = HWIntrinsics.MultiplyAdd(sum, v1, weights + VectorSse.Count);
                        sum = HWIntrinsics.MultiplyAdd(sum, v2, weights + VectorSse.Count * 2);
                        sum = HWIntrinsics.MultiplyAdd(sum, v3, weights + VectorSse.Count * 3);

                        src       += VectorSse.Count * 4;
                        weights   += VectorSse.Count * 4;
                        remaining -= 4;
                    }

                    if (remaining >= 2)
                    {
                        var v0 = Sse.LoadVector128(src);
                        var v1 = Sse.LoadVector128(src + VectorSse.Count);

                        sum = HWIntrinsics.MultiplyAdd(sum, v0, weights);
                        sum = HWIntrinsics.MultiplyAdd(sum, v1, weights + VectorSse.Count);

                        src       += VectorSse.Count * 2;
                        weights   += VectorSse.Count * 2;
                        remaining -= 2;
                    }
                }

                // At most one 128-bit vector is left over after either path.
                if (remaining != 0)
                {
                    sum = HWIntrinsics.MultiplyAdd(sum, Sse.LoadVector128(src), weights);
                }

                *dst = sum.HorizontalAdd();
                dst += dstStride;
            }
        }
예제 #6
0
        /// <summary>
        /// For each entry of the map at <paramref name="mapxstart"/>, accumulates products of source floats and
        /// weights into three 128-bit accumulators, reduces them to a single 4-float vector and stores it to the
        /// intermediate buffer, stepping the write position by <c>smapy * 4</c> floats per result.
        /// </summary>
        /// <param name="istart">Start of the source line, read as floats.</param>
        /// <param name="tstart">Start of the intermediate buffer; four floats are stored per result.</param>
        /// <param name="cb">Byte offset from <paramref name="tstart"/> at which writing stops.</param>
        /// <param name="mapxstart">
        /// Map read as pairs of <c>uint</c>: a source index (scaled by <c>channels</c>) followed by a byte
        /// offset, relative to <paramref name="mapxstart"/>, of the weights for that entry.
        /// </param>
        /// <param name="smapx">Multiplied by <c>channels</c> to give the number of floats accumulated per result.</param>
        /// <param name="smapy">Multiplied by 4 to give the write stride in floats.</param>
        unsafe void IConvolver.ConvolveSourceLine(byte *istart, byte *tstart, int cb, byte *mapxstart, int smapx, int smapy)
        {
            float *tp = (float *)tstart, tpe = (float *)(tstart + (uint)cb);
            uint * pmapx   = (uint *)mapxstart;
            uint   kstride = (uint)smapx * channels;
            uint   tstride = (uint)smapy * 4;
            // Number of 128-bit vectors consumed per result.
            uint   vcnt    = kstride / (uint)VectorSse.Count;

            while (tp < tpe)
            {
                // Each map entry is two uints: source index, then byte offset of the weights.
                uint ix   = *pmapx++;
                uint lcnt = vcnt;

                float *ip = (float *)istart + ix * channels;
                float *mp = (float *)(mapxstart + *pmapx++);

                VectorSse av0, av1, av2;

                if (Avx.IsSupported && lcnt >= 6)
                {
                    var ax0 = VectorAvx.Zero;
                    var ax1 = VectorAvx.Zero;
                    var ax2 = VectorAvx.Zero;

                    // Three 256-bit vectors (six 128-bit units) per iteration, one per accumulator.
                    for (; lcnt >= 6; lcnt -= 6)
                    {
                        var iv0 = Avx.LoadVector256(ip);
                        var iv1 = Avx.LoadVector256(ip + VectorAvx.Count);
                        var iv2 = Avx.LoadVector256(ip + VectorAvx.Count * 2);
                        ip += VectorAvx.Count * 3;

                        ax0 = HWIntrinsics.MultiplyAdd(ax0, iv0, mp);
                        ax1 = HWIntrinsics.MultiplyAdd(ax1, iv1, mp + VectorAvx.Count);
                        ax2 = HWIntrinsics.MultiplyAdd(ax2, iv2, mp + VectorAvx.Count * 2);
                        mp += VectorAvx.Count * 3;
                    }

                    // The six 128-bit halves sit at unit positions 0..5 of each 24-float group; halves whose
                    // positions are equal modulo 3 are added so they line up with av0/av1/av2 in the loop below.
                    av0 = Sse.Add(ax0.GetLower(), ax1.GetUpper());
                    av1 = Sse.Add(ax0.GetUpper(), ax2.GetLower());
                    av2 = Sse.Add(ax1.GetLower(), ax2.GetUpper());
                }
                else
                {
                    av0 = av1 = av2 = VectorSse.Zero;
                }

                // Three 128-bit vectors per iteration.
                // NOTE(review): lcnt is unsigned and the loop exits only at exactly zero, so this relies on
                // lcnt being a multiple of 3 here — presumably guaranteed by how the caller pads the kernel; confirm.
                for (; lcnt != 0; lcnt -= 3)
                {
                    var iv0 = Sse.LoadVector128(ip);
                    var iv1 = Sse.LoadVector128(ip + VectorSse.Count);
                    var iv2 = Sse.LoadVector128(ip + VectorSse.Count * 2);
                    ip += VectorSse.Count * 3;

                    av0 = HWIntrinsics.MultiplyAdd(av0, iv0, mp);
                    av1 = HWIntrinsics.MultiplyAdd(av1, iv1, mp + VectorSse.Count);
                    av2 = HWIntrinsics.MultiplyAdd(av2, iv2, mp + VectorSse.Count * 2);
                    mp += VectorSse.Count * 3;
                }

                // Viewing av0|av1|av2 as 12 consecutive floats e0..e11, the permutes below gather
                // lane 0 = e3 + e6 + e9, lane 1 = e1 + e7 + e10, lane 2 = e2 + e5 + e11
                // (lane 3 = e0 + e4 + e8 and is only a by-product).
                var avs = Sse.Add(Sse.Add(
                                      Sse.Shuffle(av0, av0, 0b_00_10_01_11),
                                      Sse.Shuffle(av1, av1, 0b_00_01_11_10)),
                                  Sse.Shuffle(av2, av2, 0b_00_11_10_01)
                                  );

                // Builds [e0, e4, e8, e9]; adding avs makes lanes 0..2 the sums of every third element
                // starting at e0, e1 and e2 (presumably one sum per interleaved channel — confirm).
                // Lane 3 ends up as a mix of terms rather than a stride-3 sum.
                av0 = Sse.MoveLowToHigh(Sse.UnpackLow(av0, av1), av2);
                av0 = Sse.Add(av0, avs);

                // All four lanes are stored, including the by-product in lane 3.
                Sse.Store(tp, av0);
                tp += tstride;
            }
        }
예제 #7
0
        /// <summary>
        /// Adds a scaled difference of two input lines to a third line and writes the result as floats.
        /// For each element the difference is <c>y - b</c>, optionally suppressed by a threshold, multiplied by
        /// <paramref name="amt"/> and added to the corresponding value from <paramref name="cstart"/>.
        /// </summary>
        /// <param name="cstart">Line the difference is added to; read as floats starting at element <c>ox * channels</c>.</param>
        /// <param name="ystart">Minuend line; read as floats starting at element <paramref name="ox"/>.</param>
        /// <param name="bstart">Subtrahend line; read as floats from its start.</param>
        /// <param name="ostart">Output line; written as floats from its start.</param>
        /// <param name="ox">Element offset applied to <paramref name="cstart"/> and <paramref name="ystart"/>.</param>
        /// <param name="ow">Multiplied by <c>channels</c> to give the number of floats processed.</param>
        /// <param name="amt">Multiplier applied to the difference.</param>
        /// <param name="thresh">When greater than zero, differences whose magnitude does not exceed it are not applied.</param>
        /// <param name="gamma">When true, the addition is performed on the square root of the value and the result is squared.</param>
        unsafe void IConvolver.SharpenLine(byte *cstart, byte *ystart, byte *bstart, byte *ostart, int ox, int ow, float amt, float thresh, bool gamma)
        {
            float *ip = (float *)cstart + (uint)ox * channels, yp = (float *)ystart + (uint)ox, bp = (float *)bstart, op = (float *)ostart;
            float *ipe = ip + (uint)ow * channels;

            bool threshold = thresh > 0f;

            // 256-bit path: taken when at least one full vector of input remains.
            if (Avx.IsSupported && ip <= ipe - VectorAvx.Count)
            {
                var vthresh = Vector256.Create(threshold ? thresh : -1f);
                // All bits set except the sign bit; And-ing with it yields the absolute value.
                var vmsk    = Vector256.Create(0x7fffffff).AsSingle();
                var vamt    = Vector256.Create(amt);
                var vmin    = VectorAvx.Zero;

                // Pull the end back by one vector so the loop condition guards a full-width load.
                ipe -= VectorAvx.Count;
                do
                {
                    var vd = Avx.Subtract(Avx.LoadVector256(yp), Avx.LoadVector256(bp));
                    yp += VectorAvx.Count;
                    bp += VectorAvx.Count;

                    if (threshold)
                    {
                        // Zero the lanes whose |difference| is not greater than the threshold.
                        var sm = HWIntrinsics.AvxCompareGreaterThan(Avx.And(vd, vmsk), vthresh);
                        vd = Avx.And(vd, sm);
                    }
                    vd = Avx.Multiply(vd, vamt);

                    var v0 = Avx.LoadVector256(ip);
                    ip += VectorAvx.Count;

                    if (gamma)
                    {
                        // Clamp to zero, approximate sqrt as x * rsqrt(x), add, clamp again, then square.
                        // NOTE(review): for a zero lane x * rsqrt(x) is presumably NaN (0 * infinity); the second
                        // Max appears to rely on returning its second operand (vmin) for NaN inputs — confirm.
                        v0 = Avx.Max(v0, vmin);
                        v0 = Avx.Multiply(v0, Avx.ReciprocalSqrt(v0));
                        v0 = Avx.Add(v0, vd);
                        v0 = Avx.Max(v0, vmin);
                        v0 = Avx.Multiply(v0, v0);
                    }
                    else
                    {
                        v0 = Avx.Add(v0, vd);
                    }

                    Avx.Store(op, v0);
                    op += VectorAvx.Count;
                } while (ip <= ipe);
                // Restore the true end for the scalar tail below.
                ipe += VectorAvx.Count;
            }
            // 128-bit path: only taken when the 256-bit path was not.
            else if (ip <= ipe - VectorSse.Count)
            {
                var vthresh = Vector128.Create(threshold ? thresh : -1f);
                // All bits set except the sign bit; And-ing with it yields the absolute value.
                var vmsk    = Vector128.Create(0x7fffffff).AsSingle();
                var vamt    = Vector128.Create(amt);
                var vmin    = VectorSse.Zero;

                // Pull the end back by one vector so the loop condition guards a full-width load.
                ipe -= VectorSse.Count;
                do
                {
                    var vd = Sse.Subtract(Sse.LoadVector128(yp), Sse.LoadVector128(bp));
                    yp += VectorSse.Count;
                    bp += VectorSse.Count;

                    if (threshold)
                    {
                        // Zero the lanes whose |difference| is not greater than the threshold.
                        var sm = Sse.CompareGreaterThan(Sse.And(vd, vmsk), vthresh);
                        vd = Sse.And(vd, sm);
                    }
                    vd = Sse.Multiply(vd, vamt);

                    var v0 = Sse.LoadVector128(ip);
                    ip += VectorSse.Count;

                    if (gamma)
                    {
                        // Same sequence as the 256-bit path: clamp, approximate sqrt, add, clamp, square.
                        v0 = Sse.Max(v0, vmin);
                        v0 = Sse.Multiply(v0, Sse.ReciprocalSqrt(v0));
                        v0 = Sse.Add(v0, vd);
                        v0 = Sse.Max(v0, vmin);
                        v0 = Sse.Multiply(v0, v0);
                    }
                    else
                    {
                        v0 = Sse.Add(v0, vd);
                    }

                    Sse.Store(op, v0);
                    op += VectorSse.Count;
                } while (ip <= ipe);
                // Restore the true end for the scalar tail below.
                ipe += VectorSse.Count;
            }

            float fmin = VectorSse.Zero.ToScalar();

            // Scalar tail: handles whatever the vector loop left over (or everything, if no vector path ran).
            // NOTE(review): unlike the vector paths, a value whose difference fails the threshold is written
            // through unchanged here, i.e. it skips the gamma clamp/sqrt/square round trip.
            while (ip < ipe)
            {
                float dif = *yp++ - *bp++;
                float c0  = *ip++;

                if (!threshold || Math.Abs(dif) > thresh)
                {
                    dif *= amt;

                    if (gamma)
                    {
                        c0  = MathUtil.MaxF(c0, fmin).Sqrt();
                        c0  = MathUtil.MaxF(c0 + dif, fmin);
                        c0 *= c0;
                    }
                    else
                    {
                        c0 += dif;
                    }
                }

                *op++ = c0;
            }
        }