コード例 #1
0
            // Computes one output pixel res[x0, y0] as the kernel-weighted sum of the
            // image neighbourhood around (x0, y0), replicating edge pixels wherever the
            // kernel window reaches past the image.
            //
            // Reads img, kernel, res, ofsx, ofsy, kw1, kh1, iw1 and ih1 from the enclosing
            // scope. NOTE(review): kw1/kh1 and iw1/ih1 look like the last valid column/row
            // index of the kernel and the image respectively - confirm at the declaration.
            void ProcessCheckedOptimized(int x0, int y0)
            {
                // Kernel columns in [firstInside, lastInside] map onto real image columns;
                // columns left of firstInside reuse the pixel at firstInside, columns right
                // of lastInside reuse the pixel at lastInside.
                var firstInside = Math.Max(0, ofsx - x0);
                var lastInside  = Math.Min(kw1, iw1 + ofsx - x0);
                var acc         = 0.0f;

                for (var row = 0; row <= kh1; row++)
                {
                    // Clamp the source row into [0, ih1] so rows above/below reuse the edge row.
                    var srcY = Math.Min(ih1, y0 - ofsy + row);
                    srcY = Math.Max(0, srcY);

                    // Row pointer is taken at the (possibly out-of-range) window start column,
                    // so it is only ever indexed with offsets within [firstInside, lastInside].
                    var pixels = img.PixelAddr(x0 - ofsx, srcY);
                    var coeffs = kernel.PixelAddr(0, row);
                    int col;

                    // Left overhang: every coefficient multiplies the first in-range pixel.
                    for (col = 0; col < firstInside; col++)
                    {
                        acc += pixels[firstInside] * coeffs[col];
                    }

                    // Interior: pixel and coefficient share the same offset.
                    for (col = firstInside; col <= lastInside; col++)
                    {
                        acc += pixels[col] * coeffs[col];
                    }

                    // Right overhang: every coefficient multiplies the last in-range pixel.
                    for (col = lastInside + 1; col <= kw1; col++)
                    {
                        acc += pixels[lastInside] * coeffs[col];
                    }
                }

                res[x0, y0] = acc;
            }
コード例 #2
0
        /// <summary>
        /// Writes the element-wise sum of <paramref name="img1"/> and <paramref name="img2"/>
        /// into <paramref name="res"/>, processing eight floats per step with AVX and
        /// finishing each row with a scalar loop.
        /// </summary>
        /// <param name="img1">First addend; read over the area defined by <paramref name="res"/>.</param>
        /// <param name="img2">Second addend; read over the area defined by <paramref name="res"/>.</param>
        /// <param name="res">Destination image; its Width and Height define the processed area.</param>
        /// <remarks>
        /// Uses aligned loads and stores, so every row start must be 32-byte aligned.
        /// NOTE(review): presumably NativeImage guarantees this through its row stride - confirm.
        /// </remarks>
        public static void Sum_Avx(NativeImage <float> img1, NativeImage <float> img2, NativeImage <float> res)
        {
            const int lanes = 8;

            // Largest multiple of the vector width that fits into one row.
            var vectorEnd = res.Width / lanes * lanes;

            for (var row = 0; row < res.Height; row++)
            {
                var a   = img1.PixelAddr(0, row);
                var b   = img2.PixelAddr(0, row);
                var dst = res.PixelAddr(0, row);
                var col = 0;

                // Vectorised part: eight sums per iteration.
                for (; col < vectorEnd; col += lanes)
                {
                    var left  = Avx.LoadAlignedVector256(a + col);
                    var right = Avx.LoadAlignedVector256(b + col);

                    Avx.StoreAligned(dst + col, Avx.Add(left, right));
                }

                // Scalar tail for the columns that do not fill a whole vector.
                for (; col < res.Width; col++)
                {
                    dst[col] = a[col] + b[col];
                }
            }
        }
コード例 #3
0
        /// <summary>
        /// Rotates <paramref name="src"/> by 180 degrees into <paramref name="dst"/>:
        /// source pixel (i, j) is written to destination pixel
        /// (Width - 1 - i, Height - 1 - j), using the dimensions of <paramref name="src"/>.
        /// </summary>
        /// <param name="src">Image to read; its Width and Height define the processed area.</param>
        /// <param name="dst">
        /// Image to write. NOTE(review): assumed to be at least as large as
        /// <paramref name="src"/> and not to share memory with it - confirm at the call sites.
        /// </param>
        public static void Rotate180_Optimized(NativeImage <float> src, NativeImage <float> dst)
        {
            var w = src.Width;
            var h = src.Height;

            for (var j = 0; j < h; j++)
            {
                var s = src.PixelAddr(0, j);

                // A 180-degree rotation reverses the row order as well as the pixel order
                // inside each row, so source row j lands in destination row h - 1 - j.
                // (Writing into row j would only mirror the image horizontally.)
                // The pointer starts one element past the end of that row and is
                // pre-decremented before every store.
                var d = dst.PixelAddr(w, h - 1 - j);

                for (var i = 0; i < w; i++)
                {
                    *--d = *s++;
                }
            }
        }
コード例 #4
0
        /// <summary>
        /// Writes the element-wise sum of <paramref name="img1"/> and <paramref name="img2"/>
        /// into <paramref name="res"/>, walking each row through raw pixel pointers.
        /// </summary>
        /// <param name="img1">First addend; read over the area defined by <paramref name="res"/>.</param>
        /// <param name="img2">Second addend; read over the area defined by <paramref name="res"/>.</param>
        /// <param name="res">Destination image; its Width and Height define the processed area.</param>
        public static void Sum_Optimized(NativeImage <float> img1, NativeImage <float> img2, NativeImage <float> res)
        {
            // The row width is read once up front, the height on every outer iteration.
            var width = res.Width;

            for (var row = 0; row < res.Height; row++)
            {
                var a      = img1.PixelAddr(0, row);
                var b      = img2.PixelAddr(0, row);
                var dst    = res.PixelAddr(0, row);
                var rowEnd = dst + width;

                // Advance all three pointers together until the destination row is filled.
                while (dst < rowEnd)
                {
                    *dst++ = *a++ + *b++;
                }
            }
        }
コード例 #5
0
            // Computes eight horizontally adjacent output pixels at once, starting at
            // (x0, y0): for every kernel coefficient, the eight consecutive source pixels
            // beginning at the matching window position are scaled by that coefficient
            // and accumulated; the eight sums are then stored to res at (x0, y0).
            //
            // Reads img, kernel, res, ofsx, ofsy, kw1 and kh1 from the enclosing scope and
            // performs no clamping. NOTE(review): presumably the caller only invokes this
            // where the whole window plus seven extra columns lies inside the image - confirm.
            void ProcessUncheckedVector(int x0, int y0)
            {
                var acc = Vector256<float>.Zero;

                for (var row = 0; row <= kh1; row++)
                {
                    var pixels = img.PixelAddr(x0 - ofsx, y0 - ofsy + row);
                    var coeffs = kernel.PixelAddr(0, row);

                    for (var col = 0; col <= kw1; col++)
                    {
                        // Unaligned load: the window start moves one float per kernel column.
                        var window = Avx.LoadVector256(pixels + col);

                        // Replicate the single kernel coefficient into all eight lanes.
                        var weight = Avx.BroadcastScalarToVector256(coeffs + col);

                        acc = Avx.Add(acc, Avx.Multiply(window, weight));
                    }
                }

                Avx.Store(res.PixelAddr(x0, y0), acc);
            }
コード例 #6
0
        /// <summary>
        /// Rotates <paramref name="src"/> by 180 degrees into <paramref name="dst"/> using AVX:
        /// source pixel (i, j) is written to destination pixel
        /// (Width - 1 - i, Height - 1 - j), using the dimensions of <paramref name="src"/>.
        /// Eight floats are reversed per vector step; the remaining columns of each row
        /// are copied one by one.
        /// </summary>
        /// <param name="src">
        /// Image to read; its Width and Height define the processed area. Rows are read with
        /// aligned loads. NOTE(review): presumably NativeImage guarantees 32-byte aligned
        /// row starts - confirm.
        /// </param>
        /// <param name="dst">
        /// Image to write. NOTE(review): assumed to be at least as large as
        /// <paramref name="src"/> and not to share memory with it - confirm at the call sites.
        /// </param>
        public static void Rotate180_Avx(NativeImage <float> src, NativeImage <float> dst)
        {
            var w8 = src.Width / 8 * 8;
            var h  = src.Height;

            for (var j = 0; j < h; j++)
            {
                var s = src.PixelAddr(0, j);

                // A 180-degree rotation reverses the row order as well as the pixel order
                // inside each row, so source row j lands in destination row h - 1 - j.
                // (Writing into row j would only mirror the image horizontally.)
                // The pointer starts one element past the end of that row and moves backwards.
                var d = dst.PixelAddr(src.Width, h - 1 - j);

                for (var i = 0; i < w8; i += 8)
                {
                    // 0x1b selects elements 3,2,1,0: reverses the four floats inside each 128-bit half.
                    var v = Avx.Permute(Avx.LoadAlignedVector256(s), 0x1b);
                    s += 8;
                    d -= 8;

                    // Control 1 swaps the two 128-bit halves, completing the 8-float reversal.
                    // Unaligned store: d is only 32-byte aligned when the width is a multiple of 8.
                    Avx.Store(d, Avx.Permute2x128(v, v, 1));
                }

                // Scalar tail for the columns that do not fill a whole vector.
                for (var i = w8; i < src.Width; i++)
                {
                    *--d = *s++;
                }
            }
        }