// Computes the output sample at (x0, y0) as the sum of image-sample * kernel-weight
// products over every kernel row 0..kh1 and kernel column 0..kw1, and stores it in res.
//
// Image rows are clamped to [0, ih1]. Kernel columns in [firstInside, lastInside] read the
// image at their own column offset; columns to the left of that range reuse the sample at
// offset firstInside and columns to the right reuse the sample at offset lastInside.
// NOTE(review): presumably ofsx/ofsy are the kernel anchor offsets and kw1/kh1/iw1/ih1 are
// the last valid kernel/image indices - confirm against the enclosing scope.
void ProcessCheckedOptimized(int x0, int y0)
{
    var firstInside = Math.Max(0, ofsx - x0);
    var lastInside = Math.Min(kw1, iw1 + ofsx - x0);
    var acc = 0.0f;

    for (var row = 0; row <= kh1; row++)
    {
        // Clamp the image row so kernel rows hanging over the top/bottom reuse the edge row.
        var srcRow = Math.Max(0, Math.Min(ih1, y0 - ofsy + row));
        var pixels = img.PixelAddr(x0 - ofsx, srcRow);
        var weights = kernel.PixelAddr(0, row);

        // Kernel columns left of the in-range span: reuse the sample at firstInside.
        var col = 0;
        while (col < firstInside)
        {
            acc += pixels[firstInside] * weights[col];
            col++;
        }

        // Kernel columns that map straight onto image columns.
        col = firstInside;
        while (col <= lastInside)
        {
            acc += pixels[col] * weights[col];
            col++;
        }

        // Kernel columns right of the in-range span: reuse the sample at lastInside.
        col = lastInside + 1;
        while (col <= kw1)
        {
            acc += pixels[lastInside] * weights[col];
            col++;
        }
    }

    res[x0, y0] = acc;
}
/// <summary>
/// Writes the element-wise sum of <paramref name="img1"/> and <paramref name="img2"/> into
/// <paramref name="res"/>, processing eight floats per step with AVX and finishing each row
/// with a scalar loop for the remaining columns.
/// </summary>
/// <remarks>
/// Loop bounds come from <paramref name="res"/> only; the inputs are read over the same
/// extent. The vector part uses aligned loads and an aligned store, so the address of every
/// row start must satisfy the 32-byte alignment those instructions require.
/// </remarks>
/// <param name="img1">First addend.</param>
/// <param name="img2">Second addend.</param>
/// <param name="res">Destination image.</param>
public static void Sum_Avx(NativeImage<float> img1, NativeImage<float> img2, NativeImage<float> res)
{
    var width = res.Width;
    // Largest multiple of 8 that does not exceed the width.
    var vectorWidth = width - (width % 8);

    for (var row = 0; row < res.Height; row++)
    {
        var left = img1.PixelAddr(0, row);
        var right = img2.PixelAddr(0, row);
        var output = res.PixelAddr(0, row);

        var col = 0;
        for (; col < vectorWidth; col += 8)
        {
            var total = Avx.Add(Avx.LoadAlignedVector256(left + col), Avx.LoadAlignedVector256(right + col));
            Avx.StoreAligned(output + col, total);
        }

        // Scalar tail for the columns that do not fill a whole vector.
        for (; col < width; col++)
        {
            output[col] = left[col] + right[col];
        }
    }
}
/// <summary>
/// Copies every row of <paramref name="src"/> into the same-numbered row of
/// <paramref name="dst"/> with the column order reversed.
/// </summary>
/// <remarks>
/// Loop bounds and the destination end address are derived from <paramref name="src"/>.
/// NOTE(review): row order is not reversed here, so on its own this mirrors the image
/// horizontally; confirm whether callers expect the rows to be flipped as well.
/// </remarks>
/// <param name="src">Image to read.</param>
/// <param name="dst">Image to write.</param>
public static void Rotate180_Optimized(NativeImage<float> src, NativeImage<float> dst)
{
    var width = src.Width;
    var height = src.Height;

    for (var row = 0; row < height; row++)
    {
        var input = src.PixelAddr(0, row);
        // Address one element past the last destination column of this row.
        var outputEnd = dst.PixelAddr(width, row);

        for (var col = 0; col < width; col++)
        {
            outputEnd[-1 - col] = input[col];
        }
    }
}
/// <summary>
/// Writes the element-wise sum of <paramref name="img1"/> and <paramref name="img2"/> into
/// <paramref name="res"/>, one row at a time through raw row pointers.
/// </summary>
/// <remarks>
/// Loop bounds come from <paramref name="res"/> only; the inputs are read over the same extent.
/// </remarks>
/// <param name="img1">First addend.</param>
/// <param name="img2">Second addend.</param>
/// <param name="res">Destination image.</param>
public static void Sum_Optimized(NativeImage<float> img1, NativeImage<float> img2, NativeImage<float> res)
{
    var width = res.Width;
    var height = res.Height;

    for (var row = 0; row < height; row++)
    {
        var left = img1.PixelAddr(0, row);
        var right = img2.PixelAddr(0, row);
        var output = res.PixelAddr(0, row);

        // Walk the three row pointers in lock-step across the row.
        for (var remaining = width; remaining > 0; remaining--)
        {
            *output++ = *left++ + *right++;
        }
    }
}
// Computes eight horizontally adjacent output samples starting at (x0, y0) in one pass and
// stores them to res with an unaligned store.
//
// For every kernel element (col, row) the eight image samples starting at
// (x0 - ofsx + col, y0 - ofsy + row) are multiplied by that kernel weight (broadcast to all
// lanes) and added to the running vector. No coordinate is clamped or range-checked here.
// NOTE(review): presumably the caller only uses this where the whole kernel footprint plus
// seven extra columns lies inside the image - confirm at the call site.
void ProcessUncheckedVector(int x0, int y0)
{
    var acc = Vector256<float>.Zero;

    for (var row = 0; row <= kh1; row++)
    {
        var pixels = img.PixelAddr(x0 - ofsx, y0 - ofsy + row);
        var weights = kernel.PixelAddr(0, row);

        for (var col = 0; col <= kw1; col++)
        {
            var window = Avx.LoadVector256(pixels + col);
            var weight = Avx.BroadcastScalarToVector256(weights + col);
            acc = Avx.Add(acc, Avx.Multiply(window, weight));
        }
    }

    Avx.Store(res.PixelAddr(x0, y0), acc);
}
/// <summary>
/// Copies every row of <paramref name="src"/> into the same-numbered row of
/// <paramref name="dst"/> with the column order reversed, handling eight floats per step
/// with AVX and the remaining columns with a scalar loop.
/// </summary>
/// <remarks>
/// Source vectors are read with an aligned load, so every source row start must satisfy the
/// 32-byte alignment that instruction requires; destination writes use an unaligned store.
/// NOTE(review): row order is not reversed here, so on its own this mirrors the image
/// horizontally; confirm whether callers expect the rows to be flipped as well.
/// </remarks>
/// <param name="src">Image to read.</param>
/// <param name="dst">Image to write.</param>
public static void Rotate180_Avx(NativeImage<float> src, NativeImage<float> dst)
{
    var width = src.Width;
    var height = src.Height;
    // Largest multiple of 8 that does not exceed the width.
    var vectorWidth = width - (width % 8);

    for (var row = 0; row < height; row++)
    {
        var input = src.PixelAddr(0, row);
        // Address one element past the last destination column of this row.
        var outputEnd = dst.PixelAddr(width, row);

        var col = 0;
        for (; col < vectorWidth; col += 8)
        {
            // 0x1b reverses the four floats inside each 128-bit half...
            var reversedHalves = Avx.Permute(Avx.LoadAlignedVector256(input + col), 0x1b);
            // ...and swapping the two halves completes the eight-element reversal.
            var reversed = Avx.Permute2x128(reversedHalves, reversedHalves, 1);
            Avx.Store(outputEnd - col - 8, reversed);
        }

        // Scalar tail for the columns that do not fill a whole vector.
        for (; col < width; col++)
        {
            outputEnd[-1 - col] = input[col];
        }
    }
}