/// <summary>
/// Launches <c>flatten_kernel</c> on the default GPU, sized for
/// <c>spatial * batch * layers</c> elements.
/// </summary>
/// <param name="x">Input buffer, forwarded to the kernel.</param>
/// <param name="spatial">Factor of the element count; forwarded to the kernel.</param>
/// <param name="layers">Factor of the element count; forwarded to the kernel.</param>
/// <param name="batch">Factor of the element count; forwarded to the kernel.</param>
/// <param name="forward">Flag forwarded to the kernel unchanged.</param>
/// <param name="output">Output buffer, forwarded to the kernel.</param>
public static void flatten_ongpu(float[] x, int spatial, int layers, int batch, int forward, float[] output)
{
    int elementCount = spatial * batch * layers;

    // Grid size is derived from the element count; block size is the shared constant.
    var grid = CudaUtils.cuda_gridsize(elementCount);
    var block = new dim3(CudaUtils.BlockSize);
    var launchConfig = new LaunchParam(grid, block);

    Gpu.Default.Launch(flatten_kernel, launchConfig, elementCount, x, spatial, layers, batch, forward, output);
}
/// <summary>
/// Launches <c>reorg_kernel</c> on the default GPU, sized for
/// <c>w * h * c * batch</c> elements.
/// </summary>
/// <param name="x">Input buffer, forwarded to the kernel.</param>
/// <param name="w">Factor of the element count; forwarded to the kernel.</param>
/// <param name="h">Factor of the element count; forwarded to the kernel.</param>
/// <param name="c">Factor of the element count; forwarded to the kernel.</param>
/// <param name="batch">Factor of the element count; forwarded to the kernel.</param>
/// <param name="stride">Forwarded to the kernel unchanged.</param>
/// <param name="forward">Flag forwarded to the kernel unchanged.</param>
/// <param name="output">Output buffer, forwarded to the kernel.</param>
public static void reorg_ongpu(float[] x, int w, int h, int c, int batch, int stride, int forward, float[] output)
{
    int elementCount = w * h * c * batch;

    var grid = CudaUtils.cuda_gridsize(elementCount);
    var block = new dim3(CudaUtils.BlockSize);
    var launchConfig = new LaunchParam(grid, block);

    Gpu.Default.Launch(reorg_kernel, launchConfig, elementCount, x, w, h, c, batch, stride, forward, output);
}
/// <summary>
/// Launches <c>normalize_kernel</c> on the default GPU, sized for
/// <c>batch * filters * spatial</c> elements.
/// </summary>
/// <param name="x">Buffer forwarded to the kernel.</param>
/// <param name="mean">Buffer forwarded to the kernel.</param>
/// <param name="variance">Buffer forwarded to the kernel.</param>
/// <param name="batch">Factor of the element count; forwarded to the kernel.</param>
/// <param name="filters">Factor of the element count; forwarded to the kernel.</param>
/// <param name="spatial">Factor of the element count; forwarded to the kernel.</param>
public static void normalize_gpu(float[] x, float[] mean, float[] variance, int batch, int filters, int spatial)
{
    int elementCount = batch * filters * spatial;

    var grid = CudaUtils.cuda_gridsize(elementCount);
    var block = new dim3(CudaUtils.BlockSize);
    var launchConfig = new LaunchParam(grid, block);

    Gpu.Default.Launch(normalize_kernel, launchConfig, elementCount, x, mean, variance, batch, filters, spatial);
}
/// <summary>
/// Launches <c>fast_variance_delta_kernel</c> on the default GPU with a grid
/// derived from <c>filters</c>.
/// </summary>
/// <remarks>
/// NOTE(review): the grid is <c>CudaUtils.cuda_gridsize(filters)</c>. If the kernel
/// expects one block per filter, this launches too few blocks - confirm against
/// the kernel implementation.
/// </remarks>
/// <param name="x">Buffer forwarded to the kernel.</param>
/// <param name="delta">Buffer forwarded to the kernel.</param>
/// <param name="mean">Buffer forwarded to the kernel.</param>
/// <param name="variance">Buffer forwarded to the kernel.</param>
/// <param name="batch">Forwarded to the kernel unchanged.</param>
/// <param name="filters">Determines the grid size; forwarded to the kernel.</param>
/// <param name="spatial">Forwarded to the kernel unchanged.</param>
/// <param name="varianceDelta">Buffer forwarded to the kernel as its last argument.</param>
public static void fast_variance_delta_gpu(float[] x, float[] delta, float[] mean, float[] variance, int batch, int filters, int spatial, float[] varianceDelta)
{
    var grid = CudaUtils.cuda_gridsize(filters);
    var block = new dim3(CudaUtils.BlockSize);
    var launchConfig = new LaunchParam(grid, block);

    Gpu.Default.Launch(fast_variance_delta_kernel, launchConfig, x, delta, mean, variance, batch, filters, spatial, varianceDelta);
}
/// <summary>
/// Launches <c>softmax_kernel</c> on the default GPU with a grid derived from
/// <c>groups</c>.
/// </summary>
/// <param name="input">Input buffer, forwarded to the kernel.</param>
/// <param name="n">Passed to the kernel as its first argument.</param>
/// <param name="offset">Forwarded to the kernel unchanged.</param>
/// <param name="groups">Determines the grid size; passed to the kernel as its third argument.</param>
/// <param name="temp">Forwarded to the kernel unchanged.</param>
/// <param name="output">Output buffer, forwarded to the kernel.</param>
/// <param name="inputStart">Forwarded to the kernel unchanged (defaults to 0).</param>
/// <param name="outputStart">Forwarded to the kernel unchanged (defaults to 0).</param>
public static void softmax_gpu(float[] input, int n, int offset, int groups, float temp, float[] output, int inputStart = 0, int outputStart = 0)
{
    var grid = CudaUtils.cuda_gridsize(groups);
    var block = new dim3(CudaUtils.BlockSize);
    var launchConfig = new LaunchParam(grid, block);

    // Kernel argument order: n, offset, groups, then the buffers and start indices.
    Gpu.Default.Launch(softmax_kernel, launchConfig, n, offset, groups, input, temp, output, inputStart, outputStart);
}
/// <summary>
/// Launches <c>shortcut_kernel</c> on the default GPU, sized for
/// <c>batch * min(w1, w2) * min(h1, h2) * min(c1, c2)</c> elements.
/// </summary>
/// <remarks>
/// Also passes <c>stride = w1 / w2</c> and <c>sample = w2 / w1</c> (integer
/// division), each raised to 1 when the quotient is below 1.
/// </remarks>
/// <param name="batch">Factor of the element count; forwarded to the kernel.</param>
/// <param name="w1">First width; forwarded to the kernel.</param>
/// <param name="h1">First height; forwarded to the kernel.</param>
/// <param name="c1">First channel count; forwarded to the kernel.</param>
/// <param name="add">Buffer forwarded to the kernel.</param>
/// <param name="w2">Second width; forwarded to the kernel.</param>
/// <param name="h2">Second height; forwarded to the kernel.</param>
/// <param name="c2">Second channel count; forwarded to the kernel.</param>
/// <param name="output">Buffer forwarded to the kernel as its last argument.</param>
public static void shortcut_gpu(int batch, int w1, int h1, int c1, float[] add, int w2, int h2, int c2, float[] output)
{
    // Smaller extent along each dimension.
    int smallestW = w2;
    if (w1 < w2)
    {
        smallestW = w1;
    }

    int smallestH = h2;
    if (h1 < h2)
    {
        smallestH = h1;
    }

    int smallestC = c2;
    if (c1 < c2)
    {
        smallestC = c1;
    }

    // Integer ratios between the two widths, never below 1.
    int strideRatio = w1 / w2;
    int sampleRatio = w2 / w1;
    int stride = (strideRatio < 1) ? 1 : strideRatio;
    int sample = (sampleRatio < 1) ? 1 : sampleRatio;

    int elementCount = batch * smallestW * smallestH * smallestC;

    var grid = CudaUtils.cuda_gridsize(elementCount);
    var block = new dim3(CudaUtils.BlockSize);
    var launchConfig = new LaunchParam(grid, block);

    Gpu.Default.Launch(shortcut_kernel, launchConfig, elementCount, smallestW, smallestH, smallestC, stride, sample, batch, w1, h1, c1, add, w2, h2, c2, output);
}
/// <summary>
/// Launches <c>fill_kernel</c> on the default GPU with a grid derived from <c>n</c>.
/// </summary>
/// <param name="n">Determines the grid size; forwarded to the kernel.</param>
/// <param name="alpha">Forwarded to the kernel unchanged.</param>
/// <param name="x">Buffer forwarded to the kernel.</param>
/// <param name="incx">Forwarded to the kernel unchanged.</param>
/// <param name="startx">Forwarded to the kernel unchanged (defaults to 0).</param>
public static void fill_ongpu(int n, float alpha, float[] x, int incx, int startx = 0)
{
    var grid = CudaUtils.cuda_gridsize(n);
    var block = new dim3(CudaUtils.BlockSize);
    var launchConfig = new LaunchParam(grid, block);

    Gpu.Default.Launch(fill_kernel, launchConfig, n, alpha, x, incx, startx);
}
/// <summary>
/// Launches <c>constrain_kernel</c> on the default GPU with a grid derived from <c>n</c>.
/// </summary>
/// <param name="n">Determines the grid size; forwarded to the kernel.</param>
/// <param name="alpha">Forwarded to the kernel unchanged.</param>
/// <param name="x">Buffer forwarded to the kernel.</param>
/// <param name="incx">Forwarded to the kernel unchanged.</param>
public static void constrain_ongpu(int n, float alpha, float[] x, int incx)
{
    var grid = CudaUtils.cuda_gridsize(n);
    var block = new dim3(CudaUtils.BlockSize);
    var launchConfig = new LaunchParam(grid, block);

    Gpu.Default.Launch(constrain_kernel, launchConfig, n, alpha, x, incx);
}
/// <summary>
/// Launches <c>mask_kernel</c> on the default GPU with a grid derived from <c>n</c>.
/// </summary>
/// <param name="n">Determines the grid size; forwarded to the kernel.</param>
/// <param name="x">Buffer forwarded to the kernel.</param>
/// <param name="maskNum">Forwarded to the kernel unchanged.</param>
/// <param name="mask">Buffer forwarded to the kernel.</param>
public static void mask_ongpu(int n, float[] x, float maskNum, float[] mask)
{
    var grid = CudaUtils.cuda_gridsize(n);
    var block = new dim3(CudaUtils.BlockSize);
    var launchConfig = new LaunchParam(grid, block);

    Gpu.Default.Launch(mask_kernel, launchConfig, n, x, maskNum, mask);
}
/// <summary>
/// Launches <c>mult_add_into_kernel</c> on the default GPU with a grid derived
/// from <c>num</c>.
/// </summary>
/// <param name="num">Determines the grid size; forwarded to the kernel.</param>
/// <param name="a">Buffer forwarded to the kernel.</param>
/// <param name="b">Buffer forwarded to the kernel.</param>
/// <param name="c">Buffer forwarded to the kernel.</param>
public static void mult_add_into_gpu(int num, float[] a, float[] b, float[] c)
{
    var grid = CudaUtils.cuda_gridsize(num);
    var block = new dim3(CudaUtils.BlockSize);
    var launchConfig = new LaunchParam(grid, block);

    Gpu.Default.Launch(mult_add_into_kernel, launchConfig, num, a, b, c);
}
/// <summary>
/// Launches <c>mul_kernel</c> on the default GPU with a grid derived from <c>n</c>.
/// </summary>
/// <param name="n">Determines the grid size; forwarded to the kernel.</param>
/// <param name="x">Buffer forwarded to the kernel.</param>
/// <param name="incx">Forwarded to the kernel unchanged.</param>
/// <param name="y">Buffer forwarded to the kernel.</param>
/// <param name="incy">Forwarded to the kernel unchanged.</param>
public static void mul_ongpu(int n, float[] x, int incx, float[] y, int incy)
{
    var grid = CudaUtils.cuda_gridsize(n);
    var block = new dim3(CudaUtils.BlockSize);
    var launchConfig = new LaunchParam(grid, block);

    Gpu.Default.Launch(mul_kernel, launchConfig, n, x, incx, y, incy);
}
/// <summary>
/// Launches <c>axpy_kernel</c> on the default GPU with a grid derived from <c>n</c>.
/// </summary>
/// <param name="n">Determines the grid size; forwarded to the kernel.</param>
/// <param name="alpha">Forwarded to the kernel unchanged.</param>
/// <param name="x">Buffer forwarded to the kernel.</param>
/// <param name="y">Buffer forwarded to the kernel.</param>
/// <param name="xStart">Forwarded to the kernel unchanged (defaults to 0).</param>
/// <param name="yStart">Forwarded to the kernel unchanged (defaults to 0).</param>
private static void axpy_ongpu_offset(int n, float alpha, float[] x, float[] y, int xStart = 0, int yStart = 0)
{
    var grid = CudaUtils.cuda_gridsize(n);
    var block = new dim3(CudaUtils.BlockSize);
    var launchConfig = new LaunchParam(grid, block);

    Gpu.Default.Launch(axpy_kernel, launchConfig, n, alpha, x, y, xStart, yStart);
}
/// <summary>
/// Launches <c>pow_kernel</c> on the default GPU with a grid derived from <c>n</c>.
/// </summary>
/// <param name="n">Determines the grid size; forwarded to the kernel.</param>
/// <param name="alpha">Forwarded to the kernel unchanged.</param>
/// <param name="x">Buffer forwarded to the kernel.</param>
/// <param name="y">Buffer forwarded to the kernel.</param>
public static void pow_ongpu(int n, float alpha, float[] x, float[] y)
{
    var grid = CudaUtils.cuda_gridsize(n);
    var block = new dim3(CudaUtils.BlockSize);
    var launchConfig = new LaunchParam(grid, block);

    Gpu.Default.Launch(pow_kernel, launchConfig, n, alpha, x, y);
}
/// <summary>
/// Launches <c>smooth_l1_kernel</c> on the default GPU with a grid derived from <c>n</c>.
/// </summary>
/// <param name="n">Determines the grid size; forwarded to the kernel.</param>
/// <param name="pred">Buffer forwarded to the kernel.</param>
/// <param name="truth">Buffer forwarded to the kernel.</param>
/// <param name="delta">Buffer forwarded to the kernel.</param>
/// <param name="error">Buffer forwarded to the kernel.</param>
public static void smooth_l1_gpu(int n, float[] pred, float[] truth, float[] delta, float[] error)
{
    var grid = CudaUtils.cuda_gridsize(n);
    var block = new dim3(CudaUtils.BlockSize);
    var launchConfig = new LaunchParam(grid, block);

    Gpu.Default.Launch(smooth_l1_kernel, launchConfig, n, pred, truth, delta, error);
}
/// <summary>
/// Launches <c>copy_kernel</c> on the default GPU with a grid derived from <c>n</c>.
/// </summary>
/// <param name="n">Determines the grid size; forwarded to the kernel.</param>
/// <param name="from">Buffer forwarded to the kernel as its second argument.</param>
/// <param name="to">Buffer forwarded to the kernel as its third argument.</param>
/// <param name="xStart">Forwarded to the kernel unchanged (defaults to 0).</param>
/// <param name="yStart">Forwarded to the kernel unchanged (defaults to 0).</param>
private static void copy_ongpu_offset(int n, float[] from, float[] to, int xStart = 0, int yStart = 0)
{
    var grid = CudaUtils.cuda_gridsize(n);
    var block = new dim3(CudaUtils.BlockSize);
    var launchConfig = new LaunchParam(grid, block);

    Gpu.Default.Launch(copy_kernel, launchConfig, n, from, to, xStart, yStart);
}
/// <summary>
/// Launches <c>gradient_array_kernel</c> on the default GPU with a grid derived
/// from <c>n</c>.
/// </summary>
/// <param name="x">Buffer forwarded to the kernel as its first argument.</param>
/// <param name="n">Determines the grid size; forwarded to the kernel.</param>
/// <param name="a">Activation value forwarded to the kernel unchanged.</param>
/// <param name="delta">Buffer forwarded to the kernel.</param>
public static void gradient_array_ongpu(float[] x, int n, Activation a, float[] delta)
{
    var grid = CudaUtils.cuda_gridsize(n);
    var block = new dim3(CudaUtils.BlockSize);
    var launchConfig = new LaunchParam(grid, block);

    Gpu.Default.Launch(gradient_array_kernel, launchConfig, x, n, a, delta);
}
/// <summary>
/// Launches <c>weighted_delta_kernel</c> on the default GPU with a grid derived
/// from <c>num</c>.
/// </summary>
/// <param name="a">Buffer forwarded to the kernel.</param>
/// <param name="b">Buffer forwarded to the kernel.</param>
/// <param name="s">Buffer forwarded to the kernel.</param>
/// <param name="da">Buffer forwarded to the kernel.</param>
/// <param name="db">Buffer forwarded to the kernel.</param>
/// <param name="ds">Buffer forwarded to the kernel.</param>
/// <param name="num">Determines the grid size; passed to the kernel as its first argument.</param>
/// <param name="dc">Buffer forwarded to the kernel as its last argument.</param>
public static void weighted_delta_gpu(float[] a, float[] b, float[] s, float[] da, float[] db, float[] ds, int num, float[] dc)
{
    var grid = CudaUtils.cuda_gridsize(num);
    var block = new dim3(CudaUtils.BlockSize);
    var launchConfig = new LaunchParam(grid, block);

    // The kernel takes the count first, unlike this wrapper's parameter order.
    Gpu.Default.Launch(weighted_delta_kernel, launchConfig, num, a, b, s, da, db, ds, dc);
}