Beispiel #1
0
        /// <summary>
        /// Launches <c>normalize_kernel</c> on the default GPU.
        /// The launch grid is derived from batch * filters * spatial via
        /// <c>CudaUtils.cuda_gridsize</c>; the block size is <c>CudaUtils.BlockSize</c>.
        /// </summary>
        /// <param name="x">Buffer forwarded to the kernel. NOTE(review): presumably normalized in place; confirm against the kernel.</param>
        /// <param name="mean">Buffer forwarded to the kernel as the mean argument.</param>
        /// <param name="variance">Buffer forwarded to the kernel as the variance argument.</param>
        /// <param name="batch">First factor of the element count.</param>
        /// <param name="filters">Second factor of the element count.</param>
        /// <param name="spatial">Third factor of the element count.</param>
        public static void normalize_gpu(float[] x, float[] mean, float[] variance, int batch, int filters, int spatial)
        {
            int total       = batch * filters * spatial;
            var grid        = CudaUtils.cuda_gridsize(total);
            var block       = new dim3(CudaUtils.BlockSize);
            var launchParam = new LaunchParam(grid, block);

            Gpu.Default.Launch(normalize_kernel, launchParam, total, x, mean, variance, batch, filters, spatial);
        }
Beispiel #2
0
        /// <summary>
        /// Launches <c>fast_variance_delta_kernel</c> on the default GPU, forwarding all
        /// arguments unchanged. The grid is derived from <paramref name="filters"/> via
        /// <c>CudaUtils.cuda_gridsize</c>.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the kernel is not visible here. If it assigns one block per filter,
        /// the grid should be <c>filters</c> blocks rather than <c>cuda_gridsize(filters)</c>;
        /// confirm against the kernel before changing anything.
        /// </remarks>
        /// <param name="x">Buffer forwarded to the kernel.</param>
        /// <param name="delta">Buffer forwarded to the kernel.</param>
        /// <param name="mean">Buffer forwarded to the kernel.</param>
        /// <param name="variance">Buffer forwarded to the kernel.</param>
        /// <param name="batch">Forwarded to the kernel.</param>
        /// <param name="filters">Forwarded to the kernel; also drives the grid size.</param>
        /// <param name="spatial">Forwarded to the kernel.</param>
        /// <param name="varianceDelta">Buffer forwarded to the kernel as its last argument. NOTE(review): presumably the output; confirm.</param>
        public static void fast_variance_delta_gpu(float[] x, float[] delta, float[] mean, float[] variance, int batch, int filters, int spatial, float[] varianceDelta)
        {
            var grid        = CudaUtils.cuda_gridsize(filters);
            var block       = new dim3(CudaUtils.BlockSize);
            var launchParam = new LaunchParam(grid, block);

            Gpu.Default.Launch(
                fast_variance_delta_kernel, launchParam,
                x, delta, mean, variance,
                batch, filters, spatial,
                varianceDelta);
        }
Beispiel #3
0
        /// <summary>
        /// Launches <c>reorg_kernel</c> on the default GPU. The grid is derived from
        /// w * h * c * batch via <c>CudaUtils.cuda_gridsize</c>.
        /// </summary>
        /// <param name="x">Buffer forwarded to the kernel.</param>
        /// <param name="w">First factor of the element count.</param>
        /// <param name="h">Second factor of the element count.</param>
        /// <param name="c">Third factor of the element count.</param>
        /// <param name="batch">Fourth factor of the element count.</param>
        /// <param name="stride">Forwarded to the kernel.</param>
        /// <param name="forward">Flag forwarded to the kernel as an int.</param>
        /// <param name="output">Buffer forwarded to the kernel as its last argument.</param>
        public static void reorg_ongpu(float[] x, int w, int h, int c, int batch, int stride, int forward, float[] output)
        {
            int total       = w * h * c * batch;
            var grid        = CudaUtils.cuda_gridsize(total);
            var block       = new dim3(CudaUtils.BlockSize);
            var launchParam = new LaunchParam(grid, block);

            Gpu.Default.Launch(reorg_kernel, launchParam, total, x, w, h, c, batch, stride, forward, output);
        }
Beispiel #4
0
        /// <summary>
        /// Launches <c>flatten_kernel</c> on the default GPU. The grid is derived from
        /// spatial * batch * layers via <c>CudaUtils.cuda_gridsize</c>.
        /// </summary>
        /// <param name="x">Buffer forwarded to the kernel.</param>
        /// <param name="spatial">Factor of the element count; forwarded to the kernel.</param>
        /// <param name="layers">Factor of the element count; forwarded to the kernel.</param>
        /// <param name="batch">Factor of the element count; forwarded to the kernel.</param>
        /// <param name="forward">Flag forwarded to the kernel as an int.</param>
        /// <param name="output">Buffer forwarded to the kernel as its last argument.</param>
        public static void flatten_ongpu(float[] x, int spatial, int layers, int batch, int forward, float[] output)
        {
            int total       = spatial * batch * layers;
            var grid        = CudaUtils.cuda_gridsize(total);
            var block       = new dim3(CudaUtils.BlockSize);
            var launchParam = new LaunchParam(grid, block);

            Gpu.Default.Launch(flatten_kernel, launchParam, total, x, spatial, layers, batch, forward, output);
        }
Beispiel #5
0
        /// <summary>
        /// Launches <c>softmax_kernel</c> on the default GPU. The grid is derived from
        /// <paramref name="groups"/> via <c>CudaUtils.cuda_gridsize</c>.
        /// </summary>
        /// <param name="input">Buffer forwarded to the kernel.</param>
        /// <param name="n">Passed to the kernel as its first (inputs) argument.</param>
        /// <param name="offset">Forwarded to the kernel.</param>
        /// <param name="groups">Passed to the kernel as its batch argument; also drives the grid size.</param>
        /// <param name="temp">Forwarded to the kernel. NOTE(review): presumably the softmax temperature; confirm.</param>
        /// <param name="output">Buffer forwarded to the kernel.</param>
        /// <param name="inputStart">Forwarded to the kernel; defaults to 0.</param>
        /// <param name="outputStart">Forwarded to the kernel; defaults to 0.</param>
        public static void softmax_gpu(float[] input, int n, int offset, int groups, float temp, float[] output, int inputStart = 0, int outputStart = 0)
        {
            var grid        = CudaUtils.cuda_gridsize(groups);
            var block       = new dim3(CudaUtils.BlockSize);
            var launchParam = new LaunchParam(grid, block);

            // Kernel argument order: inputs, offset, batch, input, temp, output, starts.
            Gpu.Default.Launch(
                softmax_kernel, launchParam,
                n, offset, groups,
                input, temp, output,
                inputStart, outputStart);
        }
Beispiel #6
0
 /// <summary>
 /// Runs a single-precision GEMM through cuBLAS (<c>cublasSgemm_v2</c>) and writes the
 /// result back into the caller's <paramref name="c"/> array.
 /// </summary>
 /// <remarks>
 /// The operands are handed to cuBLAS in swapped order (B before A, n before m, and the
 /// transpose flag of B first). NOTE(review): presumably the usual trick for computing a
 /// row-major product with column-major cuBLAS; confirm against callers.
 /// </remarks>
 /// <param name="ta">Non-zero selects <c>CUBLAS_OP_T</c> for A, zero selects <c>CUBLAS_OP_N</c>.</param>
 /// <param name="tb">Non-zero selects <c>CUBLAS_OP_T</c> for B, zero selects <c>CUBLAS_OP_N</c>.</param>
 /// <param name="m">Dimension forwarded to cuBLAS (second size argument).</param>
 /// <param name="n">Dimension forwarded to cuBLAS (first size argument).</param>
 /// <param name="k">Dimension forwarded to cuBLAS (third size argument).</param>
 /// <param name="alpha">Scalar passed to cuBLAS by pointer.</param>
 /// <param name="a">Host data for A; uploaded to the device and not modified.</param>
 /// <param name="lda">Leading dimension passed with A.</param>
 /// <param name="b">Host data for B; uploaded to the device and not modified.</param>
 /// <param name="ldb">Leading dimension passed with B.</param>
 /// <param name="beta">Scalar passed to cuBLAS by pointer.</param>
 /// <param name="c">Host data for C; uploaded before the call and overwritten with the device result afterwards.</param>
 /// <param name="ldc">Leading dimension passed with C.</param>
 public static unsafe void gemm_ongpu(int ta, int tb, int m, int n, int k, float alpha,
                                      float[] a, int lda,
                                      float[] b, int ldb,
                                      float beta,
                                      float[] c, int ldc)
 {
     // AllocateDevice already copies the host data to the device, so no extra host-side
     // ToArray() copy is needed.
     using (var gpuA = Gpu.Default.AllocateDevice(a))
     using (var gpuB = Gpu.Default.AllocateDevice(b))
     using (var gpuC = Gpu.Default.AllocateDevice(c))
     {
         var handle = CudaUtils.blas_handle();
         var opB    = tb != 0 ? cublasOperation_t.CUBLAS_OP_T : cublasOperation_t.CUBLAS_OP_N;
         var opA    = ta != 0 ? cublasOperation_t.CUBLAS_OP_T : cublasOperation_t.CUBLAS_OP_N;

         CudaUtils.SafeCall(CuBlas.cublasSgemm_v2(handle,
                                                  opB, opA,
                                                  n, m, k,
                                                  &alpha,
                                                  (float *)gpuB.Handle, ldb,
                                                  (float *)gpuA.Handle, lda,
                                                  &beta,
                                                  (float *)gpuC.Handle, ldc));

         // Arrays are passed by value (a copy of the reference): assigning a new array to
         // the parameter `c` would be invisible to the caller and the result would be lost.
         // Copy the device result into the caller's existing array instead. A and B are
         // pure inputs of GEMM, so they are not read back.
         float[] result = Gpu.CopyToHost(gpuC);
         System.Array.Copy(result, c, c.Length);
     }
 }
Beispiel #7
0
        /// <summary>
        /// Launches <c>shortcut_kernel</c> on the default GPU over the overlapping region of
        /// two (w, h, c) shapes: the element count is batch * min(w) * min(h) * min(c).
        /// </summary>
        /// <remarks>
        /// <c>stride</c> is w1 / w2 and <c>sample</c> is w2 / w1 (integer division), each
        /// clamped to at least 1. A zero <paramref name="w1"/> or <paramref name="w2"/>
        /// therefore raises a division-by-zero exception.
        /// </remarks>
        /// <param name="batch">Batch count; factor of the element count.</param>
        /// <param name="w1">Width of the first shape.</param>
        /// <param name="h1">Height of the first shape.</param>
        /// <param name="c1">Channel count of the first shape.</param>
        /// <param name="add">Buffer forwarded to the kernel alongside the first shape.</param>
        /// <param name="w2">Width of the second shape.</param>
        /// <param name="h2">Height of the second shape.</param>
        /// <param name="c2">Channel count of the second shape.</param>
        /// <param name="output">Buffer forwarded to the kernel as its last argument.</param>
        public static void shortcut_gpu(int batch, int w1, int h1, int c1, float[] add, int w2, int h2, int c2, float[] output)
        {
            // Extent shared by both shapes along each axis.
            int sharedW = System.Math.Min(w1, w2);
            int sharedH = System.Math.Min(h1, h2);
            int sharedC = System.Math.Min(c1, c2);

            // Integer ratios between the two widths, never below 1.
            int stride = System.Math.Max(1, w1 / w2);
            int sample = System.Math.Max(1, w2 / w1);

            int total       = batch * sharedW * sharedH * sharedC;
            var grid        = CudaUtils.cuda_gridsize(total);
            var block       = new dim3(CudaUtils.BlockSize);
            var launchParam = new LaunchParam(grid, block);

            Gpu.Default.Launch(
                shortcut_kernel, launchParam,
                total, sharedW, sharedH, sharedC,
                stride, sample, batch,
                w1, h1, c1, add,
                w2, h2, c2, output);
        }
Beispiel #8
0
        /// <summary>
        /// Launches <c>weighted_delta_kernel</c> on the default GPU with a grid derived from
        /// <paramref name="num"/> via <c>CudaUtils.cuda_gridsize</c>.
        /// </summary>
        /// <param name="a">Buffer forwarded to the kernel.</param>
        /// <param name="b">Buffer forwarded to the kernel.</param>
        /// <param name="s">Buffer forwarded to the kernel.</param>
        /// <param name="da">Buffer forwarded to the kernel.</param>
        /// <param name="db">Buffer forwarded to the kernel.</param>
        /// <param name="ds">Buffer forwarded to the kernel.</param>
        /// <param name="num">Element count; passed to the kernel first and used for grid sizing.</param>
        /// <param name="dc">Buffer forwarded to the kernel as its last argument.</param>
        public static void weighted_delta_gpu(float[] a, float[] b, float[] s, float[] da, float[] db, float[] ds, int num, float[] dc)
        {
            var grid        = CudaUtils.cuda_gridsize(num);
            var block       = new dim3(CudaUtils.BlockSize);
            var launchParam = new LaunchParam(grid, block);

            Gpu.Default.Launch(weighted_delta_kernel, launchParam, num, a, b, s, da, db, ds, dc);
        }
Beispiel #9
0
        /// <summary>
        /// Launches <c>gradient_array_kernel</c> on the default GPU with a grid derived from
        /// <paramref name="n"/> via <c>CudaUtils.cuda_gridsize</c>.
        /// </summary>
        /// <param name="x">Buffer forwarded to the kernel as its first argument.</param>
        /// <param name="n">Element count; forwarded to the kernel and used for grid sizing.</param>
        /// <param name="a">Activation value forwarded to the kernel.</param>
        /// <param name="delta">Buffer forwarded to the kernel as its last argument.</param>
        public static void gradient_array_ongpu(float[] x, int n, Activation a, float[] delta)
        {
            var grid        = CudaUtils.cuda_gridsize(n);
            var block       = new dim3(CudaUtils.BlockSize);
            var launchParam = new LaunchParam(grid, block);

            // Unlike most launchers here, this kernel takes the buffer before the count.
            Gpu.Default.Launch(gradient_array_kernel, launchParam, x, n, a, delta);
        }
Beispiel #10
0
        /// <summary>
        /// Launches <c>smooth_l1_kernel</c> on the default GPU with a grid derived from
        /// <paramref name="n"/> via <c>CudaUtils.cuda_gridsize</c>.
        /// </summary>
        /// <param name="n">Element count; forwarded to the kernel and used for grid sizing.</param>
        /// <param name="pred">Buffer forwarded to the kernel.</param>
        /// <param name="truth">Buffer forwarded to the kernel.</param>
        /// <param name="delta">Buffer forwarded to the kernel.</param>
        /// <param name="error">Buffer forwarded to the kernel as its last argument.</param>
        public static void smooth_l1_gpu(int n, float[] pred, float[] truth, float[] delta, float[] error)
        {
            var grid        = CudaUtils.cuda_gridsize(n);
            var block       = new dim3(CudaUtils.BlockSize);
            var launchParam = new LaunchParam(grid, block);

            Gpu.Default.Launch(smooth_l1_kernel, launchParam, n, pred, truth, delta, error);
        }
Beispiel #11
0
        /// <summary>
        /// Launches <c>fill_kernel</c> on the default GPU with a grid derived from
        /// <paramref name="n"/> via <c>CudaUtils.cuda_gridsize</c>.
        /// </summary>
        /// <param name="n">Element count; forwarded to the kernel and used for grid sizing.</param>
        /// <param name="alpha">Scalar forwarded to the kernel. NOTE(review): presumably the fill value; confirm.</param>
        /// <param name="x">Buffer forwarded to the kernel.</param>
        /// <param name="incx">Forwarded to the kernel. NOTE(review): presumably the stride within <paramref name="x"/>; confirm.</param>
        /// <param name="startx">Forwarded to the kernel; defaults to 0.</param>
        public static void fill_ongpu(int n, float alpha, float[] x, int incx, int startx = 0)
        {
            var grid        = CudaUtils.cuda_gridsize(n);
            var block       = new dim3(CudaUtils.BlockSize);
            var launchParam = new LaunchParam(grid, block);

            Gpu.Default.Launch(fill_kernel, launchParam, n, alpha, x, incx, startx);
        }
Beispiel #12
0
        /// <summary>
        /// Launches <c>mask_kernel</c> on the default GPU with a grid derived from
        /// <paramref name="n"/> via <c>CudaUtils.cuda_gridsize</c>.
        /// </summary>
        /// <param name="n">Element count; forwarded to the kernel and used for grid sizing.</param>
        /// <param name="x">Buffer forwarded to the kernel.</param>
        /// <param name="maskNum">Scalar forwarded to the kernel.</param>
        /// <param name="mask">Buffer forwarded to the kernel as its last argument.</param>
        public static void mask_ongpu(int n, float[] x, float maskNum, float[] mask)
        {
            var grid        = CudaUtils.cuda_gridsize(n);
            var block       = new dim3(CudaUtils.BlockSize);
            var launchParam = new LaunchParam(grid, block);

            Gpu.Default.Launch(mask_kernel, launchParam, n, x, maskNum, mask);
        }
Beispiel #13
0
        /// <summary>
        /// Launches <c>constrain_kernel</c> on the default GPU with a grid derived from
        /// <paramref name="n"/> via <c>CudaUtils.cuda_gridsize</c>.
        /// </summary>
        /// <param name="n">Element count; forwarded to the kernel and used for grid sizing.</param>
        /// <param name="alpha">Scalar forwarded to the kernel. NOTE(review): presumably the clamp bound; confirm.</param>
        /// <param name="x">Buffer forwarded to the kernel.</param>
        /// <param name="incx">Forwarded to the kernel.</param>
        public static void constrain_ongpu(int n, float alpha, float[] x, int incx)
        {
            var grid        = CudaUtils.cuda_gridsize(n);
            var block       = new dim3(CudaUtils.BlockSize);
            var launchParam = new LaunchParam(grid, block);

            Gpu.Default.Launch(constrain_kernel, launchParam, n, alpha, x, incx);
        }
Beispiel #14
0
        /// <summary>
        /// Launches <c>pow_kernel</c> on the default GPU with a grid derived from
        /// <paramref name="n"/> via <c>CudaUtils.cuda_gridsize</c>.
        /// </summary>
        /// <param name="n">Element count; forwarded to the kernel and used for grid sizing.</param>
        /// <param name="alpha">Scalar forwarded to the kernel. NOTE(review): presumably the exponent; confirm.</param>
        /// <param name="x">Buffer forwarded to the kernel.</param>
        /// <param name="y">Buffer forwarded to the kernel as its last argument.</param>
        public static void pow_ongpu(int n, float alpha, float[] x, float[] y)
        {
            var grid        = CudaUtils.cuda_gridsize(n);
            var block       = new dim3(CudaUtils.BlockSize);
            var launchParam = new LaunchParam(grid, block);

            Gpu.Default.Launch(pow_kernel, launchParam, n, alpha, x, y);
        }
Beispiel #15
0
        /// <summary>
        /// Launches <c>mult_add_into_kernel</c> on the default GPU with a grid derived from
        /// <paramref name="num"/> via <c>CudaUtils.cuda_gridsize</c>.
        /// </summary>
        /// <param name="num">Element count; forwarded to the kernel and used for grid sizing.</param>
        /// <param name="a">Buffer forwarded to the kernel.</param>
        /// <param name="b">Buffer forwarded to the kernel.</param>
        /// <param name="c">Buffer forwarded to the kernel as its last argument.</param>
        public static void mult_add_into_gpu(int num, float[] a, float[] b, float[] c)
        {
            var grid        = CudaUtils.cuda_gridsize(num);
            var block       = new dim3(CudaUtils.BlockSize);
            var launchParam = new LaunchParam(grid, block);

            Gpu.Default.Launch(mult_add_into_kernel, launchParam, num, a, b, c);
        }
Beispiel #16
0
        /// <summary>
        /// Launches <c>copy_kernel</c> on the default GPU with a grid derived from
        /// <paramref name="n"/> via <c>CudaUtils.cuda_gridsize</c>.
        /// </summary>
        /// <param name="n">Element count; forwarded to the kernel and used for grid sizing.</param>
        /// <param name="from">Buffer forwarded to the kernel before <paramref name="to"/>.</param>
        /// <param name="to">Buffer forwarded to the kernel after <paramref name="from"/>.</param>
        /// <param name="xStart">Forwarded to the kernel; defaults to 0.</param>
        /// <param name="yStart">Forwarded to the kernel; defaults to 0.</param>
        private static void copy_ongpu_offset(int n, float[] from, float[] to, int xStart = 0, int yStart = 0)
        {
            var grid        = CudaUtils.cuda_gridsize(n);
            var block       = new dim3(CudaUtils.BlockSize);
            var launchParam = new LaunchParam(grid, block);

            Gpu.Default.Launch(copy_kernel, launchParam, n, from, to, xStart, yStart);
        }
Beispiel #17
0
        /// <summary>
        /// Launches <c>mul_kernel</c> on the default GPU with a grid derived from
        /// <paramref name="n"/> via <c>CudaUtils.cuda_gridsize</c>.
        /// </summary>
        /// <param name="n">Element count; forwarded to the kernel and used for grid sizing.</param>
        /// <param name="x">Buffer forwarded to the kernel.</param>
        /// <param name="incx">Forwarded to the kernel directly after <paramref name="x"/>.</param>
        /// <param name="y">Buffer forwarded to the kernel.</param>
        /// <param name="incy">Forwarded to the kernel directly after <paramref name="y"/>.</param>
        public static void mul_ongpu(int n, float[] x, int incx, float[] y, int incy)
        {
            var grid        = CudaUtils.cuda_gridsize(n);
            var block       = new dim3(CudaUtils.BlockSize);
            var launchParam = new LaunchParam(grid, block);

            Gpu.Default.Launch(mul_kernel, launchParam, n, x, incx, y, incy);
        }
Beispiel #18
0
        /// <summary>
        /// Launches <c>axpy_kernel</c> on the default GPU with a grid derived from
        /// <paramref name="n"/> via <c>CudaUtils.cuda_gridsize</c>.
        /// </summary>
        /// <param name="n">Element count; forwarded to the kernel and used for grid sizing.</param>
        /// <param name="alpha">Scalar forwarded to the kernel.</param>
        /// <param name="x">Buffer forwarded to the kernel.</param>
        /// <param name="y">Buffer forwarded to the kernel.</param>
        /// <param name="xStart">Forwarded to the kernel; defaults to 0.</param>
        /// <param name="yStart">Forwarded to the kernel; defaults to 0.</param>
        private static void axpy_ongpu_offset(int n, float alpha, float[] x, float[] y, int xStart = 0, int yStart = 0)
        {
            var grid        = CudaUtils.cuda_gridsize(n);
            var block       = new dim3(CudaUtils.BlockSize);
            var launchParam = new LaunchParam(grid, block);

            Gpu.Default.Launch(axpy_kernel, launchParam, n, alpha, x, y, xStart, yStart);
        }