コード例 #1
0
ファイル: TensorOpGpu.cs プロジェクト: kapkapas/Neuro
        /// <summary>
        /// Launches the <c>GpuConv2DInputGradient</c> kernel and adds the partial results it
        /// produces into <paramref name="inputGradients"/>.
        /// </summary>
        /// <remarks>
        /// The kernel is handed a <c>[inputGradients.Length, blocks]</c> buffer of partials, where
        /// <c>blocks = GetBlocksNum(rotKernels.BatchSize * rotKernels.Height * rotKernels.Width)</c>.
        /// After the buffer is copied back, every entry of row <c>k</c> is added to
        /// <c>inputGradients.Values[k]</c>. The existing contents of <c>inputGradients.Values</c>
        /// are not cleared by this method, so results accumulate on top of them.
        /// Device memory is released through <c>Gpu.FreeAll()</c> even when a GPU call throws.
        /// </remarks>
        /// <param name="gradient">Tensor whose values are uploaded as the kernel's first input.</param>
        /// <param name="rotKernels">Tensor whose values are uploaded as the kernel's second input
        /// (presumably the rotated convolution kernels - confirm against the caller).</param>
        /// <param name="stride">Stride value forwarded to the kernel.</param>
        /// <param name="paddingX">Horizontal padding value forwarded to the kernel.</param>
        /// <param name="paddingY">Vertical padding value forwarded to the kernel.</param>
        /// <param name="inputGradients">Tensor that receives the accumulated results.</param>
        public override void Conv2DInputGradient(Tensor gradient, Tensor rotKernels, int stride, int paddingX, int paddingY, Tensor inputGradients)
        {
            // The fourth entry is the kernel extent with depth fixed to 1.
            // NOTE(review): presumably used by the kernel to decode a per-element thread index - confirm.
            GpuShape[] shapes = new[] { new GpuShape(gradient.Shape),
                                        new GpuShape(rotKernels.Shape),
                                        new GpuShape(inputGradients.Shape),
                                        new GpuShape(rotKernels.Width, rotKernels.Height, 1, rotKernels.BatchSize) };

            int threadsRequiredPerResultElem = rotKernels.BatchSize * rotKernels.Height * rotKernels.Width;

            // Computed once and shared by the host buffer and the launch grid so the two cannot disagree.
            int blocksPerResultElem = GetBlocksNum(threadsRequiredPerResultElem);

            float[,] resultPartials = new float[inputGradients.Length, blocksPerResultElem];

            try
            {
                float[]    devGradient       = Gpu.CopyToDevice(gradient.Values);
                float[]    devRotKernels     = Gpu.CopyToDevice(rotKernels.Values);
                GpuShape[] devShapes         = Gpu.CopyToDevice(shapes);
                float[,]   devResultPartials = Gpu.Allocate(resultPartials);

                // simulate
                //GpuConv2DInputGradient(GetSimulatedThread(blockSize, new dim3(bx, by, bz), new dim3(tx, ty, tz)), gradient.Values, rotKernels.Values, resultPartials, shapes, paddingX, paddingY, stride);

                Gpu.Launch(new dim3(inputGradients.Length, blocksPerResultElem), THREADS_PER_BLOCK).GpuConv2DInputGradient(devGradient, devRotKernels, devResultPartials, devShapes, paddingX, paddingY, stride);
                Gpu.Synchronize();

                Gpu.CopyFromDevice(devResultPartials, resultPartials);
            }
            finally
            {
                // Release device buffers on failure as well, otherwise a throwing GPU call leaks them.
                Gpu.FreeAll();
            }

            // Partials are added one at a time directly into the destination to keep the
            // floating-point summation order unchanged.
            for (int k = 0; k < resultPartials.GetLength(0); ++k)
            {
                for (int partialId = 0; partialId < resultPartials.GetLength(1); ++partialId)
                {
                    inputGradients.Values[k] += resultPartials[k, partialId];
                }
            }
        }
コード例 #2
0
ファイル: TensorOpGpu.cs プロジェクト: kapkapas/Neuro
        //public override void Add(Tensor t1, Tensor t2, Tensor result)
        //{
        //    int threadsRequired = result.Length;
        //    float[] devT1 = Gpu.CopyToDevice(t1.Values);
        //    float[] devT2 = Gpu.CopyToDevice(t2.Values);
        //    float[] devResult = Gpu.Allocate(result.Values);

        //    Gpu.Launch(GetBlocksNum(threadsRequired), THREADS_PER_BLOCK).GpuAdd(devT1, devT2, devResult);
        //    Gpu.Synchronize();

        //    Gpu.CopyFromDevice(devResult, result.Values);
        //    Gpu.FreeAll();
        //}

        //public override void Sub(Tensor t1, Tensor t2, Tensor result)
        //{
        //    int threadsRequired = result.Length;
        //    float[] devT1 = Gpu.CopyToDevice(t1.Values);
        //    float[] devT2 = Gpu.CopyToDevice(t2.Values);
        //    float[] devResult = Gpu.Allocate(result.Values);

        //    Gpu.Launch(GetBlocksNum(threadsRequired), THREADS_PER_BLOCK).GpuSub(devT1, devT2, devResult);
        //    Gpu.Synchronize();

        //    Gpu.CopyFromDevice(devResult, result.Values);
        //    Gpu.FreeAll();
        //}

        //public override void Mul(Tensor t1, Tensor t2, Tensor result)
        //{
        //    int threadsRequired = result.BatchSize * t1.Depth * t1.Height * t2.Width;
        //    GpuShape[] shapes = new [] { new GpuShape(t1.Shape), new GpuShape(t2.Shape), new GpuShape(result.Shape) };

        //    float[] devT1 = Gpu.CopyToDevice(t1.Values);
        //    float[] devT2 = Gpu.CopyToDevice(t2.Values);
        //    float[] devResult = Gpu.Allocate(result.Values);
        //    GpuShape[] devShapes = Gpu.CopyToDevice(shapes);

        //    Gpu.Launch(GetBlocksNum(threadsRequired), THREADS_PER_BLOCK).GpuMul(devT1, devT2, devResult, devShapes);
        //    Gpu.Synchronize();

        //    Gpu.CopyFromDevice(devResult, result.Values);
        //    Gpu.FreeAll();
        //}

        /// <summary>
        /// Launches the <c>GpuConv2D</c> kernel on <paramref name="t"/> and <paramref name="kernels"/>
        /// and copies the device output into <c>result.Values</c>.
        /// </summary>
        /// <remarks>
        /// The launch is sized for
        /// <c>t.BatchSize * kernels.BatchSize * result.Width * result.Height</c> threads.
        /// Device memory is released through <c>Gpu.FreeAll()</c> even when a GPU call throws.
        /// </remarks>
        /// <param name="t">Tensor whose values are uploaded as the kernel's first input.</param>
        /// <param name="kernels">Tensor whose values are uploaded as the kernel's second input.</param>
        /// <param name="stride">Stride value forwarded to the kernel.</param>
        /// <param name="paddingX">Horizontal padding value forwarded to the kernel.</param>
        /// <param name="paddingY">Vertical padding value forwarded to the kernel.</param>
        /// <param name="result">Tensor whose <c>Values</c> array is overwritten with the data copied back from the device.</param>
        public override void Conv2D(Tensor t, Tensor kernels, int stride, int paddingX, int paddingY, Tensor result)
        {
            int threadsRequired = t.BatchSize * kernels.BatchSize * result.Width * result.Height;

            GpuShape[] shapes = new[] { new GpuShape(t.Shape), new GpuShape(kernels.Shape), new GpuShape(result.Shape) };

            try
            {
                float[]    devT       = Gpu.CopyToDevice(t.Values);
                float[]    devKernels = Gpu.CopyToDevice(kernels.Values);
                float[]    devResult  = Gpu.Allocate(result.Values);
                GpuShape[] devShapes  = Gpu.CopyToDevice(shapes);

                Gpu.Launch(GetBlocksNum(threadsRequired), THREADS_PER_BLOCK).GpuConv2D(devT, devKernels, devResult, devShapes, paddingX, paddingY, stride);
                Gpu.Synchronize();

                Gpu.CopyFromDevice(devResult, result.Values);
            }
            finally
            {
                // Release device buffers on failure as well, otherwise a throwing GPU call leaks them.
                Gpu.FreeAll();
            }
        }