/// <summary>
/// Checks that the GPU fully connected backward-data pass produces the same result as the CPU one
/// </summary>
public unsafe void FullyConnectedBackwardData()
        {
            FullyConnectedLayer layer = new FullyConnectedLayer(TensorInfo.Linear(231), 125, ActivationType.Sigmoid, WeightsInitializationMode.GlorotUniform, BiasInitializationMode.Gaussian);
            Tensor dy = CreateRandomTensor(400, layer.OutputInfo.Size);

            fixed (float* pWeights = layer.Weights, pBiases = layer.Biases)
            {
                // Build tensors from the pinned weights and biases pointers
                // (the biases tensor is created but not used by the checks below)
                Tensor.Reshape(pWeights, layer.InputInfo.Size, layer.OutputInfo.Size, out Tensor weights);
                Tensor.Reshape(pBiases, 1, layer.OutputInfo.Size, out Tensor biases);

                // Reference result computed on the CPU
                Tensor.New(dy.Entities, layer.InputInfo.Size, out Tensor expected);
                CpuDnn.FullyConnectedBackwardData(weights, dy, expected);

                // Same operation on the GPU
                Gpu gpu = Gpu.Default;
                using (DeviceMemory<float> dyDevice = gpu.AllocateDevice(dy))
                using (DeviceMemory<float> weightsDevice = gpu.AllocateDevice(weights))
                using (DeviceMemory<float> dxDevice = gpu.AllocateDevice<float>(expected.Size))
                {
                    Dnn.Get(gpu).FullyConnectedBackwardData(dy.Entities, layer.InputInfo.Size, layer.OutputInfo.Size, dyDevice.Ptr, weightsDevice.Ptr, dxDevice.Ptr);
                    dxDevice.CopyToHost(expected.Entities, expected.Length, out Tensor actual);
                    Assert.IsTrue(expected.ContentEquals(actual));
                    Tensor.Free(dy, expected, actual);
                }
            }
        }
Beispiel #2
0
        /// <summary>
        /// Runs <c>AleaKernel</c> over the given arrays on the GPU, prints the elapsed time
        /// and copies the device copy of <paramref name="mIntraReturn"/> back to the host array
        /// </summary>
        /// <param name="gpu">The GPU used for the allocations and the kernel launch</param>
        /// <param name="mIntraReturn">Host array uploaded before the launch and overwritten with the device content afterwards</param>
        /// <param name="vClose">Host array uploaded to the device and passed to the kernel</param>
        /// <param name="vIsAlive">Host array uploaded to the device and passed to the kernel</param>
        /// <param name="vIsValidDay">Host array uploaded to the device and passed to the kernel</param>
        /// <param name="m">Size used for the Y dimension of the launch grid, also forwarded to the kernel</param>
        /// <param name="n">Size used for the X dimension of the launch grid, also forwarded to the kernel</param>
        public static void Alea(
            Gpu gpu,
            Real[] mIntraReturn,
            Real[] vClose,
            Real[] vIsAlive,
            Real[] vIsValidDay,
            int m,
            int n)
        {
            using (var deviceIntraReturn = gpu.AllocateDevice(mIntraReturn))
            using (var deviceClose = gpu.AllocateDevice(vClose))
            using (var deviceIsAlive = gpu.AllocateDevice(vIsAlive))
            using (var deviceIsValidDay = gpu.AllocateDevice(vIsValidDay))
            {
                // The timer starts after the uploads and stops before the download
                var stopwatch = Stopwatch.StartNew();

                // 32x8 threads per block, with enough blocks to cover n columns and m rows
                var gridDim     = new dim3(Util.DivUp(n, 32), Util.DivUp(m, 8));
                var blockDim    = new dim3(32, 8);
                var launchParam = new LaunchParam(gridDim, blockDim);

                gpu.Launch(
                    AleaKernel,
                    launchParam,
                    deviceIntraReturn.Ptr,
                    deviceClose.Ptr,
                    deviceIsAlive.Ptr,
                    deviceIsValidDay.Ptr,
                    m,
                    n);
                gpu.Synchronize();

                Util.PrintPerformance(stopwatch, "IntraReturn.Alea", 5, m, n);

                Gpu.Copy(deviceIntraReturn, mIntraReturn);
            }
        }
Beispiel #3
0
            /// <summary>
            /// Probes the default GPU, the Dnn availability flag and a small test kernel to determine
            /// whether GPU acceleration can be used
            /// </summary>
            /// <returns><c>true</c> if every check succeeds; <c>false</c> if any check fails or throws</returns>
            public static bool IsGpuAccelerationSupported()
            {
                const int length = 1024;

                try
                {
                    // A default device and the Dnn (cuDNN) library must both be available
                    Gpu gpu = Gpu.Default;
                    if (gpu == null || !Dnn.IsAvailable)
                    {
                        return false;
                    }

                    using (DeviceMemory<float> buffer = gpu.AllocateDevice<float>(length))
                    {
                        // Run a trivial kernel that writes each index into the buffer (JIT test)
                        deviceptr<float> ptr = buffer.Ptr;
                        void Kernel(int i) => ptr[i] = i;

                        Alea.Parallel.GpuExtension.For(gpu, 0, length, Kernel);

                        // The buffer read back must contain 0, 1, ..., length - 1
                        float[] actual = Gpu.CopyToHost(buffer);
                        float[] expected = Enumerable.Range(0, length).Select<int, float>(i => i).ToArray();
                        return expected.ContentEquals(actual);
                    }
                }
                catch
                {
                    // Any failure (e.g. a missing .dll) is reported as "not supported"
                    return false;
                }
            }
        /// <summary>
        /// Checks that the GPU sigmoid activation forward pass produces the same result as the CPU one
        /// </summary>
        public void ActivationForward()
        {
            Tensor input = CreateRandomTensor(400, 1200);

            // Reference result computed on the CPU
            Tensor.Like(input, out Tensor expected);
            CpuDnn.ActivationForward(input, ActivationFunctions.Sigmoid, expected);

            // Same operation on the GPU
            Gpu gpu = Gpu.Default;
            using (DeviceMemory<float> inputDevice = gpu.AllocateDevice(input))
            using (DeviceMemory<float> outputDevice = gpu.AllocateDevice<float>(input.Size))
            {
                Dnn.Get(gpu).ActivationForward(input.Entities, input.Length, inputDevice.Ptr, outputDevice.Ptr, ActivationFunctions.Sigmoid);
                outputDevice.CopyToHost(expected.Entities, expected.Length, out Tensor actual);
                Assert.IsTrue(expected.ContentEquals(actual));
                Tensor.Free(input, expected, actual);
            }
        }
Beispiel #5
0
        /// <summary>
        /// Runs <c>AleaKernel</c> on the device copies of the matrix and the vector, prints the elapsed
        /// time and copies the device matrix back into <paramref name="matrix"/>
        /// </summary>
        /// <param name="gpu">The GPU used for the allocations and the kernel launch</param>
        /// <param name="matrix">Host array uploaded before the launch and overwritten with the device content afterwards</param>
        /// <param name="vector">Host array uploaded to the device and passed to the kernel</param>
        /// <param name="m">Size used for the Y dimension of the launch grid, also forwarded to the kernel</param>
        /// <param name="n">Size used for the X dimension of the launch grid, also forwarded to the kernel</param>
        public static void Alea(Gpu gpu, Real[] matrix, Real[] vector, int m, int n)
        {
            using (var deviceMatrix = gpu.AllocateDevice(matrix))
            using (var deviceVector = gpu.AllocateDevice(vector))
            {
                // The timer starts after the uploads and stops before the download
                var stopwatch = Stopwatch.StartNew();

                // 32x8 threads per block, with enough blocks to cover n columns and m rows
                var gridDim     = new dim3(Util.DivUp(n, 32), Util.DivUp(m, 8));
                var blockDim    = new dim3(32, 8);
                var launchParam = new LaunchParam(gridDim, blockDim);

                gpu.Launch(AleaKernel, launchParam, deviceMatrix.Ptr, deviceVector.Ptr, m, n);
                gpu.Synchronize();

                Util.PrintPerformance(stopwatch, "AddVector.Alea", 3, m, n);

                Gpu.Copy(deviceMatrix, matrix);
            }
        }
        /// <summary>
        /// Checks that the GPU sigmoid activation backward pass produces the same result as the CPU one
        /// </summary>
        public void ActivationBackward()
        {
            Tensor x = CreateRandomTensor(400, 1200);
            Tensor dy = CreateRandomTensor(400, 1200);

            // Reference result computed on the CPU
            Tensor.Like(x, out Tensor expected);
            CpuDnn.ActivationBackward(x, dy, ActivationFunctions.SigmoidPrime, expected);

            // Same operation on the GPU: the dy buffer is passed both as the gradient input
            // and as the last argument, and is then read back as the result
            Gpu gpu = Gpu.Default;
            using (DeviceMemory<float> xDevice = gpu.AllocateDevice(x))
            using (DeviceMemory<float> dyDevice = gpu.AllocateDevice(dy))
            {
                Dnn.Get(gpu).ActivationBackward(x.Entities, x.Length, xDevice.Ptr, dyDevice.Ptr, ActivationFunctions.SigmoidPrime, dyDevice.Ptr);
                dyDevice.CopyToHost(dy.Entities, dy.Length, out Tensor actual);
                Assert.IsTrue(expected.ContentEquals(actual));
                Tensor.Free(x, dy, expected, actual);
            }
        }
Beispiel #7
0
        /// <summary>
        /// Issues a single batched cuBLAS GEMM call (double or single precision, depending on
        /// <c>DOUBLE_PRECISION</c>) over the device copies of the inputs, prints the elapsed time
        /// and copies the device result buffer back into <paramref name="result"/>
        /// </summary>
        /// <param name="gpu">The GPU used for the allocations and the cuBLAS call</param>
        /// <param name="result">Host array uploaded before the call and overwritten with the device content afterwards</param>
        /// <param name="left">Host array addressed in steps of n * n elements, one step per batch entry</param>
        /// <param name="right">Host array whose device base address is used for every batch entry</param>
        /// <param name="m">Number of pointer entries built for each operand; passed as the last argument of the batched call</param>
        /// <param name="n">Value passed for all three matrix dimensions and all three leading dimensions</param>
        public static unsafe void Alea(Gpu gpu, Real[] result, Real[] left, Real[] right, int m, int n)
        {
            using (var cudaResult = gpu.AllocateDevice(result))
                using (var cudaLeft = gpu.AllocateDevice(left))
                    using (var cudaRight = gpu.AllocateDevice(right))
                    {
                        // Scalars handed to the GEMM call by pointer (alpha = 1, beta = 0)
                        var alphas  = new Real[] { 1 };
                        var betas   = new Real[] { 0 };
                        // Per-entry device addresses: results and lefts advance by n * n elements per entry,
                        // while every rights entry is the same base address.
                        // NOTE(review): i * n * n * sizeof(Real) is evaluated in int arithmetic and could
                        // overflow for large m and n - confirm the expected input sizes.
                        var results = Enumerable.Range(0, m).Select(i => cudaResult.Ptr.Handle + i * n * n * sizeof(Real)).ToArray();
                        var lefts   = Enumerable.Range(0, m).Select(i => cudaLeft.Ptr.Handle + i * n * n * sizeof(Real)).ToArray();
                        var rights  = Enumerable.Range(0, m).Select(i => cudaRight.Ptr.Handle).ToArray();

                        // The address tables themselves are uploaded to the device as well
                        using (var cudaResults = gpu.AllocateDevice(results))
                            using (var cudaLefts = gpu.AllocateDevice(lefts))
                                using (var cudaRights = gpu.AllocateDevice(rights))
                                {
                                    // Pin the scalar arrays so their addresses stay valid during the call
                                    fixed(Real *pAlphas = alphas)
                                    fixed(Real * pBetas = betas)
                                    {
                                        var timer = Stopwatch.StartNew();

                                        var blas    = global::Alea.cuBLAS.Blas.Get(gpu);
                                        // Copies of the fixed pointers, used inside the lambda below
                                        var lAlphas = pAlphas;
                                        var lBetas  = pBetas;

                                        // The call is wrapped in cublasSafeCall and executed through gpu.EvalAction;
                                        // the precision-specific entry point is selected at compile time
                                        gpu.EvalAction(() =>
                                                       global::Alea.cuBLAS.Interop.cublasSafeCall(
#if DOUBLE_PRECISION
                                                           global::Alea.cuBLAS.Interop.cublasDgemmBatched(
#else
                                                           global::Alea.cuBLAS.Interop.cublasSgemmBatched(
#endif
                                                               blas.Handle,
                                                               global::Alea.cuBLAS.Operation.N,
                                                               global::Alea.cuBLAS.Operation.N,
                                                               n,
                                                               n,
                                                               n,
                                                               lAlphas,
                                                               // ReSharper disable AccessToDisposedClosure
                                                               cudaLefts.Ptr.Handle,
                                                               n,
                                                               cudaRights.Ptr.Handle,
                                                               n,
                                                               lBetas,
                                                               cudaResults.Ptr.Handle,
                                                               // ReSharper restore AccessToDisposedClosure
                                                               n,
                                                               m)));

                                        gpu.Synchronize();

                                        PrintPerformance(timer, "ManyMatrixMultiplication.cuBLAS", m * n, n, n);

                                        // Download the result while the device buffers are still alive
                                        Gpu.Copy(cudaResult, result);
                                    }
                                }
                    }
        }
Beispiel #8
0
        /// <summary>
        /// Runs <c>AleaKernel</c> on the device copies of the two arrays with a one-dimensional launch
        /// configuration, prints the elapsed time and copies the device distances back to the host array
        /// </summary>
        /// <param name="gpu">The GPU used for the allocations and the kernel launch</param>
        /// <param name="mSquaredDistances">Host array uploaded before the launch and overwritten with the device content afterwards</param>
        /// <param name="mCoordinates">Host array uploaded to the device and passed to the kernel</param>
        /// <param name="c">Value forwarded to the kernel</param>
        /// <param name="n">Value forwarded to the kernel; n * n is also used to size the launch grid</param>
        public static void Alea(
            Gpu gpu,
            Real[] mSquaredDistances,
            Real[] mCoordinates,
            int c,
            int n)
        {
            const int threadsPerBlock = 128;

            using var deviceDistances   = gpu.AllocateDevice(mSquaredDistances);
            using var deviceCoordinates = gpu.AllocateDevice(mCoordinates);

            // The timer starts after the uploads and stops before the download
            var stopwatch = Stopwatch.StartNew();

            // Enough blocks of threadsPerBlock threads to cover n * n items
            var blockCount  = Util.DivUp(n * n, threadsPerBlock);
            var launchParam = new LaunchParam(blockCount, threadsPerBlock);

            gpu.Launch(AleaKernel, launchParam, deviceDistances.Ptr, deviceCoordinates.Ptr, c, n);
            gpu.Synchronize();

            Util.PrintPerformance(stopwatch, "SquaredDistance.Alea", n, c, n);

            Gpu.Copy(deviceDistances, mSquaredDistances);
        }
Beispiel #9
0
        /// <summary>
        /// Calls the cuBLAS <c>Gemm</c> wrapper on the device copies of the inputs, prints the elapsed
        /// time and copies the device result buffer back into <paramref name="result"/>
        /// </summary>
        /// <param name="gpu">The GPU used for the allocations and the cuBLAS call</param>
        /// <param name="result">Host array uploaded before the call and overwritten with the device content afterwards</param>
        /// <param name="left">Host array uploaded to the device and passed as the first matrix operand</param>
        /// <param name="right">Host array uploaded to the device and passed as the second matrix operand</param>
        /// <param name="n">Value passed for all three matrix dimensions and all three leading dimensions</param>
        public static void Alea(Gpu gpu, Real[] result, Real[] left, Real[] right, int n)
        {
            using (var deviceResult = gpu.AllocateDevice(result))
            using (var deviceLeft = gpu.AllocateDevice(left))
            using (var deviceRight = gpu.AllocateDevice(right))
            {
                // The timer starts after the uploads and stops before the download
                var stopwatch = Stopwatch.StartNew();

                var blas = global::Alea.cuBLAS.Blas.Get(gpu);
                blas.Gemm(
                    global::Alea.cuBLAS.Operation.N,
                    global::Alea.cuBLAS.Operation.N,
                    n,
                    n,
                    n,
                    1,
                    deviceLeft.Ptr,
                    n,
                    deviceRight.Ptr,
                    n,
                    0,
                    deviceResult.Ptr,
                    n);
                gpu.Synchronize();

                PrintPerformance(stopwatch, "MatrixMultiplication.Alea.cuBLAS", n, n, n);

                Gpu.Copy(deviceResult, result);
            }
        }
        /// <summary>
        /// Checks that the GPU fully connected backward-filter pass produces the same result as the CPU one
        /// </summary>
        public void FullyConnectedBackwardFilter()
        {
            FullyConnectedLayer layer = new FullyConnectedLayer(TensorInfo.Linear(231), 125, ActivationType.Sigmoid, WeightsInitializationMode.GlorotUniform, BiasInitializationMode.Gaussian);
            Tensor x = CreateRandomTensor(400, layer.InputInfo.Size);
            Tensor dy = CreateRandomTensor(x.Entities, layer.OutputInfo.Size);

            // Reference gradient computed on the CPU, then reshaped to a single row
            Tensor.New(layer.InputInfo.Size, layer.OutputInfo.Size, out Tensor expected);
            CpuDnn.FullyConnectedBackwardFilter(x, dy, expected);
            expected.Reshape(1, expected.Size, out expected);

            // Same operation on the GPU, with an output buffer as large as the layer weights
            Gpu gpu = Gpu.Default;
            using (DeviceMemory<float> xDevice = gpu.AllocateDevice(x))
            using (DeviceMemory<float> dyDevice = gpu.AllocateDevice(dy))
            using (DeviceMemory<float> dJdwDevice = gpu.AllocateDevice<float>(layer.Weights.Length))
            {
                Dnn.Get(gpu).FullyConnectedBackwardFilter(x.Entities, layer.InputInfo.Size, layer.OutputInfo.Size, xDevice.Ptr, dyDevice.Ptr, dJdwDevice.Ptr);
                dJdwDevice.CopyToHost(1, layer.Weights.Length, out Tensor actual);
                Assert.IsTrue(expected.ContentEquals(actual));
                Tensor.Free(x, dy, expected, actual);
            }
        }
Beispiel #11
0
        /// <summary>
        /// Checks that copying a device buffer into a tensor with <c>CopyTo(tensor, 5, 3)</c> writes
        /// the 9 source values into columns 5 to 7 of each row, leaving the other values at zero
        /// </summary>
        public void CopyToRows()
        {
            float[] test = { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
            Tensor.NewZeroed(3, 10, out Tensor tensor);

            try
            {
                Gpu gpu = Gpu.Default;

                using (DeviceMemory<float> m_gpu = gpu.AllocateDevice(test))
                {
                    m_gpu.CopyTo(tensor, 5, 3);
                }
                float[,] expected =
                {
                    { 0, 0, 0, 0, 0, 1, 2, 3, 0, 0 },
                    { 0, 0, 0, 0, 0, 4, 5, 6, 0, 0 },
                    { 0, 0, 0, 0, 0, 7, 8, 9, 0, 0 }
                };
                Assert.IsTrue(tensor.ToArray2D().ContentEquals(expected));
            }
            finally
            {
                // Release the tensor even when the assertion fails, like the other tests do with Tensor.Free
                Tensor.Free(tensor);
            }
        }
Beispiel #12
0
        /// <summary>
        /// Checks that <c>AllocateDevice(tensor, 5, 3)</c> uploads only columns 5 to 7 of each row
        /// of the source tensor to the device
        /// </summary>
        public void AllocateDeviceRows()
        {
            float[,] source =
            {
                { 0, 0, 0, 0, 0, 1, 2, 3, 0, 0 },
                { 0, 0, 0, 0, 0, 4, 5, 6, 0, 0 },
                { 0, 0, 0, 0, 0, 7, 8, 9, 0, 0 }
            };
            Tensor.From(source, out Tensor tensor);

            try
            {
                Gpu gpu = Gpu.Default;

                using (DeviceMemory<float> m_gpu = gpu.AllocateDevice(tensor, 5, 3))
                {
                    float[]
                    copy = Gpu.CopyToHost(m_gpu),
                    expected = { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
                    Assert.IsTrue(copy.ContentEquals(expected));
                }
            }
            finally
            {
                // Release the tensor even when the assertion fails, like the other tests do with Tensor.Free
                Tensor.Free(tensor);
            }
        }
        public void PerActivationBatchNormalizationForward()
        {
            // Setup
            Tensor x = CreateRandomTensor(400, 250);

            Tensor.NewZeroed(1, 250, out Tensor mu);
            Tensor.LikeZeroed(mu, out Tensor sigma2);
            Tensor.New(1, 250, out Tensor gamma);
            Tensor.NewZeroed(1, 250, out Tensor beta);
            for (int i = 0; i < 250; i++)
            {
                gamma[i] = ThreadSafeRandom.NextFloat();
            }

            // Cpu
            Tensor.Like(x, out Tensor y1);
            CpuDnn.BatchNormalizationForward(NormalizationMode.PerActivation, TensorInfo.Linear(250), x, 1, mu, sigma2, gamma, beta, y1);

            // Gpu
            Gpu gpu = Gpu.Default;

            using (DeviceMemory <float>
                   x_gpu = gpu.AllocateDevice(x),
                   y_gpu = gpu.AllocateDevice <float>(x.Size),
                   gamma_gpu = gpu.AllocateDevice(gamma),
                   beta_gpu = gpu.AllocateDevice(beta),
                   run_mean = gpu.AllocateDevice <float>(mu.Size),
                   run_var = gpu.AllocateDevice <float>(mu.Size))
            {
                TensorDescriptor desc = new TensorDescriptor();
                desc.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, x.Length, 1, 1);
                TensorDescriptor gammaBetadesc = new TensorDescriptor();
                gammaBetadesc.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, 1, x.Length, 1, 1);
                Dnn.Get(gpu).BatchNormalizationForwardTraining(
                    BatchNormMode.PER_ACTIVATION, 1, 0,
                    desc, x_gpu.Ptr, desc, y_gpu.Ptr,
                    gammaBetadesc, gamma_gpu.Ptr, beta_gpu.Ptr,
                    1, run_mean.Ptr, run_var.Ptr, CpuDnn.CUDNN_BN_MIN_EPSILON,
 /// <summary>
 /// Initializes the rendering state: the font, the image size derived from the second panel of
 /// <c>ScreenDivider</c>, the host and device buffers for directions and pixels, the bitmap over the
 /// pixel buffer, the camera and light positions, the launch parameters, and the seed and offset tables
 /// </summary>
 public void Init()
 {
     Font       = new Font(FontFamily.GenericMonospace, 10);
     // Panel size scaled down by granularity; with integer operands "/ 4 * 4" rounds down to a multiple of 4
     width      = ScreenDivider.Panel2.ClientSize.Width / granularity / 4 * 4;
     height     = ScreenDivider.Panel2.ClientSize.Height / granularity / 4 * 4;
     // One direction per pixel, with a matching device allocation
     directions = new float3[width * height];
     dirDevMem  = gpu.AllocateDevice(directions);
     dirDevPtr  = dirDevMem.Ptr;
     // Host pixel buffer ("bytes" bytes per pixel) and its device counterpart
     pixels     = new byte[width * height * bytes];
     pixDevMem  = gpu.AllocateDevice(pixels);
     pixDevPtr  = pixDevMem.Ptr;
     // The bitmap is created over the address of the pixels array, with a stride of width * bytes.
     // NOTE(review): UnsafeAddrOfPinnedArrayElement expects a pinned array and no pinning is visible
     // in this method - confirm that pixels is pinned elsewhere.
     b          = new Bitmap(width, height, width * bytes,
                             PixelFormat.Format24bppRgb,
                             Marshal.UnsafeAddrOfPinnedArrayElement(pixels, 0));
     center        = new float3(0, 0, 0);
     camera        = new float3(3, 0, 0);
     lightLocation = new float3(20, 20, 20);
     // NOTE(review): if these are integer divisions, any remainder of width / height over the block
     // size is not covered by the grid - confirm BlockSize divides both evenly
     GridSize      = new dim3(width / BlockSize.x, height / BlockSize.y);
     launchParam   = new LaunchParam(GridSize, BlockSize);
     movementSize  = baseMovementSize;
     GetDirections();
     // The three axis vectors are assigned after the GetDirections() call
     x     = new float3(0, 0, 1);
     y     = new float3(0, 1, 0);
     z     = new float3(-1, 0, 0);
     // 25 preset seed vectors (entries 13 and 15 are identical)
     seeds = new[] {
         new float3(1.8f, -0.12f, 0.5f),
         new float3(1.9073f, 2.72f, -1.16f),
         new float3(2.02f, -1.57f, 1.62f),
         new float3(1.65f, 0.37f, -1.023f),
         new float3(1.77f, -0.22f, -0.663f),
         new float3(1.66f, 1.52f, 0.19f),
         new float3(1.58f, -1.45f, -2.333f),
         new float3(1.87f, 3.141f, 0.02f),
         new float3(1.81f, 1.44f, -2.99f),
         new float3(1.93f, 1.34637f, 1.58f),
         new float3(1.88f, 1.52f, -1.373f),
         new float3(1.6f, -2.51f, -2.353f),
         new float3(2.08f, 1.493f, 3.141f),
         new float3(2.0773f, 2.906f, -1.34f),
         new float3(1.78f, -0.1f, -3.003f),
         new float3(2.0773f, 2.906f, -1.34f),
         new float3(1.8093f, 3.141f, 3.074f),
         new float3(1.95f, 1.570796f, 0),
         new float3(1.91f, 0.06f, -0.76f),
         new float3(1.8986f, -0.4166f, 0.00683f),
         new float3(2.03413f, 1.688f, -1.57798f),
         new float3(1.6516888f, 0.026083898f, -0.7996324f),
         new float3(1.77746f, -1.66f, 0.0707307f),
         new float3(2.13f, -1.77f, -1.62f),
         new float3(1, 0, 0)
     };
     // 25 preset offset vectors - presumably paired by index with seeds; verify against the usage
     offsets = new[] {
         new float3(0.353333f, 0.458333f, -0.081667f),
         new float3(0.493000f, 0.532167f, -0.449167f),
         new float3(0.551667f, -1.031667f, -0.255000f),
         new float3(0.235000f, 0.036667f, 0.128333f),
         new float3(0.346667f, 0.236667f, 0.321667f),
         new float3(0.638333f, 0.323333f, 0.181667f),
         new float3(0.258333f, 0.021667f, 0.420000f),
         new float3(0.595000f, -0.021500f, -0.491667f),
         new float3(0.484167f, -0.127500f, 0.694167f),
         new float3(0.385000f, -0.187167f, -0.260000f),
         new float3(0.756667f, 0.210000f, -0.016667f),
         new float3(0.333333f, 0.068333f, 0.238333f),
         new float3(1.238333f, -0.993333f, 1.038333f),
         new float3(0.206333f, 0.255500f, -0.180833f),
         new float3(0.245000f, -0.283333f, 0.066667f),
         new float3(0.206333f, 0.255500f, -0.180833f),
         new float3(0.182317f, 0.072492f, 0.518550f),
         new float3(1.125000f, 0.500000f, 0.000000f),
         new float3(0.573333f, 0.115000f, 0.190000f),
         new float3(0.418833f, 0.901117f, 0.418333f),
         new float3(0.800637f, 0.683333f, 0.231772f),
         new float3(0.643105f, 0.856235f, 0.153051f),
         new float3(0.781117f, 0.140627f, -0.330263f),
         new float3(0.831667f, 0.508333f, 0.746667f),
         new float3(0.000000f, 0.000000f, 0.000000f),
     };
     // Sets the non-public DoubleBuffered property of the panel to true through reflection
     typeof(SplitterPanel).GetProperty("DoubleBuffered", BindingFlags.NonPublic |
                                       BindingFlags.Instance).SetValue(ScreenDivider.Panel2, true, null);
 }