/// <summary>
/// Checks that the GPU backward-data pass of a fully connected layer yields the
/// same result as <c>CpuDnn.FullyConnectedBackwardData</c>.
/// </summary>
public unsafe void FullyConnectedBackwardData()
{
    // Layer whose weights are used by both implementations
    FullyConnectedLayer layer = new FullyConnectedLayer(
        TensorInfo.Linear(231),
        125,
        ActivationType.Sigmoid,
        WeightsInitializationMode.GlorotUniform,
        BiasInitializationMode.Gaussian);
    Tensor outputGradient = CreateRandomTensor(400, layer.OutputInfo.Size);

    fixed (float* weightsPtr = layer.Weights, biasesPtr = layer.Biases)
    {
        // Expose the fixed layer parameters as tensors
        Tensor.Reshape(weightsPtr, layer.InputInfo.Size, layer.OutputInfo.Size, out Tensor weights);
        Tensor.Reshape(biasesPtr, 1, layer.OutputInfo.Size, out Tensor biases); // not read again below

        // Cpu
        Tensor.New(outputGradient.Entities, layer.InputInfo.Size, out Tensor cpuInputGradient);
        CpuDnn.FullyConnectedBackwardData(weights, outputGradient, cpuInputGradient);

        // Gpu
        Gpu gpu = Gpu.Default;
        using (DeviceMemory<float> outputGradientDevice = gpu.AllocateDevice(outputGradient))
        using (DeviceMemory<float> weightsDevice = gpu.AllocateDevice(weights))
        using (DeviceMemory<float> inputGradientDevice = gpu.AllocateDevice<float>(cpuInputGradient.Size))
        {
            Dnn.Get(gpu).FullyConnectedBackwardData(
                outputGradient.Entities,
                layer.InputInfo.Size,
                layer.OutputInfo.Size,
                outputGradientDevice.Ptr,
                weightsDevice.Ptr,
                inputGradientDevice.Ptr);
            inputGradientDevice.CopyToHost(cpuInputGradient.Entities, cpuInputGradient.Length, out Tensor gpuInputGradient);

            // Both implementations must agree
            Assert.IsTrue(cpuInputGradient.ContentEquals(gpuInputGradient));
            Tensor.Free(outputGradient, cpuInputGradient, gpuInputGradient);
        }
    }
}
/// <summary>
/// Uploads the four input arrays to the device, launches <c>AleaKernel</c> on a
/// 2D grid of 32x8 blocks (n along x, m along y), reports the timing and copies
/// the first device buffer back into <paramref name="mIntraReturn"/>.
/// </summary>
/// <param name="gpu">Device used for allocation, launch and synchronization.</param>
/// <param name="mIntraReturn">Host array uploaded before the launch and overwritten with the device content afterwards.</param>
/// <param name="vClose">Host array uploaded as the second kernel argument.</param>
/// <param name="vIsAlive">Host array uploaded as the third kernel argument.</param>
/// <param name="vIsValidDay">Host array uploaded as the fourth kernel argument.</param>
/// <param name="m">Extent used to size the grid along y; forwarded to the kernel.</param>
/// <param name="n">Extent used to size the grid along x; forwarded to the kernel.</param>
public static void Alea(
    Gpu gpu,
    Real[] mIntraReturn,
    Real[] vClose,
    Real[] vIsAlive,
    Real[] vIsValidDay,
    int m,
    int n)
{
    const int blockWidth = 32;
    const int blockHeight = 8;

    using (var deviceIntraReturn = gpu.AllocateDevice(mIntraReturn))
    using (var deviceClose = gpu.AllocateDevice(vClose))
    using (var deviceIsAlive = gpu.AllocateDevice(vIsAlive))
    using (var deviceIsValidDay = gpu.AllocateDevice(vIsValidDay))
    {
        // The stopwatch starts after the uploads, so only launch + synchronize are timed
        var stopwatch = Stopwatch.StartNew();

        var gridWidth = Util.DivUp(n, blockWidth);
        var gridHeight = Util.DivUp(m, blockHeight);
        var launchParam = new LaunchParam(
            new dim3(gridWidth, gridHeight),
            new dim3(blockWidth, blockHeight));

        gpu.Launch(
            AleaKernel,
            launchParam,
            deviceIntraReturn.Ptr,
            deviceClose.Ptr,
            deviceIsAlive.Ptr,
            deviceIsValidDay.Ptr,
            m,
            n);
        gpu.Synchronize();

        Util.PrintPerformance(stopwatch, "IntraReturn.Alea", 5, m, n);

        // Download the result into the caller's array
        Gpu.Copy(deviceIntraReturn, mIntraReturn);
    }
}
/// <summary>
/// Checks whether or not the Cuda features are currently supported
/// </summary>
/// <returns>
/// <c>true</c> when a default GPU exists, <c>Dnn.IsAvailable</c> is set and a small
/// parallel-for kernel writes the expected values; <c>false</c> otherwise, including
/// when any exception is thrown along the way.
/// </returns>
public static bool IsGpuAccelerationSupported()
{
    const int sampleLength = 1024;

    try
    {
        // CUDA test, then cuDNN
        Gpu gpu = Gpu.Default;
        if (gpu == null || !Dnn.IsAvailable)
        {
            return false;
        }

        using (DeviceMemory<float> deviceSample = gpu.AllocateDevice<float>(sampleLength))
        {
            // JIT test: every index writes its own value into the device buffer
            deviceptr<float> target = deviceSample.Ptr;
            void Kernel(int i) => target[i] = i;
            Alea.Parallel.GpuExtension.For(gpu, 0, sampleLength, Kernel);

            float[] actual = Gpu.CopyToHost(deviceSample);
            float[] expected = Enumerable.Range(0, sampleLength).Select<int, float>(i => i).ToArray();
            return expected.ContentEquals(actual);
        }
    }
    catch
    {
        // Missing .dll or other errors
        return false;
    }
}
/// <summary>
/// Checks that the GPU activation forward pass with <c>ActivationFunctions.Sigmoid</c>
/// matches the output of <c>CpuDnn.ActivationForward</c> on the same input.
/// </summary>
public void ActivationForward()
{
    // Cpu
    Tensor input = CreateRandomTensor(400, 1200);
    Tensor.Like(input, out Tensor cpuOutput);
    CpuDnn.ActivationForward(input, ActivationFunctions.Sigmoid, cpuOutput);

    // Gpu
    Gpu gpu = Gpu.Default;
    using (DeviceMemory<float> inputDevice = gpu.AllocateDevice(input))
    using (DeviceMemory<float> outputDevice = gpu.AllocateDevice<float>(input.Size))
    {
        Dnn.Get(gpu).ActivationForward(
            input.Entities,
            input.Length,
            inputDevice.Ptr,
            outputDevice.Ptr,
            ActivationFunctions.Sigmoid);
        outputDevice.CopyToHost(cpuOutput.Entities, cpuOutput.Length, out Tensor gpuOutput);

        // Both implementations must agree
        Assert.IsTrue(cpuOutput.ContentEquals(gpuOutput));
        Tensor.Free(input, cpuOutput, gpuOutput);
    }
}
/// <summary>
/// Uploads <paramref name="matrix"/> and <paramref name="vector"/> to the device,
/// launches <c>AleaKernel</c> on a 2D grid of 32x8 blocks (n along x, m along y),
/// reports the timing and copies the matrix buffer back into <paramref name="matrix"/>.
/// </summary>
/// <param name="gpu">Device used for allocation, launch and synchronization.</param>
/// <param name="matrix">Host array uploaded before the launch and overwritten with the device content afterwards.</param>
/// <param name="vector">Host array uploaded as the second kernel argument.</param>
/// <param name="m">Extent used to size the grid along y; forwarded to the kernel.</param>
/// <param name="n">Extent used to size the grid along x; forwarded to the kernel.</param>
public static void Alea(Gpu gpu, Real[] matrix, Real[] vector, int m, int n)
{
    const int blockWidth = 32;
    const int blockHeight = 8;

    using (var deviceMatrix = gpu.AllocateDevice(matrix))
    using (var deviceVector = gpu.AllocateDevice(vector))
    {
        // The stopwatch starts after the uploads, so only launch + synchronize are timed
        var stopwatch = Stopwatch.StartNew();

        var gridWidth = Util.DivUp(n, blockWidth);
        var gridHeight = Util.DivUp(m, blockHeight);
        var launchParam = new LaunchParam(
            new dim3(gridWidth, gridHeight),
            new dim3(blockWidth, blockHeight));

        gpu.Launch(AleaKernel, launchParam, deviceMatrix.Ptr, deviceVector.Ptr, m, n);
        gpu.Synchronize();

        Util.PrintPerformance(stopwatch, "AddVector.Alea", 3, m, n);

        // Download the result into the caller's array
        Gpu.Copy(deviceMatrix, matrix);
    }
}
/// <summary>
/// Checks that the GPU activation backward pass with <c>ActivationFunctions.SigmoidPrime</c>
/// matches the output of <c>CpuDnn.ActivationBackward</c> on the same inputs.
/// </summary>
public void ActivationBackward()
{
    // Cpu
    Tensor input = CreateRandomTensor(400, 1200);
    Tensor outputGradient = CreateRandomTensor(400, 1200);
    Tensor.Like(input, out Tensor cpuInputGradient);
    CpuDnn.ActivationBackward(input, outputGradient, ActivationFunctions.SigmoidPrime, cpuInputGradient);

    // Gpu
    Gpu gpu = Gpu.Default;
    using (DeviceMemory<float> inputDevice = gpu.AllocateDevice(input))
    using (DeviceMemory<float> gradientDevice = gpu.AllocateDevice(outputGradient))
    {
        // The gradient buffer is passed twice and read back afterwards as the GPU result
        Dnn.Get(gpu).ActivationBackward(
            input.Entities,
            input.Length,
            inputDevice.Ptr,
            gradientDevice.Ptr,
            ActivationFunctions.SigmoidPrime,
            gradientDevice.Ptr);
        gradientDevice.CopyToHost(outputGradient.Entities, outputGradient.Length, out Tensor gpuInputGradient);

        // Both implementations must agree
        Assert.IsTrue(cpuInputGradient.ContentEquals(gpuInputGradient));
        Tensor.Free(input, outputGradient, cpuInputGradient, gpuInputGradient);
    }
}
/// <summary>
/// Runs a batched cuBLAS GEMM (<c>cublasDgemmBatched</c> when <c>DOUBLE_PRECISION</c> is defined,
/// <c>cublasSgemmBatched</c> otherwise) over <paramref name="m"/> batch entries with all matrix
/// dimensions and leading dimensions set to <paramref name="n"/>, prints the timing and copies
/// the result buffer back into <paramref name="result"/>.
/// </summary>
/// <param name="gpu">Device used for allocation, the cuBLAS handle and synchronization.</param>
/// <param name="result">Host array uploaded before the call and overwritten with the device content afterwards.</param>
/// <param name="left">Host array uploaded to the device; batch entry i starts at element offset i * n * n.</param>
/// <param name="right">Host array uploaded to the device; every batch entry uses the same base pointer.</param>
/// <param name="m">Batch count.</param>
/// <param name="n">Value passed for all three GEMM dimensions and all leading dimensions.</param>
public static unsafe void Alea(Gpu gpu, Real[] result, Real[] left, Real[] right, int m, int n)
{
    using (var cudaResult = gpu.AllocateDevice(result))
    using (var cudaLeft = gpu.AllocateDevice(left))
    using (var cudaRight = gpu.AllocateDevice(right))
    {
        // Scalars for the GEMM call: alpha = 1, beta = 0
        var alphas = new Real[] { 1 };
        var betas = new Real[] { 0 };

        // Per-batch pointer tables: result and left advance by n * n elements per entry,
        // while right repeats the same base pointer for every entry.
        // NOTE(review): the byte offset i * n * n * sizeof(Real) is computed before being added
        // to the handle; if that product is evaluated in int arithmetic it could overflow for
        // very large buffers - confirm against the expected problem sizes.
        var results = Enumerable.Range(0, m).Select(i => cudaResult.Ptr.Handle + i * n * n * sizeof(Real)).ToArray();
        var lefts = Enumerable.Range(0, m).Select(i => cudaLeft.Ptr.Handle + i * n * n * sizeof(Real)).ToArray();
        var rights = Enumerable.Range(0, m).Select(i => cudaRight.Ptr.Handle).ToArray();

        // The pointer tables themselves are uploaded to the device as well
        using (var cudaResults = gpu.AllocateDevice(results))
        using (var cudaLefts = gpu.AllocateDevice(lefts))
        using (var cudaRights = gpu.AllocateDevice(rights))
        {
            fixed(Real *pAlphas = alphas)
            fixed(Real * pBetas = betas)
            {
                // Timing starts here, after all allocations and uploads
                var timer = Stopwatch.StartNew();
                var blas = global::Alea.cuBLAS.Blas.Get(gpu);

                // Copies of the fixed pointers, used inside the lambda below instead of the fixed locals
                var lAlphas = pAlphas;
                var lBetas = pBetas;

                gpu.EvalAction(() => global::Alea.cuBLAS.Interop.cublasSafeCall(
#if DOUBLE_PRECISION
                    global::Alea.cuBLAS.Interop.cublasDgemmBatched(
#else
                    global::Alea.cuBLAS.Interop.cublasSgemmBatched(
#endif
                        blas.Handle, global::Alea.cuBLAS.Operation.N, global::Alea.cuBLAS.Operation.N, n, n, n, lAlphas,
                        // ReSharper disable AccessToDisposedClosure
                        cudaLefts.Ptr.Handle, n, cudaRights.Ptr.Handle, n, lBetas, cudaResults.Ptr.Handle,
                        // ReSharper restore AccessToDisposedClosure
                        n, m)));
                gpu.Synchronize();
                PrintPerformance(timer, "ManyMatrixMultiplication.cuBLAS", m * n, n, n);

                // Download the result into the caller's array
                Gpu.Copy(cudaResult, result);
            }
        }
    }
}
/// <summary>
/// Uploads both arrays to the device, launches <c>AleaKernel</c> on a 1D grid of
/// 128-thread blocks sized to cover n * n threads, reports the timing and copies the
/// first device buffer back into <paramref name="mSquaredDistances"/>.
/// </summary>
/// <param name="gpu">Device used for allocation, launch and synchronization.</param>
/// <param name="mSquaredDistances">Host array uploaded before the launch and overwritten with the device content afterwards.</param>
/// <param name="mCoordinates">Host array uploaded as the second kernel argument.</param>
/// <param name="c">Forwarded to the kernel and to the performance report.</param>
/// <param name="n">Determines the grid size (n * n threads); forwarded to the kernel.</param>
public static void Alea(
    Gpu gpu,
    Real[] mSquaredDistances,
    Real[] mCoordinates,
    int c,
    int n)
{
    const int threadsPerBlock = 128;

    using var deviceSquaredDistances = gpu.AllocateDevice(mSquaredDistances);
    using var deviceCoordinates = gpu.AllocateDevice(mCoordinates);

    // The stopwatch starts after the uploads, so only launch + synchronize are timed
    var stopwatch = Stopwatch.StartNew();

    var blockCount = Util.DivUp(n * n, threadsPerBlock);
    var launchParam = new LaunchParam(blockCount, threadsPerBlock);

    gpu.Launch(
        AleaKernel,
        launchParam,
        deviceSquaredDistances.Ptr,
        deviceCoordinates.Ptr,
        c,
        n);
    gpu.Synchronize();

    Util.PrintPerformance(stopwatch, "SquaredDistance.Alea", n, c, n);

    // Download the result into the caller's array
    Gpu.Copy(deviceSquaredDistances, mSquaredDistances);
}
/// <summary>
/// Uploads the three arrays to the device, calls cuBLAS <c>Gemm</c> with no transposition,
/// all dimensions and leading dimensions equal to <paramref name="n"/>, alpha = 1 and beta = 0,
/// reports the timing and copies the result buffer back into <paramref name="result"/>.
/// </summary>
/// <param name="gpu">Device used for allocation, the cuBLAS instance and synchronization.</param>
/// <param name="result">Host array uploaded before the call and overwritten with the device content afterwards.</param>
/// <param name="left">Host array passed as the first matrix operand.</param>
/// <param name="right">Host array passed as the second matrix operand.</param>
/// <param name="n">Value used for every dimension and leading dimension of the call.</param>
public static void Alea(Gpu gpu, Real[] result, Real[] left, Real[] right, int n)
{
    using (var deviceResult = gpu.AllocateDevice(result))
    using (var deviceLeft = gpu.AllocateDevice(left))
    using (var deviceRight = gpu.AllocateDevice(right))
    {
        // The stopwatch starts after the uploads; obtaining the cuBLAS instance is part of the timed region
        var stopwatch = Stopwatch.StartNew();

        var blas = global::Alea.cuBLAS.Blas.Get(gpu);
        blas.Gemm(
            global::Alea.cuBLAS.Operation.N,
            global::Alea.cuBLAS.Operation.N,
            n,
            n,
            n,
            1,
            deviceLeft.Ptr,
            n,
            deviceRight.Ptr,
            n,
            0,
            deviceResult.Ptr,
            n);
        gpu.Synchronize();

        PrintPerformance(stopwatch, "MatrixMultiplication.Alea.cuBLAS", n, n, n);

        // Download the result into the caller's array
        Gpu.Copy(deviceResult, result);
    }
}
/// <summary>
/// Checks that the GPU backward-filter pass of a fully connected layer yields the
/// same result as <c>CpuDnn.FullyConnectedBackwardFilter</c>.
/// </summary>
public void FullyConnectedBackwardFilter()
{
    // Layer that provides the input/output sizes and the weights length
    FullyConnectedLayer layer = new FullyConnectedLayer(
        TensorInfo.Linear(231),
        125,
        ActivationType.Sigmoid,
        WeightsInitializationMode.GlorotUniform,
        BiasInitializationMode.Gaussian);
    Tensor input = CreateRandomTensor(400, layer.InputInfo.Size);
    Tensor outputGradient = CreateRandomTensor(input.Entities, layer.OutputInfo.Size);

    // Cpu; the result is then reshaped to a single row for the comparison
    Tensor.New(layer.InputInfo.Size, layer.OutputInfo.Size, out Tensor cpuWeightsGradient);
    CpuDnn.FullyConnectedBackwardFilter(input, outputGradient, cpuWeightsGradient);
    cpuWeightsGradient.Reshape(1, cpuWeightsGradient.Size, out cpuWeightsGradient);

    // Gpu
    Gpu gpu = Gpu.Default;
    using (DeviceMemory<float> inputDevice = gpu.AllocateDevice(input))
    using (DeviceMemory<float> outputGradientDevice = gpu.AllocateDevice(outputGradient))
    using (DeviceMemory<float> weightsGradientDevice = gpu.AllocateDevice<float>(layer.Weights.Length))
    {
        Dnn.Get(gpu).FullyConnectedBackwardFilter(
            input.Entities,
            layer.InputInfo.Size,
            layer.OutputInfo.Size,
            inputDevice.Ptr,
            outputGradientDevice.Ptr,
            weightsGradientDevice.Ptr);
        weightsGradientDevice.CopyToHost(1, layer.Weights.Length, out Tensor gpuWeightsGradient);

        // Both implementations must agree
        Assert.IsTrue(cpuWeightsGradient.ContentEquals(gpuWeightsGradient));
        Tensor.Free(input, outputGradient, cpuWeightsGradient, gpuWeightsGradient);
    }
}
/// <summary>
/// Checks that <c>CopyTo(tensor, 5, 3)</c> writes a 9-element device buffer into a zeroed
/// 3x10 tensor as three values per row starting at column 5, leaving the other cells at zero.
/// </summary>
public void CopyToRows()
{
    float[] test = { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
    Tensor.NewZeroed(3, 10, out Tensor tensor);
    try
    {
        Gpu gpu = Gpu.Default;
        using (DeviceMemory<float> m_gpu = gpu.AllocateDevice(test))
        {
            m_gpu.CopyTo(tensor, 5, 3);
        }

        float[,] expected =
        {
            { 0, 0, 0, 0, 0, 1, 2, 3, 0, 0 },
            { 0, 0, 0, 0, 0, 4, 5, 6, 0, 0 },
            { 0, 0, 0, 0, 0, 7, 8, 9, 0, 0 }
        };
        Assert.IsTrue(tensor.ToArray2D().ContentEquals(expected));
    }
    finally
    {
        // Release the tensor like the sibling tests do; the finally block also covers a failed assertion
        Tensor.Free(tensor);
    }
}
/// <summary>
/// Checks that <c>AllocateDevice(tensor, 5, 3)</c> on a 3x10 tensor produces a device buffer
/// holding the three values of each row that start at column 5, in row order.
/// </summary>
public void AllocateDeviceRows()
{
    float[,] source =
    {
        { 0, 0, 0, 0, 0, 1, 2, 3, 0, 0 },
        { 0, 0, 0, 0, 0, 4, 5, 6, 0, 0 },
        { 0, 0, 0, 0, 0, 7, 8, 9, 0, 0 }
    };
    Tensor.From(source, out Tensor tensor);
    try
    {
        Gpu gpu = Gpu.Default;
        using (DeviceMemory<float> m_gpu = gpu.AllocateDevice(tensor, 5, 3))
        {
            float[] copy = Gpu.CopyToHost(m_gpu), expected = { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
            Assert.IsTrue(copy.ContentEquals(expected));
        }
    }
    finally
    {
        // Release the tensor like the sibling tests do; the finally block also covers a failed assertion
        Tensor.Free(tensor);
    }
}
public void PerActivationBatchNormalizationForward() { // Setup Tensor x = CreateRandomTensor(400, 250); Tensor.NewZeroed(1, 250, out Tensor mu); Tensor.LikeZeroed(mu, out Tensor sigma2); Tensor.New(1, 250, out Tensor gamma); Tensor.NewZeroed(1, 250, out Tensor beta); for (int i = 0; i < 250; i++) { gamma[i] = ThreadSafeRandom.NextFloat(); } // Cpu Tensor.Like(x, out Tensor y1); CpuDnn.BatchNormalizationForward(NormalizationMode.PerActivation, TensorInfo.Linear(250), x, 1, mu, sigma2, gamma, beta, y1); // Gpu Gpu gpu = Gpu.Default; using (DeviceMemory <float> x_gpu = gpu.AllocateDevice(x), y_gpu = gpu.AllocateDevice <float>(x.Size), gamma_gpu = gpu.AllocateDevice(gamma), beta_gpu = gpu.AllocateDevice(beta), run_mean = gpu.AllocateDevice <float>(mu.Size), run_var = gpu.AllocateDevice <float>(mu.Size)) { TensorDescriptor desc = new TensorDescriptor(); desc.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, x.Length, 1, 1); TensorDescriptor gammaBetadesc = new TensorDescriptor(); gammaBetadesc.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, 1, x.Length, 1, 1); Dnn.Get(gpu).BatchNormalizationForwardTraining( BatchNormMode.PER_ACTIVATION, 1, 0, desc, x_gpu.Ptr, desc, y_gpu.Ptr, gammaBetadesc, gamma_gpu.Ptr, beta_gpu.Ptr, 1, run_mean.Ptr, run_var.Ptr, CpuDnn.CUDNN_BN_MIN_EPSILON,
/// <summary>
/// Initializes the rendering state: sizes the frame from <c>ScreenDivider.Panel2</c>, allocates the
/// host and device buffers for ray directions and pixels, wraps the pixel array in a bitmap,
/// resets the camera/light/basis vectors, fills the <c>seeds</c> and <c>offsets</c> tables and
/// enables double buffering on the panel.
/// </summary>
public void Init()
{
    Font = new Font(FontFamily.GenericMonospace, 10);

    // Frame size: panel client size divided by granularity; the trailing "/ 4 * 4" truncates to a
    // multiple of 4 under integer division (assumes granularity is an integer - TODO confirm)
    width = ScreenDivider.Panel2.ClientSize.Width / granularity / 4 * 4;
    height = ScreenDivider.Panel2.ClientSize.Height / granularity / 4 * 4;

    // One direction per pixel, mirrored in device memory
    // NOTE(review): any device memory already held in dirDevMem / pixDevMem is not disposed here;
    // confirm whether Init can run more than once
    directions = new float3[width * height];
    dirDevMem = gpu.AllocateDevice(directions);
    dirDevPtr = dirDevMem.Ptr;

    // Pixel buffer with "bytes" bytes per pixel, mirrored in device memory
    pixels = new byte[width * height * bytes];
    pixDevMem = gpu.AllocateDevice(pixels);
    pixDevPtr = pixDevMem.Ptr;

    // The bitmap is created directly over the pixels array, with a stride of width * bytes
    // NOTE(review): Marshal.UnsafeAddrOfPinnedArrayElement is documented to require a pinned array,
    // and no pinning of "pixels" is visible in this method - confirm it is pinned elsewhere
    b = new Bitmap(width, height, width * bytes, PixelFormat.Format24bppRgb, Marshal.UnsafeAddrOfPinnedArrayElement(pixels, 0));

    // Initial scene positions
    center = new float3(0, 0, 0);
    camera = new float3(3, 0, 0);
    lightLocation = new float3(20, 20, 20);

    // Launch configuration: grid extents are the frame size divided by the block size
    GridSize = new dim3(width / BlockSize.x, height / BlockSize.y);
    launchParam = new LaunchParam(GridSize, BlockSize);
    movementSize = baseMovementSize;
    GetDirections();

    // Basis vectors
    x = new float3(0, 0, 1);
    y = new float3(0, 1, 0);
    z = new float3(-1, 0, 0);

    // Table of 25 seed vectors
    seeds = new[]
    {
        new float3(1.8f, -0.12f, 0.5f),
        new float3(1.9073f, 2.72f, -1.16f),
        new float3(2.02f, -1.57f, 1.62f),
        new float3(1.65f, 0.37f, -1.023f),
        new float3(1.77f, -0.22f, -0.663f),
        new float3(1.66f, 1.52f, 0.19f),
        new float3(1.58f, -1.45f, -2.333f),
        new float3(1.87f, 3.141f, 0.02f),
        new float3(1.81f, 1.44f, -2.99f),
        new float3(1.93f, 1.34637f, 1.58f),
        new float3(1.88f, 1.52f, -1.373f),
        new float3(1.6f, -2.51f, -2.353f),
        new float3(2.08f, 1.493f, 3.141f),
        new float3(2.0773f, 2.906f, -1.34f),
        new float3(1.78f, -0.1f, -3.003f),
        new float3(2.0773f, 2.906f, -1.34f),
        new float3(1.8093f, 3.141f, 3.074f),
        new float3(1.95f, 1.570796f, 0),
        new float3(1.91f, 0.06f, -0.76f),
        new float3(1.8986f, -0.4166f, 0.00683f),
        new float3(2.03413f, 1.688f, -1.57798f),
        new float3(1.6516888f, 0.026083898f, -0.7996324f),
        new float3(1.77746f, -1.66f, 0.0707307f),
        new float3(2.13f, -1.77f, -1.62f),
        new float3(1, 0, 0)
    };

    // Table of 25 offset vectors (same number of entries as seeds)
    offsets = new[]
    {
        new float3(0.353333f, 0.458333f, -0.081667f),
        new float3(0.493000f, 0.532167f, -0.449167f),
        new float3(0.551667f, -1.031667f, -0.255000f),
        new float3(0.235000f, 0.036667f, 0.128333f),
        new float3(0.346667f, 0.236667f, 0.321667f),
        new float3(0.638333f, 0.323333f, 0.181667f),
        new float3(0.258333f, 0.021667f, 0.420000f),
        new float3(0.595000f, -0.021500f, -0.491667f),
        new float3(0.484167f, -0.127500f, 0.694167f),
        new float3(0.385000f, -0.187167f, -0.260000f),
        new float3(0.756667f, 0.210000f, -0.016667f),
        new float3(0.333333f, 0.068333f, 0.238333f),
        new float3(1.238333f, -0.993333f, 1.038333f),
        new float3(0.206333f, 0.255500f, -0.180833f),
        new float3(0.245000f, -0.283333f, 0.066667f),
        new float3(0.206333f, 0.255500f, -0.180833f),
        new float3(0.182317f, 0.072492f, 0.518550f),
        new float3(1.125000f, 0.500000f, 0.000000f),
        new float3(0.573333f, 0.115000f, 0.190000f),
        new float3(0.418833f, 0.901117f, 0.418333f),
        new float3(0.800637f, 0.683333f, 0.231772f),
        new float3(0.643105f, 0.856235f, 0.153051f),
        new float3(0.781117f, 0.140627f, -0.330263f),
        new float3(0.831667f, 0.508333f, 0.746667f),
        new float3(0.000000f, 0.000000f, 0.000000f),
    };

    // DoubleBuffered is looked up as a non-public instance property, so it is set through reflection
    // NOTE(review): GetProperty returns null when the property is not found, which would throw here
    typeof(SplitterPanel).GetProperty("DoubleBuffered", BindingFlags.NonPublic | BindingFlags.Instance).SetValue(ScreenDivider.Panel2, true, null);
}