/// <summary>
/// Launches <c>CudaKernel</c> on the default GPU over the four supplied arrays, reports the
/// elapsed time, and copies the device-side intra-return buffer back into
/// <paramref name="mIntraReturn"/>.
/// </summary>
/// <param name="mIntraReturn">Host array bound to the kernel's first argument; overwritten with the device buffer after the launch.</param>
/// <param name="vClose">Host array bound to the kernel's second argument.</param>
/// <param name="vIsAlive">Host array bound to the kernel's third argument.</param>
/// <param name="vIsValidDay">Host array bound to the kernel's fourth argument.</param>
/// <param name="m">Passed to the kernel; also sizes the grid's Y dimension.</param>
/// <param name="n">Passed to the kernel; also sizes the grid's X dimension.</param>
public static void Cuda(
    Real[] mIntraReturn,
    Real[] vClose,
    Real[] vIsAlive,
    Real[] vIsValidDay,
    int m,
    int n)
{
    var device = Gpu.Default;

    // Device buffers are created from the host arrays and released when the using scope ends.
    using (var intraReturnMem = device.AllocateDevice(mIntraReturn))
    using (var closeMem = device.AllocateDevice(vClose))
    using (var isAliveMem = device.AllocateDevice(vIsAlive))
    using (var isValidDayMem = device.AllocateDevice(vIsValidDay))
    {
        var stopwatch = Stopwatch.StartNew();

        // Blocks are 32 x 8 threads; the grid is sized from n (X) and m (Y) via Util.DivUp.
        var blocksX = Util.DivUp(n, 32);
        var blocksY = Util.DivUp(m, 8);
        var grid = new dim3(blocksX, blocksY);
        var block = new dim3(32, 8);
        var launch = new LaunchParam(grid, block);

        device.Launch(
            CudaKernel,
            launch,
            intraReturnMem.Ptr,
            closeMem.Ptr,
            isAliveMem.Ptr,
            isValidDayMem.Ptr,
            m,
            n);
        device.Synchronize();

        // The timing is reported before the device-to-host copy, so the copy is not measured.
        Util.PrintPerformance(stopwatch, "IntraReturn.Cuda", 5, m, n);

        Gpu.Copy(intraReturnMem, mIntraReturn);
    }
}
/// <summary>
/// Mouse-move handler. While the mouse button is held, turns the pointer movement since the last
/// event into two rotations that are applied to the device-side direction buffer (via the
/// <c>RotateDirection</c> kernel) and to the host-side <c>x</c>, <c>y</c>, <c>z</c> and
/// <c>camera</c> vectors (via <c>RotateVec</c>), then requests a repaint.
/// The last mouse location is always updated, whether or not the button is held.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Mouse event data; only <c>X</c> and <c>Y</c> are read.</param>
private void OnMouseMove(object sender, MouseEventArgs e)
{
    if (isMouseDown)
    {
        // Pointer delta since the previous event, scaled by the sensitivity factor.
        float2 offset = new float2(
            (e.X - mouseLocation.x) * mouseSensitivity,
            (e.Y - mouseLocation.y) * mouseSensitivity);

        // cos, sin and (1 - cos) of each delta; these are the terms the rotation helpers take.
        float c1 = DeviceFunction.Cos(offset.x);
        float s1 = DeviceFunction.Sin(offset.x);
        float t1 = 1 - c1;
        float c2 = DeviceFunction.Cos(offset.y);
        float s2 = DeviceFunction.Sin(offset.y);
        float t2 = 1 - c2;

        // The previously computed-but-unused `camDist = L(camera)` local was removed.
        // NOTE(review): assumes L has no side effects (it is used as a length in D(v, L(v)) below) - confirm.

        // First rotation: driven by the horizontal delta, with y passed as the second argument
        // (presumably the rotation axis - confirm against RotateVec/RotateDirection).
        gpu.Launch(RotateDirection, launchParam, dirDevPtr, y, t1, c1, s1, width, height);
        x = RotateVec(x, y, t1, c1, s1);
        z = RotateVec(z, y, t1, c1, s1);
        camera = RotateVec(camera, y, t1, c1, s1);

        // Second rotation: driven by the vertical delta, using the already-rotated x.
        gpu.Launch(RotateDirection, launchParam, dirDevPtr, x, t2, c2, s2, width, height);
        y = RotateVec(y, x, t2, c2, s2);
        z = RotateVec(z, x, t2, c2, s2);
        camera = RotateVec(camera, x, t2, c2, s2);

        // Bring the rotated directions back to the host array.
        Gpu.Copy(dirDevMem, directions);

        // Rescale each basis vector by D(v, L(v)) (presumably normalisation - confirm).
        x = D(x, L(x));
        y = D(y, L(y));
        z = D(z, L(z));

        ScreenDivider.Panel2.Invalidate();
    }

    mouseLocation = new int2(e.X, e.Y);
}
/// <summary>
/// Launches <c>Kernel</c> on the default GPU with one-dimensional blocks of 128 threads, reports
/// the elapsed time, and copies the device-side result buffer back into
/// <paramref name="mSquaredDistances"/>.
/// </summary>
/// <param name="mSquaredDistances">Host array bound to the kernel's first argument; overwritten with the device buffer after the launch.</param>
/// <param name="mCoordinates">Host array bound to the kernel's second argument.</param>
/// <param name="c">Passed through to the kernel.</param>
/// <param name="n">Passed through to the kernel; the grid is sized from <c>n * n</c>.</param>
public static void Cuda(Real[] mSquaredDistances, Real[] mCoordinates, int c, int n)
{
    const int threadsPerBlock = 128;

    var device = Gpu.Default;

    using (var squaredDistanceMem = device.AllocateDevice(mSquaredDistances))
    using (var coordinatesMem = device.AllocateDevice(mCoordinates))
    {
        var stopwatch = Stopwatch.StartNew();

        // Enough blocks for n * n work items.
        // NOTE(review): n * n is 32-bit int arithmetic and can overflow for large n - confirm expected range.
        var blockCount = Util.DivUp(n * n, threadsPerBlock);
        var launch = new LaunchParam(blockCount, threadsPerBlock);

        device.Launch(Kernel, launch, squaredDistanceMem.Ptr, coordinatesMem.Ptr, c, n);
        device.Synchronize();

        // The timing is reported before the device-to-host copy, so the copy is not measured.
        Util.PrintPerformance(stopwatch, "SquaredDistance.Cuda", n, c, n);

        Gpu.Copy(squaredDistanceMem, mSquaredDistances);
    }
}
/// <summary>
/// Multiplies two n-by-n matrices with cuBLAS GEMM on the default GPU, reports the elapsed time,
/// and copies the device-side product back into <paramref name="result"/>.
/// </summary>
/// <param name="result">Host array receiving the GEMM output.</param>
/// <param name="left">Host array passed as the first GEMM matrix operand.</param>
/// <param name="right">Host array passed as the second GEMM matrix operand.</param>
/// <param name="n">Used for all three GEMM dimensions and all three leading dimensions.</param>
public static void Cuda(Real[] result, Real[] left, Real[] right, int n)
{
    var device = Gpu.Default;

    using (var resultMem = device.AllocateDevice(result))
    using (var leftMem = device.AllocateDevice(left))
    using (var rightMem = device.AllocateDevice(right))
    {
        var stopwatch = Stopwatch.StartNew();

        // Obtaining the cuBLAS handle is inside the timed region, as in the original.
        var blas = Alea.cuBLAS.Blas.Get(device);
        var opN = Alea.cuBLAS.Operation.N;

        blas.Gemm(
            opN, opN,
            n, n, n,
            1,                  // alpha
            leftMem.Ptr, n,
            rightMem.Ptr, n,
            0,                  // beta
            resultMem.Ptr, n);
        device.Synchronize();

        // The timing is reported before the device-to-host copy, so the copy is not measured.
        PrintPerformance(stopwatch, "MatrixMultiplication.cuBLAS", n, n, n);

        Gpu.Copy(resultMem, result);
    }
}
/// <summary>
/// Runs a batched cuBLAS GEMM (<c>cublasDgemmBatched</c> when <c>DOUBLE_PRECISION</c> is defined,
/// otherwise <c>cublasSgemmBatched</c>) with a batch count of <paramref name="m"/> and all matrix
/// dimensions equal to <paramref name="n"/>, reports the elapsed time, and copies the device-side
/// output back into <paramref name="result"/>.
/// </summary>
/// <param name="gpu">GPU used for allocation, the cuBLAS handle and the call itself.</param>
/// <param name="result">Host array for the outputs; batch entry i starts at element offset i * n * n.</param>
/// <param name="left">Host array for the left operands; batch entry i starts at element offset i * n * n.</param>
/// <param name="right">Host array for the right operand; every batch entry points at its start.</param>
/// <param name="m">Batch count.</param>
/// <param name="n">Value used for every GEMM dimension and leading dimension.</param>
public static unsafe void Alea(Gpu gpu, Real[] result, Real[] left, Real[] right, int m, int n)
{
    using (var resultMem = gpu.AllocateDevice(result))
    using (var leftMem = gpu.AllocateDevice(left))
    using (var rightMem = gpu.AllocateDevice(right))
    {
        var alphas = new Real[] { 1 };
        var betas = new Real[] { 0 };

        // Per-batch pointer tables: result and left advance by one n*n matrix (in bytes) per entry,
        // while every right entry refers to the same buffer start.
        var batch = Enumerable.Range(0, m);
        var resultHandles = batch.Select(k => resultMem.Ptr.Handle + k * n * n * sizeof(Real)).ToArray();
        var leftHandles = batch.Select(k => leftMem.Ptr.Handle + k * n * n * sizeof(Real)).ToArray();
        var rightHandles = batch.Select(k => rightMem.Ptr.Handle).ToArray();

        using (var resultTable = gpu.AllocateDevice(resultHandles))
        using (var leftTable = gpu.AllocateDevice(leftHandles))
        using (var rightTable = gpu.AllocateDevice(rightHandles))
        {
            fixed (Real* pinnedAlphas = alphas)
            fixed (Real* pinnedBetas = betas)
            {
                var stopwatch = Stopwatch.StartNew();
                var blas = global::Alea.cuBLAS.Blas.Get(gpu);

                // Copied into ordinary pointer locals so the lambda below can capture them.
                var alphaPtr = pinnedAlphas;
                var betaPtr = pinnedBetas;

                gpu.EvalAction(() =>
                    global::Alea.cuBLAS.Interop.cublasSafeCall(
#if DOUBLE_PRECISION
                        global::Alea.cuBLAS.Interop.cublasDgemmBatched(
#else
                        global::Alea.cuBLAS.Interop.cublasSgemmBatched(
#endif
                            blas.Handle,
                            global::Alea.cuBLAS.Operation.N,
                            global::Alea.cuBLAS.Operation.N,
                            n, n, n,
                            alphaPtr,
                            // ReSharper disable AccessToDisposedClosure
                            leftTable.Ptr.Handle, n,
                            rightTable.Ptr.Handle, n,
                            betaPtr,
                            resultTable.Ptr.Handle,
                            // ReSharper restore AccessToDisposedClosure
                            n,
                            m)));

                gpu.Synchronize();

                // The timing is reported before the device-to-host copy, so the copy is not measured.
                PrintPerformance(stopwatch, "ManyMatrixMultiplication.cuBLAS", m * n, n, n);

                Gpu.Copy(resultMem, result);
            }
        }
    }
}
/// <summary>
/// Prepares every device buffer needed for a cached kernel launch: runs <c>Allocate</c> for the
/// regular buffers, allocates <c>rneuronsCacheArr</c> from <paramref name="rneuronsCache"/>, and
/// copies the two cache range arrays into their existing device buffers.
/// </summary>
/// <param name="inputs">Forwarded to <c>Allocate</c>.</param>
/// <param name="rneurons">Forwarded to <c>Allocate</c>.</param>
/// <param name="from">Forwarded to <c>Allocate</c>.</param>
/// <param name="to">Forwarded to <c>Allocate</c>.</param>
/// <param name="rneuronsCache">Host array from which <c>rneuronsCacheArr</c> is allocated.</param>
/// <param name="fromCache">Host array copied into <c>fromCacheArr</c>.</param>
/// <param name="toCache">Host array copied into <c>toCacheArr</c>.</param>
protected override void AllocateCache(
    double[][] inputs,
    long[] rneurons,
    double[] from,
    double[] to,
    bool[] rneuronsCache,
    double[] fromCache,
    double[] toCache)
{
    // Regular (non-cache) buffers first.
    Allocate(inputs, rneurons, from, to);

    // Allocated straight from the host array, with no separate Gpu.Copy call.
    rneuronsCacheArr = gpu.Allocate(rneuronsCache);

    // fromCacheArr / toCacheArr are not allocated here; presumably created elsewhere - confirm.
    Gpu.Copy(fromCache, fromCacheArr);
    Gpu.Copy(toCache, toCacheArr);
}
/// <summary>
/// Prepares the device buffers for a kernel launch: allocates <c>rneuronsArr</c> and
/// <c>inputsArr</c> directly from their host arrays, allocates <c>resultsArr</c> with one
/// <c>long</c> per entry of <paramref name="rneurons"/>, and copies the two range arrays into
/// their existing device buffers.
/// </summary>
/// <param name="inputs">Host jagged array from which <c>inputsArr</c> is allocated.</param>
/// <param name="rneurons">Host array from which <c>rneuronsArr</c> is allocated; its length sizes <c>resultsArr</c>.</param>
/// <param name="from">Host array copied into <c>fromArr</c>.</param>
/// <param name="to">Host array copied into <c>toArr</c>.</param>
protected override void Allocate(
    double[][] inputs,
    long[] rneurons,
    double[] from,
    double[] to)
{
    // Buffers created from host data in a single call.
    rneuronsArr = gpu.Allocate(rneurons);
    resultsArr = gpu.Allocate<long>(rneurons.Length);
    inputsArr = gpu.Allocate(inputs);

    // fromArr / toArr are not allocated here; presumably created elsewhere - confirm.
    Gpu.Copy(from, fromArr);
    Gpu.Copy(to, toArr);
}
/// <summary>
/// Prepares every device buffer needed for a cached kernel launch: runs <c>Allocate</c> for the
/// regular buffers, allocates <c>rneuronsCacheArr</c> with the same length as
/// <paramref name="rneuronsCache"/> and fills it, then copies the two cache range arrays into
/// their existing device buffers.
/// </summary>
/// <param name="inputs">Forwarded to <c>Allocate</c>.</param>
/// <param name="rneurons">Forwarded to <c>Allocate</c>.</param>
/// <param name="from">Forwarded to <c>Allocate</c>.</param>
/// <param name="to">Forwarded to <c>Allocate</c>.</param>
/// <param name="rneuronsCache">Host array whose length sizes <c>rneuronsCacheArr</c> and whose contents are copied into it.</param>
/// <param name="fromCache">Host array copied into <c>fromCacheArr</c>.</param>
/// <param name="toCache">Host array copied into <c>toCacheArr</c>.</param>
protected virtual void AllocateCache(
    double[][] inputs,
    long[] rneurons,
    double[] from,
    double[] to,
    bool[] rneuronsCache,
    double[] fromCache,
    double[] toCache)
{
    // Regular (non-cache) buffers first.
    Allocate(inputs, rneurons, from, to);

    // Allocate by length, then upload the contents explicitly.
    var cacheLength = rneuronsCache.Length;
    rneuronsCacheArr = gpu.Allocate<bool>(cacheLength);
    Gpu.Copy(rneuronsCache, rneuronsCacheArr);

    // fromCacheArr / toCacheArr are not allocated here; presumably created elsewhere - confirm.
    Gpu.Copy(fromCache, fromCacheArr);
    Gpu.Copy(toCache, toCacheArr);
}
/// <summary>
/// Prepares the device buffers for a kernel launch: allocates <c>rneuronsArr</c> and
/// <c>resultsArr</c> with one <c>long</c> per entry of <paramref name="rneurons"/>, allocates
/// <c>inputsArr</c> with one slot per entry of <paramref name="inputs"/>, then copies the host
/// data into the device buffers.
/// </summary>
/// <param name="inputs">Host jagged array; its length sizes <c>inputsArr</c> and its contents are copied into it.</param>
/// <param name="rneurons">Host array; its length sizes <c>rneuronsArr</c> and <c>resultsArr</c>, and its contents are copied into <c>rneuronsArr</c>.</param>
/// <param name="from">Host array copied into <c>fromArr</c>.</param>
/// <param name="to">Host array copied into <c>toArr</c>.</param>
protected virtual void Allocate(
    double[][] inputs,
    long[] rneurons,
    double[] from,
    double[] to)
{
    // Allocation phase: size-only allocations, no data transferred yet.
    var neuronCount = rneurons.Length;
    rneuronsArr = gpu.Allocate<long>(neuronCount);
    resultsArr = gpu.Allocate<long>(neuronCount);
    inputsArr = gpu.Allocate<double[]>(inputs.Length);

    // Upload phase. resultsArr is left unfilled here.
    Gpu.Copy(rneurons, rneuronsArr);
    Gpu.Copy(inputs, inputsArr);

    // fromArr / toArr are not allocated here; presumably created elsewhere - confirm.
    Gpu.Copy(from, fromArr);
    Gpu.Copy(to, toArr);
}
/// <summary>
/// Launches the <c>MarchRay</c> kernel, copies the device pixel buffer into <c>pixels</c>, and
/// wraps that host array in a new 24bpp RGB <c>Bitmap</c> stored in <c>b</c>.
/// </summary>
public void MarchRays()
{
    gpu.Launch(
        MarchRay,
        launchParam,
        dirDevPtr,
        pixDevPtr,
        camera,
        lightLocation,
        cols,
        minDist,
        maxDist,
        1000,
        bytes,
        width,
        iterations,
        side,
        seed,
        shift,
        shadowStrength,
        ambientOccStrength);

    // Device -> host transfer of the kernel's pixel output.
    Gpu.Copy(pixDevMem, pixels);

    // The bitmap is built over the pixels array's memory, using width * bytes as the stride.
    // NOTE(review): UnsafeAddrOfPinnedArrayElement expects `pixels` to be pinned - confirm it is pinned elsewhere.
    // NOTE(review): the Bitmap previously held in `b` is not disposed here - confirm ownership.
    var stride = width * bytes;
    var scan0 = Marshal.UnsafeAddrOfPinnedArrayElement(pixels, 0);
    b = new Bitmap(width, height, stride, PixelFormat.Format24bppRgb, scan0);
}
/// <summary>
/// Allocates the device buffers, launches <c>RlmAleaGpu.Kernel</c>, copies the per-neuron results
/// back to the host, releases the device buffers, and hands the results to
/// <c>FindBestSolution</c>.
/// </summary>
/// <param name="rneurons">Host array forwarded to <c>Allocate</c>; its length sizes the host result array.</param>
/// <param name="inputs">Forwarded to <c>Allocate</c>.</param>
/// <param name="from">Forwarded to <c>Allocate</c>.</param>
/// <param name="to">Forwarded to <c>Allocate</c>.</param>
/// <param name="lparam1">First <c>LaunchParam</c> constructor argument.</param>
/// <param name="lparam2">Second <c>LaunchParam</c> constructor argument.</param>
protected override void LaunchKernel(long[] rneurons, double[][] inputs, double[] from, double[] to, int lparam1, int lparam2)
{
    var results = new long[rneurons.Length];
    var lp = new LaunchParam(lparam1, lparam2);

    Allocate(inputs, rneurons, from, to);
    try
    {
        gpu.Launch(RlmAleaGpu.Kernel, lp, rneuronsArr, inputsArr, resultsArr, fromArr, toArr);
        Gpu.Copy(resultsArr, results);
    }
    finally
    {
        // Release the device buffers even when the launch or the copy throws;
        // previously Free() was skipped on failure and the buffers leaked.
        Free();
    }

    FindBestSolution(results);
}
/// <summary>
/// Cached variant of the kernel launch: allocates the regular and cache device buffers, launches
/// <c>RlmAleaGpu.KernelCache</c>, copies the results and the updated neuron-cache flags back to
/// the host, releases the device buffers, and returns the outcome of
/// <c>FindBestSolutionAndBuildCache</c>.
/// </summary>
/// <param name="rneurons">Host array forwarded to <c>AllocateCache</c>; its length sizes the host result array.</param>
/// <param name="inputs">Forwarded to <c>AllocateCache</c> and to <c>FindBestSolutionAndBuildCache</c>.</param>
/// <param name="from">Forwarded to <c>AllocateCache</c>.</param>
/// <param name="to">Forwarded to <c>AllocateCache</c>.</param>
/// <param name="rneuronsCache">Forwarded to <c>AllocateCache</c>; overwritten with the device-side cache flags after the launch.</param>
/// <param name="fromCache">Forwarded to <c>AllocateCache</c>.</param>
/// <param name="toCache">Forwarded to <c>AllocateCache</c>.</param>
/// <param name="lparam1">First <c>LaunchParam</c> constructor argument.</param>
/// <param name="lparam2">Second <c>LaunchParam</c> constructor argument.</param>
/// <returns>The value returned by <c>FindBestSolutionAndBuildCache</c>.</returns>
protected override RlmCacheDataArray LaunchKernel(long[] rneurons, double[][] inputs, double[] from, double[] to, bool[] rneuronsCache, double[] fromCache, double[] toCache, int lparam1, int lparam2)
{
    var results = new long[rneurons.Length];
    var lp = new LaunchParam(lparam1, lparam2);

    AllocateCache(inputs, rneurons, from, to, rneuronsCache, fromCache, toCache);
    try
    {
        gpu.Launch(RlmAleaGpu.KernelCache, lp, rneuronsArr, inputsArr, resultsArr, fromArr, toArr, rneuronsCacheArr, fromCacheArr, toCacheArr);
        Gpu.Copy(resultsArr, results);
        Gpu.Copy(rneuronsCacheArr, rneuronsCache);
    }
    finally
    {
        // Release the device buffers even when the launch or a copy throws;
        // previously FreeCache() was skipped on failure and the buffers leaked.
        FreeCache();
    }

    return FindBestSolutionAndBuildCache(rneurons, results, inputs, rneuronsCache);
}
/// <summary>
/// Launches <c>AleaKernel</c> on <paramref name="gpu"/> over the matrix and vector buffers,
/// reports the elapsed time, and copies the device-side matrix back into
/// <paramref name="matrix"/>.
/// </summary>
/// <param name="gpu">GPU used for allocation and the launch.</param>
/// <param name="matrix">Host array bound to the kernel's first argument; overwritten with the device buffer after the launch.</param>
/// <param name="vector">Host array bound to the kernel's second argument.</param>
/// <param name="m">Passed to the kernel; also sizes the grid's Y dimension.</param>
/// <param name="n">Passed to the kernel; also sizes the grid's X dimension.</param>
public static void Alea(Gpu gpu, Real[] matrix, Real[] vector, int m, int n)
{
    using (var matrixMem = gpu.AllocateDevice(matrix))
    using (var vectorMem = gpu.AllocateDevice(vector))
    {
        var stopwatch = Stopwatch.StartNew();

        // Blocks are 32 x 8 threads; the grid is sized from n (X) and m (Y) via Util.DivUp.
        var blocksX = Util.DivUp(n, 32);
        var blocksY = Util.DivUp(m, 8);
        var grid = new dim3(blocksX, blocksY);
        var block = new dim3(32, 8);
        var launch = new LaunchParam(grid, block);

        gpu.Launch(AleaKernel, launch, matrixMem.Ptr, vectorMem.Ptr, m, n);
        gpu.Synchronize();

        // The timing is reported before the device-to-host copy, so the copy is not measured.
        Util.PrintPerformance(stopwatch, "AddVector.Alea", 3, m, n);

        Gpu.Copy(matrixMem, matrix);
    }
}
/// <summary>
/// Fetches the two input arrays via <c>GetData</c>, uploads them to the default GPU, launches
/// <c>Kernel</c> with a fixed 16 x 256 launch configuration, and copies the output into a local
/// host array. All device buffers are released before returning, including when an exception
/// is thrown.
/// </summary>
public static void RunGpu()
{
    var n = GetData(out var x, out var y);
    var result = new float[n];
    var gpu = Gpu.Default;

    // Each allocation gets its own try/finally, so a failure at any later step
    // (another allocation, a copy, or the launch) still frees what was already allocated.
    // Previously the Gpu.Free calls were skipped whenever anything threw.
    var xDevice = gpu.Allocate<float>(n);
    try
    {
        var yDevice = gpu.Allocate<float>(n);
        try
        {
            var resultDevice = gpu.Allocate<float>(n);
            try
            {
                Gpu.Copy(x, xDevice);
                Gpu.Copy(y, yDevice);

                var lp = new LaunchParam(16, 256);
                gpu.Launch(Kernel, lp, resultDevice, xDevice, yDevice);

                Gpu.Copy(resultDevice, result);
            }
            finally
            {
                Gpu.Free(resultDevice);
            }
        }
        finally
        {
            Gpu.Free(yDevice);
        }
    }
    finally
    {
        Gpu.Free(xDevice);
    }
}
/// <summary>
/// Computes, for every ordered pair of rows (i, j) in <paramref name="dataset"/>, the value
/// <c>max(0, 1 - dot(i, j) / sqrt(|i|^2 * |j|^2))</c> on the GPU and returns the results as a
/// square matrix. Device buffers are released before returning, including when an exception
/// is thrown.
/// </summary>
/// <param name="gpu">GPU used for allocation and the parallel loop.</param>
/// <param name="dataset">Rows to compare; the inner loop runs over the length of row i.</param>
/// <returns>A <c>dataset.Length</c> by <c>dataset.Length</c> matrix of pairwise distances.</returns>
private static double[,] CosineSimilarityGpu(Gpu gpu, double[][] dataset)
{
    int size = dataset.Length * dataset.Length;

    // Allocate directly on gpu. Each buffer is guarded by try/finally so it is freed even when
    // a later allocation, the kernel, or the copy throws (previously the buffers leaked).
    var gpuDataset = gpu.Allocate(dataset);
    try
    {
        var gpuDistances = gpu.Allocate<double>(dataset.Length, dataset.Length);
        try
        {
            // NOTE(review): the kernel reads the host `dataset` array, not `gpuDataset`, so the
            // explicit device copy above is never used by the loop. Left unchanged to preserve
            // behavior - confirm whether `gpuDataset` was intended here.
            gpu.For(0, size, index =>
            {
                // Flattened pair index -> (row i, row j).
                int i = index / dataset.Length;
                int j = index % dataset.Length;

                double dotProduct = 0;
                double magnitudeOne = 0;
                double magnitudeTwo = 0;
                for (int k = 0; k < dataset[i].Length; k++)
                {
                    dotProduct += (dataset[i][k] * dataset[j][k]);
                    magnitudeOne += (dataset[i][k] * dataset[i][k]);
                    magnitudeTwo += (dataset[j][k] * dataset[j][k]);
                }

                // Clamp at zero so the stored distance is never negative.
                double distance = Math.Max(0, 1 - (dotProduct / Math.Sqrt(magnitudeOne * magnitudeTwo)));
                gpuDistances[i, j] = distance;
            });

            // Gpu -> Cpu.
            var result = new double[dataset.Length, dataset.Length];
            Gpu.Copy(gpuDistances, result);
            return result;
        }
        finally
        {
            // Release gpu memory.
            Gpu.Free(gpuDistances);
        }
    }
    finally
    {
        // Release gpu memory.
        Gpu.Free(gpuDataset);
    }
}
/// <summary>
/// Launches the <c>GetDirection</c> kernel with the focal length and the width and height
/// converted to <c>float</c>, then copies the device direction buffer into <c>directions</c>.
/// </summary>
public void GetDirections()
{
    // The kernel receives the dimensions as floats.
    var widthAsFloat = (float)width;
    var heightAsFloat = (float)height;

    gpu.Launch(GetDirection, launchParam, dirDevPtr, focalLength, widthAsFloat, heightAsFloat);

    // Device -> host transfer of the computed directions.
    Gpu.Copy(dirDevMem, directions);
}