/// <summary>
/// Replaces <paramref name="dm"/> with a new ColumnCount-by-RowCount matrix whose backing
/// array is filled from two GPU passes: one over the top (cols x cols) sub-block and one over
/// the remaining (rows - cols) x cols sub-block.
/// </summary>
/// <param name="dm">
/// Matrix to process. It is set to null while the GPU work runs and is reassigned to the
/// result at the end, so it stays null if an exception is thrown part-way.
/// </param>
public static void cudaTranspose(ref MathNet.Numerics.LinearAlgebra.Double.DenseMatrix dm)
{
    GPGPU gpu = CudafyHost.GetDevice(eGPUType.Cuda);

    // The BLAS wrapper is disposable (other call sites wrap it in `using`); release it deterministically.
    using (GPGPUBLAS blas = GPGPUBLAS.Create(gpu))
    {
        try
        {
            int cols = dm.ColumnCount, rows = dm.RowCount;
            int restRows = rows - cols;

            double[] a = dm.SubMatrix(0, cols, 0, cols).Storage.ToColumnMajorArray();
            double[] b = dm.SubMatrix(cols, restRows, 0, cols).Storage.ToColumnMajorArray();
            dm = null; // drop the caller's reference so the source matrix can be collected

            // Pass 1: square top block.
            // NOTE(review): a_d is uploaded but never passed to GEMV; the call reads c_d and x_d only.
            // Confirm the intended operands before relying on the numerical result.
            double[] a_d = gpu.CopyToDevice<double>(a);
            a = null;
            double[] c_d = gpu.Allocate<double>(cols * cols);
            double[] x_d = gpu.CopyToDevice<double>(new double[] { 1 });
            blas.GEMV(cols, cols, 1, c_d, x_d, 0, x_d, Cudafy.Maths.BLAS.Types.cublasOperation.T);
            a = new double[cols * rows];
            gpu.CopyFromDevice<double>(c_d, 0, a, 0, cols * cols);
            gpu.FreeAll();

            // Pass 2: remaining rows, written after the first cols * cols elements of the output.
            a_d = gpu.CopyToDevice<double>(b);
            b = null;
            c_d = gpu.Allocate<double>(restRows * cols);
            x_d = gpu.CopyToDevice<double>(new double[] { 1 });
            blas.GEMV(restRows, cols, 1, c_d, x_d, 0, x_d, Cudafy.Maths.BLAS.Types.cublasOperation.T);
            gpu.CopyFromDevice<double>(c_d, 0, a, cols * cols, restRows * cols);

            dm = new MathNet.Numerics.LinearAlgebra.Double.DenseMatrix(cols, rows, a);
        }
        finally
        {
            // Release device buffers on both the success and the failure path.
            gpu.FreeAll();
        }
    }
}
/// <summary>
/// Times 100 transfers of an int buffer between host and device.
/// </summary>
/// <param name="size">Number of ints in the host and device buffers.</param>
/// <param name="up">true to copy host-to-device, false to copy device-to-host.</param>
/// <returns>The value reported by the GPU timer for the 100 copies.</returns>
private float cuda_malloc_test(int size, bool up)
{
    int[] hostBuffer = new int[size];
    int[] deviceBuffer = _gpu.Allocate<int>(size);

    _gpu.StartTimer();
    // The direction does not change between iterations, so branch once and loop inside.
    if (up)
    {
        for (int pass = 0; pass < 100; pass++)
        {
            _gpu.CopyToDevice(hostBuffer, deviceBuffer);
        }
    }
    else
    {
        for (int pass = 0; pass < 100; pass++)
        {
            _gpu.CopyFromDevice(deviceBuffer, hostBuffer);
        }
    }
    float elapsed = _gpu.StopTimer();

    _gpu.FreeAll();
    GC.Collect();
    return elapsed;
}
/// <summary>
/// Runs the DoSomeMath kernel with <paramref name="start"/> and <paramref name="end"/> as
/// single-element device arrays and prints each of the (end - start) results to the console.
/// </summary>
/// <param name="start">Lower bound passed to the kernel.</param>
/// <param name="end">Upper bound passed to the kernel; the result buffer holds end - start values.</param>
public static void ExecDoSomeMath(int start, int end)
{
    InitGPU();
    NN = end - start;
    double[] result = new double[NN];

    try
    {
        // Each bound is one int on the device. The previous code sized these buffers by the
        // *values* of start/end (Allocate<int>(start)), which over-allocates and cannot work for 0.
        int[] dev_start = _gpu.CopyToDevice(new int[1] { start });
        int[] dev_end = _gpu.CopyToDevice(new int[1] { end });
        double[] dev_result = _gpu.Allocate<double>(result);

        _gpu.Launch(128, 1).DoSomeMath(dev_start, dev_end, dev_result);
        _gpu.CopyFromDevice(dev_result, result);
    }
    finally
    {
        // Release device memory even when the launch or a copy throws.
        _gpu.FreeAll();
    }

    foreach (var r in result)
    {
        Console.WriteLine(r);
    }
}
// GPU PFAC carving: launches the "PFACAnalyse" kernel on the given stream to search the buffer for bytes.
public void LaunchPFACCarving(int gpuCore)
{
    // Context selection, launch and stream sync are serialized per GPU.
    lock (gpuThreadLock[GPUid])
    {
        gpu.SetCurrentContext();
        gpu.LaunchAsync(
            gpuOperatingCores,
            blockSize,
            gpuCore,
            "PFACAnalyse",
            dev_buffer[gpuCore],
            initialState,
            dev_lookup,
            dev_targetEndLength,
            dev_resultCount[gpuCore],
            dev_foundCount[gpuCore],
            dev_foundID[gpuCore],
            dev_foundLoc[gpuCore]);
        gpu.SynchronizeStream(gpuCore);
    }

    gpu.CopyFromDevice(dev_resultCount[gpuCore], resultCount[gpuCore]);

    // Fetch the ID/location tables only when at least one counter is positive.
    bool anyHit = false;
    foreach (var hits in resultCount[gpuCore])
    {
        if (hits > 0)
        {
            anyHit = true;
            break;
        }
    }

    if (anyHit)
    {
        gpu.CopyFromDevice(dev_foundID[gpuCore], foundID[gpuCore]);
        gpu.CopyFromDevice(dev_foundLoc[gpuCore], foundLoc[gpuCore]);
    }

    FreeBuffers(gpuCore);
}
/// <summary>
/// Round-trips _uintBufferIn0 through _gpu1 and asserts the data read back matches the input.
/// </summary>
public void Test_SingleThreadOneGPU_1()
{
    _gpu1.SetCurrentContext();

    // Upload, then download into the output buffer.
    _gpuuintBufferIn0 = _gpu1.CopyToDevice(_uintBufferIn0);
    _gpu1.CopyFromDevice(_gpuuintBufferIn0, _uintBufferOut0);

    bool roundTripMatches = Compare(_uintBufferIn0, _uintBufferOut0);
    Assert.IsTrue(roundTripMatches);

    ClearOutputsAndGPU(1);
}
/// <summary>
/// Round-trips _uintBufferIn1 through the device and asserts the data read back matches the input.
/// </summary>
public void Test_SingleThreadCopy()
{
    // Upload, then download into the output buffer.
    _gpuuintBufferIn1 = _gpu.CopyToDevice(_uintBufferIn1);
    _gpu.CopyFromDevice(_gpuuintBufferIn1, _uintBufferOut1);

    bool roundTripMatches = Compare(_uintBufferIn1, _uintBufferOut1);
    Assert.IsTrue(roundTripMatches);

    ClearOutputs();
    _gpu.FreeAll();
}
//[Test]
/// <summary>
/// Builds an N x N matrix with CreateDiagonalMatrix, converts it to CSR on the device, runs the
/// CG solver, then multiplies the CSR matrix by the solver output and reports the largest
/// absolute difference from the original right-hand side.
/// </summary>
public void TestCGSolver()
{
    float alpha = 1.0f;
    float beta = 0.0f;

    // Host-side inputs and output.
    _hiMatrixMN = new float[N * N];
    _hoVectorN = new float[N];
    CreateDiagonalMatrix(_hiMatrixMN, N, 6);
    _hiVectorN = new float[N];
    _hiVectorN2 = new float[N];
    FillBuffer(_hiVectorN2, 6);

    // Device-side buffers.
    _diMatrixMN = _gpu.CopyToDevice(_hiMatrixMN);
    _diVectorN = _gpu.Allocate(_hiVectorN);
    _diVectorN2 = _gpu.CopyToDevice(_hiVectorN2);
    _diPerRow = _gpu.Allocate<int>(N);
    _diVectorP = _gpu.Allocate<float>(N);
    _diVectorAX = _gpu.Allocate<float>(N);

    // Dense -> CSR conversion.
    int nonZeroCount = _sparse.NNZ(N, N, _diMatrixMN, _diPerRow);
    _diCSRVals = _gpu.Allocate<float>(nonZeroCount);
    _diCSRCols = _gpu.Allocate<int>(nonZeroCount);
    _diCSRRows = _gpu.Allocate<int>(N + 1);
    _sparse.Dense2CSR(N, N, _diMatrixMN, _diPerRow, _diCSRVals, _diCSRRows, _diCSRCols);

    // Only the solver call is timed.
    Stopwatch timer = Stopwatch.StartNew();
    SolveResult result = _solver.CG(N, nonZeroCount, _diCSRVals, _diCSRRows, _diCSRCols, _diVectorN, _diVectorN2, _diVectorP, _diVectorAX, 0.01f, 1000);
    long elapsedMs = timer.ElapsedMilliseconds;

    // Multiply the matrix by the solver output and read the product back.
    _sparse.CSRMV(N, N, nonZeroCount, ref alpha, _diCSRVals, _diCSRRows, _diCSRCols, _diVectorN, ref beta, _diVectorN2);
    _gpu.CopyFromDevice(_diVectorN2, _hoVectorN);

    float maxError = 0.0f;
    for (int row = 0; row < N; row++)
    {
        float deviation = Math.Abs(_hoVectorN[row] - _hiVectorN2[row]);
        if (deviation > maxError)
        {
            maxError = deviation;
        }
    }

    Console.WriteLine("Time : {0} ms", elapsedMs);
    Console.WriteLine("Iterate Count : {0}", result.IterateCount);
    Console.WriteLine("Residual : {0}", result.LastError);
    Console.WriteLine("max error : {0}", maxError);

    _gpu.FreeAll();
}
/// <summary>
/// Uploads A and B, launches the "GPU_MA" kernel on a Size x Size grid, and copies the result into C.
/// </summary>
/// <param name="A">First input matrix.</param>
/// <param name="B">Second input matrix.</param>
/// <param name="C">Receives the kernel output.</param>
/// <param name="gpu">Device used for allocation, launch and copies.</param>
/// <param name="maxTheadBlockSize">Upper bound for each block dimension.</param>
/// <param name="Size">Grid dimension, also passed to the kernel.</param>
/// <returns>Always 1.</returns>
public static int MA(int[,] A, int[,] B, int[,] C, GPGPU gpu, int maxTheadBlockSize, int Size)
{
    // Device-side mirrors of the three matrices.
    int[,] devA = gpu.Allocate<int>(A);
    int[,] devB = gpu.Allocate<int>(B);
    int[,] devC = gpu.Allocate<int>(C);

    // Only the inputs need uploading.
    gpu.CopyToDevice(A, devA);
    gpu.CopyToDevice(B, devB);

    // Block edge is Size when it is below the limit, otherwise the limit itself.
    int blockEdge = Size < maxTheadBlockSize ? Size : maxTheadBlockSize;
    dim3 threadsPerBlock = new dim3(blockEdge, blockEdge);
    dim3 grid = new dim3(Size, Size);

    gpu.Launch(grid, threadsPerBlock, "GPU_MA", devA, devB, devC, Size);

    // Fetch the result and release the device buffers.
    gpu.CopyFromDevice(devC, C);
    gpu.Free(devA);
    gpu.Free(devB);
    gpu.Free(devC);

    return 1;
}
/// <summary>
/// Translates and loads the ImpliedVolatile module, runs the "impliedVolatile" kernel on a single
/// hard-coded parameter set, and prints the <c>i</c> and <c>B</c> fields copied back from the device.
/// </summary>
public static void Execute()
{
    CudafyModule km = CudafyTranslator.Cudafy(typeof(ParamsStruct), typeof(ImpliedVolatile));
    _gpu = CudafyHost.GetDevice(CudafyModes.Target);
    _gpu.LoadModule(km);

    ParamsStruct[] host_par = new ParamsStruct[1];
    ParamsStruct[] result = new ParamsStruct[1];
    host_par[0].OP = 96.95;
    host_par[0].Price = 1332.24;
    host_par[0].Strike = 1235;
    host_par[0].TD = 31;
    host_par[0].R = 0.0001355;
    host_par[0].Q = 0.0166;
    host_par[0].N = 100; // 1000;
    host_par[0].kind = 1;

    ParamsStruct[] dev_par = null;
    float[] PA = null;
    try
    {
        dev_par = _gpu.CopyToDevice(host_par);
        PA = _gpu.Allocate<float>(1001);
        _gpu.Launch(1, 1, "impliedVolatile", dev_par, PA);

        // The kernel's output is read back from the parameter struct itself.
        _gpu.CopyFromDevice(dev_par, 0, result, 0, 1);
    }
    finally
    {
        // Previously these device buffers were never released; free only what this call allocated.
        if (PA != null)
        {
            _gpu.Free(PA);
        }
        if (dev_par != null)
        {
            _gpu.Free(dev_par);
        }
    }

    Console.WriteLine("I={0}, B={1}", result[0].i, result[0].B);
}
/// <summary>
/// Runs renderKernel for one frame, copies the image back from the device, saves it as
/// "spring{frameNum}.png" and returns it as a Bitmap that owns its own pixel memory.
/// </summary>
/// <param name="gpu">Device used for allocation, launch and copies.</param>
/// <param name="frameNum">Frame index, used in the output file name.</param>
/// <returns>A 32bpp RGB bitmap of the rendered frame; the caller is responsible for disposing it.</returns>
static Bitmap Render(GPGPU gpu, int frameNum)
{
    uint[,] deviceImage = gpu.Allocate<uint>(width, height);
    float[] pX1_gpu = gpu.CopyToDevice<float>(pX1);
    float[] pY1_gpu = gpu.CopyToDevice<float>(pY1);
    float[] pZ1_gpu = gpu.CopyToDevice<float>(pZ1);
    float[] colorPosition_gpu = gpu.CopyToDevice<float>(colorPosition);
    float[] currentTime_gpu = gpu.CopyToDevice<float>(currentTime);

    dim3 threadsPerBlock = new dim3(8, 8);
    dim3 numBlocks = new dim3(width / threadsPerBlock.x, height / threadsPerBlock.y);
    gpu.Launch(numBlocks, threadsPerBlock).renderKernel(deviceImage, pX1_gpu, pY1_gpu, pZ1_gpu, colorPosition_gpu, currentTime_gpu);

    uint[,] finalImage = new uint[width, height];
    gpu.CopyFromDevice<uint>(deviceImage, finalImage);

    gpu.Free(deviceImage);
    gpu.Free(pX1_gpu);
    gpu.Free(pY1_gpu);
    gpu.Free(pZ1_gpu);
    gpu.Free(colorPosition_gpu);
    gpu.Free(currentTime_gpu);

    // The previous code wrapped a pinned array in a Bitmap and unpinned it before returning,
    // leaving the returned Bitmap pointing at memory the GC may move or reclaim.
    // Copy the pixels into a Bitmap that owns its buffer instead.
    Bitmap bmp = new Bitmap(width, height, PixelFormat.Format32bppRgb);
    try
    {
        // Flatten the 2-D array in memory order; this is the same byte layout the old
        // scan0-based constructor read with a stride of width * sizeof(int).
        int[] packed = new int[width * height];
        System.Buffer.BlockCopy(finalImage, 0, packed, 0, packed.Length * sizeof(int));

        BitmapData bits = bmp.LockBits(new Rectangle(0, 0, width, height), ImageLockMode.WriteOnly, PixelFormat.Format32bppRgb);
        try
        {
            Marshal.Copy(packed, 0, bits.Scan0, packed.Length);
        }
        finally
        {
            bmp.UnlockBits(bits);
        }

        bmp.Save("spring" + frameNum + ".png");
        return bmp;
    }
    catch
    {
        // Do not leak the GDI+ handle when filling or saving fails.
        bmp.Dispose();
        throw;
    }
}
/// <summary>
/// Configures CUDAfy for OpenCL device 0, loads the GPU module, runs CalculateNeuralNetwork on
/// randomly generated inputs and returns the output vector. The elapsed milliseconds for
/// allocation, upload, launch, download and free are written to the console.
/// </summary>
/// <returns>The host output array of length Utils.N filled from the device.</returns>
public static float[] CallGPU()
{
    // Select the OpenCL back end for both translation and execution.
    CudafyModes.Target = eGPUType.OpenCL;
    CudafyModes.DeviceId = 0;
    CudafyTranslator.Language = eLanguage.OpenCL;

    CudafyModule module = CudafyTranslator.Cudafy(ePlatform.Auto, eArchitecture.OpenCL, typeof(GPU));
    GPGPU gpu = CudafyHost.GetDevice(eGPUType.OpenCL, 0);
    gpu.LoadModule(module);
    module.Serialize();

    float[] input = Utils.GenerateRandomVector();
    float[,,] weights = Utils.GenerateRandomMatrix().AsSingleDimension();
    float[] output = new float[Utils.N];

    Stopwatch timer = Stopwatch.StartNew();

    float[] devOutput = gpu.Allocate<float>(output);
    float[] devInput = gpu.CopyToDevice(input);
    float[,,] devWeights = gpu.CopyToDevice(weights);

    gpu.Launch(Utils.GRID_SIZE, Utils.BLOCK_SIZE).CalculateNeuralNetwork(devInput, devWeights, devOutput);
    gpu.CopyFromDevice(devOutput, output);
    gpu.FreeAll();

    timer.Stop();
    Console.WriteLine("GPU: " + timer.ElapsedMilliseconds);

    return output;
}
/// <summary>
/// Smoke test: on the emulator target, runs one BLAS GEMM call over three zero-filled
/// 100-element buffers and copies the result buffer back to the host.
/// </summary>
public static void MyFirstBlasEmulatorTest()
{
    Console.WriteLine("MyTest()");

    // Run against the emulator device.
    CudafyModes.Target = eGPUType.Emulator;
    GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target);

    // The BLAS wrapper is disposed when the using block ends.
    using (GPGPUBLAS blas = GPGPUBLAS.Create(gpu))
    {
        const int N = 100;
        const int m = 10;
        const int n = 10;
        const int k = 10;

        float[] hostA = new float[N];
        float[] hostB = new float[N];
        float[] hostC = new float[N];
        float alpha = -1;
        float beta = 0;

        float[] devA = gpu.CopyToDevice(hostA);
        float[] devB = gpu.CopyToDevice(hostB);
        float[] devC = gpu.CopyToDevice(hostC);

        cublasOperation operation = cublasOperation.N;
        blas.GEMM(m, k, n, alpha, devA, devB, beta, devC, operation);

        gpu.CopyFromDevice<float>(devC, hostC);
    }
}
/// <summary>
/// Invokes and runs the kernels that check whether the array is sorted.
/// </summary>
/// <param name="direction">Value forwarded to the Sorted kernel; defaults to 1.</param>
public static void ExecuteSorted(int direction = 1)
{
    CudafyModule module = CudafyTranslator.Cudafy();
    GPGPU gpu = CudafyHost.GetDevice();
    gpu.LoadModule(module);

    // Device mirrors of the host arrays; only _a is uploaded.
    int[] deviceA = gpu.Allocate(_a);
    int[] deviceB = gpu.Allocate(_b);
    int[] deviceC = gpu.Allocate(_c);
    int[] deviceD = gpu.Allocate(D);
    gpu.CopyToDevice(_a, deviceA);

    // Split runs on a single thread, then Sorted runs with step 0 on the full grid and
    // step 1 on a single thread.
    gpu.Launch(1, 1).Split(deviceA, deviceB, deviceC, _middle);
    gpu.Launch(_gridSize, _blockSize).Sorted(deviceA, deviceB, deviceC, deviceD, 0, direction);
    gpu.Launch(1, 1).Sorted(deviceA, deviceB, deviceC, deviceD, 1, direction);

    gpu.CopyFromDevice(deviceD, D);

    // Release everything allocated on the device.
    gpu.FreeAll();
}
/// <summary>
/// Loads (or translates and caches) the SimpleGeneric module, launches "Kernel" with a
/// Generic&lt;ushort, ushort&gt; input whose A is 187, and prints PASSED when the kernel writes 1
/// to the output, FAILED otherwise.
/// </summary>
public static void Execute()
{
    // Reuse the serialized module when it exists and still matches the .NET assembly.
    CudafyModule km = CudafyModule.TryDeserialize();
    if (km == null || !km.TryVerifyChecksums())
    {
        km = CudafyTranslator.Cudafy(typeof(Generic<ushort, ushort>), typeof(SimpleGeneric));
        km.Serialize();
    }

    GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target);
    gpu.LoadModule(km);

    var input = new Generic<ushort, ushort>();
    input.A = 187;

    int output;
    int[] devoutput = gpu.Allocate<int>(1);
    try
    {
        gpu.Launch(1, 1, "Kernel", input, devoutput);
        gpu.CopyFromDevice(devoutput, out output);
    }
    finally
    {
        // The device allocation was previously never released.
        gpu.Free(devoutput);
    }

    Console.WriteLine("Simple Generic: " + ((output == 1) ? "PASSED" : "FAILED"));
}
/// <summary>
/// Computes the Mandelbrot iteration counts on the GPU for the requested view and draws them
/// into a new bitmap. All device memory is freed when the method exits, whether it succeeds or throws.
/// </summary>
/// <param name="options">Image size and the MinX/MaxX/MinY/MaxY bounds of the view.</param>
/// <param name="log">Callback that receives progress messages.</param>
/// <returns>The rendered bitmap.</returns>
public Bitmap Render(Rendering.ExecutionOptions options, Action<string> log)
{
    try
    {
        var image = new Bitmap(options.Width, options.Height);
        int width = options.Width;
        int height = options.Height;

        log("Initializing and copying data to GPU memory");
        int[,] iterations = new int[height, width];
        var deviceIterations = gpu.CopyToDevice(iterations);

        var grid = new dim3(height, width);
        var block = BlockSize;

        // View bounds and per-pixel step sizes, in single precision.
        var left = (float)options.MinX;
        var right = (float)options.MaxX;
        var bottom = (float)options.MinY;
        var top = (float)options.MaxY;
        var stepX = (right - left) / ((float)width);
        var stepY = (top - bottom) / ((float)height);

        log("Launching Mandelbrot calculations");
        gpu.Launch(grid, block, "CalculateMandelbrot", left, top, stepX, stepY, deviceIterations);

        log("Mandelbrot calculations done, fetching results from GPU memory");
        gpu.CopyFromDevice(deviceIterations, iterations);

        log("Generating the final image");
        Rendering.fastDrawBitmap(image, iterations);

        return image;
    }
    finally
    {
        gpu.FreeAll();
    }
}
/// <summary>
/// Translates and loads this class as a GPU module, then runs the evaluate kernel over
/// <paramref name="hands"/> and returns one value per hand.
/// </summary>
/// <param name="hands">Hands to evaluate, one ulong each.</param>
/// <param name="numCards">Forwarded to the kernel.</param>
/// <returns>An array with the same length as <paramref name="hands"/>, filled from the device.</returns>
public static uint[] Evaluate(ulong[] hands, int numCards)
{
    // Translates this class to CUDA C and then compiles it.
    CudafyModule km = CudafyTranslator.Cudafy();

    GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
    gpu.LoadModule(km);

    // Enough 256-thread blocks to cover every hand (ceiling division).
    int blockSize = 256;
    int blockx = hands.Length / blockSize;
    if (hands.Length % blockSize != 0)
    {
        blockx++;
    }

    ulong[] dev_hands = null;
    uint[] dev_ranks = null;
    try
    {
        dev_hands = gpu.Allocate<ulong>(hands.Length);
        dev_ranks = gpu.Allocate<uint>(hands.Length);
        gpu.CopyToDevice(hands, dev_hands);

        gpu.StartTimer();
        gpu.Launch(blockx, blockSize).evaluate(dev_hands, numCards, hands.Length, dev_ranks);
        gpu.StopTimer(); // the elapsed time was never used; the timer is still stopped to keep start/stop paired

        uint[] toReturn = new uint[hands.Length];
        gpu.CopyFromDevice(dev_ranks, toReturn);
        return toReturn;
    }
    finally
    {
        // Previously both buffers leaked on every call; free only what this call allocated.
        if (dev_ranks != null)
        {
            gpu.Free(dev_ranks);
        }
        if (dev_hands != null)
        {
            gpu.Free(dev_hands);
        }
    }
}
/// <summary>
/// Translates and loads the ImpliedVolatile module, runs the "impliedVolatile" kernel on a single
/// hard-coded parameter set, and prints the <c>i</c> and <c>B</c> fields copied back from the device.
/// </summary>
public static void Execute()
{
    CudafyModule km = CudafyTranslator.Cudafy(typeof(ParamsStruct), typeof(ImpliedVolatile));
    _gpu = CudafyHost.GetDevice(CudafyModes.Target);
    _gpu.LoadModule(km);

    ParamsStruct[] host_par = new ParamsStruct[1];
    ParamsStruct[] result = new ParamsStruct[1];
    host_par[0].OP = 96.95;
    host_par[0].Price = 1332.24;
    host_par[0].Strike = 1235;
    host_par[0].TD = 31;
    host_par[0].R = 0.0001355;
    host_par[0].Q = 0.0166;
    host_par[0].N = 100; // 1000;
    host_par[0].kind = 1;

    ParamsStruct[] dev_par = null;
    float[] PA = null;
    try
    {
        dev_par = _gpu.CopyToDevice(host_par);
        PA = _gpu.Allocate<float>(1001);
        _gpu.Launch(1, 1, "impliedVolatile", dev_par, PA);

        // The kernel's output is read back from the parameter struct itself.
        _gpu.CopyFromDevice(dev_par, 0, result, 0, 1);
    }
    finally
    {
        // These device buffers used to be left allocated; release exactly what was created here.
        if (PA != null)
        {
            _gpu.Free(PA);
        }
        if (dev_par != null)
        {
            _gpu.Free(dev_par);
        }
    }

    Console.WriteLine("I={0}, B={1}", result[0].i, result[0].B);
}
/// <summary>
/// Translates this class for the device's architecture, launches "GenerateRipples" on an
/// 8x8 grid of 64x16-thread blocks, and copies the result buffer back into a host array.
/// </summary>
public void ExeTestKernel()
{
    GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, 0);
    eArchitecture arch = gpu.GetArchitecture();
    CudafyModule km = CudafyTranslator.Cudafy(arch);
    gpu.LoadModule(km);

    int[] host_results = new int[N];
    for (int i = 0; i < N; i++)
    {
        host_results[i] = i * 3;
    }

    int[] dev_results = null;
    int[] dev_filled_results = null;
    try
    {
        // Two ways to get device memory: allocate an empty block, or upload a filled host array.
        dev_results = gpu.Allocate<int>(N);
        dev_filled_results = gpu.CopyToDevice(host_results);

        // 64 * 16 = 1024 threads per block (the maximum for sm_30).
        dim3 threadsPerBlock = new dim3(64, 16);
        // 8 * 8 = 64 blocks per grid, chosen only to produce varying indices.
        dim3 blocksPerGrid = new dim3(8, 8);

        gpu.Launch(blocksPerGrid, threadsPerBlock, "GenerateRipples", dev_results, dev_filled_results);
        gpu.CopyFromDevice(dev_results, host_results);
    }
    finally
    {
        // Neither buffer was released before; free what this call allocated.
        if (dev_filled_results != null)
        {
            gpu.Free(dev_filled_results);
        }
        if (dev_results != null)
        {
            gpu.Free(dev_results);
        }
    }
}
/// <summary>
/// Creates two ArrayView instances over the same 1000-element float array (offsets 100 and 200),
/// scales the array by 10, runs the Test2 kernel on device copies of both views, and prints
/// "Pass" when the first two results are 1050 and 7, otherwise "Fail".
/// </summary>
/// <param name="gpu">Device used for object creation, launch and copies.</param>
public static void Example2(GPGPU gpu)
{
    ArrayView firstView = new ArrayView();
    ArrayView secondView = new ArrayView();
    float[] values = Enumerable.Range(0, 1000).Select(t => (float)t).ToArray();

    // Both views reference the same array, differing only in the offset applied.
    firstView.CreateView(values, 100);
    secondView.CreateView(values, 200);

    // Scale after the views exist, so the views see the updated contents.
    for (int index = 0; index < 1000; ++index)
    {
        values[index] *= 10f;
    }

    // The shared array is expected to be uploaded once and referenced by each device-side view.
    var devFirstView = DeviceClassHelper.CreateDeviceObject(gpu, firstView);
    var devSecondView = DeviceClassHelper.CreateDeviceObject(gpu, secondView);

    var devResult = gpu.Allocate<float>(5);
    var hostResult = new float[5];
    gpu.Launch(1, 1).Test2(devFirstView, devSecondView, devResult);
    gpu.CopyFromDevice(devResult, hostResult);

    bool pass = (hostResult[0] == 1050f && hostResult[1] == 7f);
    if (pass)
    {
        Console.WriteLine("Pass");
    }
    else
    {
        Console.WriteLine("Fail");
    }
}
/// <summary>
/// Invokes and runs a single elementary kernel function selected by name.
/// </summary>
/// <param name="function">Name of the kernel function to launch.</param>
public static void Execute(string function)
{
    // The last index of each index array must equal the length of its sequence array.
    Debug.Assert(_indexes1.Last() == _sequencies1.Length);
    Debug.Assert(_indexes2.Last() == _sequencies2.Length);

    CudafyModule module = CudafyTranslator.Cudafy();
    GPGPU gpu = CudafyHost.GetDevice();
    gpu.LoadModule(module);

    // Upload the index and sequence arrays; the matrix is only allocated.
    int[] deviceIndexes1 = gpu.CopyToDevice(_indexes1);
    int[] deviceIndexes2 = gpu.CopyToDevice(_indexes2);
    int[] deviceSequencies1 = gpu.CopyToDevice(_sequencies1);
    int[] deviceSequencies2 = gpu.CopyToDevice(_sequencies2);
    int[,] deviceMatrix = gpu.Allocate(_matrix);

    int rows = _matrix.GetLength(0);
    int columns = _matrix.GetLength(1);

    // Grid and block share one size: the truncated cube root of the cell count, capped at 15.
    int launchSize = Math.Min(15, (int)Math.Pow((double)rows * columns, 0.33333333333));
    dim3 gridSize = launchSize;
    dim3 blockSize = launchSize;

    gpu.Launch(gridSize, blockSize, function, deviceSequencies1, deviceIndexes1, deviceSequencies2, deviceIndexes2, deviceMatrix);

    // Fetch the resulting matrix back to the host.
    gpu.CopyFromDevice(deviceMatrix, _matrix);

    // Release everything allocated on the device.
    gpu.FreeAll();
}
/// <summary>
/// Performs a merge sort on the GPU.
/// Usage example:
/// CudafySequencies.SetSequencies(arrayOfArray,arrayOfArray);
/// CudafySequencies.Execute("Compare");
/// var compare = CudafySequencies.GetMartix();
/// CudafyArray.SetArray(Enumerable.Range(0,n).ToArray());
/// CudafyArray.SetCompare(compare);
/// CudafyArray.MergeSort();
/// var indexesOfSorted = CudafyArray.GetArray();
/// </summary>
/// <param name="direction">Value forwarded to the MergeLinear kernel; defaults to 1.</param>
public static void MergeSort(int direction = 1)
{
    CudafyModule module = CudafyTranslator.Cudafy();
    GPGPU gpu = CudafyHost.GetDevice();
    gpu.LoadModule(module);

    int[] deviceA = gpu.Allocate(_a);
    int[] deviceB = gpu.Allocate(_b);
    gpu.CopyToDevice(_a, deviceA);

    for (int pass = 0; pass < _ceiling; pass++)
    {
        // Grid and block share one size: truncated cube root of ((_length >> pass) + pass), capped at 15.
        int launchSize = Math.Min(15, (int)Math.Pow((_length >> pass) + pass, 0.333333333333));

        // The two buffers swap roles on every pass: even passes go A -> B, odd passes B -> A.
        bool evenPass = (pass & 1) == 0;
        int[] source = evenPass ? deviceA : deviceB;
        int[] target = evenPass ? deviceB : deviceA;

        gpu.Launch(launchSize, launchSize).MergeLinear(source, target, pass, 0, _length, direction);
    }

    // After _ceiling passes the output sits in A when _ceiling is even, otherwise in B.
    int[] sorted = ((_ceiling & 1) == 0) ? deviceA : deviceB;
    gpu.CopyFromDevice(sorted, _a);

    // Release everything allocated on the device.
    gpu.FreeAll();
}
/// <summary>
/// Runs the ModulAtomic kernel over KONSTANTA_THREAD blocks, compacts every result that is
/// not -1 into a host list, and prints the total elapsed whole seconds.
/// </summary>
public static void primaGPU()
{
    CudafyModule modul_kernel = CudafyTranslator.Cudafy();
    GPGPU vga = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
    vga.LoadModule(modul_kernel);

    Stopwatch waktu = new Stopwatch();
    waktu.Start();

    int[] list_cpu = new int[KONSTANTA_THREAD];
    int[] list_cpy = new int[KONSTANTA_THREAD];
    int[] list = vga.Allocate<int>(KONSTANTA_THREAD);

    vga.Launch(KONSTANTA_THREAD, 1).ModulAtomic(list);
    vga.CopyFromDevice(list, list_cpy);
    vga.FreeAll();

    // Keep only the entries the kernel did not mark with -1.
    int index = 0;
    for (int z = 0; z < list_cpy.Length; z++)
    {
        if (list_cpy[z] != -1)
        {
            list_cpu[index] = list_cpy[z];
            index++;
        }
    }

    waktu.Stop();
    TimeSpan ts = waktu.Elapsed;

    // TimeSpan.Seconds is only the 0-59 seconds component; report the total elapsed whole seconds.
    String total = ((long)ts.TotalSeconds).ToString();
    Console.WriteLine("Total GPU ------ {0} detik> ", total);
}
/// <summary>
/// When <paramref name="xDir"/> is set, launches the DecTriangle and IncTriangle kernels for each
/// horizontal strip of half a block's height; then copies the device energy-diff buffer back to
/// the host and returns the result of GetMinIndAndValue.
/// </summary>
/// <param name="energy">Not read by this method.</param>
/// <param name="energyDiff">Forwarded to GetMinIndAndValue.</param>
/// <param name="width">Current image width.</param>
/// <param name="height">Current image height.</param>
/// <param name="xDir">Kernels are launched only when this is true.</param>
/// <param name="sMin">Set by GetMinIndAndValue.</param>
/// <returns>The value returned by GetMinIndAndValue.</returns>
protected override int CalculateEnergyDiffParallel(byte* energy, int* energyDiff, int width, int height, bool xDir, out int sMin)
{
    if (xDir)
    {
        int halfBlock = BlockSize / 2;
        int decBlocks = (width + BlockSize - 1) / BlockSize;
        int incBlocks = ((width / BlockSize) == halfBlock) ? decBlocks : decBlocks + 1;
        int stripCount = (height + halfBlock - 1) / halfBlock;

        for (int strip = 0; strip < stripCount; strip++)
        {
            int decStart = strip * halfBlock;
            int yStop = Math.Min(decStart + halfBlock, height);
            _gpu.Launch(decBlocks, 1).DecTriangle(_energyGPU, _energyDiffGPU, BlockSize, decBlocks, width, height, _initWidth, decStart, yStop);

            // The second kernel starts one row lower, clamped to the image height.
            int incStart = Math.Min(decStart + 1, height);
            _gpu.Launch(incBlocks, 1).IncTriangle(_energyGPU, _energyDiffGPU, BlockSize, incBlocks, width, height, _initWidth, incStart, yStop);
        }
    }

    _gpu.CopyFromDevice(_energyDiffGPU, energyDiffArray);
    return GetMinIndAndValue(energyDiff, width, height, xDir, out sMin);
}
/// <summary>
/// Runs the fungsiAtomic kernel over KONSTANTA_THREAD blocks, copies the result back to the
/// host, and prints the total elapsed time in milliseconds.
/// </summary>
public static void eksekusi()
{
    CudafyModule kernel_modul = CudafyTranslator.Cudafy();
    GPGPU vga = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
    vga.LoadModule(kernel_modul);

    Stopwatch waktu = new Stopwatch();
    waktu.Start();

    int[] array_vga = vga.Allocate<int>(KONSTANTA_THREAD);
    int[] array_hasil = new int[KONSTANTA_THREAD];

    vga.Launch(KONSTANTA_THREAD, 1).fungsiAtomic(array_vga);
    vga.CopyFromDevice(array_vga, array_hasil);

    // One FreeAll is enough; the duplicate call was redundant.
    vga.FreeAll();

    waktu.Stop();

    // TimeSpan.Milliseconds is only the 0-999 component, so anything over one second was
    // misreported. Stopwatch.ElapsedMilliseconds is the total elapsed time.
    String total = waktu.ElapsedMilliseconds.ToString();
    Console.WriteLine("Total VGA ------ > " + total);
}
/// <summary>
/// Probes whether the given OpenCL device computes with doubles by running add_double(2.5, 7.5)
/// on it and checking that the result is 10.0.
/// </summary>
/// <param name="DeviceId">Zero-based OpenCL device index.</param>
/// <returns>
/// true when the kernel ran and produced 10.0; false when the index is out of range, the
/// result differs, or any step throws.
/// </returns>
public static bool TestGpuDoublePrecision(int DeviceId)
{
    // Valid ids are 0 .. count - 1. The previous `>` check let DeviceId == count through
    // and did not reject negative ids.
    if (DeviceId < 0 || DeviceId >= CudafyHost.GetDeviceCount(eGPUType.OpenCL))
    {
        return false;
    }

    try
    {
        CudafyModes.Target = eGPUType.OpenCL;
        CudafyTranslator.Language = eLanguage.OpenCL;
        CudafyModule km = CudafyTranslator.Cudafy();

        GPGPU gpu = CudafyHost.GetDevice(eGPUType.OpenCL, DeviceId);
        gpu.LoadModule(km);

        double c;
        double[] dev_c = gpu.Allocate<double>();
        try
        {
            gpu.Launch().add_double(2.5d, 7.5d, dev_c);
            gpu.CopyFromDevice(dev_c, out c);
        }
        finally
        {
            // Free the probe buffer even when the launch or the copy throws.
            gpu.Free(dev_c);
        }

        return c == 10.0d;
    }
    catch
    {
        // Deliberate best-effort probe: any failure means "not supported".
        return false;
    }
}
/// <summary>
/// Reduces the matrix to "canonical" form by the Gauss-Jordan method, i.e. to the matrix obtained
/// through equivalent row transformations for which the following holds: if i is the index of the
/// first non-zero value in a row, then every other row of the matrix contains only zero at index i.
/// Evidently, if a row has no first non-zero index (-1), the whole row is zero.
/// Reduction to canonical form is used when solving systems of linear equations and when
/// searching for the fundamental system of solutions of a system of linear equations.
/// This implementation works on a matrix over the field GF(2), i.e. a boolean matrix.
/// </summary>
public static void ExecuteGaussJordan()
{
    CudafyModule module = CudafyTranslator.Cudafy();
    GPGPU gpu = CudafyHost.GetDevice();
    gpu.LoadModule(module);

    // Device mirrors of the host arrays; only _a is uploaded.
    int[,] current = gpu.Allocate(_a);
    int[,] scratch = gpu.Allocate(_b);
    int[] deviceC = gpu.Allocate(_c);
    int[] deviceD = gpu.Allocate(_d);
    int[] deviceE = gpu.Allocate(E);
    gpu.CopyToDevice(_a, current);

    int rows = _a.GetLength(0);
    int columns = _a.GetLength(1);
    int limit = Math.Min(rows, columns);

    // Grid and block share one size: the truncated cube root of the cell count, capped at 15.
    int launchSize = Math.Min(15, (int)Math.Pow(rows * columns, 0.33333333333));
    dim3 gridSize = launchSize;
    dim3 blockSize = launchSize;

    gpu.Launch(gridSize, blockSize, "RepeatZero", current, scratch, deviceC, deviceD, deviceE);

    int row = 0;
    while (row < limit)
    {
        gpu.Launch(gridSize, blockSize, "IndexOfNonZero", current, scratch, deviceC, deviceD, deviceE);
        gpu.CopyFromDevice(deviceC, _c);

        // Skip rows whose first-non-zero index is -1.
        while (row < limit && _c[row] == -1)
        {
            row++;
        }
        if (row >= limit)
        {
            break;
        }

        int pivotColumn = _c[row];
        gpu.Launch(gridSize, blockSize, "BooleanGaussJordan", current, scratch, row, pivotColumn);

        // Swap the two matrix buffers for the next iteration.
        int[,] swap = current;
        current = scratch;
        scratch = swap;

        row++;
    }

    gpu.CopyFromDevice(current, _a);

    // Release everything allocated on the device.
    gpu.FreeAll();
}
/// <summary>
/// Copies the units, camera and image-byte buffers from the device back into their host arrays.
/// The elapsed time is measured but not reported.
/// </summary>
public static void GetData()
{
    Stopwatch timer = Stopwatch.StartNew();

    gpu.CopyFromDevice(dev_units, units);
    gpu.CopyFromDevice(dev_camera, camera);
    gpu.CopyFromDevice(dev_imageBytes, imageBytes);

    // Kept for inspection in the debugger; an earlier note recorded 8-9 ms here.
    double elapsedMs = timer.ElapsedMilliseconds;
}
/// <summary>
/// Loads the cached ray-tracing module (or deserializes and re-caches it), fills constant memory
/// with SPHERES randomly generated spheres, runs "kernel" on a DIM/16 x DIM/16 grid of 16x16 blocks,
/// and copies the rendered pixels back into <paramref name="bitmap"/>.
/// </summary>
/// <param name="bitmap">Host pixel buffer; sizes the device buffer and receives the output.</param>
public static void Execute(byte[] bitmap)
{
    DateTime dt = DateTime.Now;
    CudafyModule km = CudafyModule.TryDeserialize(csFILENAME);

    // Check the module exists and matches the .NET modules, else make a new one.
    if (km == null || !km.TryVerifyChecksums())
    {
        Console.WriteLine("There was no cached module available so we make a new one.");
        km = CudafyModule.Deserialize(typeof(ray_serialize).Name);
        km.Serialize(csFILENAME);
    }

    GPGPU gpu = CudafyHost.GetGPGPU(CudafyModes.Target, 1);
    gpu.LoadModule(km);

    // TimeSpan.Milliseconds is only the 0-999 component; report the total elapsed whole milliseconds.
    TimeSpan loadTime = DateTime.Now.Subtract(dt);
    Console.WriteLine("Time taken to load module: {0}ms", (long)loadTime.TotalMilliseconds);

    // Capture the start time.
    gpu.StartTimer();

    // Allocate device memory for the bitmap (same size as the host buffer).
    byte[] dev_bitmap = gpu.Allocate(bitmap);

    // Build the sphere set on the host, then copy it to constant memory on the GPU.
    Sphere[] temp_s = new Sphere[SPHERES];
    for (int i = 0; i < SPHERES; i++)
    {
        temp_s[i].r = rnd(1.0f);
        temp_s[i].g = rnd(1.0f);
        temp_s[i].b = rnd(1.0f);
        temp_s[i].x = rnd(1000.0f) - 500;
        temp_s[i].y = rnd(1000.0f) - 500;
        temp_s[i].z = rnd(1000.0f) - 500;
        temp_s[i].radius = rnd(100.0f) + 20;
    }
    gpu.CopyToConstantMemory(temp_s, s);

    // Generate a bitmap from the sphere data.
    dim3 grids = new dim3(DIM / 16, DIM / 16);
    dim3 threads = new dim3(16, 16);
    gpu.Launch(grids, threads, "kernel", dev_bitmap);

    // Copy the bitmap back from the GPU for display.
    gpu.CopyFromDevice(dev_bitmap, bitmap);

    // Get the stop time and display the timing results.
    float elapsedTime = gpu.StopTimer();
    Console.WriteLine("Time to generate: {0} ms", elapsedTime);

    gpu.DeviceFreeAll();
}
/// <summary>
/// Loads (or translates and caches) the OpenCL ray-tracing module, fills constant memory with
/// SPHERES randomly generated spheres, runs the strongly typed kernel on a grid of 16x16 blocks,
/// and copies the rendered pixels back into <paramref name="bitmap"/>.
/// </summary>
/// <param name="bitmap">Host pixel buffer; sizes the device buffer and receives the output.</param>
public static void Execute(byte[] bitmap)
{
    // Reuse the serialized module when it exists and still matches the .NET assembly.
    CudafyModule module = CudafyModule.TryDeserialize();
    if (module == null || !module.TryVerifyChecksums())
    {
        module = CudafyTranslator.Cudafy(typeof(SphereOpenCL), typeof(ray_opencl_const));
        module.TrySerialize();
    }

    GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
    gpu.LoadModule(module);

    // Timing covers allocation, sphere setup, launch and read-back.
    gpu.StartTimer();

    // Device buffer with the same size as the host bitmap.
    byte[] deviceBitmap = gpu.Allocate(bitmap);

    // Build the sphere set on the host, then push it to constant memory.
    SphereOpenCL[] hostSpheres = new SphereOpenCL[SPHERES];
    for (int index = 0; index < SPHERES; index++)
    {
        hostSpheres[index].r = rnd(1.0f);
        hostSpheres[index].g = rnd(1.0f);
        hostSpheres[index].b = rnd(1.0f);
        hostSpheres[index].x = rnd(1000.0f) - 500;
        hostSpheres[index].y = rnd(1000.0f) - 500;
        hostSpheres[index].z = rnd(1000.0f) - 500;
        hostSpheres[index].radius = rnd(100.0f) + 20;
    }
    gpu.CopyToConstantMemory(hostSpheres, spheres);

    // One 16x16 block per 16x16 pixel tile.
    int blocksPerSide = ray_gui.DIM / 16;
    dim3 grid = new dim3(blocksPerSide, blocksPerSide);
    dim3 block = new dim3(16, 16);

    // Strongly typed launch.
    gpu.Launch(grid, block, ((Action<GThread, byte[]>)thekernel), deviceBitmap);

    // Bring the rendered pixels back for display.
    gpu.CopyFromDevice(deviceBitmap, bitmap);

    float elapsedMs = gpu.StopTimer();
    Console.WriteLine("Time to generate: {0} ms", elapsedMs);

    gpu.FreeAll();
}
/// <summary>
/// Uploads both measurement arrays, runs calculateDataWithCudafy with one block per previous
/// measurement, and copies the first device buffer back over <paramref name="prevMeasures"/>.
/// </summary>
/// <param name="prevMeasures">Previous measurements; overwritten with the kernel output.</param>
/// <param name="actMeasures">Current measurements.</param>
/// <returns><paramref name="prevMeasures"/> after the update, or null when either argument is null.</returns>
public static float[] prepareAndCalculateFloatData(float[] prevMeasures, float[] actMeasures)
{
    // Nothing to compute without both inputs.
    if ((prevMeasures == null) || (actMeasures == null))
    {
        return null;
    }

    float[] devPrevious = gpu.Allocate<float>(prevMeasures);
    float[] devActual = gpu.Allocate<float>(actMeasures);
    gpu.CopyToDevice(prevMeasures, devPrevious);
    gpu.CopyToDevice(actMeasures, devActual);

    gpu.Launch(prevMeasures.Length, 1).calculateDataWithCudafy(devPrevious, devActual);

    gpu.CopyFromDevice(devPrevious, prevMeasures);
    gpu.FreeAll();

    return prevMeasures;
}
/// <summary>
/// Adds two N-element int arrays on the GPU (a[i] = i, b[i] = 2 * i), verifies every sum on
/// the host, and prints "We did it!" on success or the first mismatch otherwise.
/// </summary>
public static void Execute()
{
    // Translate this class to device code, compile it and load it on the selected device.
    CudafyModule module = CudafyTranslator.Cudafy();
    GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
    gpu.LoadModule(module);

    int[] a = new int[N];
    int[] b = new int[N];
    int[] c = new int[N];

    // The output buffer is allocated before the inputs are filled and uploaded.
    int[] deviceC = gpu.Allocate<int>(c);

    for (int index = 0; index < N; index++)
    {
        a[index] = index;
        b[index] = 2 * index;
    }

    int[] deviceA = gpu.CopyToDevice(a);
    int[] deviceB = gpu.CopyToDevice(b);

    // 128 blocks of 128 threads each.
    gpu.Launch(128, 128).add(deviceA, deviceB, deviceC);
    gpu.CopyFromDevice(deviceC, c);

    // Host-side verification; stop at the first mismatch.
    bool allMatch = true;
    for (int index = 0; index < N && allMatch; index++)
    {
        if ((a[index] + b[index]) != c[index])
        {
            Console.WriteLine("{0} + {1} != {2}", a[index], b[index], c[index]);
            allMatch = false;
        }
    }

    if (allMatch)
    {
        Console.WriteLine("We did it!");
    }

    // Release everything allocated on the device.
    gpu.FreeAll();
}
/// <summary>
/// Runs the add and sub kernels on the constants 2 and 7 using a single-int device buffer and
/// prints both results.
/// </summary>
public static void Execute()
{
    CudafyModule module = CudafyTranslator.Cudafy();
    GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
    gpu.LoadModule(module);

    int answer = 0;
    int[] deviceAnswer = gpu.Allocate<int>(); // a single Int32 on the device

    // Equivalent explicit form: gpu.Launch(1, 1, "add", 2, 7, deviceAnswer);
    gpu.Launch().add(2, 7, deviceAnswer);
    gpu.CopyFromDevice(deviceAnswer, out answer);
    Console.WriteLine("2 + 7 = {0}", answer);

    // The same device slot is reused for the subtraction.
    gpu.Launch().sub(2, 7, deviceAnswer);
    gpu.CopyFromDevice(deviceAnswer, out answer);
    Console.WriteLine("2 - 7 = {0}", answer);

    gpu.Free(deviceAnswer);
}
/// <summary>
/// Translates the TextInsertion type for the CUDA device, prints the compiler output, launches
/// "AHybridMethod" on two 64-int device buffers, and prints "Failed" if the data copied back
/// differs from the host input.
/// </summary>
public static void Execute()
{
    _gpu = CudafyHost.GetDevice(eGPUType.Cuda);
    CudafyModule module = CudafyTranslator.Cudafy(ePlatform.Auto, _gpu.GetArchitecture(), typeof(TextInsertion));
    Console.WriteLine(module.CompilerOutput);
    _gpu.LoadModule(module);

    int[] hostInput = new int[64];
    int[] deviceInput = _gpu.CopyToDevice(hostInput);
    int[] deviceOutput = _gpu.Allocate(hostInput);
    int[] readBack = new int[64];

    _gpu.Launch(1, 1, "AHybridMethod", deviceInput, deviceOutput);

    // NOTE(review): this reads back the *input* device buffer rather than deviceOutput.
    // Confirm that is the intended check.
    _gpu.CopyFromDevice(deviceInput, readBack);

    int position = 0;
    while (position < 64)
    {
        if (hostInput[position] != readBack[position])
        {
            Console.WriteLine("Failed");
            break;
        }
        position++;
    }
}
/// <summary>
/// Runs the "SIMDFunctionTest" kernel three times over freshly filled 1024 x 1024 uint buffers,
/// printing the timer value each time. On the first run the output is also checked against the
/// host-side GThread.vadd2 and a pass/fail line is printed.
/// </summary>
public static void Execute()
{
    _gpu = CudafyHost.GetDevice(eGPUType.Cuda);
    CudafyModule module = CudafyTranslator.Cudafy(ePlatform.Auto, _gpu.GetArchitecture(), typeof(SIMDFunctions));
    _gpu.LoadModule(module);

    int w = 1024;
    int h = 1024;
    int elementCount = w * h;

    for (int run = 0; run < 3; run++)
    {
        uint[] a = new uint[elementCount];
        Fill(a);
        uint[] devA = _gpu.CopyToDevice(a);

        uint[] b = new uint[elementCount];
        Fill(b);
        uint[] devB = _gpu.CopyToDevice(b);

        uint[] c = new uint[elementCount];
        uint[] devC = _gpu.Allocate(c);

        // The timed region covers the launch and the read-back.
        _gpu.StartTimer();
        _gpu.Launch(h, w, "SIMDFunctionTest", devA, devB, devC);
        _gpu.CopyFromDevice(devC, c);
        float elapsed = _gpu.StopTimer();
        Console.WriteLine("Time: {0}", elapsed);

        // Only the first run is verified on the host.
        if (run == 0)
        {
            GThread hostThread = new GThread(1, 1, null);
            bool passed = true;
            for (int index = 0; index < elementCount; index++)
            {
                uint expected = hostThread.vadd2(a[index], b[index]);
                if (expected != c[index])
                {
                    passed = false;
                }
            }
            Console.WriteLine("Test {0}", passed ? "passed. " : "failed!");
        }

        _gpu.FreeAll();
    }
}
/// <summary>
/// Fills two arrays with random doubles, runs the Test2 kernel with one block per element, and
/// copies the result back to the host before freeing the three device buffers.
/// </summary>
/// <param name="gpu">Device used for allocation, launch and copies.</param>
/// <param name="threads">Element count of each array and the launch grid size.</param>
public static void Example1(GPGPU gpu, int threads)
{
    double[] first = new double[threads];
    double[] second = new double[threads];

    // Values are drawn alternately: first[i], then second[i].
    Random random = new Random();
    int index = 0;
    while (index < threads)
    {
        first[index] = random.NextDouble();
        second[index] = random.NextDouble();
        index++;
    }

    double[] devFirst = gpu.CopyToDevice(first);
    double[] devSecond = gpu.CopyToDevice(second);

    double[] result = new double[threads];
    var devResult = gpu.Allocate<double>(result);

    gpu.Launch(threads, 1).Test2(devFirst, devSecond, devResult);
    gpu.CopyFromDevice(devResult, result);

    gpu.Free(devFirst);
    gpu.Free(devSecond);
    gpu.Free(devResult);
}
/// <summary>
/// Uploads A and B, launches the GPU_MA kernel over the flattened data, and copies the result into C.
/// </summary>
/// <param name="A">First input array.</param>
/// <param name="B">Second input array.</param>
/// <param name="C">Receives the kernel output.</param>
/// <param name="Size">Forwarded to the kernel.</param>
/// <param name="Size1d">Flattened element count; drives the launch configuration.</param>
/// <param name="gpu">Device used for allocation, launch and copies.</param>
/// <param name="max_threadsPerBlock">Upper bound on threads per block.</param>
/// <returns>Always 1.</returns>
public static int MA(int[] A, int[] B, int[] C, int Size, int Size1d, GPGPU gpu, int max_threadsPerBlock)
{
    // Device-side mirrors of the three arrays.
    int[] devA = gpu.Allocate<int>(A);
    int[] devB = gpu.Allocate<int>(B);
    int[] devC = gpu.Allocate<int>(C);

    // Only the inputs need uploading.
    gpu.CopyToDevice(A, devA);
    gpu.CopyToDevice(B, devB);

    // A single block when everything fits; otherwise full blocks plus one extra.
    bool fitsInOneBlock = Size1d < max_threadsPerBlock;
    int threadsPerBlock = fitsInOneBlock ? Size1d : max_threadsPerBlock;
    int blocksPerGrid = fitsInOneBlock ? 1 : (Size1d / max_threadsPerBlock) + 1;

    // NOTE(review): the thread count is passed first and the block count second. Other call
    // sites in this codebase pass (grid, block). Confirm the order is intentional.
    gpu.Launch(threadsPerBlock, blocksPerGrid).GPU_MA(devA, devB, devC, Size, Size1d);

    // Fetch the result and release the device buffers.
    gpu.CopyFromDevice(devC, C);
    gpu.Free(devA);
    gpu.Free(devB);
    gpu.Free(devC);

    return 1;
}
/// <summary>
/// Menu handler for the simple linear regression ("SLR") analysis.
/// Shows the column-selection dialog, and when exactly two columns are chosen
/// (first = y, second = x) computes the sums Σxy, Σx, Σy and Σx² on the GPU and
/// stores the slope in <c>ab[0]</c> and the intercept in <c>ab[1]</c> as strings.
/// The result dialog is shown whenever the selection dialog was confirmed.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void meanToolStripMenuItem_Click(object sender, EventArgs e)
{
    Form dlg1 = new AnalyzeForm();
    DialogResult dr = dlg1.ShowDialog();

    for (int ix = 0; ix < Data.columnChoosen.Length; ix++)
    {
        if (Data.columnChoosen[ix] != -1)
        {
            columnChoosen.Add(Data.variableView[Data.columnChoosen[ix]].nama);
        }
    }
    judul = "SLR";

    if (dr != DialogResult.OK)
    {
        dlg1.Close();
        return;
    }

    if (columnChoosen.Count == 2)
    {
        int columny = Data.columnChoosen[0];
        int columnx = Data.columnChoosen[1];
        try
        {
            // Get the first CUDA device and load our module
            CudafyModule km = CudafyTranslator.Cudafy(eArchitecture.sm_20);
            _gpu = CudafyHost.GetDevice(eGPUType.Cuda);
            _gpu.LoadModule(km);

            int Ny = jumlahthread(columny);
            int Nx = jumlahthread(columnx);
            int N = Math.Max(Ny, Nx);

            float[] ay = new float[Ny];
            float[] by = new float[Ny];
            float[] ax = new float[Nx];
            float[] bx = new float[Nx];
            float[] c = new float[N];

            // fill the arrays on the CPU
            int jumlahDatay = jumlahdata(columny, Ny);
            int jumlahDatax = jumlahdata(columnx, Nx);
            ay = InitData(1, columny, Ny, ay, by);
            by = InitData(2, columny, Ny, ay, by);
            ax = InitData(1, columnx, Nx, ax, bx);
            bx = InitData(2, columnx, Nx, ax, bx);

            // Replace user-defined missing values by 0 and count them.
            // NOTE(review): only ay/ax are scanned, by/bx are not - kept as in the
            // original because what InitData(2, ...) returns is not visible here.
            int missingCounty = 0;
            int missingCountx = 0;
            float temp, temp2;

            var missingCodesY = Data.variableView[columny].missing;
            for (int code = 0; code < missingCodesY.Count; code++)
            {
                // A code that does not parse cannot equal any value; without this check
                // the failed-parse default of 0 would flag every zero as missing.
                if (!float.TryParse(missingCodesY[code], out temp))
                {
                    continue;
                }
                for (int i = 0; i < Ny; i++)
                {
                    if (ay[i] == temp)
                    {
                        ay[i] = 0;
                        missingCounty++;
                    }
                }
            }

            var missingCodesX = Data.variableView[columnx].missing;
            for (int code = 0; code < missingCodesX.Count; code++)
            {
                if (!float.TryParse(missingCodesX[code], out temp))
                {
                    continue;
                }
                for (int i = 0; i < Nx; i++)
                {
                    if (ax[i] == temp)
                    {
                        ax[i] = 0;
                        missingCountx++;
                    }
                }
            }

            var missingRangeY = Data.variableView[columny].missingRange;
            if (missingRangeY.Count > 1
                && float.TryParse(missingRangeY[0], out temp)
                && float.TryParse(missingRangeY[1], out temp2))
            {
                for (int i = 0; i < Ny; i++)
                {
                    if (ay[i] >= temp && ay[i] <= temp2)
                    {
                        ay[i] = 0;
                        missingCounty++;
                    }
                }
            }

            var missingRangeX = Data.variableView[columnx].missingRange;
            if (missingRangeX.Count > 1
                && float.TryParse(missingRangeX[0], out temp)
                && float.TryParse(missingRangeX[1], out temp2))
            {
                for (int i = 0; i < Nx; i++)
                {
                    if (ax[i] >= temp && ax[i] <= temp2)
                    {
                        ax[i] = 0;
                        // Fixed: this loop scans the x column, so it must bump the x counter
                        // (it used to increment missingCounty).
                        missingCountx++;
                    }
                }
            }
            Debug.WriteLine("y : " + missingCounty + "\nx : " + missingCountx);

            // ---- sum of x*y -------------------------------------------------------
            // NOTE(review): the kernel is launched over N = max(Ny, Nx) elements while
            // the inputs hold Ny and Nx elements; confirm both columns always have the
            // same length.
            float[] dev_a = _gpu.CopyToDevice(ay);
            float[] dev_b = _gpu.CopyToDevice(ax);
            float[] dev_c = _gpu.Allocate<float>(c);
            _gpu.Launch((N + 127) / 128, 128).multiplyVector(dev_a, dev_b, dev_c, N);
            float[] save1 = new float[N];
            _gpu.CopyFromDevice(dev_c, save1);

            dev_a = _gpu.CopyToDevice(by);
            dev_b = _gpu.CopyToDevice(bx);
            dev_c = _gpu.Allocate<float>(c);
            _gpu.Launch((N + 127) / 128, 128).multiplyVector(dev_a, dev_b, dev_c, N);
            float[] save2 = new float[N];
            _gpu.CopyFromDevice(dev_c, save2);

            dev_a = _gpu.CopyToDevice(save1);
            dev_b = _gpu.CopyToDevice(save2);
            dev_c = _gpu.Allocate<float>(c);
            float sumxy = jumlahan(N, dev_a, dev_b, dev_c, c);

            // ---- sum of x ---------------------------------------------------------
            c = new float[Nx];
            dev_a = _gpu.CopyToDevice(ax);
            dev_b = _gpu.CopyToDevice(bx);
            dev_c = _gpu.Allocate<float>(c);
            float sumx = jumlahan(Nx, dev_a, dev_b, dev_c, c);

            // ---- sum of y ---------------------------------------------------------
            c = new float[Ny];
            dev_a = _gpu.CopyToDevice(ay);
            dev_b = _gpu.CopyToDevice(by);
            dev_c = _gpu.Allocate<float>(c);
            // Fixed: the y buffers hold Ny elements, so reduce over Ny (was Nx).
            float sumy = jumlahan(Ny, dev_a, dev_b, dev_c, c);

            // ---- sum of x squared -------------------------------------------------
            c = new float[N];
            dev_a = _gpu.CopyToDevice(ax);
            dev_b = _gpu.CopyToDevice(ax);
            dev_c = _gpu.Allocate<float>(c);
            _gpu.Launch((N + 127) / 128, 128).multiplyVector(dev_a, dev_b, dev_c, Nx);
            save1 = new float[N];
            _gpu.CopyFromDevice(dev_c, save1);

            dev_a = _gpu.CopyToDevice(bx);
            dev_b = _gpu.CopyToDevice(bx);
            dev_c = _gpu.Allocate<float>(c);
            _gpu.Launch((N + 127) / 128, 128).multiplyVector(dev_a, dev_b, dev_c, Nx);
            save2 = new float[N];
            _gpu.CopyFromDevice(dev_c, save2);

            dev_a = _gpu.CopyToDevice(save1);
            dev_b = _gpu.CopyToDevice(save2);
            dev_c = _gpu.Allocate<float>(c);
            float sumxquad = jumlahan(Nx, dev_a, dev_b, dev_c, c);

            // Buffers are not freed one by one above; release everything at once.
            _gpu.FreeAll();

            // NOTE(review): the slope uses the larger raw data count, without
            // subtracting the missing counts used for the means below - confirm.
            float jumlahData = Math.Max(jumlahDatax, jumlahDatay);

            float beta = ((jumlahData * sumxy) - (sumx * sumy)) / ((jumlahData * sumxquad) - (sumx * sumx));
            ab[0] = beta.ToString();

            // Fixed: the mean of x is sumx / (count - missing); the parentheses were
            // missing, so missingCountx was subtracted from the quotient instead.
            float alpha = (sumy / (jumlahDatay - missingCounty)) - beta * (sumx / (jumlahDatax - missingCountx));
            ab[1] = alpha.ToString();
        }
        catch (CudafyLanguageException cle)
        {
            // Previously swallowed silently; report it like the host exception below.
            Console.Write(cle.Message);
        }
        catch (CudafyCompileException cce)
        {
            Console.Write(cce.Message);
        }
        catch (CudafyHostException che)
        {
            Console.Write(che.Message);
        }
    }

    Form dialogResult = new ResultSLR();
    dialogResult.ShowDialog();
}
/// <summary>
/// Sums values of <paramref name="array"/> on the GPU by repeated pairwise addition.
/// The array is split at N = (row count of the current worksheet) / 2: elements
/// [0, N) and [N, 2N) are added element-wise, and the partial sums are folded in
/// half again until two values remain. Elements at index 2N and beyond are ignored,
/// and positions past the end of a shorter array count as 0.
/// No missing-value handling is applied here.
/// </summary>
/// <param name="array">Values to sum.</param>
/// <returns>
/// The sum described above, or 0 when a Cudafy language, compile or host
/// exception was caught.
/// </returns>
public float computeSum2(float[] array)
{
    try
    {
        // This 'smart' overload cudafies all members carrying the Cudafy attribute
        // in the calling type. The CUDA SDK and cl.exe must be available for it to work.
        CudafyModule km = CudafyTranslator.Cudafy(eArchitecture.sm_20);

        // Get the first CUDA device and load our module
        _gpu = CudafyHost.GetDevice(eGPUType.Cuda);
        _gpu.LoadModule(km);

        // The split point comes from the worksheet, not from the array length.
        int N = reoGridControl2.CurrentWorksheet.RowCount / 2;
        float[] a = new float[N];
        float[] b = new float[N];

        // Split the input into a lower and an upper half. The bounds checks keep an
        // array shorter than 2N from overrunning; the old "!= null" / ToString()
        // tests were always true for float elements and have been dropped.
        for (int i = 0; i < N; i++)
        {
            if (i < array.Length)
            {
                a[i] = array[i];
            }
            if (i + N < array.Length)
            {
                b[i] = array[i + N];
            }
        }

        // With fewer than two pairs the reduction loop never runs and there is no
        // c[1] to read, so answer directly on the host.
        if (N == 0)
        {
            return 0;
        }
        if (N == 1)
        {
            return a[0] + b[0];
        }

        float[] c = new float[N];
        float[] dev_a = _gpu.CopyToDevice(a);
        float[] dev_b = _gpu.CopyToDevice(b);
        float[] dev_c = _gpu.Allocate<float>(c);

        bool first = true;
        while (N > 1)
        {
            if (!first)
            {
                // Fold the previous partial sums: the first N values against the
                // remaining ones (zero-padded when the previous length was odd).
                float[] head = new float[N];
                float[] tail = new float[N];
                Array.Copy(c, 0, head, 0, N);
                Array.Copy(c, N, tail, 0, c.Length - N);

                dev_a = _gpu.CopyToDevice(head);
                dev_b = _gpu.CopyToDevice(tail);
                c = new float[N];
                dev_c = _gpu.Allocate<float>(c);
            }

            _gpu.Launch((N + 127) / 128, 128).addVector(dev_a, dev_b, dev_c, N);
            _gpu.CopyFromDevice(dev_c, c);

            _gpu.Free(dev_a);
            _gpu.Free(dev_b);
            _gpu.Free(dev_c);

            // Next pass works on half as many elements, rounded up.
            N = (N + 1) / 2;
            first = false;
        }

        _gpu.FreeAll();

        // The last pass always ran with N == 2, so exactly two partial sums remain.
        return c[0] + c[1];
    }
    catch (CudafyLanguageException cle)
    {
        // Previously swallowed silently; report it like the host exception below.
        Console.Write(cle.Message);
    }
    catch (CudafyCompileException cce)
    {
        Console.Write(cce.Message);
    }
    catch (CudafyHostException che)
    {
        Console.Write(che.Message);
    }
    return 0;
}
/// <summary>
/// Computes a standard deviation for one worksheet column on the GPU and appends it
/// to <c>results</c>. Steps: parse the column into two halves, compute the mean from
/// their GPU sum, run the <c>minusMean</c> kernel over the data, sum that kernel's
/// output on the GPU, and take the square root of that sum divided by
/// (<c>jumlahData</c> - 1). The fields <c>jumlahData</c> and <c>missingCount</c>
/// are overwritten. Cudafy language, compile and host exceptions are written to
/// the console and no result is added in that case.
/// </summary>
/// <param name="column">Index of the worksheet column to analyse.</param>
public void computeStdv(int column)
{
    try
    {
        // This 'smart' overload cudafies all members carrying the Cudafy attribute
        // in the calling type. The CUDA SDK and cl.exe must be available for it to work.
        CudafyModule km = CudafyTranslator.Cudafy(eArchitecture.sm_20);

        // Get the first CUDA device and load our module
        _gpu = CudafyHost.GetDevice(eGPUType.Cuda);
        _gpu.LoadModule(km);

        var sheet = reoGridControl2.CurrentWorksheet;

        // a receives the first half of the rows, b the second half.
        int N = sheet.RowCount / 2;
        float[] a = new float[N];
        float[] b = new float[N];

        // Split the column into a and b, counting the non-empty cells.
        jumlahData = 0;
        for (int i = 0; i < N; i++)
        {
            if (sheet[i, column] != null && sheet[i, column].ToString() != "")
            {
                float.TryParse(sheet[i, column].ToString(), out a[i]);
                jumlahData++;
            }
            if (sheet[i + N, column] != null && sheet[i + N, column].ToString() != "")
            {
                float.TryParse(sheet[i + N, column].ToString(), out b[i]);
                jumlahData++;
            }
        }

        // Replace user-defined missing values by 0 and count them. Both halves are
        // scanned: b holds the second half of the same column, which the original
        // code skipped.
        missingCount = 0;
        float[][] halves = { a, b };
        float temp, temp2;

        var missingCodes = Data.variableView[column].missing;
        for (int bx = 0; bx < missingCodes.Count; bx++)
        {
            // A code that does not parse cannot equal any value; without this check
            // the failed-parse default of 0 would flag every zero as missing.
            if (!float.TryParse(missingCodes[bx], out temp))
            {
                continue;
            }
            foreach (float[] half in halves)
            {
                for (int ax = 0; ax < half.Length; ax++)
                {
                    if (half[ax] == temp)
                    {
                        half[ax] = 0;
                        missingCount++;
                    }
                }
            }
        }

        var missingRange = Data.variableView[column].missingRange;
        if (missingRange.Count > 1
            && float.TryParse(missingRange[0], out temp)
            && float.TryParse(missingRange[1], out temp2))
        {
            foreach (float[] half in halves)
            {
                for (int ax = 0; ax < half.Length; ax++)
                {
                    if (half[ax] >= temp && half[ax] <= temp2)
                    {
                        half[ax] = 0;
                        missingCount++;
                    }
                }
            }
        }
        Debug.WriteLine(missingCount);

        // Mean = GPU sum of both halves / number of non-missing values.
        float[] mean = new float[1];
        mean[0] = StdvSumPairsOnGpu(a, b) / (jumlahData - missingCount);
        float[] dev_mean = _gpu.CopyToDevice(mean);

        float[] temp4 = new float[jumlahData];
        float[] dev_temp4 = _gpu.Allocate<float>(temp4);

        // Parse the data into one contiguous array for the kernel.
        // NOTE(review): this reads the first jumlahData rows and does not apply the
        // missing-value replacement above; if the column has gaps or missing codes,
        // the values fed to minusMean differ from those used for the mean - confirm.
        float[] data = new float[jumlahData];
        for (int i = 0; i < jumlahData; i++)
        {
            if (sheet[i, column] != null && sheet[i, column].ToString() != "")
            {
                float.TryParse(sheet[i, column].ToString(), out data[i]);
            }
        }
        float[] dev_data = _gpu.CopyToDevice(data);

        // x has one padding slot so it can always be split into two equal halves.
        // It is uploaded (all zeros) rather than merely allocated, so the padding
        // slot is defined even if the kernel does not write it.
        float[] x = new float[jumlahData + 1];
        float[] dev_x = _gpu.CopyToDevice(x);

        _gpu.Launch((jumlahData + 127) / 128, 128).minusMean(dev_mean, dev_data, dev_x, jumlahData, dev_temp4);
        _gpu.CopyFromDevice(dev_x, x);

        _gpu.Free(dev_mean);
        _gpu.Free(dev_data);
        _gpu.Free(dev_x);
        _gpu.Free(dev_temp4);

        // Split the kernel output into two halves and sum them on the GPU.
        // Direct copies replace the former float -> string -> float round trip,
        // which lost precision and tested x.ToString() (the array) by mistake.
        N = (jumlahData + 1) / 2;
        float[] isi1 = new float[N];
        float[] isi2 = new float[N];
        Array.Copy(x, 0, isi1, 0, N);
        Array.Copy(x, N, isi2, 0, N);

        float sumOfKernelOutput = StdvSumPairsOnGpu(isi1, isi2);

        float temp3 = (float)Math.Sqrt(sumOfKernelOutput / (jumlahData - 1));
        // Prints the value before the square root is taken.
        Debug.WriteLine("STDV = " + (sumOfKernelOutput / (jumlahData - 1)));
        results.Add(temp3);

        _gpu.FreeAll();
    }
    catch (CudafyLanguageException cle)
    {
        // Previously swallowed silently; report it like the host exception below.
        Console.Write(cle.Message);
    }
    catch (CudafyCompileException cce)
    {
        Console.Write(cce.Message);
    }
    catch (CudafyHostException che)
    {
        Console.Write(che.Message);
    }
}

// Returns the sum of lower[i] + upper[i] over all i, computed with the addVector
// kernel and repeated halving of the partial sums. Both arrays must have the same
// length. Replaces the two copies of the reduction loop in computeStdv; the second
// copy never cleared its first-pass flag and mixed up its buffers.
private float StdvSumPairsOnGpu(float[] lower, float[] upper)
{
    int n = lower.Length;

    // With fewer than two pairs the loop below would never run.
    if (n == 0)
    {
        return 0;
    }
    if (n == 1)
    {
        return lower[0] + upper[0];
    }

    float[] partial = new float[n];
    float[] devLower = _gpu.CopyToDevice(lower);
    float[] devUpper = _gpu.CopyToDevice(upper);
    float[] devPartial = _gpu.Allocate<float>(partial);

    bool firstPass = true;
    while (n > 1)
    {
        if (!firstPass)
        {
            // Fold the previous partial sums: the first n values against the
            // remaining ones (zero-padded when the previous length was odd).
            float[] head = new float[n];
            float[] tail = new float[n];
            Array.Copy(partial, 0, head, 0, n);
            Array.Copy(partial, n, tail, 0, partial.Length - n);

            devLower = _gpu.CopyToDevice(head);
            devUpper = _gpu.CopyToDevice(tail);
            partial = new float[n];
            devPartial = _gpu.Allocate<float>(partial);
        }

        _gpu.Launch((n + 127) / 128, 128).addVector(devLower, devUpper, devPartial, n);
        _gpu.CopyFromDevice(devPartial, partial);

        _gpu.Free(devLower);
        _gpu.Free(devUpper);
        _gpu.Free(devPartial);

        // Next pass works on half as many elements, rounded up.
        n = (n + 1) / 2;
        firstPass = false;
    }

    // The last pass always ran with n == 2, so exactly two partial sums remain.
    return partial[0] + partial[1];
}
/// <summary>
/// Menu handler for the GPU ("parallel") mean. Shows the column-selection dialog
/// and, for every chosen column, sums the column on the GPU by repeated pairwise
/// addition, divides by the number of non-missing values and appends the mean to
/// <c>results</c>. A host-side mean is computed as well and written to the debug
/// output for comparison. The result dialog is shown afterwards.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void meanPararelToolStripMenuItem_Click(object sender, EventArgs e)
{
    Form dlg1 = new AnalyzeForm();
    DialogResult dr = dlg1.ShowDialog();

    for (int ix = 0; ix < Data.columnChoosen.Length; ix++)
    {
        if (Data.columnChoosen[ix] != -1)
        {
            columnChoosen.Add(Data.variableView[Data.columnChoosen[ix]].nama);
        }
    }

    if (dr != DialogResult.OK)
    {
        dlg1.Close();
        return;
    }

    for (int index = 0; index < Data.columnChoosen.Length; index++)
    {
        if (Data.columnChoosen[index] == -1)
        {
            continue;
        }

        int column = Data.columnChoosen[index];
        try
        {
            // This 'smart' overload cudafies all members carrying the Cudafy attribute
            // in the calling type. The CUDA SDK and cl.exe must be available for it to work.
            CudafyModule km = CudafyTranslator.Cudafy(eArchitecture.sm_20);

            // Get the first CUDA device and load our module
            _gpu = CudafyHost.GetDevice(eGPUType.Cuda);
            _gpu.LoadModule(km);

            var sheet = reoGridControl2.CurrentWorksheet;

            // a receives the first half of the rows, b the second half.
            int N = sheet.RowCount / 2;
            float[] a = new float[N];
            float[] b = new float[N];

            // fill the arrays 'a' and 'b' on the CPU, counting the non-empty cells
            int jumlahData = 0;
            for (int i = 0; i < N; i++)
            {
                if (sheet[i, column] != null && sheet[i, column].ToString() != "")
                {
                    float.TryParse(sheet[i, column].ToString(), out a[i]);
                    jumlahData++;
                }
                if (sheet[i + N, column] != null && sheet[i + N, column].ToString() != "")
                {
                    float.TryParse(sheet[i + N, column].ToString(), out b[i]);
                    jumlahData++;
                }
            }

            // Replace user-defined missing values by 0 and count them. Both halves
            // are scanned: b holds the second half of the same column, which the
            // original code skipped.
            int missingCount = 0;
            float[][] halves = { a, b };
            float temp, temp2;

            var missingCodes = Data.variableView[column].missing;
            for (int bx = 0; bx < missingCodes.Count; bx++)
            {
                // A code that does not parse cannot equal any value; without this check
                // the failed-parse default of 0 would flag every zero as missing.
                if (!float.TryParse(missingCodes[bx], out temp))
                {
                    continue;
                }
                foreach (float[] half in halves)
                {
                    for (int ax = 0; ax < half.Length; ax++)
                    {
                        if (half[ax] == temp)
                        {
                            half[ax] = 0;
                            missingCount++;
                        }
                    }
                }
            }

            var missingRange = Data.variableView[column].missingRange;
            if (missingRange.Count > 1
                && float.TryParse(missingRange[0], out temp)
                && float.TryParse(missingRange[1], out temp2))
            {
                foreach (float[] half in halves)
                {
                    for (int ax = 0; ax < half.Length; ax++)
                    {
                        if (half[ax] >= temp && half[ax] <= temp2)
                        {
                            half[ax] = 0;
                            missingCount++;
                        }
                    }
                }
            }
            Debug.WriteLine(missingCount);

            // Host-side reference value, only used in the debug output below.
            float meanSequential = 0;
            for (int i = 0; i < N; i++)
            {
                meanSequential += a[i] + b[i];
            }
            meanSequential = meanSequential / (jumlahData - missingCount);

            float total;
            if (N < 2)
            {
                // With fewer than two pairs the reduction loop would never run and
                // there would be no c[1] to read, so sum on the host.
                total = (N == 1) ? a[0] + b[0] : 0;
            }
            else
            {
                float[] c = new float[N];
                float[] dev_a = _gpu.CopyToDevice(a);
                float[] dev_b = _gpu.CopyToDevice(b);
                float[] dev_c = _gpu.Allocate<float>(c);

                bool first = true;
                while (N > 1)
                {
                    if (!first)
                    {
                        // Fold the previous partial sums: the first N values against
                        // the remaining ones (zero-padded when the length was odd).
                        float[] head = new float[N];
                        float[] tail = new float[N];
                        Array.Copy(c, 0, head, 0, N);
                        Array.Copy(c, N, tail, 0, c.Length - N);

                        dev_a = _gpu.CopyToDevice(head);
                        dev_b = _gpu.CopyToDevice(tail);
                        c = new float[N];
                        dev_c = _gpu.Allocate<float>(c);
                    }

                    _gpu.Launch((N + 127) / 128, 128).addVector(dev_a, dev_b, dev_c, N);
                    _gpu.CopyFromDevice(dev_c, c);

                    _gpu.Free(dev_a);
                    _gpu.Free(dev_b);
                    _gpu.Free(dev_c);

                    // Next pass works on half as many elements, rounded up.
                    N = (N + 1) / 2;
                    first = false;
                }

                // The last pass always ran with N == 2, so two partial sums remain.
                total = c[0] + c[1];
            }

            float mean = total / (jumlahData - missingCount);
            Debug.WriteLine("mean-nya adalah " + mean + " mean dari sequensial adalah " + meanSequential);
            results.Add(mean);

            _gpu.FreeAll();
        }
        catch (CudafyLanguageException cle)
        {
            // Previously swallowed silently; report it like the host exception below.
            Console.Write(cle.Message);
        }
        catch (CudafyCompileException cce)
        {
            Console.Write(cce.Message);
        }
        catch (CudafyHostException che)
        {
            Console.Write(che.Message);
        }
    }

    Form dialogResult = new FormResultsMean();
    dialogResult.ShowDialog();
}
/// <summary>
/// Creates two <c>ArrayView</c> instances over one shared 1000-element float array
/// (offsets 100 and 200), scales the array by 10 afterwards, turns both views into
/// device objects, runs the <c>Test2</c> kernel on a single thread and prints
/// "Pass" when the first two results are 1050 and 7, otherwise "Fail".
/// </summary>
/// <param name="gpu">Device to run on. This method does not load a module itself.</param>
public static void Example2(GPGPU gpu)
{
    ArrayView firstView = new ArrayView();
    ArrayView secondView = new ArrayView();

    // 0, 1, ..., 999 as floats.
    float[] shared = new float[1000];
    for (int n = 0; n < shared.Length; n++)
    {
        shared[n] = (float)n;
    }

    // Two views of the same array, each simply applying an offset;
    // a slice could be used instead, for example.
    firstView.CreateView(shared, 100);
    secondView.CreateView(shared, 200);

    // The array is modified only after the views exist.
    for (int n = 0; n < 1000; ++n)
    {
        shared[n] *= 10f;
    }

    // Should copy the 'large' array to the device only once; it is referenced by
    // each ArrayView instance.
    var deviceFirst = DeviceClassHelper.CreateDeviceObject(gpu, firstView);
    var deviceSecond = DeviceClassHelper.CreateDeviceObject(gpu, secondView);

    var deviceOutput = gpu.Allocate<float>(5);
    var output = new float[5];

    gpu.Launch(1, 1).Test2(deviceFirst, deviceSecond, deviceOutput);
    gpu.CopyFromDevice(deviceOutput, output);

    if (output[0] == 1050f && output[1] == 7f)
    {
        Console.WriteLine("Pass");
    }
    else
    {
        Console.WriteLine("Fail");
    }
}
/// <summary>
/// Menu handler for the variance analysis. Shows the column-selection dialog and,
/// for every chosen column, computes the mean via <c>jumlahan</c>, runs the
/// <c>powerVector</c> kernel on both data halves with that mean, sums the kernel
/// output via <c>jumlahan</c> and appends
/// sum / (data count - missing count - 1) to <c>results</c>.
/// The result dialog is shown afterwards.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void varianceToolStripMenuItem_Click(object sender, EventArgs e)
{
    Form dlg1 = new AnalyzeForm();
    DialogResult dr = dlg1.ShowDialog();

    for (int ix = 0; ix < Data.columnChoosen.Length; ix++)
    {
        if (Data.columnChoosen[ix] != -1)
        {
            columnChoosen.Add(Data.variableView[Data.columnChoosen[ix]].nama);
        }
    }
    judul = "Variance";

    if (dr != DialogResult.OK)
    {
        dlg1.Close();
        return;
    }

    for (int index = 0; index < Data.columnChoosen.Length; index++)
    {
        if (Data.columnChoosen[index] == -1)
        {
            continue;
        }

        int column = Data.columnChoosen[index];
        try
        {
            // Get the first CUDA device and load our module
            CudafyModule km = CudafyTranslator.Cudafy(eArchitecture.sm_20);
            _gpu = CudafyHost.GetDevice(eGPUType.Cuda);
            _gpu.LoadModule(km);

            int N = jumlahthread(column);
            float[] a = new float[N];
            float[] b = new float[N];
            float[] c = new float[N];

            // fill the arrays 'a' and 'b' on the CPU
            int jumlahData = jumlahdata(column, N);
            a = InitData(1, column, N, a, b);
            b = InitData(2, column, N, a, b);

            // Replace user-defined missing values by 0 and count them.
            // NOTE(review): only 'a' is scanned, 'b' is not - kept as in the original
            // because what InitData(2, ...) returns is not visible here. Zeroed
            // entries are still passed to powerVector below - confirm that is intended.
            int missingCount = 0;
            float temp, temp2;

            var missingCodes = Data.variableView[column].missing;
            for (int bx = 0; bx < missingCodes.Count; bx++)
            {
                // A code that does not parse cannot equal any value; without this check
                // the failed-parse default of 0 would flag every zero as missing.
                if (!float.TryParse(missingCodes[bx], out temp))
                {
                    continue;
                }
                for (int ax = 0; ax < N; ax++)
                {
                    if (a[ax] == temp)
                    {
                        a[ax] = 0;
                        missingCount++;
                    }
                }
            }

            var missingRange = Data.variableView[column].missingRange;
            if (missingRange.Count > 1
                && float.TryParse(missingRange[0], out temp)
                && float.TryParse(missingRange[1], out temp2))
            {
                for (int ax = 0; ax < N; ax++)
                {
                    if (a[ax] >= temp && a[ax] <= temp2)
                    {
                        a[ax] = 0;
                        missingCount++;
                    }
                }
            }
            Debug.WriteLine(missingCount);

            // ---- mean -------------------------------------------------------------
            float[] dev_a = _gpu.CopyToDevice(a);
            float[] dev_b = _gpu.CopyToDevice(b);
            float[] dev_c = _gpu.Allocate<float>(c);
            float hasil = jumlahan(N, dev_a, dev_b, dev_c, c);
            float mean = hasil / (jumlahData - missingCount);
            _gpu.FreeAll();

            // ---- powerVector on each half -----------------------------------------
            c = new float[N];
            dev_a = _gpu.CopyToDevice(a);
            dev_b = _gpu.CopyToDevice(b);
            dev_c = _gpu.Allocate<float>(c);
            _gpu.Launch((N + 127) / 128, 128).powerVector(dev_a, dev_c, mean, N);
            _gpu.CopyFromDevice(dev_c, c);
            _gpu.Free(dev_a);
            _gpu.Free(dev_c);

            float[] d = new float[N];
            dev_c = _gpu.Allocate<float>(d);
            _gpu.Launch((N + 127) / 128, 128).powerVector(dev_b, dev_c, mean, N);
            _gpu.CopyFromDevice(dev_c, d);
            _gpu.Free(dev_b);
            _gpu.Free(dev_c);
            _gpu.FreeAll();

            // With an odd data count the last slot of the second half is cleared
            // before summing (presumably padding rather than data - confirm against
            // InitData).
            if (jumlahData % 2 != 0)
            {
                d[N - 1] = 0;
            }

            // ---- sum of the kernel output -----------------------------------------
            dev_a = _gpu.CopyToDevice(c);
            dev_b = _gpu.CopyToDevice(d);
            dev_c = _gpu.Allocate<float>(c);
            hasil = jumlahan(N, dev_a, dev_b, dev_c, c);

            // Release the buffers of this last phase too; the original left them
            // allocated, unlike the FreeAll that follows the first jumlahan call.
            _gpu.FreeAll();

            float variance = hasil / (jumlahData - missingCount - 1);
            results.Add(variance);
        }
        catch (CudafyLanguageException cle)
        {
            // Previously swallowed silently; report it like the host exception below.
            Console.Write(cle.Message);
        }
        catch (CudafyCompileException cce)
        {
            Console.Write(cce.Message);
        }
        catch (CudafyHostException che)
        {
            Console.Write(che.Message);
        }
    }

    Form dialogResult = new ResultForm();
    dialogResult.ShowDialog();
}