/// <summary>
/// Invokes and runs the GPU function that checks whether the array is sorted.
/// </summary>
/// <param name="direction">Forwarded unchanged as the last argument of both <c>Sorted</c> launches; defaults to 1.</param>
public static void ExecuteSorted(int direction = 1)
{
    CudafyModule module = CudafyTranslator.Cudafy();
    GPGPU device = CudafyHost.GetDevice();
    device.LoadModule(module);

    // Device buffers are sized after the host arrays; only _a is uploaded.
    int[] deviceA = device.Allocate(_a);
    int[] deviceB = device.Allocate(_b);
    int[] deviceC = device.Allocate(_c);
    int[] deviceD = device.Allocate(D);
    device.CopyToDevice(_a, deviceA);

    // Split runs on a single thread. Sorted is then launched twice: across the configured
    // grid with fifth argument 0, and on a single thread with fifth argument 1.
    device.Launch(1, 1).Split(deviceA, deviceB, deviceC, _middle);
    device.Launch(_gridSize, _blockSize).Sorted(deviceA, deviceB, deviceC, deviceD, 0, direction);
    device.Launch(1, 1).Sorted(deviceA, deviceB, deviceC, deviceD, 1, direction);

    // Read the result back into D, then release every allocation made on the device.
    device.CopyFromDevice(deviceD, D);
    device.FreeAll();
}
/// <summary>
/// Replaces <paramref name="dm"/> with a new cols x rows matrix built from data read back
/// from the GPU after two GEMV calls (one for the leading cols x cols block, one for the remaining rows).
/// </summary>
/// <param name="dm">Input matrix; set to null while the GPU work runs and reassigned to the new matrix at the end.</param>
public static void cudaTranspose(ref MathNet.Numerics.LinearAlgebra.Double.DenseMatrix dm)
{
    GPGPU gpu = CudafyHost.GetDevice(eGPUType.Cuda);
    GPGPUBLAS blas = GPGPUBLAS.Create(gpu);

    int cols = dm.ColumnCount;
    int rows = dm.RowCount;
    int restRows = rows - cols;

    // Host copies of the leading square block and of the rows below it, both column-major.
    double[] squareBlock = dm.SubMatrix(0, cols, 0, cols).Storage.ToColumnMajorArray();
    double[] remainderBlock = dm.SubMatrix(cols, restRows, 0, cols).Storage.ToColumnMajorArray();
    dm = null;

    // Pass 1: leading cols x cols block.
    // NOTE(review): the uploaded block (deviceBlock) is never passed to GEMV; the freshly
    // allocated deviceOut is used as the matrix argument - confirm this is intended.
    double[] deviceBlock = gpu.CopyToDevice<double>(squareBlock);
    squareBlock = null;
    double[] deviceOut = gpu.Allocate<double>(cols * cols);
    double[] deviceOne = gpu.CopyToDevice<double>(new double[] { 1 });
    blas.GEMV(cols, cols, 1, deviceOut, deviceOne, 0, deviceOne, Cudafy.Maths.BLAS.Types.cublasOperation.T);

    double[] merged = new double[cols * rows];
    gpu.CopyFromDevice<double>(deviceOut, 0, merged, 0, cols * cols);
    gpu.FreeAll();

    // Pass 2: the remaining restRows x cols block, appended after the first cols * cols values.
    deviceBlock = gpu.CopyToDevice<double>(remainderBlock);
    remainderBlock = null;
    deviceOut = gpu.Allocate<double>(restRows * cols);
    deviceOne = gpu.CopyToDevice<double>(new double[] { 1 });
    blas.GEMV(restRows, cols, 1, deviceOut, deviceOne, 0, deviceOne, Cudafy.Maths.BLAS.Types.cublasOperation.T);
    gpu.CopyFromDevice<double>(deviceOut, 0, merged, cols * cols, restRows * cols);
    gpu.FreeAll();

    dm = new MathNet.Numerics.LinearAlgebra.Double.DenseMatrix(cols, rows, merged);
}
/// <summary>
/// Prepares the fixture: acquires an sm_30 device, loads the Cudafy module (rebuilding and
/// caching it when no valid serialized copy exists) and creates the int and float
/// input/output buffers on host and device.
/// </summary>
public void SetUp()
{
    _gpu = CudafyHost.GetDevice(eArchitecture.sm_30, CudafyModes.DeviceId);
    Assert.IsFalse(_gpu is OpenCLDevice, "OpenCL devices are not supported.");

    // Use the serialized module only when it exists and its checksums verify.
    _cm = CudafyModule.TryDeserialize();
    bool cacheUsable = _cm != null && _cm.TryVerifyChecksums();
    if (!cacheUsable)
    {
        _cm = CudafyTranslator.Cudafy(eArchitecture.sm_30);
        Console.WriteLine(_cm.CompilerOutput);
        _cm.TrySerialize();
    }
    _gpu.LoadModule(_cm);

    // Integer data (arbitrary values).
    inputIntArray = new int[]
    {
        0x17, 0x01, 0x7f, 0xd1, 0xfe, 0x23, 0x2c, 0xa0,
        0x00, 0xcf, 0xaa, 0x7a, 0x35, 0xf4, 0x04, 0xbc,
        0xe9, 0x6d, 0xb2, 0x55, 0xb0, 0xc8, 0x10, 0x49,
        0x76, 0x17, 0x92, 0xab, 0xf3, 0xf2, 0xab, 0xcb
    };
    gpuIntResult = new int[WARP_SIZE];
    cpuIntResult = new int[WARP_SIZE];
    d_inputIntArray = _gpu.CopyToDevice(inputIntArray);
    d_outputIntArray = _gpu.Allocate<int>(WARP_SIZE);

    // Float data (arbitrary values).
    inputFloatArray = new float[]
    {
        1.7f, -37.03f, 2147.6436f, -0.1f, 7.7f, 99.99f, -809.142f, -0.1115f,
        1.0f, 2.0f, 3.0f, 5.0f, 7.5f, 0.1001f, 11.119f, -9.0f,
        7749.9847f, -860249.118843f, 0.0f, -2727745.586215f, 12.0f, -11.0f, 77.77f, 22.0f,
        377.1112f, -377.1112f, 0.12345f, -0.12345f, 0.11111f, -0.11111f, 700000f, -14f
    };
    gpuFloatResult = new float[WARP_SIZE];
    cpuFloatResult = new float[WARP_SIZE];
    d_inputFloatArray = _gpu.CopyToDevice(inputFloatArray);
    d_outputFloatArray = _gpu.Allocate<float>(WARP_SIZE);
}
/// <summary>
/// Uploads <paramref name="A"/> and <paramref name="B"/>, launches the "GPU_MA" kernel and
/// copies the device result into <paramref name="C"/>.
/// </summary>
/// <param name="A">First input matrix.</param>
/// <param name="B">Second input matrix.</param>
/// <param name="C">Receives the kernel output; also determines the size of the device result buffer.</param>
/// <param name="gpu">Device used for allocation, launch and copies.</param>
/// <param name="maxTheadBlockSize">Upper bound applied to each block dimension.</param>
/// <param name="Size">Used for both grid dimensions and passed to the kernel.</param>
/// <returns>Always 1.</returns>
public static int MA(int[,] A, int[,] B, int[,] C, GPGPU gpu, int maxTheadBlockSize, int Size)
{
    int[,] deviceA = gpu.Allocate<int>(A);
    int[,] deviceB = gpu.Allocate<int>(B);
    int[,] deviceC = gpu.Allocate<int>(C);

    gpu.CopyToDevice(A, deviceA);
    gpu.CopyToDevice(B, deviceB);

    // Each block dimension is Size, capped at maxTheadBlockSize; the grid is always Size x Size.
    // NOTE(review): with a Size x Size grid the launch covers more threads than Size * Size
    // whenever the block is larger than 1 x 1 - presumably the kernel bounds-checks; confirm.
    int threadsPerSide = Size < maxTheadBlockSize ? Size : maxTheadBlockSize;
    dim3 threadsPerBlock = new dim3(threadsPerSide, threadsPerSide);
    dim3 grid = new dim3(Size, Size);

    gpu.Launch(grid, threadsPerBlock, "GPU_MA", deviceA, deviceB, deviceC, Size);
    gpu.CopyFromDevice(deviceC, C);

    gpu.Free(deviceA);
    gpu.Free(deviceB);
    gpu.Free(deviceC);
    return 1;
}
/// <summary>
/// Runs the <c>evaluate</c> kernel over <paramref name="hands"/> and returns one rank per hand.
/// </summary>
/// <param name="hands">Hands to evaluate, one element per kernel work item.</param>
/// <param name="numCards">Forwarded unchanged to the kernel.</param>
/// <returns>An array with the same length as <paramref name="hands"/>; empty when there are no hands.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="hands"/> is null.</exception>
public static uint[] Evaluate(ulong[] hands, int numCards)
{
    if (hands == null)
    {
        throw new System.ArgumentNullException("hands");
    }

    // Nothing to evaluate: avoid a zero-sized allocation and a launch with zero blocks.
    if (hands.Length == 0)
    {
        return new uint[0];
    }

    // Translates this class to CUDA C and then compiles it.
    CudafyModule km = CudafyTranslator.Cudafy();

    // Get the configured GPU and load the module.
    GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
    gpu.LoadModule(km);

    // Enough 256-thread blocks to cover every hand (rounded up).
    const int blockSize = 256;
    int blockx = (hands.Length + blockSize - 1) / blockSize;

    ulong[] dev_hands = null;
    uint[] dev_ranks = null;
    try
    {
        dev_hands = gpu.Allocate<ulong>(hands.Length);
        dev_ranks = gpu.Allocate<uint>(hands.Length);
        gpu.CopyToDevice(hands, dev_hands);

        // The timer calls are kept for parity with the original; the measured value is not used.
        gpu.StartTimer();
        gpu.Launch(blockx, blockSize).evaluate(dev_hands, numCards, hands.Length, dev_ranks);
        gpu.StopTimer();

        uint[] toReturn = new uint[hands.Length];
        gpu.CopyFromDevice(dev_ranks, toReturn);
        return toReturn;
    }
    finally
    {
        // Previously these buffers were never released, leaking device memory on every call.
        if (dev_ranks != null)
        {
            gpu.Free(dev_ranks);
        }
        if (dev_hands != null)
        {
            gpu.Free(dev_hands);
        }
    }
}
/// <summary>
/// Times 100 consecutive copies of an <paramref name="size"/>-element int buffer between host and device.
/// </summary>
/// <param name="size">Number of ints in both the host and the device buffer.</param>
/// <param name="up">True to copy host to device; false to copy device to host.</param>
/// <returns>The value reported by the device timer for the 100 copies.</returns>
private float cuda_malloc_test(int size, bool up)
{
    int[] hostBuffer = new int[size];
    int[] deviceBuffer = _gpu.Allocate<int>(size);

    _gpu.StartTimer();
    for (int pass = 0; pass < 100; pass++)
    {
        if (!up)
        {
            _gpu.CopyFromDevice(deviceBuffer, hostBuffer);
        }
        else
        {
            _gpu.CopyToDevice(hostBuffer, deviceBuffer);
        }
    }
    float elapsed = _gpu.StopTimer();

    _gpu.FreeAll();
    GC.Collect();
    return elapsed;
}
/// <summary>
/// Runs up to ten rounds in which two worker threads execute concurrently against the same
/// device, and asserts that both threads finished within the 10 second join timeout.
/// </summary>
public void Test_TwoThreadCopy()
{
    _gpu = CudafyHost.GetDevice(eGPUType.Cuda);
    _gpuuintBufferIn3 = _gpu.Allocate(_uintBufferIn1);
    _gpuuintBufferIn4 = _gpu.Allocate(_uintBufferIn1);
    _gpu.EnableMultithreading();

    bool firstJoined = false;
    bool secondJoined = false;
    for (int round = 0; round < 10; round++)
    {
        Console.WriteLine(round);
        SetInputs();
        ClearOutputs();

        Thread first = new Thread(Test_TwoThreadCopy_Thread1);
        Thread second = new Thread(Test_TwoThreadCopy_Thread2);
        first.Start();
        second.Start();

        // Both joins are always attempted so each flag reflects its own thread.
        firstJoined = first.Join(10000);
        secondJoined = second.Join(10000);
        if (!(firstJoined && secondJoined))
        {
            break;
        }
    }

    _gpu.DisableMultithreading();
    _gpu.FreeAll();

    Assert.IsTrue(firstJoined);
    Assert.IsTrue(secondJoined);
}
/// <summary>
/// Creates the device, the host and device buffers (real and complex, double precision), the
/// FFT wrapper, and fills the host inputs with two sine waveforms per batch.
/// </summary>
public void SetUp()
{
    _gpu = CudafyHost.CreateDevice(CudafyModes.Target);

    // Host buffers: N samples per batch, BATCH batches.
    _hostInput = new double[N * BATCH];
    _hostInputCplx = new ComplexD[N * BATCH];
    _hostOutput = new double[N * BATCH];
    _hostOutputCplx = new ComplexD[N * BATCH];

    // Device buffers; the real intermediate buffer is twice the size of the input.
    _devInput = _gpu.Allocate(_hostInput);
    _devInputCplx = _gpu.Allocate(_hostInputCplx);
    _devInter = _gpu.Allocate<double>(N * 2 * BATCH);
    _devInterCplx = _gpu.Allocate<ComplexD>(N * BATCH);
    _devOutput = _gpu.Allocate(_hostOutput);
    _devOutputCplx = _gpu.Allocate(_hostOutputCplx);

    _fft = GPGPUFFT.Create(_gpu);

    // Every batch receives the same waveform; the real input mirrors the x component.
    for (int batch = 0; batch < BATCH; batch++)
    {
        int offset = batch * N;
        for (int i = 0; i < N; i++)
        {
            ComplexD sample = new ComplexD
            {
                x = 10.0F * Math.Sin(100 * 2 * Math.PI * i / N * Math.PI / 180),
                y = 10.0F * Math.Sin(200 * 2 * Math.PI * i / N * Math.PI / 180)
            };
            _hostInput[i + offset] = sample.x;
            _hostInputCplx[i + offset] = sample;
        }
    }
}
/// <summary>
/// Prepares the fixture: acquires an sm_30 device, loads the Cudafy module (rebuilding and
/// caching it when no valid serialized copy exists) and creates the int and float
/// input/output buffers on host and device.
/// </summary>
public void SetUp()
{
    _gpu = CudafyHost.GetDevice(eArchitecture.sm_30, CudafyModes.DeviceId);
    Assert.IsFalse(_gpu is OpenCLDevice, "OpenCL devices are not supported.");

    // Use the serialized module only when it exists and its checksums verify.
    _cm = CudafyModule.TryDeserialize();
    bool cacheUsable = _cm != null && _cm.TryVerifyChecksums();
    if (!cacheUsable)
    {
        _cm = CudafyTranslator.Cudafy(eArchitecture.sm_30);
        Console.WriteLine(_cm.CompilerOutput);
        _cm.TrySerialize();
    }
    _gpu.LoadModule(_cm);

    // Integer data (arbitrary values).
    inputIntArray = new int[]
    {
        0x17, 0x01, 0x7f, 0xd1, 0xfe, 0x23, 0x2c, 0xa0,
        0x00, 0xcf, 0xaa, 0x7a, 0x35, 0xf4, 0x04, 0xbc,
        0xe9, 0x6d, 0xb2, 0x55, 0xb0, 0xc8, 0x10, 0x49,
        0x76, 0x17, 0x92, 0xab, 0xf3, 0xf2, 0xab, 0xcb
    };
    gpuIntResult = new int[WARP_SIZE];
    cpuIntResult = new int[WARP_SIZE];
    d_inputIntArray = _gpu.CopyToDevice(inputIntArray);
    d_outputIntArray = _gpu.Allocate<int>(WARP_SIZE);

    // Float data (arbitrary values).
    inputFloatArray = new float[]
    {
        1.7f, -37.03f, 2147.6436f, -0.1f, 7.7f, 99.99f, -809.142f, -0.1115f,
        1.0f, 2.0f, 3.0f, 5.0f, 7.5f, 0.1001f, 11.119f, -9.0f,
        7749.9847f, -860249.118843f, 0.0f, -2727745.586215f, 12.0f, -11.0f, 77.77f, 22.0f,
        377.1112f, -377.1112f, 0.12345f, -0.12345f, 0.11111f, -0.11111f, 700000f, -14f
    };
    gpuFloatResult = new float[WARP_SIZE];
    cpuFloatResult = new float[WARP_SIZE];
    d_inputFloatArray = _gpu.CopyToDevice(inputFloatArray);
    d_outputFloatArray = _gpu.Allocate<float>(WARP_SIZE);
}
/// <summary>
/// Performs a merge sort on the GPU.
/// Usage example:
/// <code>
/// CudafySequencies.SetSequencies(arrayOfArray,arrayOfArray);
/// CudafySequencies.Execute("Compare");
/// var compare = CudafySequencies.GetMartix();
/// CudafyArray.SetArray(Enumerable.Range(0,n).ToArray());
/// CudafyArray.SetCompare(compare);
/// CudafyArray.MergeSort();
/// var indexesOfSorted = CudafyArray.GetArray();
/// </code>
/// </summary>
/// <param name="direction">Forwarded unchanged to the <c>MergeLinear</c> kernel; defaults to 1.</param>
public static void MergeSort(int direction = 1)
{
    CudafyModule module = CudafyTranslator.Cudafy();
    GPGPU gpu = CudafyHost.GetDevice();
    gpu.LoadModule(module);

    int[] devA = gpu.Allocate(_a);
    int[] devB = gpu.Allocate(_b);
    gpu.CopyToDevice(_a, devA);

    for (int pass = 0; pass < _ceiling; pass++)
    {
        // Grid and block use the same size: the cube root of ((_length >> pass) + pass), capped at 15.
        int launchSize = Math.Min(15, (int)Math.Pow((_length >> pass) + pass, 0.333333333333));

        // The two buffers swap roles on every pass: even passes read devA and write devB.
        bool evenPass = (pass & 1) == 0;
        int[] source = evenPass ? devA : devB;
        int[] target = evenPass ? devB : devA;

        gpu.Launch(launchSize, launchSize).MergeLinear(source, target, pass, 0, _length, direction);
    }

    // After _ceiling passes the latest output sits in devA when _ceiling is even, otherwise in devB.
    int[] finalBuffer = (_ceiling & 1) == 0 ? devA : devB;
    gpu.CopyFromDevice(finalBuffer, _a);

    // Release every allocation made on the device.
    gpu.FreeAll();
}
/// <summary>
/// Creates the device and BLAS wrapper, the three ciROWS x ciCOLS host matrices and the two
/// device matrices used by the tests.
/// </summary>
public void SetUp()
{
    // Host-side matrices.
    _hostInput = new float[ciROWS, ciCOLS];
    _hostInput2 = new float[ciROWS, ciCOLS];
    _hostOutput = new float[ciROWS, ciCOLS];

    // Device and BLAS handle.
    _gpu = CudafyHost.CreateDevice(CudafyModes.Target);
    _blas = GPGPUBLAS.Create(_gpu);

    // Device matrices sized after the first input and the output matrix.
    _devPtr = _gpu.Allocate<float>(_hostInput);
    _devPtr2 = _gpu.Allocate<float>(_hostOutput);
}
/// <summary>
/// Creates the device and BLAS wrapper, the three ciROWS x ciCOLS host matrices and the two
/// device matrices used by the tests.
/// </summary>
public void SetUp()
{
    // Host-side matrices.
    _hostInput = new float[ciROWS, ciCOLS];
    _hostInput2 = new float[ciROWS, ciCOLS];
    _hostOutput = new float[ciROWS, ciCOLS];

    // Device and BLAS handle.
    _gpu = CudafyHost.CreateDevice(CudafyModes.Target);
    _blas = GPGPUBLAS.Create(_gpu);

    // Device matrices sized after the first input and the output matrix.
    _devPtr = _gpu.Allocate<float>(_hostInput);
    _devPtr2 = _gpu.Allocate<float>(_hostOutput);
}
/// <summary>
/// Renders a randomly generated set of spheres on the GPU into <paramref name="bitmap"/> and
/// prints the time reported by the device timer.
/// </summary>
/// <param name="bitmap">Host buffer; a device buffer of the same size is allocated and the kernel output is copied back into it.</param>
public static void Execute(byte[] bitmap)
{
    // Use the serialized module only when it exists and its checksums verify.
    CudafyModule module = CudafyModule.TryDeserialize();
    bool cacheUsable = module != null && module.TryVerifyChecksums();
    if (!cacheUsable)
    {
        module = CudafyTranslator.Cudafy(typeof(SphereOpenCL), typeof(ray_opencl));
        module.TrySerialize();
    }

    GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
    gpu.LoadModule(module);

    // Timing covers allocation, upload, kernel execution and read-back.
    gpu.StartTimer();

    byte[] deviceBitmap = gpu.Allocate(bitmap);
    SphereOpenCL[] deviceSpheres = gpu.Allocate<SphereOpenCL>(SPHERES);

    // Build the sphere set on the host. The rnd calls stay in the original order
    // (r, g, b, x, y, z, radius) so the generated sequence is unchanged.
    SphereOpenCL[] hostSpheres = new SphereOpenCL[SPHERES];
    for (int n = 0; n < SPHERES; n++)
    {
        SphereOpenCL sphere = hostSpheres[n];
        sphere.r = rnd(1.0f);
        sphere.g = rnd(1.0f);
        sphere.b = rnd(1.0f);
        sphere.x = rnd(1000.0f) - 500;
        sphere.y = rnd(1000.0f) - 500;
        sphere.z = rnd(1000.0f) - 500;
        sphere.radius = rnd(100.0f) + 20;
        hostSpheres[n] = sphere;
    }
    gpu.CopyToDevice(hostSpheres, deviceSpheres);

    // One 16 x 16 thread block per 16 x 16 tile of the DIM x DIM image; strongly typed launch.
    dim3 threads = new dim3(16, 16);
    dim3 grids = new dim3(ray_gui.DIM / 16, ray_gui.DIM / 16);
    gpu.Launch(grids, threads, ((Action<GThread, SphereOpenCL[], byte[]>)thekernel), deviceSpheres, deviceBitmap);

    gpu.CopyFromDevice(deviceBitmap, bitmap);

    float elapsedTime = gpu.StopTimer();
    Console.WriteLine("Time to generate: {0} ms", elapsedTime);

    gpu.FreeAll();
}
/// <summary>
/// Creates the device and BLAS wrapper, prints the BLAS version, and allocates the four
/// ciN-element host vectors and the two device vectors used by the tests.
/// </summary>
public void SetUp()
{
    _gpu = CudafyHost.CreateDevice(CudafyModes.Target);
    _blas = GPGPUBLAS.Create(_gpu);
    Console.Write("BLAS Version={0}", _blas.GetVersion());

    // Host-side vectors.
    _hostInput1 = new float[ciN];
    _hostInput2 = new float[ciN];
    _hostOutput1 = new float[ciN];
    _hostOutput2 = new float[ciN];

    // Device vectors sized after the first input and the first output vector.
    _devPtr1 = _gpu.Allocate<float>(_hostInput1);
    _devPtr2 = _gpu.Allocate<float>(_hostOutput1);
}
/// <summary>
/// Creates the device and BLAS wrapper, prints the BLAS version, and allocates the four
/// ciN-element host vectors and the two device vectors used by the tests.
/// </summary>
public void SetUp()
{
    _gpu = CudafyHost.CreateDevice(CudafyModes.Target);
    _blas = GPGPUBLAS.Create(_gpu);
    Console.Write("BLAS Version={0}", _blas.GetVersion());

    // Host-side vectors.
    _hostInput1 = new float[ciN];
    _hostInput2 = new float[ciN];
    _hostOutput1 = new float[ciN];
    _hostOutput2 = new float[ciN];

    // Device vectors sized after the first input and the first output vector.
    _devPtr1 = _gpu.Allocate<float>(_hostInput1);
    _devPtr2 = _gpu.Allocate<float>(_hostOutput1);
}
/// <summary>
/// Computes a dot product on the GPU: the <c>Dot</c> kernel produces one partial sum per
/// block, the partial sums are added on the CPU, and the total is printed next to the value
/// computed by <c>sum_squares</c>.
/// </summary>
public static void Execute()
{
    CudafyModule km = CudafyTranslator.Cudafy();

    GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
    gpu.LoadModule(km);

    // Host-side inputs and the per-block partial results.
    float[] a = new float[N];
    float[] b = new float[N];
    float[] partial_c = new float[blocksPerGrid];
    for (int i = 0; i < N; i++)
    {
        a[i] = i;
        b[i] = i * 2;
    }

    try
    {
        // Device buffers. The former 'dev_test' allocation (blocksPerGrid * blocksPerGrid
        // floats) was never read or written by anything and has been removed.
        float[] dev_a = gpu.Allocate<float>(N);
        float[] dev_b = gpu.Allocate<float>(N);
        float[] dev_partial_c = gpu.Allocate<float>(blocksPerGrid);

        // Copy the arrays 'a' and 'b' to the GPU.
        gpu.CopyToDevice(a, dev_a);
        gpu.CopyToDevice(b, dev_b);

        gpu.Launch(blocksPerGrid, threadsPerBlock).Dot(dev_a, dev_b, dev_partial_c);

        // Copy the partial sums back from the GPU to the CPU.
        gpu.CopyFromDevice(dev_partial_c, partial_c);
    }
    finally
    {
        // Free device memory even when allocation, upload, launch or read-back throws.
        gpu.FreeAll();
    }

    // Finish the reduction on the CPU side.
    float c = 0;
    for (int i = 0; i < blocksPerGrid; i++)
    {
        c += partial_c[i];
    }

    Console.WriteLine("Does GPU value {0} = {1}?\n", c, 2 * sum_squares((float)(N - 1)));
}
/// <summary>
/// Runs "BallotKernel" over four warps of random 0/1 inputs and prints, for each warp, the
/// kernel output next to the bit mask computed on the CPU.
/// </summary>
public static void Execute()
{
    CudafyModule module = CudafyTranslator.Cudafy(Program.testArchitecture);
    GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, 0);
    gpu.LoadModule(module);

    const int warps = 4;
    const int count = warps * 32;

    var random = new Random();
    var input = new int[count];
    var output = new int[count / 32];
    var expectedOutput = new int[count / 32]; // elements start at zero

    // Draw each input bit and fold it into its warp's expected mask at the lane position.
    for (var lane = 0; lane < count; lane++)
    {
        input[lane] = random.Next(2);
        expectedOutput[lane / 32] += input[lane] << (lane % 32);
    }

    var devInput = gpu.Allocate<int>(count);
    var devOutput = gpu.Allocate<int>(warps);
    gpu.CopyToDevice(input, devInput);

    // Single block containing all 'count' threads.
    gpu.Launch(1, count, "BallotKernel", devInput, devOutput);

    gpu.CopyFromDevice(devOutput, output);
    gpu.Free(devInput);
    gpu.Free(devOutput);

    for (var warp = 0; warp < warps; warp++)
    {
        string verdict = expectedOutput[warp] == output[warp] ? "PASSED" : "FAILED";
        Console.WriteLine("Warp {0} Ballot: {1}", warp, output[warp]);
        Console.WriteLine("Expected: {0} \t{1}", expectedOutput[warp], verdict);
    }
}
/// <summary>
/// Configures Cudafy for OpenCL device 0, runs the <c>CalculateNeuralNetwork</c> kernel on
/// randomly generated data and prints the wall-clock time of the GPU section.
/// </summary>
/// <returns>The output vector copied back from the device (<c>Utils.N</c> elements).</returns>
public static float[] CallGPU()
{
    // Global Cudafy settings: target OpenCL, first device.
    CudafyModes.Target = eGPUType.OpenCL;
    CudafyModes.DeviceId = 0;
    CudafyTranslator.Language = eLanguage.OpenCL;

    CudafyModule module = CudafyTranslator.Cudafy(ePlatform.Auto, eArchitecture.OpenCL, typeof(GPU));
    GPGPU gpu = CudafyHost.GetDevice(eGPUType.OpenCL, 0);
    gpu.LoadModule(module);
    module.Serialize();

    // Host data: the input vector is generated before the matrix, as in the original.
    float[] input = Utils.GenerateRandomVector();
    float[,,] NN = Utils.GenerateRandomMatrix().AsSingleDimension();
    float[] output = new float[Utils.N];

    // The stopwatch spans allocation, upload, launch, read-back and release.
    Stopwatch gpuSW = Stopwatch.StartNew();

    float[] deviceOutput = gpu.Allocate<float>(output);
    float[] deviceInput = gpu.CopyToDevice(input);
    float[,,] deviceNN = gpu.CopyToDevice(NN);

    gpu.Launch(Utils.GRID_SIZE, Utils.BLOCK_SIZE).CalculateNeuralNetwork(deviceInput, deviceNN, deviceOutput);
    gpu.CopyFromDevice(deviceOutput, output);
    gpu.FreeAll();

    gpuSW.Stop();
    Console.WriteLine("GPU: " + gpuSW.ElapsedMilliseconds);

    return output;
}
/// <summary>
/// Runs the "impliedVolatile" kernel once for a fixed parameter set and prints the
/// <c>i</c> and <c>B</c> fields of the parameter struct read back from the device.
/// </summary>
public static void Execute()
{
    CudafyModule km = CudafyTranslator.Cudafy(typeof(ParamsStruct), typeof(ImpliedVolatile));

    _gpu = CudafyHost.GetDevice(CudafyModes.Target);
    _gpu.LoadModule(km);

    ParamsStruct[] host_par = new ParamsStruct[1];
    ParamsStruct[] result = new ParamsStruct[1];
    host_par[0].OP = 96.95;
    host_par[0].Price = 1332.24;
    host_par[0].Strike = 1235;
    host_par[0].TD = 31;
    host_par[0].R = 0.0001355;
    host_par[0].Q = 0.0166;
    host_par[0].N = 100;
    host_par[0].kind = 1;

    ParamsStruct[] dev_par = null;
    float[] PA = null;
    try
    {
        dev_par = _gpu.CopyToDevice(host_par);
        PA = _gpu.Allocate<float>(1001);

        // Single thread; the kernel writes its results into the parameter struct itself.
        _gpu.Launch(1, 1, "impliedVolatile", dev_par, PA);
        _gpu.CopyFromDevice(dev_par, 0, result, 0, 1);
    }
    finally
    {
        // _gpu is a long-lived field, so buffers that are not released here would stay
        // allocated after this method returns (the original never freed them).
        if (PA != null)
        {
            _gpu.Free(PA);
        }
        if (dev_par != null)
        {
            _gpu.Free(dev_par);
        }
    }

    Console.WriteLine("I={0}, B={1}", result[0].i, result[0].B);
}
/// <summary>
/// Renders one frame on the GPU, saves it as "spring{frameNum}.png" and returns it.
/// </summary>
/// <param name="gpu">Device used for allocation, launch and copies.</param>
/// <param name="frameNum">Frame number used in the output file name.</param>
/// <returns>A 32bpp RGB bitmap that owns its pixel memory.</returns>
static Bitmap Render(GPGPU gpu, int frameNum)
{
    uint[,] finalImage = new uint[width, height];

    uint[,] deviceImage = null;
    float[] pX1_gpu = null;
    float[] pY1_gpu = null;
    float[] pZ1_gpu = null;
    float[] colorPosition_gpu = null;
    float[] currentTime_gpu = null;
    try
    {
        deviceImage = gpu.Allocate<uint>(width, height);
        pX1_gpu = gpu.CopyToDevice<float>(pX1);
        pY1_gpu = gpu.CopyToDevice<float>(pY1);
        pZ1_gpu = gpu.CopyToDevice<float>(pZ1);
        colorPosition_gpu = gpu.CopyToDevice<float>(colorPosition);
        currentTime_gpu = gpu.CopyToDevice<float>(currentTime);

        // 8 x 8 threads per block, one block per 8 x 8 tile of the image.
        dim3 threadsPerBlock = new dim3(8, 8);
        dim3 numBlocks = new dim3(width / threadsPerBlock.x, height / threadsPerBlock.y);
        gpu.Launch(numBlocks, threadsPerBlock).renderKernel(deviceImage, pX1_gpu, pY1_gpu, pZ1_gpu, colorPosition_gpu, currentTime_gpu);

        gpu.CopyFromDevice<uint>(deviceImage, finalImage);
    }
    finally
    {
        // Release whatever was allocated, even when an upload, the launch or the read-back throws.
        foreach (object buffer in new object[] { deviceImage, pX1_gpu, pY1_gpu, pZ1_gpu, colorPosition_gpu, currentTime_gpu })
        {
            if (buffer != null)
            {
                gpu.Free(buffer);
            }
        }
    }

    // The original wrapped a pinned view of finalImage in the Bitmap and unpinned it before
    // returning, leaving the returned Bitmap pointing at memory the GC may move or reclaim.
    // Copy the pixels into a Bitmap that owns its storage instead. The byte layout is the
    // same: a linear copy of finalImage with a stride of width * 4 bytes.
    byte[] raw = new byte[finalImage.Length * sizeof(uint)];
    System.Buffer.BlockCopy(finalImage, 0, raw, 0, raw.Length);

    Bitmap bmp = new Bitmap(width, height, PixelFormat.Format32bppRgb);
    BitmapData target = bmp.LockBits(new Rectangle(0, 0, width, height), ImageLockMode.WriteOnly, PixelFormat.Format32bppRgb);
    try
    {
        Marshal.Copy(raw, 0, target.Scan0, raw.Length);
    }
    finally
    {
        bmp.UnlockBits(target);
    }

    bmp.Save("spring" + frameNum + ".png");
    return bmp;
}
/// <summary>
/// Launches "Kernel" with a <c>Generic&lt;ushort, ushort&gt;</c> argument and prints PASSED
/// when the single int written by the kernel equals 1.
/// </summary>
public static void Execute()
{
    // Use the serialized module only when it exists and its checksums verify.
    CudafyModule km = CudafyModule.TryDeserialize();
    if (km == null || !km.TryVerifyChecksums())
    {
        km = CudafyTranslator.Cudafy(typeof(Generic<ushort, ushort>), typeof(SimpleGeneric));
        km.Serialize();
    }

    GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target);
    gpu.LoadModule(km);

    var input = new Generic<ushort, ushort>();
    input.A = 187;

    int output;
    int[] devoutput = gpu.Allocate<int>(1);
    try
    {
        gpu.Launch(1, 1, "Kernel", input, devoutput);
        gpu.CopyFromDevice(devoutput, out output);
    }
    finally
    {
        // The original never released this buffer.
        gpu.Free(devoutput);
    }

    Console.WriteLine("Simple Generic: " + ((output == 1) ? "PASSED" : "FAILED"));
}
/// <summary>
/// Creates the device and FFT wrapper, prints the driver and CUFFT versions, allocates the
/// host and device buffers (real and complex, single precision) and fills the host inputs
/// with two sine waveforms per batch.
/// </summary>
public void SetUp()
{
    _gpu = CudafyHost.CreateDevice(CudafyModes.Target);
    Console.WriteLine("CUDA driver version={0}", _gpu.GetDriverVersion());
    _fft = GPGPUFFT.Create(_gpu);

    // Host buffers: N samples per batch, BATCH batches.
    _hostInput = new float[N * BATCH];
    _hostInputCplx = new ComplexF[N * BATCH];
    _hostOutput = new float[N * BATCH];
    _hostOutputCplx = new ComplexF[N * BATCH];

    // Device buffers; the real intermediate buffer is twice the size of the input.
    _devInput = _gpu.Allocate(_hostInput);
    _devInputCplx = _gpu.Allocate(_hostInputCplx);
    _devInter = _gpu.Allocate<float>(N * 2 * BATCH);
    _devInterCplx = _gpu.Allocate<ComplexF>(N * BATCH);
    _devOutput = _gpu.Allocate(_hostOutput);
    _devOutputCplx = _gpu.Allocate(_hostOutputCplx);

    Console.WriteLine("CUFFT version={0}", _fft.GetVersion());

    // Every batch receives the same waveform; the real input mirrors the x component.
    for (int batch = 0; batch < BATCH; batch++)
    {
        int offset = batch * N;
        for (int i = 0; i < N; i++)
        {
            ComplexF sample = new ComplexF
            {
                x = (float)(10.0F * Math.Sin(100 * 2 * Math.PI * i / N * Math.PI / 180)),
                y = (float)(10.0F * Math.Sin(200 * 2 * Math.PI * i / N * Math.PI / 180))
            };
            _hostInput[i + offset] = sample.x;
            _hostInputCplx[i + offset] = sample;
        }
    }
}
/// <summary>
/// Checks whether the given OpenCL device can add two doubles correctly by running the
/// <c>add_double</c> kernel with 2.5 and 7.5 and comparing the result with 10.0.
/// </summary>
/// <param name="DeviceId">Zero-based OpenCL device index.</param>
/// <returns>
/// True when the kernel returned exactly 10.0; false when the index is out of range, or when
/// translation, loading, launching or the copy back throws.
/// </returns>
public static bool TestGpuDoublePrecision(int DeviceId)
{
    // Device ids are zero-based, so the valid range is [0, count). The original guard
    // (DeviceId > count) let DeviceId == count and negative ids through, which changed the
    // global Cudafy settings and ran a full translation before failing in GetDevice.
    if (DeviceId < 0 || DeviceId >= CudafyHost.GetDeviceCount(eGPUType.OpenCL))
    {
        return false;
    }

    try
    {
        CudafyModes.Target = eGPUType.OpenCL;
        CudafyTranslator.Language = eLanguage.OpenCL;
        CudafyModule km = CudafyTranslator.Cudafy();

        GPGPU gpu = CudafyHost.GetDevice(eGPUType.OpenCL, DeviceId);
        gpu.LoadModule(km);

        double[] dev_c = gpu.Allocate<double>();
        try
        {
            double c;
            gpu.Launch().add_double(2.5d, 7.5d, dev_c);
            gpu.CopyFromDevice(dev_c, out c);

            // 2.5 + 7.5 is exactly representable, so an exact comparison is intended here.
            return c == 10.0d;
        }
        finally
        {
            // Release the buffer even when the launch or the read-back throws.
            gpu.Free(dev_c);
        }
    }
    catch (System.Exception)
    {
        // Deliberate best effort: any failure means the capability is reported as unavailable.
        return false;
    }
}
/// <summary>
/// Creates two offset views over one 1000-element array, passes their device counterparts to
/// the <c>Test2</c> kernel and prints "Pass" when the first two results are 1050 and 7.
/// </summary>
/// <param name="gpu">Device used to create the device objects, launch the kernel and copy the result.</param>
public static void Example2(GPGPU gpu)
{
    float[] data = Enumerable.Range(0, 1000).Select(t => (float)t).ToArray();

    // Two views of the same array, each simply applying an offset (a slice would work too).
    ArrayView view1 = new ArrayView();
    ArrayView view2 = new ArrayView();
    view1.CreateView(data, 100);
    view2.CreateView(data, 200);

    // Scale the shared data after the views have been created.
    for (int index = 0; index < 1000; ++index)
    {
        data[index] *= 10f;
    }

    // Should copy the 'large' array to the device only once; each ArrayView references it.
    var deviceView1 = DeviceClassHelper.CreateDeviceObject(gpu, view1);
    var deviceView2 = DeviceClassHelper.CreateDeviceObject(gpu, view2);

    var hostResult = new float[5];
    var deviceResult = gpu.Allocate<float>(5);
    gpu.Launch(1, 1).Test2(deviceView1, deviceView2, deviceResult);
    gpu.CopyFromDevice(deviceResult, hostResult);

    bool pass = hostResult[0] == 1050f && hostResult[1] == 7f;
    Console.WriteLine(pass ? "Pass" : "Fail");
}
/// <summary>
/// Runs the "impliedVolatile" kernel once for a fixed parameter set and prints the
/// <c>i</c> and <c>B</c> fields of the parameter struct read back from the device.
/// </summary>
public static void Execute()
{
    CudafyModule km = CudafyTranslator.Cudafy(typeof(ParamsStruct), typeof(ImpliedVolatile));

    _gpu = CudafyHost.GetDevice(CudafyModes.Target);
    _gpu.LoadModule(km);

    ParamsStruct[] host_par = new ParamsStruct[1];
    ParamsStruct[] result = new ParamsStruct[1];
    host_par[0].OP = 96.95;
    host_par[0].Price = 1332.24;
    host_par[0].Strike = 1235;
    host_par[0].TD = 31;
    host_par[0].R = 0.0001355;
    host_par[0].Q = 0.0166;
    host_par[0].N = 100;
    host_par[0].kind = 1;

    ParamsStruct[] dev_par = null;
    float[] PA = null;
    try
    {
        dev_par = _gpu.CopyToDevice(host_par);
        PA = _gpu.Allocate<float>(1001);

        // Single thread; the kernel writes its results into the parameter struct itself.
        _gpu.Launch(1, 1, "impliedVolatile", dev_par, PA);
        _gpu.CopyFromDevice(dev_par, 0, result, 0, 1);
    }
    finally
    {
        // _gpu is a long-lived field, so buffers that are not released here would stay
        // allocated after this method returns (the original never freed them).
        if (PA != null)
        {
            _gpu.Free(PA);
        }
        if (dev_par != null)
        {
            _gpu.Free(dev_par);
        }
    }

    Console.WriteLine("I={0}, B={1}", result[0].i, result[0].B);
}
/// <summary>
/// Translates the module for the architecture of device 0, launches "GenerateRipples" on an
/// 8 x 8 grid of 64 x 16 thread blocks and copies the first device buffer back to the host.
/// </summary>
public void ExeTestKernel()
{
    GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, 0);
    eArchitecture arch = gpu.GetArchitecture();
    CudafyModule km = CudafyTranslator.Cudafy(arch);
    gpu.LoadModule(km);

    int[] host_results = new int[N];

    // Either assign a new block of memory to hold results on the device ...
    int[] dev_results = null;
    int[] dev_filled_results = null;
    try
    {
        dev_results = gpu.Allocate<int>(N);

        // ... or fill a host array with values first and copy it to the device.
        for (int i = 0; i < N; i++)
        {
            host_results[i] = i * 3;
        }
        dev_filled_results = gpu.CopyToDevice(host_results);

        // 64 * 16 = 1024 threads per block (which is the maximum for sm_30).
        dim3 threadsPerBlock = new dim3(64, 16);
        // 8 * 8 = 64 blocks per grid, just for show so you get varying numbers.
        dim3 blocksPerGrid = new dim3(8, 8);

        gpu.Launch(blocksPerGrid, threadsPerBlock, "GenerateRipples", dev_results, dev_filled_results);
        gpu.CopyFromDevice(dev_results, host_results);
    }
    finally
    {
        // The original never released these buffers.
        if (dev_filled_results != null)
        {
            gpu.Free(dev_filled_results);
        }
        if (dev_results != null)
        {
            gpu.Free(dev_results);
        }
    }
}
/// <summary>
/// Invokes and runs a single elementary GPU function selected by its name.
/// </summary>
/// <param name="function">Name of the kernel to launch.</param>
public static void Execute(string function)
{
    // The last index entry of each set must equal the length of its flattened sequence array.
    Debug.Assert(_indexes1.Last() == _sequencies1.Length);
    Debug.Assert(_indexes2.Last() == _sequencies2.Length);

    CudafyModule module = CudafyTranslator.Cudafy();
    GPGPU gpu = CudafyHost.GetDevice();
    gpu.LoadModule(module);

    // Upload both index arrays and both sequence arrays; the matrix is only allocated.
    int[] devIndexes1 = gpu.CopyToDevice(_indexes1);
    int[] devIndexes2 = gpu.CopyToDevice(_indexes2);
    int[] devSequencies1 = gpu.CopyToDevice(_sequencies1);
    int[] devSequencies2 = gpu.CopyToDevice(_sequencies2);
    int[,] devMatrix = gpu.Allocate(_matrix);

    // Grid and block share one size: the cube root of the number of matrix cells, capped at 15.
    int rows = _matrix.GetLength(0);
    int columns = _matrix.GetLength(1);
    int launchSize = Math.Min(15, (int)Math.Pow((double)rows * columns, 0.33333333333));
    dim3 gridSize = launchSize;
    dim3 blockSize = launchSize;

    gpu.Launch(gridSize, blockSize, function, devSequencies1, devIndexes1, devSequencies2, devIndexes2, devMatrix);

    // Read the matrix back, then release every allocation made on the device.
    gpu.CopyFromDevice(devMatrix, _matrix);
    gpu.FreeAll();
}
/// <summary>
/// Runs the <c>ModulAtomic</c> kernel with KONSTANTA_THREAD single-thread blocks, compacts
/// every result that is not -1 to the front of a host array and prints the elapsed whole seconds.
/// </summary>
public static void primaGPU()
{
    CudafyModule modul_kernel = CudafyTranslator.Cudafy();

    GPGPU vga = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
    vga.LoadModule(modul_kernel);

    Stopwatch waktu = new Stopwatch();
    waktu.Start();

    int[] list_cpu = new int[KONSTANTA_THREAD];
    int[] list_cpy = new int[KONSTANTA_THREAD];

    int[] list = vga.Allocate<int>(KONSTANTA_THREAD);
    vga.Launch(KONSTANTA_THREAD, 1).ModulAtomic(list);
    vga.CopyFromDevice(list, list_cpy);
    vga.FreeAll();

    // Keep only the entries the kernel did not mark with -1, packed at the front of list_cpu.
    int index = 0;
    for (int z = 0; z < list_cpy.Length; z++)
    {
        if (list_cpy[z] != -1)
        {
            list_cpu[index] = list_cpy[z];
            index++;
        }
    }

    waktu.Stop();

    // TimeSpan.Seconds is only the 0-59 seconds component and silently drops whole minutes;
    // report the total elapsed seconds (still as a whole number) instead.
    String total = ((long)waktu.Elapsed.TotalSeconds).ToString();
    Console.WriteLine("Total GPU ------ {0} detik> ", total);
}
/// <summary>
/// Runs the <c>fungsiAtomic</c> kernel with KONSTANTA_THREAD single-thread blocks, copies the
/// result to the host and prints the elapsed time in milliseconds.
/// </summary>
public static void eksekusi()
{
    CudafyModule kernel_modul = CudafyTranslator.Cudafy();

    GPGPU vga = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
    vga.LoadModule(kernel_modul);

    Stopwatch waktu = new Stopwatch();
    waktu.Start();

    int[] array_vga = vga.Allocate<int>(KONSTANTA_THREAD);
    int[] array_hasil = new int[KONSTANTA_THREAD];

    vga.Launch(KONSTANTA_THREAD, 1).fungsiAtomic(array_vga);
    vga.CopyFromDevice(array_vga, array_hasil);

    // A single FreeAll releases everything; the original called it twice in a row.
    vga.FreeAll();

    waktu.Stop();

    // TimeSpan.Milliseconds is only the 0-999 component, so a 1.5 s run was reported as
    // "500". Stopwatch.ElapsedMilliseconds gives the total elapsed time.
    String total = waktu.ElapsedMilliseconds.ToString();
    Console.WriteLine("Total VGA ------ > " + total);
}
/// <summary>
/// Creates the device and SPARSE wrapper, the M * N host matrices, the two host index
/// vectors (M and N elements) and the matching device buffers.
/// </summary>
public void SetUp()
{
    _gpu = CudafyHost.CreateDevice(CudafyModes.Target);
    _sparse = GPGPUSPARSE.Create(_gpu);

    // Host matrices, stored flat with M * N elements each.
    _hiMatrixMN = new double[M * N];
    _hiMatrixMN2 = new double[M * N];
    _hoMatrixMN = new double[M * N];

    // Host int vectors: one entry per row (M) and one per column (N).
    _hoPerVector = new int[M];
    _hoPerVector2 = new int[N];

    // Device counterparts, sized after the host arrays (allocation order kept).
    _diPerVector2 = _gpu.Allocate(_hoPerVector2);
    _diMatrixMN = _gpu.Allocate(_hiMatrixMN);
    _diMatrixMN2 = _gpu.Allocate(_hiMatrixMN2);
    _diPerVector = _gpu.Allocate(_hoPerVector);
}
/// <summary>
/// Reduces the matrix to "canonical" form by the Gauss-Jordan method, i.e. to a matrix that
/// is obtained through equivalent row transformations and satisfies the following: if i is
/// the index of the first non-zero value in a row, then every other row of the matrix holds
/// zero at index i. Evidently, when a row has no first non-zero index (-1), the whole row is zero.
/// Reduction to canonical form is used when solving systems of linear equations and when
/// searching for a fundamental system of solutions of a system of linear equations.
/// This implementation works on a matrix over the field GF(2), i.e. a Boolean matrix.
/// </summary>
public static void ExecuteGaussJordan()
{
    CudafyModule module = CudafyTranslator.Cudafy();
    GPGPU gpu = CudafyHost.GetDevice();
    gpu.LoadModule(module);

    int[,] devA = gpu.Allocate(_a);
    int[,] devB = gpu.Allocate(_b);
    int[] devC = gpu.Allocate(_c);
    int[] devD = gpu.Allocate(_d);
    int[] devE = gpu.Allocate(E);
    gpu.CopyToDevice(_a, devA);

    int rows = _a.GetLength(0);
    int columns = _a.GetLength(1);
    int limit = Math.Min(rows, columns);

    // Grid and block share one size: the cube root of the number of cells, capped at 15.
    int launchSize = Math.Min(15, (int)Math.Pow(rows * columns, 0.33333333333));
    dim3 gridSize = launchSize;
    dim3 blockSize = launchSize;

    gpu.Launch(gridSize, blockSize, "RepeatZero", devA, devB, devC, devD, devE);

    for (int i = 0; i < limit; i++)
    {
        // Refresh _c from the device after every elimination step.
        gpu.Launch(gridSize, blockSize, "IndexOfNonZero", devA, devB, devC, devD, devE);
        gpu.CopyFromDevice(devC, _c);

        // Advance past entries whose value is -1 (no non-zero value found).
        while (i < limit && _c[i] == -1)
        {
            i++;
        }
        if (i >= limit)
        {
            break;
        }

        int j = _c[i];
        gpu.Launch(gridSize, blockSize, "BooleanGaussJordan", devA, devB, i, j);

        // The kernel reads devA and writes devB; swap so the next round reads the fresh result.
        int[,] swap = devA;
        devA = devB;
        devB = swap;
    }

    gpu.CopyFromDevice(devA, _a);

    // Release every allocation made on the device.
    gpu.FreeAll();
}
/// <summary>
/// Adds two N-element vectors on the GPU with the <c>adder</c> kernel, launched as N blocks
/// of one thread each, and prints every sum.
/// </summary>
public static void Execute()
{
    CudafyModule module = CudafyTranslator.Cudafy();
    GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
    gpu.LoadModule(module);

    // Host vectors: a[i] = -i, b[i] = i * i, c receives the result.
    int[] a = new int[N];
    int[] b = new int[N];
    int[] c = new int[N];
    for (int k = 0; k < N; k++)
    {
        a[k] = -k;
        b[k] = k * k;
    }

    // Device vectors sized after the host ones; only the inputs are uploaded.
    int[] deviceA = gpu.Allocate<int>(a);
    int[] deviceB = gpu.Allocate<int>(b);
    int[] deviceC = gpu.Allocate<int>(c);
    gpu.CopyToDevice(a, deviceA);
    gpu.CopyToDevice(b, deviceB);

    // N blocks with a single thread each.
    gpu.Launch(N, 1).adder(deviceA, deviceB, deviceC);
    gpu.CopyFromDevice(deviceC, c);

    for (int k = 0; k < N; k++)
    {
        Console.WriteLine("{0} + {1} = {2}", a[k], b[k], c[k]);
    }

    gpu.Free(deviceA);
    gpu.Free(deviceB);
    gpu.Free(deviceC);
}
/// <summary>
/// Loads the ray tracing module (from the cached file when valid), renders a randomly
/// generated set of spheres on the GPU into <paramref name="bitmap"/> and prints both the
/// module load time and the time reported by the device timer.
/// </summary>
/// <param name="bitmap">Host buffer; a device buffer of the same size is allocated and the kernel output is copied back into it.</param>
public static void Execute(byte[] bitmap)
{
    DateTime dt = DateTime.Now;

    // Check that the cached module exists and matches the .NET modules, else make a new one.
    CudafyModule km = CudafyModule.TryDeserialize(csFILENAME);
    if (km == null || !km.TryVerifyChecksums())
    {
        Console.WriteLine("There was no cached module available so we make a new one.");
        // NOTE(review): the fallback deserializes a module named after ray_serialize rather
        // than translating one - confirm that file is expected to exist.
        km = CudafyModule.Deserialize(typeof(ray_serialize).Name);
        km.Serialize(csFILENAME);
    }

    // NOTE(review): device id 1 is hard-coded here - confirm a second device is intended.
    GPGPU gpu = CudafyHost.GetGPGPU(CudafyModes.Target, 1);
    gpu.LoadModule(km);

    // TimeSpan.Milliseconds is only the 0-999 component, so any load longer than a second
    // was under-reported; print the total elapsed milliseconds as a whole number instead.
    Console.WriteLine("Time taken to load module: {0}ms", (long)DateTime.Now.Subtract(dt).TotalMilliseconds);

    // Capture the start time.
    gpu.StartTimer();

    // Allocate memory on the GPU for the bitmap (same size as the host buffer).
    byte[] dev_bitmap = gpu.Allocate(bitmap);

    // Allocate temporary memory, initialize it, copy it to constant memory on the GPU.
    Sphere[] temp_s = new Sphere[SPHERES];
    for (int i = 0; i < SPHERES; i++)
    {
        temp_s[i].r = rnd(1.0f);
        temp_s[i].g = rnd(1.0f);
        temp_s[i].b = rnd(1.0f);
        temp_s[i].x = rnd(1000.0f) - 500;
        temp_s[i].y = rnd(1000.0f) - 500;
        temp_s[i].z = rnd(1000.0f) - 500;
        temp_s[i].radius = rnd(100.0f) + 20;
    }
    gpu.CopyToConstantMemory(temp_s, s);

    // Generate a bitmap from the sphere data: one 16 x 16 block per 16 x 16 tile.
    dim3 grids = new dim3(DIM / 16, DIM / 16);
    dim3 threads = new dim3(16, 16);
    gpu.Launch(grids, threads, "kernel", dev_bitmap);

    // Copy the bitmap back from the GPU for display.
    gpu.CopyFromDevice(dev_bitmap, bitmap);

    // Get the stop time and display the timing results.
    float elapsedTime = gpu.StopTimer();
    Console.WriteLine("Time to generate: {0} ms", elapsedTime);

    gpu.DeviceFreeAll();
}
/// <summary>
/// Uploads both measurement arrays, runs the <c>calculateDataWithCudafy</c> kernel with one
/// single-thread block per element of <paramref name="prevMeasures"/> and copies the first
/// device buffer back into <paramref name="prevMeasures"/>.
/// </summary>
/// <param name="prevMeasures">Previous measurements; overwritten with the kernel output.</param>
/// <param name="actMeasures">Current measurements.</param>
/// <returns><paramref name="prevMeasures"/> after the update, or null when either argument is null.</returns>
public static float[] prepareAndCalculateFloatData(float[] prevMeasures, float[] actMeasures)
{
    if (prevMeasures == null || actMeasures == null)
    {
        return null;
    }

    float[] devicePrevious = gpu.Allocate<float>(prevMeasures);
    float[] deviceActual = gpu.Allocate<float>(actMeasures);
    gpu.CopyToDevice(prevMeasures, devicePrevious);
    gpu.CopyToDevice(actMeasures, deviceActual);

    gpu.Launch(prevMeasures.Length, 1).calculateDataWithCudafy(devicePrevious, deviceActual);

    gpu.CopyFromDevice(devicePrevious, prevMeasures);

    // Releases every allocation on the shared device, not only the two buffers above.
    gpu.FreeAll();
    return prevMeasures;
}
/// <summary>
/// Translates the Cudafy-marked types, loads the resulting module on device 0 and creates
/// the device buffers used by the simulation.
/// </summary>
private void initGPU()
{
    // Translate all members with the Cudafy attribute in the listed types and compile them.
    CudafyModule module = CudafyTranslator.Cudafy(
        typeof(Population),
        typeof(UserUpdate),
        typeof(Fitness),
        typeof(FitnessParameter),
        typeof(PredictionPerformances),
        typeof(Experiment),
        typeof(SimOptions));

    // Use the first device of the configured target type.
    gpu = CudafyHost.GetDevice(CudafyModes.Target, 0);
    gpu.LoadModule(module);

    // Output buffers are only allocated (sized from fs and the number of individuals) ...
    dev_fitnesses = gpu.Allocate<float>(fs);
    dev_fitnessParams = gpu.Allocate<FitnessParameter>(options.NumberOfIndividuals);

    // ... while the research data is uploaded.
    dev_groundTruth = gpu.CopyToDevice(researchData.GroundTruth);
    dev_userTrust = gpu.CopyToDevice(researchData.UserTrusts);
    dev_updates = gpu.CopyToDevice(researchData.Updates);
}
/// <summary>
/// Adds two N-element vectors on the GPU with the <c>add</c> kernel (128 blocks of 128
/// threads) and verifies every element against the CPU sum.
/// </summary>
public static void Execute()
{
    // Translate this class to CUDA C and compile it.
    CudafyModule module = CudafyTranslator.Cudafy();

    // Get the configured GPU and load the module.
    GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
    gpu.LoadModule(module);

    // Host vectors: a[i] = i, b[i] = 2 * i, c receives the result.
    int[] a = new int[N];
    int[] b = new int[N];
    int[] c = new int[N];

    // The output buffer is allocated before the inputs are uploaded, as in the original.
    int[] deviceC = gpu.Allocate<int>(c);

    for (int k = 0; k < N; k++)
    {
        a[k] = k;
        b[k] = 2 * k;
    }

    int[] deviceA = gpu.CopyToDevice(a);
    int[] deviceB = gpu.CopyToDevice(b);

    // Launch 128 blocks of 128 threads each.
    gpu.Launch(128, 128).add(deviceA, deviceB, deviceC);
    gpu.CopyFromDevice(deviceC, c);

    // Stop at the first element whose GPU sum differs from the CPU sum.
    int mismatch = -1;
    for (int k = 0; k < N && mismatch < 0; k++)
    {
        if (a[k] + b[k] != c[k])
        {
            mismatch = k;
        }
    }

    if (mismatch >= 0)
    {
        Console.WriteLine("{0} + {1} != {2}", a[mismatch], b[mismatch], c[mismatch]);
    }
    else
    {
        Console.WriteLine("We did it!");
    }

    // Release every allocation made on the device.
    gpu.FreeAll();
}
/// <summary>
/// Runs the random-number sample: sets up one generator state per thread, runs the
/// generate kernel ten times, then prints the accumulated per-thread results as a fraction.
/// </summary>
public static void Basics()
{
    CudafyModule module = CudafyTranslator.Cudafy(CudafyModes.Architecture);
    Console.WriteLine(module.CompilerOutput);

    GPGPU gpu = CudafyHost.GetDevice();
    gpu.LoadModule(module);

    // One state and one result slot per thread (64 blocks x 64 threads).
    RandStateXORWOW[] devStates = gpu.Allocate<RandStateXORWOW>(64 * 64);
    int[] devResults = gpu.Allocate<int>(64 * 64);
    int[] hostResults = new int[64 * 64];
    gpu.Set(devResults);

    int pass;
#if !NET35
    gpu.Launch(64, 64).setup_kernel(devStates);
    for (pass = 0; pass < 10; pass++)
    {
        gpu.Launch(64, 64).generate_kernel(devStates, devResults);
    }
#else
    gpu.Launch(64, 64, "setup_kernel", devStates);
    for (pass = 0; pass < 10; pass++)
    {
        gpu.Launch(64, 64, "generate_kernel", devStates, devResults);
    }
#endif

    gpu.CopyFromDevice(devResults, hostResults);

    // Plain accumulation, kept as a manual loop so the integer arithmetic is unchanged.
    int total = 0;
    foreach (int count in hostResults)
    {
        total += count;
    }

    Console.WriteLine("Fraction with low bit set was {0}", (float)total / (64.0f * 64.0f * 100000.0f * 10.0f));
    gpu.FreeAll();
}
/// <summary>
/// Compiles and loads the kernels of the calling type, allocates the device bitmap, and
/// sets the launch dimensions.
/// </summary>
/// <param name="bytes">Number of bytes to allocate for the device bitmap.</param>
public void Initialize(int bytes)
{
    const int tile = 16; // threads per block edge

    CudafyModule module = CudafyTranslator.Cudafy();
    _gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
    _gpu.LoadModule(module);

    _dev_bitmap = _gpu.Allocate<byte>(bytes);

    // A DIM x DIM area is covered by (DIM / 16) x (DIM / 16) blocks of 16 x 16 threads.
    _blocks = new dim3(DIM / tile, DIM / tile);
    _threads = new dim3(tile, tile);
}
/// <summary>
/// Prepares the fixture: obtains the device and its sparse helper, builds the host
/// vectors and the sparse representation of X, and allocates the matching device buffers.
/// </summary>
public void SetUp()
{
    _gpu = CudafyHost.GetDevice();
    _sparse = GPGPUSPARSE.Create(_gpu);

    // Host vectors of length N.
    _hiVectorX = new float[N];
    _hiVectorY = new float[N];
    _hoVectorY = new float[N];

    // FillBufferSparse reports the number of non-zero entries through NNZ.
    FillBufferSparse(_hiVectorX, out NNZ);
    FillBuffer(_hiVectorY);

    // Sparse form of X: NNZ values plus their indices.
    _hiValsX = new float[NNZ];
    _hoValsX = new float[NNZ];
    _hiIndicesX = new int[NNZ];
    GetSparseIndex(_hiVectorX, _hiValsX, _hiIndicesX);

    // Device buffers sized like their host counterparts (nothing is copied here).
    _diValsX = _gpu.Allocate(_hiValsX);
    _diIndicesX = _gpu.Allocate(_hiIndicesX);
    _diVectorY = _gpu.Allocate(_hiVectorY);
}
/// <summary>
/// Runs the <c>GPU_MA</c> kernel on <paramref name="A"/> and <paramref name="B"/> and
/// copies the device result back into <paramref name="C"/>.
/// </summary>
/// <param name="A">First host input array; copied to the device.</param>
/// <param name="B">Second host input array; copied to the device.</param>
/// <param name="C">Host output array; overwritten with the kernel result.</param>
/// <param name="Size">Forwarded unchanged to the kernel.</param>
/// <param name="Size1d">Forwarded to the kernel and used to pick the launch configuration.</param>
/// <param name="gpu">Device on which to allocate memory and launch the kernel.</param>
/// <param name="max_threadsPerBlock">Upper bound used when choosing the threads-per-block value.</param>
/// <returns>Always 1.</returns>
public static int MA(int[] A, int[] B, int[] C, int Size, int Size1d, GPGPU gpu, int max_threadsPerBlock)
{
    int[] GPU_A = null;
    int[] GPU_B = null;
    int[] GPU_C = null;

    try
    {
        // Allocate device memory matching the host arrays.
        GPU_A = gpu.Allocate<int>(A);
        GPU_B = gpu.Allocate<int>(B);
        GPU_C = gpu.Allocate<int>(C);

        // Upload the two inputs.
        gpu.CopyToDevice(A, GPU_A);
        gpu.CopyToDevice(B, GPU_B);

        int threadsPerBlock;
        int blocksPerGrid;
        if (Size1d < max_threadsPerBlock)
        {
            threadsPerBlock = Size1d;
            blocksPerGrid = 1;
        }
        else
        {
            threadsPerBlock = max_threadsPerBlock;
            blocksPerGrid = (Size1d / max_threadsPerBlock) + 1;
        }

        // NOTE(review): CUDAfy's Launch takes (gridSize, blockSize); the arguments here are
        // passed as (threadsPerBlock, blocksPerGrid). Left unchanged because the kernel's
        // indexing is not visible from here - confirm against GPU_MA before swapping.
        gpu.Launch(threadsPerBlock, blocksPerGrid).GPU_MA(GPU_A, GPU_B, GPU_C, Size, Size1d);

        // Download the result.
        gpu.CopyFromDevice(GPU_C, C);
    }
    finally
    {
        // Release device memory even when an allocation, copy or launch throws.
        if (GPU_A != null) gpu.Free(GPU_A);
        if (GPU_B != null) gpu.Free(GPU_B);
        if (GPU_C != null) gpu.Free(GPU_C);
    }

    return 1;
}
/// <summary>
/// Compiles <c>TextInsertion</c> for the architecture of the first CUDA device, launches
/// <c>AHybridMethod</c> with a single thread, and prints "Failed" if the data read back
/// from the device differs from the host input.
/// </summary>
public static void Execute()
{
    _gpu = CudafyHost.GetDevice(eGPUType.Cuda);
    CudafyModule km = CudafyTranslator.Cudafy(ePlatform.Auto, _gpu.GetArchitecture(), typeof(TextInsertion));
    Console.WriteLine(km.CompilerOutput);
    _gpu.LoadModule(km);

    int[] data = new int[64];
    int[] res = new int[64];
    int[] data_d = null;
    int[] res_d = null;

    try
    {
        data_d = _gpu.CopyToDevice(data);
        res_d = _gpu.Allocate(data);

        _gpu.Launch(1, 1, "AHybridMethod", data_d, res_d);

        // NOTE(review): the input buffer (data_d), not res_d, is read back and compared with
        // the host input - confirm this is the intended check.
        _gpu.CopyFromDevice(data_d, res);
    }
    finally
    {
        // The original never released these buffers; free them even if the launch throws.
        if (data_d != null) _gpu.Free(data_d);
        if (res_d != null) _gpu.Free(res_d);
    }

    for (int i = 0; i < 64; i++)
    {
        if (data[i] != res[i])
        {
            Console.WriteLine("Failed");
            break;
        }
    }
}
/// <summary>
/// Compiles <c>SIMDFunctions</c> for the first CUDA device and runs <c>SIMDFunctionTest</c>
/// three times on freshly filled buffers, printing the elapsed time of each run. The first
/// run is also checked element by element against <c>GThread.vadd2</c> on the host.
/// </summary>
public static void Execute()
{
    _gpu = CudafyHost.GetDevice(eGPUType.Cuda);
    CudafyModule module = CudafyTranslator.Cudafy(ePlatform.Auto, _gpu.GetArchitecture(), typeof(SIMDFunctions));
    _gpu.LoadModule(module);

    int w = 1024;
    int h = 1024;

    int run = 0;
    while (run < 3)
    {
        uint[] a = new uint[w * h];
        Fill(a);
        uint[] dev_a = _gpu.CopyToDevice(a);

        uint[] b = new uint[w * h];
        Fill(b);
        uint[] dev_b = _gpu.CopyToDevice(b);

        uint[] c = new uint[w * h];
        uint[] dev_c = _gpu.Allocate(c);

        // Timed section: kernel launch plus copying the result back.
        _gpu.StartTimer();
        _gpu.Launch(h, w, "SIMDFunctionTest", dev_a, dev_b, dev_c);
        _gpu.CopyFromDevice(dev_c, c);
        float elapsed = _gpu.StopTimer();
        Console.WriteLine("Time: {0}", elapsed);

        if (run == 0)
        {
            // Host reference: evaluate vadd2 for every element and compare with the device output.
            GThread hostThread = new GThread(1, 1, null);
            bool allMatch = true;
            for (int k = 0; k < w * h; k++)
            {
                uint expected = hostThread.vadd2(a[k], b[k]);
                allMatch &= (expected == c[k]);
            }

            string verdict = allMatch ? "passed. " : "failed!";
            Console.WriteLine("Test {0}", verdict);
        }

        _gpu.FreeAll();
        run++;
    }
}
/// <summary>
/// Fills two arrays with random doubles, runs the <c>Test2</c> kernel on them with
/// <paramref name="threads"/> blocks of one thread, copies the result back, and frees the
/// device buffers.
/// </summary>
/// <param name="gpu">Device used for the allocations and the launch.</param>
/// <param name="threads">Length of the arrays and number of blocks launched.</param>
public static void Example1(GPGPU gpu, int threads)
{
    double[] first = new double[threads];
    double[] second = new double[threads];
    double[] result = new double[threads];

    // Values are drawn alternately for the two arrays from a single generator.
    Random rng = new Random();
    int k = 0;
    while (k < threads)
    {
        first[k] = rng.NextDouble();
        second[k] = rng.NextDouble();
        k++;
    }

    double[] devFirst = gpu.CopyToDevice(first);
    double[] devSecond = gpu.CopyToDevice(second);
    double[] devResult = gpu.Allocate<double>(result);

    gpu.Launch(threads, 1).Test2(devFirst, devSecond, devResult);
    gpu.CopyFromDevice(devResult, result);

    gpu.Free(devFirst);
    gpu.Free(devSecond);
    gpu.Free(devResult);
}
/// <summary>
/// Menu handler for simple linear regression ("SLR"). Shows the column selection dialog;
/// when exactly two columns are chosen it computes the sums needed for the regression on
/// the GPU, stores the slope in <c>ab[0]</c> and the intercept in <c>ab[1]</c>, and then
/// shows the result dialog.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void meanToolStripMenuItem_Click(object sender, EventArgs e)
{
    Form dlg1 = new AnalyzeForm();
    DialogResult dr = dlg1.ShowDialog();

    for (int ix = 0; ix < Data.columnChoosen.Length; ix++)
    {
        if (Data.columnChoosen[ix] != -1)
        {
            columnChoosen.Add(Data.variableView[Data.columnChoosen[ix]].nama);
        }
    }

    judul = "SLR";

    if (dr == DialogResult.OK)
    {
        if (columnChoosen.Count == 2)
        {
            // First chosen column is the dependent variable (y), second the independent one (x).
            int columny = Data.columnChoosen[0];
            int columnx = Data.columnChoosen[1];
            try
            {
                CudafyModule km = CudafyTranslator.Cudafy(eArchitecture.sm_20);
                _gpu = CudafyHost.GetDevice(eGPUType.Cuda);
                _gpu.LoadModule(km);
                GPGPUProperties gpprop = _gpu.GetDeviceProperties(false);
                var sheet = reoGridControl2.CurrentWorksheet;

                int Ny = jumlahthread(columny);
                int Nx = jumlahthread(columnx);
                int N = Ny > Nx ? Ny : Nx;

                float[] ay = new float[Ny];
                float[] by = new float[Ny];
                float[] ax = new float[Nx];
                float[] bx = new float[Nx];
                float[] c = new float[N];

                int jumlahDatay = jumlahdata(columny, Ny);
                int jumlahDatax = jumlahdata(columnx, Nx);

                // Each column is loaded as two arrays (InitData selectors 1 and 2) - presumably
                // the two halves of the column; verify against InitData.
                ay = InitData(1, columny, Ny, ay, by);
                by = InitData(2, columny, Ny, ay, by);
                ax = InitData(1, columnx, Nx, ax, bx);
                bx = InitData(2, columnx, Nx, ax, bx);

                float temp, temp2;
                int missingCounty = 0;
                int missingCountx = 0;

                // Zero out values equal to a declared missing code. Each code is parsed once per
                // code rather than once per element; the result is the same.
                // NOTE(review): only the first array of each column (ay / ax) is filtered.
                for (int b = 0; b < Data.variableView[columny].missing.Count; b++)
                {
                    float.TryParse(Data.variableView[columny].missing[b], out temp);
                    for (int a = 0; a < Ny; a++)
                    {
                        if (ay[a] == temp)
                        {
                            ay[a] = 0;
                            missingCounty++;
                        }
                    }
                }
                for (int b = 0; b < Data.variableView[columnx].missing.Count; b++)
                {
                    float.TryParse(Data.variableView[columnx].missing[b], out temp);
                    for (int a = 0; a < Nx; a++)
                    {
                        if (ax[a] == temp)
                        {
                            ax[a] = 0;
                            missingCountx++;
                        }
                    }
                }

                // Zero out values inside the declared missing range [low, high].
                if (Data.variableView[columny].missingRange.Count > 1)
                {
                    float.TryParse(Data.variableView[columny].missingRange[0], out temp);
                    float.TryParse(Data.variableView[columny].missingRange[1], out temp2);
                    for (int a = 0; a < Ny; a++)
                    {
                        if (ay[a] >= temp && ay[a] <= temp2)
                        {
                            ay[a] = 0;
                            missingCounty++;
                        }
                    }
                }
                if (Data.variableView[columnx].missingRange.Count > 1)
                {
                    float.TryParse(Data.variableView[columnx].missingRange[0], out temp);
                    float.TryParse(Data.variableView[columnx].missingRange[1], out temp2);
                    for (int a = 0; a < Nx; a++)
                    {
                        if (ax[a] >= temp && ax[a] <= temp2)
                        {
                            ax[a] = 0;
                            // Fix: this loop filters x, so it must count x (it incremented missingCounty).
                            missingCountx++;
                        }
                    }
                }
                Debug.WriteLine("y : " + missingCounty + "/nx : " + missingCountx);

                // --- sum(x * y): multiply each pair of arrays element-wise, then reduce both products.
                float[] dev_a = _gpu.CopyToDevice(ay);
                float[] dev_b = _gpu.CopyToDevice(ax);
                float[] dev_c = _gpu.Allocate<float>(c);
                _gpu.Launch((N + 127) / 128, 128).multiplyVector(dev_a, dev_b, dev_c, N);
                float[] save1 = new float[N];
                _gpu.CopyFromDevice(dev_c, save1);

                dev_a = _gpu.CopyToDevice(by);
                dev_b = _gpu.CopyToDevice(bx);
                dev_c = _gpu.Allocate<float>(c);
                _gpu.Launch((N + 127) / 128, 128).multiplyVector(dev_a, dev_b, dev_c, N);
                float[] save2 = new float[N];
                _gpu.CopyFromDevice(dev_c, save2);

                dev_a = _gpu.CopyToDevice(save1);
                dev_b = _gpu.CopyToDevice(save2);
                dev_c = _gpu.Allocate<float>(c);
                float sumxy = jumlahan(N, dev_a, dev_b, dev_c, c);

                // --- sum(x)
                c = new float[Nx];
                dev_a = _gpu.CopyToDevice(ax);
                dev_b = _gpu.CopyToDevice(bx);
                dev_c = _gpu.Allocate<float>(c);
                float sumx = jumlahan(Nx, dev_a, dev_b, dev_c, c);

                // --- sum(y)
                c = new float[Ny];
                dev_a = _gpu.CopyToDevice(ay);
                dev_b = _gpu.CopyToDevice(by);
                dev_c = _gpu.Allocate<float>(c);
                // Fix: the y arrays hold Ny elements; the original reduced them with Nx.
                float sumy = jumlahan(Ny, dev_a, dev_b, dev_c, c);

                // --- sum(x * x)
                c = new float[N];
                dev_a = _gpu.CopyToDevice(ax);
                dev_b = _gpu.CopyToDevice(ax);
                dev_c = _gpu.Allocate<float>(c);
                _gpu.Launch((N + 127) / 128, 128).multiplyVector(dev_a, dev_b, dev_c, Nx);
                save1 = new float[N];
                _gpu.CopyFromDevice(dev_c, save1);

                dev_a = _gpu.CopyToDevice(bx);
                dev_b = _gpu.CopyToDevice(bx);
                dev_c = _gpu.Allocate<float>(c);
                _gpu.Launch((N + 127) / 128, 128).multiplyVector(dev_a, dev_b, dev_c, Nx);
                save2 = new float[N];
                _gpu.CopyFromDevice(dev_c, save2);

                dev_a = _gpu.CopyToDevice(save1);
                dev_b = _gpu.CopyToDevice(save2);
                dev_c = _gpu.Allocate<float>(c);
                float sumxquad = jumlahan(Nx, dev_a, dev_b, dev_c, c);

                // Intermediate buffers are not freed one by one; release everything here.
                _gpu.FreeAll();

                float jumlahData = jumlahDatax > jumlahDatay ? jumlahDatax : jumlahDatay;

                // Slope: (n * sum(xy) - sum(x) * sum(y)) / (n * sum(x^2) - sum(x)^2)
                float beta = ((jumlahData * sumxy) - (sumx * sumy)) / ((jumlahData * sumxquad) - (sumx * sumx));
                ab[0] = beta.ToString();

                // Intercept: mean(y) - beta * mean(x).
                // Fix: the x denominator was missing its parentheses, so missingCountx was
                // subtracted from the quotient instead of from the count.
                float alpha = (sumy / (jumlahDatay - missingCounty)) - beta * (sumx / (jumlahDatax - missingCountx));
                ab[1] = alpha.ToString();
            }
            catch (CudafyLanguageException cle)
            {
                // Previously swallowed silently; report it the same way as host errors.
                Console.Write(cle.Message);
            }
            catch (CudafyCompileException cce)
            {
                Console.Write(cce.Message);
            }
            catch (CudafyHostException che)
            {
                Console.Write(che.Message);
            }
        }

        Form dialogResult = new ResultSLR();
        DialogResult dialog = dialogResult.ShowDialog();
    }
    else
    {
        dlg1.Close();
    }
}
/// <summary>
/// Menu handler for "Variance". Shows the column selection dialog and, for every chosen
/// column, computes the mean and then the sample variance on the GPU, appending each
/// variance to <c>results</c> before showing the result dialog.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void varianceToolStripMenuItem_Click(object sender, EventArgs e)
{
    Form dlg1 = new AnalyzeForm();
    DialogResult dr = dlg1.ShowDialog();

    for (int ix = 0; ix < Data.columnChoosen.Length; ix++)
    {
        if (Data.columnChoosen[ix] != -1)
        {
            columnChoosen.Add(Data.variableView[Data.columnChoosen[ix]].nama);
        }
    }

    judul = "Variance";

    if (dr == DialogResult.OK)
    {
        for (int index = 0; index < Data.columnChoosen.Length; index++)
        {
            if (Data.columnChoosen[index] == -1)
            {
                continue;
            }

            int column = Data.columnChoosen[index];
            try
            {
                CudafyModule km = CudafyTranslator.Cudafy(eArchitecture.sm_20);
                _gpu = CudafyHost.GetDevice(eGPUType.Cuda);
                _gpu.LoadModule(km);
                GPGPUProperties gpprop = _gpu.GetDeviceProperties(false);
                var sheet = reoGridControl2.CurrentWorksheet;

                int N = jumlahthread(column);
                float[] a = new float[N];
                float[] b = new float[N];
                float[] c = new float[N];

                int jumlahData = jumlahdata(column, N);

                // The column is loaded as two arrays (InitData selectors 1 and 2) - presumably
                // its two halves; verify against InitData.
                a = InitData(1, column, N, a, b);
                b = InitData(2, column, N, a, b);

                float temp, temp2;
                int missingCount = 0;

                // Zero out values equal to a declared missing code (each code parsed once).
                for (int bx = 0; bx < Data.variableView[column].missing.Count; bx++)
                {
                    float.TryParse(Data.variableView[column].missing[bx], out temp);
                    for (int ax = 0; ax < N; ax++)
                    {
                        if (a[ax] == temp)
                        {
                            a[ax] = 0;
                            missingCount++;
                        }
                    }
                }

                // Zero out values inside the declared missing range [low, high].
                if (Data.variableView[column].missingRange.Count > 1)
                {
                    float.TryParse(Data.variableView[column].missingRange[0], out temp);
                    float.TryParse(Data.variableView[column].missingRange[1], out temp2);
                    for (int ax = 0; ax < N; ax++)
                    {
                        if (a[ax] >= temp && a[ax] <= temp2)
                        {
                            a[ax] = 0;
                            missingCount++;
                        }
                    }
                }
                Debug.WriteLine(missingCount);

                // --- Stage 1: mean = sum / number of valid values.
                float[] dev_a = _gpu.CopyToDevice(a);
                float[] dev_b = _gpu.CopyToDevice(b);
                float[] dev_c = _gpu.Allocate<float>(c);
                int N1 = N;
                float hasil = jumlahan(N, dev_a, dev_b, dev_c, c);
                float mean = (hasil / (jumlahData - missingCount));
                _gpu.FreeAll();

                // --- Stage 2: run powerVector with the mean on each array
                // (presumably the squared deviations; kernel not visible here).
                c = new float[N1];
                dev_a = _gpu.CopyToDevice(a);
                dev_b = _gpu.CopyToDevice(b);
                dev_c = _gpu.Allocate<float>(c);
                _gpu.Launch((N1 + 127) / 128, 128).powerVector(dev_a, dev_c, mean, N1);
                _gpu.CopyFromDevice(dev_c, c);
                _gpu.Free(dev_a);
                _gpu.Free(dev_c);

                float[] d = new float[N];
                dev_c = _gpu.Allocate<float>(d);
                _gpu.Launch((N1 + 127) / 128, 128).powerVector(dev_b, dev_c, mean, N1);
                _gpu.CopyFromDevice(dev_c, d);
                _gpu.Free(dev_b);
                _gpu.Free(dev_c);
                _gpu.FreeAll();

                // With an odd number of values the last slot of the second array is cleared
                // so it does not contribute to the sum.
                if (jumlahData % 2 != 0)
                {
                    d[N1 - 1] = 0;
                }

                // --- Stage 3: reduce the stage-2 results and divide by (n - 1).
                dev_a = _gpu.CopyToDevice(c);
                dev_b = _gpu.CopyToDevice(d);
                dev_c = _gpu.Allocate<float>(c);
                hasil = jumlahan(N, dev_a, dev_b, dev_c, c);

                // Fix: the last stage never released its buffers, leaking device memory
                // for every processed column.
                _gpu.FreeAll();

                float variance = (hasil / (jumlahData - missingCount - 1));
                results.Add(variance);
            }
            catch (CudafyLanguageException cle)
            {
                // Previously swallowed silently; report it the same way as host errors.
                Console.Write(cle.Message);
            }
            catch (CudafyCompileException cce)
            {
                Console.Write(cce.Message);
            }
            catch (CudafyHostException che)
            {
                Console.Write(che.Message);
            }
        }

        Form dialogResult = new ResultForm();
        DialogResult dialog = dialogResult.ShowDialog();
    }
    else
    {
        dlg1.Close();
    }
}
/// <summary>
/// Computes the standard deviation of one worksheet column on the GPU and appends it to
/// <c>results</c>. The mean is obtained by repeatedly adding the two halves of the data
/// (<c>addVector</c>); <c>minusMean</c> then produces per-value terms that are reduced the
/// same way. The member fields <c>jumlahData</c> and <c>missingCount</c> are updated.
/// </summary>
/// <param name="column">Zero-based worksheet column to analyse.</param>
public void computeStdv(int column)
{
    try
    {
        // Translates the Cudafy-attributed members of the calling type.
        CudafyModule km = CudafyTranslator.Cudafy(eArchitecture.sm_20);
        _gpu = CudafyHost.GetDevice(eGPUType.Cuda);
        _gpu.LoadModule(km);
        GPGPUProperties gpprop = _gpu.GetDeviceProperties(false);
        var sheet = reoGridControl2.CurrentWorksheet;

        // The rows are split in two halves: a gets rows [0, N), b gets rows [N, 2N).
        int N = sheet.RowCount / 2;
        float[] a = new float[N];
        float[] b = new float[N];
        float[] c = new float[N];

        jumlahData = 0;
        for (int i = 0; i < N; i++)
        {
            if (sheet[i, column] != null && sheet[i, column].ToString() != "")
            {
                float.TryParse(sheet[i, column].ToString(), out a[i]);
                jumlahData++;
            }
            if (sheet[i + N, column] != null && sheet[i + N, column].ToString() != "")
            {
                float.TryParse(sheet[i + N, column].ToString(), out b[i]);
                jumlahData++;
            }
        }

        float temp, temp2;
        missingCount = 0;

        // Zero out values equal to a declared missing code (each code parsed once).
        for (int bx = 0; bx < Data.variableView[column].missing.Count; bx++)
        {
            float.TryParse(Data.variableView[column].missing[bx], out temp);
            for (int ax = 0; ax < N; ax++)
            {
                if (a[ax] == temp)
                {
                    a[ax] = 0;
                    missingCount++;
                }
            }
        }

        // Zero out values inside the declared missing range [low, high].
        if (Data.variableView[column].missingRange.Count > 1)
        {
            float.TryParse(Data.variableView[column].missingRange[0], out temp);
            float.TryParse(Data.variableView[column].missingRange[1], out temp2);
            for (int ax = 0; ax < N; ax++)
            {
                if (a[ax] >= temp && a[ax] <= temp2)
                {
                    a[ax] = 0;
                    missingCount++;
                }
            }
        }
        Debug.WriteLine(missingCount);

        // --- Reduction 1: sum of all values.
        float[] dev_a = _gpu.CopyToDevice(a);
        float[] dev_b = _gpu.CopyToDevice(b);
        float[] dev_c = _gpu.Allocate<float>(c);
        int N_awal = N;
        bool first = true;
        while (N > 1)
        {
            if (!first)
            {
                // Split the previous partial sums into a lower half (first N values) and an
                // upper half (the rest, zero-padded to N) and add them again.
                float[] baru = new float[N];
                Array.Copy(c, N, baru, 0, c.Length - N);
                dev_a = _gpu.CopyToDevice(c.Take(N).ToArray());
                dev_b = _gpu.CopyToDevice(baru);
                c = new float[N];
                dev_c = _gpu.Allocate<float>(c);
            }
            _gpu.Launch((N + 127) / 128, 128).addVector(dev_a, dev_b, dev_c, N);
            _gpu.CopyFromDevice(dev_c, c);
            _gpu.Free(dev_a);
            _gpu.Free(dev_b);
            _gpu.Free(dev_c);
            N = (N + 1) / 2; // halve, rounding up
            first = false;
        }

        // The loop stops with two partial sums left. With fewer than two elements per half
        // it never runs, so fall back to the host values instead of indexing past the array.
        float total;
        if (N_awal >= 2) total = c[0] + c[1];
        else if (N_awal == 1) total = a[0] + b[0];
        else total = 0f;

        float[] mean = new float[1];
        mean[0] = total / (jumlahData - missingCount);
        float[] dev_mean = _gpu.CopyToDevice(mean);

        float[] temp4 = new float[jumlahData];
        float[] dev_temp4 = _gpu.Allocate<float>(temp4);

        // Parse the first jumlahData rows of the column into one flat array.
        float[] data = new float[jumlahData];
        for (int i = 0; i < jumlahData; i++)
        {
            if (sheet[i, column] != null && sheet[i, column].ToString() != "")
            {
                float.TryParse(sheet[i, column].ToString(), out data[i]);
            }
        }
        float[] dev_data = _gpu.CopyToDevice(data);

        // One extra slot so the array can always be split into two equal halves below.
        float[] x = new float[jumlahData + 1];
        float[] dev_x = _gpu.Allocate<float>(x);
        _gpu.Launch((jumlahData + 127) / 128, 128).minusMean(dev_mean, dev_data, dev_x, jumlahData, dev_temp4);
        _gpu.CopyFromDevice(dev_x, x);
        _gpu.Free(dev_mean);
        _gpu.Free(dev_data);
        _gpu.Free(dev_x);
        _gpu.Free(dev_temp4);

        // --- Reduction 2: sum of the minusMean output.
        N = ((jumlahData + 1) / 2);
        int pairCount = N;
        float[] isi1 = new float[N];
        float[] isi2 = new float[N];
        for (int i = 0; i < N; i++)
        {
            // Direct copy: the original converted each float to a string and parsed it back,
            // which can lose precision, behind null checks that are always true for float.
            isi1[i] = x[i];
            isi2[i] = x[i + N];
        }

        float[] isic = new float[N];
        float[] dev_isi1 = _gpu.CopyToDevice(isi1);
        float[] dev_isi2 = _gpu.CopyToDevice(isi2);
        float[] dev_isic = _gpu.Allocate<float>(isic);
        bool first1 = true;
        while (N > 1)
        {
            if (!first1)
            {
                // Fix: this branch read from and reset 'c' (the buffer of the first reduction)
                // instead of 'isic'.
                float[] baru = new float[N];
                Array.Copy(isic, N, baru, 0, isic.Length - N);
                dev_isi1 = _gpu.CopyToDevice(isic.Take(N).ToArray());
                dev_isi2 = _gpu.CopyToDevice(baru);
                isic = new float[N];
                dev_isic = _gpu.Allocate<float>(isic);
            }
            _gpu.Launch((N + 127) / 128, 128).addVector(dev_isi1, dev_isi2, dev_isic, N);
            _gpu.CopyFromDevice(dev_isic, isic);
            _gpu.Free(dev_isi1);
            _gpu.Free(dev_isi2);
            _gpu.Free(dev_isic);
            N = (N + 1) / 2;
            // Fix: the original cleared 'first' here, so 'first1' stayed true and every later
            // round reused device buffers that had already been freed.
            first1 = false;
        }

        float squaredSum;
        if (pairCount >= 2) squaredSum = isic[0] + isic[1];
        else if (pairCount == 1) squaredSum = isi1[0] + isi2[0];
        else squaredSum = 0f;

        // NOTE(review): the divisor ignores missingCount, unlike the mean above - confirm.
        float temp3 = (float)Math.Sqrt(squaredSum / (jumlahData - 1));
        Debug.WriteLine("STDV = " + (squaredSum / (jumlahData - 1)));
        results.Add(temp3);

        _gpu.FreeAll();
    }
    catch (CudafyLanguageException cle)
    {
        // Previously swallowed silently; report it the same way as host errors.
        Console.Write(cle.Message);
    }
    catch (CudafyCompileException cce)
    {
        Console.Write(cce.Message);
    }
    catch (CudafyHostException che)
    {
        Console.Write(che.Message);
    }
}
/// <summary>
/// Menu handler for the parallel mean. Shows the column selection dialog and, for every
/// chosen column, sums the values on the GPU by repeatedly adding the two halves of the
/// data (<c>addVector</c>), divides by the number of valid values, appends the mean to
/// <c>results</c>, and finally shows the result dialog. A sequential mean is written to
/// the debug output for comparison.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void meanPararelToolStripMenuItem_Click(object sender, EventArgs e)
{
    Form dlg1 = new AnalyzeForm();
    DialogResult dr = dlg1.ShowDialog();

    for (int ix = 0; ix < Data.columnChoosen.Length; ix++)
    {
        if (Data.columnChoosen[ix] != -1)
        {
            columnChoosen.Add(Data.variableView[Data.columnChoosen[ix]].nama);
        }
    }

    if (dr == DialogResult.OK)
    {
        for (int index = 0; index < Data.columnChoosen.Length; index++)
        {
            if (Data.columnChoosen[index] == -1)
            {
                continue;
            }

            int column = Data.columnChoosen[index];
            try
            {
                // Translates the Cudafy-attributed members of the calling type.
                CudafyModule km = CudafyTranslator.Cudafy(eArchitecture.sm_20);
                _gpu = CudafyHost.GetDevice(eGPUType.Cuda);
                _gpu.LoadModule(km);
                GPGPUProperties gpprop = _gpu.GetDeviceProperties(false);
                var sheet = reoGridControl2.CurrentWorksheet;

                // The rows are split in two halves: a gets rows [0, N), b gets rows [N, 2N).
                int N = sheet.RowCount / 2;
                float[] a = new float[N];
                float[] b = new float[N];
                float[] c = new float[N];

                int jumlahData = 0;
                for (int i = 0; i < N; i++)
                {
                    if (sheet[i, column] != null && sheet[i, column].ToString() != "")
                    {
                        float.TryParse(sheet[i, column].ToString(), out a[i]);
                        jumlahData++;
                    }
                    if (sheet[i + N, column] != null && sheet[i + N, column].ToString() != "")
                    {
                        float.TryParse(sheet[i + N, column].ToString(), out b[i]);
                        jumlahData++;
                    }
                }

                float temp, temp2;
                int missingCount = 0;

                // Zero out values equal to a declared missing code (each code parsed once).
                for (int bx = 0; bx < Data.variableView[column].missing.Count; bx++)
                {
                    float.TryParse(Data.variableView[column].missing[bx], out temp);
                    for (int ax = 0; ax < N; ax++)
                    {
                        if (a[ax] == temp)
                        {
                            a[ax] = 0;
                            missingCount++;
                        }
                    }
                }

                // Zero out values inside the declared missing range [low, high].
                if (Data.variableView[column].missingRange.Count > 1)
                {
                    float.TryParse(Data.variableView[column].missingRange[0], out temp);
                    float.TryParse(Data.variableView[column].missingRange[1], out temp2);
                    for (int ax = 0; ax < N; ax++)
                    {
                        if (a[ax] >= temp && a[ax] <= temp2)
                        {
                            a[ax] = 0;
                            missingCount++;
                        }
                    }
                }
                Debug.WriteLine(missingCount);

                // CPU reference value, only used in the debug output below.
                float meanSequential = 0;
                for (int i = 0; i < N; i++)
                {
                    meanSequential += a[i] + b[i];
                }
                meanSequential = meanSequential / (jumlahData - missingCount);

                float[] dev_a = _gpu.CopyToDevice(a);
                float[] dev_b = _gpu.CopyToDevice(b);
                float[] dev_c = _gpu.Allocate<float>(c);
                int N_awal = N;
                bool first = true;
                while (N > 1)
                {
                    if (!first)
                    {
                        // Split the previous partial sums into a lower half (first N values) and
                        // an upper half (the rest, zero-padded to N) and add them again.
                        float[] baru = new float[N];
                        Array.Copy(c, N, baru, 0, c.Length - N);
                        dev_a = _gpu.CopyToDevice(c.Take(N).ToArray());
                        dev_b = _gpu.CopyToDevice(baru);
                        c = new float[N];
                        dev_c = _gpu.Allocate<float>(c);
                    }
                    _gpu.Launch((N + 127) / 128, 128).addVector(dev_a, dev_b, dev_c, N);
                    _gpu.CopyFromDevice(dev_c, c);
                    _gpu.Free(dev_a);
                    _gpu.Free(dev_b);
                    _gpu.Free(dev_c);
                    N = (N + 1) / 2; // halve, rounding up
                    first = false;
                }

                // The loop stops with two partial sums left. With fewer than two elements per
                // half it never runs, so fall back to the host values instead of indexing past
                // the end of c.
                float total;
                if (N_awal >= 2) total = c[0] + c[1];
                else if (N_awal == 1) total = a[0] + b[0];
                else total = 0f;

                float mean = total / (jumlahData - missingCount);
                Debug.WriteLine("mean-nya adalah " + mean + " mean dari sequensial adalah " + meanSequential);
                results.Add(mean);

                _gpu.FreeAll();
            }
            catch (CudafyLanguageException cle)
            {
                // Previously swallowed silently; report it the same way as host errors.
                Console.Write(cle.Message);
            }
            catch (CudafyCompileException cce)
            {
                Console.Write(cce.Message);
            }
            catch (CudafyHostException che)
            {
                Console.Write(che.Message);
            }
        }

        Form dialogResult = new FormResultsMean();
        DialogResult dialog = dialogResult.ShowDialog();
    }
    else
    {
        dlg1.Close();
    }
}
/// <summary>
/// Creates two offset views (100 and 200) over one 1000-element array, scales the array by
/// ten, uploads both views as device objects, runs <c>Test2</c> with a single thread, and
/// prints "Pass" when the first two results are 1050 and 7, otherwise "Fail".
/// </summary>
/// <param name="gpu">Device used for the allocations and the launch.</param>
public static void Example2(GPGPU gpu)
{
    const int length = 1000;

    ArrayView view1 = new ArrayView();
    ArrayView view2 = new ArrayView();

    // data[k] = k for k in [0, 1000).
    float[] data = new float[length];
    for (int k = 0; k < length; k++)
    {
        data[k] = (float)k;
    }

    // Two views of the same array, differing only in their offset.
    view1.CreateView(data, 100);
    view2.CreateView(data, 200);

    // Scale the shared array after the views have been created.
    for (int k = 0; k < length; k++)
    {
        data[k] *= 10f;
    }

    // Per the original author's note, the large array should reach the device only once and
    // be referenced by both ArrayView instances.
    var dev_view1 = DeviceClassHelper.CreateDeviceObject(gpu, view1);
    var dev_view2 = DeviceClassHelper.CreateDeviceObject(gpu, view2);

    var dev_result = gpu.Allocate<float>(5);
    var hostResult = new float[5];

    gpu.Launch(1, 1).Test2(dev_view1, dev_view2, dev_result);
    gpu.CopyFromDevice(dev_result, hostResult);

    bool pass = hostResult[0] == 1050f && hostResult[1] == 7f;
    if (pass)
    {
        Console.WriteLine("Pass");
    }
    else
    {
        Console.WriteLine("Fail");
    }
}
/// <summary>
/// Sums the first <c>2 * N</c> elements of <paramref name="array"/> on the GPU, where
/// <c>N</c> is half the current worksheet's row count, by repeatedly adding the two halves
/// of the data with the <c>addVector</c> kernel.
/// </summary>
/// <param name="array">Values to sum; must contain at least <c>2 * N</c> elements.</param>
/// <returns>The sum, or 0 when a Cudafy exception is caught.</returns>
public float computeSum2(float[] array)
{
    try
    {
        // Translates the Cudafy-attributed members of the calling type.
        CudafyModule km = CudafyTranslator.Cudafy(eArchitecture.sm_20);
        _gpu = CudafyHost.GetDevice(eGPUType.Cuda);
        _gpu.LoadModule(km);
        GPGPUProperties gpprop = _gpu.GetDeviceProperties(false);
        var sheet = reoGridControl2.CurrentWorksheet;

        // NOTE(review): N comes from the worksheet, not from array.Length - confirm that
        // callers always pass at least 2 * N values.
        int N = sheet.RowCount / 2;
        float[] a = new float[N];
        float[] b = new float[N];
        float[] c = new float[N];

        // Split the input in two halves. The original guarded each copy with null/empty-string
        // checks that are always true for float values, so every element was copied.
        for (int i = 0; i < N; i++)
        {
            a[i] = array[i];
            b[i] = array[i + N];
        }

        float[] dev_a = _gpu.CopyToDevice(a);
        float[] dev_b = _gpu.CopyToDevice(b);
        float[] dev_c = _gpu.Allocate<float>(c);
        int N_awal = N;
        bool first = true;
        while (N > 1)
        {
            if (!first)
            {
                // Split the previous partial sums into a lower half (first N values) and an
                // upper half (the rest, zero-padded to N) and add them again.
                float[] baru = new float[N];
                Array.Copy(c, N, baru, 0, c.Length - N);
                dev_a = _gpu.CopyToDevice(c.Take(N).ToArray());
                dev_b = _gpu.CopyToDevice(baru);
                c = new float[N];
                dev_c = _gpu.Allocate<float>(c);
            }
            _gpu.Launch((N + 127) / 128, 128).addVector(dev_a, dev_b, dev_c, N);
            _gpu.CopyFromDevice(dev_c, c);
            _gpu.Free(dev_a);
            _gpu.Free(dev_b);
            _gpu.Free(dev_c);
            N = (N + 1) / 2; // halve, rounding up
            first = false;
        }

        _gpu.FreeAll();

        // The loop stops with two partial sums left. With fewer than two elements per half it
        // never runs, so c holds no result (and c[1] does not exist); use the host values.
        if (N_awal >= 2) return c[0] + c[1];
        if (N_awal == 1) return a[0] + b[0];
        return 0;
    }
    catch (CudafyLanguageException cle)
    {
        // Previously swallowed silently; report it the same way as host errors.
        Console.Write(cle.Message);
    }
    catch (CudafyCompileException cce)
    {
        Console.Write(cce.Message);
    }
    catch (CudafyHostException che)
    {
        Console.Write(che.Message);
    }
    return 0;
}
/// <summary>
/// Loads the module on devices 0 and 1 (a clone for the second), then runs the two worker
/// methods on separate threads up to ten times, requiring every pair to finish within ten
/// seconds each.
/// </summary>
public void Test_TwoThreadTwoGPUVer2()
{
    eArchitecture arch;
    if (CudafyModes.Target == eGPUType.OpenCL)
    {
        arch = eArchitecture.OpenCL;
    }
    else
    {
        arch = eArchitecture.sm_11;
    }

    // First device: original module.
    _gpu0 = CudafyHost.GetDevice(CudafyModes.Target, 0);
    var cm = CudafyTranslator.Cudafy(arch, typeof(MultiGPUTests));
    _gpu0.SetCurrentContext();
    _gpu0.LoadModule(cm);
    _gpuuintBufferIn0 = _gpu0.Allocate(_uintBufferIn0);

    // Second device: the same module cannot be loaded on two devices, so a clone is used.
    _gpu1 = CudafyHost.GetDevice(CudafyModes.Target, 1);
    var cm1 = cm.Clone();
    _gpu1.SetCurrentContext();
    _gpu1.LoadModule(cm1);
    _gpuuintBufferIn1 = _gpu1.Allocate(_uintBufferIn1);

    _gpu0.EnableMultithreading();
    _gpu1.EnableMultithreading();

    bool j1 = false;
    bool j2 = false;
    for (int round = 0; round < 10; round++)
    {
        Console.WriteLine(round);

        Thread worker0 = new Thread(Test_TwoThreadTwoGPU_Thread0V2);
        Thread worker1 = new Thread(Test_TwoThreadTwoGPU_Thread1V2);
        worker0.Start();
        worker1.Start();

        // Each join waits at most ten seconds; stop at the first round with a timeout.
        j1 = worker0.Join(10000);
        j2 = worker1.Join(10000);
        if (!(j1 && j2))
        {
            break;
        }
    }

    _gpu0.DisableMultithreading();
    _gpu0.FreeAll();
    _gpu1.DisableMultithreading();
    _gpu1.FreeAll();

    Assert.IsTrue(j1);
    Assert.IsTrue(j2);
}
/// <summary>
/// Allocates two device buffers on the first CUDA device and runs the two copy worker
/// methods on separate threads up to ten times, requiring every pair to finish within ten
/// seconds each.
/// </summary>
public void Test_TwoThreadCopy()
{
    _gpu = CudafyHost.GetDevice(eGPUType.Cuda);

    // Both device buffers are sized from the same host array.
    _gpuuintBufferIn3 = _gpu.Allocate(_uintBufferIn1);
    _gpuuintBufferIn4 = _gpu.Allocate(_uintBufferIn1);
    _gpu.EnableMultithreading();

    bool j1 = false;
    bool j2 = false;
    for (int round = 0; round < 10; round++)
    {
        Console.WriteLine(round);
        SetInputs();
        ClearOutputs();

        Thread worker1 = new Thread(Test_TwoThreadCopy_Thread1);
        Thread worker2 = new Thread(Test_TwoThreadCopy_Thread2);
        worker1.Start();
        worker2.Start();

        // Each join waits at most ten seconds; stop at the first round with a timeout.
        j1 = worker1.Join(10000);
        j2 = worker2.Join(10000);
        if (!(j1 && j2))
        {
            break;
        }
    }

    _gpu.DisableMultithreading();
    _gpu.FreeAll();

    Assert.IsTrue(j1);
    Assert.IsTrue(j2);
}