/// <summary>
/// Runs one single-precision GEMM call through the BLAS wrapper on the Cudafy
/// emulator target, using zero-initialised 100-element buffers, and copies the
/// result buffer back to the host.
/// </summary>
public static void MyFirstBlasEmulatorTest()
{
    Console.WriteLine("MyTest()");

    // Get GPU device (emulator target).
    CudafyModes.Target = eGPUType.Emulator;
    GPGPU device = CudafyHost.GetDevice(CudafyModes.Target);

    // Create GPGPUBLAS (CUBLAS wrapper); the using block disposes it afterwards.
    using (GPGPUBLAS blasHandle = GPGPUBLAS.Create(device))
    {
        const int N = 100;

        // Host-side operands; new arrays are zero-filled.
        float[] hostA = new float[N];
        float[] hostB = new float[N];
        float[] hostC = new float[N];

        float alpha = -1;
        float beta = 0;

        // Upload all three operands to the device.
        float[] devA = device.CopyToDevice(hostA);
        float[] devB = device.CopyToDevice(hostB);
        float[] devC = device.CopyToDevice(hostC);

        // Dimensions handed to GEMM (10 * 10 == N elements per buffer).
        int m = 10;
        int n = 10;
        int k = 10;
        cublasOperation operation = cublasOperation.N;

        blasHandle.GEMM(m, k, n, alpha, devA, devB, beta, devC, operation);

        // Fetch the result buffer back into the host array.
        device.CopyFromDevice<float>(devC, hostC);
    }
}
/// <summary>
/// Translates the <c>GPU</c> type for OpenCL, loads it on OpenCL device 0, runs the
/// <c>CalculateNeuralNetwork</c> kernel on randomly generated inputs and returns the
/// output vector. The elapsed time of the device work is written to the console.
/// </summary>
/// <returns>The host array of length <c>Utils.N</c> filled from the device output.</returns>
public static float[] CallGPU()
{
    // Force the OpenCL back end for both device selection and translation.
    CudafyModes.Target = eGPUType.OpenCL;
    CudafyModes.DeviceId = 0;
    CudafyTranslator.Language = eLanguage.OpenCL;

    CudafyModule module = CudafyTranslator.Cudafy(ePlatform.Auto, eArchitecture.OpenCL, typeof(GPU));
    GPGPU device = CudafyHost.GetDevice(eGPUType.OpenCL, 0);
    device.LoadModule(module);
    module.Serialize();

    // Host-side data produced by the Utils helpers.
    float[] hostInput = Utils.GenerateRandomVector();
    float[,,] hostNetwork = Utils.GenerateRandomMatrix().AsSingleDimension();
    float[] hostOutput = new float[Utils.N];

    // Timing covers allocation, upload, launch, download and release.
    Stopwatch timer = new Stopwatch();
    timer.Start();

    float[] devOutput = device.Allocate<float>(hostOutput);
    float[] devInput = device.CopyToDevice(hostInput);
    float[,,] devNetwork = device.CopyToDevice(hostNetwork);

    device.Launch(Utils.GRID_SIZE, Utils.BLOCK_SIZE)
          .CalculateNeuralNetwork(devInput, devNetwork, devOutput);

    device.CopyFromDevice(devOutput, hostOutput);
    device.FreeAll();

    timer.Stop();
    Console.WriteLine("GPU: " + timer.ElapsedMilliseconds);

    return hostOutput;
}
//
// http://stackoverflow.com/questions/18628447/cudafy-throws-an-exception-while-testing
//
// Sets up the emulator device with the given id, translates and loads the module
// for the device's architecture, issues one GEMM call on zero-filled buffers and
// then always throws NotImplementedException.
private static void BlasSample(int deviceId)
{
    CudafyModes.Target = eGPUType.Emulator;
    GPGPU device = CudafyHost.GetDevice(CudafyModes.Target, deviceId);
    CudafyModes.DeviceId = deviceId;

    // Translate for whatever architecture the device reports.
    eArchitecture architecture = device.GetArchitecture();
    CudafyModule module = CudafyTranslator.Cudafy(architecture);
    device.LoadModule(module);

    GPGPUBLAS blasHandle = GPGPUBLAS.Create(device);

    const int N = 100;
    float[] hostA = new float[N];
    float[] hostB = new float[N];
    float[] hostC = new float[N];

    float alpha = -1;
    float beta = 0;

    float[] devA = device.CopyToDevice(hostA);
    float[] devB = device.CopyToDevice(hostB);
    float[] devC = device.CopyToDevice(hostC);

    int m = 10;
    int n = 10;
    int k = 10;
    cublasOperation operation = cublasOperation.N;

    blasHandle.GEMM(m, k, n, alpha, devA, devB, beta, devC, operation);

    // The sample is unfinished: it unconditionally signals that here.
    throw new NotImplementedException();
}
/// <summary>
/// Vector-addition example: selects the target device, translates and loads the
/// module, adds two integer arrays with the <c>add</c> kernel, prints every
/// result line and waits for a key press.
/// </summary>
/// <exception cref="System.ArgumentException">No device exists for the selected target.</exception>
public static void Main()
{
    Console.WriteLine("CUDAfy Example\nCollecting necessary resources...");

    // To use OpenCL, change this enum.
    CudafyModes.Target = eGPUType.Cuda;
    CudafyModes.DeviceId = 0;

    // The translator language follows the selected target.
    if (CudafyModes.Target == eGPUType.OpenCL)
    {
        CudafyTranslator.Language = eLanguage.OpenCL;
    }
    else
    {
        CudafyTranslator.Language = eLanguage.Cuda;
    }

    // Check for available devices.
    int availableDevices = CudafyHost.GetDeviceCount(CudafyModes.Target);
    if (availableDevices == 0)
    {
        throw new System.ArgumentException("No suitable devices found.", "original");
    }

    // Init device.
    GPGPU device = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
    Console.WriteLine("Running example using {0}", device.GetDeviceProperties(false).Name);

    // Load module for GPU.
    CudafyModule module = CudafyTranslator.Cudafy();
    device.LoadModule(module);

    // Host arrays: two operands and the result.
    int[] a = new int[N];
    int[] b = new int[N];
    int[] c = new int[N];

    // Device buffer for the result.
    int[] devC = device.Allocate<int>(c);

    // Fill the operands on the CPU.
    for (int index = 0; index < N; index++)
    {
        a[index] = index;
        b[index] = index * index;
    }

    // Upload the operands and run the kernel with one block of N threads.
    int[] devA = device.CopyToDevice(a);
    int[] devB = device.CopyToDevice(b);
    device.Launch(1, N).add(devA, devB, devC);

    // Download the result.
    device.CopyFromDevice(devC, c);

    // Display the results.
    for (int index = 0; index < N; index++)
    {
        Console.WriteLine("{0} + {1} = {2}", a[index], b[index], c[index]);
    }

    // Release everything allocated on the device.
    device.FreeAll();

    Console.WriteLine("Done!");
    Console.ReadKey();
}
/// <summary>
/// Test fixture set-up: obtains an sm_30 device, loads the module (re-translating
/// it when no valid serialized copy is found) and prepares the integer and float
/// input/output buffers on host and device.
/// </summary>
public void SetUp()
{
    //CudafyModes.Architecture = eArchitecture.sm_30;
    _gpu = CudafyHost.GetDevice(eArchitecture.sm_30, CudafyModes.DeviceId);
    Assert.IsFalse(_gpu is OpenCLDevice, "OpenCL devices are not supported.");

    // Reuse the serialized module only if it exists and its checksums verify.
    _cm = CudafyModule.TryDeserialize();
    bool moduleUsable = _cm != null && _cm.TryVerifyChecksums();
    if (!moduleUsable)
    {
        _cm = CudafyTranslator.Cudafy(eArchitecture.sm_30);
        Console.WriteLine(_cm.CompilerOutput);
        _cm.TrySerialize();
    }

    _gpu.LoadModule(_cm);

    // Integer test data (arbitrary values).
    inputIntArray = new int[]
    {
        0x17, 0x01, 0x7f, 0xd1, 0xfe, 0x23, 0x2c, 0xa0,
        0x00, 0xcf, 0xaa, 0x7a, 0x35, 0xf4, 0x04, 0xbc,
        0xe9, 0x6d, 0xb2, 0x55, 0xb0, 0xc8, 0x10, 0x49,
        0x76, 0x17, 0x92, 0xab, 0xf3, 0xf2, 0xab, 0xcb
    };
    d_inputIntArray = _gpu.CopyToDevice(inputIntArray);
    d_outputIntArray = _gpu.Allocate<int>(WARP_SIZE);
    gpuIntResult = new int[WARP_SIZE];
    cpuIntResult = new int[WARP_SIZE];

    // Float test data (arbitrary values).
    inputFloatArray = new float[]
    {
        1.7f, -37.03f, 2147.6436f, -0.1f,
        7.7f, 99.99f, -809.142f, -0.1115f,
        1.0f, 2.0f, 3.0f, 5.0f,
        7.5f, 0.1001f, 11.119f, -9.0f,
        7749.9847f, -860249.118843f, 0.0f, -2727745.586215f,
        12.0f, -11.0f, 77.77f, 22.0f,
        377.1112f, -377.1112f, 0.12345f, -0.12345f,
        0.11111f, -0.11111f, 700000f, -14f
    };
    d_inputFloatArray = _gpu.CopyToDevice(inputFloatArray);
    d_outputFloatArray = _gpu.Allocate<float>(WARP_SIZE);
    gpuFloatResult = new float[WARP_SIZE];
    cpuFloatResult = new float[WARP_SIZE];
}
/// <summary>
/// Test fixture set-up: obtains the device for the configured architecture,
/// allocates an N-element input and output host buffer for each element type
/// under test, then fills the inputs and clears the outputs and GPU.
/// </summary>
public void SetUp()
{
    _gpu = CudafyHost.GetDevice(CudafyModes.Architecture, CudafyModes.DeviceId);

    // One input/output pair per element type.
    _byteBufferIn = new byte[N];
    _byteBufferOut = new byte[N];

    _sbyteBufferIn = new sbyte[N];
    _sbyteBufferOut = new sbyte[N];

    _ushortBufferIn = new ushort[N];
    _ushortBufferOut = new ushort[N];

    _uintBufferIn = new uint[N];
    _uintBufferOut = new uint[N];

    _ulongBufferIn = new ulong[N];
    _ulongBufferOut = new ulong[N];

    _cplxDBufferIn = new ComplexD[N];
    _cplxDBufferOut = new ComplexD[N];

    _cplxFBufferIn = new ComplexF[N];
    _cplxFBufferOut = new ComplexF[N];

    SetInputs();
    ClearOutputsAndGPU();
}
/// <summary>
/// Loads the module (re-translating <c>Generic&lt;ushort, ushort&gt;</c> and
/// <c>SimpleGeneric</c> when no valid serialized copy exists), launches the
/// "Kernel" function with a single thread and prints PASSED when the value read
/// back from the device equals 1, FAILED otherwise.
/// </summary>
public static void Execute()
{
    // Prefer the cached module; fall back to translation when it is missing or stale.
    CudafyModule module = CudafyModule.TryDeserialize();
    bool cacheValid = module != null && module.TryVerifyChecksums();
    if (!cacheValid)
    {
        module = CudafyTranslator.Cudafy(typeof(Generic<ushort, ushort>), typeof(SimpleGeneric));
        module.Serialize();
    }

    GPGPU device = CudafyHost.GetDevice(CudafyModes.Target);
    device.LoadModule(module);

    var kernelInput = new Generic<ushort, ushort>();
    kernelInput.A = 187;

    // Single-element device buffer that receives the kernel's verdict.
    int[] devResult = device.Allocate<int>(1);
    device.Launch(1, 1, "Kernel", kernelInput, devResult);

    int result;
    device.CopyFromDevice(devResult, out result);

    string verdict;
    if (result == 1)
    {
        verdict = "PASSED";
    }
    else
    {
        verdict = "FAILED";
    }

    Console.WriteLine("Simple Generic: " + verdict);
}
/// <summary>
/// Prepares the GPU for use. On a 64-bit operating system the ProgramFiles
/// environment variable is overridden on every call; the device, the optional
/// CUDA BLAS/RAND helpers and the translated module are set up only while
/// <c>Gpu</c> is still null.
/// </summary>
public void InitGPU()
{
    // Work around for bug in Cudafy trying to find the path: force ProgramFiles
    // to the hard-coded location on 64-bit systems.
    if (Environment.Is64BitOperatingSystem)
    {
        Environment.SetEnvironmentVariable("ProgramFiles", "C:\\Program Files\\");
    }

    // Already initialised: nothing more to do.
    if (Gpu != null)
    {
        return;
    }

    Gpu = CudafyHost.GetDevice(_gpuType, 0);

    //Blas = GPGPUBLAS.Create(Gpu);
    if (_gpuType == eGPUType.Cuda)
    {
        Blas = new SharpBLAS(Gpu);
        Rand = GPGPURAND.Create(Gpu, curandRngType.CURAND_RNG_PSEUDO_DEFAULT);
        Rand.SetPseudoRandomGeneratorSeed((ulong)RandomHelpers.Next(9999));
    }

    CudafyTranslator.GenerateDebug = true;
    Debug.WriteLine("CUDA workdir = " + CudafyTranslator.WorkingDirectory);
    Console.WriteLine("Recompile module");
    CudafyTranslator.Language = eLanguage.Cuda;

    // NOTE(review): the sm_30 translation result is immediately overwritten by the
    // parameterless translation, so only the second module is serialized and loaded.
    // The first call looks redundant — confirm it has no needed side effect before
    // removing it.
    var km = CudafyTranslator.Cudafy(eArchitecture.sm_30);
    km = CudafyTranslator.Cudafy();
    km.Serialize();
    Gpu.LoadModule(km);
}
/// <summary>
/// Invokes and executes a single elementary function, selected by its name.
/// </summary>
/// <param name="function">Name of the function handed to <c>gpu.Launch</c>.</param>
public static void Execute(string function)
{
    // The last index of each index array must equal the length of its sequence array.
    Debug.Assert(_indexes1.Last() == _sequencies1.Length);
    Debug.Assert(_indexes2.Last() == _sequencies2.Length);

    CudafyModule module = CudafyTranslator.Cudafy();
    GPGPU device = CudafyHost.GetDevice();
    device.LoadModule(module);

    // Upload the index and sequence arrays.
    int[] devIndexes1 = device.CopyToDevice(_indexes1);
    int[] devIndexes2 = device.CopyToDevice(_indexes2);
    int[] devSequencies1 = device.CopyToDevice(_sequencies1);
    int[] devSequencies2 = device.CopyToDevice(_sequencies2);

    // Device buffer for the result matrix (allocated, not uploaded).
    int[,] devMatrix = device.Allocate(_matrix);

    int rows = _matrix.GetLength(0);
    int columns = _matrix.GetLength(1);

    // Grid and block use the same extent: the truncated ~cube root of the
    // cell count, capped at 15.
    int extent = Math.Min(15, (int)Math.Pow((double)rows * columns, 0.33333333333));
    dim3 gridSize = extent;
    dim3 blockSize = extent;

    device.Launch(gridSize, blockSize, function,
        devSequencies1, devIndexes1, devSequencies2, devIndexes2, devMatrix);

    // Download the result matrix and release all device memory.
    device.CopyFromDevice(devMatrix, _matrix);
    device.FreeAll();
}
/// <summary>
/// Invokes and executes the function that checks that the array is sorted.
/// </summary>
/// <param name="direction">Value forwarded unchanged to the <c>Sorted</c> kernel launches.</param>
public static void ExecuteSorted(int direction = 1)
{
    CudafyModule module = CudafyTranslator.Cudafy();
    GPGPU device = CudafyHost.GetDevice();
    device.LoadModule(module);

    // Device buffers sized after the host arrays; only _a is uploaded.
    int[] devA = device.Allocate(_a);
    int[] devB = device.Allocate(_b);
    int[] devC = device.Allocate(_c);
    int[] devD = device.Allocate(D);
    device.CopyToDevice(_a, devA);

    // Single-thread split, then the Sorted kernel in two passes (step 0 on the
    // full grid, step 1 on a single thread).
    device.Launch(1, 1).Split(devA, devB, devC, _middle);
    device.Launch(_gridSize, _blockSize).Sorted(devA, devB, devC, devD, 0, direction);
    device.Launch(1, 1).Sorted(devA, devB, devC, devD, 1, direction);

    // Download the result and release all device memory.
    device.CopyFromDevice(devD, D);
    device.FreeAll();
}
/// <summary>
/// Creates devices 0 and 1 of the configured target, enables multithreading on
/// both, and runs the two worker methods on separate threads up to ten times.
/// Each join waits at most 10 seconds; the loop stops at the first timeout and
/// the test asserts that the last pair of joins succeeded.
/// </summary>
public void Test_TwoThreadTwoGPU()
{
    _gpu0 = CudafyHost.CreateDevice(CudafyModes.Target, 0);
    _gpu1 = CudafyHost.CreateDevice(CudafyModes.Target, 1);
    _gpu0.EnableMultithreading();
    _gpu1.EnableMultithreading();

    bool firstJoined = false;
    bool secondJoined = false;

    for (int round = 0; round < 10; round++)
    {
        Console.WriteLine(round);

        Thread worker0 = new Thread(Test_TwoThreadTwoGPU_Thread0);
        Thread worker1 = new Thread(Test_TwoThreadTwoGPU_Thread1);
        worker0.Start();
        worker1.Start();

        // Both joins are always attempted, each with a 10 s timeout.
        firstJoined = worker0.Join(10000);
        secondJoined = worker1.Join(10000);

        if (!(firstJoined && secondJoined))
        {
            break;
        }
    }

    _gpu0.DisableMultithreading();
    _gpu0.FreeAll();
    _gpu1.DisableMultithreading();
    _gpu1.FreeAll();

    Assert.IsTrue(firstJoined);
    Assert.IsTrue(secondJoined);
}
/// <summary>
/// Test fixture set-up for the double-precision FFT tests: creates the device,
/// allocates host and device buffers for BATCH signals of N samples, creates
/// the FFT wrapper and fills the real and complex host inputs with sine data.
/// </summary>
public void SetUp()
{
    _gpu = CudafyHost.CreateDevice(CudafyModes.Target);

    // Host buffers.
    _hostInput = new double[N * BATCH];
    _hostInputCplx = new ComplexD[N * BATCH];
    _hostOutput = new double[N * BATCH];
    _hostOutputCplx = new ComplexD[N * BATCH];

    // Device buffers; the real intermediate buffer is twice the signal size.
    _devInput = _gpu.Allocate(_hostInput);
    _devInputCplx = _gpu.Allocate(_hostInputCplx);
    _devInter = _gpu.Allocate<double>(N * 2 * BATCH);
    _devInterCplx = _gpu.Allocate<ComplexD>(N * BATCH);
    _devOutput = _gpu.Allocate(_hostOutput);
    _devOutputCplx = _gpu.Allocate(_hostOutputCplx);

    _fft = GPGPUFFT.Create(_gpu);

    // Every batch receives the same pair of sine waves; the real input keeps
    // only the x component.
    for (int batch = 0; batch < BATCH; batch++)
    {
        int batchOffset = batch * N;
        for (int sampleIndex = 0; sampleIndex < N; sampleIndex++)
        {
            ComplexD sample = new ComplexD();
            sample.x = (double)(10.0F * Math.Sin(100 * 2 * Math.PI * sampleIndex / N * Math.PI / 180));
            sample.y = (double)(10.0F * Math.Sin(200 * 2 * Math.PI * sampleIndex / N * Math.PI / 180));

            _hostInput[sampleIndex + batchOffset] = sample.x;
            _hostInputCplx[sampleIndex + batchOffset] = sample;
        }
    }
}
/// <summary>
/// Translates the module, obtains the device selected by
/// <c>CudafyModes.Target</c> / <c>CudafyModes.DeviceId</c> and loads the module
/// onto it.
/// </summary>
internal TensorOpGpu()
{
    // Uncomment to have the translator emit debug information:
    //CudafyTranslator.GenerateDebug = true;

    Module = CudafyTranslator.Cudafy();

    Gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
    Gpu.LoadModule(Module);
}
/// <summary>
/// Runs the <c>GpuFindPathDistance</c> kernel, downloads one candidate answer
/// per block and returns the candidate with the smallest distance.
/// </summary>
/// <returns>
/// An <c>AnswerStruct</c> holding the smallest distance found and its path number.
/// When no candidate is below <c>float.MaxValue</c> the result is
/// (<c>float.MaxValue</c>, 0).
/// </returns>
internal static AnswerStruct GetAnswer()
{
    using (GPGPU device = CudafyHost.GetDevice())
    {
        device.LoadModule(CudafyTranslator.Cudafy());

        // One result slot per block.
        AnswerStruct[] candidates = new AnswerStruct[BlocksPerGrid];
        AnswerStruct[] devCandidates = device.Allocate(candidates);

        device.Launch(BlocksPerGrid, ThreadsPerBlock, GpuFindPathDistance, devCandidates);
        device.Synchronize();
        device.CopyFromDevice(devCandidates, candidates);
        device.FreeAll();

        // Linear scan for the minimum; the strict comparison keeps the first of equal bests.
        float shortest = float.MaxValue;
        long shortestPath = 0L;
        foreach (AnswerStruct candidate in candidates)
        {
            if (candidate.distance < shortest)
            {
                shortest = candidate.distance;
                shortestPath = candidate.pathNo;
            }
        }

        AnswerStruct best = new AnswerStruct
        {
            distance = shortest,
            pathNo = shortestPath
        };
        return best;
    }
}
/// <summary>
/// Replaces <paramref name="dm"/> with a new <c>cols x rows</c> matrix assembled
/// from two GPU passes: one over the leading <c>cols x cols</c> sub-matrix and one
/// over the remaining <c>(rows - cols) x cols</c> rows.
/// </summary>
/// <param name="dm">Matrix to process; reassigned to the newly built matrix.</param>
public static void cudaTranspose(ref MathNet.Numerics.LinearAlgebra.Double.DenseMatrix dm)
{
    GPGPU device = CudafyHost.GetDevice(eGPUType.Cuda);
    GPGPUBLAS blasHandle = GPGPUBLAS.Create(device);

    int cols = dm.ColumnCount, rows = dm.RowCount;
    int restRows = rows - cols;

    // Split the source into a square head and the remaining rows, both column-major.
    //double[] a = dm.Storage.ToColumnMajorArray();
    double[] squarePart = dm.SubMatrix(0, cols, 0, cols).Storage.ToColumnMajorArray();
    double[] restPart = dm.SubMatrix(cols, restRows, 0, cols).Storage.ToColumnMajorArray();
    dm = null; // drop the reference to the source matrix

    // ---- pass 1: square head ----
    // NOTE(review): devSource is uploaded but never passed to GEMV below; GEMV
    // receives the freshly allocated devResult as its matrix argument — confirm
    // this is intended.
    double[] devSource = device.CopyToDevice<double>(squarePart);
    squarePart = null;
    double[] devResult = device.Allocate<double>(cols * cols);
    double[] devVector = device.CopyToDevice<double>(new double[] { 1 });
    blasHandle.GEMV(cols, cols, 1, devResult, devVector, 0, devVector,
        Cudafy.Maths.BLAS.Types.cublasOperation.T);

    // Host buffer for the complete output; the first cols*cols entries come from pass 1.
    double[] assembled = new double[cols * rows];
    device.CopyFromDevice<double>(devResult, 0, assembled, 0, cols * cols);
    device.FreeAll();

    // ---- pass 2: remaining rows ----
    devSource = device.CopyToDevice<double>(restPart);
    restPart = null;
    devResult = device.Allocate<double>(restRows * cols);
    devVector = device.CopyToDevice<double>(new double[] { 1 });
    blasHandle.GEMV(restRows, cols, 1, devResult, devVector, 0, devVector,
        Cudafy.Maths.BLAS.Types.cublasOperation.T);

    // Append the second pass right after the first cols*cols entries.
    device.CopyFromDevice<double>(devResult, 0, assembled, cols * cols, restRows * cols);
    device.FreeAll();

    dm = new MathNet.Numerics.LinearAlgebra.Double.DenseMatrix(cols, rows, assembled);
}
/// <summary>
/// Prints general, memory and multiprocessor information for every device of
/// the configured target, numbering the devices from 0 in enumeration order.
/// </summary>
public static void Execute()
{
    int deviceIndex = 0;
    foreach (GPGPUProperties info in CudafyHost.GetDeviceProperties(CudafyModes.Target, false))
    {
        // General section.
        Console.WriteLine("   --- General Information for device {0} ---", deviceIndex);
        Console.WriteLine("Name:  {0}", info.Name);
        Console.WriteLine("Platform Name:  {0}", info.PlatformName);
        Console.WriteLine("Device Id:  {0}", info.DeviceId);
        Console.WriteLine("Compute capability:  {0}.{1}", info.Capability.Major, info.Capability.Minor);
        Console.WriteLine("Clock rate: {0}", info.ClockRate);
        Console.WriteLine("Simulated: {0}", info.IsSimulated);
        Console.WriteLine();

        // Memory section.
        Console.WriteLine("   --- Memory Information for device {0} ---", deviceIndex);
        Console.WriteLine("Total global mem:  {0}", info.TotalMemory);
        Console.WriteLine("Total constant Mem:  {0}", info.TotalConstantMemory);
        Console.WriteLine("Max mem pitch:  {0}", info.MemoryPitch);
        Console.WriteLine("Texture Alignment:  {0}", info.TextureAlignment);
        Console.WriteLine();

        // Multiprocessor section.
        Console.WriteLine("   --- MP Information for device {0} ---", deviceIndex);
        Console.WriteLine("Shared mem per mp: {0}", info.SharedMemoryPerBlock);
        Console.WriteLine("Registers per mp:  {0}", info.RegistersPerBlock);
        Console.WriteLine("Threads in warp:  {0}", info.WarpSize);
        Console.WriteLine("Max threads per block:  {0}", info.MaxThreadsPerBlock);
        Console.WriteLine("Max thread dimensions:  ({0}, {1}, {2})",
            info.MaxThreadsSize.x, info.MaxThreadsSize.y, info.MaxThreadsSize.z);
        Console.WriteLine("Max grid dimensions:  ({0}, {1}, {2})",
            info.MaxGridSize.x, info.MaxGridSize.y, info.MaxGridSize.z);
        Console.WriteLine();

        deviceIndex++;
    }
}
/// <summary>
/// Loads the translated module on the configured device, initialises the
/// particles, writes the total frame count to "length.txt" and then simulates
/// and renders every frame, printing the wall-clock time of each one.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    CudafyModule module = CudafyTranslator.Cudafy();
    GPGPU device = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
    device.LoadModule(module);

    int totalFrames = numberOfSeconds * framesPerSecond;
    InitializeParticles();
    File.WriteAllText("length.txt", totalFrames.ToString());

    for (int frameIndex = 0; frameIndex < totalFrames; frameIndex++)
    {
        DateTime startedAt = DateTime.Now;

        Simulate(device);
        // NOTE(review): the returned bitmap is not used or disposed here —
        // confirm Render owns its lifetime.
        Bitmap rendered = Render(device, frameIndex);

        TimeSpan elapsed = DateTime.Now - startedAt;
        Console.WriteLine("Frame " + frameIndex + " complete. Time: " + elapsed.TotalMilliseconds + "ms");
    }
}
/// <summary>
/// Checks whether the OpenCL device with the given id can run a double-precision
/// kernel: it launches <c>add_double(2.5, 7.5)</c> and verifies the result is 10.
/// </summary>
/// <param name="DeviceId">Zero-based OpenCL device id.</param>
/// <returns>
/// <c>true</c> when the kernel ran and produced exactly 10.0; <c>false</c> when the
/// id is out of range, the result differs, or any step throws.
/// </returns>
public static bool TestGpuDoublePrecision(int DeviceId)
{
    // Ids are zero-based, so valid values are 0 .. count-1. The previous check
    // (DeviceId > count) let DeviceId == count and negative ids through.
    if (DeviceId < 0 || DeviceId >= CudafyHost.GetDeviceCount(eGPUType.OpenCL))
    {
        return false;
    }

    try
    {
        CudafyModes.Target = eGPUType.OpenCL;
        CudafyTranslator.Language = eLanguage.OpenCL;

        CudafyModule km = CudafyTranslator.Cudafy();
        GPGPU gpu = CudafyHost.GetDevice(eGPUType.OpenCL, DeviceId);
        gpu.LoadModule(km);

        double c;
        double[] dev_c = gpu.Allocate<double>();
        try
        {
            gpu.Launch().add_double(2.5d, 7.5d, dev_c);
            gpu.CopyFromDevice(dev_c, out c);
        }
        finally
        {
            // Release the device buffer even when the launch or copy fails.
            gpu.Free(dev_c);
        }

        // 2.5 + 7.5 is exactly representable, so an exact comparison is intended.
        return c == 10.0d;
    }
    catch
    {
        // Deliberate best-effort probe: any failure means "not supported".
        return false;
    }
}
/// <summary>
/// Test fixture set-up for the single-precision FFT tests: creates the device
/// and FFT wrapper (printing the driver and FFT versions), allocates host and
/// device buffers for BATCH signals of N samples and fills the real and complex
/// host inputs with sine data.
/// </summary>
public void SetUp()
{
    _gpu = CudafyHost.CreateDevice(CudafyModes.Target);
    Console.WriteLine(_gpu.GetDriverVersion());
    _fft = GPGPUFFT.Create(_gpu);

    // Host buffers.
    _hostInput = new float[N * BATCH];
    _hostInputCplx = new ComplexF[N * BATCH];
    _hostOutput = new float[N * BATCH];
    _hostOutputCplx = new ComplexF[N * BATCH];

    // Device buffers; the real intermediate buffer is twice the signal size.
    _devInput = _gpu.Allocate(_hostInput);
    _devInputCplx = _gpu.Allocate(_hostInputCplx);
    _devInter = _gpu.Allocate<float>(N * 2 * BATCH);
    _devInterCplx = _gpu.Allocate<ComplexF>(N * BATCH);
    _devOutput = _gpu.Allocate(_hostOutput);
    _devOutputCplx = _gpu.Allocate(_hostOutputCplx);

    Console.WriteLine(_fft.GetVersion());

    // Every batch receives the same pair of sine waves; the real input keeps
    // only the x component.
    for (int batch = 0; batch < BATCH; batch++)
    {
        int batchOffset = batch * N;
        for (int sampleIndex = 0; sampleIndex < N; sampleIndex++)
        {
            ComplexF sample = new ComplexF();
            sample.x = (float)(10.0F * Math.Sin(100 * 2 * Math.PI * sampleIndex / N * Math.PI / 180));
            sample.y = (float)(10.0F * Math.Sin(200 * 2 * Math.PI * sampleIndex / N * Math.PI / 180));

            _hostInput[sampleIndex + batchOffset] = sample.x;
            _hostInputCplx[sampleIndex + batchOffset] = sample;
        }
    }
}
/// <summary>
/// Performs a merge sort.
/// Usage example:
/// CudafySequencies.SetSequencies(arrayOfArray,arrayOfArray);
/// CudafySequencies.Execute("Compare");
/// var compare = CudafySequencies.GetMartix();
/// CudafyArray.SetArray(Enumerable.Range(0,n).ToArray());
/// CudafyArray.SetCompare(compare);
/// CudafyArray.MergeSort();
/// var indexesOfSorted = CudafyArray.GetArray();
/// </summary>
/// <param name="direction">Value forwarded unchanged to every <c>MergeLinear</c> launch.</param>
public static void MergeSort(int direction = 1)
{
    CudafyModule module = CudafyTranslator.Cudafy();
    GPGPU device = CudafyHost.GetDevice();
    device.LoadModule(module);

    // Two device buffers that swap roles on every pass; only _a is uploaded.
    int[] devA = device.Allocate(_a);
    int[] devB = device.Allocate(_b);
    device.CopyToDevice(_a, devA);

    for (int pass = 0; pass < _ceiling; pass++)
    {
        // Grid and block share one extent: truncated ~cube root, capped at 15.
        int extent = Math.Min(15, (int)Math.Pow((_length >> pass) + pass, 0.333333333333));
        int gridSize = extent;
        int blockSize = extent;

        // Even passes read A and write B; odd passes do the opposite.
        bool evenPass = (pass & 1) == 0;
        int[] source = evenPass ? devA : devB;
        int[] target = evenPass ? devB : devA;

        device.Launch(gridSize, blockSize)
              .MergeLinear(source, target, pass, 0, _length, direction);
    }

    // After an even number of passes the data is in A, otherwise in B.
    int[] sortedBuffer = ((_ceiling & 1) == 0) ? devA : devB;
    device.CopyFromDevice(sortedBuffer, _a);

    // Free the memory allocated on the GPU.
    device.FreeAll();
}
/// <summary>
/// Runs the <c>ModulAtomic</c> kernel with KONSTANTA_THREAD blocks of one thread,
/// compacts every downloaded entry that is not -1 into a host list, and prints
/// the elapsed time in whole seconds.
/// </summary>
public static void primaGPU()
{
    CudafyModule modul_kernel = CudafyTranslator.Cudafy();
    GPGPU vga = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
    vga.LoadModule(modul_kernel);

    Stopwatch waktu = new Stopwatch();
    waktu.Start();

    int[] list_cpu = new int[KONSTANTA_THREAD];
    int[] list_cpy = new int[KONSTANTA_THREAD];
    int[] list = vga.Allocate<int>(KONSTANTA_THREAD);

    vga.Launch(KONSTANTA_THREAD, 1).ModulAtomic(list);
    vga.CopyFromDevice(list, list_cpy);
    vga.FreeAll();

    // Keep only the entries the kernel did not mark with -1.
    int index = 0;
    for (int z = 0; z < list_cpy.Length; z++)
    {
        if (list_cpy[z] != -1)
        {
            list_cpu[index] = list_cpy[z];
            //Console.WriteLine(list_cpu[index]);
            index++;
        }
    }

    waktu.Stop();

    // TimeSpan.Seconds is only the 0-59 seconds component and wraps every minute;
    // report the total number of whole seconds instead.
    TimeSpan ts = waktu.Elapsed;
    String total = ((long)ts.TotalSeconds).ToString();
    Console.WriteLine("Total GPU ------ {0} detik> ", total);
}
/// <summary>
/// Runs the <c>fungsiAtomic</c> kernel with KONSTANTA_THREAD blocks of one thread,
/// downloads the result array and prints the elapsed time in milliseconds.
/// </summary>
public static void eksekusi()
{
    CudafyModule kernel_modul = CudafyTranslator.Cudafy();
    GPGPU vga = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
    vga.LoadModule(kernel_modul);

    Stopwatch waktu = new Stopwatch();
    waktu.Start();

    int[] array_vga = vga.Allocate<int>(KONSTANTA_THREAD);
    int[] array_hasil = new int[KONSTANTA_THREAD];

    vga.Launch(KONSTANTA_THREAD, 1).fungsiAtomic(array_vga);
    vga.CopyFromDevice(array_vga, array_hasil);

    // One FreeAll releases everything; the second call that used to follow was redundant.
    vga.FreeAll();

    waktu.Stop();

    // TimeSpan.Milliseconds is only the 0-999 component, so any run of a second
    // or more was under-reported; use the stopwatch's total instead.
    String total = waktu.ElapsedMilliseconds.ToString();
    Console.WriteLine("Total VGA ------ > " + total);
}
/// <summary>
/// Obtains the CUDA device, allocates two device buffers sized after
/// <c>_uintBufferIn1</c>, enables multithreading and runs the two copy workers on
/// separate threads up to ten times. Each join waits at most 10 seconds; the loop
/// stops at the first timeout and the test asserts the last pair of joins succeeded.
/// </summary>
public void Test_TwoThreadCopy()
{
    _gpu = CudafyHost.GetDevice(eGPUType.Cuda);
    _gpuuintBufferIn3 = _gpu.Allocate(_uintBufferIn1);
    _gpuuintBufferIn4 = _gpu.Allocate(_uintBufferIn1);
    _gpu.EnableMultithreading();

    bool firstJoined = false;
    bool secondJoined = false;

    for (int round = 0; round < 10; round++)
    {
        Console.WriteLine(round);

        // Fresh inputs and cleared outputs for every round.
        SetInputs();
        ClearOutputs();

        Thread worker1 = new Thread(Test_TwoThreadCopy_Thread1);
        Thread worker2 = new Thread(Test_TwoThreadCopy_Thread2);
        worker1.Start();
        worker2.Start();

        // Both joins are always attempted, each with a 10 s timeout.
        firstJoined = worker1.Join(10000);
        secondJoined = worker2.Join(10000);

        if (!(firstJoined && secondJoined))
        {
            break;
        }
    }

    _gpu.DisableMultithreading();
    _gpu.FreeAll();

    Assert.IsTrue(firstJoined);
    Assert.IsTrue(secondJoined);
}
/// <summary>
/// Translates <c>ParamsStruct</c> and <c>ImpliedVolatile</c>, uploads one filled
/// parameter record, launches the "impliedVolatile" function with a single thread
/// and prints the <c>i</c> and <c>B</c> fields of the record read back from the device.
/// </summary>
public static void Execute()
{
    CudafyModule module = CudafyTranslator.Cudafy(typeof(ParamsStruct), typeof(ImpliedVolatile));
    _gpu = CudafyHost.GetDevice(CudafyModes.Target);
    _gpu.LoadModule(module);

    ParamsStruct[] hostParams = new ParamsStruct[1];
    ParamsStruct[] readBack = new ParamsStruct[1];

    // Input values for the single record.
    hostParams[0].OP = 96.95;
    hostParams[0].Price = 1332.24;
    hostParams[0].Strike = 1235;
    hostParams[0].TD = 31;
    hostParams[0].R = 0.0001355;
    hostParams[0].Q = 0.0166;
    hostParams[0].N = 100; // 1000;
    hostParams[0].kind = 1;

    ParamsStruct[] devParams = _gpu.CopyToDevice(hostParams);

    // Scratch buffer of 1001 floats handed to the kernel.
    float[] devScratch = _gpu.Allocate<float>(1001);

    _gpu.Launch(1, 1, "impliedVolatile", devParams, devScratch);

    // The kernel's outputs are read from the parameter record itself.
    _gpu.CopyFromDevice(devParams, 0, readBack, 0, 1);
    Console.WriteLine("I={0}, B={1}", readBack[0].i, readBack[0].B);
    //Console.ReadKey();
}
/// <summary>
/// Translates the module for the architecture of device 0, prepares one empty
/// and one pre-filled device array of N ints, launches "GenerateRipples" on an
/// 8x8 grid of 64x16-thread blocks and copies the first array back to the host.
/// </summary>
public void ExeTestKernel()
{
    GPGPU device = CudafyHost.GetDevice(CudafyModes.Target, 0);
    eArchitecture architecture = device.GetArchitecture();
    CudafyModule module = CudafyTranslator.Cudafy(architecture);
    device.LoadModule(module);

    int[] hostResults = new int[N];

    // Either assign a new block of memory to hold results on device...
    int[] devResults = device.Allocate<int>(N);

    // ...or fill your array with values first and then copy it to the device.
    for (int index = 0; index < N; index++)
    {
        hostResults[index] = index * 3;
    }
    int[] devFilledResults = device.CopyToDevice(hostResults);

    // 64*16 = 1024 threads per block (which is max for sm_30).
    dim3 threadsPerBlock = new dim3(64, 16);
    // 8*8 = 64 blocks per grid, just for show so you get varying numbers.
    dim3 blocksPerGrid = new dim3(8, 8);

    //var threadsPerBlock = 1024; // this will only give you blockDim.x = 1024, .y = 0, .z = 0
    //var blocksPerGrid = 1; // just for show

    device.Launch(blocksPerGrid, threadsPerBlock, "GenerateRipples", devResults, devFilledResults);

    // Overwrites the host array with the kernel's output buffer.
    device.CopyFromDevice(devResults, hostResults);
}
/// <summary>
/// Evaluates every hand on the GPU with the <c>evaluate</c> kernel and returns
/// one rank per hand.
/// </summary>
/// <param name="hands">Hands to evaluate, one <c>ulong</c> each.</param>
/// <param name="numCards">Card count forwarded unchanged to the kernel.</param>
/// <returns>
/// An array with the same length as <paramref name="hands"/>; empty when
/// <paramref name="hands"/> is empty.
/// </returns>
public static uint[] Evaluate(ulong[] hands, int numCards)
{
    // Translates this class to CUDA C and then compiles it.
    CudafyModule km = CudafyTranslator.Cudafy(); //eArchitecture.sm_20);

    // Get the configured GPU and load the module.
    GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
    gpu.LoadModule(km);

    uint[] toReturn = new uint[hands.Length];

    // Nothing to evaluate: avoid a zero-size allocation and a zero-block launch.
    if (hands.Length == 0)
    {
        return toReturn;
    }

    // One thread per hand, rounded up to whole blocks of 256 threads.
    int blockSize = 256;
    int blockx = hands.Length / blockSize;
    if (hands.Length % blockSize != 0)
    {
        blockx++;
    }

    ulong[] dev_hands = null;
    uint[] dev_ranks = null;
    try
    {
        dev_hands = gpu.Allocate<ulong>(hands.Length);
        dev_ranks = gpu.Allocate<uint>(hands.Length);
        gpu.CopyToDevice(hands, dev_hands);

        gpu.StartTimer();
        gpu.Launch(blockx, blockSize).evaluate(dev_hands, numCards, hands.Length, dev_ranks);
        gpu.StopTimer(); // timing result is not used, but the timer must be stopped

        gpu.CopyFromDevice(dev_ranks, toReturn);
    }
    finally
    {
        // Release only the buffers this call created, so repeated calls do not
        // accumulate device memory and other allocations on the device stay intact.
        if (dev_ranks != null)
        {
            gpu.Free(dev_ranks);
        }
        if (dev_hands != null)
        {
            gpu.Free(dev_hands);
        }
    }

    return toReturn;
}
/// <summary>
/// Test fixture set-up: obtains the default device, creates the SPARSE and BLAS
/// wrappers on it and builds the solver from all three.
/// </summary>
public void SetUp()
{
    _gpu = CudafyHost.GetDevice();

    // Library wrappers bound to the same device.
    _sparse = GPGPUSPARSE.Create(_gpu);
    _blas = GPGPUBLAS.Create(_gpu);

    _solver = new Solver(_gpu, _blas, _sparse);
}
/// <summary>
/// Test fixture set-up: creates the configured device and allocates two pairs
/// of N-element unsigned-integer host buffers (input and output).
/// </summary>
public void SetUp()
{
    _gpu = CudafyHost.CreateDevice(CudafyModes.Target, CudafyModes.DeviceId);

    // First input/output pair.
    _uintBufferIn1 = new uint[N];
    _uintBufferOut1 = new uint[N];

    // Second input/output pair.
    _uintBufferIn2 = new uint[N];
    _uintBufferOut2 = new uint[N];
}
/// <summary>
/// Prints how many Cuda, OpenCL and Emulator devices were found, then dumps the
/// public properties of every device of each type, warning whenever the number
/// of property records differs from the reported device count.
/// </summary>
private void PrintOutAviableDevices()
{
    const string Separator = "---";

    Console.WriteLine("Printing out avaiable devices...");
    Console.WriteLine("For now this code will work only with Cuda devs...");

    // Device counts per back end.
    var cudaCount = CudafyHost.GetDeviceCount(eGPUType.Cuda);
    var openClCount = CudafyHost.GetDeviceCount(eGPUType.OpenCL);
    var emulatorCount = CudafyHost.GetDeviceCount(eGPUType.Emulator);

    Console.WriteLine("{0} devices of type Cuda found", cudaCount);
    Console.WriteLine("{0} devices of type OpenCl found", openClCount);
    Console.WriteLine("{0} devices of type Emulator found", emulatorCount);

    // ---- Cuda ----
    Console.WriteLine("Attempting to print out detailed info about Cuda devices..");
    var cudaProperties = CudafyHost.GetDeviceProperties(eGPUType.Cuda);
    if (cudaProperties.Count() != cudaCount)
    {
        Console.WriteLine("Something is terribly off! Number of cuda devices differ from received properites");
    }

    foreach (var deviceProperties in cudaProperties)
    {
        Console.WriteLine(Separator);
        PrintOutObjectPublicProperties(deviceProperties);
        Console.WriteLine(Separator);
    }

    // ---- OpenCL ----
    Console.WriteLine("Attempting to print out detailed info about openCl devices..");
    var openClProperties = CudafyHost.GetDeviceProperties(eGPUType.OpenCL);
    if (openClProperties.Count() != openClCount)
    {
        Console.WriteLine("Something is terribly off! Number of openCl devices differ from received properites");
    }

    foreach (var deviceProperties in openClProperties)
    {
        Console.WriteLine(Separator);
        PrintOutObjectPublicProperties(deviceProperties);
        Console.WriteLine(Separator);
    }

    // ---- Emulator ----
    Console.WriteLine("Attempting to print out detailed info about emulator devices..");
    var emulatorProperties = CudafyHost.GetDeviceProperties(eGPUType.Emulator);
    if (emulatorProperties.Count() != emulatorCount)
    {
        Console.WriteLine("Something is terribly off! Number of emulator devices differ from received properites");
    }

    foreach (var deviceProperties in emulatorProperties)
    {
        Console.WriteLine(Separator);
        PrintOutObjectPublicProperties(deviceProperties);
        Console.WriteLine(Separator);
    }
}
/// <summary>
/// Switches Cudafy to the OpenCL target and language, translates the module
/// and loads it on the OpenCL device with the given id.
/// </summary>
/// <param name="DeviceId">Id of the OpenCL device to obtain.</param>
public void Initialize(int DeviceId)
{
    // Target and translator language must both be OpenCL before translating.
    CudafyModes.Target = eGPUType.OpenCL;
    CudafyTranslator.Language = eLanguage.OpenCL;

    CudafyModule module = CudafyTranslator.Cudafy();

    Gpu = CudafyHost.GetDevice(eGPUType.OpenCL, DeviceId);
    Gpu.LoadModule(module);
}