/// <summary>
/// Launches a single elementary kernel, selected by name, over the two stored
/// sequence sets and copies the resulting matrix back into <c>_matrix</c>.
/// </summary>
/// <param name="function">Name of the kernel function to launch.</param>
public static void Execute(string function)
{
    // The last entry of each index table must equal the length of its flattened sequence array.
    Debug.Assert(_indexes1.Last() == _sequencies1.Length);
    Debug.Assert(_indexes2.Last() == _sequencies2.Length);

    CudafyModule km = CudafyTranslator.Cudafy();
    GPGPU gpu = CudafyHost.GetDevice();
    gpu.LoadModule(km);

    try
    {
        // Upload the index tables and the flattened sequences.
        int[] devIndexes1 = gpu.CopyToDevice(_indexes1);
        int[] devIndexes2 = gpu.CopyToDevice(_indexes2);
        int[] devSequencies1 = gpu.CopyToDevice(_sequencies1);
        int[] devSequencies2 = gpu.CopyToDevice(_sequencies2);

        // The result matrix is only allocated on the device; its host contents are not uploaded.
        int[,] devMatrix = gpu.Allocate(_matrix);

        int rows = _matrix.GetLength(0);
        int columns = _matrix.GetLength(1);

        // Grid and block share one extent: the cube root of the cell count, capped at 15.
        int extent = Math.Min(15, (int)Math.Pow((double)rows * columns, 0.33333333333));
        dim3 gridSize = extent;
        dim3 blockSize = extent;

        gpu.Launch(gridSize, blockSize, function,
            devSequencies1, devIndexes1,
            devSequencies2, devIndexes2,
            devMatrix);

        // Bring the computed matrix back to the host.
        gpu.CopyFromDevice(devMatrix, _matrix);
    }
    finally
    {
        // Release device memory even if the upload, launch or copy-back throws.
        gpu.FreeAll();
    }
}
/// <summary>
/// Performs a merge sort on the GPU.
/// Usage example:
/// CudafySequencies.SetSequencies(arrayOfArray,arrayOfArray);
/// CudafySequencies.Execute("Compare");
/// var compare = CudafySequencies.GetMartix();
/// CudafyArray.SetArray(Enumerable.Range(0,n).ToArray());
/// CudafyArray.SetCompare(compare);
/// CudafyArray.MergeSort();
/// var indexesOfSorted = CudafyArray.GetArray();
/// </summary>
/// <param name="direction">
/// Forwarded unchanged to the <c>MergeLinear</c> kernel (presumably the sort order;
/// confirm against the kernel).
/// </param>
public static void MergeSort(int direction = 1)
{
    CudafyModule km = CudafyTranslator.Cudafy();
    GPGPU gpu = CudafyHost.GetDevice();
    gpu.LoadModule(km);

    try
    {
        int[] devA = gpu.Allocate(_a);
        int[] devB = gpu.Allocate(_b);
        gpu.CopyToDevice(_a, devA);

        for (int i = 0; i < _ceiling; i++)
        {
            // Grid and block use the same extent, capped at 15.
            int extent = Math.Min(15, (int)Math.Pow((_length >> i) + i, 0.333333333333));

            // The two buffers swap roles on every pass: even passes read A and write B.
            bool evenPass = (i & 1) == 0;
            int[] source = evenPass ? devA : devB;
            int[] target = evenPass ? devB : devA;

            gpu.Launch(extent, extent).MergeLinear(source, target, i, 0, _length, direction);
        }

        // After _ceiling passes the final data is in A when _ceiling is even, otherwise in B.
        gpu.CopyFromDevice(((_ceiling & 1) == 0) ? devA : devB, _a);
    }
    finally
    {
        // Release device memory even if a launch or copy throws.
        gpu.FreeAll();
    }
}
/// <summary>
/// Program entry point: loads the translated module onto the configured device,
/// writes the total frame count to "length.txt", then simulates and renders each
/// frame while reporting the wall-clock time per frame.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    CudafyModule module = CudafyTranslator.Cudafy();
    GPGPU device = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
    device.LoadModule(module);

    int totalFrames = numberOfSeconds * framesPerSecond;
    InitializeParticles();
    File.WriteAllText("length.txt", totalFrames.ToString());

    int frameIndex = 0;
    while (frameIndex < totalFrames)
    {
        DateTime startedAt = DateTime.Now;

        Simulate(device);
        Bitmap rendered = Render(device, frameIndex);

        TimeSpan elapsed = DateTime.Now.Subtract(startedAt);
        Console.WriteLine("Frame " + frameIndex + " complete. Time: " + elapsed.TotalMilliseconds + "ms");

        frameIndex++;
    }
}
/// <summary>
/// Runs the <c>CalculateNeuralNetwork</c> kernel on OpenCL device 0 with randomly
/// generated input and prints the elapsed time of the GPU section in milliseconds.
/// </summary>
/// <returns>The output vector copied back from the device (length <c>Utils.N</c>).</returns>
public static float[] CallGPU()
{
    // Route translation and execution through OpenCL, device 0.
    CudafyModes.Target = eGPUType.OpenCL;
    CudafyModes.DeviceId = 0;
    CudafyTranslator.Language = eLanguage.OpenCL;

    CudafyModule module = CudafyTranslator.Cudafy(ePlatform.Auto, eArchitecture.OpenCL, typeof(GPU));
    GPGPU device = CudafyHost.GetDevice(eGPUType.OpenCL, 0);
    device.LoadModule(module);
    module.Serialize();

    float[] input = Utils.GenerateRandomVector();
    float[,,] NN = Utils.GenerateRandomMatrix().AsSingleDimension();
    float[] output = new float[Utils.N];

    // The timed section covers allocation, upload, launch, download and release.
    Stopwatch timer = Stopwatch.StartNew();

    float[] deviceOutput = device.Allocate<float>(output);
    float[] deviceInput = device.CopyToDevice(input);
    float[,,] deviceWeights = device.CopyToDevice(NN);

    device.Launch(Utils.GRID_SIZE, Utils.BLOCK_SIZE)
          .CalculateNeuralNetwork(deviceInput, deviceWeights, deviceOutput);

    device.CopyFromDevice(deviceOutput, output);
    device.FreeAll();

    timer.Stop();
    Console.WriteLine("GPU: " + timer.ElapsedMilliseconds);

    return output;
}
/// <summary>
/// Fixture setup: acquires an sm_30 device (OpenCL devices fail the assertion),
/// loads a cached module or translates a fresh one, and prepares the int and float
/// input/output buffers on both host and device.
/// </summary>
public void SetUp()
{
    _gpu = CudafyHost.GetDevice(eArchitecture.sm_30, CudafyModes.DeviceId);
    Assert.IsFalse(_gpu is OpenCLDevice, "OpenCL devices are not supported.");

    // Reuse the serialized module only when it exists and its checksums still match.
    _cm = CudafyModule.TryDeserialize();
    bool cachedModuleIsValid = _cm != null && _cm.TryVerifyChecksums();
    if (!cachedModuleIsValid)
    {
        _cm = CudafyTranslator.Cudafy(eArchitecture.sm_30);
        Console.WriteLine(_cm.CompilerOutput);
        _cm.TrySerialize();
    }

    _gpu.LoadModule(_cm);

    // Host-side result buffers.
    gpuIntResult = new int[WARP_SIZE];
    cpuIntResult = new int[WARP_SIZE];
    gpuFloatResult = new float[WARP_SIZE];
    cpuFloatResult = new float[WARP_SIZE];

    // Integer inputs (arbitrary values).
    inputIntArray = new[]
    {
        0x17, 0x01, 0x7f, 0xd1, 0xfe, 0x23, 0x2c, 0xa0,
        0x00, 0xcf, 0xaa, 0x7a, 0x35, 0xf4, 0x04, 0xbc,
        0xe9, 0x6d, 0xb2, 0x55, 0xb0, 0xc8, 0x10, 0x49,
        0x76, 0x17, 0x92, 0xab, 0xf3, 0xf2, 0xab, 0xcb
    };
    d_inputIntArray = _gpu.CopyToDevice(inputIntArray);
    d_outputIntArray = _gpu.Allocate<int>(WARP_SIZE);

    // Float inputs (arbitrary values).
    inputFloatArray = new[]
    {
        1.7f, -37.03f, 2147.6436f, -0.1f, 7.7f, 99.99f, -809.142f, -0.1115f,
        1.0f, 2.0f, 3.0f, 5.0f, 7.5f, 0.1001f, 11.119f, -9.0f,
        7749.9847f, -860249.118843f, 0.0f, -2727745.586215f, 12.0f, -11.0f, 77.77f, 22.0f,
        377.1112f, -377.1112f, 0.12345f, -0.12345f, 0.11111f, -0.11111f, 700000f, -14f
    };
    d_inputFloatArray = _gpu.CopyToDevice(inputFloatArray);
    d_outputFloatArray = _gpu.Allocate<float>(WARP_SIZE);
}
//
// http://stackoverflow.com/questions/18628447/cudafy-throws-an-exception-while-testing
//
/// <summary>
/// Issues a single GEMM call through <c>GPGPUBLAS</c> on the emulator target with
/// zero-filled operands. The method always ends by throwing
/// <see cref="NotImplementedException"/>.
/// </summary>
/// <param name="deviceId">Device id used for the lookup and stored in <c>CudafyModes.DeviceId</c>.</param>
private static void BlasSample(int deviceId)
{
    CudafyModes.Target = eGPUType.Emulator;
    GPGPU device = CudafyHost.GetDevice(CudafyModes.Target, deviceId);
    CudafyModes.DeviceId = deviceId;

    // Translate for whatever architecture the selected device reports.
    eArchitecture architecture = device.GetArchitecture();
    CudafyModule module = CudafyTranslator.Cudafy(architecture);
    device.LoadModule(module);

    GPGPUBLAS blas = GPGPUBLAS.Create(device);

    const int N = 100;
    float[] hostA = new float[N];
    float[] hostB = new float[N];
    float[] hostC = new float[N];

    float[] deviceA = device.CopyToDevice(hostA);
    float[] deviceB = device.CopyToDevice(hostB);
    float[] deviceC = device.CopyToDevice(hostC);

    // All three dimension arguments are 10, so each operand holds 10 * 10 = N elements.
    const int dimension = 10;
    const float alpha = -1;
    const float beta = 0;
    cublasOperation operation = cublasOperation.N;

    blas.GEMM(dimension, dimension, dimension, alpha, deviceA, deviceB, beta, deviceC, operation);

    throw new NotImplementedException();
}
/// <summary>
/// Evaluates every hand on the GPU and returns one rank per hand, in input order.
/// </summary>
/// <param name="hands">Hands to evaluate, one <c>ulong</c> per hand.</param>
/// <param name="numCards">Forwarded unchanged to the <c>evaluate</c> kernel.</param>
/// <returns>
/// An array with the same length as <paramref name="hands"/>; empty when no hands are supplied.
/// </returns>
public static uint[] Evaluate(ulong[] hands, int numCards)
{
    // Nothing to do for an empty batch; this also avoids launching a grid of zero blocks.
    if (hands.Length == 0)
    {
        return new uint[0];
    }

    // Translate this class and compile it for the device.
    CudafyModule km = CudafyTranslator.Cudafy();

    // Get the configured GPU and load the module.
    GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
    gpu.LoadModule(km);

    // One thread per hand, rounding the block count up to cover any remainder.
    const int blockSize = 256;
    int blockx = (hands.Length + blockSize - 1) / blockSize;

    ulong[] dev_hands = null;
    uint[] dev_ranks = null;
    try
    {
        dev_hands = gpu.Allocate<ulong>(hands.Length);
        dev_ranks = gpu.Allocate<uint>(hands.Length);
        gpu.CopyToDevice(hands, dev_hands);

        gpu.StartTimer();
        gpu.Launch(blockx, blockSize).evaluate(dev_hands, numCards, hands.Length, dev_ranks);
        gpu.StopTimer();

        uint[] toReturn = new uint[hands.Length];
        gpu.CopyFromDevice(dev_ranks, toReturn);
        return toReturn;
    }
    finally
    {
        // Free only the buffers allocated here, so repeated calls do not leak device memory.
        if (dev_ranks != null)
        {
            gpu.Free(dev_ranks);
        }
        if (dev_hands != null)
        {
            gpu.Free(dev_hands);
        }
    }
}
/// <summary>
/// Prepares the fixture: selects an sm_30 device (asserting it is not an OpenCL
/// device), loads the module from cache or by fresh translation, and creates the
/// integer and float test buffers on host and device.
/// </summary>
public void SetUp()
{
    _gpu = CudafyHost.GetDevice(eArchitecture.sm_30, CudafyModes.DeviceId);
    Assert.IsFalse(_gpu is OpenCLDevice, "OpenCL devices are not supported.");

    _cm = CudafyModule.TryDeserialize();
    bool mustTranslate = _cm == null || !_cm.TryVerifyChecksums();
    if (mustTranslate)
    {
        // No usable cached module: translate, show the compiler output, try to cache it.
        _cm = CudafyTranslator.Cudafy(eArchitecture.sm_30);
        Console.WriteLine(_cm.CompilerOutput);
        _cm.TrySerialize();
    }
    _gpu.LoadModule(_cm);

    // ---- integer data (arbitrary values) ----
    int[] intSamples =
    {
        0x17, 0x01, 0x7f, 0xd1,
        0xfe, 0x23, 0x2c, 0xa0,
        0x00, 0xcf, 0xaa, 0x7a,
        0x35, 0xf4, 0x04, 0xbc,
        0xe9, 0x6d, 0xb2, 0x55,
        0xb0, 0xc8, 0x10, 0x49,
        0x76, 0x17, 0x92, 0xab,
        0xf3, 0xf2, 0xab, 0xcb
    };
    inputIntArray = intSamples;
    d_inputIntArray = _gpu.CopyToDevice(inputIntArray);
    d_outputIntArray = _gpu.Allocate<int>(WARP_SIZE);
    gpuIntResult = new int[WARP_SIZE];
    cpuIntResult = new int[WARP_SIZE];

    // ---- float data (arbitrary values) ----
    float[] floatSamples =
    {
        1.7f, -37.03f, 2147.6436f, -0.1f,
        7.7f, 99.99f, -809.142f, -0.1115f,
        1.0f, 2.0f, 3.0f, 5.0f,
        7.5f, 0.1001f, 11.119f, -9.0f,
        7749.9847f, -860249.118843f, 0.0f, -2727745.586215f,
        12.0f, -11.0f, 77.77f, 22.0f,
        377.1112f, -377.1112f, 0.12345f, -0.12345f,
        0.11111f, -0.11111f, 700000f, -14f
    };
    inputFloatArray = floatSamples;
    d_inputFloatArray = _gpu.CopyToDevice(inputFloatArray);
    d_outputFloatArray = _gpu.Allocate<float>(WARP_SIZE);
    gpuFloatResult = new float[WARP_SIZE];
    cpuFloatResult = new float[WARP_SIZE];
}
/// <summary>
/// Runs the <c>fungsiAtomic</c> kernel with <c>KONSTANTA_THREAD</c> blocks of one
/// thread each and prints the total elapsed time in milliseconds.
/// </summary>
public static void eksekusi()
{
    CudafyModule kernel_modul = CudafyTranslator.Cudafy();
    GPGPU vga = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
    vga.LoadModule(kernel_modul);

    Stopwatch waktu = new Stopwatch();
    waktu.Start();

    int[] array_vga = vga.Allocate<int>(KONSTANTA_THREAD);
    int[] array_hasil = new int[KONSTANTA_THREAD];

    vga.Launch(KONSTANTA_THREAD, 1).fungsiAtomic(array_vga);
    vga.CopyFromDevice(array_vga, array_hasil);

    // A single release is enough; the original called FreeAll twice.
    vga.FreeAll();

    waktu.Stop();

    // ElapsedMilliseconds is the total elapsed time. TimeSpan.Milliseconds is only the
    // 0-999 component and under-reports any run of one second or longer.
    Console.WriteLine("Total VGA ------ > " + waktu.ElapsedMilliseconds);
}
/// <summary>
/// Translates and compiles the given types against the specified compilation properties.
/// </summary>
/// <param name="props">The compile settings; one translation pass is made per entry.</param>
/// <param name="types">Types to search and translate.</param>
/// <returns>
/// The compiled module. Its name is the name of the last non-null entry of
/// <paramref name="types"/>, if there is one.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="props"/> is null.</exception>
/// <exception cref="CudafyFatalException">
/// No module was produced, including when <paramref name="props"/> is empty.
/// </exception>
public static CudafyModule Cudafy(IEnumerable<CompileProperties> props, params Type[] types)
{
    if (props == null)
    {
        throw new ArgumentNullException("props");
    }

    // Materialize once: the settings are needed for the loop and again for Compile,
    // and the incoming sequence may be lazy.
    CompileProperties[] settings = props.ToArray();

    CudafyModule km = null;
    foreach (var p in settings)
    {
        CudafyTranslator.Language = p.Language;
        _architecture = p.Architecture;
        CUDALanguage.ComputeCapability = GetComputeCapability(p.Architecture);

        km = DoCudafy(km, types);
        if (km == null)
        {
            throw new CudafyFatalException(CudafyFatalException.csUNEXPECTED_STATE_X, "CudafyModule km = null");
        }
    }

    // With no settings the loop never runs; report that the same way as a failed pass
    // instead of dereferencing null below.
    if (km == null)
    {
        throw new CudafyFatalException(CudafyFatalException.csUNEXPECTED_STATE_X, "CudafyModule km = null");
    }

    km.WorkingDirectory = WorkingDirectory;
    km.Compile(settings);

    // LastOrDefault keeps the null check meaningful: Last would throw when every
    // entry is null or the list is empty.
    Type lastType = types.LastOrDefault(t => t != null);
    if (lastType != null)
    {
        km.Name = lastType.Name;
    }

    return km;
}
/// <summary>
/// Checks whether the given OpenCL device can run a double-precision kernel by
/// computing 2.5 + 7.5 on the device and comparing the result with 10.0.
/// </summary>
/// <param name="DeviceId">Zero-based OpenCL device id.</param>
/// <returns>
/// <c>true</c> when the kernel ran and produced exactly 10.0; <c>false</c> when the id
/// is out of range or anything fails along the way.
/// </returns>
/// <remarks>
/// Sets <c>CudafyModes.Target</c> and <c>CudafyTranslator.Language</c> to OpenCL and
/// does not restore them.
/// </remarks>
public static bool TestGpuDoublePrecision(int DeviceId)
{
    // Ids are zero-based, so an id equal to the device count is already out of range.
    if (DeviceId < 0 || DeviceId >= CudafyHost.GetDeviceCount(eGPUType.OpenCL))
    {
        return false;
    }

    try
    {
        CudafyModes.Target = eGPUType.OpenCL;
        CudafyTranslator.Language = eLanguage.OpenCL;

        CudafyModule km = CudafyTranslator.Cudafy();
        GPGPU gpu = CudafyHost.GetDevice(eGPUType.OpenCL, DeviceId);
        gpu.LoadModule(km);

        double c;
        double[] dev_c = gpu.Allocate<double>();
        gpu.Launch().add_double(2.5d, 7.5d, dev_c);
        gpu.CopyFromDevice(dev_c, out c);
        gpu.Free(dev_c);

        // 2.5, 7.5 and 10.0 are all exactly representable, so exact equality is intended.
        return c == 10.0d;
    }
    catch
    {
        // Deliberate best effort: any failure is reported as "not supported".
        return false;
    }
}
/// <summary>
/// Invokes and runs the GPU check that the array is sorted; the device result is
/// copied back into <c>D</c>.
/// </summary>
/// <param name="direction">
/// Forwarded unchanged to the <c>Sorted</c> kernel (presumably the expected sort
/// order; confirm against the kernel).
/// </param>
public static void ExecuteSorted(int direction = 1)
{
    CudafyModule km = CudafyTranslator.Cudafy();
    GPGPU gpu = CudafyHost.GetDevice();
    gpu.LoadModule(km);

    try
    {
        int[] devA = gpu.Allocate(_a);
        int[] devB = gpu.Allocate(_b);
        int[] devC = gpu.Allocate(_c);
        int[] devD = gpu.Allocate(D);

        // Only _a is uploaded; the other buffers are allocated without a host copy.
        gpu.CopyToDevice(_a, devA);

        // Single-thread split, a parallel Sorted pass (step 0), then a single-thread
        // Sorted pass (step 1).
        gpu.Launch(1, 1).Split(devA, devB, devC, _middle);
        gpu.Launch(_gridSize, _blockSize).Sorted(devA, devB, devC, devD, 0, direction);
        gpu.Launch(1, 1).Sorted(devA, devB, devC, devD, 1, direction);

        gpu.CopyFromDevice(devD, D);
    }
    finally
    {
        // Release device memory even if a launch or copy throws.
        gpu.FreeAll();
    }
}
/// <summary>
/// Runs the <c>ModulAtomic</c> kernel with <c>KONSTANTA_THREAD</c> blocks of one
/// thread each, compacts every entry that is not -1 into a host list, and prints the
/// total elapsed time in whole seconds.
/// </summary>
public static void primaGPU()
{
    CudafyModule modul_kernel = CudafyTranslator.Cudafy();
    GPGPU vga = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
    vga.LoadModule(modul_kernel);

    Stopwatch waktu = new Stopwatch();
    waktu.Start();

    int[] list_cpu = new int[KONSTANTA_THREAD];
    int[] list_cpy = new int[KONSTANTA_THREAD];
    int[] list = vga.Allocate<int>(KONSTANTA_THREAD);

    vga.Launch(KONSTANTA_THREAD, 1).ModulAtomic(list);
    vga.CopyFromDevice(list, list_cpy);
    vga.FreeAll();

    // Compact the kept entries (everything except -1) to the front of list_cpu.
    // NOTE(review): list_cpu is not used afterwards; it is kept because this work is
    // inside the timed section.
    int index = 0;
    for (int z = 0; z < list_cpy.Length; z++)
    {
        if (list_cpy[z] != -1)
        {
            list_cpu[index] = list_cpy[z];
            index++;
        }
    }

    waktu.Stop();

    // TotalSeconds, truncated to whole seconds. TimeSpan.Seconds is only the 0-59
    // component and wraps around after one minute.
    String total = ((long)waktu.Elapsed.TotalSeconds).ToString();
    Console.WriteLine("Total GPU ------ {0} detik> ", total);
}
/// <summary>
/// Launches "Kernel" once with a <c>Generic&lt;ushort, ushort&gt;</c> argument and
/// prints PASSED when the device writes back 1, FAILED otherwise.
/// </summary>
public static void Execute()
{
    // Use the cached module when it exists and its checksums match; otherwise
    // translate and cache.
    CudafyModule module = CudafyModule.TryDeserialize();
    bool cacheIsValid = module != null && module.TryVerifyChecksums();
    if (!cacheIsValid)
    {
        module = CudafyTranslator.Cudafy(typeof(Generic<ushort, ushort>), typeof(SimpleGeneric));
        module.Serialize();
    }

    GPGPU device = CudafyHost.GetDevice(CudafyModes.Target);
    device.LoadModule(module);

    var input = new Generic<ushort, ushort> { A = 187 };
    int[] deviceOutput = device.Allocate<int>(1);

    device.Launch(1, 1, "Kernel", input, deviceOutput);

    int output;
    device.CopyFromDevice(deviceOutput, out output);

    string verdict;
    if (output == 1)
    {
        verdict = "PASSED";
    }
    else
    {
        verdict = "FAILED";
    }
    Console.WriteLine("Simple Generic: " + verdict);
}
/// <summary>
/// Launches the "GenerateRipples" kernel on device 0 with an 8x8 grid of 64x16-thread
/// blocks and copies the results into a host array of <c>N</c> ints.
/// </summary>
public void ExeTestKernel()
{
    GPGPU device = CudafyHost.GetDevice(CudafyModes.Target, 0);
    CudafyModule module = CudafyTranslator.Cudafy(device.GetArchitecture());
    device.LoadModule(module);

    // Option 1: allocate an uninitialised result buffer on the device.
    int[] host_results = new int[N];
    var dev_results = device.Allocate<int>(N);

    // Option 2: fill a host array first, then upload it.
    int position = 0;
    while (position < N)
    {
        host_results[position] = position * 3;
        position++;
    }
    var dev_filled_results = device.CopyToDevice(host_results);

    // 64 * 16 = 1024 threads per block (the source notes this as the sm_30 maximum).
    var threadsPerBlock = new dim3(64, 16);
    // 8 * 8 = 64 blocks per grid, chosen only so the indices vary.
    var blocksPerGrid = new dim3(8, 8);

    device.Launch(blocksPerGrid, threadsPerBlock, "GenerateRipples", dev_results, dev_filled_results);

    // The previously filled host array is overwritten with the kernel output.
    device.CopyFromDevice(dev_results, host_results);
}
/// <summary>
/// Uploads one <c>ParamsStruct</c>, launches the "impliedVolatile" kernel on a single
/// thread, reads the struct back and prints its <c>i</c> and <c>B</c> members.
/// </summary>
public static void Execute()
{
    CudafyModule module = CudafyTranslator.Cudafy(typeof(ParamsStruct), typeof(ImpliedVolatile));
    _gpu = CudafyHost.GetDevice(CudafyModes.Target);
    _gpu.LoadModule(module);

    // Single input record; members not listed keep their default values.
    var hostParams = new ParamsStruct[1];
    hostParams[0] = new ParamsStruct
    {
        OP = 96.95,
        Price = 1332.24,
        Strike = 1235,
        TD = 31,
        R = 0.0001355,
        Q = 0.0166,
        N = 100,
        kind = 1
    };
    var readBack = new ParamsStruct[1];

    ParamsStruct[] deviceParams = _gpu.CopyToDevice(hostParams);
    float[] deviceScratch = _gpu.Allocate<float>(1001);

    _gpu.Launch(1, 1, "impliedVolatile", deviceParams, deviceScratch);

    // Copy the single (kernel-updated) record back from the device.
    _gpu.CopyFromDevice(deviceParams, 0, readBack, 0, 1);
    Console.WriteLine("I={0}, B={1}", readBack[0].i, readBack[0].B);
}
/// <summary>
/// Switches CUDAfy to OpenCL, translates the module and loads it onto the requested
/// OpenCL device, which is stored in <c>Gpu</c>.
/// </summary>
/// <param name="DeviceId">Id of the OpenCL device to use.</param>
public void Initialize(int DeviceId)
{
    // Both the execution target and the generated language must be OpenCL.
    CudafyModes.Target = eGPUType.OpenCL;
    CudafyTranslator.Language = eLanguage.OpenCL;

    CudafyModule translated = CudafyTranslator.Cudafy();

    Gpu = CudafyHost.GetDevice(eGPUType.OpenCL, DeviceId);
    Gpu.LoadModule(translated);
}
/// <summary>
/// Renders randomly generated spheres into <paramref name="bitmap"/> on the GPU, using
/// a module loaded from a serialized file, and prints the module load time and the
/// render time.
/// </summary>
/// <param name="bitmap">
/// Host pixel buffer; a device buffer is allocated from it and the rendered image is
/// copied back into it.
/// </param>
public static void Execute(byte[] bitmap)
{
    DateTime dt = DateTime.Now;

    CudafyModule km = CudafyModule.TryDeserialize(csFILENAME);
    // Check the module exists and matches the .NET modules, else fall back to the
    // module stored under the type's name and cache it under csFILENAME.
    if (km == null || !km.TryVerifyChecksums())
    {
        Console.WriteLine("There was no cached module available so we make a new one.");
        km = CudafyModule.Deserialize(typeof(ray_serialize).Name);
        km.Serialize(csFILENAME);
    }

    GPGPU gpu = CudafyHost.GetGPGPU(CudafyModes.Target, 1);
    gpu.LoadModule(km);

    // TotalMilliseconds, truncated to whole milliseconds. TimeSpan.Milliseconds is only
    // the 0-999 component and under-reports any load of one second or longer.
    long loadMilliseconds = (long)DateTime.Now.Subtract(dt).TotalMilliseconds;
    Console.WriteLine("Time taken to load module: {0}ms", loadMilliseconds);

    // Capture the start time.
    gpu.StartTimer();

    // Allocate device memory for the bitmap (same size as the host buffer).
    byte[] dev_bitmap = gpu.Allocate(bitmap);

    // Build the sphere set on the host, then copy it to constant memory on the GPU.
    Sphere[] temp_s = new Sphere[SPHERES];
    for (int i = 0; i < SPHERES; i++)
    {
        temp_s[i].r = rnd(1.0f);
        temp_s[i].g = rnd(1.0f);
        temp_s[i].b = rnd(1.0f);
        temp_s[i].x = rnd(1000.0f) - 500;
        temp_s[i].y = rnd(1000.0f) - 500;
        temp_s[i].z = rnd(1000.0f) - 500;
        temp_s[i].radius = rnd(100.0f) + 20;
    }
    gpu.CopyToConstantMemory(temp_s, s);

    // Generate the bitmap: a (DIM / 16) x (DIM / 16) grid of 16 x 16 thread blocks.
    dim3 grids = new dim3(DIM / 16, DIM / 16);
    dim3 threads = new dim3(16, 16);
    gpu.Launch(grids, threads, "kernel", dev_bitmap);

    // Copy the bitmap back from the GPU for display.
    gpu.CopyFromDevice(dev_bitmap, bitmap);

    // Get the stop time and display the timing result.
    float elapsedTime = gpu.StopTimer();
    Console.WriteLine("Time to generate: {0} ms", elapsedTime);

    gpu.DeviceFreeAll();
}
/// <summary>
/// Renders randomly generated spheres into <paramref name="bitmap"/> using the
/// strongly typed <c>thekernel</c> entry point, with the sphere set held in constant
/// memory, and prints the render time.
/// </summary>
/// <param name="bitmap">
/// Host pixel buffer; a device buffer is allocated from it and the rendered image is
/// copied back into it.
/// </param>
public static void Execute(byte[] bitmap)
{
    // Use the cached module when it exists and its checksums match.
    CudafyModule module = CudafyModule.TryDeserialize();
    bool cacheIsValid = module != null && module.TryVerifyChecksums();
    if (!cacheIsValid)
    {
        module = CudafyTranslator.Cudafy(typeof(SphereOpenCL), typeof(ray_opencl_const));
        module.TrySerialize();
    }

    GPGPU device = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
    device.LoadModule(module);

    // Timing starts here and covers allocation, scene upload, launch and download.
    device.StartTimer();

    byte[] deviceBitmap = device.Allocate(bitmap);

    // Random scene; the initializer members run in the same order as the original
    // assignments, so the sequence of rnd calls is unchanged.
    SphereOpenCL[] scene = new SphereOpenCL[SPHERES];
    for (int index = 0; index < SPHERES; index++)
    {
        scene[index] = new SphereOpenCL
        {
            r = rnd(1.0f),
            g = rnd(1.0f),
            b = rnd(1.0f),
            x = rnd(1000.0f) - 500,
            y = rnd(1000.0f) - 500,
            z = rnd(1000.0f) - 500,
            radius = rnd(100.0f) + 20
        };
    }
    device.CopyToConstantMemory(scene, spheres);

    // (DIM / 16) x (DIM / 16) blocks of 16 x 16 threads.
    var blocksPerSide = ray_gui.DIM / 16;
    dim3 gridShape = new dim3(blocksPerSide, blocksPerSide);
    dim3 blockShape = new dim3(16, 16);

    // Strongly typed launch through a delegate to the kernel method.
    Action<GThread, byte[]> entryPoint = thekernel;
    device.Launch(gridShape, blockShape, entryPoint, deviceBitmap);

    // Download the rendered image.
    device.CopyFromDevice(deviceBitmap, bitmap);

    float elapsedTime = device.StopTimer();
    Console.WriteLine("Time to generate: {0} ms", elapsedTime);

    device.FreeAll();
}
/// <summary>
/// Translates this type to OpenCL, writes the generated source to "LiquidSystem.cl",
/// and loads the module onto OpenCL device 0.
/// </summary>
public GPULiquidSystem()
{
    CudafyTranslator.Language = eLanguage.OpenCL;
    CudafyModule module = CudafyTranslator.Cudafy(typeof(GPULiquidSystem));

    // Keep a copy of the generated kernel source on disk.
    var generatedSource = module.SourceCode;
    System.IO.File.WriteAllText("LiquidSystem.cl", generatedSource);

    _gpu = CudafyHost.GetDevice(eGPUType.OpenCL, 0);
    _gpu.LoadModule(module);
}
/// <summary>
/// Translates the module, loads it onto the configured device and launches
/// <c>thekernel</c> with the default launch configuration.
/// </summary>
public static void Execute()
{
    CudafyModule module = CudafyTranslator.Cudafy();

    GPGPU device = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
    device.LoadModule(module);

    // Dynamic launch; the original notes gpu.Launch(1, 1, "kernel") as an alternative form.
    device.Launch().thekernel();

    Console.WriteLine("Sample kernel started successfully!");
}
/// <summary>
/// Initializes a new instance of the <see cref="KernelMethodInfo"/> class.
/// </summary>
/// <param name="type">The type.</param>
/// <param name="method">The method.</param>
/// <param name="gpuMethodType">Type of the gpu method.</param>
/// <param name="isDummy">if set to <c>true</c> is dummy.</param>
/// <param name="behaviour">Value stored in <c>Behaviour</c>.</param>
/// <param name="parentModule">Module of which this is a part.</param>
public KernelMethodInfo(
    Type type,
    MethodInfo method,
    eKernelMethodType gpuMethodType,
    bool isDummy,
    eCudafyDummyBehaviour behaviour,
    CudafyModule parentModule)
{
    // Identity of the kernel method.
    this.Type = type;
    this.Method = method;
    this.MethodType = gpuMethodType;

    // A freshly created instance has no deserialized checksum yet.
    this.DeserializedChecksum = 0;

    // Dummy handling and owning module.
    this.IsDummy = isDummy;
    this.Behaviour = behaviour;
    this.ParentModule = parentModule;
}
/// <summary>
/// Fixture setup: translates this test class for x64 in dynamic-parallelism mode,
/// prints the compiler output, serializes the module and loads it onto the device
/// matching the compile architecture.
/// </summary>
public void SetUp()
{
    var compileSettings = CompilerHelper.Create(
        ePlatform.x64,
        CudafyModes.Architecture,
        eCudafyCompileMode.DynamicParallelism);

    CompileProperties[] settingsList = { compileSettings };
    _cm = CudafyTranslator.Cudafy(settingsList, this.GetType());

    Console.WriteLine(_cm.CompilerOutput);
    _cm.Serialize();

    _gpu = CudafyHost.GetDevice(compileSettings.Architecture, CudafyModes.DeviceId);
    _gpu.LoadModule(_cm);
}
/// <summary>
/// Adds two <c>N</c>-element integer vectors on the GPU and verifies the result on
/// the CPU, printing either the first mismatch or a success message.
/// </summary>
public static void Execute()
{
    // Translate this class and compile it for the device.
    CudafyModule module = CudafyTranslator.Cudafy();

    // Get the configured GPU and load the module.
    GPGPU device = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
    device.LoadModule(module);

    // Host vectors: a[i] = i, b[i] = 2i, c receives the sums.
    int[] a = new int[N];
    int[] b = new int[N];
    int[] c = new int[N];
    for (int k = 0; k < N; k++)
    {
        a[k] = k;
        b[k] = 2 * k;
    }

    // Device buffers: the output is only allocated, the inputs are uploaded.
    int[] dev_c = device.Allocate<int>(c);
    int[] dev_a = device.CopyToDevice(a);
    int[] dev_b = device.CopyToDevice(b);

    // 128 blocks of 128 threads each.
    device.Launch(128, 128).add(dev_a, dev_b, dev_c);

    // Download the sums.
    device.CopyFromDevice(dev_c, c);

    // Find the first element, if any, where the GPU result disagrees with the CPU sum.
    int firstMismatch = -1;
    for (int k = 0; k < N && firstMismatch < 0; k++)
    {
        if ((a[k] + b[k]) != c[k])
        {
            firstMismatch = k;
        }
    }

    if (firstMismatch >= 0)
    {
        Console.WriteLine("{0} + {1} != {2}", a[firstMismatch], b[firstMismatch], c[firstMismatch]);
    }
    else
    {
        Console.WriteLine("We did it!");
    }

    // Free the memory allocated on the GPU.
    device.FreeAll();
}
/// <summary>
/// Prepares the fixture: compiles this test class with x64 / dynamic-parallelism
/// settings, prints the compiler output, serializes the module, then loads it onto
/// the device selected by the compile architecture.
/// </summary>
public void SetUp()
{
    // Compile properties for the currently configured architecture.
    var props = CompilerHelper.Create(ePlatform.x64, CudafyModes.Architecture, eCudafyCompileMode.DynamicParallelism);
    var fixtureType = this.GetType();

    _cm = CudafyTranslator.Cudafy(new CompileProperties[] { props }, fixtureType);
    Console.WriteLine(_cm.CompilerOutput);
    _cm.Serialize();

    // Run on a device of the same architecture the module was compiled for.
    _gpu = CudafyHost.GetDevice(props.Architecture, CudafyModes.DeviceId);
    _gpu.LoadModule(_cm);
}
/// <summary>
/// Loads the kernel module onto <c>gpu</c>, preferring a previously serialized cdfy
/// module and translating a fresh one only when the cache is missing or stale.
/// </summary>
/// <remarks>
/// Note from the original author: when using CUDA, use the 2.0 architecture for
/// atomics compatibility.
/// </remarks>
public void LoadModule()
{
    // Look for a cached cdfy module before generating a new one.
    CudafyModule module = CudafyModule.TryDeserialize();

    bool needsTranslation = module == null || !module.TryVerifyChecksums();
    if (needsTranslation)
    {
        module = CudafyTranslator.Cudafy();
    }

    gpu.LoadModule(module);
}
/// <summary>
/// Loads an sm_35 module onto the CUDA device and returns a one-element array.
/// As written, no kernel is invoked and <paramref name="inputArray"/> is not read.
/// </summary>
/// <param name="inputArray">Input data (currently unused).</param>
/// <returns>A newly allocated <c>double[1]</c>.</returns>
public double[] transpose(double[] inputArray)
{
    GPGPU device = CudafyHost.GetDevice(eGPUType.Cuda);
    CudafyModule module = CudafyTranslator.Cudafy(eArchitecture.sm_35);
    device.LoadModule(module);

    // Launch shape is built but not yet passed to a launch.
    dim3 gridShape = new dim3(1000);

    // No kernel method is invoked on the launcher.
    device.Launch();

    double[] placeholder = new double[1];
    return placeholder;
}
/// <summary>
/// Translates the module, loads it onto the CUDA device and launches
/// <c>thekernel</c> with the default launch configuration.
/// </summary>
public static void RunTest()
{
    CudafyModule module = CudafyTranslator.Cudafy();

    GPGPU cudaDevice = CudafyHost.GetDevice(eGPUType.Cuda);
    cudaDevice.LoadModule(module);

    // Dynamic launch; the original notes gpu.Launch(1, 1, "kernel") as an alternative form.
    cudaDevice.Launch().thekernel();

    Console.WriteLine("Sample kernel started successfully!");
}
/// <summary>
/// Removes <paramref name="module"/> from the loaded-module list and clears the
/// current module when it is the one being unloaded.
/// </summary>
/// <param name="module">The module to unload.</param>
/// <exception cref="CudafyHostException">The module is not in the loaded-module list.</exception>
public override void UnloadModule(CudafyModule module)
{
    bool wasLoaded = _modules.Remove(module);
    if (!wasLoaded)
    {
        throw new CudafyHostException(CudafyHostException.csMODULE_NOT_FOUND);
    }

    // Drop the reference to the current module if it was the one just removed.
    if (_module == module)
    {
        _module = null;
    }
}
/// <summary>
/// Fixture setup with debug generation enabled: loads the cached module when it is
/// valid, otherwise translates this test class (plus <c>StringConstClass</c> on
/// non-OpenCL devices) for the device's architecture.
/// </summary>
public void SetUp()
{
    CudafyTranslator.GenerateDebug = true;

    _cm = CudafyModule.TryDeserialize();
    _gpu = CudafyHost.GetDevice(CudafyModes.Architecture, CudafyModes.DeviceId);

    bool cacheIsValid = _cm != null && _cm.TryVerifyChecksums();
    if (!cacheIsValid)
    {
        // On OpenCL devices a null entry is passed in place of StringConstClass.
        var constantsHolder = (_gpu is OpenCLDevice) ? null : typeof(StringConstClass);

        _cm = CudafyTranslator.Cudafy(_gpu.GetArchitecture(), this.GetType(), constantsHolder);
        _cm.TrySerialize();
    }

    _gpu.LoadModule(_cm);
}
/// <summary>
/// Computes the dot product of two <c>N</c>-element vectors: the <c>Dot</c> kernel
/// produces one partial sum per block and the CPU adds them up. Prints the GPU result
/// next to the reference value.
/// </summary>
public static void Execute()
{
    CudafyModule module = CudafyTranslator.Cudafy();
    GPGPU device = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
    device.LoadModule(module);

    // Host buffers: a[i] = i, b[i] = 2i.
    float[] a = new float[N];
    float[] b = new float[N];
    float[] partial_c = new float[blocksPerGrid];

    // Device buffers (dev_test is allocated but not passed to the kernel).
    float[] dev_a = device.Allocate<float>(N);
    float[] dev_b = device.Allocate<float>(N);
    float[] dev_partial_c = device.Allocate<float>(blocksPerGrid);
    float[] dev_test = device.Allocate<float>(blocksPerGrid * blocksPerGrid);

    for (int k = 0; k < N; k++)
    {
        a[k] = k;
        b[k] = k * 2;
    }

    // Upload the inputs.
    device.CopyToDevice(a, dev_a);
    device.CopyToDevice(b, dev_b);

    device.Launch(blocksPerGrid, threadsPerBlock).Dot(dev_a, dev_b, dev_partial_c);

    // Download the per-block partial sums.
    device.CopyFromDevice(dev_partial_c, partial_c);

    // Finish on the CPU. The sum is accumulated in single precision, in block order.
    float c = 0;
    foreach (float partial in partial_c)
    {
        c += partial;
    }

    Console.WriteLine("Does GPU value {0} = {1}?\n", c, 2 * sum_squares((float)(N - 1)));

    // Release device memory; host memory is left to the garbage collector.
    device.FreeAll();
}
/// <summary>
/// Loads the translated module onto the configured device, allocates the device
/// bitmap and sets the launch dimensions: (DIM / 16) x (DIM / 16) blocks of
/// 16 x 16 threads.
/// </summary>
/// <param name="bytes">Size of the device bitmap buffer, in bytes.</param>
public void Initialize(int bytes)
{
    CudafyModule module = CudafyTranslator.Cudafy();

    _gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
    _gpu.LoadModule(module);

    _dev_bitmap = _gpu.Allocate<byte>(bytes);

    var blocksPerSide = DIM / 16;
    _blocks = new dim3(blocksPerSide, blocksPerSide);
    _threads = new dim3(16, 16);
}
/// <summary>
/// Fixture setup: translates this test class and <c>MathSingleTest</c>, adding
/// <c>MathDoubleTest</c> only when the device reports double-precision support, then
/// loads the module. The generated source is written to the debug output.
/// </summary>
public virtual void SetUp()
{
    _gpu = CudafyHost.GetDevice(CudafyModes.Architecture, CudafyModes.DeviceId);

    var typesToTranslate = new List<Type>
    {
        this.GetType(),
        typeof(MathSingleTest)
    };

    // Double-precision tests are only translated for devices that support them.
    SupportsDouble = _gpu.GetDeviceProperties().SupportsDoublePrecision;
    if (SupportsDouble)
    {
        typesToTranslate.Add(typeof(MathDoubleTest));
    }

    _cm = CudafyTranslator.Cudafy(CudafyModes.Architecture, typesToTranslate.ToArray());
    Debug.WriteLine(_cm.SourceCode);

    _gpu.LoadModule(_cm);
}
/// <summary>
/// Fixture setup: acquires the configured device and loads the cached module when it
/// exists and its checksums match, otherwise translates for the configured
/// architecture, prints the compiler output and tries to cache the result.
/// </summary>
public void SetUp()
{
    _gpu = CudafyHost.GetDevice(CudafyModes.Architecture, CudafyModes.DeviceId);

    _cm = CudafyModule.TryDeserialize();
    bool cacheIsValid = _cm != null && _cm.TryVerifyChecksums();
    if (!cacheIsValid)
    {
        _cm = CudafyTranslator.Cudafy(CudafyModes.Architecture);
        Console.WriteLine(_cm.CompilerOutput);
        _cm.TrySerialize();
    }

    _gpu.LoadModule(_cm);
}
/// <summary>
/// Fixture setup: obtains the pre-built module, either embedded in the test assembly
/// or from a cdfy file named after the assembly, asserts that it is present with
/// valid checksums, and loads it onto the target device.
/// </summary>
public void SetUp()
{
    // Release: post-build event was "cudafycl.exe $(TargetPath)" (module embedded in the assembly).
    // Debug:   post-build event was "cudafycl.exe $(TargetPath) -cdfy" (module in a cdfy file
    //          named after the assembly).
    _cm = CudafyModule.HasCudafyModuleInAssembly()
        ? GetType().Assembly.GetCudafyModule()
        : CudafyModule.TryDeserialize(this.GetType().Assembly.GetName().Name);

    Assert.IsFalse(_cm == null);
    Assert.IsTrue(_cm.TryVerifyChecksums());

    _gpu = CudafyHost.GetDevice(CudafyModes.Target);
    if (_cm != null)
    {
        _gpu.LoadModule(_cm);
    }
}
/// <summary>
/// Fixture setup: translates the module for the sm_20 architecture and stores it in
/// <c>_cm</c>.
/// </summary>
public void SetUp()
{
    var targetArchitecture = eArchitecture.sm_20;
    _cm = CudafyTranslator.Cudafy(targetArchitecture);
}
/// <summary>
/// Fixture setup: translates <c>Sphere</c> and <c>RelectorAddInTypeTests</c> and
/// stores the resulting module in <c>_cm</c>.
/// </summary>
public void SetUp()
{
    var sphereType = typeof(Sphere);
    var fixtureType = typeof(RelectorAddInTypeTests);

    _cm = CudafyTranslator.Cudafy(sphereType, fixtureType);
}