public void SetUp() { device = (from platformid in Cl.GetPlatformIDs(out error) from deviceid in Cl.GetDeviceIDs(platformid, Cl.DeviceType.Gpu, out error) select deviceid).First(); context = Cl.CreateContext(null, 1, new[] { device }, null, IntPtr.Zero, out error); }
public static void Main(string[] args) { int dimX = GetParameter("--dimX", 322, args); int dimY = GetParameter("--dimY", 242, args); int N = GetParameter("--N", 97760, args); bool lmem = GetFlag("--lmem", args); Console.WriteLine("Poisson equation solver: dimX = {0} dimY = {1} N = {2}, LMem: {3}", dimX, dimY, N, lmem); string options = string.Join(" ", args.Where(arg => arg.IndexOf("--") == -1)); Console.WriteLine("OpenCL program build options: " + options); Cl.ErrorCode error; Cl.Device device = (from platformid in Cl.GetPlatformIDs(out error) from deviceid in Cl.GetDeviceIDs(platformid, Cl.DeviceType.Gpu, out error) select deviceid).First(); clSafeCall(error); Cl.Context context = Cl.CreateContext(null, 1, new[] { device }, null, IntPtr.Zero, out error); clSafeCall(error); // create program from C# kernel IRBuildOptions.AutoInline = GetFlag("--inline", args); IRBuildOptions.WasteRegisters = GetFlag("--rwaste", args); Console.WriteLine("IR code build options: AutoInline: {0}; WasteRegisters: {1}", IRBuildOptions.AutoInline, IRBuildOptions.WasteRegisters); Cl.Program pcsharp = typeof(MainClass).BuildIR().ToGPUClProgram(device, context); // create program from OpenCL kernel Cl.Program popencl = Cl.CreateProgramWithSource(context, 1, new[] { PoissonRBSORCl }, null, out error); clSafeCall(error); // perform bandwidth comparison float x0 = (float)(-0.5 * Math.PI); float y0 = (float)(-0.5 * Math.PI); float x1 = -x0; float y1 = -y0; float omega = 0.8f; Console.WriteLine("C# benchmark:"); long tcsharp = PoissonRBSOR(device, context, pcsharp, lmem, x0, y0, x1, y1, dimX, dimY, N, omega, "unigpu.bin", options); pcsharp.Dispose(); Console.WriteLine("OpenCL benchmark:"); long topencl = PoissonRBSOR(device, context, popencl, lmem, x0, y0, x1, y1, dimX, dimY, N, omega, "opencl.bin", options); popencl.Dispose(); Console.WriteLine("OpenCL advantage: {0}", (double)tcsharp / (double)topencl); context.Dispose(); }
public void Setup() { Cl.ErrorCode error; Cl.Platform[] platforms = Cl.GetPlatformIDs(out error); List <Cl.Device> devicesList = new List <Cl.Device>(); CheckErr(error, "Cl.GetPlatformIDs"); foreach (Cl.Platform platform in platforms) { string platformName = Cl.GetPlatformInfo(platform, Cl.PlatformInfo.Name, out error).ToString(); Console.WriteLine("Platform: " + platformName); CheckErr(error, "Cl.GetPlatformInfo"); //We will be looking only for GPU devices foreach (Cl.Device device in Cl.GetDeviceIDs(platform, Cl.DeviceType.Gpu, out error)) { CheckErr(error, "Cl.GetDeviceIDs"); Console.WriteLine("Device: " + device.ToString()); devicesList.Add(device); } } if (devicesList.Count <= 0) { Console.WriteLine("No devices found."); return; } _device = devicesList[0]; if (Cl.GetDeviceInfo(_device, Cl.DeviceInfo.ImageSupport, out error).CastTo <Cl.Bool>() == Cl.Bool.False) { Console.WriteLine("No image support."); return; } _context = Cl.CreateContext(null, 1, new[] { _device }, ContextNotify, IntPtr.Zero, out error); //Second parameter is amount of devices CheckErr(error, "Cl.CreateContext"); }
public void Setup() { Cl.ErrorCode error; _device = (from device in Cl.GetDeviceIDs( (from platform in Cl.GetPlatformIDs(out error) where Cl.GetPlatformInfo(platform, Cl.PlatformInfo.Name, out error).ToString() == "NVIDIA CUDA" select platform).First(), Cl.DeviceType.Gpu, out error) select device).First(); _context = Cl.CreateContext(null, 1, new[] { _device }, null, IntPtr.Zero, out error); }
private static long PoissonRBSOR(Cl.Device device, Cl.Context context, Cl.Program program, bool lmem, float x0, float y0, float x1, float y1, int dimX, int dimY, int N, float omega, string fileName = null, string options = "") { Cl.ErrorCode error; Cl.Event clevent; // build program clSafeCall(Cl.BuildProgram(program, 1, new[] { device }, options, null, IntPtr.Zero)); Cl.BuildStatus status = Cl.GetProgramBuildInfo(program, device, Cl.ProgramBuildInfo.Status, out error).CastTo <Cl.BuildStatus>(); if (status != Cl.BuildStatus.Success) { throw new Exception(status.ToString()); } // save binary if (fileName != null) { Cl.InfoBuffer binarySizes = Cl.GetProgramInfo(program, Cl.ProgramInfo.BinarySizes, out error); clSafeCall(error); Cl.InfoBufferArray binaries = new Cl.InfoBufferArray( binarySizes.CastToEnumerable <IntPtr>(Enumerable.Range(0, 1)).Select(sz => new Cl.InfoBuffer(sz)).ToArray()); IntPtr szRet; clSafeCall(Cl.GetProgramInfo(program, Cl.ProgramInfo.Binaries, binaries.Size, binaries, out szRet)); byte[] binary = binaries[0].CastToArray <byte>(binarySizes.CastTo <IntPtr>(0).ToInt32()); File.WriteAllBytes(fileName, binary); } // create kernel Cl.Kernel kernel = Cl.CreateKernel(program, "PoissonRBSOR" + (lmem ? "_LMem" : ""), out error); clSafeCall(error); // create command queue Cl.CommandQueue cmdQueue = Cl.CreateCommandQueue(context, device, Cl.CommandQueueProperties.None, out error); clSafeCall(error); float hx = (x1 - x0) / dimX; float hy = (y1 - y0) / dimY; // boundary values float[] hgrid = new float[dimX * dimY]; int gstride = dimX; for (int i = 1; i < dimY - 1; i++) { int y_idx = i * gstride; float y_val = y0 + i * hy; hgrid[y_idx] = u(x0, y_val); hgrid[y_idx + dimX - 1] = u(x0 + (dimX - 1) * hx, y_val); } for (int j = 1; j < dimX - 1; j++) { float x_val = x0 + j * hx; hgrid[j] = u(x_val, y0); hgrid[j + (dimY - 1) * gstride] = u(x_val, y0 + (dimY - 1) * hy); } // laplacian values float[] hlaplacian = new float[(dimX - 2) * (dimY - 2)]; int lstride = dimX - 2; for (int i = 1; i < dimY - 1; i++) { for (int j = 1; j < dimX - 1; j++) { hlaplacian[j - 1 + (i - 1) * lstride] = J(x0 + j * hx, y0 + i * hy); } } // allocate device vectors Cl.Mem dgrid = Cl.CreateBuffer(context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadWrite, (IntPtr)(sizeof(float) * hgrid.Length), hgrid, out error); clSafeCall(error); Cl.Mem dlaplacian = Cl.CreateBuffer(context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadOnly, (IntPtr)(sizeof(float) * hlaplacian.Length), hlaplacian, out error); clSafeCall(error); // setup kernel arguments clSafeCall(Cl.SetKernelArg(kernel, 0, dgrid)); clSafeCall(Cl.SetKernelArg(kernel, 1, dlaplacian)); clSafeCall(Cl.SetKernelArg(kernel, 2, dimX)); clSafeCall(Cl.SetKernelArg(kernel, 3, dimY)); clSafeCall(Cl.SetKernelArg(kernel, 4, gstride)); clSafeCall(Cl.SetKernelArg(kernel, 5, lstride)); clSafeCall(Cl.SetKernelArg(kernel, 6, hx)); clSafeCall(Cl.SetKernelArg(kernel, 7, hy)); clSafeCall(Cl.SetKernelArg(kernel, 8, omega)); if (lmem) { clSafeCall(Cl.SetKernelArg(kernel, 10, (AREA_SIZE_Y + 2) * (AREA_SIZE_X + 2) * sizeof(float), null)); } IntPtr[] lo = { (IntPtr)TILE_SIZE_X, (IntPtr)TILE_SIZE_Y }; IntPtr[] gl = { (IntPtr)((dimX - 2 + (lmem ? AREA_SIZE_X : TILE_SIZE_X) - 1) / (lmem ? AREA_SIZE_X : TILE_SIZE_X) * TILE_SIZE_X), (IntPtr)((dimY - 2 + (lmem ? AREA_SIZE_Y : TILE_SIZE_Y) - 1) / (lmem ? AREA_SIZE_Y : TILE_SIZE_Y) * TILE_SIZE_Y) }; // execute RED kernel clSafeCall(Cl.SetKernelArg(kernel, 9, 1)); clSafeCall(Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 2, null, gl, lo, 0, null, out clevent)); // execute BLACK kernel clSafeCall(Cl.SetKernelArg(kernel, 9, 0)); clSafeCall(Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 2, null, gl, lo, 0, null, out clevent)); clSafeCall(Cl.Finish(cmdQueue)); Stopwatch stopwatch = new Stopwatch(); stopwatch.Start(); for (int idx = 1; idx < N; idx++) { // execute RED kernel clSafeCall(Cl.SetKernelArg(kernel, 9, 1)); clSafeCall(Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 2, null, gl, lo, 0, null, out clevent)); // execute BLACK kernel clSafeCall(Cl.SetKernelArg(kernel, 9, 0)); clSafeCall(Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 2, null, gl, lo, 0, null, out clevent)); } clSafeCall(Cl.Finish(cmdQueue)); stopwatch.Stop(); // copy results from device back to host clSafeCall(Cl.EnqueueReadBuffer(cmdQueue, dgrid, Cl.Bool.True, IntPtr.Zero, (IntPtr)(sizeof(float) * hgrid.Length), hgrid, 0, null, out clevent)); clSafeCall(Cl.Finish(cmdQueue)); cmdQueue.Dispose(); kernel.Dispose(); dgrid.Dispose(); float avgerr = 0, maxerr = 0; for (int i = 1; i < dimY - 1; i++) { for (int j = 1; j < dimX - 1; j++) { float theory = u(x0 + j * hx, y0 + i * hy); float err = Math.Abs(theory - hgrid[j + i * gstride]) / Math.Abs(theory); avgerr += err; maxerr = Math.Max(maxerr, err); } } avgerr /= dimX * dimY; long elapsedTime = stopwatch.ElapsedMilliseconds; Console.WriteLine("average error = {0}%\nmaximal error = {1}%\nelapsed time: {2}ms\niterations per second: {3}", avgerr * 100, maxerr * 100, elapsedTime, (double)N / (double)elapsedTime * 1000.0d); return(elapsedTime); }
private static byte[] ToAMDClBinary(this Program irprog, Cl.Device device) { // create OpenCL stub before registers naming string clstub = string.Join("\n", irprog.Kernels.Select(kernel => string.Format( "__kernel void {0}({1}) {{ {2} }}", kernel.Name, string.Join(", ", kernel.FormalParameters.Select( (fp, idx) => { switch (fp.StateSpace) { case StateSpaces.GLOBAL: return(string.Format("__global {0}* p{1}", fp.UnderlyingType.FormatC(), idx)); case StateSpaces.SHARED: return(string.Format("__local {0}* p{1}", fp.UnderlyingType.FormatC(), idx)); case StateSpaces.CONSTANT: return(string.Format("__constant {0}* p{1}", fp.UnderlyingType.FormatC(), idx)); case StateSpaces.REG: default: return(string.Format("{0} p{1}", fp.UnderlyingType.FormatC(), idx)); } })), string.Join(" ", kernel.FormalParameters.Where( fp => fp.StateSpace != StateSpaces.REG && fp.StateSpace != StateSpaces.CONSTANT).Select( fp => string.Format("*p{0} = 0;", kernel.FormalParameters.IndexOf(fp))))))); // create template binary from OpenCL stub Cl.ErrorCode error; Cl.Context context = Cl.CreateContext(null, 1, new[] { device }, null, IntPtr.Zero, out error); clSafeCall(error); Cl.Program program = Cl.CreateProgramWithSource(context, 1, new[] { clstub }, null, out error); clSafeCall(error); clSafeCall(Cl.BuildProgram(program, 1, new[] { device }, "-fno-bin-source -fno-bin-llvmir -fno-bin-exe -fbin-amdil", null, IntPtr.Zero)); Cl.BuildStatus status = Cl.GetProgramBuildInfo(program, device, Cl.ProgramBuildInfo.Status, out error).CastTo <Cl.BuildStatus>(); if (status != Cl.BuildStatus.Success) { throw new Exception(status.ToString()); } Cl.InfoBuffer binarySizes = Cl.GetProgramInfo(program, Cl.ProgramInfo.BinarySizes, out error); clSafeCall(error); Cl.InfoBufferArray binaries = new Cl.InfoBufferArray( binarySizes.CastToEnumerable <IntPtr>(Enumerable.Range(0, 1)).Select(sz => new Cl.InfoBuffer(sz)).ToArray()); IntPtr szRet; clSafeCall(Cl.GetProgramInfo(program, Cl.ProgramInfo.Binaries, binaries.Size, binaries, out szRet)); program.Dispose(); context.Dispose(); // inject generated code into the elf binary LinkingView elf = new LinkingView(binaries[0].CastToArray <byte>(binarySizes.CastTo <IntPtr>(0).ToInt32())); SymTabSection symtab = (SymTabSection)elf[".symtab"]; Section amdil = elf[".amdil"]; Section rodata = elf[".rodata"]; MemoryStream amdilcode = new MemoryStream(); foreach (Kernel kernel in irprog.Kernels) { SymbolWrapper _metadata = symtab["__OpenCL_" + kernel.Name + "_metadata"]; string[] str_metadata = Marshal.PtrToStringAnsi(Marshal.UnsafeAddrOfPinnedArrayElement( rodata.Data, (int)_metadata.st_value), (int)_metadata.st_size).Split('\n'); int setup_id = (from line in str_metadata let prms = line.Split(':') where prms[0] == ";uniqueid" select int.Parse(prms[1])).Single(); int raw_uav_id = (from line in str_metadata let prms = line.Split(':') where prms[0] == ";uavid" select int.Parse(prms[1])).Single(); SymbolWrapper _fmetadata = symtab["__OpenCL_" + kernel.Name + "_fmetadata"]; string[] str_fmetadata = Marshal.PtrToStringAnsi(Marshal.UnsafeAddrOfPinnedArrayElement( rodata.Data, (int)_fmetadata.st_value), (int)_fmetadata.st_size).Split('\n'); int func_id = (from line in str_fmetadata let prms = line.Split(':') where prms[0] == ";uniqueid" select int.Parse(prms[1])).Single(); // ugly, i know!!! raw_uav_id = Math.Max(raw_uav_id, 11); int arena_uav_id = raw_uav_id; byte[] code = Encoding.Convert(Encoding.Unicode, Encoding.ASCII, Encoding.Unicode.GetBytes( irprog.ToAMDIL(kernel.Name, setup_id, func_id, raw_uav_id, arena_uav_id))); SymbolWrapper _amdil = symtab["__OpenCL_" + kernel.Name + "_amdil"]; _amdil.st_value = (uint)amdilcode.Position; _amdil.st_size = (uint)code.Length; foreach (byte b in code) { amdilcode.WriteByte(b); } } amdil.Data = amdilcode.ToArray(); return(elf.BuildBinary()); }
public static Cl.Program ToGPUClProgram(this Program irprog, Cl.Device device, Cl.Context context) { byte[] code = irprog.ToGPUClBinary(device); Cl.ErrorCode error; Cl.ErrorCode[] binariesStatus = { Cl.ErrorCode.InvalidBinary }; Cl.Program program = Cl.CreateProgramWithBinary( context, 1, new[] { device }, new IntPtr[] { (IntPtr)code.Length }, new Cl.InfoBufferArray(new Cl.InfoBuffer(code)), binariesStatus, out error ); clSafeCall(error); clSafeCall(binariesStatus[0]); return(program); }