/// <summary>
/// Creates all potential kernels in program and stores each one in <c>_kernels</c>,
/// keyed by the kernel's function name.
/// </summary>
/// <remarks>
/// Both the kernel creation status and every per-kernel name lookup status are handed to
/// <c>CLException.CheckException</c> (presumably throwing on a non-success code; verify against
/// its definition), so a failed name query can no longer register a kernel under a bogus key.
/// </remarks>
public void CreateAllKernels()
{
    Kernel[] kernels = Cl.CreateKernelsInProgram(_program, out _error);
    CLException.CheckException(_error);

    foreach (Kernel kernel in kernels)
    {
        InfoBuffer name = Cl.GetKernelInfo(kernel, KernelInfo.FunctionName, out _error);

        // The name lookup can fail independently of kernel creation; check it before using the buffer.
        CLException.CheckException(_error);

        _kernels.Add(name.ToString(), kernel);
    }
}
/// <summary>
/// Creates a context for <paramref name="device"/>, compiles <paramref name="source"/>,
/// creates every kernel contained in the resulting program and opens a command queue.
/// </summary>
/// <param name="device">Device the program is built for and the queue is created on.</param>
/// <param name="source">OpenCL C source text to compile.</param>
/// <exception cref="Exception">
/// Thrown with the OpenCL error code as its message when program creation, the build,
/// kernel creation or queue creation reports anything other than <c>ErrorCode.Success</c>.
/// </exception>
public OpenClCompiler(Device device, string source)
{
    _device = device;
    _ctx = device.CreateContext();
    Source = source;

    _program = new Program(Cl.CreateProgramWithSource(_ctx, 1, new string[] { source }, null, out ErrorCode error));
    // Previously this status was overwritten further down without ever being inspected.
    ThrowOnFailure(error);

    // NOTE(review): assumes Cl.BuildProgram returns an ErrorCode like the other Cl calls used here - confirm.
    error = Cl.BuildProgram(_program, 1, new[] { _device }, string.Empty, null, IntPtr.Zero);
    ThrowOnFailure(error);

    KernelCount = _program.NumKernels;
    Methodes = _program.KernelNames;
    _kernels = new Kernel[KernelCount];
    ThrowOnFailure(Cl.CreateKernelsInProgram(_program, KernelCount, _kernels, out _));

    _queue = Cl.CreateCommandQueue(_ctx, _device, CommandQueueProperties.None, out error);
    ThrowOnFailure(error);

    // Same exception type and message format the kernel-creation check always used.
    void ThrowOnFailure(ErrorCode code)
    {
        if (code != ErrorCode.Success)
        {
            throw new Exception($"{code}");
        }
    }
}
/// <summary>
/// Creates a context for <paramref name="device"/>, records which shared-virtual-memory
/// capabilities the device reports, compiles <paramref name="source"/> and creates every
/// kernel contained in the resulting program.
/// </summary>
/// <param name="device">Device the program is built for.</param>
/// <param name="source">OpenCL C source text to compile.</param>
/// <exception cref="Exception">
/// Thrown with the OpenCL error code as its message when program creation, the build or
/// kernel creation reports anything other than <c>ErrorCode.Success</c>.
/// </exception>
public OpenClCompiler(Device device, string source)
{
    _device = device;
    _ctx = device.CreateContext();

    // Each capability flag is set only when its bit is present in the device's SVM mask.
    SVMCapabilities capabilities = _device.SvmCapabilities;
    IsCoarseGrainBufferSupported = (capabilities & SVMCapabilities.SvmCoarseGrainBuffer) == SVMCapabilities.SvmCoarseGrainBuffer;
    IsFineGrainBufferSupported = (capabilities & SVMCapabilities.SvmFineGrainBuffer) == SVMCapabilities.SvmFineGrainBuffer;
    IsFineGrainSystemSupported = (capabilities & SVMCapabilities.SvmFineGrainSystem) == SVMCapabilities.SvmFineGrainSystem;
    IsAtomicSupported = (capabilities & SVMCapabilities.SvmAtomics) == SVMCapabilities.SvmAtomics;

    Source = source;

    _program = new Program(Cl.CreateProgramWithSource(_ctx, 1, new string[] { source }, null, out ErrorCode error));
    ThrowOnFailure(error);

    // NOTE(review): assumes Cl.BuildProgram returns an ErrorCode like Cl.CreateKernelsInProgram does - confirm.
    error = Cl.BuildProgram(_program, 1, new[] { _device }, string.Empty, null, IntPtr.Zero);
    ThrowOnFailure(error);

    KernelCount = _program.NumKernels;
    Methodes = _program.KernelNames;
    _kernels = new Kernel[KernelCount];

    // NOTE(review): assumes this overload returns the ErrorCode (its last argument is the kernel count) - confirm.
    error = Cl.CreateKernelsInProgram(_program, KernelCount, _kernels, out _);
    ThrowOnFailure(error);

    // Surface a failed step immediately instead of continuing with an unusable program.
    void ThrowOnFailure(ErrorCode code)
    {
        if (code != ErrorCode.Success)
        {
            throw new Exception($"{code}");
        }
    }
}
/// <summary>
/// Creates a <see cref="Kernel"/> wrapper for every kernel function in <paramref name="program"/>.
/// </summary>
/// <param name="program">Program whose kernels are created; must not be <c>Program.Null</c>.</param>
/// <returns>One <see cref="Kernel"/> per handle returned by the native call.</returns>
/// <exception cref="ArgumentNullException"><paramref name="program"/> equals <c>Program.Null</c>.</exception>
public static Kernel[] CreateKernelsInProgram(Program program)
{
    if (program == Program.Null)
    {
        // Report the parameter that is actually null; this used to say "context".
        throw new ArgumentNullException("program");
    }

    unsafe
    {
        // First call: ask only for the number of kernels.
        uint num_kernels = 0;
        ClHelper.GetError(Cl.CreateKernelsInProgram(program.Handle, 0, null, &num_kernels));

        // Second call: fill a stack buffer with exactly that many native handles.
        IntPtr* kernel_ptrs = stackalloc IntPtr[(int)num_kernels];
        ClHelper.GetError(Cl.CreateKernelsInProgram(program.Handle, num_kernels, kernel_ptrs, null));

        Kernel[] kernels = new Kernel[(int)num_kernels];
        for (int i = 0; i < kernels.Length; ++i)
        {
            kernels[i] = new Kernel(kernel_ptrs[i]);
        }

        return kernels;
    }
}
/// <summary>
/// Builds the program in "kernels.cl" for <c>device</c> and prepares the first kernel, the
/// command queue and the output buffer, then binds kernel arguments 2 to 4.
/// </summary>
/// <remarks>
/// Every OpenCL status is handed to <c>allGood</c> right after the call that produced it
/// (presumably it raises on a non-success code; verify against its definition). Earlier,
/// several statuses were overwritten before being looked at and the argument statuses were
/// OR-ed together, which yields a value that matches no single error code.
/// </remarks>
/// <exception cref="Exception">The program build status is not <c>BuildStatus.Success</c>.</exception>
private void ready()
{
    ErrorCode error;

    context = Cl.CreateContext(null, 1, new[] { device }, null, IntPtr.Zero, out error);
    allGood(error);

    string source = System.IO.File.ReadAllText("kernels.cl");
    program = Cl.CreateProgramWithSource(context, 1, new[] { source }, null, out error);
    allGood(error);

    // Keep the build call's own status separate so the status query below cannot overwrite it.
    ErrorCode buildError = Cl.BuildProgram(program, 1, new[] { device }, string.Empty, null, IntPtr.Zero);

    InfoBuffer buildStatus = Cl.GetProgramBuildInfo(program, device, ProgramBuildInfo.Status, out error);
    allGood(error);
    if (buildStatus.CastTo<BuildStatus>() != BuildStatus.Success)
    {
        throw new Exception($"OpenCL could not build the kernel successfully: {buildStatus.CastTo<BuildStatus>()}");
    }
    allGood(buildError);

    Kernel[] kernels = Cl.CreateKernelsInProgram(program, out error);
    // Validate the status before touching the array it describes.
    allGood(error);
    kernel = kernels[0];

    queue = Cl.CreateCommandQueue(context, device, CommandQueueProperties.None, out error);
    allGood(error);

    dataOut = Cl.CreateBuffer(context, MemFlags.WriteOnly, (IntPtr)(globalSize * sizeof(int)), out error);
    allGood(error);

    // Arguments 0 and 1 are not bound here.
    var intSizePtr = new IntPtr(Marshal.SizeOf(typeof(int)));
    allGood(Cl.SetKernelArg(kernel, 2, new IntPtr(Marshal.SizeOf(typeof(IntPtr))), dataOut));
    allGood(Cl.SetKernelArg(kernel, 3, intSizePtr, new IntPtr(worldSeed)));
    allGood(Cl.SetKernelArg(kernel, 4, intSizePtr, new IntPtr(globalSize)));
}
/// <summary>
/// Console demo: lists the OpenCL platforms, builds three small kernels for the first device
/// of the first platform, runs <c>double_everything</c> on A and then <c>add_array</c> on
/// A, B and C over 1000 floats, and prints C.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
/// <exception cref="InvalidOperationException">
/// An OpenCL call reports a non-success status, or an expected kernel is missing from the program.
/// </exception>
static void Main(string[] args)
{
    const int elementCount = 1000;

    Console.WriteLine("Hello World!");

    uint platformCount;
    ErrorCode result = Cl.GetPlatformIDs(0, null, out platformCount);
    Console.WriteLine("{0} platforms found", platformCount);
    if (platformCount == 0)
    {
        // Nothing to run on; previously this fell through to an out-of-range index.
        return;
    }
    Check(result, "GetPlatformIDs");

    var platformIds = new Platform[platformCount];
    Check(Cl.GetPlatformIDs(platformCount, platformIds, out platformCount), "GetPlatformIDs");

    var platformCounter = 0;
    foreach (var platformId in platformIds)
    {
        // First call asks for the size of the name, second call fetches it.
        IntPtr paramSize;
        Check(Cl.GetPlatformInfo(platformId, PlatformInfo.Name, IntPtr.Zero, InfoBuffer.Empty, out paramSize), "GetPlatformInfo");
        using (var buffer = new InfoBuffer(paramSize))
        {
            // Query the platform being iterated; this used to read platformIds[0] every time.
            Check(Cl.GetPlatformInfo(platformId, PlatformInfo.Name, paramSize, buffer, out paramSize), "GetPlatformInfo");
            Console.WriteLine($"Platform {platformCounter}: {buffer}");
        }
        platformCounter++;
    }

    Console.WriteLine($"Using first platform...");
    uint deviceCount;
    result = Cl.GetDeviceIDs(platformIds[0], DeviceType.All, 0, null, out deviceCount);
    Console.WriteLine("{0} devices found", deviceCount);
    if (deviceCount == 0)
    {
        return;
    }
    Check(result, "GetDeviceIDs");

    var deviceIds = new Device[deviceCount];
    Check(Cl.GetDeviceIDs(platformIds[0], DeviceType.All, deviceCount, deviceIds, out deviceCount), "GetDeviceIDs");
    var selectedDevice = deviceIds[0];

    var context = Cl.CreateContext(null, 1, new[] { selectedDevice }, null, IntPtr.Zero, out var error);
    Check(error, "CreateContext");

    // The line breaks matter: each // comment must end at its own line or it hides the kernels.
    const string kernelSrc = @"
// Simple test; c[i] = a[i] + b[i]
__kernel void add_array(__global float *a, __global float *b, __global float *c)
{
    int xid = get_global_id(0);
    c[xid] = a[xid] + b[xid] - 1500;
}

__kernel void sub_array(__global float *a, __global float *b, __global float *c)
{
    int xid = get_global_id(0);
    c[xid] = a[xid] - b[xid] - 2000;
}

__kernel void double_everything(__global float *a)
{
    int xid = get_global_id(0);
    a[xid] = a[xid] * 2;
}
";
    var src = kernelSrc;
    Console.WriteLine("=== src ===");
    Console.WriteLine(src);
    Console.WriteLine("============");

    var program = Cl.CreateProgramWithSource(context, 1, new[] { src }, null, out var error2);
    Check(error2, "CreateProgramWithSource");

    error2 = Cl.BuildProgram(program, 1, new[] { selectedDevice }, string.Empty, null, IntPtr.Zero);
    if (error2 == ErrorCode.BuildProgramFailure)
    {
        Console.Error.WriteLine(Cl.GetProgramBuildInfo(program, selectedDevice, ProgramBuildInfo.Log, out error));
    }
    Console.WriteLine(error2);
    Check(error2, "BuildProgram");

    // Get the kernels.
    var kernels = Cl.CreateKernelsInProgram(program, out error);
    Check(error, "CreateKernelsInProgram");
    Console.WriteLine($"Program contains {kernels.Length} kernels.");

    // Look kernels up by function name rather than relying on their position in the array.
    var kernelAdd = FindKernel("add_array");
    var kernelDouble = FindKernel("double_everything");

    float[] A = new float[elementCount];
    float[] B = new float[elementCount];
    float[] C = new float[elementCount];
    for (var i = 0; i < elementCount; i++)
    {
        A[i] = i;
        B[i] = i;
    }

    // A is read and written by double_everything and C is written by add_array, so neither may be ReadOnly.
    IMem<float> hDeviceMemA = Cl.CreateBuffer(context, MemFlags.CopyHostPtr | MemFlags.ReadWrite, A, out error);
    Check(error, "CreateBuffer(A)");
    IMem<float> hDeviceMemB = Cl.CreateBuffer(context, MemFlags.CopyHostPtr | MemFlags.ReadOnly, B, out error);
    Check(error, "CreateBuffer(B)");
    IMem<float> hDeviceMemC = Cl.CreateBuffer(context, MemFlags.CopyHostPtr | MemFlags.WriteOnly, C, out error);
    Check(error, "CreateBuffer(C)");

    // Create a command queue.
    var cmdQueue = Cl.CreateCommandQueue(context, selectedDevice, CommandQueueProperties.None, out error);
    Check(error, "CreateCommandQueue");

    int intPtrSize = Marshal.SizeOf(typeof(IntPtr));
    Check(Cl.SetKernelArg(kernelDouble, 0, new IntPtr(intPtrSize), hDeviceMemA), "SetKernelArg(double, 0)");
    Check(Cl.SetKernelArg(kernelAdd, 0, new IntPtr(intPtrSize), hDeviceMemA), "SetKernelArg(add, 0)");
    Check(Cl.SetKernelArg(kernelAdd, 1, new IntPtr(intPtrSize), hDeviceMemB), "SetKernelArg(add, 1)");
    Check(Cl.SetKernelArg(kernelAdd, 2, new IntPtr(intPtrSize), hDeviceMemC), "SetKernelArg(add, 2)");

    // write data from host to device
    Event clevent;
    error = Cl.EnqueueWriteBuffer(cmdQueue, hDeviceMemA, Bool.True, IntPtr.Zero, new IntPtr(elementCount * sizeof(float)), A, 0, null, out clevent);
    Check(error, "EnqueueWriteBuffer(A)");
    error = Cl.EnqueueWriteBuffer(cmdQueue, hDeviceMemB, Bool.True, IntPtr.Zero, new IntPtr(elementCount * sizeof(float)), B, 1, new[] { clevent }, out clevent);
    Check(error, "EnqueueWriteBuffer(B)");

    // execute kernel; each command waits on the event of the one before it
    error = Cl.EnqueueNDRangeKernel(cmdQueue, kernelDouble, 1, null, new IntPtr[] { new IntPtr(elementCount) }, null, 1, new[] { clevent }, out clevent);
    Check(error, "EnqueueNDRangeKernel(double_everything)");
    error = Cl.EnqueueNDRangeKernel(cmdQueue, kernelAdd, 1, null, new IntPtr[] { new IntPtr(elementCount) }, null, 1, new[] { clevent }, out clevent);
    Console.WriteLine($"Run result: {error}");
    Check(error, "EnqueueNDRangeKernel(add_array)");

    // Blocking read: C is a managed array, and whether the wrapper keeps it pinned until a
    // non-blocking read completes cannot be confirmed from here, so wait inside the call.
    error = Cl.EnqueueReadBuffer(cmdQueue, hDeviceMemC, Bool.True, 0, C.Length, C, 1, new[] { clevent }, out clevent);
    Check(error, "EnqueueReadBuffer(C)");

    for (var i = 0; i < elementCount; i++)
    {
        Console.WriteLine($"[{i}]: {C[i]}");
    }

    Cl.ReleaseMemObject(hDeviceMemA);
    Cl.ReleaseMemObject(hDeviceMemB);
    Cl.ReleaseMemObject(hDeviceMemC);
    Cl.ReleaseCommandQueue(cmdQueue);
    foreach (var createdKernel in kernels)
    {
        Cl.ReleaseKernel(createdKernel);
    }
    program.Dispose();
    Cl.ReleaseContext(context);

    foreach (var res in typeof(SourceLoader).Assembly.GetManifestResourceNames())
    {
        Console.WriteLine(res);
    }

    // Fails fast with the name of the step that produced a non-success status.
    void Check(ErrorCode code, string step)
    {
        if (code != ErrorCode.Success)
        {
            throw new InvalidOperationException($"{step} failed: {code}");
        }
    }

    // Returns the created kernel whose function name equals the requested one.
    Kernel FindKernel(string functionName)
    {
        foreach (var candidate in kernels)
        {
            string candidateName = Cl.GetKernelInfo(candidate, KernelInfo.FunctionName, out ErrorCode lookupError).ToString();
            Check(lookupError, "GetKernelInfo");
            if (candidateName == functionName)
            {
                return candidate;
            }
        }

        throw new InvalidOperationException($"Kernel '{functionName}' was not found in the program.");
    }
}
// Partially from OpenTK demo - Submitted by "mfagerlund"
//
// Builds the add_array/sub_array program, runs add_array over 12 floats and verifies
// that every element of C equals A[i] + B[i].
public void AddArrayAddsCorrectly()
{
    // The line breaks matter: the leading // comment must end before the first kernel starts.
    const string correctSource = @"
// Simple test; c[i] = a[i] + b[i]
__kernel void add_array(__global float *a, __global float *b, __global float *c)
{
    int xid = get_global_id(0);
    c[xid] = a[xid] + b[xid];
}

__kernel void sub_array(__global float *a, __global float *b, __global float *c)
{
    int xid = get_global_id(0);
    c[xid] = a[xid] - b[xid];
}
";
    ErrorCode error;

    using (Program program = Cl.CreateProgramWithSource(_context, 1, new[] { correctSource }, null, out error))
    {
        Assert.AreEqual(ErrorCode.Success, error);
        error = Cl.BuildProgram(program, 1, new[] { _device }, string.Empty, null, IntPtr.Zero);
        Assert.AreEqual(ErrorCode.Success, error);
        Assert.AreEqual(BuildStatus.Success, Cl.GetProgramBuildInfo(program, _device, ProgramBuildInfo.Status, out error).CastTo<BuildStatus>());

        Kernel[] kernels = Cl.CreateKernelsInProgram(program, out error);
        Assert.AreEqual(ErrorCode.Success, error);

        // Pick add_array by name so the test does not depend on the order kernels are returned in.
        Kernel kernel = default(Kernel);
        bool foundAddArray = false;
        foreach (Kernel candidate in kernels)
        {
            string candidateName = Cl.GetKernelInfo(candidate, KernelInfo.FunctionName, out error).ToString();
            Assert.AreEqual(ErrorCode.Success, error);
            if (candidateName == "add_array")
            {
                kernel = candidate;
                foundAddArray = true;
                break;
            }
        }
        Assert.AreEqual(true, foundAddArray, "add_array kernel was not created");

        const int cnBlockSize = 4;
        const int cnBlocks = 3;
        IntPtr cnDimension = new IntPtr(cnBlocks * cnBlockSize);
        int elementCount = cnDimension.ToInt32();
        IntPtr byteCount = new IntPtr(elementCount * sizeof(float));

        // allocate host vectors
        float[] A = new float[elementCount];
        float[] B = new float[elementCount];
        float[] C = new float[elementCount];

        // initialize host memory; a fixed seed keeps any failure reproducible
        Random rand = new Random(12345);
        for (int i = 0; i < A.Length; i++)
        {
            A[i] = rand.Next() % 256;
            B[i] = rand.Next() % 256;
        }

        IMem<float> hDeviceMemA = Cl.CreateBuffer(_context, MemFlags.CopyHostPtr | MemFlags.ReadOnly, A, out error);
        Assert.AreEqual(ErrorCode.Success, error);
        IMem hDeviceMemB = Cl.CreateBuffer(_context, MemFlags.CopyHostPtr | MemFlags.ReadOnly, byteCount, B, out error);
        Assert.AreEqual(ErrorCode.Success, error);
        IMem hDeviceMemC = Cl.CreateBuffer(_context, MemFlags.WriteOnly, byteCount, IntPtr.Zero, out error);
        Assert.AreEqual(ErrorCode.Success, error);

        CommandQueue cmdQueue = Cl.CreateCommandQueue(_context, _device, (CommandQueueProperties)0, out error);
        Assert.AreEqual(ErrorCode.Success, error);

        Event clevent;
        int intPtrSize = Marshal.SizeOf(typeof(IntPtr));

        // setup parameter values
        error = Cl.SetKernelArg(kernel, 0, new IntPtr(intPtrSize), hDeviceMemA);
        Assert.AreEqual(ErrorCode.Success, error);
        error = Cl.SetKernelArg(kernel, 1, new IntPtr(intPtrSize), hDeviceMemB);
        Assert.AreEqual(ErrorCode.Success, error);
        error = Cl.SetKernelArg(kernel, 2, new IntPtr(intPtrSize), hDeviceMemC);
        Assert.AreEqual(ErrorCode.Success, error);

        // write data from host to device
        error = Cl.EnqueueWriteBuffer(cmdQueue, hDeviceMemA, Bool.True, IntPtr.Zero, byteCount, A, 0, null, out clevent);
        Assert.AreEqual(ErrorCode.Success, error);
        error = Cl.EnqueueWriteBuffer(cmdQueue, hDeviceMemB, Bool.True, IntPtr.Zero, byteCount, B, 0, null, out clevent);
        Assert.AreEqual(ErrorCode.Success, error);

        // execute kernel
        error = Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 1, null, new IntPtr[] { cnDimension }, null, 0, null, out clevent);
        Assert.AreEqual(ErrorCode.Success, error, error.ToString());

        // copy results from device back to host (blocking read)
        error = Cl.EnqueueReadBuffer(cmdQueue, hDeviceMemC, Bool.True, 0, C.Length, C, 0, null, out clevent);
        Assert.AreEqual(ErrorCode.Success, error, error.ToString());

        for (int i = 0; i < A.Length; i++)
        {
            // Actual value first, expected value in the constraint.
            Assert.That(C[i], Is.EqualTo(A[i] + B[i]));
        }

        Cl.Finish(cmdQueue);

        Cl.ReleaseMemObject(hDeviceMemA);
        Cl.ReleaseMemObject(hDeviceMemB);
        Cl.ReleaseMemObject(hDeviceMemC);
        foreach (Kernel createdKernel in kernels)
        {
            createdKernel.Dispose();
        }
    }
}
// Checks that a program with a compile error fails to build and reports BuildStatus.Error,
// and that a correct program builds, answers program-info queries and yields exactly the
// add_array and sub_array kernels.
public void ProgramAndKernelTests()
{
    // The line breaks matter: each // comment must end at its own line or it hides the kernel code.
    const string correctSource = @"
// Simple test; c[i] = a[i] + b[i]
__kernel void add_array(__global float *a, __global float *b, __global float *c)
{
    int xid = get_global_id(0);
    c[xid] = a[xid] + b[xid];
}

__kernel void sub_array(__global float *a, __global float *b, __global float *c)
{
    int xid = get_global_id(0);
    c[xid] = a[xid] - b[xid];
}
";
    const string sourceWithErrors = @"
// Erroneous kernel
__kernel void add_array(__global float *a, __global float *b, __global float *c)
{
    foo(); // <-- Error right here!
    int xid = get_global_id(0);
    c[xid] = a[xid] + b[xid];
}";
    ErrorCode error;

    using (Program program = Cl.CreateProgramWithSource(_context, 1, new[] { sourceWithErrors }, null, out error))
    {
        Assert.AreEqual(ErrorCode.Success, error);

        error = Cl.BuildProgram(program, 1, new[] { _device }, string.Empty, null, IntPtr.Zero);
        Assert.AreNotEqual(ErrorCode.Success, error);

        Assert.AreEqual(BuildStatus.Error, Cl.GetProgramBuildInfo(program, _device, ProgramBuildInfo.Status, out error).CastTo<BuildStatus>());

        Console.WriteLine("There were error(s) compiling the provided kernel");
        Console.WriteLine(Cl.GetProgramBuildInfo(program, _device, ProgramBuildInfo.Log, out error));
    }

    using (Program program = Cl.CreateProgramWithSource(_context, 1, new[] { correctSource }, null, out error))
    {
        Assert.AreEqual(ErrorCode.Success, error);

        error = Cl.BuildProgram(program, 1, new[] { _device }, string.Empty, null, IntPtr.Zero);
        Assert.AreEqual(ErrorCode.Success, error);

        Assert.AreEqual(BuildStatus.Success, Cl.GetProgramBuildInfo(program, _device, ProgramBuildInfo.Status, out error).CastTo<BuildStatus>());
        Assert.AreEqual(ErrorCode.Success, error);

        // Try to get information from the program
        Assert.AreEqual(_context, Cl.GetProgramInfo(program, ProgramInfo.Context, out error).CastTo<Context>());
        Assert.AreEqual(ErrorCode.Success, error);
        Assert.AreEqual(1, Cl.GetProgramInfo(program, ProgramInfo.NumDevices, out error).CastTo<int>());
        Assert.AreEqual(ErrorCode.Success, error);
        Assert.AreEqual(_device, Cl.GetProgramInfo(program, ProgramInfo.Devices, out error).CastTo<Device>(0));
        Assert.AreEqual(ErrorCode.Success, error);

        Console.WriteLine("Program source was:");
        Console.WriteLine(Cl.GetProgramInfo(program, ProgramInfo.Source, out error));

        Kernel kernel = Cl.CreateKernel(program, "add_array", out error);
        Assert.AreEqual(ErrorCode.Success, error);
        kernel.Dispose();

        Kernel[] kernels = Cl.CreateKernelsInProgram(program, out error);
        Assert.AreEqual(ErrorCode.Success, error);
        Assert.AreEqual(2, kernels.Length);

        // Compare the sorted names so the check does not depend on the order kernels are returned in.
        string[] kernelNames = new string[kernels.Length];
        for (int i = 0; i < kernels.Length; i++)
        {
            kernelNames[i] = Cl.GetKernelInfo(kernels[i], KernelInfo.FunctionName, out error).ToString();
            Assert.AreEqual(ErrorCode.Success, error);
        }
        Array.Sort(kernelNames, StringComparer.Ordinal);
        Assert.AreEqual("add_array", kernelNames[0]);
        Assert.AreEqual("sub_array", kernelNames[1]);

        foreach (Kernel createdKernel in kernels)
        {
            createdKernel.Dispose();
        }
    }
}
// Builds "kernels.cl" for the first GPU of the AMD platform, binds three output buffers and
// four int arguments to the first kernel, runs it with 3000 work items and reads the first
// output buffer back into xr.
public void Prototype()
{
    ErrorCode error;
    Device device = (from d in Cl.GetDeviceIDs(
                         (from platform in Cl.GetPlatformIDs(out error)
                          where Cl.GetPlatformInfo(platform, PlatformInfo.Name, out error).ToString() == "AMD Accelerated Parallel Processing"
                          // Use "NVIDIA CUDA" if you don't have amd
                          select platform).First(),
                         DeviceType.Gpu, out error)
                     select d).First();

    Context context = Cl.CreateContext(null, 1, new[] { device }, null, IntPtr.Zero, out error);
    Assert.AreEqual(ErrorCode.Success, error);

    string source = System.IO.File.ReadAllText("kernels.cl");

    int chunkHalfLength = 300000000;
    int worldSeed = 420;
    int workItems = 3000;
    int outputAllocation = 100;

    IntPtr outputSize = new IntPtr(workItems * outputAllocation);
    int outputLength = outputSize.ToInt32();
    // All three device buffers hold ints, so they share one byte size.
    IntPtr outputBytes = (IntPtr)(sizeof(int) * outputLength);
    var xr = new int[outputLength];

    using (Program program = Cl.CreateProgramWithSource(context, 1, new[] { source }, null, out error))
    {
        Assert.AreEqual(ErrorCode.Success, error);
        error = Cl.BuildProgram(program, 1, new[] { device }, "", null, IntPtr.Zero);
        Assert.AreEqual(ErrorCode.Success, error);
        var buildInfo = Cl.GetProgramBuildInfo(program, device, ProgramBuildInfo.Status, out error).CastTo<BuildStatus>();
        Assert.AreEqual(BuildStatus.Success, buildInfo);
        Assert.AreEqual(ErrorCode.Success, error);

        Kernel[] kernels = Cl.CreateKernelsInProgram(program, out error);
        Assert.AreEqual(ErrorCode.Success, error);
        Kernel kernel = kernels[0];

        IMem hDeviceMemXr = Cl.CreateBuffer(context, MemFlags.WriteOnly, outputBytes, IntPtr.Zero, out error);
        Assert.AreEqual(ErrorCode.Success, error);
        IMem hDeviceMemZr = Cl.CreateBuffer(context, MemFlags.WriteOnly, outputBytes, IntPtr.Zero, out error);
        Assert.AreEqual(ErrorCode.Success, error);
        IMem hDeviceMemSc = Cl.CreateBuffer(context, MemFlags.WriteOnly, outputBytes, IntPtr.Zero, out error);
        Assert.AreEqual(ErrorCode.Success, error);

        CommandQueue cmdQueue = Cl.CreateCommandQueue(context, device, (CommandQueueProperties)0, out error);
        Assert.AreEqual(ErrorCode.Success, error);

        int intPtrSize = Marshal.SizeOf(typeof(IntPtr));
        int intSize = Marshal.SizeOf(typeof(int));

        error = Cl.SetKernelArg(kernel, 0, new IntPtr(intPtrSize), hDeviceMemXr);
        Assert.AreEqual(ErrorCode.Success, error);
        error = Cl.SetKernelArg(kernel, 1, new IntPtr(intPtrSize), hDeviceMemZr);
        Assert.AreEqual(ErrorCode.Success, error);
        error = Cl.SetKernelArg(kernel, 2, new IntPtr(intPtrSize), hDeviceMemSc);
        Assert.AreEqual(ErrorCode.Success, error);
        error = Cl.SetKernelArg(kernel, 3, new IntPtr(intSize), new IntPtr(chunkHalfLength));
        Assert.AreEqual(ErrorCode.Success, error);
        error = Cl.SetKernelArg(kernel, 4, new IntPtr(intSize), new IntPtr(worldSeed));
        Assert.AreEqual(ErrorCode.Success, error);
        error = Cl.SetKernelArg(kernel, 5, new IntPtr(intSize), new IntPtr(workItems));
        Assert.AreEqual(ErrorCode.Success, error);
        error = Cl.SetKernelArg(kernel, 6, new IntPtr(intSize), new IntPtr(outputAllocation));
        Assert.AreEqual(ErrorCode.Success, error);

        // Upload the zero-initialised host array; sized with the int byte count the buffer was created with.
        error = Cl.EnqueueWriteBuffer(cmdQueue, hDeviceMemXr, Bool.True, IntPtr.Zero, outputBytes, xr, 0, null, out Event clevent);
        Assert.AreEqual(ErrorCode.Success, error);

        error = Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 1, null, new IntPtr[] { new IntPtr(workItems) }, null, 0, null, out clevent);
        // This status used to be overwritten by the read below without being checked.
        Assert.AreEqual(ErrorCode.Success, error, error.ToString());

        error = Cl.EnqueueReadBuffer(cmdQueue, hDeviceMemXr, Bool.True, 0, xr.Length, xr, 0, null, out clevent);
        Assert.AreEqual(ErrorCode.Success, error, error.ToString());

        Cl.Finish(cmdQueue);

        Cl.ReleaseMemObject(hDeviceMemXr);
        Cl.ReleaseMemObject(hDeviceMemZr);
        Cl.ReleaseMemObject(hDeviceMemSc);
        foreach (Kernel createdKernel in kernels)
        {
            Cl.ReleaseKernel(createdKernel);
        }
        Cl.ReleaseCommandQueue(cmdQueue);
    }

    Cl.ReleaseContext(context);
}
// Builds "kernels.cl" for the first GPU of the AMD platform and runs its first kernel twice
// over a 1024 x 1024 range (first with arguments 0 and 1 set to 0, then to 16383). After each
// pass it prints every result greater than 50 and the time that pass took.
public void TestSlimeFinder()
{
    const int squareLength = 1024;
    int globalSize = squareLength * squareLength;
    var candidates = new int[globalSize];
    IntPtr outputBytes = (IntPtr)(globalSize * sizeof(int));

    ErrorCode error;
    Device device = (from d in Cl.GetDeviceIDs(
                         (from platform in Cl.GetPlatformIDs(out error)
                          where Cl.GetPlatformInfo(platform, PlatformInfo.Name, out error).ToString() == "AMD Accelerated Parallel Processing"
                          // Use "NVIDIA CUDA" if you don't have amd
                          select platform).First(),
                         DeviceType.Gpu, out error)
                     select d).First();

    Context context = Cl.CreateContext(null, 1, new[] { device }, null, IntPtr.Zero, out error);
    Assert.AreEqual(ErrorCode.Success, error);

    string source = System.IO.File.ReadAllText("kernels.cl");

    // Disposed automatically at the end of the method, so there is no explicit ReleaseProgram below.
    using Program program = Cl.CreateProgramWithSource(context, 1, new[] { source }, null, out error);
    Assert.AreEqual(ErrorCode.Success, error);

    error = Cl.BuildProgram(program, 1, new[] { device }, string.Empty, null, IntPtr.Zero);
    Assert.AreEqual(ErrorCode.Success, error);
    InfoBuffer buildStatus = Cl.GetProgramBuildInfo(program, device, ProgramBuildInfo.Status, out error);
    Assert.AreEqual(ErrorCode.Success, error);
    Assert.AreEqual(BuildStatus.Success, buildStatus.CastTo<BuildStatus>());

    Kernel[] kernels = Cl.CreateKernelsInProgram(program, out error);
    // Validate the status before indexing into the array it describes.
    Assert.AreEqual(ErrorCode.Success, error);
    Kernel kernel = kernels[0];

    CommandQueue queue = Cl.CreateCommandQueue(context, device, CommandQueueProperties.None, out error);
    Assert.AreEqual(ErrorCode.Success, error);

    IMem dataOut = Cl.CreateBuffer(context, MemFlags.WriteOnly, outputBytes, out error);
    Assert.AreEqual(ErrorCode.Success, error);

    // Arguments 2 to 4 stay the same for both passes; each status is asserted on its own
    // because OR-ing error codes together does not produce a meaningful code.
    var intSizePtr = new IntPtr(Marshal.SizeOf(typeof(int)));
    Assert.AreEqual(ErrorCode.Success, Cl.SetKernelArg(kernel, 2, new IntPtr(Marshal.SizeOf(typeof(IntPtr))), dataOut));
    Assert.AreEqual(ErrorCode.Success, Cl.SetKernelArg(kernel, 3, intSizePtr, new IntPtr(420)));
    Assert.AreEqual(ErrorCode.Success, Cl.SetKernelArg(kernel, 4, intSizePtr, new IntPtr(globalSize)));

    // Round the global size up to a whole number of work groups using integer arithmetic.
    int localSize = 256;
    int paddedGlobalSize = (globalSize + localSize - 1) / localSize * localSize;

    var stopW = new Stopwatch();

    RunPass(0, 0);
    RunPass(16383, 16383);

    foreach (Kernel createdKernel in kernels)
    {
        Cl.ReleaseKernel(createdKernel);
    }
    Cl.ReleaseMemObject(dataOut);
    Cl.ReleaseCommandQueue(queue);
    Cl.ReleaseContext(context);

    // Binds kernel arguments 0 and 1, runs the kernel once, then prints the results and the pass time.
    void RunPass(int arg0, int arg1)
    {
        Assert.AreEqual(ErrorCode.Success, Cl.SetKernelArg(kernel, 0, intSizePtr, new IntPtr(arg0)));
        Assert.AreEqual(ErrorCode.Success, Cl.SetKernelArg(kernel, 1, intSizePtr, new IntPtr(arg1)));

        // Restart so the second pass does not report the first pass's time on top of its own.
        stopW.Restart();
        ErrorCode passError = Cl.EnqueueNDRangeKernel(queue, kernel, 1, null, new IntPtr[] { new IntPtr(paddedGlobalSize) }, new IntPtr[] { new IntPtr(localSize) }, 0, null, out Event clevent);
        Assert.AreEqual(ErrorCode.Success, passError);
        Cl.Finish(queue);
        stopW.Stop();

        passError = Cl.EnqueueReadBuffer(queue, dataOut, Bool.True, IntPtr.Zero, outputBytes, candidates, 0, null, out clevent);
        Assert.AreEqual(ErrorCode.Success, passError);

        candidates.ForEach(c =>
        {
            if (c > 50)
            {
                Console.Write($"{c},");
            }
        });
        Console.WriteLine($"\n{stopW.ElapsedMilliseconds} ms");
    }
}
// Adapted from
// https://github.com/rsnemmen/OpenCL-examples/blob/master/square_array/square.cl
//
// Builds "squared.cl" for the first GPU of the AMD platform, runs its first kernel over
// 100000 floats (0, 1, 2, ...) and prints the output buffer.
public void SquareArray()
{
    int array_size = 100000;
    var bytes = (IntPtr)(array_size * sizeof(float));
    var hdata = new float[array_size];
    var houtput = new float[array_size];
    for (int i = 0; i < array_size; i++)
    {
        hdata[i] = 1.0f * i;
    }

    ErrorCode error;
    Device device = (from d in Cl.GetDeviceIDs(
                         (from platform in Cl.GetPlatformIDs(out error)
                          where Cl.GetPlatformInfo(platform, PlatformInfo.Name, out error).ToString() == "AMD Accelerated Parallel Processing"
                          // Use "NVIDIA CUDA" if you don't have amd
                          select platform).First(),
                         DeviceType.Gpu, out error)
                     select d).First();

    Context context = Cl.CreateContext(null, 1, new[] { device }, null, IntPtr.Zero, out error);
    Assert.AreEqual(ErrorCode.Success, error);

    string source = System.IO.File.ReadAllText("squared.cl");

    // The using block disposes the program, so it is not released a second time below.
    using (Program program = Cl.CreateProgramWithSource(context, 1, new[] { source }, null, out error))
    {
        Assert.AreEqual(ErrorCode.Success, error);
        error = Cl.BuildProgram(program, 1, new[] { device }, string.Empty, null, IntPtr.Zero);
        Assert.AreEqual(ErrorCode.Success, error);
        Assert.AreEqual(BuildStatus.Success, Cl.GetProgramBuildInfo(program, device, ProgramBuildInfo.Status, out error).CastTo<BuildStatus>());

        Kernel[] kernels = Cl.CreateKernelsInProgram(program, out error);
        Assert.AreEqual(ErrorCode.Success, error);
        Kernel kernel = kernels[0];

        CommandQueue cmdQueue = Cl.CreateCommandQueue(context, device, (CommandQueueProperties)0, out error);
        Assert.AreEqual(ErrorCode.Success, error);

        IMem ddata = Cl.CreateBuffer(context, MemFlags.ReadOnly, bytes, null, out error);
        Assert.AreEqual(ErrorCode.Success, error);
        IMem doutput = Cl.CreateBuffer(context, MemFlags.WriteOnly, bytes, null, out error);
        Assert.AreEqual(ErrorCode.Success, error);

        error = Cl.EnqueueWriteBuffer(cmdQueue, ddata, Bool.True, (IntPtr)0, bytes, hdata, 0, null, out Event clevent);
        Assert.AreEqual(ErrorCode.Success, error);

        IntPtr memArgSize = new IntPtr(Marshal.SizeOf(typeof(IntPtr)));
        Assert.AreEqual(ErrorCode.Success, Cl.SetKernelArg(kernel, 0, memArgSize, ddata));
        Assert.AreEqual(ErrorCode.Success, Cl.SetKernelArg(kernel, 1, memArgSize, doutput));
        Assert.AreEqual(ErrorCode.Success, Cl.SetKernelArg(kernel, 2, new IntPtr(Marshal.SizeOf(typeof(int))), new IntPtr(array_size)));

        // The work-group size is a pointer-sized value written INTO the info buffer. The last
        // out parameter is only the byte size of that value, which is why reading it gave 8.
        IntPtr sizeTBytes = new IntPtr(IntPtr.Size);
        int maxWorkGroupSize;
        using (var workGroupInfo = new InfoBuffer(sizeTBytes))
        {
            error = Cl.GetKernelWorkGroupInfo(kernel, device, KernelWorkGroupInfo.WorkGroupSize, sizeTBytes, workGroupInfo, out IntPtr reportedBytes);
            Assert.AreEqual(ErrorCode.Success, error);
            maxWorkGroupSize = workGroupInfo.CastTo<IntPtr>().ToInt32();
        }

        // Never ask for more work items per group than this kernel supports on this device.
        int local_size = Math.Min(256, maxWorkGroupSize);
        // Round the global size up to a whole number of work groups.
        int global_size = (array_size + local_size - 1) / local_size * local_size;

        error = Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 1, null, new IntPtr[] { new IntPtr(global_size) }, new IntPtr[] { new IntPtr(local_size) }, 0, null, out clevent);
        Assert.AreEqual(ErrorCode.Success, error);
        Cl.Finish(cmdQueue);

        error = Cl.EnqueueReadBuffer(cmdQueue, doutput, Bool.True, IntPtr.Zero, bytes, houtput, 0, null, out clevent);
        Assert.AreEqual(ErrorCode.Success, error);

        houtput.ForEach(o => Console.Write($"{o}, "));

        foreach (Kernel createdKernel in kernels)
        {
            Cl.ReleaseKernel(createdKernel);
        }
        Cl.ReleaseMemObject(ddata);
        Cl.ReleaseMemObject(doutput);
        Cl.ReleaseCommandQueue(cmdQueue);
    }

    Cl.ReleaseContext(context);
}