/// <summary>
/// Builds a program from <c>kernelSources</c> in the given context, creates all of its kernels,
/// and then releases the kernels and the program again. Progress messages are written to <paramref name="log"/>.
/// </summary>
/// <param name="context">Context the program is created in.</param>
/// <param name="log">Receives the progress messages and the text of any exception that is raised.</param>
public void Run(ComputeContext context, TextWriter log)
{
    try
    {
        ComputeProgram program = new ComputeProgram(context, kernelSources);
        try
        {
            program.Build(null, null, null, IntPtr.Zero);
            log.WriteLine("Program successfully built.");

            ICollection<ComputeKernel> kernels = program.CreateAllKernels();
            try
            {
                log.WriteLine("Kernels successfully created.");
            }
            finally
            {
                // cleanup kernels (also when logging above fails)
                foreach (ComputeKernel kernel in kernels)
                {
                    kernel.Dispose();
                }
                kernels.Clear();
            }
        }
        finally
        {
            // cleanup program (also when the build or the kernel creation fails)
            program.Dispose();
        }
    }
    catch (Exception e)
    {
        // Every failure, including one raised during cleanup, is reported to the log rather than rethrown.
        log.WriteLine(e.ToString());
    }
}
/// <summary>
/// Creates a program from <c>clSource</c> in the given context, stores it in the <c>program</c> member,
/// starts a build that is given the <c>notify</c> callback, and finally disposes the program.
/// </summary>
/// <param name="context">Context the program is created in.</param>
/// <param name="log">Stored in <c>this.log</c>; also receives the text of any exception raised while creating or building.</param>
public void Run(ComputeContext context, TextWriter log)
{
    this.log = log;

    // Tracks the program created by THIS call, so cleanup never dereferences a null member
    // (constructor failed) and never touches an instance left over from an earlier call.
    ComputeProgram created = null;
    try
    {
        created = new ComputeProgram(context, clSource);
        program = created;
        program.Build(null, null, notify, IntPtr.Zero);
    }
    catch (Exception e)
    {
        log.WriteLine(e.ToString());
    }
    finally
    {
        // cleanup program
        // NOTE(review): if Build can return before notify has been invoked, disposing here may race
        // with the callback - confirm against the library's Build/notify semantics.
        if (created != null)
        {
            created.Dispose();
        }
    }
}
/// <summary>
/// Loads "MonteCarloSimulate.cl", builds its "MonteCarloSimulate" kernel, runs it over three
/// sideSize x sideSize integer matrices, reads the output matrix back, spot-checks one output cell
/// against a host-side sum of products, and prints the check result and the elapsed time in milliseconds.
/// </summary>
/// <remarks>
/// All compute resources created here are released in a finally block, so they are freed even when
/// a step throws. Exceptions are not caught; they propagate to the caller.
/// </remarks>
public static void Test()
{
    string source = File.ReadAllText("MonteCarloSimulate.cl");

    //Choose Device
    ComputePlatform platform = ComputePlatform.Platforms[0];
    ComputeDevice device = platform.QueryDevices()[0];
    ComputeContextPropertyList properties = new ComputeContextPropertyList(platform);

    // Declared up front so the finally block can release exactly what was created.
    ComputeContext context = null;
    ComputeProgram program = null;
    ComputeKernel kernel = null;
    ComputeBuffer<int> bufferMatrixA = null;
    ComputeBuffer<int> bufferMatrixB = null;
    ComputeBuffer<int> bufferMatrixC = null;
    ComputeCommandQueue commands = null;

    try
    {
        //Setup of stuff on our side
        context = new ComputeContext(ComputeDeviceTypes.All, properties, null, IntPtr.Zero);

        //Build the program, which gets us the kernel
        program = new ComputeProgram(context, source);
        program.Build(null, null, null, IntPtr.Zero); //can use notify as the 3rd argument... if you want this to be non-blocking
        kernel = program.CreateKernel("MonteCarloSimulate");

        //Create arguments
        int sideSize = 4096;
        int[] inMatrixA = new int[sideSize * sideSize];
        int[] inMatrixB = new int[sideSize * sideSize];
        int[] outMatrixC = new int[sideSize * sideSize];
        Random random = new Random((int)DateTime.Now.Ticks);

        // NOTE(review): random fill only happens for small matrices; with sideSize > 32 the inputs
        // keep their zero defaults, so the check at the end compares against 0 - confirm this is intended.
        if (sideSize <= 32)
        {
            for (int y = 0; y < sideSize; y++)
            {
                for (int x = 0; x < sideSize; x++)
                {
                    inMatrixA[y * sideSize + x] = random.Next(3);
                    inMatrixB[y * sideSize + x] = random.Next(3);
                    outMatrixC[y * sideSize + x] = 0;
                }
            }
        }

        bufferMatrixA = new ComputeBuffer<int>(context, ComputeMemoryFlags.UseHostPointer, inMatrixA);
        bufferMatrixB = new ComputeBuffer<int>(context, ComputeMemoryFlags.UseHostPointer, inMatrixB);
        bufferMatrixC = new ComputeBuffer<int>(context, ComputeMemoryFlags.UseHostPointer, outMatrixC);

        // NOTE(review): the work size is derived from platform.QueryDevices()[0] while the queue below
        // uses context.Devices[0] - presumably the same device; verify.
        long localWorkSize = Math.Min(device.MaxComputeUnits, sideSize);

        //Sets arguments
        kernel.SetMemoryArgument(0, bufferMatrixA);
        kernel.SetMemoryArgument(1, bufferMatrixB);
        kernel.SetMemoryArgument(2, bufferMatrixC);
        kernel.SetLocalArgument(3, sideSize * 2);
        kernel.SetValueArgument<int>(4, sideSize);

        if (sideSize <= 32)
        {
            // Left padding printed before each row of matrix A; only needed when the matrices are printed.
            string offset = " ";
            for (int x = 0; x < sideSize; x++)
            {
                offset += " ";
            }

            for (int y = 0; y < sideSize; y++)
            {
                Console.Write(offset);
                for (int x = 0; x < sideSize; x++)
                {
                    Console.Write(inMatrixA[y * sideSize + x] + " ");
                }
                Console.WriteLine();
            }
        }

        //Runs commands
        commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

        long executionTime = DateTime.Now.Ticks;

        //Execute kernel
        //globalWorkSize in increments of localWorkSize (max of device.MaxComputeUnits or kernel.GetWorkGroupSize())
        commands.Execute(kernel, null, new long[] { Math.Min(sideSize, 16), Math.Min(sideSize, 16) }, new long[] { localWorkSize, 1 }, null);

        //globalWorkSize can be any size
        //localWorkSize product must not be greater than device.MaxComputeUnits
        //and it must not be greater than kernel.GetWorkGroupSize()
        //ESSENTIALLY, the program iterates through globalWorkSize
        //in increments of localWorkSize. Both are multidimensional,
        //but this just saves us the time of doing that
        //(1 dimension can be put to multiple if the max dimension lengths
        //are known very easily with remainder).

        //Also, you should probably use kernel.GetPreferredWorkGroupSizeMultiple(device)

        commands.Finish();

        commands.ReadFromBuffer(bufferMatrixC, ref outMatrixC, true, null);

        commands.Finish();
        executionTime = DateTime.Now.Ticks - executionTime;

        Console.WriteLine();
        if (sideSize <= 32)
        {
            // Each printed row: matrix B on the left, output matrix C on the right.
            for (int y = 0; y < sideSize; y++)
            {
                for (int x = 0; x < sideSize; x++)
                {
                    Console.Write(inMatrixB[y * sideSize + x] + " ");
                }
                Console.Write(" ");
                for (int x = 0; x < sideSize; x++)
                {
                    Console.Write(outMatrixC[y * sideSize + x] + " ");
                }
                Console.WriteLine();
            }
        }

        // Spot-check one randomly chosen output cell against a sum of products computed on the host.
        int testY = random.Next(sideSize);
        int testX = random.Next(sideSize);
        int sum = 0;
        for (int q = 0; q < sideSize; q++)
        {
            sum += inMatrixA[q * sideSize + testX] * inMatrixB[testY * sideSize + q];
        }
        Console.WriteLine(sum == outMatrixC[testY * sideSize + testX]);

        // DateTime ticks are 100 ns units, so dividing by 10000 yields milliseconds.
        Console.WriteLine(executionTime / 10000.0);
    }
    finally
    {
        // Release the compute resources deterministically instead of leaving them to the garbage collector.
        if (commands != null)
        {
            commands.Dispose();
        }
        if (kernel != null)
        {
            kernel.Dispose();
        }
        if (bufferMatrixA != null)
        {
            bufferMatrixA.Dispose();
        }
        if (bufferMatrixB != null)
        {
            bufferMatrixB.Dispose();
        }
        if (bufferMatrixC != null)
        {
            bufferMatrixC.Dispose();
        }
        if (program != null)
        {
            program.Dispose();
        }
        if (context != null)
        {
            context.Dispose();
        }
    }
}
/// <summary>
/// Adds two arrays of random floats by running the "VectorAdd" kernel from <c>clProgramSource</c>,
/// reads the result back, and writes one "a + b = c" line per element to <paramref name="log"/>.
/// </summary>
/// <param name="context">Context in which the buffers, the program and the command queue are created.</param>
/// <param name="log">Receives the result lines and the text of any exception that is raised.</param>
public void Run(ComputeContext context, TextWriter log)
{
    try
    {
        // Declared up front so the finally block can release exactly what was created,
        // even when a later step throws.
        ComputeBuffer<float> a = null;
        ComputeBuffer<float> b = null;
        ComputeBuffer<float> c = null;
        ComputeProgram builtProgram = null;
        ComputeKernel kernel = null;
        ComputeEventList eventList = null;
        ComputeCommandQueue commands = null;

        try
        {
            // Create the arrays and fill them with random data.
            int count = 10;
            float[] arrA = new float[count];
            float[] arrB = new float[count];
            float[] arrC = new float[count];

            Random rand = new Random();
            for (int i = 0; i < count; i++)
            {
                arrA[i] = (float)(rand.NextDouble() * 100);
                arrB[i] = (float)(rand.NextDouble() * 100);
            }

            // Create the input buffers and fill them with data from the arrays.
            // Access modifiers should match those in a kernel.
            // CopyHostPointer means the buffer should be filled with the data provided in the last argument.
            a = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrA);
            b = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrB);

            // The output buffer doesn't need any data from the host. Only its size is specified (arrC.Length).
            c = new ComputeBuffer<float>(context, ComputeMemoryFlags.WriteOnly, arrC.Length);

            // Create and build the opencl program.
            builtProgram = new ComputeProgram(context, clProgramSource);
            program = builtProgram;
            program.Build(null, null, null, IntPtr.Zero);

            // Create the kernel function and set its arguments.
            kernel = program.CreateKernel("VectorAdd");
            kernel.SetMemoryArgument(0, a);
            kernel.SetMemoryArgument(1, b);
            kernel.SetMemoryArgument(2, c);

            // Create the event wait list. An event list is not really needed for this example but it is important to see how it works.
            // Note that events (like everything else) consume OpenCL resources and creating a lot of them may slow down execution.
            // For this reason their use should be avoided if possible.
            eventList = new ComputeEventList();

            // Create the command queue. This is used to control kernel execution and manage read/write/copy operations.
            commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

            // Execute the kernel "count" times. After this call returns, "eventList" will contain an event associated with this command.
            // If eventList == null or typeof(eventList) == ReadOnlyCollection<ComputeEventBase>, a new event will not be created.
            commands.Execute(kernel, null, new long[] { count }, null, eventList);

            // Read back the results. If the command-queue has out-of-order execution enabled (default is off), ReadFromBuffer
            // will not execute until any previous events in eventList (in our case only eventList[0]) are marked as complete
            // by OpenCL. By default the command-queue will execute the commands in the same order as they are issued from the host.
            // eventList will contain two events after this method returns.
            commands.ReadFromBuffer(c, ref arrC, false, eventList);

            // A blocking "ReadFromBuffer" (if 3rd argument is true) will wait for itself and any previous commands
            // in the command queue or eventList to finish execution. Otherwise an explicit wait for all the opencl commands
            // to finish has to be issued before "arrC" can be used.
            // This explicit synchronization can be achieved in two ways:

            // 1) Wait for the events in the list to finish,
            //eventList.Wait();

            // 2) Or simply use
            commands.Finish();

            // Print the results to a log/console.
            for (int i = 0; i < count; i++)
            {
                log.WriteLine("{0} + {1} = {2}", arrA[i], arrB[i], arrC[i]);
            }
        }
        finally
        {
            // cleanup commands
            if (commands != null)
            {
                commands.Dispose();
            }

            // cleanup events
            if (eventList != null)
            {
                foreach (ComputeEventBase eventBase in eventList)
                {
                    eventBase.Dispose();
                }
                eventList.Clear();
            }

            // cleanup kernel
            if (kernel != null)
            {
                kernel.Dispose();
            }

            // cleanup program (only the instance created by this call)
            if (builtProgram != null)
            {
                builtProgram.Dispose();
            }

            // cleanup buffers
            if (a != null)
            {
                a.Dispose();
            }
            if (b != null)
            {
                b.Dispose();
            }
            if (c != null)
            {
                c.Dispose();
            }
        }
    }
    catch (Exception e)
    {
        // Every failure, including one raised during cleanup, is reported to the log rather than rethrown.
        log.WriteLine(e.ToString());
    }
}