internal OpenCLEvent(CLEventHandle handle, OpenCLCommandQueue queue) { Handle = handle; SetID(Handle.Value); CommandQueue = queue; Type = (OpenCLCommandType)GetInfo<CLEventHandle, OpenCLEventInfo, int>(Handle, OpenCLEventInfo.CommandType, CL10.GetEventInfo); Context = queue.Context; if (OpenCLTools.ParseVersionString(CommandQueue.Device.Platform.Version, 1) > new Version(1, 0)) HookNotifier(); //Trace.WriteLine("Create " + this + " in Thread(" + Thread.CurrentThread.ManagedThreadId + ").", "Information"); }
internal OpenCLEvent(CLEventHandle handle, OpenCLCommandQueue queue) { Handle = handle; SetID(Handle.Value); CommandQueue = queue; Type = (OpenCLCommandType)GetInfo <CLEventHandle, OpenCLEventInfo, int>(Handle, OpenCLEventInfo.CommandType, CL10.GetEventInfo); Context = queue.Context; if (OpenCLTools.ParseVersionString(CommandQueue.Device.Platform.Version, 1) > new Version(1, 0)) { HookNotifier(); } //Trace.WriteLine("Create " + this + " in Thread(" + Thread.CurrentThread.ManagedThreadId + ").", "Information"); }
private static void RunKernel(OpenCLPlatform platform, OpenCLDevice device) { var context = new OpenCLContext(new List<OpenCLDevice> {device}, new OpenCLContextPropertyList(platform), null, IntPtr.Zero); var program = LoadProgram(context, device, "ReductionUsingFSCLOpenCLManagedWrapper.reduction.cl"); var kernel1 = program.CreateKernel("reductionVector"); var kernel2 = program.CreateKernel("reductionComplete"); const int numValues = 1024 * 1024; const int numValuesPerWorkItem = 4; var globalWorkSize = numValues / numValuesPerWorkItem; const int localWorkSize = 32; var initialNumWorkGroups = globalWorkSize/localWorkSize; const int value = 42; var data = Enumerable.Repeat(value, numValues).Select(n => (float)n).ToArray(); var commandQueue = new OpenCLCommandQueue(context, device, OpenCLCommandQueueProperties.None); var floatType = typeof (float); var floatSize = sizeof (float); var dataBuffer1 = new OpenCLBuffer(context, OpenCLMemoryFlags.ReadWrite | OpenCLMemoryFlags.AllocateHostPointer, floatType, new long[] {numValues}); var dataBuffer2 = new OpenCLBuffer(context, OpenCLMemoryFlags.ReadWrite | OpenCLMemoryFlags.AllocateHostPointer, floatType, new long[] {initialNumWorkGroups*numValuesPerWorkItem}); var sumBuffer = new OpenCLBuffer(context, OpenCLMemoryFlags.WriteOnly | OpenCLMemoryFlags.AllocateHostPointer, floatType, new long[] { 1 }); var resultDataBuffer = dataBuffer2; using (var pinnedData = new PinnedObject(data)) { commandQueue.WriteToBuffer(pinnedData, dataBuffer1, true, 0L, numValues); } foreach (var index in Enumerable.Range(0, int.MaxValue)) { var dataBufferIn = index%2 == 0 ? dataBuffer1 : dataBuffer2; var dataBufferOut = index%2 == 0 ? dataBuffer2 : dataBuffer1; resultDataBuffer = dataBufferOut; kernel1.SetMemoryArgument(0, dataBufferIn); kernel1.SetMemoryArgument(1, dataBufferOut); kernel1.SetLocalArgument(2, localWorkSize*numValuesPerWorkItem*floatSize); Console.WriteLine($"Calling commandQueue.Execute(kernel1) with globalWorkSize: {globalWorkSize}; localWorkSize: {localWorkSize}; num work groups: {globalWorkSize / localWorkSize}"); commandQueue.Execute(kernel1, null, new long[] {globalWorkSize}, new long[] {localWorkSize}); globalWorkSize /= localWorkSize; if (globalWorkSize <= localWorkSize) break; } kernel2.SetMemoryArgument(0, resultDataBuffer); kernel2.SetLocalArgument(1, globalWorkSize*numValuesPerWorkItem*floatSize); kernel2.SetMemoryArgument(2, sumBuffer); Console.WriteLine($"Calling commandQueue.Execute(kernel2) with globalWorkSize: {globalWorkSize}; localWorkSize: {globalWorkSize}"); commandQueue.Execute(kernel2, null, new long[] { globalWorkSize }, new long[] { globalWorkSize }); commandQueue.Finish(); var sum = new float[1]; using (var pinnedSum = new PinnedObject(sum)) { commandQueue.ReadFromBuffer(sumBuffer, pinnedSum, true, 0L, 1L); } const int correctAnswer = numValues * value; Console.WriteLine($"OpenCL final answer: {Math.Truncate(sum[0]):N0}; Correct answer: {correctAnswer:N0}"); }