示例#1
0
        internal OpenCLEvent(CLEventHandle handle, OpenCLCommandQueue queue)
        {
            Handle = handle;
            SetID(Handle.Value);

            CommandQueue = queue;
            Type = (OpenCLCommandType)GetInfo<CLEventHandle, OpenCLEventInfo, int>(Handle, OpenCLEventInfo.CommandType, CL10.GetEventInfo);
            Context = queue.Context;

            if (OpenCLTools.ParseVersionString(CommandQueue.Device.Platform.Version, 1) > new Version(1, 0))
                HookNotifier();

            //Trace.WriteLine("Create " + this + " in Thread(" + Thread.CurrentThread.ManagedThreadId + ").", "Information");
        }
示例#2
0
        internal OpenCLEvent(CLEventHandle handle, OpenCLCommandQueue queue)
        {
            Handle = handle;
            SetID(Handle.Value);

            CommandQueue = queue;
            Type         = (OpenCLCommandType)GetInfo <CLEventHandle, OpenCLEventInfo, int>(Handle, OpenCLEventInfo.CommandType, CL10.GetEventInfo);
            Context      = queue.Context;

            if (OpenCLTools.ParseVersionString(CommandQueue.Device.Platform.Version, 1) > new Version(1, 0))
            {
                HookNotifier();
            }

            //Trace.WriteLine("Create " + this + " in Thread(" + Thread.CurrentThread.ManagedThreadId + ").", "Information");
        }
        private static void RunKernel(OpenCLPlatform platform, OpenCLDevice device)
        {
            var context = new OpenCLContext(new List<OpenCLDevice> {device}, new OpenCLContextPropertyList(platform), null, IntPtr.Zero);
            var program = LoadProgram(context, device, "ReductionUsingFSCLOpenCLManagedWrapper.reduction.cl");
            var kernel1 = program.CreateKernel("reductionVector");
            var kernel2 = program.CreateKernel("reductionComplete");

            const int numValues = 1024 * 1024;
            const int numValuesPerWorkItem = 4;
            var globalWorkSize = numValues / numValuesPerWorkItem;
            const int localWorkSize = 32;
            var initialNumWorkGroups = globalWorkSize/localWorkSize;
            const int value = 42;
            var data = Enumerable.Repeat(value, numValues).Select(n => (float)n).ToArray();

            var commandQueue = new OpenCLCommandQueue(context, device, OpenCLCommandQueueProperties.None);

            var floatType = typeof (float);
            var floatSize = sizeof (float);

            var dataBuffer1 = new OpenCLBuffer(context, OpenCLMemoryFlags.ReadWrite | OpenCLMemoryFlags.AllocateHostPointer, floatType, new long[] {numValues});
            var dataBuffer2 = new OpenCLBuffer(context, OpenCLMemoryFlags.ReadWrite | OpenCLMemoryFlags.AllocateHostPointer, floatType, new long[] {initialNumWorkGroups*numValuesPerWorkItem});
            var sumBuffer = new OpenCLBuffer(context, OpenCLMemoryFlags.WriteOnly | OpenCLMemoryFlags.AllocateHostPointer, floatType, new long[] { 1 });
            var resultDataBuffer = dataBuffer2;

            using (var pinnedData = new PinnedObject(data))
            {
                commandQueue.WriteToBuffer(pinnedData, dataBuffer1, true, 0L, numValues);
            }

            foreach (var index in Enumerable.Range(0, int.MaxValue))
            {
                var dataBufferIn = index%2 == 0 ? dataBuffer1 : dataBuffer2;
                var dataBufferOut = index%2 == 0 ? dataBuffer2 : dataBuffer1;
                resultDataBuffer = dataBufferOut;

                kernel1.SetMemoryArgument(0, dataBufferIn);
                kernel1.SetMemoryArgument(1, dataBufferOut);
                kernel1.SetLocalArgument(2, localWorkSize*numValuesPerWorkItem*floatSize);

                Console.WriteLine($"Calling commandQueue.Execute(kernel1) with globalWorkSize: {globalWorkSize}; localWorkSize: {localWorkSize}; num work groups: {globalWorkSize / localWorkSize}");

                commandQueue.Execute(kernel1, null, new long[] {globalWorkSize}, new long[] {localWorkSize});

                globalWorkSize /= localWorkSize;
                if (globalWorkSize <= localWorkSize) break;
            }

            kernel2.SetMemoryArgument(0, resultDataBuffer);
            kernel2.SetLocalArgument(1, globalWorkSize*numValuesPerWorkItem*floatSize);
            kernel2.SetMemoryArgument(2, sumBuffer);

            Console.WriteLine($"Calling commandQueue.Execute(kernel2) with globalWorkSize: {globalWorkSize}; localWorkSize: {globalWorkSize}");

            commandQueue.Execute(kernel2, null, new long[] { globalWorkSize }, new long[] { globalWorkSize });

            commandQueue.Finish();

            var sum = new float[1];
            using (var pinnedSum = new PinnedObject(sum))
            {
                commandQueue.ReadFromBuffer(sumBuffer, pinnedSum, true, 0L, 1L);
            }

            const int correctAnswer = numValues * value;

            Console.WriteLine($"OpenCL final answer: {Math.Truncate(sum[0]):N0}; Correct answer: {correctAnswer:N0}");
        }