Beispiel #1
0
        private static void RunKernel(OpenCLPlatform platform, OpenCLDevice device)
        {
            var context = new OpenCLContext(new List <OpenCLDevice> {
                device
            }, new OpenCLContextPropertyList(platform), null, IntPtr.Zero);
            var program = LoadProgram(context, device, "ReductionUsingFSCLOpenCLManagedWrapper.reduction.cl");
            var kernel1 = program.CreateKernel("reductionVector");
            var kernel2 = program.CreateKernel("reductionComplete");

            const int numValues            = 1024 * 1024;
            const int numValuesPerWorkItem = 4;
            var       globalWorkSize       = numValues / numValuesPerWorkItem;
            const int localWorkSize        = 32;
            var       initialNumWorkGroups = globalWorkSize / localWorkSize;
            const int value = 42;
            var       data  = Enumerable.Repeat(value, numValues).Select(n => (float)n).ToArray();

            var commandQueue = new OpenCLCommandQueue(context, device, OpenCLCommandQueueProperties.None);

            var floatType = typeof(float);
            var floatSize = sizeof(float);

            var dataBuffer1      = new OpenCLBuffer(context, OpenCLMemoryFlags.ReadWrite | OpenCLMemoryFlags.AllocateHostPointer, floatType, new long[] { numValues });
            var dataBuffer2      = new OpenCLBuffer(context, OpenCLMemoryFlags.ReadWrite | OpenCLMemoryFlags.AllocateHostPointer, floatType, new long[] { initialNumWorkGroups *numValuesPerWorkItem });
            var sumBuffer        = new OpenCLBuffer(context, OpenCLMemoryFlags.WriteOnly | OpenCLMemoryFlags.AllocateHostPointer, floatType, new long[] { 1 });
            var resultDataBuffer = dataBuffer2;

            using (var pinnedData = new PinnedObject(data))
            {
                commandQueue.WriteToBuffer(pinnedData, dataBuffer1, true, 0L, numValues);
            }

            foreach (var index in Enumerable.Range(0, int.MaxValue))
            {
                var dataBufferIn  = index % 2 == 0 ? dataBuffer1 : dataBuffer2;
                var dataBufferOut = index % 2 == 0 ? dataBuffer2 : dataBuffer1;
                resultDataBuffer = dataBufferOut;

                kernel1.SetMemoryArgument(0, dataBufferIn);
                kernel1.SetMemoryArgument(1, dataBufferOut);
                kernel1.SetLocalArgument(2, localWorkSize * numValuesPerWorkItem * floatSize);

                Console.WriteLine($"Calling commandQueue.Execute(kernel1) with globalWorkSize: {globalWorkSize}; localWorkSize: {localWorkSize}; num work groups: {globalWorkSize / localWorkSize}");

                commandQueue.Execute(kernel1, null, new long[] { globalWorkSize }, new long[] { localWorkSize });

                globalWorkSize /= localWorkSize;
                if (globalWorkSize <= localWorkSize)
                {
                    break;
                }
            }

            kernel2.SetMemoryArgument(0, resultDataBuffer);
            kernel2.SetLocalArgument(1, globalWorkSize * numValuesPerWorkItem * floatSize);
            kernel2.SetMemoryArgument(2, sumBuffer);

            Console.WriteLine($"Calling commandQueue.Execute(kernel2) with globalWorkSize: {globalWorkSize}; localWorkSize: {globalWorkSize}");

            commandQueue.Execute(kernel2, null, new long[] { globalWorkSize }, new long[] { globalWorkSize });

            commandQueue.Finish();

            var sum = new float[1];

            using (var pinnedSum = new PinnedObject(sum))
            {
                commandQueue.ReadFromBuffer(sumBuffer, pinnedSum, true, 0L, 1L);
            }

            const int correctAnswer = numValues * value;

            Console.WriteLine($"OpenCL final answer: {Math.Truncate(sum[0]):N0}; Correct answer: {correctAnswer:N0}");
        }
        private static void RunKernel(OpenCLPlatform platform, OpenCLDevice device)
        {
            var context = new OpenCLContext(new List<OpenCLDevice> {device}, new OpenCLContextPropertyList(platform), null, IntPtr.Zero);
            var program = LoadProgram(context, device, "ReductionUsingFSCLOpenCLManagedWrapper.reduction.cl");
            var kernel1 = program.CreateKernel("reductionVector");
            var kernel2 = program.CreateKernel("reductionComplete");

            const int numValues = 1024 * 1024;
            const int numValuesPerWorkItem = 4;
            var globalWorkSize = numValues / numValuesPerWorkItem;
            const int localWorkSize = 32;
            var initialNumWorkGroups = globalWorkSize/localWorkSize;
            const int value = 42;
            var data = Enumerable.Repeat(value, numValues).Select(n => (float)n).ToArray();

            var commandQueue = new OpenCLCommandQueue(context, device, OpenCLCommandQueueProperties.None);

            var floatType = typeof (float);
            var floatSize = sizeof (float);

            var dataBuffer1 = new OpenCLBuffer(context, OpenCLMemoryFlags.ReadWrite | OpenCLMemoryFlags.AllocateHostPointer, floatType, new long[] {numValues});
            var dataBuffer2 = new OpenCLBuffer(context, OpenCLMemoryFlags.ReadWrite | OpenCLMemoryFlags.AllocateHostPointer, floatType, new long[] {initialNumWorkGroups*numValuesPerWorkItem});
            var sumBuffer = new OpenCLBuffer(context, OpenCLMemoryFlags.WriteOnly | OpenCLMemoryFlags.AllocateHostPointer, floatType, new long[] { 1 });
            var resultDataBuffer = dataBuffer2;

            using (var pinnedData = new PinnedObject(data))
            {
                commandQueue.WriteToBuffer(pinnedData, dataBuffer1, true, 0L, numValues);
            }

            foreach (var index in Enumerable.Range(0, int.MaxValue))
            {
                var dataBufferIn = index%2 == 0 ? dataBuffer1 : dataBuffer2;
                var dataBufferOut = index%2 == 0 ? dataBuffer2 : dataBuffer1;
                resultDataBuffer = dataBufferOut;

                kernel1.SetMemoryArgument(0, dataBufferIn);
                kernel1.SetMemoryArgument(1, dataBufferOut);
                kernel1.SetLocalArgument(2, localWorkSize*numValuesPerWorkItem*floatSize);

                Console.WriteLine($"Calling commandQueue.Execute(kernel1) with globalWorkSize: {globalWorkSize}; localWorkSize: {localWorkSize}; num work groups: {globalWorkSize / localWorkSize}");

                commandQueue.Execute(kernel1, null, new long[] {globalWorkSize}, new long[] {localWorkSize});

                globalWorkSize /= localWorkSize;
                if (globalWorkSize <= localWorkSize) break;
            }

            kernel2.SetMemoryArgument(0, resultDataBuffer);
            kernel2.SetLocalArgument(1, globalWorkSize*numValuesPerWorkItem*floatSize);
            kernel2.SetMemoryArgument(2, sumBuffer);

            Console.WriteLine($"Calling commandQueue.Execute(kernel2) with globalWorkSize: {globalWorkSize}; localWorkSize: {globalWorkSize}");

            commandQueue.Execute(kernel2, null, new long[] { globalWorkSize }, new long[] { globalWorkSize });

            commandQueue.Finish();

            var sum = new float[1];
            using (var pinnedSum = new PinnedObject(sum))
            {
                commandQueue.ReadFromBuffer(sumBuffer, pinnedSum, true, 0L, 1L);
            }

            const int correctAnswer = numValues * value;

            Console.WriteLine($"OpenCL final answer: {Math.Truncate(sum[0]):N0}; Correct answer: {correctAnswer:N0}");
        }