Example #1
0
        /// <summary>
        /// Builds the program embedded as "RunKernel.sum.cl", creates its "sum" kernel,
        /// runs it through <c>KernelRunner.RunKernel</c> with two input arrays and one
        /// output array of <c>size</c> floats, and prints the first and last output elements.
        /// </summary>
        /// <param name="context">OpenCL context used to build the program and wrap the arrays.</param>
        /// <param name="device">Device the program is built for and the kernel is run on.</param>
        private static void RunKernel(Context context, Device device)
        {
            const string resourceName = "RunKernel.sum.cl";
            const int size = 1024;

            var source = ProgramUtils.GetProgramSourceFromResource(Assembly.GetExecutingAssembly(), resourceName);
            // NOTE(review): the program is not released here because its ownership after
            // BuildProgramForDevice is not visible from this method - confirm against ProgramUtils.
            var program = ProgramUtils.BuildProgramForDevice(context, device, source);

            ErrorCode errorCode;
            var kernel = Cl.CreateKernel(program, "sum", out errorCode);
            errorCode.Check("CreateKernel");

            var floatsC = new float[size];

            try
            {
                // Both inputs hold the values 1..size as floats.
                var floatsA = Enumerable.Range(1, size).Select(n => (float)n).ToArray();
                var floatsB = Enumerable.Range(1, size).Select(n => (float)n).ToArray();

                using (var mem1 = new PinnedArrayOfStruct<float>(context, floatsA))
                using (var mem2 = new PinnedArrayOfStruct<float>(context, floatsB))
                using (var mem3 = new PinnedArrayOfStruct<float>(context, floatsC, MemMode.WriteOnly))
                {
                    // NOTE(review): the meaning of the 'new[] { 2 }' argument is defined by
                    // KernelRunner.RunKernel, which is not visible here - confirm.
                    KernelRunner.RunKernel(context, device, kernel, size, new[] { 2 }, mem1, mem2, mem3);
                }
            }
            finally
            {
                // The kernel was created by this method, so it is released here.
                // The status is deliberately not checked so that a release failure cannot
                // mask an exception that is already propagating from the try block.
                // NOTE(review): assumes KernelRunner.RunKernel does not release the kernel itself.
                Cl.ReleaseKernel(kernel);
            }

            Console.WriteLine($"floatsC[0]: {floatsC[0]}");
            Console.WriteLine($"floatsC[{size - 1}]: {floatsC[size - 1]}");
        }
Example #2
0
        /// <summary>
        /// Runs a multi-pass reduction over <c>numValues</c> floats. The "reductionVector"
        /// kernel is enqueued repeatedly, alternating between two buffers, until the global
        /// work size no longer exceeds the work-group size; "reductionComplete" is then
        /// enqueued once and its single-float output is read back. Profiling timings and the
        /// final answer are written to the console.
        /// </summary>
        /// <param name="context">OpenCL context used for the buffers and the command queue.</param>
        /// <param name="device">Device the program is built for and the command queue is created on.</param>
        private static void Reduction(Context context, Device device)
        {
            const string resourceName = "ReductionVectorComplete.reduction.cl";

            var source = ProgramUtils.GetProgramSourceFromResource(Assembly.GetExecutingAssembly(), resourceName);
            // NOTE(review): the program is not released here because its ownership after
            // BuildProgramForDevice is not visible from this method - confirm against ProgramUtils.
            var program = ProgramUtils.BuildProgramForDevice(context, device, source);

            const int numValues = 1024 * 1024;
            const int numValuesPerWorkItem = 4;
            // The work-group size is a fixed constant rather than being queried from the device.
            const int localWorkSize = 32;
            var globalWorkSize = numValues / numValuesPerWorkItem;

            const int value = 42;
            const int correctAnswer = numValues * value;

            var data1 = Enumerable.Repeat((float)value, numValues).ToArray();
            // Zero-initialised; holds numValuesPerWorkItem floats per work group of the first pass.
            var data2 = new float[globalWorkSize / localWorkSize * numValuesPerWorkItem];
            var sum = new float[1];

            ErrorCode errorCode;

            var kernel1 = Cl.CreateKernel(program, "reductionVector", out errorCode);
            errorCode.Check("CreateKernel");

            try
            {
                var kernel2 = Cl.CreateKernel(program, "reductionComplete", out errorCode);
                errorCode.Check("CreateKernel");

                try
                {
                    // memData1 and memData2 swap input/output roles on every pass, so both
                    // must be readable and writable by the kernel. Reading a buffer created
                    // write-only from inside a kernel is undefined in OpenCL.
                    // NOTE(review): assumes MemMode maps onto the OpenCL memory access flags - confirm.
                    using (var memData1 = new PinnedArrayOfStruct<float>(context, data1, MemMode.ReadWrite))
                    using (var memData2 = new PinnedArrayOfStruct<float>(context, data2, MemMode.ReadWrite))
                    using (var memSum = new PinnedArrayOfStruct<float>(context, sum, MemMode.WriteOnly))
                    {
                        var commandQueue = Cl.CreateCommandQueue(context, device, CommandQueueProperties.ProfilingEnable, out errorCode);
                        errorCode.Check("CreateCommandQueue");

                        // Every event handed back by an enqueue call is tracked so that it
                        // can be released once the work is done.
                        var enqueuedEvents = new List<Event>();

                        try
                        {
                            var memResult = memData2;

                            // NOTE(review): presumably sizes the kernel's local scratch argument to
                            // numValuesPerWorkItem floats per work item - confirm against reduction.cl.
                            errorCode = Cl.SetKernelArg<float>(kernel1, 2, localWorkSize * numValuesPerWorkItem);
                            errorCode.Check("SetKernelArg(2)");

                            var pass = 0;
                            do
                            {
                                // Even passes go memData1 -> memData2, odd passes the other way round.
                                var isEvenPass = pass % 2 == 0;
                                var memDataIn = isEvenPass ? memData1 : memData2;
                                var memDataOut = isEvenPass ? memData2 : memData1;
                                memResult = memDataOut;

                                errorCode = Cl.SetKernelArg(kernel1, 0, memDataIn.Buffer);
                                errorCode.Check("SetKernelArg(0)");

                                errorCode = Cl.SetKernelArg(kernel1, 1, memDataOut.Buffer);
                                errorCode.Check("SetKernelArg(1)");

                                Console.WriteLine($"Calling EnqueueNDRangeKernel(kernel1) with globalWorkSize: {globalWorkSize}; localWorkSize: {localWorkSize}; num work groups: {globalWorkSize/localWorkSize}");
                                Event e;
                                errorCode = Cl.EnqueueNDRangeKernel(
                                    commandQueue,
                                    kernel1,
                                    1, // workDim
                                    null, // globalWorkOffset
                                    new[] { (IntPtr)globalWorkSize },
                                    new[] { (IntPtr)localWorkSize },
                                    0, // numEventsInWaitList
                                    null, // eventWaitList
                                    out e);
                                errorCode.Check("EnqueueNDRangeKernel");
                                enqueuedEvents.Add(e);

                                // The next pass runs one work item per work group of this pass.
                                globalWorkSize /= localWorkSize;
                                pass++;
                            }
                            while (globalWorkSize > localWorkSize);

                            // Captured before any further events are added to the list.
                            var firstKernel1Event = enqueuedEvents.First();

                            errorCode = Cl.SetKernelArg(kernel2, 0, memResult.Buffer);
                            errorCode.Check("SetKernelArg(0)");

                            errorCode = Cl.SetKernelArg<float>(kernel2, 1, localWorkSize * numValuesPerWorkItem);
                            errorCode.Check("SetKernelArg(1)");

                            errorCode = Cl.SetKernelArg(kernel2, 2, memSum.Buffer);
                            errorCode.Check("SetKernelArg(2)");

                            Console.WriteLine($"Calling EnqueueNDRangeKernel(kernel2) with globalWorkSize: {globalWorkSize}");
                            Event kernel2Event;
                            errorCode = Cl.EnqueueNDRangeKernel(
                                commandQueue,
                                kernel2,
                                1, // workDim
                                null, // globalWorkOffset
                                new[] { (IntPtr)globalWorkSize },
                                /*
                                 * Force the use of a single work group by setting 'localWorkSize'
                                 * to be the same as 'globalWorkSize'. Without doing this,
                                 * "Experimental OpenCL 2.0 CPU Only Platform" seems to use 8 work groups,
                                 * each with a local size of 1, which means that we end up with an incorrect
                                 * result.
                                 */
                                new[] { (IntPtr)globalWorkSize },
                                0, // numEventsInWaitList
                                null, // eventWaitList
                                out kernel2Event);
                            errorCode.Check("EnqueueNDRangeKernel");
                            enqueuedEvents.Add(kernel2Event);

                            // Non-blocking read; the Finish call below waits for it to complete.
                            Event readEvent;
                            errorCode = Cl.EnqueueReadBuffer(
                                commandQueue,
                                memSum.Buffer,
                                Bool.False, // blockingRead
                                IntPtr.Zero, // offsetInBytes
                                (IntPtr)memSum.Size,
                                memSum.Handle,
                                0, // numEventsInWaitList
                                null, // eventWaitList
                                out readEvent);
                            errorCode.Check("EnqueueReadBuffer");
                            enqueuedEvents.Add(readEvent);

                            errorCode = Cl.Finish(commandQueue);
                            errorCode.Check("Finish");

                            // Kernel time spans from the start of the first kernel1 pass to the end of kernel2.
                            var start1 = Cl.GetEventProfilingInfo(firstKernel1Event, ProfilingInfo.Start, out errorCode).CastTo<long>();
                            errorCode.Check("GetEventProfilingInfo(ProfilingInfo.Start)");
                            var end1 = Cl.GetEventProfilingInfo(kernel2Event, ProfilingInfo.End, out errorCode).CastTo<long>();
                            errorCode.Check("GetEventProfilingInfo(ProfilingInfo.End)");

                            var start2 = Cl.GetEventProfilingInfo(readEvent, ProfilingInfo.Start, out errorCode).CastTo<long>();
                            errorCode.Check("GetEventProfilingInfo(ProfilingInfo.Start)");
                            var end2 = Cl.GetEventProfilingInfo(readEvent, ProfilingInfo.End, out errorCode).CastTo<long>();
                            errorCode.Check("GetEventProfilingInfo(ProfilingInfo.End)");

                            Console.WriteLine($"kernel1/kernel2 elapsed time: {end1 - start1:N0}ns");
                            Console.WriteLine($"read buffer elapsed time: {end2 - start2:N0}ns");
                        }
                        finally
                        {
                            // Release statuses are deliberately not checked so that a cleanup
                            // failure cannot mask an exception propagating from the try block.
                            foreach (var enqueuedEvent in enqueuedEvents)
                            {
                                Cl.ReleaseEvent(enqueuedEvent);
                            }

                            Cl.ReleaseCommandQueue(commandQueue);
                        }
                    }
                }
                finally
                {
                    Cl.ReleaseKernel(kernel2);
                }
            }
            finally
            {
                Cl.ReleaseKernel(kernel1);
            }

            Console.WriteLine($"OpenCL final answer: {Math.Truncate(sum[0]):N0}; Correct answer: {correctAnswer:N0}");
        }
Example #3
0
        /// <summary>
        /// Runs the "reductionScalar" kernel once over <c>numValues</c> floats using the
        /// work-group size reported for the kernel on the given device, reads back one
        /// result per work group, sums those on the host, and prints profiling timings
        /// together with the final answer.
        /// </summary>
        /// <param name="context">OpenCL context used for the buffers and the command queue.</param>
        /// <param name="device">Device the program is built for and the command queue is created on.</param>
        private static void Reduction(Context context, Device device)
        {
            const string resourceName = "ReductionScalar.reduction.cl";

            var source = ProgramUtils.GetProgramSourceFromResource(Assembly.GetExecutingAssembly(), resourceName);
            // NOTE(review): the program is not released here because its ownership after
            // BuildProgramForDevice is not visible from this method - confirm against ProgramUtils.
            var program = ProgramUtils.BuildProgramForDevice(context, device, source);

            ErrorCode errorCode;
            var kernel = Cl.CreateKernel(program, "reductionScalar", out errorCode);
            errorCode.Check("CreateKernel");

            try
            {
                const int numValues = 1024 * 1024;
                const int numValuesPerWorkItem = 1;
                const int globalWorkSize = numValues / numValuesPerWorkItem;

                var localWorkSize = Cl.GetKernelWorkGroupInfo(kernel, device, KernelWorkGroupInfo.WorkGroupSize, out errorCode).CastTo<int>();
                errorCode.Check("GetKernelWorkGroupInfo(KernelWorkGroupInfo.WorkGroupSize)");
                Console.WriteLine($"localWorkSize: {localWorkSize}");

                // NOTE(review): integer division - assumes localWorkSize evenly divides globalWorkSize.
                var numWorkGroups = globalWorkSize / localWorkSize;

                const int value = 42;
                const int correctAnswer = numValues * value;

                var data = Enumerable.Repeat((float)value, globalWorkSize).ToArray();
                var workGroupResults = new float[numWorkGroups * numValuesPerWorkItem];

                using (var mem1 = new PinnedArrayOfStruct<float>(context, data))
                using (var mem2 = new PinnedArrayOfStruct<float>(context, workGroupResults, MemMode.WriteOnly))
                {
                    var commandQueue = Cl.CreateCommandQueue(context, device, CommandQueueProperties.ProfilingEnable, out errorCode);
                    errorCode.Check("CreateCommandQueue");

                    // Every event handed back by an enqueue call is tracked so that it can be
                    // released once the work is done. The type is fully qualified so that the
                    // method does not depend on an extra using directive.
                    var enqueuedEvents = new System.Collections.Generic.List<Event>();

                    try
                    {
                        errorCode = Cl.SetKernelArg(kernel, 0, mem1.Buffer);
                        errorCode.Check("SetKernelArg(0)");

                        // NOTE(review): presumably sizes the kernel's local scratch argument to one
                        // float per work item in a group - confirm against reduction.cl.
                        errorCode = Cl.SetKernelArg<float>(kernel, 1, localWorkSize);
                        errorCode.Check("SetKernelArg(1)");

                        errorCode = Cl.SetKernelArg(kernel, 2, mem2.Buffer);
                        errorCode.Check("SetKernelArg(2)");

                        Event e1;
                        errorCode = Cl.EnqueueNDRangeKernel(
                            commandQueue,
                            kernel,
                            1, // workDim
                            null, // globalWorkOffset
                            new[] {(IntPtr) globalWorkSize},
                            new[] {(IntPtr) localWorkSize},
                            0, // numEventsInWaitList
                            null, // eventWaitList
                            out e1);
                        errorCode.Check("EnqueueNDRangeKernel");
                        enqueuedEvents.Add(e1);

                        // Non-blocking read; completion is awaited through WaitForEvents below.
                        Event e2;
                        errorCode = Cl.EnqueueReadBuffer(
                            commandQueue,
                            mem2.Buffer,
                            Bool.False, // blockingRead
                            IntPtr.Zero, // offsetInBytes
                            (IntPtr)mem2.Size,
                            mem2.Handle,
                            0, // numEventsInWaitList
                            null, // eventWaitList
                            out e2);
                        errorCode.Check("EnqueueReadBuffer");
                        enqueuedEvents.Add(e2);

                        var evs = new[] {e2};
                        errorCode = Cl.WaitForEvents((uint)evs.Length, evs);
                        errorCode.Check("WaitForEvents");

                        var start1 = Cl.GetEventProfilingInfo(e1, ProfilingInfo.Start, out errorCode).CastTo<long>();
                        errorCode.Check("GetEventProfilingInfo(ProfilingInfo.Start)");
                        var end1 = Cl.GetEventProfilingInfo(e1, ProfilingInfo.End, out errorCode).CastTo<long>();
                        errorCode.Check("GetEventProfilingInfo(ProfilingInfo.End)");

                        var start2 = Cl.GetEventProfilingInfo(e2, ProfilingInfo.Start, out errorCode).CastTo<long>();
                        errorCode.Check("GetEventProfilingInfo(ProfilingInfo.Start)");
                        var end2 = Cl.GetEventProfilingInfo(e2, ProfilingInfo.End, out errorCode).CastTo<long>();
                        errorCode.Check("GetEventProfilingInfo(ProfilingInfo.End)");

                        Console.WriteLine($"e1 elapsed time: {end1 - start1:N0}ns");
                        Console.WriteLine($"e2 elapsed time: {end2 - start2:N0}ns");
                    }
                    finally
                    {
                        // Release statuses are deliberately not checked so that a cleanup
                        // failure cannot mask an exception propagating from the try block.
                        foreach (var enqueuedEvent in enqueuedEvents)
                        {
                            Cl.ReleaseEvent(enqueuedEvent);
                        }

                        Cl.ReleaseCommandQueue(commandQueue);
                    }
                }

                // The per-work-group partial results are combined on the host.
                var finalAnswer = Math.Truncate(workGroupResults.Sum());
                Console.WriteLine($"OpenCL final answer: {finalAnswer:N0}; Correct answer: {correctAnswer:N0}");
            }
            finally
            {
                // The kernel was created by this method, so it is released here; the status
                // is ignored for the same reason as the other cleanup calls.
                Cl.ReleaseKernel(kernel);
            }
        }