public void Run(ComputeContext context, TextWriter log)
        {
            try
            {
                ComputeProgram program = new ComputeProgram(context, kernelSources);
                program.Build(null, null, null, IntPtr.Zero);
                log.WriteLine("Program successfully built.");
                ICollection<ComputeKernel> kernels = program.CreateAllKernels();
                log.WriteLine("Kernels successfully created.");

                // cleanup kernels
                foreach (ComputeKernel kernel in kernels)
                {
                    kernel.Dispose();
                }
                kernels.Clear();

                // cleanup program
                program.Dispose();
            }
            catch (Exception e)
            {
                log.WriteLine(e.ToString());
            }
        }
Exemple #2
0
        public void Run(ComputeContext context, TextWriter log)
        {
            this.log = log;
            
            try
            {
                program = new ComputeProgram(context, clSource);
                program.Build(null, null, notify, IntPtr.Zero);
            }
            catch (Exception e)
            {
                log.WriteLine(e.ToString());
            }

            // cleanup program
            program.Dispose();
        }
Exemple #3
0
        public static void Test()
        {
            string source = File.ReadAllText("MonteCarloSimulate.cl");

            //Choose Device
            ComputePlatform platform = ComputePlatform.Platforms[0];

            ComputeDevice device = platform.QueryDevices()[0];

            ComputeContextPropertyList properties =
                new ComputeContextPropertyList(platform);

            //Setup of stuff on our side
            ComputeContext context = new ComputeContext(ComputeDeviceTypes.All,
                properties, null, IntPtr.Zero);

            //Build the program, which gets us the kernel
            ComputeProgram program = new ComputeProgram(context, source);
            program.Build(null, null, null, IntPtr.Zero);
            //can use notify as the 3rd command... if you want this to be non-blocking

            ComputeKernel kernel = program.CreateKernel("MonteCarloSimulate");


            //Create arguments
            int sideSize = 4096;
            int[] inMatrixA = new int[sideSize * sideSize];
            int[] inMatrixB = new int[sideSize * sideSize];
            int[] outMatrixC = new int[sideSize * sideSize];
            Random random = new Random((int)DateTime.Now.Ticks);

            if (sideSize <= 32)
                for (int y = 0; y < sideSize; y++)
                    for (int x = 0; x < sideSize; x++)
                    {
                        inMatrixA[y * sideSize + x] = random.Next(3);
                        inMatrixB[y * sideSize + x] = random.Next(3);
                        outMatrixC[y * sideSize + x] = 0;
                    }


            ComputeBuffer<int> bufferMatrixA = new ComputeBuffer<int>(context,
                ComputeMemoryFlags.UseHostPointer, inMatrixA);

            ComputeBuffer<int> bufferMatrixB = new ComputeBuffer<int>(context,
                ComputeMemoryFlags.UseHostPointer, inMatrixB);

            ComputeBuffer<int> bufferMatrixC = new ComputeBuffer<int>(context,
                ComputeMemoryFlags.UseHostPointer, outMatrixC);

            long localWorkSize = Math.Min(device.MaxComputeUnits, sideSize);


            //Sets arguments
            kernel.SetMemoryArgument(0, bufferMatrixA);
            kernel.SetMemoryArgument(1, bufferMatrixB);
            kernel.SetMemoryArgument(2, bufferMatrixC);
            kernel.SetLocalArgument(3, sideSize * 2);
            kernel.SetValueArgument<int>(4, sideSize);
            //kernel.SetLocalArgument(1, localWorkSize);            

            string offset = " ";
            for (int x = 0; x < sideSize; x++)
                offset += "  ";

            if (sideSize <= 32)
                for (int y = 0; y < sideSize; y++)
                {
                    Console.Write(offset);
                    for (int x = 0; x < sideSize; x++)
                        Console.Write(inMatrixA[y * sideSize + x] + " ");
                    Console.WriteLine();
                }




            //Runs commands
            ComputeCommandQueue commands = new ComputeCommandQueue(context,
                context.Devices[0], ComputeCommandQueueFlags.None);

            long executionTime = DateTime.Now.Ticks;

            //Execute kernel
            //globalWorkSize in increments of localWorkSize (max of device.MaxComputeUnits or kernel.GetWorkGroupSize())
            commands.Execute(kernel, null,
                new long[] { Math.Min(sideSize, 16), Math.Min(sideSize, 16) },
                new long[] { localWorkSize, 1 }, null);

            //globalWorkSize can be any size
            //localWorkSize product much not be greater than device.MaxComputeUnits
            //and it must not be greater than kernel.GetWorkGroupSize()
            //ESSENTIALLY, the program iterates through globalWorkSize
            //in increments of localWorkSize. Both are multidimensional,
            //but this just saves us the time of doing that
            //(1 dimension can be put to multiple if the max dimension lengths
            //are known very easily with remainder).

            //Also, you should probably use this
            //kernel.GetPreferredWorkGroupSizeMultiple(device);

            commands.Finish();

            commands.ReadFromBuffer(bufferMatrixC, ref outMatrixC, true, null);

            commands.Finish();
            executionTime = DateTime.Now.Ticks - executionTime;


            GC.Collect();
            program.Dispose();

            Console.WriteLine();
            if (sideSize <= 32)
                for (int y = 0; y < sideSize; y++)
                {
                    for (int x = 0; x < sideSize; x++)
                        Console.Write(inMatrixB[y * sideSize + x] + " ");
                    Console.Write(" ");
                    for (int x = 0; x < sideSize; x++)
                        Console.Write(outMatrixC[y * sideSize + x] + " ");

                    Console.WriteLine();
                }

            int testY = random.Next(sideSize);
            int testX = random.Next(sideSize);

            int sum = 0;
            for (int q = 0; q < sideSize; q++)
                sum += inMatrixA[q * sideSize + testX] *
                    inMatrixB[testY * sideSize + q];

            Console.WriteLine(sum == outMatrixC[testY * sideSize + testX]);

            Console.WriteLine(executionTime / 10000.0);

        }
        public void Run(ComputeContext context, TextWriter log)
        {
            try
            {
                // Create the arrays and fill them with random data.
                int count = 10;
                float[] arrA = new float[count];
                float[] arrB = new float[count];
                float[] arrC = new float[count];

                Random rand = new Random();
                for (int i = 0; i < count; i++)
                {
                    arrA[i] = (float)(rand.NextDouble() * 100);
                    arrB[i] = (float)(rand.NextDouble() * 100);
                }

                // Create the input buffers and fill them with data from the arrays.
                // Access modifiers should match those in a kernel.
                // CopyHostPointer means the buffer should be filled with the data provided in the last argument.
                ComputeBuffer<float> a = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrA);
                ComputeBuffer<float> b = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrB);
                
                // The output buffer doesn't need any data from the host. Only its size is specified (arrC.Length).
                ComputeBuffer<float> c = new ComputeBuffer<float>(context, ComputeMemoryFlags.WriteOnly, arrC.Length);

                // Create and build the opencl program.
                program = new ComputeProgram(context, clProgramSource);
                program.Build(null, null, null, IntPtr.Zero);

                // Create the kernel function and set its arguments.
                ComputeKernel kernel = program.CreateKernel("VectorAdd");
                kernel.SetMemoryArgument(0, a);
                kernel.SetMemoryArgument(1, b);
                kernel.SetMemoryArgument(2, c);

                // Create the event wait list. An event list is not really needed for this example but it is important to see how it works.
                // Note that events (like everything else) consume OpenCL resources and creating a lot of them may slow down execution.
                // For this reason their use should be avoided if possible.
                ComputeEventList eventList = new ComputeEventList();
                
                // Create the command queue. This is used to control kernel execution and manage read/write/copy operations.
                ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

                // Execute the kernel "count" times. After this call returns, "eventList" will contain an event associated with this command.
                // If eventList == null or typeof(eventList) == ReadOnlyCollection<ComputeEventBase>, a new event will not be created.
                commands.Execute(kernel, null, new long[] { count }, null, eventList);
                
                // Read back the results. If the command-queue has out-of-order execution enabled (default is off), ReadFromBuffer 
                // will not execute until any previous events in eventList (in our case only eventList[0]) are marked as complete 
                // by OpenCL. By default the command-queue will execute the commands in the same order as they are issued from the host.
                // eventList will contain two events after this method returns.
                commands.ReadFromBuffer(c, ref arrC, false, eventList);

                // A blocking "ReadFromBuffer" (if 3rd argument is true) will wait for itself and any previous commands
                // in the command queue or eventList to finish execution. Otherwise an explicit wait for all the opencl commands 
                // to finish has to be issued before "arrC" can be used. 
                // This explicit synchronization can be achieved in two ways:

                // 1) Wait for the events in the list to finish,
                //eventList.Wait();

                // 2) Or simply use
                commands.Finish();

                // Print the results to a log/console.
                for (int i = 0; i < count; i++)
                    log.WriteLine("{0} + {1} = {2}", arrA[i], arrB[i], arrC[i]);

                // cleanup commands
                commands.Dispose();

                // cleanup events
                foreach (ComputeEventBase eventBase in eventList)
                {
                    eventBase.Dispose();
                }
                eventList.Clear();

                // cleanup kernel
                kernel.Dispose();

                // cleanup program
                program.Dispose();

                // cleanup buffers
                a.Dispose();
                b.Dispose();
                c.Dispose();
            }
            catch (Exception e)
            {
                log.WriteLine(e.ToString());
            }
        }