예제 #1
0
        /// <summary>
        /// Runs the explicit kernel over <paramref name="input"/> and copies the results into <paramref name="output"/>.
        /// </summary>
        /// <param name="input">The explicit input; one work item is launched per element.</param>
        /// <param name="output">The output values; must hold at least <c>input.Length</c> elements.</param>
        /// <exception cref="Exception">Thrown when the context or the kernel has not been created yet.</exception>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="input"/> or <paramref name="output"/> is null.</exception>
        /// <exception cref="ArgumentException">Thrown when <paramref name="output"/> is shorter than <paramref name="input"/>.</exception>
        public void GetValues(Single3[] input, ref float[] output)
        {
            if (context == null || kernelExplicit == null)
            {
                throw new Exception("Compile first!");
            }

            if (input == null)
            {
                throw new ArgumentNullException("input");
            }

            if (output == null)
            {
                throw new ArgumentNullException("output");
            }

            int inputLength = input.Length;

            // The read below writes inputLength floats straight into pinned memory,
            // so a short output array would be overrun natively.
            if (output.Length < inputLength)
            {
                throw new ArgumentException("The output array must hold at least as many elements as the input.", "output");
            }

            // Nothing to compute, and a zero-sized buffer cannot be created.
            if (inputLength == 0)
            {
                return;
            }

            // IO length changed: the cached output buffer no longer fits.
            if (lastLength != inputLength)
            {
                outputBuffer = new Cloo.ComputeBuffer<float>(context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.AllocateHostPointer, inputLength);
                kernelExplicit.SetMemoryArgument(2, outputBuffer);

                // Only remember the new length once the buffer is actually in place.
                lastLength = inputLength;
            }

            // The input buffer is only needed for this call; dispose it afterwards instead of leaking it.
            using (ComputeBuffer<Single3> bufIn = new Cloo.ComputeBuffer<Single3>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, input))
            {
                // Arrange params
                kernelExplicit.SetMemoryArgument(0, bufIn);

                // Exec and read
                queue.Execute(kernelExplicit, null, new long[] { inputLength }, null, null);

                GCHandle outHandle = GCHandle.Alloc(output, GCHandleType.Pinned);
                try
                {
                    // Read saves about 500 - 1000 ticks. Sweet for small queues
                    queue.Read<float>(outputBuffer, true, 0, inputLength, outHandle.AddrOfPinnedObject(), null);
                }
                finally
                {
                    // Always unpin, even when the read throws.
                    outHandle.Free();
                }

                queue.Finish();
            }
        }
예제 #2
0
        /// <summary>
        /// Copies a device buffer into a float array using a blocking read.
        /// </summary>
        ///
        /// <param name="sourceBuffer">The source buffer.</param>
        /// <param name="target">The target array; <c>target.Length</c> elements are read into it.</param>
        public void Buffer2Array(ComputeBuffer<float> sourceBuffer, float[] target)
        {
            // Pin the array so the native read can write directly into it.
            GCHandle arrCHandle = GCHandle.Alloc(target, GCHandleType.Pinned);

            try
            {
                commands.Read(sourceBuffer, true, 0, target.Length, arrCHandle.AddrOfPinnedObject(), null);
            }
            finally
            {
                // Release the pin even when the read throws, otherwise the array stays pinned forever.
                arrCHandle.Free();
            }
        }
예제 #3
0
파일: Laser.cs 프로젝트: omarcin96/L3DS
        /// <summary>
        /// Computes min and max values with OpenCL and reads them back into two arrays
        /// that each hold <c>frame.Cols</c> entries.
        /// </summary>
        /// <param name="frame">The frame handed to the kernel; its column count sizes the launch and the results.</param>
        /// <param name="maxValues">Receives the max values, or stays null if the computation fails early.</param>
        /// <param name="minValues">Receives the min values, or stays null if the computation fails early.</param>
        /// <param name="windowValue">Window value forwarded to <c>MinMaxCL.UpdateArguments</c>.</param>
        public void GetMinMaxValuesCL(Mat frame, out int[] maxValues, out int[] minValues, int windowValue)
        {
            maxValues = null;
            minValues = null;

            try
            {
                MinMaxCL.UpdateArguments(frame, clooCtx, ctxMinMaxKernel, windowValue);

                // Execute the kernel with one work item per column.
                queue.Execute(ctxMinMaxKernel, null, new long[] { frame.Cols }, null, null);

                // Max values.
                maxValues = new int[frame.Cols];
                GCHandle maxHandle = GCHandle.Alloc(maxValues, GCHandleType.Pinned);
                try
                {
                    queue.Read(MinMaxCL.maxBufferCB, true, 0, maxValues.Length, maxHandle.AddrOfPinnedObject(), null);
                }
                finally
                {
                    // The handle was never freed before, which kept the array pinned forever.
                    maxHandle.Free();
                }

                // Min values.
                minValues = new int[frame.Cols];
                GCHandle minHandle = GCHandle.Alloc(minValues, GCHandleType.Pinned);
                try
                {
                    queue.Read(MinMaxCL.minBufferCB, true, 0, minValues.Length, minHandle.AddrOfPinnedObject(), null);
                }
                finally
                {
                    minHandle.Free();
                }

                // End of the OpenCL computation.
                queue.Finish();
            }
            catch (Exception ex)
            {
                MessageBox.Show(ex.Message);
            }
        }
예제 #4
0
    /// <summary>
    /// Demo entry point: runs the "Calc" kernel on two small integer rows on the first GPU
    /// of the first OpenCL platform and prints the resulting row.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        int[] r1      = { 8, 2, 3, 4 };
        int[] r2      = { 4, 3, 2, 5 };
        int[] r3      = new int[r1.Length];
        int   rowSize = r1.Length;

        // pick first platform
        ComputePlatform platform = ComputePlatform.Platforms[0];

        // create context with all gpu devices, a command queue on the first gpu found,
        // and the program from the opencl source
        using (ComputeContext context = new ComputeContext(ComputeDeviceTypes.Gpu,
                                                           new ComputeContextPropertyList(platform), null, IntPtr.Zero))
        using (ComputeCommandQueue queue = new ComputeCommandQueue(context,
                                                                   context.Devices[0], ComputeCommandQueueFlags.None))
        using (ComputeProgram program = new ComputeProgram(context, CalculateKernel))
        {
            // compile opencl source
            program.Build(null, null, null, IntPtr.Zero);

            // Inputs are copied to the device: the managed arrays are not kept pinned after
            // buffer creation, so the device must not keep referring to their host addresses.
            using (ComputeKernel kernel = program.CreateKernel("Calc"))
            using (ComputeBuffer<int> row1Buffer = new ComputeBuffer<int>(context,
                                                                          ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, r1))
            using (ComputeBuffer<int> row2Buffer = new ComputeBuffer<int>(context,
                                                                          ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, r2))
            // The kernel writes the result, so the buffer must not be ReadOnly; it is
            // allocated on the device instead of aliasing a throw-away host array.
            using (ComputeBuffer<int> resultBuffer = new ComputeBuffer<int>(context,
                                                                            ComputeMemoryFlags.WriteOnly, r3.Length))
            {
                kernel.SetMemoryArgument(0, row1Buffer);   // set the integer array
                kernel.SetMemoryArgument(1, row2Buffer);   // set the integer array
                kernel.SetValueArgument(2, rowSize);       // set the array size
                kernel.SetMemoryArgument(3, resultBuffer); // set the integer array

                // execute kernel
                queue.ExecuteTask(kernel, null);
                // wait for completion
                queue.Finish();

                GCHandle arrCHandle = GCHandle.Alloc(r3, GCHandleType.Pinned);
                try
                {
                    queue.Read<int>(resultBuffer, true, 0, r3.Length, arrCHandle.AddrOfPinnedObject(), null);
                }
                finally
                {
                    // Unpin as soon as the blocking read is done (or has failed).
                    arrCHandle.Free();
                }

                Console.WriteLine("display result from gpu buffer:");
                for (int i = 0; i < r3.Length; i++)
                {
                    Console.WriteLine(r3[i]);
                }
            }
        }

        Console.WriteLine("Finished");
        Console.ReadKey();
    }
예제 #5
0
        /// <summary>
        /// Multiplies two integer matrices on the GPU by launching the "Multiply" kernel once per result cell.
        /// </summary>
        /// <param name="matrixOne">Left matrix as a list of rows.</param>
        /// <param name="matrixTwo">Right matrix as a list of rows.</param>
        /// <returns>The product matrix and the time spent enqueuing and running the kernels.</returns>
        /// <exception cref="ArgumentException">Thrown when a matrix is not regular or the dimensions are incompatible.</exception>
        /// <exception cref="PlatformNotSupportedException">Thrown when <c>GetGPU</c> finds no platform.</exception>
        public static Tuple<List<List<int>>, TimeSpan> MultiplyParallel(List<List<int>> matrixOne, List<List<int>> matrixTwo)
        {
            if (!isRegularMatrix(matrixOne) || !isRegularMatrix(matrixTwo))
            {
                throw new ArgumentException("Non regular matrix detected. Rows size mismatch detected.");
            }
            if (matrixOne[0].Count != matrixTwo.Count)
            {
                throw new ArgumentException("Matrixes is not compatible. Columns count of first matrix is not equal to rows count of second matrix.");
            }

            ComputePlatform platform = GetGPU();

            if (platform is null)
            {
                throw new PlatformNotSupportedException("Platform doesn't have a dedicated GPU. Run is impossible.");
            }

            List<List<int>> result    = new List<List<int>>();
            Stopwatch       stopwatch = new Stopwatch();

            // Every OpenCL object below wraps a native handle; release them deterministically.
            using (ComputeContext context = new ComputeContext(ComputeDeviceTypes.Gpu, new ComputeContextPropertyList(platform), null, IntPtr.Zero))
            using (ComputeCommandQueue queue = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None))
            using (ComputeProgram program = new ComputeProgram(context, CalculateKernel))
            {
                program.Build(null, null, null, IntPtr.Zero);

                List<ComputeBuffer<int>> rowsMatrixOne    = null;
                List<ComputeBuffer<int>> columnsMatrixTwo = null;
                List<ComputeBuffer<int>> resultRowsMatrix = null;

                try
                {
                    using (ComputeKernel kernel = program.CreateKernel("Multiply"))
                    {
                        rowsMatrixOne    = matrixOne.TransformMatrixToComputerBuffersOfRows(context);
                        columnsMatrixTwo = matrixTwo.TransformMatrixToComputerBuffersOfColumns(context);
                        resultRowsMatrix = TwoDToOneDResult(matrixOne.Count, matrixTwo[0].Count, context);

                        stopwatch.Start();

                        // The shared dimension is the same for every launch.
                        kernel.SetValueArgument(3, matrixTwo.Count);

                        for (int i = 0; i < resultRowsMatrix.Count; ++i)
                        {
                            // Row i of the left matrix and of the result stay fixed for the whole inner loop.
                            kernel.SetMemoryArgument(0, rowsMatrixOne[i]);
                            kernel.SetMemoryArgument(2, resultRowsMatrix[i]);

                            for (int j = 0; j < resultRowsMatrix[i].Count; ++j)
                            {
                                kernel.SetMemoryArgument(1, columnsMatrixTwo[j]);
                                kernel.SetValueArgument(4, j);

                                queue.ExecuteTask(kernel, null);
                            }
                        }

                        queue.Finish();
                        stopwatch.Stop();

                        for (int i = 0; i < resultRowsMatrix.Count; ++i)
                        {
                            int[]    res      = new int[resultRowsMatrix[i].Count];
                            GCHandle gCHandle = GCHandle.Alloc(res, GCHandleType.Pinned);
                            try
                            {
                                queue.Read<int>(resultRowsMatrix[i], true, 0, res.Length, gCHandle.AddrOfPinnedObject(), null);
                            }
                            finally
                            {
                                // Previously never freed: one permanently pinned array per result row.
                                gCHandle.Free();
                            }
                            result.Add(new List<int>(res));
                        }
                    }
                }
                finally
                {
                    DisposeComputeBuffers(rowsMatrixOne);
                    DisposeComputeBuffers(columnsMatrixTwo);
                    DisposeComputeBuffers(resultRowsMatrix);
                }
            }

            return new Tuple<List<List<int>>, TimeSpan>(result, stopwatch.Elapsed);
        }

        /// <summary>
        /// Disposes every buffer in the list; a null list or null entries are ignored.
        /// </summary>
        private static void DisposeComputeBuffers(List<ComputeBuffer<int>> buffers)
        {
            if (buffers == null)
            {
                return;
            }

            foreach (ComputeBuffer<int> buffer in buffers)
            {
                if (buffer != null)
                {
                    buffer.Dispose();
                }
            }
        }
예제 #6
0
        /// <summary>
        /// Runs <paramref name="kernel"/> as a single work item and reads back one element of <paramref name="result"/>.
        /// </summary>
        /// <param name="num">Value passed to the kernel as argument 1.</param>
        /// <param name="result">Device buffer bound as kernel argument 0 and read back afterwards.</param>
        /// <param name="kernel">The kernel to execute.</param>
        /// <returns>A one-element array holding the first element of the result buffer.</returns>
        private TFP[] RunKernalTest(float num, ComputeBuffer<TFP> result, ComputeKernel kernel)
        {
            kernel.SetMemoryArgument(0, result);
            kernel.SetValueArgument(1, num);

            // BUG: ATI Stream v2.2 crash if event list not null.
            commands.Execute(kernel, null, new long[] { 1, 1 }, null, events);

            TFP[] myresult = new TFP[1];

            GCHandle arrCHandle = GCHandle.Alloc(myresult, GCHandleType.Pinned);

            try
            {
                commands.Read(result, true, 0, 1, arrCHandle.AddrOfPinnedObject(), events);
            }
            finally
            {
                // Unpin even when the read throws so the array is not pinned forever.
                arrCHandle.Free();
            }

            return myresult;
        }
예제 #7
0
        /// <summary>
        /// Multiplies two flattened matrices on the OpenCL device.
        /// </summary>
        /// <param name="matrix1">Elements of the first matrix (at least height x shared dimension values).</param>
        /// <param name="matrix2">Elements of the second matrix (at least shared dimension x width values).</param>
        /// <param name="matrix1Height">Row count of the first matrix and of the result.</param>
        /// <param name="matrix1WidthMatrix2Height">Shared dimension: columns of the first matrix, rows of the second.</param>
        /// <param name="matrix2Width">Column count of the second matrix and of the result.</param>
        /// <returns>A new array of <c>matrix1Height * matrix2Width</c> result elements.</returns>
        /// <exception cref="ArgumentNullException">Thrown when an input array is null.</exception>
        /// <exception cref="ArgumentException">Thrown when a dimension is not positive or an array is too small for its dimensions.</exception>
        public float[] MultiplyMatrices(float[] matrix1, float[] matrix2,
                                        int matrix1Height, int matrix1WidthMatrix2Height, int matrix2Width)
        {
            if (matrix1 == null)
            {
                throw new ArgumentNullException("matrix1");
            }

            if (matrix2 == null)
            {
                throw new ArgumentNullException("matrix2");
            }

            if (matrix1Height <= 0 || matrix1WidthMatrix2Height <= 0 || matrix2Width <= 0)
            {
                throw new ArgumentException("Matrix dimensions must be positive.");
            }

            // An undersized array would make the kernel read past the end of the device buffer.
            if ((long)matrix1Height * matrix1WidthMatrix2Height > matrix1.Length)
            {
                throw new ArgumentException("The array is smaller than the given dimensions.", "matrix1");
            }

            if ((long)matrix1WidthMatrix2Height * matrix2Width > matrix2.Length)
            {
                throw new ArgumentException("The array is smaller than the given dimensions.", "matrix2");
            }

            if (!_initialized)
            {
                Initialize();
                _initialized = true;
            }

            float[] ret = new float[matrix1Height * matrix2Width];

            // 'using' releases the device buffers even when execution or the read fails.
            using (ComputeBuffer<float> matrix1Buffer = new ComputeBuffer<float>(_context,
                                                                                 ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer,
                                                                                 matrix1))
            using (ComputeBuffer<float> matrix2Buffer = new ComputeBuffer<float>(_context,
                                                                                 ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer,
                                                                                 matrix2))
            using (ComputeBuffer<float> retBuffer = new ComputeBuffer<float>(_context,
                                                                             ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer,
                                                                             ret))
            {
                _kernel.SetMemoryArgument(0, matrix1Buffer);
                _kernel.SetMemoryArgument(1, matrix2Buffer);
                _kernel.SetMemoryArgument(2, retBuffer);

                _kernel.SetValueArgument<int>(3, matrix1WidthMatrix2Height);
                _kernel.SetValueArgument<int>(4, matrix2Width);

                // The global work size is two-dimensional, so the offset must be either null or
                // also two-dimensional. The former one-element offset was read out of bounds;
                // null means "start at zero in every dimension".
                _commandQueue.Execute(_kernel,
                                      null,
                                      new long[] { matrix2Width, matrix1Height },
                                      null, null);

                unsafe
                {
                    fixed (float* retPtr = ret)
                    {
                        // Non-blocking read; Finish() must complete while 'ret' is still pinned.
                        _commandQueue.Read(retBuffer,
                                           false, 0,
                                           ret.Length,
                                           new IntPtr(retPtr),
                                           null);

                        _commandQueue.Finish();
                    }
                }
            }

            return ret;
        }
예제 #8
0
        /// <summary>
        /// Reads the <c>NextImage</c> device image into <c>NextImageVector</c> with a blocking read
        /// and wraps the data in an Rgba128Float bitmap.
        /// </summary>
        /// <returns>A bitmap of <c>Width</c> x <c>Height</c> pixels built from the downloaded data.</returns>
        private BitmapSource ReadImageDataFromGPUMemory()
        {
            unsafe
            {
                // Four float channels per pixel on the host side.
                var hostRowPitch = Width * 4 * sizeof(float);
                var origin       = new SysIntX3(0, 0, 0);
                var region       = new SysIntX3(Width, Height, 1);

                fixed (float* pixels = NextImageVector)
                {
                    CQ.Read(NextImage, true, origin, region, hostRowPitch, 0, (IntPtr)pixels, null);
                }
            }

            var bytesPerPixel = PixelFormats.Rgba128Float.BitsPerPixel / 8;
            var stride        = bytesPerPixel * Width;

            return BitmapSource.Create(Width, Height, 96, 96, PixelFormats.Rgba128Float, null, NextImageVector, stride);
        }
예제 #9
0
        /// <summary>
        /// Runs the "doubleVectorSum" kernel from Examples/SumTest.cl on two double vectors and
        /// checks that element i of the first buffer reads back as -i / 90 (i.e. i/10 + (-i/9)).
        /// </summary>
        public void DoubleSumTest()
        {
            var    factory      = new OpenCL200Factory();
            string kernelSource = File.ReadAllText("Examples/SumTest.cl");
            int    length       = 2000;
            var    left         = new double[length];
            var    right        = new double[length];
            var    sums         = new double[length];

            // left[k] = k / 10, right[k] = -k / 9
            for (int k = 0; k < length; k++)
            {
                left[k]  = k / 10.0;
                right[k] = -k / 9.0;
            }

            var contextProperties = new List<ComputeContextProperty>
            {
                new ComputeContextProperty(ComputeContextPropertyName.Platform, Device.Platform.Handle.Value)
            };

            using (var context = factory.CreateContext(ComputeDeviceTypes.All, contextProperties, null, IntPtr.Zero))
            using (var program = factory.BuildComputeProgram(context, kernelSource))
            {
                var targetDevices = new List<IComputeDevice>() { Device };

                program.Build(targetDevices, "", null, IntPtr.Zero);
                var sumKernel = factory.CreateKernel(program, "doubleVectorSum");

                using (ComputeBuffer<double> leftBuffer = new ComputeBuffer<double>(context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.UseHostPointer, left))
                using (ComputeBuffer<double> rightBuffer = new ComputeBuffer<double>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, right))
                {
                    sumKernel.SetMemoryArgument(0, leftBuffer);
                    sumKernel.SetMemoryArgument(1, rightBuffer);

                    using (var commandQueue = new ComputeCommandQueue(context, Device, ComputeCommandQueueFlags.None))
                    {
                        // One work item per element, then a blocking read of the first buffer.
                        commandQueue.Execute(sumKernel, null, new long[] { length }, null, null);
                        sums = commandQueue.Read(leftBuffer, true, 0, length, null);
                    }
                }
            }

            for (int k = 0; k < length; k++)
            {
                Assert.AreEqual(-k / 90.0, sums[k], 1E-13);
            }
        }
예제 #10
0
        /// <summary>
        /// Runs the "MatrixMul" kernel from demos\cls\matrix_mul.cl on the first device of the first
        /// OpenCL platform and prints how long the execution plus the result read-back took.
        /// </summary>
        /// <exception cref="InvalidOperationException">Thrown when no OpenCL platform or device is available.</exception>
        public void OpenClMul()
        {
            // Select the device
            var platform = ComputePlatform.Platforms.FirstOrDefault();
            if (platform == null)
            {
                throw new InvalidOperationException("No OpenCL platform is available.");
            }

            var device = platform.Devices.FirstOrDefault();
            if (device == null)
            {
                throw new InvalidOperationException("The OpenCL platform exposes no devices.");
            }

            var properties = new ComputeContextPropertyList(platform);
            var code       = File.ReadAllText(@"demos\cls\matrix_mul.cl");
            int rank       = Rank;

            // All OpenCL objects wrap native handles; 'using' releases them even on failure.
            using (var context = new ComputeContext(new[] { device }, properties, null, IntPtr.Zero))
            using (var commands = new ComputeCommandQueue(context, context.Devices[0],
                                                          ComputeCommandQueueFlags.None))
            using (var program = new ComputeProgram(context, code))
            {
                // Build errors propagate to the caller unchanged.
                program.Build(new[] { device }, null, null, IntPtr.Zero);

                using (var kernel = program.CreateKernel("MatrixMul"))
                using (var result = new ComputeBuffer<int>(context, ComputeMemoryFlags.WriteOnly, rank * rank))
                using (var matrix = CreateMatrix(context, rank))
                {
                    kernel.SetMemoryArgument(0, result);
                    kernel.SetMemoryArgument(1, matrix);
                    kernel.SetValueArgument(2, rank);
                    Console.WriteLine($"Platform: {platform.Name}\n Device: {device.Name}\n Size: {rank}x{rank}");
                    Stopwatch sw = Stopwatch.StartNew();

                    commands.Execute(kernel, null, new long[] { rank, rank }, null, null);
                    int[] resultArray = new int[rank * rank];
                    var   arrHandle   = GCHandle.Alloc(resultArray, GCHandleType.Pinned);

                    try
                    {
                        // The blocking read also waits for the kernel, so the timing covers both.
                        commands.Read(result, true, 0, rank * rank, arrHandle.AddrOfPinnedObject(), null);
                        var elapsed = sw.Elapsed;

                        Console.WriteLine($"using: {elapsed.TotalMilliseconds} ms\n");
                    }
                    finally
                    {
                        arrHandle.Free();
                    }
                }
            }
        }
예제 #11
0
        /// <summary>
        /// Binds the arguments, runs <paramref name="kernel"/> over a <paramref name="w"/> x <paramref name="h"/>
        /// grid and reads <paramref name="bufferSize"/> elements of <paramref name="points"/> back to the host.
        /// </summary>
        /// <param name="unit">Kernel argument 1.</param>
        /// <param name="w">Kernel argument 2 and the first global work size.</param>
        /// <param name="h">The second global work size.</param>
        /// <param name="cx">Kernel argument 3.</param>
        /// <param name="cy">Kernel argument 4.</param>
        /// <param name="originx">Kernel argument 5.</param>
        /// <param name="originy">Kernel argument 6.</param>
        /// <param name="bufferSize">Number of elements to read back.</param>
        /// <param name="points">Device buffer bound as kernel argument 0 and read back afterwards.</param>
        /// <param name="kernel">The kernel to execute.</param>
        /// <returns>A new array with the first <paramref name="bufferSize"/> elements of <paramref name="points"/>.</returns>
        private TFP[] RunKernal(int unit, int w, int h, int cx, int cy, float originx, float originy, int bufferSize, ComputeBuffer<TFP> points, ComputeKernel kernel)
        {
            kernel.SetMemoryArgument(0, points);
            kernel.SetValueArgument(1, unit);
            kernel.SetValueArgument(2, w);
            kernel.SetValueArgument(3, cx);
            kernel.SetValueArgument(4, cy);
            kernel.SetValueArgument(5, originx);
            kernel.SetValueArgument(6, originy);

            // BUG: ATI Stream v2.2 crash if event list not null.
            commands.Execute(kernel, null, new long[] { w, h }, null, events);

            TFP[]    pointsArray = new TFP[bufferSize];
            GCHandle arrCHandle  = GCHandle.Alloc(pointsArray, GCHandleType.Pinned);

            try
            {
                commands.Read(points, true, 0, bufferSize, arrCHandle.AddrOfPinnedObject(), events);
            }
            finally
            {
                // Unpin even when the read throws so the array is not pinned forever.
                arrCHandle.Free();
            }

            return pointsArray;
        }
예제 #12
0
        /// <summary>
        /// Vector addition smoke test: adds two random float vectors with the "VectorAdd" kernel
        /// and writes each sum to <paramref name="log"/>. Any exception is logged, not rethrown.
        /// </summary>
        /// <param name="log">Receives the test banner, the results, and any exception text.</param>
        /// <param name="context">The compute context to run on; its first device is used.</param>
        public static void Run(TextWriter log, ComputeContext context)
        {
            StartTest(log, "Vector addition test");

            try
            {
                int count = 10;
                float[] arrA = new float[count];
                float[] arrB = new float[count];
                float[] arrC = new float[count];

                Random rand = new Random();

                for (int i = 0; i < count; i++)
                {
                    arrA[i] = (float)(rand.NextDouble() * 100);
                    arrB[i] = (float)(rand.NextDouble() * 100);
                }

                // Buffers, program, kernel and queue all wrap native handles; dispose them deterministically.
                using (ComputeBuffer<float> a = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrA))
                using (ComputeBuffer<float> b = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrB))
                using (ComputeBuffer<float> c = new ComputeBuffer<float>(context, ComputeMemoryFlags.WriteOnly, arrC.Length))
                using (ComputeProgram program = new ComputeProgram(context, kernelSource))
                {
                    program.Build(null, null, null, IntPtr.Zero);

                    using (ComputeKernel kernel = program.CreateKernel("VectorAdd"))
                    using (ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None))
                    {
                        kernel.SetMemoryArgument(0, a);
                        kernel.SetMemoryArgument(1, b);
                        kernel.SetMemoryArgument(2, c);

                        ICollection<ComputeEventBase> events = new Collection<ComputeEventBase>();

                        // BUG: ATI Stream v2.2 crash if event list not null.
                        commands.Execute(kernel, null, new long[] { count }, null, events);

                        GCHandle arrCHandle = GCHandle.Alloc(arrC, GCHandleType.Pinned);
                        try
                        {
                            commands.Read(c, true, 0, count, arrCHandle.AddrOfPinnedObject(), events);
                        }
                        finally
                        {
                            // Unpin even when the read throws.
                            arrCHandle.Free();
                        }
                    }
                }

                for (int i = 0; i < count; i++)
                {
                    log.WriteLine("{0} + {1} = {2}", arrA[i], arrB[i], arrC[i]);
                }
            }
            catch (Exception e)
            {
                log.WriteLine(e.ToString());
            }

            EndTest(log, "Vector addition test");
        }
예제 #13
0
        /// <summary>
        /// Runs the "ComputePoints" kernel from kernel.cl <paramref name="numberOfIterations"/> times on the
        /// Intel OpenCL GPU and returns the average wall-clock time per iteration in milliseconds.
        /// Each timed iteration covers the depth-buffer upload, the kernel launch and all four read-backs.
        /// </summary>
        /// <param name="depthPixels">Depth input; one work item is launched per element.</param>
        /// <param name="width">Kernel argument 6.</param>
        /// <param name="inverseRotatedFx">Kernel argument 7.</param>
        /// <param name="rotatedCx">Kernel argument 8.</param>
        /// <param name="inverseRotatedFy">Kernel argument 9.</param>
        /// <param name="rotatedCy">Kernel argument 10.</param>
        /// <param name="bedTransformationM">3x3 part of the bed affine transform.</param>
        /// <param name="bedTransformationb">Translation column of the bed affine transform.</param>
        /// <param name="floorTransformationM">3x3 part of the floor affine transform.</param>
        /// <param name="floorTransformationb">Translation column of the floor affine transform.</param>
        /// <param name="numberOfIterations">How many timed iterations to run.</param>
        /// <returns>Elapsed milliseconds divided by <paramref name="numberOfIterations"/>.</returns>
        static float ComputeAverageGPUTime(
            ushort[] depthPixels,
            int width,
            float inverseRotatedFx,
            float rotatedCx,
            float inverseRotatedFy,
            float rotatedCy,
            Matrix bedTransformationM,
            Matrix bedTransformationb,
            Matrix floorTransformationM,
            Matrix floorTransformationb,
            int numberOfIterations)
        {
            // pick the device platform
            ComputePlatform intelGPU = ComputePlatform.Platforms.First(n => n.Name.Contains("Intel"));

            string kernelSource = null;

            using (StreamReader sr = new StreamReader("kernel.cl"))
            {
                kernelSource = sr.ReadToEnd();
            }

            Point3D[] outProjectivePoints = new Point3D[depthPixels.Length];
            Point3D[] outRealPoints       = new Point3D[depthPixels.Length];
            Point3D[] outBedPoints        = new Point3D[depthPixels.Length];
            Point3D[] outFloorPoints      = new Point3D[depthPixels.Length];

            // Two 3x4 affine transforms, each stored row by row as [M | b].
            float[] affines = new float[24];

            // do bed affines first because that's what assembly code expects
            int z = 0;

            for (int b = 0; b < 3; b++)
            {
                for (int c = 0; c < 3; c++)
                {
                    affines[z++] = bedTransformationM[b, c];
                }
                affines[z++] = bedTransformationb[b, 0];
            }

            // do floor affines next because that's what assembly code expects
            for (int b = 0; b < 3; b++)
            {
                for (int c = 0; c < 3; c++)
                {
                    affines[z++] = floorTransformationM[b, c];
                }
                affines[z++] = floorTransformationb[b, 0];
            }

            // NOTE(review): the buffers below use UseHostPointer with managed arrays that are only
            // pinned during the read-back; confirm the binding keeps the host memory valid for the
            // lifetime of each buffer, otherwise switch to CopyHostPointer / device allocation.
            using (ComputeContext context = new ComputeContext(
                       ComputeDeviceTypes.Gpu,                   // use the gpu
                       new ComputeContextPropertyList(intelGPU), // use the intel openCL platform
                       null,
                       IntPtr.Zero))
            // the command queue is the queue of commands sent to the "device" (GPU)
            using (ComputeCommandQueue commandQueue = new ComputeCommandQueue(
                       context,                        // the compute context
                       context.Devices[0],             // first device matching the context specifications
                       ComputeCommandQueueFlags.None)) // no special flags
            using (ComputeProgram program = new ComputeProgram(context, new string[] { kernelSource }))
            {
                // compile.
                program.Build(null, null, null, IntPtr.Zero);

                using (ComputeKernel kernel = program.CreateKernel("ComputePoints"))
                using (ComputeBuffer<float> affinesBuffer = new ComputeBuffer<float>(context,
                                                                                     ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer,
                                                                                     affines))
                using (ComputeBuffer<Point3D> projectivePointsBuffer = new ComputeBuffer<Point3D>(context,
                                                                                                  ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.UseHostPointer,
                                                                                                  outProjectivePoints))
                using (ComputeBuffer<Point3D> realPointsBuffer = new ComputeBuffer<Point3D>(context,
                                                                                            ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.UseHostPointer,
                                                                                            outRealPoints))
                using (ComputeBuffer<Point3D> bedPointsBuffer = new ComputeBuffer<Point3D>(context,
                                                                                           ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.UseHostPointer,
                                                                                           outBedPoints))
                using (ComputeBuffer<Point3D> floorPointsBuffer = new ComputeBuffer<Point3D>(context,
                                                                                             ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.UseHostPointer,
                                                                                             outFloorPoints))
                {
                    // Each output buffer is bound to its own argument slot. Slots 4 and 5 used to be
                    // bound to the projective/real buffers again, leaving bed/floor unbound.
                    kernel.SetMemoryArgument(1, affinesBuffer);
                    kernel.SetMemoryArgument(2, projectivePointsBuffer);
                    kernel.SetMemoryArgument(3, realPointsBuffer);
                    kernel.SetMemoryArgument(4, bedPointsBuffer);
                    kernel.SetMemoryArgument(5, floorPointsBuffer);

                    kernel.SetValueArgument<int>(6, width);
                    kernel.SetValueArgument<float>(7, inverseRotatedFx);
                    kernel.SetValueArgument<float>(8, rotatedCx);
                    kernel.SetValueArgument<float>(9, inverseRotatedFy);
                    kernel.SetValueArgument<float>(10, rotatedCy);

                    Stopwatch sw = new Stopwatch();

                    sw.Start();
                    for (int c = 0; c < numberOfIterations; c++)
                    {
                        // A fresh depth buffer is created per iteration; dispose it so it does not
                        // accumulate on the device across iterations.
                        using (ComputeBuffer<ushort> depthPointsBuffer = new ComputeBuffer<ushort>(context,
                                                                                                   ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer,
                                                                                                   depthPixels))
                        {
                            kernel.SetMemoryArgument(0, depthPointsBuffer);

                            commandQueue.Execute(kernel, new long[] { 0 }, new long[] { depthPixels.Length }, null, null);

                            unsafe
                            {
                                fixed (Point3D* projectivePointsPtr = outProjectivePoints,
                                                realPointsPtr = outRealPoints,
                                                bedPointsPtr = outBedPoints,
                                                floorPointsPtr = outFloorPoints)
                                {
                                    // Non-blocking reads; Finish() must complete while the arrays are pinned.
                                    commandQueue.Read(projectivePointsBuffer, false, 0, outProjectivePoints.Length, new IntPtr(projectivePointsPtr), null);
                                    commandQueue.Read(realPointsBuffer, false, 0, outRealPoints.Length, new IntPtr(realPointsPtr), null);
                                    commandQueue.Read(bedPointsBuffer, false, 0, outBedPoints.Length, new IntPtr(bedPointsPtr), null);
                                    commandQueue.Read(floorPointsBuffer, false, 0, outFloorPoints.Length, new IntPtr(floorPointsPtr), null);
                                    commandQueue.Finish();
                                }
                            }
                        }
                    }
                    sw.Stop();

                    return sw.ElapsedMilliseconds / (numberOfIterations * 1.0f);
                }
            }
        }
예제 #14
0
        // 26 ms 4096x4096@512 iter with 1024 cores
        /// <summary>
        /// Runs the "mandel" kernel from Mandel3.cl in 32x32 tiles on the first GPU of the first
        /// platform, reads the result into <paramref name="message"/> and returns the elapsed time.
        /// </summary>
        /// <param name="ymin">Kernel argument 2.</param>
        /// <param name="xmin">Kernel argument 3.</param>
        /// <param name="width">Kernel argument 4.</param>
        /// <param name="message">Host array backing the output buffer; receives the kernel output.</param>
        /// <returns>Milliseconds spent enqueuing the tiles, executing them and reading the result back.</returns>
        static long Method05(float ymin, float xmin, float width, int[] message)
        {
            // pick first platform
            ComputePlatform platform = ComputePlatform.Platforms[0];

            // load opencl source (the file is closed even if reading fails)
            string clSource = File.ReadAllText("Mandel3.cl");

            int messageSize = message.Length;

            // Every object below wraps a native OpenCL handle. They were never released before,
            // so each call leaked a whole context; 'using' frees them on every path.
            // create context with all gpu devices
            using (ComputeContext context = new ComputeContext(ComputeDeviceTypes.Gpu,
                                                               new ComputeContextPropertyList(platform), null, IntPtr.Zero))
            // create a command queue with first gpu found
            using (ComputeCommandQueue queue = new ComputeCommandQueue(context,
                                                                       context.Devices[0], ComputeCommandQueueFlags.None))
            // create program with opencl source
            using (ComputeProgram program = new ComputeProgram(context, clSource))
            {
                // compile opencl source
                program.Build(null, null, null, IntPtr.Zero);

                // load chosen kernel from program and allocate a memory buffer with the message
                // NOTE(review): UseHostPointer is used with a managed array that is only pinned
                // during the read-back below; confirm this is safe with the binding in use.
                using (ComputeKernel kernel = program.CreateKernel("mandel"))
                using (ComputeBuffer<int> messageBuffer = new ComputeBuffer<int>(context,
                                                                                 ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.UseHostPointer, message))
                {
                    kernel.SetMemoryArgument(0, messageBuffer);
                    kernel.SetValueArgument(1, N);
                    kernel.SetValueArgument(2, ymin);
                    kernel.SetValueArgument(3, xmin);
                    kernel.SetValueArgument(4, width);
                    kernel.SetValueArgument(5, MaxIter);

                    var watch = System.Diagnostics.Stopwatch.StartNew();

                    // Execute the kernel tile by tile; each launch covers a 32x32 block at offset (i*32, j*32).
                    for (var i = 0; i < N / 32; ++i)
                    {
                        for (var j = 0; j < N / 32; ++j)
                        {
                            queue.Execute(kernel, new long[] { i * 32, j * 32 }, new long[] { 32, 32 }, null, null);
                        }
                    }

                    // Read data back
                    unsafe
                    {
                        fixed (int* retPtr = message)
                        {
                            // Non-blocking read; Finish() must complete while 'message' is pinned.
                            queue.Read(messageBuffer,
                                       false, 0,
                                       messageSize,
                                       new IntPtr(retPtr),
                                       null);

                            queue.Finish();
                        }
                    }

                    watch.Stop();
                    return watch.ElapsedMilliseconds;
                }
            }
        }
예제 #15
0
        /// <summary>
        /// Renders one frame: updates the kernel arguments, runs the kernel over an N x N grid,
        /// reads the result into <c>message</c>, shows the elapsed time and copies the pixels
        /// into the writeable bitmap's back buffer.
        /// </summary>
        static void GenerateMandelBrot()
        {
            var nWidth = width / N;

            // The kernel variant decides whether coordinates are passed as double or float.
            if (useDoublePrecision)
            {
                kernel.SetValueArgument(4, ymin);
                kernel.SetValueArgument(5, xmin);
                kernel.SetValueArgument(6, nWidth);
            }
            else
            {
                kernel.SetValueArgument(4, (float)ymin);
                kernel.SetValueArgument(5, (float)xmin);
                kernel.SetValueArgument(6, (float)nWidth);
            }
            kernel.SetValueArgument(7, maxIter);
            kernel.SetValueArgument(8, coclorCycle);

            coclorCycle += colorCycleFrameIncrement;

            Stopwatch watch = new Stopwatch();

            watch.Start();

            // Execute kernel
            queue.Execute(kernel, new long[] { 0, 0 }, new long[] { N, N }, null, null);

            // Read data back
            unsafe
            {
                fixed (int* retPtr = message)
                {
                    // Non-blocking read; Finish() must complete while 'message' is pinned.
                    queue.Read(messageBuffer,
                               false, 0,
                               messageSize,
                               new IntPtr(retPtr),
                               null);

                    queue.Finish();
                }
            }

            watch.Stop();
            debugLabel.Content = $"{watch.ElapsedMilliseconds} ms";

            // Write to bitmap.
            // Width/Height are device-independent units and only equal the pixel size at 96 DPI;
            // the back buffer and the dirty rect are both measured in pixels.
            int pixelWidth  = writeableBitmap.PixelWidth;
            int pixelHeight = writeableBitmap.PixelHeight;

            // Reserve the back buffer for updates. Lock() stays outside the try block so that
            // Unlock() only runs when the lock was actually taken.
            writeableBitmap.Lock();
            try
            {
                // Assumes one int per pixel and no row padding in the back buffer — TODO confirm
                // against the bitmap's pixel format.
                IntPtr buffer = writeableBitmap.BackBuffer;
                System.Runtime.InteropServices.Marshal.Copy(message, 0, buffer, pixelWidth * pixelHeight);

                // Specify the area of the bitmap that changed.
                writeableBitmap.AddDirtyRect(new Int32Rect(0, 0, pixelWidth, pixelHeight));
            }
            finally
            {
                // Release the back buffer and make it available for display.
                writeableBitmap.Unlock();
            }
        }
예제 #16
0
 /// <summary>
 /// Reads width * height * 4 elements from the result buffer into the pinned host buffer
 /// and then drains the command queue.
 /// </summary>
 public void ReadResult()
 {
     var elementCount = width * height * 4;
     var destination  = gc_resultBuffer.AddrOfPinnedObject();

     // Blocking read starting at offset 0.
     clCommands.Read(cbuf_Result, true, 0, elementCount, destination, null);

     clCommands.Finish();
 }
예제 #17
0
        /// <summary>
        /// Creates the Julia image using the GPU.
        /// </summary>
        /// <param name="width">The width.</param>
        /// <param name="height">The height.</param>
        /// <returns>
        /// An image containing the Julia set. The image owns its pixel memory, so it
        /// remains valid after this method returns.
        /// </returns>
        private Image CreateGPU(int width, int height)
        {
            // Initialize the position of the set.
            float realLeft        = -1.5f;
            float realRight       = 1.5f;
            float imaginaryBottom = -1.2f;
            float imaginaryTop    = imaginaryBottom + (realRight - realLeft) * height / width;

            /* Compute factors for translating from the imaginary plane to the
             * Cartesian plane. */
            float realFactor      = (realRight - realLeft) / (width - 1);
            float imaginaryFactor = (imaginaryTop - imaginaryBottom) / (height - 1);

            // Set the number of iterations to check for values in the set.
            uint maxIterations = MAX_ITERATIONS;

            // Four elements per pixel, read back into a byte array of the same length.
            int    elementCount = width * height * 4;
            byte[] kernelResult = new byte[elementCount];

            // Create a buffer for kernel output; disposed as soon as it has been read back.
            // NOTE(review): the element type is char but the host array is byte[]; this
            // presumes Cloo sizes char as one byte - confirm against the Cloo version in use.
            using (ComputeBuffer<char> kernelOutput = new ComputeBuffer<char>(
                       context, ComputeMemoryFlags.WriteOnly, elementCount))
            {
                // Set arguments for the kernel.
                kernel.SetValueArgument<float>(0, realFactor);
                kernel.SetValueArgument<float>(1, imaginaryFactor);
                kernel.SetValueArgument<float>(2, realLeft);
                kernel.SetValueArgument<float>(3, imaginaryBottom);
                kernel.SetValueArgument<float>(4, imaginaryTop);
                kernel.SetValueArgument<uint>(5, maxIterations);
                kernel.SetValueArgument<int>(6, width);
                kernel.SetMemoryArgument(7, kernelOutput);

                // TODO: Scale work group and work item sizes to fit the resolution.
                commands.Execute(kernel, null, new long[] { width, height },
                                 null, events);

                // Pin the managed array for the duration of the (non-blocking) read.
                GCHandle kernelResultHandle = GCHandle.Alloc(kernelResult,
                                                             GCHandleType.Pinned);
                try
                {
                    commands.Read(kernelOutput, false, 0, elementCount,
                                  kernelResultHandle.AddrOfPinnedObject(), events);
                    commands.Finish();
                }
                finally
                {
                    // Always release the pin, even when the read fails.
                    kernelResultHandle.Free();
                }
            }

            // Copy the pixels into memory owned by the bitmap. Wrapping the managed array
            // with the Bitmap(..., IntPtr) constructor would leave the image pointing at
            // memory that is no longer pinned once this method returns.
            Bitmap image = new Bitmap(width, height, PixelFormat.Format32bppArgb);
            BitmapData pixels = image.LockBits(new Rectangle(0, 0, width, height),
                                               ImageLockMode.WriteOnly,
                                               PixelFormat.Format32bppArgb);
            try
            {
                int rowBytes = width * 4;

                // Copy row by row so the bitmap's own stride is respected.
                for (int y = 0; y < height; y++)
                {
                    Marshal.Copy(kernelResult, y * rowBytes,
                                 IntPtr.Add(pixels.Scan0, y * pixels.Stride), rowBytes);
                }
            }
            finally
            {
                image.UnlockBits(pixels);
            }

            return image;
        }
예제 #18
0
        /// <summary>
        /// Runs the "test" kernel on a fresh command queue and reads its 16-element
        /// output buffer back into a local array.
        /// </summary>
        /// <param name="programHandle">Not used by this method.</param>
        /// <param name="userDataPtr">Not used by this method.</param>
        private unsafe void notify(CLProgramHandle programHandle, IntPtr userDataPtr)
        {
            uint[] dst = new uint[16];

            fixed (uint* dstPtr = dst)
            {
                // The queue, buffer and kernel are all released when the block exits,
                // including on failure.
                using (var queue = new ComputeCommandQueue(ccontext, device, ComputeCommandQueueFlags.None))
                using (var buf = new ComputeBuffer<uint>(ccontext, ComputeMemoryFlags.WriteOnly, dst.Length))
                using (var kernel = program.CreateKernel("test"))
                {
                    kernel.SetValueArgument(0, 1443351125U);
                    kernel.SetMemoryArgument(1, buf);

                    queue.Execute(kernel, null, new long[] { 16L, 256L, 1048576L }, null, null);
                    queue.Finish();

                    // Blocking read: returns only after dst has been filled, so no
                    // further Finish() is needed afterwards.
                    queue.Read<uint>(buf, true, 0, dst.Length, (IntPtr)dstPtr, null);
                }
            }
        }
예제 #19
0
        /// <summary>
        /// Fills two arrays with random values, adds them with the "VectorAdd" kernel
        /// and prints each sum to the console.
        /// </summary>
        protected override void RunInternal()
        {
            int count = 10;
            float[] arrA = new float[count];
            float[] arrB = new float[count];
            float[] arrC = new float[count];

            Random rand = new Random();

            for (int i = 0; i < count; i++)
            {
                arrA[i] = (float)(rand.NextDouble() * 100);
                arrB[i] = (float)(rand.NextDouble() * 100);
            }

            // Every OpenCL object created here is released when its using block exits,
            // including when the build or the kernel run throws.
            using (ComputeBuffer<float> a = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrA))
            using (ComputeBuffer<float> b = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrB))
            using (ComputeBuffer<float> c = new ComputeBuffer<float>(context, ComputeMemoryFlags.WriteOnly, arrC.Length))
            using (ComputeProgram program = new ComputeProgram(context, new string[] { kernelSource }))
            {
                program.Build(null, null, null, IntPtr.Zero);

                using (ComputeKernel kernel = program.CreateKernel("VectorAdd"))
                using (ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None))
                {
                    kernel.SetMemoryArgument(0, a);
                    kernel.SetMemoryArgument(1, b);
                    kernel.SetMemoryArgument(2, c);

                    ComputeEventList events = new ComputeEventList();

                    commands.Execute(kernel, null, new long[] { count }, null, events);

                    // Pin the result array while the non-blocking read is in flight.
                    GCHandle arrCHandle = GCHandle.Alloc(arrC, GCHandleType.Pinned);
                    try
                    {
                        commands.Read(c, false, 0, count, arrCHandle.AddrOfPinnedObject(), events);
                        commands.Finish();
                    }
                    finally
                    {
                        arrCHandle.Free();
                    }
                }
            }

            for (int i = 0; i < count; i++)
            {
                Console.WriteLine("{0} + {1} = {2}", arrA[i], arrB[i], arrC[i]);
            }
        }
예제 #20
0
 /// <summary>Reads the whole width x height image from the device into the memory at <paramref name="p"/>.</summary>
 /// <param name="p">Destination pointer for the image data.</param>
 /// <param name="CQ">Command queue to use.</param>
 /// <param name="BlockingRead">Passed to the queue as the blocking flag.</param>
 /// <param name="events">Event collection passed through to the queue.</param>
 private unsafe void ReadFromDeviceTo(void* p, ComputeCommandQueue CQ, bool BlockingRead, ICollection<ComputeEventBase> events)
 {
     // A variable created from an OpenGL buffer may only be used once it has been acquired.
     bool usableFromOpenCL = !CreatedFromGLBuffer || AcquiredInOpenCL;
     if (!usableFromOpenCL)
     {
         throw new Exception("Attempting to use a variable created from OpenGL buffer without acquiring. Should use CLGLInteropFunctions to properly acquire and release these variables");
     }

     var source      = (ComputeImage)VarPointer;
     var origin      = new SysIntX3(0, 0, 0);
     var region      = new SysIntX3(width, height, 1);
     var destination = new IntPtr(p);

     CQ.Read(source, BlockingRead, origin, region, 0, 0, destination, events);
 }
예제 #21
0
 /// <summary>Reads the whole width x height image from the device into the memory at <paramref name="p"/>.</summary>
 /// <param name="p">Destination pointer for the image data.</param>
 /// <param name="CQ">Command queue to use.</param>
 /// <param name="BlockingRead">Passed to the queue as the blocking flag.</param>
 /// <param name="events">Event collection passed through to the queue.</param>
 private unsafe void ReadFromDeviceTo(void* p, ComputeCommandQueue CQ, bool BlockingRead, ICollection<ComputeEventBase> events)
 {
     var source      = (ComputeImage)VarPointer;
     var origin      = new SysIntX3(0, 0, 0);
     var region      = new SysIntX3(width, height, 1);
     var destination = new IntPtr(p);

     CQ.Read(source, BlockingRead, origin, region, 0, 0, destination, events);
 }
예제 #22
0
        /// <summary>
        /// Runs the "gaussian_blur" kernel from gaussianblur.cl on the first OpenCL
        /// device found and saves the result as a 32bpp ARGB bitmap.
        /// </summary>
        /// <param name="imageFile">Source image path, handed to CreateImageFromBitmap.</param>
        /// <param name="dst">Path the resulting bitmap is saved to.</param>
        /// <exception cref="InvalidOperationException">No OpenCL platform or device is available.</exception>
        public void Compute_cl(string imageFile, string dst)
        {
            // Select the device.
            var platform = ComputePlatform.Platforms.FirstOrDefault();
            if (platform == null)
            {
                throw new InvalidOperationException("No OpenCL platform is available.");
            }

            var device = platform.Devices.FirstOrDefault();
            if (device == null)
            {
                throw new InvalidOperationException("The OpenCL platform exposes no devices.");
            }

            // Set up the context, the command queue that controls execution, and the
            // program read from the OpenCL source file. All are disposed on exit.
            var properties = new ComputeContextPropertyList(platform);

            using (var context = new ComputeContext(new[] { device }, properties, null, IntPtr.Zero))
            using (var commands = new ComputeCommandQueue(context, context.Devices[0],
                                                          ComputeCommandQueueFlags.None))
            using (var program = new ComputeProgram(context, File.ReadAllText(@"gaussianblur.cl")))
            {
                // Compile; build failures propagate to the caller unchanged.
                program.Build(new[] { device }, null, null, IntPtr.Zero);

                using (var images = CreateImageFromBitmap(imageFile, context,
                                                          ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer))
                // The kernel is the function marked "kernel" in the .cl source named gaussian_blur.
                using (var kernel = program.CreateKernel("gaussian_blur"))
                // Buffer that receives the computed result.
                using (var resultBuffer = new ComputeBuffer<char>(context, ComputeMemoryFlags.WriteOnly, dstBytes.Length))
                using (var matrixBuffer = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, _matrix))
                {
                    // Kernel arguments are passed by position.
                    kernel.SetMemoryArgument(0, images);
                    kernel.SetMemoryArgument(1, resultBuffer);
                    kernel.SetMemoryArgument(2, matrixBuffer);
                    kernel.SetValueArgument(3, Radius);
                    kernel.SetValueArgument(4, (int)images.Width);
                    Console.WriteLine($"\n运行平台: {platform.Name}\n运行设备: {device.Name}");
                    Stopwatch sw = Stopwatch.StartNew();

                    Console.WriteLine($"处理图片尺寸:{images.Width}*{images.Height}");

                    // Run the kernel, one work item per pixel.
                    commands.Execute(kernel, null, new long[] { images.Width, images.Height }, null, null);

                    // Read the data back. The array is pinned exactly once and stays pinned
                    // until the bitmap that wraps its memory has been saved and disposed.
                    char[] resultArray  = new char[dstBytes.Length];
                    var    resultHandle = GCHandle.Alloc(resultArray, GCHandleType.Pinned);
                    try
                    {
                        commands.Read(resultBuffer, true, 0, dstBytes.Length, resultHandle.AddrOfPinnedObject(), null);

                        using (var bmp = new Bitmap(images.Width, images.Height, images.Width * 4, PixelFormat.Format32bppArgb, resultHandle.AddrOfPinnedObject()))
                        {
                            var elapsed = sw.Elapsed;

                            Console.WriteLine($"OpenCL处理耗时: {elapsed.TotalMilliseconds} ms\n");

                            bmp.Save(dst);
                        }
                    }
                    finally
                    {
                        resultHandle.Free();
                    }
                }
            }
        }
예제 #23
0
 /// <summary>Reads this variable from the device into a byte array.</summary>
 /// <param name="Values">Array receiving the data; its length must equal the allocated length</param>
 /// <param name="CQ">Command queue to use</param>
 /// <param name="BlockingRead">TRUE to return only after completed reading.</param>
 /// <param name="events">OpenCL Event associated with this operation</param>
 public void ReadFromDeviceTo(byte[] Values, ComputeCommandQueue CQ, bool BlockingRead, ICollection<ComputeEventBase> events)
 {
     if (Values.Length != OriginalVarLength)
     {
         throw new Exception("Values length should be the same as allocated length");
     }

     // A variable created from an OpenGL buffer may only be used once it has been acquired.
     bool usableFromOpenCL = !CreatedFromGLBuffer || AcquiredInOpenCL;
     if (!usableFromOpenCL)
     {
         throw new Exception("Attempting to use a variable created from OpenGL buffer without acquiring. Should use CLGLInteropFunctions to properly acquire and release these variables");
     }

     unsafe
     {
         // NOTE(review): Values is pinned only until this fixed block exits; with
         // BlockingRead == false the transfer may presumably still be in flight then - confirm.
         fixed (byte* destination = Values)
         {
             var deviceBuffer = (ComputeBuffer<byte>)VarPointer;
             CQ.Read<byte>(deviceBuffer, BlockingRead, 0, Values.Length, new IntPtr(destination), events);
         }
     }
 }