示例#1
0
文件: Laser.cs 项目: omarcin96/L3DS
        // Wartosci min, max po OpenCL.
        public void GetMinMaxValuesCL(Mat frame, out int[] maxValues, out int[] minValues, int windowValue)
        {
            maxValues = null;
            minValues = null;


            try
            {
                MinMaxCL.UpdateArguments(frame, clooCtx, ctxMinMaxKernel, windowValue);

                // execute kernel
                queue.Execute(ctxMinMaxKernel, null, new long[] { frame.Cols }, null, null);

                // max Values.
                maxValues = new int[frame.Cols];
                GCHandle maxHandle = GCHandle.Alloc(maxValues, GCHandleType.Pinned);
                queue.Read(MinMaxCL.maxBufferCB, true, 0, maxValues.Length, maxHandle.AddrOfPinnedObject(), null);

                // min Values.
                minValues = new int[frame.Cols];
                GCHandle minHandle = GCHandle.Alloc(minValues, GCHandleType.Pinned);
                queue.Read(MinMaxCL.minBufferCB, true, 0, minValues.Length, minHandle.AddrOfPinnedObject(), null);

                // end opencl compute.
                queue.Finish();
            } catch (Exception ex)
            {
                MessageBox.Show(ex.Message);
            }
        }
示例#2
0
        public float[] MultiplyMatricesZeroCopy(float[] matrix1, float[] matrix2,
                                                int matrix1Height, int matrix1WidthMatrix2Height, int matrix2Width)
        {
            if (!_initialized)
            {
                Initialize();
                _initialized = true;
            }

            ComputeBuffer <float> matrix1Buffer = new ComputeBuffer <float>(_context,
                                                                            ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer,
                                                                            matrix1);

            _kernel.SetMemoryArgument(0, matrix1Buffer);

            ComputeBuffer <float> matrix2Buffer = new ComputeBuffer <float>(_context,
                                                                            ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer,
                                                                            matrix2);

            _kernel.SetMemoryArgument(1, matrix2Buffer);

            float[] ret = new float[matrix1Height * matrix2Width];
            ComputeBuffer <float> retBuffer = new ComputeBuffer <float>(_context,
                                                                        ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.UseHostPointer,
                                                                        ret);

            _kernel.SetMemoryArgument(2, retBuffer);

            _kernel.SetValueArgument <int>(3, matrix1WidthMatrix2Height);
            _kernel.SetValueArgument <int>(4, matrix2Width);

            _commandQueue.Execute(_kernel,
                                  new long[] { 0 },
                                  new long[] { matrix2Width, matrix1Height },
                                  null, null);

            IntPtr retPtr = _commandQueue.Map(
                retBuffer,
                true,
                ComputeMemoryMappingFlags.Read,
                0,
                ret.Length, null);

            _commandQueue.Unmap(retBuffer, ref retPtr, null);
            //_commandQueue.Finish();

            matrix1Buffer.Dispose();
            matrix2Buffer.Dispose();
            retBuffer.Dispose();

            return(ret);
        }
示例#3
0
        public void RunKernel(ComputeKernel kernel, int count)
        {
            int argOffset = _intComputeBuffers.Count + _floatComputeBuffers.Count;

            foreach (string key in _intArguments.Keys)
            {
                kernel.SetValueArgument(argOffset, _intArguments[key]);

                argOffset++;
            }

            foreach (string key in _floatArguments.Keys)
            {
                kernel.SetValueArgument(argOffset, _floatArguments[key]);

                argOffset++;
            }

            foreach (string key in _doubleArguments.Keys)
            {
                kernel.SetValueArgument(argOffset, _doubleArguments[key]);

                argOffset++;
            }

            _commands.Execute(kernel, count);

            _commands.Finish();
        }
示例#4
0
        private double[] MatrixMultiply(Matrix <double> L, Matrix <double> R)
        {
            var L_array = L.To1D();
            var R_array = R.To1D();
            var O_array = new double[L.M * R.N];

            ComputeBuffer <double> a = new ComputeBuffer <double>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, L_array);
            ComputeBuffer <double> b = new ComputeBuffer <double>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, R_array);
            ComputeBuffer <double> c = new ComputeBuffer <double>(context, ComputeMemoryFlags.WriteOnly, O_array.Length);

            kernel.SetMemoryArgument(0, a);
            kernel.SetMemoryArgument(1, b);
            kernel.SetMemoryArgument(2, c);
            kernel.SetValueArgument(3, L.N);
            kernel.SetValueArgument(4, L.M);
            kernel.SetValueArgument(5, R.N);
            kernel.SetValueArgument(6, R.M);

            commands.Execute(kernel, null, new long[] { R.N, L.M }, null, null);

            commands.ReadFromBuffer(c, ref O_array, true, null);

            commands.Finish();

            // cleanup buffers
            a.Dispose();
            b.Dispose();
            c.Dispose();
            return(O_array);
        }
示例#5
0
        public BitmapSource Diffuse()
        {
            CurrImage = NextImage;
            SetMemoryArguments();

            if (WithLocalMem)
            {
                CQ.Execute(DiffuseKernel, null, new long[] { Width, Height }, new long[] { 28, 8 }, null);
            }
            else
            {
                CQ.Execute(DiffuseKernel, null, new long[] { Width, Height }, null, null);
            }

            return(ReadImageDataFromGPUMemory());
        }
示例#6
0
        public override void Proccess()
        {
            // execute kernel
            queue.Execute(kernel, null, new long[] { DataGenerator.InputCount }, null, null);
            queue.Finish();

            /*
             * short[] results2 = new short[this.results.Length];
             * GCHandle arrCHandle = GCHandle.Alloc(results2, GCHandleType.Pinned);
             * queue.Read(result_dev, true, 0, DataFeeder.GetInputCount(), arrCHandle.AddrOfPinnedObject(), events);
             */

            //bool[] results2 = new bool[DataFeeder.GetInputCount()];
            queue.ReadFromBuffer(result_dev, ref resultsBytes, true, null);
            queue.ReadFromBuffer(resultCalc_dev, ref calculatables, true, null);
            //queue.ReadFromBuffer()

            /*
             * bool[] arrC = new bool[5];
             * GCHandle arrCHandle = GCHandle.Alloc(arrC, GCHandleType.Pinned);
             * queue.Read<bool>(result_dev, true, 0, 5, arrCHandle.AddrOfPinnedObject(), null);
             */
            // wait for completion
            //queue.Finish();

            //kernel.Dispose();
            //queue.Dispose();
            //context.Dispose();
        }
示例#7
0
        public unsafe static void MatrixMulti_OpenCL(double[,] result, double[,] a, double[,] b)
        {
            InitCloo();

            var ncols = result.GetUpperBound(0) + 1;
            var nrows = result.GetUpperBound(1) + 1;

            fixed(double *rp = result, ap = a, bp = b)
            {
                ComputeBuffer <double> aBuffer = new ComputeBuffer <double>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, a.Length, (IntPtr)ap);
                ComputeBuffer <double> bBuffer = new ComputeBuffer <double>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, b.Length, (IntPtr)bp);
                ComputeBuffer <double> rBuffer = new ComputeBuffer <double>(context, ComputeMemoryFlags.WriteOnly, result.Length);

                kernel.SetMemoryArgument(0, aBuffer);
                kernel.SetMemoryArgument(1, bBuffer);
                kernel.SetValueArgument(2, ncols);
                kernel.SetMemoryArgument(3, rBuffer);

                ComputeEventList events = new ComputeEventList();

                ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

                commands.Execute(kernel, null, new long[] { result.Length }, null, events);

                commands.ReadFromBuffer(rBuffer, ref result, false, new SysIntX2(), new SysIntX2(), new SysIntX2(ncols, nrows), events);

                commands.Finish();
            }
        }
示例#8
0
        public uint FindProofOfWork(byte[] header, byte[] bits, uint nonceStart, uint iterations, out long elapsedMilliseconds)
        {
            this.stopwatch.Restart();

            using var headerBuffer = new ComputeBuffer <byte>(this.computeContext, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, header);
            using var bitsBuffer   = new ComputeBuffer <byte>(this.computeContext, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, bits);
            using var powBuffer    = new ComputeBuffer <uint>(this.computeContext, ComputeMemoryFlags.WriteOnly, 1);

            this.computeKernel.SetMemoryArgument(0, headerBuffer);
            this.computeKernel.SetMemoryArgument(1, bitsBuffer);
            this.computeKernel.SetValueArgument(2, nonceStart);
            this.computeKernel.SetMemoryArgument(3, powBuffer);

            using var commands = new ComputeCommandQueue(this.computeContext, this.computeDevice, ComputeCommandQueueFlags.None);
            commands.Execute(this.computeKernel, null, new long[] { iterations }, null, null);

            var nonceOut = new uint[1];

            commands.ReadFromBuffer(powBuffer, ref nonceOut, true, null);
            commands.Finish();

            elapsedMilliseconds = this.stopwatch.ElapsedMilliseconds;

            return(nonceOut[0]);
        }
示例#9
0
        public string vectorSum()
        {
            string vecSum = @"
                __kernel void vectorSum(__global float *v1, __global float *v2, __global float *v3) {
                    int i = get_global_id(0);
                    v3[i] = v1[i] + v2[i];
                }
            ";
            int    size   = 100000;

            float[] v1_ = new float[size];
            float[] v2_ = new float[size];
            float[] v3_ = new float[size];
            for (var i = 0; i < size; i++)
            {
                v1_[i] = (float)i;
                v2_[i] = (float).5f;
            }
            var platform_ = ComputePlatform.Platforms[0];
            ComputeContextPropertyList properties = new ComputeContextPropertyList(platform_);
            ComputeContext             ctx        = new ComputeContext(ComputeDeviceTypes.Gpu, properties, null, IntPtr.Zero);
            ComputeCommandQueue        commands   = new ComputeCommandQueue(ctx, ctx.Devices[0], ComputeCommandQueueFlags.None);
            ComputeProgram             program    = new ComputeProgram(ctx, vecSum);

            try
            {
                program.Build(null, null, null, IntPtr.Zero);
                Console.WriteLine("program build completed");
            }
            catch
            {
                string log = program.GetBuildLog(ctx.Devices[0]);
            }
            ComputeBuffer <float> v1, v2, v3;

            v1 = new ComputeBuffer <float>(ctx, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, v1_);
            v2 = new ComputeBuffer <float>(ctx, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, v2_);
            v3 = new ComputeBuffer <float>(ctx, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.CopyHostPointer, v3_);
            long[] worker = { size };
            commands.WriteToBuffer(v1_, v1, false, null);
            commands.WriteToBuffer(v2_, v2, false, null);
            ComputeKernel sumKernal = program.CreateKernel("vectorSum");

            Console.WriteLine("kernal created");
            sumKernal.SetMemoryArgument(0, v1);
            sumKernal.SetMemoryArgument(1, v2);
            sumKernal.SetMemoryArgument(2, v3);
            commands.Execute(sumKernal, null, worker, null, null);
            Console.WriteLine("Executed");
            commands.ReadFromBuffer <float>(v3, ref v3_, false, null);
            StringBuilder sb = new StringBuilder();

            for (int i = 0; i < size; i++)
            {
                sb.AppendFormat("{0} + {1} = {2}<br>", v1_[i].ToString(), v2_[i].ToString(), v3_[i].ToString());
            }
            var sum_expression_result = sb.ToString();

            return(sum_expression_result);
        }
示例#10
0
        public Bitmap ProcessImage(Bitmap inImage)
        {
            ComputeImage2D oclInImage  = null;
            ComputeImage2D oclOutImage = null;

            BitmapData inImageData = inImage.LockBits(new Rectangle(0, 0, inImage.Width, inImage.Height),
                                                      ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);

            Bitmap outImage = new Bitmap(inImage.Width, inImage.Height, PixelFormat.Format32bppArgb);

            BitmapData outImageData = outImage.LockBits(new Rectangle(0, 0, outImage.Width, outImage.Height),
                                                        ImageLockMode.WriteOnly, PixelFormat.Format32bppArgb);

            try
            {
                oclInImage = new ComputeImage2D(oclContext, ComputeMemoryFlags.ReadOnly |
                                                ComputeMemoryFlags.CopyHostPointer, new ComputeImageFormat(ComputeImageChannelOrder.Bgra,
                                                                                                           ComputeImageChannelType.UNormInt8), inImage.Width, inImage.Height, 0, inImageData.Scan0);

                oclOutImage = new ComputeImage2D(oclContext, ComputeMemoryFlags.WriteOnly |
                                                 ComputeMemoryFlags.CopyHostPointer, new ComputeImageFormat(ComputeImageChannelOrder.Bgra,
                                                                                                            ComputeImageChannelType.UNormInt8), outImage.Width, outImage.Height, 0, outImageData.Scan0);

                oclKernel.SetMemoryArgument(0, oclInImage);
                oclKernel.SetMemoryArgument(1, oclOutImage);

                oclCommandQueue.Execute(oclKernel, new long[] { 0, 0 }, new long[] { oclInImage.Width,
                                                                                     oclInImage.Height }, null, null);

                oclCommandQueue.Finish();

                oclCommandQueue.ReadFromImage(oclOutImage, outImageData.Scan0, true, null);
            }
            catch (Exception)
            {
                throw;
            }
            finally
            {
                inImage.UnlockBits(inImageData);
                outImage.UnlockBits(outImageData);

                if (oclInImage != null)
                {
                    oclInImage.Dispose();
                    oclInImage = null;
                }

                if (oclOutImage != null)
                {
                    oclOutImage.Dispose();
                    oclOutImage = null;
                }
            }

            return(outImage);
        }
        public async Task Can_use_struct()
        {
            var platform = ComputePlatform.Platforms.First();
            var device   = platform.Devices.First();
            var context  = new ComputeContext(new[] { device }, new ComputeContextPropertyList(platform), null, IntPtr.Zero);

            using var program = new ComputeProgram(context, Struct_kernel);

            try
            {
                program.Build(new[] { device }, "-cl-std=CL1.2", null, IntPtr.Zero);
            }
            catch (Exception ex)
            {
                OnProgramBuilt(program, device);
                return;
            }

            using var queue  = new ComputeCommandQueue(context, device, ComputeCommandQueueFlags.None);
            using var kernel = program.CreateKernel("fill");

            var target   = new Target[10];
            var gcHandle = GCHandle.Alloc(target, GCHandleType.Pinned);
            var ptr      = gcHandle.AddrOfPinnedObject();
            var buffer   = new ComputeBuffer <Target>(
                context,
                ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.UseHostPointer,
                10,
                ptr);


            kernel.SetMemoryArgument(0, buffer);

            var events = new List <ComputeEventBase>();

            queue.Execute(kernel, null, new long[] { 10 }, null, events);
            //await events.WaitForEvents();

            unsafe
            {
                target[0].a.Should().Be(0);
                target[0].b.Should().BeApproximately(0.5f, 0.01f);
                target[0].c.Should().Be(1);
                target[0].d[0].Should().Be(1);
                target[0].d[1].Should().Be(2);
                target[1].a.Should().Be(2);
                target[1].b.Should().BeApproximately(1.5f, 0.01f);
                target[1].c.Should().Be(2);
                target[1].d[0].Should().Be(1);
                target[1].d[1].Should().Be(2);
                target[9].a.Should().Be(18);
                target[9].b.Should().BeApproximately(9.5f, 0.01f);
                target[9].c.Should().Be(10);
                target[9].d[0].Should().Be(1);
                target[9].d[1].Should().Be(2);
            }
        }
示例#12
0
        private TFP[] RunKernalTest(float num, ComputeBuffer <TFP> result, ComputeKernel kernel)
        {
            kernel.SetMemoryArgument(0, result);
            kernel.SetValueArgument(1, num);

            // BUG: ATI Stream v2.2 crash if event list not null.
            commands.Execute(kernel, null, new long[] { 1, 1 }, null, events);
            //commands.Execute(kernel, null, new long[] { count }, null, null);

            TFP[] myresult = new TFP[1];

            GCHandle arrCHandle = GCHandle.Alloc(myresult, GCHandleType.Pinned);

            commands.Read(result, true, 0, 1, arrCHandle.AddrOfPinnedObject(), events);

            arrCHandle.Free();
            return(myresult);
        }
        public async Task Can_multiply_using_pinned_host_pointer()
        {
            // This doesn't seem to work on NVidia platforms
            var platform = ComputePlatform.Platforms.First();
            var device   = platform.Devices.First();
            var context  = new ComputeContext(new[] { device }, new ComputeContextPropertyList(platform), null,
                                              IntPtr.Zero);

            using var program = new ComputeProgram(context, Sum_kernel);

            program.Build(new[] { device }, "-cl-std=CL1.2", (handle, ptr) => OnProgramBuilt(program, device),
                          IntPtr.Zero);

            using var queue  = new ComputeCommandQueue(context, device, ComputeCommandQueueFlags.None);
            using var kernel = program.CreateKernel("sum");

            var source       = new byte[] { 1, 2, 3, 4, 5 };
            var sourceHandle = GCHandle.Alloc(source, GCHandleType.Pinned);
            var sourcePtr    = sourceHandle.AddrOfPinnedObject();

            using var sourceBuffer = new ComputeBuffer <byte>(
                      context,
                      ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.UseHostPointer,
                      source.LongLength,
                      sourcePtr);

            var target       = new byte[5];
            var targetHandle = GCHandle.Alloc(target, GCHandleType.Pinned);
            var targetPtr    = targetHandle.AddrOfPinnedObject();

            using var targetBuffer = new ComputeBuffer <byte>(
                      context,
                      ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.UseHostPointer,
                      target.LongLength,
                      targetPtr);

            try
            {
                kernel.SetMemoryArgument(0, sourceBuffer);
                kernel.SetMemoryArgument(1, targetBuffer);

                var events = new List <ComputeEventBase>();
                queue.Execute(kernel, null, new long[] { source.Length }, null, events);

                //await events.WaitForEvents();

                for (var i = 0; i < target.Length; i++)
                {
                    Console.WriteLine($"{source[i]} * 2 = {target[i]}");
                }
            }
            finally
            {
                sourceHandle.Free();
                targetHandle.Free();
            }
        }
示例#14
0
        /// <summary>
        /// Subsequent calls to Invoke work faster without arguments
        /// </summary>
        public void Invoke(string Method, long Offset, long Worksize)
        {
            if (LastKernel == null)
            {
                throw new InvalidOperationException("You need to call Invoke with arguments before. All Arguments are saved");
            }

            ComputeEventList eventList = new ComputeEventList();

            InvokeStarted?.Invoke(this, EventArgs.Empty);

            queue.Execute(LastKernel, new long[] { Offset }, new long[] { Worksize }, null, eventList);

            eventList[0].Completed += (sender, e) => EasyCL_Completed(sender, null);
            eventList[0].Aborted   += (sender, e) => EasyCL_Aborted(sender, Method);

            queue.Finish();
        }
        public void Can_copy()
        {
            var platform = ComputePlatform.Platforms.First();
            var device   = platform.Devices.First();
            var context  = new ComputeContext(new[] { device }, new ComputeContextPropertyList(platform), null, IntPtr.Zero);

            using var program = new ComputeProgram(context, Copy_kernel);

            try
            {
                program.Build(new[] { device }, "-cl-std=CL1.2", null, IntPtr.Zero);
            }
            catch (Exception ex)
            {
                OnProgramBuilt(program, device);
                return;
            }

            using var queue  = new ComputeCommandQueue(context, device, ComputeCommandQueueFlags.None);
            using var kernel = program.CreateKernel("copy");

            var sourcePath = Path.GetFullPath(Path.Combine(Environment.CurrentDirectory, "sample00.png"));

            using var sourceImage = (Bitmap)Image.FromFile(sourcePath);

            var w = sourceImage.Width;
            var h = sourceImage.Height;

            var source = sourceImage.ToBytes();

            source.SaveFormatted("source.txt", w, h, channels: 4);
            using var sourceBuffer = context.CreateImage2D(source, w, h);

            var target = new byte[h * w * 4];

            using var targetBuffer = context.CreateBuffer(target);

            kernel.SetMemoryArgument(0, sourceBuffer);
            kernel.SetMemoryArgument(1, targetBuffer);

            queue.Execute(kernel, null, new long[] { w, h }, null, null);
            queue.Finish();

            target.SaveFormatted("target.txt", w, h, channels: 4);

            var result     = target.ToBitmap(w, h, 4);
            var resultPath = Path.GetFullPath(Path.Combine(Environment.CurrentDirectory, "copy.png"));

            result.Save(resultPath);

            Run("source.txt");
            Run("target.txt");
            Run(resultPath);
        }
示例#16
0
        static void Main(string[] args)
        {
            int w = 11, h = 11, sx = 5, sy = 5, iters = 2;

            var properties = new ComputeContextPropertyList(ComputePlatform.Platforms[0]);
            var context    = new ComputeContext(ComputeDeviceTypes.All, properties, null, IntPtr.Zero);
            var quene      = new ComputeCommandQueue(context, ComputePlatform.Platforms[0].Devices[0], ComputeCommandQueueFlags.None);

            //Компиляция программы
            var prog = new ComputeProgram(context, Source);

            try
            {
                prog.Build(context.Devices, "", null, IntPtr.Zero);
            }
            catch
            {
                Console.WriteLine(prog.GetBuildLog(context.Devices[0]));
            }
            //Создание ядра
            var kernel = prog.CreateKernel("Test");


            var mat = new float[w * h];

            for (int i = 0; i < w * h; i++)
            {
                mat[i] = (i == w * sy + sx) ? 1f : 0f;
            }

            var mat1 = new ComputeBuffer <float>(context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.UseHostPointer, mat);
            var mat2 = new ComputeBuffer <float>(context, ComputeMemoryFlags.ReadWrite, w * h);

            kernel.SetMemoryArgument(0, mat1);
            kernel.SetMemoryArgument(1, mat2);
            kernel.SetValueArgument(2, iters);
            kernel.SetValueArgument(3, w);
            kernel.SetValueArgument(4, h);

            quene.Execute(kernel, null, new long[] { (long)h, (long)w }, null, null);

            quene.ReadFromBuffer(mat1, ref mat, true, null);

            for (int i = 0; i < h; i++)
            {
                for (int j = 0; j < w; j++)
                {
                    Console.Write($"{mat[i*w+j]:.00} ");
                }
                Console.WriteLine();
            }

            Console.ReadKey();
        }
示例#17
0
        private void InitializeCellGrid()
        {
            ComputeProgram prog = new ComputeProgram(Ctx, InitilizeGridKernelString);

            prog.Build(Ctx.Devices, "", null, IntPtr.Zero);
            InitilizeGridKernel = prog.CreateKernel("InitializeGrid");
            InitilizeGridKernel.SetMemoryArgument(0, CurrImage);
            InitilizeGridKernel.SetMemoryArgument(1, NextImage);
            CQ = new ComputeCommandQueue(Ctx, Ctx.Devices[0], ComputeCommandQueueFlags.None);
            CQ.Execute(InitilizeGridKernel, null, new long[] { Width, Height }, null, null);
        }
        public static void Enqueue(
            this ComputeCommandQueue queue,
            ComputeKernel kernel,
            long[] globalWorkOffset = null,
            long[] globalWorkSize   = null,
            long[] localWorkSize    = null,
            ICollection <ComputeEventBase> events = null)
        {
            events ??= new List <ComputeEventBase>();

            queue.Execute(kernel, globalWorkOffset, globalWorkSize, localWorkSize, events);
        }
示例#19
0
        protected T[] InternalExecuteOpencl <T>(
            String source,
            String function,
            int bufferSize,
            ParallelTaskParams loaderParams,
            params Object[] kernelParams)
            where T : struct
        {
            TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointStart);

            ComputeCommandQueue queue = QueueWithDevice(loaderParams.OpenCLDevice);

            TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointPlatformInit);

            String         updatedSource = "#define OpenCL\r\n" + source;
            ComputeProgram program       = new ComputeProgram(queue.Context, updatedSource);

            program.Build(new ComputeDevice[] { queue.Device }, null, null, IntPtr.Zero);
            TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointKernelBuild);

            T[] resultBuffer = new T[bufferSize];

            ComputeBuffer <T>    resultBufferVar = new ComputeBuffer <T>(queue.Context, ComputeMemoryFlags.WriteOnly, bufferSize);
            List <ComputeMemory> vars            = new List <ComputeMemory>();

            vars.Add(resultBufferVar);
            vars.AddRange(WrapDeviceVariables(kernelParams, queue.Context));

            ComputeKernel kernel = program.CreateKernel(function);

            for (int i = 0; i < vars.Count; i++)
            {
                kernel.SetMemoryArgument(i, vars[i]);
            }

            TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointDeviceWrite);

            long[] workersGlobal = new long[2] {
                loaderParams.GlobalWorkers.Width, loaderParams.GlobalWorkers.Height
            };
            queue.Execute(kernel, null, workersGlobal, null, null);
            TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointKernelExecute);

            queue.ReadFromBuffer <T>(resultBufferVar, ref resultBuffer, false, null);
            TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointDeviceRead);

            queue.Finish();
            TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointPlatformDeinit);

            return(resultBuffer);
        }
示例#20
0
        public PrOpenClCalculation()
        {
            m_Platform       = ComputePlatform.Platforms.First();
            m_Device         = m_Platform.Devices.First();
            m_ComputeContext = new ComputeContext(new [] { m_Device }, new ComputeContextPropertyList(m_Platform), null, IntPtr.Zero);
            m_CommandQueue   = new ComputeCommandQueue(m_ComputeContext, m_Device, ComputeCommandQueueFlags.None);
            m_Program        = new ComputeProgram(m_ComputeContext, ProgramSource);
            m_Program.Build(new [] { m_Device }, "", null, IntPtr.Zero);
            m_Kernel = m_Program.CreateKernel("Test");

            long count = 100;

            var result = new int[count];
            var a      = new int[count];

            for (int i = 0; i < count; i++)
            {
                a[i] = i;
            }

            var resultDev = new ComputeBuffer <int>(m_ComputeContext,
                                                    ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.UseHostPointer,
                                                    result);

            var aDev = new ComputeBuffer <int>(m_ComputeContext,
                                               ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.UseHostPointer,
                                               a);


            var bDev = new ComputeBuffer <int>(m_ComputeContext,
                                               ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.UseHostPointer,
                                               a);

            //Задаем их для нашего ядра
            m_Kernel.SetMemoryArgument(0, resultDev);
            m_Kernel.SetMemoryArgument(1, aDev);
            m_Kernel.SetMemoryArgument(2, bDev);

            //Вызываем ядро количество потоков равно count
            m_CommandQueue.Execute(m_Kernel, null, new[] { count }, null, null);

            //Читаем результат из переменной
            m_CommandQueue.ReadFromBuffer(resultDev, ref result, true, null);

            //Выводим результат
            foreach (var i in result)
            {
                Console.WriteLine(i);
            }
        }
        public static async Task ExecuteAsync(
            this ComputeCommandQueue queue,
            ComputeKernel kernel,
            long[] globalWorkOffset = null,
            long[] globalWorkSize   = null,
            long[] localWorkSize    = null,
            ICollection <ComputeEventBase> events = null)
        {
            events ??= new List <ComputeEventBase>();

            queue.Execute(kernel, globalWorkOffset, globalWorkSize, localWorkSize, events);

            await events.WaitForEvents();
        }
        public async Task Can_multiply_with_device_allocated_memory()
        {
            var platform = ComputePlatform.Platforms.First();
            var device   = platform.Devices.First();
            var context  = new ComputeContext(new[] { device }, new ComputeContextPropertyList(platform), null, IntPtr.Zero);

            using var program = new ComputeProgram(context, Sum_kernel);

            program.Build(new[] { device }, "-cl-std=CL1.2", (handle, ptr) => OnProgramBuilt(program, device), IntPtr.Zero);

            using var queue  = new ComputeCommandQueue(context, device, ComputeCommandQueueFlags.None);
            using var kernel = program.CreateKernel("sum");

            var source = new byte[] { 1, 2, 3, 4, 5 };

            using var sourceBuffer = new ComputeBuffer <byte>(
                      context,
                      ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer,
                      source);

            using var targetBuffer = new ComputeBuffer <byte>(
                      context,
                      ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.AllocateHostPointer,
                      5);

            kernel.SetMemoryArgument(0, sourceBuffer);
            kernel.SetMemoryArgument(1, targetBuffer);

            var events = new List <ComputeEventBase>();

            queue.Execute(kernel, null, new long[] { source.Length }, null, events);

            var targetPtr = queue.Map(targetBuffer, true, ComputeMemoryMappingFlags.Read, 0, source.Length, events);

            //await events.WaitForEvents();

            unsafe
            {
                var result = (byte *)targetPtr.ToPointer();
                for (var i = 0; i < source.Length; i++)
                {
                    Console.WriteLine($"{source[i]} * 2 = {result[i]}");
                }
            }

            queue.Unmap(targetBuffer, ref targetPtr, events);
            //await events.WaitForEvents();
        }
示例#23
0
        /// <summary>
        /// Executes the specified kernel function name.
        /// </summary>
        /// <typeparam name="TSource">The type of the source.</typeparam>
        /// <param name="functionName">Name of the function.</param>
        /// <param name="args"></param>
        /// <exception cref="ExecutionException">
        /// </exception>
        public override void Execute(string functionName, params object[] args)
        {
            ValidateArgs(functionName, args);

            ComputeKernel       kernel   = _compiledKernels.FirstOrDefault(x => (x.FunctionName == functionName));
            ComputeCommandQueue commands = new ComputeCommandQueue(_context, _defaultDevice, ComputeCommandQueueFlags.None);

            if (kernel == null)
            {
                throw new ExecutionException(string.Format("Kernal function {0} not found", functionName));
            }

            try
            {
                var  ndobject = (Array)args.FirstOrDefault(x => (x.GetType().IsArray));
                long length   = ndobject != null ? ndobject.Length : 1;

                var method = KernelFunctions.FirstOrDefault(x => (x.Name == functionName));

                var buffers = BuildKernelArguments(method, args, kernel, length);
                commands.Execute(kernel, null, new long[] { length }, null, null);

                for (int i = 0; i < args.Length; i++)
                {
                    if (!args[i].GetType().IsArray)
                    {
                        continue;
                    }

                    var ioMode = method.Parameters.ElementAt(i).Value.IOMode;
                    if (ioMode == IOMode.InOut || ioMode == IOMode.Out)
                    {
                        Array r = (Array)args[i];
                        commands.ReadFromMemory(buffers[i], ref r, true, 0, null);
                    }
                    buffers[i].Dispose();
                }
            }
            catch (Exception ex)
            {
                throw new ExecutionException(ex.Message);
            }
            finally
            {
                commands.Finish();
                commands.Dispose();
            }
        }
示例#24
0
        private static void ConductSearch(ComputeContext context, ComputeKernel kernel)
        {
            var todos = GetQueenTaskPartition(NumQueens, 4);
            var done = new List<QueenTask>();

            ComputeEventList eventList = new ComputeEventList();

            var commands = new ComputeCommandQueue(context, context.Devices[1], ComputeCommandQueueFlags.None);

            Console.WriteLine("Starting {0} tasks, and working {1} at a time.", todos.Count, Spread);

            QueenTask[] inProgress = GetNextAssignment(new QueenTask[] {}, todos, done);

            var sw = new Stopwatch();
            sw.Start();

            while (inProgress.Any())
            {
                var taskBuffer =
                    new ComputeBuffer<QueenTask>(context,
                        ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer,
                        inProgress);

                kernel.SetMemoryArgument(0, taskBuffer);
                commands.WriteToBuffer(inProgress, taskBuffer, false, null);

                for (int i = 0; i < 12; i++)
                    commands.Execute(kernel, null, new long[] { inProgress.Length }, null, eventList);

                commands.ReadFromBuffer(taskBuffer, ref inProgress, false, eventList);
                commands.Finish();

                inProgress = GetNextAssignment(inProgress, todos, done);
            }

            sw.Stop();

            Console.WriteLine(sw.ElapsedMilliseconds / 1000.0);

            ulong sum = done.Select(state => state.solutions)
                            .Aggregate((total, next) => total + next);

            Console.WriteLine("Q({0})={1}", NumQueens, sum);
        }
示例#25
0
        private unsafe void notify(CLProgramHandle programHandle, IntPtr userDataPtr)
        {
            uint[] dst = new uint[16];

            fixed (uint* dstPtr = dst)
            {
                using (var queue = new ComputeCommandQueue(ccontext, device, ComputeCommandQueueFlags.None))
                {
                    var buf = new ComputeBuffer<uint>(ccontext, ComputeMemoryFlags.WriteOnly, 16);

                    var kernel = program.CreateKernel("test");
                    kernel.SetValueArgument(0, 1443351125U);
                    kernel.SetMemoryArgument(1, buf);

                    var eventList = new ComputeEventList();

                    queue.Execute(kernel, null, new long[] { 16L, 256L, 1048576L }, null, null);
                    queue.Finish();
                    queue.Read<uint>(buf, true, 0, 16, (IntPtr)dstPtr, null);
                    queue.Finish();
                    queue.Finish();
                }
            }
        }
示例#26
0
        protected override void RunInternal()
        {
            int count = 10;
            float[] arrA = new float[count];
            float[] arrB = new float[count];
            float[] arrC = new float[count];

            Random rand = new Random();

            for (int i = 0; i < count; i++)
            {
                arrA[i] = (float)(rand.NextDouble() * 100);
                arrB[i] = (float)(rand.NextDouble() * 100);
            }

            ComputeBuffer<float> a = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrA);
            ComputeBuffer<float> b = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrB);
            ComputeBuffer<float> c = new ComputeBuffer<float>(context, ComputeMemoryFlags.WriteOnly, arrC.Length);

            ComputeProgram program = new ComputeProgram(context, new string[] { kernelSource });
            program.Build(null, null, null, IntPtr.Zero);

            ComputeKernel kernel = program.CreateKernel("VectorAdd");
            kernel.SetMemoryArgument(0, a);
            kernel.SetMemoryArgument(1, b);
            kernel.SetMemoryArgument(2, c);

            ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

            ComputeEventList events = new ComputeEventList();

            commands.Execute(kernel, null, new long[] { count }, null, events);

            arrC = new float[count];
            GCHandle arrCHandle = GCHandle.Alloc(arrC, GCHandleType.Pinned);

            commands.Read(c, false, 0, count, arrCHandle.AddrOfPinnedObject(), events);
            commands.Finish();

            arrCHandle.Free();

            for (int i = 0; i < count; i++)
                Console.WriteLine("{0} + {1} = {2}", arrA[i], arrB[i], arrC[i]);
        }
示例#27
0
        private void CalculateConvolution(ComputeContext computeContext)
        {
            Stopwatch stopwatch = new Stopwatch();
            stopwatch.Start();

            float dx;
            bool shiftXParse = float.TryParse(textBoxShiftX.Text, NumberStyles.Float, CultureInfo.InvariantCulture.NumberFormat, out dx);
            if (!shiftXParse)
                throw new SyntaxErrorException(", needs to be .");

            float dy;
            bool shiftYParse = float.TryParse(textBoxShiftX.Text, NumberStyles.Float, CultureInfo.InvariantCulture.NumberFormat, out dy);
            if (!shiftYParse)
                throw new SyntaxErrorException(", needs to be  .");

            float dz;
            bool shiftZParse = float.TryParse(textBoxShiftX.Text, NumberStyles.Float, CultureInfo.InvariantCulture.NumberFormat, out dz);
            if (!shiftZParse)
                throw new SyntaxErrorException(", needs to be  .");

            int pixelCount = _imageDimensionX*_imageDimensionY*_imageDimensionZ;

            Console.WriteLine("Computing...");
            Console.WriteLine("Reading kernel...");

            String kernelPath = Directory.GetParent(Directory.GetCurrentDirectory()).Parent.Parent.FullName;

            String kernelString;
            using (var sr = new StreamReader(kernelPath + "\\convolution.cl"))
                kernelString = sr.ReadToEnd();

            Console.WriteLine("Reading kernel... done");

            float[] selectedTransformation = Transformations.GetTransformation((TransformationType)comboBoxTransform.SelectedItem, 1.0f / float.Parse(textBoxPixelSize.Text), 1.0f / float.Parse(textBoxPixelSize.Text), 1.0f / float.Parse(textBoxPixelSize.Text), dx, dy, dz);

            //create openCL program
            ComputeProgram computeProgram = new ComputeProgram(computeContext, kernelString);

            computeProgram.Build(computeContext.Devices, null, null, IntPtr.Zero);

            ComputeProgramBuildStatus computeProgramBuildStatus = computeProgram.GetBuildStatus(_selectedComputeDevice);
            Console.WriteLine("computeProgramBuildStatus\n\t"+computeProgramBuildStatus);

            String buildLog = computeProgram.GetBuildLog(_selectedComputeDevice);
            Console.WriteLine("buildLog");
            if (buildLog.Equals("\n"))
                Console.WriteLine("\tbuildLog is empty...");
            else
                Console.WriteLine("\t" + buildLog);

            float[] fluorophores = CsvData.ReadFluorophores(_sourceFilename);

            /////////////////////////////////////////////
            // Create a Command Queue & Event List
            /////////////////////////////////////////////
            ComputeCommandQueue computeCommandQueue = new ComputeCommandQueue(computeContext, _selectedComputeDevice, ComputeCommandQueueFlags.None);

            ////////////////////////////////////////////////////////////////
            // Create Buffers Transform
            ////////////////////////////////////////////////////////////////
            ComputeBuffer<float> fluorophoresCoords = new ComputeBuffer<float>(computeContext, ComputeMemoryFlags.ReadWrite, fluorophores.LongLength);

            ComputeBuffer<float> transformationMatrix = new ComputeBuffer<float>(computeContext, ComputeMemoryFlags.ReadOnly, selectedTransformation.LongLength);

            /////////////////////////////////////////////
            // Create the transformFluorophoresKernel
            ///////////////////////////////////////////////////////////
            ComputeKernel transformFluorophoresKernel = computeProgram.CreateKernel("transform_fluorophores");

            /////////////////////////////////////////////
            // Set the transformFluorophoresKernel arguments
            /////////////////////////////////////////////
            transformFluorophoresKernel.SetMemoryArgument(0, fluorophoresCoords);
            transformFluorophoresKernel.SetMemoryArgument(1, transformationMatrix);

            /////////////////////////////////////////////
            // Configure the work-item structure
            /////////////////////////////////////////////
            long[] globalWorkOffsetTransformFluorophoresKernel = null;
            long[] globalWorkSizeTransformFluorophoresKernel = new long[]   { fluorophores.Length / 4 };
            long[] localWorkSizeTransformFluorophoresKernel = null;

            ////////////////////////////////////////////////////////
            // Enqueue the transformFluorophoresKernel for execution
            ////////////////////////////////////////////////////////

            computeCommandQueue.WriteToBuffer(fluorophores, fluorophoresCoords, true, null);
            computeCommandQueue.WriteToBuffer(selectedTransformation, transformationMatrix, true, null);

            computeCommandQueue.Execute(transformFluorophoresKernel, globalWorkOffsetTransformFluorophoresKernel, globalWorkSizeTransformFluorophoresKernel, localWorkSizeTransformFluorophoresKernel, null);
            //            computeCommandQueue.ExecuteTask(transformFluorophoresKernel, transformFluorophoresEvents);

            float[] transformedFluorophores = new float[fluorophores.Length];

            computeCommandQueue.ReadFromBuffer(fluorophoresCoords, ref transformedFluorophores, true, null);

            computeCommandQueue.Finish();

            //TODO remove, only for testing
            //            for (int i = 0; i < transformedFluorophores.Length; i++)
            //            {
            //                Console.WriteLine(transformedFluorophores[i]);
            //            }
            // /TODO remove, only for testing

            stopwatch.Stop();
            Console.WriteLine("Transform fluophores duration:\n\t" + stopwatch.Elapsed);
            stopwatch.Reset();
            stopwatch.Start();
            // fluorophoresCoords are now transformed (done in place)

            ////////////////////////////////////////////////////////////////
            // Create Buffers Convolve Fluorophores
            ////////////////////////////////////////////////////////////////

            const int convolve_kernel_lwgs = 16;
            int totalBuffer = (int) Math.Ceiling(pixelCount / (float)convolve_kernel_lwgs) * convolve_kernel_lwgs;

            ComputeBuffer<float> resultImage = new ComputeBuffer<float>(computeContext, ComputeMemoryFlags.WriteOnly, totalBuffer);

            /////////////////////////////////////////////
            // Create the transformFluorophoresKernel
            /////////////////////////////////////////////
            ComputeKernel convolveFluorophoresKernel = computeProgram.CreateKernel("convolve_fluorophores");

            /////////////////////////////////////////////
            // Set the convolveFluorophoresKernel arguments
            /////////////////////////////////////////////

            convolveFluorophoresKernel.SetMemoryArgument(0, resultImage);
            convolveFluorophoresKernel.SetValueArgument(1, _imageDimensionX);
            convolveFluorophoresKernel.SetValueArgument(2, _imageDimensionY);
            convolveFluorophoresKernel.SetMemoryArgument(3, fluorophoresCoords);
            convolveFluorophoresKernel.SetLocalArgument(4, convolve_kernel_lwgs);
            convolveFluorophoresKernel.SetValueArgument(5, fluorophores.Length / 4);

            /////////////////////////////////////////////
            // Configure the work-item structure
            /////////////////////////////////////////////
            long[] globalWorkOffsetTransformConvolveFluorophoresKernel = null;
            long[] globalWorkSizeTransformConvolveFluorophoresKernel = new long[] { pixelCount };
            long[] localWorkSizeTransformConvolveFluorophoresKernel = new long[] {convolve_kernel_lwgs};

            ////////////////////////////////////////////////////////
            // Enqueue the convolveFluorophoresKernel for execution
            ////////////////////////////////////////////////////////

            computeCommandQueue.Execute(convolveFluorophoresKernel, globalWorkOffsetTransformConvolveFluorophoresKernel, globalWorkSizeTransformConvolveFluorophoresKernel, localWorkSizeTransformConvolveFluorophoresKernel, null);

            float[] resultImageData = new float[totalBuffer];
            computeCommandQueue.ReadFromBuffer(resultImage, ref resultImageData, true, null);

            computeCommandQueue.Finish();

            for (int i = 0; i < pixelCount; i++)
            {
                Console.WriteLine(resultImageData[i]);
            }

            Console.WriteLine("Writing data to file...");
            //            CsvData.WriteToDisk("..\\..\\..\\output.csv", resultImageData);
            TiffData.WriteToDisk(resultImageData, _saveFilename, _imageDimensionX, _imageDimensionY);

            Bitmap bitmap = new Bitmap(_imageDimensionX, _imageDimensionY);

            float max = resultImageData.Max();

            float scale = 255/(float)max;

            //            for (int r = 0; r < _imageDimensionY; r++)
            //            {
            //                for (int c = 0; c < _imageDimensionX; c++)
            //                {
            //                    float value = resultImageData[c*(r + 1)];
            //                    Color newColor = Color.FromArgb((int)(value * scale), (int)(value * scale), (int)(value * scale));
            //                    bitmap.SetPixel(c,r, newColor);
            //                }
            //            }

            ushort[] ushortdata = new ushort[resultImageData.Length];

            for (int i = 0; i < resultImageData.Length; i++)
            {
                ushortdata[i] = (ushort)resultImageData[i];
            }

            uint[] convertGray16ToRgb = ConvertGray16ToRGB(ushortdata, 16);

            byte[] bytes = new byte[convertGray16ToRgb.Length * 4];
            //
            //            int[] resultImageData2 = new int[resultImageData.Length];
            //
            for (int index = 0; index < convertGray16ToRgb.Length; index++)
            {
            //                resultImageData2[index] = (int)(scale*resultImageData[index]);

                byte[] bytes1 = BitConverter.GetBytes(convertGray16ToRgb[index]);
                bytes[index] = bytes1[0];
                bytes[4 * index + 1] = bytes1[1];
                bytes[4 * index + 2] = bytes1[2];
                bytes[4 * index + 3] = bytes1[3];
            }
            //
            //            for (int r = 0; r < _imageDimensionY; r++)
            //            {
            //                for (int c = 0; c < _imageDimensionX; c++)
            //                {
            //                    float value = resultImageData2[c*(r + 1)];
            //                    Color newColor = Color.FromArgb((int)(value), (int)(value), (int)(value));
            //                    bitmap.SetPixel(c,r, newColor);
            //                }
            //            }
            //            bitmap.Save("c:\\temp.bmp");

            using (MemoryStream ms = new MemoryStream(bytes))
            {
                Image image = Bitmap.FromStream(ms);
                image.Save("c:\\temp.bmp");
            }

            Console.WriteLine("Writing data to file... done");

            stopwatch.Stop();
            Console.WriteLine("Convolve fluophores duration:\n\t" + stopwatch.Elapsed);
            Console.WriteLine("Computing... done");
        }
示例#28
0
        public static void Test()
        {
            string source = File.ReadAllText("MonteCarloSimulate.cl");

            //Choose Device
            ComputePlatform platform = ComputePlatform.Platforms[0];

            ComputeDevice device = platform.QueryDevices()[0];

            ComputeContextPropertyList properties =
                new ComputeContextPropertyList(platform);

            //Setup of stuff on our side
            ComputeContext context = new ComputeContext(ComputeDeviceTypes.All,
                properties, null, IntPtr.Zero);

            //Build the program, which gets us the kernel
            ComputeProgram program = new ComputeProgram(context, source);
            program.Build(null, null, null, IntPtr.Zero);
            //can use notify as the 3rd command... if you want this to be non-blocking

            ComputeKernel kernel = program.CreateKernel("MonteCarloSimulate");


            //Create arguments
            int sideSize = 4096;
            int[] inMatrixA = new int[sideSize * sideSize];
            int[] inMatrixB = new int[sideSize * sideSize];
            int[] outMatrixC = new int[sideSize * sideSize];
            Random random = new Random((int)DateTime.Now.Ticks);

            if (sideSize <= 32)
                for (int y = 0; y < sideSize; y++)
                    for (int x = 0; x < sideSize; x++)
                    {
                        inMatrixA[y * sideSize + x] = random.Next(3);
                        inMatrixB[y * sideSize + x] = random.Next(3);
                        outMatrixC[y * sideSize + x] = 0;
                    }


            ComputeBuffer<int> bufferMatrixA = new ComputeBuffer<int>(context,
                ComputeMemoryFlags.UseHostPointer, inMatrixA);

            ComputeBuffer<int> bufferMatrixB = new ComputeBuffer<int>(context,
                ComputeMemoryFlags.UseHostPointer, inMatrixB);

            ComputeBuffer<int> bufferMatrixC = new ComputeBuffer<int>(context,
                ComputeMemoryFlags.UseHostPointer, outMatrixC);

            long localWorkSize = Math.Min(device.MaxComputeUnits, sideSize);


            //Sets arguments
            kernel.SetMemoryArgument(0, bufferMatrixA);
            kernel.SetMemoryArgument(1, bufferMatrixB);
            kernel.SetMemoryArgument(2, bufferMatrixC);
            kernel.SetLocalArgument(3, sideSize * 2);
            kernel.SetValueArgument<int>(4, sideSize);
            //kernel.SetLocalArgument(1, localWorkSize);            

            string offset = " ";
            for (int x = 0; x < sideSize; x++)
                offset += "  ";

            if (sideSize <= 32)
                for (int y = 0; y < sideSize; y++)
                {
                    Console.Write(offset);
                    for (int x = 0; x < sideSize; x++)
                        Console.Write(inMatrixA[y * sideSize + x] + " ");
                    Console.WriteLine();
                }




            //Runs commands
            ComputeCommandQueue commands = new ComputeCommandQueue(context,
                context.Devices[0], ComputeCommandQueueFlags.None);

            long executionTime = DateTime.Now.Ticks;

            //Execute kernel
            //globalWorkSize in increments of localWorkSize (max of device.MaxComputeUnits or kernel.GetWorkGroupSize())
            commands.Execute(kernel, null,
                new long[] { Math.Min(sideSize, 16), Math.Min(sideSize, 16) },
                new long[] { localWorkSize, 1 }, null);

            //globalWorkSize can be any size
            //localWorkSize product much not be greater than device.MaxComputeUnits
            //and it must not be greater than kernel.GetWorkGroupSize()
            //ESSENTIALLY, the program iterates through globalWorkSize
            //in increments of localWorkSize. Both are multidimensional,
            //but this just saves us the time of doing that
            //(1 dimension can be put to multiple if the max dimension lengths
            //are known very easily with remainder).

            //Also, you should probably use this
            //kernel.GetPreferredWorkGroupSizeMultiple(device);

            commands.Finish();

            commands.ReadFromBuffer(bufferMatrixC, ref outMatrixC, true, null);

            commands.Finish();
            executionTime = DateTime.Now.Ticks - executionTime;


            GC.Collect();
            program.Dispose();

            Console.WriteLine();
            if (sideSize <= 32)
                for (int y = 0; y < sideSize; y++)
                {
                    for (int x = 0; x < sideSize; x++)
                        Console.Write(inMatrixB[y * sideSize + x] + " ");
                    Console.Write(" ");
                    for (int x = 0; x < sideSize; x++)
                        Console.Write(outMatrixC[y * sideSize + x] + " ");

                    Console.WriteLine();
                }

            int testY = random.Next(sideSize);
            int testX = random.Next(sideSize);

            int sum = 0;
            for (int q = 0; q < sideSize; q++)
                sum += inMatrixA[q * sideSize + testX] *
                    inMatrixB[testY * sideSize + q];

            Console.WriteLine(sum == outMatrixC[testY * sideSize + testX]);

            Console.WriteLine(executionTime / 10000.0);

        }
示例#29
0
 private static void CoreRender(ComputeMemory buffer, ComputeCommandQueue queue, IEnumerable<ComputeKernel> kernels, Vector4 position, Vector4 lookat, Vector4 up, int frame, float fov, int slowRenderCount, float focalDistance, int width, int height, long[] globalSize, long[] localSize)
 {
     foreach (var kernel in kernels)
     {
         kernel.SetMemoryArgument(0, buffer);
         kernel.SetValueArgument(1, width);
         kernel.SetValueArgument(2, height);
         kernel.SetValueArgument(3, position);
         kernel.SetValueArgument(4, lookat);
         kernel.SetValueArgument(5, up);
         kernel.SetValueArgument(6, frame);
         kernel.SetValueArgument(7, fov);
         kernel.SetValueArgument(8, slowRenderCount);
         kernel.SetValueArgument(9, focalDistance);
         queue.Execute(kernel, LaunchSize, globalSize, localSize, null);
     }
 }
示例#30
0
        static void Main(string[] args)
        {
            #region
            const string programName = "Prime Number";

            Stopwatch stopWatch = new Stopwatch();

            string clProgramSource = KernelProgram();

            Console.WriteLine("Environment OS:");
            Console.WriteLine("-----------------------------------------");
            Console.WriteLine(Environment.OSVersion);
            #endregion
            if (ComputePlatform.Platforms.Count == 0)
            {
                Console.WriteLine("No OpenCL Platforms are availble!");
            }
            else
            {
                #region 1
                // step 1 choose the first available platform
                ComputePlatform platform = ComputePlatform.Platforms[0];

                // output the basic info
                BasicInfo(platform);

                Console.WriteLine("Program: " + programName);
                Console.WriteLine("-----------------------------------------");
                #endregion
                //Cpu 10 seconds Gpu 28 seconds
                int count = 64;

                int[] output_Z = new int[count * count * count];

                int[] input_X = new int[count * count * count];

                for (int x = 0; x < count * count * count; x++)
                {
                    input_X[x] = x;
                }
                #region 2
                // step 2 create context for that platform and all devices
                ComputeContextPropertyList properties = new ComputeContextPropertyList(platform);
                ComputeContext context = new ComputeContext(platform.Devices, properties, null, IntPtr.Zero);

                // step 3 create and build program
                ComputeProgram program = new ComputeProgram(context, clProgramSource);
                program.Build(platform.Devices, null, null, IntPtr.Zero);
                #endregion
                // step 4 create memory objects
                ComputeBuffer<int> a = new ComputeBuffer<int>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, input_X);
                ComputeBuffer<int> z = new ComputeBuffer<int>(context, ComputeMemoryFlags.WriteOnly, output_Z.Length);

                // step 5 create kernel object with same kernel programe name VectorAdd
                ComputeKernel kernel = program.CreateKernel("PrimeNumber");

                // step 6 set kernel arguments
                //kernel.SetMemoryArgument(0, a);
                kernel.SetMemoryArgument(0, a);
                kernel.SetMemoryArgument(1, z);

                ComputeEventList eventList = new ComputeEventList();

                //for (int j = 0; j < context.Devices.Count; j++)
                // query available devices n,...,1,0.  cpu first then gpu
                for (int j = context.Devices.Count-1; j > -1; j--)
                {
                    #region 3
                    stopWatch.Start();

                    // step 7 create command queue on that context on that device
                    ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[j], ComputeCommandQueueFlags.None);

                    // step 8 run the kernel program
                    commands.Execute(kernel, null, new long[] { count, count, count }, null, eventList);
                    //Application.DoEvents();

                    #endregion
                    // step 9 read results
                    commands.ReadFromBuffer(z, ref output_Z, false, eventList);
                    #region 4
                    commands.Finish();

                    string fileName = "C:\\primenumber\\PrimeNumberGPU.txt";
                    StreamWriter file = new StreamWriter(fileName, true);

                    FileInfo info = new FileInfo(fileName);
                    long fs = info.Length;

                    // 1 MegaByte = 1.049e+6 Byte
                    int index = 1;
                    if (fs == 1.049e+6)
                    {
                        fileName = "C:\\primenumber\\PrimeNumberGPU" + index.ToString() + ".txt";
                        file = new System.IO.StreamWriter(fileName, true);
                        index++;
                    }
                    #endregion

                    for (uint xx = 0; xx < count * count * count; xx++)
                    {
                        if (output_Z[xx] != 0 && output_Z[xx] != 1)
                        {
                            Console.WriteLine(output_Z[xx]);
                            file.Write(output_Z[xx]);
                            file.Write("x");
                        }
                    }
                    #region 5
                    file.Close();
                    stopWatch.Stop();

                    ComputeCommandProfilingInfo start = ComputeCommandProfilingInfo.Started;
                    ComputeCommandProfilingInfo end = ComputeCommandProfilingInfo.Ended;
                    double time = 10e-9 * (end - start);
                    //Console.WriteLine("Nanosecond: " + time);

                    TimeSpan ts = stopWatch.Elapsed;
                    string elapsedTime = String.Format("{0:00}:{1:00}:{2:00}.{3:00}", ts.Hours, ts.Minutes, ts.Seconds, ts.Milliseconds);
                    Console.WriteLine(context.Devices[j].Name.Trim() + " Elapsed Time " + elapsedTime);

                    Console.WriteLine("-----------------------------------------");
                    #endregion
                }
                Console.ReadLine();
            }
        }
示例#31
0
        public static void CallOpenCL(int[,] libertyGroups, 
            int[,] groupNumbers, int x, int y, 
            int[] surroundingLibs, ref int emptySurroundings,
            int ourSgn, ref int duplicateGroups)
        {
            //Create arguments
            
            //Does not split yet
            //int[,] libertyGroups, 
            //int[,] groupNumbers, 
            //int x, 
            //int y, 
            //int[] surroundingLibs, 
            //ref int emptySurroundings,            
            //ref int duplicateGroups,
            //int ourSgn, 
            
            //We have to map 2 dimension to 1 dimension            

            //Set arguments
            ComputeBuffer<int> libertyGroupsIn = new ComputeBuffer<int>(openCLContext, ComputeMemoryFlags.UseHostPointer, twoDtoOneD(libertyGroups));
            openCLKernel.SetMemoryArgument(0, libertyGroupsIn);

            ComputeBuffer<int> groupNumbersIn = new ComputeBuffer<int>(openCLContext, ComputeMemoryFlags.UseHostPointer, twoDtoOneD(groupNumbers));
            openCLKernel.SetMemoryArgument(1, groupNumbersIn);

            openCLKernel.SetValueArgument<int>(2, x);
            openCLKernel.SetValueArgument<int>(3, y);
            
            ComputeBuffer<int> surroundingLibsIn = new ComputeBuffer<int>(openCLContext, ComputeMemoryFlags.UseHostPointer, surroundingLibs);
            openCLKernel.SetMemoryArgument(4, surroundingLibsIn);

            int[] emptySurroundRef = new int[1];
            ComputeBuffer<int> emptySurroundRefIn = new ComputeBuffer<int>(openCLContext, ComputeMemoryFlags.UseHostPointer, emptySurroundRef);
            openCLKernel.SetMemoryArgument(5, emptySurroundRefIn);

            int[] duplicateGroupsRef = new int[1];
            ComputeBuffer<int> duplicateGroupsRefIn = new ComputeBuffer<int>(openCLContext, ComputeMemoryFlags.UseHostPointer, duplicateGroupsRef);
            openCLKernel.SetMemoryArgument(6, duplicateGroupsRefIn);

            openCLKernel.SetValueArgument<int>(7, ourSgn);            

            //long localWorkSize = Math.Min(openCLDevice.MaxComputeUnits, sideSize);

            //Display input data

            //Runs commands
            ComputeCommandQueue commands = new ComputeCommandQueue(openCLContext,
                openCLContext.Devices[0], ComputeCommandQueueFlags.None);

            long executionTime = DateTime.Now.Ticks;

            //Execute kernel
            //globalWorkSize in increments of localWorkSize (max of device.MaxComputeUnits or kernel.GetWorkGroupSize())
            commands.Execute(openCLKernel, null,
                new long[] { 1 },
                new long[] { 1 }, null);

            //Also, you should probably use this
            //kernel.GetPreferredWorkGroupSizeMultiple(device);

            commands.Finish();

            //int[] surroundingLibs, 
            //ref int emptySurroundings,            
            //ref int duplicateGroups,
            //Read output data
            commands.ReadFromBuffer(surroundingLibsIn, ref surroundingLibs, true, null);
            commands.ReadFromBuffer(emptySurroundRefIn, ref emptySurroundRef, true, null); emptySurroundings = emptySurroundRef[0];
            commands.ReadFromBuffer(duplicateGroupsRefIn, ref duplicateGroupsRef, true, null); duplicateGroups = duplicateGroupsRef[0];

            //We could set blocking to false on reads and then read them all back in then, we could (possiblity) gain some performance
            //by telling it that commands can be executed out of order and then by queuing them up and calling Finish
            commands.Finish();
            executionTime = DateTime.Now.Ticks - executionTime;


            GC.Collect();
         //   openCLProgram.Dispose();            
            //display output data
            
            //Test are done by our caller now

            Console.WriteLine(executionTime / 10000.0);
        }
示例#32
0
文件: GPUT.cs 项目: ruarai/Trigrad
        public static void Calculate(List<Calculation> calculations)
        {
            Stopwatch s = new Stopwatch();
            s.Start();

            int count = calculations.Count;

            IntVec2[] p_p = new IntVec2[count];

            IntVec2[] p_a = new IntVec2[count];
            IntVec2[] p_b = new IntVec2[count];
            IntVec2[] p_c = new IntVec2[count];

            FloatVec3[] c = new FloatVec3[count];

            int[] c_valid = new int[count];

            Parallel.For(0, count, i =>
            {
                var calc = calculations[i];

                p_p[i] = new IntVec2(calc.P);
                p_a[i] = new IntVec2(calc.A);
                p_b[i] = new IntVec2(calc.B);
                p_c[i] = new IntVec2(calc.C);
            });

            mark(s, "memory init");

            ComputeBuffer<IntVec2> _p_p = new ComputeBuffer<IntVec2>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, p_p);

            ComputeBuffer<IntVec2> _p_a = new ComputeBuffer<IntVec2>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, p_a);

            ComputeBuffer<IntVec2> _p_b = new ComputeBuffer<IntVec2>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, p_b);

            ComputeBuffer<IntVec2> _p_c = new ComputeBuffer<IntVec2>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, p_c);

            ComputeBuffer<FloatVec3> _c = new ComputeBuffer<FloatVec3>(context, ComputeMemoryFlags.WriteOnly, c.Length);
            ComputeBuffer<int> _c_valid = new ComputeBuffer<int>(context, ComputeMemoryFlags.WriteOnly, c_valid.Length);

            mark(s, "memory buffer init");

            ComputeKernel kernel = program.CreateKernel("Barycentric");
            kernel.SetMemoryArgument(0, _p_p);

            kernel.SetMemoryArgument(1, _p_a);

            kernel.SetMemoryArgument(2, _p_b);

            kernel.SetMemoryArgument(3, _p_c);

            kernel.SetMemoryArgument(4, _c);
            kernel.SetMemoryArgument(5, _c_valid);

            mark(s, "memory init 2");

            ComputeEventList eventList = new ComputeEventList();

            ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

            commands.Execute(kernel, null, new long[] { count }, null, eventList);

            mark(s, "execute");

            commands.ReadFromBuffer(_c, ref c, false, eventList);
            commands.ReadFromBuffer(_c_valid, ref c_valid, false, eventList);
            commands.Finish();

            mark(s, "read 1");

            Parallel.For(0, count, i =>
            {
                var calc = calculations[i];
                calc.Coords = new BarycentricCoordinates(c[i].U,c[i].V,c[i].W);

                if (c_valid[i] == 1)
                {
                    lock (calc.Tri)
                        calc.Tri.Points.Add(new DrawPoint(calc.Coords, calc.P));
                }
            });

            mark(s, "read 2");

            // cleanup commands
            commands.Dispose();

            // cleanup events
            foreach (ComputeEventBase eventBase in eventList)
            {
                eventBase.Dispose();
            }
            eventList.Clear();

            // cleanup kernel
            kernel.Dispose();

            _p_p.Dispose();

            _p_a.Dispose();
            _p_b.Dispose();
            _p_c.Dispose();

            _c.Dispose();
            _c_valid.Dispose();

            mark(s, "dispose");
        }
示例#33
0
        public void Render(ComputeBuffer<Vector4> buffer, ComputeCommandQueue queue, IParameterSet parameters,
            Size windowSize, int numBlocks, Size coordinates, int bufferWidth = 0)
        {
            lock (_kernelLock)
            {
                if (_kernel == null)
                    return;

                var size = new long[] { windowSize.Width, windowSize.Height };
                var localSize = Threadsize(queue);

                var offset = new long[size.Length];
                var offsetCoords = new long[] { coordinates.Width, coordinates.Height };

                if (numBlocks > 0)
                {
                    for (var i = 0; i < size.Length; i++)
                        size[i] = numBlocks * localSize[i];
                    for (var i = 0; i < size.Length; i++)
                        offset[i] = size[i] * offsetCoords[i];
                }

                var globalSize = GlobalLaunchsizeFor(localSize, size);

                var kernelNumArgs = 0;
                _kernel.SetMemoryArgument(kernelNumArgs++, buffer);
                _kernel.SetValueArgument(kernelNumArgs++, bufferWidth);
                _kernel.SetValueArgument(kernelNumArgs++, windowSize.Width);
                _kernel.SetValueArgument(kernelNumArgs++, windowSize.Height);
                parameters.ApplyToKernel(_kernel, _useDouble, ref kernelNumArgs);

                queue.Execute(_kernel, offset, globalSize, localSize, null);
            }
        }
示例#34
0
        public void Run(ComputeContext context, TextWriter log)
        {
            try
            {
                // Create the arrays and fill them with random data.
                int count = 10;
                float[] arrA = new float[count];
                float[] arrB = new float[count];
                float[] arrC = new float[count];

                Random rand = new Random();
                for (int i = 0; i < count; i++)
                {
                    arrA[i] = (float)(rand.NextDouble() * 100);
                    arrB[i] = (float)(rand.NextDouble() * 100);
                }

                // Create the input buffers and fill them with data from the arrays.
                // Access modifiers should match those in a kernel.
                // CopyHostPointer means the buffer should be filled with the data provided in the last argument.
                ComputeBuffer<float> a = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrA);
                ComputeBuffer<float> b = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrB);
                
                // The output buffer doesn't need any data from the host. Only its size is specified (arrC.Length).
                ComputeBuffer<float> c = new ComputeBuffer<float>(context, ComputeMemoryFlags.WriteOnly, arrC.Length);

                // Create and build the opencl program.
                program = new ComputeProgram(context, clProgramSource);
                program.Build(null, null, null, IntPtr.Zero);

                // Create the kernel function and set its arguments.
                ComputeKernel kernel = program.CreateKernel("VectorAdd");
                kernel.SetMemoryArgument(0, a);
                kernel.SetMemoryArgument(1, b);
                kernel.SetMemoryArgument(2, c);

                // Create the event wait list. An event list is not really needed for this example but it is important to see how it works.
                // Note that events (like everything else) consume OpenCL resources and creating a lot of them may slow down execution.
                // For this reason their use should be avoided if possible.
                ComputeEventList eventList = new ComputeEventList();
                
                // Create the command queue. This is used to control kernel execution and manage read/write/copy operations.
                ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

                // Execute the kernel "count" times. After this call returns, "eventList" will contain an event associated with this command.
                // If eventList == null or typeof(eventList) == ReadOnlyCollection<ComputeEventBase>, a new event will not be created.
                commands.Execute(kernel, null, new long[] { count }, null, eventList);
                
                // Read back the results. If the command-queue has out-of-order execution enabled (default is off), ReadFromBuffer 
                // will not execute until any previous events in eventList (in our case only eventList[0]) are marked as complete 
                // by OpenCL. By default the command-queue will execute the commands in the same order as they are issued from the host.
                // eventList will contain two events after this method returns.
                commands.ReadFromBuffer(c, ref arrC, false, eventList);

                // A blocking "ReadFromBuffer" (if 3rd argument is true) will wait for itself and any previous commands
                // in the command queue or eventList to finish execution. Otherwise an explicit wait for all the opencl commands 
                // to finish has to be issued before "arrC" can be used. 
                // This explicit synchronization can be achieved in two ways:

                // 1) Wait for the events in the list to finish,
                //eventList.Wait();

                // 2) Or simply use
                commands.Finish();

                // Print the results to a log/console.
                for (int i = 0; i < count; i++)
                    log.WriteLine("{0} + {1} = {2}", arrA[i], arrB[i], arrC[i]);

                // cleanup commands
                commands.Dispose();

                // cleanup events
                foreach (ComputeEventBase eventBase in eventList)
                {
                    eventBase.Dispose();
                }
                eventList.Clear();

                // cleanup kernel
                kernel.Dispose();

                // cleanup program
                program.Dispose();

                // cleanup buffers
                a.Dispose();
                b.Dispose();
                c.Dispose();
            }
            catch (Exception e)
            {
                log.WriteLine(e.ToString());
            }
        }
		public String SearchPassword (byte[] hash, HashType type, int maxLength, String[] keySpace)
		{
			if (type != HashType.MD5) {
				throw new NotImplementedException ("sums other than MD5 not supported");
			}

			if (maxLength > 6) {
				throw new NotImplementedException ("doesn't support longer passwords than 7");
			}

			var joinedKeySpace = new List<byte> ();

			foreach (var k in keySpace) {
				if (k.Length > 1) {
					throw new NotImplementedException ("doesn't support longer keyspaces than 1");
				}

				joinedKeySpace.AddRange (Encoding.ASCII.GetBytes (k));
			}
				
			byte[] resultData = new byte[20];
			byte[] keyspaceJoined = joinedKeySpace.ToArray ();

			var resultBuffer = new ComputeBuffer<byte> (Context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.CopyHostPointer, resultData);
			var hashBuffer = new ComputeBuffer<byte> (Context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, hash);
			var keyspaceBuffer = new ComputeBuffer<byte> (Context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, keyspaceJoined);
			var passLenBuffer = new ComputeBuffer<byte> (Context, ComputeMemoryFlags.WriteOnly, 1);
			var flagBuffer = new ComputeBuffer<int> (Context, ComputeMemoryFlags.None, 1);


			Kernel.SetMemoryArgument (0, hashBuffer);
			Kernel.SetMemoryArgument (1, keyspaceBuffer);
			Kernel.SetMemoryArgument (2, resultBuffer);
			Kernel.SetMemoryArgument (3, passLenBuffer);
			Kernel.SetMemoryArgument (4, flagBuffer);

			// execute kernel
			var queue = new ComputeCommandQueue (Context, Device, ComputeCommandQueueFlags.None);

			long firstDim = joinedKeySpace.Count;
			var globalWorksize = new long[] { firstDim, 57 * 57, 57 * 57 };

			queue.Execute (Kernel, new long[] { 0, 0, 0 }, globalWorksize, null, null);

			byte[] passLen = new byte[1];

			queue.ReadFromBuffer (resultBuffer, ref resultData, true, null);
			queue.ReadFromBuffer (passLenBuffer, ref passLen, true, null);

			String password = null;

			if (passLen [0] > 0) {
				logger.Info ("pass len {0}", passLen [0]);
				password = Encoding.ASCII.GetString (resultData, 0, passLen [0]);
				logger.Info ("Found password: \"{0}\"", password);
			} else {
				logger.Info ("Password not found.");
			}

			queue.Finish ();

			return password;
		}
        public void Run(ComputeContext context, TextWriter log)
        {
            try
            {
                // Create the arrays and fill them with random data.
                int count = 640*480; // 
                float[] arrA = new float[count];
                float[] arrB = new float[count];
                float[] arrC = new float[count];

                Random rand = new Random();
                for (int i = 0; i < count; i++)
                {
                    arrA[i] = (float)(rand.NextDouble() * 100);
                    arrB[i] = (float)(rand.NextDouble() * 100);
                }

                
                // Create the input buffers and fill them with data from the arrays.
                // Access modifiers should match those in a kernel.
                // CopyHostPointer means the buffer should be filled with the data provided in the last argument.
                

                program = new ComputeProgram(context, clProgramSource);
                program.Build(null, null, null, IntPtr.Zero);

                ComputeBuffer<float> a = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrA);
                //ComputeBuffer<float> b = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrB);

                // The output buffer doesn't need any data from the host. Only its size is specified (arrC.Length).
                ComputeBuffer<float> c = new ComputeBuffer<float>(context, ComputeMemoryFlags.WriteOnly, arrC.Length);

                // Create and build the opencl program.
                
                // Create the kernel function and set its arguments.
                ComputeKernel kernel = program.CreateKernel("CompareGPUCPU");
                DateTime ExecutionStartTime; //Var will hold Execution Starting Time
                DateTime ExecutionStopTime;//Var will hold Execution Stopped Time
                TimeSpan ExecutionTime;//Var will count Total Execution Time-Our Main Hero                
                ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);
                
                ExecutionStartTime = DateTime.Now; //Gets the system Current date time expressed as local time
                int repeatTimes = 100;
                for (int repeatCounter = 0; repeatCounter < repeatTimes; repeatCounter++)
                {
                    kernel.SetMemoryArgument(0, a);
                    //kernel.SetMemoryArgument(1, b);
                    //kernel.SetMemoryArgument(2, c);
                    kernel.SetMemoryArgument(1, c);

                    // Create the event wait list. An event list is not really needed for this example but it is important to see how it works.
                    // Note that events (like everything else) consume OpenCL resources and creating a lot of them may slow down execution.
                    // For this reason their use should be avoided if possible.
                    //ComputeEventList eventList = new ComputeEventList();

                    // Create the command queue. This is used to control kernel execution and manage read/write/copy operations.
                  

                    // Execute the kernel "count" times. After this call returns, "eventList" will contain an event associated with this command.
                    // If eventList == null or typeof(eventList) == ReadOnlyCollection<ComputeEventBase>, a new event will not be created.
                    //commands.Execute(kernel, null, new long[] { count }, null, eventList);
                    commands.Execute(kernel, null, new long[] { count }, null, null);

                    // Read back the results. If the command-queue has out-of-order execution enabled (default is off), ReadFromBuffer 
                    // will not execute until any previous events in eventList (in our case only eventList[0]) are marked as complete 
                    // by OpenCL. By default the command-queue will execute the commands in the same order as they are issued from the host.
                    // eventList will contain two events after this method returns.
                    //commands.ReadFromBuffer(c, ref arrC, false, eventList);
                    commands.ReadFromBuffer(c, ref arrC, false, null);

                    // A blocking "ReadFromBuffer" (if 3rd argument is true) will wait for itself and any previous commands
                    // in the command queue or eventList to finish execution. Otherwise an explicit wait for all the opencl commands 
                    // to finish has to be issued before "arrC" can be used. 
                    // This explicit synchronization can be achieved in two ways:

                    // 1) Wait for the events in the list to finish,
                    //eventList.Wait();

                    // 2) Or simply use
                    commands.Finish();
                }
                ExecutionStopTime = DateTime.Now;
                ExecutionTime = ExecutionStopTime - ExecutionStartTime;
                double perTaskTime = ExecutionTime.TotalMilliseconds / repeatTimes;
                log.WriteLine("Use {0} ms using GPU", perTaskTime);
 
                // Do that using CPU
                /*
                ExecutionStartTime = DateTime.Now; //Gets the system Current date time expressed as local time
                for (int repeatCounter = 0; repeatCounter < repeatTimes; repeatCounter++)
                {
                    for (int i = 0; i < count; i++)
                    {
                        //arrC[i] = arrA[i] + arrB[i];
                        int j;
                        for (j = 0; j < 330 * 10; j++)
                            arrC[i] = arrA[i] + j;
                    }
                }
                ExecutionStopTime = DateTime.Now;
                ExecutionTime = ExecutionStopTime - ExecutionStartTime;
                perTaskTime = ExecutionTime.TotalMilliseconds / repeatTimes;
                log.WriteLine("Use {0} ms using CPU", ExecutionTime.TotalMilliseconds.ToString());
                 */
                log.WriteLine("arrA[0]:{0}, arrC[0]:{1}", arrA[0], arrC[0]);
            }
            catch (Exception e)
            {
                log.WriteLine(e.ToString());
            }
        }
示例#37
0
            public unsafe void EndSend()
            {
                for (int i = 0; i < points.Count; i++)
                {
                  inx[i].x = (float)points[i].Item3.Real;
                  inx[i].y = (float)points[i].Item3.Imaginary;
                  inc[i].x = (float)points[i].Item4.Real;
                  inc[i].y = (float)points[i].Item4.Imaginary;
                }

                _krnl.SetMemoryArgument(0, x);
                _krnl.SetMemoryArgument(1, c);
                for (int i = 0; i < _ld.Count; i++)
                {
                  _krnl.SetMemoryArgument(2 + i, outp[i]);
                }

                ComputeCommandQueue command = new ComputeCommandQueue(_context, _context.Devices[0], ComputeCommandQueueFlags.None);
                command.WriteToBuffer(inx, x, false, null);
                command.WriteToBuffer(inc, c, false, null);

                command.Execute(_krnl, null, new long[] { points.Count }, null, null);

                for (int i = 0; i < _ld.Count; i++)
                  command.ReadFromBuffer(outp[i], ref opl[i], false, null);

                command.Finish();

                output = new Queue<Tuple<int, int, List<ProcessLayer>>>();

                for (int i = 0; i < points.Count; i++)
                {
                  List<ProcessLayer> pl = new List<ProcessLayer>();
                  for (int ii = 0; ii < _ld.Count; ii++)
                  {
                ProcessLayer p = _ld[ii].Clone();
                p.c_active = opl[ii][i].c_active != 0;
                p.c_calc = opl[ii][i].c_calc;
                p.c_cmean = opl[ii][i].c_cmean;
                p.c_cvariance = opl[ii][i].c_cvariance;
                p.c_cvarsx = opl[ii][i].c_cvarsx;
                p.c_isin = opl[ii][i].c_isin != 0;
                p.c_n = opl[ii][i].c_n;
                p.c_old2x = new Complex(opl[ii][i].c_old2x.x,opl[ii][i].c_old2x.y);
                p.c_oldx = new Complex(opl[ii][i].c_oldx.x,opl[ii][i].c_oldx.y);
                p.c_resn = opl[ii][i].c_resn;
                p.c_resx = new Complex(opl[ii][i].c_resx.x,opl[ii][i].c_resx.y);
                p.c_x = new Complex(opl[ii][i].c_x.x,opl[ii][i].c_x.y);
                pl.Add(p);
                  }
                  output.Enqueue(Tuple.Create(points[i].Item1, points[i].Item2, pl));
                }
            }
示例#38
0
                /// <summary>Execute this kernel</summary>
                /// <param name="CQ">Command queue to use</param>
                /// <param name="Arguments">Arguments of the kernel function</param>
                /// <param name="GlobalWorkSize">Array of maximum index arrays. Total work-items = product(max[i],i+0..n-1), n=max.Length</param>
                /// <param name="events">Event of this command</param>
                public void Execute(ComputeCommandQueue CQ, CLCalc.Program.MemoryObject[] Arguments, int[] GlobalWorkSize, ICollection<ComputeEventBase> events)
                {
                    SetArguments(Arguments);

                    long[] globWSize=new long[GlobalWorkSize.Length];
                    for (int i=0;i<globWSize.Length;i++) globWSize[i]=GlobalWorkSize[i];

                    CQ.Execute(kernel, null, globWSize, null, events);
                }
示例#39
0
        public static void Run(TextWriter log, ComputeContext context)
        {
            StartTest(log, "Vector addition test");

            try
            {
                int count = 10;
                float[] arrA = new float[count];
                float[] arrB = new float[count];
                float[] arrC = new float[count];

                Random rand = new Random();

                for (int i = 0; i < count; i++)
                {
                    arrA[i] = (float)(rand.NextDouble() * 100);
                    arrB[i] = (float)(rand.NextDouble() * 100);
                }

                ComputeBuffer<float> a = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrA);
                ComputeBuffer<float> b = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrB);
                ComputeBuffer<float> c = new ComputeBuffer<float>(context, ComputeMemoryFlags.WriteOnly, arrC.Length);

                ComputeProgram program = new ComputeProgram(context, kernelSource);
                program.Build(null, null, null, IntPtr.Zero);
                ComputeKernel kernel = program.CreateKernel("VectorAdd");
                kernel.SetMemoryArgument(0, a);
                kernel.SetMemoryArgument(1, b);
                kernel.SetMemoryArgument(2, c);

                ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

                ICollection<ComputeEventBase> events = new Collection<ComputeEventBase>();

                // BUG: ATI Stream v2.2 crash if event list not null.
                commands.Execute(kernel, null, new long[] { count }, null, events);
                //commands.Execute(kernel, null, new long[] { count }, null, null);

                arrC = new float[count];
                GCHandle arrCHandle = GCHandle.Alloc(arrC, GCHandleType.Pinned);

                commands.Read(c, true, 0, count, arrCHandle.AddrOfPinnedObject(), events);

                arrCHandle.Free();

                for (int i = 0; i < count; i++)
                    log.WriteLine("{0} + {1} = {2}", arrA[i], arrB[i], arrC[i]);
            }
            catch (Exception e)
            {
                log.WriteLine(e.ToString());
            }

            EndTest(log, "Vector addition test");
        }
示例#40
0
                /// <summary>Execute this kernel</summary>
                /// <param name="CQ">Command queue to use</param>
                /// <param name="Arguments">Arguments of the kernel function</param>
                /// <param name="GlobalWorkSize">Array of maximum index arrays. Total work-items = product(max[i],i+0..n-1), n=max.Length</param>
                /// <param name="LocalWorkSize">Local work sizes</param>
                /// <param name="events">Event of this command</param>
                public void Execute(ComputeCommandQueue CQ, CLCalc.Program.MemoryObject[] Arguments, int[] GlobalWorkSize, int[] LocalWorkSize, ICollection<ComputeEventBase> events)
                {
                    SetArguments(Arguments);
                    if (LocalWorkSize != null && GlobalWorkSize.Length != LocalWorkSize.Length) throw new Exception("Global and local work size must have same dimension");

                    long[] globWSize = new long[GlobalWorkSize.Length];
                    for (int i = 0; i < globWSize.Length; i++) globWSize[i] = GlobalWorkSize[i];
                    long[] locWSize = null;

                    if (LocalWorkSize != null)
                    {
                        locWSize = new long[LocalWorkSize.Length];
                        for (int i = 0; i < locWSize.Length; i++) locWSize[i] = LocalWorkSize[i];
                    }

                    CQ.Execute(kernel, null, globWSize, locWSize, events);
                }