ComputeCommandQueue.ReadFromBuffer C# (CSharp)代码示例

示例#1

0

显示文件

文件： Impl_Cloo.cs 项目： pavlexander/gpu_tests

        public override void Proccess()
        {
            // execute kernel
            queue.Execute(kernel, null, new long[] { DataGenerator.InputCount }, null, null);
            queue.Finish();

            /*
             * short[] results2 = new short[this.results.Length];
             * GCHandle arrCHandle = GCHandle.Alloc(results2, GCHandleType.Pinned);
             * queue.Read(result_dev, true, 0, DataFeeder.GetInputCount(), arrCHandle.AddrOfPinnedObject(), events);
             */

            //bool[] results2 = new bool[DataFeeder.GetInputCount()];
            queue.ReadFromBuffer(result_dev, ref resultsBytes, true, null);
            queue.ReadFromBuffer(resultCalc_dev, ref calculatables, true, null);
            //queue.ReadFromBuffer()

            /*
             * bool[] arrC = new bool[5];
             * GCHandle arrCHandle = GCHandle.Alloc(arrC, GCHandleType.Pinned);
             * queue.Read<bool>(result_dev, true, 0, 5, arrCHandle.AddrOfPinnedObject(), null);
             */
            // wait for completion
            //queue.Finish();

            //kernel.Dispose();
            //queue.Dispose();
            //context.Dispose();
        }

示例#2

0

显示文件

        public int[] ReadIntBuffer(string key, int length)
        {
            int[] rawBuffer = _intBuffers[key];

            if (HardwareAccelerationEnabled)
            {
                _commands.ReadFromBuffer(_intComputeBuffers[key], ref rawBuffer, true, 0, 0, length, null);

                _commands.Finish();
            }

            return(rawBuffer);
        }

示例#3

0

显示文件

文件： Gpgpu.cs 项目： emalron/simserver

        public string vectorSum()
        {
            string vecSum = @"
                __kernel void vectorSum(__global float *v1, __global float *v2, __global float *v3) {
                    int i = get_global_id(0);
                    v3[i] = v1[i] + v2[i];
                }
            ";
            int    size   = 100000;

            float[] v1_ = new float[size];
            float[] v2_ = new float[size];
            float[] v3_ = new float[size];
            for (var i = 0; i < size; i++)
            {
                v1_[i] = (float)i;
                v2_[i] = (float).5f;
            }
            var platform_ = ComputePlatform.Platforms[0];
            ComputeContextPropertyList properties = new ComputeContextPropertyList(platform_);
            ComputeContext             ctx        = new ComputeContext(ComputeDeviceTypes.Gpu, properties, null, IntPtr.Zero);
            ComputeCommandQueue        commands   = new ComputeCommandQueue(ctx, ctx.Devices[0], ComputeCommandQueueFlags.None);
            ComputeProgram             program    = new ComputeProgram(ctx, vecSum);

            try
            {
                program.Build(null, null, null, IntPtr.Zero);
                Console.WriteLine("program build completed");
            }
            catch
            {
                string log = program.GetBuildLog(ctx.Devices[0]);
            }
            ComputeBuffer <float> v1, v2, v3;

            v1 = new ComputeBuffer <float>(ctx, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, v1_);
            v2 = new ComputeBuffer <float>(ctx, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, v2_);
            v3 = new ComputeBuffer <float>(ctx, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.CopyHostPointer, v3_);
            long[] worker = { size };
            commands.WriteToBuffer(v1_, v1, false, null);
            commands.WriteToBuffer(v2_, v2, false, null);
            ComputeKernel sumKernal = program.CreateKernel("vectorSum");

            Console.WriteLine("kernal created");
            sumKernal.SetMemoryArgument(0, v1);
            sumKernal.SetMemoryArgument(1, v2);
            sumKernal.SetMemoryArgument(2, v3);
            commands.Execute(sumKernal, null, worker, null, null);
            Console.WriteLine("Executed");
            commands.ReadFromBuffer <float>(v3, ref v3_, false, null);
            StringBuilder sb = new StringBuilder();

            for (int i = 0; i < size; i++)
            {
                sb.AppendFormat("{0} + {1} = {2}<br>", v1_[i].ToString(), v2_[i].ToString(), v3_[i].ToString());
            }
            var sum_expression_result = sb.ToString();

            return(sum_expression_result);
        }

示例#4

0

显示文件

        /// <summary>
        /// Renders the provided potential force field or gradient map.
        /// </summary>
        /// <param name="queue">The queue.</param>
        /// <param name="pointsToRender">The points to render.</param>
        /// <param name="computeBuffer">The compute buffer to use.</param>
        /// <returns>A bitmap representing <paramref name="pointsToRender"/></returns>
        private Bitmap RenderPoints(ComputeCommandQueue queue, float[] pointsToRender, ComputeBuffer <float> computeBuffer)
        {
            // Calculate the max and min values here, as it's easier than on the GPU
            // and there's no real time cost
            var max = pointsToRender.AsParallel().Max();
            var min = pointsToRender.AsParallel().Min();

            // Set up the colourKernel's arguments
            colourKernel.SetValueArgument(0, max);
            colourKernel.SetValueArgument(1, min);
            colourKernel.SetMemoryArgument(2, computeBuffer);
            colourKernel.SetMemoryArgument(3, outPix);

            // Run the colourKernl
            queue.Execute(colourKernel, new long[] { 0 }, new long[] { gridWidth, gridHeight },
                          new long[] { WorkGroupSize, WorkGroupSize }, null);
            // and read out the data produced
            queue.ReadFromBuffer(outPix, ref pixels, true, null);

            // Copy the data into a new Bitmap
            var bitmap = new Bitmap(gridWidth, gridHeight, PixelFormat.Format24bppRgb);

            var info = bitmap.LockBits(new Rectangle(0, 0, gridWidth, gridHeight), ImageLockMode.ReadWrite,
                                       PixelFormat.Format24bppRgb);

            Marshal.Copy(pixels, 0, info.Scan0, pixels.Length);

            bitmap.UnlockBits(info);
            // Flip the bitmap to make the coordinate axis match those in the simulator
            bitmap.RotateFlip(RotateFlipType.RotateNoneFlipY);
            return(bitmap);
        }

示例#5

0

显示文件

文件： OpenCL.cs 项目： Warpten/CASC-Bruteforcer

        public T[] ExecuteReturn <T>(long Worksize, long WorkOffset, int OutSize) where T : struct
        {
            T[] Returned = new T[OutSize];

            SetArgs();

            if (WorkOffset != 0)
            {
                queue.Execute(kernel, new long[] { WorkOffset }, new long[] { Worksize }, null, null);
            }
            else
            {
                queue.Execute(kernel, null, new long[] { Worksize }, null, null);
            }

            for (int i = 0; i < MethodInfo.Arguments.Length; i++)
            {
                if (MethodInfo.Arguments[i].CopyBack)
                {
                    queue.ReadFromBuffer((ComputeBuffer <T>)MethodInfo.Arguments[i].ComputeMemory, ref Returned, false, null);
                }
            }

            queue.Finish();

            return(Returned);
        }

示例#6

0

显示文件

        public uint FindProofOfWork(byte[] header, byte[] bits, uint nonceStart, uint iterations, out long elapsedMilliseconds)
        {
            this.stopwatch.Restart();

            using var headerBuffer = new ComputeBuffer <byte>(this.computeContext, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, header);
            using var bitsBuffer   = new ComputeBuffer <byte>(this.computeContext, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, bits);
            using var powBuffer    = new ComputeBuffer <uint>(this.computeContext, ComputeMemoryFlags.WriteOnly, 1);

            this.computeKernel.SetMemoryArgument(0, headerBuffer);
            this.computeKernel.SetMemoryArgument(1, bitsBuffer);
            this.computeKernel.SetValueArgument(2, nonceStart);
            this.computeKernel.SetMemoryArgument(3, powBuffer);

            using var commands = new ComputeCommandQueue(this.computeContext, this.computeDevice, ComputeCommandQueueFlags.None);
            commands.Execute(this.computeKernel, null, new long[] { iterations }, null, null);

            var nonceOut = new uint[1];

            commands.ReadFromBuffer(powBuffer, ref nonceOut, true, null);
            commands.Finish();

            elapsedMilliseconds = this.stopwatch.ElapsedMilliseconds;

            return(nonceOut[0]);
        }

示例#7

0

显示文件

        /// <summary>
        /// Computes the field used when the robot posesses the ball
        /// </summary>
        /// <param name="queue">The queue to execute the kernel on.</param>
        /// <param name="field">The calculated field points</param>
        private void ComputePosessionField(ComputeCommandQueue queue, float[] field)
        {
            var ball =
                new Vector2((float)currentEnvironment.CurrentBall.Position.X - currentEnvironment.FieldBounds.Left,
                            (float)currentEnvironment.CurrentBall.Position.Y - currentEnvironment.FieldBounds.Bottom
                            );

            var goalTarget =
                new Vector2((97.3632f) - currentEnvironment.FieldBounds.Left,
                            (33.932f + 49.6801f) / 2.0f - currentEnvironment.FieldBounds.Bottom);

            // Collect together all the points that will repel the robot
            var repulsers =
                currentEnvironment.Opponents.Select(o => o.Position).Concat(
                    currentEnvironment.Home.Select(h => h.Position)).Select(
                    p =>
                    new Vector2((float)p.X - currentEnvironment.FieldBounds.Left,
                                (float)p.Y - currentEnvironment.FieldBounds.Bottom)).ToArray();

            using (var inRepulsers = new ComputeBuffer <Vector2>(context,
                                                                 ComputeMemoryFlags.ReadOnly |
                                                                 ComputeMemoryFlags.CopyHostPointer, repulsers))
            {
                possessionKernel.SetValueArgument(0, ball);
                possessionKernel.SetValueArgument(1, goalTarget);
                possessionKernel.SetValueArgument(2, GridResolution);
                possessionKernel.SetMemoryArgument(3, inRepulsers);
                possessionKernel.SetMemoryArgument(4, outCl);

                queue.Execute(possessionKernel, new long[] { 0 }, new long[] { gridWidth, gridHeight },
                              new long[] { WorkGroupSize, WorkGroupSize }, null);
                queue.ReadFromBuffer(outCl, ref field, true, null);
            }
        }

示例#8

0

显示文件

文件： CSMM-Cloo.cs 项目： talanc/MiscBenchmarks

        public unsafe static void MatrixMulti_OpenCL(double[,] result, double[,] a, double[,] b)
        {
            InitCloo();

            var ncols = result.GetUpperBound(0) + 1;
            var nrows = result.GetUpperBound(1) + 1;

            fixed(double *rp = result, ap = a, bp = b)
            {
                ComputeBuffer <double> aBuffer = new ComputeBuffer <double>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, a.Length, (IntPtr)ap);
                ComputeBuffer <double> bBuffer = new ComputeBuffer <double>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, b.Length, (IntPtr)bp);
                ComputeBuffer <double> rBuffer = new ComputeBuffer <double>(context, ComputeMemoryFlags.WriteOnly, result.Length);

                kernel.SetMemoryArgument(0, aBuffer);
                kernel.SetMemoryArgument(1, bBuffer);
                kernel.SetValueArgument(2, ncols);
                kernel.SetMemoryArgument(3, rBuffer);

                ComputeEventList events = new ComputeEventList();

                ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

                commands.Execute(kernel, null, new long[] { result.Length }, null, events);

                commands.ReadFromBuffer(rBuffer, ref result, false, new SysIntX2(), new SysIntX2(), new SysIntX2(ncols, nrows), events);

                commands.Finish();
            }
        }

示例#9

0

显示文件

文件： OpenCLMiner.cs 项目： FluidChains/FullNode

        /// <summary>
        /// Finds the nonce for a block header hash that meets the given target.
        /// </summary>
        /// <param name="header">serialized block header</param>
        /// <param name="bits">the target</param>
        /// <param name="nonceStart">the first nonce value to try</param>
        /// <param name="iterations">the number of iterations</param>
        /// <returns></returns>
        public uint FindPow(byte[] header, byte[] bits, uint nonceStart, uint iterations)
        {
            if (this.computeDevice == null)
            {
                throw new InvalidOperationException("GPU not found");
            }

            this.ConstructOpenCLResources();

            using var headerBuffer = new ComputeBuffer <byte>(this.computeContext, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, header);
            using var bitsBuffer   = new ComputeBuffer <byte>(this.computeContext, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, bits);
            using var powBuffer    = new ComputeBuffer <uint>(this.computeContext, ComputeMemoryFlags.WriteOnly, 1);

            this.computeKernel.SetMemoryArgument(0, headerBuffer);
            this.computeKernel.SetMemoryArgument(1, bitsBuffer);
            this.computeKernel.SetValueArgument(2, nonceStart);
            this.computeKernel.SetMemoryArgument(3, powBuffer);

            using var commands = new ComputeCommandQueue(this.computeContext, this.computeDevice, ComputeCommandQueueFlags.None);
            commands.Execute(this.computeKernel, null, new long[] { iterations }, null, null);

            var nonceOut = new uint[1];

            commands.ReadFromBuffer(powBuffer, ref nonceOut, true, null);
            commands.Finish();

            this.DisposeOpenCLResources();

            return(nonceOut[0]);
        }

示例#10

0

显示文件

        private double[] MatrixMultiply(Matrix <double> L, Matrix <double> R)
        {
            var L_array = L.To1D();
            var R_array = R.To1D();
            var O_array = new double[L.M * R.N];

            ComputeBuffer <double> a = new ComputeBuffer <double>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, L_array);
            ComputeBuffer <double> b = new ComputeBuffer <double>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, R_array);
            ComputeBuffer <double> c = new ComputeBuffer <double>(context, ComputeMemoryFlags.WriteOnly, O_array.Length);

            kernel.SetMemoryArgument(0, a);
            kernel.SetMemoryArgument(1, b);
            kernel.SetMemoryArgument(2, c);
            kernel.SetValueArgument(3, L.N);
            kernel.SetValueArgument(4, L.M);
            kernel.SetValueArgument(5, R.N);
            kernel.SetValueArgument(6, R.M);

            commands.Execute(kernel, null, new long[] { R.N, L.M }, null, null);

            commands.ReadFromBuffer(c, ref O_array, true, null);

            commands.Finish();

            // cleanup buffers
            a.Dispose();
            b.Dispose();
            c.Dispose();
            return(O_array);
        }

示例#11

0

显示文件

        /// <summary>
        /// Computes the gradient of the field at all points.
        /// </summary>
        /// <param name="queue">The queue to execute the kernel on.</param>
        /// <param name="gradPoints">The calculated gradient points.</param>
        private void ComputeGradient(ComputeCommandQueue queue, float[] gradPoints)
        {
            gradientKernel.SetMemoryArgument(0, outCl);
            gradientKernel.SetMemoryArgument(1, outGradient);

            queue.Execute(gradientKernel, new long[] { 1, 1 }, new long[] { gridWidth - 2, gridHeight - 2 },
                          null, null);

            queue.ReadFromBuffer(outGradient, ref gradPoints, true, null);
        }

示例#12

0

显示文件

文件： Program.cs 项目： definedD4/Monte-Carlo_Method

        static void Main(string[] args)
        {
            int w = 11, h = 11, sx = 5, sy = 5, iters = 2;

            var properties = new ComputeContextPropertyList(ComputePlatform.Platforms[0]);
            var context    = new ComputeContext(ComputeDeviceTypes.All, properties, null, IntPtr.Zero);
            var quene      = new ComputeCommandQueue(context, ComputePlatform.Platforms[0].Devices[0], ComputeCommandQueueFlags.None);

            //Компиляция программы
            var prog = new ComputeProgram(context, Source);

            try
            {
                prog.Build(context.Devices, "", null, IntPtr.Zero);
            }
            catch
            {
                Console.WriteLine(prog.GetBuildLog(context.Devices[0]));
            }
            //Создание ядра
            var kernel = prog.CreateKernel("Test");


            var mat = new float[w * h];

            for (int i = 0; i < w * h; i++)
            {
                mat[i] = (i == w * sy + sx) ? 1f : 0f;
            }

            var mat1 = new ComputeBuffer <float>(context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.UseHostPointer, mat);
            var mat2 = new ComputeBuffer <float>(context, ComputeMemoryFlags.ReadWrite, w * h);

            kernel.SetMemoryArgument(0, mat1);
            kernel.SetMemoryArgument(1, mat2);
            kernel.SetValueArgument(2, iters);
            kernel.SetValueArgument(3, w);
            kernel.SetValueArgument(4, h);

            quene.Execute(kernel, null, new long[] { (long)h, (long)w }, null, null);

            quene.ReadFromBuffer(mat1, ref mat, true, null);

            for (int i = 0; i < h; i++)
            {
                for (int j = 0; j < w; j++)
                {
                    Console.Write($"{mat[i*w+j]:.00} ");
                }
                Console.WriteLine();
            }

            Console.ReadKey();
        }

示例#13

0

显示文件

文件： OpenCLController.cs 项目： bilginfurkan/ParticleSimulation

        public List <Vector2> CalculateSquareDistance(List <Vector2> positions)
        {
            // Create data
            var xResultData = new float[positions.Count];
            var yResultData = new float[positions.Count];

            var xData = positions.Select(x => x.X).ToArray();
            var yData = positions.Select(x => x.Y).ToArray();

            // Put data on buffers
            var xResultBuffer = new ComputeBuffer <float>(context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.CopyHostPointer, xResultData);
            var yResultBuffer = new ComputeBuffer <float>(context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.CopyHostPointer, yResultData);

            var xBuffer = new ComputeBuffer <float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, xData);
            var yBuffer = new ComputeBuffer <float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, yData);

            // Set memory arguments to kernel
            kernel.SetMemoryArgument(0, xResultBuffer);
            kernel.SetMemoryArgument(1, yResultBuffer);
            kernel.SetMemoryArgument(2, xBuffer);
            kernel.SetMemoryArgument(3, yBuffer);

            // Create queue
            var queue = new ComputeCommandQueue(context, device, ComputeCommandQueueFlags.None);

            queue.Execute(kernel, null, new long[] { positions.Count }, null, null);

            // Read data
            queue.ReadFromBuffer(xResultBuffer, ref xResultData, true, null);
            queue.ReadFromBuffer(yResultBuffer, ref yResultData, true, null);

            var result = new List <Vector2>();

            for (int i = 0; i < xResultData.Length; i++)
            {
                result.Add(new Vector2(xResultData[i], xResultData[i]));
            }

            return(result);
        }

示例#14

0

显示文件

文件： OpenCLTask.cs 项目： zakol/Parallelity

        protected T[] InternalExecuteOpencl <T>(
            String source,
            String function,
            int bufferSize,
            ParallelTaskParams loaderParams,
            params Object[] kernelParams)
            where T : struct
        {
            TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointStart);

            ComputeCommandQueue queue = QueueWithDevice(loaderParams.OpenCLDevice);

            TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointPlatformInit);

            String         updatedSource = "#define OpenCL\r\n" + source;
            ComputeProgram program       = new ComputeProgram(queue.Context, updatedSource);

            program.Build(new ComputeDevice[] { queue.Device }, null, null, IntPtr.Zero);
            TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointKernelBuild);

            T[] resultBuffer = new T[bufferSize];

            ComputeBuffer <T>    resultBufferVar = new ComputeBuffer <T>(queue.Context, ComputeMemoryFlags.WriteOnly, bufferSize);
            List <ComputeMemory> vars            = new List <ComputeMemory>();

            vars.Add(resultBufferVar);
            vars.AddRange(WrapDeviceVariables(kernelParams, queue.Context));

            ComputeKernel kernel = program.CreateKernel(function);

            for (int i = 0; i < vars.Count; i++)
            {
                kernel.SetMemoryArgument(i, vars[i]);
            }

            TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointDeviceWrite);

            long[] workersGlobal = new long[2] {
                loaderParams.GlobalWorkers.Width, loaderParams.GlobalWorkers.Height
            };
            queue.Execute(kernel, null, workersGlobal, null, null);
            TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointKernelExecute);

            queue.ReadFromBuffer <T>(resultBufferVar, ref resultBuffer, false, null);
            TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointDeviceRead);

            queue.Finish();
            TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointPlatformDeinit);

            return(resultBuffer);
        }

示例#15

0

显示文件

        public PrOpenClCalculation()
        {
            m_Platform       = ComputePlatform.Platforms.First();
            m_Device         = m_Platform.Devices.First();
            m_ComputeContext = new ComputeContext(new [] { m_Device }, new ComputeContextPropertyList(m_Platform), null, IntPtr.Zero);
            m_CommandQueue   = new ComputeCommandQueue(m_ComputeContext, m_Device, ComputeCommandQueueFlags.None);
            m_Program        = new ComputeProgram(m_ComputeContext, ProgramSource);
            m_Program.Build(new [] { m_Device }, "", null, IntPtr.Zero);
            m_Kernel = m_Program.CreateKernel("Test");

            long count = 100;

            var result = new int[count];
            var a      = new int[count];

            for (int i = 0; i < count; i++)
            {
                a[i] = i;
            }

            var resultDev = new ComputeBuffer <int>(m_ComputeContext,
                                                    ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.UseHostPointer,
                                                    result);

            var aDev = new ComputeBuffer <int>(m_ComputeContext,
                                               ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.UseHostPointer,
                                               a);


            var bDev = new ComputeBuffer <int>(m_ComputeContext,
                                               ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.UseHostPointer,
                                               a);

            //Задаем их для нашего ядра
            m_Kernel.SetMemoryArgument(0, resultDev);
            m_Kernel.SetMemoryArgument(1, aDev);
            m_Kernel.SetMemoryArgument(2, bDev);

            //Вызываем ядро количество потоков равно count
            m_CommandQueue.Execute(m_Kernel, null, new[] { count }, null, null);

            //Читаем результат из переменной
            m_CommandQueue.ReadFromBuffer(resultDev, ref result, true, null);

            //Выводим результат
            foreach (var i in result)
            {
                Console.WriteLine(i);
            }
        }

示例#16

0

显示文件

        public Bitmap GetScreenshot(CameraConfig camera, int screenshotHeight, int slowRender)
        {
            var screenshotWidth = (int)(screenshotHeight * ScreenshotAspectRatio);
            var computeBuffer   = new ComputeBuffer <Vector4>(_program.Context, ComputeMemoryFlags.ReadWrite, screenshotWidth * screenshotHeight);
            var queue           = new ComputeCommandQueue(_program.Context, _program.Context.Devices[0], ComputeCommandQueueFlags.None);

            var globalSize = GlobalLaunchsizeFor(screenshotWidth, screenshotHeight);

            for (var i = 0; i < slowRender; i++)
            {
                CoreRender(computeBuffer, queue, _kernels, new Vector4((Vector3)camera.Position), new Vector4((Vector3)camera.Lookat), new Vector4((Vector3)camera.Up), i, camera.Fov, slowRender, camera.FocalDistance, screenshotWidth, screenshotHeight, globalSize, _localSize);
            }
            for (var i = 0; i < camera.Frame * slowRender; i++)
            {
                CoreRender(computeBuffer, queue, _kernels, new Vector4((Vector3)camera.Position), new Vector4((Vector3)camera.Lookat), new Vector4((Vector3)camera.Up), i, camera.Fov, slowRender, camera.FocalDistance, screenshotWidth, screenshotHeight, globalSize, _localSize);
            }

            var pixels = new Vector4[screenshotWidth * screenshotHeight];

            queue.ReadFromBuffer(computeBuffer, ref pixels, true, null);
            queue.Finish();

            computeBuffer.Dispose();
            queue.Dispose();

            var bmp        = new Bitmap(screenshotWidth, screenshotHeight);
            var destBuffer = new int[screenshotWidth * screenshotHeight];

            for (var y = 0; y < screenshotHeight; y++)
            {
                for (var x = 0; x < screenshotWidth; x++)
                {
                    var pixel = pixels[x + y * screenshotWidth];
                    if (float.IsNaN(pixel.X) || float.IsNaN(pixel.Y) || float.IsNaN(pixel.Z))
                    {
                        Console.WriteLine("Warning! Caught NAN pixel while taking screenshot!");
                        continue;
                    }
                    destBuffer[y * screenshotWidth + x] = (byte)(pixel.X * 255) << 16 | (byte)(pixel.Y * 255) << 8 | (byte)(pixel.Z * 255);
                }
            }
            var bmpData = bmp.LockBits(new Rectangle(0, 0, screenshotWidth, screenshotHeight), ImageLockMode.ReadWrite, PixelFormat.Format32bppRgb);

            Marshal.Copy(destBuffer, 0, bmpData.Scan0, destBuffer.Length);
            bmp.UnlockBits(bmpData);

            return(bmp);
        }

示例#17

0

显示文件

        private static void ConductSearch(ComputeContext context, ComputeKernel kernel)
        {
            var todos = GetQueenTaskPartition(NumQueens, 4);
            var done  = new List <QueenTask>();

            ComputeEventList eventList = new ComputeEventList();

            var commands = new ComputeCommandQueue(context, context.Devices[1], ComputeCommandQueueFlags.None);

            Console.WriteLine("Starting {0} tasks, and working {1} at a time.", todos.Count, Spread);

            QueenTask[] inProgress = GetNextAssignment(new QueenTask[] {}, todos, done);

            var sw = new Stopwatch();

            sw.Start();

            while (inProgress.Any())
            {
                var taskBuffer =
                    new ComputeBuffer <QueenTask>(context,
                                                  ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer,
                                                  inProgress);

                kernel.SetMemoryArgument(0, taskBuffer);
                commands.WriteToBuffer(inProgress, taskBuffer, false, null);

                for (int i = 0; i < 12; i++)
                {
                    commands.Execute(kernel, null, new long[] { inProgress.Length }, null, eventList);
                }

                commands.ReadFromBuffer(taskBuffer, ref inProgress, false, eventList);
                commands.Finish();

                inProgress = GetNextAssignment(inProgress, todos, done);
            }

            sw.Stop();

            Console.WriteLine(sw.ElapsedMilliseconds / 1000.0);

            ulong sum = done.Select(state => state.solutions)
                        .Aggregate((total, next) => total + next);

            Console.WriteLine("Q({0})={1}", NumQueens, sum);
        }

示例#18

0

显示文件

        public void ExecuteReturn <T>(int Worksize, T[] Returned) where T : struct
        {
            SetArgs();

            queue.Execute(kernel, null, new long[] { Worksize }, null, null);

            for (int i = 0; i < MethodInfo.Arguments.Length; i++)
            {
                var arg = MethodInfo.Arguments[i];
                if (arg.CopyBack)
                {
                    queue.ReadFromBuffer <T>((Cloo.ComputeBuffer <T>)MethodInfo.Arguments[i].ComputeMemory, ref Returned, false, null);
                }
            }

            queue.Finish();
        }

示例#19

0

显示文件

文件： Program.cs 项目： peterallenwebb/GpuQueens

        private static void ConductSearch(ComputeContext context, ComputeKernel kernel)
        {
            var todos = GetQueenTaskPartition(NumQueens, 4);
            var done = new List<QueenTask>();

            ComputeEventList eventList = new ComputeEventList();

            var commands = new ComputeCommandQueue(context, context.Devices[1], ComputeCommandQueueFlags.None);

            Console.WriteLine("Starting {0} tasks, and working {1} at a time.", todos.Count, Spread);

            QueenTask[] inProgress = GetNextAssignment(new QueenTask[] {}, todos, done);

            var sw = new Stopwatch();
            sw.Start();

            while (inProgress.Any())
            {
                var taskBuffer =
                    new ComputeBuffer<QueenTask>(context,
                        ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer,
                        inProgress);

                kernel.SetMemoryArgument(0, taskBuffer);
                commands.WriteToBuffer(inProgress, taskBuffer, false, null);

                for (int i = 0; i < 12; i++)
                    commands.Execute(kernel, null, new long[] { inProgress.Length }, null, eventList);

                commands.ReadFromBuffer(taskBuffer, ref inProgress, false, eventList);
                commands.Finish();

                inProgress = GetNextAssignment(inProgress, todos, done);
            }

            sw.Stop();

            Console.WriteLine(sw.ElapsedMilliseconds / 1000.0);

            ulong sum = done.Select(state => state.solutions)
                            .Aggregate((total, next) => total + next);

            Console.WriteLine("Q({0})={1}", NumQueens, sum);
        }

示例#20

0

显示文件

        public override void RunKernel(ComputeContext context, ComputeKernel kernel, ComputeCommandQueue commands, long[] dimensions)
        {
            var tradeprofitsBuffer   = new ComputeBuffer <short>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, TradeProfits);
            var tradearbitrageBuffer = new ComputeBuffer <byte>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, TradeArbitrage);
            var fit_functionBuffer   = new ComputeBuffer <float>(context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer, FitFunction);

            kernel.SetMemoryArgument(0, allvarsBuffer);
            kernel.SetMemoryArgument(1, loboundsBuffer);
            kernel.SetMemoryArgument(2, upboundsBuffer);
            kernel.SetMemoryArgument(3, tradeprofitsBuffer);
            kernel.SetMemoryArgument(4, tradearbitrageBuffer);
            kernel.SetMemoryArgument(5, fit_functionBuffer);
            kernel.SetValueArgument <int>(6, VariablesCount);

            var eventList = new ComputeEventList();

            commands.Execute(kernel, null, dimensions, null, eventList);
            commands.ReadFromBuffer(fit_functionBuffer, ref FitFunction, true, null);
            commands.Finish();
        }

示例#21

0

显示文件

        public T[] ExecuteReturn <T>(long Worksize, long?LocalWorksize, long WorkOffset, int OutSize) where T : struct
        {
            SetArgs();

            if (LocalWorksize.HasValue)
            {
                queue.Execute(kernel, new long[] { WorkOffset }, new long[] { Worksize }, new long[] { LocalWorksize.Value }, null);
            }
            else
            {
                queue.Execute(kernel, new long[] { WorkOffset }, new long[] { Worksize }, null, null);
            }

            T[] Returned = new T[OutSize];
            var copyBack = MethodInfo.Arguments.First(x => x.CopyBack);

            queue.ReadFromBuffer((ComputeBuffer <T>)copyBack.ComputeMemory, ref Returned, true, null);

            queue.Finish();

            return(Returned);
        }

示例#22

0

显示文件

        public static void RunKernels(int nofKernels)
        {
            try
            {
                // Create the event wait list. An event list is not really needed for this example but it is important to see how it works.
                // Note that events (like everything else) consume OpenCL resources and creating a lot of them may slow down execution.
                // For this reason their use should be avoided if possible.
                ComputeEventList eventList = new ComputeEventList();

                commandQueue1.WriteToBuffer(input, CB_input, false, eventList);
                commandQueue1.WriteToBuffer(weightIDs, CB_networkIndex, false, eventList);

                // Execute the kernel "count" times. After this call returns, "eventList" will contain an event associated with this command.
                // If eventList == null or typeof(eventList) == ReadOnlyCollection<ComputeEventBase>, a new event will not be created.

                commandQueue1.Execute(kernel, null, new long[] { nofKernels }, null, eventList);

                // Read back the results. If the command-queue has out-of-order execution enabled (default is off), ReadFromBuffer
                // will not execute until any previous events in eventList (in our case only eventList[0]) are marked as complete
                // by OpenCL. By default the command-queue will execute the commands in the same order as they are issued from the host.
                // eventList will contain two events after this method returns.
                commandQueue1.ReadFromBuffer(CB_output, ref output, false, eventList); // , eventList

                // A blocking "ReadFromBuffer" (if 3rd argument is true) will wait for itself and any previous commands
                // in the command queue or eventList to finish execution. Otherwise an explicit wait for all the opencl commands
                // to finish has to be issued before "arrC" can be used.
                // This explicit synchronization can be achieved in two ways:

                // 1) Wait for the events in the list to finish,
                eventList.Wait();

                // 2) Or simply use
                commandQueue1.Finish();
            }
            catch (Exception e)
            {
                Console.WriteLine(e.ToString());
            }
        }

示例#23

0

显示文件

文件： OpenCLWaveProvider.cs 项目： atminatana/SoundMap

        private void Run(OpenCLInfo[] info, OpenCLPointInfo[] points, OpenCLNote[] envs, float[] result)
        {
            //var s = sw.ElapsedMilliseconds;

            ComputeBuffer <OpenCLInfo>      infoBuffer   = new ComputeBuffer <OpenCLInfo>(FContext, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, info);
            ComputeBuffer <OpenCLPointInfo> pointsBuffer = new ComputeBuffer <OpenCLPointInfo>(FContext, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, points);

            ComputeBuffer <OpenCLNote> envsBuffer   = new ComputeBuffer <OpenCLNote>(FContext, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, envs);
            ComputeBuffer <float>      resultBuffer = new ComputeBuffer <float>(FContext, ComputeMemoryFlags.WriteOnly, result.Length);

            kernel.SetMemoryArgument(0, infoBuffer);
            kernel.SetMemoryArgument(1, pointsBuffer);
            // 2 - FWaveformBuffer
            kernel.SetMemoryArgument(3, envsBuffer);
            kernel.SetMemoryArgument(4, resultBuffer);

            //var f = sw.ElapsedMilliseconds;

            commands.Execute(kernel, null, new long[] { result.Length / 2 }, null, null);

            //var e = sw.ElapsedMilliseconds;

            commands.ReadFromBuffer(resultBuffer, ref result, true, null);

            //var r = sw.ElapsedMilliseconds;

            commands.Finish();

            infoBuffer.Dispose();
            pointsBuffer.Dispose();
            envsBuffer.Dispose();
            resultBuffer.Dispose();

            //var d = sw.ElapsedMilliseconds;

            //if (App.DebugMode)
            //	Debug.WriteLine($"OpenCLWaveProvider.Run: tot: {d - s}");
        }

示例#24

0

显示文件

        /// <summary>
        /// Run kernel against all elements.
        /// </summary>
        /// <typeparam name="TSource">Struct type that corresponds to kernel function type</typeparam>
        /// <param name="array">Array of elements to process</param>
        /// <param name="kernelCode">The code of kernel function</param>
        /// <param name="kernelSelector">Method that selects kernel by function name; if null uses first</param>
        /// <param name="deviceSelector">Method that selects device by index, description, OpenCL version; if null uses first</param>
        public static void ClooForEach <TSource>(this TSource[] array, string kernelCode, Func <string, bool> kernelSelector = null, Func <int, string, Version, bool> deviceSelector = null) where TSource : struct
        {
            kernelSelector = kernelSelector ?? ((k) => true);
            deviceSelector = deviceSelector ?? ((i, d, v) => true);

            var device = ComputePlatform.Platforms.SelectMany(p => p.Devices).Where((d, i) => deviceSelector(i, $"{d.Name} {d.DriverVersion}", d.Version)).First();

            var properties = new ComputeContextPropertyList(device.Platform);

            using (var context = new ComputeContext(new[] { device }, properties, null, IntPtr.Zero))
                using (var program = new ComputeProgram(context, kernelCode))
                {
                    program.Build(new[] { device }, null, null, IntPtr.Zero);

                    var kernels = program.CreateAllKernels().ToList();
                    try
                    {
                        var kernel = kernels.First((k) => kernelSelector(k.FunctionName));

                        using (var primesBuffer = new ComputeBuffer <TSource>(context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.UseHostPointer, array))
                        {
                            kernel.SetMemoryArgument(0, primesBuffer);

                            using (var queue = new ComputeCommandQueue(context, context.Devices[0], 0))
                            {
                                queue.Execute(kernel, null, new long[] { primesBuffer.Count }, null, null);
                                queue.Finish();

                                queue.ReadFromBuffer(primesBuffer, ref array, true, null);
                            }
                        }
                    }
                    finally
                    {
                        kernels.ForEach(k => k.Dispose());
                    }
                }
        }

示例#25

0

显示文件

文件： OpenCLCompiler.cs 项目： myso42/Amplifier.NET

        /// <summary>
        /// Executes the specified kernel function name.
        /// </summary>
        /// <typeparam name="TSource">The type of the source.</typeparam>
        /// <param name="functionName">Name of the function.</param>
        /// <param name="inputs">The inputs.</param>
        /// <param name="returnInputVariable">The return result.</param>
        /// <returns></returns>
        /// <exception cref="ExecutionException">
        /// </exception>
        public override void Execute <TSource>(string functionName, params object[] args)
        {
            ComputeKernel       kernel   = _compiledKernels.FirstOrDefault(x => (x.FunctionName == functionName));
            ComputeCommandQueue commands = new ComputeCommandQueue(_context, _defaultDevice, ComputeCommandQueueFlags.None);

            if (kernel == null)
            {
                throw new ExecutionException(string.Format("Kernal function {0} not found", functionName));
            }

            try
            {
                var ndobject = (TSource[])args.FirstOrDefault(x => (x.GetType() == typeof(TSource[])));

                long length = ndobject != null ? ndobject.Length : 1;

                var buffers = BuildKernelArguments <TSource>(args, kernel, length);
                commands.Execute(kernel, null, new long[] { length }, null, null);

                foreach (var item in buffers)
                {
                    TSource[] r = (TSource[])args[item.Key];
                    commands.ReadFromBuffer(item.Value, ref r, true, null);
                    //args[item.Key] = r;
                    item.Value.Dispose();
                }

                commands.Finish();
            }
            catch (Exception ex)
            {
                throw new ExecutionException(ex.Message);
            }
            finally
            {
                commands.Dispose();
            }
        }

示例#26

0

显示文件

文件： opencl.cs 项目： Jarelk/Concurrency_P3

 public void CopyFromDevice()
 {
     _queue.ReadFromBuffer(_gpubuffer, ref _cpubuffer, true, null);
 }

示例#27

0

显示文件

文件： Program.cs 项目： allen-ecu/PrimeNumber

        static void Main(string[] args)
        {
            #region
            const string programName = "Prime Number";

            Stopwatch stopWatch = new Stopwatch();

            string clProgramSource = KernelProgram();

            Console.WriteLine("Environment OS:");
            Console.WriteLine("-----------------------------------------");
            Console.WriteLine(Environment.OSVersion);
            #endregion
            if (ComputePlatform.Platforms.Count == 0)
            {
                Console.WriteLine("No OpenCL Platforms are availble!");
            }
            else
            {
                #region 1
                // step 1 choose the first available platform
                ComputePlatform platform = ComputePlatform.Platforms[0];

                // output the basic info
                BasicInfo(platform);

                Console.WriteLine("Program: " + programName);
                Console.WriteLine("-----------------------------------------");
                #endregion
                //Cpu 10 seconds Gpu 28 seconds
                int count = 64;

                int[] output_Z = new int[count * count * count];

                int[] input_X = new int[count * count * count];

                for (int x = 0; x < count * count * count; x++)
                {
                    input_X[x] = x;
                }
                #region 2
                // step 2 create context for that platform and all devices
                ComputeContextPropertyList properties = new ComputeContextPropertyList(platform);
                ComputeContext context = new ComputeContext(platform.Devices, properties, null, IntPtr.Zero);

                // step 3 create and build program
                ComputeProgram program = new ComputeProgram(context, clProgramSource);
                program.Build(platform.Devices, null, null, IntPtr.Zero);
                #endregion
                // step 4 create memory objects
                ComputeBuffer<int> a = new ComputeBuffer<int>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, input_X);
                ComputeBuffer<int> z = new ComputeBuffer<int>(context, ComputeMemoryFlags.WriteOnly, output_Z.Length);

                // step 5 create kernel object with same kernel programe name VectorAdd
                ComputeKernel kernel = program.CreateKernel("PrimeNumber");

                // step 6 set kernel arguments
                //kernel.SetMemoryArgument(0, a);
                kernel.SetMemoryArgument(0, a);
                kernel.SetMemoryArgument(1, z);

                ComputeEventList eventList = new ComputeEventList();

                //for (int j = 0; j < context.Devices.Count; j++)
                // query available devices n,...,1,0.  cpu first then gpu
                for (int j = context.Devices.Count-1; j > -1; j--)
                {
                    #region 3
                    stopWatch.Start();

                    // step 7 create command queue on that context on that device
                    ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[j], ComputeCommandQueueFlags.None);

                    // step 8 run the kernel program
                    commands.Execute(kernel, null, new long[] { count, count, count }, null, eventList);
                    //Application.DoEvents();

                    #endregion
                    // step 9 read results
                    commands.ReadFromBuffer(z, ref output_Z, false, eventList);
                    #region 4
                    commands.Finish();

                    string fileName = "C:\\primenumber\\PrimeNumberGPU.txt";
                    StreamWriter file = new StreamWriter(fileName, true);

                    FileInfo info = new FileInfo(fileName);
                    long fs = info.Length;

                    // 1 MegaByte = 1.049e+6 Byte
                    int index = 1;
                    if (fs == 1.049e+6)
                    {
                        fileName = "C:\\primenumber\\PrimeNumberGPU" + index.ToString() + ".txt";
                        file = new System.IO.StreamWriter(fileName, true);
                        index++;
                    }
                    #endregion

                    for (uint xx = 0; xx < count * count * count; xx++)
                    {
                        if (output_Z[xx] != 0 && output_Z[xx] != 1)
                        {
                            Console.WriteLine(output_Z[xx]);
                            file.Write(output_Z[xx]);
                            file.Write("x");
                        }
                    }
                    #region 5
                    file.Close();
                    stopWatch.Stop();

                    ComputeCommandProfilingInfo start = ComputeCommandProfilingInfo.Started;
                    ComputeCommandProfilingInfo end = ComputeCommandProfilingInfo.Ended;
                    double time = 10e-9 * (end - start);
                    //Console.WriteLine("Nanosecond: " + time);

                    TimeSpan ts = stopWatch.Elapsed;
                    string elapsedTime = String.Format("{0:00}:{1:00}:{2:00}.{3:00}", ts.Hours, ts.Minutes, ts.Seconds, ts.Milliseconds);
                    Console.WriteLine(context.Devices[j].Name.Trim() + " Elapsed Time " + elapsedTime);

                    Console.WriteLine("-----------------------------------------");
                    #endregion
                }
                Console.ReadLine();
            }
        }

示例#28

0

显示文件

文件： VectorAddExample.cs 项目： RokkiGH/cloo-unity

        public void Run(ComputeContext context, TextWriter log)
        {
            try
            {
                // Create the arrays and fill them with random data.
                int count = 10;
                float[] arrA = new float[count];
                float[] arrB = new float[count];
                float[] arrC = new float[count];

                Random rand = new Random();
                for (int i = 0; i < count; i++)
                {
                    arrA[i] = (float)(rand.NextDouble() * 100);
                    arrB[i] = (float)(rand.NextDouble() * 100);
                }

                // Create the input buffers and fill them with data from the arrays.
                // Access modifiers should match those in a kernel.
                // CopyHostPointer means the buffer should be filled with the data provided in the last argument.
                ComputeBuffer<float> a = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrA);
                ComputeBuffer<float> b = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrB);
                
                // The output buffer doesn't need any data from the host. Only its size is specified (arrC.Length).
                ComputeBuffer<float> c = new ComputeBuffer<float>(context, ComputeMemoryFlags.WriteOnly, arrC.Length);

                // Create and build the opencl program.
                program = new ComputeProgram(context, clProgramSource);
                program.Build(null, null, null, IntPtr.Zero);

                // Create the kernel function and set its arguments.
                ComputeKernel kernel = program.CreateKernel("VectorAdd");
                kernel.SetMemoryArgument(0, a);
                kernel.SetMemoryArgument(1, b);
                kernel.SetMemoryArgument(2, c);

                // Create the event wait list. An event list is not really needed for this example but it is important to see how it works.
                // Note that events (like everything else) consume OpenCL resources and creating a lot of them may slow down execution.
                // For this reason their use should be avoided if possible.
                ComputeEventList eventList = new ComputeEventList();
                
                // Create the command queue. This is used to control kernel execution and manage read/write/copy operations.
                ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

                // Execute the kernel "count" times. After this call returns, "eventList" will contain an event associated with this command.
                // If eventList == null or typeof(eventList) == ReadOnlyCollection<ComputeEventBase>, a new event will not be created.
                commands.Execute(kernel, null, new long[] { count }, null, eventList);
                
                // Read back the results. If the command-queue has out-of-order execution enabled (default is off), ReadFromBuffer 
                // will not execute until any previous events in eventList (in our case only eventList[0]) are marked as complete 
                // by OpenCL. By default the command-queue will execute the commands in the same order as they are issued from the host.
                // eventList will contain two events after this method returns.
                commands.ReadFromBuffer(c, ref arrC, false, eventList);

                // A blocking "ReadFromBuffer" (if 3rd argument is true) will wait for itself and any previous commands
                // in the command queue or eventList to finish execution. Otherwise an explicit wait for all the opencl commands 
                // to finish has to be issued before "arrC" can be used. 
                // This explicit synchronization can be achieved in two ways:

                // 1) Wait for the events in the list to finish,
                //eventList.Wait();

                // 2) Or simply use
                commands.Finish();

                // Print the results to a log/console.
                for (int i = 0; i < count; i++)
                    log.WriteLine("{0} + {1} = {2}", arrA[i], arrB[i], arrC[i]);

                // cleanup commands
                commands.Dispose();

                // cleanup events
                foreach (ComputeEventBase eventBase in eventList)
                {
                    eventBase.Dispose();
                }
                eventList.Clear();

                // cleanup kernel
                kernel.Dispose();

                // cleanup program
                program.Dispose();

                // cleanup buffers
                a.Dispose();
                b.Dispose();
                c.Dispose();
            }
            catch (Exception e)
            {
                log.WriteLine(e.ToString());
            }
        }

示例#29

0

显示文件

        public unsafe void CreateCollisionCellArray()
        {
            time.Reset();

            int   num_of_bodies   = Program.window.number_of_bodies;
            uint  num_of_elements = (uint)num_of_bodies * 8;
            float ge = Program.window.grid_edge;

            InitializeQueueAndContext();
            InitializeComponents();

            time.Start();

            uint[] sortedCellIDArray = new uint[num_of_elements];
            uint[] indexArrayIn      = new uint[num_of_elements];
            for (int j = 0; j < num_of_elements; j++)
            {
                indexArrayIn[j] = (uint)j;
            }
            uint[] indexArrayOut = new uint[num_of_elements];

            GCHandle gch_iai = GCHandle.Alloc(indexArrayIn, GCHandleType.Pinned);
            IntPtr   ptr_i   = gch_iai.AddrOfPinnedObject();
            GCHandle gch_ge  = GCHandle.Alloc(ge, GCHandleType.Pinned);
            IntPtr   ptr_ge  = gch_ge.AddrOfPinnedObject();

            array = new BodyData[num_of_bodies];
            for (int i = 0; i < num_of_bodies; i++)
            {
                Body body = Program.window.bodies.ElementAt(i);
                array[i].pos     = new float[3];
                array[i].cellIDs = new uint[8];
                //array[i].ctrl_bits = 0;
                array[i].pos[0] = body.getPos().X;
                array[i].pos[1] = body.getPos().Y;
                array[i].pos[2] = body.getPos().Z;
                array[i].ID     = (uint)i;
                array[i].radius = body.getBSphere().radius;
            }

            structSize = Marshal.SizeOf(array[0]);

            #region INITIALIZATION AND POPULATION OF DEVICE ARRAYS

            IntPtr ptr = Marshal.AllocHGlobal(structSize * num_of_bodies);
            for (int i = 0; i < num_of_bodies; i++)
            {
                Marshal.StructureToPtr(array[i], ptr + i * structSize, false);
            }
            byte[] input = new byte[structSize * num_of_bodies];
            Marshal.Copy(ptr, input, 0, structSize * num_of_bodies);

            b_start = time.ElapsedMilliseconds;

            b_objectData = new ComputeBuffer <byte>
                               (context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer, input);
            b_objectIDArray = new ComputeBuffer <ulong>
                                  (context, ComputeMemoryFlags.ReadWrite, num_of_elements);
            b_cellIDArray = new ComputeBuffer <uint>
                                (context, ComputeMemoryFlags.ReadWrite, num_of_elements);
            b_gridEdge = new ComputeBuffer <float>
                             (context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, 1, ptr_ge);

            b_count += time.ElapsedMilliseconds - b_start;

            k_dataInitialization.SetMemoryArgument(0, b_objectData);
            k_dataInitialization.SetMemoryArgument(1, b_gridEdge);
            k_dataInitialization.SetMemoryArgument(2, b_cellIDArray);
            k_dataInitialization.SetMemoryArgument(3, b_objectIDArray);

            dataInit_start = time.ElapsedMilliseconds;

            queue.Execute(k_dataInitialization, null, new long[] { num_of_bodies }, null, null);
            queue.Finish();

            Console.WriteLine("TIME SPENT EXECUTIND DATA INITIALIZATION KERNEL: " + (time.ElapsedMilliseconds - dataInit_start) + "ms");

            #endregion
#if PRINT
            uint[] rs_array = new uint[num_of_bodies * 8];
            queue.ReadFromBuffer <uint>(b_cellIDArray, ref rs_array, true, null);
            queue.Finish();

            #region READ DATA INITIALIZED IN DEVICE
            array = new BodyData[num_of_bodies];
            byte[] result = new byte[structSize * num_of_bodies];
            IntPtr intPtr = Marshal.AllocHGlobal(structSize * num_of_bodies);
            queue.ReadFromBuffer <byte>(b_objectData, ref result, true, null);
            queue.Finish();
            #endregion

            #region CHECK CORRECTNESS

            Marshal.Copy(result, 0, intPtr, structSize * num_of_bodies);
            for (int i = 0; i < num_of_bodies; i++)
            {
                array[i] = (BodyData)Marshal.PtrToStructure(intPtr + i * structSize, typeof(BodyData));
            }
            checkCorrectness();
            Marshal.FreeHGlobal(intPtr);
            #endregion

            #region CHECK OBJECT ID AND CELL ID ARRAYS

            uint[]  unsortedCellIDArray   = new uint[num_of_elements];
            ulong[] unsortedObjectIDArray = new ulong[num_of_elements];

            queue.ReadFromBuffer <uint>(b_cellIDArray, ref unsortedCellIDArray, true, null);
            queue.Finish();

            string cellIDArrayLog = "";

            for (int i = 0; i < num_of_elements; i++)
            {
                cellIDArrayLog += "[" + i + "]" + unsortedCellIDArray[i] + "\n";
            }
            File.WriteAllText(Application.StartupPath + @"\unsortedCellIDArrayLog.txt", cellIDArrayLog);

            string objectIDArrayLog = "";

            queue.ReadFromBuffer <ulong>(b_objectIDArray, ref unsortedObjectIDArray, true, null);
            queue.Finish();
            for (int i = 0; i < num_of_elements; i++)
            {
                objectIDArrayLog += "[" + i + "]" + (uint)unsortedObjectIDArray[i] + "\n";
            }
            File.WriteAllText(Application.StartupPath + @"\unsortedObjectIDArrayLog.txt", objectIDArrayLog);

            #endregion
#endif

#if ALL
            #region RADIX SORT
            #region INITIALIZING RADIX SORT MEMBERS
            uint block_count = (uint)Math.Ceiling((float)num_of_elements / BLOCK_SIZE);
            uint lScanCount  = block_count * (1 << DIGITS_PER_ROUND) / 4;
            uint lScanSize   = (uint)Math.Ceiling(((float)lScanCount / BLOCK_SIZE)) * BLOCK_SIZE;
            uint globalSize  = block_count * BLOCK_SIZE;
            #endregion

            #region POPULATING RADIX SORT BUFFERS

            b_start = time.ElapsedMilliseconds;

            b_blockScan   = new ComputeBuffer <uint>(context, ComputeMemoryFlags.ReadWrite, block_count * (1 << DIGITS_PER_ROUND));
            b_blockOffset = new ComputeBuffer <uint>(context, ComputeMemoryFlags.ReadWrite, block_count * (1 << DIGITS_PER_ROUND));
            b_blockSum    = new ComputeBuffer <uint>(context, ComputeMemoryFlags.ReadWrite, BLOCK_SIZE);
            b_temp        = new ComputeBuffer <uint>(context, ComputeMemoryFlags.ReadWrite, num_of_elements);
            b_iArrayIn    = new ComputeBuffer <uint>(context, ComputeMemoryFlags.ReadWrite
                                                     | ComputeMemoryFlags.CopyHostPointer, num_of_elements, ptr_i);
            b_iArrayOut = new ComputeBuffer <uint>(context, ComputeMemoryFlags.ReadWrite, num_of_elements);

            b_count += time.ElapsedMilliseconds - b_start;

            #endregion

            GCHandle gch_sc = GCHandle.Alloc(lScanCount, GCHandleType.Pinned);
            IntPtr   ptr_sc = gch_sc.AddrOfPinnedObject();
            GCHandle gch_ec = GCHandle.Alloc(num_of_elements, GCHandleType.Pinned);
            IntPtr   ptr_ec = gch_ec.AddrOfPinnedObject();

            b_start = time.ElapsedMilliseconds;

            b_scanCount =
                new ComputeBuffer <uint>(context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer, 1, ptr_sc);
            b_numberOfElems =
                new ComputeBuffer <uint>(context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer, 1, ptr_ec);

            b_count += time.ElapsedMilliseconds - b_start;

            #region libCL RADIX SORT ITERATION
            for (uint j = 0; j < 32; j += DIGITS_PER_ROUND)
            {
                GCHandle gch_iter = GCHandle.Alloc(j, GCHandleType.Pinned);
                IntPtr   ptr_j    = gch_iter.AddrOfPinnedObject();

                b_start = time.ElapsedMilliseconds;

                ComputeBuffer <uint> b_iteration =
                    new ComputeBuffer <uint>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, 1, ptr_j);

                b_count += time.ElapsedMilliseconds - b_start;

                #region BLOCK SORT
                kernel_block_sort.SetMemoryArgument(0, b_cellIDArray);
                kernel_block_sort.SetMemoryArgument(1, b_temp);
                kernel_block_sort.SetMemoryArgument(2, b_iArrayIn);
                kernel_block_sort.SetMemoryArgument(3, b_iArrayOut);
                kernel_block_sort.SetMemoryArgument(4, b_iteration);
                kernel_block_sort.SetMemoryArgument(5, b_blockScan);
                kernel_block_sort.SetMemoryArgument(6, b_blockOffset);
                kernel_block_sort.SetMemoryArgument(7, b_numberOfElems);

                radixSort_start = time.ElapsedMilliseconds;

                queue.Execute(kernel_block_sort, null, new long[] { globalSize }, new long[] { BLOCK_SIZE }, null);
                queue.Finish();

                radixSort_count += time.ElapsedMilliseconds - radixSort_start;

                #endregion

                #region BLOCK SCAN
                kernel_block_scan.SetMemoryArgument(0, b_blockScan);
                kernel_block_scan.SetMemoryArgument(1, b_blockSum);
                kernel_block_scan.SetMemoryArgument(2, b_scanCount);

                radixSort_start = time.ElapsedMilliseconds;

                queue.Execute(kernel_block_scan, null, new long[] { lScanSize }, new long[] { BLOCK_SIZE }, null);
                queue.Finish();

                radixSort_count += time.ElapsedMilliseconds - radixSort_start;

                #endregion

                #region BLOCK PREFIX
                kernel_block_prefix.SetMemoryArgument(0, b_blockScan);
                kernel_block_prefix.SetMemoryArgument(1, b_blockSum);
                kernel_block_prefix.SetMemoryArgument(2, b_scanCount);

                radixSort_start = time.ElapsedMilliseconds;

                queue.Execute(kernel_block_prefix, null, new long[] { lScanSize }, new long[] { BLOCK_SIZE }, null);
                queue.Finish();

                radixSort_count += time.ElapsedMilliseconds - radixSort_start;

                #endregion

                #region REORDER
                kernel_reorder.SetMemoryArgument(0, b_temp);
                kernel_reorder.SetMemoryArgument(1, b_cellIDArray);
                kernel_reorder.SetMemoryArgument(2, b_iArrayOut);
                kernel_reorder.SetMemoryArgument(3, b_iArrayIn);
                kernel_reorder.SetMemoryArgument(4, b_blockScan);
                kernel_reorder.SetMemoryArgument(5, b_blockOffset);
                kernel_reorder.SetMemoryArgument(6, b_iteration);
                kernel_reorder.SetMemoryArgument(7, b_numberOfElems);

                radixSort_start = time.ElapsedMilliseconds;

                queue.Execute(kernel_reorder, null, new long[] { globalSize }, new long[] { BLOCK_SIZE }, null);
                queue.Finish();

                radixSort_count += time.ElapsedMilliseconds - radixSort_start;

                #endregion

                b_iteration.Dispose();
                gch_iter.Free();
            }
            #endregion

            Console.WriteLine("TIME SPENT EXECUTING RADIX SORT: " + radixSort_count + " ms");

            #endregion

#if PRINT
            #region CHECK CELL ID ARRAY ORDER

            queue.ReadFromBuffer <uint>(b_cellIDArray, ref sortedCellIDArray, true, null);
            queue.Finish();

            string orderedCellIDArrayLog = "";

            for (int i = 0; i < num_of_elements; i++)
            {
                orderedCellIDArrayLog += "[" + i + "]" + sortedCellIDArray[i] + "\n";
            }
            File.WriteAllText(Application.StartupPath + @"\radixSortLog.txt", orderedCellIDArrayLog);
            #endregion
#endif
            #region REORDER OBJECT ID ARRAY
            ulong[] o_array = new ulong[num_of_elements];

            b_start = time.ElapsedMilliseconds;

            b_reorder = new ComputeBuffer <ulong>(context, ComputeMemoryFlags.ReadWrite, num_of_elements);

            b_count += time.ElapsedMilliseconds - b_start;

            k_reorder.SetMemoryArgument(0, b_iArrayIn);
            k_reorder.SetMemoryArgument(1, b_objectIDArray);
            k_reorder.SetMemoryArgument(2, b_reorder);

            reorder_start = time.ElapsedMilliseconds;

            queue.Execute(k_reorder, null, new long[] { num_of_elements }, null, null);
            queue.Finish();

            Console.WriteLine("TIME SPENT REORDERING OBJECT ID ARRAY: " + (time.ElapsedMilliseconds - reorder_start) + " ms");

            #endregion
#if PRINT
            #region CHECK OBJECT ID ORDER

            indexArrayOut = new uint[num_of_elements];
            queue.ReadFromBuffer <uint>(b_iArrayIn, ref indexArrayOut, true, null);
            queue.Finish();

            queue.ReadFromBuffer <ulong>(b_reorder, ref o_array, true, null);
            queue.Finish();

            string orderedObjIDArray = "";
            for (int i = 0; i < num_of_elements; i++)
            {
                orderedObjIDArray += "[" + i + "]" + (uint)o_array[i] + "\n";
            }
            string indexOut = "";
            for (int i = 0; i < num_of_elements; i++)
            {
                indexOut += "[" + i + "]" + indexArrayOut[i] + "\n";
            }
            File.WriteAllText(Application.StartupPath + @"\reorderLog.txt", orderedObjIDArray);
            File.WriteAllText(Application.StartupPath + @"\indexLog.txt", indexOut);
            #endregion
#endif

            #region ELEMENT COUNT

            //511 -> max value a cell hash can be
            //uint[] occurrences = new uint[512];
            uint[] n_occurrences = new uint[512];
            uint[] temp_array    = new uint[num_of_elements];
            uint[] temp_array2   = new uint[num_of_elements];
            uint[] nocc          = new uint[1];

            b_start = time.ElapsedMilliseconds;

            b_occPerRad =
                new ComputeBuffer <uint>(context, ComputeMemoryFlags.ReadWrite, 512);
            b_temp2 =
                new ComputeBuffer <uint>(context, ComputeMemoryFlags.ReadWrite, num_of_elements);
            b_flags =
                new ComputeBuffer <uint>(context, ComputeMemoryFlags.ReadWrite, num_of_elements);
            b_numOfCC =
                new ComputeBuffer <uint>(context, ComputeMemoryFlags.ReadWrite, 1);

            b_count = time.ElapsedMilliseconds - b_start;

            k_elementCount.SetMemoryArgument(0, b_reorder);
            k_elementCount.SetMemoryArgument(1, b_temp2);
            k_elementCount.SetMemoryArgument(2, b_numOfCC);
            k_elementCount.SetMemoryArgument(3, b_occPerRad);
            k_elementCount.SetMemoryArgument(4, b_flags);


            elemCount_start = time.ElapsedMilliseconds;

            try{
                queue.Execute(k_elementCount, null, new long[] { num_of_elements }, null, null);
            }
            catch (Exception e)
            {
                File.WriteAllText(Application.StartupPath + @"\exeLog.txt", e.Message);
            }
            queue.Finish();

            Console.WriteLine("TIME SPENT EXECUTING ELEMENT COUNT: " + (time.ElapsedMilliseconds - elemCount_start) + " ms");

            queue.ReadFromBuffer <uint>(b_temp2, ref temp_array, true, null);
            queue.ReadFromBuffer <uint>(b_numOfCC, ref nocc, true, null);
            queue.Finish();

            #endregion

#if PRINT
            #region CHECK ELEMENT COUNT

            queue.ReadFromBuffer <uint>(b_occPerRad, ref n_occurrences, true, null);
            queue.Finish();

            string hs = "";
            for (int h = 0; h < 512; h++)
            {
                hs += "[" + h + "] " +
                      //temp_array[h] + "\n\t" +
                      n_occurrences[h] + "\n";
            }

            File.WriteAllText(Application.StartupPath + @"\elementCountLog_nOcc.txt", hs);

            Array.Sort <uint>(rs_array); //sort by .NET

            string s = "";
            for (int h = 0; h < num_of_elements; h++)
            {
                s += "[" + h + "] " +
                     //s += sortedCellIDArray[h] + "\n"; //OCL
                     //s += rs_array[h] + "\n"; //CPU
                     //s += unchecked((uint)o_array[h]) + "\n"; //OCL reordered array cell ID
                     //s += ((o_array[h] & ((ulong)281470681743360)) >> 32) + "\n"; //OCL reordered array obj ID
                     //s += (indexArrayOut[h] / 8) + "\n"; // cell index divided by 8 --> should be equal to the line above;
                     temp_array[h] + "\n";
            }

            File.WriteAllText(Application.StartupPath + @"\elementCountLog_temp.txt", s);
            #endregion
#endif

            #region PREFIX SUM

            int maxIter = (int)Math.Log(num_of_elements, 2);
            for (uint d = 0; d < maxIter; d++)
            {
                GCHandle gch_iteration = GCHandle.Alloc(d, GCHandleType.Pinned);
                IntPtr   ptr_iteration = gch_iteration.AddrOfPinnedObject();

                b_start = time.ElapsedMilliseconds;

                ComputeBuffer <uint> b_iteration2 =
                    new ComputeBuffer <uint>(context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer, 1, ptr_iteration);

                b_count += time.ElapsedMilliseconds - b_start;

                k_prefixSum.SetMemoryArgument(0, b_temp2);
                k_prefixSum.SetMemoryArgument(1, b_iteration2);

                prefixSum_start = time.ElapsedMilliseconds;

                try
                {
                    queue.Execute(k_prefixSum, null, new long[] { num_of_elements }, new long[] { num_of_elements }, null);
                }
                catch (Exception e)
                {
                    File.WriteAllText(Application.StartupPath + @"\exeLog.txt", e.Message);
                }
                queue.Finish();

                prefixSum_count += time.ElapsedMilliseconds - prefixSum_start;

                b_iteration2.Dispose();
                gch_iteration.Free();
            }

            Console.WriteLine("TIME SPENT EXECUTING PREFIX SUM: " + prefixSum_count + " ms");

            queue.ReadFromBuffer <uint>(b_temp2, ref temp_array2, true, null);

            #endregion

#if PRINT
            #region CHECK SUM

            string sscan = "";

            for (int h = 0; h < num_of_elements; h++)
            {
                //TO BE FIXED! -- fixed?!
                sscan += temp_array2[h] + "\n";
            }
            File.WriteAllText(Application.StartupPath + @"\scanLog.txt", sscan);

            uint sum = 0;
            for (int p = 0; p < num_of_elements; p++) //TODO CHANGE TO nocc[0] -- but why??
            {
                sum += temp_array[p];
            }
            Console.WriteLine(".NET sum: " + sum);
            Console.WriteLine("OCL sum: " + nocc[0]);

            #endregion
#endif

            #region COLLISION CELL ARRAY CREATION

            //uint[] outArray = new uint[nocc[0]];
            ulong[] out_array = new ulong[nocc[0]];
            uint[]  indexes   = new uint[nocc[0]];
            b_ccArray = new ComputeBuffer <ulong>(context, ComputeMemoryFlags.ReadWrite, nocc[0]);//TODO !!!!! CHANGE TO nocc[0]

            GCHandle gch_ta = GCHandle.Alloc(temp_array, GCHandleType.Pinned);
            IntPtr   ptr_ta = gch_ta.AddrOfPinnedObject();

            b_start = time.ElapsedMilliseconds;

            b_temp3 =
                new ComputeBuffer <ulong>(context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer, num_of_elements, ptr_ta);
            b_ccIndexes =
                new ComputeBuffer <uint>(context, ComputeMemoryFlags.ReadWrite, nocc[0]);//TODO!!! CHANGE TO nocc[0]

            b_count += time.ElapsedMilliseconds - b_start;

            k_ccArrayCreation.SetMemoryArgument(0, b_reorder);
            k_ccArrayCreation.SetMemoryArgument(1, b_ccArray);
            k_ccArrayCreation.SetMemoryArgument(2, b_temp2);
            k_ccArrayCreation.SetMemoryArgument(3, b_occPerRad);
            k_ccArrayCreation.SetMemoryArgument(4, b_temp3);
            k_ccArrayCreation.SetMemoryArgument(5, b_ccIndexes);
            k_ccArrayCreation.SetMemoryArgument(6, b_flags);

            ccArrayC_start = time.ElapsedMilliseconds;

            try
            {
                queue.Execute(k_ccArrayCreation, null, new long[] { num_of_elements }, null, null);
            }
            catch (Exception e)
            {
                File.WriteAllText(Application.StartupPath + @"\exeLog.txt", e.Message);
            }
            queue.Finish();

            Console.WriteLine("TIME SPENT POPULATING COLLISION CELL ARRAY: " + (time.ElapsedMilliseconds - ccArrayC_start) + " ms");
            time.Stop();

            #endregion

            Console.WriteLine("TIME SPENT EXECUTING BROAD-PHASE COLLISION DETECTION: " + time.ElapsedMilliseconds + " ms");
            Console.WriteLine("TIME SPENT CREATING DEVICE BUFFERS: " + b_count + " ms");

#if PRINT
            #region CHECK RESULT

            queue.ReadFromBuffer(b_ccArray, ref out_array, true, null);
            queue.ReadFromBuffer(b_ccIndexes, ref indexes, true, null);
            queue.Finish();

            string output = "";
            for (int t = 0; t < nocc[0]; t++)//CHANGE TO nocc[0]
            {
                output += "---INDEX--- " + t + "\n\t";
                if ((out_array[t] & ((ulong)1 << 63)) != (ulong)0)
                {
                    output += "[H] ";
                }
                output += (uint)out_array[t] + "\n\t";
                output += indexes[t] + "\n";
            }

            File.WriteAllText(Application.StartupPath + @"\outputLog.txt", output);

            #endregion
#endif

            #region POINTERS DISPOSAL
            Marshal.FreeHGlobal(ptr);
            gch_ta.Free();
            gch_ec.Free();
            gch_sc.Free();
            gch_ge.Free();
            #endregion

            try
            {
                DisposeBuffers();
                DisposeComponents();
                DisposeQueueAndContext();
            }
            catch (Exception e) {
                Console.WriteLine("Error encountered while releasing buffers - " + e.Message);
                File.WriteAllText(Application.StartupPath + @"\exeLog.txt", e.Message);
            }
#else
            Marshal.FreeHGlobal(ptr);
            gch_iai.Free();
            gch_ge.Free();
            b_objectData.Dispose();
            b_objectIDArray.Dispose();
            b_cellIDArray.Dispose();
            b_gridEdge.Dispose();
#endif
        }

示例#30

0

显示文件

文件： GPUT.cs 项目： ruarai/Trigrad

        public static void Calculate(List<Calculation> calculations)
        {
            Stopwatch s = new Stopwatch();
            s.Start();

            int count = calculations.Count;

            IntVec2[] p_p = new IntVec2[count];

            IntVec2[] p_a = new IntVec2[count];
            IntVec2[] p_b = new IntVec2[count];
            IntVec2[] p_c = new IntVec2[count];

            FloatVec3[] c = new FloatVec3[count];

            int[] c_valid = new int[count];

            Parallel.For(0, count, i =>
            {
                var calc = calculations[i];

                p_p[i] = new IntVec2(calc.P);
                p_a[i] = new IntVec2(calc.A);
                p_b[i] = new IntVec2(calc.B);
                p_c[i] = new IntVec2(calc.C);
            });

            mark(s, "memory init");

            ComputeBuffer<IntVec2> _p_p = new ComputeBuffer<IntVec2>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, p_p);

            ComputeBuffer<IntVec2> _p_a = new ComputeBuffer<IntVec2>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, p_a);

            ComputeBuffer<IntVec2> _p_b = new ComputeBuffer<IntVec2>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, p_b);

            ComputeBuffer<IntVec2> _p_c = new ComputeBuffer<IntVec2>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, p_c);

            ComputeBuffer<FloatVec3> _c = new ComputeBuffer<FloatVec3>(context, ComputeMemoryFlags.WriteOnly, c.Length);
            ComputeBuffer<int> _c_valid = new ComputeBuffer<int>(context, ComputeMemoryFlags.WriteOnly, c_valid.Length);

            mark(s, "memory buffer init");

            ComputeKernel kernel = program.CreateKernel("Barycentric");
            kernel.SetMemoryArgument(0, _p_p);

            kernel.SetMemoryArgument(1, _p_a);

            kernel.SetMemoryArgument(2, _p_b);

            kernel.SetMemoryArgument(3, _p_c);

            kernel.SetMemoryArgument(4, _c);
            kernel.SetMemoryArgument(5, _c_valid);

            mark(s, "memory init 2");

            ComputeEventList eventList = new ComputeEventList();

            ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

            commands.Execute(kernel, null, new long[] { count }, null, eventList);

            mark(s, "execute");

            commands.ReadFromBuffer(_c, ref c, false, eventList);
            commands.ReadFromBuffer(_c_valid, ref c_valid, false, eventList);
            commands.Finish();

            mark(s, "read 1");

            Parallel.For(0, count, i =>
            {
                var calc = calculations[i];
                calc.Coords = new BarycentricCoordinates(c[i].U,c[i].V,c[i].W);

                if (c_valid[i] == 1)
                {
                    lock (calc.Tri)
                        calc.Tri.Points.Add(new DrawPoint(calc.Coords, calc.P));
                }
            });

            mark(s, "read 2");

            // cleanup commands
            commands.Dispose();

            // cleanup events
            foreach (ComputeEventBase eventBase in eventList)
            {
                eventBase.Dispose();
            }
            eventList.Clear();

            // cleanup kernel
            kernel.Dispose();

            _p_p.Dispose();

            _p_a.Dispose();
            _p_b.Dispose();
            _p_c.Dispose();

            _c.Dispose();
            _c_valid.Dispose();

            mark(s, "dispose");
        }

示例#31

0

显示文件

文件： Form1.cs 项目： jalmar/DoM_Utrecht-GPU

        private void CalculateConvolution(ComputeContext computeContext)
        {
            Stopwatch stopwatch = new Stopwatch();
            stopwatch.Start();

            float dx;
            bool shiftXParse = float.TryParse(textBoxShiftX.Text, NumberStyles.Float, CultureInfo.InvariantCulture.NumberFormat, out dx);
            if (!shiftXParse)
                throw new SyntaxErrorException(", needs to be .");

            float dy;
            bool shiftYParse = float.TryParse(textBoxShiftX.Text, NumberStyles.Float, CultureInfo.InvariantCulture.NumberFormat, out dy);
            if (!shiftYParse)
                throw new SyntaxErrorException(", needs to be  .");

            float dz;
            bool shiftZParse = float.TryParse(textBoxShiftX.Text, NumberStyles.Float, CultureInfo.InvariantCulture.NumberFormat, out dz);
            if (!shiftZParse)
                throw new SyntaxErrorException(", needs to be  .");

            int pixelCount = _imageDimensionX*_imageDimensionY*_imageDimensionZ;

            Console.WriteLine("Computing...");
            Console.WriteLine("Reading kernel...");

            String kernelPath = Directory.GetParent(Directory.GetCurrentDirectory()).Parent.Parent.FullName;

            String kernelString;
            using (var sr = new StreamReader(kernelPath + "\\convolution.cl"))
                kernelString = sr.ReadToEnd();

            Console.WriteLine("Reading kernel... done");

            float[] selectedTransformation = Transformations.GetTransformation((TransformationType)comboBoxTransform.SelectedItem, 1.0f / float.Parse(textBoxPixelSize.Text), 1.0f / float.Parse(textBoxPixelSize.Text), 1.0f / float.Parse(textBoxPixelSize.Text), dx, dy, dz);

            //create openCL program
            ComputeProgram computeProgram = new ComputeProgram(computeContext, kernelString);

            computeProgram.Build(computeContext.Devices, null, null, IntPtr.Zero);

            ComputeProgramBuildStatus computeProgramBuildStatus = computeProgram.GetBuildStatus(_selectedComputeDevice);
            Console.WriteLine("computeProgramBuildStatus\n\t"+computeProgramBuildStatus);

            String buildLog = computeProgram.GetBuildLog(_selectedComputeDevice);
            Console.WriteLine("buildLog");
            if (buildLog.Equals("\n"))
                Console.WriteLine("\tbuildLog is empty...");
            else
                Console.WriteLine("\t" + buildLog);

            float[] fluorophores = CsvData.ReadFluorophores(_sourceFilename);

            /////////////////////////////////////////////
            // Create a Command Queue & Event List
            /////////////////////////////////////////////
            ComputeCommandQueue computeCommandQueue = new ComputeCommandQueue(computeContext, _selectedComputeDevice, ComputeCommandQueueFlags.None);

            ////////////////////////////////////////////////////////////////
            // Create Buffers Transform
            ////////////////////////////////////////////////////////////////
            ComputeBuffer<float> fluorophoresCoords = new ComputeBuffer<float>(computeContext, ComputeMemoryFlags.ReadWrite, fluorophores.LongLength);

            ComputeBuffer<float> transformationMatrix = new ComputeBuffer<float>(computeContext, ComputeMemoryFlags.ReadOnly, selectedTransformation.LongLength);

            /////////////////////////////////////////////
            // Create the transformFluorophoresKernel
            ///////////////////////////////////////////////////////////
            ComputeKernel transformFluorophoresKernel = computeProgram.CreateKernel("transform_fluorophores");

            /////////////////////////////////////////////
            // Set the transformFluorophoresKernel arguments
            /////////////////////////////////////////////
            transformFluorophoresKernel.SetMemoryArgument(0, fluorophoresCoords);
            transformFluorophoresKernel.SetMemoryArgument(1, transformationMatrix);

            /////////////////////////////////////////////
            // Configure the work-item structure
            /////////////////////////////////////////////
            long[] globalWorkOffsetTransformFluorophoresKernel = null;
            long[] globalWorkSizeTransformFluorophoresKernel = new long[]   { fluorophores.Length / 4 };
            long[] localWorkSizeTransformFluorophoresKernel = null;

            ////////////////////////////////////////////////////////
            // Enqueue the transformFluorophoresKernel for execution
            ////////////////////////////////////////////////////////

            computeCommandQueue.WriteToBuffer(fluorophores, fluorophoresCoords, true, null);
            computeCommandQueue.WriteToBuffer(selectedTransformation, transformationMatrix, true, null);

            computeCommandQueue.Execute(transformFluorophoresKernel, globalWorkOffsetTransformFluorophoresKernel, globalWorkSizeTransformFluorophoresKernel, localWorkSizeTransformFluorophoresKernel, null);
            //            computeCommandQueue.ExecuteTask(transformFluorophoresKernel, transformFluorophoresEvents);

            float[] transformedFluorophores = new float[fluorophores.Length];

            computeCommandQueue.ReadFromBuffer(fluorophoresCoords, ref transformedFluorophores, true, null);

            computeCommandQueue.Finish();

            //TODO remove, only for testing
            //            for (int i = 0; i < transformedFluorophores.Length; i++)
            //            {
            //                Console.WriteLine(transformedFluorophores[i]);
            //            }
            // /TODO remove, only for testing

            stopwatch.Stop();
            Console.WriteLine("Transform fluophores duration:\n\t" + stopwatch.Elapsed);
            stopwatch.Reset();
            stopwatch.Start();
            // fluorophoresCoords are now transformed (done in place)

            ////////////////////////////////////////////////////////////////
            // Create Buffers Convolve Fluorophores
            ////////////////////////////////////////////////////////////////

            const int convolve_kernel_lwgs = 16;
            int totalBuffer = (int) Math.Ceiling(pixelCount / (float)convolve_kernel_lwgs) * convolve_kernel_lwgs;

            ComputeBuffer<float> resultImage = new ComputeBuffer<float>(computeContext, ComputeMemoryFlags.WriteOnly, totalBuffer);

            /////////////////////////////////////////////
            // Create the transformFluorophoresKernel
            /////////////////////////////////////////////
            ComputeKernel convolveFluorophoresKernel = computeProgram.CreateKernel("convolve_fluorophores");

            /////////////////////////////////////////////
            // Set the convolveFluorophoresKernel arguments
            /////////////////////////////////////////////

            convolveFluorophoresKernel.SetMemoryArgument(0, resultImage);
            convolveFluorophoresKernel.SetValueArgument(1, _imageDimensionX);
            convolveFluorophoresKernel.SetValueArgument(2, _imageDimensionY);
            convolveFluorophoresKernel.SetMemoryArgument(3, fluorophoresCoords);
            convolveFluorophoresKernel.SetLocalArgument(4, convolve_kernel_lwgs);
            convolveFluorophoresKernel.SetValueArgument(5, fluorophores.Length / 4);

            /////////////////////////////////////////////
            // Configure the work-item structure
            /////////////////////////////////////////////
            long[] globalWorkOffsetTransformConvolveFluorophoresKernel = null;
            long[] globalWorkSizeTransformConvolveFluorophoresKernel = new long[] { pixelCount };
            long[] localWorkSizeTransformConvolveFluorophoresKernel = new long[] {convolve_kernel_lwgs};

            ////////////////////////////////////////////////////////
            // Enqueue the convolveFluorophoresKernel for execution
            ////////////////////////////////////////////////////////

            computeCommandQueue.Execute(convolveFluorophoresKernel, globalWorkOffsetTransformConvolveFluorophoresKernel, globalWorkSizeTransformConvolveFluorophoresKernel, localWorkSizeTransformConvolveFluorophoresKernel, null);

            float[] resultImageData = new float[totalBuffer];
            computeCommandQueue.ReadFromBuffer(resultImage, ref resultImageData, true, null);

            computeCommandQueue.Finish();

            for (int i = 0; i < pixelCount; i++)
            {
                Console.WriteLine(resultImageData[i]);
            }

            Console.WriteLine("Writing data to file...");
            //            CsvData.WriteToDisk("..\\..\\..\\output.csv", resultImageData);
            TiffData.WriteToDisk(resultImageData, _saveFilename, _imageDimensionX, _imageDimensionY);

            Bitmap bitmap = new Bitmap(_imageDimensionX, _imageDimensionY);

            float max = resultImageData.Max();

            float scale = 255/(float)max;

            //            for (int r = 0; r < _imageDimensionY; r++)
            //            {
            //                for (int c = 0; c < _imageDimensionX; c++)
            //                {
            //                    float value = resultImageData[c*(r + 1)];
            //                    Color newColor = Color.FromArgb((int)(value * scale), (int)(value * scale), (int)(value * scale));
            //                    bitmap.SetPixel(c,r, newColor);
            //                }
            //            }

            ushort[] ushortdata = new ushort[resultImageData.Length];

            for (int i = 0; i < resultImageData.Length; i++)
            {
                ushortdata[i] = (ushort)resultImageData[i];
            }

            uint[] convertGray16ToRgb = ConvertGray16ToRGB(ushortdata, 16);

            byte[] bytes = new byte[convertGray16ToRgb.Length * 4];
            //
            //            int[] resultImageData2 = new int[resultImageData.Length];
            //
            for (int index = 0; index < convertGray16ToRgb.Length; index++)
            {
            //                resultImageData2[index] = (int)(scale*resultImageData[index]);

                byte[] bytes1 = BitConverter.GetBytes(convertGray16ToRgb[index]);
                bytes[index] = bytes1[0];
                bytes[4 * index + 1] = bytes1[1];
                bytes[4 * index + 2] = bytes1[2];
                bytes[4 * index + 3] = bytes1[3];
            }
            //
            //            for (int r = 0; r < _imageDimensionY; r++)
            //            {
            //                for (int c = 0; c < _imageDimensionX; c++)
            //                {
            //                    float value = resultImageData2[c*(r + 1)];
            //                    Color newColor = Color.FromArgb((int)(value), (int)(value), (int)(value));
            //                    bitmap.SetPixel(c,r, newColor);
            //                }
            //            }
            //            bitmap.Save("c:\\temp.bmp");

            using (MemoryStream ms = new MemoryStream(bytes))
            {
                Image image = Bitmap.FromStream(ms);
                image.Save("c:\\temp.bmp");
            }

            Console.WriteLine("Writing data to file... done");

            stopwatch.Stop();
            Console.WriteLine("Convolve fluophores duration:\n\t" + stopwatch.Elapsed);
            Console.WriteLine("Computing... done");
        }

示例#32

0

显示文件

文件： CL11Example.cs 项目： n8allan/silver-horn-opencl

        public void Run(IComputeContext context, TextWriter log)
        {
            try
            {
                log.Write("Creating command queue... ");
                var commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);
                log.WriteLine("done.");

                log.Write("Generating data... ");

                int      linearSize = 24;
                SysIntX2 rectSize   = new SysIntX2(4, 6);
                SysIntX3 cubicSize  = new SysIntX3(2, 3, 4);
                float[]  linearIn   = new float[linearSize];
                float[]  linearOut  = new float[linearSize];
                float[,] rectIn    = new float[(int)rectSize.Y, (int)rectSize.X];
                float[,] rectOut   = new float[(int)rectSize.Y, (int)rectSize.X];
                float[,,] cubicIn  = new float[(int)cubicSize.Z, (int)cubicSize.Y, (int)cubicSize.X];
                float[,,] cubicOut = new float[(int)cubicSize.Z, (int)cubicSize.Y, (int)cubicSize.X];

                for (int i = 0; i < linearSize; i++)
                {
                    linearIn[i] = i;
                }

                for (int i = 0; i < (int)rectSize.X; i++)
                {
                    for (int j = 0; j < (int)rectSize.Y; j++)
                    {
                        rectIn[j, i] = (float)(rectSize.X.ToInt32() * j + i);
                    }
                }

                for (int i = 0; i < (int)cubicSize.X; i++)
                {
                    for (int j = 0; j < (int)cubicSize.Y; j++)
                    {
                        for (int k = 0; k < (int)cubicSize.Z; k++)
                        {
                            cubicIn[k, j, i] = (float)(k * cubicSize.Y.ToInt32() * cubicSize.X.ToInt32() + cubicSize.X.ToInt32() * j + i);
                        }
                    }
                }

                log.WriteLine("done.");

                log.Write("Creating buffer... ");
                var buffer = new ComputeBuffer <float>(context, ComputeMemoryFlags.ReadWrite, linearSize);
                log.WriteLine("done.");

                GC.Collect();

                log.Write("Writing to buffer (linear)... ");
                commands.WriteToBuffer(linearIn, buffer, false, null);
                log.WriteLine("done.");

                log.Write("Reading from buffer (linear)... ");
                commands.ReadFromBuffer(buffer, ref linearOut, false, null);
                log.WriteLine("done.");

                GC.Collect();

                commands.Finish();

                log.Write("Comparing data... ");
                Compare(linearIn, linearOut);
                log.WriteLine("passed.");

                GC.Collect();

                log.Write("Writing to buffer (rectangular)... ");
                commands.WriteToBuffer(rectIn, buffer, false, new SysIntX2(), new SysIntX2(), rectSize, null);
                log.WriteLine("done.");

                GC.Collect();

                log.Write("Reading from buffer (rectangular)... ");
                commands.ReadFromBuffer(buffer, ref rectOut, false, new SysIntX2(), new SysIntX2(), rectSize, null);
                log.WriteLine("done.");

                GC.Collect();

                commands.Finish();

                log.Write("Comparing data... ");
                Compare(rectIn, rectOut);
                log.WriteLine("passed.");

                GC.Collect();

                log.Write("Writing to buffer (cubic)... ");
                commands.WriteToBuffer(cubicIn, buffer, false, new SysIntX3(), new SysIntX3(), cubicSize, null);
                log.WriteLine("done.");

                GC.Collect();

                log.Write("Reading from buffer (cubic)... ");
                commands.ReadFromBuffer(buffer, ref cubicOut, false, new SysIntX3(), new SysIntX3(), cubicSize, null);
                log.WriteLine("done.");

                GC.Collect();

                commands.Finish();

                log.Write("Comparing data... ");
                Compare(cubicIn, cubicOut);
                log.WriteLine("passed.");
            }
            catch (Exception e)
            {
                log.WriteLine(e.ToString());
            }
        }

示例#33

0

显示文件

文件： game.cs 项目： PyotrRomanov/RHS

        // tick: renders one frame
        public void Tick()
        {
            // initialize timer
            if (firstFrame)
            {
                timer.Reset();
                timer.Start();
                firstFrame = false;
            }
            // handle keys, only when running time set to -1 (infinite)
            if (runningTime == -1)
            {
                if (camera.HandleInput())
                {
                    // camera moved; restart
                    ClearAccumulator();
                }
            }
            // render
            if (false)     // if (useGPU)
            {
                // add your CPU + OpenCL path here
                // mind the gpuPlatform parameter! This allows us to specify the platform on our
                // test system.
                // note: it is possible that the automated test tool provides you with a different
                // platform number than required by your hardware. In that case, you can hardcode
                // the platform during testing (ignoring gpuPlatform); do not forget to put back
                // gpuPlatform before submitting!
                long[] workSize  = { screen.width, screen.height };
                long[] localSize = { 16, 2 };
                queue.Execute(kernel, null, workSize, null, null);
                queue.Finish();

                queue.ReadFromBuffer(outputBuffer, ref screen.pixels, true, null);
                Console.WriteLine(screen.pixels[0]);

                Random r = RTTools.GetRNG();
                for (int i = 0; i < 1000; i++)
                {
                    randoms[i] = (float)r.NextDouble();
                }
                rndBuffer    = new ComputeBuffer <float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, randoms);
                cameraBuffer = new ComputeBuffer <Vector3>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, camera.toCL());
                kernel.SetMemoryArgument(6, cameraBuffer);
                kernel.SetMemoryArgument(7, rndBuffer);
            }
            else
            {
                // this is your CPU only path
                float scale = 1.0f / (float)++spp;

                Parallel.For(0, screen.height, y => {
                    for (int x = 0; x < screen.width; x++)
                    {
                        // generate primary ray
                        Ray ray = camera.Generate(RTTools.GetRNG(), x, y);
                        // trace path
                        int pixelIdx           = x + y * screen.width;
                        accumulator[pixelIdx] += Sample(ray, 0, x, y);
                        // plot final color
                        screen.pixels[pixelIdx] = RTTools.Vector3ToIntegerRGB(scale * accumulator[pixelIdx]);
                    }
                });
            }
            // stop and report when max render time elapsed
            int elapsedSeconds = (int)(timer.ElapsedMilliseconds / 1000);

            if (runningTime != -1)
            {
                if (elapsedSeconds >= runningTime)
                {
                    OpenTKApp.Report((int)timer.ElapsedMilliseconds, spp, screen);
                }
            }
        }

示例#34

0

显示文件

文件： OpenCLPasswordMatcher.cs 项目： frachstudia/studies

		public String SearchPassword (byte[] hash, HashType type, int maxLength, String[] keySpace)
		{
			if (type != HashType.MD5) {
				throw new NotImplementedException ("sums other than MD5 not supported");
			}

			if (maxLength > 6) {
				throw new NotImplementedException ("doesn't support longer passwords than 7");
			}

			var joinedKeySpace = new List<byte> ();

			foreach (var k in keySpace) {
				if (k.Length > 1) {
					throw new NotImplementedException ("doesn't support longer keyspaces than 1");
				}

				joinedKeySpace.AddRange (Encoding.ASCII.GetBytes (k));
			}
				
			byte[] resultData = new byte[20];
			byte[] keyspaceJoined = joinedKeySpace.ToArray ();

			var resultBuffer = new ComputeBuffer<byte> (Context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.CopyHostPointer, resultData);
			var hashBuffer = new ComputeBuffer<byte> (Context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, hash);
			var keyspaceBuffer = new ComputeBuffer<byte> (Context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, keyspaceJoined);
			var passLenBuffer = new ComputeBuffer<byte> (Context, ComputeMemoryFlags.WriteOnly, 1);
			var flagBuffer = new ComputeBuffer<int> (Context, ComputeMemoryFlags.None, 1);


			Kernel.SetMemoryArgument (0, hashBuffer);
			Kernel.SetMemoryArgument (1, keyspaceBuffer);
			Kernel.SetMemoryArgument (2, resultBuffer);
			Kernel.SetMemoryArgument (3, passLenBuffer);
			Kernel.SetMemoryArgument (4, flagBuffer);

			// execute kernel
			var queue = new ComputeCommandQueue (Context, Device, ComputeCommandQueueFlags.None);

			long firstDim = joinedKeySpace.Count;
			var globalWorksize = new long[] { firstDim, 57 * 57, 57 * 57 };

			queue.Execute (Kernel, new long[] { 0, 0, 0 }, globalWorksize, null, null);

			byte[] passLen = new byte[1];

			queue.ReadFromBuffer (resultBuffer, ref resultData, true, null);
			queue.ReadFromBuffer (passLenBuffer, ref passLen, true, null);

			String password = null;

			if (passLen [0] > 0) {
				logger.Info ("pass len {0}", passLen [0]);
				password = Encoding.ASCII.GetString (resultData, 0, passLen [0]);
				logger.Info ("Found password: \"{0}\"", password);
			} else {
				logger.Info ("Password not found.");
			}

			queue.Finish ();

			return password;
		}

示例#35

0

显示文件

文件： OpenCLCalculatorFactory.cs 项目： sztupy/RestFract

            public unsafe void EndSend()
            {
                for (int i = 0; i < points.Count; i++)
                {
                  inx[i].x = (float)points[i].Item3.Real;
                  inx[i].y = (float)points[i].Item3.Imaginary;
                  inc[i].x = (float)points[i].Item4.Real;
                  inc[i].y = (float)points[i].Item4.Imaginary;
                }

                _krnl.SetMemoryArgument(0, x);
                _krnl.SetMemoryArgument(1, c);
                for (int i = 0; i < _ld.Count; i++)
                {
                  _krnl.SetMemoryArgument(2 + i, outp[i]);
                }

                ComputeCommandQueue command = new ComputeCommandQueue(_context, _context.Devices[0], ComputeCommandQueueFlags.None);
                command.WriteToBuffer(inx, x, false, null);
                command.WriteToBuffer(inc, c, false, null);

                command.Execute(_krnl, null, new long[] { points.Count }, null, null);

                for (int i = 0; i < _ld.Count; i++)
                  command.ReadFromBuffer(outp[i], ref opl[i], false, null);

                command.Finish();

                output = new Queue<Tuple<int, int, List<ProcessLayer>>>();

                for (int i = 0; i < points.Count; i++)
                {
                  List<ProcessLayer> pl = new List<ProcessLayer>();
                  for (int ii = 0; ii < _ld.Count; ii++)
                  {
                ProcessLayer p = _ld[ii].Clone();
                p.c_active = opl[ii][i].c_active != 0;
                p.c_calc = opl[ii][i].c_calc;
                p.c_cmean = opl[ii][i].c_cmean;
                p.c_cvariance = opl[ii][i].c_cvariance;
                p.c_cvarsx = opl[ii][i].c_cvarsx;
                p.c_isin = opl[ii][i].c_isin != 0;
                p.c_n = opl[ii][i].c_n;
                p.c_old2x = new Complex(opl[ii][i].c_old2x.x,opl[ii][i].c_old2x.y);
                p.c_oldx = new Complex(opl[ii][i].c_oldx.x,opl[ii][i].c_oldx.y);
                p.c_resn = opl[ii][i].c_resn;
                p.c_resx = new Complex(opl[ii][i].c_resx.x,opl[ii][i].c_resx.y);
                p.c_x = new Complex(opl[ii][i].c_x.x,opl[ii][i].c_x.y);
                pl.Add(p);
                  }
                  output.Enqueue(Tuple.Create(points[i].Item1, points[i].Item2, pl));
                }
            }

示例#36

0

显示文件

文件： RenderPackage.cs 项目： khyperia/Clam

        private Bitmap Download(ComputeCommandQueue queue, ComputeBuffer<Vector4> buffer, int width, int height)
        {
            var pixels = new Vector4[width * height];
            queue.ReadFromBuffer(buffer, ref pixels, true, 0, 0, width * height, null);
            queue.Finish();

            var intPixels = Array.ConvertAll(pixels, pixel =>
            {
                pixel = Vector4.Clamp(pixel, new Vector4(0), new Vector4(1));
                return (byte)(pixel.X * 255) << 16 | (byte)(pixel.Y * 255) << 8 | (byte)(pixel.Z * 255);
            });
            var bmp = new Bitmap(width, height, PixelFormat.Format32bppRgb);
            var bmpData = bmp.LockBits(new Rectangle(0, 0, bmp.Width, bmp.Height), ImageLockMode.ReadWrite, PixelFormat.Format32bppRgb);
            Marshal.Copy(intPixels, 0, bmpData.Scan0, intPixels.Length);
            bmp.UnlockBits(bmpData);
            return bmp;
        }

示例#37

0

显示文件

文件： OpenCLBuffer.cs 项目： Codzart/Parallel-Pollard-Rho

 public void CopyFromDevice()
 {
     queue.ReadFromBuffer(gpuBuffer, ref cpuBuffer, true, null);
 }

示例#38

0

显示文件

文件： Program.cs 项目： yeerkkiller1/Go-AI

        public static void CallOpenCL(int[,] libertyGroups, 
            int[,] groupNumbers, int x, int y, 
            int[] surroundingLibs, ref int emptySurroundings,
            int ourSgn, ref int duplicateGroups)
        {
            //Create arguments
            
            //Does not split yet
            //int[,] libertyGroups, 
            //int[,] groupNumbers, 
            //int x, 
            //int y, 
            //int[] surroundingLibs, 
            //ref int emptySurroundings,            
            //ref int duplicateGroups,
            //int ourSgn, 
            
            //We have to map 2 dimension to 1 dimension            

            //Set arguments
            ComputeBuffer<int> libertyGroupsIn = new ComputeBuffer<int>(openCLContext, ComputeMemoryFlags.UseHostPointer, twoDtoOneD(libertyGroups));
            openCLKernel.SetMemoryArgument(0, libertyGroupsIn);

            ComputeBuffer<int> groupNumbersIn = new ComputeBuffer<int>(openCLContext, ComputeMemoryFlags.UseHostPointer, twoDtoOneD(groupNumbers));
            openCLKernel.SetMemoryArgument(1, groupNumbersIn);

            openCLKernel.SetValueArgument<int>(2, x);
            openCLKernel.SetValueArgument<int>(3, y);
            
            ComputeBuffer<int> surroundingLibsIn = new ComputeBuffer<int>(openCLContext, ComputeMemoryFlags.UseHostPointer, surroundingLibs);
            openCLKernel.SetMemoryArgument(4, surroundingLibsIn);

            int[] emptySurroundRef = new int[1];
            ComputeBuffer<int> emptySurroundRefIn = new ComputeBuffer<int>(openCLContext, ComputeMemoryFlags.UseHostPointer, emptySurroundRef);
            openCLKernel.SetMemoryArgument(5, emptySurroundRefIn);

            int[] duplicateGroupsRef = new int[1];
            ComputeBuffer<int> duplicateGroupsRefIn = new ComputeBuffer<int>(openCLContext, ComputeMemoryFlags.UseHostPointer, duplicateGroupsRef);
            openCLKernel.SetMemoryArgument(6, duplicateGroupsRefIn);

            openCLKernel.SetValueArgument<int>(7, ourSgn);            

            //long localWorkSize = Math.Min(openCLDevice.MaxComputeUnits, sideSize);

            //Display input data

            //Runs commands
            ComputeCommandQueue commands = new ComputeCommandQueue(openCLContext,
                openCLContext.Devices[0], ComputeCommandQueueFlags.None);

            long executionTime = DateTime.Now.Ticks;

            //Execute kernel
            //globalWorkSize in increments of localWorkSize (max of device.MaxComputeUnits or kernel.GetWorkGroupSize())
            commands.Execute(openCLKernel, null,
                new long[] { 1 },
                new long[] { 1 }, null);

            //Also, you should probably use this
            //kernel.GetPreferredWorkGroupSizeMultiple(device);

            commands.Finish();

            //int[] surroundingLibs, 
            //ref int emptySurroundings,            
            //ref int duplicateGroups,
            //Read output data
            commands.ReadFromBuffer(surroundingLibsIn, ref surroundingLibs, true, null);
            commands.ReadFromBuffer(emptySurroundRefIn, ref emptySurroundRef, true, null); emptySurroundings = emptySurroundRef[0];
            commands.ReadFromBuffer(duplicateGroupsRefIn, ref duplicateGroupsRef, true, null); duplicateGroups = duplicateGroupsRef[0];

            //We could set blocking to false on reads and then read them all back in then, we could (possiblity) gain some performance
            //by telling it that commands can be executed out of order and then by queuing them up and calling Finish
            commands.Finish();
            executionTime = DateTime.Now.Ticks - executionTime;


            GC.Collect();
         //   openCLProgram.Dispose();            
            //display output data
            
            //Test are done by our caller now

            Console.WriteLine(executionTime / 10000.0);
        }

示例#39

0

显示文件

文件： CompareGPUCPU.cs 项目： arunganesan/hand-gesture-recognition

        public void Run(ComputeContext context, TextWriter log)
        {
            try
            {
                // Create the arrays and fill them with random data.
                int count = 640*480; // 
                float[] arrA = new float[count];
                float[] arrB = new float[count];
                float[] arrC = new float[count];

                Random rand = new Random();
                for (int i = 0; i < count; i++)
                {
                    arrA[i] = (float)(rand.NextDouble() * 100);
                    arrB[i] = (float)(rand.NextDouble() * 100);
                }

                
                // Create the input buffers and fill them with data from the arrays.
                // Access modifiers should match those in a kernel.
                // CopyHostPointer means the buffer should be filled with the data provided in the last argument.
                

                program = new ComputeProgram(context, clProgramSource);
                program.Build(null, null, null, IntPtr.Zero);

                ComputeBuffer<float> a = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrA);
                //ComputeBuffer<float> b = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrB);

                // The output buffer doesn't need any data from the host. Only its size is specified (arrC.Length).
                ComputeBuffer<float> c = new ComputeBuffer<float>(context, ComputeMemoryFlags.WriteOnly, arrC.Length);

                // Create and build the opencl program.
                
                // Create the kernel function and set its arguments.
                ComputeKernel kernel = program.CreateKernel("CompareGPUCPU");
                DateTime ExecutionStartTime; //Var will hold Execution Starting Time
                DateTime ExecutionStopTime;//Var will hold Execution Stopped Time
                TimeSpan ExecutionTime;//Var will count Total Execution Time-Our Main Hero                
                ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);
                
                ExecutionStartTime = DateTime.Now; //Gets the system Current date time expressed as local time
                int repeatTimes = 100;
                for (int repeatCounter = 0; repeatCounter < repeatTimes; repeatCounter++)
                {
                    kernel.SetMemoryArgument(0, a);
                    //kernel.SetMemoryArgument(1, b);
                    //kernel.SetMemoryArgument(2, c);
                    kernel.SetMemoryArgument(1, c);

                    // Create the event wait list. An event list is not really needed for this example but it is important to see how it works.
                    // Note that events (like everything else) consume OpenCL resources and creating a lot of them may slow down execution.
                    // For this reason their use should be avoided if possible.
                    //ComputeEventList eventList = new ComputeEventList();

                    // Create the command queue. This is used to control kernel execution and manage read/write/copy operations.
                  

                    // Execute the kernel "count" times. After this call returns, "eventList" will contain an event associated with this command.
                    // If eventList == null or typeof(eventList) == ReadOnlyCollection<ComputeEventBase>, a new event will not be created.
                    //commands.Execute(kernel, null, new long[] { count }, null, eventList);
                    commands.Execute(kernel, null, new long[] { count }, null, null);

                    // Read back the results. If the command-queue has out-of-order execution enabled (default is off), ReadFromBuffer 
                    // will not execute until any previous events in eventList (in our case only eventList[0]) are marked as complete 
                    // by OpenCL. By default the command-queue will execute the commands in the same order as they are issued from the host.
                    // eventList will contain two events after this method returns.
                    //commands.ReadFromBuffer(c, ref arrC, false, eventList);
                    commands.ReadFromBuffer(c, ref arrC, false, null);

                    // A blocking "ReadFromBuffer" (if 3rd argument is true) will wait for itself and any previous commands
                    // in the command queue or eventList to finish execution. Otherwise an explicit wait for all the opencl commands 
                    // to finish has to be issued before "arrC" can be used. 
                    // This explicit synchronization can be achieved in two ways:

                    // 1) Wait for the events in the list to finish,
                    //eventList.Wait();

                    // 2) Or simply use
                    commands.Finish();
                }
                ExecutionStopTime = DateTime.Now;
                ExecutionTime = ExecutionStopTime - ExecutionStartTime;
                double perTaskTime = ExecutionTime.TotalMilliseconds / repeatTimes;
                log.WriteLine("Use {0} ms using GPU", perTaskTime);
 
                // Do that using CPU
                /*
                ExecutionStartTime = DateTime.Now; //Gets the system Current date time expressed as local time
                for (int repeatCounter = 0; repeatCounter < repeatTimes; repeatCounter++)
                {
                    for (int i = 0; i < count; i++)
                    {
                        //arrC[i] = arrA[i] + arrB[i];
                        int j;
                        for (j = 0; j < 330 * 10; j++)
                            arrC[i] = arrA[i] + j;
                    }
                }
                ExecutionStopTime = DateTime.Now;
                ExecutionTime = ExecutionStopTime - ExecutionStartTime;
                perTaskTime = ExecutionTime.TotalMilliseconds / repeatTimes;
                log.WriteLine("Use {0} ms using CPU", ExecutionTime.TotalMilliseconds.ToString());
                 */
                log.WriteLine("arrA[0]:{0}, arrC[0]:{1}", arrA[0], arrC[0]);
            }
            catch (Exception e)
            {
                log.WriteLine(e.ToString());
            }
        }

示例#40

0

显示文件

文件： KernelManager.cs 项目： khyperia/Scatterlight

        public Bitmap GetScreenshot(CameraConfig camera, int screenshotHeight, int slowRender)
        {
            var screenshotWidth = (int)(screenshotHeight * ScreenshotAspectRatio);
            var computeBuffer = new ComputeBuffer<Vector4>(_program.Context, ComputeMemoryFlags.ReadWrite, screenshotWidth * screenshotHeight);
            var queue = new ComputeCommandQueue(_program.Context, _program.Context.Devices[0], ComputeCommandQueueFlags.None);

            var globalSize = GlobalLaunchsizeFor(screenshotWidth, screenshotHeight);

            for (var i = 0; i < slowRender; i++)
                CoreRender(computeBuffer, queue, _kernels, new Vector4((Vector3)camera.Position), new Vector4((Vector3)camera.Lookat), new Vector4((Vector3)camera.Up), i, camera.Fov, slowRender, camera.FocalDistance, screenshotWidth, screenshotHeight, globalSize, _localSize);
            for (var i = 0; i < camera.Frame * slowRender; i++)
                CoreRender(computeBuffer, queue, _kernels, new Vector4((Vector3)camera.Position), new Vector4((Vector3)camera.Lookat), new Vector4((Vector3)camera.Up), i, camera.Fov, slowRender, camera.FocalDistance, screenshotWidth, screenshotHeight, globalSize, _localSize);

            var pixels = new Vector4[screenshotWidth * screenshotHeight];
            queue.ReadFromBuffer(computeBuffer, ref pixels, true, null);
            queue.Finish();

            computeBuffer.Dispose();
            queue.Dispose();

            var bmp = new Bitmap(screenshotWidth, screenshotHeight);
            var destBuffer = new int[screenshotWidth * screenshotHeight];
            for (var y = 0; y < screenshotHeight; y++)
            {
                for (var x = 0; x < screenshotWidth; x++)
                {
                    var pixel = pixels[x + y * screenshotWidth];
                    if (float.IsNaN(pixel.X) || float.IsNaN(pixel.Y) || float.IsNaN(pixel.Z))
                    {
                        Console.WriteLine("Warning! Caught NAN pixel while taking screenshot!");
                        continue;
                    }
                    destBuffer[y * screenshotWidth + x] = (byte)(pixel.X * 255) << 16 | (byte)(pixel.Y * 255) << 8 | (byte)(pixel.Z * 255);
                }
            }
            var bmpData = bmp.LockBits(new Rectangle(0, 0, screenshotWidth, screenshotHeight), ImageLockMode.ReadWrite, PixelFormat.Format32bppRgb);
            Marshal.Copy(destBuffer, 0, bmpData.Scan0, destBuffer.Length);
            bmp.UnlockBits(bmpData);

            return bmp;
        }

示例#41

0

显示文件

        static void Main(string[] args)
        {
            //Test2();

            Test1();

            ComputePlatform plat = ComputePlatform.Platforms[0];

            Console.WriteLine("Plat:" + plat.Name);

            ComputeContext context = new ComputeContext(ComputeDeviceTypes.Gpu, new ComputeContextPropertyList(plat), null, IntPtr.Zero);

            ComputeCommandQueue queue = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

            StreamReader rs = new StreamReader("Foom/CL/testProg.txt");

            string clSrc = rs.ReadToEnd();

            rs.Close();

            ComputeProgram prog = new ComputeProgram(context, clSrc);

            prog.Build(null, null, null, IntPtr.Zero);

            Console.WriteLine("BS:" + prog.GetBuildStatus(context.Devices[0]).ToString());
            Console.WriteLine("Info:" + prog.GetBuildLog(context.Devices[0]));

            ComputeKernel kern = prog.CreateKernel("vector_add");

            int[] data = new int[1024];

            for (int i = 0; i < 1024; i++)
            {
                data[i] = 100;
            }

            ComputeBuffer <int> b1 = new ComputeBuffer <int>(context, ComputeMemoryFlags.CopyHostPointer, data);

            ComputeBuffer <int> b2 = new ComputeBuffer <int>(context, ComputeMemoryFlags.WriteOnly, 1024);

//            queue.WriteToBuffer<int>(data, b1, true, null);



            kern.SetMemoryArgument(0, b1);
            kern.SetMemoryArgument(1, b2);

            long[] wo = new long[1];
            wo[0] = 0;

            long[] ws = new long[1];
            ws[0] = 1024;

            long[] tc = new long[1];
            tc[0] = 16;

            queue.Execute(kern, wo, ws, tc, null);
            int c = Environment.TickCount;

            queue.Finish();

            c = Environment.TickCount - c;



            queue.ReadFromBuffer <int>(b2, ref data, true, null);


            for (int i = 0; i < 10; i++)
            {
                Console.WriteLine("C:" + (int)data[i]);
            }

            Console.WriteLine("Done:" + c);


            while (true)
            {
            }
        }

示例#42

0

显示文件

文件： RenderPackage.cs 项目： khyperia/Clam

        public Bitmap Screenshot(int screenshotHeight, int slowRenderPower, Action<string> displayInformation)
        {
            displayInformation("Rendering screenshot");
            var ccontext = _kernel.ComputeContext;
            _kernelInUse++;

            var screenshotWidth = (int)(screenshotHeight * ScreenshotAspectRatio);
            Bitmap bmp;
            try
            {
                bmp = new Bitmap(screenshotWidth, screenshotHeight, PixelFormat.Format24bppRgb);
            }
            catch (ArgumentException)
            {
                MessageBox.Show("Image size too big", "Error");
                return null;
            }
            var nancount = 0;
            var bmpData = bmp.LockBits(new Rectangle(0, 0, screenshotWidth, screenshotHeight), ImageLockMode.ReadWrite, PixelFormat.Format24bppRgb);
            var scan0 = bmpData.Scan0.ToInt64();
            var queue = new ComputeCommandQueue(ccontext, ccontext.Devices[0], ComputeCommandQueueFlags.None);
            var localSize = _kernel.Threadsize(queue);
            for (var i = 0; i < localSize.Length; i++)
                localSize[i] *= slowRenderPower;
            var computeBuffer = new ComputeBuffer<Vector4>(ccontext, ComputeMemoryFlags.ReadWrite, localSize[0] * localSize[1]);

            const int numFrames = 200;
            var frameDependantControls = _parameters as IFrameDependantControl;
            var framesToRender = frameDependantControls == null ? 1 : numFrames;

            var totalYs = (screenshotHeight + localSize[1] - 1) / localSize[1];
            var totalXs = (screenshotWidth + localSize[0] - 1) / localSize[0];
            var stopwatch = new Stopwatch();
            for (var y = 0; y < totalYs; y++)
            {
                for (var x = 0; x < totalXs; x++)
                {
                    stopwatch.Restart();
                    for (var frame = 0; frame < framesToRender; frame++)
                    {
                        if (frameDependantControls != null)
                            frameDependantControls.Frame = frame;
                        displayInformation(string.Format("Screenshot {0}% done", 100 * (y * totalXs * framesToRender + x * framesToRender + frame) / (totalXs * totalYs * framesToRender)));

                        _kernel.Render(computeBuffer, queue, _parameters, new Size(screenshotWidth, screenshotHeight), slowRenderPower, new Size(x, y), (int)localSize[0]);
                    }

                    var pixels = new Vector4[localSize[0] * localSize[1]];
                    queue.ReadFromBuffer(computeBuffer, ref pixels, true, 0, 0, localSize[0] * localSize[1], null);
                    queue.Finish();

                    stopwatch.Stop();
                    var elapsed = stopwatch.Elapsed.TotalMilliseconds / framesToRender;
                    _kernel.AverageKernelTime = (elapsed + _kernel.AverageKernelTime * 4) / 5;

                    var blockWidth = Math.Min(localSize[0], screenshotWidth - x * localSize[0]);
                    var blockHeight = Math.Min(localSize[1], screenshotHeight - y * localSize[1]);
                    var intPixels = new byte[blockWidth * blockHeight * 3];
                    for (var py = 0; py < blockHeight; py++)
                    {
                        for (var px = 0; px < blockWidth; px++)
                        {
                            var pixel = pixels[py * localSize[1] + px];
                            if (float.IsNaN(pixel.X) || float.IsNaN(pixel.Y) || float.IsNaN(pixel.Z))
                                nancount++;
                            // BGR
                            if (float.IsNaN(pixel.Z) == false)
                                intPixels[(py * blockWidth + px) * 3 + 0] = (byte)(pixel.Z * 255);
                            if (float.IsNaN(pixel.Y) == false)
                                intPixels[(py * blockWidth + px) * 3 + 1] = (byte)(pixel.Y * 255);
                            if (float.IsNaN(pixel.X) == false)
                                intPixels[(py * blockWidth + px) * 3 + 2] = (byte)(pixel.X * 255);
                        }
                    }
                    for (var line = 0; line < blockHeight; line++)
                        Marshal.Copy(intPixels, line * (int)blockWidth * 3, new IntPtr(scan0 + ((y * localSize[1] + line) * bmpData.Stride) + x * localSize[0] * 3), (int)blockWidth * 3);
                }
            }

            bmp.UnlockBits(bmpData);
            if (nancount != 0)
                MessageBox.Show(string.Format("Caught {0} NAN pixels while taking screenshot", nancount), "Warning");

            _kernelInUse--;
            return bmp;
        }

示例#43

0

显示文件

文件： Program.cs 项目： yeerkkiller1/Go-AI

        public static void Test()
        {
            string source = File.ReadAllText("MonteCarloSimulate.cl");

            //Choose Device
            ComputePlatform platform = ComputePlatform.Platforms[0];

            ComputeDevice device = platform.QueryDevices()[0];

            ComputeContextPropertyList properties =
                new ComputeContextPropertyList(platform);

            //Setup of stuff on our side
            ComputeContext context = new ComputeContext(ComputeDeviceTypes.All,
                properties, null, IntPtr.Zero);

            //Build the program, which gets us the kernel
            ComputeProgram program = new ComputeProgram(context, source);
            program.Build(null, null, null, IntPtr.Zero);
            //can use notify as the 3rd command... if you want this to be non-blocking

            ComputeKernel kernel = program.CreateKernel("MonteCarloSimulate");


            //Create arguments
            int sideSize = 4096;
            int[] inMatrixA = new int[sideSize * sideSize];
            int[] inMatrixB = new int[sideSize * sideSize];
            int[] outMatrixC = new int[sideSize * sideSize];
            Random random = new Random((int)DateTime.Now.Ticks);

            if (sideSize <= 32)
                for (int y = 0; y < sideSize; y++)
                    for (int x = 0; x < sideSize; x++)
                    {
                        inMatrixA[y * sideSize + x] = random.Next(3);
                        inMatrixB[y * sideSize + x] = random.Next(3);
                        outMatrixC[y * sideSize + x] = 0;
                    }


            ComputeBuffer<int> bufferMatrixA = new ComputeBuffer<int>(context,
                ComputeMemoryFlags.UseHostPointer, inMatrixA);

            ComputeBuffer<int> bufferMatrixB = new ComputeBuffer<int>(context,
                ComputeMemoryFlags.UseHostPointer, inMatrixB);

            ComputeBuffer<int> bufferMatrixC = new ComputeBuffer<int>(context,
                ComputeMemoryFlags.UseHostPointer, outMatrixC);

            long localWorkSize = Math.Min(device.MaxComputeUnits, sideSize);


            //Sets arguments
            kernel.SetMemoryArgument(0, bufferMatrixA);
            kernel.SetMemoryArgument(1, bufferMatrixB);
            kernel.SetMemoryArgument(2, bufferMatrixC);
            kernel.SetLocalArgument(3, sideSize * 2);
            kernel.SetValueArgument<int>(4, sideSize);
            //kernel.SetLocalArgument(1, localWorkSize);            

            string offset = " ";
            for (int x = 0; x < sideSize; x++)
                offset += "  ";

            if (sideSize <= 32)
                for (int y = 0; y < sideSize; y++)
                {
                    Console.Write(offset);
                    for (int x = 0; x < sideSize; x++)
                        Console.Write(inMatrixA[y * sideSize + x] + " ");
                    Console.WriteLine();
                }




            //Runs commands
            ComputeCommandQueue commands = new ComputeCommandQueue(context,
                context.Devices[0], ComputeCommandQueueFlags.None);

            long executionTime = DateTime.Now.Ticks;

            //Execute kernel
            //globalWorkSize in increments of localWorkSize (max of device.MaxComputeUnits or kernel.GetWorkGroupSize())
            commands.Execute(kernel, null,
                new long[] { Math.Min(sideSize, 16), Math.Min(sideSize, 16) },
                new long[] { localWorkSize, 1 }, null);

            //globalWorkSize can be any size
            //localWorkSize product much not be greater than device.MaxComputeUnits
            //and it must not be greater than kernel.GetWorkGroupSize()
            //ESSENTIALLY, the program iterates through globalWorkSize
            //in increments of localWorkSize. Both are multidimensional,
            //but this just saves us the time of doing that
            //(1 dimension can be put to multiple if the max dimension lengths
            //are known very easily with remainder).

            //Also, you should probably use this
            //kernel.GetPreferredWorkGroupSizeMultiple(device);

            commands.Finish();

            commands.ReadFromBuffer(bufferMatrixC, ref outMatrixC, true, null);

            commands.Finish();
            executionTime = DateTime.Now.Ticks - executionTime;


            GC.Collect();
            program.Dispose();

            Console.WriteLine();
            if (sideSize <= 32)
                for (int y = 0; y < sideSize; y++)
                {
                    for (int x = 0; x < sideSize; x++)
                        Console.Write(inMatrixB[y * sideSize + x] + " ");
                    Console.Write(" ");
                    for (int x = 0; x < sideSize; x++)
                        Console.Write(outMatrixC[y * sideSize + x] + " ");

                    Console.WriteLine();
                }

            int testY = random.Next(sideSize);
            int testX = random.Next(sideSize);

            int sum = 0;
            for (int q = 0; q < sideSize; q++)
                sum += inMatrixA[q * sideSize + testX] *
                    inMatrixB[testY * sideSize + q];

            Console.WriteLine(sum == outMatrixC[testY * sideSize + testX]);

            Console.WriteLine(executionTime / 10000.0);

        }

C# (CSharp) ComputeCommandQueue.ReadFromBuffer示例