Example #1
0
        // Wartosci min, max po OpenCL.
        public void GetMinMaxValuesCL(Mat frame, out int[] maxValues, out int[] minValues, int windowValue)
        {
            maxValues = null;
            minValues = null;


            try
            {
                MinMaxCL.UpdateArguments(frame, clooCtx, ctxMinMaxKernel, windowValue);

                // execute kernel
                queue.Execute(ctxMinMaxKernel, null, new long[] { frame.Cols }, null, null);

                // max Values.
                maxValues = new int[frame.Cols];
                GCHandle maxHandle = GCHandle.Alloc(maxValues, GCHandleType.Pinned);
                queue.Read(MinMaxCL.maxBufferCB, true, 0, maxValues.Length, maxHandle.AddrOfPinnedObject(), null);

                // min Values.
                minValues = new int[frame.Cols];
                GCHandle minHandle = GCHandle.Alloc(minValues, GCHandleType.Pinned);
                queue.Read(MinMaxCL.minBufferCB, true, 0, minValues.Length, minHandle.AddrOfPinnedObject(), null);

                // end opencl compute.
                queue.Finish();
            } catch (Exception ex)
            {
                MessageBox.Show(ex.Message);
            }
        }
Example #2
0
        /// <summary>
        /// Gets the values for an explicit input
        /// </summary>
        /// <param name="input">The explicit input</param>
        /// <param name="output">The output values</param>
        public void GetValues(Single3[] input, ref float[] output)
        {
            if (context == null || kernelExplicit == null)
            {
                throw new Exception("Compile first!");
            }

            int inputLength = input.Length;

            // IO length changed
            if (lastLength != inputLength)
            {
                lastLength   = inputLength;
                outputBuffer = new Cloo.ComputeBuffer <float>(context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.AllocateHostPointer, inputLength);
                kernelExplicit.SetMemoryArgument(2, outputBuffer);
            }

            // Setup IO Buffers
            ComputeBuffer <Single3> bufIn = new Cloo.ComputeBuffer <Single3>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, input);

            // Arrange params
            kernelExplicit.SetMemoryArgument(0, bufIn);

            // Exec and read
            queue.Execute(kernelExplicit, null, new long[] { input.Length }, null, null);
            GCHandle outHandle = GCHandle.Alloc(output, GCHandleType.Pinned);

            queue.Read <float>(outputBuffer, true, 0, inputLength, outHandle.AddrOfPinnedObject(), null); // Read saves about 500 - 1000 ticks. Sweet for small queues
            outHandle.Free();

            queue.Finish();
        }
Example #3
0
        public void UnlockOpenGLObject(ComputeImage2D image)
        {
            queue.Finish();
            List <ComputeMemory> c = new List <ComputeMemory>()
            {
                image
            };

            queue.ReleaseGLObjects(c, null);
        }
Example #4
0
        public int[] ReadIntBuffer(string key, int length)
        {
            int[] rawBuffer = _intBuffers[key];

            if (HardwareAccelerationEnabled)
            {
                _commands.ReadFromBuffer(_intComputeBuffers[key], ref rawBuffer, true, 0, 0, length, null);

                _commands.Finish();
            }

            return(rawBuffer);
        }
Example #5
0
        protected override void RunInternal()
        {
            ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.Profiling);

            Console.WriteLine("Original content:");

            Random rand = new Random();
            int count = 6;
            long[] bufferContent = new long[count];
            for (int i = 0; i < count; i++)
            {
                bufferContent[i] = (long)(rand.NextDouble() * long.MaxValue);
                Console.WriteLine("\t" + bufferContent[i]);
            }

            ComputeBuffer<long> buffer = new ComputeBuffer<long>(context, ComputeMemoryFlags.CopyHostPointer, bufferContent);
            IntPtr mappedPtr = commands.Map(buffer, false, ComputeMemoryMappingFlags.Read, 0, bufferContent.Length, null);
            commands.Finish();

            Console.WriteLine("Mapped content:");

            for (int i = 0; i < bufferContent.Length; i++)
            {
                IntPtr ptr = new IntPtr(mappedPtr.ToInt64() + i * sizeof(long));
                Console.WriteLine("\t" + Marshal.ReadInt64(ptr));
            }

            commands.Unmap(buffer, ref mappedPtr, null);
        }
Example #6
0
    static void Main(string[] args)
    {
        int[] r1 = new int[]
        { 8, 2, 3, 4 };
        int[] r2 = new int[]
        { 4, 3, 2, 5 };
        int[] r3      = new int[4];
        int   rowSize = r1.Length;
        // pick first platform
        ComputePlatform platform = ComputePlatform.Platforms[0];
        // create context with all gpu devices
        ComputeContext context = new ComputeContext(ComputeDeviceTypes.Gpu,
                                                    new ComputeContextPropertyList(platform), null, IntPtr.Zero);
        // create a command queue with first gpu found
        ComputeCommandQueue queue = new ComputeCommandQueue(context,
                                                            context.Devices[0], ComputeCommandQueueFlags.None);
        // load opencl source and
        // create program with opencl source
        ComputeProgram program = new ComputeProgram(context, CalculateKernel);

        // compile opencl source
        program.Build(null, null, null, IntPtr.Zero);
        // load chosen kernel from program
        ComputeKernel kernel = program.CreateKernel("Calc");
        // allocate a memory buffer with the message (the int array)
        ComputeBuffer <int> row1Buffer = new ComputeBuffer <int>(context,
                                                                 ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, r1);
        // allocate a memory buffer with the message (the int array)
        ComputeBuffer <int> row2Buffer = new ComputeBuffer <int>(context,
                                                                 ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, r2);
        // allocate a memory buffer with the message (the int array)
        ComputeBuffer <int> resultBuffer = new ComputeBuffer <int>(context,
                                                                   ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, new int[4]);

        kernel.SetMemoryArgument(0, row1Buffer);   // set the integer array
        kernel.SetMemoryArgument(1, row2Buffer);   // set the integer array
        kernel.SetValueArgument(2, rowSize);       // set the array size
        kernel.SetMemoryArgument(3, resultBuffer); // set the integer array
        // execute kernel
        queue.ExecuteTask(kernel, null);
        // wait for completion
        queue.Finish();
        GCHandle arrCHandle = GCHandle.Alloc(r3, GCHandleType.Pinned);

        queue.Read <int>(resultBuffer, true, 0, r3.Length, arrCHandle.AddrOfPinnedObject(), null);
        Console.WriteLine("display result from gpu buffer:");
        for (int i = 0; i < r3.Length; i++)
        {
            Console.WriteLine(r3[i]);
        }
        arrCHandle.Free();
        row1Buffer.Dispose();
        row2Buffer.Dispose();
        kernel.Dispose();
        program.Dispose();
        queue.Dispose();
        context.Dispose();
        Console.WriteLine("Finished");
        Console.ReadKey();
    }
Example #7
0
        /// <summary>
        /// Finds the nonce for a block header hash that meets the given target.
        /// </summary>
        /// <param name="header">serialized block header</param>
        /// <param name="bits">the target</param>
        /// <param name="nonceStart">the first nonce value to try</param>
        /// <param name="iterations">the number of iterations</param>
        /// <returns></returns>
        public uint FindPow(byte[] header, byte[] bits, uint nonceStart, uint iterations)
        {
            if (this.computeDevice == null)
            {
                throw new InvalidOperationException("GPU not found");
            }

            this.ConstructOpenCLResources();

            using var headerBuffer = new ComputeBuffer <byte>(this.computeContext, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, header);
            using var bitsBuffer   = new ComputeBuffer <byte>(this.computeContext, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, bits);
            using var powBuffer    = new ComputeBuffer <uint>(this.computeContext, ComputeMemoryFlags.WriteOnly, 1);

            this.computeKernel.SetMemoryArgument(0, headerBuffer);
            this.computeKernel.SetMemoryArgument(1, bitsBuffer);
            this.computeKernel.SetValueArgument(2, nonceStart);
            this.computeKernel.SetMemoryArgument(3, powBuffer);

            using var commands = new ComputeCommandQueue(this.computeContext, this.computeDevice, ComputeCommandQueueFlags.None);
            commands.Execute(this.computeKernel, null, new long[] { iterations }, null, null);

            var nonceOut = new uint[1];

            commands.ReadFromBuffer(powBuffer, ref nonceOut, true, null);
            commands.Finish();

            this.DisposeOpenCLResources();

            return(nonceOut[0]);
        }
Example #8
0
        public uint FindProofOfWork(byte[] header, byte[] bits, uint nonceStart, uint iterations, out long elapsedMilliseconds)
        {
            this.stopwatch.Restart();

            using var headerBuffer = new ComputeBuffer <byte>(this.computeContext, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, header);
            using var bitsBuffer   = new ComputeBuffer <byte>(this.computeContext, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, bits);
            using var powBuffer    = new ComputeBuffer <uint>(this.computeContext, ComputeMemoryFlags.WriteOnly, 1);

            this.computeKernel.SetMemoryArgument(0, headerBuffer);
            this.computeKernel.SetMemoryArgument(1, bitsBuffer);
            this.computeKernel.SetValueArgument(2, nonceStart);
            this.computeKernel.SetMemoryArgument(3, powBuffer);

            using var commands = new ComputeCommandQueue(this.computeContext, this.computeDevice, ComputeCommandQueueFlags.None);
            commands.Execute(this.computeKernel, null, new long[] { iterations }, null, null);

            var nonceOut = new uint[1];

            commands.ReadFromBuffer(powBuffer, ref nonceOut, true, null);
            commands.Finish();

            elapsedMilliseconds = this.stopwatch.ElapsedMilliseconds;

            return(nonceOut[0]);
        }
Example #9
0
        public unsafe static void MatrixMulti_OpenCL(double[,] result, double[,] a, double[,] b)
        {
            InitCloo();

            var ncols = result.GetUpperBound(0) + 1;
            var nrows = result.GetUpperBound(1) + 1;

            fixed(double *rp = result, ap = a, bp = b)
            {
                ComputeBuffer <double> aBuffer = new ComputeBuffer <double>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, a.Length, (IntPtr)ap);
                ComputeBuffer <double> bBuffer = new ComputeBuffer <double>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, b.Length, (IntPtr)bp);
                ComputeBuffer <double> rBuffer = new ComputeBuffer <double>(context, ComputeMemoryFlags.WriteOnly, result.Length);

                kernel.SetMemoryArgument(0, aBuffer);
                kernel.SetMemoryArgument(1, bBuffer);
                kernel.SetValueArgument(2, ncols);
                kernel.SetMemoryArgument(3, rBuffer);

                ComputeEventList events = new ComputeEventList();

                ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

                commands.Execute(kernel, null, new long[] { result.Length }, null, events);

                commands.ReadFromBuffer(rBuffer, ref result, false, new SysIntX2(), new SysIntX2(), new SysIntX2(ncols, nrows), events);

                commands.Finish();
            }
        }
Example #10
0
        public override void Proccess()
        {
            // execute kernel
            queue.Execute(kernel, null, new long[] { DataGenerator.InputCount }, null, null);
            queue.Finish();

            /*
             * short[] results2 = new short[this.results.Length];
             * GCHandle arrCHandle = GCHandle.Alloc(results2, GCHandleType.Pinned);
             * queue.Read(result_dev, true, 0, DataFeeder.GetInputCount(), arrCHandle.AddrOfPinnedObject(), events);
             */

            //bool[] results2 = new bool[DataFeeder.GetInputCount()];
            queue.ReadFromBuffer(result_dev, ref resultsBytes, true, null);
            queue.ReadFromBuffer(resultCalc_dev, ref calculatables, true, null);
            //queue.ReadFromBuffer()

            /*
             * bool[] arrC = new bool[5];
             * GCHandle arrCHandle = GCHandle.Alloc(arrC, GCHandleType.Pinned);
             * queue.Read<bool>(result_dev, true, 0, 5, arrCHandle.AddrOfPinnedObject(), null);
             */
            // wait for completion
            //queue.Finish();

            //kernel.Dispose();
            //queue.Dispose();
            //context.Dispose();
        }
Example #11
0
        private double[] MatrixMultiply(Matrix <double> L, Matrix <double> R)
        {
            var L_array = L.To1D();
            var R_array = R.To1D();
            var O_array = new double[L.M * R.N];

            ComputeBuffer <double> a = new ComputeBuffer <double>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, L_array);
            ComputeBuffer <double> b = new ComputeBuffer <double>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, R_array);
            ComputeBuffer <double> c = new ComputeBuffer <double>(context, ComputeMemoryFlags.WriteOnly, O_array.Length);

            kernel.SetMemoryArgument(0, a);
            kernel.SetMemoryArgument(1, b);
            kernel.SetMemoryArgument(2, c);
            kernel.SetValueArgument(3, L.N);
            kernel.SetValueArgument(4, L.M);
            kernel.SetValueArgument(5, R.N);
            kernel.SetValueArgument(6, R.M);

            commands.Execute(kernel, null, new long[] { R.N, L.M }, null, null);

            commands.ReadFromBuffer(c, ref O_array, true, null);

            commands.Finish();

            // cleanup buffers
            a.Dispose();
            b.Dispose();
            c.Dispose();
            return(O_array);
        }
Example #12
0
        public static Tuple <List <List <int> >, TimeSpan> MultiplyParallel(List <List <int> > matrixOne, List <List <int> > matrixTwo)
        {
            if (!isRegularMatrix(matrixOne) || !isRegularMatrix(matrixTwo))
            {
                throw new ArgumentException("Non regular matrix detected. Rows size mismatch detected.");
            }
            if (matrixOne[0].Count != matrixTwo.Count)
            {
                throw new ArgumentException("Matrixes is not compatible. Columns count of first matrix is not equal to rows count of second matrix.");
            }
            List <List <int> > result   = new List <List <int> >();
            ComputePlatform    platform = GetGPU();

            if (platform is null)
            {
                throw new PlatformNotSupportedException("Platform doesn't have a dedicated GPU. Run is impossible.");
            }
            ComputeContext      context = new ComputeContext(ComputeDeviceTypes.Gpu, new ComputeContextPropertyList(platform), null, IntPtr.Zero);
            ComputeCommandQueue queue   = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);
            ComputeProgram      program = new ComputeProgram(context, CalculateKernel);

            program.Build(null, null, null, IntPtr.Zero);
            ComputeKernel kernel = program.CreateKernel("Multiply");

            List <ComputeBuffer <int> > rowsMatrixOne    = matrixOne.TransformMatrixToComputerBuffersOfRows(context);
            List <ComputeBuffer <int> > columnsMatrixTwo = matrixTwo.TransformMatrixToComputerBuffersOfColumns(context);
            List <ComputeBuffer <int> > resultRowsMatrix = TwoDToOneDResult(matrixOne.Count, matrixTwo[0].Count, context);

            Stopwatch stopwatch = new Stopwatch();

            stopwatch.Start();
            for (int i = 0; i < resultRowsMatrix.Count; ++i)
            {
                for (int j = 0; j < resultRowsMatrix[i].Count; ++j)
                {
                    kernel.SetMemoryArgument(0, rowsMatrixOne[i]);
                    kernel.SetMemoryArgument(1, columnsMatrixTwo[j]);
                    kernel.SetMemoryArgument(2, resultRowsMatrix[i]);
                    kernel.SetValueArgument(3, matrixTwo.Count);
                    kernel.SetValueArgument(4, j);

                    queue.ExecuteTask(kernel, null);
                }
            }

            queue.Finish();
            stopwatch.Stop();

            for (int i = 0; i < resultRowsMatrix.Count; ++i)
            {
                int[]    res      = new int[resultRowsMatrix[i].Count];
                GCHandle gCHandle = GCHandle.Alloc(res, GCHandleType.Pinned);
                queue.Read <int>(resultRowsMatrix[i], true, 0, res.Length, gCHandle.AddrOfPinnedObject(), null);
                result.Add(new List <int>(res));
            }

            return(new Tuple <List <List <int> >, TimeSpan>(result, stopwatch.Elapsed));
        }
Example #13
0
        public Bitmap ProcessImage(Bitmap inImage)
        {
            ComputeImage2D oclInImage  = null;
            ComputeImage2D oclOutImage = null;

            BitmapData inImageData = inImage.LockBits(new Rectangle(0, 0, inImage.Width, inImage.Height),
                                                      ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);

            Bitmap outImage = new Bitmap(inImage.Width, inImage.Height, PixelFormat.Format32bppArgb);

            BitmapData outImageData = outImage.LockBits(new Rectangle(0, 0, outImage.Width, outImage.Height),
                                                        ImageLockMode.WriteOnly, PixelFormat.Format32bppArgb);

            try
            {
                oclInImage = new ComputeImage2D(oclContext, ComputeMemoryFlags.ReadOnly |
                                                ComputeMemoryFlags.CopyHostPointer, new ComputeImageFormat(ComputeImageChannelOrder.Bgra,
                                                                                                           ComputeImageChannelType.UNormInt8), inImage.Width, inImage.Height, 0, inImageData.Scan0);

                oclOutImage = new ComputeImage2D(oclContext, ComputeMemoryFlags.WriteOnly |
                                                 ComputeMemoryFlags.CopyHostPointer, new ComputeImageFormat(ComputeImageChannelOrder.Bgra,
                                                                                                            ComputeImageChannelType.UNormInt8), outImage.Width, outImage.Height, 0, outImageData.Scan0);

                oclKernel.SetMemoryArgument(0, oclInImage);
                oclKernel.SetMemoryArgument(1, oclOutImage);

                oclCommandQueue.Execute(oclKernel, new long[] { 0, 0 }, new long[] { oclInImage.Width,
                                                                                     oclInImage.Height }, null, null);

                oclCommandQueue.Finish();

                oclCommandQueue.ReadFromImage(oclOutImage, outImageData.Scan0, true, null);
            }
            catch (Exception)
            {
                throw;
            }
            finally
            {
                inImage.UnlockBits(inImageData);
                outImage.UnlockBits(outImageData);

                if (oclInImage != null)
                {
                    oclInImage.Dispose();
                    oclInImage = null;
                }

                if (oclOutImage != null)
                {
                    oclOutImage.Dispose();
                    oclOutImage = null;
                }
            }

            return(outImage);
        }
Example #14
0
        /// <summary>
        /// Subsequent calls to Invoke work faster without arguments
        /// </summary>
        public void Invoke(string Method, long Offset, long Worksize)
        {
            if (LastKernel == null)
            {
                throw new InvalidOperationException("You need to call Invoke with arguments before. All Arguments are saved");
            }

            ComputeEventList eventList = new ComputeEventList();

            InvokeStarted?.Invoke(this, EventArgs.Empty);

            queue.Execute(LastKernel, new long[] { Offset }, new long[] { Worksize }, null, eventList);

            eventList[0].Completed += (sender, e) => EasyCL_Completed(sender, null);
            eventList[0].Aborted   += (sender, e) => EasyCL_Aborted(sender, Method);

            queue.Finish();
        }
Example #15
0
        public float[] MultiplyMatrices(float[] matrix1, float[] matrix2,
                                        int matrix1Height, int matrix1WidthMatrix2Height, int matrix2Width)
        {
            if (!_initialized)
            {
                Initialize();
                _initialized = true;
            }

            ComputeBuffer <float> matrix1Buffer = new ComputeBuffer <float>(_context,
                                                                            ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer,
                                                                            matrix1);

            _kernel.SetMemoryArgument(0, matrix1Buffer);

            ComputeBuffer <float> matrix2Buffer = new ComputeBuffer <float>(_context,
                                                                            ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer,
                                                                            matrix2);

            _kernel.SetMemoryArgument(1, matrix2Buffer);

            float[] ret = new float[matrix1Height * matrix2Width];
            ComputeBuffer <float> retBuffer = new ComputeBuffer <float>(_context,
                                                                        ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer,
                                                                        ret);

            _kernel.SetMemoryArgument(2, retBuffer);

            _kernel.SetValueArgument <int>(3, matrix1WidthMatrix2Height);
            _kernel.SetValueArgument <int>(4, matrix2Width);

            _commandQueue.Execute(_kernel,
                                  new long[] { 0 },
                                  new long[] { matrix2Width, matrix1Height },
                                  null, null);

            unsafe
            {
                fixed(float *retPtr = ret)
                {
                    _commandQueue.Read(retBuffer,
                                       false, 0,
                                       ret.Length,
                                       new IntPtr(retPtr),
                                       null);

                    _commandQueue.Finish();
                }
            }

            matrix1Buffer.Dispose();
            matrix2Buffer.Dispose();
            retBuffer.Dispose();

            return(ret);
        }
        public void Can_copy()
        {
            var platform = ComputePlatform.Platforms.First();
            var device   = platform.Devices.First();
            var context  = new ComputeContext(new[] { device }, new ComputeContextPropertyList(platform), null, IntPtr.Zero);

            using var program = new ComputeProgram(context, Copy_kernel);

            try
            {
                program.Build(new[] { device }, "-cl-std=CL1.2", null, IntPtr.Zero);
            }
            catch (Exception ex)
            {
                OnProgramBuilt(program, device);
                return;
            }

            using var queue  = new ComputeCommandQueue(context, device, ComputeCommandQueueFlags.None);
            using var kernel = program.CreateKernel("copy");

            var sourcePath = Path.GetFullPath(Path.Combine(Environment.CurrentDirectory, "sample00.png"));

            using var sourceImage = (Bitmap)Image.FromFile(sourcePath);

            var w = sourceImage.Width;
            var h = sourceImage.Height;

            var source = sourceImage.ToBytes();

            source.SaveFormatted("source.txt", w, h, channels: 4);
            using var sourceBuffer = context.CreateImage2D(source, w, h);

            var target = new byte[h * w * 4];

            using var targetBuffer = context.CreateBuffer(target);

            kernel.SetMemoryArgument(0, sourceBuffer);
            kernel.SetMemoryArgument(1, targetBuffer);

            queue.Execute(kernel, null, new long[] { w, h }, null, null);
            queue.Finish();

            target.SaveFormatted("target.txt", w, h, channels: 4);

            var result     = target.ToBitmap(w, h, 4);
            var resultPath = Path.GetFullPath(Path.Combine(Environment.CurrentDirectory, "copy.png"));

            result.Save(resultPath);

            Run("source.txt");
            Run("target.txt");
            Run(resultPath);
        }
Example #17
0
        protected T[] InternalExecuteOpencl <T>(
            String source,
            String function,
            int bufferSize,
            ParallelTaskParams loaderParams,
            params Object[] kernelParams)
            where T : struct
        {
            TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointStart);

            ComputeCommandQueue queue = QueueWithDevice(loaderParams.OpenCLDevice);

            TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointPlatformInit);

            String         updatedSource = "#define OpenCL\r\n" + source;
            ComputeProgram program       = new ComputeProgram(queue.Context, updatedSource);

            program.Build(new ComputeDevice[] { queue.Device }, null, null, IntPtr.Zero);
            TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointKernelBuild);

            T[] resultBuffer = new T[bufferSize];

            ComputeBuffer <T>    resultBufferVar = new ComputeBuffer <T>(queue.Context, ComputeMemoryFlags.WriteOnly, bufferSize);
            List <ComputeMemory> vars            = new List <ComputeMemory>();

            vars.Add(resultBufferVar);
            vars.AddRange(WrapDeviceVariables(kernelParams, queue.Context));

            ComputeKernel kernel = program.CreateKernel(function);

            for (int i = 0; i < vars.Count; i++)
            {
                kernel.SetMemoryArgument(i, vars[i]);
            }

            TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointDeviceWrite);

            long[] workersGlobal = new long[2] {
                loaderParams.GlobalWorkers.Width, loaderParams.GlobalWorkers.Height
            };
            queue.Execute(kernel, null, workersGlobal, null, null);
            TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointKernelExecute);

            queue.ReadFromBuffer <T>(resultBufferVar, ref resultBuffer, false, null);
            TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointDeviceRead);

            queue.Finish();
            TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointPlatformDeinit);

            return(resultBuffer);
        }
Example #18
0
        /// <summary>
        /// Ignores previously set parameters set by OpenCL.SetArgument, is slower than Execute
        /// </summary>
        public void Execute(long Offset, long Worksize, long Localsize, params object[] parameter)
        {
            if (parameter == null || parameter.Length == 0)
            {
                SetArgs();
            }
            else
            {
                SetParameter(parameter);
                SetArgs();
            }

            if (Localsize == -1)
            {
                queue.Execute(kernel, new long[] { Offset }, new long[] { Worksize }, null, null);
            }
            else
            {
                queue.Execute(kernel, new long[] { Offset }, new long[] { Worksize }, new long[] { Localsize }, null);
            }

            queue.Finish();
        }
Example #19
0
        public Bitmap GetScreenshot(CameraConfig camera, int screenshotHeight, int slowRender)
        {
            var screenshotWidth = (int)(screenshotHeight * ScreenshotAspectRatio);
            var computeBuffer   = new ComputeBuffer <Vector4>(_program.Context, ComputeMemoryFlags.ReadWrite, screenshotWidth * screenshotHeight);
            var queue           = new ComputeCommandQueue(_program.Context, _program.Context.Devices[0], ComputeCommandQueueFlags.None);

            var globalSize = GlobalLaunchsizeFor(screenshotWidth, screenshotHeight);

            for (var i = 0; i < slowRender; i++)
            {
                CoreRender(computeBuffer, queue, _kernels, new Vector4((Vector3)camera.Position), new Vector4((Vector3)camera.Lookat), new Vector4((Vector3)camera.Up), i, camera.Fov, slowRender, camera.FocalDistance, screenshotWidth, screenshotHeight, globalSize, _localSize);
            }
            for (var i = 0; i < camera.Frame * slowRender; i++)
            {
                CoreRender(computeBuffer, queue, _kernels, new Vector4((Vector3)camera.Position), new Vector4((Vector3)camera.Lookat), new Vector4((Vector3)camera.Up), i, camera.Fov, slowRender, camera.FocalDistance, screenshotWidth, screenshotHeight, globalSize, _localSize);
            }

            var pixels = new Vector4[screenshotWidth * screenshotHeight];

            queue.ReadFromBuffer(computeBuffer, ref pixels, true, null);
            queue.Finish();

            computeBuffer.Dispose();
            queue.Dispose();

            var bmp        = new Bitmap(screenshotWidth, screenshotHeight);
            var destBuffer = new int[screenshotWidth * screenshotHeight];

            for (var y = 0; y < screenshotHeight; y++)
            {
                for (var x = 0; x < screenshotWidth; x++)
                {
                    var pixel = pixels[x + y * screenshotWidth];
                    if (float.IsNaN(pixel.X) || float.IsNaN(pixel.Y) || float.IsNaN(pixel.Z))
                    {
                        Console.WriteLine("Warning! Caught NAN pixel while taking screenshot!");
                        continue;
                    }
                    destBuffer[y * screenshotWidth + x] = (byte)(pixel.X * 255) << 16 | (byte)(pixel.Y * 255) << 8 | (byte)(pixel.Z * 255);
                }
            }
            var bmpData = bmp.LockBits(new Rectangle(0, 0, screenshotWidth, screenshotHeight), ImageLockMode.ReadWrite, PixelFormat.Format32bppRgb);

            Marshal.Copy(destBuffer, 0, bmpData.Scan0, destBuffer.Length);
            bmp.UnlockBits(bmpData);

            return(bmp);
        }
Example #20
0
        /// <summary>
        /// Executes the specified kernel function name.
        /// </summary>
        /// <typeparam name="TSource">The type of the source.</typeparam>
        /// <param name="functionName">Name of the function.</param>
        /// <param name="args"></param>
        /// <exception cref="ExecutionException">
        /// </exception>
        public override void Execute(string functionName, params object[] args)
        {
            ValidateArgs(functionName, args);

            ComputeKernel       kernel   = _compiledKernels.FirstOrDefault(x => (x.FunctionName == functionName));
            ComputeCommandQueue commands = new ComputeCommandQueue(_context, _defaultDevice, ComputeCommandQueueFlags.None);

            if (kernel == null)
            {
                throw new ExecutionException(string.Format("Kernal function {0} not found", functionName));
            }

            try
            {
                var  ndobject = (Array)args.FirstOrDefault(x => (x.GetType().IsArray));
                long length   = ndobject != null ? ndobject.Length : 1;

                var method = KernelFunctions.FirstOrDefault(x => (x.Name == functionName));

                var buffers = BuildKernelArguments(method, args, kernel, length);
                commands.Execute(kernel, null, new long[] { length }, null, null);

                for (int i = 0; i < args.Length; i++)
                {
                    if (!args[i].GetType().IsArray)
                    {
                        continue;
                    }

                    var ioMode = method.Parameters.ElementAt(i).Value.IOMode;
                    if (ioMode == IOMode.InOut || ioMode == IOMode.Out)
                    {
                        Array r = (Array)args[i];
                        commands.ReadFromMemory(buffers[i], ref r, true, 0, null);
                    }
                    buffers[i].Dispose();
                }
            }
            catch (Exception ex)
            {
                throw new ExecutionException(ex.Message);
            }
            finally
            {
                commands.Finish();
                commands.Dispose();
            }
        }
Example #21
0
        private static void ConductSearch(ComputeContext context, ComputeKernel kernel)
        {
            var todos = GetQueenTaskPartition(NumQueens, 4);
            var done  = new List <QueenTask>();

            ComputeEventList eventList = new ComputeEventList();

            var commands = new ComputeCommandQueue(context, context.Devices[1], ComputeCommandQueueFlags.None);

            Console.WriteLine("Starting {0} tasks, and working {1} at a time.", todos.Count, Spread);

            QueenTask[] inProgress = GetNextAssignment(new QueenTask[] {}, todos, done);

            var sw = new Stopwatch();

            sw.Start();

            while (inProgress.Any())
            {
                var taskBuffer =
                    new ComputeBuffer <QueenTask>(context,
                                                  ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer,
                                                  inProgress);

                kernel.SetMemoryArgument(0, taskBuffer);
                commands.WriteToBuffer(inProgress, taskBuffer, false, null);

                for (int i = 0; i < 12; i++)
                {
                    commands.Execute(kernel, null, new long[] { inProgress.Length }, null, eventList);
                }

                commands.ReadFromBuffer(taskBuffer, ref inProgress, false, eventList);
                commands.Finish();

                inProgress = GetNextAssignment(inProgress, todos, done);
            }

            sw.Stop();

            Console.WriteLine(sw.ElapsedMilliseconds / 1000.0);

            ulong sum = done.Select(state => state.solutions)
                        .Aggregate((total, next) => total + next);

            Console.WriteLine("Q({0})={1}", NumQueens, sum);
        }
Example #22
0
        public void GPUCopy(byte[] origin, int originOffset, byte[] dest, int destOffset, int lenght)
        {
            ComputeBuffer <byte> originBuffer, destBuffer;

            originBuffer = new ComputeBuffer <byte>(context,
                                                    ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, origin);
            destBuffer = new ComputeBuffer <byte>(context,
                                                  ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, dest);

            kernel.SetMemoryArgument(0, originBuffer);
            kernel.SetValueArgument(1, originOffset);
            kernel.SetMemoryArgument(2, destBuffer);
            kernel.SetValueArgument(3, destOffset);

            queue.Execute(kernel, new long[] { 0 }, new long[] { lenght }, null, new ComputeEventList());
            queue.Finish();
        }
Example #23
0
 public void FindPoints(byte[] baseImage, byte[] nextImage, int[] X, int[] Y, int searchDelta, int subsetDelta, int BitmapWidth, int BitmapHeight, int PointsinX, int PointsinY)
 {
     using var commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);
     using ComputeBuffer <byte> baseImageBuffer = new ComputeBuffer <byte>(context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.CopyHostPointer, baseImage);
     using ComputeBuffer <byte> nextImageBuffer = new ComputeBuffer <byte>(context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.CopyHostPointer, nextImage);
     using ComputeBuffer <int> XBuffer          = new ComputeBuffer <int>(context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.UseHostPointer, X);
     using ComputeBuffer <int> YBuffer          = new ComputeBuffer <int>(context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.UseHostPointer, Y);
     kernel.SetMemoryArgument(0, baseImageBuffer);
     kernel.SetMemoryArgument(1, nextImageBuffer);
     kernel.SetMemoryArgument(2, XBuffer);
     kernel.SetMemoryArgument(3, YBuffer);
     kernel.SetValueArgument(4, searchDelta);
     kernel.SetValueArgument(5, subsetDelta);
     kernel.SetValueArgument(6, BitmapHeight);
     kernel.SetValueArgument(7, PointsinX);
     commands.Execute(kernel, null, new long[] { PointsinX, PointsinY }, null, null);
     commands.Finish();
 }
Example #24
0
        private static void ConductSearch(ComputeContext context, ComputeKernel kernel)
        {
            var todos = GetQueenTaskPartition(NumQueens, 4);
            var done = new List<QueenTask>();

            ComputeEventList eventList = new ComputeEventList();

            var commands = new ComputeCommandQueue(context, context.Devices[1], ComputeCommandQueueFlags.None);

            Console.WriteLine("Starting {0} tasks, and working {1} at a time.", todos.Count, Spread);

            QueenTask[] inProgress = GetNextAssignment(new QueenTask[] {}, todos, done);

            var sw = new Stopwatch();
            sw.Start();

            while (inProgress.Any())
            {
                var taskBuffer =
                    new ComputeBuffer<QueenTask>(context,
                        ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer,
                        inProgress);

                kernel.SetMemoryArgument(0, taskBuffer);
                commands.WriteToBuffer(inProgress, taskBuffer, false, null);

                for (int i = 0; i < 12; i++)
                    commands.Execute(kernel, null, new long[] { inProgress.Length }, null, eventList);

                commands.ReadFromBuffer(taskBuffer, ref inProgress, false, eventList);
                commands.Finish();

                inProgress = GetNextAssignment(inProgress, todos, done);
            }

            sw.Stop();

            Console.WriteLine(sw.ElapsedMilliseconds / 1000.0);

            ulong sum = done.Select(state => state.solutions)
                            .Aggregate((total, next) => total + next);

            Console.WriteLine("Q({0})={1}", NumQueens, sum);
        }
Example #25
0
        public void Run(ComputeContext context, TextWriter log)
        {
            try
            {
                ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

                log.WriteLine("Original content:");

                Random rand          = new Random();
                int    count         = 6;
                long[] bufferContent = new long[count];
                for (int i = 0; i < count; i++)
                {
                    bufferContent[i] = (long)(rand.NextDouble() * long.MaxValue);
                    log.WriteLine("\t" + bufferContent[i]);
                }

                ComputeBuffer <long> buffer = new ComputeBuffer <long>(context, ComputeMemoryFlags.CopyHostPointer, bufferContent);

                IntPtr mappedPtr = commands.Map(buffer, true, ComputeMemoryMappingFlags.Read, 0, bufferContent.Length, null);

                log.WriteLine("Mapped content:");

                for (int i = 0; i < bufferContent.Length; i++)
                {
                    IntPtr ptr = new IntPtr(mappedPtr.ToInt64() + i * sizeof(long));
                    log.WriteLine("\t" + Marshal.ReadInt64(ptr));
                }

                commands.Unmap(buffer, ref mappedPtr, null);

                // wait for the unmap to happen
                commands.Finish();
                // cleanup buffer
                buffer.Dispose();
                // cleanup commands
                commands.Dispose();
            }
            catch (Exception e)
            {
                log.WriteLine(e.ToString());
            }
        }
Example #26
0
        public void Run(ComputeContext context, TextWriter log)
        {
            try
            {
                ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

                log.WriteLine("Original content:");

                Random rand = new Random();
                int count = 6;
                long[] bufferContent = new long[count];
                for (int i = 0; i < count; i++)
                {
                    bufferContent[i] = (long)(rand.NextDouble() * long.MaxValue);
                    log.WriteLine("\t" + bufferContent[i]);
                }

                ComputeBuffer<long> buffer = new ComputeBuffer<long>(context, ComputeMemoryFlags.CopyHostPointer, bufferContent);
                
                IntPtr mappedPtr = commands.Map(buffer, true, ComputeMemoryMappingFlags.Read, 0, bufferContent.Length, null);

                log.WriteLine("Mapped content:");

                for (int i = 0; i < bufferContent.Length; i++)
                {
                    IntPtr ptr = new IntPtr(mappedPtr.ToInt64() + i * sizeof(long));
                    log.WriteLine("\t" + Marshal.ReadInt64(ptr));
                }

                commands.Unmap(buffer, ref mappedPtr, null);

                // wait for the unmap to happen
                commands.Finish();
                // cleanup buffer
                buffer.Dispose();
                // cleanup commands
                commands.Dispose();
            }
            catch (Exception e)
            {
                log.WriteLine(e.ToString());
            }
        }
Example #27
0
        public override void RunKernel(ComputeContext context, ComputeKernel kernel, ComputeCommandQueue commands, long[] dimensions)
        {
            var tradeprofitsBuffer   = new ComputeBuffer <short>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, TradeProfits);
            var tradearbitrageBuffer = new ComputeBuffer <byte>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, TradeArbitrage);
            var fit_functionBuffer   = new ComputeBuffer <float>(context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer, FitFunction);

            kernel.SetMemoryArgument(0, allvarsBuffer);
            kernel.SetMemoryArgument(1, loboundsBuffer);
            kernel.SetMemoryArgument(2, upboundsBuffer);
            kernel.SetMemoryArgument(3, tradeprofitsBuffer);
            kernel.SetMemoryArgument(4, tradearbitrageBuffer);
            kernel.SetMemoryArgument(5, fit_functionBuffer);
            kernel.SetValueArgument <int>(6, VariablesCount);

            var eventList = new ComputeEventList();

            commands.Execute(kernel, null, dimensions, null, eventList);
            commands.ReadFromBuffer(fit_functionBuffer, ref FitFunction, true, null);
            commands.Finish();
        }
Example #28
0
        public static void Run(TextWriter log, ComputeContext context)
        {
            StartTest(log, "Dummy test");

            try
            {
                ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

                log.WriteLine("Original content:");

                Random rand = new Random();
                int count = 6;
                long[] bufferContent = new long[count];
                for (int i = 0; i < count; i++)
                {
                    bufferContent[i] = (long)(rand.NextDouble() * long.MaxValue);
                    log.WriteLine("\t" + bufferContent[i]);
                }

                ComputeBuffer<long> buffer = new ComputeBuffer<long>(context, ComputeMemoryFlags.CopyHostPointer, bufferContent);
                IntPtr mappedPtr = commands.Map(buffer, false, ComputeMemoryMappingFlags.Read, 0, bufferContent.Length, null);
                commands.Finish();

                log.WriteLine("Mapped content:");

                for (int i = 0; i < bufferContent.Length; i++)
                {
                    IntPtr ptr = new IntPtr(mappedPtr.ToInt64() + i * sizeof(long));
                    log.WriteLine("\t" + Marshal.ReadInt64(ptr));
                }

                commands.Unmap(buffer, ref mappedPtr, null);
            }
            catch (Exception e)
            {
                log.WriteLine(e.ToString());
            }

            EndTest(log, "Dummy test");
        }
Example #29
0
 public void Draw(Action <ComputeBuffer <Vector4>, ComputeCommandQueue> renderer)
 {
     GL.Clear(ClearBufferMask.ColorBufferBit | ClearBufferMask.DepthBufferBit);
     GL.Finish();
     _queue.AcquireGLObjects(new[] { _openCl }, null);
     renderer(_openCl, _queue);
     _queue.ReleaseGLObjects(new[] { _openCl }, null);
     _queue.Finish();
     GL.BindBuffer(BufferTarget.PixelUnpackBuffer, _pub);
     GL.BindTexture(TextureTarget.Texture2D, _texture);
     GL.TexSubImage2D(TextureTarget.Texture2D, 0, 0, 0, _width, _height, PixelFormat.Rgba, PixelType.Float, IntPtr.Zero);
     GL.Begin(BeginMode.Quads);
     GL.TexCoord2(0f, 1f);
     GL.Vertex3(0f, 0f, 0f);
     GL.TexCoord2(0f, 0f);
     GL.Vertex3(0f, 1f, 0f);
     GL.TexCoord2(1f, 0f);
     GL.Vertex3(1f, 1f, 0f);
     GL.TexCoord2(1f, 1f);
     GL.Vertex3(1f, 0f, 0f);
     GL.End();
 }
Example #30
0
        public static void RunKernels(int nofKernels)
        {
            try
            {
                // Create the event wait list. An event list is not really needed for this example but it is important to see how it works.
                // Note that events (like everything else) consume OpenCL resources and creating a lot of them may slow down execution.
                // For this reason their use should be avoided if possible.
                ComputeEventList eventList = new ComputeEventList();

                commandQueue1.WriteToBuffer(input, CB_input, false, eventList);
                commandQueue1.WriteToBuffer(weightIDs, CB_networkIndex, false, eventList);

                // Execute the kernel "count" times. After this call returns, "eventList" will contain an event associated with this command.
                // If eventList == null or typeof(eventList) == ReadOnlyCollection<ComputeEventBase>, a new event will not be created.

                commandQueue1.Execute(kernel, null, new long[] { nofKernels }, null, eventList);

                // Read back the results. If the command-queue has out-of-order execution enabled (default is off), ReadFromBuffer
                // will not execute until any previous events in eventList (in our case only eventList[0]) are marked as complete
                // by OpenCL. By default the command-queue will execute the commands in the same order as they are issued from the host.
                // eventList will contain two events after this method returns.
                commandQueue1.ReadFromBuffer(CB_output, ref output, false, eventList); // , eventList

                // A blocking "ReadFromBuffer" (if 3rd argument is true) will wait for itself and any previous commands
                // in the command queue or eventList to finish execution. Otherwise an explicit wait for all the opencl commands
                // to finish has to be issued before "arrC" can be used.
                // This explicit synchronization can be achieved in two ways:

                // 1) Wait for the events in the list to finish,
                eventList.Wait();

                // 2) Or simply use
                commandQueue1.Finish();
            }
            catch (Exception e)
            {
                Console.WriteLine(e.ToString());
            }
        }
Example #31
0
        /// <summary>
        /// Executes the specified kernel function name.
        /// </summary>
        /// <typeparam name="TSource">The type of the source.</typeparam>
        /// <param name="functionName">Name of the function.</param>
        /// <param name="inputs">The inputs.</param>
        /// <param name="returnInputVariable">The return result.</param>
        /// <returns></returns>
        /// <exception cref="ExecutionException">
        /// </exception>
        public override void Execute <TSource>(string functionName, params object[] args)
        {
            ComputeKernel       kernel   = _compiledKernels.FirstOrDefault(x => (x.FunctionName == functionName));
            ComputeCommandQueue commands = new ComputeCommandQueue(_context, _defaultDevice, ComputeCommandQueueFlags.None);

            if (kernel == null)
            {
                throw new ExecutionException(string.Format("Kernal function {0} not found", functionName));
            }

            try
            {
                var ndobject = (TSource[])args.FirstOrDefault(x => (x.GetType() == typeof(TSource[])));

                long length = ndobject != null ? ndobject.Length : 1;

                var buffers = BuildKernelArguments <TSource>(args, kernel, length);
                commands.Execute(kernel, null, new long[] { length }, null, null);

                foreach (var item in buffers)
                {
                    TSource[] r = (TSource[])args[item.Key];
                    commands.ReadFromBuffer(item.Value, ref r, true, null);
                    //args[item.Key] = r;
                    item.Value.Dispose();
                }

                commands.Finish();
            }
            catch (Exception ex)
            {
                throw new ExecutionException(ex.Message);
            }
            finally
            {
                commands.Dispose();
            }
        }
Example #32
0
        private void Run(OpenCLInfo[] info, OpenCLPointInfo[] points, OpenCLNote[] envs, float[] result)
        {
            //var s = sw.ElapsedMilliseconds;

            ComputeBuffer <OpenCLInfo>      infoBuffer   = new ComputeBuffer <OpenCLInfo>(FContext, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, info);
            ComputeBuffer <OpenCLPointInfo> pointsBuffer = new ComputeBuffer <OpenCLPointInfo>(FContext, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, points);

            ComputeBuffer <OpenCLNote> envsBuffer   = new ComputeBuffer <OpenCLNote>(FContext, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, envs);
            ComputeBuffer <float>      resultBuffer = new ComputeBuffer <float>(FContext, ComputeMemoryFlags.WriteOnly, result.Length);

            kernel.SetMemoryArgument(0, infoBuffer);
            kernel.SetMemoryArgument(1, pointsBuffer);
            // 2 - FWaveformBuffer
            kernel.SetMemoryArgument(3, envsBuffer);
            kernel.SetMemoryArgument(4, resultBuffer);

            //var f = sw.ElapsedMilliseconds;

            commands.Execute(kernel, null, new long[] { result.Length / 2 }, null, null);

            //var e = sw.ElapsedMilliseconds;

            commands.ReadFromBuffer(resultBuffer, ref result, true, null);

            //var r = sw.ElapsedMilliseconds;

            commands.Finish();

            infoBuffer.Dispose();
            pointsBuffer.Dispose();
            envsBuffer.Dispose();
            resultBuffer.Dispose();

            //var d = sw.ElapsedMilliseconds;

            //if (App.DebugMode)
            //	Debug.WriteLine($"OpenCLWaveProvider.Run: tot: {d - s}");
        }
 static void Main(string[] args)
     {
         int[] r1 = new int[]
             {1, 2, 3, 4};
         int[] r2 = new int[]
             {4, 3, 2, 1};
         int rowSize = r1.Length;
         // pick first platform
         ComputePlatform platform = ComputePlatform.Platforms[0];
         // create context with all gpu devices
         ComputeContext context = new ComputeContext(ComputeDeviceTypes.Gpu,
             new ComputeContextPropertyList(platform), null, IntPtr.Zero);
         // create a command queue with first gpu found
         ComputeCommandQueue queue = new ComputeCommandQueue(context,
             context.Devices[0], ComputeCommandQueueFlags.None);
         // load opencl source and
         // create program with opencl source
         ComputeProgram program = new ComputeProgram(context, CalculateKernel);
         // compile opencl source
         program.Build(null, null, null, IntPtr.Zero);
         // load chosen kernel from program
         ComputeKernel kernel = program.CreateKernel("Calc");
         // allocate a memory buffer with the message (the int array)
         ComputeBuffer<int> row1Buffer = new ComputeBuffer<int>(context,
             ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, r1);
         // allocate a memory buffer with the message (the int array)
         ComputeBuffer<int> row2Buffer = new ComputeBuffer<int>(context,
             ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, r2);
         kernel.SetMemoryArgument(0, row1Buffer); // set the integer array
         kernel.SetMemoryArgument(1, row2Buffer); // set the integer array
         kernel.SetValueArgument(2, rowSize); // set the array size
             // execute kernel
         queue.ExecuteTask(kernel, null);
         // wait for completion
         queue.Finish();
         Console.WriteLine("Finished");
         Console.ReadKey();
     }
Example #34
0
        /// <summary>
        /// Run kernel against all elements.
        /// </summary>
        /// <typeparam name="TSource">Struct type that corresponds to kernel function type</typeparam>
        /// <param name="array">Array of elements to process</param>
        /// <param name="kernelCode">The code of kernel function</param>
        /// <param name="kernelSelector">Method that selects kernel by function name; if null uses first</param>
        /// <param name="deviceSelector">Method that selects device by index, description, OpenCL version; if null uses first</param>
        public static void ClooForEach <TSource>(this TSource[] array, string kernelCode, Func <string, bool> kernelSelector = null, Func <int, string, Version, bool> deviceSelector = null) where TSource : struct
        {
            kernelSelector = kernelSelector ?? ((k) => true);
            deviceSelector = deviceSelector ?? ((i, d, v) => true);

            var device = ComputePlatform.Platforms.SelectMany(p => p.Devices).Where((d, i) => deviceSelector(i, $"{d.Name} {d.DriverVersion}", d.Version)).First();

            var properties = new ComputeContextPropertyList(device.Platform);

            using (var context = new ComputeContext(new[] { device }, properties, null, IntPtr.Zero))
                using (var program = new ComputeProgram(context, kernelCode))
                {
                    program.Build(new[] { device }, null, null, IntPtr.Zero);

                    var kernels = program.CreateAllKernels().ToList();
                    try
                    {
                        var kernel = kernels.First((k) => kernelSelector(k.FunctionName));

                        using (var primesBuffer = new ComputeBuffer <TSource>(context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.UseHostPointer, array))
                        {
                            kernel.SetMemoryArgument(0, primesBuffer);

                            using (var queue = new ComputeCommandQueue(context, context.Devices[0], 0))
                            {
                                queue.Execute(kernel, null, new long[] { primesBuffer.Count }, null, null);
                                queue.Finish();

                                queue.ReadFromBuffer(primesBuffer, ref array, true, null);
                            }
                        }
                    }
                    finally
                    {
                        kernels.ForEach(k => k.Dispose());
                    }
                }
        }
Example #35
0
        /// <summary>
        /// OpenCLでの計算プログラムを作成する
        /// </summary>
        /// <param name="maxDt">初期時間刻み</param>
        /// <param name="a">振幅</param>
        /// <param name="omega">角速度</param>
        public ComputerCL(double maxDt, double a, double omega)
            : base(maxDt, a, omega)
        {
            // プラットフォームとデバイス群を取得
            this.Platform = ComputePlatform.Platforms[0];
            this.Devices = this.Platform.Devices;

            // コンテキストを作成
            var context = new ComputeContext(this.Devices, new ComputeContextPropertyList(this.Platform), null, IntPtr.Zero);

            // キューを作成
            this.queue = new ComputeCommandQueue(context, this.Devices[0], ComputeCommandQueueFlags.None);

            // プログラムを作成
            var program = new ComputeProgram(context, Properties.Resources.SinAcceleration);

            // ビルドしてみて
            try
            {
                program.Build(this.Devices, null, null, IntPtr.Zero);
            }
            // 失敗したら
            catch(BuildProgramFailureComputeException ex)
            {
                // 例外を投げる
                throw new BuildCLException(program.Source[0], program.GetBuildLog(this.Devices[0]));
            }

            // カーネルを作成
            this.sinAccelerationKernel = program.CreateKernel("SinAcceleration");

            // 準備処理は何もしない
            this.prepare = () => { };

            // 粒子が追加された時に
            base.ParticleAdded += (sender, e) =>
            {
                // 準備処理の時の処理を実装
                this.prepare = () =>
                {
                    // 粒子数を設定
                    this.particleCount = this.inputParticles.Count;

                    // バッファーを作成
                    this.bufferX = new ComputeBuffer<Vector4>(context, ComputeMemoryFlags.ReadWrite, this.particleCount);
                    this.bufferU = new ComputeBuffer<Vector4>(context, ComputeMemoryFlags.ReadWrite, this.particleCount);
                    this.bufferA = new ComputeBuffer<Vector4>(context, ComputeMemoryFlags.ReadWrite, this.particleCount);
                    this.bufferD = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly, this.particleCount);

                    // 入力データを確保
                    var particlesX = new Vector4[this.particleCount];
                    var particlesU = new Vector4[this.particleCount];
                    var particlesA = new Vector4[this.particleCount];
                    this.particlesD = new float[this.particleCount];
                    this.particlesMaterial = new Material[this.particleCount];
                    this.particlesType = new ParticleType[this.particleCount];

                    // 全粒子について
                    int i = 0;
                    foreach(var particle in this.inputParticles)
                    {
                        // データをコピー
                        particlesX[i] = new Vector4((Vector3)particle.X, 0);
                        particlesU[i] = new Vector4((Vector3)particle.U, 0);
                        particlesA[i] = new Vector4((Vector3)particle.A, 0);
                        this.particlesD[i] = (float)particle.D;
                        this.particlesMaterial[i] = particle.Material;
                        this.particlesType[i] = particle.Type;

                        i++;
                    }

                    // バッファーへ転送
                    this.queue.WriteToBuffer(particlesX, this.bufferX, false, null);
                    this.queue.WriteToBuffer(particlesU, this.bufferU, false, null);
                    this.queue.WriteToBuffer(particlesA, this.bufferA, false, null);
                    this.queue.WriteToBuffer(this.particlesD, this.bufferD, false, null);

                    // 入力粒子群を空にする
                    this.inputParticles.Clear();

                    // 準備処理は空
                    this.prepare = () => { };

                    // ここまで完了を待機
                    queue.Finish();
                };
            };
        }
            public unsafe void EndSend()
            {
                for (int i = 0; i < points.Count; i++)
                {
                  inx[i].x = (float)points[i].Item3.Real;
                  inx[i].y = (float)points[i].Item3.Imaginary;
                  inc[i].x = (float)points[i].Item4.Real;
                  inc[i].y = (float)points[i].Item4.Imaginary;
                }

                _krnl.SetMemoryArgument(0, x);
                _krnl.SetMemoryArgument(1, c);
                for (int i = 0; i < _ld.Count; i++)
                {
                  _krnl.SetMemoryArgument(2 + i, outp[i]);
                }

                ComputeCommandQueue command = new ComputeCommandQueue(_context, _context.Devices[0], ComputeCommandQueueFlags.None);
                command.WriteToBuffer(inx, x, false, null);
                command.WriteToBuffer(inc, c, false, null);

                command.Execute(_krnl, null, new long[] { points.Count }, null, null);

                for (int i = 0; i < _ld.Count; i++)
                  command.ReadFromBuffer(outp[i], ref opl[i], false, null);

                command.Finish();

                output = new Queue<Tuple<int, int, List<ProcessLayer>>>();

                for (int i = 0; i < points.Count; i++)
                {
                  List<ProcessLayer> pl = new List<ProcessLayer>();
                  for (int ii = 0; ii < _ld.Count; ii++)
                  {
                ProcessLayer p = _ld[ii].Clone();
                p.c_active = opl[ii][i].c_active != 0;
                p.c_calc = opl[ii][i].c_calc;
                p.c_cmean = opl[ii][i].c_cmean;
                p.c_cvariance = opl[ii][i].c_cvariance;
                p.c_cvarsx = opl[ii][i].c_cvarsx;
                p.c_isin = opl[ii][i].c_isin != 0;
                p.c_n = opl[ii][i].c_n;
                p.c_old2x = new Complex(opl[ii][i].c_old2x.x,opl[ii][i].c_old2x.y);
                p.c_oldx = new Complex(opl[ii][i].c_oldx.x,opl[ii][i].c_oldx.y);
                p.c_resn = opl[ii][i].c_resn;
                p.c_resx = new Complex(opl[ii][i].c_resx.x,opl[ii][i].c_resx.y);
                p.c_x = new Complex(opl[ii][i].c_x.x,opl[ii][i].c_x.y);
                pl.Add(p);
                  }
                  output.Enqueue(Tuple.Create(points[i].Item1, points[i].Item2, pl));
                }
            }
        public void Run(ComputeContext context, TextWriter log)
        {
            try
            {
                // Create the arrays and fill them with random data.
                int count = 640*480; // 
                float[] arrA = new float[count];
                float[] arrB = new float[count];
                float[] arrC = new float[count];

                Random rand = new Random();
                for (int i = 0; i < count; i++)
                {
                    arrA[i] = (float)(rand.NextDouble() * 100);
                    arrB[i] = (float)(rand.NextDouble() * 100);
                }

                
                // Create the input buffers and fill them with data from the arrays.
                // Access modifiers should match those in a kernel.
                // CopyHostPointer means the buffer should be filled with the data provided in the last argument.
                

                program = new ComputeProgram(context, clProgramSource);
                program.Build(null, null, null, IntPtr.Zero);

                ComputeBuffer<float> a = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrA);
                //ComputeBuffer<float> b = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrB);

                // The output buffer doesn't need any data from the host. Only its size is specified (arrC.Length).
                ComputeBuffer<float> c = new ComputeBuffer<float>(context, ComputeMemoryFlags.WriteOnly, arrC.Length);

                // Create and build the opencl program.
                
                // Create the kernel function and set its arguments.
                ComputeKernel kernel = program.CreateKernel("CompareGPUCPU");
                DateTime ExecutionStartTime; //Var will hold Execution Starting Time
                DateTime ExecutionStopTime;//Var will hold Execution Stopped Time
                TimeSpan ExecutionTime;//Var will count Total Execution Time-Our Main Hero                
                ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);
                
                ExecutionStartTime = DateTime.Now; //Gets the system Current date time expressed as local time
                int repeatTimes = 100;
                for (int repeatCounter = 0; repeatCounter < repeatTimes; repeatCounter++)
                {
                    kernel.SetMemoryArgument(0, a);
                    //kernel.SetMemoryArgument(1, b);
                    //kernel.SetMemoryArgument(2, c);
                    kernel.SetMemoryArgument(1, c);

                    // Create the event wait list. An event list is not really needed for this example but it is important to see how it works.
                    // Note that events (like everything else) consume OpenCL resources and creating a lot of them may slow down execution.
                    // For this reason their use should be avoided if possible.
                    //ComputeEventList eventList = new ComputeEventList();

                    // Create the command queue. This is used to control kernel execution and manage read/write/copy operations.
                  

                    // Execute the kernel "count" times. After this call returns, "eventList" will contain an event associated with this command.
                    // If eventList == null or typeof(eventList) == ReadOnlyCollection<ComputeEventBase>, a new event will not be created.
                    //commands.Execute(kernel, null, new long[] { count }, null, eventList);
                    commands.Execute(kernel, null, new long[] { count }, null, null);

                    // Read back the results. If the command-queue has out-of-order execution enabled (default is off), ReadFromBuffer 
                    // will not execute until any previous events in eventList (in our case only eventList[0]) are marked as complete 
                    // by OpenCL. By default the command-queue will execute the commands in the same order as they are issued from the host.
                    // eventList will contain two events after this method returns.
                    //commands.ReadFromBuffer(c, ref arrC, false, eventList);
                    commands.ReadFromBuffer(c, ref arrC, false, null);

                    // A blocking "ReadFromBuffer" (if 3rd argument is true) will wait for itself and any previous commands
                    // in the command queue or eventList to finish execution. Otherwise an explicit wait for all the opencl commands 
                    // to finish has to be issued before "arrC" can be used. 
                    // This explicit synchronization can be achieved in two ways:

                    // 1) Wait for the events in the list to finish,
                    //eventList.Wait();

                    // 2) Or simply use
                    commands.Finish();
                }
                ExecutionStopTime = DateTime.Now;
                ExecutionTime = ExecutionStopTime - ExecutionStartTime;
                double perTaskTime = ExecutionTime.TotalMilliseconds / repeatTimes;
                log.WriteLine("Use {0} ms using GPU", perTaskTime);
 
                // Do that using CPU
                /*
                ExecutionStartTime = DateTime.Now; //Gets the system Current date time expressed as local time
                for (int repeatCounter = 0; repeatCounter < repeatTimes; repeatCounter++)
                {
                    for (int i = 0; i < count; i++)
                    {
                        //arrC[i] = arrA[i] + arrB[i];
                        int j;
                        for (j = 0; j < 330 * 10; j++)
                            arrC[i] = arrA[i] + j;
                    }
                }
                ExecutionStopTime = DateTime.Now;
                ExecutionTime = ExecutionStopTime - ExecutionStartTime;
                perTaskTime = ExecutionTime.TotalMilliseconds / repeatTimes;
                log.WriteLine("Use {0} ms using CPU", ExecutionTime.TotalMilliseconds.ToString());
                 */
                log.WriteLine("arrA[0]:{0}, arrC[0]:{1}", arrA[0], arrC[0]);
            }
            catch (Exception e)
            {
                log.WriteLine(e.ToString());
            }
        }
Example #38
0
        private unsafe void notify(CLProgramHandle programHandle, IntPtr userDataPtr)
        {
            uint[] dst = new uint[16];

            fixed (uint* dstPtr = dst)
            {
                using (var queue = new ComputeCommandQueue(ccontext, device, ComputeCommandQueueFlags.None))
                {
                    var buf = new ComputeBuffer<uint>(ccontext, ComputeMemoryFlags.WriteOnly, 16);

                    var kernel = program.CreateKernel("test");
                    kernel.SetValueArgument(0, 1443351125U);
                    kernel.SetMemoryArgument(1, buf);

                    var eventList = new ComputeEventList();

                    queue.Execute(kernel, null, new long[] { 16L, 256L, 1048576L }, null, null);
                    queue.Finish();
                    queue.Read<uint>(buf, true, 0, 16, (IntPtr)dstPtr, null);
                    queue.Finish();
                    queue.Finish();
                }
            }
        }
Example #39
0
        public void TakeGif(IGifableControl control, Action<string> displayInformation)
        {
            _kernelInUse++;
            var encoder = new AnimatedGifEncoder();
            encoder.Start(Ext.UniqueFilename("sequence", "gif"));
            encoder.SetDelay(1000 / StaticSettings.Fetch.GifFramerate);
            encoder.SetRepeat(0);
            var endIgnoreControl = control.StartIgnoreControl();

            var ccontext = _kernel.ComputeContext;
            var queue = new ComputeCommandQueue(ccontext, ccontext.Devices[0], ComputeCommandQueueFlags.None);
            var screenshotHeight = StaticSettings.Fetch.GifHeight;
            var screenshotWidth = (int)(screenshotHeight * ScreenshotAspectRatio);
            var computeBuffer = new ComputeBuffer<Vector4>(ccontext, ComputeMemoryFlags.ReadWrite, screenshotWidth * screenshotHeight);

            var fdc = control as IFrameDependantControl;
            for (var i = 0; i < StaticSettings.Fetch.GifFramecount + 1; i++)
            {
                if (fdc != null)
                    fdc.Frame = i;
                var teardown = control.SetupGif((double)i / StaticSettings.Fetch.GifFramecount);
                _kernel.Render(computeBuffer, queue, _parameters, new Size(screenshotWidth, screenshotHeight));
                queue.Finish();
                teardown();
            }
            for (var i = 1; i < StaticSettings.Fetch.GifFramecount + 1; i++)
            {
                if (fdc != null)
                    fdc.Frame = i;
                displayInformation(string.Format("{0}% done with gif", (int)(100.0 * (i - 1) / StaticSettings.Fetch.GifFramecount)));
                var teardown = control.SetupGif((double)(i - 1) / StaticSettings.Fetch.GifFramecount);
                _kernel.Render(computeBuffer, queue, _parameters, new Size(screenshotWidth, screenshotHeight));
                if (encoder.AddFrame(Download(queue, computeBuffer, screenshotWidth, screenshotHeight)) == false)
                    throw new Exception("Could not add frame to gif");
                teardown();
            }
            endIgnoreControl();
            encoder.Finish();
            computeBuffer.Dispose();
            queue.Dispose();
            displayInformation("Done with gif");
            _kernelInUse--;
        }
Example #40
0
        public static void Calculate(List<Calculation> calculations)
        {
            Stopwatch s = new Stopwatch();
            s.Start();

            int count = calculations.Count;

            IntVec2[] p_p = new IntVec2[count];

            IntVec2[] p_a = new IntVec2[count];
            IntVec2[] p_b = new IntVec2[count];
            IntVec2[] p_c = new IntVec2[count];

            FloatVec3[] c = new FloatVec3[count];

            int[] c_valid = new int[count];

            Parallel.For(0, count, i =>
            {
                var calc = calculations[i];

                p_p[i] = new IntVec2(calc.P);
                p_a[i] = new IntVec2(calc.A);
                p_b[i] = new IntVec2(calc.B);
                p_c[i] = new IntVec2(calc.C);
            });

            mark(s, "memory init");

            ComputeBuffer<IntVec2> _p_p = new ComputeBuffer<IntVec2>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, p_p);

            ComputeBuffer<IntVec2> _p_a = new ComputeBuffer<IntVec2>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, p_a);

            ComputeBuffer<IntVec2> _p_b = new ComputeBuffer<IntVec2>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, p_b);

            ComputeBuffer<IntVec2> _p_c = new ComputeBuffer<IntVec2>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, p_c);

            ComputeBuffer<FloatVec3> _c = new ComputeBuffer<FloatVec3>(context, ComputeMemoryFlags.WriteOnly, c.Length);
            ComputeBuffer<int> _c_valid = new ComputeBuffer<int>(context, ComputeMemoryFlags.WriteOnly, c_valid.Length);

            mark(s, "memory buffer init");

            ComputeKernel kernel = program.CreateKernel("Barycentric");
            kernel.SetMemoryArgument(0, _p_p);

            kernel.SetMemoryArgument(1, _p_a);

            kernel.SetMemoryArgument(2, _p_b);

            kernel.SetMemoryArgument(3, _p_c);

            kernel.SetMemoryArgument(4, _c);
            kernel.SetMemoryArgument(5, _c_valid);

            mark(s, "memory init 2");

            ComputeEventList eventList = new ComputeEventList();

            ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

            commands.Execute(kernel, null, new long[] { count }, null, eventList);

            mark(s, "execute");

            commands.ReadFromBuffer(_c, ref c, false, eventList);
            commands.ReadFromBuffer(_c_valid, ref c_valid, false, eventList);
            commands.Finish();

            mark(s, "read 1");

            Parallel.For(0, count, i =>
            {
                var calc = calculations[i];
                calc.Coords = new BarycentricCoordinates(c[i].U,c[i].V,c[i].W);

                if (c_valid[i] == 1)
                {
                    lock (calc.Tri)
                        calc.Tri.Points.Add(new DrawPoint(calc.Coords, calc.P));
                }
            });

            mark(s, "read 2");

            // cleanup commands
            commands.Dispose();

            // cleanup events
            foreach (ComputeEventBase eventBase in eventList)
            {
                eventBase.Dispose();
            }
            eventList.Clear();

            // cleanup kernel
            kernel.Dispose();

            _p_p.Dispose();

            _p_a.Dispose();
            _p_b.Dispose();
            _p_c.Dispose();

            _c.Dispose();
            _c_valid.Dispose();

            mark(s, "dispose");
        }
Example #41
0
        public Bitmap GetScreenshot(CameraConfig camera, int screenshotHeight, int slowRender)
        {
            var screenshotWidth = (int)(screenshotHeight * ScreenshotAspectRatio);
            var computeBuffer = new ComputeBuffer<Vector4>(_program.Context, ComputeMemoryFlags.ReadWrite, screenshotWidth * screenshotHeight);
            var queue = new ComputeCommandQueue(_program.Context, _program.Context.Devices[0], ComputeCommandQueueFlags.None);

            var globalSize = GlobalLaunchsizeFor(screenshotWidth, screenshotHeight);

            for (var i = 0; i < slowRender; i++)
                CoreRender(computeBuffer, queue, _kernels, new Vector4((Vector3)camera.Position), new Vector4((Vector3)camera.Lookat), new Vector4((Vector3)camera.Up), i, camera.Fov, slowRender, camera.FocalDistance, screenshotWidth, screenshotHeight, globalSize, _localSize);
            for (var i = 0; i < camera.Frame * slowRender; i++)
                CoreRender(computeBuffer, queue, _kernels, new Vector4((Vector3)camera.Position), new Vector4((Vector3)camera.Lookat), new Vector4((Vector3)camera.Up), i, camera.Fov, slowRender, camera.FocalDistance, screenshotWidth, screenshotHeight, globalSize, _localSize);

            var pixels = new Vector4[screenshotWidth * screenshotHeight];
            queue.ReadFromBuffer(computeBuffer, ref pixels, true, null);
            queue.Finish();

            computeBuffer.Dispose();
            queue.Dispose();

            var bmp = new Bitmap(screenshotWidth, screenshotHeight);
            var destBuffer = new int[screenshotWidth * screenshotHeight];
            for (var y = 0; y < screenshotHeight; y++)
            {
                for (var x = 0; x < screenshotWidth; x++)
                {
                    var pixel = pixels[x + y * screenshotWidth];
                    if (float.IsNaN(pixel.X) || float.IsNaN(pixel.Y) || float.IsNaN(pixel.Z))
                    {
                        Console.WriteLine("Warning! Caught NAN pixel while taking screenshot!");
                        continue;
                    }
                    destBuffer[y * screenshotWidth + x] = (byte)(pixel.X * 255) << 16 | (byte)(pixel.Y * 255) << 8 | (byte)(pixel.Z * 255);
                }
            }
            var bmpData = bmp.LockBits(new Rectangle(0, 0, screenshotWidth, screenshotHeight), ImageLockMode.ReadWrite, PixelFormat.Format32bppRgb);
            Marshal.Copy(destBuffer, 0, bmpData.Scan0, destBuffer.Length);
            bmp.UnlockBits(bmpData);

            return bmp;
        }
Example #42
0
        private void CalculateConvolution(ComputeContext computeContext)
        {
            Stopwatch stopwatch = new Stopwatch();
            stopwatch.Start();

            float dx;
            bool shiftXParse = float.TryParse(textBoxShiftX.Text, NumberStyles.Float, CultureInfo.InvariantCulture.NumberFormat, out dx);
            if (!shiftXParse)
                throw new SyntaxErrorException(", needs to be .");

            float dy;
            bool shiftYParse = float.TryParse(textBoxShiftX.Text, NumberStyles.Float, CultureInfo.InvariantCulture.NumberFormat, out dy);
            if (!shiftYParse)
                throw new SyntaxErrorException(", needs to be  .");

            float dz;
            bool shiftZParse = float.TryParse(textBoxShiftX.Text, NumberStyles.Float, CultureInfo.InvariantCulture.NumberFormat, out dz);
            if (!shiftZParse)
                throw new SyntaxErrorException(", needs to be  .");

            int pixelCount = _imageDimensionX*_imageDimensionY*_imageDimensionZ;

            Console.WriteLine("Computing...");
            Console.WriteLine("Reading kernel...");

            String kernelPath = Directory.GetParent(Directory.GetCurrentDirectory()).Parent.Parent.FullName;

            String kernelString;
            using (var sr = new StreamReader(kernelPath + "\\convolution.cl"))
                kernelString = sr.ReadToEnd();

            Console.WriteLine("Reading kernel... done");

            float[] selectedTransformation = Transformations.GetTransformation((TransformationType)comboBoxTransform.SelectedItem, 1.0f / float.Parse(textBoxPixelSize.Text), 1.0f / float.Parse(textBoxPixelSize.Text), 1.0f / float.Parse(textBoxPixelSize.Text), dx, dy, dz);

            //create openCL program
            ComputeProgram computeProgram = new ComputeProgram(computeContext, kernelString);

            computeProgram.Build(computeContext.Devices, null, null, IntPtr.Zero);

            ComputeProgramBuildStatus computeProgramBuildStatus = computeProgram.GetBuildStatus(_selectedComputeDevice);
            Console.WriteLine("computeProgramBuildStatus\n\t"+computeProgramBuildStatus);

            String buildLog = computeProgram.GetBuildLog(_selectedComputeDevice);
            Console.WriteLine("buildLog");
            if (buildLog.Equals("\n"))
                Console.WriteLine("\tbuildLog is empty...");
            else
                Console.WriteLine("\t" + buildLog);

            float[] fluorophores = CsvData.ReadFluorophores(_sourceFilename);

            /////////////////////////////////////////////
            // Create a Command Queue & Event List
            /////////////////////////////////////////////
            ComputeCommandQueue computeCommandQueue = new ComputeCommandQueue(computeContext, _selectedComputeDevice, ComputeCommandQueueFlags.None);

            ////////////////////////////////////////////////////////////////
            // Create Buffers Transform
            ////////////////////////////////////////////////////////////////
            ComputeBuffer<float> fluorophoresCoords = new ComputeBuffer<float>(computeContext, ComputeMemoryFlags.ReadWrite, fluorophores.LongLength);

            ComputeBuffer<float> transformationMatrix = new ComputeBuffer<float>(computeContext, ComputeMemoryFlags.ReadOnly, selectedTransformation.LongLength);

            /////////////////////////////////////////////
            // Create the transformFluorophoresKernel
            ///////////////////////////////////////////////////////////
            ComputeKernel transformFluorophoresKernel = computeProgram.CreateKernel("transform_fluorophores");

            /////////////////////////////////////////////
            // Set the transformFluorophoresKernel arguments
            /////////////////////////////////////////////
            transformFluorophoresKernel.SetMemoryArgument(0, fluorophoresCoords);
            transformFluorophoresKernel.SetMemoryArgument(1, transformationMatrix);

            /////////////////////////////////////////////
            // Configure the work-item structure
            /////////////////////////////////////////////
            long[] globalWorkOffsetTransformFluorophoresKernel = null;
            long[] globalWorkSizeTransformFluorophoresKernel = new long[]   { fluorophores.Length / 4 };
            long[] localWorkSizeTransformFluorophoresKernel = null;

            ////////////////////////////////////////////////////////
            // Enqueue the transformFluorophoresKernel for execution
            ////////////////////////////////////////////////////////

            computeCommandQueue.WriteToBuffer(fluorophores, fluorophoresCoords, true, null);
            computeCommandQueue.WriteToBuffer(selectedTransformation, transformationMatrix, true, null);

            computeCommandQueue.Execute(transformFluorophoresKernel, globalWorkOffsetTransformFluorophoresKernel, globalWorkSizeTransformFluorophoresKernel, localWorkSizeTransformFluorophoresKernel, null);
            //            computeCommandQueue.ExecuteTask(transformFluorophoresKernel, transformFluorophoresEvents);

            float[] transformedFluorophores = new float[fluorophores.Length];

            computeCommandQueue.ReadFromBuffer(fluorophoresCoords, ref transformedFluorophores, true, null);

            computeCommandQueue.Finish();

            //TODO remove, only for testing
            //            for (int i = 0; i < transformedFluorophores.Length; i++)
            //            {
            //                Console.WriteLine(transformedFluorophores[i]);
            //            }
            // /TODO remove, only for testing

            stopwatch.Stop();
            Console.WriteLine("Transform fluophores duration:\n\t" + stopwatch.Elapsed);
            stopwatch.Reset();
            stopwatch.Start();
            // fluorophoresCoords are now transformed (done in place)

            ////////////////////////////////////////////////////////////////
            // Create Buffers Convolve Fluorophores
            ////////////////////////////////////////////////////////////////

            const int convolve_kernel_lwgs = 16;
            int totalBuffer = (int) Math.Ceiling(pixelCount / (float)convolve_kernel_lwgs) * convolve_kernel_lwgs;

            ComputeBuffer<float> resultImage = new ComputeBuffer<float>(computeContext, ComputeMemoryFlags.WriteOnly, totalBuffer);

            /////////////////////////////////////////////
            // Create the transformFluorophoresKernel
            /////////////////////////////////////////////
            ComputeKernel convolveFluorophoresKernel = computeProgram.CreateKernel("convolve_fluorophores");

            /////////////////////////////////////////////
            // Set the convolveFluorophoresKernel arguments
            /////////////////////////////////////////////

            convolveFluorophoresKernel.SetMemoryArgument(0, resultImage);
            convolveFluorophoresKernel.SetValueArgument(1, _imageDimensionX);
            convolveFluorophoresKernel.SetValueArgument(2, _imageDimensionY);
            convolveFluorophoresKernel.SetMemoryArgument(3, fluorophoresCoords);
            convolveFluorophoresKernel.SetLocalArgument(4, convolve_kernel_lwgs);
            convolveFluorophoresKernel.SetValueArgument(5, fluorophores.Length / 4);

            /////////////////////////////////////////////
            // Configure the work-item structure
            /////////////////////////////////////////////
            long[] globalWorkOffsetTransformConvolveFluorophoresKernel = null;
            long[] globalWorkSizeTransformConvolveFluorophoresKernel = new long[] { pixelCount };
            long[] localWorkSizeTransformConvolveFluorophoresKernel = new long[] {convolve_kernel_lwgs};

            ////////////////////////////////////////////////////////
            // Enqueue the convolveFluorophoresKernel for execution
            ////////////////////////////////////////////////////////

            computeCommandQueue.Execute(convolveFluorophoresKernel, globalWorkOffsetTransformConvolveFluorophoresKernel, globalWorkSizeTransformConvolveFluorophoresKernel, localWorkSizeTransformConvolveFluorophoresKernel, null);

            float[] resultImageData = new float[totalBuffer];
            computeCommandQueue.ReadFromBuffer(resultImage, ref resultImageData, true, null);

            computeCommandQueue.Finish();

            for (int i = 0; i < pixelCount; i++)
            {
                Console.WriteLine(resultImageData[i]);
            }

            Console.WriteLine("Writing data to file...");
            //            CsvData.WriteToDisk("..\\..\\..\\output.csv", resultImageData);
            TiffData.WriteToDisk(resultImageData, _saveFilename, _imageDimensionX, _imageDimensionY);

            Bitmap bitmap = new Bitmap(_imageDimensionX, _imageDimensionY);

            float max = resultImageData.Max();

            float scale = 255/(float)max;

            //            for (int r = 0; r < _imageDimensionY; r++)
            //            {
            //                for (int c = 0; c < _imageDimensionX; c++)
            //                {
            //                    float value = resultImageData[c*(r + 1)];
            //                    Color newColor = Color.FromArgb((int)(value * scale), (int)(value * scale), (int)(value * scale));
            //                    bitmap.SetPixel(c,r, newColor);
            //                }
            //            }

            ushort[] ushortdata = new ushort[resultImageData.Length];

            for (int i = 0; i < resultImageData.Length; i++)
            {
                ushortdata[i] = (ushort)resultImageData[i];
            }

            uint[] convertGray16ToRgb = ConvertGray16ToRGB(ushortdata, 16);

            byte[] bytes = new byte[convertGray16ToRgb.Length * 4];
            //
            //            int[] resultImageData2 = new int[resultImageData.Length];
            //
            for (int index = 0; index < convertGray16ToRgb.Length; index++)
            {
            //                resultImageData2[index] = (int)(scale*resultImageData[index]);

                byte[] bytes1 = BitConverter.GetBytes(convertGray16ToRgb[index]);
                bytes[index] = bytes1[0];
                bytes[4 * index + 1] = bytes1[1];
                bytes[4 * index + 2] = bytes1[2];
                bytes[4 * index + 3] = bytes1[3];
            }
            //
            //            for (int r = 0; r < _imageDimensionY; r++)
            //            {
            //                for (int c = 0; c < _imageDimensionX; c++)
            //                {
            //                    float value = resultImageData2[c*(r + 1)];
            //                    Color newColor = Color.FromArgb((int)(value), (int)(value), (int)(value));
            //                    bitmap.SetPixel(c,r, newColor);
            //                }
            //            }
            //            bitmap.Save("c:\\temp.bmp");

            using (MemoryStream ms = new MemoryStream(bytes))
            {
                Image image = Bitmap.FromStream(ms);
                image.Save("c:\\temp.bmp");
            }

            Console.WriteLine("Writing data to file... done");

            stopwatch.Stop();
            Console.WriteLine("Convolve fluophores duration:\n\t" + stopwatch.Elapsed);
            Console.WriteLine("Computing... done");
        }
Example #43
0
        public void Run(ComputeContext context, TextWriter log)
        {
            try
            {
                // Create the arrays and fill them with random data.
                int count = 10;
                float[] arrA = new float[count];
                float[] arrB = new float[count];
                float[] arrC = new float[count];

                Random rand = new Random();
                for (int i = 0; i < count; i++)
                {
                    arrA[i] = (float)(rand.NextDouble() * 100);
                    arrB[i] = (float)(rand.NextDouble() * 100);
                }

                // Create the input buffers and fill them with data from the arrays.
                // Access modifiers should match those in a kernel.
                // CopyHostPointer means the buffer should be filled with the data provided in the last argument.
                ComputeBuffer<float> a = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrA);
                ComputeBuffer<float> b = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrB);
                
                // The output buffer doesn't need any data from the host. Only its size is specified (arrC.Length).
                ComputeBuffer<float> c = new ComputeBuffer<float>(context, ComputeMemoryFlags.WriteOnly, arrC.Length);

                // Create and build the opencl program.
                program = new ComputeProgram(context, clProgramSource);
                program.Build(null, null, null, IntPtr.Zero);

                // Create the kernel function and set its arguments.
                ComputeKernel kernel = program.CreateKernel("VectorAdd");
                kernel.SetMemoryArgument(0, a);
                kernel.SetMemoryArgument(1, b);
                kernel.SetMemoryArgument(2, c);

                // Create the event wait list. An event list is not really needed for this example but it is important to see how it works.
                // Note that events (like everything else) consume OpenCL resources and creating a lot of them may slow down execution.
                // For this reason their use should be avoided if possible.
                ComputeEventList eventList = new ComputeEventList();
                
                // Create the command queue. This is used to control kernel execution and manage read/write/copy operations.
                ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

                // Execute the kernel "count" times. After this call returns, "eventList" will contain an event associated with this command.
                // If eventList == null or typeof(eventList) == ReadOnlyCollection<ComputeEventBase>, a new event will not be created.
                commands.Execute(kernel, null, new long[] { count }, null, eventList);
                
                // Read back the results. If the command-queue has out-of-order execution enabled (default is off), ReadFromBuffer 
                // will not execute until any previous events in eventList (in our case only eventList[0]) are marked as complete 
                // by OpenCL. By default the command-queue will execute the commands in the same order as they are issued from the host.
                // eventList will contain two events after this method returns.
                commands.ReadFromBuffer(c, ref arrC, false, eventList);

                // A blocking "ReadFromBuffer" (if 3rd argument is true) will wait for itself and any previous commands
                // in the command queue or eventList to finish execution. Otherwise an explicit wait for all the opencl commands 
                // to finish has to be issued before "arrC" can be used. 
                // This explicit synchronization can be achieved in two ways:

                // 1) Wait for the events in the list to finish,
                //eventList.Wait();

                // 2) Or simply use
                commands.Finish();

                // Print the results to a log/console.
                for (int i = 0; i < count; i++)
                    log.WriteLine("{0} + {1} = {2}", arrA[i], arrB[i], arrC[i]);

                // cleanup commands
                commands.Dispose();

                // cleanup events
                foreach (ComputeEventBase eventBase in eventList)
                {
                    eventBase.Dispose();
                }
                eventList.Clear();

                // cleanup kernel
                kernel.Dispose();

                // cleanup program
                program.Dispose();

                // cleanup buffers
                a.Dispose();
                b.Dispose();
                c.Dispose();
            }
            catch (Exception e)
            {
                log.WriteLine(e.ToString());
            }
        }
Example #44
0
        public static void CallOpenCL(int[,] libertyGroups, 
            int[,] groupNumbers, int x, int y, 
            int[] surroundingLibs, ref int emptySurroundings,
            int ourSgn, ref int duplicateGroups)
        {
            //Create arguments
            
            //Does not split yet
            //int[,] libertyGroups, 
            //int[,] groupNumbers, 
            //int x, 
            //int y, 
            //int[] surroundingLibs, 
            //ref int emptySurroundings,            
            //ref int duplicateGroups,
            //int ourSgn, 
            
            //We have to map 2 dimension to 1 dimension            

            //Set arguments
            ComputeBuffer<int> libertyGroupsIn = new ComputeBuffer<int>(openCLContext, ComputeMemoryFlags.UseHostPointer, twoDtoOneD(libertyGroups));
            openCLKernel.SetMemoryArgument(0, libertyGroupsIn);

            ComputeBuffer<int> groupNumbersIn = new ComputeBuffer<int>(openCLContext, ComputeMemoryFlags.UseHostPointer, twoDtoOneD(groupNumbers));
            openCLKernel.SetMemoryArgument(1, groupNumbersIn);

            openCLKernel.SetValueArgument<int>(2, x);
            openCLKernel.SetValueArgument<int>(3, y);
            
            ComputeBuffer<int> surroundingLibsIn = new ComputeBuffer<int>(openCLContext, ComputeMemoryFlags.UseHostPointer, surroundingLibs);
            openCLKernel.SetMemoryArgument(4, surroundingLibsIn);

            int[] emptySurroundRef = new int[1];
            ComputeBuffer<int> emptySurroundRefIn = new ComputeBuffer<int>(openCLContext, ComputeMemoryFlags.UseHostPointer, emptySurroundRef);
            openCLKernel.SetMemoryArgument(5, emptySurroundRefIn);

            int[] duplicateGroupsRef = new int[1];
            ComputeBuffer<int> duplicateGroupsRefIn = new ComputeBuffer<int>(openCLContext, ComputeMemoryFlags.UseHostPointer, duplicateGroupsRef);
            openCLKernel.SetMemoryArgument(6, duplicateGroupsRefIn);

            openCLKernel.SetValueArgument<int>(7, ourSgn);            

            //long localWorkSize = Math.Min(openCLDevice.MaxComputeUnits, sideSize);

            //Display input data

            //Runs commands
            ComputeCommandQueue commands = new ComputeCommandQueue(openCLContext,
                openCLContext.Devices[0], ComputeCommandQueueFlags.None);

            long executionTime = DateTime.Now.Ticks;

            //Execute kernel
            //globalWorkSize in increments of localWorkSize (max of device.MaxComputeUnits or kernel.GetWorkGroupSize())
            commands.Execute(openCLKernel, null,
                new long[] { 1 },
                new long[] { 1 }, null);

            //Also, you should probably use this
            //kernel.GetPreferredWorkGroupSizeMultiple(device);

            commands.Finish();

            //int[] surroundingLibs, 
            //ref int emptySurroundings,            
            //ref int duplicateGroups,
            //Read output data
            commands.ReadFromBuffer(surroundingLibsIn, ref surroundingLibs, true, null);
            commands.ReadFromBuffer(emptySurroundRefIn, ref emptySurroundRef, true, null); emptySurroundings = emptySurroundRef[0];
            commands.ReadFromBuffer(duplicateGroupsRefIn, ref duplicateGroupsRef, true, null); duplicateGroups = duplicateGroupsRef[0];

            //We could set blocking to false on reads and then read them all back in then, we could (possiblity) gain some performance
            //by telling it that commands can be executed out of order and then by queuing them up and calling Finish
            commands.Finish();
            executionTime = DateTime.Now.Ticks - executionTime;


            GC.Collect();
         //   openCLProgram.Dispose();            
            //display output data
            
            //Test are done by our caller now

            Console.WriteLine(executionTime / 10000.0);
        }
Example #45
0
        private Bitmap Download(ComputeCommandQueue queue, ComputeBuffer<Vector4> buffer, int width, int height)
        {
            var pixels = new Vector4[width * height];
            queue.ReadFromBuffer(buffer, ref pixels, true, 0, 0, width * height, null);
            queue.Finish();

            var intPixels = Array.ConvertAll(pixels, pixel =>
            {
                pixel = Vector4.Clamp(pixel, new Vector4(0), new Vector4(1));
                return (byte)(pixel.X * 255) << 16 | (byte)(pixel.Y * 255) << 8 | (byte)(pixel.Z * 255);
            });
            var bmp = new Bitmap(width, height, PixelFormat.Format32bppRgb);
            var bmpData = bmp.LockBits(new Rectangle(0, 0, bmp.Width, bmp.Height), ImageLockMode.ReadWrite, PixelFormat.Format32bppRgb);
            Marshal.Copy(intPixels, 0, bmpData.Scan0, intPixels.Length);
            bmp.UnlockBits(bmpData);
            return bmp;
        }
		public String SearchPassword (byte[] hash, HashType type, int maxLength, String[] keySpace)
		{
			if (type != HashType.MD5) {
				throw new NotImplementedException ("sums other than MD5 not supported");
			}

			if (maxLength > 6) {
				throw new NotImplementedException ("doesn't support longer passwords than 7");
			}

			var joinedKeySpace = new List<byte> ();

			foreach (var k in keySpace) {
				if (k.Length > 1) {
					throw new NotImplementedException ("doesn't support longer keyspaces than 1");
				}

				joinedKeySpace.AddRange (Encoding.ASCII.GetBytes (k));
			}
				
			byte[] resultData = new byte[20];
			byte[] keyspaceJoined = joinedKeySpace.ToArray ();

			var resultBuffer = new ComputeBuffer<byte> (Context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.CopyHostPointer, resultData);
			var hashBuffer = new ComputeBuffer<byte> (Context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, hash);
			var keyspaceBuffer = new ComputeBuffer<byte> (Context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, keyspaceJoined);
			var passLenBuffer = new ComputeBuffer<byte> (Context, ComputeMemoryFlags.WriteOnly, 1);
			var flagBuffer = new ComputeBuffer<int> (Context, ComputeMemoryFlags.None, 1);


			Kernel.SetMemoryArgument (0, hashBuffer);
			Kernel.SetMemoryArgument (1, keyspaceBuffer);
			Kernel.SetMemoryArgument (2, resultBuffer);
			Kernel.SetMemoryArgument (3, passLenBuffer);
			Kernel.SetMemoryArgument (4, flagBuffer);

			// execute kernel
			var queue = new ComputeCommandQueue (Context, Device, ComputeCommandQueueFlags.None);

			long firstDim = joinedKeySpace.Count;
			var globalWorksize = new long[] { firstDim, 57 * 57, 57 * 57 };

			queue.Execute (Kernel, new long[] { 0, 0, 0 }, globalWorksize, null, null);

			byte[] passLen = new byte[1];

			queue.ReadFromBuffer (resultBuffer, ref resultData, true, null);
			queue.ReadFromBuffer (passLenBuffer, ref passLen, true, null);

			String password = null;

			if (passLen [0] > 0) {
				logger.Info ("pass len {0}", passLen [0]);
				password = Encoding.ASCII.GetString (resultData, 0, passLen [0]);
				logger.Info ("Found password: \"{0}\"", password);
			} else {
				logger.Info ("Password not found.");
			}

			queue.Finish ();

			return password;
		}
Example #47
0
        protected override void RunInternal()
        {
            int count = 10;
            float[] arrA = new float[count];
            float[] arrB = new float[count];
            float[] arrC = new float[count];

            Random rand = new Random();

            for (int i = 0; i < count; i++)
            {
                arrA[i] = (float)(rand.NextDouble() * 100);
                arrB[i] = (float)(rand.NextDouble() * 100);
            }

            ComputeBuffer<float> a = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrA);
            ComputeBuffer<float> b = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrB);
            ComputeBuffer<float> c = new ComputeBuffer<float>(context, ComputeMemoryFlags.WriteOnly, arrC.Length);

            ComputeProgram program = new ComputeProgram(context, new string[] { kernelSource });
            program.Build(null, null, null, IntPtr.Zero);

            ComputeKernel kernel = program.CreateKernel("VectorAdd");
            kernel.SetMemoryArgument(0, a);
            kernel.SetMemoryArgument(1, b);
            kernel.SetMemoryArgument(2, c);

            ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

            ComputeEventList events = new ComputeEventList();

            commands.Execute(kernel, null, new long[] { count }, null, events);

            arrC = new float[count];
            GCHandle arrCHandle = GCHandle.Alloc(arrC, GCHandleType.Pinned);

            commands.Read(c, false, 0, count, arrCHandle.AddrOfPinnedObject(), events);
            commands.Finish();

            arrCHandle.Free();

            for (int i = 0; i < count; i++)
                Console.WriteLine("{0} + {1} = {2}", arrA[i], arrB[i], arrC[i]);
        }
Example #48
0
        public static void Test()
        {
            string source = File.ReadAllText("MonteCarloSimulate.cl");

            //Choose Device
            ComputePlatform platform = ComputePlatform.Platforms[0];

            ComputeDevice device = platform.QueryDevices()[0];

            ComputeContextPropertyList properties =
                new ComputeContextPropertyList(platform);

            //Setup of stuff on our side
            ComputeContext context = new ComputeContext(ComputeDeviceTypes.All,
                properties, null, IntPtr.Zero);

            //Build the program, which gets us the kernel
            ComputeProgram program = new ComputeProgram(context, source);
            program.Build(null, null, null, IntPtr.Zero);
            //can use notify as the 3rd command... if you want this to be non-blocking

            ComputeKernel kernel = program.CreateKernel("MonteCarloSimulate");


            //Create arguments
            int sideSize = 4096;
            int[] inMatrixA = new int[sideSize * sideSize];
            int[] inMatrixB = new int[sideSize * sideSize];
            int[] outMatrixC = new int[sideSize * sideSize];
            Random random = new Random((int)DateTime.Now.Ticks);

            if (sideSize <= 32)
                for (int y = 0; y < sideSize; y++)
                    for (int x = 0; x < sideSize; x++)
                    {
                        inMatrixA[y * sideSize + x] = random.Next(3);
                        inMatrixB[y * sideSize + x] = random.Next(3);
                        outMatrixC[y * sideSize + x] = 0;
                    }


            ComputeBuffer<int> bufferMatrixA = new ComputeBuffer<int>(context,
                ComputeMemoryFlags.UseHostPointer, inMatrixA);

            ComputeBuffer<int> bufferMatrixB = new ComputeBuffer<int>(context,
                ComputeMemoryFlags.UseHostPointer, inMatrixB);

            ComputeBuffer<int> bufferMatrixC = new ComputeBuffer<int>(context,
                ComputeMemoryFlags.UseHostPointer, outMatrixC);

            long localWorkSize = Math.Min(device.MaxComputeUnits, sideSize);


            //Sets arguments
            kernel.SetMemoryArgument(0, bufferMatrixA);
            kernel.SetMemoryArgument(1, bufferMatrixB);
            kernel.SetMemoryArgument(2, bufferMatrixC);
            kernel.SetLocalArgument(3, sideSize * 2);
            kernel.SetValueArgument<int>(4, sideSize);
            //kernel.SetLocalArgument(1, localWorkSize);            

            string offset = " ";
            for (int x = 0; x < sideSize; x++)
                offset += "  ";

            if (sideSize <= 32)
                for (int y = 0; y < sideSize; y++)
                {
                    Console.Write(offset);
                    for (int x = 0; x < sideSize; x++)
                        Console.Write(inMatrixA[y * sideSize + x] + " ");
                    Console.WriteLine();
                }




            //Runs commands
            ComputeCommandQueue commands = new ComputeCommandQueue(context,
                context.Devices[0], ComputeCommandQueueFlags.None);

            long executionTime = DateTime.Now.Ticks;

            //Execute kernel
            //globalWorkSize in increments of localWorkSize (max of device.MaxComputeUnits or kernel.GetWorkGroupSize())
            commands.Execute(kernel, null,
                new long[] { Math.Min(sideSize, 16), Math.Min(sideSize, 16) },
                new long[] { localWorkSize, 1 }, null);

            //globalWorkSize can be any size
            //localWorkSize product much not be greater than device.MaxComputeUnits
            //and it must not be greater than kernel.GetWorkGroupSize()
            //ESSENTIALLY, the program iterates through globalWorkSize
            //in increments of localWorkSize. Both are multidimensional,
            //but this just saves us the time of doing that
            //(1 dimension can be put to multiple if the max dimension lengths
            //are known very easily with remainder).

            //Also, you should probably use this
            //kernel.GetPreferredWorkGroupSizeMultiple(device);

            commands.Finish();

            commands.ReadFromBuffer(bufferMatrixC, ref outMatrixC, true, null);

            commands.Finish();
            executionTime = DateTime.Now.Ticks - executionTime;


            GC.Collect();
            program.Dispose();

            Console.WriteLine();
            if (sideSize <= 32)
                for (int y = 0; y < sideSize; y++)
                {
                    for (int x = 0; x < sideSize; x++)
                        Console.Write(inMatrixB[y * sideSize + x] + " ");
                    Console.Write(" ");
                    for (int x = 0; x < sideSize; x++)
                        Console.Write(outMatrixC[y * sideSize + x] + " ");

                    Console.WriteLine();
                }

            int testY = random.Next(sideSize);
            int testX = random.Next(sideSize);

            int sum = 0;
            for (int q = 0; q < sideSize; q++)
                sum += inMatrixA[q * sideSize + testX] *
                    inMatrixB[testY * sideSize + q];

            Console.WriteLine(sum == outMatrixC[testY * sideSize + testX]);

            Console.WriteLine(executionTime / 10000.0);

        }
Example #49
0
        public Bitmap Screenshot(int screenshotHeight, int slowRenderPower, Action<string> displayInformation)
        {
            displayInformation("Rendering screenshot");
            var ccontext = _kernel.ComputeContext;
            _kernelInUse++;

            var screenshotWidth = (int)(screenshotHeight * ScreenshotAspectRatio);
            Bitmap bmp;
            try
            {
                bmp = new Bitmap(screenshotWidth, screenshotHeight, PixelFormat.Format24bppRgb);
            }
            catch (ArgumentException)
            {
                MessageBox.Show("Image size too big", "Error");
                return null;
            }
            var nancount = 0;
            var bmpData = bmp.LockBits(new Rectangle(0, 0, screenshotWidth, screenshotHeight), ImageLockMode.ReadWrite, PixelFormat.Format24bppRgb);
            var scan0 = bmpData.Scan0.ToInt64();
            var queue = new ComputeCommandQueue(ccontext, ccontext.Devices[0], ComputeCommandQueueFlags.None);
            var localSize = _kernel.Threadsize(queue);
            for (var i = 0; i < localSize.Length; i++)
                localSize[i] *= slowRenderPower;
            var computeBuffer = new ComputeBuffer<Vector4>(ccontext, ComputeMemoryFlags.ReadWrite, localSize[0] * localSize[1]);

            const int numFrames = 200;
            var frameDependantControls = _parameters as IFrameDependantControl;
            var framesToRender = frameDependantControls == null ? 1 : numFrames;

            var totalYs = (screenshotHeight + localSize[1] - 1) / localSize[1];
            var totalXs = (screenshotWidth + localSize[0] - 1) / localSize[0];
            var stopwatch = new Stopwatch();
            for (var y = 0; y < totalYs; y++)
            {
                for (var x = 0; x < totalXs; x++)
                {
                    stopwatch.Restart();
                    for (var frame = 0; frame < framesToRender; frame++)
                    {
                        if (frameDependantControls != null)
                            frameDependantControls.Frame = frame;
                        displayInformation(string.Format("Screenshot {0}% done", 100 * (y * totalXs * framesToRender + x * framesToRender + frame) / (totalXs * totalYs * framesToRender)));

                        _kernel.Render(computeBuffer, queue, _parameters, new Size(screenshotWidth, screenshotHeight), slowRenderPower, new Size(x, y), (int)localSize[0]);
                    }

                    var pixels = new Vector4[localSize[0] * localSize[1]];
                    queue.ReadFromBuffer(computeBuffer, ref pixels, true, 0, 0, localSize[0] * localSize[1], null);
                    queue.Finish();

                    stopwatch.Stop();
                    var elapsed = stopwatch.Elapsed.TotalMilliseconds / framesToRender;
                    _kernel.AverageKernelTime = (elapsed + _kernel.AverageKernelTime * 4) / 5;

                    var blockWidth = Math.Min(localSize[0], screenshotWidth - x * localSize[0]);
                    var blockHeight = Math.Min(localSize[1], screenshotHeight - y * localSize[1]);
                    var intPixels = new byte[blockWidth * blockHeight * 3];
                    for (var py = 0; py < blockHeight; py++)
                    {
                        for (var px = 0; px < blockWidth; px++)
                        {
                            var pixel = pixels[py * localSize[1] + px];
                            if (float.IsNaN(pixel.X) || float.IsNaN(pixel.Y) || float.IsNaN(pixel.Z))
                                nancount++;
                            // BGR
                            if (float.IsNaN(pixel.Z) == false)
                                intPixels[(py * blockWidth + px) * 3 + 0] = (byte)(pixel.Z * 255);
                            if (float.IsNaN(pixel.Y) == false)
                                intPixels[(py * blockWidth + px) * 3 + 1] = (byte)(pixel.Y * 255);
                            if (float.IsNaN(pixel.X) == false)
                                intPixels[(py * blockWidth + px) * 3 + 2] = (byte)(pixel.X * 255);
                        }
                    }
                    for (var line = 0; line < blockHeight; line++)
                        Marshal.Copy(intPixels, line * (int)blockWidth * 3, new IntPtr(scan0 + ((y * localSize[1] + line) * bmpData.Stride) + x * localSize[0] * 3), (int)blockWidth * 3);
                }
            }

            bmp.UnlockBits(bmpData);
            if (nancount != 0)
                MessageBox.Show(string.Format("Caught {0} NAN pixels while taking screenshot", nancount), "Warning");

            _kernelInUse--;
            return bmp;
        }
        public TerrainGen()
        {
            #if CPU_DEBUG
            var platform = ComputePlatform.Platforms[1];
            #else
            var platform = ComputePlatform.Platforms[0];
            #endif
            _devices = new List<ComputeDevice>();
            _devices.Add(platform.Devices[0]);
            _properties = new ComputeContextPropertyList(platform);
            _context = new ComputeContext(_devices, _properties, null, IntPtr.Zero);
            _cmdQueue = new ComputeCommandQueue(_context, _devices[0], ComputeCommandQueueFlags.None);

            #region setup generator kernel
            bool loadFromSource = Gbl.HasRawHashChanged[Gbl.RawDir.Scripts];
            loadFromSource = true;
            _chunkWidthInBlocks = Gbl.LoadContent<int>("TGen_ChunkWidthInBlocks");
            _chunkWidthInVerts = _chunkWidthInBlocks + 1;
            _blockWidth = Gbl.LoadContent<int>("TGen_BlockWidthInMeters");
            float lacunarity = Gbl.LoadContent<float>("TGen_Lacunarity");
            float gain = Gbl.LoadContent<float>("TGen_Gain");
            int octaves = Gbl.LoadContent<int>("TGen_Octaves");
            float offset = Gbl.LoadContent<float>("TGen_Offset");
            float hScale = Gbl.LoadContent<float>("TGen_HScale");
            float vScale = Gbl.LoadContent<float>("TGen_VScale");

            _genConstants = new ComputeBuffer<float>(_context, ComputeMemoryFlags.ReadOnly, 8);
            var genArr = new[]{
                lacunarity,
                gain,
                offset,
                octaves,
                hScale,
                vScale,
                _blockWidth,
                _chunkWidthInBlocks
            };

            _cmdQueue.WriteToBuffer(genArr, _genConstants, false, null);
            if (loadFromSource){
                _generationPrgm = new ComputeProgram(_context, Gbl.LoadScript("TGen_Generator"));
            #if CPU_DEBUG
                _generationPrgm.Build(null, @"-g -s D:\Projects\Gondola\Scripts\GenTerrain.cl", null, IntPtr.Zero); //use option -I + scriptDir for header search
            #else
                _generationPrgm.Build(null, "", null, IntPtr.Zero);//use option -I + scriptDir for header search
            #endif
                Gbl.SaveBinary(_generationPrgm.Binaries, "TGen_Generator");
            }
            else{
                var binary = Gbl.LoadBinary("TGen_Generator");
                _generationPrgm = new ComputeProgram(_context, binary, _devices);
                _generationPrgm.Build(null, "", null, IntPtr.Zero);
            }
            //loadFromSource = false;

            _terrainGenKernel = _generationPrgm.CreateKernel("GenTerrain");
            _normalGenKernel = _generationPrgm.CreateKernel("GenNormals");

            //despite the script using float3 for these fields, we need to consider it to be float4 because the
            //implementation is basically a float4 wrapper that uses zero for the last variable
            _geometry = new ComputeBuffer<float>(_context, ComputeMemoryFlags.None, _chunkWidthInVerts*_chunkWidthInVerts*4);
            _normals = new ComputeBuffer<ushort>(_context, ComputeMemoryFlags.None, _chunkWidthInVerts * _chunkWidthInVerts * 4);
            _binormals = new ComputeBuffer<byte>(_context, ComputeMemoryFlags.None, _chunkWidthInVerts*_chunkWidthInVerts*4);
            _tangents = new ComputeBuffer<byte>(_context, ComputeMemoryFlags.None, _chunkWidthInVerts*_chunkWidthInVerts*4);
            _uvCoords = new ComputeBuffer<float>(_context, ComputeMemoryFlags.None, _chunkWidthInVerts*_chunkWidthInVerts*2);

            _terrainGenKernel.SetMemoryArgument(0, _genConstants);
            _terrainGenKernel.SetMemoryArgument(3, _geometry);
            _terrainGenKernel.SetMemoryArgument(4, _uvCoords);

            _normalGenKernel.SetMemoryArgument(0, _genConstants);
            _normalGenKernel.SetMemoryArgument(3, _geometry);
            _normalGenKernel.SetMemoryArgument(4, _normals);
            _normalGenKernel.SetMemoryArgument(5, _binormals);
            _normalGenKernel.SetMemoryArgument(6, _tangents);

            #endregion

            #region setup quadtree kernel

            if (loadFromSource){
                _qTreePrgm = new ComputeProgram(_context, Gbl.LoadScript("TGen_QTree"));
            #if CPU_DEBUG
                _qTreePrgm.Build(null, @"-g -s D:\Projects\Gondola\Scripts\Quadtree.cl", null, IntPtr.Zero);
            #else
                _qTreePrgm.Build(null, "", null, IntPtr.Zero);
            #endif
                Gbl.SaveBinary(_qTreePrgm.Binaries, "TGen_QTree");
            }
            else{
                var binary = Gbl.LoadBinary("TGen_QTree");
                _qTreePrgm = new ComputeProgram(_context, binary, _devices);
                _qTreePrgm.Build(null, "", null, IntPtr.Zero);
            }

            _qTreeKernel = _qTreePrgm.CreateKernel("QuadTree");
            _crossCullKernel = _qTreePrgm.CreateKernel("CrossCull");

            _activeVerts = new ComputeBuffer<byte>(_context, ComputeMemoryFlags.None, _chunkWidthInVerts*_chunkWidthInVerts);

            _dummy = new ComputeBuffer<int>(_context, ComputeMemoryFlags.None, 50);
            var rawNormals = new ushort[_chunkWidthInVerts * _chunkWidthInVerts * 4];
            _emptyVerts = new byte[_chunkWidthInVerts*_chunkWidthInVerts];
            for (int i = 0; i < _emptyVerts.Length; i++){
                _emptyVerts[i] = 1;
            }
            _cmdQueue.WriteToBuffer(rawNormals, _normals, true, null);
            _cmdQueue.WriteToBuffer(_emptyVerts, _activeVerts, true, null);

            _qTreeKernel.SetValueArgument(1, _chunkWidthInBlocks);
            _qTreeKernel.SetMemoryArgument(2, _normals);
            _qTreeKernel.SetMemoryArgument(3, _activeVerts);
            _qTreeKernel.SetMemoryArgument(4, _dummy);

            _crossCullKernel.SetValueArgument(1, _chunkWidthInBlocks);
            _crossCullKernel.SetMemoryArgument(2, _normals);
            _crossCullKernel.SetMemoryArgument(3, _activeVerts);
            _crossCullKernel.SetMemoryArgument(4, _dummy);

            #endregion

            #region setup winding kernel

            if (loadFromSource){
                _winderPrgm = new ComputeProgram(_context, Gbl.LoadScript("TGen_VertexWinder"));
            #if CPU_DEBUG
                _winderPrgm.Build(null, @"-g -s D:\Projects\Gondola\Scripts\VertexWinder.cl", null, IntPtr.Zero);
            #else
                _winderPrgm.Build(null, "", null, IntPtr.Zero);
            #endif
                Gbl.SaveBinary(_winderPrgm.Binaries, "TGen_VertexWinder");
            }
            else{
                var binary = Gbl.LoadBinary("TGen_VertexWinder");
                _winderPrgm = new ComputeProgram(_context, binary, _devices);
                _winderPrgm.Build(null, "", null, IntPtr.Zero);
            }

            _winderKernel = _winderPrgm.CreateKernel("VertexWinder");
            _indicies = new ComputeBuffer<int>(_context, ComputeMemoryFlags.None, (_chunkWidthInBlocks)*(_chunkWidthInBlocks)*8);

            _winderKernel.SetMemoryArgument(0, _activeVerts);
            _winderKernel.SetMemoryArgument(1, _indicies);

            _emptyIndices = new int[(_chunkWidthInBlocks)*(_chunkWidthInBlocks)*8];
            for (int i = 0; i < (_chunkWidthInBlocks)*(_chunkWidthInBlocks)*8; i++){
                _emptyIndices[i] = 0;
            }
            _cmdQueue.WriteToBuffer(_emptyIndices, _indicies, true, null);

            #endregion

            if (loadFromSource){
                Gbl.AllowMD5Refresh[Gbl.RawDir.Scripts] = true;
            }

            _cmdQueue.Finish();
        }
Example #51
0
        static void Main(string[] args)
        {
            #region
            const string programName = "Prime Number";

            Stopwatch stopWatch = new Stopwatch();

            string clProgramSource = KernelProgram();

            Console.WriteLine("Environment OS:");
            Console.WriteLine("-----------------------------------------");
            Console.WriteLine(Environment.OSVersion);
            #endregion
            if (ComputePlatform.Platforms.Count == 0)
            {
                Console.WriteLine("No OpenCL Platforms are availble!");
            }
            else
            {
                #region 1
                // step 1 choose the first available platform
                ComputePlatform platform = ComputePlatform.Platforms[0];

                // output the basic info
                BasicInfo(platform);

                Console.WriteLine("Program: " + programName);
                Console.WriteLine("-----------------------------------------");
                #endregion
                //Cpu 10 seconds Gpu 28 seconds
                int count = 64;

                int[] output_Z = new int[count * count * count];

                int[] input_X = new int[count * count * count];

                for (int x = 0; x < count * count * count; x++)
                {
                    input_X[x] = x;
                }
                #region 2
                // step 2 create context for that platform and all devices
                ComputeContextPropertyList properties = new ComputeContextPropertyList(platform);
                ComputeContext context = new ComputeContext(platform.Devices, properties, null, IntPtr.Zero);

                // step 3 create and build program
                ComputeProgram program = new ComputeProgram(context, clProgramSource);
                program.Build(platform.Devices, null, null, IntPtr.Zero);
                #endregion
                // step 4 create memory objects
                ComputeBuffer<int> a = new ComputeBuffer<int>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, input_X);
                ComputeBuffer<int> z = new ComputeBuffer<int>(context, ComputeMemoryFlags.WriteOnly, output_Z.Length);

                // step 5 create kernel object with same kernel programe name VectorAdd
                ComputeKernel kernel = program.CreateKernel("PrimeNumber");

                // step 6 set kernel arguments
                //kernel.SetMemoryArgument(0, a);
                kernel.SetMemoryArgument(0, a);
                kernel.SetMemoryArgument(1, z);

                ComputeEventList eventList = new ComputeEventList();

                //for (int j = 0; j < context.Devices.Count; j++)
                // query available devices n,...,1,0.  cpu first then gpu
                for (int j = context.Devices.Count-1; j > -1; j--)
                {
                    #region 3
                    stopWatch.Start();

                    // step 7 create command queue on that context on that device
                    ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[j], ComputeCommandQueueFlags.None);

                    // step 8 run the kernel program
                    commands.Execute(kernel, null, new long[] { count, count, count }, null, eventList);
                    //Application.DoEvents();

                    #endregion
                    // step 9 read results
                    commands.ReadFromBuffer(z, ref output_Z, false, eventList);
                    #region 4
                    commands.Finish();

                    string fileName = "C:\\primenumber\\PrimeNumberGPU.txt";
                    StreamWriter file = new StreamWriter(fileName, true);

                    FileInfo info = new FileInfo(fileName);
                    long fs = info.Length;

                    // 1 MegaByte = 1.049e+6 Byte
                    int index = 1;
                    if (fs == 1.049e+6)
                    {
                        fileName = "C:\\primenumber\\PrimeNumberGPU" + index.ToString() + ".txt";
                        file = new System.IO.StreamWriter(fileName, true);
                        index++;
                    }
                    #endregion

                    for (uint xx = 0; xx < count * count * count; xx++)
                    {
                        if (output_Z[xx] != 0 && output_Z[xx] != 1)
                        {
                            Console.WriteLine(output_Z[xx]);
                            file.Write(output_Z[xx]);
                            file.Write("x");
                        }
                    }
                    #region 5
                    file.Close();
                    stopWatch.Stop();

                    ComputeCommandProfilingInfo start = ComputeCommandProfilingInfo.Started;
                    ComputeCommandProfilingInfo end = ComputeCommandProfilingInfo.Ended;
                    double time = 10e-9 * (end - start);
                    //Console.WriteLine("Nanosecond: " + time);

                    TimeSpan ts = stopWatch.Elapsed;
                    string elapsedTime = String.Format("{0:00}:{1:00}:{2:00}.{3:00}", ts.Hours, ts.Minutes, ts.Seconds, ts.Milliseconds);
                    Console.WriteLine(context.Devices[j].Name.Trim() + " Elapsed Time " + elapsedTime);

                    Console.WriteLine("-----------------------------------------");
                    #endregion
                }
                Console.ReadLine();
            }
        }