Exemple #1
0
        /// <summary>
        /// Reads the input-neuron delta buffer back from the device and returns its contents as doubles.
        /// </summary>
        /// <returns>
        /// A new array of <c>nInputUnits * inputNeurons.MiniBatchSize</c> values, each one widened from the
        /// single-precision value read out of <c>inputNeurons.DeltaGPU</c>.
        /// </returns>
        public virtual double[] GetInputGradients()
        {
            int elementCount = nInputUnits * inputNeurons.MiniBatchSize;

            // Single-precision staging array that receives the raw device data.
            float[] hostDeltas = new float[elementCount];

            // Blocking read: DeltaGPU -> hostDeltas
            OpenCLSpace.ClError = Cl.EnqueueReadBuffer(OpenCLSpace.Queue,
                                                       inputNeurons.DeltaGPU,
                                                       Bool.True,
                                                       (IntPtr)0,
                                                       (IntPtr)(sizeof(float) * elementCount),
                                                       hostDeltas,
                                                       0,
                                                       null,
                                                       out OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer");

            // The event is not needed once the read has been issued.
            OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

            OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");

            // Widen float -> double, preserving element order.
            double[] gradients = new double[elementCount];
            int slot = 0;
            foreach (float delta in hostDeltas)
            {
                gradients[slot++] = delta;
            }

            return gradients;
        }
Exemple #2
0
        /// <summary>
        /// Enqueues a read from <paramref name="buffer"/> into the memory at <paramref name="destination"/>
        /// on this command queue and returns the event produced by the native call.
        /// </summary>
        /// <param name="buffer">Source buffer; must not be <c>Buffer.Null</c>.</param>
        /// <param name="blocking">Passed to the native call as 1 (true) or 0 (false).</param>
        /// <param name="offset">Offset forwarded unchanged to the native call.</param>
        /// <param name="count">Size forwarded unchanged to the native call.</param>
        /// <param name="destination">Destination pointer; must not be <c>IntPtr.Zero</c>.</param>
        /// <param name="events">Events to wait for before the read; may be null or empty.</param>
        /// <returns>A new <c>Event</c> wrapping the handle written by the native call.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="buffer"/> is <c>Buffer.Null</c> or <paramref name="destination"/> is <c>IntPtr.Zero</c>.
        /// </exception>
        public Event EnqueueReadBuffer(Buffer buffer, bool blocking, ulong offset, ulong count, IntPtr destination, Event[] events)
        {
            ClHelper.ThrowNullException(Handle);

            if (buffer == Buffer.Null)
            {
                throw new ArgumentNullException(nameof(buffer));
            }
            if (destination == IntPtr.Zero)
            {
                throw new ArgumentNullException(nameof(destination));
            }

            unsafe
            {
                int     num_events_in_wait_list = events == null ? 0 : events.Length;
                IntPtr *wait_list = stackalloc IntPtr[num_events_in_wait_list];
                for (int i = 0; i < num_events_in_wait_list; ++i)
                {
                    wait_list[i] = events[i].Handle;
                }

                // OpenCL requires a NULL wait list whenever the count is 0. A zero-length stackalloc
                // yields an implementation-defined pointer, so an empty (non-null) array must be
                // normalized to null exactly like a null array.
                if (num_events_in_wait_list == 0)
                {
                    wait_list = null;
                }

                IntPtr event_ptr = IntPtr.Zero;

                ClHelper.GetError(Cl.EnqueueReadBuffer(Handle, buffer.Handle,
                                                       blocking ? 1u : 0u, new UIntPtr(offset), new UIntPtr(count), destination.ToPointer(),
                                                       (uint)num_events_in_wait_list, wait_list, &event_ptr));

                return(new Event(event_ptr));
            }
        }
Exemple #3
0
        /// <summary>
        /// Reads the gamma and beta device buffers back into <c>gammaHost</c> and <c>betaHost</c>.
        /// </summary>
        public override void CopyBuffersToHost()
        {
            // Both reads start at offset 0 and transfer one float per input-depth slot.
            IntPtr startOffset = (IntPtr)0;
            IntPtr vectorBytes = (IntPtr)(sizeof(float) * inputDepth);

            // gamma: device -> host (blocking)
            OpenCLSpace.ClError = Cl.EnqueueReadBuffer(OpenCLSpace.Queue, gammaGPU, Bool.True,
                                                       startOffset, vectorBytes, gammaHost,
                                                       0, null, out OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer gammaGPU");

            OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

            // beta: device -> host (blocking)
            OpenCLSpace.ClError = Cl.EnqueueReadBuffer(OpenCLSpace.Queue, betaGPU, Bool.True,
                                                       startOffset, vectorBytes, betaHost,
                                                       0, null, out OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer betaGPU");

            OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

            // Drain the queue before returning.
            OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");

            // Speeds are not saved.
        }
Exemple #4
0
        /// <summary>
        /// Runs the "ExternalLoopBody" kernel from <paramref name="program"/> with a single work-item over a
        /// six-element int buffer and asserts the values read back from the device.
        /// </summary>
        /// <param name="program">Program from which the kernel is created.</param>
        public void ExternalLoopBody(Cl.Program program)
        {
            // Kernel under test
            Cl.Kernel loopKernel = Cl.CreateKernel(program, "ExternalLoopBody", out error);
            clSafeCall(error);

            // Queue used for both the launch and the read-back
            Cl.CommandQueue queue = Cl.CreateCommandQueue(context, device, Cl.CommandQueueProperties.None, out error);
            clSafeCall(error);

            // Host data; also used to initialize the device buffer
            int[]  values     = { 0, 1, 2, 3, 4, 5 };
            IntPtr valueBytes = (IntPtr)(sizeof(int) * values.Length);

            Cl.Mem deviceValues = Cl.CreateBuffer(context, Cl.MemFlags.ReadWrite | Cl.MemFlags.CopyHostPtr,
                                                  valueBytes, values, out error);
            clSafeCall(error);

            // Arguments: (buffer, element count)
            clSafeCall(Cl.SetKernelArg(loopKernel, 0, deviceValues));
            clSafeCall(Cl.SetKernelArg(loopKernel, 1, values.Length));

            // Launch one work-item in one dimension
            IntPtr[] globalWorkSize = { (IntPtr)1 };
            clSafeCall(Cl.EnqueueNDRangeKernel(queue, loopKernel, 1, null, globalWorkSize, null, 0, null, out clevent));

            // Blocking read of the results back into the host array
            clSafeCall(Cl.EnqueueReadBuffer(queue, deviceValues, Cl.Bool.True, IntPtr.Zero,
                                            valueBytes, values, 0, null, out clevent));

            clSafeCall(Cl.Finish(queue));

            Assert.AreEqual(new[] { 1, 4, 3, 6, 5, 8 }, values);
        }
        /// <summary>
        /// Runs the "accumulate" kernel on <paramref name="imgfAccu"/> and <paramref name="imgfSrc"/> and
        /// reads the accumulator buffer back into <c>imgfAccu._buf</c>.
        /// </summary>
        /// <remarks>
        /// Images cannot be read_write, so plain buffers are used.
        /// TODO: implement this in a way that allows imgfAccu to be uploaded only once.
        /// TODO: test for image size consistency between the two maps.
        /// </remarks>
        /// <param name="imgfAccu">Accumulator map; uploaded before the kernel runs and overwritten with the result.</param>
        /// <param name="imgfSrc">Source map; uploaded before the kernel runs.</param>
        /// <param name="k">Scalar passed to the kernel as argument 4.</param>
        public void Accumulate(FloatMap imgfAccu, FloatMap imgfSrc, float k)
        {
            var kernel = _kernels["accumulate"];

            IMem<float> accuMapBuffer = null;
            IMem<float> srcMapBuffer  = null;

            try
            {
                // Creation of on-device memory objects.
                // Open question from the original author: MemFlags.CopyHostPtr did not work here,
                // which is why the data is uploaded manually below.
                accuMapBuffer = Cl.CreateBuffer<float>(_context, MemFlags.ReadWrite, imgfAccu.Size, out err);
                assert(err, "accu buf creation");

                // NOTE(review): created WriteOnly although it is named as the kernel's source — confirm against the kernel.
                srcMapBuffer = Cl.CreateBuffer<float>(_context, MemFlags.WriteOnly, imgfSrc.Size, out err);
                assert(err, "src buf creation");

                // Set memory objects as parameters to kernel
                err = Cl.SetKernelArg(kernel, 0, intPtrSize, accuMapBuffer);
                assert(err, "accu map setKernelArg");

                err = Cl.SetKernelArg(kernel, 1, intPtrSize, srcMapBuffer);
                assert(err, "src map setKernelArg");

                err = Cl.SetKernelArg(kernel, 2, intSize, imgfAccu.Stride);
                assert(err, "in stride setKernelArg");

                err = Cl.SetKernelArg(kernel, 3, intSize, imgfSrc.Stride);
                assert(err, "out stride setKernelArg");

                err = Cl.SetKernelArg(kernel, 4, floatSize, k);
                assert(err, "k setKernelArg");

                // Write actual data into the memory objects. The error code is checked before the
                // event is touched, because the event is only meaningful when the call succeeded.
                Event clevent;

                err = Cl.EnqueueWriteBuffer<float>(_commandsQueue, accuMapBuffer, Bool.True, 0, imgfAccu.Size, imgfAccu._buf, 0, null, out clevent);
                assert(err, "write accu buffer");
                clevent.Dispose();

                err = Cl.EnqueueWriteBuffer<float>(_commandsQueue, srcMapBuffer, Bool.True, 0, imgfSrc.Size, imgfSrc._buf, 0, null, out clevent);
                assert(err, "write src buffer");
                clevent.Dispose();

                // Execute over a W x H two-dimensional range
                err = Cl.EnqueueNDRangeKernel(_commandsQueue, kernel, 2,
                                              new[] { (IntPtr)0, (IntPtr)0, (IntPtr)0 },                           // offset
                                              new[] { new IntPtr(imgfAccu.W), new IntPtr(imgfAccu.H), (IntPtr)1 }, // range
                                              null, 0, null, out clevent);
                assert(err, "Cl.EnqueueNDRangeKernel");
                clevent.Dispose();

                // Sync
                err = Cl.Finish(_commandsQueue);
                assert(err, "Cl.Finish");

                // Read from output memory object into actual buffer
                err = Cl.EnqueueReadBuffer<float>(_commandsQueue, accuMapBuffer, Bool.True, imgfAccu._buf, 0, null, out clevent);
                assert(err, "read output buffer");
                clevent.Dispose();
            }
            finally
            {
                // Release device memory even when one of the checks above throws.
                if (srcMapBuffer != null)
                {
                    Cl.ReleaseMemObject(srcMapBuffer);
                }
                if (accuMapBuffer != null)
                {
                    Cl.ReleaseMemObject(accuMapBuffer); // could be returned instead of released; would affect the non-OpenCL implementation
                }
            }
        }
Exemple #6
0
        /// <summary>
        /// Runs the crack-high kernel for a 16-element <paramref name="sequence"/> and a given
        /// <paramref name="low"/> value, and returns the single long the kernel writes to its output buffer.
        /// </summary>
        /// <param name="sequence">Input sequence; must contain exactly 16 elements.</param>
        /// <param name="low">Value passed to the kernel as argument 2.</param>
        /// <returns>
        /// The first element of the seed buffer read back from the device, or -1 when the program or kernel
        /// is missing, the class is not initialized, or <paramref name="sequence"/> is null or not 16 long.
        /// </returns>
        public static long CrackHigh(int[] sequence, long low)
        {
            // A null sequence is rejected the same way as one of the wrong length.
            if (!crackHighProgram.HasValue || !crackHighKernel.HasValue || !initialized || sequence == null || sequence.Length != 16)
            {
                return(-1);
            }

            ErrorCode error;

            IMem<int> sequence_dev = Cl.CreateBuffer<int>(context, MemFlags.CopyHostPtr | MemFlags.ReadOnly, sequence, out error);

            ErrorCheck(error, "CrackHigh(): Cl.CreateBuffer");

            long[]     seeds    = new long[1];
            IMem<long> seed_dev = Cl.CreateBuffer<long>(context, MemFlags.CopyHostPtr | MemFlags.WriteOnly, seeds, out error);

            ErrorCheck(error, "CrackHigh(): Cl.CreateBuffer");

            error = Cl.SetKernelArg(crackHighKernel.Value, 0, sequence_dev);
            ErrorCheck(error, "Cl.SetKernelArg");
            error = Cl.SetKernelArg(crackHighKernel.Value, 1, seed_dev);
            ErrorCheck(error, "Cl.SetKernelArg");
            error = Cl.SetKernelArg(crackHighKernel.Value, 2, (IntPtr)sizeof(long), low);
            ErrorCheck(error, "Cl.SetKernelArg");

            CommandQueue cmdQueue = Cl.CreateCommandQueue(context, device, 0, out error);

            ErrorCheck(error, "Cl.CreateCommandQueue");

            int maxGroupWorkSize = Cl.GetKernelWorkGroupInfo(crackHighKernel.Value, device, KernelWorkGroupInfo.WorkGroupSize, out error).CastTo<int>();

            ErrorCheck(error, "Cl.GetKernelWorkGroupInfo");

            // Global work size: 12 times the kernel's work-group size (the factor is hard-coded).
            int threads = maxGroupWorkSize * 12;

            Event    e;
            IntPtr[] workSize = new IntPtr[] { (IntPtr)threads };
            error = Cl.EnqueueNDRangeKernel(cmdQueue, crackHighKernel.Value, 1, null, workSize, null, 0, null, out e);
            ErrorCheck(error, "Cl.EnqueueNDRangeKernel");
            e.Dispose(); // the launch event is not used; release it instead of leaking the handle

            error = Cl.Finish(cmdQueue);
            ErrorCheck(error, "Cl.Finish");

            long[] seed_host = new long[1];
            error = Cl.EnqueueReadBuffer(cmdQueue, seed_dev, Bool.True, (IntPtr)0, (IntPtr)(sizeof(long) * 1), seed_host, 0, null, out e);
            ErrorCheck(error, "CL.EnqueueReadBuffer");
            e.Dispose(); // same for the read-back event

            // Release the OpenCL objects created by this call
            error = Cl.ReleaseCommandQueue(cmdQueue);
            ErrorCheck(error, "CL.ReleaseCommandQueue");

            error = Cl.ReleaseMemObject(sequence_dev);
            ErrorCheck(error, "CL.ReleaseMemObject");

            error = Cl.ReleaseMemObject(seed_dev);
            ErrorCheck(error, "CL.ReleaseMemObject");
            return(seed_host[0]);
        }
        /// <summary>
        /// Run network backwards, propagating the gradient backwards and also updating parameters.
        /// Requires that gradient has ALREADY BEEN WRITTEN in network.Layers[nLayers-1].InputNeurons.Delta
        /// </summary>
        /// <param name="learningRate">Forwarded to each layer's <c>UpdateSpeeds</c>.</param>
        /// <param name="momentumMultiplier">Forwarded to each layer's <c>UpdateSpeeds</c>.</param>
        /// <param name="weightDecayCoeff">Forwarded to each layer's <c>UpdateSpeeds</c>.</param>
        /// <param name="weightMaxNorm">Forwarded to each layer's <c>UpdateParameters</c>.</param>
        public void BackwardPass(double learningRate, double momentumMultiplier, double weightDecayCoeff, double weightMaxNorm)
        {
            for (int l = nLayers - 2; l > 0; l--) // propagate error signal backwards (layers L-2 to 1, i.e. second last to second)
            {
                // 1. Update layer's parameters' change speed using gradient
                layers[l].UpdateSpeeds(learningRate, momentumMultiplier, weightDecayCoeff);

                // 2. Backpropagate errors to previous layer (no need to do it for layer 1)
                if (l > 1)
                {
                    layers[l].BackPropagate();
                }


#if DEBUGGING_STEPBYSTEP
                /* ------------------------- DEBUGGING --------------------------------------------- */

                // Display input delta layer-by-layer

                int miniBatchSize = layers[0].OutputNeurons.MiniBatchSize;
                int nUnits = layers[l].InputNeurons.NumberOfUnits;
#if OPENCL_ENABLED
                float[] deltaInputAll = new float[nUnits * miniBatchSize];
                OpenCLSpace.ClError = Cl.EnqueueReadBuffer(OpenCLSpace.Queue,
                                                           layers[l].InputNeurons.DeltaGPU,  // source
                                                           Bool.True,
                                                           (IntPtr)0,
                                                           (IntPtr)(nUnits * miniBatchSize * sizeof(float)),
                                                           deltaInputAll,   // destination
                                                           0,
                                                           null,
                                                           out OpenCLSpace.ClEvent);
                OpenCLSpace.CheckErr(OpenCLSpace.ClError, "NeuralNetwork.BackwardPass Cl.clEnqueueReadBuffer deltaInputAll");

                // Release the read event so the handle is not leaked on every layer.
                OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
                OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");
#endif
                // NOTE(review): deltaInputAll is only declared when OPENCL_ENABLED is defined, so this
                // debugging section does not compile without it.
                Console.WriteLine("\nLayer {0} ({1}) backpropagated delta:", l, layers[l].Type);
                for (int m = 0; m < miniBatchSize; m++)
                {
                    float[] deltaInput = new float[nUnits];
                    Array.Copy(deltaInputAll, m * nUnits, deltaInput, 0, nUnits);

                    Console.WriteLine("\n --- Mini-batch item {0} -----", m);
                    for (int j = 0; j < deltaInput.Length; j++)
                    {
                        Console.Write("{0}  ", deltaInput[j]);
                    }
                    Console.WriteLine();
                    Console.ReadKey();
                }

                /* ------------------------- END DEBUGGING --------------------------------------------- */
#endif

                // 3. Update layer's parameters
                layers[l].UpdateParameters(weightMaxNorm);
            }
        }
Exemple #8
0
        /// <summary>
        /// Reads the weight and bias gradient buffers back from the device and returns them as one array of doubles.
        /// </summary>
        /// <returns>
        /// An array of <c>nInputUnits * nOutputUnits + nOutputUnits</c> values: all weight gradients first,
        /// followed by the bias gradients.
        /// </returns>
        public override double[] GetParameterGradients()
        {
            int weightCount = nInputUnits * nOutputUnits;
            int biasCount   = nOutputUnits;

            // Weight gradients: device -> host (blocking)
            float[] hostWeightGrads = new float[weightCount];
            OpenCLSpace.ClError = Cl.EnqueueReadBuffer(OpenCLSpace.Queue,
                                                       weightsGradientsGPU,
                                                       Bool.True,
                                                       (IntPtr)0,
                                                       (IntPtr)(sizeof(float) * weightCount),
                                                       hostWeightGrads,
                                                       0,
                                                       null,
                                                       out OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer");

            OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

            // Bias gradients: device -> host (blocking)
            float[] hostBiasGrads = new float[biasCount];
            OpenCLSpace.ClError = Cl.EnqueueReadBuffer(OpenCLSpace.Queue,
                                                       biasesGradientsGPU,
                                                       Bool.True,
                                                       (IntPtr)0,
                                                       (IntPtr)(sizeof(float) * biasCount),
                                                       hostBiasGrads,
                                                       0,
                                                       null,
                                                       out OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer");

            OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

            OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");

            // Widen to double: weights occupy [0, weightCount), biases follow immediately after.
            double[] gradients = new double[weightCount + biasCount];
            int cursor = 0;
            foreach (float weightGrad in hostWeightGrads)
            {
                gradients[cursor++] = weightGrad;
            }
            foreach (float biasGrad in hostBiasGrads)
            {
                gradients[cursor++] = biasGrad;
            }

            return gradients;
        }
Exemple #9
0
        /// <summary>
        /// Issues a read of the memory object stored at <paramref name="argIndex"/> into
        /// <paramref name="localBuffer"/> and, when the call reports success, waits for the resulting event.
        /// </summary>
        /// <typeparam name="T">Unmanaged element type of the destination buffer.</typeparam>
        /// <param name="argIndex">Index into <c>_memObj</c> and <c>_memObjSize</c>.</param>
        /// <param name="localBuffer">Destination pointer handed to the read call.</param>
        public void ReadOutBuffer <T>(int argIndex, T *localBuffer) where T : unmanaged
        {
            fixed(IntPtr *memObjects = _memObj)
            {
                ErrorCode status = Cl.EnqueueReadBuffer(_commandQueue,
                                                        memObjects[argIndex],
                                                        true.ToInt(),
                                                        0,
                                                        _memObjSize[argIndex],
                                                        localBuffer,
                                                        0,
                                                        null,
                                                        out Event readCompleted);

                if (status != ErrorCode.Success)
                {
                    // NOTE(review): a failed read is dropped silently here; confirm callers expect that.
                    return;
                }

                readCompleted.WaitForComplete();
            }
        }
        /// <summary>
        /// Compiles the "annualizedReturns" and "aggregateReturnsKernel" kernels, enqueues both
        /// <paramref name="iterations"/> times between two queue markers, and reads <c>_d_output</c> back
        /// into <c>_h_output</c>.
        /// </summary>
        /// <param name="totalMs">
        /// Time between the start of the first marker and the end of the second, in milliseconds.
        /// OpenCL profiling counters are in nanoseconds, so the difference is scaled by 1e-6.
        /// </param>
        /// <param name="cf">Not read by this method body.</param>
        /// <param name="iterations">Number of times the kernel pair is enqueued.</param>
        /// <returns><c>_h_output[0] + _h_output[xSize - 1]</c> after the read-back.</returns>
        public float CalculateVar(out double totalMs, float cf = .9f, int iterations = 10)
        {
            // Both kernels are released by these using blocks; they must not be disposed by hand as well.
            using (var annualizedReturnsKernel = _env.Context.CompileKernelFromSource(_programSource, "annualizedReturns"))
            using (var aggregateReturnsKernel = _env.Context.CompileKernelFromSource(_programSource, "aggregateReturnsKernel"))
            {
                Event clStart, clEnd;
                var   queue = _env.CommandQueues[0];

                var xSize = (_stockData.QuotesPerStock - _holding);
                var ySize = _stockData.StocksCount;

                var err = Cl.SetKernelArg(annualizedReturnsKernel, 0, _d_output);
                err = Cl.SetKernelArg(annualizedReturnsKernel, 1, _d_stocksAndPrices);
                err = Cl.SetKernelArg(annualizedReturnsKernel, 2, _d_portfolioStockMv);
                err = Cl.SetKernelArg(annualizedReturnsKernel, 3, _ann);
                err = Cl.SetKernelArg(annualizedReturnsKernel, 4, _holding);

                err = Cl.SetKernelArg(aggregateReturnsKernel, 0, _d_output);
                err = Cl.SetKernelArg(aggregateReturnsKernel, 1, ySize);

                Cl.EnqueueMarker(queue, out clStart);
                Cl.Finish(queue);

                for (int i = 0; i < iterations; i++)
                {
                    err = Cl.EnqueueNDRangeKernel(queue, annualizedReturnsKernel, 2, null,
                                                  new[] { (IntPtr)xSize, (IntPtr)ySize },
                                                  new[] { (IntPtr)256, (IntPtr)1 },
                                                  0, null, out Event annualizedDone);
                    if (err == ErrorCode.Success)
                    {
                        annualizedDone.Dispose(); // event is not used; release it rather than leak one per iteration
                    }

                    err = Cl.EnqueueNDRangeKernel(queue, aggregateReturnsKernel, 2, null,
                                                  new[] { (IntPtr)xSize, (IntPtr)1 },
                                                  new[] { (IntPtr)256, (IntPtr)1 },
                                                  0, null, out Event aggregateDone);
                    if (err == ErrorCode.Success)
                    {
                        aggregateDone.Dispose();
                    }
                }

                Cl.EnqueueMarker(queue, out clEnd);
                Cl.Finish(queue);

                // NOTE(review): presumably the queue was created with profiling enabled — confirm.
                var startInfo = Cl.GetEventProfilingInfo(clStart, ProfilingInfo.Start, out err);
                var endInfo   = Cl.GetEventProfilingInfo(clEnd, ProfilingInfo.End, out err);

                err = Cl.EnqueueReadBuffer <float>(queue, _d_output, Bool.True, _h_output, 0, null, out Event readDone);
                if (err == ErrorCode.Success)
                {
                    readDone.Dispose();
                }

                // nanoseconds -> milliseconds
                totalMs = (endInfo.CastTo <ulong>() - startInfo.CastTo <ulong>()) * 1e-6;

                clStart.Dispose();
                clEnd.Dispose();

                return(_h_output[0] + _h_output[xSize - 1]);
            }
        }
        /// <summary>
        /// Performs a blocking read from <paramref name="buffer"/> into <paramref name="array"/>.
        /// </summary>
        /// <typeparam name="T">Element type of the destination array.</typeparam>
        /// <param name="commandQueue">Queue on which the read is enqueued.</param>
        /// <param name="buffer">Source memory object.</param>
        /// <param name="array">Destination array; must not be null.</param>
        /// <param name="offset">Offset into the buffer, in elements (multiplied by the element size).</param>
        /// <param name="length">Number of elements to read, or -1 to read <c>array.Length</c> elements.</param>
        /// <param name="waitFor">Events to wait for before the read; null or empty means none.</param>
        /// <exception cref="ArgumentNullException"><paramref name="array"/> is null.</exception>
        public static void ReadFromBuffer <T>(this CommandQueue commandQueue, IMem buffer, T[] array, int offset = 0, long length = -1, params Event[] waitFor)
            where T : struct
        {
            if (array == null)
            {
                throw new ArgumentNullException(nameof(array));
            }

            var elemSize = TypeSize <T> .SizeInt;

            // Byte sizes are computed in 64-bit arithmetic so large offsets cannot wrap around in int.
            long byteOffset   = (long)offset * elemSize;
            long elementCount = length == -1 ? array.Length : length;
            long byteCount    = elementCount * elemSize;

            // An explicit null passed for the params array is treated like an empty wait list.
            uint waitCount = waitFor == null ? 0u : (uint)waitFor.Length;

            Event e;
            Cl.EnqueueReadBuffer(commandQueue, buffer, Bool.True, (IntPtr)byteOffset, (IntPtr)byteCount, array,
                                 waitCount, waitCount == 0 ? null : waitFor, out e)
            .Check();

            e.Dispose();
        }
Exemple #12
0
        /// <summary>
        /// Runs the "ArrayCompare" kernel from <paramref name="program"/> twice with a single work-item —
        /// first with two freshly created buffers, then with <c>dummy</c> for both inputs — and asserts the
        /// bool array read back from the third buffer after each run.
        /// </summary>
        /// <param name="program">Program from which the kernel is created.</param>
        public void ArrayCompare(Cl.Program program)
        {
            // Kernel under test
            Cl.Kernel compareKernel = Cl.CreateKernel(program, "ArrayCompare", out error);
            clSafeCall(error);

            // Queue used for launches and read-backs
            Cl.CommandQueue queue = Cl.CreateCommandQueue(context, device, Cl.CommandQueueProperties.None, out error);
            clSafeCall(error);

            // Host-side result array, pre-filled with the opposite of the first expected outcome
            bool[] res         = { true, false, true, false };
            IntPtr resultBytes = (IntPtr)(sizeof(bool) * res.Length);
            IntPtr intBytes    = (IntPtr)(sizeof(int));

            // Device buffers: two int-sized inputs and one result buffer
            Cl.Mem firstInput = Cl.CreateBuffer(context, Cl.MemFlags.WriteOnly, intBytes, IntPtr.Zero, out error);
            clSafeCall(error);
            Cl.Mem secondInput = Cl.CreateBuffer(context, Cl.MemFlags.WriteOnly, intBytes, IntPtr.Zero, out error);
            clSafeCall(error);
            Cl.Mem resultBuffer = Cl.CreateBuffer(context, Cl.MemFlags.WriteOnly, resultBytes, IntPtr.Zero, out error);
            clSafeCall(error);

            // --- Run 1: two distinct buffers ---
            clSafeCall(Cl.SetKernelArg(compareKernel, 0, firstInput));
            clSafeCall(Cl.SetKernelArg(compareKernel, 1, secondInput));
            clSafeCall(Cl.SetKernelArg(compareKernel, 2, resultBuffer));

            clSafeCall(Cl.EnqueueNDRangeKernel(queue, compareKernel, 1, null, new[] { (IntPtr)1 }, null, 0, null, out clevent));

            clSafeCall(Cl.EnqueueReadBuffer(queue, resultBuffer, Cl.Bool.True, IntPtr.Zero,
                                            resultBytes, res, 0, null, out clevent));

            clSafeCall(Cl.Finish(queue));

            Assert.AreEqual(new[] { false, true, false, true }, res);

            // --- Run 2: the same dummy object for both inputs ---
            clSafeCall(Cl.SetKernelArg(compareKernel, 0, dummy));
            clSafeCall(Cl.SetKernelArg(compareKernel, 1, dummy));

            clSafeCall(Cl.EnqueueNDRangeKernel(queue, compareKernel, 1, null, new[] { (IntPtr)1 }, null, 0, null, out clevent));

            clSafeCall(Cl.EnqueueReadBuffer(queue, resultBuffer, Cl.Bool.True, IntPtr.Zero,
                                            resultBytes, res, 0, null, out clevent));

            clSafeCall(Cl.Finish(queue));

            Assert.AreEqual(new[] { true, false, true, false }, res);
        }
Exemple #13
0
 /// <summary>
 /// Performs a blocking read from <paramref name="buffer"/> into <paramref name="data"/> on <c>CommandQueue</c>.
 /// </summary>
 /// <typeparam name="T">Element type of the buffer and of the destination array.</typeparam>
 /// <param name="buffer">Source device buffer.</param>
 /// <param name="offset">Forwarded unchanged to <c>Cl.EnqueueReadBuffer</c> (presumably an element offset — confirm against the binding).</param>
 /// <param name="length">Forwarded unchanged to <c>Cl.EnqueueReadBuffer</c> (presumably an element count — confirm against the binding).</param>
 /// <param name="data">Destination array.</param>
 /// <exception cref="System.InvalidOperationException">The read call returned an error code other than success.</exception>
 public void EnqueueReadBuffer <T>(IMem <T> buffer, int offset, int length, T[] data)
     where T : struct
 {
     // No wait list is passed; the read is blocking.
     var status = Cl.EnqueueReadBuffer(
         CommandQueue,
         buffer,
         Bool.True,
         offset,
         length,
         data,
         0,
         null,
         out var readEvent
         );

     // A failed read would otherwise leave `data` untouched without the caller noticing.
     if (status != ErrorCode.Success)
     {
         throw new System.InvalidOperationException("Cl.EnqueueReadBuffer failed: " + status);
     }

     // The event is not handed to anyone, so release it here instead of leaking the handle.
     readEvent.Dispose();
 }
        /// <summary>
        /// Reads the gamma and beta gradient buffers back from the device and returns them as one array of doubles.
        /// </summary>
        /// <returns>
        /// An array of <c>2 * nInputUnits</c> values: the gamma gradients in the first half and the beta
        /// gradients in the second half.
        /// </returns>
        public override double[] GetParameterGradients()
        {
            // Both device buffers hold one float per input unit.
            IntPtr vectorBytes = (IntPtr)(sizeof(float) * nInputUnits);

            // Gamma gradients: device -> host (blocking)
            float[] hostGammaGrads = new float[nInputUnits];
            OpenCLSpace.ClError = Cl.EnqueueReadBuffer(OpenCLSpace.Queue, deltaGammaGPU, Bool.True,
                                                       (IntPtr)0, vectorBytes, hostGammaGrads,
                                                       0, null, out OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer");

            OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

            // Beta gradients: device -> host (blocking)
            float[] hostBetaGrads = new float[nInputUnits];
            OpenCLSpace.ClError = Cl.EnqueueReadBuffer(OpenCLSpace.Queue, deltaBetaGPU, Bool.True,
                                                       (IntPtr)0, vectorBytes, hostBetaGrads,
                                                       0, null, out OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer");

            OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

            OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");

            // Widen to double: gamma at [0, nInputUnits), beta at [nInputUnits, 2 * nInputUnits).
            double[] gradients = new double[2 * nInputUnits];
            for (int unit = 0, betaSlot = nInputUnits; unit < nInputUnits; ++unit, ++betaSlot)
            {
                gradients[unit]     = hostGammaGrads[unit];
                gradients[betaSlot] = hostBetaGrads[unit];
            }

            return gradients;
        }
Exemple #15
0
        /// <summary>
        /// Allocates an array of <paramref name="length"/> elements and fills it with a blocking read
        /// from <paramref name="buffer"/> on <c>CommandQueue</c>.
        /// </summary>
        /// <typeparam name="T">Element type of the buffer and of the returned array.</typeparam>
        /// <param name="buffer">Source device buffer.</param>
        /// <param name="offset">Forwarded unchanged to <c>Cl.EnqueueReadBuffer</c> (presumably an element offset — confirm against the binding).</param>
        /// <param name="length">Size of the returned array; also forwarded unchanged to <c>Cl.EnqueueReadBuffer</c>.</param>
        /// <returns>The newly allocated array that received the data.</returns>
        /// <exception cref="System.InvalidOperationException">The read call returned an error code other than success.</exception>
        public T[] ReadBuffer <T>(IMem <T> buffer, int offset, int length)
            where T : struct
        {
            var data = new T[length];

            // No wait list is passed; the read is blocking.
            var status = Cl.EnqueueReadBuffer(
                CommandQueue,
                buffer,
                Bool.True,
                offset,
                length,
                data,
                0,
                null,
                out var readEvent
                );

            // Without this check a failed read would return an array of default values.
            if (status != ErrorCode.Success)
            {
                throw new System.InvalidOperationException("Cl.EnqueueReadBuffer failed: " + status);
            }

            // The event is not handed to anyone, so release it here instead of leaking the handle.
            readEvent.Dispose();

            return(data);
        }
        /// <summary>
        /// Enqueues a buffer read on the queue supplied as <paramref name="sender"/>, waiting on the named
        /// events in <c>WaitList</c> that can be resolved through <c>CommandQueue.FindEvent</c>.
        /// </summary>
        /// <remarks>
        /// The destination is <c>DataPtr</c> when <c>Data</c> is null and <c>Data</c> otherwise. The resulting
        /// event is disposed when <c>Name</c> is empty, or registered under <c>Name</c> otherwise.
        /// </remarks>
        /// <param name="sender">The <c>CommandQueue</c> wrapper whose <c>Queue</c> receives the command.</param>
        /// <exception cref="ArgumentException"><paramref name="sender"/> is null or not a <c>CommandQueue</c>.</exception>
        /// <exception cref="Cl.Exception">The enqueue call returned an error code other than success.</exception>
        public override void Execute(object sender)
        {
            var commandQueue = sender as CommandQueue;

            if (commandQueue == null)
            {
                throw new ArgumentException("sender must be a non-null CommandQueue.", nameof(sender));
            }

            // Resolve the wait list exactly once. The query is deferred, so counting it and converting
            // it separately would repeat every FindEvent lookup and could yield a count that does not
            // match the array handed to OpenCL.
            var waitEvents = (from name in WaitList
                              let ev = CommandQueue.FindEvent(name)
                                       where ev != null
                                       select ev.Value).ToArray();
            uint waitCount = (uint)waitEvents.Length;
            var  waitArg   = waitCount == 0 ? null : waitEvents; // OpenCL expects a null list when the count is 0

            Event     eventID;
            ErrorCode error;
            var       mem         = (Buffer == null) ? Mem : Buffer.Mem;
            var       elementSize = (Buffer == null) ? ElementSize : Buffer.ElementSize;

            if (Data == null)
            {
                error = Cl.EnqueueReadBuffer(commandQueue.Queue, mem,
                                             Blocking ? Bool.True : Bool.False, (IntPtr)Offset,
                                             (IntPtr)(Count * elementSize), DataPtr, waitCount, waitArg, out eventID);
            }
            else
            {
                error = Cl.EnqueueReadBuffer(commandQueue.Queue, mem,
                                             Blocking ? Bool.True : Bool.False, (IntPtr)Offset,
                                             (IntPtr)(Count * elementSize), Data, waitCount, waitArg, out eventID);
            }

            if (error != ErrorCode.Success)
            {
                throw new Cl.Exception(error);
            }

            // Unnamed commands do not expose their event; named ones register it for later lookups.
            if (Name == string.Empty)
            {
                eventID.Dispose();
            }
            else
            {
                CommandQueue.AddEvent(Name, eventID);
            }
        }
Exemple #17
0
        /// <summary>
        /// Runs the "ArrayRefOut" kernel from <paramref name="program"/> with a single work-item over two
        /// one-element int buffers and asserts the values read back from each.
        /// </summary>
        /// <param name="program">Program from which the kernel is created.</param>
        public void ArrayRefOut(Cl.Program program)
        {
            // create kernel
            Cl.Kernel kernel = Cl.CreateKernel(program, "ArrayRefOut", out error);
            clSafeCall(error);

            // create command queue
            Cl.CommandQueue cmdQueue = Cl.CreateCommandQueue(context, device, Cl.CommandQueueProperties.None, out error);
            clSafeCall(error);

            // allocate host vectors
            int[] hp1 = { 1 };
            int[] hp2 = { 2 };

            // allocate device vectors, initialized from the host arrays
            Cl.Mem dp1 = Cl.CreateBuffer(context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadWrite,
                                         (IntPtr)(sizeof(int) * hp1.Length), hp1, out error);
            clSafeCall(error);
            Cl.Mem dp2 = Cl.CreateBuffer(context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadWrite,
                                         (IntPtr)(sizeof(int) * hp2.Length), hp2, out error);
            clSafeCall(error);

            // setup kernel arguments
            clSafeCall(Cl.SetKernelArg(kernel, 0, dp1));
            clSafeCall(Cl.SetKernelArg(kernel, 1, dp2));
            clSafeCall(Cl.SetKernelArg(kernel, 2, dummy));

            // execute kernel
            clSafeCall(Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 1, null, new[] { (IntPtr)1 }, null, 0, null, out clevent));

            // copy results from device back to host; each read is sized from its own destination array
            clSafeCall(Cl.EnqueueReadBuffer(cmdQueue, dp1, Cl.Bool.True, IntPtr.Zero,
                                            (IntPtr)(sizeof(int) * hp1.Length), hp1, 0, null, out clevent));
            clSafeCall(Cl.EnqueueReadBuffer(cmdQueue, dp2, Cl.Bool.True, IntPtr.Zero,
                                            (IntPtr)(sizeof(int) * hp2.Length), hp2, 0, null, out clevent));

            clSafeCall(Cl.Finish(cmdQueue));

            Assert.AreEqual(5, hp1[0]);
            Assert.AreEqual(4, hp2[0]);
        }
Exemple #18
0
        /// <summary>
        /// Copies the weights and biases device buffers into their host arrays with
        /// blocking reads, releasing the event of each read, and then waits for the
        /// command queue to finish.
        /// </summary>
        public override void CopyBuffersToHost()
        {
            IntPtr weightsByteCount = (IntPtr)(sizeof(float) * nInputUnits * nOutputUnits);
            IntPtr biasesByteCount  = (IntPtr)(sizeof(float) * nOutputUnits);

            // Weights: device -> host
            OpenCLSpace.ClError = Cl.EnqueueReadBuffer(
                OpenCLSpace.Queue, weightsGPU, Bool.True, (IntPtr)0, weightsByteCount,
                weightsHost, 0, null, out OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer weightsGPU");

            OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

            // Biases: device -> host
            OpenCLSpace.ClError = Cl.EnqueueReadBuffer(
                OpenCLSpace.Queue, biasesGPU, Bool.True, (IntPtr)0, biasesByteCount,
                biasesHost, 0, null, out OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer biasesGPU");

            OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

            // Drain the queue before returning.
            OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");

            // Speeds are not saved.
        }
Exemple #19
0
        /// <summary>
        /// Uploads both images and the flow field, runs the "gradRho" kernel and reads
        /// its four output buffers back to the host.
        /// </summary>
        /// <param name="I0">Image bound to kernel argument 0; its width and height are passed as arguments 8 and 9 and size the returned arrays.</param>
        /// <param name="I1d">Image bound to kernel argument 1.</param>
        /// <param name="Flow">Flow whose Array[0] and Array[1] are uploaded as kernel arguments 2 and 3; Width * Height is the global work size.</param>
        /// <returns>Four arrays holding, in order, the contents of gradXBuf, gradYBuf, grad_2Buf and rho_cBuf.</returns>
        public float[][] calc_grad_rho_c(SimpleImage I0, SimpleImage I1d, FlowArray Flow)
        {
            int pixelCount = I0.ImageHeight * I0.ImageWidth;
            int flowLength = Flow.Width * Flow.Height;

            float[][] arrays = new float[4][];
            arrays[0] = new float[pixelCount];
            arrays[1] = new float[pixelCount];
            arrays[2] = new float[pixelCount];
            arrays[3] = new float[pixelCount];

            Mem leftImageMemObject = (Mem)Cl.CreateImage2D(context, MemFlags.ReadOnly | MemFlags.CopyHostPtr, SimpleImage.clImageFormat, (IntPtr)I0.ImageWidth, (IntPtr)I0.ImageHeight, (IntPtr)0, I0.ByteArray, out error);
            Mem rightImageMemObject = (Mem)Cl.CreateImage2D(context, MemFlags.ReadOnly | MemFlags.CopyHostPtr, SimpleImage.clImageFormat, (IntPtr)I1d.ImageWidth, (IntPtr)I1d.ImageHeight, (IntPtr)0, I1d.ByteArray, out error);

            // NOTE(review): the length handed to CreateBuffer<float> is already multiplied by
            // sizeof(float); confirm whether this overload expects an element count instead.
            IMem <float> uInputFlowMemObject = Cl.CreateBuffer <float>(context, MemFlags.ReadOnly, flowLength * sizeof(float), out error);
            IMem <float> vInputFlowMemObject = Cl.CreateBuffer <float>(context, MemFlags.ReadOnly, flowLength * sizeof(float), out error);

            IMem <float> gradXBuf  = Cl.CreateBuffer <float>(context, MemFlags.WriteOnly, flowLength * sizeof(float), out error);
            IMem <float> gradYBuf  = Cl.CreateBuffer <float>(context, MemFlags.WriteOnly, flowLength * sizeof(float), out error);
            IMem <float> grad_2Buf = Cl.CreateBuffer <float>(context, MemFlags.WriteOnly, flowLength * sizeof(float), out error);
            IMem <float> rho_cBuf  = Cl.CreateBuffer <float>(context, MemFlags.WriteOnly, flowLength * sizeof(float), out error);

            Kernel _Kernel = Cl.CreateKernel(program, "gradRho", out error);

            try
            {
                error |= Cl.SetKernelArg(_Kernel, 0, leftImageMemObject);
                error |= Cl.SetKernelArg(_Kernel, 1, rightImageMemObject);
                error |= Cl.SetKernelArg <float>(_Kernel, 2, uInputFlowMemObject);
                error |= Cl.SetKernelArg <float>(_Kernel, 3, vInputFlowMemObject);
                error |= Cl.SetKernelArg <float>(_Kernel, 4, gradXBuf);
                error |= Cl.SetKernelArg <float>(_Kernel, 5, gradYBuf);
                error |= Cl.SetKernelArg <float>(_Kernel, 6, grad_2Buf);
                error |= Cl.SetKernelArg <float>(_Kernel, 7, rho_cBuf);
                error |= Cl.SetKernelArg(_Kernel, 8, I0.ImageWidth);
                error |= Cl.SetKernelArg(_Kernel, 9, I0.ImageHeight);

                Event _event;

                IntPtr[] originPtr = new IntPtr[] { (IntPtr)0, (IntPtr)0, (IntPtr)0 };
                // Each image is written with a region matching the dimensions it was created with.
                IntPtr[] leftRegionPtr    = new IntPtr[] { (IntPtr)I0.ImageWidth, (IntPtr)I0.ImageHeight, (IntPtr)1 };
                IntPtr[] rightRegionPtr   = new IntPtr[] { (IntPtr)I1d.ImageWidth, (IntPtr)I1d.ImageHeight, (IntPtr)1 };
                IntPtr[] workGroupSizePtr = new IntPtr[] { (IntPtr)flowLength };

                // Every enqueue call hands back an event handle; release it as soon as the
                // call succeeded so the handles do not pile up across invocations.
                error = Cl.EnqueueWriteImage(commandQueue, leftImageMemObject, Bool.True, originPtr, leftRegionPtr, (IntPtr)0, (IntPtr)0, I0.ByteArray, 0, null, out _event);
                if (error == ErrorCode.Success) { Cl.ReleaseEvent(_event); }

                error = Cl.EnqueueWriteImage(commandQueue, rightImageMemObject, Bool.True, originPtr, rightRegionPtr, (IntPtr)0, (IntPtr)0, I1d.ByteArray, 0, null, out _event);
                if (error == ErrorCode.Success) { Cl.ReleaseEvent(_event); }

                error = Cl.EnqueueWriteBuffer <float>(commandQueue, uInputFlowMemObject, Bool.True, Flow.Array[0], 0, null, out _event);
                if (error == ErrorCode.Success) { Cl.ReleaseEvent(_event); }

                error = Cl.EnqueueWriteBuffer <float>(commandQueue, vInputFlowMemObject, Bool.True, Flow.Array[1], 0, null, out _event);
                if (error == ErrorCode.Success) { Cl.ReleaseEvent(_event); }

                error = Cl.EnqueueNDRangeKernel(commandQueue, _Kernel, 1, null, workGroupSizePtr, null, 0, null, out _event);
                if (error == ErrorCode.Success) { Cl.ReleaseEvent(_event); }
                error = Cl.Finish(commandQueue);

                error = Cl.EnqueueReadBuffer <float>(commandQueue, gradXBuf, Bool.True, 0, flowLength, arrays[0], 0, null, out _event);
                if (error == ErrorCode.Success) { Cl.ReleaseEvent(_event); }
                error = Cl.Finish(commandQueue);

                error = Cl.EnqueueReadBuffer <float>(commandQueue, gradYBuf, Bool.True, 0, flowLength, arrays[1], 0, null, out _event);
                if (error == ErrorCode.Success) { Cl.ReleaseEvent(_event); }
                error = Cl.Finish(commandQueue);

                error = Cl.EnqueueReadBuffer <float>(commandQueue, grad_2Buf, Bool.True, 0, flowLength, arrays[2], 0, null, out _event);
                if (error == ErrorCode.Success) { Cl.ReleaseEvent(_event); }
                error = Cl.Finish(commandQueue);

                error = Cl.EnqueueReadBuffer <float>(commandQueue, rho_cBuf, Bool.True, 0, flowLength, arrays[3], 0, null, out _event);
                if (error == ErrorCode.Success) { Cl.ReleaseEvent(_event); }
                error = Cl.Finish(commandQueue);
            }
            finally
            {
                // Release the device objects even when one of the calls above throws.
                Cl.ReleaseMemObject(uInputFlowMemObject);
                Cl.ReleaseMemObject(vInputFlowMemObject);
                Cl.ReleaseMemObject(leftImageMemObject);
                Cl.ReleaseMemObject(rightImageMemObject);
                Cl.ReleaseMemObject(gradXBuf);
                Cl.ReleaseMemObject(gradYBuf);
                Cl.ReleaseMemObject(grad_2Buf);
                Cl.ReleaseMemObject(rho_cBuf);
                Cl.ReleaseKernel(_Kernel);
            }

            return(arrays);
        }
Exemple #20
0
        /// <summary>
        /// Uploads the flow and the two P fields, runs the "calcP" kernel and reads the
        /// four output buffers back into two newly allocated flow arrays.
        /// </summary>
        /// <param name="Flow">Flow whose Array[0] and Array[1] are uploaded as kernel arguments 0 and 1; its Width and Height are passed as arguments 12 and 13.</param>
        /// <param name="P1">Field whose two arrays are uploaded as kernel arguments 4 and 5; its dimensions size the first result.</param>
        /// <param name="P2">Field whose two arrays are uploaded as kernel arguments 6 and 7; its dimensions size the second result.</param>
        /// <returns>A two-element array: the result sized like <paramref name="P1"/> followed by the result sized like <paramref name="P2"/>.</returns>
        public FlowArray[] calc_P_field(FlowArray Flow, FlowArray P1, FlowArray P2)
        {
            ErrorCode error;

            FlowArray outputFlow1 = new FlowArray();

            outputFlow1.Array    = new float[2][];
            outputFlow1.Width    = P1.Width;
            outputFlow1.Height   = P1.Height;
            outputFlow1.Array[0] = new float[outputFlow1.Width * outputFlow1.Height];
            outputFlow1.Array[1] = new float[outputFlow1.Width * outputFlow1.Height];

            FlowArray outputFlow2 = new FlowArray();

            outputFlow2.Array    = new float[2][];
            outputFlow2.Width    = P2.Width;
            outputFlow2.Height   = P2.Height;
            outputFlow2.Array[0] = new float[outputFlow2.Width * outputFlow2.Height];
            outputFlow2.Array[1] = new float[outputFlow2.Width * outputFlow2.Height];

            int outLength1 = outputFlow1.Width * outputFlow1.Height;
            int outLength2 = outputFlow2.Width * outputFlow2.Height;

            // NOTE(review): the lengths handed to CreateBuffer<float> are already multiplied by
            // sizeof(float); confirm whether this overload expects an element count instead.
            IMem <float> uInputFlowMemObject = Cl.CreateBuffer <float>(context, MemFlags.ReadOnly, Flow.Width * Flow.Height * sizeof(float), out error);
            IMem <float> vInputFlowMemObject = Cl.CreateBuffer <float>(context, MemFlags.ReadOnly, Flow.Width * Flow.Height * sizeof(float), out error);

            IMem <float> P11_input = Cl.CreateBuffer <float>(context, MemFlags.ReadOnly, P1.Width * P1.Height * sizeof(float), out error);
            IMem <float> P12_input = Cl.CreateBuffer <float>(context, MemFlags.ReadOnly, P1.Width * P1.Height * sizeof(float), out error);
            IMem <float> P21_input = Cl.CreateBuffer <float>(context, MemFlags.ReadOnly, P2.Width * P2.Height * sizeof(float), out error);
            IMem <float> P22_input = Cl.CreateBuffer <float>(context, MemFlags.ReadOnly, P2.Width * P2.Height * sizeof(float), out error);

            IMem <float> P11_output = Cl.CreateBuffer <float>(context, MemFlags.WriteOnly, outLength1 * sizeof(float), out error);
            IMem <float> P12_output = Cl.CreateBuffer <float>(context, MemFlags.WriteOnly, outLength1 * sizeof(float), out error);
            IMem <float> P21_output = Cl.CreateBuffer <float>(context, MemFlags.WriteOnly, outLength2 * sizeof(float), out error);
            IMem <float> P22_output = Cl.CreateBuffer <float>(context, MemFlags.WriteOnly, outLength2 * sizeof(float), out error);

            Kernel _Kernel = Cl.CreateKernel(program, "calcP", out error);

            try
            {
                error |= Cl.SetKernelArg <float>(_Kernel, 0, uInputFlowMemObject);
                error |= Cl.SetKernelArg <float>(_Kernel, 1, vInputFlowMemObject);
                error |= Cl.SetKernelArg <float>(_Kernel, 2, this.tau);
                error |= Cl.SetKernelArg <float>(_Kernel, 3, this.theta);
                error |= Cl.SetKernelArg <float>(_Kernel, 4, P11_input);
                error |= Cl.SetKernelArg <float>(_Kernel, 5, P12_input);
                error |= Cl.SetKernelArg <float>(_Kernel, 6, P21_input);
                error |= Cl.SetKernelArg <float>(_Kernel, 7, P22_input);
                error |= Cl.SetKernelArg <float>(_Kernel, 8, P11_output);
                error |= Cl.SetKernelArg <float>(_Kernel, 9, P12_output);
                error |= Cl.SetKernelArg <float>(_Kernel, 10, P21_output);
                error |= Cl.SetKernelArg <float>(_Kernel, 11, P22_output);
                error |= Cl.SetKernelArg(_Kernel, 12, Flow.Width);
                error |= Cl.SetKernelArg(_Kernel, 13, Flow.Height);

                Event _event;

                // One work item per element of the first output field.
                IntPtr[] workGroupSizePtr = new IntPtr[] { (IntPtr)(outputFlow1.Height * outputFlow1.Width) };

                // Every enqueue call hands back an event handle; release it as soon as the
                // call succeeded so the handles do not pile up across invocations.
                error = Cl.EnqueueWriteBuffer <float>(commandQueue, uInputFlowMemObject, Bool.True, Flow.Array[0], 0, null, out _event);
                if (error == ErrorCode.Success) { Cl.ReleaseEvent(_event); }

                error = Cl.EnqueueWriteBuffer <float>(commandQueue, vInputFlowMemObject, Bool.True, Flow.Array[1], 0, null, out _event);
                if (error == ErrorCode.Success) { Cl.ReleaseEvent(_event); }

                error = Cl.EnqueueWriteBuffer <float>(commandQueue, P11_input, Bool.True, P1.Array[0], 0, null, out _event);
                if (error == ErrorCode.Success) { Cl.ReleaseEvent(_event); }

                error = Cl.EnqueueWriteBuffer <float>(commandQueue, P12_input, Bool.True, P1.Array[1], 0, null, out _event);
                if (error == ErrorCode.Success) { Cl.ReleaseEvent(_event); }

                error = Cl.EnqueueWriteBuffer <float>(commandQueue, P21_input, Bool.True, P2.Array[0], 0, null, out _event);
                if (error == ErrorCode.Success) { Cl.ReleaseEvent(_event); }

                error = Cl.EnqueueWriteBuffer <float>(commandQueue, P22_input, Bool.True, P2.Array[1], 0, null, out _event);
                if (error == ErrorCode.Success) { Cl.ReleaseEvent(_event); }

                error = Cl.EnqueueNDRangeKernel(commandQueue, _Kernel, 1, null, workGroupSizePtr, null, 0, null, out _event);
                if (error == ErrorCode.Success) { Cl.ReleaseEvent(_event); }
                error = Cl.Finish(commandQueue);

                error = Cl.EnqueueReadBuffer <float>(commandQueue, P11_output, Bool.True, 0, outLength1, outputFlow1.Array[0], 0, null, out _event);
                if (error == ErrorCode.Success) { Cl.ReleaseEvent(_event); }
                error = Cl.Finish(commandQueue);

                error = Cl.EnqueueReadBuffer <float>(commandQueue, P12_output, Bool.True, 0, outLength1, outputFlow1.Array[1], 0, null, out _event);
                if (error == ErrorCode.Success) { Cl.ReleaseEvent(_event); }
                error = Cl.Finish(commandQueue);

                error = Cl.EnqueueReadBuffer <float>(commandQueue, P21_output, Bool.True, 0, outLength2, outputFlow2.Array[0], 0, null, out _event);
                if (error == ErrorCode.Success) { Cl.ReleaseEvent(_event); }
                error = Cl.Finish(commandQueue);

                error = Cl.EnqueueReadBuffer <float>(commandQueue, P22_output, Bool.True, 0, outLength2, outputFlow2.Array[1], 0, null, out _event);
                if (error == ErrorCode.Success) { Cl.ReleaseEvent(_event); }
                error = Cl.Finish(commandQueue);
            }
            finally
            {
                // Release the device objects even when one of the calls above throws.
                Cl.ReleaseMemObject(uInputFlowMemObject);
                Cl.ReleaseMemObject(vInputFlowMemObject);
                Cl.ReleaseMemObject(P11_input);
                Cl.ReleaseMemObject(P12_input);
                Cl.ReleaseMemObject(P21_input);
                Cl.ReleaseMemObject(P22_input);
                Cl.ReleaseMemObject(P11_output);
                Cl.ReleaseMemObject(P12_output);
                Cl.ReleaseMemObject(P21_output);
                Cl.ReleaseMemObject(P22_output);
                Cl.ReleaseKernel(_Kernel);
            }

            FlowArray[] OutputFlows = new FlowArray[2];
            OutputFlows[0] = outputFlow1;
            OutputFlows[1] = outputFlow2;
            return(OutputFlows);
        }
Exemple #21
0
        /// <summary>
        /// Uploads the gradient data, the input flow and the two P fields, runs the
        /// "divP_Flow" kernel and reads the two output buffers into a new flow array.
        /// </summary>
        /// <param name="Idx">Data uploaded to the buffer bound to kernel argument 1.</param>
        /// <param name="Idy">Data uploaded to the buffer bound to kernel argument 2.</param>
        /// <param name="grad_2">Data uploaded to the buffer bound to kernel argument 9.</param>
        /// <param name="rho_c">Data uploaded to the buffer bound to kernel argument 0.</param>
        /// <param name="inFlow">Input flow (kernel arguments 3 and 4); its dimensions size every buffer and the result.</param>
        /// <param name="P1">Field whose two arrays are uploaded as kernel arguments 10 and 11.</param>
        /// <param name="P2">Field whose two arrays are uploaded as kernel arguments 12 and 13.</param>
        /// <returns>A flow with the dimensions of <paramref name="inFlow"/> filled from the OutFlow_U and OutFlow_V buffers.</returns>
        public FlowArray calc_divP_Flow(float[] Idx, float[] Idy, float[] grad_2, float[] rho_c, FlowArray inFlow, FlowArray P1, FlowArray P2)
        {
            FlowArray outFlow = new FlowArray();

            outFlow.Array    = new float[2][];
            outFlow.Width    = inFlow.Width;
            outFlow.Height   = inFlow.Height;
            outFlow.Array[0] = new float[outFlow.Width * outFlow.Height];
            outFlow.Array[1] = new float[outFlow.Width * outFlow.Height];

            int outLength = outFlow.Width * outFlow.Height;

            // NOTE(review): the length handed to CreateBuffer<float> is already multiplied by
            // sizeof(float); confirm whether this overload expects an element count instead.
            int bufferLength = inFlow.Width * inFlow.Height * sizeof(float);

            IMem <float> grad_2Buf = Cl.CreateBuffer <float>(context, MemFlags.ReadOnly, bufferLength, out error);
            IMem <float> rho_cBuf  = Cl.CreateBuffer <float>(context, MemFlags.ReadOnly, bufferLength, out error);
            IMem <float> IdxBuf    = Cl.CreateBuffer <float>(context, MemFlags.ReadOnly, bufferLength, out error);
            IMem <float> IdyBuf    = Cl.CreateBuffer <float>(context, MemFlags.ReadOnly, bufferLength, out error);
            IMem <float> InFlow_U  = Cl.CreateBuffer <float>(context, MemFlags.ReadOnly, bufferLength, out error);
            IMem <float> InFlow_V  = Cl.CreateBuffer <float>(context, MemFlags.ReadOnly, bufferLength, out error);

            IMem <float> divP11 = Cl.CreateBuffer <float>(context, MemFlags.ReadOnly, bufferLength, out error);
            IMem <float> divP12 = Cl.CreateBuffer <float>(context, MemFlags.ReadOnly, bufferLength, out error);
            IMem <float> divP21 = Cl.CreateBuffer <float>(context, MemFlags.ReadOnly, bufferLength, out error);
            IMem <float> divP22 = Cl.CreateBuffer <float>(context, MemFlags.ReadOnly, bufferLength, out error);

            IMem <float> OutFlow_U = Cl.CreateBuffer <float>(context, MemFlags.WriteOnly, bufferLength, out error);
            IMem <float> OutFlow_V = Cl.CreateBuffer <float>(context, MemFlags.WriteOnly, bufferLength, out error);

            Kernel _Kernel = Cl.CreateKernel(program, "divP_Flow", out error);

            try
            {
                error |= Cl.SetKernelArg(_Kernel, 0, rho_cBuf);
                error |= Cl.SetKernelArg(_Kernel, 1, IdxBuf);
                error |= Cl.SetKernelArg <float>(_Kernel, 2, IdyBuf);
                error |= Cl.SetKernelArg <float>(_Kernel, 3, InFlow_U);
                error |= Cl.SetKernelArg <float>(_Kernel, 4, InFlow_V);
                error |= Cl.SetKernelArg <float>(_Kernel, 5, OutFlow_U);
                error |= Cl.SetKernelArg <float>(_Kernel, 6, OutFlow_V);
                error |= Cl.SetKernelArg <float>(_Kernel, 7, this.theta);
                error |= Cl.SetKernelArg <float>(_Kernel, 8, this.lambda);
                error |= Cl.SetKernelArg <float>(_Kernel, 9, grad_2Buf);
                error |= Cl.SetKernelArg <float>(_Kernel, 10, divP11);
                error |= Cl.SetKernelArg <float>(_Kernel, 11, divP12);
                error |= Cl.SetKernelArg <float>(_Kernel, 12, divP21);
                error |= Cl.SetKernelArg <float>(_Kernel, 13, divP22);
                error |= Cl.SetKernelArg <float>(_Kernel, 14, threshold);
                error |= Cl.SetKernelArg(_Kernel, 15, inFlow.Width);
                error |= Cl.SetKernelArg(_Kernel, 16, inFlow.Height);

                Event _event;

                // One work item per element of the input flow.
                IntPtr[] workGroupSizePtr = new IntPtr[] { (IntPtr)(inFlow.Height * inFlow.Width) };

                // Every enqueue call hands back an event handle; release it as soon as the
                // call succeeded so the handles do not pile up across invocations.
                error = Cl.EnqueueWriteBuffer <float>(commandQueue, rho_cBuf, Bool.True, rho_c, 0, null, out _event);
                if (error == ErrorCode.Success) { Cl.ReleaseEvent(_event); }

                error = Cl.EnqueueWriteBuffer <float>(commandQueue, IdxBuf, Bool.True, Idx, 0, null, out _event);
                if (error == ErrorCode.Success) { Cl.ReleaseEvent(_event); }

                error = Cl.EnqueueWriteBuffer <float>(commandQueue, IdyBuf, Bool.True, Idy, 0, null, out _event);
                if (error == ErrorCode.Success) { Cl.ReleaseEvent(_event); }

                error = Cl.EnqueueWriteBuffer <float>(commandQueue, grad_2Buf, Bool.True, grad_2, 0, null, out _event);
                if (error == ErrorCode.Success) { Cl.ReleaseEvent(_event); }

                error = Cl.EnqueueWriteBuffer <float>(commandQueue, InFlow_U, Bool.True, inFlow.Array[0], 0, null, out _event);
                if (error == ErrorCode.Success) { Cl.ReleaseEvent(_event); }

                error = Cl.EnqueueWriteBuffer <float>(commandQueue, InFlow_V, Bool.True, inFlow.Array[1], 0, null, out _event);
                if (error == ErrorCode.Success) { Cl.ReleaseEvent(_event); }

                error = Cl.EnqueueWriteBuffer <float>(commandQueue, divP11, Bool.True, P1.Array[0], 0, null, out _event);
                if (error == ErrorCode.Success) { Cl.ReleaseEvent(_event); }

                error = Cl.EnqueueWriteBuffer <float>(commandQueue, divP12, Bool.True, P1.Array[1], 0, null, out _event);
                if (error == ErrorCode.Success) { Cl.ReleaseEvent(_event); }

                error = Cl.EnqueueWriteBuffer <float>(commandQueue, divP21, Bool.True, P2.Array[0], 0, null, out _event);
                if (error == ErrorCode.Success) { Cl.ReleaseEvent(_event); }

                error = Cl.EnqueueWriteBuffer <float>(commandQueue, divP22, Bool.True, P2.Array[1], 0, null, out _event);
                if (error == ErrorCode.Success) { Cl.ReleaseEvent(_event); }

                error = Cl.EnqueueNDRangeKernel(commandQueue, _Kernel, 1, null, workGroupSizePtr, null, 0, null, out _event);
                if (error == ErrorCode.Success) { Cl.ReleaseEvent(_event); }
                error = Cl.Finish(commandQueue);

                error = Cl.EnqueueReadBuffer <float>(commandQueue, OutFlow_U, Bool.True, 0, outLength, outFlow.Array[0], 0, null, out _event);
                if (error == ErrorCode.Success) { Cl.ReleaseEvent(_event); }
                error = Cl.Finish(commandQueue);

                error = Cl.EnqueueReadBuffer <float>(commandQueue, OutFlow_V, Bool.True, 0, outLength, outFlow.Array[1], 0, null, out _event);
                if (error == ErrorCode.Success) { Cl.ReleaseEvent(_event); }
                error = Cl.Finish(commandQueue);
            }
            finally
            {
                // Release the device objects even when one of the calls above throws.
                Cl.ReleaseMemObject(grad_2Buf);
                Cl.ReleaseMemObject(rho_cBuf);
                Cl.ReleaseMemObject(IdxBuf);
                Cl.ReleaseMemObject(IdyBuf);
                Cl.ReleaseMemObject(InFlow_U);
                Cl.ReleaseMemObject(InFlow_V);
                Cl.ReleaseMemObject(divP11);
                Cl.ReleaseMemObject(divP12);
                Cl.ReleaseMemObject(divP21);
                Cl.ReleaseMemObject(divP22);
                Cl.ReleaseMemObject(OutFlow_U);
                Cl.ReleaseMemObject(OutFlow_V);
                Cl.ReleaseKernel(_Kernel);
            }

            return(outFlow);
        }
Exemple #22
0
        /// <summary>
        /// Loads and builds "scrypt.cl" from two directories above the current
        /// directory, creates its "search" kernel and a command queue, then enqueues a
        /// buffer write, the kernel and a buffer read, reporting each OpenCL status
        /// through CheckErr.
        /// </summary>
        public void ScryptTest()
        {
            ErrorCode error;

            //Load and compile kernel source code.
            string programPath = System.Environment.CurrentDirectory + "/../../scrypt.cl";  //Cl

            if (!System.IO.File.Exists(programPath))
            {
                Console.WriteLine("Program doesn't exist at path " + programPath);
                return;
            }

            string programSource = System.IO.File.ReadAllText(programPath);

            Program program = Cl.CreateProgramWithSource(_context, 1, new[] { programSource }, null, out error);
            CheckErr(error, "Cl.CreateProgramWithSource");

            //Compile kernel source
            error = Cl.BuildProgram(program, 1, new[] { _device }, "-D LOOKUP_GAP=1 -D CONCURRENT_THREADS=1 -D WORKSIZE=1", null, IntPtr.Zero);
            CheckErr(error, "Cl.BuildProgram");

            //Check for any compilation errors
            // NOTE(review): the trailing "&& 1 == 0" makes this condition always false, so the
            // build log below is never printed. It looks like a deliberate bypass; confirm
            // whether the check should be re-enabled.
            if (Cl.GetProgramBuildInfo(program, _device, ProgramBuildInfo.Status, out error).CastTo <BuildStatus>()
                != BuildStatus.Success && 1 == 0)
            {
                CheckErr(error, "Cl.GetProgramBuildInfo");
                Console.WriteLine("Cl.GetProgramBuildInfo != Success");
                Console.WriteLine(Cl.GetProgramBuildInfo(program, _device, ProgramBuildInfo.Log, out error));
                return;
            }

            //Create the required kernel (entry function) [search]
            Kernel kernel = Cl.CreateKernel(program, "search", out error);
            CheckErr(error, "Cl.CreateKernel");

            //Create a command queue, where all of the commands for execution will be added
            CommandQueue cmdQueue = Cl.CreateCommandQueue(_context, _device, (CommandQueueProperties)0, out error);
            CheckErr(error, "Cl.CreateCommandQueue");

            clState _clState = new clState();
            _clState.cl_command_queue = cmdQueue;
            _clState.cl_kernel        = kernel;
            _clState.cl_context       = _context;

            IntPtr buffersize = new IntPtr(1024);
            // NOTE(review): blank_res is handed to EnqueueWriteBuffer in the data position but
            // only wraps the integer 1024; confirm what the write is meant to upload.
            IntPtr blank_res  = new IntPtr(1024);
            Object thrdataRes = new Object();

            OpenCL.Net.Event clevent;
            dev_blk_ctx blk = new dev_blk_ctx();
            ErrorCode   err = queue_scrypt_kernel(_clState, blk);

            ErrorCode status = Cl.EnqueueWriteBuffer(_clState.cl_command_queue,
                                                     _clState.outputBuffer, OpenCL.Net.Bool.True, new IntPtr(0), buffersize, blank_res, 0, null, out clevent);
            CheckErr(status, "Cl.EnqueueWriteBuffer");

            // NOTE(review): both work-size arrays are empty although workdim is 1; confirm the
            // intended global/local sizes.
            IntPtr[] globalThreads = new IntPtr[0];
            IntPtr[] localThreads  = new IntPtr[0];

            uint workdim = 1;

            status = Cl.EnqueueNDRangeKernel(_clState.cl_command_queue, _clState.cl_kernel, workdim, null, globalThreads, localThreads, 0, null, out clevent);
            // Report the status of this call, not the stale value left in "error".
            CheckErr(status, "Cl.EnqueueNDRangeKernel");

            IntPtr offset = new IntPtr(0);

            status = Cl.EnqueueReadBuffer(_clState.cl_command_queue, _clState.outputBuffer, OpenCL.Net.Bool.False, offset, buffersize, thrdataRes, 0, null, out clevent);
            CheckErr(status, "Cl.EnqueueReadBuffer");

            //Wait for completion of all calculations on the GPU.
            error = Cl.Finish(_clState.cl_command_queue);
            CheckErr(error, "Cl.Finish");

            //Clean up memory
            Cl.ReleaseKernel(_clState.cl_kernel);
            Cl.ReleaseCommandQueue(_clState.cl_command_queue);
        }
Exemple #23
0
        /// <summary>
        /// Uploads the camera, a debug voxel model and the current time to the device,
        /// runs the kernel, reads the output buffer back and converts it to a bitmap
        /// at the current render resolution.
        /// </summary>
        /// <param name="camera">Camera that is wrapped in a C_CAMERA and written to the input buffer.</param>
        /// <returns>A 32bpp ARGB bitmap built from the bytes read back from the output buffer.</returns>
        public static Bitmap Bake(Camera camera)
        {
            // NOTE(review): bakeSW is started here but never stopped or reset in this method.
            bakeSW.Start();
            Vector2I res       = Graphics.GetRenderResolution();
            int      totPixels = res.x * res.y;
            C_CAMERA cam       = new C_CAMERA(camera);

            angle += rotationSpeed * Time.DeltaTime;
            // 0.0174533 converts the angle from degrees to radians.
            rot    = Quaternion.CreateFromAxisAngle((Vector3)axis, (float)(angle * 0.0174533D));

            float time = (float)Time.TimeSinceStart;

            Voxel   sword = Voxel.GenerateDebug(new int3(32, 32, 32));
            C_VOXEL model = new C_VOXEL(sword);

            int modelSizeSize;
            int modelColorsSize;

            unsafe
            {
                modelSizeSize   = sizeof(int3);
                modelColorsSize = sizeof(Colour32) * model.colors.Length;
            }

            ErrorCode error = ErrorCode.Success;

            // These two buffers are created on every call and released again below.
            Mem memModelSize   = (Mem)Cl.CreateBuffer(gpu_context, MemFlags.ReadOnly, modelSizeSize, out error);
            Mem memModelColors = (Mem)Cl.CreateBuffer(gpu_context, MemFlags.ReadOnly, modelColorsSize, out error);

            Cl.SetKernelArg(kernel, 2, (IntPtr)intPtrSize, memModelSize);
            Cl.SetKernelArg(kernel, 3, (IntPtr)intPtrSize, memModelColors);

            Event event0;

            try
            {
                // Each enqueue call hands back an event handle; release it once the call
                // succeeded so the handles do not accumulate from frame to frame.
                error = Cl.EnqueueWriteBuffer(Queue, (IMem)memInput, Bool.True, IntPtr.Zero, new IntPtr(inputSize), cam, 0, null, out event0);
                if (error == ErrorCode.Success) { Cl.ReleaseEvent(event0); }

                error = Cl.EnqueueWriteBuffer(Queue, (IMem)memModelSize, Bool.True, IntPtr.Zero, new IntPtr(modelSizeSize), model.size, 0, null, out event0);
                if (error == ErrorCode.Success) { Cl.ReleaseEvent(event0); }

                error = Cl.EnqueueWriteBuffer(Queue, (IMem)memModelColors, Bool.True, IntPtr.Zero, new IntPtr(modelColorsSize), model.colors, 0, null, out event0);
                if (error == ErrorCode.Success) { Cl.ReleaseEvent(event0); }

                error = Cl.EnqueueWriteBuffer(Queue, (IMem)memTime, Bool.True, IntPtr.Zero, new IntPtr(sizeof(float)), time, 0, null, out event0);
                if (error == ErrorCode.Success) { Cl.ReleaseEvent(event0); }
            }
            catch (Exception e)
            {
                Log.Print("Error when enqueuing buffer:\n\t-OpenCL Error:" + error.ToString() + "\n\t-DotNet Error: " + e);
            }

            byte[] bp = new byte[totPixels * 4];

            sw.Start();
            error = Cl.EnqueueNDRangeKernel(Queue, kernel, 1, null, workGroupSizePtr, null, 0, null, out event0);

            if (error != ErrorCode.Success)
            {
                Log.Print("Error when enqueuing the NDRange of kernel: " + error.ToString());
            }
            else
            {
                Cl.ReleaseEvent(event0);
            }

            Cl.Finish(Queue);

            ErrorCode execError = Cl.EnqueueReadBuffer(Queue, (IMem)memOutput, Bool.True, IntPtr.Zero, memory, bp, 0, null, out event0);

            if (execError != ErrorCode.Success)
            {
                Log.Print("Error while rendering: " + execError.ToString());
            }
            else
            {
                Cl.ReleaseEvent(event0);
            }
            sw.Stop();
            RenderTime = sw.Elapsed.TotalMilliseconds;
            sw.Reset();

            // The kernel has finished and the result is on the host, so the per-call
            // model buffers can be released (outside the timed section).
            Cl.ReleaseMemObject(memModelSize);
            Cl.ReleaseMemObject(memModelColors);

            Bitmap bm = Imaging.RawToImage(bp, res.x, res.y, System.Drawing.Imaging.PixelFormat.Format32bppArgb);

            return(bm);
        }
Exemple #24
0
        /// <summary>
        /// Multiplies two randomly filled matrices with the "MatMul" kernel and checks
        /// every element of the device result against a product computed on the host,
        /// allowing a relative error of 1E-3.
        /// </summary>
        public void MatMul()
        {
            // one-time preparation, skipped once the flag is set
            if (!prepared)
            {
                Prepare(this.BuildIR().InlineIR());
                prepared = true;
            }

            // kernel and command queue
            Cl.Kernel matMulKernel = Cl.CreateKernel(program, "MatMul", out error);
            clSafeCall(error);

            Cl.CommandQueue queue = Cl.CreateCommandQueue(context, device, Cl.CommandQueueProperties.None, out error);
            clSafeCall(error);

            // host matrices: two random inputs and one output
            float[] lhs     = new float[WA * HA];
            float[] rhs     = new float[WB * HB];
            float[] product = new float[WC * HC];

            Random rng = new Random();

            for (int n = 0; n < lhs.Length; n++)
            {
                lhs[n] = (float)rng.Next() / short.MaxValue;
            }
            for (int n = 0; n < rhs.Length; n++)
            {
                rhs[n] = (float)rng.Next() / short.MaxValue;
            }

            // device buffers; the inputs are initialised from the host arrays
            IntPtr lhsBytes     = (IntPtr)(sizeof(float) * lhs.Length);
            IntPtr rhsBytes     = (IntPtr)(sizeof(float) * rhs.Length);
            IntPtr productBytes = (IntPtr)(sizeof(float) * product.Length);

            Cl.Mem deviceLhs = Cl.CreateBuffer(context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadOnly,
                                               lhsBytes, lhs, out error);
            clSafeCall(error);
            Cl.Mem deviceRhs = Cl.CreateBuffer(context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadOnly,
                                               rhsBytes, rhs, out error);
            clSafeCall(error);
            Cl.Mem deviceProduct = Cl.CreateBuffer(context, Cl.MemFlags.WriteOnly,
                                                   productBytes, IntPtr.Zero, out error);
            clSafeCall(error);

            // kernel arguments; 3 and 4 pass only a size with a null value
            clSafeCall(Cl.SetKernelArg(matMulKernel, 0, deviceLhs));
            clSafeCall(Cl.SetKernelArg(matMulKernel, 1, deviceRhs));
            clSafeCall(Cl.SetKernelArg(matMulKernel, 2, deviceProduct));
            clSafeCall(Cl.SetKernelArg(matMulKernel, 3, BLOCK_SIZE * BLOCK_SIZE * sizeof(float), null));
            clSafeCall(Cl.SetKernelArg(matMulKernel, 4, BLOCK_SIZE * BLOCK_SIZE * sizeof(float), null));
            clSafeCall(Cl.SetKernelArg(matMulKernel, 5, WA));
            clSafeCall(Cl.SetKernelArg(matMulKernel, 6, WB));

            // launch over a WC x HC range in BLOCK_SIZE x BLOCK_SIZE groups
            IntPtr[] globalSize = new[] { (IntPtr)WC, (IntPtr)HC };
            IntPtr[] localSize  = new[] { (IntPtr)BLOCK_SIZE, (IntPtr)BLOCK_SIZE };
            clSafeCall(Cl.EnqueueNDRangeKernel(queue, matMulKernel, 2, null, globalSize, localSize, 0, null, out clevent));

            // blocking read of the result
            clSafeCall(Cl.EnqueueReadBuffer(queue, deviceProduct, Cl.Bool.True, IntPtr.Zero,
                                            productBytes, product, 0, null, out clevent));

            clSafeCall(Cl.Finish(queue));

            // compare against a host-side reference product
            for (int row = 0; row < HA; ++row)
            {
                for (int col = 0; col < WB; ++col)
                {
                    float expected = 0;
                    for (int inner = 0; inner < WA; ++inner)
                    {
                        expected += lhs[row * WA + inner] * rhs[inner * WB + col];
                    }
                    float relativeError = Math.Abs((expected - product[row * WB + col]) / expected);
                    Assert.That(relativeError, Is.LessThanOrEqualTo(1E-3F));
                }
            }
        }
Exemple #25
0
        /// <summary>
        /// Adds two random 1024-element vectors with the "VecAdd" kernel and checks each
        /// element of the device result against the host-side sum, allowing a relative
        /// error of 1E-3.
        /// </summary>
        public void VecAdd()
        {
            // one-time preparation, skipped once the flag is set
            if (!prepared)
            {
                Prepare(this.BuildIR().InlineIR());
                prepared = true;
            }

            // kernel and command queue
            Cl.Kernel vecAddKernel = Cl.CreateKernel(program, "VecAdd", out error);
            clSafeCall(error);

            Cl.CommandQueue queue = Cl.CreateCommandQueue(context, device, Cl.CommandQueueProperties.None, out error);
            clSafeCall(error);

            int elementCount = 1 << 10;
            IntPtr vectorBytes = (IntPtr)(sizeof(float) * elementCount);

            // host vectors: two random inputs and one output
            float[] lhs = new float[elementCount];
            float[] rhs = new float[elementCount];
            float[] sums = new float[elementCount];

            Random rng = new Random();

            for (int n = 0; n < elementCount; n++)
            {
                lhs[n] = (float)rng.Next() / short.MaxValue;
                rhs[n] = (float)rng.Next() / short.MaxValue;
            }

            // device buffers; the inputs are initialised from the host arrays
            Cl.Mem deviceLhs = Cl.CreateBuffer(context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadOnly,
                                               vectorBytes, lhs, out error);
            clSafeCall(error);
            Cl.Mem deviceRhs = Cl.CreateBuffer(context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadOnly,
                                               vectorBytes, rhs, out error);
            clSafeCall(error);
            Cl.Mem deviceSums = Cl.CreateBuffer(context, Cl.MemFlags.WriteOnly,
                                                vectorBytes, IntPtr.Zero, out error);
            clSafeCall(error);

            // kernel arguments
            clSafeCall(Cl.SetKernelArg(vecAddKernel, 0, deviceLhs));
            clSafeCall(Cl.SetKernelArg(vecAddKernel, 1, deviceRhs));
            clSafeCall(Cl.SetKernelArg(vecAddKernel, 2, deviceSums));
            clSafeCall(Cl.SetKernelArg(vecAddKernel, 3, elementCount));

            // launch one work item per element, in groups of 256
            IntPtr[] globalSize = new[] { (IntPtr)elementCount };
            IntPtr[] localSize  = new[] { (IntPtr)256 };
            clSafeCall(Cl.EnqueueNDRangeKernel(queue, vecAddKernel, 1, null, globalSize, localSize,
                                               0, null, out clevent));

            // blocking read of the result
            clSafeCall(Cl.EnqueueReadBuffer(queue, deviceSums, Cl.Bool.True, IntPtr.Zero,
                                            vectorBytes, sums, 0, null, out clevent));

            clSafeCall(Cl.Finish(queue));

            // compare against the host-side sums
            for (int n = 0; n < elementCount; n++)
            {
                float expected = lhs[n] + rhs[n];
                float relativeError = Math.Abs((expected - sums[n]) / expected);
                Assert.That(relativeError, Is.LessThanOrEqualTo(1E-3F));
            }
        }
Exemple #26
0
        /// <summary>
        /// Reads data from the GPU buffer into a host array using a blocking read.
        /// </summary>
        /// <param name="data">Where to store the read data; <c>data.Length</c> is passed as the read length.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="data"/> is null.</exception>
        /// <exception cref="System.InvalidOperationException">The read command returned a non-success status.</exception>
        public void ReadData(T[] data)
        {
            if (data == null)
            {
                throw new System.ArgumentNullException("data");
            }

            Event e;

            var status = Cl.EnqueueReadBuffer(_handle.Queue, _buffer, Bool.True, 0, data.Length, data, 0, null, out e);

            // CL_SUCCESS is 0; anything else means the array was not (fully) filled,
            // so surface the failure instead of handing back stale contents.
            if ((int)status != 0)
            {
                throw new System.InvalidOperationException("Cl.EnqueueReadBuffer failed: " + status);
            }

            // The read is blocking, so the completion event is never waited on;
            // release it so the event object is not leaked on every call.
            Cl.ReleaseEvent(e);
        }
Exemple #27
0
        /// <summary>
        /// Handles one HTTP request. GET returns the upload form; POST builds
        /// <c>Kernel.cl</c> from the current directory, runs its <c>answer</c> kernel over
        /// 100 random integers and returns the input/output pairs as HTML.
        /// </summary>
        /// <param name="request">Incoming request; its method and body are read.</param>
        /// <returns>
        /// A 200 response with an HTML body for GET and for a completed POST,
        /// 404 when the kernel file is missing or does not build,
        /// 501 for any other HTTP method.
        /// </returns>
        public HTTPResponse GetResponse(HTTPRequest request)
        {
            HTTPResponse  response = new HTTPResponse(200);
            StringBuilder sb       = new StringBuilder();
            ErrorCode     error;

            if (!_isInit)
            {
                init();
                _isInit = true;
            }

            if (request.Method == HTTPRequest.METHOD_GET)
            {
                sb.Append("<html><body>");
                sb.Append(GenUploadForm());
                sb.Append("</body></html>");
                response.Body = Encoding.UTF8.GetBytes(sb.ToString());
                return response;
            }
            else if (request.Method == HTTPRequest.METHOD_POST)
            {
                // NOTE(review): the raw request body is echoed ahead of the HTML document
                // without escaping. Kept for compatibility; confirm whether this is intended.
                sb.Append(request.Body);

                string programPath = System.Environment.CurrentDirectory + "/Kernel.cl";
                if (!System.IO.File.Exists(programPath))
                {
                    Console.WriteLine("Program doesn't exist at path " + programPath);
                    return new HTTPResponse(404);
                }

                sb.Append("<html><body>");
                string programSource = System.IO.File.ReadAllText(programPath);
                using (OpenCL.Net.Program program = Cl.CreateProgramWithSource(_context, 1, new[] { programSource }, null, out error))
                {
                    LogError(error, "Cl.CreateProgramWithSource");
                    error = Cl.BuildProgram(program, 1, new[] { _device }, string.Empty, null, IntPtr.Zero);
                    LogError(error, "Cl.BuildProgram");
                    if (Cl.GetProgramBuildInfo(program, _device, ProgramBuildInfo.Status, out error).CastTo <OpenCL.Net.BuildStatus>()
                        != BuildStatus.Success)
                    {
                        LogError(error, "Cl.GetProgramBuildInfo");
                        Console.WriteLine("Cl.GetProgramBuildInfo != Success");
                        Console.WriteLine(Cl.GetProgramBuildInfo(program, _device, ProgramBuildInfo.Log, out error));
                        return new HTTPResponse(404);
                    }

                    const int elementCount = 100;

                    Random rand   = new Random();
                    int[]  input  = Enumerable.Range(0, elementCount).Select(_ => rand.Next()).ToArray();
                    int[]  output = new int[elementCount];

                    Kernel kernel = Cl.CreateKernel(program, "answer", out error);
                    LogError(error, "Cl.CreateKernel");

                    var buffIn  = _context.CreateBuffer(input, MemFlags.ReadOnly);
                    var buffOut = _context.CreateBuffer(output, MemFlags.WriteOnly);

                    CommandQueue cmdQueue = Cl.CreateCommandQueue(_context, _device, (CommandQueueProperties)0, out error);
                    LogError(error, "Cl.CreateCommandQueue");

                    try
                    {
                        int intPtrSize = Marshal.SizeOf(typeof(IntPtr));

                        // Report each argument separately: OR-ing two error codes together
                        // produces a value that matches neither failure.
                        error = Cl.SetKernelArg(kernel, 0, (IntPtr)intPtrSize, buffIn);
                        LogError(error, "Cl.SetKernelArg");
                        error = Cl.SetKernelArg(kernel, 1, (IntPtr)intPtrSize, buffOut);
                        LogError(error, "Cl.SetKernelArg");

                        Event clevent;
                        error = Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 2, null, new[] { (IntPtr)elementCount, (IntPtr)1 }, null, 0, null, out clevent);
                        LogError(error, "Cl.EnqueueNDRangeKernel");
                        if (error == ErrorCode.Success)
                        {
                            // Completion is awaited through Cl.Finish, so the event itself is not needed.
                            Cl.ReleaseEvent(clevent);
                        }

                        error = Cl.Finish(cmdQueue);
                        LogError(error, "Cl.Finish");

                        error = Cl.EnqueueReadBuffer(cmdQueue, buffOut, OpenCL.Net.Bool.True, 0, elementCount, output, 0, null, out clevent);
                        LogError(error, "Cl.EnqueueReadBuffer");
                        if (error == ErrorCode.Success)
                        {
                            Cl.ReleaseEvent(clevent);
                        }

                        error = Cl.Finish(cmdQueue);
                        LogError(error, "Cl.Finish");
                    }
                    finally
                    {
                        // Release native objects even if something above throws,
                        // so a failing request does not leak device resources.
                        Cl.ReleaseKernel(kernel);
                        Cl.ReleaseCommandQueue(cmdQueue);
                        Cl.ReleaseMemObject(buffIn);
                        Cl.ReleaseMemObject(buffOut);
                    }

                    sb.Append("<pre>");
                    for (int i = 0; i != elementCount; i++)
                    {
                        sb.Append(input[i] + " % 42 = " + output[i] + "<br />");
                    }
                    sb.Append("</pre>");
                }
                sb.Append("</body></html>");
                response.Body = Encoding.UTF8.GetBytes(sb.ToString());
                return response;
            }

            return new HTTPResponse(501);
        }
Exemple #28
0
        /// <summary>
        /// Console demo: lists the OpenCL platforms, builds a small program on the first
        /// device of the first platform, runs <c>double_everything</c> on A followed by
        /// <c>add_array</c> on (A, B, C), prints C, and finally lists the manifest
        /// resource names of the assembly containing <c>SourceLoader</c>.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            Console.WriteLine("Hello World!");
            uint      platformCount;
            ErrorCode result = Cl.GetPlatformIDs(0, null, out platformCount);

            Console.WriteLine("{0} platforms found", platformCount);

            if (platformCount == 0)
            {
                // platformIds[0] is used below; without a platform there is nothing to run on.
                Console.Error.WriteLine("No OpenCL platform available.");
                return;
            }

            var platformIds = new Platform[platformCount];

            result = Cl.GetPlatformIDs(platformCount, platformIds, out platformCount);
            var platformCounter = 0;

            foreach (var platformId in platformIds)
            {
                IntPtr paramSize;
                result = Cl.GetPlatformInfo(platformId, PlatformInfo.Name, IntPtr.Zero, InfoBuffer.Empty, out paramSize);

                using (var buffer = new InfoBuffer(paramSize))
                {
                    // Query the platform being iterated, so the name matches the size
                    // obtained above and each platform prints its own name.
                    result = Cl.GetPlatformInfo(platformId, PlatformInfo.Name, paramSize, buffer, out paramSize);

                    Console.WriteLine($"Platform {platformCounter}: {buffer}");
                }
                platformCounter++;
            }

            Console.WriteLine("Using first platform...");

            uint deviceCount;

            result = Cl.GetDeviceIDs(platformIds[0], DeviceType.All, 0, null, out deviceCount);
            Console.WriteLine("{0} devices found", deviceCount);

            if (deviceCount == 0)
            {
                // deviceIds[0] is used below.
                Console.Error.WriteLine("No OpenCL device available on the first platform.");
                return;
            }

            var deviceIds = new Device[deviceCount];

            result = Cl.GetDeviceIDs(platformIds[0], DeviceType.All, deviceCount, deviceIds, out _);

            var selectedDevice = deviceIds[0];

            var context = Cl.CreateContext(null, 1, new[] { selectedDevice }, null, IntPtr.Zero, out var error);

            const string kernelSrc = @"
            // Simple test; c[i] = a[i] + b[i]
            __kernel void add_array(__global float *a, __global float *b, __global float *c)
            {
                int xid = get_global_id(0);
                c[xid] = a[xid] + b[xid] - 1500;
            }
            
            __kernel void sub_array(__global float *a, __global float *b, __global float *c)
            {
                int xid = get_global_id(0);
                c[xid] = a[xid] - b[xid] - 2000;
            }
                        
            __kernel void double_everything(__global float *a)
            {
                int xid = get_global_id(0);
                a[xid] = a[xid] * 2;
            }

            ";

            Console.WriteLine("=== src ===");
            Console.WriteLine(kernelSrc);
            Console.WriteLine("============");

            var program = Cl.CreateProgramWithSource(context, 1, new[] { kernelSrc }, null, out var error2);

            error2 = Cl.BuildProgram(program, 1, new[] { selectedDevice }, string.Empty, null, IntPtr.Zero);

            if (error2 == ErrorCode.BuildProgramFailure)
            {
                Console.Error.WriteLine(Cl.GetProgramBuildInfo(program, selectedDevice, ProgramBuildInfo.Log, out error));
            }

            Console.WriteLine(error2);

            if (error2 != ErrorCode.Success)
            {
                // No kernels can be created from a program that did not build.
                program.Dispose();
                return;
            }

            // Get the kernels.
            var kernels = Cl.CreateKernelsInProgram(program, out error);

            Console.WriteLine($"Program contains {kernels.Length} kernels.");

            // NOTE(review): indices assume the kernels come back in source order
            // (add_array, sub_array, double_everything) - confirm, or create them by name.
            var kernelAdd    = kernels[0];
            var kernelDouble = kernels[2];

            const int elementCount = 1000;

            float[] A = new float[elementCount];
            float[] B = new float[elementCount];
            float[] C = new float[elementCount];

            for (var i = 0; i < elementCount; i++)
            {
                A[i] = i;
                B[i] = i;
            }

            // A is written by double_everything and read by add_array, B is only read,
            // and C is only written by add_array; the access flags must allow that.
            IMem<float> hDeviceMemA = Cl.CreateBuffer(context, MemFlags.CopyHostPtr | MemFlags.ReadWrite, A, out error);
            IMem<float> hDeviceMemB = Cl.CreateBuffer(context, MemFlags.CopyHostPtr | MemFlags.ReadOnly, B, out error);
            IMem<float> hDeviceMemC = Cl.CreateBuffer(context, MemFlags.CopyHostPtr | MemFlags.WriteOnly, C, out error);

            // Create a command queue.
            var cmdQueue = Cl.CreateCommandQueue(context, selectedDevice, CommandQueueProperties.None, out error);

            int intPtrSize = Marshal.SizeOf(typeof(IntPtr));

            error = Cl.SetKernelArg(kernelDouble, 0, new IntPtr(intPtrSize), hDeviceMemA);

            error = Cl.SetKernelArg(kernelAdd, 0, new IntPtr(intPtrSize), hDeviceMemA);
            error = Cl.SetKernelArg(kernelAdd, 1, new IntPtr(intPtrSize), hDeviceMemB);
            error = Cl.SetKernelArg(kernelAdd, 2, new IntPtr(intPtrSize), hDeviceMemC);

            // write data from host to device
            Event clevent;

            error = Cl.EnqueueWriteBuffer(cmdQueue, hDeviceMemA, Bool.True, IntPtr.Zero,
                                          new IntPtr(elementCount * sizeof(float)),
                                          A, 0, null, out clevent);
            error = Cl.EnqueueWriteBuffer(cmdQueue, hDeviceMemB, Bool.True, IntPtr.Zero,
                                          new IntPtr(elementCount * sizeof(float)),
                                          B, 1, new [] { clevent }, out clevent);

            // execute kernels: double A first, then C = A + B - 1500
            error = Cl.EnqueueNDRangeKernel(cmdQueue, kernelDouble, 1, null, new IntPtr[] { new IntPtr(elementCount) }, null, 1, new [] { clevent }, out clevent);

            error = Cl.EnqueueNDRangeKernel(cmdQueue, kernelAdd, 1, null, new IntPtr[] { new IntPtr(elementCount) }, null, 1, new [] { clevent }, out clevent);
            Console.WriteLine($"Run result: {error}");

            // Blocking read: with a non-blocking read the device could still be writing
            // into the managed array after the call has returned.
            error = Cl.EnqueueReadBuffer(cmdQueue, hDeviceMemC, Bool.True, 0, C.Length, C, 1, new [] { clevent }, out clevent);

            for (var i = 0; i < elementCount; i++)
            {
                Console.WriteLine($"[{i}]: {C[i]}");
            }

            program.Dispose();

            foreach (var res in typeof(SourceLoader).Assembly.GetManifestResourceNames())
            {
                Console.WriteLine(res);
            }
        }
Exemple #29
0
        // Partially from OpenTK demo - Submitted by "mfagerlund"
        /// <summary>
        /// Builds a two-kernel program, runs the first kernel returned by
        /// <c>Cl.CreateKernelsInProgram</c> over two 12-element random vectors and
        /// asserts that every element of the result equals A[i] + B[i].
        /// </summary>
        public void AddArrayAddsCorrectly()
        {
            const string correctSource = @"
                // Simple test; c[i] = a[i] + b[i]

                __kernel void add_array(__global float *a, __global float *b, __global float *c)
                {
                    int xid = get_global_id(0);
                    c[xid] = a[xid] + b[xid];
                }
                
                __kernel void sub_array(__global float *a, __global float *b, __global float *c)
                {
                    int xid = get_global_id(0);
                    c[xid] = a[xid] - b[xid];
                }

                ";

            ErrorCode error;

            using (Program program = Cl.CreateProgramWithSource(_context, 1, new[] { correctSource }, null, out error))
            {
                Assert.AreEqual(ErrorCode.Success, error);
                error = Cl.BuildProgram(program, 1, new[] { _device }, string.Empty, null, IntPtr.Zero);
                Assert.AreEqual(ErrorCode.Success, error);
                Assert.AreEqual(BuildStatus.Success, Cl.GetProgramBuildInfo(program, _device, ProgramBuildInfo.Status, out error).CastTo <BuildStatus>());

                Kernel[] kernels = Cl.CreateKernelsInProgram(program, out error);
                Assert.AreEqual(ErrorCode.Success, error);

                // NOTE(review): assumes the first kernel returned is add_array - confirm.
                Kernel kernel = kernels[0];

                const int cnBlockSize = 4;
                const int cnBlocks    = 3;
                IntPtr    cnDimension = new IntPtr(cnBlocks * cnBlockSize);
                IntPtr    byteCount   = new IntPtr(cnDimension.ToInt32() * sizeof(float));

                // allocate host vectors
                float[] A = new float[cnDimension.ToInt32()];
                float[] B = new float[cnDimension.ToInt32()];
                float[] C = new float[cnDimension.ToInt32()];

                // initialize host memory with small integers so the float sums are exact
                Random rand = new Random();
                for (int i = 0; i < A.Length; i++)
                {
                    A[i] = rand.Next() % 256;
                    B[i] = rand.Next() % 256;
                }

                IMem <float> hDeviceMemA  = null;
                IMem         hDeviceMemB  = null;
                IMem         hDeviceMemC  = null;
                CommandQueue cmdQueue     = default(CommandQueue);
                bool         queueCreated = false;

                try
                {
                    hDeviceMemA = Cl.CreateBuffer(_context, MemFlags.CopyHostPtr | MemFlags.ReadOnly, A, out error);
                    Assert.AreEqual(ErrorCode.Success, error);

                    hDeviceMemB = Cl.CreateBuffer(_context, MemFlags.CopyHostPtr | MemFlags.ReadOnly, byteCount, B, out error);
                    Assert.AreEqual(ErrorCode.Success, error);
                    hDeviceMemC = Cl.CreateBuffer(_context, MemFlags.WriteOnly, byteCount, IntPtr.Zero, out error);
                    Assert.AreEqual(ErrorCode.Success, error);

                    cmdQueue = Cl.CreateCommandQueue(_context, _device, (CommandQueueProperties)0, out error);
                    Assert.AreEqual(ErrorCode.Success, error);
                    queueCreated = true;

                    Event clevent;

                    int intPtrSize = Marshal.SizeOf(typeof(IntPtr));

                    // setup parameter values
                    error = Cl.SetKernelArg(kernel, 0, new IntPtr(intPtrSize), hDeviceMemA);
                    Assert.AreEqual(ErrorCode.Success, error);
                    error = Cl.SetKernelArg(kernel, 1, new IntPtr(intPtrSize), hDeviceMemB);
                    Assert.AreEqual(ErrorCode.Success, error);
                    error = Cl.SetKernelArg(kernel, 2, new IntPtr(intPtrSize), hDeviceMemC);
                    Assert.AreEqual(ErrorCode.Success, error);

                    // write data from host to device; each completion event is released
                    // right away because every transfer here is blocking
                    error = Cl.EnqueueWriteBuffer(cmdQueue, hDeviceMemA, Bool.True, IntPtr.Zero,
                                                  byteCount,
                                                  A, 0, null, out clevent);
                    Assert.AreEqual(ErrorCode.Success, error);
                    Cl.ReleaseEvent(clevent);
                    error = Cl.EnqueueWriteBuffer(cmdQueue, hDeviceMemB, Bool.True, IntPtr.Zero,
                                                  byteCount,
                                                  B, 0, null, out clevent);
                    Assert.AreEqual(ErrorCode.Success, error);
                    Cl.ReleaseEvent(clevent);

                    // execute kernel
                    error = Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 1, null, new IntPtr[] { cnDimension }, null, 0, null, out clevent);
                    Assert.AreEqual(ErrorCode.Success, error, error.ToString());
                    Cl.ReleaseEvent(clevent);

                    // copy results from device back to host (blocking)
                    error = Cl.EnqueueReadBuffer(cmdQueue, hDeviceMemC, Bool.True, 0, C.Length, C, 0, null, out clevent);
                    Assert.AreEqual(ErrorCode.Success, error, error.ToString());
                    Cl.ReleaseEvent(clevent);

                    for (int i = 0; i < A.Length; i++)
                    {
                        Assert.That(C[i], Is.EqualTo(A[i] + B[i]));
                    }
                }
                finally
                {
                    // Runs even when an assertion fails, so a red test does not leak device objects.
                    if (queueCreated)
                    {
                        Cl.Finish(cmdQueue);
                        Cl.ReleaseCommandQueue(cmdQueue);
                    }

                    if (hDeviceMemA != null)
                    {
                        Cl.ReleaseMemObject(hDeviceMemA);
                    }
                    if (hDeviceMemB != null)
                    {
                        Cl.ReleaseMemObject(hDeviceMemB);
                    }
                    if (hDeviceMemC != null)
                    {
                        Cl.ReleaseMemObject(hDeviceMemC);
                    }

                    foreach (Kernel createdKernel in kernels)
                    {
                        Cl.ReleaseKernel(createdKernel);
                    }
                }
            }
        }
Exemple #30
0
        /// <summary>
        /// Runs the "PoissonJacobi" kernel N times on a dimX x dimY float grid, alternating
        /// between two device buffers, then reads back the last output and compares the
        /// interior points against <c>u(x, y)</c>. Prints error and bandwidth figures and
        /// asserts on the maximum and average relative error.
        /// </summary>
        public void PoissonJacobi()
        {
            // Build the program once; later calls reuse it.
            if (!prepared)
            {
                Prepare(this.BuildIR().InlineIR());
                prepared = true;
            }

            // create kernel
            Cl.Kernel kernel = Cl.CreateKernel(program, "PoissonJacobi", out error);
            clSafeCall(error);

            // create command queue
            Cl.CommandQueue cmdQueue = Cl.CreateCommandQueue(context, device, Cl.CommandQueueProperties.None, out error);
            clSafeCall(error);

            // initialize host memory

            // Grid dimensions and the total number of kernel launches.
            uint dimX = 162;
            uint dimY = 122;
            uint N    = 15000;

            // Coordinates of grid point (0, 0).
            float x0 = (float)(-0.25 * Math.PI);
            float y0 = (float)(-0.25 * Math.PI);

            // Grid spacing in each direction.
            float hx = 2.0f * Math.Abs(x0) / dimX;
            float hy = 2.0f * Math.Abs(y0) / dimY;

            // Row-major grid; all cells start at 0.
            float[] hData = new float[dimX * dimY];

            uint stride = dimX;

            // boundary values

            // First and last column of every row except the first and last;
            // the four corner cells are never written and stay 0.
            for (uint i = 1; i < dimY - 1; i++)
            {
                uint  y_idx = i * stride;
                float y_val = y0 + i * hy;
                hData[y_idx]            = u(x0, y_val);
                hData[y_idx + dimX - 1] = u(x0 + (dimX - 1) * hx, y_val);
            }

            // First and last row, again excluding the corners.
            for (uint j = 1; j < dimX - 1; j++)
            {
                float x_val = x0 + j * hx;
                hData[j] = u(x_val, y0);
                hData[j + (dimY - 1) * stride] = u(x_val, y0 + (dimY - 1) * hy);
            }

            // allocate device vectors
            // Both buffers are initialised from the same host grid, so each one
            // carries the boundary values regardless of which is read or written.
            Cl.Mem input = Cl.CreateBuffer(context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadWrite,
                                           (IntPtr)(sizeof(float) * hData.Length), hData, out error);
            clSafeCall(error);
            Cl.Mem output = Cl.CreateBuffer(context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadWrite,
                                            (IntPtr)(sizeof(float) * hData.Length), hData, out error);
            clSafeCall(error);

            // Coefficients passed to the kernel; a is the sum of the four.
            float a1 = 2 * hy / hx;
            float a2 = 2 * hx / hy;
            float a3 = a1;
            float a4 = a2;
            float a  = a1 + a2 + a3 + a4;

            // setup kernel arguments
            // Argument 2 is given a byte size and no value - presumably a local-memory
            // scratch tile of (AREA_SIZE_Y + 2) x (AREA_SIZE_X + 2) floats; confirm against the kernel.
            clSafeCall(Cl.SetKernelArg(kernel, 2, (AREA_SIZE_Y + 2) * (AREA_SIZE_X + 2) * sizeof(float), null));
            clSafeCall(Cl.SetKernelArg(kernel, 3, dimX));
            clSafeCall(Cl.SetKernelArg(kernel, 4, dimY));
            clSafeCall(Cl.SetKernelArg(kernel, 5, stride));
            clSafeCall(Cl.SetKernelArg(kernel, 6, a1));
            clSafeCall(Cl.SetKernelArg(kernel, 7, a2));
            clSafeCall(Cl.SetKernelArg(kernel, 8, a3));
            clSafeCall(Cl.SetKernelArg(kernel, 9, a4));
            clSafeCall(Cl.SetKernelArg(kernel, 10, a));
            clSafeCall(Cl.SetKernelArg(kernel, 11, hx));
            clSafeCall(Cl.SetKernelArg(kernel, 12, hy));
            clSafeCall(Cl.SetKernelArg(kernel, 13, x0));
            clSafeCall(Cl.SetKernelArg(kernel, 14, y0));

            // 16 x 16 work-groups; the global size is the number of AREA_SIZE tiles needed
            // to cover the interior (dim - 2 points, rounded up) times 16 per dimension.
            IntPtr[] lo = { (IntPtr)16, (IntPtr)16 };
            IntPtr[] gl = { (IntPtr)((dimX - 2 + AREA_SIZE_X - 1) / AREA_SIZE_X * 16),
                            (IntPtr)((dimY - 2 + AREA_SIZE_Y - 1) / AREA_SIZE_Y * 16) };

            Cl.Mem curIn  = input;
            Cl.Mem curOut = output;

            // execute kernel (and perform data transfering silently)
            // This first launch completes before the stopwatch starts, so it is not timed.
            clSafeCall(Cl.SetKernelArg(kernel, 0, curIn));
            clSafeCall(Cl.SetKernelArg(kernel, 1, curOut));
            clSafeCall(Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 2, null, gl, lo, 0, null, out clevent));

            clSafeCall(Cl.Finish(cmdQueue));

            Stopwatch stopwatch = new Stopwatch();

            stopwatch.Start();

            // Remaining N - 1 launches: the previous output becomes the next input.
            for (uint idx = 1; idx < N; idx++)
            {
                // swap buffers
                Cl.Mem temp = curIn;
                curIn  = curOut;
                curOut = temp;

                // execute kernel
                clSafeCall(Cl.SetKernelArg(kernel, 0, curIn));
                clSafeCall(Cl.SetKernelArg(kernel, 1, curOut));
                clSafeCall(Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 2, null, gl, lo, 0, null, out clevent));
            }

            // Wait for all queued launches so the stopwatch covers their execution.
            clSafeCall(Cl.Finish(cmdQueue));

            stopwatch.Stop();

            // copy results from device back to host
            // curOut is the buffer written by the last launch.
            clSafeCall(Cl.EnqueueReadBuffer(cmdQueue, curOut, Cl.Bool.True, IntPtr.Zero,
                                            (IntPtr)(sizeof(float) * hData.Length), hData, 0, null, out clevent));

            clSafeCall(Cl.Finish(cmdQueue));

            float avgerr = 0, maxerr = 0;

            // Relative error against u() over the interior points only.
            for (uint i = 1; i < dimY - 1; i++)
            {
                for (uint j = 1; j < dimX - 1; j++)
                {
                    float theory = u(x0 + j * hx, y0 + i * hy);
                    float err    = Math.Abs(theory - hData[j + i * stride]) / Math.Abs(theory);
                    avgerr += err;
                    maxerr  = Math.Max(maxerr, err);
                }
            }
            // NOTE(review): the sum covers (dimX - 2) * (dimY - 2) interior points but is
            // divided by dimX * dimY, so the average is slightly understated - confirm intent.
            avgerr /= dimX * dimY;

            // Bandwidth estimate counting two floats per grid cell per launch.
            // NOTE(review): the stopwatch covers N - 1 launches while dataSizeTotal uses N.
            long   elapsedTime          = stopwatch.ElapsedMilliseconds;
            double dataSizePerIteration = dimX * dimY * 2 * sizeof(float);
            double dataSizeTotal        = dataSizePerIteration * N;
            double elapsedSeconds       = elapsedTime * 0.001;
            double gigabyteFactor       = 1 << 30;
            double bandwidth            = dataSizeTotal / (gigabyteFactor * elapsedSeconds);

            Console.WriteLine("avgerr = {0} maxerr = {1} elapsedTime = {2} ms bandwidth = {3} GB/s",
                              avgerr, maxerr, elapsedTime, bandwidth);

            Assert.That(maxerr, Is.LessThanOrEqualTo(5E-2F));
            Assert.That(avgerr, Is.LessThanOrEqualTo(1E-2F));
        }