/// <summary>
/// Reads the device buffer <c>inputNeurons.DeltaGPU</c> back to the host and returns
/// its contents widened from float to double.
/// </summary>
/// <returns>
/// A newly allocated array of <c>nInputUnits * inputNeurons.MiniBatchSize</c> values,
/// in the same order as the device buffer.
/// </returns>
public virtual double[] GetInputGradients()
{
    int elementCount = nInputUnits * inputNeurons.MiniBatchSize;
    double[] inputGradients = new double[elementCount];

    // Single-precision staging array that receives the device-to-host copy.
    float[] hostCopy = new float[elementCount];
    IntPtr byteCount = (IntPtr)(sizeof(float) * elementCount);

    OpenCLSpace.ClError = Cl.EnqueueReadBuffer(
        OpenCLSpace.Queue,
        inputNeurons.DeltaGPU, // source
        Bool.True,
        (IntPtr)0,
        byteCount,
        hostCopy,              // destination
        0,
        null,
        out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");

    // Widen every float to double in the returned array.
    int position = 0;
    foreach (float value in hostCopy)
    {
        inputGradients[position] = value;
        position++;
    }

    return inputGradients;
}
/// <summary>
/// Enqueues a read from <paramref name="buffer"/> into unmanaged memory at
/// <paramref name="destination"/> on this command queue.
/// </summary>
/// <param name="buffer">Source buffer; must not be <c>Buffer.Null</c>.</param>
/// <param name="blocking">Passed to the native call as 1 when true, 0 when false.</param>
/// <param name="offset">Forwarded unchanged as the native call's offset argument.</param>
/// <param name="count">Forwarded unchanged as the native call's size argument.</param>
/// <param name="destination">Destination pointer; must not be <c>IntPtr.Zero</c>.</param>
/// <param name="events">Optional events to wait on; may be null or empty.</param>
/// <returns>An <c>Event</c> wrapping the event handle produced by the native call.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="buffer"/> is <c>Buffer.Null</c> or <paramref name="destination"/> is zero.
/// </exception>
public Event EnqueueReadBuffer(Buffer buffer, bool blocking, ulong offset, ulong count, IntPtr destination, Event[] events)
{
    ClHelper.ThrowNullException(Handle);
    if (buffer == Buffer.Null)
    {
        throw new ArgumentNullException("buffer");
    }
    if (destination == IntPtr.Zero)
    {
        throw new ArgumentNullException("destination");
    }

    unsafe
    {
        int num_events_in_wait_list = events == null ? 0 : events.Length;
        IntPtr* wait_list = stackalloc IntPtr[num_events_in_wait_list];
        for (int i = 0; i < num_events_in_wait_list; ++i)
        {
            wait_list[i] = events[i].Handle;
        }

        // OpenCL requires the wait list pointer to be NULL whenever the count is 0.
        // Checking the count (not just events == null) also covers an empty array,
        // which would otherwise pass a non-null pointer together with a zero count.
        if (num_events_in_wait_list == 0)
        {
            wait_list = null;
        }

        IntPtr event_ptr = IntPtr.Zero;
        ClHelper.GetError(Cl.EnqueueReadBuffer(
            Handle,
            buffer.Handle,
            blocking ? 1u : 0u,
            new UIntPtr(offset),
            new UIntPtr(count),
            destination.ToPointer(),
            (uint)num_events_in_wait_list,
            wait_list,
            &event_ptr));
        return new Event(event_ptr);
    }
}
/// <summary>
/// Copies the device buffers <c>gammaGPU</c> and <c>betaGPU</c> into <c>gammaHost</c> and
/// <c>betaHost</c> (<c>inputDepth</c> floats each), then calls <c>Cl.Finish</c> on the queue.
/// Speeds are not saved.
/// </summary>
public override void CopyBuffersToHost()
{
    // Both parameter vectors have the same byte size.
    IntPtr parameterBytes = (IntPtr)(sizeof(float) * inputDepth);

    // gamma: device -> host
    OpenCLSpace.ClError = Cl.EnqueueReadBuffer(
        OpenCLSpace.Queue,
        gammaGPU,   // source
        Bool.True,
        (IntPtr)0,
        parameterBytes,
        gammaHost,  // destination
        0,
        null,
        out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer gammaGPU");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    // beta: device -> host
    OpenCLSpace.ClError = Cl.EnqueueReadBuffer(
        OpenCLSpace.Queue,
        betaGPU,    // source
        Bool.True,
        (IntPtr)0,
        parameterBytes,
        betaHost,   // destination
        0,
        null,
        out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer betaGPU");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");
}
/// <summary>
/// Runs the "ExternalLoopBody" kernel from <paramref name="program"/> on a six-element
/// int buffer and asserts the values read back from the device.
/// </summary>
/// <param name="program">Program from which the kernel is created.</param>
public void ExternalLoopBody(Cl.Program program)
{
    // Kernel and a dedicated command queue.
    Cl.Kernel kernel = Cl.CreateKernel(program, "ExternalLoopBody", out error);
    clSafeCall(error);

    Cl.CommandQueue cmdQueue = Cl.CreateCommandQueue(context, device, Cl.CommandQueueProperties.None, out error);
    clSafeCall(error);

    // Host data; also used to initialise the device buffer (CopyHostPtr).
    int[] hres = { 0, 1, 2, 3, 4, 5 };
    IntPtr resultBytes = (IntPtr)(sizeof(int) * hres.Length);

    Cl.Mem dres = Cl.CreateBuffer(
        context,
        Cl.MemFlags.ReadWrite | Cl.MemFlags.CopyHostPtr,
        resultBytes,
        hres,
        out error);
    clSafeCall(error);

    // Kernel arguments: the buffer and its element count.
    clSafeCall(Cl.SetKernelArg(kernel, 0, dres));
    clSafeCall(Cl.SetKernelArg(kernel, 1, hres.Length));

    // Launch with a global work size of one work-item.
    IntPtr[] globalWorkSize = new[] { (IntPtr)1 };
    clSafeCall(Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 1, null, globalWorkSize, null, 0, null, out clevent));

    // Read the result back into the host array.
    clSafeCall(Cl.EnqueueReadBuffer(cmdQueue, dres, Cl.Bool.True, IntPtr.Zero, resultBytes, hres, 0, null, out clevent));
    clSafeCall(Cl.Finish(cmdQueue));

    Assert.AreEqual(new[] { 1, 4, 3, 6, 5, 8 }, hres);
}
// images cannot be read_write... so let's continue using plain buffers
// should implement this in a way that allows imgfAccu to be loaded only once
// should test for image size consistency
/// <summary>
/// Runs the "accumulate" kernel over <paramref name="imgfAccu"/> and
/// <paramref name="imgfSrc"/> with factor <paramref name="k"/>, then reads the
/// accumulator buffer back into <c>imgfAccu._buf</c>.
/// </summary>
/// <param name="imgfAccu">Accumulator map; uploaded, processed and overwritten with the result.</param>
/// <param name="imgfSrc">Source map; uploaded as the kernel's second argument.</param>
/// <param name="k">Scalar passed as kernel argument 4.</param>
public void Accumulate(FloatMap imgfAccu, FloatMap imgfSrc, float k)
{
    var kernel = _kernels["accumulate"];

    IMem<float> accuMapBuffer = null;
    IMem<float> srcMapBuffer = null;
    try
    {
        // Creation of on-device memory objects
        // why MemFlags.CopyHostPtr doesn't work here (and forces me to manual copy) ???
        accuMapBuffer = Cl.CreateBuffer<float>(_context, MemFlags.ReadWrite, imgfAccu.Size, out err);
        assert(err, "accu buf creation");

        // NOTE(review): WriteOnly describes kernel-side access in OpenCL; if the kernel
        // reads this buffer it should probably be MemFlags.ReadOnly - confirm against the kernel source.
        srcMapBuffer = Cl.CreateBuffer<float>(_context, MemFlags.WriteOnly, imgfSrc.Size, out err);
        assert(err, "src buf creation");

        // Set memory objects as parameters to kernel
        err = Cl.SetKernelArg(kernel, 0, intPtrSize, accuMapBuffer);
        assert(err, "accu map setKernelArg");

        err = Cl.SetKernelArg(kernel, 1, intPtrSize, srcMapBuffer);
        assert(err, "src map setKernelArg");

        err = Cl.SetKernelArg(kernel, 2, intSize, imgfAccu.Stride);
        assert(err, "in stride setKernelArg");

        err = Cl.SetKernelArg(kernel, 3, intSize, imgfSrc.Stride);
        assert(err, "out stride setKernelArg");

        err = Cl.SetKernelArg(kernel, 4, floatSize, k);
        assert(err, "k setKernelArg");

        // write actual data into memory objects; the error is checked before the
        // event is touched so a failed enqueue is reported first.
        Event clevent;
        err = Cl.EnqueueWriteBuffer<float>(_commandsQueue, accuMapBuffer, Bool.True, 0, imgfAccu.Size, imgfAccu._buf, 0, null, out clevent);
        assert(err, "write accu buffer");
        clevent.Dispose();

        err = Cl.EnqueueWriteBuffer<float>(_commandsQueue, srcMapBuffer, Bool.True, 0, imgfSrc.Size, imgfSrc._buf, 0, null, out clevent);
        assert(err, "write src buffer");
        clevent.Dispose();

        // execute
        err = Cl.EnqueueNDRangeKernel(
            _commandsQueue,
            kernel,
            2,
            new[] { (IntPtr)0, (IntPtr)0, (IntPtr)0 },                           // offset
            new[] { new IntPtr(imgfAccu.W), new IntPtr(imgfAccu.H), (IntPtr)1 }, // range
            null,
            0,
            null,
            out clevent);
        assert(err, "Cl.EnqueueNDRangeKernel");
        clevent.Dispose();

        // sync
        err = Cl.Finish(_commandsQueue);
        assert(err, "Cl.Finish");

        // read from output memory object into actual buffer
        err = Cl.EnqueueReadBuffer<float>(_commandsQueue, accuMapBuffer, Bool.True, imgfAccu._buf, 0, null, out clevent);
        assert(err, "read output buffer");
        clevent.Dispose();
    }
    finally
    {
        // Release device memory even when one of the steps above fails.
        if (srcMapBuffer != null)
        {
            Cl.ReleaseMemObject(srcMapBuffer);
        }
        if (accuMapBuffer != null)
        {
            // maybe i could return this without disposing; would affect non-OpenCl implementation
            Cl.ReleaseMemObject(accuMapBuffer);
        }
    }
}
/// <summary>
/// Runs the crack-high kernel for <paramref name="sequence"/> and <paramref name="low"/>
/// and returns the single long value the kernel leaves in its output buffer.
/// </summary>
/// <param name="sequence">Input sequence; must contain exactly 16 elements.</param>
/// <param name="low">Value passed to the kernel as argument 2.</param>
/// <returns>
/// The value read back from the device, or -1 when the program/kernel is missing,
/// the class is not initialized, or <paramref name="sequence"/> does not have 16 elements.
/// </returns>
public static long CrackHigh(int[] sequence, long low)
{
    if (!crackHighProgram.HasValue || !crackHighKernel.HasValue || !initialized || sequence.Length != 16)
    {
        return -1;
    }

    ErrorCode error;

    IMem<int> sequence_dev = Cl.CreateBuffer<int>(context, MemFlags.CopyHostPtr | MemFlags.ReadOnly, sequence, out error);
    ErrorCheck(error, "CrackHigh(): Cl.CreateBuffer");

    long[] seeds = new long[1];
    IMem<long> seed_dev = Cl.CreateBuffer<long>(context, MemFlags.CopyHostPtr | MemFlags.WriteOnly, seeds, out error);
    ErrorCheck(error, "CrackHigh(): Cl.CreateBuffer");

    error = Cl.SetKernelArg(crackHighKernel.Value, 0, sequence_dev);
    ErrorCheck(error, "Cl.SetKernelArg");

    error = Cl.SetKernelArg(crackHighKernel.Value, 1, seed_dev);
    ErrorCheck(error, "Cl.SetKernelArg");

    error = Cl.SetKernelArg(crackHighKernel.Value, 2, (IntPtr)sizeof(long), low);
    ErrorCheck(error, "Cl.SetKernelArg");

    CommandQueue cmdQueue = Cl.CreateCommandQueue(context, device, 0, out error);
    ErrorCheck(error, "Cl.CreateCommandQueue");

    int maxGroupWorkSize = Cl.GetKernelWorkGroupInfo(crackHighKernel.Value, device, KernelWorkGroupInfo.WorkGroupSize, out error).CastTo<int>();
    ErrorCheck(error, "Cl.GetKernelWorkGroupInfo");

    Event e;
    int threads = maxGroupWorkSize * 12;
    IntPtr[] workSize = new IntPtr[] { (IntPtr)threads };

    error = Cl.EnqueueNDRangeKernel(cmdQueue, crackHighKernel.Value, 1, null, workSize, null, 0, null, out e);
    ErrorCheck(error, "Cl.EnqueueNDRangeKernel");
    // Release the kernel event before 'e' is reused, otherwise its handle is leaked.
    e.Dispose();

    error = Cl.Finish(cmdQueue);
    ErrorCheck(error, "Cl.Finish");

    long[] seed_host = new long[1];
    error = Cl.EnqueueReadBuffer(cmdQueue, seed_dev, Bool.True, (IntPtr)0, (IntPtr)(sizeof(long) * 1), seed_host, 0, null, out e);
    ErrorCheck(error, "CL.EnqueueReadBuffer");
    e.Dispose();

    // Release the OpenCL resources created by this call.
    error = Cl.ReleaseCommandQueue(cmdQueue);
    ErrorCheck(error, "CL.ReleaseCommandQueue");

    error = Cl.ReleaseMemObject(sequence_dev);
    ErrorCheck(error, "CL.ReleaseMemObject");

    error = Cl.ReleaseMemObject(seed_dev);
    ErrorCheck(error, "CL.ReleaseMemObject");

    return seed_host[0];
}
/// <summary>
/// Run network backwards, propagating the gradient backwards and also updating parameters.
/// Requires that gradient has ALREADY BEEN WRITTEN in network.Layers[nLayers-1].InputNeurons.Delta
/// </summary>
/// <param name="learningRate">Forwarded to each layer's <c>UpdateSpeeds</c>.</param>
/// <param name="momentumMultiplier">Forwarded to each layer's <c>UpdateSpeeds</c>.</param>
/// <param name="weightDecayCoeff">Forwarded to each layer's <c>UpdateSpeeds</c>.</param>
/// <param name="weightMaxNorm">Forwarded to each layer's <c>UpdateParameters</c>.</param>
public void BackwardPass(double learningRate, double momentumMultiplier, double weightDecayCoeff, double weightMaxNorm)
{
    // Propagate error signal backwards (layers L-2 to 1, i.e. second last to second).
    for (int l = nLayers - 2; l > 0; l--)
    {
        // 1. Update layer's parameters' change speed using gradient
        layers[l].UpdateSpeeds(learningRate, momentumMultiplier, weightDecayCoeff);

        // 2. Backpropagate errors to previous layer (no need to do it for layer 1)
        if (l > 1)
        {
            layers[l].BackPropagate();
        }

#if DEBUGGING_STEPBYSTEP
        /* ------------------------- DEBUGGING --------------------------------------------- */

        // Display input delta layer-by-layer
        int miniBatchSize = layers[0].OutputNeurons.MiniBatchSize;
#if OPENCL_ENABLED
        float[] deltaInputAll = new float[layers[l].InputNeurons.NumberOfUnits * miniBatchSize];
        OpenCLSpace.ClError = Cl.EnqueueReadBuffer(
            OpenCLSpace.Queue,
            layers[l].InputNeurons.DeltaGPU, // source
            Bool.True,
            (IntPtr)0,
            (IntPtr)(layers[l].InputNeurons.NumberOfUnits * miniBatchSize * sizeof(float)),
            deltaInputAll,                   // destination
            0,
            null,
            out OpenCLSpace.ClEvent);
        OpenCLSpace.CheckErr(OpenCLSpace.ClError, "NeuralNetwork.BackwardPass Cl.clEnqueueReadBuffer deltaInputAll");

        // Release the read event so stepping through many layers does not leak handles.
        OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
        OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");
#endif
        // NOTE(review): deltaInputAll is only declared when OPENCL_ENABLED is defined.
        Console.WriteLine("\nLayer {0} ({1}) backpropagated delta:", l, layers[l].Type);
        for (int m = 0; m < miniBatchSize; m++)
        {
            float[] deltaInput = new float[layers[l].InputNeurons.NumberOfUnits];
            Array.Copy(deltaInputAll, m * layers[l].InputNeurons.NumberOfUnits, deltaInput, 0, layers[l].InputNeurons.NumberOfUnits);

            Console.WriteLine("\n --- Mini-batch item {0} -----", m);
            for (int j = 0; j < deltaInput.Length; j++)
            {
                Console.Write("{0}  ", deltaInput[j]);
            }
            Console.WriteLine();
            Console.ReadKey();
        }

        /* ------------------------- END DEBUGGING --------------------------------------------- */
#endif

        // 3. Update layer's parameters
        layers[l].UpdateParameters(weightMaxNorm);
    }
}
/// <summary>
/// Reads <c>weightsGradientsGPU</c> and <c>biasesGradientsGPU</c> back to the host and
/// returns them concatenated (weights first, then biases) as doubles.
/// </summary>
/// <returns>
/// A new array of length <c>nInputUnits * nOutputUnits + nOutputUnits</c>.
/// </returns>
public override double[] GetParameterGradients()
{
    int weightCount = nInputUnits * nOutputUnits;
    int biasCount = nOutputUnits;
    double[] parameterGradients = new double[weightCount + biasCount];

    // Weight gradients: device -> host staging array.
    float[] weightGradHost = new float[weightCount];
    OpenCLSpace.ClError = Cl.EnqueueReadBuffer(
        OpenCLSpace.Queue,
        weightsGradientsGPU, // source
        Bool.True,
        (IntPtr)0,
        (IntPtr)(sizeof(float) * weightCount),
        weightGradHost,      // destination
        0,
        null,
        out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    // Bias gradients: device -> host staging array.
    float[] biasGradHost = new float[biasCount];
    OpenCLSpace.ClError = Cl.EnqueueReadBuffer(
        OpenCLSpace.Queue,
        biasesGradientsGPU,  // source
        Bool.True,
        (IntPtr)0,
        (IntPtr)(sizeof(float) * biasCount),
        biasGradHost,        // destination
        0,
        null,
        out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");

    // Weights occupy the front of the result, biases follow immediately after.
    for (int w = 0; w < weightCount; w++)
    {
        parameterGradients[w] = weightGradHost[w];
    }
    for (int b = 0; b < biasCount; b++)
    {
        parameterGradients[weightCount + b] = biasGradHost[b];
    }

    return parameterGradients;
}
/// <summary>
/// Enqueues a read of the memory object stored at <paramref name="argIndex"/> in
/// <c>_memObj</c> into <paramref name="localBuffer"/>, using the size stored at the same
/// index in <c>_memObjSize</c>. When the enqueue reports success, waits on the returned
/// event; any other error code is ignored.
/// </summary>
/// <param name="argIndex">Index into <c>_memObj</c> and <c>_memObjSize</c>.</param>
/// <param name="localBuffer">Destination pointer handed to the read call.</param>
public void ReadOutBuffer<T>(int argIndex, T* localBuffer) where T : unmanaged
{
    fixed (IntPtr* memObjects = _memObj)
    {
        ErrorCode status = Cl.EnqueueReadBuffer(
            _commandQueue,
            memObjects[argIndex],
            true.ToInt(),
            0,
            _memObjSize[argIndex],
            localBuffer,
            0,
            null,
            out Event readDone);

        if (status != ErrorCode.Success)
        {
            return;
        }

        readDone.WaitForComplete();
    }
}
/// <summary>
/// Runs the "annualizedReturns" and "aggregateReturnsKernel" kernels
/// <paramref name="iterations"/> times, measures the elapsed device time between two
/// queue markers, and reads the output buffer back to the host.
/// </summary>
/// <param name="totalMs">
/// Elapsed time between the start and end markers, in milliseconds
/// (OpenCL profiling counters are in nanoseconds).
/// </param>
/// <param name="cf">Not used by this method.</param>
/// <param name="iterations">Number of times the kernel pair is enqueued.</param>
/// <returns><c>_h_output[0] + _h_output[xSize - 1]</c> after the read-back.</returns>
public float CalculateVar(out double totalMs, float cf = .9f, int iterations = 10)
{
    // Both kernels are released by their using blocks; they must not be disposed
    // explicitly as well, or the kernel would be released twice.
    using (var annualizedReturnsKernel = _env.Context.CompileKernelFromSource(_programSource, "annualizedReturns"))
    using (var aggregateReturnsKernel = _env.Context.CompileKernelFromSource(_programSource, "aggregateReturnsKernel"))
    {
        Event clStart, clEnd;
        var queue = _env.CommandQueues[0];
        var xSize = (_stockData.QuotesPerStock - _holding);
        var ySize = _stockData.StocksCount;

        // NOTE(review): the error codes collected in 'err' are never inspected.
        var err = Cl.SetKernelArg(annualizedReturnsKernel, 0, _d_output);
        err = Cl.SetKernelArg(annualizedReturnsKernel, 1, _d_stocksAndPrices);
        err = Cl.SetKernelArg(annualizedReturnsKernel, 2, _d_portfolioStockMv);
        err = Cl.SetKernelArg(annualizedReturnsKernel, 3, _ann);
        err = Cl.SetKernelArg(annualizedReturnsKernel, 4, _holding);

        err = Cl.SetKernelArg(aggregateReturnsKernel, 0, _d_output);
        err = Cl.SetKernelArg(aggregateReturnsKernel, 1, ySize);

        Cl.EnqueueMarker(queue, out clStart);
        Cl.Finish(queue);

        for (int i = 0; i < iterations; i++)
        {
            err = Cl.EnqueueNDRangeKernel(
                queue,
                annualizedReturnsKernel,
                2,
                null,
                new[] { (IntPtr)xSize, (IntPtr)ySize },
                new[] { (IntPtr)256, (IntPtr)1 },
                0,
                null,
                out Event annualizedDone);
            // The event is not needed; release it so each iteration does not leak a handle.
            annualizedDone.Dispose();

            err = Cl.EnqueueNDRangeKernel(
                queue,
                aggregateReturnsKernel,
                2,
                null,
                new[] { (IntPtr)xSize, (IntPtr)1 },
                new[] { (IntPtr)256, (IntPtr)1 },
                0,
                null,
                out Event aggregateDone);
            aggregateDone.Dispose();
        }

        Cl.EnqueueMarker(queue, out clEnd);
        Cl.Finish(queue);

        var startInfo = Cl.GetEventProfilingInfo(clStart, ProfilingInfo.Start, out err);
        var endInfo = Cl.GetEventProfilingInfo(clEnd, ProfilingInfo.End, out err);

        Cl.EnqueueReadBuffer<float>(queue, _d_output, Bool.True, _h_output, 0, null, out Event readDone);
        readDone.Dispose();

        // Nanoseconds to milliseconds is a factor of 1e-6 (the literal 10e-6 equals 1e-5).
        totalMs = (endInfo.CastTo<ulong>() - startInfo.CastTo<ulong>()) * 1e-6;

        clStart.Dispose();
        clEnd.Dispose();

        return _h_output[0] + _h_output[xSize - 1];
    }
}
/// <summary>
/// Reads elements from <paramref name="buffer"/> into <paramref name="array"/> with the
/// blocking flag set, calls <c>Check()</c> on the returned error code, and disposes the
/// event produced by the read.
/// </summary>
/// <param name="commandQueue">Queue on which the read is enqueued.</param>
/// <param name="buffer">Source memory object.</param>
/// <param name="array">Destination array.</param>
/// <param name="offset">Offset into the buffer, in elements of <typeparamref name="T"/>.</param>
/// <param name="length">
/// Number of elements to read; -1 (the default) means <c>array.Length</c>.
/// </param>
/// <param name="waitFor">Events to wait on; null or empty means no wait list.</param>
public static void ReadFromBuffer<T>(this CommandQueue commandQueue, IMem buffer, T[] array, int offset = 0, long length = -1, params Event[] waitFor) where T : struct
{
    var elemSize = TypeSize<T>.SizeInt;

    // A caller can pass null explicitly for a params array; treat it like "no events".
    var waitCount = waitFor == null ? 0 : waitFor.Length;
    long elementCount = length == -1 ? array.Length : length;

    Event e;
    Cl.EnqueueReadBuffer(
        commandQueue,
        buffer,
        Bool.True,
        (IntPtr)((long)offset * elemSize), // 64-bit math so large offsets do not wrap in int arithmetic
        (IntPtr)(elementCount * elemSize),
        array,
        (uint)waitCount,
        waitCount == 0 ? null : waitFor,
        out e)
    .Check();
    e.Dispose();
}
/// <summary>
/// Runs the "ArrayCompare" kernel twice - first with two separate device buffers as
/// arguments 0 and 1, then with <c>dummy</c> for both - and asserts the bool results
/// read back from the third buffer after each run.
/// </summary>
/// <param name="program">Program from which the kernel is created.</param>
public void ArrayCompare(Cl.Program program)
{
    // Kernel and a dedicated command queue.
    Cl.Kernel kernel = Cl.CreateKernel(program, "ArrayCompare", out error);
    clSafeCall(error);

    Cl.CommandQueue cmdQueue = Cl.CreateCommandQueue(context, device, Cl.CommandQueueProperties.None, out error);
    clSafeCall(error);

    // Host-side result array and its byte size.
    bool[] res = { true, false, true, false };
    IntPtr resultBytes = (IntPtr)(sizeof(bool) * res.Length);
    IntPtr[] globalWorkSize = new[] { (IntPtr)1 };

    // Device buffers: two int-sized inputs and one result buffer.
    Cl.Mem dp1 = Cl.CreateBuffer(context, Cl.MemFlags.WriteOnly, (IntPtr)(sizeof(int)), IntPtr.Zero, out error);
    clSafeCall(error);

    Cl.Mem dp2 = Cl.CreateBuffer(context, Cl.MemFlags.WriteOnly, (IntPtr)(sizeof(int)), IntPtr.Zero, out error);
    clSafeCall(error);

    Cl.Mem dp3 = Cl.CreateBuffer(context, Cl.MemFlags.WriteOnly, resultBytes, IntPtr.Zero, out error);
    clSafeCall(error);

    // First run: distinct buffers as arguments 0 and 1.
    clSafeCall(Cl.SetKernelArg(kernel, 0, dp1));
    clSafeCall(Cl.SetKernelArg(kernel, 1, dp2));
    clSafeCall(Cl.SetKernelArg(kernel, 2, dp3));

    clSafeCall(Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 1, null, globalWorkSize, null, 0, null, out clevent));
    clSafeCall(Cl.EnqueueReadBuffer(cmdQueue, dp3, Cl.Bool.True, IntPtr.Zero, resultBytes, res, 0, null, out clevent));
    clSafeCall(Cl.Finish(cmdQueue));

    Assert.AreEqual(new[] { false, true, false, true }, res);

    // Second run: the same object (dummy) as arguments 0 and 1.
    clSafeCall(Cl.SetKernelArg(kernel, 0, dummy));
    clSafeCall(Cl.SetKernelArg(kernel, 1, dummy));

    clSafeCall(Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 1, null, globalWorkSize, null, 0, null, out clevent));
    clSafeCall(Cl.EnqueueReadBuffer(cmdQueue, dp3, Cl.Bool.True, IntPtr.Zero, resultBytes, res, 0, null, out clevent));
    clSafeCall(Cl.Finish(cmdQueue));

    Assert.AreEqual(new[] { true, false, true, false }, res);
}
/// <summary>
/// Reads from <paramref name="buffer"/> into <paramref name="data"/> on
/// <c>CommandQueue</c> with the blocking flag set and no wait list.
/// </summary>
/// <param name="buffer">Source device buffer.</param>
/// <param name="offset">Offset forwarded to <c>Cl.EnqueueReadBuffer</c>.</param>
/// <param name="length">Length forwarded to <c>Cl.EnqueueReadBuffer</c>.</param>
/// <param name="data">Destination array.</param>
/// <exception cref="System.InvalidOperationException">
/// The read did not return <c>ErrorCode.Success</c>.
/// </exception>
public void EnqueueReadBuffer<T>(IMem<T> buffer, int offset, int length, T[] data) where T : struct
{
    ErrorCode status = Cl.EnqueueReadBuffer(
        CommandQueue,
        buffer,
        Bool.True,
        offset,
        length,
        data,
        0,
        null,
        out var readEvent);

    // A failed read would otherwise leave 'data' untouched without any signal to the caller.
    if (status != ErrorCode.Success)
    {
        throw new System.InvalidOperationException("Cl.EnqueueReadBuffer failed: " + status);
    }

    // The read is blocking, so the event is no longer needed; release its handle.
    readEvent.Dispose();
}
/// <summary>
/// Reads <c>deltaGammaGPU</c> and <c>deltaBetaGPU</c> back to the host and returns them
/// as doubles: gamma gradients in the first <c>nInputUnits</c> entries, beta gradients
/// in the following <c>nInputUnits</c> entries.
/// </summary>
/// <returns>A new array of length <c>2 * nInputUnits</c>.</returns>
public override double[] GetParameterGradients()
{
    double[] parameterGradients = new double[2 * nInputUnits];

    // Gamma gradients: device -> host staging array.
    float[] gammaGradHost = new float[nInputUnits];
    OpenCLSpace.ClError = Cl.EnqueueReadBuffer(
        OpenCLSpace.Queue,
        deltaGammaGPU,  // source
        Bool.True,
        (IntPtr)0,
        (IntPtr)(sizeof(float) * nInputUnits),
        gammaGradHost,  // destination
        0,
        null,
        out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    // Beta gradients: device -> host staging array.
    float[] betaGradHost = new float[nInputUnits];
    OpenCLSpace.ClError = Cl.EnqueueReadBuffer(
        OpenCLSpace.Queue,
        deltaBetaGPU,   // source
        Bool.True,
        (IntPtr)0,
        (IntPtr)(sizeof(float) * nInputUnits),
        betaGradHost,   // destination
        0,
        null,
        out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");

    // Widen to double; gamma fills the first half, beta the second half.
    for (int unit = 0; unit < nInputUnits; unit++)
    {
        parameterGradients[unit] = gammaGradHost[unit];
        parameterGradients[nInputUnits + unit] = betaGradHost[unit];
    }

    return parameterGradients;
}
/// <summary>
/// Allocates an array of <paramref name="length"/> elements and fills it from
/// <paramref name="buffer"/> on <c>CommandQueue</c> with the blocking flag set and no wait list.
/// </summary>
/// <param name="buffer">Source device buffer.</param>
/// <param name="offset">Offset forwarded to <c>Cl.EnqueueReadBuffer</c>.</param>
/// <param name="length">Length of the returned array; also forwarded as the read length.</param>
/// <returns>The newly allocated array containing the data read back.</returns>
/// <exception cref="System.InvalidOperationException">
/// The read did not return <c>ErrorCode.Success</c>.
/// </exception>
public T[] ReadBuffer<T>(IMem<T> buffer, int offset, int length) where T : struct
{
    var data = new T[length];

    ErrorCode status = Cl.EnqueueReadBuffer(
        CommandQueue,
        buffer,
        Bool.True,
        offset,
        length,
        data,
        0,
        null,
        out var readEvent);

    // Without this check a failed read would silently return an array of default values.
    if (status != ErrorCode.Success)
    {
        throw new System.InvalidOperationException("Cl.EnqueueReadBuffer failed: " + status);
    }

    // The read is blocking, so the event is no longer needed; release its handle.
    readEvent.Dispose();

    return data;
}
/// <summary>
/// Enqueues this read command on the queue supplied as <paramref name="sender"/>.
/// Events named in <c>WaitList</c> that can be found are used as the wait list. The
/// resulting event is registered under <c>Name</c>, or disposed when <c>Name</c> is empty.
/// </summary>
/// <param name="sender">The <c>CommandQueue</c> wrapper to execute on.</param>
/// <exception cref="ArgumentException"><paramref name="sender"/> is not a <c>CommandQueue</c>.</exception>
/// <exception cref="Cl.Exception">The enqueue call did not return <c>ErrorCode.Success</c>.</exception>
public override void Execute(object sender)
{
    var commandQueue = sender as CommandQueue;
    if (commandQueue == null)
    {
        throw new ArgumentException("sender must be a non-null CommandQueue.", "sender");
    }

    // Materialise the wait list exactly once: the deferred query would otherwise be
    // re-evaluated for every Count()/ToArray() call, and the count passed to OpenCL
    // could disagree with the array actually passed.
    var waitEvents = (from name in WaitList
                      let ev = CommandQueue.FindEvent(name)
                      where ev != null
                      select ev.Value).ToArray();

    Event eventID;
    ErrorCode error;

    var mem = (Buffer == null) ? Mem : Buffer.Mem;
    var elementSize = (Buffer == null) ? ElementSize : Buffer.ElementSize;

    if (Data == null)
    {
        // No managed array supplied: read into the raw pointer.
        error = Cl.EnqueueReadBuffer(
            commandQueue.Queue,
            mem,
            Blocking ? Bool.True : Bool.False,
            (IntPtr)Offset,
            (IntPtr)(Count * elementSize),
            DataPtr,
            (uint)waitEvents.Length,
            waitEvents.Length == 0 ? null : waitEvents,
            out eventID);
    }
    else
    {
        error = Cl.EnqueueReadBuffer(
            commandQueue.Queue,
            mem,
            Blocking ? Bool.True : Bool.False,
            (IntPtr)Offset,
            (IntPtr)(Count * elementSize),
            Data,
            (uint)waitEvents.Length,
            waitEvents.Length == 0 ? null : waitEvents,
            out eventID);
    }

    if (error != ErrorCode.Success)
    {
        throw new Cl.Exception(error);
    }

    if (Name == string.Empty)
    {
        // Nobody can look the event up without a name, so release it right away.
        eventID.Dispose();
    }
    else
    {
        CommandQueue.AddEvent(Name, eventID);
    }
}
/// <summary>
/// Runs the "ArrayRefOut" kernel with two single-element int buffers (plus <c>dummy</c>
/// as argument 2) and asserts the values read back from both buffers.
/// </summary>
/// <param name="program">Program from which the kernel is created.</param>
public void ArrayRefOut(Cl.Program program)
{
    // create kernel
    Cl.Kernel kernel = Cl.CreateKernel(program, "ArrayRefOut", out error);
    clSafeCall(error);

    // create command queue
    Cl.CommandQueue cmdQueue = Cl.CreateCommandQueue(context, device, Cl.CommandQueueProperties.None, out error);
    clSafeCall(error);

    // allocate host vectors
    int[] hp1 = { 1 };
    int[] hp2 = { 2 };

    // allocate device vectors, initialised from the host arrays
    Cl.Mem dp1 = Cl.CreateBuffer(context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadWrite,
        (IntPtr)(sizeof(int) * hp1.Length), hp1, out error);
    clSafeCall(error);

    Cl.Mem dp2 = Cl.CreateBuffer(context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadWrite,
        (IntPtr)(sizeof(int) * hp2.Length), hp2, out error);
    clSafeCall(error);

    // setup kernel arguments
    clSafeCall(Cl.SetKernelArg(kernel, 0, dp1));
    clSafeCall(Cl.SetKernelArg(kernel, 1, dp2));
    clSafeCall(Cl.SetKernelArg(kernel, 2, dummy));

    // execute kernel
    clSafeCall(Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 1, null, new[] { (IntPtr)1 }, null, 0, null, out clevent));

    // copy results from device back to host; each read is sized by its own destination array
    clSafeCall(Cl.EnqueueReadBuffer(cmdQueue, dp1, Cl.Bool.True, IntPtr.Zero,
        (IntPtr)(sizeof(int) * hp1.Length), hp1, 0, null, out clevent));
    clSafeCall(Cl.EnqueueReadBuffer(cmdQueue, dp2, Cl.Bool.True, IntPtr.Zero,
        (IntPtr)(sizeof(int) * hp2.Length), hp2, 0, null, out clevent));

    clSafeCall(Cl.Finish(cmdQueue));

    Assert.AreEqual(5, hp1[0]);
    Assert.AreEqual(4, hp2[0]);
}
/// <summary>
/// Copies the device buffers <c>weightsGPU</c> (<c>nInputUnits * nOutputUnits</c> floats)
/// and <c>biasesGPU</c> (<c>nOutputUnits</c> floats) into <c>weightsHost</c> and
/// <c>biasesHost</c>, then calls <c>Cl.Finish</c> on the queue. Speeds are not saved.
/// </summary>
public override void CopyBuffersToHost()
{
    // weights: device -> host
    IntPtr weightBytes = (IntPtr)(sizeof(float) * nInputUnits * nOutputUnits);
    OpenCLSpace.ClError = Cl.EnqueueReadBuffer(
        OpenCLSpace.Queue,
        weightsGPU,   // source
        Bool.True,
        (IntPtr)0,
        weightBytes,
        weightsHost,  // destination
        0,
        null,
        out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer weightsGPU");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    // biases: device -> host
    IntPtr biasBytes = (IntPtr)(sizeof(float) * nOutputUnits);
    OpenCLSpace.ClError = Cl.EnqueueReadBuffer(
        OpenCLSpace.Queue,
        biasesGPU,    // source
        Bool.True,
        (IntPtr)0,
        biasBytes,
        biasesHost,   // destination
        0,
        null,
        out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer biasesGPU");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");
}
/// <summary>
/// Runs the "gradRho" kernel on the two input images and the flow field and returns
/// the four float arrays the kernel writes.
/// </summary>
/// <param name="I0">First image; bound to kernel argument 0, also supplies the output size.</param>
/// <param name="I1d">Second image; bound to kernel argument 1.</param>
/// <param name="Flow">Flow field; <c>Flow.Array[0]</c> and <c>Flow.Array[1]</c> are uploaded as arguments 2 and 3.</param>
/// <returns>
/// Four arrays of <c>I0.ImageHeight * I0.ImageWidth</c> floats read back from the buffers
/// bound to kernel arguments 4-7 (gradX, gradY, grad_2, rho_c), in that order.
/// </returns>
public float[][] calc_grad_rho_c(SimpleImage I0, SimpleImage I1d, FlowArray Flow)
{
    // NOTE(review): the error codes stored in 'error' are never inspected in this method.
    float[][] arrays = new float[4][];
    for (int i = 0; i < arrays.Length; i++)
    {
        arrays[i] = new float[I0.ImageHeight * I0.ImageWidth];
    }

    Mem leftImageMemObject = (Mem)Cl.CreateImage2D(context, MemFlags.ReadOnly | MemFlags.CopyHostPtr, SimpleImage.clImageFormat,
        (IntPtr)I0.ImageWidth, (IntPtr)I0.ImageHeight, (IntPtr)0, I0.ByteArray, out error);
    Mem rightImageMemObject = (Mem)Cl.CreateImage2D(context, MemFlags.ReadOnly | MemFlags.CopyHostPtr, SimpleImage.clImageFormat,
        (IntPtr)I1d.ImageWidth, (IntPtr)I1d.ImageHeight, (IntPtr)0, I1d.ByteArray, out error);

    IMem<float> uInputFlowMemObject = Cl.CreateBuffer<float>(context, MemFlags.ReadOnly, Flow.Width * Flow.Height * sizeof(float), out error);
    IMem<float> vInputFlowMemObject = Cl.CreateBuffer<float>(context, MemFlags.ReadOnly, Flow.Width * Flow.Height * sizeof(float), out error);

    IMem<float> gradXBuf = Cl.CreateBuffer<float>(context, MemFlags.WriteOnly, Flow.Height * Flow.Width * sizeof(float), out error);
    IMem<float> gradYBuf = Cl.CreateBuffer<float>(context, MemFlags.WriteOnly, Flow.Height * Flow.Width * sizeof(float), out error);
    IMem<float> grad_2Buf = Cl.CreateBuffer<float>(context, MemFlags.WriteOnly, Flow.Height * Flow.Width * sizeof(float), out error);
    IMem<float> rho_cBuf = Cl.CreateBuffer<float>(context, MemFlags.WriteOnly, Flow.Height * Flow.Width * sizeof(float), out error);

    Kernel _Kernel = Cl.CreateKernel(program, "gradRho", out error);
    error |= Cl.SetKernelArg(_Kernel, 0, leftImageMemObject);
    error |= Cl.SetKernelArg(_Kernel, 1, rightImageMemObject);
    error |= Cl.SetKernelArg<float>(_Kernel, 2, uInputFlowMemObject);
    error |= Cl.SetKernelArg<float>(_Kernel, 3, vInputFlowMemObject);
    error |= Cl.SetKernelArg<float>(_Kernel, 4, gradXBuf);
    error |= Cl.SetKernelArg<float>(_Kernel, 5, gradYBuf);
    error |= Cl.SetKernelArg<float>(_Kernel, 6, grad_2Buf);
    error |= Cl.SetKernelArg<float>(_Kernel, 7, rho_cBuf);
    error |= Cl.SetKernelArg(_Kernel, 8, I0.ImageWidth);
    error |= Cl.SetKernelArg(_Kernel, 9, I0.ImageHeight);

    Event _event;
    IntPtr[] originPtr = new IntPtr[] { (IntPtr)0, (IntPtr)0, (IntPtr)0 };
    IntPtr[] regionPtr = new IntPtr[] { (IntPtr)I0.ImageWidth, (IntPtr)I0.ImageHeight, (IntPtr)1 };
    IntPtr[] workGroupSizePtr = new IntPtr[] { (IntPtr)(Flow.Height * Flow.Width) };

    // Upload inputs. Every enqueue returns an event that is not used afterwards,
    // so it is released immediately instead of being leaked.
    error = Cl.EnqueueWriteImage(commandQueue, leftImageMemObject, Bool.True, originPtr, regionPtr, (IntPtr)0, (IntPtr)0, I0.ByteArray, 0, null, out _event);
    _event.Dispose();
    error = Cl.EnqueueWriteImage(commandQueue, rightImageMemObject, Bool.True, originPtr, regionPtr, (IntPtr)0, (IntPtr)0, I1d.ByteArray, 0, null, out _event);
    _event.Dispose();
    error = Cl.EnqueueWriteBuffer<float>(commandQueue, uInputFlowMemObject, Bool.True, Flow.Array[0], 0, null, out _event);
    _event.Dispose();
    error = Cl.EnqueueWriteBuffer<float>(commandQueue, vInputFlowMemObject, Bool.True, Flow.Array[1], 0, null, out _event);
    _event.Dispose();

    // Run the kernel with one work-item per flow element.
    error = Cl.EnqueueNDRangeKernel(commandQueue, _Kernel, 1, null, workGroupSizePtr, null, 0, null, out _event);
    _event.Dispose();
    error = Cl.Finish(commandQueue);

    // Read the four result buffers back, in the order they appear in 'arrays'.
    IMem<float>[] resultBuffers = { gradXBuf, gradYBuf, grad_2Buf, rho_cBuf };
    for (int i = 0; i < resultBuffers.Length; i++)
    {
        Cl.EnqueueReadBuffer<float>(commandQueue, resultBuffers[i], Bool.True, 0, (Flow.Width * Flow.Height), arrays[i], 0, null, out _event);
        _event.Dispose();
        error = Cl.Finish(commandQueue);
    }

    Cl.ReleaseMemObject(uInputFlowMemObject);
    Cl.ReleaseMemObject(vInputFlowMemObject);
    Cl.ReleaseMemObject(leftImageMemObject);
    Cl.ReleaseMemObject(rightImageMemObject);
    Cl.ReleaseMemObject(gradXBuf);
    Cl.ReleaseMemObject(gradYBuf);
    Cl.ReleaseMemObject(grad_2Buf);
    Cl.ReleaseMemObject(rho_cBuf);
    Cl.ReleaseKernel(_Kernel);

    return arrays;
}
/// <summary>
/// Runs the "calcP" kernel on the flow field and the two P fields and returns the two
/// updated P fields read back from the device.
/// </summary>
/// <param name="Flow">Flow field; its two component arrays are uploaded as kernel arguments 0 and 1.</param>
/// <param name="P1">First P field; its two component arrays are uploaded as arguments 4 and 5.</param>
/// <param name="P2">Second P field; its two component arrays are uploaded as arguments 6 and 7.</param>
/// <returns>
/// A two-element array: index 0 has the dimensions of <paramref name="P1"/> and is filled
/// from kernel outputs 8 and 9; index 1 has the dimensions of <paramref name="P2"/> and is
/// filled from kernel outputs 10 and 11.
/// </returns>
public FlowArray[] calc_P_field(FlowArray Flow, FlowArray P1, FlowArray P2)
{
    // NOTE(review): the error codes stored in 'error' are never inspected in this method.
    ErrorCode error;

    FlowArray outputFlow1 = new FlowArray();
    outputFlow1.Array = new float[2][];
    outputFlow1.Width = P1.Width;
    outputFlow1.Height = P1.Height;
    outputFlow1.Array[0] = new float[outputFlow1.Width * outputFlow1.Height];
    outputFlow1.Array[1] = new float[outputFlow1.Width * outputFlow1.Height];

    FlowArray outputFlow2 = new FlowArray();
    outputFlow2.Array = new float[2][];
    outputFlow2.Width = P2.Width;
    outputFlow2.Height = P2.Height;
    outputFlow2.Array[0] = new float[outputFlow2.Width * outputFlow2.Height];
    outputFlow2.Array[1] = new float[outputFlow2.Width * outputFlow2.Height];

    IMem<float> uInputFlowMemObject = Cl.CreateBuffer<float>(context, MemFlags.ReadOnly, Flow.Width * Flow.Height * sizeof(float), out error);
    IMem<float> vInputFlowMemObject = Cl.CreateBuffer<float>(context, MemFlags.ReadOnly, Flow.Width * Flow.Height * sizeof(float), out error);

    IMem<float> P11_input = Cl.CreateBuffer<float>(context, MemFlags.ReadOnly, P1.Width * P1.Height * sizeof(float), out error);
    IMem<float> P12_input = Cl.CreateBuffer<float>(context, MemFlags.ReadOnly, P1.Width * P1.Height * sizeof(float), out error);
    IMem<float> P21_input = Cl.CreateBuffer<float>(context, MemFlags.ReadOnly, P2.Width * P2.Height * sizeof(float), out error);
    IMem<float> P22_input = Cl.CreateBuffer<float>(context, MemFlags.ReadOnly, P2.Width * P2.Height * sizeof(float), out error);

    IMem<float> P11_output = Cl.CreateBuffer<float>(context, MemFlags.WriteOnly, outputFlow1.Width * outputFlow1.Height * sizeof(float), out error);
    IMem<float> P12_output = Cl.CreateBuffer<float>(context, MemFlags.WriteOnly, outputFlow1.Width * outputFlow1.Height * sizeof(float), out error);
    IMem<float> P21_output = Cl.CreateBuffer<float>(context, MemFlags.WriteOnly, outputFlow2.Width * outputFlow2.Height * sizeof(float), out error);
    IMem<float> P22_output = Cl.CreateBuffer<float>(context, MemFlags.WriteOnly, outputFlow2.Width * outputFlow2.Height * sizeof(float), out error);

    Kernel _Kernel = Cl.CreateKernel(program, "calcP", out error);
    error |= Cl.SetKernelArg<float>(_Kernel, 0, uInputFlowMemObject);
    error |= Cl.SetKernelArg<float>(_Kernel, 1, vInputFlowMemObject);
    error |= Cl.SetKernelArg<float>(_Kernel, 2, this.tau);
    error |= Cl.SetKernelArg<float>(_Kernel, 3, this.theta);
    error |= Cl.SetKernelArg<float>(_Kernel, 4, P11_input);
    error |= Cl.SetKernelArg<float>(_Kernel, 5, P12_input);
    error |= Cl.SetKernelArg<float>(_Kernel, 6, P21_input);
    error |= Cl.SetKernelArg<float>(_Kernel, 7, P22_input);
    error |= Cl.SetKernelArg<float>(_Kernel, 8, P11_output);
    error |= Cl.SetKernelArg<float>(_Kernel, 9, P12_output);
    error |= Cl.SetKernelArg<float>(_Kernel, 10, P21_output);
    error |= Cl.SetKernelArg<float>(_Kernel, 11, P22_output);
    error |= Cl.SetKernelArg(_Kernel, 12, Flow.Width);
    error |= Cl.SetKernelArg(_Kernel, 13, Flow.Height);

    Event _event;
    IntPtr[] workGroupSizePtr = new IntPtr[] { (IntPtr)(outputFlow1.Height * outputFlow1.Width) };

    // Upload inputs. Every enqueue returns an event that is not used afterwards,
    // so it is released immediately instead of being leaked.
    error = Cl.EnqueueWriteBuffer<float>(commandQueue, uInputFlowMemObject, Bool.True, Flow.Array[0], 0, null, out _event);
    _event.Dispose();
    error = Cl.EnqueueWriteBuffer<float>(commandQueue, vInputFlowMemObject, Bool.True, Flow.Array[1], 0, null, out _event);
    _event.Dispose();
    error = Cl.EnqueueWriteBuffer<float>(commandQueue, P11_input, Bool.True, P1.Array[0], 0, null, out _event);
    _event.Dispose();
    error = Cl.EnqueueWriteBuffer<float>(commandQueue, P12_input, Bool.True, P1.Array[1], 0, null, out _event);
    _event.Dispose();
    error = Cl.EnqueueWriteBuffer<float>(commandQueue, P21_input, Bool.True, P2.Array[0], 0, null, out _event);
    _event.Dispose();
    error = Cl.EnqueueWriteBuffer<float>(commandQueue, P22_input, Bool.True, P2.Array[1], 0, null, out _event);
    _event.Dispose();

    error = Cl.EnqueueNDRangeKernel(commandQueue, _Kernel, 1, null, workGroupSizePtr, null, 0, null, out _event);
    _event.Dispose();
    error = Cl.Finish(commandQueue);

    // Read the four output components back into the result flows.
    Cl.EnqueueReadBuffer<float>(commandQueue, P11_output, Bool.True, 0, (outputFlow1.Width * outputFlow1.Height), outputFlow1.Array[0], 0, null, out _event);
    _event.Dispose();
    error = Cl.Finish(commandQueue);

    Cl.EnqueueReadBuffer<float>(commandQueue, P12_output, Bool.True, 0, (outputFlow1.Width * outputFlow1.Height), outputFlow1.Array[1], 0, null, out _event);
    _event.Dispose();
    error = Cl.Finish(commandQueue);

    Cl.EnqueueReadBuffer<float>(commandQueue, P21_output, Bool.True, 0, (outputFlow2.Width * outputFlow2.Height), outputFlow2.Array[0], 0, null, out _event);
    _event.Dispose();
    error = Cl.Finish(commandQueue);

    Cl.EnqueueReadBuffer<float>(commandQueue, P22_output, Bool.True, 0, (outputFlow2.Width * outputFlow2.Height), outputFlow2.Array[1], 0, null, out _event);
    _event.Dispose();
    error = Cl.Finish(commandQueue);

    Cl.ReleaseMemObject(uInputFlowMemObject);
    Cl.ReleaseMemObject(vInputFlowMemObject);
    Cl.ReleaseMemObject(P11_input);
    Cl.ReleaseMemObject(P12_input);
    Cl.ReleaseMemObject(P21_input);
    Cl.ReleaseMemObject(P22_input);
    Cl.ReleaseMemObject(P11_output);
    Cl.ReleaseMemObject(P12_output);
    Cl.ReleaseMemObject(P21_output);
    Cl.ReleaseMemObject(P22_output);
    Cl.ReleaseKernel(_Kernel);

    FlowArray[] OutputFlows = new FlowArray[2];
    OutputFlows[0] = outputFlow1;
    OutputFlows[1] = outputFlow2;
    return OutputFlows;
}
/// <summary>
/// Runs the "divP_Flow" OpenCL kernel over <paramref name="inFlow"/> and returns the flow it produces.
/// </summary>
/// <remarks>
/// Ten input buffers are uploaded with blocking writes, the kernel is launched with a one-dimensional
/// global size of <c>inFlow.Height * inFlow.Width</c>, and the two output buffers are read back with
/// blocking reads. The enqueue and finish calls assign the <c>error</c> field, so on return it holds the
/// status of the final <c>Cl.Finish</c> only. Device buffers and the kernel are released even when an
/// exception escapes, and the event of every successful enqueue is released immediately.
/// </remarks>
/// <param name="Idx">Host data uploaded to the buffer bound to kernel argument 1.</param>
/// <param name="Idy">Host data uploaded to the buffer bound to kernel argument 2.</param>
/// <param name="grad_2">Host data uploaded to the buffer bound to kernel argument 9.</param>
/// <param name="rho_c">Host data uploaded to the buffer bound to kernel argument 0.</param>
/// <param name="inFlow">Input flow; <c>Array[0]</c>/<c>Array[1]</c> feed kernel arguments 3 and 4, and its width/height size every buffer.</param>
/// <param name="P1"><c>Array[0]</c>/<c>Array[1]</c> are uploaded to the buffers bound to kernel arguments 10 and 11.</param>
/// <param name="P2"><c>Array[0]</c>/<c>Array[1]</c> are uploaded to the buffers bound to kernel arguments 12 and 13.</param>
/// <returns>A new <c>FlowArray</c> with the dimensions of <paramref name="inFlow"/>, filled from kernel arguments 5 and 6.</returns>
public FlowArray calc_divP_Flow(float[] Idx, float[] Idy, float[] grad_2, float[] rho_c, FlowArray inFlow, FlowArray P1, FlowArray P2)
{
    var elementCount = inFlow.Width * inFlow.Height;
    // Same length value the original passed to every CreateBuffer call.
    var bufferLength = elementCount * sizeof(float);

    FlowArray outFlow = new FlowArray();
    outFlow.Array = new float[2][];
    outFlow.Width = inFlow.Width;
    outFlow.Height = inFlow.Height;
    outFlow.Array[0] = new float[elementCount];
    outFlow.Array[1] = new float[elementCount];

    IMem<float> grad_2Buf = Cl.CreateBuffer<float>(context, MemFlags.ReadOnly, bufferLength, out error);
    IMem<float> rho_cBuf = Cl.CreateBuffer<float>(context, MemFlags.ReadOnly, bufferLength, out error);
    IMem<float> IdxBuf = Cl.CreateBuffer<float>(context, MemFlags.ReadOnly, bufferLength, out error);
    IMem<float> IdyBuf = Cl.CreateBuffer<float>(context, MemFlags.ReadOnly, bufferLength, out error);
    IMem<float> InFlow_U = Cl.CreateBuffer<float>(context, MemFlags.ReadOnly, bufferLength, out error);
    IMem<float> InFlow_V = Cl.CreateBuffer<float>(context, MemFlags.ReadOnly, bufferLength, out error);
    IMem<float> divP11 = Cl.CreateBuffer<float>(context, MemFlags.ReadOnly, bufferLength, out error);
    IMem<float> divP12 = Cl.CreateBuffer<float>(context, MemFlags.ReadOnly, bufferLength, out error);
    IMem<float> divP21 = Cl.CreateBuffer<float>(context, MemFlags.ReadOnly, bufferLength, out error);
    IMem<float> divP22 = Cl.CreateBuffer<float>(context, MemFlags.ReadOnly, bufferLength, out error);
    IMem<float> OutFlow_U = Cl.CreateBuffer<float>(context, MemFlags.WriteOnly, bufferLength, out error);
    IMem<float> OutFlow_V = Cl.CreateBuffer<float>(context, MemFlags.WriteOnly, bufferLength, out error);
    Kernel divPFlowKernel = Cl.CreateKernel(program, "divP_Flow", out error);

    try
    {
        error |= Cl.SetKernelArg(divPFlowKernel, 0, rho_cBuf);
        error |= Cl.SetKernelArg(divPFlowKernel, 1, IdxBuf);
        error |= Cl.SetKernelArg<float>(divPFlowKernel, 2, IdyBuf);
        error |= Cl.SetKernelArg<float>(divPFlowKernel, 3, InFlow_U);
        error |= Cl.SetKernelArg<float>(divPFlowKernel, 4, InFlow_V);
        error |= Cl.SetKernelArg<float>(divPFlowKernel, 5, OutFlow_U);
        error |= Cl.SetKernelArg<float>(divPFlowKernel, 6, OutFlow_V);
        error |= Cl.SetKernelArg<float>(divPFlowKernel, 7, this.theta);
        error |= Cl.SetKernelArg<float>(divPFlowKernel, 8, this.lambda);
        error |= Cl.SetKernelArg<float>(divPFlowKernel, 9, grad_2Buf);
        error |= Cl.SetKernelArg<float>(divPFlowKernel, 10, divP11);
        error |= Cl.SetKernelArg<float>(divPFlowKernel, 11, divP12);
        error |= Cl.SetKernelArg<float>(divPFlowKernel, 12, divP21);
        error |= Cl.SetKernelArg<float>(divPFlowKernel, 13, divP22);
        error |= Cl.SetKernelArg<float>(divPFlowKernel, 14, threshold);
        error |= Cl.SetKernelArg(divPFlowKernel, 15, inFlow.Width);
        error |= Cl.SetKernelArg(divPFlowKernel, 16, inFlow.Height);

        Event enqueueEvent;
        IntPtr[] workGroupSizePtr = new IntPtr[] { (IntPtr)(inFlow.Height * inFlow.Width) };

        // Blocking uploads of every input.
        error = Cl.EnqueueWriteBuffer<float>(commandQueue, rho_cBuf, Bool.True, rho_c, 0, null, out enqueueEvent);
        ReleaseEnqueueEvent(error, enqueueEvent);
        error = Cl.EnqueueWriteBuffer<float>(commandQueue, IdxBuf, Bool.True, Idx, 0, null, out enqueueEvent);
        ReleaseEnqueueEvent(error, enqueueEvent);
        error = Cl.EnqueueWriteBuffer<float>(commandQueue, IdyBuf, Bool.True, Idy, 0, null, out enqueueEvent);
        ReleaseEnqueueEvent(error, enqueueEvent);
        error = Cl.EnqueueWriteBuffer<float>(commandQueue, grad_2Buf, Bool.True, grad_2, 0, null, out enqueueEvent);
        ReleaseEnqueueEvent(error, enqueueEvent);
        error = Cl.EnqueueWriteBuffer<float>(commandQueue, InFlow_U, Bool.True, inFlow.Array[0], 0, null, out enqueueEvent);
        ReleaseEnqueueEvent(error, enqueueEvent);
        error = Cl.EnqueueWriteBuffer<float>(commandQueue, InFlow_V, Bool.True, inFlow.Array[1], 0, null, out enqueueEvent);
        ReleaseEnqueueEvent(error, enqueueEvent);
        error = Cl.EnqueueWriteBuffer<float>(commandQueue, divP11, Bool.True, P1.Array[0], 0, null, out enqueueEvent);
        ReleaseEnqueueEvent(error, enqueueEvent);
        error = Cl.EnqueueWriteBuffer<float>(commandQueue, divP12, Bool.True, P1.Array[1], 0, null, out enqueueEvent);
        ReleaseEnqueueEvent(error, enqueueEvent);
        error = Cl.EnqueueWriteBuffer<float>(commandQueue, divP21, Bool.True, P2.Array[0], 0, null, out enqueueEvent);
        ReleaseEnqueueEvent(error, enqueueEvent);
        error = Cl.EnqueueWriteBuffer<float>(commandQueue, divP22, Bool.True, P2.Array[1], 0, null, out enqueueEvent);
        ReleaseEnqueueEvent(error, enqueueEvent);

        error = Cl.EnqueueNDRangeKernel(commandQueue, divPFlowKernel, 1, null, workGroupSizePtr, null, 0, null, out enqueueEvent);
        ReleaseEnqueueEvent(error, enqueueEvent);
        error = Cl.Finish(commandQueue);

        // The read statuses used to be discarded; they are now captured so the event is only
        // released when the read was actually enqueued.
        error = Cl.EnqueueReadBuffer<float>(commandQueue, OutFlow_U, Bool.True, 0, elementCount, outFlow.Array[0], 0, null, out enqueueEvent);
        ReleaseEnqueueEvent(error, enqueueEvent);
        error = Cl.Finish(commandQueue);
        error = Cl.EnqueueReadBuffer<float>(commandQueue, OutFlow_V, Bool.True, 0, elementCount, outFlow.Array[1], 0, null, out enqueueEvent);
        ReleaseEnqueueEvent(error, enqueueEvent);
        error = Cl.Finish(commandQueue);
    }
    finally
    {
        Cl.ReleaseMemObject(grad_2Buf);
        Cl.ReleaseMemObject(rho_cBuf);
        Cl.ReleaseMemObject(IdxBuf);
        Cl.ReleaseMemObject(IdyBuf);
        Cl.ReleaseMemObject(InFlow_U);
        Cl.ReleaseMemObject(InFlow_V);
        Cl.ReleaseMemObject(divP11);
        Cl.ReleaseMemObject(divP12);
        Cl.ReleaseMemObject(divP21);
        Cl.ReleaseMemObject(divP22);
        Cl.ReleaseMemObject(OutFlow_U);
        Cl.ReleaseMemObject(OutFlow_V);
        Cl.ReleaseKernel(divPFlowKernel);
    }

    return outFlow;
}

/// <summary>Releases the event produced by an enqueue call, but only when that call reported success.</summary>
private static void ReleaseEnqueueEvent(ErrorCode status, Event enqueueEvent)
{
    if (status == ErrorCode.Success)
    {
        Cl.ReleaseEvent(enqueueEvent);
    }
}
/// <summary>
/// Experimental driver: builds <c>scrypt.cl</c>, creates its "search" kernel and pushes one
/// write / launch / read cycle through a freshly created command queue.
/// </summary>
/// <remarks>
/// Every OpenCL status is passed to <c>CheckErr</c>. Returns early, after writing a console message,
/// when the kernel source file does not exist.
/// </remarks>
public void ScryptTest()
{
    ErrorCode error;

    // Load and compile kernel source code.
    string programPath = System.Environment.CurrentDirectory + "/../../scrypt.cl";
    if (!System.IO.File.Exists(programPath))
    {
        Console.WriteLine("Program doesn't exist at path " + programPath);
        return;
    }

    string programSource = System.IO.File.ReadAllText(programPath);
    Program program = Cl.CreateProgramWithSource(_context, 1, new[] { programSource }, null, out error);
    CheckErr(error, "Cl.CreateProgramWithSource");

    // Compile kernel source.
    error = Cl.BuildProgram(program, 1, new[] { _device }, "-D LOOKUP_GAP=1 -D CONCURRENT_THREADS=1 -D WORKSIZE=1", null, IntPtr.Zero);
    CheckErr(error, "Cl.BuildProgram");

    // NOTE(review): the trailing "&& 1 == 0" makes this branch unreachable, so a failed build is
    // never reported here. Left untouched because it looks deliberately disabled - confirm.
    if (Cl.GetProgramBuildInfo(program, _device, ProgramBuildInfo.Status, out error).CastTo<BuildStatus>() != BuildStatus.Success && 1 == 0)
    {
        CheckErr(error, "Cl.GetProgramBuildInfo");
        Console.WriteLine("Cl.GetProgramBuildInfo != Success");
        Console.WriteLine(Cl.GetProgramBuildInfo(program, _device, ProgramBuildInfo.Log, out error));
        return;
    }

    // Create the required kernel (entry function) [search].
    Kernel kernel = Cl.CreateKernel(program, "search", out error);
    CheckErr(error, "Cl.CreateKernel");

    // Create a command queue, where all of the commands for execution will be added.
    CommandQueue cmdQueue = Cl.CreateCommandQueue(_context, _device, (CommandQueueProperties)0, out error);
    CheckErr(error, "Cl.CreateCommandQueue");

    clState _clState = new clState();
    _clState.cl_command_queue = cmdQueue;
    _clState.cl_kernel = kernel;
    _clState.cl_context = _context;

    IntPtr buffersize = new IntPtr(1024);
    // NOTE(review): blank_res is handed to EnqueueWriteBuffer as the source data - confirm that an
    // IntPtr with the value 1024 is really what should be written.
    IntPtr blank_res = new IntPtr(1024);
    Object thrdataRes = new Object();
    OpenCL.Net.Event clevent;

    dev_blk_ctx blk = new dev_blk_ctx();
    ErrorCode err = queue_scrypt_kernel(_clState, blk);
    CheckErr(err, "queue_scrypt_kernel");

    ErrorCode status = Cl.EnqueueWriteBuffer(_clState.cl_command_queue, _clState.outputBuffer, OpenCL.Net.Bool.True, new IntPtr(0), buffersize, blank_res, 0, null, out clevent);
    CheckErr(status, "Cl.EnqueueWriteBuffer");

    // NOTE(review): both size arrays are empty although workdim is 1 - confirm the intended sizes.
    IntPtr[] globalThreads = new IntPtr[0];
    IntPtr[] localThreads = new IntPtr[0];
    uint workdim = 1;
    status = Cl.EnqueueNDRangeKernel(_clState.cl_command_queue, _clState.cl_kernel, workdim, null, globalThreads, localThreads, 0, null, out clevent);
    // The launch result lives in 'status'; the original checked the stale 'error' value instead.
    CheckErr(status, "Cl.EnqueueNDRangeKernel");

    IntPtr offset = new IntPtr(0);
    status = Cl.EnqueueReadBuffer(_clState.cl_command_queue, _clState.outputBuffer, OpenCL.Net.Bool.False, offset, buffersize, thrdataRes, 0, null, out clevent);
    CheckErr(status, "Cl.EnqueueReadBuffer");

    // Wait for completion of all calculations on the GPU.
    error = Cl.Finish(_clState.cl_command_queue);
    CheckErr(error, "Cl.Finish");

    // Clean up.
    Cl.ReleaseKernel(_clState.cl_kernel);
    Cl.ReleaseCommandQueue(_clState.cl_command_queue);
    Cl.ReleaseProgram(program);
}
/// <summary>
/// Renders one frame for <paramref name="camera"/> with the OpenCL kernel and returns it as a
/// 32bpp ARGB bitmap.
/// </summary>
/// <remarks>
/// The two model buffers are created on every call; they are now released before returning (they used
/// to leak once per bake), and the event of every successful enqueue is released as well.
/// Failures are logged through <c>Log.Print</c>; a bitmap is still built from the host byte array.
/// </remarks>
/// <param name="camera">Camera wrapped in a <c>C_CAMERA</c> and uploaded to the input buffer.</param>
/// <returns>Bitmap created from the bytes read back from the output buffer.</returns>
public static Bitmap Bake(Camera camera)
{
    bakeSW.Start();
    Vector2I res = Graphics.GetRenderResolution();
    int totPixels = res.x * res.y;
    C_CAMERA cam = new C_CAMERA(camera);

    angle += rotationSpeed * Time.DeltaTime;
    rot = Quaternion.CreateFromAxisAngle((Vector3)axis, (float)(angle * 0.0174533D));
    float time = (float)Time.TimeSinceStart;

    Voxel sword = Voxel.GenerateDebug(new int3(32, 32, 32));
    C_VOXEL model = new C_VOXEL(sword);

    int modelSizeSize;
    int modelColorsSize;
    unsafe
    {
        modelSizeSize = sizeof(int3);
        modelColorsSize = sizeof(Colour32) * model.colors.Length;
    }

    ErrorCode error = ErrorCode.Success;
    Mem memModelSize = (Mem)Cl.CreateBuffer(gpu_context, MemFlags.ReadOnly, modelSizeSize, out error);
    Mem memModelColors = (Mem)Cl.CreateBuffer(gpu_context, MemFlags.ReadOnly, modelColorsSize, out error);

    try
    {
        Cl.SetKernelArg(kernel, 2, (IntPtr)intPtrSize, memModelSize);
        Cl.SetKernelArg(kernel, 3, (IntPtr)intPtrSize, memModelColors);

        Event event0;
        try
        {
            error = Cl.EnqueueWriteBuffer(Queue, (IMem)memInput, Bool.True, IntPtr.Zero, new IntPtr(inputSize), cam, 0, null, out event0);
            if (error == ErrorCode.Success)
            {
                Cl.ReleaseEvent(event0);
            }

            error = Cl.EnqueueWriteBuffer(Queue, (IMem)memModelSize, Bool.True, IntPtr.Zero, new IntPtr(modelSizeSize), model.size, 0, null, out event0);
            if (error == ErrorCode.Success)
            {
                Cl.ReleaseEvent(event0);
            }

            error = Cl.EnqueueWriteBuffer(Queue, (IMem)memModelColors, Bool.True, IntPtr.Zero, new IntPtr(modelColorsSize), model.colors, 0, null, out event0);
            if (error == ErrorCode.Success)
            {
                Cl.ReleaseEvent(event0);
            }

            error = Cl.EnqueueWriteBuffer(Queue, (IMem)memTime, Bool.True, IntPtr.Zero, new IntPtr(sizeof(float)), time, 0, null, out event0);
            if (error == ErrorCode.Success)
            {
                Cl.ReleaseEvent(event0);
            }
        }
        catch (Exception e)
        {
            Log.Print("Error when enqueuing buffer:\n\t-OpenCL Error:" + error.ToString() + "\n\t-DotNet Error: " + e);
        }

        byte[] bp = new byte[totPixels * 4];

        sw.Start();
        error = Cl.EnqueueNDRangeKernel(Queue, kernel, 1, null, workGroupSizePtr, null, 0, null, out event0);
        if (error != ErrorCode.Success)
        {
            Log.Print("Error when enqueuing the NDRange of kernel: " + error.ToString());
        }
        else
        {
            Cl.ReleaseEvent(event0);
        }

        Cl.Finish(Queue);

        ErrorCode execError = Cl.EnqueueReadBuffer(Queue, (IMem)memOutput, Bool.True, IntPtr.Zero, memory, bp, 0, null, out event0);
        if (execError != ErrorCode.Success)
        {
            Log.Print("Error while rendering: " + execError.ToString());
        }
        else
        {
            Cl.ReleaseEvent(event0);
        }

        sw.Stop();
        RenderTime = sw.Elapsed.TotalMilliseconds;
        sw.Reset();

        Bitmap bm = Imaging.RawToImage(bp, res.x, res.y, System.Drawing.Imaging.PixelFormat.Format32bppArgb);
        return bm;
    }
    finally
    {
        // Per-call buffers: kernel arguments 2 and 3 are re-bound at the start of every Bake call.
        // NOTE(review): confirm nothing else launches this kernel without re-binding those arguments.
        Cl.ReleaseMemObject((IMem)memModelSize);
        Cl.ReleaseMemObject((IMem)memModelColors);
    }
}
/// <summary>
/// Runs the "MatMul" kernel on two randomly filled host matrices and asserts that every element of the
/// device result matches a host-side reference product within a relative error of 1E-3.
/// </summary>
/// <remarks>
/// Device buffers, the kernel, the command queue and the enqueue events are released before the
/// host-side comparison, so a failing assertion no longer leaves GPU objects behind.
/// </remarks>
public void MatMul()
{
    if (!prepared)
    {
        Prepare(this.BuildIR().InlineIR());
        prepared = true;
    }

    // create kernel
    Cl.Kernel kernel = Cl.CreateKernel(program, "MatMul", out error);
    clSafeCall(error);

    // create command queue
    Cl.CommandQueue cmdQueue = Cl.CreateCommandQueue(context, device, Cl.CommandQueueProperties.None, out error);
    clSafeCall(error);

    // allocate host matrices
    float[] A = new float[WA * HA];
    float[] B = new float[WB * HB];
    float[] C = new float[WC * HC];

    // initialize host memory
    Random rand = new Random();
    for (int i = 0; i < A.Length; i++)
    {
        A[i] = (float)rand.Next() / short.MaxValue;
    }

    for (int i = 0; i < B.Length; i++)
    {
        B[i] = (float)rand.Next() / short.MaxValue;
    }

    // allocate device vectors
    Cl.Mem hDeviceMemA = Cl.CreateBuffer(context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadOnly, (IntPtr)(sizeof(float) * A.Length), A, out error);
    clSafeCall(error);
    Cl.Mem hDeviceMemB = Cl.CreateBuffer(context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadOnly, (IntPtr)(sizeof(float) * B.Length), B, out error);
    clSafeCall(error);
    Cl.Mem hDeviceMemC = Cl.CreateBuffer(context, Cl.MemFlags.WriteOnly, (IntPtr)(sizeof(float) * C.Length), IntPtr.Zero, out error);
    clSafeCall(error);

    // setup kernel arguments (3 and 4 are size-only arguments with a null value)
    clSafeCall(Cl.SetKernelArg(kernel, 0, hDeviceMemA));
    clSafeCall(Cl.SetKernelArg(kernel, 1, hDeviceMemB));
    clSafeCall(Cl.SetKernelArg(kernel, 2, hDeviceMemC));
    clSafeCall(Cl.SetKernelArg(kernel, 3, BLOCK_SIZE * BLOCK_SIZE * sizeof(float), null));
    clSafeCall(Cl.SetKernelArg(kernel, 4, BLOCK_SIZE * BLOCK_SIZE * sizeof(float), null));
    clSafeCall(Cl.SetKernelArg(kernel, 5, WA));
    clSafeCall(Cl.SetKernelArg(kernel, 6, WB));

    // execute kernel
    clSafeCall(Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 2, null, new[] { (IntPtr)WC, (IntPtr)HC }, new[] { (IntPtr)BLOCK_SIZE, (IntPtr)BLOCK_SIZE }, 0, null, out clevent));
    Cl.ReleaseEvent(clevent);

    // copy results from device back to host (blocking read)
    clSafeCall(Cl.EnqueueReadBuffer(cmdQueue, hDeviceMemC, Cl.Bool.True, IntPtr.Zero, (IntPtr)(sizeof(float) * C.Length), C, 0, null, out clevent));
    Cl.ReleaseEvent(clevent);
    clSafeCall(Cl.Finish(cmdQueue));

    // All results are on the host now; free the device objects before asserting.
    Cl.ReleaseMemObject(hDeviceMemA);
    Cl.ReleaseMemObject(hDeviceMemB);
    Cl.ReleaseMemObject(hDeviceMemC);
    Cl.ReleaseKernel(kernel);
    Cl.ReleaseCommandQueue(cmdQueue);

    // compare against a host-side reference product
    for (int i = 0; i < HA; ++i)
    {
        for (int j = 0; j < WB; ++j)
        {
            float sum = 0;
            for (int k = 0; k < WA; ++k)
            {
                sum += A[i * WA + k] * B[k * WB + j];
            }

            float err = Math.Abs((sum - C[i * WB + j]) / sum);
            Assert.That(err, Is.LessThanOrEqualTo(1E-3F));
        }
    }
}
/// <summary>
/// Runs the "VecAdd" kernel on two randomly filled vectors of 1024 floats and asserts that every
/// element of the device result matches the host-side sum within a relative error of 1E-3.
/// </summary>
/// <remarks>
/// Device buffers, the kernel, the command queue and the enqueue events are released before the
/// host-side comparison, so a failing assertion no longer leaves GPU objects behind.
/// </remarks>
public void VecAdd()
{
    if (!prepared)
    {
        Prepare(this.BuildIR().InlineIR());
        prepared = true;
    }

    // create kernel
    Cl.Kernel kernel = Cl.CreateKernel(program, "VecAdd", out error);
    clSafeCall(error);

    // create command queue
    Cl.CommandQueue cmdQueue = Cl.CreateCommandQueue(context, device, Cl.CommandQueueProperties.None, out error);
    clSafeCall(error);

    int length = 1 << 10;

    // allocate host vectors
    float[] A = new float[length];
    float[] B = new float[length];
    float[] C = new float[length];

    // initialize host memory
    Random rand = new Random();
    for (int i = 0; i < length; i++)
    {
        A[i] = (float)rand.Next() / short.MaxValue;
        B[i] = (float)rand.Next() / short.MaxValue;
    }

    // allocate device vectors
    Cl.Mem hDeviceMemA = Cl.CreateBuffer(context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadOnly, (IntPtr)(sizeof(float) * length), A, out error);
    clSafeCall(error);
    Cl.Mem hDeviceMemB = Cl.CreateBuffer(context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadOnly, (IntPtr)(sizeof(float) * length), B, out error);
    clSafeCall(error);
    Cl.Mem hDeviceMemC = Cl.CreateBuffer(context, Cl.MemFlags.WriteOnly, (IntPtr)(sizeof(float) * length), IntPtr.Zero, out error);
    clSafeCall(error);

    // setup kernel arguments
    clSafeCall(Cl.SetKernelArg(kernel, 0, hDeviceMemA));
    clSafeCall(Cl.SetKernelArg(kernel, 1, hDeviceMemB));
    clSafeCall(Cl.SetKernelArg(kernel, 2, hDeviceMemC));
    clSafeCall(Cl.SetKernelArg(kernel, 3, length));

    // execute kernel
    clSafeCall(Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 1, null, new[] { (IntPtr)length }, new[] { (IntPtr)256 }, 0, null, out clevent));
    Cl.ReleaseEvent(clevent);

    // copy results from device back to host (blocking read)
    clSafeCall(Cl.EnqueueReadBuffer(cmdQueue, hDeviceMemC, Cl.Bool.True, IntPtr.Zero, (IntPtr)(sizeof(float) * length), C, 0, null, out clevent));
    Cl.ReleaseEvent(clevent);
    clSafeCall(Cl.Finish(cmdQueue));

    // All results are on the host now; free the device objects before asserting.
    Cl.ReleaseMemObject(hDeviceMemA);
    Cl.ReleaseMemObject(hDeviceMemB);
    Cl.ReleaseMemObject(hDeviceMemC);
    Cl.ReleaseKernel(kernel);
    Cl.ReleaseCommandQueue(cmdQueue);

    // compare against the host-side sum
    for (int i = 0; i < length; i++)
    {
        float sum = A[i] + B[i];
        float err = Math.Abs((sum - C[i]) / sum);
        Assert.That(err, Is.LessThanOrEqualTo(1E-3F));
    }
}
/// <summary>
/// Reads data from the GPU buffer into a host array using a blocking read.
/// </summary>
/// <param name="data">Where to store the read data; the read starts at offset 0 and uses <c>data.Length</c> as its length.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="data"/> is null.</exception>
/// <exception cref="System.InvalidOperationException">The OpenCL read did not report success.</exception>
public void ReadData(T[] data)
{
    if (data == null)
    {
        throw new System.ArgumentNullException("data");
    }

    Event readEvent;
    ErrorCode status = Cl.EnqueueReadBuffer(_handle.Queue, _buffer, Bool.True, 0, data.Length, data, 0, null, out readEvent);
    if (status != ErrorCode.Success)
    {
        // Previously the status was dropped and callers silently received stale contents.
        throw new System.InvalidOperationException("Cl.EnqueueReadBuffer failed: " + status);
    }

    // The read is blocking, so the event has no further use.
    Cl.ReleaseEvent(readEvent);
}
/// <summary>
/// Handles an HTTP request: GET returns the upload form, POST builds <c>Kernel.cl</c>, runs its
/// "answer" kernel over 100 random integers and returns the input/output pairs as HTML.
/// </summary>
/// <param name="request">Incoming request; only its <c>Method</c> and <c>Body</c> are read.</param>
/// <returns>
/// A 200 response with an HTML body for GET and for a successful POST, a 404 response when
/// <c>Kernel.cl</c> is missing or fails to build, and a 501 response for any other method.
/// </returns>
public HTTPResponse GetResponse(HTTPRequest request)
{
    const int elementCount = 100;

    HTTPResponse response = new HTTPResponse(200);
    StringBuilder sb = new StringBuilder();
    ErrorCode error;

    if (!_isInit)
    {
        init();
        _isInit = true;
    }

    if (request.Method == HTTPRequest.METHOD_GET)
    {
        sb.Append("<html><body>");
        sb.Append(GenUploadForm());
        sb.Append("</body></html>");
        response.Body = Encoding.UTF8.GetBytes(sb.ToString());
        return response;
    }

    if (request.Method != HTTPRequest.METHOD_POST)
    {
        return new HTTPResponse(501);
    }

    // NOTE(review): the raw request body is emitted unescaped, ahead of the <html> element.
    // Kept for compatibility - confirm this echo is intended.
    sb.Append(request.Body);

    string programPath = System.Environment.CurrentDirectory + "/Kernel.cl";
    if (!System.IO.File.Exists(programPath))
    {
        Console.WriteLine("Program doesn't exist at path " + programPath);
        return new HTTPResponse(404);
    }

    sb.Append("<html><body>");
    string programSource = System.IO.File.ReadAllText(programPath);

    using (OpenCL.Net.Program program = Cl.CreateProgramWithSource(_context, 1, new[] { programSource }, null, out error))
    {
        LogError(error, "Cl.CreateProgramWithSource");
        error = Cl.BuildProgram(program, 1, new[] { _device }, string.Empty, null, IntPtr.Zero);
        LogError(error, "Cl.BuildProgram");

        if (Cl.GetProgramBuildInfo(program, _device, ProgramBuildInfo.Status, out error).CastTo<OpenCL.Net.BuildStatus>() != BuildStatus.Success)
        {
            LogError(error, "Cl.GetProgramBuildInfo");
            Console.WriteLine("Cl.GetProgramBuildInfo != Success");
            Console.WriteLine(Cl.GetProgramBuildInfo(program, _device, ProgramBuildInfo.Log, out error));
            return new HTTPResponse(404);
        }

        Kernel kernel = Cl.CreateKernel(program, "answer", out error);
        LogError(error, "Cl.CreateKernel");

        Random rand = new Random();
        int[] input = Enumerable.Range(0, elementCount).Select(i => rand.Next()).ToArray();
        int[] output = new int[elementCount];

        var buffIn = _context.CreateBuffer(input, MemFlags.ReadOnly);
        var buffOut = _context.CreateBuffer(output, MemFlags.WriteOnly);

        int IntPtrSize = Marshal.SizeOf(typeof(IntPtr));
        error = Cl.SetKernelArg(kernel, 0, (IntPtr)IntPtrSize, buffIn);
        error |= Cl.SetKernelArg(kernel, 1, (IntPtr)IntPtrSize, buffOut);
        LogError(error, "Cl.SetKernelArg");

        CommandQueue cmdQueue = Cl.CreateCommandQueue(_context, _device, (CommandQueueProperties)0, out error);
        LogError(error, "Cl.CreateCommandQueue");

        try
        {
            Event clevent;
            error = Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 2, null, new[] { (IntPtr)elementCount, (IntPtr)1 }, null, 0, null, out clevent);
            LogError(error, "Cl.EnqueueNDRangeKernel");
            if (error == ErrorCode.Success)
            {
                Cl.ReleaseEvent(clevent);
            }

            error = Cl.Finish(cmdQueue);
            LogError(error, "Cl.Finish");

            error = Cl.EnqueueReadBuffer(cmdQueue, buffOut, OpenCL.Net.Bool.True, 0, elementCount, output, 0, null, out clevent);
            LogError(error, "Cl.EnqueueReadBuffer");
            if (error == ErrorCode.Success)
            {
                Cl.ReleaseEvent(clevent);
            }

            error = Cl.Finish(cmdQueue);
            LogError(error, "Cl.Finish");
        }
        finally
        {
            // Released even when a call above throws.
            Cl.ReleaseKernel(kernel);
            Cl.ReleaseCommandQueue(cmdQueue);
            Cl.ReleaseMemObject(buffIn);
            Cl.ReleaseMemObject(buffOut);
        }

        sb.Append("<pre>");
        for (int i = 0; i != elementCount; i++)
        {
            sb.Append(input[i] + " % 42 = " + output[i] + "<br />");
        }

        sb.Append("</pre>");
    }

    sb.Append("</body></html>");
    response.Body = Encoding.UTF8.GetBytes(sb.ToString());
    return response;
}
/// <summary>
/// Demo entry point: lists the OpenCL platforms, builds a small program on the first device of the
/// first platform, runs the kernels at indices 2 and 0 of the created kernel array over 1000 floats,
/// and prints the contents of the result array.
/// </summary>
/// <param name="args">Unused.</param>
static void Main(string[] args)
{
    const int elementCount = 1000;

    Console.WriteLine("Hello World!");

    uint platformCount;
    ErrorCode result = Cl.GetPlatformIDs(0, null, out platformCount);
    Console.WriteLine("{0} platforms found", platformCount);
    if (platformCount == 0)
    {
        // Nothing to index into below.
        Console.Error.WriteLine("No OpenCL platform available.");
        return;
    }

    var platformIds = new Platform[platformCount];
    result = Cl.GetPlatformIDs(platformCount, platformIds, out platformCount);

    var platformCounter = 0;
    foreach (var platformId in platformIds)
    {
        IntPtr paramSize;
        result = Cl.GetPlatformInfo(platformId, PlatformInfo.Name, IntPtr.Zero, InfoBuffer.Empty, out paramSize);
        using (var buffer = new InfoBuffer(paramSize))
        {
            // Query the platform being iterated; the original always asked platformIds[0] here,
            // so every entry printed the first platform's name.
            result = Cl.GetPlatformInfo(platformId, PlatformInfo.Name, paramSize, buffer, out paramSize);
            Console.WriteLine($"Platform {platformCounter}: {buffer}");
        }

        platformCounter++;
    }

    Console.WriteLine("Using first platform...");

    uint deviceCount;
    result = Cl.GetDeviceIDs(platformIds[0], DeviceType.All, 0, null, out deviceCount);
    Console.WriteLine("{0} devices found", deviceCount);
    if (deviceCount == 0)
    {
        Console.Error.WriteLine("No OpenCL device available on the first platform.");
        return;
    }

    var deviceIds = new Device[deviceCount];
    result = Cl.GetDeviceIDs(platformIds[0], DeviceType.All, deviceCount, deviceIds, out _);
    var selectedDevice = deviceIds[0];

    var context = Cl.CreateContext(null, 1, new[] { selectedDevice }, null, IntPtr.Zero, out var error);

    // The line breaks matter: on a single line the leading "//" comment would hide every kernel.
    const string kernelSrc = @"
// Simple test; c[i] = a[i] + b[i]
__kernel void add_array(__global float *a, __global float *b, __global float *c)
{
    int xid = get_global_id(0);
    c[xid] = a[xid] + b[xid] - 1500;
}

__kernel void sub_array(__global float *a, __global float *b, __global float *c)
{
    int xid = get_global_id(0);
    c[xid] = a[xid] - b[xid] - 2000;
}

__kernel void double_everything(__global float *a)
{
    int xid = get_global_id(0);
    a[xid] = a[xid] * 2;
}
";
    var src = kernelSrc;
    Console.WriteLine("=== src ===");
    Console.WriteLine(src);
    Console.WriteLine("============");

    var program = Cl.CreateProgramWithSource(context, 1, new[] { src }, null, out var error2);
    error2 = Cl.BuildProgram(program, 1, new[] { selectedDevice }, string.Empty, null, IntPtr.Zero);
    if (error2 == ErrorCode.BuildProgramFailure)
    {
        Console.Error.WriteLine(Cl.GetProgramBuildInfo(program, selectedDevice, ProgramBuildInfo.Log, out error));
    }

    Console.WriteLine(error2);

    // Get the kernels.
    // NOTE(review): this relies on the kernels being returned in source order - confirm, or create
    // them by name instead.
    var kernels = Cl.CreateKernelsInProgram(program, out error);
    Console.WriteLine($"Program contains {kernels.Length} kernels.");
    var kernelAdd = kernels[0];
    var kernelDouble = kernels[2];

    float[] A = new float[elementCount];
    float[] B = new float[elementCount];
    float[] C = new float[elementCount];
    for (var i = 0; i < elementCount; i++)
    {
        A[i] = i;
        B[i] = i;
    }

    // A is written by double_everything and C is written by add_array, so neither may be read-only.
    IMem<float> hDeviceMemA = Cl.CreateBuffer(context, MemFlags.CopyHostPtr | MemFlags.ReadWrite, A, out error);
    IMem<float> hDeviceMemB = Cl.CreateBuffer(context, MemFlags.CopyHostPtr | MemFlags.ReadOnly, B, out error);
    IMem<float> hDeviceMemC = Cl.CreateBuffer(context, MemFlags.CopyHostPtr | MemFlags.WriteOnly, C, out error);

    // Create a command queue.
    var cmdQueue = Cl.CreateCommandQueue(context, selectedDevice, CommandQueueProperties.None, out error);

    int intPtrSize = Marshal.SizeOf(typeof(IntPtr));
    error = Cl.SetKernelArg(kernelDouble, 0, new IntPtr(intPtrSize), hDeviceMemA);
    error = Cl.SetKernelArg(kernelAdd, 0, new IntPtr(intPtrSize), hDeviceMemA);
    error = Cl.SetKernelArg(kernelAdd, 1, new IntPtr(intPtrSize), hDeviceMemB);
    error = Cl.SetKernelArg(kernelAdd, 2, new IntPtr(intPtrSize), hDeviceMemC);

    // write data from host to device
    Event clevent;
    error = Cl.EnqueueWriteBuffer(cmdQueue, hDeviceMemA, Bool.True, IntPtr.Zero, new IntPtr(elementCount * sizeof(float)), A, 0, null, out clevent);
    error = Cl.EnqueueWriteBuffer(cmdQueue, hDeviceMemB, Bool.True, IntPtr.Zero, new IntPtr(elementCount * sizeof(float)), B, 1, new[] { clevent }, out clevent);

    // execute kernels, each waiting on the previous command's event
    error = Cl.EnqueueNDRangeKernel(cmdQueue, kernelDouble, 1, null, new IntPtr[] { new IntPtr(elementCount) }, null, 1, new[] { clevent }, out clevent);
    error = Cl.EnqueueNDRangeKernel(cmdQueue, kernelAdd, 1, null, new IntPtr[] { new IntPtr(elementCount) }, null, 1, new[] { clevent }, out clevent);
    Console.WriteLine($"Run result: {error}");

    // Blocking read: a non-blocking read could still be writing into the managed array after
    // this call has returned.
    error = Cl.EnqueueReadBuffer(cmdQueue, hDeviceMemC, Bool.True, 0, C.Length, C, 1, new[] { clevent }, out clevent);
    Cl.WaitForEvents(1, new[] { clevent });

    for (var i = 0; i < elementCount; i++)
    {
        Console.WriteLine($"[{i}]: {C[i]}");
    }

    program.Dispose();

    foreach (var res in typeof(SourceLoader).Assembly.GetManifestResourceNames())
    {
        Console.WriteLine(res);
    }
}
// Partially from OpenTK demo - Submitted by "mfagerlund"
/// <summary>
/// Builds a two-kernel program, runs "add_array" over two 12-element float vectors and asserts that
/// every element of the result equals the host-side sum.
/// </summary>
/// <remarks>
/// The kernel is created by name instead of being taken from index 0 of the kernels created for the
/// whole program. Device objects and events are released before the host-side comparison.
/// </remarks>
public void AddArrayAddsCorrectly()
{
    // The line breaks matter: on a single line the leading "//" comment would hide both kernels.
    const string correctSource = @"
// Simple test; c[i] = a[i] + b[i]
__kernel void add_array(__global float *a, __global float *b, __global float *c)
{
    int xid = get_global_id(0);
    c[xid] = a[xid] + b[xid];
}

__kernel void sub_array(__global float *a, __global float *b, __global float *c)
{
    int xid = get_global_id(0);
    c[xid] = a[xid] - b[xid];
}
";
    const int cnBlockSize = 4;
    const int cnBlocks = 3;
    const int elementCount = cnBlocks * cnBlockSize;

    ErrorCode error;
    using (Program program = Cl.CreateProgramWithSource(_context, 1, new[] { correctSource }, null, out error))
    {
        Assert.AreEqual(ErrorCode.Success, error);
        error = Cl.BuildProgram(program, 1, new[] { _device }, string.Empty, null, IntPtr.Zero);
        Assert.AreEqual(ErrorCode.Success, error);
        Assert.AreEqual(BuildStatus.Success, Cl.GetProgramBuildInfo(program, _device, ProgramBuildInfo.Status, out error).CastTo<BuildStatus>());

        Kernel kernel = Cl.CreateKernel(program, "add_array", out error);
        Assert.AreEqual(ErrorCode.Success, error);

        // allocate host vectors
        float[] A = new float[elementCount];
        float[] B = new float[elementCount];
        float[] C = new float[elementCount];

        // initialize host memory with small whole numbers
        Random rand = new Random();
        for (int i = 0; i < A.Length; i++)
        {
            A[i] = rand.Next() % 256;
            B[i] = rand.Next() % 256;
        }

        IMem<float> hDeviceMemA = Cl.CreateBuffer(_context, MemFlags.CopyHostPtr | MemFlags.ReadOnly, A, out error);
        Assert.AreEqual(ErrorCode.Success, error);
        IMem hDeviceMemB = Cl.CreateBuffer(_context, MemFlags.CopyHostPtr | MemFlags.ReadOnly, (IntPtr)(sizeof(float) * elementCount), B, out error);
        Assert.AreEqual(ErrorCode.Success, error);
        IMem hDeviceMemC = Cl.CreateBuffer(_context, MemFlags.WriteOnly, (IntPtr)(sizeof(float) * elementCount), IntPtr.Zero, out error);
        Assert.AreEqual(ErrorCode.Success, error);

        CommandQueue cmdQueue = Cl.CreateCommandQueue(_context, _device, (CommandQueueProperties)0, out error);
        Assert.AreEqual(ErrorCode.Success, error);

        Event clevent;
        int intPtrSize = Marshal.SizeOf(typeof(IntPtr));

        // setup parameter values
        error = Cl.SetKernelArg(kernel, 0, new IntPtr(intPtrSize), hDeviceMemA);
        Assert.AreEqual(ErrorCode.Success, error);
        error = Cl.SetKernelArg(kernel, 1, new IntPtr(intPtrSize), hDeviceMemB);
        Assert.AreEqual(ErrorCode.Success, error);
        error = Cl.SetKernelArg(kernel, 2, new IntPtr(intPtrSize), hDeviceMemC);
        Assert.AreEqual(ErrorCode.Success, error);

        // write data from host to device
        error = Cl.EnqueueWriteBuffer(cmdQueue, hDeviceMemA, Bool.True, IntPtr.Zero, new IntPtr(elementCount * sizeof(float)), A, 0, null, out clevent);
        Assert.AreEqual(ErrorCode.Success, error);
        Cl.ReleaseEvent(clevent);
        error = Cl.EnqueueWriteBuffer(cmdQueue, hDeviceMemB, Bool.True, IntPtr.Zero, new IntPtr(elementCount * sizeof(float)), B, 0, null, out clevent);
        Assert.AreEqual(ErrorCode.Success, error);
        Cl.ReleaseEvent(clevent);

        // execute kernel
        error = Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 1, null, new IntPtr[] { new IntPtr(elementCount) }, null, 0, null, out clevent);
        Assert.AreEqual(ErrorCode.Success, error, error.ToString());
        Cl.ReleaseEvent(clevent);

        // copy results from device back to host (blocking read)
        error = Cl.EnqueueReadBuffer(cmdQueue, hDeviceMemC, Bool.True, 0, C.Length, C, 0, null, out clevent);
        Assert.AreEqual(ErrorCode.Success, error, error.ToString());
        Cl.ReleaseEvent(clevent);

        Cl.Finish(cmdQueue);

        // Everything needed is on the host now; free the device objects before comparing.
        Cl.ReleaseMemObject(hDeviceMemA);
        Cl.ReleaseMemObject(hDeviceMemB);
        Cl.ReleaseMemObject(hDeviceMemC);
        Cl.ReleaseKernel(kernel);
        Cl.ReleaseCommandQueue(cmdQueue);

        for (int i = 0; i < A.Length; i++)
        {
            Assert.That(A[i] + B[i], Is.EqualTo(C[i]));
        }
    }
}
/// <summary>
/// Runs the "PoissonJacobi" kernel for 15000 launches on a 162 x 122 grid, swapping the two device
/// buffers between launches, then compares the interior of the result against <c>u</c> and asserts
/// on the maximum and average relative error.
/// </summary>
/// <remarks>
/// Only the launches inside the loop (all but the first) are timed. Every launch event is released as
/// soon as it is returned (the loop used to leak one event per iteration), and the buffers, kernel
/// and command queue are released once the result has been read back.
/// </remarks>
public void PoissonJacobi()
{
    if (!prepared)
    {
        Prepare(this.BuildIR().InlineIR());
        prepared = true;
    }

    // create kernel
    Cl.Kernel kernel = Cl.CreateKernel(program, "PoissonJacobi", out error);
    clSafeCall(error);

    // create command queue
    Cl.CommandQueue cmdQueue = Cl.CreateCommandQueue(context, device, Cl.CommandQueueProperties.None, out error);
    clSafeCall(error);

    // initialize host memory
    uint dimX = 162;
    uint dimY = 122;
    uint N = 15000;
    float x0 = (float)(-0.25 * Math.PI);
    float y0 = (float)(-0.25 * Math.PI);
    float hx = 2.0f * Math.Abs(x0) / dimX;
    float hy = 2.0f * Math.Abs(y0) / dimY;
    float[] hData = new float[dimX * dimY];
    uint stride = dimX;

    // boundary values: first and last column of every interior row
    for (uint i = 1; i < dimY - 1; i++)
    {
        uint y_idx = i * stride;
        float y_val = y0 + i * hy;
        hData[y_idx] = u(x0, y_val);
        hData[y_idx + dimX - 1] = u(x0 + (dimX - 1) * hx, y_val);
    }

    // boundary values: first and last row of every interior column
    for (uint j = 1; j < dimX - 1; j++)
    {
        float x_val = x0 + j * hx;
        hData[j] = u(x_val, y0);
        hData[j + (dimY - 1) * stride] = u(x_val, y0 + (dimY - 1) * hy);
    }

    // allocate device vectors (both start as copies of the host grid)
    Cl.Mem input = Cl.CreateBuffer(context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadWrite, (IntPtr)(sizeof(float) * hData.Length), hData, out error);
    clSafeCall(error);
    Cl.Mem output = Cl.CreateBuffer(context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadWrite, (IntPtr)(sizeof(float) * hData.Length), hData, out error);
    clSafeCall(error);

    float a1 = 2 * hy / hx;
    float a2 = 2 * hx / hy;
    float a3 = a1;
    float a4 = a2;
    float a = a1 + a2 + a3 + a4;

    // setup kernel arguments (argument 2 is a size-only argument with a null value)
    clSafeCall(Cl.SetKernelArg(kernel, 2, (AREA_SIZE_Y + 2) * (AREA_SIZE_X + 2) * sizeof(float), null));
    clSafeCall(Cl.SetKernelArg(kernel, 3, dimX));
    clSafeCall(Cl.SetKernelArg(kernel, 4, dimY));
    clSafeCall(Cl.SetKernelArg(kernel, 5, stride));
    clSafeCall(Cl.SetKernelArg(kernel, 6, a1));
    clSafeCall(Cl.SetKernelArg(kernel, 7, a2));
    clSafeCall(Cl.SetKernelArg(kernel, 8, a3));
    clSafeCall(Cl.SetKernelArg(kernel, 9, a4));
    clSafeCall(Cl.SetKernelArg(kernel, 10, a));
    clSafeCall(Cl.SetKernelArg(kernel, 11, hx));
    clSafeCall(Cl.SetKernelArg(kernel, 12, hy));
    clSafeCall(Cl.SetKernelArg(kernel, 13, x0));
    clSafeCall(Cl.SetKernelArg(kernel, 14, y0));

    IntPtr[] lo = { (IntPtr)16, (IntPtr)16 };
    IntPtr[] gl =
    {
        (IntPtr)((dimX - 2 + AREA_SIZE_X - 1) / AREA_SIZE_X * 16),
        (IntPtr)((dimY - 2 + AREA_SIZE_Y - 1) / AREA_SIZE_Y * 16)
    };

    Cl.Mem curIn = input;
    Cl.Mem curOut = output;

    // first launch, kept outside the timed section
    clSafeCall(Cl.SetKernelArg(kernel, 0, curIn));
    clSafeCall(Cl.SetKernelArg(kernel, 1, curOut));
    clSafeCall(Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 2, null, gl, lo, 0, null, out clevent));
    Cl.ReleaseEvent(clevent);
    clSafeCall(Cl.Finish(cmdQueue));

    Stopwatch stopwatch = new Stopwatch();
    stopwatch.Start();
    for (uint idx = 1; idx < N; idx++)
    {
        // swap buffers
        Cl.Mem temp = curIn;
        curIn = curOut;
        curOut = temp;

        // execute kernel
        clSafeCall(Cl.SetKernelArg(kernel, 0, curIn));
        clSafeCall(Cl.SetKernelArg(kernel, 1, curOut));
        clSafeCall(Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 2, null, gl, lo, 0, null, out clevent));
        // Nothing waits on this event, so drop our reference right away.
        Cl.ReleaseEvent(clevent);
    }

    clSafeCall(Cl.Finish(cmdQueue));
    stopwatch.Stop();

    // copy results from device back to host (blocking read)
    clSafeCall(Cl.EnqueueReadBuffer(cmdQueue, curOut, Cl.Bool.True, IntPtr.Zero, (IntPtr)(sizeof(float) * hData.Length), hData, 0, null, out clevent));
    Cl.ReleaseEvent(clevent);
    clSafeCall(Cl.Finish(cmdQueue));

    // The result is on the host now; free the device objects before asserting.
    Cl.ReleaseMemObject(input);
    Cl.ReleaseMemObject(output);
    Cl.ReleaseKernel(kernel);
    Cl.ReleaseCommandQueue(cmdQueue);

    float avgerr = 0, maxerr = 0;
    for (uint i = 1; i < dimY - 1; i++)
    {
        for (uint j = 1; j < dimX - 1; j++)
        {
            float theory = u(x0 + j * hx, y0 + i * hy);
            float err = Math.Abs(theory - hData[j + i * stride]) / Math.Abs(theory);
            avgerr += err;
            maxerr = Math.Max(maxerr, err);
        }
    }

    // NOTE(review): the sum covers interior points only but is divided by the full grid size - confirm.
    avgerr /= dimX * dimY;

    long elapsedTime = stopwatch.ElapsedMilliseconds;
    double dataSizePerIteration = dimX * dimY * 2 * sizeof(float);
    double dataSizeTotal = dataSizePerIteration * N;
    double elapsedSeconds = elapsedTime * 0.001;
    double gigabyteFactor = 1 << 30;
    double bandwidth = dataSizeTotal / (gigabyteFactor * elapsedSeconds);
    Console.WriteLine("avgerr = {0} maxerr = {1} elapsedTime = {2} ms bandwidth = {3} GB/s", avgerr, maxerr, elapsedTime, bandwidth);

    Assert.That(maxerr, Is.LessThanOrEqualTo(5E-2F));
    Assert.That(avgerr, Is.LessThanOrEqualTo(1E-2F));
}