/// <summary>
/// Gets the values for an explicit input: uploads <paramref name="input"/>, runs the explicit
/// kernel with one work-item per input element and reads one float per element back.
/// </summary>
/// <param name="input">The explicit input</param>
/// <param name="output">
/// The output values. If the array is null or shorter than <paramref name="input"/> it is
/// replaced by a new array of exactly <c>input.Length</c> elements.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
/// <exception cref="Exception">The context or the explicit kernel has not been created yet.</exception>
public void GetValues(Single3[] input, ref float[] output)
{
    if (context == null || kernelExplicit == null)
    {
        throw new Exception("Compile first!");
    }
    if (input == null)
    {
        throw new ArgumentNullException("input");
    }

    int inputLength = input.Length;

    // The read below writes inputLength floats through a raw pointer, so the
    // destination array must be at least that large.
    if (output == null || output.Length < inputLength)
    {
        output = new float[inputLength];
    }

    // IO length changed: the device-side output buffer has to be re-created.
    if (lastLength != inputLength)
    {
        // NOTE(review): the previous outputBuffer is not disposed here because it is a
        // field that other code may still reference - confirm ownership and release it.
        outputBuffer = new Cloo.ComputeBuffer<float>(context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.AllocateHostPointer, inputLength);
        kernelExplicit.SetMemoryArgument(2, outputBuffer);

        // Only remember the new length once the buffer exists, so a failed allocation is retried.
        lastLength = inputLength;
    }

    // Setup IO Buffers
    ComputeBuffer<Single3> bufIn = new Cloo.ComputeBuffer<Single3>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, input);
    try
    {
        // Arrange params
        kernelExplicit.SetMemoryArgument(0, bufIn);

        // Exec and read
        queue.Execute(kernelExplicit, null, new long[] { inputLength }, null, null);

        GCHandle outHandle = GCHandle.Alloc(output, GCHandleType.Pinned);
        try
        {
            // Original author's note: Read saves about 500 - 1000 ticks. Sweet for small queues
            queue.Read<float>(outputBuffer, true, 0, inputLength, outHandle.AddrOfPinnedObject(), null);
        }
        finally
        {
            // Always unpin, even when the read throws.
            outHandle.Free();
        }

        queue.Finish();
    }
    finally
    {
        // The input buffer is only needed for this call; release it instead of leaking one per call.
        bufIn.Dispose();
    }
}
/// <summary>
/// Copy a buffer to a float array using a blocking read of <c>target.Length</c> elements.
/// </summary>
///
/// <param name="sourceBuffer">The source buffer.</param>
/// <param name="target">The target array.</param>
/// <exception cref="ArgumentNullException">An argument is null.</exception>
public void Buffer2Array(ComputeBuffer<float> sourceBuffer, float[] target)
{
    if (sourceBuffer == null)
    {
        throw new ArgumentNullException("sourceBuffer");
    }
    if (target == null)
    {
        throw new ArgumentNullException("target");
    }

    GCHandle arrCHandle = GCHandle.Alloc(target, GCHandleType.Pinned);
    try
    {
        commands.Read(sourceBuffer, true, 0, target.Length, arrCHandle.AddrOfPinnedObject(), null);
    }
    finally
    {
        // Unpin even if the read fails, otherwise the array stays pinned forever.
        arrCHandle.Free();
    }
}
/// <summary>
/// Min and max values computed with OpenCL: runs the min/max kernel with one work-item per
/// column of <paramref name="frame"/> and reads both result buffers back.
/// </summary>
/// <param name="frame">Input frame; its column count sizes the kernel range and both outputs.</param>
/// <param name="maxValues">Receives the values read from <c>MinMaxCL.maxBufferCB</c>; null if the call failed before allocation.</param>
/// <param name="minValues">Receives the values read from <c>MinMaxCL.minBufferCB</c>; null if the call failed before allocation.</param>
/// <param name="windowValue">Forwarded to <c>MinMaxCL.UpdateArguments</c>.</param>
public void GetMinMaxValuesCL(Mat frame, out int[] maxValues, out int[] minValues, int windowValue)
{
    maxValues = null;
    minValues = null;
    try
    {
        MinMaxCL.UpdateArguments(frame, clooCtx, ctxMinMaxKernel, windowValue);

        // execute kernel
        queue.Execute(ctxMinMaxKernel, null, new long[] { frame.Cols }, null, null);

        // max Values.
        maxValues = new int[frame.Cols];
        GCHandle maxHandle = GCHandle.Alloc(maxValues, GCHandleType.Pinned);
        try
        {
            queue.Read(MinMaxCL.maxBufferCB, true, 0, maxValues.Length, maxHandle.AddrOfPinnedObject(), null);
        }
        finally
        {
            // The handle was previously never freed, which kept the array pinned for good.
            maxHandle.Free();
        }

        // min Values.
        minValues = new int[frame.Cols];
        GCHandle minHandle = GCHandle.Alloc(minValues, GCHandleType.Pinned);
        try
        {
            queue.Read(MinMaxCL.minBufferCB, true, 0, minValues.Length, minHandle.AddrOfPinnedObject(), null);
        }
        finally
        {
            minHandle.Free();
        }

        // end opencl compute.
        queue.Finish();
    }
    catch (Exception ex)
    {
        // Failures are reported to the user; the out arrays keep whatever was assigned so far.
        MessageBox.Show(ex.Message);
    }
}
/// <summary>
/// Demo entry point: runs the "Calc" kernel on two 4-element rows on the first GPU of the
/// first OpenCL platform and prints the contents of the result buffer.
/// </summary>
/// <param name="args">Unused command line arguments.</param>
static void Main(string[] args)
{
    int[] r1 = new int[] { 8, 2, 3, 4 };
    int[] r2 = new int[] { 4, 3, 2, 5 };
    int[] r3 = new int[4];
    int rowSize = r1.Length;

    // pick first platform
    ComputePlatform platform = ComputePlatform.Platforms[0];

    // create context with all gpu devices, a command queue on the first gpu found,
    // and the program from the opencl source
    using (ComputeContext context = new ComputeContext(ComputeDeviceTypes.Gpu, new ComputeContextPropertyList(platform), null, IntPtr.Zero))
    using (ComputeCommandQueue queue = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None))
    using (ComputeProgram program = new ComputeProgram(context, CalculateKernel))
    {
        // compile opencl source
        program.Build(null, null, null, IntPtr.Zero);

        // Inputs are copied to the device (CopyHostPointer) rather than aliased (UseHostPointer):
        // the managed arrays are not pinned for the lifetime of the buffers.
        using (ComputeKernel kernel = program.CreateKernel("Calc"))
        using (ComputeBuffer<int> row1Buffer = new ComputeBuffer<int>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, r1))
        using (ComputeBuffer<int> row2Buffer = new ComputeBuffer<int>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, r2))
        // The kernel has to write its result, so the buffer must not be ReadOnly.
        // It is initialised from r3 (all zeros), matching the zero-filled array used before.
        using (ComputeBuffer<int> resultBuffer = new ComputeBuffer<int>(context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer, r3))
        {
            kernel.SetMemoryArgument(0, row1Buffer);   // set the integer array
            kernel.SetMemoryArgument(1, row2Buffer);   // set the integer array
            kernel.SetValueArgument(2, rowSize);       // set the array size
            kernel.SetMemoryArgument(3, resultBuffer); // set the integer array

            // execute kernel
            queue.ExecuteTask(kernel, null);

            // wait for completion
            queue.Finish();

            GCHandle arrCHandle = GCHandle.Alloc(r3, GCHandleType.Pinned);
            try
            {
                queue.Read<int>(resultBuffer, true, 0, r3.Length, arrCHandle.AddrOfPinnedObject(), null);
            }
            finally
            {
                arrCHandle.Free();
            }

            Console.WriteLine("display result from gpu buffer:");
            for (int i = 0; i < r3.Length; i++)
            {
                Console.WriteLine(r3[i]);
            }
        }
    }

    Console.WriteLine("Finished");
    Console.ReadKey();
}
/// <summary>
/// Multiplies two matrices on the GPU by enqueuing the "Multiply" kernel once per result cell.
/// </summary>
/// <param name="matrixOne">Left matrix, as a list of rows.</param>
/// <param name="matrixTwo">Right matrix, as a list of rows.</param>
/// <returns>
/// The product matrix and the time spent between the first kernel enqueue and the end of
/// <c>queue.Finish()</c> (buffer creation and read-back are not timed).
/// </returns>
/// <exception cref="ArgumentException">A matrix is not regular, or the dimensions are incompatible.</exception>
/// <exception cref="PlatformNotSupportedException"><c>GetGPU()</c> returned no platform.</exception>
public static Tuple<List<List<int>>, TimeSpan> MultiplyParallel(List<List<int>> matrixOne, List<List<int>> matrixTwo)
{
    if (!isRegularMatrix(matrixOne) || !isRegularMatrix(matrixTwo))
    {
        throw new ArgumentException("Non regular matrix detected. Rows size mismatch detected.");
    }
    if (matrixOne[0].Count != matrixTwo.Count)
    {
        throw new ArgumentException("Matrixes is not compatible. Columns count of first matrix is not equal to rows count of second matrix.");
    }

    ComputePlatform platform = GetGPU();
    if (platform is null)
    {
        throw new PlatformNotSupportedException("Platform doesn't have a dedicated GPU. Run is impossible.");
    }

    List<List<int>> result = new List<List<int>>();
    Stopwatch stopwatch = new Stopwatch();

    using (ComputeContext context = new ComputeContext(ComputeDeviceTypes.Gpu, new ComputeContextPropertyList(platform), null, IntPtr.Zero))
    using (ComputeCommandQueue queue = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None))
    using (ComputeProgram program = new ComputeProgram(context, CalculateKernel))
    {
        program.Build(null, null, null, IntPtr.Zero);

        using (ComputeKernel kernel = program.CreateKernel("Multiply"))
        {
            List<ComputeBuffer<int>> rowsMatrixOne = null;
            List<ComputeBuffer<int>> columnsMatrixTwo = null;
            List<ComputeBuffer<int>> resultRowsMatrix = null;
            try
            {
                rowsMatrixOne = matrixOne.TransformMatrixToComputerBuffersOfRows(context);
                columnsMatrixTwo = matrixTwo.TransformMatrixToComputerBuffersOfColumns(context);
                resultRowsMatrix = TwoDToOneDResult(matrixOne.Count, matrixTwo[0].Count, context);

                stopwatch.Start();
                for (int i = 0; i < resultRowsMatrix.Count; ++i)
                {
                    for (int j = 0; j < resultRowsMatrix[i].Count; ++j)
                    {
                        kernel.SetMemoryArgument(0, rowsMatrixOne[i]);
                        kernel.SetMemoryArgument(1, columnsMatrixTwo[j]);
                        kernel.SetMemoryArgument(2, resultRowsMatrix[i]);
                        kernel.SetValueArgument(3, matrixTwo.Count);
                        kernel.SetValueArgument(4, j);
                        queue.ExecuteTask(kernel, null);
                    }
                }
                queue.Finish();
                stopwatch.Stop();

                for (int i = 0; i < resultRowsMatrix.Count; ++i)
                {
                    int[] res = new int[resultRowsMatrix[i].Count];
                    GCHandle gCHandle = GCHandle.Alloc(res, GCHandleType.Pinned);
                    try
                    {
                        queue.Read<int>(resultRowsMatrix[i], true, 0, res.Length, gCHandle.AddrOfPinnedObject(), null);
                    }
                    finally
                    {
                        // One handle per row was previously leaked, keeping every row array pinned.
                        gCHandle.Free();
                    }
                    result.Add(new List<int>(res));
                }
            }
            finally
            {
                // Release every device buffer that was created, including on failure.
                foreach (List<ComputeBuffer<int>> buffers in new[] { rowsMatrixOne, columnsMatrixTwo, resultRowsMatrix })
                {
                    if (buffers == null)
                    {
                        continue;
                    }
                    foreach (ComputeBuffer<int> buffer in buffers)
                    {
                        buffer.Dispose();
                    }
                }
            }
        }
    }

    return new Tuple<List<List<int>>, TimeSpan>(result, stopwatch.Elapsed);
}
/// <summary>
/// Binds <paramref name="result"/> and <paramref name="num"/> to <paramref name="kernel"/>,
/// runs it over a 1x1 range and reads the first element of <paramref name="result"/> back.
/// </summary>
/// <param name="num">Value passed as kernel argument 1.</param>
/// <param name="result">Device buffer passed as kernel argument 0 and read back afterwards.</param>
/// <param name="kernel">Kernel to execute.</param>
/// <returns>A one-element array holding the first element of <paramref name="result"/>.</returns>
private TFP[] RunKernalTest(float num, ComputeBuffer<TFP> result, ComputeKernel kernel)
{
    kernel.SetMemoryArgument(0, result);
    kernel.SetValueArgument(1, num);

    // BUG: ATI Stream v2.2 crash if event list not null.
    commands.Execute(kernel, null, new long[] { 1, 1 }, null, events);
    //commands.Execute(kernel, null, new long[] { count }, null, null);

    TFP[] myresult = new TFP[1];
    GCHandle arrCHandle = GCHandle.Alloc(myresult, GCHandleType.Pinned);
    try
    {
        commands.Read(result, true, 0, 1, arrCHandle.AddrOfPinnedObject(), events);
    }
    finally
    {
        // Unpin even when the read throws.
        arrCHandle.Free();
    }

    return myresult;
}
/// <summary>
/// Multiplies two row-major float matrices on the OpenCL device, initializing the device
/// state on first use.
/// </summary>
/// <param name="matrix1">Left matrix data.</param>
/// <param name="matrix2">Right matrix data.</param>
/// <param name="matrix1Height">Number of rows of the left matrix (second dimension of the kernel range).</param>
/// <param name="matrix1WidthMatrix2Height">Shared dimension, passed as kernel argument 3.</param>
/// <param name="matrix2Width">Number of columns of the right matrix (first dimension of the kernel range, kernel argument 4).</param>
/// <returns>A new array of <c>matrix1Height * matrix2Width</c> elements read back from the device.</returns>
public float[] MultiplyMatrices(float[] matrix1, float[] matrix2, int matrix1Height, int matrix1WidthMatrix2Height, int matrix2Width)
{
    if (!_initialized)
    {
        Initialize();
        _initialized = true;
    }

    float[] ret = new float[matrix1Height * matrix2Width];

    // 'using' releases the device buffers even when an OpenCL call throws.
    using (ComputeBuffer<float> matrix1Buffer = new ComputeBuffer<float>(_context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, matrix1))
    using (ComputeBuffer<float> matrix2Buffer = new ComputeBuffer<float>(_context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, matrix2))
    using (ComputeBuffer<float> retBuffer = new ComputeBuffer<float>(_context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer, ret))
    {
        _kernel.SetMemoryArgument(0, matrix1Buffer);
        _kernel.SetMemoryArgument(1, matrix2Buffer);
        _kernel.SetMemoryArgument(2, retBuffer);
        _kernel.SetValueArgument<int>(3, matrix1WidthMatrix2Height);
        _kernel.SetValueArgument<int>(4, matrix2Width);

        // The range is two-dimensional, so a one-element offset array would be one entry short.
        // A null offset means "start at zero in every dimension".
        _commandQueue.Execute(_kernel, null, new long[] { matrix2Width, matrix1Height }, null, null);

        unsafe
        {
            fixed (float* retPtr = ret)
            {
                // Non-blocking read: Finish() must run while 'ret' is still pinned.
                _commandQueue.Read(retBuffer, false, 0, ret.Length, new IntPtr(retPtr), null);
                _commandQueue.Finish();
            }
        }
    }

    return ret;
}
/// <summary>
/// Performs a blocking read of the full <c>NextImage</c> region (Width x Height x 1) into
/// <c>NextImageVector</c> and wraps that array in an Rgba128Float bitmap source.
/// </summary>
/// <returns>A bitmap source created from <c>NextImageVector</c> at 96 DPI.</returns>
private BitmapSource ReadImageDataFromGPUMemory()
{
    // Four float channels per pixel.
    var rowPitch = Width * 4 * sizeof(float);
    var origin = new SysIntX3(0, 0, 0);
    var region = new SysIntX3(Width, Height, 1);

    unsafe
    {
        fixed (float* pixels = NextImageVector)
        {
            CQ.Read(NextImage, true, origin, region, rowPitch, 0, (IntPtr)pixels, null);
        }
    }

    var format = PixelFormats.Rgba128Float;
    var stride = (format.BitsPerPixel / 8) * Width;
    return BitmapSource.Create(Width, Height, 96, 96, format, null, NextImageVector, stride);
}
/// <summary>
/// Builds "Examples/SumTest.cl", runs its "doubleVectorSum" kernel over two 2000-element
/// double vectors and checks that the values read back from the first buffer equal
/// <c>-i / 90.0</c> within 1E-13.
/// </summary>
public void DoubleSumTest()
{
    var factory = new OpenCL200Factory();
    string source = File.ReadAllText("Examples/SumTest.cl");

    int count = 2000;
    var a = new double[count];
    var b = new double[count];
    var ab = new double[count];

    // a[i] + b[i] == i/10 - i/9 == -i/90, which is what the final assertion expects.
    int index = 0;
    while (index < count)
    {
        a[index] = index / 10.0;
        b[index] = -index / 9.0;
        index++;
    }

    var platformProperty = new ComputeContextProperty(ComputeContextPropertyName.Platform, Device.Platform.Handle.Value);
    var contextProperties = new List<ComputeContextProperty> { platformProperty };

    using (var clContext = factory.CreateContext(ComputeDeviceTypes.All, contextProperties, null, IntPtr.Zero))
    using (var clProgram = factory.BuildComputeProgram(clContext, source))
    {
        var targetDevices = new List<IComputeDevice>() { Device };
        clProgram.Build(targetDevices, "", null, IntPtr.Zero);
        var kernel = factory.CreateKernel(clProgram, "doubleVectorSum");

        using (ComputeBuffer<double>
            varA = new ComputeBuffer<double>(clContext, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.UseHostPointer, a),
            varB = new ComputeBuffer<double>(clContext, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, b))
        {
            kernel.SetMemoryArgument(0, varA);
            kernel.SetMemoryArgument(1, varB);

            using (var clQueue = new ComputeCommandQueue(clContext, Device, ComputeCommandQueueFlags.None))
            {
                clQueue.Execute(kernel, null, new long[] { count }, null, null);

                // Blocking read of the first buffer after the kernel ran.
                ab = clQueue.Read(varA, true, 0, count, null);
            }
        }
    }

    for (int k = 0; k < count; k++)
    {
        double expected = -k / 90.0;
        Assert.AreEqual(expected, ab[k], 1E-13);
    }
}
/// <summary>
/// Builds <c>demos\cls\matrix_mul.cl</c>, runs its "MatrixMul" kernel over a Rank x Rank range
/// on the first device of the first platform, and prints the time taken by the kernel
/// execution plus the blocking read of the result.
/// </summary>
/// <exception cref="InvalidOperationException">No OpenCL platform or device is available.</exception>
public void OpenClMul()
{
    // Select the device
    var platform = ComputePlatform.Platforms.FirstOrDefault();
    if (platform == null)
    {
        throw new InvalidOperationException("No OpenCL platform is available.");
    }
    var device = platform.Devices.FirstOrDefault();
    if (device == null)
    {
        throw new InvalidOperationException("The OpenCL platform exposes no device.");
    }

    var properties = new ComputeContextPropertyList(platform);
    var code = File.ReadAllText(@"demos\cls\matrix_mul.cl");
    int rank = Rank;

    using (var context = new ComputeContext(new[] { device }, properties, null, IntPtr.Zero))
    using (ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None))
    using (var program = new ComputeProgram(context, code))
    {
        // Build errors propagate to the caller unchanged.
        program.Build(new[] { device }, null, null, IntPtr.Zero);

        using (var kernel = program.CreateKernel("MatrixMul"))
        using (var result = new ComputeBuffer<int>(context, ComputeMemoryFlags.WriteOnly, rank * rank))
        using (var matrix = CreateMatrix(context, rank))
        {
            kernel.SetMemoryArgument(0, result);
            kernel.SetMemoryArgument(1, matrix);
            kernel.SetValueArgument(2, rank);

            Console.WriteLine($"Platform: {platform.Name}\n Device: {device.Name}\n Size: {rank}x{rank}");

            Stopwatch sw = Stopwatch.StartNew();
            commands.Execute(kernel, null, new long[] { rank, rank }, null, null);

            int[] resultArray = new int[rank * rank];
            var arrHandle = GCHandle.Alloc(resultArray, GCHandleType.Pinned);
            try
            {
                commands.Read(result, true, 0, rank * rank, arrHandle.AddrOfPinnedObject(), null);
                var elapsed = sw.Elapsed;
                Console.WriteLine($"using: {elapsed.TotalMilliseconds} ms\n");
            }
            finally
            {
                // Unpin even when the read throws.
                arrHandle.Free();
            }
        }
    }
}
/// <summary>
/// Binds the arguments to <paramref name="kernel"/>, runs it over a <paramref name="w"/> x
/// <paramref name="h"/> range and reads <paramref name="bufferSize"/> elements of
/// <paramref name="points"/> back.
/// </summary>
/// <param name="unit">Kernel argument 1.</param>
/// <param name="w">Kernel argument 2 and first dimension of the global range.</param>
/// <param name="h">Second dimension of the global range (not passed as a kernel argument).</param>
/// <param name="cx">Kernel argument 3.</param>
/// <param name="cy">Kernel argument 4.</param>
/// <param name="originx">Kernel argument 5.</param>
/// <param name="originy">Kernel argument 6.</param>
/// <param name="bufferSize">Number of elements to read back.</param>
/// <param name="points">Device buffer bound as kernel argument 0 and read back afterwards.</param>
/// <param name="kernel">Kernel to execute.</param>
/// <returns>A new array with the first <paramref name="bufferSize"/> elements of <paramref name="points"/>.</returns>
private TFP[] RunKernal(int unit, int w, int h, int cx, int cy, float originx, float originy, int bufferSize, ComputeBuffer<TFP> points, ComputeKernel kernel)
{
    kernel.SetMemoryArgument(0, points);
    kernel.SetValueArgument(1, unit);
    kernel.SetValueArgument(2, w);
    kernel.SetValueArgument(3, cx);
    kernel.SetValueArgument(4, cy);
    kernel.SetValueArgument(5, originx);
    kernel.SetValueArgument(6, originy);

    // BUG: ATI Stream v2.2 crash if event list not null.
    commands.Execute(kernel, null, new long[] { w, h }, null, events);
    //commands.Execute(kernel, null, new long[] { count }, null, null);

    TFP[] pointsArray = new TFP[bufferSize];
    GCHandle arrCHandle = GCHandle.Alloc(pointsArray, GCHandleType.Pinned);
    try
    {
        commands.Read(points, true, 0, bufferSize, arrCHandle.AddrOfPinnedObject(), events);
    }
    finally
    {
        // Unpin even when the read throws.
        arrCHandle.Free();
    }

    return pointsArray;
}
/// <summary>
/// Vector addition test: adds two random 10-element float vectors with the "VectorAdd"
/// kernel and writes every "a + b = c" line to <paramref name="log"/>.
/// Any exception is written to the log instead of being propagated.
/// </summary>
/// <param name="log">Receives the test output.</param>
/// <param name="context">OpenCL context to run in; its first device is used.</param>
public static void Run(TextWriter log, ComputeContext context)
{
    StartTest(log, "Vector addition test");

    try
    {
        int count = 10;
        float[] arrA = new float[count];
        float[] arrB = new float[count];
        float[] arrC = new float[count];

        Random rand = new Random();
        for (int i = 0; i < count; i++)
        {
            arrA[i] = (float)(rand.NextDouble() * 100);
            arrB[i] = (float)(rand.NextDouble() * 100);
        }

        // 'using' releases every OpenCL object, also when a call below throws.
        using (ComputeBuffer<float> a = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrA))
        using (ComputeBuffer<float> b = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrB))
        using (ComputeBuffer<float> c = new ComputeBuffer<float>(context, ComputeMemoryFlags.WriteOnly, arrC.Length))
        using (ComputeProgram program = new ComputeProgram(context, kernelSource))
        {
            program.Build(null, null, null, IntPtr.Zero);

            using (ComputeKernel kernel = program.CreateKernel("VectorAdd"))
            using (ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None))
            {
                kernel.SetMemoryArgument(0, a);
                kernel.SetMemoryArgument(1, b);
                kernel.SetMemoryArgument(2, c);

                ICollection<ComputeEventBase> events = new Collection<ComputeEventBase>();

                // BUG: ATI Stream v2.2 crash if event list not null.
                commands.Execute(kernel, null, new long[] { count }, null, events);
                //commands.Execute(kernel, null, new long[] { count }, null, null);

                GCHandle arrCHandle = GCHandle.Alloc(arrC, GCHandleType.Pinned);
                try
                {
                    commands.Read(c, true, 0, count, arrCHandle.AddrOfPinnedObject(), events);
                }
                finally
                {
                    arrCHandle.Free();
                }
            }
        }

        for (int i = 0; i < count; i++)
        {
            log.WriteLine("{0} + {1} = {2}", arrA[i], arrB[i], arrC[i]);
        }
    }
    catch (Exception e)
    {
        log.WriteLine(e.ToString());
    }

    EndTest(log, "Vector addition test");
}
/// <summary>
/// Runs the "ComputePoints" kernel from kernel.cl <paramref name="numberOfIterations"/> times
/// on the first GPU of the Intel OpenCL platform and returns the average time per iteration.
/// Each timed iteration uploads the depth pixels, executes the kernel and reads all four
/// output buffers back.
/// </summary>
/// <param name="depthPixels">Depth input; its length is the kernel range and the size of every output.</param>
/// <param name="width">Kernel argument 6.</param>
/// <param name="inverseRotatedFx">Kernel argument 7.</param>
/// <param name="rotatedCx">Kernel argument 8.</param>
/// <param name="inverseRotatedFy">Kernel argument 9.</param>
/// <param name="rotatedCy">Kernel argument 10.</param>
/// <param name="bedTransformationM">3x3 part of the bed affine transform.</param>
/// <param name="bedTransformationb">3x1 translation of the bed affine transform.</param>
/// <param name="floorTransformationM">3x3 part of the floor affine transform.</param>
/// <param name="floorTransformationb">3x1 translation of the floor affine transform.</param>
/// <param name="numberOfIterations">Number of timed iterations.</param>
/// <returns>Elapsed milliseconds divided by <paramref name="numberOfIterations"/>.</returns>
static float ComputeAverageGPUTime(
    ushort[] depthPixels,
    int width,
    float inverseRotatedFx,
    float rotatedCx,
    float inverseRotatedFy,
    float rotatedCy,
    Matrix bedTransformationM,
    Matrix bedTransformationb,
    Matrix floorTransformationM,
    Matrix floorTransformationb,
    int numberOfIterations)
{
    // pick the device platform
    ComputePlatform intelGPU = ComputePlatform.Platforms.Where(n => n.Name.Contains("Intel")).First();

    string kernelSource = null;
    using (StreamReader sr = new StreamReader("kernel.cl"))
    {
        kernelSource = sr.ReadToEnd();
    }

    Point3D[] outProjectivePoints = new Point3D[depthPixels.Length];
    Point3D[] outRealPoints = new Point3D[depthPixels.Length];
    Point3D[] outBedPoints = new Point3D[depthPixels.Length];
    Point3D[] outFloorPoints = new Point3D[depthPixels.Length];

    // Two 3x4 affine transforms, each row laid out as [M row | b].
    float[] affines = new float[24];

    // do bed affines first because that's what assembly code expects
    int z = 0;
    for (int b = 0; b < 3; b++)
    {
        for (int c = 0; c < 3; c++)
        {
            affines[z++] = bedTransformationM[b, c];
        }
        affines[z++] = bedTransformationb[b, 0];
    }

    // do floor affines next because that's what assembly code expects
    for (int b = 0; b < 3; b++)
    {
        for (int c = 0; c < 3; c++)
        {
            affines[z++] = floorTransformationM[b, c];
        }
        affines[z++] = floorTransformationb[b, 0];
    }

    // Context on the Intel platform's GPU, a queue on its first device, and the program.
    using (ComputeContext context = new ComputeContext(ComputeDeviceTypes.Gpu, new ComputeContextPropertyList(intelGPU), null, IntPtr.Zero))
    using (ComputeCommandQueue commandQueue = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None))
    using (ComputeProgram program = new ComputeProgram(context, new string[] { kernelSource }))
    {
        // compile.
        program.Build(null, null, null, IntPtr.Zero);

        // NOTE(review): the UseHostPointer buffers alias managed arrays that are only pinned
        // inside the 'fixed' blocks below - confirm this is acceptable for the target driver.
        // Each buffer is paired with the host array of the same name (previously they were crossed).
        using (ComputeKernel kernel = program.CreateKernel("ComputePoints"))
        using (ComputeBuffer<float> affinesBuffer = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, affines))
        using (ComputeBuffer<Point3D> projectivePointsBuffer = new ComputeBuffer<Point3D>(context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.UseHostPointer, outProjectivePoints))
        using (ComputeBuffer<Point3D> realPointsBuffer = new ComputeBuffer<Point3D>(context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.UseHostPointer, outRealPoints))
        using (ComputeBuffer<Point3D> bedPointsBuffer = new ComputeBuffer<Point3D>(context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.UseHostPointer, outBedPoints))
        using (ComputeBuffer<Point3D> floorPointsBuffer = new ComputeBuffer<Point3D>(context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.UseHostPointer, outFloorPoints))
        {
            kernel.SetMemoryArgument(1, affinesBuffer);
            kernel.SetMemoryArgument(2, projectivePointsBuffer);
            kernel.SetMemoryArgument(3, realPointsBuffer);

            // Arguments 4 and 5 used to re-bind the projective/real buffers, so the bed and
            // floor buffers read back below were never written by the kernel.
            kernel.SetMemoryArgument(4, bedPointsBuffer);
            kernel.SetMemoryArgument(5, floorPointsBuffer);

            kernel.SetValueArgument<int>(6, width);
            kernel.SetValueArgument<float>(7, inverseRotatedFx);
            kernel.SetValueArgument<float>(8, rotatedCx);
            kernel.SetValueArgument<float>(9, inverseRotatedFy);
            kernel.SetValueArgument<float>(10, rotatedCy);

            Stopwatch sw = new Stopwatch();
            sw.Start();
            for (int c = 0; c < numberOfIterations; c++)
            {
                // The upload is part of the measured work, so the buffer is re-created each
                // iteration - and now released each iteration instead of leaking.
                using (ComputeBuffer<ushort> depthPointsBuffer = new ComputeBuffer<ushort>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, depthPixels))
                {
                    kernel.SetMemoryArgument(0, depthPointsBuffer);
                    commandQueue.Execute(kernel, new long[] { 0 }, new long[] { depthPixels.Length }, null, null);

                    unsafe
                    {
                        fixed (Point3D* projectivePointsPtr = outProjectivePoints)
                        fixed (Point3D* realPointsPtr = outRealPoints)
                        fixed (Point3D* bedPointsPtr = outBedPoints)
                        fixed (Point3D* floorPointsPtr = outFloorPoints)
                        {
                            // Non-blocking reads; Finish() runs while the arrays are still pinned.
                            commandQueue.Read(projectivePointsBuffer, false, 0, outProjectivePoints.Length, new IntPtr(projectivePointsPtr), null);
                            commandQueue.Read(realPointsBuffer, false, 0, outRealPoints.Length, new IntPtr(realPointsPtr), null);
                            commandQueue.Read(bedPointsBuffer, false, 0, outBedPoints.Length, new IntPtr(bedPointsPtr), null);
                            commandQueue.Read(floorPointsBuffer, false, 0, outFloorPoints.Length, new IntPtr(floorPointsPtr), null);
                            commandQueue.Finish();
                        }
                    }
                }
            }
            sw.Stop();

            return sw.ElapsedMilliseconds / (numberOfIterations * 1.0f);
        }
    }
}
// 26 ms 4096x4096@512 iter with 1024 cores
/// <summary>
/// Renders into <paramref name="message"/> with the "mandel" kernel from Mandel3.cl on the
/// first GPU of the first platform, enqueuing one 32x32 tile per kernel launch, and returns
/// the time spent executing the tiles and reading the result back.
/// </summary>
/// <param name="ymin">Kernel argument 2.</param>
/// <param name="xmin">Kernel argument 3.</param>
/// <param name="width">Kernel argument 4.</param>
/// <param name="message">Host array backing the output buffer; also receives the read-back data.</param>
/// <returns>Elapsed milliseconds for kernel execution plus read-back.</returns>
static long Method05(float ymin, float xmin, float width, int[] message)
{
    // pick first platform
    ComputePlatform platform = ComputePlatform.Platforms[0];

    // load opencl source
    string clSource;
    using (StreamReader streamReader = new StreamReader("Mandel3.cl"))
    {
        clSource = streamReader.ReadToEnd();
    }

    int messageSize = message.Length;

    // context with all gpu devices, command queue on the first gpu found, program from source
    using (ComputeContext context = new ComputeContext(ComputeDeviceTypes.Gpu, new ComputeContextPropertyList(platform), null, IntPtr.Zero))
    using (ComputeCommandQueue queue = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None))
    using (ComputeProgram program = new ComputeProgram(context, clSource))
    {
        // compile opencl source
        program.Build(null, null, null, IntPtr.Zero);

        // load chosen kernel from program and allocate a memory buffer with the message
        using (ComputeKernel kernel = program.CreateKernel("mandel"))
        using (ComputeBuffer<int> messageBuffer = new ComputeBuffer<int>(context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.UseHostPointer, message))
        {
            kernel.SetMemoryArgument(0, messageBuffer);
            kernel.SetValueArgument(1, N);
            kernel.SetValueArgument(2, ymin);
            kernel.SetValueArgument(3, xmin);
            kernel.SetValueArgument(4, width);
            kernel.SetValueArgument(5, MaxIter);

            var watch = System.Diagnostics.Stopwatch.StartNew();

            // Execute kernel: one launch per 32x32 tile, addressed through the global offset.
            // NOTE(review): when N is not a multiple of 32 the remaining border is never rendered.
            for (var i = 0; i < N / 32; ++i)
            {
                for (var j = 0; j < N / 32; ++j)
                {
                    queue.Execute(kernel, new long[] { i * 32, j * 32 }, new long[] { 32, 32 }, null, null);
                }
            }

            // Read data back
            unsafe
            {
                fixed (int* retPtr = message)
                {
                    // Non-blocking read; Finish() runs while 'message' is still pinned.
                    queue.Read(messageBuffer, false, 0, messageSize, new IntPtr(retPtr), null);
                    queue.Finish();
                }
            }

            watch.Stop();
            return watch.ElapsedMilliseconds;
        }
    }
}
/// <summary>
/// Renders one frame: sets kernel arguments 4-8, runs the kernel over an N x N range, reads
/// the result into <c>message</c>, shows the elapsed time in <c>debugLabel</c> and copies
/// the pixels into the back buffer of <c>writeableBitmap</c>.
/// </summary>
static void GenerateMandelBrot()
{
    var nWidth = width / N;

    // Arguments 4-6 are passed as float unless double precision was requested.
    if (!useDoublePrecision)
    {
        kernel.SetValueArgument(4, (float)ymin);
        kernel.SetValueArgument(5, (float)xmin);
        kernel.SetValueArgument(6, (float)nWidth);
    }
    else
    {
        kernel.SetValueArgument(4, ymin);
        kernel.SetValueArgument(5, xmin);
        kernel.SetValueArgument(6, nWidth);
    }

    kernel.SetValueArgument(7, maxIter);
    kernel.SetValueArgument(8, coclorCycle);

    // Advance the colour cycle for the next frame.
    coclorCycle += colorCycleFrameIncrement;

    Stopwatch watch = Stopwatch.StartNew();

    // Execute kernel
    queue.Execute(kernel, new long[] { 0, 0 }, new long[] { N, N }, null, null);

    // Read data back
    unsafe
    {
        fixed (int* pixels = message)
        {
            // Non-blocking read; Finish() runs while 'message' is still pinned.
            queue.Read(messageBuffer, false, 0, messageSize, new IntPtr(pixels), null);
            queue.Finish();
        }
    }

    watch.Stop();
    debugLabel.Content = $"{watch.ElapsedMilliseconds} ms";

    // Write to bitmap
    try
    {
        // Reserve the back buffer for updates.
        writeableBitmap.Lock();

        IntPtr backBuffer = writeableBitmap.BackBuffer;
        int pixelCount = (int)(writeableBitmap.Height * writeableBitmap.Width);
        System.Runtime.InteropServices.Marshal.Copy(message, 0, backBuffer, pixelCount);

        // Specify the area of the bitmap that changed.
        var dirtyArea = new Int32Rect(0, 0, (int)writeableBitmap.Width, (int)writeableBitmap.Height);
        writeableBitmap.AddDirtyRect(dirtyArea);
    }
    finally
    {
        // Release the back buffer and make it available for display.
        writeableBitmap.Unlock();
    }
}
/// <summary>
/// Performs a blocking read of <c>width * height * 4</c> elements from <c>cbuf_Result</c>
/// into the memory pinned by <c>gc_resultBuffer</c>, then waits for the queue to finish.
/// </summary>
public void ReadResult()
{
    var elementCount = width * height * 4;
    var destination = gc_resultBuffer.AddrOfPinnedObject();

    clCommands.Read(cbuf_Result, true, 0, elementCount, destination, null);
    clCommands.Finish();
}
/// <summary>
/// Creates the Julia image using the GPU.
/// </summary>
/// <remarks>
/// The kernel output (4 bytes per pixel) is copied into a bitmap that owns its pixel memory,
/// so the returned image stays valid after this method returns.
/// </remarks>
/// <param name="width">The width.</param>
/// <param name="height">The height.</param>
/// <returns>An image containing the Julia set.</returns>
private Image CreateGPU(int width, int height)
{
    // Initialize the position of the set.
    float realLeft = -1.5f;
    float realRight = 1.5f;
    float imaginaryBottom = -1.2f;
    float imaginaryTop = imaginaryBottom + (realRight - realLeft) * height / width;

    /* Compute factors for translating from the imaginary plane to the
     * Cartesian plane. */
    float realFactor = (realRight - realLeft) / (width - 1);
    float imaginaryFactor = (imaginaryTop - imaginaryBottom) / (height - 1);

    // Set the number of iterations to check for values in the set.
    uint maxIterations = MAX_ITERATIONS;

    int rowBytes = width * 4;
    int byteCount = rowBytes * height;
    byte[] kernelResult = new byte[byteCount];

    // Create a buffer for kernel output. A byte buffer makes the element count equal the
    // byte count of the host array it is read into.
    using (ComputeBuffer<byte> kernelOutput = new ComputeBuffer<byte>(context, ComputeMemoryFlags.WriteOnly, byteCount))
    {
        // Set arguments for the kernel.
        kernel.SetValueArgument<float>(0, realFactor);
        kernel.SetValueArgument<float>(1, imaginaryFactor);
        kernel.SetValueArgument<float>(2, realLeft);
        kernel.SetValueArgument<float>(3, imaginaryBottom);
        kernel.SetValueArgument<float>(4, imaginaryTop);
        kernel.SetValueArgument<uint>(5, maxIterations);
        kernel.SetValueArgument<int>(6, width);
        kernel.SetMemoryArgument(7, kernelOutput);

        // TODO: Scale work group and work item sizes to fit the resolution.
        commands.Execute(kernel, null, new long[] { width, height }, null, events);

        // Pin the host array for the duration of the (non-blocking) read.
        GCHandle kernelResultHandle = GCHandle.Alloc(kernelResult, GCHandleType.Pinned);
        try
        {
            commands.Read(kernelOutput, false, 0, byteCount, kernelResultHandle.AddrOfPinnedObject(), events);
            commands.Finish();
        }
        finally
        {
            kernelResultHandle.Free();
        }
    }

    // A Bitmap built from a scan0 pointer keeps using that memory, which would dangle as soon
    // as the array is unpinned. Copy the pixels into a bitmap with its own storage instead.
    Bitmap image = new Bitmap(width, height, PixelFormat.Format32bppArgb);
    try
    {
        BitmapData data = image.LockBits(new Rectangle(0, 0, width, height), ImageLockMode.WriteOnly, PixelFormat.Format32bppArgb);
        try
        {
            // Copy row by row so the bitmap's own stride is honoured.
            for (int y = 0; y < height; y++)
            {
                Marshal.Copy(kernelResult, y * rowBytes, IntPtr.Add(data.Scan0, y * data.Stride), rowBytes);
            }
        }
        finally
        {
            image.UnlockBits(data);
        }
    }
    catch
    {
        image.Dispose();
        throw;
    }

    return image;
}
/// <summary>
/// Program notification callback: runs the "test" kernel of <c>program</c> once over a
/// 16 x 256 x 1048576 range on a temporary queue and reads 16 values back into a local array.
/// The values read are not used afterwards.
/// </summary>
/// <param name="programHandle">Handle supplied by the notification; not used.</param>
/// <param name="userDataPtr">User data supplied by the notification; not used.</param>
private unsafe void notify(CLProgramHandle programHandle, IntPtr userDataPtr)
{
    uint[] dst = new uint[16];
    fixed (uint* dstPtr = dst)
    {
        // The buffer and kernel are now released together with the queue.
        using (var queue = new ComputeCommandQueue(ccontext, device, ComputeCommandQueueFlags.None))
        using (var buf = new ComputeBuffer<uint>(ccontext, ComputeMemoryFlags.WriteOnly, 16))
        using (var kernel = program.CreateKernel("test"))
        {
            kernel.SetValueArgument(0, 1443351125U);
            kernel.SetMemoryArgument(1, buf);

            queue.Execute(kernel, null, new long[] { 16L, 256L, 1048576L }, null, null);
            queue.Finish();

            // Blocking read: it has completed when the call returns, so no further Finish() is needed.
            queue.Read<uint>(buf, true, 0, 16, (IntPtr)dstPtr, null);
        }
    }
}
/// <summary>
/// Adds two random 10-element float vectors with the "VectorAdd" kernel on the first device
/// of <c>context</c> and prints every "a + b = c" line to the console.
/// </summary>
protected override void RunInternal()
{
    int count = 10;
    float[] arrA = new float[count];
    float[] arrB = new float[count];
    float[] arrC = new float[count];

    Random rand = new Random();
    for (int i = 0; i < count; i++)
    {
        arrA[i] = (float)(rand.NextDouble() * 100);
        arrB[i] = (float)(rand.NextDouble() * 100);
    }

    // 'using' releases every OpenCL object, also when a call below throws.
    using (ComputeBuffer<float> a = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrA))
    using (ComputeBuffer<float> b = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrB))
    using (ComputeBuffer<float> c = new ComputeBuffer<float>(context, ComputeMemoryFlags.WriteOnly, arrC.Length))
    using (ComputeProgram program = new ComputeProgram(context, new string[] { kernelSource }))
    {
        program.Build(null, null, null, IntPtr.Zero);

        using (ComputeKernel kernel = program.CreateKernel("VectorAdd"))
        using (ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None))
        {
            kernel.SetMemoryArgument(0, a);
            kernel.SetMemoryArgument(1, b);
            kernel.SetMemoryArgument(2, c);

            ComputeEventList events = new ComputeEventList();

            commands.Execute(kernel, null, new long[] { count }, null, events);

            GCHandle arrCHandle = GCHandle.Alloc(arrC, GCHandleType.Pinned);
            try
            {
                // Non-blocking read: the array must stay pinned until Finish() returns.
                commands.Read(c, false, 0, count, arrCHandle.AddrOfPinnedObject(), events);
                commands.Finish();
            }
            finally
            {
                arrCHandle.Free();
            }
        }
    }

    for (int i = 0; i < count; i++)
    {
        Console.WriteLine("{0} + {1} = {2}", arrA[i], arrB[i], arrC[i]);
    }
}
/// <summary>
/// Reads the full image region (width x height x 1, starting at the origin) of this variable
/// into the memory at <paramref name="p"/>. Row pitch and slice pitch are passed as 0.
/// </summary>
/// <param name="p">Destination pointer.</param>
/// <param name="CQ">Command queue to use.</param>
/// <param name="BlockingRead">Forwarded to the queue's read call as its blocking flag.</param>
/// <param name="events">Event collection forwarded to the read call.</param>
/// <exception cref="Exception">The variable was created from an OpenGL buffer and is not acquired in OpenCL.</exception>
private unsafe void ReadFromDeviceTo(void* p, ComputeCommandQueue CQ, bool BlockingRead, ICollection<ComputeEventBase> events)
{
    // Variables created from OpenGL buffers are only usable while acquired.
    bool usableFromOpenCL = !CreatedFromGLBuffer || AcquiredInOpenCL;
    if (!usableFromOpenCL)
    {
        throw new Exception("Attempting to use a variable created from OpenGL buffer without acquiring. Should use CLGLInteropFunctions to properly acquire and release these variables");
    }

    var image = (ComputeImage)VarPointer;
    var origin = new SysIntX3(0, 0, 0);
    var region = new SysIntX3(width, height, 1);
    var destination = new IntPtr(p);

    CQ.Read(image, BlockingRead, origin, region, 0, 0, destination, events);
}
/// <summary>
/// Reads the full image region (width x height x 1, starting at the origin) of this variable
/// into the memory at <paramref name="p"/>. Row pitch and slice pitch are passed as 0.
/// </summary>
/// <param name="p">Destination pointer.</param>
/// <param name="CQ">Command queue to use.</param>
/// <param name="BlockingRead">Forwarded to the queue's read call as its blocking flag.</param>
/// <param name="events">Event collection forwarded to the read call.</param>
private unsafe void ReadFromDeviceTo(void* p, ComputeCommandQueue CQ, bool BlockingRead, ICollection<ComputeEventBase> events)
{
    var image = (ComputeImage)VarPointer;
    var origin = new SysIntX3(0, 0, 0);
    var region = new SysIntX3(width, height, 1);
    var destination = new IntPtr(p);

    CQ.Read(image, BlockingRead, origin, region, 0, 0, destination, events);
}
/// <summary>
/// Applies the "gaussian_blur" kernel from gaussianblur.cl to <paramref name="imageFile"/> on
/// the first device of the first platform and saves the result to <paramref name="dst"/>.
/// Prints the platform, device, image size and the time taken by execution plus read-back.
/// </summary>
/// <param name="imageFile">Path of the source image, passed to <c>CreateImageFromBitmap</c>.</param>
/// <param name="dst">Path the blurred bitmap is saved to.</param>
/// <exception cref="InvalidOperationException">No OpenCL platform or device is available.</exception>
public void Compute_cl(string imageFile, string dst)
{
    // Select the device
    var platform = ComputePlatform.Platforms.FirstOrDefault();
    if (platform == null)
    {
        throw new InvalidOperationException("No OpenCL platform is available.");
    }
    var device = platform.Devices.FirstOrDefault();
    if (device == null)
    {
        throw new InvalidOperationException("The OpenCL platform exposes no device.");
    }

    // Set up the context
    var properties = new ComputeContextPropertyList(platform);

    // Read the OpenCL source
    var code = File.ReadAllText(@"gaussianblur.cl");

    using (var context = new ComputeContext(new[] { device }, properties, null, IntPtr.Zero))
    // Command queue, used to control the code being executed
    using (ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None))
    using (var program = new ComputeProgram(context, code))
    {
        // Compile; build errors propagate to the caller unchanged.
        program.Build(new[] { device }, null, null, IntPtr.Zero);

        using (var images = CreateImageFromBitmap(imageFile, context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer))
        // Create the kernel: the function marked `kernel` in the .cl source named gaussian_blur
        using (var kernel = program.CreateKernel("gaussian_blur"))
        // Buffer that receives the computed result
        using (var resultBuffer = new ComputeBuffer<char>(context, ComputeMemoryFlags.WriteOnly, dstBytes.Length))
        // Previously created inline and never released.
        using (var matrixBuffer = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, _matrix))
        {
            // Kernel arguments are passed this way
            kernel.SetMemoryArgument(0, images);
            kernel.SetMemoryArgument(1, resultBuffer);
            kernel.SetMemoryArgument(2, matrixBuffer);
            kernel.SetValueArgument(3, Radius);
            kernel.SetValueArgument(4, (int)images.Width);

            Console.WriteLine($"\n运行平台: {platform.Name}\n运行设备: {device.Name}");
            Stopwatch sw = Stopwatch.StartNew();
            var climg = images;
            Console.WriteLine($"处理图片尺寸:{climg.Width}*{climg.Height}");

            // Execute the kernel
            commands.Execute(kernel, null, new long[] { climg.Width, climg.Height }, null, null);

            // read data
            char[] resultArray = new char[dstBytes.Length];

            // Pin once: the same pinned memory receives the read and backs the bitmap, and it
            // must stay pinned until the bitmap has been saved and disposed.
            var resultHandle = GCHandle.Alloc(resultArray, GCHandleType.Pinned);
            try
            {
                commands.Read(resultBuffer, true, 0, dstBytes.Length, resultHandle.AddrOfPinnedObject(), null);

                using (var bmp = new Bitmap(climg.Width, climg.Height, climg.Width * 4, PixelFormat.Format32bppArgb, resultHandle.AddrOfPinnedObject()))
                {
                    var elapsed = sw.Elapsed;
                    Console.WriteLine($"OpenCL处理耗时: {elapsed.TotalMilliseconds} ms\n");
                    bmp.Save(dst);
                }
            }
            finally
            {
                resultHandle.Free();
            }
        }
    }
}
/// <summary>Reads variable from device.</summary>
/// <param name="Values">Values to store data coming from device; its length must equal the allocated length</param>
/// <param name="CQ">Command queue to use</param>
/// <param name="BlockingRead">TRUE to return only after completed reading.</param>
/// <param name="events">OpenCL Event associated with this operation</param>
/// <exception cref="Exception">
/// The length of <paramref name="Values"/> differs from the allocated length, or the variable
/// was created from an OpenGL buffer and is not acquired in OpenCL.
/// </exception>
public void ReadFromDeviceTo(byte[] Values, ComputeCommandQueue CQ, bool BlockingRead, ICollection<ComputeEventBase> events)
{
    bool lengthMatches = Values.Length == OriginalVarLength;
    if (!lengthMatches)
    {
        throw new Exception("Values length should be the same as allocated length");
    }

    // Variables created from OpenGL buffers are only usable while acquired.
    bool usableFromOpenCL = !CreatedFromGLBuffer || AcquiredInOpenCL;
    if (!usableFromOpenCL)
    {
        throw new Exception("Attempting to use a variable created from OpenGL buffer without acquiring. Should use CLGLInteropFunctions to properly acquire and release these variables");
    }

    ComputeBuffer<byte> deviceBuffer = (ComputeBuffer<byte>)VarPointer;

    unsafe
    {
        // NOTE(review): the array is only pinned inside this block; with BlockingRead == false
        // the read may still be in flight when it is unpinned - confirm callers account for that.
        fixed (byte* destination = Values)
        {
            CQ.Read<byte>(deviceBuffer, BlockingRead, 0, Values.Length, (IntPtr)destination, events);
        }
    }
}