/// <summary>
/// Initializes a <see cref="ComputeBuffer{T}"/> whose contents are described by a raw pointer.
/// </summary>
/// <param name="context"> The <see cref="ComputeContext"/> whose handle is used to create the <see cref="ComputeBuffer{T}"/>. </param>
/// <param name="flags"> A bit-field specifying allocation and usage information for the <see cref="ComputeBuffer{T}"/>. </param>
/// <param name="count"> The number of elements of the <see cref="ComputeBuffer{T}"/>; the requested size in bytes is <c>Marshal.SizeOf(typeof(T)) * count</c>. </param>
/// <param name="dataPtr"> A pointer to the data for the <see cref="ComputeBuffer{T}"/>; forwarded unchanged to <c>CL10.CreateBuffer</c>. </param>
public ComputeBuffer(ComputeContext context, ComputeMemoryFlags flags, long count, IntPtr dataPtr)
    : base(context, flags)
{
    var contextHandle = context.Handle;

    // int * long is evaluated as long, so the byte count is not truncated to 32 bits here.
    var sizeInBytes = new IntPtr(Marshal.SizeOf(typeof(T)) * count);

    ComputeErrorCode status;
    Handle = CL10.CreateBuffer(contextHandle, flags, sizeInBytes, dataPtr, out status);
    ComputeException.ThrowOnError(status);

    Init();
}
/// <summary>
/// Creates a new <see cref="ComputeBuffer{T}"/> from a managed array.
/// </summary>
/// <param name="context"> A <see cref="ComputeContext"/> used to create the <see cref="ComputeBuffer{T}"/>. </param>
/// <param name="flags"> A bit-field that is used to specify allocation and usage information about the <see cref="ComputeBuffer{T}"/>. </param>
/// <param name="data"> The data for the <see cref="ComputeBuffer{T}"/>. Its length determines the number of elements. </param>
/// <remarks>
/// <paramref name="data"/> is pinned only for the duration of the <c>CL10.CreateBuffer</c> call and is unpinned before this constructor returns.
/// Note that <paramref name="data"/> cannot be an "immediate" parameter, i.e.: <c>new T[100]</c>, because it could be quickly collected by the GC, causing Cloo to send an invalid reference to OpenCL.
/// </remarks>
public ComputeBuffer(ComputeContext context, ComputeMemoryFlags flags, T[] data)
    : base(context, flags)
{
    GCHandle dataPtr = GCHandle.Alloc(data, GCHandleType.Pinned);
    try
    {
        // Widen to long before multiplying: int * int wraps silently for arrays
        // larger than 2 GiB and would hand OpenCL a bogus (possibly negative) size.
        long sizeInBytes = (long)Marshal.SizeOf(typeof(T)) * data.Length;

        ComputeErrorCode error;
        Handle = CL10.CreateBuffer(context.Handle, flags, new IntPtr(sizeInBytes), dataPtr.AddrOfPinnedObject(), out error);
        ComputeException.ThrowOnError(error);
    }
    finally
    {
        // Always release the pin, even when buffer creation throws.
        dataPtr.Free();
    }

    Init();
}
/// <summary>
/// Runs eight counting/summing/reordering passes over a key buffer and its associated value buffer,
/// alternating between the caller's buffers and two temporary buffers allocated for the duration of the call.
/// </summary>
/// <param name="key"> Key buffer; temporary key storage is sized at 4 bytes per element. </param>
/// <param name="value"> Value buffer; temporary value storage is sized at 8 bytes per element. </param>
/// <param name="numElements"> Number of elements in <paramref name="key"/> and <paramref name="value"/>. </param>
/// <remarks>
/// Presumably a 4-bits-per-pass radix sort (each pass receives the offset <c>4 * pass</c>) — confirm against the helpers.
/// </remarks>
public void sortKeysValue(CLMemoryHandle key, CLMemoryHandle value, int numElements)
{
    debugRead = new int[Math.Max(numElements, numCounters)];

    ComputeErrorCode status;
    ComputeEvent completionEvent;

    // Working buffers shared with the helper routines through fields.
    mCounters = CL10.CreateBuffer(cxGPUContext, ComputeMemoryFlags.ReadWrite, gpuConstants.numGroupsPerBlock * gpuConstants.numRadices * gpuConstants.numBlocks * sizeof(int), out status);
    CheckErr(status, "CL10.CreateBuffer");

    mRadixPrefixes = CL10.CreateBuffer(cxGPUContext, ComputeMemoryFlags.ReadWrite, gpuConstants.numRadices * sizeof(int), out status);
    CheckErr(status, "CL10.CreateBuffer");

    // Temporary buffers the passes alternate with: 8 bytes per value, 4 bytes per key.
    IntPtr valueBytes = (IntPtr)(8 * numElements);
    IntPtr keyBytes = (IntPtr)(4 * numElements);

    CLMemoryHandle outputValue = CL10.CreateBuffer(cxGPUContext, ComputeMemoryFlags.ReadWrite, valueBytes, out status);
    CheckErr(status, "CL10.CreateBuffer");

    CLMemoryHandle outputKey = CL10.CreateBuffer(cxGPUContext, ComputeMemoryFlags.ReadWrite, keyBytes, out status);
    CheckErr(status, "CL10.CreateBuffer");

    gpuConstants.numElementsPerGroup = (numElements / (gpuConstants.numBlocks * gpuConstants.numGroupsPerBlock)) + 1;
    gpuConstants.numTotalElements = numElements;

    int pass;
    for (pass = 0; pass < 8; pass++)
    {
        // Reset the counters buffer from the host-side 'counters' array before every pass.
        status = CL10.EnqueueWriteBuffer(cqCommandQueue, mCounters, Bool.True, IntPtr.Zero, (IntPtr)(numCounters * 4), counters, 0, null, out completionEvent);
        CheckErr(status, "CL10.EnqueueWriteBuffer Counter initialize");

        int bitOffset = 4 * pass;
        bool evenPass = pass % 2 == 0;

        // Even passes go caller buffers -> temporaries, odd passes go back again.
        CLMemoryHandle firstKey = evenPass ? key : outputKey;
        CLMemoryHandle secondKey = evenPass ? outputKey : key;
        CLMemoryHandle firstValue = evenPass ? value : outputValue;
        CLMemoryHandle secondValue = evenPass ? outputValue : value;

        SetupAndCount(firstKey, bitOffset);
        SumIt(firstKey, bitOffset);
        ReorderingKeyValue(firstKey, secondKey, firstValue, secondValue, bitOffset);
    }

    // 'pass' is always 8 here, so this branch always executes.
    // NOTE(review): the last pass (index 7) handed key/value to ReorderingKeyValue as its 2nd/4th
    // arguments; if those are the destinations, this copy overwrites them with the temporaries — confirm intent.
    if (pass % 2 == 0)
    {
        status = CL10.EnqueueCopyBuffer(cqCommandQueue, outputKey, key, IntPtr.Zero, IntPtr.Zero, keyBytes, 0, null, out completionEvent);
        CheckErr(status, "CL10.EnqueueCopyBuffer");
        status = CL10.Finish(cqCommandQueue);
        CheckErr(status, "CL10.Finish Copybuffer");

        status = CL10.EnqueueCopyBuffer(cqCommandQueue, outputValue, value, IntPtr.Zero, IntPtr.Zero, valueBytes, 0, null, out completionEvent);
        CheckErr(status, "CL10.EnqueueCopyBuffer");
        status = CL10.Finish(cqCommandQueue);
        CheckErr(status, "CL10.Finish Copybuffer");
    }

    // Release everything allocated above.
    status = CL10.ReleaseMemObject(outputKey);
    CheckErr(status, "CL10.ReleaseMemObj");
    status = CL10.ReleaseMemObject(outputValue);
    CheckErr(status, "CL10.ReleaseMemObj");
    status = CL10.ReleaseMemObject(mRadixPrefixes);
    CheckErr(status, "CL10.ReleaseMemObj");
    status = CL10.ReleaseMemObject(mCounters);
    CheckErr(status, "CL10.ReleaseMemObj");

    Log_Idx++;
}
/// <summary>
/// Creates the underlying OpenCL buffer for <paramref name="count"/> elements of <c>T</c>,
/// stores the resulting handle in <c>Handle</c> and then calls <c>Init</c>.
/// </summary>
/// <param name="context"> The context whose handle is passed to <c>CL10.CreateBuffer</c>. </param>
/// <param name="flags"> Memory flags forwarded to <c>CL10.CreateBuffer</c>. </param>
/// <param name="count"> Number of elements; the requested size in bytes is <c>Marshal.SizeOf(typeof(T)) * count</c>. </param>
/// <param name="dataPtr"> Data pointer forwarded unchanged to <c>CL10.CreateBuffer</c>. </param>
private void InternalCreateBuffer(IComputeContext context, ComputeMemoryFlags flags, long count, IntPtr dataPtr)
{
    var contextHandle = context.Handle;

    // int * long is evaluated as long, so the byte count is not truncated to 32 bits here.
    var sizeInBytes = new IntPtr(Marshal.SizeOf(typeof(T)) * count);

    ComputeErrorCode status;
    Handle = CL10.CreateBuffer(contextHandle, flags, sizeInBytes, dataPtr, out status);

    // Surface any OpenCL failure before running Init.
    ComputeException.ThrowOnError(status);
    Init();
}
/// <summary>
/// Runs eight counting/summing/reordering passes over a key buffer, alternating between
/// <paramref name="input"/> and <paramref name="output"/> as the buffers handed to the helper routines.
/// </summary>
/// <param name="input"> Key buffer used first on even passes (4 bytes per element are read back in debug mode). </param>
/// <param name="output"> Key buffer used first on odd passes. </param>
/// <param name="numElements"> Number of elements in the key buffers. </param>
/// <remarks>
/// Presumably a 4-bits-per-pass radix sort (each pass receives the offset <c>4 * i</c>) — confirm against the helpers.
/// </remarks>
public void sortKeysOnly(CLMemoryHandle input, CLMemoryHandle output, int numElements)
{
    debugRead = new int[Math.Max(numElements, numCounters)];
    ComputeErrorCode error;
    ComputeEvent eve;

    // Working buffers shared with the helper routines through fields.
    mCounters = CL10.CreateBuffer(cxGPUContext, ComputeMemoryFlags.ReadWrite, gpuConstants.numGroupsPerBlock * gpuConstants.numRadices * gpuConstants.numBlocks * sizeof(int), out error);
    CheckErr(error, "CL10.CreateBuffer");

    mRadixPrefixes = CL10.CreateBuffer(cxGPUContext, ComputeMemoryFlags.ReadWrite, gpuConstants.numRadices * sizeof(int), out error);
    CheckErr(error, "CL10.CreateBuffer");

    gpuConstants.numElementsPerGroup = (numElements / (gpuConstants.numBlocks * gpuConstants.numGroupsPerBlock)) + 1;
    gpuConstants.numTotalElements = numElements;

    if (DEBUG)
    {
        // Capture the read's own status; previously the stale code from CreateBuffer
        // was re-checked and a failed read went unnoticed.
        error = CL10.EnqueueReadBuffer(cqCommandQueue, input, Bool.True, IntPtr.Zero, (IntPtr)(gpuConstants.numTotalElements * 4), debugRead, 0, null, out eve);
        CheckErr(error, "CL10.EnqueueReadBuffer");
        PrintAsArray(debugRead, gpuConstants.numTotalElements);
    }

    int i;
    for (i = 0; i < 8; i++)
    {
        // Reset the counters buffer from the host-side 'counters' array before every pass.
        error = CL10.EnqueueWriteBuffer(cqCommandQueue, mCounters, true, IntPtr.Zero, (IntPtr)(numCounters * 4), counters, 0, null, out eve);
        CheckErr(error, "CL10.EnqueueWriteBuffer Counter initialize");

        if (i % 2 == 0)
        {
            // Even passes are individually timed for the optional console diagnostics.
            DateTime before = DateTime.Now;
            SetupAndCount(input, 4 * i);
            if (DEBUG_CONSOLE_OUTPUT)
            {
                Console.WriteLine("Setup and Count =" + (DateTime.Now - before).TotalMilliseconds);
            }

            before = DateTime.Now;
            SumIt(input, 4 * i);
            if (DEBUG_CONSOLE_OUTPUT)
            {
                Console.WriteLine("SumIt =" + (DateTime.Now - before).TotalMilliseconds);
            }

            before = DateTime.Now;
            ReorderingKeysOnly(input, output, 4 * i);
            if (DEBUG_CONSOLE_OUTPUT)
            {
                Console.WriteLine("Reorder =" + (DateTime.Now - before).TotalMilliseconds);
            }
        }
        else
        {
            SetupAndCount(output, 4 * i);
            SumIt(output, 4 * i);
            ReorderingKeysOnly(output, input, 4 * i);
        }
    }

    // 'i' is always 8 here, so with the current pass count this branch never executes.
    // NOTE(review): kept as-is; confirm which buffer callers expect to hold the final result.
    if (i % 2 != 0)
    {
        error = CL10.EnqueueCopyBuffer(cqCommandQueue, input, output, IntPtr.Zero, IntPtr.Zero, (IntPtr)(numElements * 4), 0, null, out eve);
        CheckErr(error, "CL10.EnqueueCopyBuffer");
        error = CL10.Finish(cqCommandQueue);
        CheckErr(error, "CL10.Finish Copybuffer");
    }

    // Release the working buffers allocated above.
    error = CL10.ReleaseMemObject(mRadixPrefixes);
    CheckErr(error, "CL10.ReleaseMemObj");
    error = CL10.ReleaseMemObject(mCounters);
    CheckErr(error, "CL10.ReleaseMemObj");

    Log_Idx++;
}