/// <summary>
/// Copies <paramref name="count"/> doubles from this memory object into a managed array
/// by mapping the region for reading, copying element by element, and unmapping it again.
/// </summary>
/// <param name="cq">Command queue used for the map, unmap and final Finish calls.</param>
/// <param name="srcOffset">Offset into this memory object, passed unchanged to EnqueueMapBuffer (presumably in bytes - confirm against the wrapper).</param>
/// <param name="dstData">Managed array that receives the values.</param>
/// <param name="dstStartIndex">Index in <paramref name="dstData"/> of the first element written.</param>
/// <param name="count">Number of doubles to copy.</param>
public virtual void Read(CommandQueue cq, long srcOffset, double[] dstData, int dstStartIndex, int count)
{
    // Widen before multiplying: int * int would overflow for counts above int.MaxValue / 8.
    // This matches the computation used by the corresponding Write overload.
    long byteCount = (long)count * sizeof(double);

    IntPtr p = cq.EnqueueMapBuffer(this, true, MapFlags.READ, srcOffset, byteCount);
    double* pBlock = (double*)p.ToPointer();
    for (long i = 0; i < count; i++)
        dstData[dstStartIndex + i] = pBlock[i];

    cq.EnqueueUnmapMemObject(this, p);
    cq.Finish();
}
/// <summary>
/// Stores <paramref name="count"/> doubles from a managed array into this memory object.
/// The target region is mapped for writing, filled element by element, then unmapped,
/// and the queue is finished before returning.
/// </summary>
/// <param name="cq">Command queue used for the map, unmap and Finish calls.</param>
/// <param name="dstOffset">Offset into this memory object, handed unchanged to EnqueueMapBuffer.</param>
/// <param name="srcData">Managed array supplying the values.</param>
/// <param name="srcStartIndex">Index in <paramref name="srcData"/> of the first element read.</param>
/// <param name="count">Number of doubles to store.</param>
public virtual void Write(CommandQueue cq, long dstOffset, double[] srcData, int srcStartIndex, int count)
{
    long mappedBytes = (long)count * sizeof(double);
    IntPtr mapping = cq.EnqueueMapBuffer(this, true, MapFlags.WRITE, dstOffset, mappedBytes);
    double* target = (double*)mapping.ToPointer();

    long element = 0;
    while (element < count)
    {
        target[element] = srcData[element + srcStartIndex];
        element++;
    }

    cq.EnqueueUnmapMemObject(this, mapping);
    cq.Finish();
}
/// <summary>
/// Fills <paramref name="count"/> consecutive doubles of this memory object with
/// <paramref name="value"/>, writing through a mapping of the affected region.
/// </summary>
/// <param name="cq">Command queue used for the map, unmap and Finish calls.</param>
/// <param name="dstByteOffset">Offset at which the mapped region starts, handed unchanged to EnqueueMapBuffer.</param>
/// <param name="value">Value stored in every element.</param>
/// <param name="count">Number of doubles to set.</param>
public virtual void MemSet(CommandQueue cq, long dstByteOffset, double value, long count)
{
    long mappedBytes = count * sizeof(double);
    IntPtr mapping = cq.EnqueueMapBuffer(this, true, MapFlags.WRITE, dstByteOffset, mappedBytes);

    // Walk the mapping with a moving pointer instead of an index.
    double* cursor = (double*)mapping.ToPointer();
    for (long remaining = count; remaining > 0; remaining--)
    {
        *cursor = value;
        cursor++;
    }

    cq.EnqueueUnmapMemObject(this, mapping);
    cq.Finish();
}
/// <summary>
/// Sets every byte of this memory object to <paramref name="value"/>. The whole object
/// (MemSize bytes, starting at offset zero) is mapped for writing, filled, and unmapped.
/// </summary>
/// <param name="cq">Command queue used for the map, unmap and Finish calls.</param>
/// <param name="value">Byte stored at every position.</param>
public virtual void MemSet(CommandQueue cq, byte value)
{
    long start = 0;
    long totalBytes = MemSize.ToInt64();
    IntPtr mapping = cq.EnqueueMapBuffer(this, true, MapFlags.WRITE, start, totalBytes);

    byte* cursor = (byte*)mapping.ToPointer();
    for (long remaining = totalBytes; remaining > 0; remaining--)
    {
        *cursor = value;
        cursor++;
    }

    cq.EnqueueUnmapMemObject(this, mapping);
    cq.Finish();
}
/// <summary>
/// Test all versions of:
///
/// EnqueueMapBuffer
/// EnqueueMapImage
///
/// The test bounces an array from a managed byte buffer to a mapped buffer,
/// to an image. The image is then mapped and copied to a new managed buffer
/// where the result is compared to the original.
///
/// The IntPtr, int and long overload families of EnqueueCopyBufferToImage and
/// EnqueueMapImage are each exercised three times: without event arguments,
/// with an output event only, and with an output event plus a wait list.
///
/// On error, the actual point of failure will have to be identified manually.
/// </summary>
/// <param name="c">Context used to create the test image and buffer.</param>
/// <param name="cq">Command queue all transfers are issued on; its device must report image support.</param>
private void TestMapBuffer(Context c, CommandQueue cq)
{
    if (!cq.Device.ImageSupport)
    {
        Output("Skipping EnqueueMapBuffer and EnqueueMapImage tests(not supported on this device)");
        return;
    }

    Output("Testing MapBuffer");

    OpenCLNet.Image img0 = null;
    OpenCLNet.Mem mem0 = null;
    int imgWidth = 1024;
    int imgHeight = 1024;
    int rowBytes = imgWidth * 4;            // RGBA8U: four bytes per pixel
    int bufLen = rowBytes * imgHeight;
    byte[] srcData = new byte[bufLen];
    byte[] cmpData = new byte[bufLen];

    // Kept at method scope and reset to null after disposal so that the finally
    // block can release whatever is still alive if a test throws half-way through.
    Event event0 = null;
    Event event1 = null;
    Event copyEvent = null;

    for (int i = 0; i < srcData.Length; i++)
        srcData[i] = (byte)(i);

    try
    {
        img0 = c.CreateImage2D(MemFlags.READ_WRITE, ImageFormat.RGBA8U, imgWidth, imgHeight);
        mem0 = c.CreateBuffer(MemFlags.READ_WRITE, bufLen, IntPtr.Zero);

        fixed (byte* pCmp = cmpData)
        {
            // ---------- IntPtr overloads ----------
            {
                IntPtr[] origin = new IntPtr[3] { (IntPtr)0, (IntPtr)0, (IntPtr)0 };
                IntPtr[] region = new IntPtr[3] { (IntPtr)imgWidth, (IntPtr)imgHeight, (IntPtr)1 };
                IntPtr mapPtr;
                IntPtr image_row_pitch;
                IntPtr image_slice_pitch;

                // No event arguments
                Array.Clear(cmpData, 0, cmpData.Length);
                UploadThroughMappedBuffer(cq, mem0, srcData);
                cq.EnqueueCopyBufferToImage(mem0, img0, (IntPtr)0, origin, region);
                mapPtr = cq.EnqueueMapImage(img0, true, MapFlags.READ, origin, region, out image_row_pitch, out image_slice_pitch);
                CopyMappedImageRows((byte*)mapPtr.ToPointer(), (long)image_row_pitch, pCmp, rowBytes, imgHeight);
                cq.EnqueueUnmapMemObject(img0, mapPtr);
                if (!CompareArray(cmpData, srcData))
                    Error("EnqueueEnqueueMapBuffer/EnqueueMapImage: (IntPtr version)Copy not identical to source when using no event args");

                // Output event, no wait list
                Array.Clear(cmpData, 0, cmpData.Length);
                UploadThroughMappedBuffer(cq, mem0, srcData);
                cq.EnqueueCopyBufferToImage(mem0, img0, (IntPtr)0, origin, region, 0, null, out copyEvent);
                cq.Finish();
                // The copy has completed; release its event instead of leaking it.
                copyEvent.Dispose();
                copyEvent = null;
                mapPtr = cq.EnqueueMapImage(img0, false, MapFlags.READ, origin, region, out image_row_pitch, out image_slice_pitch, 0, null, out event0);
                cq.EnqueueWaitForEvent(event0);
                cq.Finish();
                CopyMappedImageRows((byte*)mapPtr.ToPointer(), (long)image_row_pitch, pCmp, rowBytes, imgHeight);
                cq.EnqueueUnmapMemObject(img0, mapPtr);
                if (!CompareArray(cmpData, srcData))
                    Error("EnqueueEnqueueMapBuffer/EnqueueMapImage: (IntPtr version)Copy not identical to source when using event output and no wait list");

                // Output event and wait list
                Event[] waitList = new Event[] { event0 };
                Array.Clear(cmpData, 0, cmpData.Length);
                UploadThroughMappedBuffer(cq, mem0, srcData);
                cq.EnqueueCopyBufferToImage(mem0, img0, (IntPtr)0, origin, region);
                mapPtr = cq.EnqueueMapImage(img0, false, MapFlags.READ, origin, region, out image_row_pitch, out image_slice_pitch, 1, waitList, out event1);
                cq.EnqueueWaitForEvent(event1);
                cq.Finish();
                CopyMappedImageRows((byte*)mapPtr.ToPointer(), (long)image_row_pitch, pCmp, rowBytes, imgHeight);
                cq.EnqueueUnmapMemObject(img0, mapPtr);
                if (!CompareArray(cmpData, srcData))
                    Error("EnqueueEnqueueMapBuffer/EnqueueMapImage: (IntPtr version)Copy not identical to source when using event output and wait list");

                event0.Dispose();
                event0 = null;
                event1.Dispose();
                event1 = null;
            }

            // ---------- int overloads ----------
            {
                int[] origin = new int[3] { (int)0, (int)0, (int)0 };
                int[] region = new int[3] { (int)imgWidth, (int)imgHeight, (int)1 };
                IntPtr mapPtr;
                int image_row_pitch;
                int image_slice_pitch;

                // No event arguments
                Array.Clear(cmpData, 0, cmpData.Length);
                UploadThroughMappedBuffer(cq, mem0, srcData);
                cq.EnqueueCopyBufferToImage(mem0, img0, 0, origin, region);
                mapPtr = cq.EnqueueMapImage(img0, true, MapFlags.READ, origin, region, out image_row_pitch, out image_slice_pitch);
                CopyMappedImageRows((byte*)mapPtr.ToPointer(), image_row_pitch, pCmp, rowBytes, imgHeight);
                cq.EnqueueUnmapMemObject(img0, mapPtr);
                if (!CompareArray(cmpData, srcData))
                    Error("EnqueueEnqueueMapBuffer/EnqueueMapImage: (int version)Copy not identical to source when using no event args");

                // Output event, no wait list
                Array.Clear(cmpData, 0, cmpData.Length);
                UploadThroughMappedBuffer(cq, mem0, srcData);
                cq.EnqueueCopyBufferToImage(mem0, img0, 0, origin, region);
                mapPtr = cq.EnqueueMapImage(img0, false, MapFlags.READ, origin, region, out image_row_pitch, out image_slice_pitch, 0, null, out event0);
                cq.EnqueueWaitForEvent(event0);
                cq.Finish();
                CopyMappedImageRows((byte*)mapPtr.ToPointer(), image_row_pitch, pCmp, rowBytes, imgHeight);
                cq.EnqueueUnmapMemObject(img0, mapPtr);
                if (!CompareArray(cmpData, srcData))
                    Error("EnqueueEnqueueMapBuffer/EnqueueMapImage: (int version)Copy not identical to source when using event output and no wait list");

                // Output event and wait list
                Event[] waitList = new Event[] { event0 };
                Array.Clear(cmpData, 0, cmpData.Length);
                UploadThroughMappedBuffer(cq, mem0, srcData);
                cq.EnqueueCopyBufferToImage(mem0, img0, 0, origin, region);
                mapPtr = cq.EnqueueMapImage(img0, false, MapFlags.READ, origin, region, out image_row_pitch, out image_slice_pitch, 1, waitList, out event1);
                cq.EnqueueWaitForEvent(event1);
                cq.Finish();
                CopyMappedImageRows((byte*)mapPtr.ToPointer(), image_row_pitch, pCmp, rowBytes, imgHeight);
                cq.EnqueueUnmapMemObject(img0, mapPtr);
                if (!CompareArray(cmpData, srcData))
                    Error("EnqueueEnqueueMapBuffer/EnqueueMapImage: (int version)Copy not identical to source when using event output and wait list");

                event0.Dispose();
                event0 = null;
                event1.Dispose();
                event1 = null;
            }

            // ---------- long overloads ----------
            {
                long[] origin = new long[3] { (long)0, (long)0, (long)0 };
                long[] region = new long[3] { (long)imgWidth, (long)imgHeight, (long)1 };
                IntPtr mapPtr;
                long image_row_pitch;
                long image_slice_pitch;

                // No event arguments
                Array.Clear(cmpData, 0, cmpData.Length);
                UploadThroughMappedBuffer(cq, mem0, srcData);
                cq.EnqueueCopyBufferToImage(mem0, img0, (long)0, origin, region);
                mapPtr = cq.EnqueueMapImage(img0, true, MapFlags.READ, origin, region, out image_row_pitch, out image_slice_pitch);
                CopyMappedImageRows((byte*)mapPtr.ToPointer(), image_row_pitch, pCmp, rowBytes, imgHeight);
                cq.EnqueueUnmapMemObject(img0, mapPtr);
                if (!CompareArray(cmpData, srcData))
                    Error("EnqueueEnqueueMapBuffer/EnqueueMapImage: (long version)Copy not identical to source when using no event args");

                // Output event, no wait list
                Array.Clear(cmpData, 0, cmpData.Length);
                UploadThroughMappedBuffer(cq, mem0, srcData);
                cq.EnqueueCopyBufferToImage(mem0, img0, (long)0, origin, region);
                mapPtr = cq.EnqueueMapImage(img0, false, MapFlags.READ, origin, region, out image_row_pitch, out image_slice_pitch, 0, null, out event0);
                cq.EnqueueWaitForEvent(event0);
                cq.Finish();
                CopyMappedImageRows((byte*)mapPtr.ToPointer(), image_row_pitch, pCmp, rowBytes, imgHeight);
                cq.EnqueueUnmapMemObject(img0, mapPtr);
                if (!CompareArray(cmpData, srcData))
                    Error("EnqueueEnqueueMapBuffer/EnqueueMapImage: (long version)Copy not identical to source when using event output and no wait list");

                // Output event and wait list
                Event[] waitList = new Event[] { event0 };
                Array.Clear(cmpData, 0, cmpData.Length);
                UploadThroughMappedBuffer(cq, mem0, srcData);
                cq.EnqueueCopyBufferToImage(mem0, img0, (long)0, origin, region);
                mapPtr = cq.EnqueueMapImage(img0, false, MapFlags.READ, origin, region, out image_row_pitch, out image_slice_pitch, 1, waitList, out event1);
                cq.EnqueueWaitForEvent(event1);
                cq.Finish();
                CopyMappedImageRows((byte*)mapPtr.ToPointer(), image_row_pitch, pCmp, rowBytes, imgHeight);
                cq.EnqueueUnmapMemObject(img0, mapPtr);
                if (!CompareArray(cmpData, srcData))
                    Error("EnqueueEnqueueMapBuffer/EnqueueMapImage: (long version)Copy not identical to source when using event output and wait list");

                event0.Dispose();
                event0 = null;
                event1.Dispose();
                event1 = null;
            }
        }
    }
    catch (Exception e)
    {
        Error("Exception during testing: " + e.ToString());
    }
    finally
    {
        // Release any event that was still outstanding when an exception interrupted a test.
        if (copyEvent != null)
            copyEvent.Dispose();
        if (event0 != null)
            event0.Dispose();
        if (event1 != null)
            event1.Dispose();
        if (img0 != null)
            img0.Dispose();
        if (mem0 != null)
            mem0.Dispose();
    }
}

/// <summary>
/// Maps the whole of <paramref name="buffer"/> for writing, copies <paramref name="data"/>
/// into the mapping byte by byte and enqueues the unmap.
/// </summary>
private unsafe void UploadThroughMappedBuffer(CommandQueue cq, Mem buffer, byte[] data)
{
    IntPtr mapPtr = cq.EnqueueMapBuffer(buffer, true, MapFlags.WRITE, 0, data.Length);
    byte* pMapPtr = (byte*)mapPtr.ToPointer();
    for (int i = 0; i < data.Length; i++)
        pMapPtr[i] = data[i];
    cq.EnqueueUnmapMemObject(buffer, mapPtr);
}

/// <summary>
/// Copies <paramref name="rows"/> rows of <paramref name="rowBytes"/> bytes from a mapped image,
/// whose rows are <paramref name="rowPitch"/> bytes apart, into a tightly packed destination.
/// </summary>
private static unsafe void CopyMappedImageRows(byte* mapped, long rowPitch, byte* dst, int rowBytes, int rows)
{
    for (int y = 0; y < rows; y++)
    {
        byte* pSrcRowPtr = mapped + y * rowPitch;
        byte* pDstRowPtr = dst + y * rowBytes;
        for (int x = 0; x < rowBytes; x++)
            pDstRowPtr[x] = pSrcRowPtr[x];
    }
}
/// <summary>
/// Runs <paramref name="argIOKernel"/> as a single task with an int, a long and a float
/// argument plus an output buffer, then checks that the kernel wrote the same three
/// values back into the buffer (read through a mapped view as an IOKernelArgs struct).
/// </summary>
/// <param name="c">Context used to allocate the output buffer.</param>
/// <param name="cq">Command queue the kernel and the transfers are issued on.</param>
/// <param name="argIOKernel">Kernel under test; receives arguments 0..3 as set below.</param>
private unsafe void TestKernel(Context c, CommandQueue cq, Kernel argIOKernel)
{
    // The buffer is released when the test finishes instead of being left to the finalizer.
    using (Mem outArgBuffer = c.CreateBuffer((MemFlags)((ulong)MemFlags.ALLOC_HOST_PTR | (ulong)MemFlags.READ_WRITE), sizeof(IOKernelArgs), IntPtr.Zero))
    {
        byte[] data = new byte[sizeof(IOKernelArgs)];

        Output("Testing kernel - Argument return");
        argIOKernel.SetArg(0, 1);
        argIOKernel.SetArg(1, 65L);
        argIOKernel.SetArg(2, 38.4f);
        argIOKernel.SetArg(3, outArgBuffer);

        Event ev;
        cq.EnqueueTask(argIOKernel, 0, null, out ev);
        try
        {
            cq.Finish();
            if ((int)ev.ExecutionStatus < 0)
            {
                Error(cq.Device.Name + ": argIOKernel failed with error code " + (ErrorCode)ev.ExecutionStatus);
            }
            else
            {
                // The byte[] read only exercises Mem.Read; the verification below uses the mapped view.
                outArgBuffer.Read(cq, 0L, data, 0, sizeof(IOKernelArgs));

                IntPtr outArgPtr = cq.EnqueueMapBuffer(outArgBuffer, true, MapFlags.READ, IntPtr.Zero, (IntPtr)sizeof(IOKernelArgs));
                IOKernelArgs args = (IOKernelArgs)Marshal.PtrToStructure(outArgPtr, typeof(IOKernelArgs));
                cq.EnqueueUnmapMemObject(outArgBuffer, outArgPtr);

                if (args.outInt != 1)
                    Error(cq.Device.Name + ": argIOKernel failed to return correct arguments");
                if (args.outLong != 65)
                    Error(cq.Device.Name + ": argIOKernel failed to return correct arguments");
                if (args.outSingle != 38.4f)
                    Error(cq.Device.Name + ": argIOKernel failed to return correct arguments");
            }
        }
        finally
        {
            // Previously the event was only disposed on the failure path.
            ev.Dispose();
        }
    }
}
/// <summary>
/// Exercises EnqueueCopyBuffer between buffers created with different host-pointer flags.
/// In every section the source holds 0.0f and the destination 1.0f before the copy, and
/// the destination is expected to hold 0.0f afterwards.
/// </summary>
/// <param name="c">Context used to create the buffers.</param>
/// <param name="cq">Command queue the copies and transfers are issued on.</param>
private void TestCommandQueueMemCopy(Context c, CommandQueue cq)
{
    Output("Testing synchronous host memory->memory copy");

    AlignedArrayFloat aafSrc = new AlignedArrayFloat(1024 * 1024, 64);
    AlignedArrayFloat aafDst = new AlignedArrayFloat(1024 * 1024, 64);
    SetAAF(aafSrc, 0.0f);
    SetAAF(aafDst, 1.0f);

    // Test HOST_PTR -> HOST_PTR copy
    // The call to EnqueueMapBuffer synchronizes caches before testing the result
    using (Mem memSrc = c.CreateBuffer((MemFlags)((ulong)MemFlags.READ_WRITE + (ulong)MemFlags.USE_HOST_PTR), aafSrc.ByteLength, aafSrc))
    {
        using (Mem memDst = c.CreateBuffer((MemFlags)((ulong)MemFlags.READ_WRITE + (ulong)MemFlags.USE_HOST_PTR), aafDst.ByteLength, aafDst))
        {
            cq.EnqueueCopyBuffer(memSrc, memDst, IntPtr.Zero, IntPtr.Zero, (IntPtr)aafSrc.ByteLength);
            cq.EnqueueBarrier();
            IntPtr mappedPtr = cq.EnqueueMapBuffer(memDst, true, MapFlags.READ_WRITE, (IntPtr)0, (IntPtr)aafDst.ByteLength);
            if (!TestAAF(aafDst, 0.0f))
                Error("EnqueueCopyBuffer failed, destination is invalid");
            cq.EnqueueUnmapMemObject(memDst, mappedPtr);
            cq.EnqueueBarrier();
        }
    }

    // Test COPY_HOST_PTR -> COPY_HOST_PTR copy
    // Verify that original source buffers are intact and that the copy was successful
    SetAAF(aafSrc, 0.0f);
    SetAAF(aafDst, 1.0f);
    using (Mem memSrc = c.CreateBuffer(MemFlags.COPY_HOST_PTR, aafSrc.ByteLength, aafSrc))
    {
        using (Mem memDst = c.CreateBuffer(MemFlags.COPY_HOST_PTR, aafSrc.ByteLength, aafDst))
        {
            // Overwrite the host arrays so that any accidental aliasing with the device copies shows up.
            SetAAF(aafSrc, 2.0f);
            SetAAF(aafDst, 3.0f);

            cq.EnqueueCopyBuffer(memSrc, memDst, IntPtr.Zero, IntPtr.Zero, (IntPtr)aafSrc.ByteLength);
            cq.Finish();
            if (!TestAAF(aafSrc, 2.0f))
                Error("Memory copy destroyed src buffer");
            if (!TestAAF(aafDst, 3.0f))
                Error("Memory copy destroyed dst buffer");

            Event ev;
            cq.EnqueueReadBuffer(memDst, false, IntPtr.Zero, (IntPtr)aafDst.ByteLength, aafDst, 0, null, out ev);
            cq.EnqueueWaitForEvents(1, new Event[] { ev });
            ev.Dispose();
            cq.Finish();
            if (!TestAAF(aafDst, 0.0f))
                Error("Memory copy failed");
        }
    }

    // Test ALLOC_HOST_PTR -> ALLOC_HOST_PTR copy
    SetAAF(aafSrc, 0.0f);
    SetAAF(aafDst, 1.0f);
    using (Mem memSrc = c.CreateBuffer((MemFlags)((ulong)MemFlags.ALLOC_HOST_PTR + (ulong)MemFlags.READ_WRITE), aafSrc.ByteLength, IntPtr.Zero))
    {
        using (Mem memDst = c.CreateBuffer((MemFlags)((ulong)MemFlags.ALLOC_HOST_PTR + (ulong)MemFlags.WRITE_ONLY), aafSrc.ByteLength, IntPtr.Zero))
        {
            cq.EnqueueWriteBuffer(memSrc, false, (IntPtr)0, (IntPtr)aafSrc.ByteLength, aafSrc);
            // Seed the destination with the 1.0f pattern. Seeding it with the source data (as the
            // test used to) makes the final check pass even when the copy never happens.
            cq.EnqueueWriteBuffer(memDst, false, (IntPtr)0, (IntPtr)aafDst.ByteLength, aafDst);
            cq.EnqueueBarrier();
            cq.EnqueueCopyBuffer(memSrc, memDst, IntPtr.Zero, IntPtr.Zero, (IntPtr)aafSrc.ByteLength);
            cq.EnqueueBarrier();
            cq.EnqueueReadBuffer(memDst, true, IntPtr.Zero, (IntPtr)aafDst.ByteLength, aafDst);
            if (!TestAAF(aafDst, 0.0f))
                Error("Memory copy failed");
        }
    }

    // Test ALLOC_HOST_PTR|READ_ONLY -> ALLOC_HOST_PTR|WRITE_ONLY copy
    // NOTE(review): this section was labelled "DEFAULT -> DEFAULT", but the flags below
    // request ALLOC_HOST_PTR - confirm which combination was intended.
    SetAAF(aafSrc, 0.0f);
    SetAAF(aafDst, 1.0f);
    using (Mem memSrc = c.CreateBuffer((MemFlags)((ulong)MemFlags.ALLOC_HOST_PTR + (ulong)MemFlags.READ_ONLY), aafSrc.ByteLength, IntPtr.Zero))
    {
        using (Mem memDst = c.CreateBuffer((MemFlags)((ulong)MemFlags.ALLOC_HOST_PTR + (ulong)MemFlags.WRITE_ONLY), aafSrc.ByteLength, IntPtr.Zero))
        {
            cq.EnqueueWriteBuffer(memSrc, false, (IntPtr)0, (IntPtr)aafSrc.ByteLength, aafSrc);
            // Same as above: the destination must start out different from the source.
            cq.EnqueueWriteBuffer(memDst, false, (IntPtr)0, (IntPtr)aafDst.ByteLength, aafDst);
            cq.EnqueueBarrier();
            cq.EnqueueCopyBuffer(memSrc, memDst, IntPtr.Zero, IntPtr.Zero, (IntPtr)aafSrc.ByteLength);
            cq.EnqueueBarrier();
            cq.EnqueueReadBuffer(memDst, true, IntPtr.Zero, (IntPtr)aafDst.ByteLength, aafDst);
            if (!TestAAF(aafDst, 0.0f))
                Error("Memory copy failed");
        }
    }
}
/// <summary>
/// Runs kernel <paramref name="k"/> as a single task with a Float2 argument and a
/// two-float buffer backed by a pinned managed array, then maps and unmaps the buffer.
/// The method performs no result check of its own.
/// </summary>
/// <param name="c">Context used to create the buffer.</param>
/// <param name="cq">Command queue the task and the map/unmap are issued on.</param>
/// <param name="k">Kernel under test; receives the Float2 as argument 0 and the buffer as argument 1.</param>
private unsafe void TestVecKernel(Context c, CommandQueue cq, Kernel k)
{
    // Two floats; kept as a compile-time constant so overload resolution matches the literal sizes used before.
    const int ByteCount = sizeof(float) * 2;

    Float2 f2 = new Float2(0.0f, 1.0f);
    float[] memory = new float[2];

    fixed (float* pMemory = memory)
    {
        // Dispose the buffer deterministically; it used to be left for the finalizer.
        using (Mem mem = c.CreateBuffer((MemFlags)((ulong)MemFlags.READ_WRITE | (ulong)MemFlags.USE_HOST_PTR), ByteCount, pMemory))
        {
            k.SetArg(0, f2);
            k.SetArg(1, mem);
            cq.EnqueueTask(k);
            cq.EnqueueBarrier();
            IntPtr pMap = cq.EnqueueMapBuffer(mem, true, MapFlags.READ, 0, ByteCount);
            cq.EnqueueUnmapMemObject(mem, pMap);

            // Drain the queue while the array is still pinned, so no queued command can
            // touch the host pointer after the fixed block ends.
            cq.Finish();
        }
    }
}
/// <summary>
/// Builds one encoder task: stores the configuration, creates a command queue on the
/// first device of the program's context, allocates and maps the OpenCL buffers,
/// creates the kernels, and sets up the managed sample/output buffers and the
/// optional verifier.
/// </summary>
/// <param name="_openCLProgram">Compiled program whose context and kernels are used.</param>
/// <param name="channelsCount">Channel count used to size the per-task buffers and the frame.</param>
/// <param name="channels">Channel count used to size the per-frame result buffers and the verifier.</param>
/// <param name="bits_per_sample">Sample width handed to the verifier's PCM configuration.</param>
/// <param name="max_frame_size">Upper bound on the encoded size of one frame, used to size the output buffer.</param>
/// <param name="writer">Owning writer; supplies the settings, encoding parameters and PCM format.</param>
/// <param name="groupSize">Stored as-is in the groupSize member.</param>
/// <param name="gpuOnly">Stored in UseGPUOnly; also gates the partition/rice buffers and kernels.</param>
/// <param name="gpuRice">Combined with <paramref name="gpuOnly"/> to form UseGPURice.</param>
unsafe public FLACCLTask(Program _openCLProgram, int channelsCount, int channels, uint bits_per_sample, int max_frame_size, FLACCLWriter writer, int groupSize, bool gpuOnly, bool gpuRice)
{
    // ---- configuration ----
    this.UseGPUOnly = gpuOnly;
    this.UseGPURice = gpuOnly && gpuRice;
    this.UseMappedMemory = writer._settings.MappedMemory || writer._settings.DeviceType == OpenCLDeviceType.CPU;
    this.groupSize = groupSize;
    this.channels = channels;
    this.channelsCount = channelsCount;
    this.writer = writer;
    openCLProgram = _openCLProgram;

    // ---- command queue (profiling only in debug builds) ----
#if DEBUG
    var queueProperties = CommandQueueProperties.PROFILING_ENABLE;
#else
    var queueProperties = CommandQueueProperties.NONE;
#endif
    var context = openCLProgram.Context;
    openCLCQ = context.CreateCommandQueue(context.Devices[0], queueProperties);

    // ---- buffer sizes, in bytes ----
    int maxOrder = this.writer.eparams.max_prediction_order;
    int maxFrames = this.writer.framesPerTask;
    // Block size rounded up to a multiple of four, times the frames per task.
    int maxChannelSize = maxFrames * ((writer.eparams.block_size + 3) & ~3);

    residualTasksLen = sizeof(FLACCLSubframeTask) * 32 * channelsCount * maxFrames;
    bestResidualTasksLen = sizeof(FLACCLSubframeTask) * channels * maxFrames;

    int samplesBufferLen = writer.PCM.BlockAlign * maxChannelSize * channelsCount;
    int residualBufferLen = sizeof(int) * maxChannelSize * channels; // need to adjust residualOffset?
    int partitionRows = writer.PCM.BitsPerSample > 16 ? 31 : 15;
    int partitionsLen = sizeof(int) * (partitionRows * 2 << 8) * channels * maxFrames;
    int riceParamsLen = sizeof(int) * (4 << 8) * channels * maxFrames;
    int autocorLen = sizeof(float) * (maxOrder + 1) * lpc.MAX_LPC_WINDOWS * channelsCount * maxFrames;
    int lpcDataLen = autocorLen * 32;
    // Not referenced again within this constructor.
    int resOutLen = sizeof(int) * channelsCount * (lpc.MAX_LPC_WINDOWS * lpc.MAX_LPC_ORDER + 8) * maxFrames;
    int wndLen = sizeof(float) * maxChannelSize * lpc.MAX_LPC_WINDOWS;
    int selectedLen = sizeof(int) * 32 * channelsCount * maxFrames;
    int riceLen = sizeof(int) * channels * maxChannelSize;

    MemFlags deviceFlags = MemFlags.READ_WRITE;
    MemFlags hostFlags = MemFlags.READ_WRITE | MemFlags.ALLOC_HOST_PTR;

    // ---- host-visible buffers ----
    if (this.UseMappedMemory)
    {
        // The working buffers themselves are host-allocated and mapped directly.
        clSamplesBytes = context.CreateBuffer(hostFlags, (uint)samplesBufferLen / 2);
        clResidual = context.CreateBuffer(hostFlags, residualBufferLen);
        clBestRiceParams = context.CreateBuffer(hostFlags, riceParamsLen / 4);
        clResidualTasks = context.CreateBuffer(hostFlags, residualTasksLen);
        clBestResidualTasks = context.CreateBuffer(hostFlags, bestResidualTasksLen);
        clWindowFunctions = context.CreateBuffer(hostFlags, wndLen);
        clSelectedTasks = context.CreateBuffer(hostFlags, selectedLen);
        clRiceOutput = context.CreateBuffer(hostFlags, riceLen);

        clSamplesBytesPtr = openCLCQ.EnqueueMapBuffer(clSamplesBytes, true, MapFlags.READ_WRITE, 0, samplesBufferLen / 2);
        clResidualPtr = openCLCQ.EnqueueMapBuffer(clResidual, true, MapFlags.READ_WRITE, 0, residualBufferLen);
        clBestRiceParamsPtr = openCLCQ.EnqueueMapBuffer(clBestRiceParams, true, MapFlags.READ_WRITE, 0, riceParamsLen / 4);
        clResidualTasksPtr = openCLCQ.EnqueueMapBuffer(clResidualTasks, true, MapFlags.READ_WRITE, 0, residualTasksLen);
        clBestResidualTasksPtr = openCLCQ.EnqueueMapBuffer(clBestResidualTasks, true, MapFlags.READ_WRITE, 0, bestResidualTasksLen);
        clWindowFunctionsPtr = openCLCQ.EnqueueMapBuffer(clWindowFunctions, true, MapFlags.READ_WRITE, 0, wndLen);
        clSelectedTasksPtr = openCLCQ.EnqueueMapBuffer(clSelectedTasks, true, MapFlags.READ_WRITE, 0, selectedLen);
        clRiceOutputPtr = openCLCQ.EnqueueMapBuffer(clRiceOutput, true, MapFlags.READ_WRITE, 0, riceLen);
    }
    else
    {
        // Plain device buffers ...
        clSamplesBytes = context.CreateBuffer(deviceFlags, samplesBufferLen / 2);
        clResidual = context.CreateBuffer(deviceFlags, residualBufferLen);
        clBestRiceParams = context.CreateBuffer(deviceFlags, riceParamsLen / 4);
        clResidualTasks = context.CreateBuffer(deviceFlags, residualTasksLen);
        clBestResidualTasks = context.CreateBuffer(deviceFlags, bestResidualTasksLen);
        clWindowFunctions = context.CreateBuffer(deviceFlags, wndLen);
        clSelectedTasks = context.CreateBuffer(deviceFlags, selectedLen);
        clRiceOutput = context.CreateBuffer(deviceFlags, riceLen);

        // ... each paired with a host-allocated "pinned" twin ...
        clSamplesBytesPinned = context.CreateBuffer(hostFlags, samplesBufferLen / 2);
        clResidualPinned = context.CreateBuffer(hostFlags, residualBufferLen);
        clBestRiceParamsPinned = context.CreateBuffer(hostFlags, riceParamsLen / 4);
        clResidualTasksPinned = context.CreateBuffer(hostFlags, residualTasksLen);
        clBestResidualTasksPinned = context.CreateBuffer(hostFlags, bestResidualTasksLen);
        clWindowFunctionsPinned = context.CreateBuffer(hostFlags, wndLen);
        clSelectedTasksPinned = context.CreateBuffer(hostFlags, selectedLen);
        clRiceOutputPinned = context.CreateBuffer(hostFlags, riceLen);

        // ... and it is the pinned twin that gets mapped for host access.
        clSamplesBytesPtr = openCLCQ.EnqueueMapBuffer(clSamplesBytesPinned, true, MapFlags.READ_WRITE, 0, samplesBufferLen / 2);
        clResidualPtr = openCLCQ.EnqueueMapBuffer(clResidualPinned, true, MapFlags.READ_WRITE, 0, residualBufferLen);
        clBestRiceParamsPtr = openCLCQ.EnqueueMapBuffer(clBestRiceParamsPinned, true, MapFlags.READ_WRITE, 0, riceParamsLen / 4);
        clResidualTasksPtr = openCLCQ.EnqueueMapBuffer(clResidualTasksPinned, true, MapFlags.READ_WRITE, 0, residualTasksLen);
        clBestResidualTasksPtr = openCLCQ.EnqueueMapBuffer(clBestResidualTasksPinned, true, MapFlags.READ_WRITE, 0, bestResidualTasksLen);
        clWindowFunctionsPtr = openCLCQ.EnqueueMapBuffer(clWindowFunctionsPinned, true, MapFlags.READ_WRITE, 0, wndLen);
        clSelectedTasksPtr = openCLCQ.EnqueueMapBuffer(clSelectedTasksPinned, true, MapFlags.READ_WRITE, 0, selectedLen);
        clRiceOutputPtr = openCLCQ.EnqueueMapBuffer(clRiceOutputPinned, true, MapFlags.READ_WRITE, 0, riceLen);
    }

    // ---- device-only buffers ----
    clSamples = context.CreateBuffer(deviceFlags, samplesBufferLen);
    clLPCData = context.CreateBuffer(deviceFlags, lpcDataLen);
    clAutocorOutput = context.CreateBuffer(deviceFlags, autocorLen);
    clSelectedTasksSecondEstimate = context.CreateBuffer(deviceFlags, selectedLen);
    clSelectedTasksBestMethod = context.CreateBuffer(deviceFlags, selectedLen);
    if (UseGPUOnly)
    {
        clPartitions = context.CreateBuffer(deviceFlags, partitionsLen);
        clRiceParams = context.CreateBuffer(deviceFlags, riceParamsLen);
    }

    // ---- kernels that are always needed ----
    clComputeAutocor = openCLProgram.CreateKernel("clComputeAutocor");
    clStereoDecorr = openCLProgram.CreateKernel("clStereoDecorr");
    clChannelDecorr2 = openCLProgram.CreateKernel("clChannelDecorr2");
    clChannelDecorrX = openCLProgram.CreateKernel("clChannelDecorrX");
    clFindWastedBits = openCLProgram.CreateKernel("clFindWastedBits");
    clComputeLPC = openCLProgram.CreateKernel("clComputeLPC");
    clQuantizeLPC = openCLProgram.CreateKernel("clQuantizeLPC");
    clSelectStereoTasks = openCLProgram.CreateKernel("clSelectStereoTasks");
    clEstimateResidual = openCLProgram.CreateKernel("clEstimateResidual");
    clChooseBestMethod = openCLProgram.CreateKernel("clChooseBestMethod");

    // ---- kernels for the GPU-only pipeline ----
    if (UseGPUOnly)
    {
        clEncodeResidual = openCLProgram.CreateKernel("clEncodeResidual");

        // The partition calculators are skipped when the queue's device is a CPU.
        bool deviceIsCpu = openCLCQ.Device.DeviceType == DeviceType.CPU;
        if (!deviceIsCpu)
        {
            clCalcPartition = openCLProgram.CreateKernel("clCalcPartition");
            clCalcPartition16 = openCLProgram.CreateKernel("clCalcPartition16");
        }

        clSumPartition = openCLProgram.CreateKernel("clSumPartition");
        clFindRiceParameter = openCLProgram.CreateKernel("clFindRiceParameter");
        clFindPartitionOrder = openCLProgram.CreateKernel("clFindPartitionOrder");

        if (UseGPURice)
        {
            clCalcOutputOffsets = openCLProgram.CreateKernel("clCalcOutputOffsets");
            clRiceEncoding = openCLProgram.CreateKernel("clRiceEncoding");
        }
    }

    // ---- managed-side buffers and frame state ----
    samplesBuffer = new int[maxChannelSize * channelsCount];
    outputBuffer = new byte[max_frame_size * maxFrames + 1];
    frame = new FlacFrame(channelsCount);
    frame.writer = new BitWriter(outputBuffer, 0, outputBuffer.Length);

    // ---- optional decoder used to verify the encoded output ----
    if (writer._settings.DoVerify)
    {
        var verifyConfig = new AudioPCMConfig((int)bits_per_sample, channels, 44100);
        verify = new FlakeReader(verifyConfig);
        verify.DoCRC = false;
    }
}