/// <summary>
/// Creates the OpenCL compute context, initializes it, and then runs the
/// initialization routine of every operator/module in a fixed order.
/// </summary>
public void initialize()
{
    // NOTE(review): the initialize*() helpers below presumably rely on computeContext
    // being ready, so the context is created and initialized first - confirm before reordering.
    computeContext = new ComputationBackend.OpenCl.ComputeContext();
    computeContext.initialize();

    initializeOperatorRadialKernel();
    initializeOperatorBlur();
    initializeOperatorFindNearestPosition();
    initializeAttentionModule();
    initializeOperatorSkeletalize();
}
/// <summary>
/// Uploads inputMap to the device, runs kernelBlurX (input buffer -> temporary buffer)
/// followed by kernelBlurY (temporary buffer -> output buffer) and reads the blurred
/// result back into outputMap.
/// </summary>
/// <param name="computeContext">compute context whose command queue receives all commands</param>
public void calculate(ComputeContext computeContext)
{
    ErrorCode errorCode;
    OpenCL.Net.Event eventWriteBufferForInputMap;
    OpenCL.Net.Event eventReadBufferForOutputMap;
    OpenCL.Net.Event eventExecutedKernelBlurX;
    OpenCL.Net.Event eventExecutedKernelBlurY;

    // upload the input map (non blocking, the X pass waits on the event)
    errorCode = Cl.EnqueueWriteBuffer<float>(computeContext.commandQueue, bufferForInputMap, OpenCL.Net.Bool.False, inputMap.unsafeGetValues(), 0, new Event[] { }, out eventWriteBufferForInputMap);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    // both passes use the same 2d range: half the map width by the full map length
    IntPtr[] globalWorkSize = new IntPtr[] { (IntPtr)(inputMap.getWidth()/2), (IntPtr)(inputMap.getLength()) };
    IntPtr[] localWorkSize = new IntPtr[] { (IntPtr)32, (IntPtr)4 };

    // execute X
    errorCode = Cl.SetKernelArg<float>(kernelBlurX, 0, bufferForInputMap);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    errorCode = Cl.SetKernelArg<float>(kernelBlurX, 2, bufferForTemporary);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    errorCode = Cl.EnqueueNDRangeKernel(computeContext.commandQueue, kernelBlurX, 2, null, globalWorkSize, localWorkSize, 1, new Event[] { eventWriteBufferForInputMap }, out eventExecutedKernelBlurX);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    // execute Y
    errorCode = Cl.SetKernelArg<float>(kernelBlurY, 0, bufferForTemporary);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    errorCode = Cl.SetKernelArg<float>(kernelBlurY, 2, bufferForOutputMap);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    errorCode = Cl.EnqueueNDRangeKernel(computeContext.commandQueue, kernelBlurY, 2, null, globalWorkSize, localWorkSize, 1, new Event[] { eventExecutedKernelBlurX }, out eventExecutedKernelBlurY);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    // read back blured map
    // the read is blocking, so once it returns every command enqueued above has completed
    errorCode = Cl.EnqueueReadBuffer(computeContext.commandQueue, bufferForOutputMap, OpenCL.Net.Bool.True, outputMap.unsafeGetValues(), 3, new Event[] { eventExecutedKernelBlurY, eventExecutedKernelBlurX, eventWriteBufferForInputMap }, out eventReadBufferForOutputMap);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    // release the event handles, otherwise every call leaks four OpenCL events
    Cl.ReleaseEvent(eventReadBufferForOutputMap);
    Cl.ReleaseEvent(eventExecutedKernelBlurY);
    Cl.ReleaseEvent(eventExecutedKernelBlurX);
    Cl.ReleaseEvent(eventWriteBufferForInputMap);

    // HACK
    // sanity check of one sample of the result (debug builds only)
    System.Diagnostics.Debug.Assert(!float.IsNaN(outputMap.unsafeGetValues()[6]) && outputMap.unsafeGetValues()[6] < 50000.0f);
}
/// <summary>
/// Flattens kernelPositions into an int array, uploads it together with inputMap,
/// runs the kernel with one work item per position and reads the per-position
/// results back into kernelResults.
/// </summary>
/// <param name="computeContext">compute context whose command queue receives all commands</param>
public void calculate(ComputeContext computeContext)
{
    int positionCount = kernelPositions.Length;

    System.Diagnostics.Debug.Assert(kernelPositionsLength >= positionCount);
    System.Diagnostics.Debug.Assert((positionCount % 32) == 0, "kernelPositions.Length must be of length % 32 == 0");

    // interleave as x0, y0, x1, y1, ...; entries that are null keep the default 0, 0
    int[] flattenedPositions = new int[2 * positionCount];
    for (int index = 0; index < positionCount; index++)
    {
        var position = kernelPositions[index];
        if (position != null)
        {
            flattenedPositions[2 * index] = position.x;
            flattenedPositions[2 * index + 1] = position.y;
        }
    }

    // upload positions and input map (both non blocking)
    OpenCL.Net.Event positionsWritten;
    ComputeContext.throwErrorIfNotSuccessfull(
        Cl.EnqueueWriteBuffer<int>(computeContext.commandQueue, bufferForPositions, OpenCL.Net.Bool.False, flattenedPositions, 0, null, out positionsWritten),
        "");

    OpenCL.Net.Event inputMapWritten;
    ComputeContext.throwErrorIfNotSuccessfull(
        Cl.EnqueueWriteBuffer<float>(computeContext.commandQueue, bufferForInputMap, OpenCL.Net.Bool.False, inputMap.unsafeGetValues(), 0, new Event[] { }, out inputMapWritten),
        "");

    // one dimensional range, one work item per position, groups of 32
    IntPtr[] globalSize = { (IntPtr)positionCount };
    IntPtr[] localSize = { (IntPtr)32 };

    OpenCL.Net.Event kernelExecuted;
    ComputeContext.throwErrorIfNotSuccessfull(
        Cl.EnqueueNDRangeKernel(computeContext.commandQueue, kernel, 1, null, globalSize, localSize, 2, new Event[] { inputMapWritten, positionsWritten }, out kernelExecuted),
        "");

    // important so it doesn't crash
    ComputeContext.throwErrorIfNotSuccessfull(Cl.Flush(computeContext.commandQueue), "");
    ComputeContext.throwErrorIfNotSuccessfull(Cl.WaitForEvents(1, new Event[] { kernelExecuted }), "");

    // fetch the results and wait until they have arrived in kernelResults
    OpenCL.Net.Event resultsRead;
    ComputeContext.throwErrorIfNotSuccessfull(
        Cl.EnqueueReadBuffer(computeContext.commandQueue, bufferForKernelResults, OpenCL.Net.Bool.False, kernelResults, 1, new Event[] { kernelExecuted }, out resultsRead),
        "");
    ComputeContext.throwErrorIfNotSuccessfull(Cl.Flush(computeContext.commandQueue), "");
    ComputeContext.throwErrorIfNotSuccessfull(Cl.WaitForEvents(1, new Event[] { resultsRead }), "");

    Cl.ReleaseEvent(resultsRead);
    Cl.ReleaseEvent(kernelExecuted);
    Cl.ReleaseEvent(inputMapWritten);
    Cl.ReleaseEvent(positionsWritten);
}
/// <summary>
/// Allocates the device buffers, uploads the kernel weights (this.kernelArray),
/// builds the OpenCL program and binds the four buffer arguments of "kernel0".
/// </summary>
/// <param name="computeContext">compute context providing the OpenCL context, command queue and device</param>
/// <param name="kernelPositionsLength">number of positions the buffers and the result array are sized for</param>
/// <param name="inputMapSize">size of the input map; x * y elements are allocated for it</param>
/// <exception cref="ComputeContext.OpenClError">thrown when the program fails to build</exception>
public void initialize(ComputeContext computeContext, int kernelPositionsLength, Misc.Vector2<int> inputMapSize)
{
    ErrorCode errorCode;
    OpenCL.Net.Event eventWriteBufferCompletedKernel;

    this.kernelPositionsLength = kernelPositionsLength;
    kernelResults = new float[kernelPositionsLength];

    // two ints (x, y) per position
    bufferForPositions = Cl.CreateBuffer<int>(computeContext.context, MemFlags.AllocHostPtr, 2 * kernelPositionsLength, out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    bufferForInputMap = Cl.CreateBuffer<float>(computeContext.context, MemFlags.AllocHostPtr, (int)(inputMapSize.x * inputMapSize.y), out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    OpenCL.Net.IMem<float> bufferForKernel = Cl.CreateBuffer<float>(computeContext.context, MemFlags.AllocHostPtr, (kernelWidth * kernelWidth), out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    bufferForKernelResults = Cl.CreateBuffer<float>(computeContext.context, MemFlags.AllocHostPtr, kernelPositionsLength, out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    // blocking upload of the kernel weights
    errorCode = Cl.EnqueueWriteBuffer<float>(computeContext.commandQueue, bufferForKernel, OpenCL.Net.Bool.True, this.kernelArray, 0, null, out eventWriteBufferCompletedKernel);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    // the write was blocking, so the event is not needed any more;
    // releasing it here keeps the error paths below from leaking it
    Cl.ReleaseEvent(eventWriteBufferCompletedKernel);

    program = Cl.CreateProgramWithSource(computeContext.context, 1, new[] { getProgramSource(inputMapSize.x, kernelPositionsLength) }, null, out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    errorCode = Cl.BuildProgram(program, 1, new[] { computeContext.chosenDevice }, "", null, IntPtr.Zero);
    if (errorCode != ErrorCode.Success)
    {
        OpenCL.Net.InfoBuffer logInfoBuffer = Cl.GetProgramBuildInfo(program, computeContext.chosenDevice, ProgramBuildInfo.Log, out errorCode);
        ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

        // emit the compiler output; without it a failed build can not be diagnosed
        System.Diagnostics.Debug.WriteLine(logInfoBuffer.ToString());

        throw new ComputeContext.OpenClError();
    }

    kernel = Cl.CreateKernel(program, "kernel0", out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    // argument order: input map, kernel weights, positions, results
    errorCode = Cl.SetKernelArg<float>(kernel, 0, bufferForInputMap);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    errorCode = Cl.SetKernelArg<float>(kernel, 1, bufferForKernel);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    errorCode = Cl.SetKernelArg<int>(kernel, 2, bufferForPositions);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    errorCode = Cl.SetKernelArg<float>(kernel, 3, bufferForKernelResults);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
}
/// <summary>
/// Uploads inputPositions and the input map (translated from bool to int), runs
/// kernelNearestPoint and stores the results in outputPositions and foundNewPositions.
/// The device buffers for positions are grown when more positions than allocated are passed in.
/// </summary>
/// <param name="computeContext">compute context whose context and command queue are used</param>
public void calculate(ComputeContext computeContext)
{
    ErrorCode errorCode;
    int lengthOfPositionsForOpenCl;
    int numberOfPositionsForOpenCl;
    OpenCL.Net.Event eventWriteBufferInputPositions;
    OpenCL.Net.Event eventWriteBufferForInputMap;
    OpenCL.Net.Event eventExecutedKernelNearestPoint;
    OpenCL.Net.Event eventReadBufferForResultPositions;
    OpenCL.Net.Event eventReadBufferForFoundNewPosition;
    int i;

    // pad the number of positions up to a multiple of 32 (plus one extra block of 32)
    numberOfPositionsForOpenCl = inputPositions.Length;
    numberOfPositionsForOpenCl = numberOfPositionsForOpenCl + 32 + (32 - (numberOfPositionsForOpenCl % 32));
    // two ints (x, y) per position
    lengthOfPositionsForOpenCl = numberOfPositionsForOpenCl * 2;

    if (lengthOfPositionsForOpenCl > numberOfAllocatedInputAndOutputPositions * 2)
    {
        // we need to reallocate the buffer to the right size
        numberOfAllocatedInputAndOutputPositions = lengthOfPositionsForOpenCl;

        Cl.ReleaseMemObject(bufferForInputPositions);
        Cl.ReleaseMemObject(bufferForOutputPositions);
        Cl.ReleaseMemObject(bufferForFoundNewPosition);

        bufferForInputPositions = Cl.CreateBuffer<int>(computeContext.context, MemFlags.AllocHostPtr, numberOfAllocatedInputAndOutputPositions * 2, out errorCode);
        ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
        bufferForOutputPositions = Cl.CreateBuffer<int>(computeContext.context, MemFlags.AllocHostPtr, numberOfAllocatedInputAndOutputPositions * 2, out errorCode);
        ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
        bufferForFoundNewPosition = Cl.CreateBuffer<int>(computeContext.context, MemFlags.AllocHostPtr, numberOfAllocatedInputAndOutputPositions, out errorCode);
        ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

        // the kernel arguments still refer to the buffers released above,
        // so they have to be bound to the freshly created buffers
        errorCode = Cl.SetKernelArg<int>(kernelNearestPoint, 2, bufferForInputPositions);
        ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
        errorCode = Cl.SetKernelArg<int>(kernelNearestPoint, 3, bufferForOutputPositions);
        ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
        errorCode = Cl.SetKernelArg<int>(kernelNearestPoint, 4, bufferForFoundNewPosition);
        ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    }

    // copy the positions (the padding entries stay 0, 0)
    int[] inputPositionsForOpenCl = new int[lengthOfPositionsForOpenCl];
    for (i = 0; i < inputPositions.Length; i++)
    {
        inputPositionsForOpenCl[i * 2 + 0] = inputPositions[i].x;
        inputPositionsForOpenCl[i * 2 + 1] = inputPositions[i].y;
    }

    errorCode = Cl.EnqueueWriteBuffer<int>(computeContext.commandQueue, bufferForInputPositions, OpenCL.Net.Bool.False, inputPositionsForOpenCl, 0, new Event[] { }, out eventWriteBufferInputPositions);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    // copy input map, true is translated to 1 and false to 0
    int[] translatedInputMap = new int[inputMap.getWidth() * inputMap.getLength()];
    var inputMapValues = inputMap.unsafeGetValues();
    for (i = 0; i < translatedInputMap.Length; i++)
    {
        if (inputMapValues[i])
        {
            translatedInputMap[i] = 1;
        }
    }

    errorCode = Cl.EnqueueWriteBuffer<int>(computeContext.commandQueue, bufferForInputMap, OpenCL.Net.Bool.False, translatedInputMap, 0, new Event[] { }, out eventWriteBufferForInputMap);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    // call kernel
    IntPtr[] globalWorkSize = new IntPtr[] { (IntPtr)32, (IntPtr)(numberOfPositionsForOpenCl / 32) };
    IntPtr[] localWorkSize = new IntPtr[] { (IntPtr)32, (IntPtr)1 };

    errorCode = Cl.EnqueueNDRangeKernel(computeContext.commandQueue, kernelNearestPoint, 2, null, globalWorkSize, localWorkSize, 2, new Event[] { eventWriteBufferInputPositions, eventWriteBufferForInputMap }, out eventExecutedKernelNearestPoint);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    int[] resultPositionsFromOpenCl = new int[lengthOfPositionsForOpenCl];
    int[] resultfoundNewPositionsFromOpenCl = new int[lengthOfPositionsForOpenCl / 2];

    // read results back and copy into arrays
    errorCode = Cl.EnqueueReadBuffer(computeContext.commandQueue, bufferForOutputPositions, OpenCL.Net.Bool.False, resultPositionsFromOpenCl, 1, new[] { eventExecutedKernelNearestPoint }, out eventReadBufferForResultPositions);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    errorCode = Cl.EnqueueReadBuffer(computeContext.commandQueue, bufferForFoundNewPosition, OpenCL.Net.Bool.False, resultfoundNewPositionsFromOpenCl, 1, new[] { eventExecutedKernelNearestPoint }, out eventReadBufferForFoundNewPosition);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    errorCode = Cl.Flush(computeContext.commandQueue);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    errorCode = Cl.WaitForEvents(2, new Event[] { eventReadBufferForResultPositions, eventReadBufferForFoundNewPosition });
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    // copy back, only the entries of the real (unpadded) positions are used
    outputPositions = new Vector2<int>[inputPositions.Length];
    foundNewPositions = new bool[inputPositions.Length];

    for (i = 0; i < inputPositions.Length; i++)
    {
        Vector2<int> outputPosition = new Vector2<int>();
        outputPosition.x = resultPositionsFromOpenCl[i * 2 + 0];
        outputPosition.y = resultPositionsFromOpenCl[i * 2 + 1];
        outputPositions[i] = outputPosition;

        if (resultfoundNewPositionsFromOpenCl[i] == 1)
        {
            foundNewPositions[i] = true;
        }
    }

    Cl.ReleaseEvent(eventWriteBufferInputPositions);
    Cl.ReleaseEvent(eventWriteBufferForInputMap);
    Cl.ReleaseEvent(eventExecutedKernelNearestPoint);
    Cl.ReleaseEvent(eventReadBufferForResultPositions);
    Cl.ReleaseEvent(eventReadBufferForFoundNewPosition);
}
/// <summary>
/// Allocates the device buffers, uploads the relative search positions for the given
/// radius, builds the OpenCL program and binds all arguments of "findNearestPoint".
/// </summary>
/// <param name="computeContext">compute context providing the OpenCL context, command queue and device</param>
/// <param name="searchRadius">radius passed to calculateRelativePositionsForRadius</param>
/// <param name="inputMapSize">size of the input map; x * y elements are allocated for it</param>
/// <exception cref="ComputeContext.OpenClError">thrown when the program fails to build</exception>
public void initialize(ComputeContext computeContext, int searchRadius, Misc.Vector2<int> inputMapSize)
{
    List<Vector2<int>> relativePositions;
    ErrorCode errorCode;
    int[] relativePositionsArray;
    OpenCL.Net.Event eventWriteBufferForRelativePositions;

    relativePositions = calculateRelativePositionsForRadius(searchRadius);

    // initial capacity (in positions) of the input/output position buffers
    numberOfAllocatedInputAndOutputPositions = 50000;

    bufferForInputMap = Cl.CreateBuffer<int>(computeContext.context, MemFlags.AllocHostPtr, (int)(inputMapSize.x * inputMapSize.y), out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    // two ints (x, y) per relative position
    bufferForRelativePositions = Cl.CreateBuffer<int>(computeContext.context, MemFlags.AllocHostPtr, relativePositions.Count * 2, out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    relativePositionsArray = convertRelativePositionsToArray(relativePositions);

    // copy relative positions into buffer
    errorCode = Cl.EnqueueWriteBuffer<int>(computeContext.commandQueue, bufferForRelativePositions, OpenCL.Net.Bool.True, relativePositionsArray, 0, new Event[] { }, out eventWriteBufferForRelativePositions);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    // the write was blocking, so the event is not needed any more;
    // releasing it here keeps the error paths below from leaking it
    Cl.ReleaseEvent(eventWriteBufferForRelativePositions);

    bufferForInputPositions = Cl.CreateBuffer<int>(computeContext.context, MemFlags.AllocHostPtr, numberOfAllocatedInputAndOutputPositions * 2, out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    bufferForOutputPositions = Cl.CreateBuffer<int>(computeContext.context, MemFlags.AllocHostPtr, numberOfAllocatedInputAndOutputPositions * 2, out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    bufferForFoundNewPosition = Cl.CreateBuffer<int>(computeContext.context, MemFlags.AllocHostPtr, numberOfAllocatedInputAndOutputPositions, out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    program = Cl.CreateProgramWithSource(computeContext.context, 1, new[] { getOpenClSource() }, null, out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    errorCode = Cl.BuildProgram(program, 1, new[] { computeContext.chosenDevice }, "", null, IntPtr.Zero);
    if (errorCode != ErrorCode.Success)
    {
        OpenCL.Net.InfoBuffer logInfoBuffer = Cl.GetProgramBuildInfo(program, computeContext.chosenDevice, ProgramBuildInfo.Log, out errorCode);
        ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

        // emit the compiler output; without it a failed build can not be diagnosed
        System.Diagnostics.Debug.WriteLine(logInfoBuffer.ToString());

        throw new ComputeContext.OpenClError();
    }

    kernelNearestPoint = Cl.CreateKernel(program, "findNearestPoint", out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    // buffer arguments
    errorCode = Cl.SetKernelArg<int>(kernelNearestPoint, 0, bufferForInputMap);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    errorCode = Cl.SetKernelArg<int>(kernelNearestPoint, 1, bufferForRelativePositions);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    errorCode = Cl.SetKernelArg<int>(kernelNearestPoint, 2, bufferForInputPositions);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    errorCode = Cl.SetKernelArg<int>(kernelNearestPoint, 3, bufferForOutputPositions);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    errorCode = Cl.SetKernelArg<int>(kernelNearestPoint, 4, bufferForFoundNewPosition);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    // scalar arguments, each passed with a size of 4 bytes
    errorCode = Cl.SetKernelArg(kernelNearestPoint, 5, (IntPtr)4, relativePositions.Count); // number of relative positions
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    errorCode = Cl.SetKernelArg(kernelNearestPoint, 6, (IntPtr)4, inputMapSize.x);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    errorCode = Cl.SetKernelArg(kernelNearestPoint, 7, (IntPtr)4, inputMapSize.y);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
}
/// <summary>
/// Allocates the input, temporary and output buffers, uploads the blur kernel weights,
/// builds the program from Blur.cl and creates the "blurX" and "blurY" kernels with
/// their constant arguments.
/// </summary>
/// <param name="computeContext">compute context providing the OpenCL context, command queue and device</param>
/// <param name="kernelRadius">radius passed to calculateKernel and to both kernels (argument 3)</param>
/// <param name="inputMapSize">size of the maps; x * y elements are allocated per buffer</param>
/// <exception cref="ComputeContext.OpenClError">thrown when the program fails to build</exception>
public void initialize(ComputeContext computeContext, int kernelRadius, Misc.Vector2<int> inputMapSize)
{
    ErrorCode errorCode;
    float[] kernelArray;
    OpenCL.Net.Event eventWriteBufferCompletedKernel;

    bufferForInputMap = Cl.CreateBuffer<float>(computeContext.context, MemFlags.AllocHostPtr, (int)(inputMapSize.x * inputMapSize.y), out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    bufferForTemporary = Cl.CreateBuffer<float>(computeContext.context, MemFlags.AllocHostPtr, (int)(inputMapSize.x * inputMapSize.y), out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    bufferForOutputMap = Cl.CreateBuffer<float>(computeContext.context, MemFlags.AllocHostPtr, (int)(inputMapSize.x * inputMapSize.y), out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    kernelArray = calculateKernel(kernelRadius);

    bufferForKernelArray = Cl.CreateBuffer<float>(computeContext.context, MemFlags.AllocHostPtr, kernelArray.Length, out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    // copy kernel into buffer
    errorCode = Cl.EnqueueWriteBuffer<float>(computeContext.commandQueue, bufferForKernelArray, OpenCL.Net.Bool.True, kernelArray, 0, null, out eventWriteBufferCompletedKernel);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    // the write was blocking, so the event is not needed any more;
    // releasing it here keeps the error paths below from leaking it
    Cl.ReleaseEvent(eventWriteBufferCompletedKernel);

    // the kernel source is loaded relative to the directory of the entry assembly;
    // the segments are combined one by one so the platform's directory separator is used
    // NOTE(review): Assembly.GetEntryAssembly() can return null for some hosts - confirm this can not happen here
    string programLocation = Assembly.GetEntryAssembly().Location;
    string pathToLoad = Path.Combine(Path.GetDirectoryName(programLocation), "..", "..", "ComputationBackend", "OpenCl", "src", "Blur.cl");
    string openClSource = File.ReadAllText(pathToLoad);

    program = Cl.CreateProgramWithSource(computeContext.context, 1, new[] { openClSource }, null, out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    errorCode = Cl.BuildProgram(program, 1, new[] { computeContext.chosenDevice }, "", null, IntPtr.Zero);
    if (errorCode != ErrorCode.Success)
    {
        OpenCL.Net.InfoBuffer logInfoBuffer = Cl.GetProgramBuildInfo(program, computeContext.chosenDevice, ProgramBuildInfo.Log, out errorCode);
        ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

        // emit the compiler output; without it a failed build can not be diagnosed
        System.Diagnostics.Debug.WriteLine(logInfoBuffer.ToString());

        throw new ComputeContext.OpenClError();
    }

    // arguments 0 and 2 of both kernels are not bound here,
    // they have to be set before the kernels are enqueued

    kernelBlurX = Cl.CreateKernel(program, "blurX", out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    errorCode = Cl.SetKernelArg<float>(kernelBlurX, 1, bufferForKernelArray);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    errorCode = Cl.SetKernelArg(kernelBlurX, 3, (IntPtr)4, kernelRadius);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    errorCode = Cl.SetKernelArg(kernelBlurX, 4, (IntPtr)4, inputMapSize.x);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    kernelBlurY = Cl.CreateKernel(program, "blurY", out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    errorCode = Cl.SetKernelArg<float>(kernelBlurY, 1, bufferForKernelArray);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    errorCode = Cl.SetKernelArg(kernelBlurY, 3, (IntPtr)4, kernelRadius);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    errorCode = Cl.SetKernelArg(kernelBlurY, 4, (IntPtr)4, inputMapSize.x);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    errorCode = Cl.SetKernelArg(kernelBlurY, 5, (IntPtr)4, inputMapSize.y);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
}
/// <summary>
/// Uploads inputMap as a counter map (true -> 1, false -> 0), runs kernelNarrow repeatedly
/// with an increasing k while swapping the input/output buffers until the kernel reports
/// that no change was made, then reads the counter map back and writes resultMap:
/// an inner cell is true when its counter is nonzero and equal to the maximum of itself
/// and its four direct neighbours.
/// </summary>
/// <param name="computeContext">compute context whose command queue receives all commands</param>
/// <param name="resourceMetric">used to time the three phases of the calculation</param>
public void calculate(ComputeContext computeContext, ResourceMetric resourceMetric)
{
    ErrorCode errorCode;
    OpenCL.Net.Event eventWriteBufferForCounterMap;
    OpenCL.Net.Event eventReadBufferForChangeMade;
    OpenCL.Net.Event eventReadBufferForCounterMapOutput;
    OpenCL.Net.IMem<int> bufferForCurrentCounterMapInput;
    OpenCL.Net.IMem<int> bufferForCurrentCounterMapOutput;
    int[] counterMapForOpenCl;
    int[] changeMadeForOpenCl;
    int i;
    int k;

    resourceMetric.startTimer("visual", "skeletalize", "initInputMap");

    counterMapForOpenCl = new int[inputMap.getWidth() * inputMap.getLength()];
    var inputMapValues = inputMap.unsafeGetValues();
    for (i = 0; i < counterMapForOpenCl.Length; i++)
    {
        if (inputMapValues[i])
        {
            counterMapForOpenCl[i] = 1;
        }
    }

    resourceMetric.stopTimer();

    errorCode = Cl.EnqueueWriteBuffer<int>(computeContext.commandQueue, bufferForCounterMap, OpenCL.Net.Bool.False, counterMapForOpenCl, 0, new Event[] { }, out eventWriteBufferForCounterMap);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    errorCode = Cl.Flush(computeContext.commandQueue);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    errorCode = Cl.WaitForEvents(1, new Event[] { eventWriteBufferForCounterMap });
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    Cl.ReleaseEvent(eventWriteBufferForCounterMap);

    bufferForCurrentCounterMapInput = bufferForCounterMap;
    bufferForCurrentCounterMapOutput = bufferForCounterOutputMap;

    resourceMetric.startTimer("visual", "skeletalize", "main k loop");

    for (k = 1; ; k++)
    {
        OpenCL.Net.Event eventWriteBufferForChangeMade;
        OpenCL.Net.Event eventExecutedNarrow;
        OpenCL.Net.IMem<int> swapingBuffer;

        // reset the change flag on the device to 0
        changeMadeForOpenCl = new int[1];
        errorCode = Cl.EnqueueWriteBuffer<int>(computeContext.commandQueue, bufferForChangeMade, OpenCL.Net.Bool.False, changeMadeForOpenCl, 0, new Event[] { }, out eventWriteBufferForChangeMade);
        ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

        errorCode = Cl.SetKernelArg<int>(kernelNarrow, 0, bufferForCurrentCounterMapInput);
        ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
        errorCode = Cl.SetKernelArg<int>(kernelNarrow, 1, bufferForCurrentCounterMapOutput);
        ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
        errorCode = Cl.SetKernelArg<int>(kernelNarrow, 2, bufferForChangeMade);
        ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

        // set k
        errorCode = Cl.SetKernelArg(kernelNarrow, 3, (IntPtr)4, k);
        ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

        IntPtr[] globalWorkSize = new IntPtr[] { (IntPtr)(inputMap.getWidth() / 2), (IntPtr)(inputMap.getLength()) };
        IntPtr[] localWorkSize = new IntPtr[] { (IntPtr)32, (IntPtr)4 };

        errorCode = Cl.EnqueueNDRangeKernel(computeContext.commandQueue, kernelNarrow, 2, null, globalWorkSize, localWorkSize, 1, new Event[] { eventWriteBufferForChangeMade }, out eventExecutedNarrow);
        ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
        errorCode = Cl.Flush(computeContext.commandQueue);
        ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
        errorCode = Cl.WaitForEvents(1, new Event[] { eventExecutedNarrow });
        ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

        // read back change made
        errorCode = Cl.EnqueueReadBuffer(computeContext.commandQueue, bufferForChangeMade, OpenCL.Net.Bool.False, changeMadeForOpenCl, 0, new Event[] { }, out eventReadBufferForChangeMade);
        ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
        errorCode = Cl.Flush(computeContext.commandQueue);
        ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
        errorCode = Cl.WaitForEvents(1, new Event[] { eventReadBufferForChangeMade });
        ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

        // the kernel event has to be released too, otherwise one event leaks per pass
        Cl.ReleaseEvent(eventWriteBufferForChangeMade);
        Cl.ReleaseEvent(eventExecutedNarrow);
        Cl.ReleaseEvent(eventReadBufferForChangeMade);

        if (changeMadeForOpenCl[0] == 0)
        {
            // nothing changed in this pass, the output buffer of this pass is the result
            break;
        }

        // swap
        swapingBuffer = bufferForCurrentCounterMapInput;
        bufferForCurrentCounterMapInput = bufferForCurrentCounterMapOutput;
        bufferForCurrentCounterMapOutput = swapingBuffer;
    }

    resourceMetric.stopTimer();

    Map2d<int> counterMap = new Map2d<int>(inputMap.getWidth(), inputMap.getLength());

    errorCode = Cl.EnqueueReadBuffer(computeContext.commandQueue, bufferForCurrentCounterMapOutput, OpenCL.Net.Bool.False, counterMap.unsafeGetValues(), 0, new Event[] { }, out eventReadBufferForCounterMapOutput);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    errorCode = Cl.Flush(computeContext.commandQueue);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    errorCode = Cl.WaitForEvents(1, new Event[] { eventReadBufferForCounterMapOutput });
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    Cl.ReleaseEvent(eventReadBufferForCounterMapOutput);

    int x, y;
    int[] maxArray = new int[5];

    resourceMetric.startTimer("visual", "skeletalize", "transfer result");

    int[] counterMapArray = counterMap.unsafeGetValues();
    bool[] resultMapArray = resultMap.unsafeGetValues();
    int mapWidth = (int)inputMap.getWidth();
    int mapLength = (int)inputMap.getLength();

    // only the inner cells are written, the one cell wide border of resultMap is left untouched
    for (y = 1; y < mapLength - 1; y++)
    {
        for (x = 1; x < mapWidth - 1; x++)
        {
            int cellIndex = x + y * mapWidth;
            int compare = counterMapArray[cellIndex];

            if (compare == 0)
            {
                resultMapArray[cellIndex] = false;
                continue;
            }

            // right, left, below, above and the cell itself
            maxArray[0] = counterMapArray[cellIndex + 1];
            maxArray[1] = counterMapArray[cellIndex - 1];
            maxArray[2] = counterMapArray[cellIndex + mapWidth];
            maxArray[3] = counterMapArray[cellIndex - mapWidth];
            maxArray[4] = compare;

            resultMapArray[cellIndex] = (compare == maxOfArray(ref maxArray));
        }
    }

    resourceMetric.stopTimer();
}
/// <summary>
/// Allocates the two counter map buffers and the change flag buffer, builds the
/// OpenCL program and creates the "narrow" kernel with the map size as arguments 4 and 5.
/// </summary>
/// <param name="computeContext">compute context providing the OpenCL context and device</param>
/// <param name="inputMapSize">size of the map; x * y elements are allocated per counter map buffer</param>
/// <exception cref="ComputeContext.OpenClError">thrown when the program fails to build</exception>
public void initialize(ComputeContext computeContext, Misc.Vector2<int> inputMapSize)
{
    ErrorCode errorCode;

    bufferForCounterMap = Cl.CreateBuffer<int>(computeContext.context, MemFlags.AllocHostPtr, (int)(inputMapSize.x * inputMapSize.y), out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    bufferForCounterOutputMap = Cl.CreateBuffer<int>(computeContext.context, MemFlags.AllocHostPtr, (int)(inputMapSize.x * inputMapSize.y), out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    // single int used as flag
    bufferForChangeMade = Cl.CreateBuffer<int>(computeContext.context, MemFlags.AllocHostPtr, 1, out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    program = Cl.CreateProgramWithSource(computeContext.context, 1, new[] { getOpenClSource() }, null, out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    errorCode = Cl.BuildProgram(program, 1, new[] { computeContext.chosenDevice }, "", null, IntPtr.Zero);
    if (errorCode != ErrorCode.Success)
    {
        OpenCL.Net.InfoBuffer logInfoBuffer = Cl.GetProgramBuildInfo(program, computeContext.chosenDevice, ProgramBuildInfo.Log, out errorCode);
        ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

        // emit the compiler output; without it a failed build can not be diagnosed
        System.Diagnostics.Debug.WriteLine(logInfoBuffer.ToString());

        throw new ComputeContext.OpenClError();
    }

    kernelNarrow = Cl.CreateKernel(program, "narrow", out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    // only the constant arguments are bound here, arguments 0 to 3 are not set by this method
    errorCode = Cl.SetKernelArg(kernelNarrow, 4, (IntPtr)4, inputMapSize.x);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    errorCode = Cl.SetKernelArg(kernelNarrow, 5, (IntPtr)4, inputMapSize.y);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
}