/// <summary>
/// Binds the OpenCL memory object wrapped by <paramref name="val"/> as the next kernel argument.
/// </summary>
/// <param name="val">Wrapper whose <c>buf</c> is bound; when <c>null</c> no argument is added.</param>
/// <returns>This instance, so calls can be chained.</returns>
public Kernel SetArgumentMemory(Memory val)
{
#if DELAY_COMPILE
    // Deferred build: the kernel is compiled the first time an argument is set.
    if (!Initialized)
    {
        Initialized = true;
        kern = Device.GetDevice().env.Context.CompileKernelFromSource(SourceCode, Name, "-cl-unsafe-math-optimizations");
    }
#endif

    // When PendingExecution reports valid, the device handles its event before arguments are touched.
    if (PendingExecution.IsValid())
    {
        Device.GetDevice().HandleEvent();
    }

    if (val == null)
    {
        return this;
    }

    IMem buffer = (IMem)val.buf;

    if (!reset)
    {
        // Subsequent arguments extend the existing chain.
        chain = chain.SetKernelArg(buffer);
        return this;
    }

    // First argument after a reset starts a new chain from the kernel itself.
    chain = kern.SetKernelArg(buffer);
    reset = false;
    return this;
}
/// <summary>
/// Binds a value-type argument as the next kernel argument.
/// </summary>
/// <typeparam name="T">Value type of the argument.</typeparam>
/// <param name="val">Value passed to the kernel.</param>
/// <returns>This instance, so calls can be chained.</returns>
public Kernel SetArgument <T>(T val) where T : struct, IComparable
{
#if DELAY_COMPILE
    // Deferred build: the kernel is compiled the first time an argument is set.
    if (!Initialized)
    {
        Initialized = true;
        kern = Device.GetDevice().env.Context.CompileKernelFromSource(SourceCode, Name, "-cl-unsafe-math-optimizations");
    }
#endif

    // When PendingExecution reports valid, the device handles its event before arguments are touched.
    if (PendingExecution.IsValid())
    {
        Device.GetDevice().HandleEvent();
    }

    if (!reset)
    {
        // Subsequent arguments extend the existing chain.
        chain = chain.SetKernelArg(val);
        return this;
    }

    // First argument after a reset starts a new chain from the kernel itself.
    chain = kern.SetKernelArg(val);
    reset = false;
    return this;
}
/// <summary>
/// Builds and runs a fixed OpenCL kernel that combines several single-precision math
/// functions for every element of <paramref name="input"/>.
/// </summary>
/// <param name="input">One integer per work item.</param>
/// <returns>An array of the same length as <paramref name="input"/> holding the kernel results.</returns>
/// <exception cref="InvalidOperationException">An OpenCL call reported an error code other than success.</exception>
public float [] MathFunctionsSingleTest(int[] input)
{
    if (input.Length == 0)
    {
        return new float[0];
    }

    // The #pragma has to end at a line break: a preprocessor directive extends to the end of its
    // line, so without the "\n" the kernel definition would be swallowed by the directive.
    var source =
        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" +
        "__kernel void kernelCode(__global int* ___input___, __global float* ___result___) { " +
        "int n0; float ___final___10; int ___flag___11; " +
        "int ___id___ = get_global_id(0); " +
        "n0 = ___input___[___id___]; " +
        "float pi = 3.14159274f; " +
        "float c = cos(((float) n0)); " +
        "float s = sin(((float) n0)); " +
        "float f = floor(pi); " +
        "float sq = sqrt(((float) (n0 * n0))); " +
        "float ex = exp(pi); " +
        "float p = powr(pi, 2.0f); " +
        "float a = fabs(c); " +
        "float l = log(((float) n0)); " +
        "___final___10 = ((((((((f * pi) * c) * s) * sq) * ex) * p) * a) * l); " +
        "___result___[___id___] = ___final___10; } ";

    var output = new float[input.Length];
    ErrorCode error;
    IMem inputBuffer = null;
    IMem outputBuffer = null;
    OpenCL.Net.Program program = default(OpenCL.Net.Program);
    OpenCL.Net.Kernel kernel = default(OpenCL.Net.Kernel);
    bool programCreated = false;
    bool kernelCreated = false;

    try
    {
        // NOTE(review): UseHostPtr hands the device a pointer into a managed array; confirm the
        // binding keeps the array pinned for the lifetime of the buffer.
        IMem created = Cl.CreateBuffer(env.Context, MemFlags.ReadOnly | MemFlags.UseHostPtr, (IntPtr)(input.Length * sizeof(int)), input, out error);
        ThrowIfClFailed(error, "Cl.CreateBuffer (input)");
        inputBuffer = created;

        created = Cl.CreateBuffer(env.Context, MemFlags.WriteOnly | MemFlags.UseHostPtr, (IntPtr)(input.Length * sizeof(float)), output, out error);
        ThrowIfClFailed(error, "Cl.CreateBuffer (output)");
        outputBuffer = created;

        program = Cl.CreateProgramWithSource(env.Context, 1u, new string[] { source }, null, out error);
        ThrowIfClFailed(error, "Cl.CreateProgramWithSource");
        programCreated = true;

        error = Cl.BuildProgram(program, (uint)env.Devices.Length, env.Devices, " -cl-fast-relaxed-math -cl-mad-enable ", null, IntPtr.Zero);
        if (error != ErrorCode.Success)
        {
            // Include the compiler output so the failure can be diagnosed.
            ErrorCode logError;
            var buildLog = Cl.GetProgramBuildInfo(program, env.Devices[0], ProgramBuildInfo.Log, out logError);
            string details = logError == ErrorCode.Success ? buildLog.ToString() : "(build log unavailable)";
            throw new InvalidOperationException("Cl.BuildProgram failed with OpenCL error " + error + "." + Environment.NewLine + details);
        }

        kernel = Cl.CreateKernel(program, "kernelCode", out error);
        ThrowIfClFailed(error, "Cl.CreateKernel");
        kernelCreated = true;

        error = Cl.SetKernelArg(kernel, 0, inputBuffer);
        ThrowIfClFailed(error, "Cl.SetKernelArg (0)");
        error = Cl.SetKernelArg(kernel, 1, outputBuffer);
        ThrowIfClFailed(error, "Cl.SetKernelArg (1)");

        // One work item per input element, work-group size 1.
        Event eventID;
        error = Cl.EnqueueNDRangeKernel(env.CommandQueues[0], kernel, (uint)1, null, new IntPtr[] { (IntPtr)input.Length }, new IntPtr[] { (IntPtr)1 }, (uint)0, null, out eventID);
        ThrowIfClFailed(error, "Cl.EnqueueNDRangeKernel");
        // The event is not waited on; drop our reference so it is not leaked.
        Cl.ReleaseEvent(eventID);

        env.CommandQueues[0].ReadFromBuffer(outputBuffer, output);
    }
    finally
    {
        if (kernelCreated)
        {
            Cl.ReleaseKernel(kernel);
        }

        if (programCreated)
        {
            Cl.ReleaseProgram(program);
        }

        if (outputBuffer != null)
        {
            outputBuffer.Dispose();
        }

        if (inputBuffer != null)
        {
            inputBuffer.Dispose();
        }
    }

    return output;
}

/// <summary>Throws when <paramref name="error"/> is not <c>ErrorCode.Success</c>.</summary>
private static void ThrowIfClFailed(ErrorCode error, string operation)
{
    if (error != ErrorCode.Success)
    {
        throw new InvalidOperationException(operation + " failed with OpenCL error " + error + ".");
    }
}
/// <summary>
/// Runs the <c>square_array</c> kernel with one work item per element of
/// <paramref name="array"/> and reads the device buffer back into the same array.
/// </summary>
/// <param name="array">Data copied to the device and overwritten with the result.</param>
public void SquareArray(float[] array)
{
    Cl.ErrorCode error;

    // Create the required kernel (entry function).
    Cl.Kernel kernel = Cl.Cl.CreateKernel(_program, "square_array", out error);
    CheckErr(error, "Cl.CreateKernel");

    Cl.IMem<float> arrayBuffer = null;
    Cl.CommandQueue cmdQueue = default(Cl.CommandQueue);
    bool queueCreated = false;

    try
    {
        arrayBuffer = Cl.Cl.CreateBuffer <float>(_context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadWrite, array, out error);
        CheckErr(error, "Cl.CreateBuffer array");

        // A memory-object argument is passed as a handle, hence the pointer size.
        error = Cl.Cl.SetKernelArg(kernel, 0, (IntPtr)IntPtr.Size, arrayBuffer);
        CheckErr(error, "Cl.SetKernelArg");

        // Create a command queue, where all of the commands for execution will be added.
        cmdQueue = Cl.Cl.CreateCommandQueue(_context, _device, (Cl.CommandQueueProperties) 0, out error);
        CheckErr(error, "Cl.CreateCommandQueue");
        queueCreated = true;

        Cl.Event clevent;
        IntPtr[] workGroupSizePtr = new IntPtr[] { (IntPtr)array.Length };

        // Execute our kernel (OpenCL code).
        error = Cl.Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 1, null, workGroupSizePtr, null, 0, null, out clevent);
        CheckErr(error, "Cl.EnqueueNDRangeKernel");
        if (error == Cl.ErrorCode.Success)
        {
            // The event is never waited on; release it so it does not leak.
            Cl.Cl.ReleaseEvent(clevent);
        }

        // Wait for completion of all calculations on the device.
        error = Cl.Cl.Finish(cmdQueue);
        CheckErr(error, "Cl.Finish");

        // Blocking read of the buffer back into the caller's array.
        error = Cl.Cl.EnqueueReadBuffer(cmdQueue, arrayBuffer, Cl.Bool.True, array, 0, null, out clevent);
        CheckErr(error, "Cl.EnqueueReadBuffer");
        if (error == Cl.ErrorCode.Success)
        {
            Cl.Cl.ReleaseEvent(clevent);
        }
    }
    finally
    {
        // Clean up on success and on failure alike.
        Cl.Cl.ReleaseKernel(kernel);

        if (queueCreated)
        {
            Cl.Cl.ReleaseCommandQueue(cmdQueue);
        }

        if (arrayBuffer != null)
        {
            Cl.Cl.ReleaseMemObject(arrayBuffer);
        }
    }
}
/// <summary>
/// Reads the OpenCL program at <paramref name="oclProgramSourcePath"/>, builds it for the first
/// device of the first platform, creates one kernel per known kernel name and a command queue.
/// </summary>
/// <param name="oclProgramSourcePath">Path of the OpenCL C source file to compile.</param>
private void init(string oclProgramSourcePath)
{
    string kernelSource = File.ReadAllText(oclProgramSourcePath);
    string[] kernelNames = new string[]
    {
        "accumulate", "quickBlurImgH", "quickBlurImgV", "upsizeImg", "halfSizeImgH", "halfSizeImgV",
        "getLumaImg", "mapToGreyscaleBmp", "getContrastImg", "capHolesImg", "maxReduceImgH",
        "maxReduceImgV", "mapToFauxColorsBmp", "quickSpikesFilterImg", "convolveImg"
    };
    bool gpu = true;

    //err = clGetDeviceIDs(NULL, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, &device_id, NULL);
    // NVidia driver doesn't seem to support a NULL first param (properties)
    // http://stackoverflow.com/questions/19140989/how-to-remove-cl-invalid-platform-error-in-opencl-code

    // now get all the platform IDs
    Platform[] platforms = Cl.GetPlatformIDs(out err);
    assert(err, "Error: Failed to get platform ids!");

    InfoBuffer deviceInfo = Cl.GetPlatformInfo(platforms[0], PlatformInfo.Name, out err);
    assert(err, "error retrieving platform name");
    Console.WriteLine("Platform name: {0}\n", deviceInfo.ToString());

    // Arbitrary, should be configurable
    Device[] devices = Cl.GetDeviceIDs(platforms[0], gpu ? DeviceType.Gpu : DeviceType.Cpu, out err);
    assert(err, "Error: Failed to create a device group!");
    _device = devices[0]; // Arbitrary, should be configurable

    deviceInfo = Cl.GetDeviceInfo(_device, DeviceInfo.Name, out err);
    assert(err, "error retrieving device name");
    // Format explicitly: Debug.WriteLine(string, string) is the (message, category) overload,
    // so passing the name as a second string argument would leave "{0}" unformatted.
    Debug.WriteLine(string.Format("Device name: {0}", deviceInfo.ToString()));

    deviceInfo = Cl.GetDeviceInfo(_device, DeviceInfo.ImageSupport, out err);
    assert(err, "error retrieving device image capability");
    Debug.WriteLine("Device supports img: {0}", (deviceInfo.CastTo <Bool>() == Bool.True));

    // Create a compute context
    _context = Cl.CreateContext(null, 1, new[] { _device }, ContextNotify, IntPtr.Zero, out err);
    assert(err, "Error: Failed to create a compute context!");

    // Create the compute program from the source buffer.
    // No explicit lengths: the source is passed as a null-terminated string, which avoids
    // handing OpenCL a character count where it expects a byte count.
    _program = Cl.CreateProgramWithSource(_context, 1, new[] { kernelSource }, null, out err);
    assert(err, "Error: Failed to create compute program!");

    // Build the program executable
    err = Cl.BuildProgram(_program, 1, new[] { _device }, string.Empty, null, IntPtr.Zero);
    if (err != ErrorCode.Success)
    {
        // Surface the compiler output before the failure is reported.
        ErrorCode logError;
        InfoBuffer buildLog = Cl.GetProgramBuildInfo(_program, _device, ProgramBuildInfo.Log, out logError);
        if (logError == ErrorCode.Success)
        {
            Debug.WriteLine(string.Format("build log: {0}", buildLog.ToString()));
        }
    }
    assert(err, "Error: Failed to build program executable!");

    // Query the build status (not the log) so the cast to BuildStatus is meaningful.
    InfoBuffer buildStatus = Cl.GetProgramBuildInfo(_program, _device, ProgramBuildInfo.Status, out err);
    Debug.WriteLine("build success: {0}", buildStatus.CastTo <BuildStatus>() == BuildStatus.Success);

    foreach (string kernelName in kernelNames)
    {
        // Create the compute kernel in the program we wish to run
        OpenCL.Net.Kernel kernel = Cl.CreateKernel(_program, kernelName, out err);
        assert(err, "Error: Failed to create compute kernel!");
        _kernels.Add(kernelName, kernel);
    }

    // Create a command queue
    _commandsQueue = Cl.CreateCommandQueue(_context, _device, CommandQueueProperties.None, out err);
    assert(err, "Error: Failed to create a command commands!");
}
/// <summary>
/// Runs the <c>md5</c> kernel over the supplied candidate data and reads the kernel's
/// <c>match</c> output buffer back to the host.
/// </summary>
/// <param name="plaintext_bytes">Candidate data; its length divided by <c>KEY_LENGTH</c> is the number of work items.</param>
/// <param name="plaintext_lengths">Per-candidate length data passed to the kernel as argument 1.</param>
/// <param name="target">Target data; <c>KEY_LENGTH</c> bytes are copied to the device.</param>
/// <param name="match">Receives a <c>KEY_LENGTH</c>-byte array read back from the device.</param>
private void CrackImpl(byte[] plaintext_bytes, byte[] plaintext_lengths, byte[] target, out byte[] match)
{
    Cl.ErrorCode error;
    match = null;

    // Create the required kernel (entry function).
    Cl.Kernel kernel = Cl.Cl.CreateKernel(program, "md5", out error);
    CheckErr(error, "Cl.CreateKernel");

    Cl.IMem plaintextBytesBuffer = null;
    Cl.IMem plaintextLengthsBuffer = null;
    Cl.IMem targetBuffer = null;
    Cl.IMem<byte> matchBuffer = null;
    Cl.CommandQueue cmdQueue = default(Cl.CommandQueue);
    bool queueCreated = false;

    try
    {
        plaintextBytesBuffer = Cl.Cl.CreateBuffer(_context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadOnly, (IntPtr)plaintext_bytes.Length, plaintext_bytes, out error);
        CheckErr(error, "Cl.CreateBuffer plaintext_bytes");

        plaintextLengthsBuffer = Cl.Cl.CreateBuffer(_context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadOnly, (IntPtr)plaintext_lengths.Length, plaintext_lengths, out error);
        CheckErr(error, "Cl.CreateBuffer plaintext_lengths");

        targetBuffer = Cl.Cl.CreateBuffer(_context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadOnly, (IntPtr)KEY_LENGTH, target, out error);
        CheckErr(error, "Cl.CreateBuffer target");

        match = new byte[KEY_LENGTH];
        matchBuffer = Cl.Cl.CreateBuffer <byte>(_context, Cl.MemFlags.WriteOnly | Cl.MemFlags.CopyHostPtr, match, out error);
        CheckErr(error, "Cl.CreateBuffer match");

        // Pass the memory buffers to our kernel function; each is passed as a handle.
        IntPtr handleSize = (IntPtr)IntPtr.Size;
        error = Cl.Cl.SetKernelArg(kernel, 0, handleSize, plaintextBytesBuffer);
        error |= Cl.Cl.SetKernelArg(kernel, 1, handleSize, plaintextLengthsBuffer);
        error |= Cl.Cl.SetKernelArg(kernel, 2, handleSize, targetBuffer);
        error |= Cl.Cl.SetKernelArg(kernel, 3, handleSize, matchBuffer);
        CheckErr(error, "Cl.SetKernelArg");

        // Create a command queue, where all of the commands for execution will be added.
        cmdQueue = Cl.Cl.CreateCommandQueue(_context, _device, (Cl.CommandQueueProperties) 0, out error);
        CheckErr(error, "Cl.CreateCommandQueue");
        queueCreated = true;

        Cl.Event clevent;
        IntPtr[] workGroupSizePtr = new IntPtr[] { (IntPtr)(plaintext_bytes.Length / KEY_LENGTH) };

        // Execute our kernel (OpenCL code).
        error = Cl.Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 1, null, workGroupSizePtr, null, 0, null, out clevent);
        CheckErr(error, "Cl.EnqueueNDRangeKernel");
        if (error == Cl.ErrorCode.Success)
        {
            // The event is never waited on; release it so it does not leak.
            Cl.Cl.ReleaseEvent(clevent);
        }

        // Wait for completion of all calculations on the device.
        error = Cl.Cl.Finish(cmdQueue);
        CheckErr(error, "Cl.Finish");

        // Blocking read of the match buffer.
        error = Cl.Cl.EnqueueReadBuffer(cmdQueue, matchBuffer, Cl.Bool.True, match, 0, null, out clevent);
        CheckErr(error, "Cl.EnqueueReadBuffer");
        if (error == Cl.ErrorCode.Success)
        {
            Cl.Cl.ReleaseEvent(clevent);
        }
    }
    finally
    {
        // Clean up on success and on failure alike.
        Cl.Cl.ReleaseKernel(kernel);

        if (queueCreated)
        {
            Cl.Cl.ReleaseCommandQueue(cmdQueue);
        }

        if (plaintextBytesBuffer != null)
        {
            Cl.Cl.ReleaseMemObject(plaintextBytesBuffer);
        }

        if (plaintextLengthsBuffer != null)
        {
            Cl.Cl.ReleaseMemObject(plaintextLengthsBuffer);
        }

        if (targetBuffer != null)
        {
            Cl.Cl.ReleaseMemObject(targetBuffer);
        }

        if (matchBuffer != null)
        {
            Cl.Cl.ReleaseMemObject(matchBuffer);
        }
    }
}
/// <summary>
/// Allocates the device buffers, uploads <c>kernelArray</c>, builds the generated program and
/// binds the four buffer arguments of <c>kernel0</c>.
/// </summary>
/// <param name="computeContext">Provides the OpenCL context, command queue and device.</param>
/// <param name="kernelPositionsLength">Number of positions; sizes the position and result buffers.</param>
/// <param name="inputMapSize">Input map dimensions; <c>x * y</c> sizes the input map buffer.</param>
public void initialize(ComputeContext computeContext, int kernelPositionsLength, Misc.Vector2<int> inputMapSize)
{
    ErrorCode errorCode;
    OpenCL.Net.Event eventWriteBufferCompletedKernel;

    this.kernelPositionsLength = kernelPositionsLength;
    kernelResults = new float[kernelPositionsLength];

    // Two ints per position.
    bufferForPositions = Cl.CreateBuffer<int>(computeContext.context, MemFlags.AllocHostPtr, 2 * kernelPositionsLength, out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    bufferForInputMap = Cl.CreateBuffer<float>(computeContext.context, MemFlags.AllocHostPtr, (int)(inputMapSize.x * inputMapSize.y), out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    OpenCL.Net.IMem<float> bufferForKernel = Cl.CreateBuffer<float>(computeContext.context, MemFlags.AllocHostPtr, (kernelWidth * kernelWidth), out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    bufferForKernelResults = Cl.CreateBuffer<float>(computeContext.context, MemFlags.AllocHostPtr, kernelPositionsLength, out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    // Blocking upload of the kernel coefficients.
    errorCode = Cl.EnqueueWriteBuffer<float>(computeContext.commandQueue, bufferForKernel, OpenCL.Net.Bool.True, this.kernelArray, 0, null, out eventWriteBufferCompletedKernel);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    // The write was blocking, so the event is released right away instead of at the end of the
    // method, where a failure in between would leak it.
    Cl.ReleaseEvent(eventWriteBufferCompletedKernel);

    program = Cl.CreateProgramWithSource(computeContext.context, 1, new[] { getProgramSource(inputMapSize.x, kernelPositionsLength) }, null, out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    errorCode = Cl.BuildProgram(program, 1, new[] { computeContext.chosenDevice }, "", null, IntPtr.Zero);
    if (errorCode != ErrorCode.Success)
    {
        OpenCL.Net.InfoBuffer logInfoBuffer = Cl.GetProgramBuildInfo(program, computeContext.chosenDevice, ProgramBuildInfo.Log, out errorCode);
        ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
        // Emit the compiler output; previously it was fetched and then discarded.
        System.Diagnostics.Trace.WriteLine("OpenCL program build failed:" + Environment.NewLine + logInfoBuffer.ToString());
        throw new ComputeContext.OpenClError();
    }

    kernel = Cl.CreateKernel(program, "kernel0", out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    errorCode = Cl.SetKernelArg<float>(kernel, 0, bufferForInputMap);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    errorCode = Cl.SetKernelArg<float>(kernel, 1, bufferForKernel);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    errorCode = Cl.SetKernelArg<int>(kernel, 2, bufferForPositions);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    errorCode = Cl.SetKernelArg<float>(kernel, 3, bufferForKernelResults);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
}
/// <summary>
/// Allocates the counter-map buffers, builds the program and creates the <c>narrow</c> kernel,
/// binding the map width and height as its arguments 4 and 5.
/// </summary>
/// <param name="computeContext">Provides the OpenCL context and device.</param>
/// <param name="inputMapSize">Map dimensions; <c>x * y</c> sizes both counter-map buffers.</param>
public void initialize(ComputeContext computeContext, Misc.Vector2<int> inputMapSize)
{
    ErrorCode errorCode;

    bufferForCounterMap = Cl.CreateBuffer<int>(computeContext.context, MemFlags.AllocHostPtr, (int)(inputMapSize.x * inputMapSize.y), out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    bufferForCounterOutputMap = Cl.CreateBuffer<int>(computeContext.context, MemFlags.AllocHostPtr, (int)(inputMapSize.x * inputMapSize.y), out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    // Single-int buffer.
    bufferForChangeMade = Cl.CreateBuffer<int>(computeContext.context, MemFlags.AllocHostPtr, 1, out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    program = Cl.CreateProgramWithSource(computeContext.context, 1, new[] { getOpenClSource() }, null, out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    errorCode = Cl.BuildProgram(program, 1, new[] { computeContext.chosenDevice }, "", null, IntPtr.Zero);
    if (errorCode != ErrorCode.Success)
    {
        OpenCL.Net.InfoBuffer logInfoBuffer = Cl.GetProgramBuildInfo(program, computeContext.chosenDevice, ProgramBuildInfo.Log, out errorCode);
        ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
        // Emit the compiler output; previously it was fetched and then discarded.
        System.Diagnostics.Trace.WriteLine("OpenCL program build failed:" + Environment.NewLine + logInfoBuffer.ToString());
        throw new ComputeContext.OpenClError();
    }

    kernelNarrow = Cl.CreateKernel(program, "narrow", out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    // Only the 4-byte size arguments are bound here; arguments 0-3 are not set in this method.
    errorCode = Cl.SetKernelArg(kernelNarrow, 4, (IntPtr)4, inputMapSize.x);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    errorCode = Cl.SetKernelArg(kernelNarrow, 5, (IntPtr)4, inputMapSize.y);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
}
/// <summary>
/// Allocates the blur buffers, uploads the kernel computed for <paramref name="kernelRadius"/>,
/// builds <c>Blur.cl</c> and creates the <c>blurX</c> and <c>blurY</c> kernels with their
/// constant arguments bound.
/// </summary>
/// <param name="computeContext">Provides the OpenCL context, command queue and device.</param>
/// <param name="kernelRadius">Radius passed to <c>calculateKernel</c> and to both kernels as argument 3.</param>
/// <param name="inputMapSize">Map dimensions; <c>x * y</c> sizes the input, temporary and output buffers.</param>
public void initialize(ComputeContext computeContext, int kernelRadius, Misc.Vector2<int> inputMapSize)
{
    ErrorCode errorCode;
    float[] kernelArray;
    OpenCL.Net.Event eventWriteBufferCompletedKernel;

    bufferForInputMap = Cl.CreateBuffer<float>(computeContext.context, MemFlags.AllocHostPtr, (int)(inputMapSize.x * inputMapSize.y), out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    bufferForTemporary = Cl.CreateBuffer<float>(computeContext.context, MemFlags.AllocHostPtr, (int)(inputMapSize.x * inputMapSize.y), out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    bufferForOutputMap = Cl.CreateBuffer<float>(computeContext.context, MemFlags.AllocHostPtr, (int)(inputMapSize.x * inputMapSize.y), out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    kernelArray = calculateKernel(kernelRadius);

    bufferForKernelArray = Cl.CreateBuffer<float>(computeContext.context, MemFlags.AllocHostPtr, kernelArray.Length, out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    // copy kernel into buffer (blocking write)
    errorCode = Cl.EnqueueWriteBuffer<float>(computeContext.commandQueue, bufferForKernelArray, OpenCL.Net.Bool.True, kernelArray, 0, null, out eventWriteBufferCompletedKernel);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    // The write was blocking, so the event is released right away instead of at the end of the
    // method, where a failure in between would leak it.
    Cl.ReleaseEvent(eventWriteBufferCompletedKernel);

    // NOTE(review): Assembly.GetEntryAssembly() can return null in some hosts, and the relative
    // path uses Windows separators — confirm this is only run from the expected build layout.
    string programLocation = Assembly.GetEntryAssembly().Location;
    string pathToLoad = Path.Combine(Path.GetDirectoryName(programLocation), "..\\..\\", "ComputationBackend\\OpenCl\\src\\Blur.cl");
    string openClSource = File.ReadAllText(pathToLoad);

    program = Cl.CreateProgramWithSource(computeContext.context, 1, new[] { openClSource }, null, out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    errorCode = Cl.BuildProgram(program, 1, new[] { computeContext.chosenDevice }, "", null, IntPtr.Zero);
    if (errorCode != ErrorCode.Success)
    {
        OpenCL.Net.InfoBuffer logInfoBuffer = Cl.GetProgramBuildInfo(program, computeContext.chosenDevice, ProgramBuildInfo.Log, out errorCode);
        ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
        // Emit the compiler output; previously it was fetched and then discarded.
        System.Diagnostics.Trace.WriteLine("OpenCL program build failed:" + Environment.NewLine + logInfoBuffer.ToString());
        throw new ComputeContext.OpenClError();
    }

    // blurX: arguments 0 and 2 are deliberately not bound here (the calls that would bind
    // bufferForInputMap / bufferForTemporary are disabled) — presumably set per run; confirm.
    kernelBlurX = Cl.CreateKernel(program, "blurX", out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    //errorCode = Cl.SetKernelArg<float>(kernelBlurX, 0, bufferForInputMap);
    //ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    errorCode = Cl.SetKernelArg<float>(kernelBlurX, 1, bufferForKernelArray);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    //errorCode = Cl.SetKernelArg<float>(kernelBlurX, 2, bufferForTemporary);
    //ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    errorCode = Cl.SetKernelArg(kernelBlurX, 3, (IntPtr)4, kernelRadius);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    errorCode = Cl.SetKernelArg(kernelBlurX, 4, (IntPtr)4, inputMapSize.x);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    // blurY: same pattern, plus the map height as argument 5.
    kernelBlurY = Cl.CreateKernel(program, "blurY", out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    //errorCode = Cl.SetKernelArg<float>(kernelBlurY, 0, bufferForTemporary);
    //ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    errorCode = Cl.SetKernelArg<float>(kernelBlurY, 1, bufferForKernelArray);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    //errorCode = Cl.SetKernelArg<float>(kernelBlurY, 2, bufferForOutputMap);
    //ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    errorCode = Cl.SetKernelArg(kernelBlurY, 3, (IntPtr)4, kernelRadius);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    errorCode = Cl.SetKernelArg(kernelBlurY, 4, (IntPtr)4, inputMapSize.x);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    errorCode = Cl.SetKernelArg(kernelBlurY, 5, (IntPtr)4, inputMapSize.y);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
}
/// <summary>
/// Sets kernel argument <paramref name="argIndex"/> to a signed 8-bit value.
/// </summary>
/// <param name="kernel">Kernel whose argument is set.</param>
/// <param name="argIndex">Zero-based argument index.</param>
/// <param name="value">Value to pass; its size is taken as <c>sizeof(sbyte)</c>.</param>
/// <returns>The error code reported by the sized overload this forwards to.</returns>
public static ErrorCode SetKernelArg(Kernel kernel, uint argIndex, sbyte value)
    => SetKernelArg(kernel, argIndex, sizeof(sbyte), value);
/// <summary>
/// Sets kernel argument <paramref name="argIndex"/> to an unsigned 64-bit value.
/// </summary>
/// <param name="kernel">Kernel whose argument is set.</param>
/// <param name="argIndex">Zero-based argument index.</param>
/// <param name="value">Value to pass; its size is taken as <c>sizeof(ulong)</c>.</param>
/// <returns>The error code reported by the sized overload this forwards to.</returns>
public static ErrorCode SetKernelArg(Kernel kernel, uint argIndex, ulong value)
    => SetKernelArg(kernel, argIndex, sizeof(ulong), value);
/// <summary>
/// Sets kernel argument <paramref name="argIndex"/> to a memory object.
/// </summary>
/// <param name="kernel">Kernel whose argument is set.</param>
/// <param name="argIndex">Zero-based argument index.</param>
/// <param name="value">Memory object to pass; the argument size is <c>IntPtr.Size</c>.</param>
/// <returns>The error code reported by the sized overload this forwards to.</returns>
public static ErrorCode SetKernelArg(Kernel kernel, uint argIndex, Mem value)
    => SetKernelArg(kernel, argIndex, IntPtr.Size, value);
/// <summary>
/// Queries work-group information for <paramref name="kernel"/> on <paramref name="device"/>
/// by forwarding to <c>GetInfo</c>.
/// </summary>
/// <param name="kernel">Kernel being queried.</param>
/// <param name="device">Device the query applies to.</param>
/// <param name="paramName">Which piece of information to retrieve.</param>
/// <param name="error">Receives the error code of the query.</param>
/// <returns>The buffer produced by <c>GetInfo</c>.</returns>
public static InfoBuffer GetKernelWorkGroupInfo(Kernel kernel, Device device, KernelWorkGroupInfo paramName, out ErrorCode error)
    => GetInfo(GetKernelWorkGroupInfo, kernel, device, paramName, out error);
/// <summary>
/// Queries information about <paramref name="kernel"/> by forwarding to <c>GetInfo</c>.
/// </summary>
/// <param name="kernel">Kernel being queried.</param>
/// <param name="paramName">Which piece of information to retrieve.</param>
/// <param name="error">Receives the error code of the query.</param>
/// <returns>The buffer produced by <c>GetInfo</c>.</returns>
public static InfoBuffer GetKernelInfo(Kernel kernel, KernelInfo paramName, out ErrorCode error)
    => GetInfo(GetKernelInfo, kernel, paramName, out error);
/// <summary>
/// Runs an OpenCL kernel with one work item per row of <paramref name="source_points"/> and
/// returns the integer the kernel writes for each row.
/// </summary>
/// <param name="source_points">Points as rows; the second dimension must match <paramref name="target_points"/>.</param>
/// <param name="target_points">Points as rows.</param>
/// <returns>One integer per source row, read back from the kernel's output buffer.</returns>
public int[] ClosestPoints(double[,] source_points, double[,] target_points)
{
    System.Diagnostics.Trace.Assert(source_points.GetLength(1) == target_points.GetLength(1));

    int[] ans = new int[source_points.GetLength(0)];
    if (ans.Length == 0)
    {
        // Nothing to compute; avoids submitting zero-sized buffers and a zero-sized launch.
        return ans;
    }

    CL.ErrorCode error;
    byte[] source_points_byte_array = new byte[Marshal.SizeOf(typeof(double)) * source_points.Length];
    byte[] target_points_byte_array = new byte[Marshal.SizeOf(typeof(double)) * target_points.Length];
    byte[] dims_byte_array = new byte[Marshal.SizeOf(typeof(int)) * 3];
    byte[] output_byte_array = new byte[Marshal.SizeOf(typeof(int)) * ans.Length];

    // { point dimension, number of source points, number of target points }
    int[] dims = new[] { source_points.GetLength(1), source_points.GetLength(0), target_points.GetLength(0) };

    MyCL.memcpy(ref source_points, ref source_points_byte_array);
    MyCL.memcpy(ref target_points, ref target_points_byte_array);
    MyCL.memcpy(ref dims, ref dims_byte_array);

    lock (cl_lock)
    {
        if (!program_initialized)
        {
            string programPath = Path.Combine(Environment.CurrentDirectory, "../../ClosestPoints.cl");
            if (!File.Exists(programPath))
            {
                throw new Exception("Program doesn't exist at path " + programPath);
            }

            string programSource = System.IO.File.ReadAllText(programPath);

            var builtProgram = CL.Cl.CreateProgramWithSource(MyCL.context, 1, new[] { programSource }, null, out error);
            MyCL.CheckErr(error, "Cl.CreateProgramWithSource");

            bool buildSucceeded = false;
            try
            {
                error = CL.Cl.BuildProgram(builtProgram, 1, new[] { MyCL.device }, string.Empty, null, IntPtr.Zero);
                MyCL.CheckErr(error, "Cl.BuildProgram");

                if (CL.Cl.GetProgramBuildInfo(builtProgram, MyCL.device, CL.ProgramBuildInfo.Status, out error).CastTo <CL.BuildStatus>() != CL.BuildStatus.Success)
                {
                    MyCL.CheckErr(error, "Cl.GetProgramBuildInfo");
                    throw new Exception($"Cl.GetProgramBuildInfo != Success\r\n{CL.Cl.GetProgramBuildInfo(builtProgram, MyCL.device, CL.ProgramBuildInfo.Log, out error)}");
                }

                buildSucceeded = true;
            }
            finally
            {
                if (!buildSucceeded)
                {
                    CL.Cl.ReleaseProgram(builtProgram);
                }
            }

            // Publish the program and mark it initialized only after a successful build, so a
            // failed attempt is retried on the next call instead of leaving a broken program.
            program = builtProgram;
            program_initialized = true;
        }

        // NOTE(review): the kernel is named "weighted_sum" although this method computes
        // closest points — confirm this is the intended entry point in ClosestPoints.cl.
        using (CL.Kernel kernel = CL.Cl.CreateKernel(program, "weighted_sum", out error))
        {
            MyCL.CheckErr(error, "Cl.CreateKernel");

            // Device buffers holding the raw bytes of the inputs, the dimensions and the output.
            using (CL.IMem source_points_buffer = CL.Cl.CreateBuffer(MyCL.context, CL.MemFlags.CopyHostPtr | CL.MemFlags.ReadOnly, source_points_byte_array, out CL.ErrorCode err1),
                   target_points_buffer = CL.Cl.CreateBuffer(MyCL.context, CL.MemFlags.CopyHostPtr | CL.MemFlags.ReadOnly, target_points_byte_array, out CL.ErrorCode err2),
                   dims_buffer = CL.Cl.CreateBuffer(MyCL.context, CL.MemFlags.CopyHostPtr | CL.MemFlags.ReadOnly, dims_byte_array, out CL.ErrorCode err3),
                   output_buffer = CL.Cl.CreateBuffer(MyCL.context, CL.MemFlags.CopyHostPtr | CL.MemFlags.WriteOnly, output_byte_array, out CL.ErrorCode err4))
            {
                MyCL.CheckErr(err1, "Cl.CreateBuffer source_points");
                MyCL.CheckErr(err2, "Cl.CreateBuffer target_points");
                MyCL.CheckErr(err3, "Cl.CreateBuffer dims");
                MyCL.CheckErr(err4, "Cl.CreateBuffer output");

                int intPtrSize = Marshal.SizeOf(typeof(IntPtr));
                error = CL.Cl.SetKernelArg(kernel, 0, (IntPtr)intPtrSize, source_points_buffer)
                        | CL.Cl.SetKernelArg(kernel, 1, (IntPtr)intPtrSize, target_points_buffer)
                        | CL.Cl.SetKernelArg(kernel, 2, (IntPtr)intPtrSize, dims_buffer)
                        | CL.Cl.SetKernelArg(kernel, 3, (IntPtr)intPtrSize, output_buffer);
                MyCL.CheckErr(error, "Cl.SetKernelArg");

                // Create a command queue, where all of the commands for execution will be added.
                using (CL.CommandQueue cmdQueue = CL.Cl.CreateCommandQueue(MyCL.context, MyCL.device, (CL.CommandQueueProperties) 0, out error))
                {
                    MyCL.CheckErr(error, "Cl.CreateCommandQueue");

                    CL.Event clevent;
                    IntPtr[] workGroupSizePtr = new IntPtr[] { (IntPtr)source_points.GetLength(0) };

                    error = CL.Cl.EnqueueNDRangeKernel(
                        cmdQueue,
                        kernel,
                        1,
                        null, // not used
                        workGroupSizePtr,
                        null,
                        0,
                        null,
                        out clevent);
                    // Check first, then release: the event is only meaningful when the enqueue succeeded.
                    MyCL.CheckErr(error, "Cl.EnqueueNDRangeKernel");
                    if (error == CL.ErrorCode.Success)
                    {
                        CL.Cl.ReleaseEvent(clevent);
                    }

                    error = CL.Cl.Finish(cmdQueue);
                    MyCL.CheckErr(error, "Cl.Finish");

                    // Blocking read of the output bytes.
                    error = CL.Cl.EnqueueReadBuffer(cmdQueue, output_buffer, CL.Bool.True, 0, Marshal.SizeOf(typeof(byte)) * output_byte_array.Length, output_byte_array, 0, null, out clevent);
                    MyCL.CheckErr(error, "Cl.EnqueueReadBuffer");
                    if (error == CL.ErrorCode.Success)
                    {
                        CL.Cl.ReleaseEvent(clevent);
                    }

                    MyCL.memcpy(ref output_byte_array, ref ans);
                }
            }
        }
    }

    return ans;
}
/// <summary>
/// Allocates the device buffers, uploads the relative positions computed for
/// <paramref name="searchRadius"/>, builds the program and creates the
/// <c>findNearestPoint</c> kernel with all eight arguments bound.
/// </summary>
/// <param name="computeContext">Provides the OpenCL context, command queue and device.</param>
/// <param name="searchRadius">Radius passed to <c>calculateRelativePositionsForRadius</c>.</param>
/// <param name="inputMapSize">Map dimensions; <c>x * y</c> sizes the input map buffer.</param>
public void initialize(ComputeContext computeContext, int searchRadius, Misc.Vector2<int> inputMapSize)
{
    List<Vector2<int>> relativePositions;
    ErrorCode errorCode;
    int[] relativePositionsArray;
    OpenCL.Net.Event eventWriteBufferForRelativePositions;

    relativePositions = calculateRelativePositionsForRadius(searchRadius);

    numberOfAllocatedInputAndOutputPositions = 50000;

    bufferForInputMap = Cl.CreateBuffer<int>(computeContext.context, MemFlags.AllocHostPtr, (int)(inputMapSize.x * inputMapSize.y), out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    // Two ints per relative position.
    bufferForRelativePositions = Cl.CreateBuffer<int>(computeContext.context, MemFlags.AllocHostPtr, relativePositions.Count * 2, out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    relativePositionsArray = convertRelativePositionsToArray(relativePositions);

    // copy relative positions into buffer (blocking write)
    errorCode = Cl.EnqueueWriteBuffer<int>(computeContext.commandQueue, bufferForRelativePositions, OpenCL.Net.Bool.True, relativePositionsArray, 0, new Event[] { }, out eventWriteBufferForRelativePositions);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    // The write was blocking, so the event is released right away instead of at the end of the
    // method, where a failure in between would leak it.
    Cl.ReleaseEvent(eventWriteBufferForRelativePositions);

    bufferForInputPositions = Cl.CreateBuffer<int>(computeContext.context, MemFlags.AllocHostPtr, numberOfAllocatedInputAndOutputPositions * 2, out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    bufferForOutputPositions = Cl.CreateBuffer<int>(computeContext.context, MemFlags.AllocHostPtr, numberOfAllocatedInputAndOutputPositions * 2, out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    bufferForFoundNewPosition = Cl.CreateBuffer<int>(computeContext.context, MemFlags.AllocHostPtr, numberOfAllocatedInputAndOutputPositions, out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    program = Cl.CreateProgramWithSource(computeContext.context, 1, new[] { getOpenClSource() }, null, out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    errorCode = Cl.BuildProgram(program, 1, new[] { computeContext.chosenDevice }, "", null, IntPtr.Zero);
    if (errorCode != ErrorCode.Success)
    {
        OpenCL.Net.InfoBuffer logInfoBuffer = Cl.GetProgramBuildInfo(program, computeContext.chosenDevice, ProgramBuildInfo.Log, out errorCode);
        ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
        // Emit the compiler output; previously it was fetched and then discarded.
        System.Diagnostics.Trace.WriteLine("OpenCL program build failed:" + Environment.NewLine + logInfoBuffer.ToString());
        throw new ComputeContext.OpenClError();
    }

    kernelNearestPoint = Cl.CreateKernel(program, "findNearestPoint", out errorCode);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    errorCode = Cl.SetKernelArg<int>(kernelNearestPoint, 0, bufferForInputMap);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    errorCode = Cl.SetKernelArg<int>(kernelNearestPoint, 1, bufferForRelativePositions);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    errorCode = Cl.SetKernelArg<int>(kernelNearestPoint, 2, bufferForInputPositions);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    errorCode = Cl.SetKernelArg<int>(kernelNearestPoint, 3, bufferForOutputPositions);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    errorCode = Cl.SetKernelArg<int>(kernelNearestPoint, 4, bufferForFoundNewPosition);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");

    errorCode = Cl.SetKernelArg(kernelNearestPoint, 5, (IntPtr)4, relativePositions.Count); // number of relative positions
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    errorCode = Cl.SetKernelArg(kernelNearestPoint, 6, (IntPtr)4, inputMapSize.x);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
    errorCode = Cl.SetKernelArg(kernelNearestPoint, 7, (IntPtr)4, inputMapSize.y);
    ComputeContext.throwErrorIfNotSuccessfull(errorCode, "");
}
/// <summary>
/// Runs the <c>sobelEdgeDetect</c> kernel over an RGBA8 image, one work item per pixel, and
/// reads the output image back to the host.
/// </summary>
/// <param name="inputArray">Source pixels, one <c>int</c> per pixel.</param>
/// <param name="inputImgWidth">Image width in pixels.</param>
/// <param name="inputImgHeight">Image height in pixels.</param>
/// <param name="threshold">Threshold; passed to the kernel as <c>(float)threshold / 250.0f</c>.</param>
/// <param name="outputArray">Receives a new array of the same length as <paramref name="inputArray"/> holding the output image.</param>
public void Process(int[] inputArray, int inputImgWidth, int inputImgHeight, double threshold, out int[] outputArray)
{
    Cl.ErrorCode error;
    outputArray = null;

    // Create the required kernel (entry function).
    Cl.Kernel kernel = Cl.Cl.CreateKernel(program, "sobelEdgeDetect", out error);
    CheckErr(error, "Cl.CreateKernel");

    Cl.IMem inputImage2DBuffer = null;
    Cl.IMem outputImage2DBuffer = null;
    Cl.CommandQueue cmdQueue = default(Cl.CommandQueue);
    bool queueCreated = false;

    try
    {
        Cl.ImageFormat clImageFormat = new Cl.ImageFormat(Cl.ChannelOrder.RGBA, Cl.ChannelType.Unsigned_Int8);

        // Allocate the OpenCL input image, initialised from the host pixels.
        inputImage2DBuffer = Cl.Cl.CreateImage2D(_context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadOnly, clImageFormat, (IntPtr)inputImgWidth, (IntPtr)inputImgHeight, (IntPtr)0, inputArray, out error);
        CheckErr(error, "Cl.CreateImage2D input");

        // Host array that receives the output pixels.
        outputArray = new int[inputArray.Length];

        // Allocate the OpenCL output image.
        outputImage2DBuffer = Cl.Cl.CreateImage2D(_context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.WriteOnly, clImageFormat, (IntPtr)inputImgWidth, (IntPtr)inputImgHeight, (IntPtr)0, outputArray, out error);
        CheckErr(error, "Cl.CreateImage2D output");

        // Pass the memory objects (as handles) and the scaled threshold to the kernel.
        IntPtr handleSize = (IntPtr)IntPtr.Size;
        error = Cl.Cl.SetKernelArg(kernel, 0, handleSize, inputImage2DBuffer);
        error |= Cl.Cl.SetKernelArg(kernel, 1, handleSize, outputImage2DBuffer);
        error |= Cl.Cl.SetKernelArg(kernel, 2, (float)threshold / 250.0f);
        CheckErr(error, "Cl.SetKernelArg");

        // Create a command queue, where all of the commands for execution will be added.
        cmdQueue = Cl.Cl.CreateCommandQueue(_context, _device, (Cl.CommandQueueProperties) 0, out error);
        CheckErr(error, "Cl.CreateCommandQueue");
        queueCreated = true;

        Cl.Event clevent;
        IntPtr[] originPtr = new IntPtr[] { (IntPtr)0, (IntPtr)0, (IntPtr)0 }; // x, y, z
        IntPtr[] regionPtr = new IntPtr[] { (IntPtr)inputImgWidth, (IntPtr)inputImgHeight, (IntPtr)1 }; // x, y, z
        IntPtr[] workGroupSizePtr = new IntPtr[] { (IntPtr)inputImgWidth, (IntPtr)inputImgHeight, (IntPtr)1 };

        // Copy the input image from the host to the device (blocking).
        error = Cl.Cl.EnqueueWriteImage(cmdQueue, inputImage2DBuffer, Cl.Bool.True, originPtr, regionPtr, (IntPtr)0, (IntPtr)0, inputArray, 0, null, out clevent);
        CheckErr(error, "Cl.EnqueueWriteImage");
        if (error == Cl.ErrorCode.Success)
        {
            // Events are never waited on here; release each so it does not leak.
            Cl.Cl.ReleaseEvent(clevent);
        }

        // Execute our kernel over a 2D range (width x height).
        error = Cl.Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 2, null, workGroupSizePtr, null, 0, null, out clevent);
        CheckErr(error, "Cl.EnqueueNDRangeKernel");
        if (error == Cl.ErrorCode.Success)
        {
            Cl.Cl.ReleaseEvent(clevent);
        }

        // Wait for completion of all calculations on the device.
        error = Cl.Cl.Finish(cmdQueue);
        CheckErr(error, "Cl.Finish");

        // Read the processed image back into the output array (blocking).
        error = Cl.Cl.EnqueueReadImage(cmdQueue, outputImage2DBuffer, Cl.Bool.True, originPtr, regionPtr, (IntPtr)0, (IntPtr)0, outputArray, 0, null, out clevent);
        CheckErr(error, "Cl.clEnqueueReadImage");
        if (error == Cl.ErrorCode.Success)
        {
            Cl.Cl.ReleaseEvent(clevent);
        }
    }
    finally
    {
        // Clean up on success and on failure alike.
        Cl.Cl.ReleaseKernel(kernel);

        if (queueCreated)
        {
            Cl.Cl.ReleaseCommandQueue(cmdQueue);
        }

        if (inputImage2DBuffer != null)
        {
            Cl.Cl.ReleaseMemObject(inputImage2DBuffer);
        }

        if (outputImage2DBuffer != null)
        {
            Cl.Cl.ReleaseMemObject(outputImage2DBuffer);
        }
    }
}