/// <summary>
/// Creates the OpenCL program from <c>_source</c>, builds it for the device of the
/// supplied execution context and creates the "update" kernel from it.
/// </summary>
/// <param name="executionContext">Provides the OpenCL context and the target device.</param>
/// <exception cref="NerotiqException">
/// Thrown when program creation, the build, or kernel creation reports a non-success error code.
/// </exception>
private void CompileKernels(ExecutionContext executionContext)
{
    ErrorCode error;
    var programSources = SourceLoader.CreateProgramCollection(_source);

    _program = Cl.CreateProgramWithSource(
        executionContext.OpenClContext,
        (uint)programSources.Length,
        programSources,
        null,
        out error);
    if (error != ErrorCode.Success)
    {
        throw new NerotiqException($"Error creating program with source: {error}");
    }

    var targetDevices = new[] { executionContext.Device };
    error = Cl.BuildProgram(_program, 1, targetDevices, string.Empty, null, IntPtr.Zero);

    // A compiler failure carries a build log; attach it to the exception message.
    if (error == ErrorCode.BuildProgramFailure)
    {
        var buildInfoLog = Cl.GetProgramBuildInfo(
            _program,
            executionContext.Device,
            ProgramBuildInfo.Log,
            out var logQueryError);
        throw new NerotiqException($"Error building program: {error}: {buildInfoLog}");
    }

    // Any other build failure is reported by error code alone.
    if (error != ErrorCode.Success)
    {
        throw new NerotiqException($"Error building program: {error}");
    }

    // Get the kernels.
    _updateKernel = Cl.CreateKernel(_program, "update", out error);
    if (error != ErrorCode.Success)
    {
        throw new NerotiqException($"Error creating kernel update: {error}");
    }
}
/// <summary>
/// Compiles OpenCL source code for <c>Device</c> and wraps the requested kernel
/// in an <see cref="OpenClBridge"/>.
/// </summary>
/// <param name="sourceGpuCode">OpenCL source code to compile.</param>
/// <param name="methodeName">Name of the kernel entry point (the <c>__kernel</c> function) to create.</param>
/// <returns>A bridge constructed from the created kernel, the source text and the kernel name.</returns>
/// <exception cref="GPUException">
/// Thrown with code "Compile0x1" when the program object cannot be created, "Compile0x2" when the
/// build call fails, "Compile0x3" when the build status is not successful, and "Compile0x4" when
/// the kernel cannot be created.
/// </exception>
public static OpenClBridge CompileProgramFromSource(string sourceGpuCode, string methodeName)
{
    // Create the program object.
    Program program = Cl.CreateProgramWithSource(Context, 1, new[] { sourceGpuCode }, null, out ErrorCode error);
    if (error != ErrorCode.Success)
    {
        // Nothing has been built yet, so there is no build log to query (and the program
        // handle is not valid); report the creation error code itself.
        throw new GPUException("Compile0x1", error.ToString());
    }

    // Build it for the target device; on failure the build log explains why.
    if (Cl.BuildProgram(program, 1, new[] { Device }, string.Empty, null, IntPtr.Zero) != ErrorCode.Success)
    {
        throw new GPUException("Compile0x2", Cl.GetProgramBuildInfo(program, Device, ProgramBuildInfo.Log, out error).ToString());
    }

    // Double-check the recorded build status.
    if (Cl.GetProgramBuildInfo(program, Device, ProgramBuildInfo.Status, out error).CastTo<BuildStatus>() != BuildStatus.Success)
    {
        throw new GPUException("Compile0x3", Cl.GetProgramBuildInfo(program, Device, ProgramBuildInfo.Log, out error).ToString());
    }

    // Create the kernel for the requested entry point.
    Kernel kernel = Cl.CreateKernel(program, methodeName, out error);
    if (error != ErrorCode.Success)
    {
        throw new GPUException("Compile0x4", error.ToString());
    }

    return new OpenClBridge(kernel, sourceGpuCode, methodeName);
}
/// <summary>
/// Runs the "ExternalLoopBody" kernel with a single work item over a six-element
/// int buffer and asserts on the values read back from the device.
/// </summary>
/// <param name="program">Program from which the kernel is created.</param>
public void ExternalLoopBody(Cl.Program program)
{
    // Kernel and command queue.
    Cl.Kernel loopKernel = Cl.CreateKernel(program, "ExternalLoopBody", out error);
    clSafeCall(error);
    Cl.CommandQueue queue = Cl.CreateCommandQueue(context, device, Cl.CommandQueueProperties.None, out error);
    clSafeCall(error);

    // Host data: seeds the device buffer and later receives the result.
    int[] hostValues = { 0, 1, 2, 3, 4, 5 };
    int elementCount = hostValues.Length;
    IntPtr byteCount = (IntPtr)(sizeof(int) * elementCount);

    // Device buffer initialised from the host data.
    Cl.Mem deviceValues = Cl.CreateBuffer(
        context,
        Cl.MemFlags.ReadWrite | Cl.MemFlags.CopyHostPtr,
        byteCount,
        hostValues,
        out error);
    clSafeCall(error);

    // Kernel arguments: the buffer and its element count.
    clSafeCall(Cl.SetKernelArg(loopKernel, 0, deviceValues));
    clSafeCall(Cl.SetKernelArg(loopKernel, 1, elementCount));

    // Launch one work item.
    IntPtr[] globalWorkSize = new[] { (IntPtr)1 };
    clSafeCall(Cl.EnqueueNDRangeKernel(queue, loopKernel, 1, null, globalWorkSize, null, 0, null, out clevent));

    // Blocking read of the results back into the host array.
    clSafeCall(Cl.EnqueueReadBuffer(queue, deviceValues, Cl.Bool.True, IntPtr.Zero, byteCount, hostValues, 0, null, out clevent));
    clSafeCall(Cl.Finish(queue));

    Assert.AreEqual(new[] { 1, 4, 3, 6, 5, 8 }, hostValues);
}
/// <summary>
/// Reads the OpenCL source at <c>PROGRAM_PATH</c>, builds it for <c>device</c> with
/// <c>-Werror</c>, prints the build log to the console and creates the named kernel.
/// </summary>
/// <param name="kernelName">Name of the kernel entry function to create.</param>
/// <returns>The kernel created from the built program.</returns>
/// <exception cref="IOException">Thrown when no file exists at <c>PROGRAM_PATH</c>.</exception>
private Kernel CompileKernel(string kernelName)
{
    if (!File.Exists(PROGRAM_PATH))
    {
        throw new IOException("Program does not exist at path.");
    }

    string programSource = File.ReadAllText(PROGRAM_PATH);

    ErrorCode error;
    // The program handle is disposed when this block is left; the kernel is returned from inside it.
    using (Program program = Cl.CreateProgramWithSource(context, 1, new[] { programSource }, null, out error))
    {
        CheckErr(error, "Cl.CreateProgramWithSource");

        // Compile kernel source. The result is kept in its own variable: the log query below
        // has its own out error code and previously overwrote the build result before it was checked.
        ErrorCode buildError = Cl.BuildProgram(program, 1, new[] { device }, "-Werror", null, IntPtr.Zero);

        // Always print the build log (it is printed on success as well).
        ErrorCode logError;
        InfoBuffer log = Cl.GetProgramBuildInfo(program, device, ProgramBuildInfo.Log, out logError);
        Console.WriteLine(log);

        CheckErr(buildError, "Cl.BuildProgram");

        // Check for any compilation errors
        if (Cl.GetProgramBuildInfo(program, device, ProgramBuildInfo.Status, out error).CastTo<BuildStatus>() != BuildStatus.Success)
        {
            CheckErr(error, "Cl.GetProgramBuildInfo");
        }

        // Create the required kernel (entry function)
        Kernel kernel = Cl.CreateKernel(program, kernelName, out error);
        CheckErr(error, "Cl.CreateKernel");
        return kernel;
    }
}
/// <summary>
/// Creates the kernel called <paramref name="kernelName"/> from <c>_program</c> and
/// stores it in <c>_kernels</c> under that name.
/// </summary>
/// <param name="kernelName">Name of the kernel; also used as the key in <c>_kernels</c>.</param>
public void CreateKernel(string kernelName)
{
    Kernel created = Cl.CreateKernel(_program, kernelName, out _error);

    // Validate the creation result before the kernel is stored.
    CLException.CheckException(_error);

    _kernels.Add(kernelName, created);
}
/// <summary>
/// Creates the kernel named <paramref name="methodeName"/> from <paramref name="program"/>.
/// </summary>
/// <param name="program">Program that contains the kernel; passed by reference but not reassigned here.</param>
/// <param name="methodeName">Name of the kernel entry point.</param>
/// <returns>The created kernel.</returns>
/// <exception cref="GPUException">Thrown with code "Compile0x4" when kernel creation does not succeed.</exception>
public static Kernel CreateProgram(ref Program program, string methodeName)
{
    ErrorCode error;
    Kernel kernel = Cl.CreateKernel(program, methodeName, out error);

    if (error == ErrorCode.Success)
    {
        return kernel;
    }

    throw new GPUException("Compile0x4", error.ToString());
}
/// <summary>
/// Creates the kernel called <paramref name="name"/> from the given program and wraps it
/// in a new <see cref="ComputeKernel"/>.
/// </summary>
/// <param name="name">Name of the kernel entry point.</param>
/// <param name="prog">Wrapper whose <c>prog</c> member is the program to create the kernel from.</param>
/// <returns>A new <see cref="ComputeKernel"/> whose <c>kernel</c> member holds the created kernel.</returns>
public static ComputeKernel CreateKernel(string name, ComputeProgram prog)
{
    // NOTE(review): the error code returned by Cl.CreateKernel is never inspected here,
    // so a failed creation is not reported to the caller — confirm this is intended.
    var handle = Cl.CreateKernel(prog.prog, name, out var creationError);

    return new ComputeKernel { kernel = handle };
}
/// <summary>
/// Runs a fixed OpenCL kernel over <paramref name="input"/>: for each element the kernel
/// multiplies together the results of several math functions (cos, sin, floor, sqrt, exp,
/// powr, fabs, log) and writes one float per input element.
/// </summary>
/// <param name="input">Integer inputs, one per work item.</param>
/// <returns>
/// An array with one float per input element; an empty array when <paramref name="input"/> is empty.
/// </returns>
/// <remarks>
/// Error codes returned by the OpenCL calls are stored in a local but not acted upon.
/// The program, kernel and buffers are released even if a call throws.
/// </remarks>
public float[] MathFunctionsSingleTest(int[] input)
{
    if (input.Length == 0)
    {
        return new float[0];
    }

    // NOTE(review): the literal below is reproduced exactly as it appears in this view;
    // confirm its line breaks against the original file.
    var source = @"#pragma OPENCL EXTENSION cl_khr_fp64 : enable __kernel void kernelCode(__global int* ___input___, __global float* ___result___) { int n0; float ___final___10; int ___flag___11; int ___id___ = get_global_id(0); n0 = ___input___[___id___]; float pi = 3.14159274f; float c = cos(((float) n0)); float s = sin(((float) n0)); float f = floor(pi); float sq = sqrt(((float) (n0 * n0))); float ex = exp(pi); float p = powr(pi, 2.0f); float a = fabs(c); float l = log(((float) n0)); ___final___10 = ((((((((f * pi) * c) * s) * sq) * ex) * p) * a) * l); ___result___[___id___] = ___final___10; } ";

    var output = new float[input.Length];
    ErrorCode error;

    // Device buffers backed by the host arrays.
    var a = Cl.CreateBuffer(env.Context, MemFlags.ReadOnly | MemFlags.None | MemFlags.UseHostPtr, (IntPtr)(input.Length * sizeof(int)), input, out error);
    var b = Cl.CreateBuffer(env.Context, MemFlags.WriteOnly | MemFlags.None | MemFlags.UseHostPtr, (IntPtr)(input.Length * sizeof(float)), output, out error);
    try
    {
        OpenCL.Net.Program program = Cl.CreateProgramWithSource(env.Context, 1u, new string[] { source }, null, out error);
        try
        {
            error = Cl.BuildProgram(program, (uint)env.Devices.Length, env.Devices, " -cl-fast-relaxed-math -cl-mad-enable ", null, IntPtr.Zero);

            OpenCL.Net.Kernel kernel = Cl.CreateKernel(program, "kernelCode", out error);
            try
            {
                error = Cl.SetKernelArg(kernel, 0, a);
                error = Cl.SetKernelArg(kernel, 1, b);

                // One work item per input element, work-group size 1.
                Event eventID;
                error = Cl.EnqueueNDRangeKernel(env.CommandQueues[0], kernel, (uint)1, null, new IntPtr[] { (IntPtr)input.Length }, new IntPtr[] { (IntPtr)1 }, (uint)0, null, out eventID);

                env.CommandQueues[0].ReadFromBuffer(b, output);
            }
            finally
            {
                // Previously the kernel handle was never released.
                Cl.ReleaseKernel(kernel);
            }
        }
        finally
        {
            // Previously the program handle was never released.
            Cl.ReleaseProgram(program);
        }
    }
    finally
    {
        a.Dispose();
        b.Dispose();
    }

    return output;
}
/// <summary>
/// Runs the "ArrayCompare" kernel twice with a single work item — first with two freshly
/// created int buffers as arguments 0 and 1, then with <c>dummy</c> for both — and asserts
/// on the four booleans read back from the third buffer after each run.
/// </summary>
/// <param name="program">Program from which the kernel is created.</param>
public void ArrayCompare(Cl.Program program)
{
    // Kernel and command queue.
    Cl.Kernel compareKernel = Cl.CreateKernel(program, "ArrayCompare", out error);
    clSafeCall(error);
    Cl.CommandQueue queue = Cl.CreateCommandQueue(context, device, Cl.CommandQueueProperties.None, out error);
    clSafeCall(error);

    // Host result array; the initial contents differ from the first expected result.
    bool[] results = { true, false, true, false };
    IntPtr intBytes = (IntPtr)(sizeof(int));
    IntPtr resultBytes = (IntPtr)(sizeof(bool) * results.Length);

    // Device buffers, created without host data.
    Cl.Mem firstArg = Cl.CreateBuffer(context, Cl.MemFlags.WriteOnly, intBytes, IntPtr.Zero, out error);
    clSafeCall(error);
    Cl.Mem secondArg = Cl.CreateBuffer(context, Cl.MemFlags.WriteOnly, intBytes, IntPtr.Zero, out error);
    clSafeCall(error);
    Cl.Mem deviceResults = Cl.CreateBuffer(context, Cl.MemFlags.WriteOnly, resultBytes, IntPtr.Zero, out error);
    clSafeCall(error);

    IntPtr[] globalWorkSize = new[] { (IntPtr)1 };

    // First run: two distinct buffers.
    clSafeCall(Cl.SetKernelArg(compareKernel, 0, firstArg));
    clSafeCall(Cl.SetKernelArg(compareKernel, 1, secondArg));
    clSafeCall(Cl.SetKernelArg(compareKernel, 2, deviceResults));
    clSafeCall(Cl.EnqueueNDRangeKernel(queue, compareKernel, 1, null, globalWorkSize, null, 0, null, out clevent));
    clSafeCall(Cl.EnqueueReadBuffer(queue, deviceResults, Cl.Bool.True, IntPtr.Zero, resultBytes, results, 0, null, out clevent));
    clSafeCall(Cl.Finish(queue));
    Assert.AreEqual(new[] { false, true, false, true }, results);

    // Second run: the same object for both compared arguments; argument 2 is left as set above.
    clSafeCall(Cl.SetKernelArg(compareKernel, 0, dummy));
    clSafeCall(Cl.SetKernelArg(compareKernel, 1, dummy));
    clSafeCall(Cl.EnqueueNDRangeKernel(queue, compareKernel, 1, null, new[] { (IntPtr)1 }, null, 0, null, out clevent));
    clSafeCall(Cl.EnqueueReadBuffer(queue, deviceResults, Cl.Bool.True, IntPtr.Zero, resultBytes, results, 0, null, out clevent));
    clSafeCall(Cl.Finish(queue));
    Assert.AreEqual(new[] { true, false, true, false }, results);
}
/// <summary>
/// Stores the context and command queue, resets <c>origin</c> to three zero offsets and
/// creates the named kernel from <paramref name="program"/>.
/// </summary>
/// <param name="context">OpenCL context kept by this instance.</param>
/// <param name="commandQueue">Command queue kept by this instance.</param>
/// <param name="program">Program that contains the kernel.</param>
/// <param name="KernelName">Name of the kernel entry point.</param>
/// <param name="error">Receives the error code reported by kernel creation.</param>
public ClKernel(Context context, CommandQueue commandQueue, OpenCL.Net.Program program, string KernelName, out ErrorCode error)
{
    this.context = context;
    this.commandQueue = commandQueue;
    origin = new[] { IntPtr.Zero, IntPtr.Zero, IntPtr.Zero };

    kernel = Cl.CreateKernel(program, KernelName, out error);

    // The instance counts as initialized only when kernel creation succeeded.
    initialized = error == ErrorCode.Success;
}
/// <summary>
/// Creates a kernel object for the function named <paramref name="kernel_name"/> in
/// <paramref name="program"/> and stores its handle in <c>Handle</c>.
/// </summary>
/// <param name="program">Program that contains the kernel; must not be <c>Program.Null</c>.</param>
/// <param name="kernel_name">Name of the kernel function; marshalled as an ANSI string.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="program"/> equals <c>Program.Null</c>.</exception>
public Kernel(Program program, string kernel_name) : this()
{
    if (program == Program.Null)
    {
        // The exception previously named a non-existent "context" parameter.
        throw new ArgumentNullException(nameof(program));
    }

    unsafe
    {
        int error = 0;
        IntPtr str = Marshal.StringToHGlobalAnsi(kernel_name);
        try
        {
            Handle = Cl.CreateKernel(program.Handle, (byte*)str.ToPointer(), &error);
        }
        finally
        {
            // Free the unmanaged copy of the name even if the native call throws.
            Marshal.FreeHGlobal(str);
        }

        ClHelper.GetError(error);
    }
}
/// <summary>
/// Compiles <paramref name="source"/> for every device of <paramref name="context"/> and
/// creates the kernel named <paramref name="kernelName"/>.
/// </summary>
/// <param name="context">Context whose devices the program is built for.</param>
/// <param name="source">OpenCL source code.</param>
/// <param name="kernelName">Name of the kernel entry point.</param>
/// <param name="errors">
/// Empty on success. On a build failure, the build logs of all devices joined with newlines;
/// on any other failure, the name of the failing call and its error code.
/// </param>
/// <param name="options">Build options; <c>null</c> is treated as an empty string.</param>
/// <returns>The created kernel, or a default <see cref="Kernel"/> when any step fails.</returns>
private static Kernel _CompileKernel(this Context context, string source, string kernelName, out string errors, string options = null)
{
    errors = string.Empty;
    ErrorCode error;

    var devicesInfoBuffer = Cl.GetContextInfo(context, ContextInfo.Devices, out error);
    if (error != ErrorCode.Success)
    {
        errors = "Cl.GetContextInfo failed: " + error;
        return new Kernel();
    }

    var devices = devicesInfoBuffer.CastToArray<Device>((devicesInfoBuffer.Size / Marshal.SizeOf(typeof(IntPtr))));

    // Pass null for the lengths so the source is treated as null-terminated. The previous
    // explicit length was the UTF-16 character count, which is not the byte length once the
    // string is marshalled if it contains non-ASCII characters.
    var program = Cl.CreateProgramWithSource(context, 1, new[] { source }, null, out error);
    if (error != ErrorCode.Success)
    {
        errors = "Cl.CreateProgramWithSource failed: " + error;
        return new Kernel();
    }

    try
    {
        error = Cl.BuildProgram(program, (uint)devices.Length, devices, options ?? string.Empty, null, IntPtr.Zero);
        if (error != ErrorCode.Success)
        {
            errors = string.Join("\n", from device in devices
                                       select Cl.GetProgramBuildInfo(program, device, ProgramBuildInfo.Log, out error).ToString());
            return new Kernel();
        }

        var kernel = Cl.CreateKernel(program, kernelName, out error);
        if (error != ErrorCode.Success)
        {
            // Previously this failure was returned silently with an empty error string.
            errors = "Cl.CreateKernel failed: " + error;
            return new Kernel();
        }

        return kernel;
    }
    finally
    {
        // Drop this method's reference to the program, which is otherwise unreachable
        // once the method returns.
        Cl.ReleaseProgram(program);
    }
}
/// <summary>
/// Runs the "ArrayRefOut" kernel with a single work item on two one-element int buffers
/// (plus <c>dummy</c> as the third argument) and asserts on the values read back.
/// </summary>
/// <param name="program">Program from which the kernel is created.</param>
public void ArrayRefOut(Cl.Program program)
{
    // create kernel
    Cl.Kernel kernel = Cl.CreateKernel(program, "ArrayRefOut", out error);
    clSafeCall(error);

    // create command queue
    Cl.CommandQueue cmdQueue = Cl.CreateCommandQueue(context, device, Cl.CommandQueueProperties.None, out error);
    clSafeCall(error);

    // allocate host vectors
    int[] hp1 = { 1 };
    int[] hp2 = { 2 };

    // allocate device vectors, initialised from the host vectors
    Cl.Mem dp1 = Cl.CreateBuffer(context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadWrite, (IntPtr)(sizeof(int) * hp1.Length), hp1, out error);
    clSafeCall(error);
    Cl.Mem dp2 = Cl.CreateBuffer(context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadWrite, (IntPtr)(sizeof(int) * hp2.Length), hp2, out error);
    clSafeCall(error);

    // setup kernel arguments
    clSafeCall(Cl.SetKernelArg(kernel, 0, dp1));
    clSafeCall(Cl.SetKernelArg(kernel, 1, dp2));
    clSafeCall(Cl.SetKernelArg(kernel, 2, dummy));

    // execute kernel
    clSafeCall(Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 1, null, new[] { (IntPtr)1 }, null, 0, null, out clevent));

    // copy results from device back to host; each read is sized by its own host array
    // (the second read was previously sized with hp1.Length)
    clSafeCall(Cl.EnqueueReadBuffer(cmdQueue, dp1, Cl.Bool.True, IntPtr.Zero, (IntPtr)(sizeof(int) * hp1.Length), hp1, 0, null, out clevent));
    clSafeCall(Cl.EnqueueReadBuffer(cmdQueue, dp2, Cl.Bool.True, IntPtr.Zero, (IntPtr)(sizeof(int) * hp2.Length), hp2, 0, null, out clevent));
    clSafeCall(Cl.Finish(cmdQueue));

    Assert.AreEqual(5, hp1[0]);
    Assert.AreEqual(4, hp2[0]);
}
/// <summary>
/// Loads OpenCL source from <paramref name="file"/>, builds it for <c>device</c> and creates
/// the kernel named <paramref name="name"/>.
/// </summary>
/// <param name="file">Path of the source file.</param>
/// <param name="name">Name of the kernel entry function.</param>
/// <param name="program">Receives the created program, or <c>null</c> when the file does not exist.</param>
/// <param name="kernel">Receives the created kernel, or <c>null</c> when the file does not exist.</param>
private static void LoadKernel(string file, string name, out Program? program, out Kernel? kernel)
{
    // Nothing to load: hand back nulls.
    if (!File.Exists(file))
    {
        program = null;
        kernel = null;
        return;
    }

    ErrorCode error;
    string sourceText = File.ReadAllText(file);

    Program created = Cl.CreateProgramWithSource(context, 1, new[] { sourceText }, null, out error);
    program = created;
    ErrorCheck(error, "Cl.CreateProgramWithSource");

    // Compile kernel source
    error = Cl.BuildProgram(created, 1, new[] { device }, string.Empty, null, IntPtr.Zero);
    ErrorCheck(error, "Cl.BuildProgram");

    // Check for any compilation errors
    BuildStatus status = Cl.GetProgramBuildInfo(created, device, ProgramBuildInfo.Status, out error).CastTo<BuildStatus>();
    if (status != BuildStatus.Success)
    {
        ErrorCheck(error, "Cl.GetProgramBuildInfo");

        // NOTE(review): the shared context is released here, yet execution continues to the
        // kernel creation below — confirm that this is intended.
        Cl.ReleaseContext(context);
        Console.WriteLine("Cl.GetProgramBuildInfo != Success");
        Console.WriteLine(Cl.GetProgramBuildInfo(created, device, ProgramBuildInfo.Log, out error));
        Console.ReadKey();
    }

    // Create the required kernel (entry function)
    kernel = Cl.CreateKernel(created, name, out error);
    ErrorCheck(error, "Cl.CreateKernel");
}
/// <summary>
/// Reads OpenCL source from <paramref name="kernelFilePath"/>, builds it for <c>device</c>
/// and returns the kernel named <paramref name="kernelName"/>.
/// </summary>
/// <param name="kernelFilePath">Path of the source file.</param>
/// <param name="kernelName">Name of the kernel entry function.</param>
/// <returns>The created kernel.</returns>
/// <remarks>
/// When the file is missing or the build status is not successful, a message is written to
/// the console, a key press is awaited and the process exits with code 1.
/// </remarks>
public static Kernel LoadAndBuildKernel(string kernelFilePath, string kernelName)
{
    // Attempt to read file
    bool sourceExists = System.IO.File.Exists(kernelFilePath);
    if (!sourceExists)
    {
        Console.WriteLine("Program doesn't exist at path " + kernelFilePath);
        Console.ReadKey();
        System.Environment.Exit(1);
    }

    string sourceText = System.IO.File.ReadAllText(kernelFilePath);

    // Create program
    OpenCL.Net.Program builtProgram = Cl.CreateProgramWithSource(context, 1, new[] { sourceText }, null, out ClError);
    CheckErr(ClError, "CL.LoadAndBuildKernel: Cl.CreateProgramWithSource");

    // Compile kernel source
    var targetDevices = new[] { device };
    ClError = Cl.BuildProgram(builtProgram, 1, targetDevices, string.Empty, null, IntPtr.Zero);
    CheckErr(ClError, "CL.LoadAndBuildKernel: Cl.BuildProgram " + kernelFilePath);

    // Check for any compilation errors
    BuildStatus buildStatus = Cl.GetProgramBuildInfo(builtProgram, device, ProgramBuildInfo.Status, out ClError).CastTo<BuildStatus>();
    if (buildStatus != BuildStatus.Success)
    {
        CheckErr(ClError, "CL.LoadAndBuildKernel: Cl.GetProgramBuildInfo");
        Console.WriteLine("Cl.GetProgramBuildInfo != Success");
        Console.WriteLine(Cl.GetProgramBuildInfo(builtProgram, device, ProgramBuildInfo.Log, out ClError));
        Console.ReadKey();
        System.Environment.Exit(1);
    }

    // Create the required kernel (entry function)
    Kernel entryKernel = Cl.CreateKernel(builtProgram, kernelName, out ClError);
    CheckErr(ClError, "CL.LoadAndBuildKernel: Cl.CreateKernel " + kernelName);

    return entryKernel;
}
/// <summary>
/// Handles an HTTP request. GET returns an HTML page containing the upload form. POST
/// downloads the image named by the "imageUploadUrl" request value, runs it through the
/// OpenCL kernel configured under <c>_parameters["KernelFunction"]</c> (source read from
/// "Kernel.cl" in the current directory) and returns the result as a JPEG.
/// </summary>
/// <param name="request">Incoming request.</param>
/// <returns>
/// 200 with the form (GET) or the processed JPEG (POST); 400 when the image download throws;
/// 500 when no image object is produced; 404 when the kernel file is missing or the build
/// status is not successful; 501 for any other HTTP method.
/// </returns>
public HTTPResponse GetResponse(HTTPRequest request)
{
    HTTPResponse response = new HTTPResponse(200);
    StringBuilder sb = new StringBuilder();
    ErrorCode error;

    if (!_isInit)
    {
        init();
        _isInit = true;
    }

    if (request.Method == HTTPRequest.METHOD_GET)
    {
        // Input form, this can be placed on any HTML page
        sb.Append("<html><body>");
        sb.Append(GenUploadForm());
        sb.Append("</body></html>");
        response.Body = Encoding.UTF8.GetBytes(sb.ToString());
        return response;
    }
    else if (request.Method == HTTPRequest.METHOD_POST)
    {
        // Get remote image from URL
        string url = Uri.UnescapeDataString(request.GetRequestByKey("imageUploadUrl"));
        byte[] data;
        try
        {
            data = DownloadImageFromUrl(url);
        }
        catch (Exception)
        {
            return new HTTPResponse(400);
        }

        // https://www.codeproject.com/Articles/502829/GPGPU-image-processing-basics-using-OpenCL-NET
        // Convert image to a 32bpp ARGB byte array. The GDI+ objects are only needed for
        // this step, so they are disposed (and the bitmap unlocked) as soon as the pixels
        // have been copied out.
        int imagewidth;
        int imageHeight;
        int stride;
        int inputImageByteSize;
        byte[] inputByteArray;
        using (MemoryStream inputStream = new MemoryStream(data))
        using (Image inputImage = Image.FromStream(inputStream))
        {
            if (inputImage == null)
            {
                return new HTTPResponse(500);
            }

            imagewidth = inputImage.Width;
            imageHeight = inputImage.Height;

            using (Bitmap bmpImage = new Bitmap(inputImage))
            {
                BitmapData bitmapData = bmpImage.LockBits(new Rectangle(0, 0, bmpImage.Width, bmpImage.Height), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
                try
                {
                    stride = bitmapData.Stride;
                    inputImageByteSize = stride * bitmapData.Height;
                    inputByteArray = new byte[inputImageByteSize];
                    Marshal.Copy(bitmapData.Scan0, inputByteArray, 0, inputImageByteSize);
                }
                finally
                {
                    // Previously the bitmap was left locked.
                    bmpImage.UnlockBits(bitmapData);
                }
            }
        }

        // Load kernel source code
        string programPath = System.Environment.CurrentDirectory + "/Kernel.cl";
        if (!System.IO.File.Exists(programPath))
        {
            return new HTTPResponse(404);
        }

        string programSource = System.IO.File.ReadAllText(programPath);
        using (OpenCL.Net.Program program = Cl.CreateProgramWithSource(_context, 1, new[] { programSource }, null, out error))
        {
            // Create kernel
            LogError(error, "Cl.CreateProgramWithSource");
            error = Cl.BuildProgram(program, 1, new[] { _device }, string.Empty, null, IntPtr.Zero);
            LogError(error, "Cl.BuildProgram");
            if (Cl.GetProgramBuildInfo(program, _device, ProgramBuildInfo.Status, out error).CastTo<OpenCL.Net.BuildStatus>() != BuildStatus.Success)
            {
                LogError(error, "Cl.GetProgramBuildInfo");
                return new HTTPResponse(404);
            }

            Kernel kernel = Cl.CreateKernel(program, _parameters["KernelFunction"], out error);
            LogError(error, "Cl.CreateKernel");

            // Create image memory objects. The locked bitmap region covered the whole image,
            // so its width/height equal imagewidth/imageHeight.
            OpenCL.Net.ImageFormat clImageFormat = new OpenCL.Net.ImageFormat(ChannelOrder.RGBA, ChannelType.Unsigned_Int8);
            IMem inputImage2DBuffer = Cl.CreateImage2D(_context, MemFlags.CopyHostPtr | MemFlags.ReadOnly, clImageFormat, (IntPtr)imagewidth, (IntPtr)imageHeight, (IntPtr)0, inputByteArray, out error);
            LogError(error, "CreateImage2D input");
            byte[] outputByteArray = new byte[inputImageByteSize];
            IMem outputImage2DBuffer = Cl.CreateImage2D(_context, MemFlags.CopyHostPtr | MemFlags.WriteOnly, clImageFormat, (IntPtr)imagewidth, (IntPtr)imageHeight, (IntPtr)0, outputByteArray, out error);
            LogError(error, "CreateImage2D output");

            // Set arguments
            int IntPtrSize = Marshal.SizeOf(typeof(IntPtr));
            error = Cl.SetKernelArg(kernel, 0, (IntPtr)IntPtrSize, inputImage2DBuffer);
            error |= Cl.SetKernelArg(kernel, 1, (IntPtr)IntPtrSize, outputImage2DBuffer);
            LogError(error, "Cl.SetKernelArg");

            // Create command queue
            CommandQueue cmdQueue = Cl.CreateCommandQueue(_context, _device, (CommandQueueProperties)0, out error);
            LogError(error, "Cl.CreateCommandQueue");
            Event clevent;

            // Copy input image from the host to the GPU
            IntPtr[] originPtr = new IntPtr[] { (IntPtr)0, (IntPtr)0, (IntPtr)0 };
            IntPtr[] regionPtr = new IntPtr[] { (IntPtr)imagewidth, (IntPtr)imageHeight, (IntPtr)1 };
            IntPtr[] workGroupSizePtr = new IntPtr[] { (IntPtr)imagewidth, (IntPtr)imageHeight, (IntPtr)1 };
            error = Cl.EnqueueWriteImage(cmdQueue, inputImage2DBuffer, Bool.True, originPtr, regionPtr, (IntPtr)0, (IntPtr)0, inputByteArray, 0, null, out clevent);
            LogError(error, "Cl.EnqueueWriteImage");

            // Run the kernel (two-dimensional range, one work item per pixel)
            error = Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 2, null, workGroupSizePtr, null, 0, null, out clevent);
            LogError(error, "Cl.EnqueueNDRangeKernel");

            // Wait for the queue to drain
            error = Cl.Finish(cmdQueue);
            LogError(error, "Cl.Finish");

            // Read the output image back from GPU
            error = Cl.EnqueueReadImage(cmdQueue, outputImage2DBuffer, Bool.True, originPtr, regionPtr, (IntPtr)0, (IntPtr)0, outputByteArray, 0, null, out clevent);
            LogError(error, "Cl.EnqueueReadImage");
            error = Cl.Finish(cmdQueue);
            LogError(error, "Cl.Finih");

            // Release memory
            Cl.ReleaseKernel(kernel);
            Cl.ReleaseCommandQueue(cmdQueue);
            Cl.ReleaseMemObject(inputImage2DBuffer);
            Cl.ReleaseMemObject(outputImage2DBuffer);

            // Convert the raw output pixels to a JPEG. The array must stay pinned while the
            // bitmap wraps its memory; the handle is freed afterwards (previously it never was).
            GCHandle pinnedOutputArray = GCHandle.Alloc(outputByteArray, GCHandleType.Pinned);
            try
            {
                IntPtr outputBmpPointer = pinnedOutputArray.AddrOfPinnedObject();
                using (Bitmap outputBitmap = new Bitmap(imagewidth, imageHeight, stride, PixelFormat.Format32bppArgb, outputBmpPointer))
                using (MemoryStream msOutput = new MemoryStream())
                {
                    outputBitmap.Save(msOutput, System.Drawing.Imaging.ImageFormat.Jpeg);
                    response.Body = msOutput.ToArray();
                }
            }
            finally
            {
                pinnedOutputArray.Free();
            }

            response.Type = "image/jpeg";
            return response;
        }
    }

    return new HTTPResponse(501);
}
/// <summary>
/// Runs the "localGlobalFlow2" kernel over six input channels and returns six output
/// channels of <c>width * height</c> floats each.
/// </summary>
/// <param name="J_in">Six host arrays, uploaded to the six input buffers in order.</param>
/// <param name="width">Passed to the kernel as argument 14; with <paramref name="height"/> it sizes every buffer.</param>
/// <param name="height">Passed to the kernel as argument 15.</param>
/// <returns>Six newly allocated arrays holding the data read back from the output buffers.</returns>
/// <remarks>Error codes are stored in <c>error</c> but not checked by this method.</remarks>
private float[][] Gauss_J(float[][] J_in, int width, int height)
{
    const int ChannelCount = 6;

    // Host-side result arrays.
    float[][] J = new float[ChannelCount][];
    for (int c = 0; c < ChannelCount; c++)
    {
        J[c] = new float[width * height];
    }

    // Device buffers: six read-only inputs first, then six write-only outputs.
    IMem<float>[] inputBuffers = new IMem<float>[ChannelCount];
    for (int c = 0; c < ChannelCount; c++)
    {
        inputBuffers[c] = Cl.CreateBuffer<float>(context, MemFlags.ReadOnly, width * height * sizeof(float), out error);
    }

    IMem<float>[] outputBuffers = new IMem<float>[ChannelCount];
    for (int c = 0; c < ChannelCount; c++)
    {
        outputBuffers[c] = Cl.CreateBuffer<float>(context, MemFlags.WriteOnly, width * height * sizeof(float), out error);
    }

    // Kernel arguments: 0 kernelSize, 1 sigma, 2-7 inputs, 8-13 outputs, 14 width, 15 height.
    Kernel _Kernel = Cl.CreateKernel(program, "localGlobalFlow2", out error);
    error |= Cl.SetKernelArg(_Kernel, 0, kernelSize);
    error |= Cl.SetKernelArg<float>(_Kernel, 1, sigma);

    uint argIndex = 2;
    foreach (IMem<float> inputBuffer in inputBuffers)
    {
        error |= Cl.SetKernelArg<float>(_Kernel, argIndex, inputBuffer);
        argIndex++;
    }

    foreach (IMem<float> outputBuffer in outputBuffers)
    {
        error |= Cl.SetKernelArg<float>(_Kernel, argIndex, outputBuffer);
        argIndex++;
    }

    error |= Cl.SetKernelArg(_Kernel, 14, width);
    error |= Cl.SetKernelArg(_Kernel, 15, height);

    Event _event;
    IntPtr[] workGroupSizePtr = new IntPtr[] { (IntPtr)(height * width) };

    // Blocking uploads of the six input channels.
    for (int c = 0; c < ChannelCount; c++)
    {
        error = Cl.EnqueueWriteBuffer<float>(commandQueue, inputBuffers[c], Bool.True, J_in[c], 0, null, out _event);
    }

    // One-dimensional launch with one work item per element.
    error = Cl.EnqueueNDRangeKernel(commandQueue, _Kernel, 1, null, workGroupSizePtr, null, 0, null, out _event);
    error = Cl.Finish(commandQueue);

    // Blocking read-back of each output channel, each followed by a Finish.
    for (int c = 0; c < ChannelCount; c++)
    {
        Cl.EnqueueReadBuffer<float>(commandQueue, outputBuffers[c], Bool.True, 0, (width * height), J[c], 0, null, out _event);
        error = Cl.Finish(commandQueue);
    }

    // Release inputs, then outputs, then the kernel.
    foreach (IMem<float> inputBuffer in inputBuffers)
    {
        Cl.ReleaseMemObject(inputBuffer);
    }

    foreach (IMem<float> outputBuffer in outputBuffers)
    {
        Cl.ReleaseMemObject(outputBuffer);
    }

    Cl.ReleaseKernel(_Kernel);
    return J;
}
/// <summary>
/// Runs the "gradRho" kernel on two images and a flow field and returns the four float
/// arrays read back from its output buffers (kernel arguments 4 to 7, in that order).
/// </summary>
/// <param name="I0">First image; its dimensions size the result arrays and are passed as arguments 8 and 9.</param>
/// <param name="I1d">Second image.</param>
/// <param name="Flow">Flow field; <c>Array[0]</c> and <c>Array[1]</c> are uploaded as arguments 2 and 3.</param>
/// <returns>Four arrays of <c>I0.ImageHeight * I0.ImageWidth</c> floats.</returns>
/// <remarks>
/// Error codes are stored in <c>error</c> but not checked by this method.
/// NOTE(review): the result arrays are sized from <paramref name="I0"/> while the reads use
/// the dimensions of <paramref name="Flow"/> — presumably the two always match; confirm.
/// </remarks>
public float[][] calc_grad_rho_c(SimpleImage I0, SimpleImage I1d, FlowArray Flow)
{
    const int OutputCount = 4;

    // Host-side result arrays.
    float[][] arrays = new float[OutputCount][];
    for (int i = 0; i < OutputCount; i++)
    {
        arrays[i] = new float[I0.ImageHeight * I0.ImageWidth];
    }

    // Device images initialised from the host image bytes.
    Mem leftImageMemObject = (Mem)Cl.CreateImage2D(context, MemFlags.ReadOnly | MemFlags.CopyHostPtr, SimpleImage.clImageFormat, (IntPtr)I0.ImageWidth, (IntPtr)I0.ImageHeight, (IntPtr)0, I0.ByteArray, out error);
    Mem rightImageMemObject = (Mem)Cl.CreateImage2D(context, MemFlags.ReadOnly | MemFlags.CopyHostPtr, SimpleImage.clImageFormat, (IntPtr)I1d.ImageWidth, (IntPtr)I1d.ImageHeight, (IntPtr)0, I1d.ByteArray, out error);

    // Flow inputs.
    IMem<float> uInputFlowMemObject = Cl.CreateBuffer<float>(context, MemFlags.ReadOnly, Flow.Width * Flow.Height * sizeof(float), out error);
    IMem<float> vInputFlowMemObject = Cl.CreateBuffer<float>(context, MemFlags.ReadOnly, Flow.Width * Flow.Height * sizeof(float), out error);

    // Four write-only output buffers.
    IMem<float>[] outputBuffers = new IMem<float>[OutputCount];
    for (int i = 0; i < OutputCount; i++)
    {
        outputBuffers[i] = Cl.CreateBuffer<float>(context, MemFlags.WriteOnly, Flow.Height * Flow.Width * sizeof(float), out error);
    }

    // Kernel arguments: 0-1 images, 2-3 flow, 4-7 outputs, 8 image width, 9 image height.
    Kernel _Kernel = Cl.CreateKernel(program, "gradRho", out error);
    error |= Cl.SetKernelArg(_Kernel, 0, leftImageMemObject);
    error |= Cl.SetKernelArg(_Kernel, 1, rightImageMemObject);
    error |= Cl.SetKernelArg<float>(_Kernel, 2, uInputFlowMemObject);
    error |= Cl.SetKernelArg<float>(_Kernel, 3, vInputFlowMemObject);

    uint outputArgIndex = 4;
    foreach (IMem<float> outputBuffer in outputBuffers)
    {
        error |= Cl.SetKernelArg<float>(_Kernel, outputArgIndex, outputBuffer);
        outputArgIndex++;
    }

    error |= Cl.SetKernelArg(_Kernel, 8, I0.ImageWidth);
    error |= Cl.SetKernelArg(_Kernel, 9, I0.ImageHeight);

    Event _event;
    IntPtr[] originPtr = new IntPtr[] { IntPtr.Zero, IntPtr.Zero, IntPtr.Zero };
    IntPtr[] regionPtr = new IntPtr[] { (IntPtr)I0.ImageWidth, (IntPtr)I0.ImageHeight, (IntPtr)1 };
    IntPtr[] workGroupSizePtr = new IntPtr[] { (IntPtr)(Flow.Height * Flow.Width) };

    // Blocking uploads: both images (same region for each), then both flow components.
    error = Cl.EnqueueWriteImage(commandQueue, leftImageMemObject, Bool.True, originPtr, regionPtr, (IntPtr)0, (IntPtr)0, I0.ByteArray, 0, null, out _event);
    error = Cl.EnqueueWriteImage(commandQueue, rightImageMemObject, Bool.True, originPtr, regionPtr, (IntPtr)0, (IntPtr)0, I1d.ByteArray, 0, null, out _event);
    error = Cl.EnqueueWriteBuffer<float>(commandQueue, uInputFlowMemObject, Bool.True, Flow.Array[0], 0, null, out _event);
    error = Cl.EnqueueWriteBuffer<float>(commandQueue, vInputFlowMemObject, Bool.True, Flow.Array[1], 0, null, out _event);

    // One-dimensional launch with one work item per flow element.
    error = Cl.EnqueueNDRangeKernel(commandQueue, _Kernel, 1, null, workGroupSizePtr, null, 0, null, out _event);
    error = Cl.Finish(commandQueue);

    // Blocking read-back of each output, each followed by a Finish.
    for (int i = 0; i < OutputCount; i++)
    {
        Cl.EnqueueReadBuffer<float>(commandQueue, outputBuffers[i], Bool.True, 0, (Flow.Width * Flow.Height), arrays[i], 0, null, out _event);
        error = Cl.Finish(commandQueue);
    }

    // Release flow inputs, images, outputs and finally the kernel.
    Cl.ReleaseMemObject(uInputFlowMemObject);
    Cl.ReleaseMemObject(vInputFlowMemObject);
    Cl.ReleaseMemObject(leftImageMemObject);
    Cl.ReleaseMemObject(rightImageMemObject);
    foreach (IMem<float> outputBuffer in outputBuffers)
    {
        Cl.ReleaseMemObject(outputBuffer);
    }

    Cl.ReleaseKernel(_Kernel);
    return arrays;
}
/// <summary>
/// Runs the "calcP" kernel on a flow field and two P fields and returns two new fields
/// filled from the kernel's output buffers.
/// </summary>
/// <param name="Flow">Flow field; components are kernel arguments 0 and 1, dimensions are arguments 12 and 13.</param>
/// <param name="P1">First P field; components are kernel arguments 4 and 5.</param>
/// <param name="P2">Second P field; components are kernel arguments 6 and 7.</param>
/// <returns>
/// Two <see cref="FlowArray"/> values with the dimensions of <paramref name="P1"/> and
/// <paramref name="P2"/>, holding the data read back from kernel arguments 8 to 11.
/// </returns>
/// <remarks>Error codes are stored in a local but not checked by this method.</remarks>
public FlowArray[] calc_P_field(FlowArray Flow, FlowArray P1, FlowArray P2)
{
    const int ComponentCount = 2;
    ErrorCode error;

    // Result fields, shaped like P1 and P2.
    FlowArray outputFlow1 = new FlowArray();
    outputFlow1.Array = new float[ComponentCount][];
    outputFlow1.Width = P1.Width;
    outputFlow1.Height = P1.Height;
    for (int c = 0; c < ComponentCount; c++)
    {
        outputFlow1.Array[c] = new float[outputFlow1.Width * outputFlow1.Height];
    }

    FlowArray outputFlow2 = new FlowArray();
    outputFlow2.Array = new float[ComponentCount][];
    outputFlow2.Width = P2.Width;
    outputFlow2.Height = P2.Height;
    for (int c = 0; c < ComponentCount; c++)
    {
        outputFlow2.Array[c] = new float[outputFlow2.Width * outputFlow2.Height];
    }

    FlowArray[] OutputFlows = new FlowArray[2];
    OutputFlows[0] = outputFlow1;
    OutputFlows[1] = outputFlow2;

    // Input buffers in order: Flow u/v, P1 components, P2 components.
    FlowArray[] inputFields = new FlowArray[] { Flow, P1, P2 };
    IMem<float>[] inputBuffers = new IMem<float>[inputFields.Length * ComponentCount];
    for (int f = 0; f < inputFields.Length; f++)
    {
        for (int c = 0; c < ComponentCount; c++)
        {
            inputBuffers[f * ComponentCount + c] = Cl.CreateBuffer<float>(context, MemFlags.ReadOnly, inputFields[f].Width * inputFields[f].Height * sizeof(float), out error);
        }
    }

    // Output buffers in order: first result field's components, then the second's.
    IMem<float>[] outputBuffers = new IMem<float>[OutputFlows.Length * ComponentCount];
    for (int f = 0; f < OutputFlows.Length; f++)
    {
        for (int c = 0; c < ComponentCount; c++)
        {
            outputBuffers[f * ComponentCount + c] = Cl.CreateBuffer<float>(context, MemFlags.WriteOnly, OutputFlows[f].Width * OutputFlows[f].Height * sizeof(float), out error);
        }
    }

    // Kernel arguments: 0-1 flow, 2 tau, 3 theta, 4-7 P inputs, 8-11 P outputs, 12 width, 13 height.
    Kernel _Kernel = Cl.CreateKernel(program, "calcP", out error);
    error |= Cl.SetKernelArg<float>(_Kernel, 0, inputBuffers[0]);
    error |= Cl.SetKernelArg<float>(_Kernel, 1, inputBuffers[1]);
    error |= Cl.SetKernelArg<float>(_Kernel, 2, this.tau);
    error |= Cl.SetKernelArg<float>(_Kernel, 3, this.theta);

    uint argIndex = 4;
    for (int i = ComponentCount; i < inputBuffers.Length; i++)
    {
        error |= Cl.SetKernelArg<float>(_Kernel, argIndex, inputBuffers[i]);
        argIndex++;
    }

    foreach (IMem<float> outputBuffer in outputBuffers)
    {
        error |= Cl.SetKernelArg<float>(_Kernel, argIndex, outputBuffer);
        argIndex++;
    }

    error |= Cl.SetKernelArg(_Kernel, 12, Flow.Width);
    error |= Cl.SetKernelArg(_Kernel, 13, Flow.Height);

    Event _event;
    IntPtr[] workGroupSizePtr = new IntPtr[] { (IntPtr)(outputFlow1.Height * outputFlow1.Width) };

    // Blocking uploads in buffer order.
    for (int f = 0; f < inputFields.Length; f++)
    {
        for (int c = 0; c < ComponentCount; c++)
        {
            error = Cl.EnqueueWriteBuffer<float>(commandQueue, inputBuffers[f * ComponentCount + c], Bool.True, inputFields[f].Array[c], 0, null, out _event);
        }
    }

    // One-dimensional launch sized by the first result field.
    error = Cl.EnqueueNDRangeKernel(commandQueue, _Kernel, 1, null, workGroupSizePtr, null, 0, null, out _event);
    error = Cl.Finish(commandQueue);

    // Blocking read-back of each output component, each followed by a Finish.
    for (int f = 0; f < OutputFlows.Length; f++)
    {
        for (int c = 0; c < ComponentCount; c++)
        {
            Cl.EnqueueReadBuffer<float>(commandQueue, outputBuffers[f * ComponentCount + c], Bool.True, 0, (OutputFlows[f].Width * OutputFlows[f].Height), OutputFlows[f].Array[c], 0, null, out _event);
            error = Cl.Finish(commandQueue);
        }
    }

    // Release inputs, then outputs, then the kernel.
    foreach (IMem<float> inputBuffer in inputBuffers)
    {
        Cl.ReleaseMemObject(inputBuffer);
    }

    foreach (IMem<float> outputBuffer in outputBuffers)
    {
        Cl.ReleaseMemObject(outputBuffer);
    }

    Cl.ReleaseKernel(_Kernel);

    return OutputFlows;
}
/// <summary>
/// Serves the demo endpoint. GET returns the upload form. POST builds Kernel.cl from the
/// current directory, runs its "answer" kernel over 100 random integers and renders every
/// input/output pair as HTML. Any other method yields a 501 response.
/// </summary>
/// <param name="request">Incoming request; only its Method and Body are read here.</param>
/// <returns>
/// A 200 response with an HTML body, a 404 response when Kernel.cl is missing or does not
/// build, or a 501 response for unsupported methods.
/// </returns>
public HTTPResponse GetResponse(HTTPRequest request)
{
    const int elementCount = 100;

    HTTPResponse response = new HTTPResponse(200);
    StringBuilder sb = new StringBuilder();
    ErrorCode error;

    // Lazy one-time setup of the OpenCL state used below.
    if (!_isInit)
    {
        init();
        _isInit = true;
    }

    if (request.Method == HTTPRequest.METHOD_GET)
    {
        sb.Append("<html><body>");
        sb.Append(GenUploadForm());
        sb.Append("</body></html>");
        response.Body = Encoding.UTF8.GetBytes(sb.ToString());
        return response;
    }

    if (request.Method != HTTPRequest.METHOD_POST)
    {
        return new HTTPResponse(501);
    }

    // NOTE(review): the raw request body is echoed ahead of the HTML document without any
    // encoding — confirm this is intended, since the body comes from the client.
    sb.Append(request.Body);

    string programPath = System.Environment.CurrentDirectory + "/Kernel.cl";
    if (!System.IO.File.Exists(programPath))
    {
        Console.WriteLine("Program doesn't exist at path " + programPath);
        return new HTTPResponse(404);
    }

    sb.Append("<html><body>");
    string programSource = System.IO.File.ReadAllText(programPath);

    using (OpenCL.Net.Program program = Cl.CreateProgramWithSource(_context, 1, new[] { programSource }, null, out error))
    {
        LogError(error, "Cl.CreateProgramWithSource");

        error = Cl.BuildProgram(program, 1, new[] { _device }, string.Empty, null, IntPtr.Zero);
        LogError(error, "Cl.BuildProgram");

        if (Cl.GetProgramBuildInfo(program, _device, ProgramBuildInfo.Status, out error).CastTo<OpenCL.Net.BuildStatus>() != BuildStatus.Success)
        {
            LogError(error, "Cl.GetProgramBuildInfo");
            Console.WriteLine("Cl.GetProgramBuildInfo != Success");
            Console.WriteLine(Cl.GetProgramBuildInfo(program, _device, ProgramBuildInfo.Log, out error));
            return new HTTPResponse(404);
        }

        Kernel kernel = Cl.CreateKernel(program, "answer", out error);
        LogError(error, "Cl.CreateKernel");

        // Host-side data: random inputs and a zeroed output array of the same length.
        Random rand = new Random();
        int[] input = Enumerable.Range(0, elementCount).Select(i => rand.Next()).ToArray();
        int[] output = new int[elementCount];

        var buffIn = _context.CreateBuffer(input, MemFlags.ReadOnly);
        var buffOut = _context.CreateBuffer(output, MemFlags.WriteOnly);

        // Buffer arguments are passed as handles, hence the pointer-sized argument size.
        int intPtrSize = Marshal.SizeOf(typeof(IntPtr));
        error = Cl.SetKernelArg(kernel, 0, (IntPtr)intPtrSize, buffIn);
        error |= Cl.SetKernelArg(kernel, 1, (IntPtr)intPtrSize, buffOut);
        LogError(error, "Cl.SetKernelArg");

        CommandQueue cmdQueue = Cl.CreateCommandQueue(_context, _device, (CommandQueueProperties)0, out error);
        LogError(error, "Cl.CreateCommandQueue");

        Event clevent;
        error = Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 2, null, new[] { (IntPtr)elementCount, (IntPtr)1 }, null, 0, null, out clevent);
        LogError(error, "Cl.EnqueueNDRangeKernel");

        error = Cl.Finish(cmdQueue);
        LogError(error, "Cl.Finish");

        error = Cl.EnqueueReadBuffer(cmdQueue, buffOut, OpenCL.Net.Bool.True, 0, elementCount, output, 0, null, out clevent);
        LogError(error, "Cl.EnqueueReadBuffer");

        error = Cl.Finish(cmdQueue);
        LogError(error, "Cl.Finish");

        Cl.ReleaseKernel(kernel);
        Cl.ReleaseCommandQueue(cmdQueue);
        Cl.ReleaseMemObject(buffIn);
        Cl.ReleaseMemObject(buffOut);

        sb.Append("<pre>");
        for (int i = 0; i != elementCount; i++)
        {
            sb.Append(input[i] + " % 42 = " + output[i] + "<br />");
        }
        sb.Append("</pre>");
    }

    sb.Append("</body></html>");
    response.Body = Encoding.UTF8.GetBytes(sb.ToString());
    return response;
}
/// <summary>
/// Experimental driver for the "search" kernel in scrypt.cl: builds the program, queues the
/// kernel through <c>queue_scrypt_kernel</c>, and issues a write, a launch and a read on the
/// resulting command queue. Every OpenCL status code is handed to <c>CheckErr</c>.
/// </summary>
public void ScryptTest()
{
    ErrorCode error;

    //Load and compile kernel source code.
    string programPath = System.Environment.CurrentDirectory + "/../../scrypt.cl";
    if (!System.IO.File.Exists(programPath))
    {
        Console.WriteLine("Program doesn't exist at path " + programPath);
        return;
    }

    string programSource = System.IO.File.ReadAllText(programPath);

    // The using block releases the program on every exit path.
    using (Program program = Cl.CreateProgramWithSource(_context, 1, new[] { programSource }, null, out error))
    {
        CheckErr(error, "Cl.CreateProgramWithSource");

        //Compile kernel source
        // C reference: clBuildProgram(program, 1, &devices[gpu], "-D LOOKUP_GAP=%d -D CONCURRENT_THREADS=%d -D WORKSIZE=%d", NULL, NULL)
        error = Cl.BuildProgram(program, 1, new[] { _device }, "-D LOOKUP_GAP=1 -D CONCURRENT_THREADS=1 -D WORKSIZE=1", null, IntPtr.Zero);
        CheckErr(error, "Cl.BuildProgram");

        //Check for any compilation errors
        if (Cl.GetProgramBuildInfo(program, _device, ProgramBuildInfo.Status, out error).CastTo<BuildStatus>() != BuildStatus.Success)
        {
            CheckErr(error, "Cl.GetProgramBuildInfo");
            Console.WriteLine("Cl.GetProgramBuildInfo != Success");
            Console.WriteLine(Cl.GetProgramBuildInfo(program, _device, ProgramBuildInfo.Log, out error));
            return;
        }

        //Create the required kernel (entry function) [search]
        Kernel kernel = Cl.CreateKernel(program, "search", out error);
        CheckErr(error, "Cl.CreateKernel");

        //Create a command queue, where all of the commands for execution will be added
        CommandQueue cmdQueue = Cl.CreateCommandQueue(_context, _device, (CommandQueueProperties)0, out error);
        CheckErr(error, "Cl.CreateCommandQueue");

        clState _clState = new clState();
        _clState.cl_command_queue = cmdQueue;
        _clState.cl_kernel = kernel;
        _clState.cl_context = _context;

        IntPtr buffersize = new IntPtr(1024);
        // NOTE(review): blank_res is an IntPtr holding the value 1024 and is passed below as the
        // host data of the write — presumably a real zeroed buffer was intended; confirm.
        IntPtr blank_res = new IntPtr(1024);
        Object thrdataRes = new Object();
        OpenCL.Net.Event clevent;

        dev_blk_ctx blk = new dev_blk_ctx();
        ErrorCode status = queue_scrypt_kernel(_clState, blk);
        CheckErr(status, "queue_scrypt_kernel");

        // C reference: clEnqueueWriteBuffer(commandQueue, outputBuffer, CL_TRUE, 0, buffersize, blank_res, 0, NULL, NULL)
        status = Cl.EnqueueWriteBuffer(_clState.cl_command_queue, _clState.outputBuffer, OpenCL.Net.Bool.True, new IntPtr(0), buffersize, blank_res, 0, null, out clevent);
        CheckErr(status, "Cl.EnqueueWriteBuffer");

        // NOTE(review): both size arrays are empty while workdim is 1 — verify the intended sizes.
        IntPtr[] globalThreads = new IntPtr[0];
        IntPtr[] localThreads = new IntPtr[0];
        uint workdim = 1;
        status = Cl.EnqueueNDRangeKernel(_clState.cl_command_queue, _clState.cl_kernel, workdim, null, globalThreads, localThreads, 0, null, out clevent);
        // Check the launch result itself rather than the value left over from an earlier call.
        CheckErr(status, "Cl.EnqueueNDRangeKernel");

        IntPtr offset = new IntPtr(0);
        status = Cl.EnqueueReadBuffer(_clState.cl_command_queue, _clState.outputBuffer, OpenCL.Net.Bool.False, offset, buffersize, thrdataRes, 0, null, out clevent);
        CheckErr(status, "Cl.EnqueueReadBuffer");

        //Wait for completion of all calculations on the GPU.
        error = Cl.Finish(_clState.cl_command_queue);
        CheckErr(error, "Cl.Finish");

        //Clean up memory
        Cl.ReleaseKernel(_clState.cl_kernel);
        Cl.ReleaseCommandQueue(_clState.cl_command_queue);
    }
}
/// <summary>
/// Runs the "imagingTest" kernel from imagingtest.cl over an image file and saves the result
/// as a PNG.
/// </summary>
/// <param name="inputImagePath">Path of the image to read; it is converted to 32bpp ARGB.</param>
/// <param name="outputImagePath">Path the processed PNG is written to.</param>
public void ImagingTest(string inputImagePath, string outputImagePath)
{
    ErrorCode error;

    //Load and compile kernel source code.
    //The path to the source file may vary
    string programPath = System.Environment.CurrentDirectory + "/../../imagingtest.cl";
    if (!System.IO.File.Exists(programPath))
    {
        Console.WriteLine("Program doesn't exist at path " + programPath);
        return;
    }

    string programSource = System.IO.File.ReadAllText(programPath);
    using (Program program = Cl.CreateProgramWithSource(_context, 1, new[] { programSource }, null, out error))
    {
        CheckErr(error, "Cl.CreateProgramWithSource");

        //Compile kernel source
        error = Cl.BuildProgram(program, 1, new[] { _device }, string.Empty, null, IntPtr.Zero);
        CheckErr(error, "Cl.BuildProgram");

        //Check for any compilation errors
        if (Cl.GetProgramBuildInfo(program, _device, ProgramBuildInfo.Status, out error).CastTo<BuildStatus>() != BuildStatus.Success)
        {
            CheckErr(error, "Cl.GetProgramBuildInfo");
            Console.WriteLine("Cl.GetProgramBuildInfo != Success");
            Console.WriteLine(Cl.GetProgramBuildInfo(program, _device, ProgramBuildInfo.Log, out error));
            return;
        }

        //Create the required kernel (entry function)
        Kernel kernel = Cl.CreateKernel(program, "imagingTest", out error);
        CheckErr(error, "Cl.CreateKernel");

        int intPtrSize = Marshal.SizeOf(typeof(IntPtr));

        //Image's RGBA data converted to an unmanaged[] array
        byte[] inputByteArray;
        //OpenCL memory buffer that will keep our image's byte[] data.
        Mem inputImage2DBuffer;
        OpenCL.Net.ImageFormat clImageFormat = new OpenCL.Net.ImageFormat(ChannelOrder.RGBA, ChannelType.Unsigned_Int8);
        int inputImgWidth, inputImgHeight;
        int inputImgBytesSize;
        int inputImgStride;

        //Try loading the input image
        using (FileStream imageFileStream = new FileStream(inputImagePath, FileMode.Open))
        using (System.Drawing.Image inputImage = System.Drawing.Image.FromStream(imageFileStream))
        {
            if (inputImage == null)
            {
                Console.WriteLine("Unable to load input image");
                Cl.ReleaseKernel(kernel);
                return;
            }

            inputImgWidth = inputImage.Width;
            inputImgHeight = inputImage.Height;

            using (System.Drawing.Bitmap bmpImage = new System.Drawing.Bitmap(inputImage))
            {
                //Get raw pixel data of the bitmap
                //The format should match the format of clImageFormat
                BitmapData bitmapData = bmpImage.LockBits(new Rectangle(0, 0, bmpImage.Width, bmpImage.Height), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
                try
                {
                    inputImgStride = bitmapData.Stride;
                    inputImgBytesSize = bitmapData.Stride * bitmapData.Height;

                    //Copy the raw bitmap data to an unmanaged byte[] array
                    inputByteArray = new byte[inputImgBytesSize];
                    Marshal.Copy(bitmapData.Scan0, inputByteArray, 0, inputImgBytesSize);

                    //Allocate OpenCL image memory buffer
                    inputImage2DBuffer = (OpenCL.Net.Mem)OpenCL.Net.Cl.CreateImage2D(_context, OpenCL.Net.MemFlags.CopyHostPtr | OpenCL.Net.MemFlags.ReadOnly, clImageFormat, (IntPtr)bitmapData.Width, (IntPtr)bitmapData.Height, (IntPtr)0, inputByteArray, out error);
                    CheckErr(error, "Cl.CreateImage2D input");
                }
                finally
                {
                    // Every LockBits must be paired with UnlockBits.
                    bmpImage.UnlockBits(bitmapData);
                }
            }
        }

        //Unmanaged output image's raw RGBA byte[] array
        byte[] outputByteArray = new byte[inputImgBytesSize];

        //Allocate OpenCL image memory buffer
        OpenCL.Net.Mem outputImage2DBuffer = (OpenCL.Net.Mem)OpenCL.Net.Cl.CreateImage2D(_context, OpenCL.Net.MemFlags.CopyHostPtr | OpenCL.Net.MemFlags.WriteOnly, clImageFormat, (IntPtr)inputImgWidth, (IntPtr)inputImgHeight, (IntPtr)0, outputByteArray, out error);
        CheckErr(error, "Cl.CreateImage2D output");

        //Pass the memory buffers to our kernel function
        error = Cl.SetKernelArg(kernel, 0, (IntPtr)intPtrSize, inputImage2DBuffer);
        error |= Cl.SetKernelArg(kernel, 1, (IntPtr)intPtrSize, outputImage2DBuffer);
        CheckErr(error, "Cl.SetKernelArg");

        //Create a command queue, where all of the commands for execution will be added
        CommandQueue cmdQueue = Cl.CreateCommandQueue(_context, _device, (CommandQueueProperties)0, out error);
        CheckErr(error, "Cl.CreateCommandQueue");

        OpenCL.Net.Event clevent;

        //Copy input image from the host to the GPU.
        IntPtr[] originPtr = new IntPtr[] { (IntPtr)0, (IntPtr)0, (IntPtr)0 }; //x, y, z
        IntPtr[] regionPtr = new IntPtr[] { (IntPtr)inputImgWidth, (IntPtr)inputImgHeight, (IntPtr)1 }; //x, y, z
        IntPtr[] workGroupSizePtr = new IntPtr[] { (IntPtr)inputImgWidth, (IntPtr)inputImgHeight, (IntPtr)1 };
        error = Cl.EnqueueWriteImage(cmdQueue, inputImage2DBuffer, OpenCL.Net.Bool.True, originPtr, regionPtr, (IntPtr)0, (IntPtr)0, inputByteArray, 0, null, out clevent);
        CheckErr(error, "Cl.EnqueueWriteImage");

        //Execute our kernel (OpenCL code)
        // Capture the launch status so the check below reports this call, not the previous one.
        error = OpenCL.Net.Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 2, null, workGroupSizePtr, null, 0, null, out clevent);
        CheckErr(error, "Cl.EnqueueNDRangeKernel");

        //Wait for completion of all calculations on the GPU.
        error = Cl.Finish(cmdQueue);
        CheckErr(error, "Cl.Finish");

        //Read the processed image from GPU to raw RGBA data byte[] array
        error = Cl.EnqueueReadImage(cmdQueue, outputImage2DBuffer, OpenCL.Net.Bool.True, originPtr, regionPtr, (IntPtr)0, (IntPtr)0, outputByteArray, 0, null, out clevent);
        CheckErr(error, "Cl.clEnqueueReadImage");

        //Clean up memory
        Cl.ReleaseKernel(kernel);
        Cl.ReleaseCommandQueue(cmdQueue);
        Cl.ReleaseMemObject(inputImage2DBuffer);
        Cl.ReleaseMemObject(outputImage2DBuffer);

        //Get a pointer to our unmanaged output byte[] array
        GCHandle pinnedOutputArray = GCHandle.Alloc(outputByteArray, GCHandleType.Pinned);
        try
        {
            IntPtr outputBmpPointer = pinnedOutputArray.AddrOfPinnedObject();

            //Create a new bitmap with processed data and save it to a file.
            // The bitmap wraps the pinned array, so it is disposed before the handle is freed.
            using (Bitmap outputBitmap = new Bitmap(inputImgWidth, inputImgHeight, inputImgStride, PixelFormat.Format32bppArgb, outputBmpPointer))
            {
                outputBitmap.Save(outputImagePath, System.Drawing.Imaging.ImageFormat.Png);
            }
        }
        finally
        {
            pinnedOutputArray.Free();
        }
    }
}
/// <summary>
/// Sets up the static OpenCL state used for rendering: picks the first device of the first
/// platform, creates the context and command queue, loads the program through
/// <c>CLoader</c>, creates the "rm_render_entry" kernel, allocates the input/time/output
/// buffers and binds them to kernel arguments 0, 1 and 4.
/// </summary>
/// <remarks>
/// When no platform, device, context or queue is available the method requests application
/// exit and returns immediately, leaving the remaining state untouched.
/// </remarks>
public static void Initialize()
{
    Platform[] platforms = Cl.GetPlatformIDs(out ErrorCode error);
    if (error != ErrorCode.Success || platforms == null || platforms.Length == 0)
    {
        Log.Print("Impossible to run OpenCL, no any graphic platform available, abording launch.");
        Application.Exit();
        // Application.Exit only requests shutdown; stop here so nothing below indexes into platforms.
        return;
    }

    Vector2I res = Graphics.RenderResolution;
    int pixelXAmount = res.x;
    int pixelYAmount = res.y;

    unsafe
    {
        inputSize = sizeof(C_CAMERA);
        // Four bytes per pixel.
        outputSize = sizeof(byte) * pixelXAmount * pixelYAmount * 4;
    }

    Device[] devices = Cl.GetDeviceIDs(platforms[0], DeviceType.All, out error);
    if (error != ErrorCode.Success || devices == null || devices.Length == 0)
    {
        Log.Print("No OpenCL device available on the first platform: " + error.ToString());
        Application.Exit();
        return;
    }
    UsedDevice = devices[0];

    gpu_context = Cl.CreateContext(null, 1, new Device[] { UsedDevice }, null, IntPtr.Zero, out error);
    if (error != ErrorCode.Success)
    {
        Log.Print("Error when creating context: " + error.ToString());
        Application.Exit();
        return;
    }

    InfoBuffer namebuffer = Cl.GetDeviceInfo(UsedDevice, DeviceInfo.Name, out error);
    Log.Print("OpenCL Running on " + namebuffer);

    Queue = Cl.CreateCommandQueue(gpu_context, UsedDevice, CommandQueueProperties.OutOfOrderExecModeEnable, out error);
    if (error != ErrorCode.Success)
    {
        Console.WriteLine("Impossible to create gpu queue, abording launch.");
        Application.Exit();
        return;
    }

    CLoader.LoadProjectPaths(@".\libs", new[] { "c" }, out string[] cfiles, out string[] hfiles);
    Program program = CLoader.LoadProgram(cfiles, hfiles, UsedDevice, gpu_context);

    kernel = Cl.CreateKernel(program, "rm_render_entry", out error);
    if (error != ErrorCode.Success)
    {
        Log.Print("Error when creating kernel: " + error.ToString());
    }

    memory = new IntPtr(outputSize);

    memInput = (Mem)Cl.CreateBuffer(gpu_context, MemFlags.ReadOnly, inputSize, out error);
    if (error != ErrorCode.Success)
    {
        Log.Print("Error when creating input buffer: " + error.ToString());
    }

    memTime = (Mem)Cl.CreateBuffer(gpu_context, MemFlags.ReadOnly, timeSize, out error);
    if (error != ErrorCode.Success)
    {
        Log.Print("Error when creating time buffer: " + error.ToString());
    }

    memOutput = (Mem)Cl.CreateBuffer(gpu_context, MemFlags.WriteOnly, outputSize, out error);
    if (error != ErrorCode.Success)
    {
        Log.Print("Error when creating output buffer: " + error.ToString());
    }

    // CL_KERNEL_WORK_GROUP_SIZE is reported as a size_t, so the query buffer must be
    // pointer-sized; a 4-byte buffer is too small in a 64-bit process.
    IntPtr notused;
    InfoBuffer local = new InfoBuffer(new IntPtr(IntPtr.Size));
    error = Cl.GetKernelWorkGroupInfo(kernel, UsedDevice, KernelWorkGroupInfo.WorkGroupSize, new IntPtr(IntPtr.Size), local, out notused);
    if (error != ErrorCode.Success)
    {
        Log.Print("Error getting kernel workgroup info: " + error.ToString());
    }

    intPtrSize = Marshal.SizeOf(typeof(IntPtr));

    // Buffer arguments are passed as handles, hence the pointer-sized argument size.
    error = Cl.SetKernelArg(kernel, 0, (IntPtr)intPtrSize, memInput);
    error |= Cl.SetKernelArg(kernel, 1, (IntPtr)intPtrSize, memTime);
    error |= Cl.SetKernelArg(kernel, 4, (IntPtr)intPtrSize, memOutput);
    if (error != ErrorCode.Success)
    {
        Log.Print("Error setting kernel arguments: " + error.ToString());
    }

    // One work item per pixel.
    workGroupSizePtr = new IntPtr[] { new IntPtr(pixelXAmount * pixelYAmount) };
}
/// <summary>
/// Runs the "PoissonJacobi" kernel for a fixed number of iterations on a 162 x 122 grid,
/// ping-ponging between two device buffers, then compares the interior of the result with
/// the reference function <c>u</c> and asserts on the maximum and average relative error.
/// </summary>
public void PoissonJacobi()
{
    if (!prepared)
    {
        Prepare(this.BuildIR().InlineIR());
        prepared = true;
    }

    // Kernel and queue.
    Cl.Kernel jacobi = Cl.CreateKernel(program, "PoissonJacobi", out error);
    clSafeCall(error);
    Cl.CommandQueue queue = Cl.CreateCommandQueue(context, device, Cl.CommandQueueProperties.None, out error);
    clSafeCall(error);

    // Grid geometry.
    uint cols = 162;
    uint rows = 122;
    uint iterations = 15000;
    float originX = (float)(-0.25 * Math.PI);
    float originY = (float)(-0.25 * Math.PI);
    float stepX = 2.0f * Math.Abs(originX) / cols;
    float stepY = 2.0f * Math.Abs(originY) / rows;
    uint pitch = cols;
    float[] grid = new float[cols * rows];

    // Left and right edges (corners excluded).
    for (uint row = 1; row < rows - 1; row++)
    {
        uint rowStart = row * pitch;
        float y = originY + row * stepY;
        grid[rowStart] = u(originX, y);
        grid[rowStart + cols - 1] = u(originX + (cols - 1) * stepX, y);
    }

    // Top and bottom edges (corners excluded).
    for (uint col = 1; col < cols - 1; col++)
    {
        float x = originX + col * stepX;
        grid[col] = u(x, originY);
        grid[col + (rows - 1) * pitch] = u(x, originY + (rows - 1) * stepY);
    }

    // Two device buffers, both seeded with the host grid.
    IntPtr gridBytes = (IntPtr)(sizeof(float) * grid.Length);
    Cl.Mem bufferA = Cl.CreateBuffer(context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadWrite, gridBytes, grid, out error);
    clSafeCall(error);
    Cl.Mem bufferB = Cl.CreateBuffer(context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadWrite, gridBytes, grid, out error);
    clSafeCall(error);

    // Stencil coefficients.
    float a1 = 2 * stepY / stepX;
    float a2 = 2 * stepX / stepY;
    float a3 = a1;
    float a4 = a2;
    float a = a1 + a2 + a3 + a4;

    // Arguments that stay fixed across iterations (2 is the local scratch area).
    clSafeCall(Cl.SetKernelArg(jacobi, 2, (AREA_SIZE_Y + 2) * (AREA_SIZE_X + 2) * sizeof(float), null));
    clSafeCall(Cl.SetKernelArg(jacobi, 3, cols));
    clSafeCall(Cl.SetKernelArg(jacobi, 4, rows));
    clSafeCall(Cl.SetKernelArg(jacobi, 5, pitch));
    clSafeCall(Cl.SetKernelArg(jacobi, 6, a1));
    clSafeCall(Cl.SetKernelArg(jacobi, 7, a2));
    clSafeCall(Cl.SetKernelArg(jacobi, 8, a3));
    clSafeCall(Cl.SetKernelArg(jacobi, 9, a4));
    clSafeCall(Cl.SetKernelArg(jacobi, 10, a));
    clSafeCall(Cl.SetKernelArg(jacobi, 11, stepX));
    clSafeCall(Cl.SetKernelArg(jacobi, 12, stepY));
    clSafeCall(Cl.SetKernelArg(jacobi, 13, originX));
    clSafeCall(Cl.SetKernelArg(jacobi, 14, originY));

    IntPtr[] localSize = { (IntPtr)16, (IntPtr)16 };
    IntPtr[] globalSize =
    {
        (IntPtr)((cols - 2 + AREA_SIZE_X - 1) / AREA_SIZE_X * 16),
        (IntPtr)((rows - 2 + AREA_SIZE_Y - 1) / AREA_SIZE_Y * 16)
    };

    Cl.Mem source = bufferA;
    Cl.Mem target = bufferB;

    // First pass runs outside the timed region.
    clSafeCall(Cl.SetKernelArg(jacobi, 0, source));
    clSafeCall(Cl.SetKernelArg(jacobi, 1, target));
    clSafeCall(Cl.EnqueueNDRangeKernel(queue, jacobi, 2, null, globalSize, localSize, 0, null, out clevent));
    clSafeCall(Cl.Finish(queue));

    Stopwatch timer = new Stopwatch();
    timer.Start();
    for (uint pass = 1; pass < iterations; pass++)
    {
        // The previous output becomes the next input.
        Cl.Mem previousSource = source;
        source = target;
        target = previousSource;

        clSafeCall(Cl.SetKernelArg(jacobi, 0, source));
        clSafeCall(Cl.SetKernelArg(jacobi, 1, target));
        clSafeCall(Cl.EnqueueNDRangeKernel(queue, jacobi, 2, null, globalSize, localSize, 0, null, out clevent));
    }
    clSafeCall(Cl.Finish(queue));
    timer.Stop();

    // Fetch the last written buffer back into the host grid.
    clSafeCall(Cl.EnqueueReadBuffer(queue, target, Cl.Bool.True, IntPtr.Zero, gridBytes, grid, 0, null, out clevent));
    clSafeCall(Cl.Finish(queue));

    // Relative error against the reference function over interior points.
    float avgerr = 0, maxerr = 0;
    for (uint row = 1; row < rows - 1; row++)
    {
        for (uint col = 1; col < cols - 1; col++)
        {
            float expected = u(originX + col * stepX, originY + row * stepY);
            float relative = Math.Abs(expected - grid[col + row * pitch]) / Math.Abs(expected);
            avgerr += relative;
            maxerr = Math.Max(maxerr, relative);
        }
    }
    avgerr /= cols * rows;

    long elapsedTime = timer.ElapsedMilliseconds;
    double bytesPerPass = cols * rows * 2 * sizeof(float);
    double bytesTotal = bytesPerPass * iterations;
    double seconds = elapsedTime * 0.001;
    double bytesPerGigabyte = 1 << 30;
    double bandwidth = bytesTotal / (bytesPerGigabyte * seconds);

    Console.WriteLine("avgerr = {0} maxerr = {1} elapsedTime = {2} ms bandwidth = {3} GB/s", avgerr, maxerr, elapsedTime, bandwidth);

    Assert.That(maxerr, Is.LessThanOrEqualTo(5E-2F));
    Assert.That(avgerr, Is.LessThanOrEqualTo(1E-2F));
}
/// <summary>
/// Multiplies two random matrices with the "MatMul" kernel and asserts that every element of
/// the device result is within a relative error of 1e-3 of a host-side reference product.
/// </summary>
public void MatMul()
{
    if (!prepared)
    {
        Prepare(this.BuildIR().InlineIR());
        prepared = true;
    }

    Cl.Kernel matMulKernel = Cl.CreateKernel(program, "MatMul", out error);
    clSafeCall(error);

    Cl.CommandQueue queue = Cl.CreateCommandQueue(context, device, Cl.CommandQueueProperties.None, out error);
    clSafeCall(error);

    // Host matrices: two random operands and the product read back from the device.
    float[] left = new float[WA * HA];
    float[] right = new float[WB * HB];
    float[] product = new float[WC * HC];

    Random rng = new Random();
    for (int n = 0; n < left.Length; n++)
    {
        left[n] = (float)rng.Next() / short.MaxValue;
    }
    for (int n = 0; n < right.Length; n++)
    {
        right[n] = (float)rng.Next() / short.MaxValue;
    }

    // Device buffers: operands are copied from the host, the product buffer is output only.
    Cl.Mem deviceLeft = Cl.CreateBuffer(context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadOnly, (IntPtr)(sizeof(float) * left.Length), left, out error);
    clSafeCall(error);
    Cl.Mem deviceRight = Cl.CreateBuffer(context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadOnly, (IntPtr)(sizeof(float) * right.Length), right, out error);
    clSafeCall(error);
    Cl.Mem deviceProduct = Cl.CreateBuffer(context, Cl.MemFlags.WriteOnly, (IntPtr)(sizeof(float) * product.Length), IntPtr.Zero, out error);
    clSafeCall(error);

    // Arguments 3 and 4 are sized scratch areas passed without host data.
    clSafeCall(Cl.SetKernelArg(matMulKernel, 0, deviceLeft));
    clSafeCall(Cl.SetKernelArg(matMulKernel, 1, deviceRight));
    clSafeCall(Cl.SetKernelArg(matMulKernel, 2, deviceProduct));
    clSafeCall(Cl.SetKernelArg(matMulKernel, 3, BLOCK_SIZE * BLOCK_SIZE * sizeof(float), null));
    clSafeCall(Cl.SetKernelArg(matMulKernel, 4, BLOCK_SIZE * BLOCK_SIZE * sizeof(float), null));
    clSafeCall(Cl.SetKernelArg(matMulKernel, 5, WA));
    clSafeCall(Cl.SetKernelArg(matMulKernel, 6, WB));

    IntPtr[] globalSize = new[] { (IntPtr)WC, (IntPtr)HC };
    IntPtr[] localSize = new[] { (IntPtr)BLOCK_SIZE, (IntPtr)BLOCK_SIZE };
    clSafeCall(Cl.EnqueueNDRangeKernel(queue, matMulKernel, 2, null, globalSize, localSize, 0, null, out clevent));

    // Blocking read of the product, then drain the queue.
    clSafeCall(Cl.EnqueueReadBuffer(queue, deviceProduct, Cl.Bool.True, IntPtr.Zero, (IntPtr)(sizeof(float) * product.Length), product, 0, null, out clevent));
    clSafeCall(Cl.Finish(queue));

    // Host reference: plain row-by-column dot products.
    for (int row = 0; row < HA; ++row)
    {
        for (int col = 0; col < WB; ++col)
        {
            float expected = 0;
            for (int inner = 0; inner < WA; ++inner)
            {
                expected += left[row * WA + inner] * right[inner * WB + col];
            }

            float relativeError = Math.Abs((expected - product[row * WB + col]) / expected);
            Assert.That(relativeError, Is.LessThanOrEqualTo(1E-3F));
        }
    }
}
/// <summary>
/// Initializes the OpenCL state of this instance: selects the first device of the first
/// platform, creates the context, builds the program read from
/// <paramref name="oclProgramSourcePath"/>, creates one kernel per known kernel name and
/// stores it in <c>_kernels</c>, and finally creates the command queue.
/// </summary>
/// <param name="oclProgramSourcePath">Path of the OpenCL source file to compile.</param>
private void init(string oclProgramSourcePath)
{
    string kernelSource = File.ReadAllText(oclProgramSourcePath);
    string[] kernelNames = new string[]
    {
        "accumulate", "quickBlurImgH", "quickBlurImgV", "upsizeImg", "halfSizeImgH",
        "halfSizeImgV", "getLumaImg", "mapToGreyscaleBmp", "getContrastImg", "capHolesImg",
        "maxReduceImgH", "maxReduceImgV", "mapToFauxColorsBmp", "quickSpikesFilterImg",
        "convolveImg"
    };
    bool gpu = true;

    // C reference: clGetDeviceIDs(NULL, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, &device_id, NULL)
    // NVidia driver doesn't seem to support a NULL first param (properties)
    // http://stackoverflow.com/questions/19140989/how-to-remove-cl-invalid-platform-error-in-opencl-code

    // now get all the platform IDs
    Platform[] platforms = Cl.GetPlatformIDs(out err);
    assert(err, "Error: Failed to get platform ids!");

    InfoBuffer deviceInfo = Cl.GetPlatformInfo(platforms[0], PlatformInfo.Name, out err);
    assert(err, "error retrieving platform name");
    Console.WriteLine("Platform name: {0}\n", deviceInfo.ToString());

    // Arbitrary, should be configurable
    Device[] devices = Cl.GetDeviceIDs(platforms[0], gpu ? DeviceType.Gpu : DeviceType.Cpu, out err);
    assert(err, "Error: Failed to create a device group!");

    _device = devices[0]; // Arbitrary, should be configurable

    deviceInfo = Cl.GetDeviceInfo(_device, DeviceInfo.Name, out err);
    assert(err, "error retrieving device name");
    // Format explicitly: Debug.WriteLine(string, string) is the (message, category) overload
    // and would not substitute the device name into the text.
    Debug.WriteLine(string.Format("Device name: {0}", deviceInfo.ToString()));

    deviceInfo = Cl.GetDeviceInfo(_device, DeviceInfo.ImageSupport, out err);
    assert(err, "error retrieving device image capability");
    Debug.WriteLine("Device supports img: {0}", (deviceInfo.CastTo<Bool>() == Bool.True));

    // Create a compute context
    _context = Cl.CreateContext(null, 1, new[] { _device }, ContextNotify, IntPtr.Zero, out err);
    assert(err, "Error: Failed to create a compute context!");

    // Create the compute program from the source buffer
    _program = Cl.CreateProgramWithSource(_context, 1, new[] { kernelSource }, new[] { (IntPtr)kernelSource.Length }, out err);
    assert(err, "Error: Failed to create compute program!");

    // Build the program executable
    err = Cl.BuildProgram(_program, 1, new[] { _device }, string.Empty, null, IntPtr.Zero);
    if (err != ErrorCode.Success)
    {
        // Surface the compiler output before the failure is reported below.
        ErrorCode logError;
        InfoBuffer buildLog = Cl.GetProgramBuildInfo(_program, _device, ProgramBuildInfo.Log, out logError);
        Debug.WriteLine(string.Format("build log: {0}", buildLog.ToString()));
    }
    assert(err, "Error: Failed to build program executable!");

    // Query the build status (not the log) so the cast to BuildStatus is meaningful.
    InfoBuffer buffer = Cl.GetProgramBuildInfo(_program, _device, ProgramBuildInfo.Status, out err);
    Debug.WriteLine("build success: {0}", buffer.CastTo<BuildStatus>() == BuildStatus.Success);

    foreach (string kernelName in kernelNames)
    {
        // Create the compute kernel in the program we wish to run
        OpenCL.Net.Kernel kernel = Cl.CreateKernel(_program, kernelName, out err);
        assert(err, "Error: Failed to create compute kernel!");
        _kernels.Add(kernelName, kernel);
    }

    // Create a command queue
    _commandsQueue = Cl.CreateCommandQueue(_context, _device, CommandQueueProperties.None, out err);
    assert(err, "Error: Failed to create a command commands!");
}
/// <summary>
/// Runs the "fixImage" kernel to compute, for every work item, a destination pixel and a
/// source pixel, applies that mapping to the image at <c>path</c>, and saves the result next
/// to the original with a "_final" suffix (BMP or PNG when the extension says so, JPEG
/// otherwise). The saved path is stored in <c>newPath</c>.
/// </summary>
private void FixImage()
{
    Event e;
    ErrorCode error;

    OpenCL.Net.Program program = Cl.CreateProgramWithSource(context, 1, new[] { script }, null, out error);
    error = Cl.BuildProgram(program, 0, null, string.Empty, null, IntPtr.Zero);
    Kernel kernel = Cl.CreateKernel(program, "fixImage", out error);

    int intPtrSize = Marshal.SizeOf(typeof(IntPtr));
    int pointSize = Marshal.SizeOf(typeof(PointF));

    // Four floats per work item are read back: destination x/y followed by source x/y.
    float[] buffer = new float[40960 * count];
    float[] array = new float[] { radius, O.X, O.Y };

    Mem dest = (Mem)Cl.CreateBuffer(context, MemFlags.WriteOnly, new IntPtr(count * 40960 * sizeof(float)), out error);
    Mem P = (Mem)Cl.CreateBuffer(context, MemFlags.ReadWrite, pointSize * 10240 * count, out error);
    Mem data = (Mem)Cl.CreateBuffer(context, MemFlags.ReadOnly, pointSize * points.Count, out error);
    Mem ptr = (Mem)Cl.CreateBuffer(context, MemFlags.ReadOnly, 3 * sizeof(float), out error);

    Cl.EnqueueWriteBuffer(queue, P, Bool.True, IntPtr.Zero, new IntPtr(pointSize * 10240 * count), map.ToArray(), 0, null, out e);
    Cl.EnqueueWriteBuffer(queue, data, Bool.True, IntPtr.Zero, new IntPtr(pointSize * points.Count), points.ToArray(), 0, null, out e);
    // The transfer size is in bytes: upload all three floats, not just three bytes.
    Cl.EnqueueWriteBuffer(queue, ptr, Bool.True, IntPtr.Zero, (IntPtr)(array.Length * sizeof(float)), array, 0, null, out e);

    error = Cl.SetKernelArg(kernel, 0, (IntPtr)intPtrSize, dest);
    error |= Cl.SetKernelArg(kernel, 1, (IntPtr)intPtrSize, P);
    error |= Cl.SetKernelArg(kernel, 2, (IntPtr)intPtrSize, data);
    error |= Cl.SetKernelArg(kernel, 3, (IntPtr)intPtrSize, ptr);
    // A scalar int argument is sizeof(int) bytes wide, not pointer-sized.
    error |= Cl.SetKernelArg(kernel, 4, (IntPtr)sizeof(int), points.Count);

    IntPtr[] workGroupSizePtr = new IntPtr[] { (IntPtr)count, (IntPtr)10240 };
    Cl.EnqueueNDRangeKernel(queue, kernel, 2, null, workGroupSizePtr, null, 0, null, out e);
    Cl.Finish(queue);
    error |= Cl.EnqueueReadBuffer(queue, dest, Bool.True, IntPtr.Zero, new IntPtr(sizeof(float) * 40960 * count), buffer, 0, null, out e);

    Cl.ReleaseKernel(kernel);
    // NOTE(review): queue is not created in this method, yet it is released here — confirm
    // nothing uses it after FixImage returns.
    Cl.ReleaseCommandQueue(queue);
    Cl.ReleaseMemObject(dest);
    Cl.ReleaseMemObject(P);
    Cl.ReleaseMemObject(data);
    Cl.ReleaseMemObject(ptr);
    Cl.ReleaseProgram(program);

    using (Image img = Image.FromFile(path))
    using (Bitmap bmp = new Bitmap(img))
    {
        for (int i = 0; i < buffer.Length; i += 4)
        {
            int dx = (int)Math.Round(buffer[i]);
            int dy = (int)Math.Round(buffer[i + 1]);
            int sx = (int)Math.Round(buffer[i + 2]);
            int sy = (int)Math.Round(buffer[i + 3]);

            // Sources outside the image are painted white; destinations outside are skipped.
            bool sourceInside = (sx >= 0 && sx < img.Width) && (sy >= 0 && sy < img.Height);
            Color src = sourceInside ? bmp.GetPixel(sx, sy) : Color.White;
            if ((dx >= 0 && dx < img.Width) && (dy >= 0 && dy < img.Height))
            {
                bmp.SetPixel(dx, dy, src);
            }
        }

        int index = path.LastIndexOf('.');
        string ext = path.Substring(index + 1);
        newPath = path.Substring(0, index) + $"_final.{ext}";

        System.Drawing.Imaging.ImageFormat format = System.Drawing.Imaging.ImageFormat.Jpeg;
        if (ext == "bmp")
        {
            format = System.Drawing.Imaging.ImageFormat.Bmp;
        }
        else if (ext == "png")
        {
            format = System.Drawing.Imaging.ImageFormat.Png;
        }

        bmp.Save(newPath, format);
    }
}
/// <summary>
/// Runs the "divP_Flow" kernel once over the whole flow field: uploads the gradient, rho and
/// flow inputs together with the two P fields, launches one work item per pixel, and reads
/// the two output components back into a new <c>FlowArray</c> of the same size as
/// <paramref name="inFlow"/>.
/// </summary>
/// <param name="Idx">Host data uploaded as kernel argument 1.</param>
/// <param name="Idy">Host data uploaded as kernel argument 2.</param>
/// <param name="grad_2">Host data uploaded as kernel argument 9.</param>
/// <param name="rho_c">Host data uploaded as kernel argument 0.</param>
/// <param name="inFlow">Input flow; its two components feed arguments 3 and 4 and its size defines the launch.</param>
/// <param name="P1">Its two components feed arguments 10 and 11.</param>
/// <param name="P2">Its two components feed arguments 12 and 13.</param>
/// <returns>The flow read back from kernel arguments 5 and 6.</returns>
public FlowArray calc_divP_Flow(float[] Idx, float[] Idy, float[] grad_2, float[] rho_c, FlowArray inFlow, FlowArray P1, FlowArray P2)
{
    // Result container with the same dimensions as the input flow.
    FlowArray result = new FlowArray();
    result.Array = new float[2][];
    result.Width = inFlow.Width;
    result.Height = inFlow.Height;
    for (int component = 0; component < 2; component++)
    {
        result.Array[component] = new float[result.Width * result.Height];
    }

    // Every device buffer has the same size.
    var bufferSize = inFlow.Width * inFlow.Height * sizeof(float);

    IMem<float> gradMem = Cl.CreateBuffer<float>(context, MemFlags.ReadOnly, bufferSize, out error);
    IMem<float> rhoMem = Cl.CreateBuffer<float>(context, MemFlags.ReadOnly, bufferSize, out error);
    IMem<float> idxMem = Cl.CreateBuffer<float>(context, MemFlags.ReadOnly, bufferSize, out error);
    IMem<float> idyMem = Cl.CreateBuffer<float>(context, MemFlags.ReadOnly, bufferSize, out error);
    IMem<float> flowInU = Cl.CreateBuffer<float>(context, MemFlags.ReadOnly, bufferSize, out error);
    IMem<float> flowInV = Cl.CreateBuffer<float>(context, MemFlags.ReadOnly, bufferSize, out error);
    IMem<float> p11Mem = Cl.CreateBuffer<float>(context, MemFlags.ReadOnly, bufferSize, out error);
    IMem<float> p12Mem = Cl.CreateBuffer<float>(context, MemFlags.ReadOnly, bufferSize, out error);
    IMem<float> p21Mem = Cl.CreateBuffer<float>(context, MemFlags.ReadOnly, bufferSize, out error);
    IMem<float> p22Mem = Cl.CreateBuffer<float>(context, MemFlags.ReadOnly, bufferSize, out error);
    IMem<float> flowOutU = Cl.CreateBuffer<float>(context, MemFlags.WriteOnly, bufferSize, out error);
    IMem<float> flowOutV = Cl.CreateBuffer<float>(context, MemFlags.WriteOnly, bufferSize, out error);

    // Bind arguments in the kernel's parameter order.
    Kernel divKernel = Cl.CreateKernel(program, "divP_Flow", out error);
    error |= Cl.SetKernelArg(divKernel, 0, rhoMem);
    error |= Cl.SetKernelArg(divKernel, 1, idxMem);
    error |= Cl.SetKernelArg<float>(divKernel, 2, idyMem);
    error |= Cl.SetKernelArg<float>(divKernel, 3, flowInU);
    error |= Cl.SetKernelArg<float>(divKernel, 4, flowInV);
    error |= Cl.SetKernelArg<float>(divKernel, 5, flowOutU);
    error |= Cl.SetKernelArg<float>(divKernel, 6, flowOutV);
    error |= Cl.SetKernelArg<float>(divKernel, 7, this.theta);
    error |= Cl.SetKernelArg<float>(divKernel, 8, this.lambda);
    error |= Cl.SetKernelArg<float>(divKernel, 9, gradMem);
    error |= Cl.SetKernelArg<float>(divKernel, 10, p11Mem);
    error |= Cl.SetKernelArg<float>(divKernel, 11, p12Mem);
    error |= Cl.SetKernelArg<float>(divKernel, 12, p21Mem);
    error |= Cl.SetKernelArg<float>(divKernel, 13, p22Mem);
    error |= Cl.SetKernelArg<float>(divKernel, 14, threshold);
    error |= Cl.SetKernelArg(divKernel, 15, inFlow.Width);
    error |= Cl.SetKernelArg(divKernel, 16, inFlow.Height);

    Event completion;
    IntPtr[] globalSize = new IntPtr[] { (IntPtr)(inFlow.Height * inFlow.Width) };

    // Blocking uploads of all inputs.
    error = Cl.EnqueueWriteBuffer<float>(commandQueue, rhoMem, Bool.True, rho_c, 0, null, out completion);
    error = Cl.EnqueueWriteBuffer<float>(commandQueue, idxMem, Bool.True, Idx, 0, null, out completion);
    error = Cl.EnqueueWriteBuffer<float>(commandQueue, idyMem, Bool.True, Idy, 0, null, out completion);
    error = Cl.EnqueueWriteBuffer<float>(commandQueue, gradMem, Bool.True, grad_2, 0, null, out completion);
    error = Cl.EnqueueWriteBuffer<float>(commandQueue, flowInU, Bool.True, inFlow.Array[0], 0, null, out completion);
    error = Cl.EnqueueWriteBuffer<float>(commandQueue, flowInV, Bool.True, inFlow.Array[1], 0, null, out completion);
    error = Cl.EnqueueWriteBuffer<float>(commandQueue, p11Mem, Bool.True, P1.Array[0], 0, null, out completion);
    error = Cl.EnqueueWriteBuffer<float>(commandQueue, p12Mem, Bool.True, P1.Array[1], 0, null, out completion);
    error = Cl.EnqueueWriteBuffer<float>(commandQueue, p21Mem, Bool.True, P2.Array[0], 0, null, out completion);
    error = Cl.EnqueueWriteBuffer<float>(commandQueue, p22Mem, Bool.True, P2.Array[1], 0, null, out completion);

    // One-dimensional launch, one work item per pixel.
    error = Cl.EnqueueNDRangeKernel(commandQueue, divKernel, 1, null, globalSize, null, 0, null, out completion);
    error = Cl.Finish(commandQueue);

    // Blocking read-back of both output components.
    Cl.EnqueueReadBuffer<float>(commandQueue, flowOutU, Bool.True, 0, (result.Width * result.Height), result.Array[0], 0, null, out completion);
    error = Cl.Finish(commandQueue);
    Cl.EnqueueReadBuffer<float>(commandQueue, flowOutV, Bool.True, 0, (result.Width * result.Height), result.Array[1], 0, null, out completion);
    error = Cl.Finish(commandQueue);

    // Release device memory, then the kernel.
    IMem<float>[] deviceBuffers =
    {
        gradMem, rhoMem, idxMem, idyMem, flowInU, flowInV,
        p11Mem, p12Mem, p21Mem, p22Mem, flowOutU, flowOutV
    };
    foreach (IMem<float> deviceBuffer in deviceBuffers)
    {
        Cl.ReleaseMemObject(deviceBuffer);
    }
    Cl.ReleaseKernel(divKernel);

    return result;
}
/// <summary>
/// Adds two random 1024-element float vectors with the "VecAdd" kernel and
/// asserts that every device result matches the host-side sum within a
/// relative error of 1E-3.
/// </summary>
/// <remarks>
/// The kernel, command queue and device buffers created here are released in
/// a <c>finally</c> block, so a failing OpenCL call or assertion does not
/// leak native handles.
/// </remarks>
public void VecAdd()
{
    // Prepare is only run on the first call; later calls reuse its result.
    if (!prepared)
    {
        Prepare(this.BuildIR().InlineIR());
        prepared = true;
    }

    // Release callbacks. Each one is registered only after the matching
    // create call has passed clSafeCall, so only checked handles are released.
    var cleanup = new System.Collections.Generic.List<Action>();
    try
    {
        // create kernel
        Cl.Kernel kernel = Cl.CreateKernel(program, "VecAdd", out error);
        clSafeCall(error);
        cleanup.Add(() => Cl.ReleaseKernel(kernel));

        // create command queue
        Cl.CommandQueue cmdQueue = Cl.CreateCommandQueue(context, device, Cl.CommandQueueProperties.None, out error);
        clSafeCall(error);
        cleanup.Add(() => Cl.ReleaseCommandQueue(cmdQueue));

        int length = 1 << 10;

        // allocate host vectors
        float[] A = new float[length];
        float[] B = new float[length];
        float[] C = new float[length];

        // initialize host memory
        Random rand = new Random();
        for (int i = 0; i < length; i++)
        {
            A[i] = (float)rand.Next() / short.MaxValue;
            B[i] = (float)rand.Next() / short.MaxValue;
        }

        // allocate device vectors
        Cl.Mem hDeviceMemA = Cl.CreateBuffer(context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadOnly, (IntPtr)(sizeof(float) * length), A, out error);
        clSafeCall(error);
        cleanup.Add(() => Cl.ReleaseMemObject(hDeviceMemA));

        Cl.Mem hDeviceMemB = Cl.CreateBuffer(context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadOnly, (IntPtr)(sizeof(float) * length), B, out error);
        clSafeCall(error);
        cleanup.Add(() => Cl.ReleaseMemObject(hDeviceMemB));

        Cl.Mem hDeviceMemC = Cl.CreateBuffer(context, Cl.MemFlags.WriteOnly, (IntPtr)(sizeof(float) * length), IntPtr.Zero, out error);
        clSafeCall(error);
        cleanup.Add(() => Cl.ReleaseMemObject(hDeviceMemC));

        // setup kernel arguments
        clSafeCall(Cl.SetKernelArg(kernel, 0, hDeviceMemA));
        clSafeCall(Cl.SetKernelArg(kernel, 1, hDeviceMemB));
        clSafeCall(Cl.SetKernelArg(kernel, 2, hDeviceMemC));
        clSafeCall(Cl.SetKernelArg(kernel, 3, length));

        // execute kernel
        clSafeCall(Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 1, null, new[] { (IntPtr)length }, new[] { (IntPtr)256 }, 0, null, out clevent));

        // copy results from device back to host
        clSafeCall(Cl.EnqueueReadBuffer(cmdQueue, hDeviceMemC, Cl.Bool.True, IntPtr.Zero, (IntPtr)(sizeof(float) * length), C, 0, null, out clevent));
        clSafeCall(Cl.Finish(cmdQueue));

        // verify every element against the host-side sum (relative error)
        for (int i = 0; i < length; i++)
        {
            float sum = A[i] + B[i];
            float err = Math.Abs((sum - C[i]) / sum);
            Assert.That(err, Is.LessThanOrEqualTo(1E-3F));
        }
    }
    finally
    {
        // Release in reverse creation order. Return codes are deliberately
        // ignored: cleanup is best-effort and must not mask the failure
        // that brought us here.
        for (int i = cleanup.Count - 1; i >= 0; i--)
        {
            cleanup[i]();
        }
    }
}
/// <summary>
/// Exercises program creation, building and kernel creation.
/// First builds a deliberately broken source and expects the build to fail
/// with <c>BuildStatus.Error</c>; then builds a valid two-kernel source,
/// queries program information and creates its kernels.
/// </summary>
public void ProgramAndKernelTests()
{
    // The OpenCL sources must keep their line breaks: they contain `//`
    // comments, which would otherwise swallow the kernel definitions.
    const string correctSource = @"
        // Simple test; c[i] = a[i] + b[i]

        __kernel void add_array(__global float *a, __global float *b, __global float *c)
        {
            int xid = get_global_id(0);
            c[xid] = a[xid] + b[xid];
        }

        __kernel void sub_array(__global float *a, __global float *b, __global float *c)
        {
            int xid = get_global_id(0);
            c[xid] = a[xid] - b[xid];
        }
        ";

    const string sourceWithErrors = @"
        // Erroneous kernel

        __kernel void add_array(__global float *a, __global float *b, __global float *c)
        {
            foo(); // <-- Error right here!
            int xid = get_global_id(0);
            c[xid] = a[xid] + b[xid];
        }";

    ErrorCode error;

    // A source with a compile error: creation succeeds, the build must fail.
    using (Program program = Cl.CreateProgramWithSource(_context, 1, new[] { sourceWithErrors }, null, out error))
    {
        Assert.AreEqual(ErrorCode.Success, error);

        error = Cl.BuildProgram(program, 1, new[] { _device }, string.Empty, null, IntPtr.Zero);
        Assert.AreNotEqual(ErrorCode.Success, error);

        Assert.AreEqual(
            BuildStatus.Error,
            Cl.GetProgramBuildInfo(program, _device, ProgramBuildInfo.Status, out error).CastTo<BuildStatus>());

        Console.WriteLine("There were error(s) compiling the provided kernel");
        Console.WriteLine(Cl.GetProgramBuildInfo(program, _device, ProgramBuildInfo.Log, out error));
    }

    // A valid source: creation and build must both succeed.
    using (Program program = Cl.CreateProgramWithSource(_context, 1, new[] { correctSource }, null, out error))
    {
        Assert.AreEqual(ErrorCode.Success, error);

        error = Cl.BuildProgram(program, 1, new[] { _device }, string.Empty, null, IntPtr.Zero);
        Assert.AreEqual(ErrorCode.Success, error);

        Assert.AreEqual(
            BuildStatus.Success,
            Cl.GetProgramBuildInfo(program, _device, ProgramBuildInfo.Status, out error).CastTo<BuildStatus>());

        // Try to get information from the program
        Assert.AreEqual(_context, Cl.GetProgramInfo(program, ProgramInfo.Context, out error).CastTo<Context>());
        Assert.AreEqual(1, Cl.GetProgramInfo(program, ProgramInfo.NumDevices, out error).CastTo<int>());
        Assert.AreEqual(_device, Cl.GetProgramInfo(program, ProgramInfo.Devices, out error).CastTo<Device>(0));

        Console.WriteLine("Program source was:");
        Console.WriteLine(Cl.GetProgramInfo(program, ProgramInfo.Source, out error));

        // Create a single kernel by name.
        Kernel kernel = Cl.CreateKernel(program, "add_array", out error);
        Assert.AreEqual(ErrorCode.Success, error);
        kernel.Dispose();

        // Create every kernel in the program at once.
        Kernel[] kernels = Cl.CreateKernelsInProgram(program, out error);
        try
        {
            Assert.AreEqual(ErrorCode.Success, error);
            Assert.AreEqual(2, kernels.Length);

            // NOTE(review): this assumes the kernels come back in declaration
            // order - confirm the runtime guarantees that.
            Assert.AreEqual("add_array", Cl.GetKernelInfo(kernels[0], KernelInfo.FunctionName, out error).ToString());
            Assert.AreEqual("sub_array", Cl.GetKernelInfo(kernels[1], KernelInfo.FunctionName, out error).ToString());
        }
        finally
        {
            // Dispose the kernels even when an assertion above fails.
            if (kernels != null)
            {
                foreach (Kernel created in kernels)
                {
                    created.Dispose();
                }
            }
        }
    }
}