/// <summary>
/// Sets up the OpenCL environment for this process: detects the vendor of the first
/// reported platform, queries its GPU devices and creates a context over all of them.
/// </summary>
/// <param name="_MpiEnv">MPI environment of this process</param>
/// <exception cref="ApplicationException">
/// The first platform reports fewer GPU devices than <c>_MpiEnv.ProcessesOnMySMP</c>.
/// </exception>
public clEnvironment(MPIEnviroment _MpiEnv)
{
    MpiEnv = _MpiEnv;

    // Only the first platform returned by the runtime is inspected and used.
    cl_platform_id platform = cl.GetPlatformIDs()[0];
    cl_platform_info_return pinfo = cl.GetPlatformInfo(platform);

    // Map the vendor string onto the Vendor enumeration; anything unknown is "Other".
    string vnd = pinfo.vendor;
    if (vnd == "NVIDIA Corporation")
    {
        _vendor = Vendor.NVIDIA;
    }
    else if (vnd == "Advanced Micro Devices, Inc.")
    {
        _vendor = Vendor.AMD;
    }
    else
    {
        _vendor = Vendor.Other;
    }

    Console.WriteLine("{0} : {1}", vnd, _vendor);
    Console.WriteLine(pinfo.version);

    if (_vendor == Vendor.AMD)
    {
        // On AMD the kernel sources are switched from the cl_khr_fp64 extension name
        // to cl_amd_fp64 (first source entry of the vector and matrix kernels).
        const string khrFp64 = "cl_khr_fp64";
        const string amdFp64 = "cl_amd_fp64";
        clVectorSource.source[0] = clVectorSource.source[0].Replace(khrFp64, amdFp64);
        clMatrixSource.source[0] = clMatrixSource.source[0].Replace(khrFp64, amdFp64);
        Console.WriteLine("Adjusted AMD fp64 extenstion");
    }

    cl_device_id[] devices = cl.GetDeviceIDs(platform, cl_device_type.CL_DEVICE_TYPE_GPU);
    if (_MpiEnv.ProcessesOnMySMP > devices.Length)
    {
        throw new ApplicationException("not enougth OpenCL devices; There must be at least one OpenCL device for each MPI process;");
    }

    // The first device is stored as this environment's device; the context spans all devices.
    device = devices[0];
    context = cl.CreateContext(platform, devices);

    // Disabled diagnostics, kept for reference:
    //cl_device_info_return dinfo = cl.GetDeviceInfo(device);
    //Console.WriteLine("Max work group size: " + dinfo.max_work_group_size);
    //Console.WriteLine("Process " + _MpiEnv.ProcessRankOnSMP + " running on device " + dinfo.name + ", " + dinfo.version);
    //Console.WriteLine(dinfo.extensions);
}
/// <summary>
/// Sample program: adds two float vectors element-wise with an OpenCL kernel on the
/// first device of the default context and traces one "a + b = c" line per element.
/// </summary>
/// <exception cref="Exception">
/// An OpenCL call reported an error code other than <c>ErrorCode.Success</c>, or the
/// context reported no device.
/// </exception>
public static void Main()
{
    const int cnBlockSize = 4;
    const int cnBlocks = 3;
    IntPtr cnDimension = new IntPtr(cnBlocks * cnBlockSize);

    // The kernel text must stay multi-line: it contains a "//" comment that would
    // otherwise swallow the statements following it.
    string sProgramSource = @"
__kernel void
vectorAdd(__global const float * a,
          __global const float * b,
          __global       float * c)
{
    // Vector element index
    int nIndex = get_global_id(0);
    c[nIndex] = a[nIndex] + b[nIndex];
}
";
    ErrorCode error;

    // create OpenCL device & context
    cl_context hContext;
    unsafe
    {
        hContext = CL.CreateContextFromType((ContextProperties*)null, DeviceTypeFlags.DeviceTypeDefault, IntPtr.Zero, IntPtr.Zero, &error);
    }
    if (error != ErrorCode.Success)
    {
        throw new Exception("CreateContextFromType failed: " + error.ToString());
    }

    // query all devices available to the context
    IntPtr nContextDescriptorSize;
    error = (ErrorCode)CL.GetContextInfo(hContext, ContextInfo.ContextDevices, IntPtr.Zero, IntPtr.Zero, out nContextDescriptorSize);
    if (error != ErrorCode.Success)
    {
        throw new Exception("GetContextInfo (size query) failed: " + error.ToString());
    }

    cl_device_id[] aDevices;
    unsafe
    {
        // The reported size is a byte count, not an element count.
        int nDevices = nContextDescriptorSize.ToInt32() / sizeof(cl_device_id);
        if (nDevices < 1)
        {
            throw new Exception("The OpenCL context reported no devices.");
        }

        aDevices = new cl_device_id[nDevices];
        fixed (cl_device_id* ptr = aDevices)
        {
            IntPtr ret;
            error = (ErrorCode)CL.GetContextInfo(hContext, ContextInfo.ContextDevices, nContextDescriptorSize, new IntPtr(ptr), out ret);
        }
    }
    if (error != ErrorCode.Success)
    {
        throw new Exception("GetContextInfo (device list) failed: " + error.ToString());
    }

    // create a command queue for first device the context reported
    cl_command_queue hCmdQueue = CL.CreateCommandQueue(hContext, aDevices[0], (CommandQueueFlags)0, out error);
    if (error != ErrorCode.Success)
    {
        throw new Exception("CreateCommandQueue failed: " + error.ToString());
    }

    // create & compile program
    cl_program hProgram;
    unsafe
    {
        hProgram = CL.CreateProgramWithSource(hContext, 1, new string[] { sProgramSource }, null, &error);
    }
    if (error != ErrorCode.Success)
    {
        throw new Exception("CreateProgramWithSource failed: " + error.ToString());
    }

    error = (ErrorCode)CL.BuildProgram(hProgram, 0, (IntPtr[])null, null, IntPtr.Zero, IntPtr.Zero);
    if (error != ErrorCode.Success)
    {
        throw new Exception("BuildProgram failed: " + error.ToString());
    }

    // create kernel
    cl_kernel hKernel = CL.CreateKernel(hProgram, "vectorAdd", out error);
    if (error != ErrorCode.Success)
    {
        throw new Exception("CreateKernel failed: " + error.ToString());
    }

    // allocate host vectors
    int nElements = cnDimension.ToInt32();
    float[] A = new float[nElements];
    float[] B = new float[nElements];
    float[] C = new float[nElements];

    // initialize host memory
    Random rand = new Random();
    for (int i = 0; i < A.Length; i++)
    {
        A[i] = rand.Next() % 256;
        B[i] = rand.Next() % 256;
    }

    // Size of one vector in bytes, shared by every buffer and transfer below.
    IntPtr cbVector = new IntPtr(nElements * sizeof(float));

    // allocate device memory
    unsafe
    {
        fixed (float* pA = A)
        fixed (float* pB = B)
        fixed (float* pC = C)
        {
            cl_mem hDeviceMemA, hDeviceMemB, hDeviceMemC;

            hDeviceMemA = CL.CreateBuffer(hContext, MemFlags.MemReadOnly | MemFlags.MemCopyHostPtr, cbVector, new IntPtr(pA), out error);
            if (error != ErrorCode.Success)
            {
                throw new Exception("CreateBuffer (A) failed: " + error.ToString());
            }

            // Fixed: this buffer was previously initialised from pA instead of pB.
            hDeviceMemB = CL.CreateBuffer(hContext, MemFlags.MemReadOnly | MemFlags.MemCopyHostPtr, cbVector, new IntPtr(pB), out error);
            if (error != ErrorCode.Success)
            {
                throw new Exception("CreateBuffer (B) failed: " + error.ToString());
            }

            hDeviceMemC = CL.CreateBuffer(hContext, MemFlags.MemWriteOnly, cbVector, IntPtr.Zero, out error);
            if (error != ErrorCode.Success)
            {
                throw new Exception("CreateBuffer (C) failed: " + error.ToString());
            }

            // setup parameter values
            error = (ErrorCode)CL.SetKernelArg(hKernel, 0, new IntPtr(sizeof(cl_mem)), new IntPtr(&hDeviceMemA));
            if (error != ErrorCode.Success)
            {
                throw new Exception("SetKernelArg (0) failed: " + error.ToString());
            }
            error = (ErrorCode)CL.SetKernelArg(hKernel, 1, new IntPtr(sizeof(cl_mem)), new IntPtr(&hDeviceMemB));
            if (error != ErrorCode.Success)
            {
                throw new Exception("SetKernelArg (1) failed: " + error.ToString());
            }
            error = (ErrorCode)CL.SetKernelArg(hKernel, 2, new IntPtr(sizeof(cl_mem)), new IntPtr(&hDeviceMemC));
            if (error != ErrorCode.Success)
            {
                throw new Exception("SetKernelArg (2) failed: " + error.ToString());
            }

            // write data from host to device (blocking writes)
            error = (ErrorCode)CL.EnqueueWriteBuffer(hCmdQueue, hDeviceMemA, true, IntPtr.Zero, cbVector, new IntPtr(pA), 0, null, (IntPtr[])null);
            if (error != ErrorCode.Success)
            {
                throw new Exception("EnqueueWriteBuffer (A) failed: " + error.ToString());
            }
            error = (ErrorCode)CL.EnqueueWriteBuffer(hCmdQueue, hDeviceMemB, true, IntPtr.Zero, cbVector, new IntPtr(pB), 0, null, (IntPtr[])null);
            if (error != ErrorCode.Success)
            {
                throw new Exception("EnqueueWriteBuffer (B) failed: " + error.ToString());
            }

            // execute kernel: one work item per vector element
            error = (ErrorCode)CL.EnqueueNDRangeKernel(hCmdQueue, hKernel, 1, null, &cnDimension, null, 0, null, null);
            if (error != ErrorCode.Success)
            {
                throw new Exception(error.ToString());
            }

            // copy results from device back to host (blocking read)
            error = (ErrorCode)CL.EnqueueReadBuffer(hCmdQueue, hDeviceMemC, true, IntPtr.Zero, cbVector, new IntPtr(pC), 0, null, (IntPtr[])null);
            if (error != ErrorCode.Success)
            {
                throw new Exception(error.ToString());
            }

            CL.Finish(hCmdQueue);

            CL.ReleaseMemObject(hDeviceMemA);
            CL.ReleaseMemObject(hDeviceMemB);
            CL.ReleaseMemObject(hDeviceMemC);
        }
    }

    for (int i = 0; i < A.Length; i++)
    {
        System.Diagnostics.Trace.WriteLine(String.Format("{0} + {1} = {2}", A[i], B[i], C[i]));
    }
}
/// <summary>
/// Sample program: adds two float vectors element-wise with an OpenCL kernel on the
/// first device of the default context and traces one "a + b = c" line per element.
/// </summary>
/// <exception cref="Exception">
/// An OpenCL call reported an error code other than <c>ErrorCode.Success</c>, or the
/// context reported no device.
/// </exception>
public static void Main()
{
    const int cnBlockSize = 4;
    const int cnBlocks = 3;
    IntPtr cnDimension = new IntPtr(cnBlocks * cnBlockSize);

    // The kernel text must stay multi-line: it contains a "//" comment that would
    // otherwise swallow the statements following it.
    string sProgramSource = @"
__kernel void
vectorAdd(__global const float * a,
          __global const float * b,
          __global       float * c)
{
    // Vector element index
    int nIndex = get_global_id(0);
    c[nIndex] = a[nIndex] + b[nIndex];
}
";
    ErrorCode error;

    // create OpenCL device & context
    cl_context hContext;
    unsafe
    {
        hContext = CL.CreateContextFromType((ContextProperties*)null, DeviceTypeFlags.DeviceTypeDefault, IntPtr.Zero, IntPtr.Zero, &error);
    }
    if (error != ErrorCode.Success)
    {
        throw new Exception("CreateContextFromType failed: " + error.ToString());
    }

    // query all devices available to the context
    IntPtr nContextDescriptorSize;
    error = (ErrorCode)CL.GetContextInfo(hContext, ContextInfo.ContextDevices, IntPtr.Zero, IntPtr.Zero, out nContextDescriptorSize);
    if (error != ErrorCode.Success)
    {
        throw new Exception("GetContextInfo (size query) failed: " + error.ToString());
    }

    cl_device_id[] aDevices;
    unsafe
    {
        // The reported size is a byte count, not an element count.
        int nDevices = nContextDescriptorSize.ToInt32() / sizeof(cl_device_id);
        if (nDevices < 1)
        {
            throw new Exception("The OpenCL context reported no devices.");
        }

        aDevices = new cl_device_id[nDevices];
        fixed (cl_device_id* ptr = aDevices)
        {
            IntPtr ret;
            error = (ErrorCode)CL.GetContextInfo(hContext, ContextInfo.ContextDevices, nContextDescriptorSize, new IntPtr(ptr), out ret);
        }
    }
    if (error != ErrorCode.Success)
    {
        throw new Exception("GetContextInfo (device list) failed: " + error.ToString());
    }

    // create a command queue for first device the context reported
    cl_command_queue hCmdQueue = CL.CreateCommandQueue(hContext, aDevices[0], (CommandQueueFlags)0, out error);
    if (error != ErrorCode.Success)
    {
        throw new Exception("CreateCommandQueue failed: " + error.ToString());
    }

    // create & compile program
    cl_program hProgram;
    unsafe
    {
        hProgram = CL.CreateProgramWithSource(hContext, 1, new string[] { sProgramSource }, null, &error);
    }
    if (error != ErrorCode.Success)
    {
        throw new Exception("CreateProgramWithSource failed: " + error.ToString());
    }

    error = (ErrorCode)CL.BuildProgram(hProgram, 0, (IntPtr[])null, null, IntPtr.Zero, IntPtr.Zero);
    if (error != ErrorCode.Success)
    {
        throw new Exception("BuildProgram failed: " + error.ToString());
    }

    // create kernel
    cl_kernel hKernel = CL.CreateKernel(hProgram, "vectorAdd", out error);
    if (error != ErrorCode.Success)
    {
        throw new Exception("CreateKernel failed: " + error.ToString());
    }

    // allocate host vectors
    int nElements = cnDimension.ToInt32();
    float[] A = new float[nElements];
    float[] B = new float[nElements];
    float[] C = new float[nElements];

    // initialize host memory
    Random rand = new Random();
    for (int i = 0; i < A.Length; i++)
    {
        A[i] = rand.Next() % 256;
        B[i] = rand.Next() % 256;
    }

    // Size of one vector in bytes, shared by every buffer and transfer below.
    IntPtr cbVector = new IntPtr(nElements * sizeof(float));

    // allocate device memory
    unsafe
    {
        fixed (float* pA = A)
        fixed (float* pB = B)
        fixed (float* pC = C)
        {
            cl_mem hDeviceMemA, hDeviceMemB, hDeviceMemC;

            hDeviceMemA = CL.CreateBuffer(hContext, MemFlags.MemReadOnly | MemFlags.MemCopyHostPtr, cbVector, new IntPtr(pA), out error);
            if (error != ErrorCode.Success)
            {
                throw new Exception("CreateBuffer (A) failed: " + error.ToString());
            }

            // Fixed: this buffer was previously initialised from pA instead of pB.
            hDeviceMemB = CL.CreateBuffer(hContext, MemFlags.MemReadOnly | MemFlags.MemCopyHostPtr, cbVector, new IntPtr(pB), out error);
            if (error != ErrorCode.Success)
            {
                throw new Exception("CreateBuffer (B) failed: " + error.ToString());
            }

            hDeviceMemC = CL.CreateBuffer(hContext, MemFlags.MemWriteOnly, cbVector, IntPtr.Zero, out error);
            if (error != ErrorCode.Success)
            {
                throw new Exception("CreateBuffer (C) failed: " + error.ToString());
            }

            // setup parameter values
            error = (ErrorCode)CL.SetKernelArg(hKernel, 0, new IntPtr(sizeof(cl_mem)), new IntPtr(&hDeviceMemA));
            if (error != ErrorCode.Success)
            {
                throw new Exception("SetKernelArg (0) failed: " + error.ToString());
            }
            error = (ErrorCode)CL.SetKernelArg(hKernel, 1, new IntPtr(sizeof(cl_mem)), new IntPtr(&hDeviceMemB));
            if (error != ErrorCode.Success)
            {
                throw new Exception("SetKernelArg (1) failed: " + error.ToString());
            }
            error = (ErrorCode)CL.SetKernelArg(hKernel, 2, new IntPtr(sizeof(cl_mem)), new IntPtr(&hDeviceMemC));
            if (error != ErrorCode.Success)
            {
                throw new Exception("SetKernelArg (2) failed: " + error.ToString());
            }

            // write data from host to device (blocking writes)
            error = (ErrorCode)CL.EnqueueWriteBuffer(hCmdQueue, hDeviceMemA, true, IntPtr.Zero, cbVector, new IntPtr(pA), 0, null, (IntPtr[])null);
            if (error != ErrorCode.Success)
            {
                throw new Exception("EnqueueWriteBuffer (A) failed: " + error.ToString());
            }
            error = (ErrorCode)CL.EnqueueWriteBuffer(hCmdQueue, hDeviceMemB, true, IntPtr.Zero, cbVector, new IntPtr(pB), 0, null, (IntPtr[])null);
            if (error != ErrorCode.Success)
            {
                throw new Exception("EnqueueWriteBuffer (B) failed: " + error.ToString());
            }

            // execute kernel: one work item per vector element
            error = (ErrorCode)CL.EnqueueNDRangeKernel(hCmdQueue, hKernel, 1, null, &cnDimension, null, 0, null, null);
            if (error != ErrorCode.Success)
            {
                throw new Exception(error.ToString());
            }

            // copy results from device back to host (blocking read)
            error = (ErrorCode)CL.EnqueueReadBuffer(hCmdQueue, hDeviceMemC, true, IntPtr.Zero, cbVector, new IntPtr(pC), 0, null, (IntPtr[])null);
            if (error != ErrorCode.Success)
            {
                throw new Exception(error.ToString());
            }

            CL.Finish(hCmdQueue);

            CL.ReleaseMemObject(hDeviceMemA);
            CL.ReleaseMemObject(hDeviceMemB);
            CL.ReleaseMemObject(hDeviceMemC);
        }
    }

    for (int i = 0; i < A.Length; i++)
    {
        System.Diagnostics.Trace.WriteLine(String.Format("{0} + {1} = {2}", A[i], B[i], C[i]));
    }
}
/// <summary>
/// Externally implemented entry point <c>clCreateCommandQueue</c>.
/// NOTE(review): presumably bound to the native OpenCL function of the same name; the
/// import attribute is not visible next to this declaration - confirm.
/// </summary>
/// <param name="context">Context handle forwarded to the external function.</param>
/// <param name="device">Device handle forwarded to the external function.</param>
/// <param name="properties">Command queue properties forwarded to the external function.</param>
/// <param name="errcode_ret">Output error code; marshalled as a 4-byte signed integer.</param>
/// <returns>An <see cref="IntPtr"/>; presumably the handle of the created command queue (confirm against the OpenCL specification).</returns>
internal static extern IntPtr clCreateCommandQueue(cl_context context, cl_device_id device, cl_command_queue_properties properties, [MarshalAs(UnmanagedType.I4)] out ErrorCode errcode_ret);
/// <summary>
/// Externally implemented entry point <c>clGetKernelWorkGroupInfo</c>.
/// NOTE(review): presumably bound to the native OpenCL function of the same name - confirm.
/// Takes a raw pointer parameter, so the declaration has to live in an unsafe context.
/// </summary>
/// <param name="kernel">Kernel handle forwarded to the external function.</param>
/// <param name="device">Device handle forwarded to the external function.</param>
/// <param name="param_name">Selects which piece of information is requested.</param>
/// <param name="param_value_size">Size argument for <paramref name="param_value"/>; presumably in bytes (confirm).</param>
/// <param name="param_value">Raw pointer to the caller-provided result buffer.</param>
/// <param name="param_value_size_ret">Output size value; presumably the number of bytes the query needs or wrote (confirm).</param>
/// <returns>The error code returned by the external function.</returns>
internal static extern ErrorCode clGetKernelWorkGroupInfo(cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info param_name, IntPtr param_value_size, void *param_value, out IntPtr param_value_size_ret);
/// <summary>
/// Externally implemented entry point <c>clGetProgramBuildInfo</c>.
/// NOTE(review): presumably bound to the native OpenCL function of the same name - confirm.
/// Takes a raw pointer parameter, so the declaration has to live in an unsafe context.
/// </summary>
/// <param name="program">Program handle forwarded to the external function.</param>
/// <param name="device">Device handle forwarded to the external function.</param>
/// <param name="param_name">Selects which piece of build information is requested.</param>
/// <param name="param_value_size">Size argument for <paramref name="param_value"/>; presumably in bytes (confirm).</param>
/// <param name="param_value">Raw pointer to the caller-provided result buffer.</param>
/// <param name="param_value_size_ret">Output size value; presumably the number of bytes the query needs or wrote (confirm).</param>
/// <returns>The status value of the external function as a <c>cl_int</c>.</returns>
internal static extern cl_int clGetProgramBuildInfo(cl_program program, cl_device_id device, cl_program_build_info param_name, IntPtr param_value_size, void *param_value, out IntPtr param_value_size_ret);
/// <summary>
/// Externally implemented entry point <c>clGetDeviceInfo</c>.
/// NOTE(review): presumably bound to the native OpenCL function of the same name - confirm.
/// Takes a raw pointer parameter, so the declaration has to live in an unsafe context.
/// </summary>
/// <param name="device">Device handle forwarded to the external function.</param>
/// <param name="param_name">Selects which piece of device information is requested.</param>
/// <param name="param_value_size">Size argument for <paramref name="param_value"/>; presumably in bytes (confirm).</param>
/// <param name="param_value">Raw pointer to the caller-provided result buffer.</param>
/// <param name="param_value_size_ret">Output size value; presumably the number of bytes the query needs or wrote (confirm).</param>
/// <returns>The status value of the external function as a <c>cl_int</c>.</returns>
internal static extern cl_int clGetDeviceInfo(cl_device_id device, cl_device_info param_name, IntPtr param_value_size, void *param_value, out IntPtr param_value_size_ret);
/// <summary>
/// Forwards all arguments unchanged to <c>OpenCLAPI.clCreateSubDevicesEXT</c>.
/// </summary>
/// <param name="in_device">Device handle passed through to the underlying call.</param>
/// <param name="properties">Property data passed through as a byte array.</param>
/// <param name="num_entries">Entry count passed through to the underlying call.</param>
/// <param name="out_devices">Device array passed through to the underlying call.</param>
/// <param name="num_devices">Raw pointer passed through to the underlying call.</param>
/// <returns>The error code returned by the underlying call.</returns>
public static ErrorCode CreateSubDevicesEXT(cl_device_id in_device, byte[] properties, cl_uint num_entries, cl_device_id[] out_devices, [Out] cl_uint *num_devices)
{
    ErrorCode status = OpenCLAPI.clCreateSubDevicesEXT(
        in_device,
        properties,
        num_entries,
        out_devices,
        num_devices);
    return status;
}
/// <summary>
/// Forwards the device handle to <c>OpenCLAPI.clRetainDeviceEXT</c>.
/// </summary>
/// <param name="device">Device handle passed through to the underlying call.</param>
/// <returns>The error code returned by the underlying call.</returns>
public static ErrorCode RetainDeviceEXT(cl_device_id device)
{
    ErrorCode status = OpenCLAPI.clRetainDeviceEXT(device);
    return status;
}
/// <summary>
/// Forwards the device handle to <c>OpenCLAPI.clReleaseDeviceEXT</c>.
/// </summary>
/// <param name="device">Device handle passed through to the underlying call.</param>
/// <returns>The error code returned by the underlying call.</returns>
public static ErrorCode ReleaseDeviceEXT(cl_device_id device)
{
    ErrorCode status = OpenCLAPI.clReleaseDeviceEXT(device);
    return status;
}