Exemple #1
0
        /// <summary>
        /// Creates the OpenCL environment on this process: inspects the first OpenCL
        /// platform, records its vendor, switches the kernel sources to the AMD fp64
        /// extension when the vendor is AMD, and creates a context over all GPU
        /// devices of that platform.
        /// </summary>
        /// <param name="_MpiEnv">MPI environment of this process</param>
        /// <exception cref="ApplicationException">
        /// No OpenCL platform was reported, or the first platform has fewer GPU
        /// devices than <c>_MpiEnv.ProcessesOnMySMP</c>.
        /// </exception>
        public clEnvironment(MPIEnviroment _MpiEnv)
        {
            MpiEnv = _MpiEnv;

            cl_platform_id[] platforms = cl.GetPlatformIDs();

            // Only the first platform is ever used below; fail with a clear message
            // instead of an index error when there is none.
            if (platforms == null || platforms.Length == 0)
            {
                throw new ApplicationException("no OpenCL platform found;");
            }

            cl_platform_info_return pinfo = cl.GetPlatformInfo(platforms[0]);
            string vnd = pinfo.vendor;

            switch (vnd)
            {
            case "NVIDIA Corporation":
                _vendor = Vendor.NVIDIA;
                break;

            case "Advanced Micro Devices, Inc.":
                _vendor = Vendor.AMD;
                break;

            default:
                _vendor = Vendor.Other;
                break;
            }

            Console.WriteLine(vnd + " : " + _vendor.ToString());
            Console.WriteLine(pinfo.version);

            if (_vendor == Vendor.AMD)
            {
                // The kernel sources name the Khronos fp64 extension; swap in the AMD-specific name.
                clVectorSource.source[0] = clVectorSource.source[0].Replace("cl_khr_fp64", "cl_amd_fp64");
                clMatrixSource.source[0] = clMatrixSource.source[0].Replace("cl_khr_fp64", "cl_amd_fp64");
                Console.WriteLine("Adjusted AMD fp64 extenstion");
            }

            cl_device_id[] devices    = cl.GetDeviceIDs(platforms[0], cl_device_type.CL_DEVICE_TYPE_GPU);
            int            numDevices = devices.Length;

            if (numDevices < _MpiEnv.ProcessesOnMySMP)
            {
                throw new ApplicationException("not enougth OpenCL devices; There must be at least one OpenCL device for each MPI process;");
            }

            // NOTE(review): every process picks devices[0] even though the check above
            // demands one device per MPI process - confirm whether the device should be
            // selected by the process' rank on the SMP node instead.
            device  = devices[0];
            context = cl.CreateContext(platforms[0], devices);

            //cl_device_info_return dinfo = cl.GetDeviceInfo(device);
            //Console.WriteLine("Max work group size: " + dinfo.max_work_group_size);
            //Console.WriteLine("Process " + _MpiEnv.ProcessRankOnSMP + " running on device " + dinfo.name + ", " + dinfo.version);
            //Console.WriteLine(dinfo.extensions);
        }
Exemple #2
0
        /// <summary>
        /// Adds two random float vectors with an OpenCL kernel on the first device of
        /// the default context and traces every element-wise sum.
        /// </summary>
        /// <exception cref="Exception">
        /// An OpenCL call reported an error code other than <c>ErrorCode.Success</c>,
        /// or the context reported no devices.
        /// </exception>
        public static void Main()
        {
            const int cnBlockSize    = 4;
            const int cnBlocks       = 3;
            IntPtr    cnDimension    = new IntPtr(cnBlocks * cnBlockSize);
            string    sProgramSource = @"
__kernel void
vectorAdd(__global const float * a,
          __global const float * b,
          __global       float * c)
{
    // Vector element index
    int nIndex = get_global_id(0);
    c[nIndex] = a[nIndex] + b[nIndex];
}
";

            ErrorCode error;

            // create OpenCL device & context
            cl_context hContext;

            unsafe { hContext = CL.CreateContextFromType((ContextProperties *)null, DeviceTypeFlags.DeviceTypeDefault, IntPtr.Zero, IntPtr.Zero, &error); }
            if (error != ErrorCode.Success)
            {
                throw new Exception(error.ToString());
            }

            // query all devices available to the context
            IntPtr nContextDescriptorSize;

            CL.GetContextInfo(hContext, ContextInfo.ContextDevices, IntPtr.Zero, IntPtr.Zero, out nContextDescriptorSize);

            cl_device_id[] aDevices;
            unsafe
            {
                // The size query answers in bytes, so convert it to a device count
                // before allocating the array.
                aDevices = new cl_device_id[nContextDescriptorSize.ToInt32() / sizeof(cl_device_id)];
                if (aDevices.Length == 0)
                {
                    throw new Exception("The OpenCL context reported no devices.");
                }

                fixed(cl_device_id *ptr = aDevices)
                {
                    IntPtr ret;

                    CL.GetContextInfo(hContext, ContextInfo.ContextDevices,
                                      new IntPtr(aDevices.Length * sizeof(cl_device_id)),
                                      new IntPtr(ptr), out ret);
                }
            }

            // create a command queue for first device the context reported
            cl_command_queue hCmdQueue = CL.CreateCommandQueue(hContext, aDevices[0], (CommandQueueFlags)0, out error);
            if (error != ErrorCode.Success)
            {
                throw new Exception(error.ToString());
            }

            // create & compile program
            cl_program hProgram;

            unsafe { hProgram = CL.CreateProgramWithSource(hContext, 1, new string[] { sProgramSource }, null, &error); }
            if (error != ErrorCode.Success)
            {
                throw new Exception(error.ToString());
            }

            CL.BuildProgram(hProgram, 0, (IntPtr[])null, null, IntPtr.Zero, IntPtr.Zero);

            // create kernel (a failed build surfaces here as a kernel-creation error)
            cl_kernel hKernel = CL.CreateKernel(hProgram, "vectorAdd", out error);
            if (error != ErrorCode.Success)
            {
                throw new Exception(error.ToString());
            }

            // allocate host vectors
            int     nElements = cnDimension.ToInt32();
            float[] A         = new float[nElements];
            float[] B         = new float[nElements];
            float[] C         = new float[nElements];

            // initialize host memory
            Random rand = new Random();

            for (int i = 0; i < A.Length; i++)
            {
                A[i] = rand.Next() % 256;
                B[i] = rand.Next() % 256;
            }

            // allocate device memory
            unsafe
            {
                fixed(float *pA = A)
                fixed(float *pB = B)
                fixed(float *pC = C)
                {
                    IntPtr nBufferBytes = new IntPtr(nElements * sizeof(float));
                    cl_mem hDeviceMemA, hDeviceMemB, hDeviceMemC;

                    hDeviceMemA = CL.CreateBuffer(hContext,
                                                  MemFlags.MemReadOnly | MemFlags.MemCopyHostPtr,
                                                  nBufferBytes,
                                                  new IntPtr(pA),
                                                  out error);
                    if (error != ErrorCode.Success)
                    {
                        throw new Exception(error.ToString());
                    }

                    // buffer B must be seeded from B's host memory, not A's
                    hDeviceMemB = CL.CreateBuffer(hContext,
                                                  MemFlags.MemReadOnly | MemFlags.MemCopyHostPtr,
                                                  nBufferBytes,
                                                  new IntPtr(pB),
                                                  out error);
                    if (error != ErrorCode.Success)
                    {
                        throw new Exception(error.ToString());
                    }

                    hDeviceMemC = CL.CreateBuffer(hContext,
                                                  MemFlags.MemWriteOnly,
                                                  nBufferBytes,
                                                  IntPtr.Zero,
                                                  out error);
                    if (error != ErrorCode.Success)
                    {
                        throw new Exception(error.ToString());
                    }

                    // setup parameter values
                    CL.SetKernelArg(hKernel, 0, new IntPtr(sizeof(cl_mem)), new IntPtr(&hDeviceMemA));
                    CL.SetKernelArg(hKernel, 1, new IntPtr(sizeof(cl_mem)), new IntPtr(&hDeviceMemB));
                    CL.SetKernelArg(hKernel, 2, new IntPtr(sizeof(cl_mem)), new IntPtr(&hDeviceMemC));

                    // write data from host to device
                    CL.EnqueueWriteBuffer(hCmdQueue, hDeviceMemA, true, IntPtr.Zero,
                                          nBufferBytes,
                                          new IntPtr(pA), 0, null, (IntPtr[])null);
                    CL.EnqueueWriteBuffer(hCmdQueue, hDeviceMemB, true, IntPtr.Zero,
                                          nBufferBytes,
                                          new IntPtr(pB), 0, null, (IntPtr[])null);

                    // execute kernel
                    error = (ErrorCode)CL.EnqueueNDRangeKernel(hCmdQueue, hKernel, 1, null, &cnDimension, null, 0, null, null);
                    if (error != ErrorCode.Success)
                    {
                        throw new Exception(error.ToString());
                    }

                    // copy results from device back to host
                    error = (ErrorCode)CL.EnqueueReadBuffer(hCmdQueue, hDeviceMemC, true, IntPtr.Zero,
                                                            nBufferBytes,
                                                            new IntPtr(pC), 0, null, (IntPtr[])null);
                    if (error != ErrorCode.Success)
                    {
                        throw new Exception(error.ToString());
                    }

                    CL.Finish(hCmdQueue);

                    CL.ReleaseMemObject(hDeviceMemA);
                    CL.ReleaseMemObject(hDeviceMemB);
                    CL.ReleaseMemObject(hDeviceMemC);
                }
            }

            for (int i = 0; i < A.Length; i++)
            {
                System.Diagnostics.Trace.WriteLine(String.Format("{0} + {1} = {2}", A[i], B[i], C[i]));
            }
        }
Exemple #3
0
        /// <summary>
        /// Adds two random float vectors with an OpenCL kernel on the first device of
        /// the default context and traces every element-wise sum.
        /// </summary>
        /// <exception cref="Exception">
        /// An OpenCL call reported an error code other than <c>ErrorCode.Success</c>,
        /// or the context reported no devices.
        /// </exception>
        public static void Main()
        {
            const int cnBlockSize = 4;
            const int cnBlocks    = 3;
            IntPtr cnDimension = new IntPtr(cnBlocks * cnBlockSize);
            string sProgramSource = @"
__kernel void
vectorAdd(__global const float * a,
          __global const float * b,
          __global       float * c)
{
    // Vector element index
    int nIndex = get_global_id(0);
    c[nIndex] = a[nIndex] + b[nIndex];
}
";

            ErrorCode error;

            // create OpenCL device & context
            cl_context hContext;
            unsafe { hContext = CL.CreateContextFromType((ContextProperties*)null, DeviceTypeFlags.DeviceTypeDefault, IntPtr.Zero, IntPtr.Zero, &error); }
            if (error != ErrorCode.Success)
                throw new Exception(error.ToString());

            // query all devices available to the context
            IntPtr nContextDescriptorSize;
            CL.GetContextInfo(hContext, ContextInfo.ContextDevices, IntPtr.Zero, IntPtr.Zero, out nContextDescriptorSize);

            cl_device_id[] aDevices;
            unsafe
            {
                // The size query answers in bytes, so convert it to a device count
                // before allocating the array.
                aDevices = new cl_device_id[nContextDescriptorSize.ToInt32() / sizeof(cl_device_id)];
                if (aDevices.Length == 0)
                    throw new Exception("The OpenCL context reported no devices.");

                fixed (cl_device_id* ptr = aDevices)
                {
                    IntPtr ret;
                    CL.GetContextInfo(hContext, ContextInfo.ContextDevices,
                        new IntPtr(aDevices.Length * sizeof(cl_device_id)),
                        new IntPtr(ptr), out ret);
                }
            }

            // create a command queue for first device the context reported
            cl_command_queue hCmdQueue = CL.CreateCommandQueue(hContext, aDevices[0], (CommandQueueFlags)0, out error);
            if (error != ErrorCode.Success)
                throw new Exception(error.ToString());

            // create & compile program
            cl_program hProgram;
            unsafe { hProgram = CL.CreateProgramWithSource(hContext, 1, new string[] { sProgramSource }, null, &error); }
            if (error != ErrorCode.Success)
                throw new Exception(error.ToString());
            CL.BuildProgram(hProgram, 0, (IntPtr[])null, null, IntPtr.Zero, IntPtr.Zero);

            // create kernel (a failed build surfaces here as a kernel-creation error)
            cl_kernel hKernel = CL.CreateKernel(hProgram, "vectorAdd", out error);
            if (error != ErrorCode.Success)
                throw new Exception(error.ToString());

            // allocate host vectors
            int nElements = cnDimension.ToInt32();
            float[] A = new float[nElements];
            float[] B = new float[nElements];
            float[] C = new float[nElements];

            // initialize host memory
            Random rand = new Random();
            for (int i = 0; i < A.Length; i++)
            {
                A[i] = rand.Next() % 256;
                B[i] = rand.Next() % 256;
            }

            // allocate device memory
            unsafe
            {
                fixed (float* pA = A)
                fixed (float* pB = B)
                fixed (float* pC = C)
                {
                    IntPtr nBufferBytes = new IntPtr(nElements * sizeof(float));
                    cl_mem hDeviceMemA, hDeviceMemB, hDeviceMemC;

                    hDeviceMemA = CL.CreateBuffer(hContext,
                        MemFlags.MemReadOnly | MemFlags.MemCopyHostPtr,
                        nBufferBytes,
                        new IntPtr(pA),
                        out error);
                    if (error != ErrorCode.Success)
                        throw new Exception(error.ToString());

                    // buffer B must be seeded from B's host memory, not A's
                    hDeviceMemB = CL.CreateBuffer(hContext,
                        MemFlags.MemReadOnly | MemFlags.MemCopyHostPtr,
                        nBufferBytes,
                        new IntPtr(pB),
                        out error);
                    if (error != ErrorCode.Success)
                        throw new Exception(error.ToString());

                    hDeviceMemC = CL.CreateBuffer(hContext,
                        MemFlags.MemWriteOnly,
                        nBufferBytes,
                        IntPtr.Zero,
                        out error);
                    if (error != ErrorCode.Success)
                        throw new Exception(error.ToString());

                    // setup parameter values
                    CL.SetKernelArg(hKernel, 0, new IntPtr(sizeof(cl_mem)), new IntPtr(&hDeviceMemA));
                    CL.SetKernelArg(hKernel, 1, new IntPtr(sizeof(cl_mem)), new IntPtr(&hDeviceMemB));
                    CL.SetKernelArg(hKernel, 2, new IntPtr(sizeof(cl_mem)), new IntPtr(&hDeviceMemC));

                    // write data from host to device
                    CL.EnqueueWriteBuffer(hCmdQueue, hDeviceMemA, true, IntPtr.Zero,
                        nBufferBytes,
                        new IntPtr(pA), 0, null, (IntPtr[])null);
                    CL.EnqueueWriteBuffer(hCmdQueue, hDeviceMemB, true, IntPtr.Zero,
                        nBufferBytes,
                        new IntPtr(pB), 0, null, (IntPtr[])null);

                    // execute kernel
                    error = (ErrorCode)CL.EnqueueNDRangeKernel(hCmdQueue, hKernel, 1, null, &cnDimension, null, 0, null, null);
                    if (error != ErrorCode.Success)
                        throw new Exception(error.ToString());

                    // copy results from device back to host
                    error = (ErrorCode)CL.EnqueueReadBuffer(hCmdQueue, hDeviceMemC, true, IntPtr.Zero,
                        nBufferBytes,
                        new IntPtr(pC), 0, null, (IntPtr[])null);
                    if (error != ErrorCode.Success)
                        throw new Exception(error.ToString());

                    CL.Finish(hCmdQueue);

                    CL.ReleaseMemObject(hDeviceMemA);
                    CL.ReleaseMemObject(hDeviceMemB);
                    CL.ReleaseMemObject(hDeviceMemC);
                }
            }

            for (int i = 0; i < A.Length; i++)
            {
                System.Diagnostics.Trace.WriteLine(String.Format("{0} + {1} = {2}", A[i], B[i], C[i]));
            }
        }
Exemple #4
0
 // Externally implemented entry point clCreateCommandQueue.
 // Takes a context, a device and queue properties and returns an IntPtr
 // (presumably the handle of the created command queue - confirm against the OpenCL spec).
 // The status code is delivered through errcode_ret, marshaled as a 4-byte signed integer.
 internal static extern IntPtr clCreateCommandQueue(cl_context context, cl_device_id device, cl_command_queue_properties properties, [MarshalAs(UnmanagedType.I4)] out ErrorCode errcode_ret);
Exemple #5
0
 // Externally implemented entry point clGetKernelWorkGroupInfo.
 // Queries the item selected by param_name for the given kernel/device pair.
 // param_value is a caller-supplied raw buffer of param_value_size bytes;
 // param_value_size_ret receives a size (presumably the byte count of the answer - confirm).
 // Returns the call's status as an ErrorCode.
 internal static extern ErrorCode clGetKernelWorkGroupInfo(cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info param_name, IntPtr param_value_size, void *param_value, out IntPtr param_value_size_ret);
Exemple #6
0
 // Externally implemented entry point clGetProgramBuildInfo.
 // Queries the item selected by param_name for the given program/device pair.
 // param_value is a caller-supplied raw buffer of param_value_size bytes;
 // param_value_size_ret receives a size (presumably the byte count of the answer - confirm).
 // Returns a cl_int (presumably the OpenCL status code - confirm).
 internal static extern cl_int clGetProgramBuildInfo(cl_program program, cl_device_id device, cl_program_build_info param_name, IntPtr param_value_size, void *param_value, out IntPtr param_value_size_ret);
Exemple #7
0
 // Externally implemented entry point clGetDeviceInfo.
 // Queries the item selected by param_name for the given device.
 // param_value is a caller-supplied raw buffer of param_value_size bytes;
 // param_value_size_ret receives a size (presumably the byte count of the answer - confirm).
 // Returns a cl_int (presumably the OpenCL status code - confirm).
 internal static extern cl_int clGetDeviceInfo(cl_device_id device, cl_device_info param_name, IntPtr param_value_size, void *param_value, out IntPtr param_value_size_ret);
Exemple #8
0
 /// <summary>
 /// Forwards all arguments to <c>OpenCLAPI.clCreateSubDevicesEXT</c> and hands
 /// back its result unchanged.
 /// </summary>
 /// <param name="in_device">Device passed through as the first argument.</param>
 /// <param name="properties">Property bytes passed through unchanged.</param>
 /// <param name="num_entries">Entry count passed through unchanged.</param>
 /// <param name="out_devices">Array passed through for the callee to fill.</param>
 /// <param name="num_devices">Pointer passed through for the callee to write to.</param>
 /// <returns>The <c>ErrorCode</c> produced by the underlying call.</returns>
 public static ErrorCode CreateSubDevicesEXT(cl_device_id in_device, byte[] properties, cl_uint num_entries, cl_device_id[] out_devices, [Out] cl_uint *num_devices)
 {
     ErrorCode status = OpenCLAPI.clCreateSubDevicesEXT(in_device, properties, num_entries, out_devices, num_devices);

     return status;
 }
Exemple #9
0
 /// <summary>
 /// Forwards the device to <c>OpenCLAPI.clRetainDeviceEXT</c> and hands back
 /// its result unchanged.
 /// </summary>
 /// <param name="device">Device passed through to the underlying call.</param>
 /// <returns>The <c>ErrorCode</c> produced by the underlying call.</returns>
 public static ErrorCode RetainDeviceEXT(cl_device_id device)
 {
     ErrorCode status = OpenCLAPI.clRetainDeviceEXT(device);

     return status;
 }
Exemple #10
0
		/// <summary>
		/// Forwards the device to <c>OpenCLAPI.clReleaseDeviceEXT</c> and hands back
		/// its result unchanged.
		/// </summary>
		/// <param name="device">Device passed through to the underlying call.</param>
		/// <returns>The <c>ErrorCode</c> produced by the underlying call.</returns>
		public static ErrorCode ReleaseDeviceEXT(cl_device_id device) {
			ErrorCode status = OpenCLAPI.clReleaseDeviceEXT(device);

			return status;
		}