Пример #1
0
        /// <summary>
        /// Creates the vector kernels from <paramref name="program"/> and derives the
        /// launch sizes from the local length of <paramref name="p"/>.
        /// </summary>
        /// <param name="p">Partitioning whose <c>LocalLength</c> becomes the vector size.</param>
        /// <param name="program">Program object the kernels are created from.</param>
        private void init(IPartitioning p, cl_program program)
        {
            // One kernel object per vector operation, looked up by kernel name.
            clscale     = cl.CreateKernel(program, "scale");
            clacc       = cl.CreateKernel(program, "acc");
            clmew       = cl.CreateKernel(program, "mew");
            cldnrm2     = cl.CreateKernel(program, "dnrm2");
            clinnerprod = cl.CreateKernel(program, "innerprod");

            size = p.LocalLength;

            // Full range: the vector length rounded up to the next multiple of localsize.
            int paddedFull   = size;
            int overhangFull = paddedFull % localsize;
            if (overhangFull > 0)
            {
                paddedFull += localsize - overhangFull;
            }
            globalsize = paddedFull;

            // Half range: half of the padded length, again rounded up to a multiple of localsize.
            int paddedHalf   = paddedFull / 2;
            int overhangHalf = paddedHalf % localsize;
            if (overhangHalf > 0)
            {
                paddedHalf += localsize - overhangHalf;
            }
            globalsizehalf = paddedHalf;

            // Number of localsize-sized groups that cover the half range.
            groups = paddedHalf / localsize;
        }
Пример #2
0
        /// <summary>
        /// Creates the matrix: packs <paramref name="M"/>, creates the multiply and
        /// "accumulateExternal" kernels and, when there are external elements
        /// (<c>extSize &gt; 0</c>), allocates the host and device buffers used to accumulate them.
        /// </summary>
        /// <param name="M">Original matrix</param>
        /// <param name="device">Corresponding OpenCL device</param>
        /// <param name="kernelName">Name of the kernel function used for the multiplication</param>
        public clMatrix(MsrMatrix M, clDevice device, string kernelName)
            : base(M)
        {
            this.device = device;
            base.PackMatrix(M);

            this.clmultiply = cl.CreateKernel(device.matrixProgram, kernelName);
            this.claccext   = cl.CreateKernel(device.matrixProgram, "accumulateExternal");
            disposed        = false;

            LMAA();

            // Nothing external to accumulate: no extra buffers are needed.
            if (extSize <= 0)
            {
                return;
            }

            // Round the external element count up to the next multiple of extlocalsize.
            int overhang = extSize % extlocalsize;
            extglobalsize = (overhang > 0) ? extSize + (extlocalsize - overhang) : extSize;

            uint valueBytes = (uint)extSize * sizeof(double);
            uint indexBytes = (uint)extSize * sizeof(int);

            // Host staging memory and device buffer for the element values.
            h_ElementsToAcc = Marshal.AllocHGlobal(extSize * sizeof(double));
            d_ElementsToAcc = cl.CreateBuffer(device.env.context, cl_mem_flags.CL_MEM_READ_ONLY, valueBytes);

            // Device copy of the accumulation indices, written once here.
            d_IndicesToAccumulate = cl.CreateBuffer(device.env.context, cl_mem_flags.CL_MEM_READ_ONLY, indexBytes);
            cl.EnqueueWriteBuffer(device.cq, d_IndicesToAccumulate, true, 0, indexBytes, h_IndicesToAccumulate);
        }
Пример #3
0
            /// <summary>
            /// Sets up the send side of the communication vector: creates the
            /// "fillSendBuffer" kernel, records the per-process send lengths and, when
            /// anything has to be sent, allocates the host/device buffers and uploads the
            /// concatenated index list.
            /// </summary>
            /// <param name="M">Matrix whose communication pattern supplies the per-process index lists.</param>
            /// <param name="v">Vector that owns this communication vector.</param>
            internal clCommVector(MatrixBase M, clVector v)
                : base(M, v)
            {
                this.owner = v;
                clfill     = cl.CreateKernel(owner.device.vectorProgram, "fillSendBuffer");

                IDictionary<int, int[]> comLists = M._SpmvCommPattern.ComLists;

                // Record how many entries go to each process and total them.
                int entryTotal = 0;
                foreach (KeyValuePair<int, int[]> list in comLists)
                {
                    int count = list.Value.Length;
                    base.SendBuffersLengths[list.Key] = count;
                    entryTotal += count;
                }

                size = entryTotal;

                // Global work size: the total rounded up to the next multiple of localsize.
                int padded   = size;
                int overhang = size % localsize;
                if (overhang > 0)
                {
                    padded += localsize - overhang;
                }
                globalsize = padded;

                // Nothing to send: no buffers are allocated.
                if (size <= 0)
                {
                    return;
                }

                // Index list on host and device.
                h_IndicesToSend = new int[size];
                d_IndicesToSend = cl.CreateBuffer(owner.device.env.context, cl_mem_flags.CL_MEM_READ_ONLY, (uint)size * sizeof(int));

                // Send buffer on host (unmanaged) and device.
                h_SendBuffer = Marshal.AllocHGlobal(size * sizeof(double));
                d_SendBuffer = cl.CreateBuffer(owner.device.env.context, cl_mem_flags.CL_MEM_WRITE_ONLY, (uint)size * sizeof(double));

                // Concatenate the per-process lists; a single running offset addresses both
                // the index array and the matching slice of the host send buffer.
                int offset = 0;
                unsafe
                {
                    double *sendBase = (double *)h_SendBuffer;

                    foreach (KeyValuePair<int, int[]> list in comLists)
                    {
                        int rank  = list.Key;
                        int count = base.SendBuffersLengths[rank];

                        // start address for sending to process 'rank'
                        base.SendBuffers[rank] = (IntPtr)(sendBase + offset);

                        Array.Copy(list.Value, 0, h_IndicesToSend, offset, count);
                        offset += count;
                    }
                }

                cl.EnqueueWriteBuffer(owner.device.cq, d_IndicesToSend, true, 0, (uint)size * sizeof(int), h_IndicesToSend);
            }
Пример #4
0
 // Extern declaration named after the OpenCL C function clEnqueueNDRangeKernel
 // (array-marshalling overload). Parameter names mirror the native signature.
 // The work offset/size lists and the event wait list are passed as managed arrays
 // marshalled as LPArray, input only ([In]); the resulting event goes through a raw pointer.
 // NOTE(review): the attribute binding this to the native library is not visible here,
 // and the pointer parameter requires an enclosing unsafe context - confirm both.
 internal static extern ErrorCode clEnqueueNDRangeKernel(
     cl_command_queue command_queue,
     cl_kernel kernel,
     cl_uint work_dim,
     [In][MarshalAs(UnmanagedType.LPArray)] IntPtr[] global_work_offset,
     [In][MarshalAs(UnmanagedType.LPArray)] IntPtr[] global_work_size,
     [In][MarshalAs(UnmanagedType.LPArray)] IntPtr[] local_work_size,
     cl_uint num_events_in_wait_list,
     [In][MarshalAs(UnmanagedType.LPArray)] cl_event[] event_wait_list,
     cl_event *_event);
Пример #5
0
 // Extern declaration named after the OpenCL C function clEnqueueNDRangeKernel
 // (raw-pointer overload). Parameter names mirror the native signature.
 // All list arguments are unmanaged pointers, so no array marshalling is declared
 // and the caller has to supply memory that stays valid for the call.
 // NOTE(review): the attribute binding this to the native library is not visible here,
 // and the pointer parameters require an enclosing unsafe context - confirm both.
 internal static extern ErrorCode clEnqueueNDRangeKernel(
     cl_command_queue command_queue,
     cl_kernel kernel,
     cl_uint work_dim,
     IntPtr *global_work_offset,
     IntPtr *global_work_size,
     IntPtr *local_work_size,
     cl_uint num_events_in_wait_list,
     IntPtr *event_wait_list,
     cl_event *_event);
Пример #6
0
        /// <summary>
        /// Sample entry point: fills two float vectors with random values, adds them
        /// element-wise with the <c>vectorAdd</c> OpenCL kernel on the first device of the
        /// default context, and traces one "a + b = c" line per element.
        /// </summary>
        /// <exception cref="Exception">An OpenCL call reported an error code other than success.</exception>
        /// <exception cref="InvalidOperationException">The context reports no devices.</exception>
        public static void Main()
        {
            const int cnBlockSize    = 4;
            const int cnBlocks       = 3;
            IntPtr    cnDimension    = new IntPtr(cnBlocks * cnBlockSize);
            string    sProgramSource = @"
__kernel void
vectorAdd(__global const float * a,
          __global const float * b,
          __global       float * c)
{
    // Vector element index
    int nIndex = get_global_id(0);
    c[nIndex] = a[nIndex] + b[nIndex];
}
";

            ErrorCode error;

            // create OpenCL device & context
            cl_context hContext;

            unsafe { hContext = CL.CreateContextFromType((ContextProperties *)null, DeviceTypeFlags.DeviceTypeDefault, IntPtr.Zero, IntPtr.Zero, &error); }
            ThrowOnError(error, "CreateContextFromType");

            // query all devices available to the context; the first call only asks for the size
            IntPtr nContextDescriptorSize;

            CL.GetContextInfo(hContext, ContextInfo.ContextDevices, IntPtr.Zero, IntPtr.Zero, out nContextDescriptorSize);

            cl_device_id[] aDevices;
            unsafe
            {
                // The reported size is in bytes, so convert it to an element count
                // before allocating the device array.
                aDevices = new cl_device_id[nContextDescriptorSize.ToInt32() / sizeof(cl_device_id)];
                if (aDevices.Length == 0)
                {
                    throw new InvalidOperationException("The OpenCL context reports no devices.");
                }

                fixed(cl_device_id *ptr = aDevices)
                {
                    IntPtr ret;

                    CL.GetContextInfo(hContext, ContextInfo.ContextDevices, nContextDescriptorSize, new IntPtr(ptr), out ret);
                }
            }

            // create a command queue for first device the context reported
            cl_command_queue hCmdQueue = CL.CreateCommandQueue(hContext, aDevices[0], (CommandQueueFlags)0, out error);
            ThrowOnError(error, "CreateCommandQueue");

            // create & compile program
            cl_program hProgram;

            unsafe { hProgram = CL.CreateProgramWithSource(hContext, 1, new string[] { sProgramSource }, null, &error); }
            ThrowOnError(error, "CreateProgramWithSource");

            error = (ErrorCode)CL.BuildProgram(hProgram, 0, (IntPtr[])null, null, IntPtr.Zero, IntPtr.Zero);
            ThrowOnError(error, "BuildProgram");

            // create kernel
            cl_kernel hKernel = CL.CreateKernel(hProgram, "vectorAdd", out error);
            ThrowOnError(error, "CreateKernel");

            // allocate host vectors
            int     nElements = cnDimension.ToInt32();
            float[] A         = new float[nElements];
            float[] B         = new float[nElements];
            float[] C         = new float[nElements];

            // initialize host memory with values in [0, 255]
            Random rand = new Random();

            for (int i = 0; i < A.Length; i++)
            {
                A[i] = rand.Next(256);
                B[i] = rand.Next(256);
            }

            // size of one vector in bytes, shared by all buffer operations below
            IntPtr cbVector = new IntPtr(nElements * sizeof(float));

            // allocate device memory
            unsafe
            {
                fixed(float *pA = A)
                fixed(float *pB = B)
                fixed(float *pC = C)
                {
                    cl_mem hDeviceMemA, hDeviceMemB, hDeviceMemC;

                    hDeviceMemA = CL.CreateBuffer(hContext,
                                                  MemFlags.MemReadOnly | MemFlags.MemCopyHostPtr,
                                                  cbVector,
                                                  new IntPtr(pA),
                                                  out error);
                    ThrowOnError(error, "CreateBuffer(A)");

                    // Buffer B is initialised from B's own host memory (pB).
                    hDeviceMemB = CL.CreateBuffer(hContext,
                                                  MemFlags.MemReadOnly | MemFlags.MemCopyHostPtr,
                                                  cbVector,
                                                  new IntPtr(pB),
                                                  out error);
                    ThrowOnError(error, "CreateBuffer(B)");

                    hDeviceMemC = CL.CreateBuffer(hContext,
                                                  MemFlags.MemWriteOnly,
                                                  cbVector,
                                                  IntPtr.Zero,
                                                  out error);
                    ThrowOnError(error, "CreateBuffer(C)");

                    // setup parameter values
                    CL.SetKernelArg(hKernel, 0, new IntPtr(sizeof(cl_mem)), new IntPtr(&hDeviceMemA));
                    CL.SetKernelArg(hKernel, 1, new IntPtr(sizeof(cl_mem)), new IntPtr(&hDeviceMemB));
                    CL.SetKernelArg(hKernel, 2, new IntPtr(sizeof(cl_mem)), new IntPtr(&hDeviceMemC));

                    // write data from host to device
                    CL.EnqueueWriteBuffer(hCmdQueue, hDeviceMemA, true, IntPtr.Zero,
                                          cbVector,
                                          new IntPtr(pA), 0, null, (IntPtr[])null);
                    CL.EnqueueWriteBuffer(hCmdQueue, hDeviceMemB, true, IntPtr.Zero,
                                          cbVector,
                                          new IntPtr(pB), 0, null, (IntPtr[])null);

                    // execute kernel
                    error = (ErrorCode)CL.EnqueueNDRangeKernel(hCmdQueue, hKernel, 1, null, &cnDimension, null, 0, null, null);
                    ThrowOnError(error, "EnqueueNDRangeKernel");

                    // copy results from device back to host
                    error = (ErrorCode)CL.EnqueueReadBuffer(hCmdQueue, hDeviceMemC, true, IntPtr.Zero,
                                                            cbVector,
                                                            new IntPtr(pC), 0, null, (IntPtr[])null);
                    ThrowOnError(error, "EnqueueReadBuffer");

                    CL.Finish(hCmdQueue);

                    // NOTE(review): kernel, program, command queue and context are never
                    // released; only the memory objects are. Acceptable for a short-lived
                    // sample process, but worth fixing if this code is reused.
                    CL.ReleaseMemObject(hDeviceMemA);
                    CL.ReleaseMemObject(hDeviceMemB);
                    CL.ReleaseMemObject(hDeviceMemC);
                }
            }

            for (int i = 0; i < A.Length; i++)
            {
                System.Diagnostics.Trace.WriteLine(String.Format("{0} + {1} = {2}", A[i], B[i], C[i]));
            }
        }

        /// <summary>
        /// Throws when <paramref name="error"/> is not <c>ErrorCode.Success</c>.
        /// </summary>
        /// <param name="error">Error code reported by an OpenCL call.</param>
        /// <param name="operation">Name of the call, included in the exception message.</param>
        private static void ThrowOnError(ErrorCode error, string operation)
        {
            if (error != ErrorCode.Success)
            {
                throw new Exception(operation + " failed: " + error.ToString());
            }
        }
Пример #7
0
 // Extern declaration named after the OpenCL C function clEnqueueTask
 // (raw-pointer overload): the event wait list and the resulting event are
 // both passed as unmanaged pointers.
 // NOTE(review): the attribute binding this to the native library is not visible here,
 // and the pointer parameters require an enclosing unsafe context - confirm both.
 internal static extern ErrorCode clEnqueueTask(
     cl_command_queue command_queue,
     cl_kernel kernel,
     cl_uint num_events_in_wait_list,
     IntPtr *event_wait_list,
     cl_event *_event);
Пример #8
0
 // Extern declaration named after the OpenCL C function clEnqueueTask
 // (array-marshalling overload): the event wait list is a managed array
 // marshalled as LPArray, input only ([In]); the resulting event goes through a raw pointer.
 // NOTE(review): the attribute binding this to the native library is not visible here,
 // and the pointer parameter requires an enclosing unsafe context - confirm both.
 internal static extern ErrorCode clEnqueueTask(
     cl_command_queue command_queue,
     cl_kernel kernel,
     cl_uint num_events_in_wait_list,
     [In][MarshalAs(UnmanagedType.LPArray)] cl_event[] event_wait_list,
     cl_event *_event);
Пример #9
0
 // Extern declaration named after the OpenCL C function clGetKernelWorkGroupInfo.
 // The queried value is written through the untyped param_value pointer, whose capacity
 // is given by param_value_size; param_value_size_ret is an out parameter.
 // NOTE(review): the attribute binding this to the native library is not visible here - confirm.
 internal static extern ErrorCode clGetKernelWorkGroupInfo(cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info param_name, IntPtr param_value_size, void *param_value, out IntPtr param_value_size_ret);
Пример #10
0
 // Extern declaration named after the OpenCL C function clGetKernelInfo.
 // The queried value is written through the untyped param_value pointer, whose capacity
 // is given by param_value_size; param_value_size_ret is an out parameter.
 // NOTE(review): the attribute binding this to the native library is not visible here - confirm.
 internal static extern ErrorCode clGetKernelInfo(cl_kernel kernel, cl_kernel_info param_name, IntPtr param_value_size, void *param_value, out IntPtr param_value_size_ret);
Пример #11
0
 // Extern declaration named after the OpenCL C function clSetKernelArg.
 // arg_value is an untyped pointer to the argument data and arg_size its size;
 // arg_index selects which kernel argument is set.
 // NOTE(review): the attribute binding this to the native library is not visible here - confirm.
 internal static extern ErrorCode clSetKernelArg(cl_kernel kernel, cl_uint arg_index, IntPtr arg_size, void *arg_value);
Пример #12
0
 // Extern declaration named after the OpenCL C function clReleaseKernel; takes the
 // kernel handle and returns an ErrorCode.
 // NOTE(review): per the OpenCL specification this decrements the kernel's reference
 // count - confirm the binding attribute, which is not visible here.
 internal static extern ErrorCode clReleaseKernel(cl_kernel kernel);
Пример #13
0
 // Extern declaration named after the OpenCL C function clRetainKernel; takes the
 // kernel handle and returns an ErrorCode.
 // NOTE(review): per the OpenCL specification this increments the kernel's reference
 // count - confirm the binding attribute, which is not visible here.
 internal static extern ErrorCode clRetainKernel(cl_kernel kernel);