예제 #1
0
        /// <summary>
        /// Flattens the per-block sub-vector index lists of <c>m_internalData</c> into two host
        /// arrays (start offsets and concatenated indices) and uploads both into read-only
        /// device buffers (<c>d_xSubStart</c>, <c>d_blockSubVector</c>).
        /// </summary>
        /// <remarks>
        /// As a side effect, <c>maxBlockSubVectorLength</c> is raised to the longest sub-vector
        /// length encountered.
        /// </remarks>
        private void CreateDataStructures()
        {
            // h_xSubStart[i] is the offset of block i inside the flattened index list; the
            // additional last entry holds the total number of indices.
            int[] h_xSubStart = new int[m_internalData.BlockSubVectorLength.Length + 1];
            int   len         = 0;

            for (int i = 0; i < m_internalData.BlockSubVectorLength.Length; i++)
            {
                h_xSubStart[i] = len;
                len           += m_internalData.BlockSubVectorLength[i];

                if (m_internalData.BlockSubVectorLength[i] > maxBlockSubVectorLength)
                {
                    maxBlockSubVectorLength = m_internalData.BlockSubVectorLength[i];
                }
            }
            h_xSubStart[m_internalData.BlockSubVectorLength.Length] = len;

            // Concatenate the used entries of every row of BlockSubVector, block after block.
            int[] h_blockSubVector = new int[len];
            int   idx = 0;

            for (int i = 0; i < m_internalData.BlockSubVectorLength.Length; i++)
            {
                for (int j = 0; j < m_internalData.BlockSubVectorLength[i]; j++)
                {
                    h_blockSubVector[idx] = m_internalData.BlockSubVector[i, j];
                    idx++;
                }
            }

            // The host array already contains the trailing total-length entry, so the buffer is
            // exactly h_xSubStart.Length ints. With CL_MEM_COPY_HOST_PTR a larger size would make
            // the copy read beyond the end of the managed array.
            d_xSubStart      = cl.CreateBuffer(device.env.context, cl_mem_flags.CL_MEM_COPY_HOST_PTR | cl_mem_flags.CL_MEM_READ_ONLY, (uint)h_xSubStart.Length * sizeof(int), h_xSubStart);
            d_blockSubVector = cl.CreateBuffer(device.env.context, cl_mem_flags.CL_MEM_COPY_HOST_PTR | cl_mem_flags.CL_MEM_READ_ONLY, (uint)h_blockSubVector.Length * sizeof(int), h_blockSubVector);
        }
예제 #2
0
파일: clMatrix.cs 프로젝트: xyuan/BoSSS
        /// <summary>
        /// Creates the OpenCL matrix: packs <paramref name="M"/>, creates the multiply and
        /// external-accumulation kernels and, when external entries exist, allocates the
        /// host/device buffers used to accumulate them.
        /// </summary>
        /// <param name="M">Original matrix</param>
        /// <param name="device">Corresponding OpenCL device</param>
        /// <param name="kernelName">Name of the kernel function</param>
        public clMatrix(MsrMatrix M, clDevice device, string kernelName)
            : base(M)
        {
            this.device = device;
            base.PackMatrix(M);

            clmultiply = cl.CreateKernel(device.matrixProgram, kernelName);
            claccext   = cl.CreateKernel(device.matrixProgram, "accumulateExternal");
            disposed   = false;

            LMAA();

            // Nothing external to accumulate: no further buffers are needed.
            if (extSize <= 0)
            {
                return;
            }

            // Round the global work size up to the next multiple of the local work size.
            int remainder = extSize % extlocalsize;
            extglobalsize = extSize;
            if (remainder > 0)
            {
                extglobalsize += extlocalsize - remainder;
            }

            // Host staging area and device buffer for the values to accumulate.
            h_ElementsToAcc = Marshal.AllocHGlobal(extSize * sizeof(double));
            d_ElementsToAcc = cl.CreateBuffer(device.env.context, cl_mem_flags.CL_MEM_READ_ONLY, (uint)extSize * sizeof(double));

            // Device copy of the accumulation indices, uploaded with a blocking write.
            uint indexBytes = (uint)extSize * sizeof(int);
            d_IndicesToAccumulate = cl.CreateBuffer(device.env.context, cl_mem_flags.CL_MEM_READ_ONLY, indexBytes);
            cl.EnqueueWriteBuffer(device.cq, d_IndicesToAccumulate, true, 0, indexBytes, h_IndicesToAccumulate);
        }
예제 #3
0
 // Declaration for the native OpenCL function clEnqueueUnmapMemObject, raw-pointer overload
 // (the DllImport attribute is presumably outside this excerpt).
 // Per the OpenCL specification, it enqueues a command that unmaps `mapped_ptr`, a host
 // pointer previously obtained by mapping `memobj`.
 // `event_wait_list` points to `num_events_in_wait_list` events to wait on; `_event`
 // optionally receives an event for this command. Returns the OpenCL status code.
 internal static extern ErrorCode clEnqueueUnmapMemObject(
     cl_command_queue command_queue,
     cl_mem memobj,
     void *mapped_ptr,
     cl_uint num_events_in_wait_list,
     IntPtr *event_wait_list,
     cl_event *_event);
예제 #4
0
        /// <summary>
        /// Allocate memory and copy matrix data on GPU
        /// </summary>
        /// <remarks>
        /// Calls the base implementation first, then creates two read-only device buffers that
        /// are initialised from the host arrays <c>m_internalData.Val</c> and
        /// <c>m_internalData.CellColumn</c>.
        /// </remarks>
        public override void Lock()
        {
            base.Lock();

            d_cellData   = cl.CreateBuffer(device.env.context, cl_mem_flags.CL_MEM_COPY_HOST_PTR | cl_mem_flags.CL_MEM_READ_ONLY, (uint)m_internalData.Val.Length * sizeof(double), m_internalData.Val);

            // Column indices are sized in ints, not doubles: with CL_MEM_COPY_HOST_PTR a byte
            // count larger than the host array makes the copy read past its end.
            // NOTE(review): assumes CellColumn is an int[] — confirm against its declaration.
            d_cellColIdx = cl.CreateBuffer(device.env.context, cl_mem_flags.CL_MEM_COPY_HOST_PTR | cl_mem_flags.CL_MEM_READ_ONLY, (uint)m_internalData.CellColumn.Length * sizeof(int), m_internalData.CellColumn);
        }
예제 #5
0
        /// <summary>
        /// Locks the matrix and uploads its value and column-index arrays into read-only
        /// device buffers.
        /// </summary>
        public override void Lock()
        {
            base.Lock();

            // Both buffers are read-only on the device and initialised from host memory.
            cl_mem_flags uploadFlags = cl_mem_flags.CL_MEM_COPY_HOST_PTR | cl_mem_flags.CL_MEM_READ_ONLY;

            uint valueBytes = (uint)m_internalData.MtxEntries.Values.Length * sizeof(double);
            d_val = cl.CreateBuffer(device.env.context, uploadFlags, valueBytes, m_internalData.MtxEntries.Values);

            uint columnBytes = (uint)m_internalData.ColInd.Values.Length * sizeof(int);
            d_colIdx = cl.CreateBuffer(device.env.context, uploadFlags, columnBytes, m_internalData.ColInd.Values);
        }
예제 #6
0
 // Declaration for the native OpenCL function clEnqueueUnmapMemObject, overload taking the
 // event wait list as a managed array marshalled as LPArray (input only).
 // Per the OpenCL specification, it enqueues a command that unmaps `mapped_ptr`, a host
 // pointer previously obtained by mapping `memobj`.
 // `num_events_in_wait_list` is the number of entries in `event_wait_list`; `_event`
 // optionally receives an event for this command. Returns the OpenCL status code.
 internal static extern ErrorCode clEnqueueUnmapMemObject(
     cl_command_queue command_queue,
     cl_mem memobj,
     void *mapped_ptr,
     cl_uint num_events_in_wait_list,
     [In][MarshalAs(UnmanagedType.LPArray)] cl_event[] event_wait_list,
     cl_event *_event);
예제 #7
0
파일: clVector.cs 프로젝트: xyuan/BoSSS
        /// <summary>
        /// Locks the vector: allocates the unmanaged host result buffer and creates the
        /// device buffers for the vector data and for the per-group results.
        /// </summary>
        public override void Lock()
        {
            base.Lock();

            // Unmanaged host memory, one double per group; it backs d_result below.
            h_result = Marshal.AllocHGlobal(groups * sizeof(double));

            // Device copy of the vector entries, initialised from h_data.
            uint dataBytes = (uint)h_data.Length * sizeof(double);
            d_data = cl.CreateBuffer(device.env.context, cl_mem_flags.CL_MEM_COPY_HOST_PTR, dataBytes, h_data);

            // Write-only result buffer that uses h_result as its host storage.
            uint resultBytes = (uint)groups * sizeof(double);
            cl_mem_flags resultFlags = cl_mem_flags.CL_MEM_USE_HOST_PTR | cl_mem_flags.CL_MEM_WRITE_ONLY;
            d_result = cl.CreateBuffer(device.env.context, resultFlags, resultBytes, h_result);
        }
예제 #8
0
파일: clVector.cs 프로젝트: xyuan/BoSSS
            /// <summary>
            /// Sets up the send side of the communication vector: records the send length per
            /// target process, allocates one contiguous host send buffer plus the matching
            /// device buffers, and uploads the concatenated list of indices to send.
            /// </summary>
            /// <param name="M">Matrix whose <c>_SpmvCommPattern.ComLists</c> defines what is sent to which process</param>
            /// <param name="v">Owning vector; its device is used for all OpenCL objects</param>
            internal clCommVector(MatrixBase M, clVector v)
                : base(M, v)
            {
                owner  = v;
                clfill = cl.CreateKernel(owner.device.vectorProgram, "fillSendBuffer");

                IDictionary<int, int[]> sendLists = M._SpmvCommPattern.ComLists;

                // First pass: remember the number of entries per target rank and sum them up.
                int total = 0;
                foreach (int rank in sendLists.Keys)
                {
                    int count = sendLists[rank].Length;
                    base.SendBuffersLengths[rank] = count;
                    total += count;
                }

                // Round the global work size up to the next multiple of the local work size.
                size       = total;
                globalsize = size;
                int remainder = size % localsize;
                if (remainder > 0)
                {
                    globalsize += localsize - remainder;
                }

                // Nothing to send: no buffers are allocated.
                if (size <= 0)
                {
                    return;
                }

                h_IndicesToSend = new int[size];
                d_IndicesToSend = cl.CreateBuffer(owner.device.env.context, cl_mem_flags.CL_MEM_READ_ONLY, (uint)size * sizeof(int));

                h_SendBuffer = Marshal.AllocHGlobal(size * sizeof(double));
                d_SendBuffer = cl.CreateBuffer(owner.device.env.context, cl_mem_flags.CL_MEM_WRITE_ONLY, (uint)size * sizeof(double));

                // Second pass: carve the contiguous host buffer into one segment per rank and
                // concatenate the per-rank index lists in the same order.
                int offset = 0;
                unsafe
                {
                    double *cursor = (double *)h_SendBuffer;

                    foreach (int rank in sendLists.Keys)
                    {
                        int count = base.SendBuffersLengths[rank];

                        // start address of the segment sent to process 'rank'
                        base.SendBuffers[rank] = (IntPtr)cursor;
                        Array.Copy(sendLists[rank], 0, h_IndicesToSend, offset, count);

                        cursor += count;
                        offset += count;
                    }
                }

                // Blocking upload of the concatenated index list.
                cl.EnqueueWriteBuffer(owner.device.cq, d_IndicesToSend, true, 0, (uint)size * sizeof(int), h_IndicesToSend);
            }
예제 #9
0
 // Declaration for the native OpenCL function clEnqueueCopyBufferToImage, raw-pointer overload.
 // Per the OpenCL specification, it enqueues a copy from `src_buffer`, starting at byte
 // offset `src_offset`, into the image `dst_image`; `dst_origin` and `region` each point
 // to three values (origin and extent of the destination region).
 // `event_wait_list` points to `num_events_in_wait_list` events to wait on; `_event`
 // optionally receives an event for this command. Returns the OpenCL status code.
 internal static extern ErrorCode clEnqueueCopyBufferToImage(
     cl_command_queue command_queue,
     cl_mem src_buffer,
     cl_mem dst_image,
     IntPtr src_offset,
     IntPtr *dst_origin,
     IntPtr *region,
     cl_uint num_events_in_wait_list,
     IntPtr *event_wait_list,
     cl_event *_event);
예제 #10
0
 // Declaration for the native OpenCL function clEnqueueCopyBufferToImage, managed-array overload.
 // Per the OpenCL specification, it enqueues a copy from `src_buffer`, starting at byte
 // offset `src_offset`, into the image `dst_image`.
 // `dst_origin` and `region` are input arrays declared with SizeConst = 3 (origin and
 // extent of the destination region); `event_wait_list` is an input array with
 // `num_events_in_wait_list` entries. `_event` optionally receives an event for this
 // command. Returns the OpenCL status code.
 internal static extern ErrorCode clEnqueueCopyBufferToImage(
     cl_command_queue command_queue,
     cl_mem src_buffer,
     cl_mem dst_image,
     IntPtr src_offset,
     [In][MarshalAs(UnmanagedType.LPArray, SizeConst = 3)] IntPtr[] dst_origin,
     [In][MarshalAs(UnmanagedType.LPArray, SizeConst = 3)] IntPtr[] region,
     cl_uint num_events_in_wait_list,
     [In][MarshalAs(UnmanagedType.LPArray)] cl_event[] event_wait_list,
     cl_event *_event);
예제 #11
0
 // Declaration for the native OpenCL function clEnqueueCopyBuffer, raw-pointer overload.
 // Per the OpenCL specification, it enqueues a copy of `cb` bytes from `src_buffer` at
 // byte offset `src_offset` to `dst_buffer` at byte offset `dst_offset`.
 // `event_wait_list` points to `num_events_in_wait_list` events to wait on; `_event`
 // optionally receives an event for this command. Returns the OpenCL status code.
 internal static extern ErrorCode clEnqueueCopyBuffer(
     cl_command_queue command_queue,
     cl_mem src_buffer,
     cl_mem dst_buffer,
     IntPtr src_offset,
     IntPtr dst_offset,
     IntPtr cb,
     cl_uint num_events_in_wait_list,
     IntPtr *event_wait_list,
     cl_event *_event);
예제 #12
0
 // Declaration for the native OpenCL function clEnqueueCopyBuffer, overload taking the
 // event wait list as a managed array marshalled as LPArray (input only).
 // Per the OpenCL specification, it enqueues a copy of `cb` bytes from `src_buffer` at
 // byte offset `src_offset` to `dst_buffer` at byte offset `dst_offset`.
 // `num_events_in_wait_list` is the number of entries in `event_wait_list`; `_event`
 // optionally receives an event for this command. Returns the OpenCL status code.
 internal static extern ErrorCode clEnqueueCopyBuffer(
     cl_command_queue command_queue,
     cl_mem src_buffer,
     cl_mem dst_buffer,
     IntPtr src_offset,
     IntPtr dst_offset,
     IntPtr cb,
     cl_uint num_events_in_wait_list,
     [In][MarshalAs(UnmanagedType.LPArray)] cl_event[] event_wait_list,
     cl_event *_event);
예제 #13
0
 // Declaration for the native OpenCL function clEnqueueWriteBuffer, raw-pointer overload.
 // Per the OpenCL specification, it enqueues a write of `cb` bytes from host memory `ptr`
 // into `buffer` at byte offset `offset`; `blocking_write` selects whether the call
 // returns only after the host memory has been consumed.
 // `event_wait_list` points to `num_events_in_wait_list` events to wait on; `_event`
 // optionally receives an event for this command. Returns the OpenCL status code.
 internal static extern ErrorCode clEnqueueWriteBuffer(
     cl_command_queue command_queue,
     cl_mem buffer,
     cl_bool blocking_write,
     IntPtr offset,
     IntPtr cb,
     void *ptr,
     cl_uint num_events_in_wait_list,
     IntPtr *event_wait_list,
     cl_event *_event);
예제 #14
0
 // Declaration for the native OpenCL function clEnqueueWriteBuffer, overload taking the
 // event wait list as a managed array marshalled as LPArray (input only).
 // Per the OpenCL specification, it enqueues a write of `cb` bytes from host memory `ptr`
 // into `buffer` at byte offset `offset`; `blocking_write` selects whether the call
 // returns only after the host memory has been consumed.
 // `num_events_in_wait_list` is the number of entries in `event_wait_list`; `_event`
 // optionally receives an event for this command. Returns the OpenCL status code.
 internal static extern ErrorCode clEnqueueWriteBuffer(
     cl_command_queue command_queue,
     cl_mem buffer,
     cl_bool blocking_write,
     IntPtr offset,
     IntPtr cb,
     void *ptr,
     cl_uint num_events_in_wait_list,
     [In][MarshalAs(UnmanagedType.LPArray)] cl_event[] event_wait_list,
     cl_event *_event);
예제 #15
0
파일: clCSRMatrix.cs 프로젝트: xyuan/BoSSS
        /// <summary>
        /// Allocate memory and copy matrix data on GPU
        /// </summary>
        /// <remarks>
        /// Calls the base implementation first, then creates three read-only device buffers
        /// initialised from the <c>Val</c>, <c>RowStart</c> and <c>ColInd</c> arrays of the
        /// local matrix (<c>base.m_LocalMtx</c> cast to <c>MatrixBase.CSR</c>).
        /// </remarks>
        public override void Lock()
        {
            base.Lock();

            MatrixBase.CSR LocalMatrix = (MatrixBase.CSR)base.m_LocalMtx;

            d_val      = cl.CreateBuffer(device.env.context, cl_mem_flags.CL_MEM_COPY_HOST_PTR | cl_mem_flags.CL_MEM_READ_ONLY, (uint)LocalMatrix.Val.Length * sizeof(double), LocalMatrix.Val);

            // Row starts and column indices are sized in ints, not doubles: with
            // CL_MEM_COPY_HOST_PTR a byte count larger than the host array makes the copy read
            // past its end.
            // NOTE(review): assumes RowStart and ColInd are int[] — confirm against MatrixBase.CSR.
            d_rowStart = cl.CreateBuffer(device.env.context, cl_mem_flags.CL_MEM_COPY_HOST_PTR | cl_mem_flags.CL_MEM_READ_ONLY, (uint)LocalMatrix.RowStart.Length * sizeof(int), LocalMatrix.RowStart);
            d_colIdx   = cl.CreateBuffer(device.env.context, cl_mem_flags.CL_MEM_COPY_HOST_PTR | cl_mem_flags.CL_MEM_READ_ONLY, (uint)LocalMatrix.ColInd.Length * sizeof(int), LocalMatrix.ColInd);
        }
예제 #16
0
 // Declaration for the native OpenCL function clEnqueueMapBuffer, raw-pointer overload.
 // Per the OpenCL specification, it enqueues a command that maps `cb` bytes of `buffer`,
 // starting at byte offset `offset`, into the host address space and returns a pointer to
 // the mapped region; `map_flags` selects the access mode and `blocking_map` whether the
 // call waits for the mapping to complete.
 // `event_wait_list` points to `num_events_in_wait_list` events to wait on; `_event`
 // optionally receives an event for this command; `errcode_ret` receives the status code.
 internal static extern void *clEnqueueMapBuffer(
     cl_command_queue command_queue,
     cl_mem buffer,
     cl_bool blocking_map,
     cl_map_flags map_flags,
     IntPtr offset,
     IntPtr cb,
     cl_uint num_events_in_wait_list,
     IntPtr *event_wait_list,
     cl_event *_event,
     out ErrorCode errcode_ret);
예제 #17
0
 // Declaration for the native OpenCL function clEnqueueMapBuffer, overload taking the
 // event wait list as a managed array marshalled as LPArray (input only).
 // Per the OpenCL specification, it enqueues a command that maps `cb` bytes of `buffer`,
 // starting at byte offset `offset`, into the host address space and returns a pointer to
 // the mapped region; `map_flags` selects the access mode and `blocking_map` whether the
 // call waits for the mapping to complete.
 // `num_events_in_wait_list` is the number of entries in `event_wait_list`; `_event`
 // optionally receives an event for this command; `errcode_ret` receives the status code.
 internal static extern void *clEnqueueMapBuffer(
     cl_command_queue command_queue,
     cl_mem buffer,
     cl_bool blocking_map,
     cl_map_flags map_flags,
     IntPtr offset,
     IntPtr cb,
     cl_uint num_events_in_wait_list,
     [In][MarshalAs(UnmanagedType.LPArray)] cl_event[] event_wait_list,
     cl_event *_event,
     out ErrorCode errcode_ret);
예제 #18
0
 // Declaration for the native OpenCL function clEnqueueWriteImage, raw-pointer overload.
 // Per the OpenCL specification, it enqueues a write from host memory `ptr` into the
 // region of `image` described by `origin` and `region` (three values each);
 // `input_row_pitch` and `input_slice_pitch` give the host memory layout in bytes, and
 // `blocking_write` selects whether the call waits until `ptr` has been consumed.
 // `event_wait_list` points to `num_events_in_wait_list` events to wait on; `_event`
 // optionally receives an event for this command. Returns the OpenCL status code.
 internal static extern ErrorCode clEnqueueWriteImage(
     cl_command_queue command_queue,
     cl_mem image,
     cl_bool blocking_write,
     IntPtr *origin,
     IntPtr *region,
     IntPtr input_row_pitch,
     IntPtr input_slice_pitch,
     void *ptr,
     cl_uint num_events_in_wait_list,
     IntPtr *event_wait_list,
     cl_event *_event);
예제 #19
0
 // Declaration for the native OpenCL function clEnqueueWriteImage, managed-array overload.
 // Per the OpenCL specification, it enqueues a write from host memory `ptr` into the
 // region of `image` described by `origin` and `region`, both input arrays declared with
 // SizeConst = 3; `input_row_pitch` and `input_slice_pitch` give the host memory layout
 // in bytes, and `blocking_write` selects whether the call waits until `ptr` has been
 // consumed.
 // `event_wait_list` is an input array with `num_events_in_wait_list` entries; `_event`
 // optionally receives an event for this command. Returns the OpenCL status code.
 internal static extern ErrorCode clEnqueueWriteImage(
     cl_command_queue command_queue,
     cl_mem image,
     cl_bool blocking_write,
     [In][MarshalAs(UnmanagedType.LPArray, SizeConst = 3)] IntPtr[] origin,
     [In][MarshalAs(UnmanagedType.LPArray, SizeConst = 3)] IntPtr[] region,
     IntPtr input_row_pitch,
     IntPtr input_slice_pitch,
     void *ptr,
     cl_uint num_events_in_wait_list,
     [In][MarshalAs(UnmanagedType.LPArray)] cl_event[] event_wait_list,
     cl_event *_event);
예제 #20
0
 // Declaration for the native OpenCL function clEnqueueMapImage, raw-pointer overload.
 // Per the OpenCL specification, it enqueues a command that maps the region of `image`
 // described by `origin` and `region` (three values each) into the host address space and
 // returns a pointer to it; `map_flags` selects the access mode and `blocking_map`
 // whether the call waits for the mapping to complete.
 // `image_row_pitch` and `image_slice_pitch` receive the layout of the mapped region.
 // `event_wait_list` points to `num_events_in_wait_list` events to wait on; `_event`
 // optionally receives an event for this command; `errcode_ret` receives the status code.
 internal static extern void *clEnqueueMapImage(
     cl_command_queue command_queue,
     cl_mem image,
     cl_bool blocking_map,
     cl_map_flags map_flags,
     IntPtr *origin,
     IntPtr *region,
     out IntPtr image_row_pitch,
     out IntPtr image_slice_pitch,
     cl_uint num_events_in_wait_list,
     IntPtr *event_wait_list,
     cl_event *_event,
     out ErrorCode errcode_ret);
예제 #21
0
 // Declaration for the native OpenCL function clEnqueueMapImage, managed-array overload.
 // Per the OpenCL specification, it enqueues a command that maps the region of `image`
 // described by `origin` and `region` (input arrays declared with SizeConst = 3) into the
 // host address space and returns a pointer to it; `map_flags` selects the access mode
 // and `blocking_map` whether the call waits for the mapping to complete.
 // `image_row_pitch` and `image_slice_pitch` receive the layout of the mapped region.
 // `event_wait_list` is an input array with `num_events_in_wait_list` entries; `_event`
 // optionally receives an event for this command; `errcode_ret` receives the status code.
 internal static extern void *clEnqueueMapImage(
     cl_command_queue command_queue,
     cl_mem image,
     cl_bool blocking_map,
     cl_map_flags map_flags,
     [In][MarshalAs(UnmanagedType.LPArray, SizeConst = 3)] IntPtr[] origin,
     [In][MarshalAs(UnmanagedType.LPArray, SizeConst = 3)] IntPtr[] region,
     out IntPtr image_row_pitch,
     out IntPtr image_slice_pitch,
     cl_uint num_events_in_wait_list,
     [In][MarshalAs(UnmanagedType.LPArray)] cl_event[] event_wait_list,
     cl_event *_event,
     out ErrorCode errcode_ret);
예제 #22
0
 // Declaration for the native OpenCL function clEnqueueCopyBufferRect, raw-pointer overload.
 // Per the OpenCL specification, it enqueues a copy of a rectangular (2D or 3D) region
 // from `src_buffer` to `dst_buffer`; `src_origin`, `dst_origin` and `region` each point
 // to three values, and the row/slice pitches describe the memory layout of the source
 // and destination in bytes.
 // `_event_wait_list` points to `num_events_in_wait_list` events to wait on; `_event`
 // optionally receives an event for this command. Returns the OpenCL status code.
 internal static extern ErrorCode clEnqueueCopyBufferRect(
     cl_command_queue command_queue,
     cl_mem src_buffer,
     cl_mem dst_buffer,
     IntPtr *src_origin,
     IntPtr *dst_origin,
     IntPtr *region,
     IntPtr src_row_pitch,
     IntPtr src_slice_pitch,
     IntPtr dst_row_pitch,
     IntPtr dst_slice_pitch,
     cl_uint num_events_in_wait_list,
     cl_event *_event_wait_list,
     cl_event *_event);
예제 #23
0
 // Declaration for the native OpenCL function clEnqueueCopyBufferRect, managed-array overload.
 // Per the OpenCL specification, it enqueues a copy of a rectangular (2D or 3D) region
 // from `src_buffer` to `dst_buffer`; `src_origin`, `dst_origin` and `region` are input
 // arrays (the specification expects three values each), and the row/slice pitches
 // describe the memory layout of the source and destination in bytes.
 // `_event_wait_list` is an input array with `num_events_in_wait_list` entries; `_event`
 // optionally receives an event for this command. Returns the OpenCL status code.
 internal static extern ErrorCode clEnqueueCopyBufferRect(
     cl_command_queue command_queue,
     cl_mem src_buffer,
     cl_mem dst_buffer,
     [In] IntPtr[] src_origin,
     [In] IntPtr[] dst_origin,
     [In] IntPtr[] region,
     IntPtr src_row_pitch,
     IntPtr src_slice_pitch,
     IntPtr dst_row_pitch,
     IntPtr dst_slice_pitch,
     cl_uint num_events_in_wait_list,
     [In][MarshalAs(UnmanagedType.LPArray)] cl_event[] _event_wait_list,
     cl_event *_event);
예제 #24
0
 // Declaration for the native OpenCL function clEnqueueWriteBufferRect, raw-pointer overload.
 // Per the OpenCL specification, it enqueues a write of a rectangular (2D or 3D) region
 // from host memory `ptr` into `buffer`; `buffer_offset`, `host_offset` and `region` each
 // point to three values (origin in the buffer, origin in host memory, extent), and the
 // row/slice pitches describe both memory layouts in bytes. `blocking_write` selects
 // whether the call waits until `ptr` has been consumed.
 // `_event_wait_list` points to `num_events_in_wait_list` events to wait on; `_event`
 // optionally receives an event for this command. Returns the OpenCL status code.
 internal static extern ErrorCode clEnqueueWriteBufferRect(
     cl_command_queue command_queue,
     cl_mem buffer,
     cl_bool blocking_write,
     IntPtr *buffer_offset,
     IntPtr *host_offset,
     IntPtr *region,
     IntPtr buffer_row_pitch,
     IntPtr buffer_slice_pitch,
     IntPtr host_row_pitch,
     IntPtr host_slice_pitch,
     void *ptr,
     cl_uint num_events_in_wait_list,
     cl_event *_event_wait_list,
     cl_event *_event);
예제 #25
0
 // Declaration for the native OpenCL function clEnqueueWriteBufferRect, managed-array overload.
 // Per the OpenCL specification, it enqueues a write of a rectangular (2D or 3D) region
 // from host memory `ptr` into `buffer`; `buffer_offset`, `host_offset` and `region` are
 // input arrays (the specification expects three values each: origin in the buffer,
 // origin in host memory, extent), and the row/slice pitches describe both memory
 // layouts in bytes. `blocking_write` selects whether the call waits until `ptr` has
 // been consumed.
 // `_event_wait_list` is an input array with `num_events_in_wait_list` entries; `_event`
 // optionally receives an event for this command. Returns the OpenCL status code.
 internal static extern ErrorCode clEnqueueWriteBufferRect(
     cl_command_queue command_queue,
     cl_mem buffer,
     cl_bool blocking_write,
     [In] IntPtr[] buffer_offset,
     [In] IntPtr[] host_offset,
     [In] IntPtr[] region,
     IntPtr buffer_row_pitch,
     IntPtr buffer_slice_pitch,
     IntPtr host_row_pitch,
     IntPtr host_slice_pitch,
     void *ptr,
     cl_uint num_events_in_wait_list,
     [In][MarshalAs(UnmanagedType.LPArray)] cl_event[] _event_wait_list,
     cl_event *_event);
예제 #26
0
파일: clCSRMatrix.cs 프로젝트: xyuan/BoSSS
        /// <summary>
        /// Binds all nine arguments of the <c>clmultiply</c> kernel: matrix buffers, result and
        /// input vector buffers, a local scratch area and the scalar parameters.
        /// </summary>
        /// <param name="alpha">Scalar bound as kernel argument 6</param>
        /// <param name="a">Vector whose device buffer is bound as kernel argument 4</param>
        /// <param name="beta">Scalar bound as kernel argument 7</param>
        /// <param name="acc">Vector whose device buffer is bound as kernel argument 3</param>
        internal override void SetArguments(double alpha, clVector a, double beta, clVector acc)
        {
            cl_mem inputBuffer  = a.GetDevicePointer();
            cl_mem outputBuffer = acc.GetDevicePointer();

            // Matrix data: values, column indices, row starts.
            cl.SetKernelArg(clmultiply, 0, d_val);
            cl.SetKernelArg(clmultiply, 1, d_colIdx);
            cl.SetKernelArg(clmultiply, 2, d_rowStart);

            // Result vector first, then the input vector.
            cl.SetKernelArg(clmultiply, 3, outputBuffer);
            cl.SetKernelArg(clmultiply, 4, inputBuffer);

            // Local memory holding (localsize + 1) ints.
            cl.SetKernelArgLocalSize(clmultiply, 5, (uint)(localsize + 1) * sizeof(int));

            // Scalars.
            cl.SetKernelArg(clmultiply, 6, alpha);
            cl.SetKernelArg(clmultiply, 7, beta);
            cl.SetKernelArg(clmultiply, 8, size);
        }
예제 #27
0
        /// <summary>
        /// Binds all ten arguments of the <c>clmultiply</c> kernel: matrix buffers, input and
        /// result vector buffers, the scalars and the layout parameters.
        /// </summary>
        /// <param name="alpha">Scalar bound as kernel argument 4</param>
        /// <param name="a">Vector whose device buffer is bound as kernel argument 2</param>
        /// <param name="beta">Scalar bound as kernel argument 5</param>
        /// <param name="acc">Vector whose device buffer is bound as kernel argument 3</param>
        internal override void SetArguments(double alpha, clVector a, double beta, clVector acc)
        {
            cl_mem inputBuffer  = a.GetDevicePointer();
            cl_mem outputBuffer = acc.GetDevicePointer();

            // Matrix data: values and column indices.
            cl.SetKernelArg(clmultiply, 0, d_val);
            cl.SetKernelArg(clmultiply, 1, d_colIdx);

            // Input vector first, then the result vector.
            cl.SetKernelArg(clmultiply, 2, inputBuffer);
            cl.SetKernelArg(clmultiply, 3, outputBuffer);

            // Scalars.
            cl.SetKernelArg(clmultiply, 4, alpha);
            cl.SetKernelArg(clmultiply, 5, beta);

            // Size and layout parameters.
            cl.SetKernelArg(clmultiply, 6, size);
            cl.SetKernelArg(clmultiply, 7, colCount);
            cl.SetKernelArg(clmultiply, 8, valStride);
            cl.SetKernelArg(clmultiply, 9, colStride);
        }
예제 #28
0
파일: clMatrix.cs 프로젝트: xyuan/BoSSS
        /// <summary>
        /// Prepares accumulation of the external part: stores <paramref name="alpha"/>,
        /// remembers the device buffer of <paramref name="acc"/> and zeroes the host buffer
        /// <c>h_ElementsToAcc</c>.
        /// </summary>
        /// <param name="alpha">Scaling factor, stored in <c>m_alpha</c></param>
        /// <param name="beta">Not used by this method</param>
        /// <param name="acc">Accumulator vector; must be a <c>clVector</c></param>
        internal override void SpMV_External_Begin(double alpha, double beta, VectorBase acc)
        {
            m_alpha = alpha;
            d_acc   = ((clVector)acc).GetDevicePointer();

            // Clear all extSize entries of the unmanaged host accumulation buffer.
            unsafe
            {
                double *accStore = (double *)h_ElementsToAcc;
                int     count    = (int)extSize;

                for (int k = 0; k < count; k++)
                {
                    accStore[k] = 0;
                }
            }
        }
예제 #29
0
        /// <summary>
        /// Binds all thirteen arguments of the <c>clmultiply</c> kernel: matrix buffers, the
        /// sub-vector lookup buffers, input and result vector buffers, a local scratch area,
        /// the scalars and the layout parameters.
        /// </summary>
        /// <param name="alpha">Scalar bound as kernel argument 7</param>
        /// <param name="a">Vector whose device buffer is bound as kernel argument 4</param>
        /// <param name="beta">Scalar bound as kernel argument 8</param>
        /// <param name="acc">Vector whose device buffer is bound as kernel argument 5</param>
        internal override void SetArguments(double alpha, clVector a, double beta, clVector acc)
        {
            cl_mem inputBuffer  = a.GetDevicePointer();
            cl_mem outputBuffer = acc.GetDevicePointer();

            // Matrix data: values and column indices.
            cl.SetKernelArg(clmultiply, 0, d_val);
            cl.SetKernelArg(clmultiply, 1, d_colIdx);

            // Sub-vector start offsets and the flattened sub-vector indices.
            cl.SetKernelArg(clmultiply, 2, d_xSubStart);
            cl.SetKernelArg(clmultiply, 3, d_blockSubVector);

            // Input vector first, then the result vector.
            cl.SetKernelArg(clmultiply, 4, inputBuffer);
            cl.SetKernelArg(clmultiply, 5, outputBuffer);

            // Local memory holding maxBlockSubVectorLength doubles.
            cl.SetKernelArgLocalSize(clmultiply, 6, (uint)maxBlockSubVectorLength * sizeof(double));

            // Scalars.
            cl.SetKernelArg(clmultiply, 7, alpha);
            cl.SetKernelArg(clmultiply, 8, beta);

            // Size and layout parameters.
            cl.SetKernelArg(clmultiply, 9, size);
            cl.SetKernelArg(clmultiply, 10, colCount);
            cl.SetKernelArg(clmultiply, 11, valStride);
            cl.SetKernelArg(clmultiply, 12, colStride);
        }
예제 #30
0
        /// <summary>
        /// Binds all fourteen arguments of the <c>clmultiply</c> kernel: cell data and column
        /// buffers, input and result vector buffers, three local scratch areas, the scalars
        /// and the cell layout parameters.
        /// </summary>
        /// <param name="alpha">Scalar bound as kernel argument 7</param>
        /// <param name="a">Vector whose device buffer is bound as kernel argument 1</param>
        /// <param name="beta">Scalar bound as kernel argument 8</param>
        /// <param name="acc">Vector whose device buffer is bound as kernel argument 3</param>
        internal override void SetArguments(double alpha, clVector a, double beta, clVector acc)
        {
            cl_mem inputBuffer  = a.GetDevicePointer();
            cl_mem outputBuffer = acc.GetDevicePointer();

            // Device buffers: cell data, input vector, cell column indices, result vector.
            cl.SetKernelArg(clmultiply, 0, d_cellData);
            cl.SetKernelArg(clmultiply, 1, inputBuffer);
            cl.SetKernelArg(clmultiply, 2, d_cellColIdx);
            cl.SetKernelArg(clmultiply, 3, outputBuffer);

            // Local memory: localsize doubles, then two areas of cellrowsperblock ints each.
            cl.SetKernelArgLocalSize(clmultiply, 4, (uint)(localsize * sizeof(double)));
            cl.SetKernelArgLocalSize(clmultiply, 5, (uint)(cellrowsperblock * sizeof(int)));
            cl.SetKernelArgLocalSize(clmultiply, 6, (uint)(cellrowsperblock * sizeof(int)));

            // Scalars.
            cl.SetKernelArg(clmultiply, 7, alpha);
            cl.SetKernelArg(clmultiply, 8, beta);

            // Cell layout and size parameters.
            cl.SetKernelArg(clmultiply, 9, cellsize);
            cl.SetKernelArg(clmultiply, 10, cellrowsperblock);
            cl.SetKernelArg(clmultiply, 11, cellsperrow);
            cl.SetKernelArg(clmultiply, 12, stride);
            cl.SetKernelArg(clmultiply, 13, size);
        }