Beispiel #1
0
 /// <summary>
 /// Creates an OpenCL vector that uses caller-supplied host memory.
 /// </summary>
 /// <param name="p">Partitioning; forwarded to the base constructor and to <c>init</c>.</param>
 /// <param name="content">Host array used as the storage of this vector (kept by reference, not copied here).</param>
 /// <param name="device">Device whose <c>vectorProgram</c> is handed to <c>init</c>.</param>
 /// <exception cref="System.ArgumentNullException">
 /// <paramref name="content"/> or <paramref name="device"/> is <c>null</c>.
 /// </exception>
 public clVector(IPartitioning p, double[] content, clDevice device)
     : base(p)
 {
     // Fail fast with a descriptive exception instead of a late NullReferenceException
     // (device) or a silently stored null host array (content).
     if (content == null)
     {
         throw new System.ArgumentNullException("content");
     }
     if (device == null)
     {
         throw new System.ArgumentNullException("device");
     }

     h_data      = content;
     this.device = device;
     init(p, device.vectorProgram);
 }
Beispiel #2
0
 /// <summary>
 /// Creates an OpenCL vector backed by a newly allocated host array.
 /// </summary>
 /// <param name="p">Partitioning; its <c>LocalLength</c> determines the length of the host array.</param>
 /// <param name="device">Device whose <c>vectorProgram</c> is passed to <c>init</c>.</param>
 public clVector(IPartitioning p, clDevice device)
     : base(p)
 {
     this.device = device;

     // One host-side entry per element of the local partition.
     h_data = new double[p.LocalLength];

     init(p, device.vectorProgram);
 }
Beispiel #3
0
        /// <summary>
        /// Creates the matrix: packs <paramref name="M"/>, creates the multiplication and
        /// "accumulateExternal" kernels, calls <c>LMAA</c> and, when <c>extSize</c> is positive,
        /// allocates the buffers used for accumulating external elements.
        /// </summary>
        /// <param name="M">Original matrix</param>
        /// <param name="device">OpenCL device; its <c>matrixProgram</c>, <c>env.context</c> and <c>cq</c> are used here</param>
        /// <param name="kernelName">Name of the multiplication kernel to create from <c>device.matrixProgram</c></param>
        public clMatrix(MsrMatrix M, clDevice device, string kernelName)
            : base(M)
        {
            this.device = device;
            base.PackMatrix(M);

            this.clmultiply = cl.CreateKernel(device.matrixProgram, kernelName);
            this.claccext = cl.CreateKernel(device.matrixProgram, "accumulateExternal");
            disposed = false;

            LMAA();

            // No external elements: none of the accumulation buffers are required.
            if (extSize <= 0)
            {
                return;
            }

            // Global work size for the accumulation kernel: extSize rounded up to
            // the next whole multiple of extlocalsize.
            int remainder = extSize % extlocalsize;
            int padding = (remainder > 0) ? extlocalsize - remainder : 0;
            extglobalsize = extSize + padding;

            // Byte counts of the value buffer (double) and the index buffer (int).
            uint valueBytes = (uint)extSize * sizeof(double);
            uint indexBytes = (uint)extSize * sizeof(int);

            h_ElementsToAcc = Marshal.AllocHGlobal(extSize * sizeof(double));
            d_ElementsToAcc = cl.CreateBuffer(device.env.context, cl_mem_flags.CL_MEM_READ_ONLY, valueBytes);

            // The index list is uploaded once, with a blocking write.
            d_IndicesToAccumulate = cl.CreateBuffer(device.env.context, cl_mem_flags.CL_MEM_READ_ONLY, indexBytes);
            cl.EnqueueWriteBuffer(device.cq, d_IndicesToAccumulate, true, 0, indexBytes, h_IndicesToAccumulate);
        }
Beispiel #4
0
        /// <summary>
        /// Creates a CSR matrix that multiplies with the "csrMultiply" kernel and
        /// derives its work sizes from the local length of the row partitioning.
        /// </summary>
        /// <param name="M">Original matrix, forwarded to the base constructor</param>
        /// <param name="device">OpenCL device, forwarded to the base constructor</param>
        public clCSRMatrix(MsrMatrix M, clDevice device)
            : base(M, device, "csrMultiply")
        {
            localsize = 256;
            size = base.RowPartitioning.LocalLength;

            // Global size is size rounded up to a whole multiple of localsize.
            int remainder = size % localsize;
            globalsize = (remainder > 0) ? size + (localsize - remainder) : size;
        }
Beispiel #5
0
        /// <summary>
        /// Returns the device stored in <c>m_DeviceS</c> for <paramref name="DevType"/>;
        /// if there is none, creates it, stores it under that key and returns it.
        /// </summary>
        /// <param name="DevType">
        /// Requested device type. <c>Auto</c> tries CUDA first and falls back to the CPU device.
        /// </param>
        /// <returns>The stored or newly created device.</returns>
        /// <exception cref="NotImplementedException">
        /// <paramref name="DevType"/> has no case in this factory.
        /// </exception>
        static Device GetOrCereateDevice(DeviceType DevType)
        {
            Device device;
            if (m_DeviceS.TryGetValue(DevType, out device) && device != null)
            {
                return device;
            }

            switch (DevType)
            {
                case DeviceType.Cuda:
                    device = new CUDA.CudaDevice(new CUDA.CudaEnviroment(Environment.MPIEnv));
                    break;

                case DeviceType.OpenCL:
                    device = new CL.clDevice(new CL.clEnvironment(Environment.MPIEnv));
                    break;

                case DeviceType.CPU:
                    device = new CPU.ReferenceDevice();
                    break;

                case DeviceType.MultiThreadCPU:
                    device = new mtCPU.MtDevice();
                    break;

                case DeviceType.Auto:
                    // First choice is CUDA; any failure while creating it only means
                    // "not available" and selects the fallback below.
                    try
                    {
                        device = GetOrCereateDevice(DeviceType.Cuda);
                    }
                    catch (Exception)
                    {
                        device = null;
                    }

                    // OpenCL is currently not probed in Auto mode (that attempt is disabled);
                    // the fallback goes straight to the CPU device.
                    if (device == null)
                    {
                        device = GetOrCereateDevice(DeviceType.CPU);
                    }
                    break;

                default:
                    throw new NotImplementedException("monkey device type: " + DevType.ToString() + " missing in factory.");
            }

            // Store under the requested key, so an Auto request is also recorded as Auto.
            m_DeviceS.Add(DevType, device);
            return device;
        }
Beispiel #6
0
        /// <summary>
        /// Creates an ELLPACK-format matrix that multiplies with the "ellMultiply" kernel,
        /// reading its dimensions and strides from the packed local matrix.
        /// </summary>
        /// <param name="M">Original matrix, forwarded to the base constructor</param>
        /// <param name="device">OpenCL device, forwarded to the base constructor</param>
        public clELLPACKmodMatrix(MsrMatrix M, clDevice device)
            : base(M, device, "ellMultiply")
        {
            m_internalData = (ELLPACKmod)m_LocalMtx;

            // Dimensions of the packed matrix.
            size = m_internalData.NoOfRows;
            colCount = m_internalData.NoOfPackedCols;

            // Column strides of the column-index array and of the entry array.
            colStride = m_internalData.ColInd.ColStride;
            valStride = m_internalData.MtxEntries.ColStride;

            // Global size is size rounded up to a whole multiple of localsize.
            localsize = 256;
            int remainder = size % localsize;
            globalsize = (remainder > 0) ? size + (localsize - remainder) : size;
        }
Beispiel #7
0
        /// <summary>
        /// Creates a CCBCSR-format matrix that multiplies with the "ccbcsrMultiply" kernel and
        /// derives the work sizes from the cell size of the packed local matrix.
        /// </summary>
        /// <param name="M">Original matrix, forwarded to the base constructor</param>
        /// <param name="device">OpenCL device, forwarded to the base constructor</param>
        public clCCBCSRMatrix(MsrMatrix M, clDevice device)
            : base(M, device, "ccbcsrMultiply")
        {
            m_internalData = (CCBCSR)m_LocalMtx;

            size = base.RowPartitioning.LocalLength;
            cellsize = m_internalData.CellSize;
            cellsperrow = m_internalData.NoOfCellsPerRow;
            stride = m_internalData.CellStride;

            // Cell rows per block: the smallest count for which
            // cellsize * cellrowsperblock reaches 128.
            // NOTE(review): an earlier comment here mentioned ~256 threads per block,
            // but the constant in use is 128 - confirm which is intended.
            cellrowsperblock = (int)Math.Ceiling(128.0 / cellsize);

            // Threads per block.
            localsize = cellsize * cellrowsperblock;

            // Global size is size rounded up to a whole multiple of localsize.
            int remainder = size % localsize;
            globalsize = (remainder > 0) ? size + (localsize - remainder) : size;
        }