Пример #1
0
        /// <summary>
        /// Builds the matrix from <paramref name="M"/> and derives the launch layout
        /// (threads per block and number of blocks) from the local matrix data,
        /// which must be of type <c>CCBCSR</c> (the cast below throws otherwise).
        /// </summary>
        /// <param name="M">Source matrix; forwarded to the base constructor.</param>
        /// <param name="CudaEnv">CUDA environment; forwarded to the base constructor.</param>
        internal CudaCCBCSRMatrix(MsrMatrix M, CudaEnviroment CudaEnv)
            : base(M, "blockMultiply2", CudaEnv)
        {
            CCBCSR data = (CCBCSR)m_LocalMtx;
            m_internalData = data;

            // Layout parameters taken directly from the CCBCSR data.
            cellsize    = data.CellSize;
            cellsperrow = data.NoOfCellsPerRow;
            stride      = data.CellStride;

            // Locally stored rows, as reported by the row partitioning.
            rowcount = base.RowPartitioning.LocalLength;

            // Cell rows per block: chosen so that one block holds roughly 128 threads.
            cellrowsperblock = (int)Math.Ceiling(128.0 / cellsize);

            // Threads per block.
            blocksize = cellrowsperblock * cellsize;

            // Number of blocks: rowcount / blocksize, rounded up so that a trailing
            // partial block is still counted.
            blockcount = rowcount / blocksize;
            if (rowcount % blocksize > 0)
            {
                blockcount++;
            }
        }
Пример #2
0
        /// <summary>
        /// Builds the matrix from <paramref name="M"/> and derives the local and
        /// global work sizes from the local matrix data, which must be of type
        /// <c>CCBCSR</c> (the cast below throws otherwise).
        /// </summary>
        /// <param name="M">Source matrix; forwarded to the base constructor.</param>
        /// <param name="device">Device handle; forwarded to the base constructor.</param>
        public clCCBCSRMatrix(MsrMatrix M, clDevice device)
            : base(M, device, "ccbcsrMultiply")
        {
            CCBCSR data = (CCBCSR)m_LocalMtx;
            m_internalData = data;

            // Layout parameters taken directly from the CCBCSR data.
            cellsize    = data.CellSize;
            cellsperrow = data.NoOfCellsPerRow;
            stride      = data.CellStride;

            // Locally stored rows, as reported by the row partitioning.
            size = base.RowPartitioning.LocalLength;

            // Cell rows per block: chosen so that one block holds roughly 128 threads.
            // NOTE(review): the previous comment here said 256 while the constant is
            // 128 - confirm which value is intended.
            cellrowsperblock = (int)Math.Ceiling(128.0 / cellsize);

            // Threads per block.
            localsize = cellrowsperblock * cellsize;

            // Global size: 'size' padded up to the next multiple of 'localsize'.
            globalsize = size;
            int overhang = size % localsize;

            if (overhang > 0)
            {
                globalsize += localsize - overhang;
            }
        }