示例#1
0
        /// <summary>
        /// Prepares device-side state: uploads the host arrays (alpha, G, y, QD),
        /// creates the kernel-column and reduction buffers, writes the module
        /// global "C", and finally calls <c>SetCudaParams</c>.
        /// </summary>
        private void SetCudaData()
        {
            // reductionThreads / reductionBlocks are passed by ref and are used below,
            // so this call has to come first.
            CudaHelpers.GetNumThreadsAndBlocks(
                problemSize,
                maxReductionBlocks,
                threadsPerBlock,
                ref reductionThreads,
                ref reductionBlocks);

            // Host arrays -> device buffers.
            alphaPtr = cuda.CopyHostToDevice(alpha);
            gradPtr = cuda.CopyHostToDevice(G);
            yPtr = cuda.CopyHostToDevice(y);
            kernelDiagPtr = cuda.CopyHostToDevice(QD);

            // Buffers for kernel columns i and j are created by uploading alpha;
            // per the original note this is just a convenient source of zeros
            // (assumes alpha is all zeros at this point - TODO confirm).
            kiPtr = cuda.CopyHostToDevice(alpha);
            kjPtr = cuda.CopyHostToDevice(alpha);

            // Host-side reduction scratch arrays, mirrored on the device.
            // TODO (carried over): sized by reductionThreads; reductionBlocks was
            // noted as the alternative and this sizing was marked for removal.
            reduceVal = new float[reductionThreads];
            reduceIdx = new int[reductionThreads];
            valRedPtr = cuda.CopyHostToDevice(reduceVal);
            idxRedPtr = cuda.CopyHostToDevice(reduceIdx);

            // Write the scalar C into the module-level global named "C".
            constCPtr = cuda.GetModuleGlobal(cuModule, "C");
            cuda.CopyHostToDevice(constCPtr, new float[] { C });

            SetCudaParams();
        }
        /// <summary>
        /// Stores the CUDA wrapper and the device pointers this builder works on,
        /// and derives the number of blocks needed to cover <paramref name="dim"/>
        /// elements with <c>threadsPerBlock</c> threads each.
        /// </summary>
        /// <param name="cu">CUDA wrapper instance, kept in <c>cuda</c>.</param>
        /// <param name="vector">Device pointer kept in <c>vecPtr</c> (presumably the dense output vector - confirm).</param>
        /// <param name="vals">Device pointer kept in <c>valsPtr</c> (presumably the Ellpack values array - confirm).</param>
        /// <param name="cols">Device pointer kept in <c>idxPtr</c> (presumably the Ellpack column indices - confirm).</param>
        /// <param name="length">Device pointer kept in <c>vecLengthPtr</c> (presumably per-row lengths - confirm).</param>
        /// <param name="rows">Row count; must be non-negative.</param>
        /// <param name="dim">Vector dimension; must be non-negative.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Thrown when <paramref name="rows"/> or <paramref name="dim"/> is negative.
        /// </exception>
        public EllpackDenseVectorBuilder(CUDA cu,CUdeviceptr vector, CUdeviceptr vals,CUdeviceptr cols,CUdeviceptr length,int rows,int dim)
        {
            // A negative int cast to uint silently wraps to a huge value, which would
            // produce an absurd grid size; reject it up front instead.
            if (rows < 0)
                throw new ArgumentOutOfRangeException("rows", "rows must be non-negative");
            if (dim < 0)
                throw new ArgumentOutOfRangeException("dim", "dim must be non-negative");

            cuda = cu;
            vecPtr = vector;
            valsPtr = vals;
            idxPtr = cols;
            vecLengthPtr = length;
            nrRows = (uint)rows;
            vecDim = (uint)dim;

            // Integer ceil-division: smallest block count such that
            // blocksPerGrid * threadsPerBlock >= vecDim (0 blocks when vecDim is 0).
            blocksPerGrid = (int)((vecDim + threadsPerBlock - 1) / threadsPerBlock);
        }
示例#3
0
        /// <summary>
        /// Prepares device-side state: uploads the host arrays (alpha, G, y),
        /// allocates one buffer shared by kernel columns i and j, creates the
        /// reduction buffers, writes the module globals "C", "B" and "A", and
        /// finally calls <c>SetCudaParams</c>.
        /// </summary>
        private void SetCudaData()
        {
            // reductionThreads / reductionBlocks are passed by ref and are used below,
            // so this call has to come first.
            CudaHelpers.GetNumThreadsAndBlocks(
                problemSize,
                maxReductionBlocks,
                threadsPerBlock,
                ref reductionThreads,
                ref reductionBlocks);

            // Host arrays -> device buffers.
            alphaPtr = cuda.CopyHostToDevice(alpha);
            gradPtr = cuda.CopyHostToDevice(G);
            yPtr = cuda.CopyHostToDevice(y);

            // Kernel columns i and j live in a single allocation: column i occupies
            // the first problemSize floats and column j starts right after it.
            // The memory is only allocated here; nothing in this method initializes it.
            var columnBytes = sizeof(float) * problemSize;
            uint bothColumnsBytes = (uint)(columnBytes * 2);

            kiPtr = cuda.Allocate(bothColumnsBytes);
            kjPtr = kiPtr + columnBytes;

            // Host-side reduction scratch arrays (twice reductionThreads entries),
            // mirrored on the device.
            // TODO (carried over): reductionBlocks was noted as the alternative base
            // size and this sizing was marked for removal.
            int reductionLength = reductionThreads * 2;
            reduceVal = new float[reductionLength];
            reduceIdx = new int[reductionLength];
            valRedPtr = cuda.CopyHostToDevice(reduceVal);
            idxRedPtr = cuda.CopyHostToDevice(reduceIdx);

            // Module global "C": the scalar C.
            constCPtr = cuda.GetModuleGlobal(cuModule, "C");
            cuda.CopyHostToDevice(constCPtr, new float[] { C });

            // Module global "B": the triple { 0, 0, C }, also kept in the B field.
            constBPtr = cuda.GetModuleGlobal(cuModule, "B");
            B = new float[] { 0, 0, C };
            cuda.CopyHostToDevice(constBPtr, B);

            // Module global "A": the triple { -C, 0, 0 }, also kept in the A field.
            constAPtr = cuda.GetModuleGlobal(cuModule, "A");
            A = new float[] { -C, 0, 0 };
            cuda.CopyHostToDevice(constAPtr, A);

            SetCudaParams();
        }
 // Static extern declarations named after cuBLAS routines.
 // NOTE(review): presumably P/Invoke bindings to the native cuBLAS library; the
 // DllImport attributes and the enclosing class are not visible in this excerpt - confirm.
 //
 // Conventions visible in the signatures below:
 //   * vector / matrix operands are passed as CUdeviceptr handles;
 //   * sizes (m, n, k), strides (incx, incy) and leading dimensions (lda, ldb, ldc) are ints;
 //   * selector arguments (side, uplo, trans/transa/transb, diag) are chars;
 //   * the letter after "cublas" generally tracks the scalar argument type:
 //       S -> float, D -> double, C -> cuFloatComplex, Z -> cuDoubleComplex,
 //     except that some Z routines take real double scalars
 //     (cublasZdscal, cublasZdrot, cublasZherk, cublasZhpr; cublasZrot takes one of each);
 //   * every routine returns void except cublasDdot and cublasDnrm2 (double)
 //     and cublasZdotu (cuDoubleComplex).
 public static extern void cublasDger(int m, int n, double alpha, CUdeviceptr x, int incx, CUdeviceptr y, int incy, CUdeviceptr A, int lda);
 public static extern void cublasDgemm(char transa, char transb, int m, int n, int k, double alpha, CUdeviceptr A, int lda, CUdeviceptr B, int ldb, double beta, CUdeviceptr C, int ldc);
 public static extern void cublasDcopy(int n, CUdeviceptr x, int incx, CUdeviceptr y, int incy);
 public static extern void cublasCtrsv(char uplo, char trans, char diag, int n, CUdeviceptr A, int lda, CUdeviceptr x, int incx);
 public static extern void cublasZtrmm(char side, char uplo, char transa, char diag, int m, int n, cuDoubleComplex alpha, CUdeviceptr A, int lda, CUdeviceptr B, int ldb);
 public static extern void cublasZsyrk(char uplo, char trans, int n, int k, cuDoubleComplex alpha, CUdeviceptr A, int lda, cuDoubleComplex beta, CUdeviceptr C, int ldc);
 public static extern void cublasZswap(int n, CUdeviceptr x, int incx, CUdeviceptr y, int incy);
 public static extern void cublasZdscal(int n, double alpha, CUdeviceptr x, int incx);
 public static extern void cublasZdrot(int n, CUdeviceptr x, int incx, CUdeviceptr y, int incy, double c, double s);
 public static extern cuDoubleComplex cublasZdotu(int n, CUdeviceptr x, int incx, CUdeviceptr y, int incy);
 public static extern void cublasZaxpy(int n, cuDoubleComplex alpha, CUdeviceptr x, int incx, CUdeviceptr y, int incy);
 public static extern void cublasStrsm(char side, char uplo, char transa, char diag, int m, int n, float alpha, CUdeviceptr A, int lda, CUdeviceptr B, int ldb);
 public static extern void cublasSsyrk(char uplo, char trans, int n, int k, float alpha, CUdeviceptr A, int lda, float beta, CUdeviceptr C, int ldc);
 public static extern void cublasZscal(int n, cuDoubleComplex alpha, CUdeviceptr x, int incx);
 public static extern void cublasZgemm(char transa, char transb, int m, int n, int k, cuDoubleComplex alpha, CUdeviceptr A, int lda, CUdeviceptr B, int ldb, cuDoubleComplex beta, CUdeviceptr C, int ldc);
 public static extern void cublasZsymm(char side, char uplo, int m, int n, cuDoubleComplex alpha, CUdeviceptr A, int lda, CUdeviceptr B, int ldb, cuDoubleComplex beta, CUdeviceptr C, int ldc);
 public static extern void cublasZgemv(char trans, int m, int n, cuDoubleComplex alpha, CUdeviceptr A, int lda, CUdeviceptr x, int incx, cuDoubleComplex beta, CUdeviceptr y, int incy);
 public static extern void cublasZtpsv(char uplo, char trans, char diag, int n, CUdeviceptr AP, CUdeviceptr x, int incx);
 public static extern void cublasZgeru(int m, int n, cuDoubleComplex alpha, CUdeviceptr x, int incx, CUdeviceptr y, int incy, CUdeviceptr A, int lda);
 public static extern void cublasCtrsm(char side, char uplo, char transa, char diag, int m, int n, cuFloatComplex alpha, CUdeviceptr A, int lda, CUdeviceptr B, int ldb);
 public static extern void cublasZherk(char uplo, char trans, int n, int k, double alpha, CUdeviceptr A, int lda, double beta, CUdeviceptr C, int ldc);
 public static extern void cublasDaxpy(int n, double alpha, CUdeviceptr x, int incx, CUdeviceptr y, int incy);
 public static extern void cublasZhpmv(char uplo, int n, cuDoubleComplex alpha, CUdeviceptr AP, CUdeviceptr x, int incx, cuDoubleComplex beta, CUdeviceptr y, int incy);
 public static extern double cublasDdot(int n, CUdeviceptr x, int incx, CUdeviceptr y, int incy);
 public static extern void cublasZhpr(char uplo, int n, double alpha, CUdeviceptr x, int incx, CUdeviceptr AP);
 public static extern void cublasDgemv(char trans, int m, int n, double alpha, CUdeviceptr A, int lda, CUdeviceptr x, int incx, double beta, CUdeviceptr y, int incy);
 public static extern void cublasZhpr2(char uplo, int n, cuDoubleComplex alpha, CUdeviceptr x, int incx, CUdeviceptr y, int incy, CUdeviceptr AP);
 public static extern double cublasDnrm2(int n, CUdeviceptr x, int incx);
 public static extern void cublasZrot(int n, CUdeviceptr x, int incx, CUdeviceptr y, int incy, double sc, cuDoubleComplex cs);