/// <summary>
/// Computes the reduction launch sizes, uploads the host arrays to the device,
/// writes the module-level global "C", and finally calls <c>SetCudaParams()</c>.
/// </summary>
private void SetCudaData()
{
    // reductionThreads / reductionBlocks are passed by ref and filled in by the helper.
    CudaHelpers.GetNumThreadsAndBlocks(
        problemSize,
        maxReductionBlocks,
        threadsPerBlock,
        ref reductionThreads,
        ref reductionBlocks);

    // Host arrays copied to the device unchanged.
    alphaPtr = cuda.CopyHostToDevice(alpha);
    gradPtr = cuda.CopyHostToDevice(G);
    yPtr = cuda.CopyHostToDevice(y);
    kernelDiagPtr = cuda.CopyHostToDevice(QD);

    // Buffers for kernel columns i and j. The original note says copying an
    // array of zeros is the simplest way to create them; alpha is the array used.
    kiPtr = cuda.CopyHostToDevice(alpha);
    kjPtr = cuda.CopyHostToDevice(alpha);

    // TODO (carried over from the original): remove it.
    // Sized by reductionThreads; reductionBlocks was the noted alternative.
    int reductionLength = reductionThreads;
    reduceVal = new float[reductionLength];
    reduceIdx = new int[reductionLength];
    valRedPtr = cuda.CopyHostToDevice(reduceVal);
    idxRedPtr = cuda.CopyHostToDevice(reduceIdx);

    // Look up the module global named "C" and write the host value of C into it.
    constCPtr = cuda.GetModuleGlobal(cuModule, "C");
    cuda.CopyHostToDevice(constCPtr, new float[] { C });

    SetCudaParams();
}
/// <summary>
/// Stores the CUDA wrapper, the device pointers and the sizes used by this builder,
/// and derives how many blocks of <c>threadsPerBlock</c> threads cover <paramref name="dim"/> elements.
/// </summary>
/// <param name="cu">CUDA wrapper object kept in the <c>cuda</c> field.</param>
/// <param name="vector">Device pointer kept in <c>vecPtr</c> (presumably the dense output vector; confirm against the kernel).</param>
/// <param name="vals">Device pointer kept in <c>valsPtr</c> (presumably the ELLPACK values array; confirm).</param>
/// <param name="cols">Device pointer kept in <c>idxPtr</c> (presumably the ELLPACK column indices; confirm).</param>
/// <param name="length">Device pointer kept in <c>vecLengthPtr</c> (presumably per-row lengths; confirm).</param>
/// <param name="rows">Row count; must be non-negative because it is stored as an unsigned value.</param>
/// <param name="dim">Vector dimension; must be non-negative because it is stored as an unsigned value.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="rows"/> or <paramref name="dim"/> is negative.
/// </exception>
public EllpackDenseVectorBuilder(CUDA cu, CUdeviceptr vector, CUdeviceptr vals, CUdeviceptr cols, CUdeviceptr length, int rows, int dim)
{
    // A negative int would silently wrap to a huge uint in the casts below and
    // produce a nonsensical grid size, so reject it up front.
    if (rows < 0)
    {
        throw new ArgumentOutOfRangeException("rows", rows, "rows must be non-negative.");
    }
    if (dim < 0)
    {
        throw new ArgumentOutOfRangeException("dim", dim, "dim must be non-negative.");
    }

    cuda = cu;
    vecPtr = vector;
    valsPtr = vals;
    idxPtr = cols;
    vecLengthPtr = length;

    nrRows = (uint)rows;
    vecDim = (uint)dim;

    // Integer ceil-division: the smallest block count whose threads cover vecDim elements.
    // This replaces the earlier floating-point Math.Ceiling computation and the
    // debug-only cross-check against this same formula.
    blocksPerGrid = (int)((vecDim + threadsPerBlock - 1) / threadsPerBlock);
}
/// <summary>
/// Computes the reduction launch sizes, uploads the host arrays to the device,
/// allocates the two kernel-column buffers, writes the module-level globals
/// "C", "B" and "A", and finally calls <c>SetCudaParams()</c>.
/// </summary>
private void SetCudaData()
{
    // reductionThreads / reductionBlocks are passed by ref and filled in by the helper.
    CudaHelpers.GetNumThreadsAndBlocks(
        problemSize,
        maxReductionBlocks,
        threadsPerBlock,
        ref reductionThreads,
        ref reductionBlocks);

    // Host arrays copied to the device unchanged.
    alphaPtr = cuda.CopyHostToDevice(alpha);
    gradPtr = cuda.CopyHostToDevice(G);
    yPtr = cuda.CopyHostToDevice(y);

    // Kernel columns i and j share a single allocation of 2 * problemSize floats:
    // ki starts at the base address and kj starts problemSize floats after it.
    // (The original note here read: "kernel columns i,j is simpler to copy array of zeros".)
    uint bothColumnsBytes = (uint)(sizeof(float) * problemSize * 2);
    kiPtr = cuda.Allocate(bothColumnsBytes);
    kjPtr = kiPtr + sizeof(float) * problemSize;

    // TODO (carried over from the original): remove it.
    // Based on reductionThreads; reductionBlocks was the noted alternative.
    int reductionLength = reductionThreads;
    reduceVal = new float[reductionLength * 2];
    reduceIdx = new int[reductionLength * 2];
    valRedPtr = cuda.CopyHostToDevice(reduceVal);
    idxRedPtr = cuda.CopyHostToDevice(reduceIdx);

    // Module global "C" receives the single value C.
    constCPtr = cuda.GetModuleGlobal(cuModule, "C");
    cuda.CopyHostToDevice(constCPtr, new float[] { C });

    // Module global "B" receives { 0, 0, C }.
    constBPtr = cuda.GetModuleGlobal(cuModule, "B");
    B = new float[] { 0, 0, C };
    cuda.CopyHostToDevice(constBPtr, B);

    // Module global "A" receives { -C, 0, 0 }.
    constAPtr = cuda.GetModuleGlobal(cuModule, "A");
    A = new float[] { -C, 0, 0 };
    cuda.CopyHostToDevice(constAPtr, A);

    SetCudaParams();
}
/// <summary>
/// Externally implemented entry point <c>cublasDger</c>; only the managed signature is declared here.
/// </summary>
/// <remarks>
/// NOTE(review): the name matches the legacy cuBLAS double-precision rank-1 update
/// A = alpha * x * y^T + A (m-by-n A with leading dimension lda) — confirm against the cuBLAS reference.
/// </remarks>
public static extern void cublasDger(int m, int n, double alpha, CUdeviceptr x, int incx, CUdeviceptr y, int incy, CUdeviceptr A, int lda);
/// <summary>
/// Externally implemented entry point <c>cublasDgemm</c>; only the managed signature is declared here.
/// </summary>
/// <remarks>
/// NOTE(review): the name matches the legacy cuBLAS double-precision matrix-matrix product
/// C = alpha * op(A) * op(B) + beta * C, with transa/transb selecting op — confirm against the cuBLAS reference.
/// </remarks>
public static extern void cublasDgemm(char transa, char transb, int m, int n, int k, double alpha, CUdeviceptr A, int lda, CUdeviceptr B, int ldb, double beta, CUdeviceptr C, int ldc);
/// <summary>
/// Externally implemented entry point <c>cublasDcopy</c>; only the managed signature is declared here.
/// </summary>
/// <remarks>
/// NOTE(review): the name matches the legacy cuBLAS double-precision vector copy (x into y,
/// with strides incx/incy) — confirm against the cuBLAS reference.
/// </remarks>
public static extern void cublasDcopy(int n, CUdeviceptr x, int incx, CUdeviceptr y, int incy);
/// <summary>
/// Externally implemented entry point <c>cublasCtrsv</c>; only the managed signature is declared here.
/// </summary>
/// <remarks>
/// NOTE(review): the name matches the legacy cuBLAS single-precision complex triangular solve
/// op(A) * x = b with x overwritten by the solution — confirm against the cuBLAS reference.
/// </remarks>
public static extern void cublasCtrsv(char uplo, char trans, char diag, int n, CUdeviceptr A, int lda, CUdeviceptr x, int incx);
/// <summary>
/// Externally implemented entry point <c>cublasZtrmm</c>; only the managed signature is declared here.
/// </summary>
/// <remarks>
/// NOTE(review): the name matches the legacy cuBLAS double-precision complex triangular
/// matrix-matrix multiply (B = alpha * op(A) * B or B = alpha * B * op(A), chosen by side) —
/// confirm against the cuBLAS reference.
/// </remarks>
public static extern void cublasZtrmm(char side, char uplo, char transa, char diag, int m, int n, cuDoubleComplex alpha, CUdeviceptr A, int lda, CUdeviceptr B, int ldb);
/// <summary>
/// Externally implemented entry point <c>cublasZsyrk</c>; only the managed signature is declared here.
/// </summary>
/// <remarks>
/// NOTE(review): the name matches the legacy cuBLAS double-precision complex symmetric rank-k update
/// C = alpha * A * A^T + beta * C (or the transposed form, chosen by trans) — confirm against the cuBLAS reference.
/// </remarks>
public static extern void cublasZsyrk(char uplo, char trans, int n, int k, cuDoubleComplex alpha, CUdeviceptr A, int lda, cuDoubleComplex beta, CUdeviceptr C, int ldc);
/// <summary>
/// Externally implemented entry point <c>cublasZswap</c>; only the managed signature is declared here.
/// </summary>
/// <remarks>
/// NOTE(review): the name matches the legacy cuBLAS routine that exchanges two double-precision
/// complex vectors x and y — confirm against the cuBLAS reference.
/// </remarks>
public static extern void cublasZswap(int n, CUdeviceptr x, int incx, CUdeviceptr y, int incy);
/// <summary>
/// Externally implemented entry point <c>cublasZdscal</c>; only the managed signature is declared here.
/// </summary>
/// <remarks>
/// NOTE(review): the name matches the legacy cuBLAS routine that scales a double-precision complex
/// vector by a real scalar (alpha is declared as double here) — confirm against the cuBLAS reference.
/// </remarks>
public static extern void cublasZdscal(int n, double alpha, CUdeviceptr x, int incx);
/// <summary>
/// Externally implemented entry point <c>cublasZdrot</c>; only the managed signature is declared here.
/// </summary>
/// <remarks>
/// NOTE(review): the name matches the legacy cuBLAS plane rotation applied to double-precision complex
/// vectors with real cosine c and real sine s — confirm against the cuBLAS reference.
/// </remarks>
public static extern void cublasZdrot(int n, CUdeviceptr x, int incx, CUdeviceptr y, int incy, double c, double s);
/// <summary>
/// Externally implemented entry point <c>cublasZdotu</c>; only the managed signature is declared here.
/// The result is returned by value as a <c>cuDoubleComplex</c>.
/// </summary>
/// <remarks>
/// NOTE(review): the name matches the legacy cuBLAS unconjugated dot product of two double-precision
/// complex vectors — confirm against the cuBLAS reference.
/// </remarks>
public static extern cuDoubleComplex cublasZdotu(int n, CUdeviceptr x, int incx, CUdeviceptr y, int incy);
/// <summary>
/// Externally implemented entry point <c>cublasZaxpy</c>; only the managed signature is declared here.
/// </summary>
/// <remarks>
/// NOTE(review): the name matches the legacy cuBLAS double-precision complex update
/// y = alpha * x + y — confirm against the cuBLAS reference.
/// </remarks>
public static extern void cublasZaxpy(int n, cuDoubleComplex alpha, CUdeviceptr x, int incx, CUdeviceptr y, int incy);
/// <summary>
/// Externally implemented entry point <c>cublasStrsm</c>; only the managed signature is declared here.
/// </summary>
/// <remarks>
/// NOTE(review): the name matches the legacy cuBLAS single-precision triangular solve with multiple
/// right-hand sides (op(A) * X = alpha * B or X * op(A) = alpha * B, chosen by side) —
/// confirm against the cuBLAS reference.
/// </remarks>
public static extern void cublasStrsm(char side, char uplo, char transa, char diag, int m, int n, float alpha, CUdeviceptr A, int lda, CUdeviceptr B, int ldb);
/// <summary>
/// Externally implemented entry point <c>cublasSsyrk</c>; only the managed signature is declared here.
/// </summary>
/// <remarks>
/// NOTE(review): the name matches the legacy cuBLAS single-precision symmetric rank-k update
/// C = alpha * A * A^T + beta * C (or the transposed form, chosen by trans) — confirm against the cuBLAS reference.
/// </remarks>
public static extern void cublasSsyrk(char uplo, char trans, int n, int k, float alpha, CUdeviceptr A, int lda, float beta, CUdeviceptr C, int ldc);
/// <summary>
/// Externally implemented entry point <c>cublasZscal</c>; only the managed signature is declared here.
/// </summary>
/// <remarks>
/// NOTE(review): the name matches the legacy cuBLAS routine that scales a double-precision complex
/// vector by a complex scalar alpha — confirm against the cuBLAS reference.
/// </remarks>
public static extern void cublasZscal(int n, cuDoubleComplex alpha, CUdeviceptr x, int incx);
/// <summary>
/// Externally implemented entry point <c>cublasZgemm</c>; only the managed signature is declared here.
/// </summary>
/// <remarks>
/// NOTE(review): the name matches the legacy cuBLAS double-precision complex matrix-matrix product
/// C = alpha * op(A) * op(B) + beta * C, with transa/transb selecting op — confirm against the cuBLAS reference.
/// </remarks>
public static extern void cublasZgemm(char transa, char transb, int m, int n, int k, cuDoubleComplex alpha, CUdeviceptr A, int lda, CUdeviceptr B, int ldb, cuDoubleComplex beta, CUdeviceptr C, int ldc);
/// <summary>
/// Externally implemented entry point <c>cublasZsymm</c>; only the managed signature is declared here.
/// </summary>
/// <remarks>
/// NOTE(review): the name matches the legacy cuBLAS double-precision complex symmetric matrix-matrix
/// product (C = alpha * A * B + beta * C or C = alpha * B * A + beta * C, chosen by side) —
/// confirm against the cuBLAS reference.
/// </remarks>
public static extern void cublasZsymm(char side, char uplo, int m, int n, cuDoubleComplex alpha, CUdeviceptr A, int lda, CUdeviceptr B, int ldb, cuDoubleComplex beta, CUdeviceptr C, int ldc);
/// <summary>
/// Externally implemented entry point <c>cublasZgemv</c>; only the managed signature is declared here.
/// </summary>
/// <remarks>
/// NOTE(review): the name matches the legacy cuBLAS double-precision complex matrix-vector product
/// y = alpha * op(A) * x + beta * y — confirm against the cuBLAS reference.
/// </remarks>
public static extern void cublasZgemv(char trans, int m, int n, cuDoubleComplex alpha, CUdeviceptr A, int lda, CUdeviceptr x, int incx, cuDoubleComplex beta, CUdeviceptr y, int incy);
/// <summary>
/// Externally implemented entry point <c>cublasZtpsv</c>; only the managed signature is declared here.
/// </summary>
/// <remarks>
/// NOTE(review): the name matches the legacy cuBLAS double-precision complex triangular solve with
/// the matrix in packed storage (AP), x overwritten by the solution — confirm against the cuBLAS reference.
/// </remarks>
public static extern void cublasZtpsv(char uplo, char trans, char diag, int n, CUdeviceptr AP, CUdeviceptr x, int incx);
/// <summary>
/// Externally implemented entry point <c>cublasZgeru</c>; only the managed signature is declared here.
/// </summary>
/// <remarks>
/// NOTE(review): the name matches the legacy cuBLAS double-precision complex unconjugated rank-1 update
/// A = alpha * x * y^T + A — confirm against the cuBLAS reference.
/// </remarks>
public static extern void cublasZgeru(int m, int n, cuDoubleComplex alpha, CUdeviceptr x, int incx, CUdeviceptr y, int incy, CUdeviceptr A, int lda);
/// <summary>
/// Externally implemented entry point <c>cublasCtrsm</c>; only the managed signature is declared here.
/// </summary>
/// <remarks>
/// NOTE(review): the name matches the legacy cuBLAS single-precision complex triangular solve with
/// multiple right-hand sides (op(A) * X = alpha * B or X * op(A) = alpha * B, chosen by side) —
/// confirm against the cuBLAS reference.
/// </remarks>
public static extern void cublasCtrsm(char side, char uplo, char transa, char diag, int m, int n, cuFloatComplex alpha, CUdeviceptr A, int lda, CUdeviceptr B, int ldb);
/// <summary>
/// Externally implemented entry point <c>cublasZherk</c>; only the managed signature is declared here.
/// Both scalars (alpha, beta) are declared as real doubles.
/// </summary>
/// <remarks>
/// NOTE(review): the name matches the legacy cuBLAS double-precision complex Hermitian rank-k update
/// C = alpha * A * A^H + beta * C (or the conjugate-transposed form, chosen by trans) —
/// confirm against the cuBLAS reference.
/// </remarks>
public static extern void cublasZherk(char uplo, char trans, int n, int k, double alpha, CUdeviceptr A, int lda, double beta, CUdeviceptr C, int ldc);
/// <summary>
/// Externally implemented entry point <c>cublasDaxpy</c>; only the managed signature is declared here.
/// </summary>
/// <remarks>
/// NOTE(review): the name matches the legacy cuBLAS double-precision update
/// y = alpha * x + y — confirm against the cuBLAS reference.
/// </remarks>
public static extern void cublasDaxpy(int n, double alpha, CUdeviceptr x, int incx, CUdeviceptr y, int incy);
/// <summary>
/// Externally implemented entry point <c>cublasZhpmv</c>; only the managed signature is declared here.
/// </summary>
/// <remarks>
/// NOTE(review): the name matches the legacy cuBLAS double-precision complex Hermitian matrix-vector
/// product y = alpha * A * x + beta * y with A in packed storage (AP) — confirm against the cuBLAS reference.
/// </remarks>
public static extern void cublasZhpmv(char uplo, int n, cuDoubleComplex alpha, CUdeviceptr AP, CUdeviceptr x, int incx, cuDoubleComplex beta, CUdeviceptr y, int incy);
/// <summary>
/// Externally implemented entry point <c>cublasDdot</c>; only the managed signature is declared here.
/// The result is returned as a double.
/// </summary>
/// <remarks>
/// NOTE(review): the name matches the legacy cuBLAS double-precision dot product of x and y —
/// confirm against the cuBLAS reference.
/// </remarks>
public static extern double cublasDdot(int n, CUdeviceptr x, int incx, CUdeviceptr y, int incy);
/// <summary>
/// Externally implemented entry point <c>cublasZhpr</c>; only the managed signature is declared here.
/// The scalar alpha is declared as a real double.
/// </summary>
/// <remarks>
/// NOTE(review): the name matches the legacy cuBLAS double-precision complex Hermitian rank-1 update
/// A = alpha * x * x^H + A with A in packed storage (AP) — confirm against the cuBLAS reference.
/// </remarks>
public static extern void cublasZhpr(char uplo, int n, double alpha, CUdeviceptr x, int incx, CUdeviceptr AP);
/// <summary>
/// Externally implemented entry point <c>cublasDgemv</c>; only the managed signature is declared here.
/// </summary>
/// <remarks>
/// NOTE(review): the name matches the legacy cuBLAS double-precision matrix-vector product
/// y = alpha * op(A) * x + beta * y — confirm against the cuBLAS reference.
/// </remarks>
public static extern void cublasDgemv(char trans, int m, int n, double alpha, CUdeviceptr A, int lda, CUdeviceptr x, int incx, double beta, CUdeviceptr y, int incy);
/// <summary>
/// Externally implemented entry point <c>cublasZhpr2</c>; only the managed signature is declared here.
/// </summary>
/// <remarks>
/// NOTE(review): the name matches the legacy cuBLAS double-precision complex Hermitian rank-2 update
/// A = alpha * x * y^H + conj(alpha) * y * x^H + A with A in packed storage (AP) —
/// confirm against the cuBLAS reference.
/// </remarks>
public static extern void cublasZhpr2(char uplo, int n, cuDoubleComplex alpha, CUdeviceptr x, int incx, CUdeviceptr y, int incy, CUdeviceptr AP);
/// <summary>
/// Externally implemented entry point <c>cublasDnrm2</c>; only the managed signature is declared here.
/// The result is returned as a double.
/// </summary>
/// <remarks>
/// NOTE(review): the name matches the legacy cuBLAS Euclidean norm of a double-precision vector —
/// confirm against the cuBLAS reference.
/// </remarks>
public static extern double cublasDnrm2(int n, CUdeviceptr x, int incx);
/// <summary>
/// Externally implemented entry point <c>cublasZrot</c>; only the managed signature is declared here.
/// Note the mixed scalar types: <c>sc</c> is a real double and <c>cs</c> is a <c>cuDoubleComplex</c>.
/// </summary>
/// <remarks>
/// NOTE(review): the name matches the legacy cuBLAS plane rotation applied to double-precision complex
/// vectors, presumably with sc as the real cosine and cs as the complex sine — confirm against the cuBLAS reference.
/// </remarks>
public static extern void cublasZrot(int n, CUdeviceptr x, int incx, CUdeviceptr y, int incy, double sc, cuDoubleComplex cs);