/// <summary>
/// Prepares the device-side state used by the solver: computes the reduction
/// launch configuration, uploads the host arrays, creates the reduction
/// scratch buffers and writes the penalty value into the module global "C".
/// Ends by calling <c>SetCudaParams()</c>.
/// </summary>
private void SetCudaData()
{
    // Fills reductionThreads / reductionBlocks (both passed by ref).
    CudaHelpers.GetNumThreadsAndBlocks(
        problemSize,
        maxReductionBlocks,
        threadsPerBlock,
        ref reductionThreads,
        ref reductionBlocks);

    // Upload the host arrays; each call returns the device pointer we keep.
    alphaPtr = cuda.CopyHostToDevice(alpha);
    gradPtr = cuda.CopyHostToDevice(G);
    yPtr = cuda.CopyHostToDevice(y);
    kernelDiagPtr = cuda.CopyHostToDevice(QD);

    // Device buffers for kernel columns i and j are created by uploading
    // alpha a second and third time.
    // NOTE(review): the original author's remark implies alpha is all zeros
    // at this point, so this doubles as zero-initialisation - confirm.
    kiPtr = cuda.CopyHostToDevice(alpha);
    kjPtr = cuda.CopyHostToDevice(alpha);

    // TODO: remove this local. It is sized by reductionThreads; the original
    // note mentions reductionBlocks as the alternative.
    int reductionLength = reductionThreads;

    // Host-side reduction scratch arrays and their device copies.
    reduceVal = new float[reductionLength];
    reduceIdx = new int[reductionLength];
    valRedPtr = cuda.CopyHostToDevice(reduceVal);
    idxRedPtr = cuda.CopyHostToDevice(reduceIdx);

    // Look up the module global named "C" and store the current C value in it.
    constCPtr = cuda.GetModuleGlobal(cuModule, "C");
    float[] penaltyValue = new float[] { C };
    cuda.CopyHostToDevice(constCPtr, penaltyValue);

    SetCudaParams();
}
/// <summary>
/// Prepares the device-side state used by the solver: computes the reduction
/// launch configuration, uploads the host arrays, allocates one shared buffer
/// for the two kernel columns, creates the reduction scratch buffers and
/// writes the module globals "C", "B" and "A".
/// Ends by calling <c>SetCudaParams()</c>.
/// </summary>
private void SetCudaData()
{
    // Fills reductionThreads / reductionBlocks (both passed by ref).
    CudaHelpers.GetNumThreadsAndBlocks(
        problemSize,
        maxReductionBlocks,
        threadsPerBlock,
        ref reductionThreads,
        ref reductionBlocks);

    // Upload the host arrays; each call returns the device pointer we keep.
    alphaPtr = cuda.CopyHostToDevice(alpha);
    gradPtr = cuda.CopyHostToDevice(G);
    yPtr = cuda.CopyHostToDevice(y);

    // A single allocation holds both kernel columns: column i starts at the
    // base address and column j starts problemSize floats after it.
    // NOTE(review): nothing here writes to the buffer after allocation -
    // confirm whether the kernels expect it to be zeroed.
    uint kernelColumnsBytes = (uint)(sizeof(float) * problemSize * 2);
    kiPtr = cuda.Allocate(kernelColumnsBytes);
    kjPtr = kiPtr + sizeof(float) * problemSize;

    // TODO: remove this local. It is sized by reductionThreads; the original
    // note mentions reductionBlocks as the alternative.
    int reductionLength = reductionThreads;

    // Host-side reduction scratch arrays (twice the reduction length) and
    // their device copies.
    reduceVal = new float[reductionLength * 2];
    reduceIdx = new int[reductionLength * 2];
    valRedPtr = cuda.CopyHostToDevice(reduceVal);
    idxRedPtr = cuda.CopyHostToDevice(reduceIdx);

    // Module global "C": single-element array holding the current C value.
    constCPtr = cuda.GetModuleGlobal(cuModule, "C");
    float[] penaltyValue = new float[] { C };
    cuda.CopyHostToDevice(constCPtr, penaltyValue);

    // Module global "B": three-element table { 0, 0, C }.
    constBPtr = cuda.GetModuleGlobal(cuModule, "B");
    B = new float[] { 0, 0, C };
    cuda.CopyHostToDevice(constBPtr, B);

    // Module global "A": three-element table { -C, 0, 0 }.
    constAPtr = cuda.GetModuleGlobal(cuModule, "A");
    A = new float[] { -C, 0, 0 };
    cuda.CopyHostToDevice(constAPtr, A);

    SetCudaParams();
}