/// <summary>
/// Re-uploads the dense representation of problem elements <paramref name="i"/> and
/// <paramref name="j"/> to the device buffers behind <c>VecIPtr</c> and <c>VecJPtr</c>.
/// An upload is skipped when the requested index equals the corresponding
/// <c>IVectorIdx</c> / <c>JVectorIdx</c> value.
/// </summary>
/// <param name="i">Index into <c>problemElements</c> of the vector to place in <c>VectorI</c>.</param>
/// <param name="j">Index into <c>problemElements</c> of the vector to place in <c>VectorJ</c>.</param>
/// <remarks>
/// NOTE(review): this method does not assign <c>IVectorIdx</c> or <c>JVectorIdx</c>;
/// presumably the caller keeps them up to date - confirm.
/// </remarks>
public virtual void SetMemoryForDenseVector(int i, int j)
{
    bool refreshI = IVectorIdx != i;
    if (refreshI)
    {
        // Expand the sparse element into the host-side dense buffer, then push it to the device.
        CudaHelpers.FillDenseVector(problemElements[i], VectorI);
        cuda.CopyHostToDevice(VecIPtr, VectorI);
    }

    bool refreshJ = JVectorIdx != j;
    if (refreshJ)
    {
        // Same procedure for the second vector.
        CudaHelpers.FillDenseVector(problemElements[j], VectorJ);
        cuda.CopyHostToDevice(VecJPtr, VectorJ);
    }
}
/// <summary>
/// Initializes the kernel: sets up the wrapped linear kernel, converts the problem
/// elements with <c>CudaHelpers.TransformToEllpackRFormat</c>, copies the resulting
/// arrays to the device, allocates a device-mapped host buffer for the results and
/// binds the two dense vectors through <c>CudaHelpers.SetTextureMemory</c>.
/// </summary>
/// <remarks>
/// The method reads <c>problemElements[0]</c> and <c>problemElements[1]</c> without a
/// size check, so at least two problem elements are required.
/// </remarks>
public override void Init()
{
    // The linear kernel works on the same data; it is initialized before the base class.
    linKernel.ProblemElements = problemElements;
    linKernel.Y = Y;
    linKernel.Init();

    base.Init();

    float[] values;
    int[] columnIdx;
    int[] rowLength;

    // The pre-fetch size is passed as the alignment argument of the conversion.
    int alignment = preFetch;
    CudaHelpers.TransformToEllpackRFormat(out values, out columnIdx, out rowLength, problemElements, alignment);

    selfLinDot = linKernel.DiagonalDotCache;

    #region cuda initialization

    InitCudaModule();

    // Copy the converted matrix, the labels and the diagonal dot cache to the device.
    valsPtr = cuda.CopyHostToDevice(values);
    idxPtr = cuda.CopyHostToDevice(columnIdx);
    vecLengthPtr = cuda.CopyHostToDevice(rowLength);

    labelsPtr = cuda.CopyHostToDevice(Y);
    selfLinDotPtr = cuda.CopyHostToDevice(selfLinDot);

    // Results buffer: room for two floats per problem element.
    uint mappedBytes = (uint)(2 * problemElements.Length * sizeof(float));

    // Host memory allocated with the DEVICEMAP flag; the device-side pointer for it is
    // obtained right after. (A plain cuda.Allocate of the output buffer was the earlier
    // alternative to this mapped allocation.)
    outputIntPtr = cuda.HostAllocate(mappedBytes, CUDADriver.CU_MEMHOSTALLOC_DEVICEMAP);
    outputPtr = cuda.GetHostDevicePointer(outputIntPtr, 0);

    #endregion

    SetCudaFunctionParameters();

    // The main vectors are stored densely: their length follows the dimension of the
    // data, so many entries will be zero, but the CUDA computation is faster this way.
    var denseLength = problemElements[0].Dim + 1;
    VectorI = new float[denseLength];
    VectorJ = new float[denseLength];

    // Seed the dense buffers with the first two problem elements.
    CudaHelpers.FillDenseVector(problemElements[0], VectorI);
    CudaHelpers.FillDenseVector(problemElements[1], VectorJ);

    // Bind both dense vectors to their texture references.
    CudaHelpers.SetTextureMemory(cuda, cuModule, ref cuVecI_TexRef, cuVecITexRefName, VectorI, ref VecIPtr);
    CudaHelpers.SetTextureMemory(cuda, cuModule, ref cuVecJ_TexRef, cuVecJTexRefName, VectorJ, ref VecJPtr);
}