/// <summary>
/// Uploads the training data of <paramref name="sub_prob"/> to the CUDA device and
/// allocates every device/host buffer the solver needs: sparse vectors (via the CSR and
/// CSC transform helpers), alpha/gradient arrays, per-block reduction arrays, the W
/// vector, and the label/delta textures. Finishes by calling
/// <c>SetCudaParameters</c>.
/// </summary>
/// <param name="sub_prob">
/// Problem whose elements, labels and feature count determine the size and content of
/// all device buffers.
/// </param>
private void SetCudaData(Problem<SparseVec> sub_prob)
{
    int vecDim = sub_prob.FeaturesCount;
    int elementsCount = sub_prob.ElementsCount;

    #region copy training examples to GPU

    // Output of CudaHelpers.TransformToCSRFormat.
    float[] csrVals;
    int[] csrIdx;
    int[] csrLength;
    CudaHelpers.TransformToCSRFormat(out csrVals, out csrIdx, out csrLength, sub_prob.Elements);

    valsCSRPtr = cuda.CopyHostToDevice(csrVals);
    idxCSRPtr = cuda.CopyHostToDevice(csrIdx);
    vecLenghtCSRPtr = cuda.CopyHostToDevice(csrLength);

    // Output of CudaHelpers.TransformToCSCFormat2 for the same elements.
    float[] cscVals;
    int[] cscIdx;
    int[] cscLength;
    CudaHelpers.TransformToCSCFormat2(out cscVals, out cscIdx, out cscLength, sub_prob.Elements);

    valsCSCPtr = cuda.CopyHostToDevice(cscVals);
    idxCSCPtr = cuda.CopyHostToDevice(cscIdx);
    vecLenghtCSCPtr = cuda.CopyHostToDevice(cscLength);

    #endregion

    /*
     * allocate memory for gradient and alpha (one float per training element)
     */
    // Multiply in uint, like the other size computations below, so the byte count
    // is not computed in signed int before the cast.
    alphaMemSize = (uint)elementsCount * sizeof(float);

    gradPtr = cuda.Allocate(alphaMemSize);
    gradOldPtr = cuda.Allocate(alphaMemSize);
    alphaPtr = cuda.Allocate(alphaMemSize);
    alphaOldPtr = cuda.Allocate(alphaMemSize);
    alphaTmpPtr = cuda.Allocate(alphaMemSize);

    /*
     * reduction blocks for computing Obj (over the W vector, vecDim entries)
     */
    GetNumThreadsAndBlocks(vecDim, 64, threadsPerBlock, ref threadsForReduceObjW, ref bpgReduceW);

    reduceObjW = new float[bpgReduceW];
    uint reduceWBytes = (uint)bpgReduceW * sizeof(float);
    reduceObjWPtr = cuda.Allocate(reduceWBytes);

    /*
     * reduction size for kernels which operate on alpha
     */
    // The alpha/gradient buffers above are sized from sub_prob, so the reduction grid
    // must be derived from the same element count.
    threadsForReduceObjAlpha = 0;
    GetNumThreadsAndBlocks(elementsCount, 64, threadsPerBlock, ref threadsForReduceObjAlpha, ref bpgReduceAlpha);

    uint alphaReductionBytes = (uint)bpgReduceAlpha * sizeof(float);

    /*
     * reduction array for computing objective function value
     */
    reduceObjAlpha = new float[bpgReduceAlpha];
    reduceObjAlphaPtr = cuda.Allocate(alphaReductionBytes);

    /*
     * reduction array for computing gradient max norm
     */
    reduceGradMaxNorm = new float[bpgReduceAlpha];
    reduceGradMaxNormPtr = cuda.Allocate(alphaReductionBytes);

    /*
     * reduction arrays for computing BB step
     */
    alphaPartReduce = new float[bpgReduceAlpha];
    gradPartReduce = new float[bpgReduceAlpha];
    alphaGradPartReduce = new float[bpgReduceAlpha];

    reduceBBAlphaGradPtr = cuda.Allocate(alphaReductionBytes);
    reduceBBAlphaPtr = cuda.Allocate(alphaReductionBytes);
    reduceBBGradPtr = cuda.Allocate(alphaReductionBytes);

    /*
     * W vector: a temporary device buffer plus the texture-bound buffer
     */
    wVecMemSize = (uint)vecDim * sizeof(float);
    wTempVecPtr = cuda.Allocate(wVecMemSize);

    // bind the W vector to its texture reference
    SetTextureMemory(ref cuWVecTexRef, cudaWVecTexRefName, ref wVecPtr, wVecMemSize);

    // set texture memory for labels
    SetTextureMemory(ref cuLabelsTexRef, cudaLabelsTexRefName, sub_prob.Y, ref labelsPtr);

    // set texture memory for deltas (same byte size as the alpha buffers)
    SetTextureMemory(ref cuDeltasTexRef, "deltasTexRef", ref deltasPtr, alphaMemSize);

    // look up module-level device symbols used by the kernels
    diagPtr = cuda.GetModuleGlobal(cuModule, "diag_shift");
    stepBBPtr = cuda.GetModuleGlobal(cuModule, "stepBB");

    // write the initial value 0.1 into the "stepBB" device symbol
    float[] stepData = new float[] { 0.1f };
    cuda.CopyHostToDevice(stepBBPtr, stepData);

    SetCudaParameters(sub_prob);
}