Beispiel #1
0
        private void SetCudaDataForEllpack()
        {
            float[] vecVals;
            int[]   vecColIdx;
            int[]   vecLenght;


            CudaHelpers.TransformToEllpackRFormat(out vecVals, out vecColIdx, out vecLenght, TrainedModel.SupportElements);

            evalBlocks = (sizeSV + evalThreads - 1) / evalThreads;

            //copy data to device, set cuda function parameters
            valsPtr      = cuda.CopyHostToDevice(vecVals);
            idxPtr       = cuda.CopyHostToDevice(vecColIdx);
            vecLengthPtr = cuda.CopyHostToDevice(vecLenght);
        }
        private void SetCudaDataForEllpack()
        {
            float[] vecVals;
            int[]   vecColIdx;
            int[]   vecLenght;


            CudaHelpers.TransformToEllpackRFormat(out vecVals, out vecColIdx, out vecLenght, TrainedModel.SupportElements);

            float[] selfSum = TrainedModel.SupportElements.AsParallel().Select(c => c.Values.Sum()).ToArray();

            evalBlocks = (sizeSV + evalThreads - 1) / evalThreads;

            //copy data to device, set cuda function parameters
            valsPtr      = cuda.CopyHostToDevice(vecVals);
            idxPtr       = cuda.CopyHostToDevice(vecColIdx);
            vecLengthPtr = cuda.CopyHostToDevice(vecLenght);

            selfSumPtr = cuda.CopyHostToDevice(selfSum);
        }
Beispiel #3
0
        public override void Init()
        {
            linKernel.ProblemElements = problemElements;
            linKernel.Y = Y;
            linKernel.Init();

            base.Init();

            float[] vecVals;
            int[]   vecColIdx;
            int[]   vecLenght;

            int align = preFetch;

            CudaHelpers.TransformToEllpackRFormat(out vecVals, out vecColIdx, out vecLenght, problemElements, align);
            // CudaHelpers.TransformToEllpackRFormat(out vecVals, out vecColIdx, out vecLenght, problemElements);

            selfLinDot = linKernel.DiagonalDotCache;

            #region cuda initialization

            InitCudaModule();


            //copy data to device, set cuda function parameters
            valsPtr = cuda.CopyHostToDevice(vecVals);

            idxPtr       = cuda.CopyHostToDevice(vecColIdx);
            vecLengthPtr = cuda.CopyHostToDevice(vecLenght);


            labelsPtr = cuda.CopyHostToDevice(Y);

            selfLinDotPtr = cuda.CopyHostToDevice(selfLinDot);

            uint memSize = (uint)(2 * problemElements.Length * sizeof(float));
            //allocate mapped memory for our results
            //CUDARuntime.cudaSetDeviceFlags(CUDARuntime.cudaDeviceMapHost);



            // var e= CUDADriver.cuMemHostAlloc(ref outputIntPtr, memSize, 8);
            //CUDARuntime.cudaHostAlloc(ref outputIntPtr, memSize, CUDARuntime.cudaHostAllocMapped);
            //var errMsg=CUDARuntime.cudaGetErrorString(e);
            //cuda.HostRegister(outputIntPtr,memSize, Cuda)
            outputIntPtr = cuda.HostAllocate(memSize, CUDADriver.CU_MEMHOSTALLOC_DEVICEMAP);
            outputPtr    = cuda.GetHostDevicePointer(outputIntPtr, 0);

            //normal memory allocation
            //outputPtr = cuda.Allocate((uint)(sizeof(float) * problemElements.Length));


            #endregion

            SetCudaFunctionParameters();

            //allocate memory for main vector, size of this vector is the same as dimenson, so many
            //indexes will be zero, but cuda computation is faster
            VectorI = new float[problemElements[0].Dim + 1];
            VectorJ = new float[problemElements[0].Dim + 1];

            CudaHelpers.FillDenseVector(problemElements[0], VectorI);
            CudaHelpers.FillDenseVector(problemElements[1], VectorJ);

            CudaHelpers.SetTextureMemory(cuda, cuModule, ref cuVecI_TexRef, cuVecITexRefName, VectorI, ref VecIPtr);
            CudaHelpers.SetTextureMemory(cuda, cuModule, ref cuVecJ_TexRef, cuVecJTexRefName, VectorJ, ref VecJPtr);
        }