Ejemplo n.º 1
0
        /// <summary>
        /// Initializes the kernel: runs the base initialization, uploads the problem
        /// elements to the device in CSR form, allocates the mapped output buffer and
        /// binds the main-vector and label textures.
        /// </summary>
        public override void Init()
        {
            // The linear kernel is intentionally left uninitialized here; it is only
            // used for computing the element product.
            base.Init();

            // Flatten the problem elements into the three CSR arrays.
            float[] csrValues;
            int[]   csrIndexes;
            int[]   csrLengths;
            CudaHelpers.TransformToCSRFormat(out csrValues, out csrIndexes, out csrLengths, problemElements);

            #region cuda initialization

            InitCudaModule();

            // Upload the CSR arrays; the resulting device pointers are kept in fields.
            valsPtr      = cuda.CopyHostToDevice(csrValues);
            idxPtr       = cuda.CopyHostToDevice(csrIndexes);
            vecLengthPtr = cuda.CopyHostToDevice(csrLengths);

            // One float of output per problem element, allocated as device-mapped
            // host memory so results can be read on the host side.
            uint outputBytes = (uint)(sizeof(float) * problemElements.Length);
            outputIntPtr = cuda.HostAllocate(outputBytes, CUDADriver.CU_MEMHOSTALLOC_DEVICEMAP);
            outputPtr    = cuda.GetHostDevicePointer(outputIntPtr, 0);

            #endregion
            SetCudaFunctionParameters();

            // The main vector is stored densely (Dim + 1 floats), so most entries are
            // zero; the dense layout is used because the cuda computation is faster.
            mainVector = new float[problemElements[0].Dim + 1];
            CudaHelpers.FillDenseVector(problemElements[0], mainVector);

            // Bind the dense main vector to its texture reference.
            CudaHelpers.SetTextureMemory(cuda, cuModule, ref cuMainVecTexRef, cudaMainVecTexRefName, mainVector, ref mainVecPtr);

            // Bind the labels to their texture reference.
            CudaHelpers.SetTextureMemory(cuda, cuModule, ref cuLabelsTexRef, cudaLabelsTexRefName, Y, ref labelsPtr);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Initializes the kernel: initializes the inner linear kernel, runs the base
        /// initialization, uploads the problem elements (CSR form) and the linear
        /// kernel's diagonal dot cache to the device, allocates the mapped output
        /// buffer and binds the main-vector and label textures.
        /// </summary>
        public override void Init()
        {
            // The inner linear kernel works on the same elements and labels.
            linKernel.ProblemElements = problemElements;
            linKernel.Y = Y;
            linKernel.Init();

            base.Init();

            // Flatten the problem elements into the three CSR arrays.
            float[] csrValues;
            int[]   csrIndexes;
            int[]   csrLengths;
            CudaHelpers.TransformToCSRFormat(out csrValues, out csrIndexes, out csrLengths, problemElements);

            // Reuse the diagonal dot products cached by the linear kernel.
            selfLinDot = linKernel.DiagonalDotCache;

            #region cuda initialization

            InitCudaModule();

            // Upload the CSR arrays; the resulting device pointers are kept in fields.
            valsPtr      = cuda.CopyHostToDevice(csrValues);
            idxPtr       = cuda.CopyHostToDevice(csrIndexes);
            vecLengthPtr = cuda.CopyHostToDevice(csrLengths);

            // Upload the cached diagonal dot products as well.
            selfLinDotPtr = cuda.CopyHostToDevice(selfLinDot);

            // One float of output per problem element, allocated as device-mapped
            // host memory (instead of a plain device allocation).
            uint outputBytes = (uint)(sizeof(float) * problemElements.Length);
            outputIntPtr = cuda.HostAllocate(outputBytes, CUDADriver.CU_MEMHOSTALLOC_DEVICEMAP);
            outputPtr    = cuda.GetHostDevicePointer(outputIntPtr, 0);

            #endregion

            SetCudaFunctionParameters();

            // The main vector is stored densely (Dim + 1 floats), so most entries are
            // zero; the dense layout is used because the cuda computation is faster.
            mainVector = new float[problemElements[0].Dim + 1];
            CudaHelpers.FillDenseVector(problemElements[0], mainVector);

            // Bind the dense main vector and the labels to their texture references.
            CudaHelpers.SetTextureMemory(cuda, cuModule, ref cuMainVecTexRef, cudaMainVecTexRefName, mainVector, ref mainVecPtr);

            CudaHelpers.SetTextureMemory(cuda, cuModule, ref cuLabelsTexRef, cudaLabelsTexRefName, Y, ref labelsPtr);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Predicts the specified elements.
        /// </summary>
        /// <remarks>
        /// The elements are uploaded to the device in CSR form. For every support
        /// vector of the trained model the vector is staged in one of two alternating
        /// host buffers, copied asynchronously to the device and the kernel function is
        /// launched with the support vector index as its last parameter. Afterwards the
        /// sign function is launched on the output with the model bias, and the
        /// results are copied from the mapped host buffer into a managed array.
        /// NOTE(review): the device arrays and the mapped output buffer are allocated
        /// on every call and are not released in this method - confirm that the owner
        /// of these fields frees them, otherwise repeated calls leak memory.
        /// </remarks>
        /// <param name="elements">The elements.</param>
        /// <returns>array of predicted labels +1 or -1</returns>
        /// <exception cref="ApplicationException">The evaluator is not initialized.</exception>
        public override float[] Predict(SparseVec[] elements)
        {
            if (!IsInitialized)
            {
                throw new ApplicationException("Evaluator is not initialized. Call init method");
            }

            // Nothing to classify: avoid zero-sized allocations and a zero-sized
            // grid launch, and hand back an empty result instead.
            if (elements.Length == 0)
            {
                return new float[0];
            }

            // transform elements to a matrix in CSR format
            // elements values
            float[] vecVals;
            // elements indexes
            int[] vecIdx;
            // elements lengths
            int[] vecLenght;
            CudaHelpers.TransformToCSRFormat(out vecVals, out vecIdx, out vecLenght, elements);

            // copy data to device
            valsPtr      = cuda.CopyHostToDevice(vecVals);
            idxPtr       = cuda.CopyHostToDevice(vecIdx);
            vecLenghtPtr = cuda.CopyHostToDevice(vecLenght);

            // the host-side CSR arrays are no longer needed
            vecVals   = null;
            vecIdx    = null;
            vecLenght = null;

            uint memElementsSize = (uint)(elements.Length * sizeof(float));

            // allocate mapped memory for our results (one float per element)
            outputIntPtr = cuda.HostAllocate(memElementsSize, CUDADriver.CU_MEMHOSTALLOC_DEVICEMAP);
            outputPtr    = cuda.GetHostDevicePointer(outputIntPtr, 0);

            int supportCount = TrainedModel.SupportElements.Length;

            // Set the cuda kernel parameters
            #region set cuda parameters
            uint Rows = (uint)elements.Length;
            uint Cols = (uint)supportCount;

            cuda.SetFunctionBlockShape(cuFunc, blockSizeX, blockSizeY, 1);

            int offset = 0;
            // set elements params
            cuda.SetParameter(cuFunc, offset, valsPtr.Pointer);
            offset += IntPtr.Size;
            cuda.SetParameter(cuFunc, offset, idxPtr.Pointer);
            offset += IntPtr.Size;
            cuda.SetParameter(cuFunc, offset, vecLenghtPtr.Pointer);
            offset += IntPtr.Size;

            // set labels param
            cuda.SetParameter(cuFunc, offset, labelsPtr.Pointer);
            offset += IntPtr.Size;
            // set alphas param
            cuda.SetParameter(cuFunc, offset, alphasPtr.Pointer);
            offset += IntPtr.Size;
            // set output (result) param
            cuda.SetParameter(cuFunc, offset, outputPtr.Pointer);
            offset += IntPtr.Size;
            // set number of elements param
            cuda.SetParameter(cuFunc, offset, (uint)Rows);
            offset += sizeof(int);
            // set number of support vectors param
            cuda.SetParameter(cuFunc, offset, (uint)Cols);
            offset += sizeof(int);
            // set support vector index param; its offset is remembered so that it
            // can be overwritten before every launch in the loop below
            lastParameterOffset = offset;
            cuda.SetParameter(cuFunc, offset, (uint)0);
            offset += sizeof(int);
            cuda.SetParameterSize(cuFunc, (uint)offset);
            #endregion

            // enough blocks along X to cover every element (rounded up)
            int gridDimX = (int)Math.Ceiling((Rows + 0.0) / (blockSizeX));

            for (int k = 0; k < supportCount; k++)
            {
                // set the buffer values from the k-th support vector; two host
                // buffers are used alternately (k % 2)
                CudaHelpers.InitBuffer(TrainedModel.SupportElements[k], svVecIntPtrs[k % 2]);

                // wait for the work queued in the previous iteration
                cuda.SynchronizeStream(stream);
                // copy asynchronously from the buffer to the device
                cuda.CopyHostToDeviceAsync(mainVecPtr, svVecIntPtrs[k % 2], memSvSize, stream);
                // set the last parameter in the kernel (column index)
                cuda.SetParameter(cuFunc, lastParameterOffset, (uint)k);
                // launch the kernel
                cuda.LaunchAsync(cuFunc, gridDimX, 1, stream);

                if (k > 0)
                {
                    // clear the previous host buffer; its copy has completed because
                    // the stream was synchronized earlier in this iteration
                    CudaHelpers.SetBufferIdx(TrainedModel.SupportElements[k - 1], svVecIntPtrs[(k + 1) % 2], 0.0f);
                }
            }

            rho = TrainedModel.Bias;

            cuda.SetFunctionBlockShape(cuFuncSign, blockSizeX, blockSizeY, 1);
            int signFuncOffset = 0;
            // set array param
            cuda.SetParameter(cuFuncSign, signFuncOffset, outputPtr.Pointer);
            signFuncOffset += IntPtr.Size;
            // set size
            cuda.SetParameter(cuFuncSign, signFuncOffset, Rows);
            signFuncOffset += sizeof(int);
            // set bias
            cuda.SetParameter(cuFuncSign, signFuncOffset, rho);
            signFuncOffset += sizeof(float);

            cuda.SetParameterSize(cuFuncSign, (uint)signFuncOffset);

            // gridDimX is valid for this function as well
            cuda.LaunchAsync(cuFuncSign, gridDimX, 1, stream);

            // wait for all computation
            cuda.SynchronizeContext();

            // The loop above only clears the buffer of vector k-1 while processing
            // vector k, so the buffer that staged the last support vector is still
            // populated. Clear it now (all copies have finished) so that a following
            // call does not start from stale values.
            if (supportCount > 0)
            {
                int lastSv = supportCount - 1;
                CudaHelpers.SetBufferIdx(TrainedModel.SupportElements[lastSv], svVecIntPtrs[lastSv % 2], 0.0f);
            }

            float[] result = new float[elements.Length];
            // copy result out of the mapped host buffer
            Marshal.Copy(outputIntPtr, result, 0, elements.Length);

            return result;
        }