Beispiel #1
0
        /// <summary>
        /// Runs <c>CudaHelpers.TransformToCSCFormat</c> on <c>problemElements</c> and compares
        /// the three output arrays element by element against hard-coded expected data.
        /// Only the first <c>expected.Length</c> entries of each output are compared.
        /// </summary>
        public void TransformToCSCFormatTest()
        {
            // All three arrays are 'out' arguments of the call below, so they need no initial value.
            float[] vecVals;
            int[]   vecIdx;
            int[]   vecLenght;

            int[]   vecIdxExpected    = new int[] { 0, 2, 4, 1, 3, 0, 2, 4, 1, 3, 0, 2, 4, 1, 3, 0, 2, 4, 1, 3, 0, 2, 4, 1, 3 };
            float[] vecValsExpected   = new float[] { 0, 2, 4, 1, 3, 0, 2, 4, 1, 3, 0, 2, 4, 1, 3, 0, 2, 4, 1, 3, 0, 2, 4, 1, 3 };
            int[]   vecLenghtExpected = new int[] { 0, 3, 5, 8, 10, 13, 15, 18, 20, 23, 25 };

            CudaHelpers.TransformToCSCFormat(out vecVals, out vecIdx, out vecLenght, problemElements);

            // Guard the element-wise loops: a missing or too-short output must show up as a
            // readable assertion failure, not as a NullReference/IndexOutOfRange exception.
            Assert.IsNotNull(vecIdx, "vec idx is null");
            Assert.IsNotNull(vecVals, "vec val is null");
            Assert.IsNotNull(vecLenght, "vec lenght is null");

            Assert.IsTrue(vecIdx.Length >= vecIdxExpected.Length,
                "vec idx has " + vecIdx.Length + " elements, expected at least " + vecIdxExpected.Length);
            Assert.IsTrue(vecVals.Length >= vecValsExpected.Length,
                "vec val has " + vecVals.Length + " elements, expected at least " + vecValsExpected.Length);
            Assert.IsTrue(vecLenght.Length >= vecLenghtExpected.Length,
                "vec lenght has " + vecLenght.Length + " elements, expected at least " + vecLenghtExpected.Length);

            for (int i = 0; i < vecIdxExpected.Length; i++)
            {
                Assert.AreEqual(vecIdxExpected[i], vecIdx[i], "vec idx different at position" + i);
            }

            for (int i = 0; i < vecValsExpected.Length; i++)
            {
                Assert.AreEqual(vecValsExpected[i], vecVals[i], "vec val different at position" + i);
            }

            for (int i = 0; i < vecLenghtExpected.Length; i++)
            {
                Assert.AreEqual(vecLenghtExpected[i], vecLenght[i], "vec lenght different at position" + i);
            }
        }
Beispiel #2
0
        /// <summary>
        /// Prepares all device-side data for <paramref name="sub_prob"/>: uploads the training
        /// vectors in both CSR and CSC form, allocates the gradient/alpha buffers and the
        /// per-block reduction buffers, binds the texture references, and writes the initial
        /// value of the "stepBB" module global. Finishes by calling <c>SetCudaParameters</c>.
        /// </summary>
        /// <param name="sub_prob">Problem whose elements and labels are transferred to the device.</param>
        private void SetCudaData(Problem <SparseVec> sub_prob)
        {
            int vecDim = sub_prob.FeaturesCount;//.Elements[0].Dim;

            /*
             * copy vectors to CUDA device
             */

            #region copy training examples to GPU

            float[] vecVals;
            int[]   vecIdx;
            int[]   vecLenght;
            CudaHelpers.TransformToCSRFormat(out vecVals, out vecIdx, out vecLenght, sub_prob.Elements);
            valsCSRPtr      = cuda.CopyHostToDevice(vecVals);
            idxCSRPtr       = cuda.CopyHostToDevice(vecIdx);
            vecLenghtCSRPtr = cuda.CopyHostToDevice(vecLenght);


            // The same three host variables are reused for the CSC form; the CSR copies
            // are already on the device at this point.
            CudaHelpers.TransformToCSCFormat(out vecVals, out vecIdx, out vecLenght, sub_prob.Elements);
            valsCSCPtr      = cuda.CopyHostToDevice(vecVals);
            idxCSCPtr       = cuda.CopyHostToDevice(vecIdx);
            vecLenghtCSCPtr = cuda.CopyHostToDevice(vecLenght);

            #endregion

            /*
             * allocate memory for gradient and alpha (one float per training element)
             */
            alphaMemSize = (uint)(sub_prob.ElementsCount * sizeof(float));

            gradPtr    = cuda.Allocate(alphaMemSize);
            gradOldPtr = cuda.Allocate(alphaMemSize);

            alphaPtr    = cuda.Allocate(alphaMemSize);
            alphaOldPtr = cuda.Allocate(alphaMemSize);
            alphaTmpPtr = cuda.Allocate(alphaMemSize);


            /*
             * reduction blocks for computing Obj (sized by the feature dimension)
             */
            GetNumThreadsAndBlocks(vecDim, 64, threadsPerBlock, ref threadsForReduceObjW, ref bpgReduceW);

            reduceObjW = new float[bpgReduceW];
            uint reduceWBytes = (uint)bpgReduceW * sizeof(float);
            reduceObjWPtr = cuda.Allocate(reduceWBytes);

            /*
             * reduction size for kernels which operate on alpha
             *
             * Sized from sub_prob (not the 'problem' field) so the launch configuration
             * always matches the alpha/gradient buffers allocated above.
             */
            threadsForReduceObjAlpha = 0;

            GetNumThreadsAndBlocks(sub_prob.ElementsCount, 64, threadsPerBlock, ref threadsForReduceObjAlpha, ref bpgReduceAlpha);

            uint alphaReductionBytes = (uint)bpgReduceAlpha * sizeof(float);

            /*
             * reduction array for computing objective function value
             */
            reduceObjAlpha    = new float[bpgReduceAlpha];
            reduceObjAlphaPtr = cuda.Allocate(alphaReductionBytes);


            /*
             * reduction arrays for computing BB step
             */
            alphaPartReduce     = new float[bpgReduceAlpha];
            gradPartReduce      = new float[bpgReduceAlpha];
            alphaGradPartReduce = new float[bpgReduceAlpha];

            reduceBBAlphaGradPtr = cuda.Allocate(alphaReductionBytes);
            reduceBBAlphaPtr     = cuda.Allocate(alphaReductionBytes);
            reduceBBGradPtr      = cuda.Allocate(alphaReductionBytes);

            /*
             * reduction arrays for computing lin part
             */
            reduceLinPart    = new float[bpgReduceAlpha];
            reduceLinPartPtr = cuda.Allocate(alphaReductionBytes);



            //float[] wVec = new float[vecDim];
            wVecMemSize = (uint)vecDim * sizeof(float);
            wTempVecPtr = cuda.Allocate(wVecMemSize);
            //move W vector
            SetTextureMemory(ref cuWVecTexRef, cudaWVecTexRefName, ref wVecPtr, wVecMemSize);

            //set texture memory for labels
            SetTextureMemory(ref cuLabelsTexRef, cudaLabelsTexRefName, sub_prob.Y, ref labelsPtr);


            SetTextureMemory(ref cuDeltasTexRef, "deltasTexRef", ref deltasPtr, alphaMemSize);

            diagPtr = cuda.GetModuleGlobal(cuModule, "diag_shift");


            // initial value written to the "stepBB" module global
            stepBBPtr = cuda.GetModuleGlobal(cuModule, "stepBB");
            float[] stepData = new float[] { 0.1f };
            cuda.CopyHostToDevice(stepBBPtr, stepData);

            SetCudaParameters(sub_prob);
        }
Beispiel #3
0
        /// <summary>
        /// Prepares all device-side data for <paramref name="sub_prob"/>: uploads the training
        /// vectors in both CSR and CSC form, allocates the mapped gradient buffer, binds the
        /// main-vector, labels and deltas textures, uploads the per-element QD values and the
        /// zero-initialised alpha vector, and writes the "stepScaling" module global.
        /// Finishes by calling <c>SetCudaParameters</c>.
        /// </summary>
        /// <param name="sub_prob">
        /// Problem whose elements and labels are transferred to the device. The vector
        /// dimension is read from <c>Elements[0]</c>, so it must contain at least one element.
        /// </param>
        private void SetCudaData(Problem <SparseVec> sub_prob)
        {
            int vecDim = sub_prob.Elements[0].Dim;

            /*
             * copy vectors to CUDA device
             */
            float[] vecVals;
            int[]   vecIdx;
            int[]   vecLenght;
            CudaHelpers.TransformToCSRFormat(out vecVals, out vecIdx, out vecLenght, sub_prob.Elements);
            valsCSRPtr      = cuda.CopyHostToDevice(vecVals);
            idxCSRPtr       = cuda.CopyHostToDevice(vecIdx);
            vecLenghtCSRPtr = cuda.CopyHostToDevice(vecLenght);


            // The same three host variables are reused for the CSC form; the CSR copies
            // are already on the device at this point.
            CudaHelpers.TransformToCSCFormat(out vecVals, out vecIdx, out vecLenght, sub_prob.Elements);
            valsCSCPtr      = cuda.CopyHostToDevice(vecVals);
            idxCSCPtr       = cuda.CopyHostToDevice(vecIdx);
            vecLenghtCSCPtr = cuda.CopyHostToDevice(vecLenght);



            /*
             * allocate memory for gradient
             */
            uint memSize = (uint)(sub_prob.ElementsCount * sizeof(float));

            //allocate mapped memory for our results (dot product between vector W and all elements)
            gradIntPtr = cuda.HostAllocate(memSize, CUDADriver.CU_MEMHOSTALLOC_DEVICEMAP);
            gradPtr    = cuda.GetHostDevicePointer(gradIntPtr, 0);

            //allocate memory for main vector, size of this vector is the same as dimension, so many
            //indexes will be zero, but cuda computation is faster
            mainVector = new float[vecDim];


            //move W vector
            //CudaHelpers.FillDenseVector(problemElements[0], mainVector);
            CudaHelpers.SetTextureMemory(cuda, cuModule, ref cuMainVecTexRef, cudaMainVecTexRefName, mainVector, ref mainVecPtr);


            //set texture memory for labels
            CudaHelpers.SetTextureMemory(cuda, cuModule, ref cuLabelsTexRef, cudaLabelsTexRefName, sub_prob.Y, ref labelsPtr);


            /*
             * data for cuda solver
             */

            //normally for L2 solver QDii= xii*xii+Diag_i
            //where Diag_i = 0.5/Cp if yi=1
            //      Diag_i = 0.5/Cn if yi=-1
            //but we will add this on GPU
            //
            //alpha and deltas start at zero; freshly created C# arrays are already
            //zero-filled, so only QD needs to be computed here.
            QD     = new float[sub_prob.ElementsCount];
            alpha  = new float[sub_prob.ElementsCount];
            deltas = new float[sub_prob.ElementsCount];
            for (int i = 0; i < sub_prob.ElementsCount; i++)
            {
                QD[i] = sub_prob.Elements[i].DotProduct();
            }

            qdPtr = cuda.CopyHostToDevice(QD);

            //Upload the zeroed alpha vector instead of only allocating: device allocations
            //are not cleared, so a bare Allocate would leave alpha with arbitrary contents.
            alphaPtr = cuda.CopyHostToDevice(alpha);


            //deltasPtr = cuda.Allocate(deltas);
            CudaHelpers.SetTextureMemory(cuda, cuModule, ref cuDeltasTexRef, "deltasTexRef", deltas, ref deltasPtr);

            //only the address of "diag_shift" is fetched here; its value is set in the fill function
            diagPtr = cuda.GetModuleGlobal(cuModule, "diag_shift");

            //CUdeviceptr dimPtr = cuda.GetModuleGlobal(cuModule, "Dim");
            ////todo: check if it ok
            ////cuda.Memset(dimPtr,(uint) vecDim, 1);
            //int[] dimArr = new int[] { vecDim };
            //cuda.CopyHostToDevice(dimPtr,dimArr);

            //CUDARuntime.cudaMemcpyToSymbol("Dim", dimPtr, 1, 0, cudaMemcpyKind.cudaMemcpyHostToDevice);
            //CUDARuntime.cudaMemcpyToSymbol("Dim", ,1,0, cudaMemcpyKind.cudaMemcpyHostToDevice);

            CUdeviceptr deltaScalingPtr = cuda.GetModuleGlobal(cuModule, "stepScaling");

            //two ways of computing scaling param, should be the same, but it depends on rounding.
            //stepScaling = (float)(1.0 / Math.Sqrt(sub_prob.ElementsCount));

            stepScaling = 0.0002f;// (float)(1.0 / sub_prob.ElementsCount);

            //set scaling constant
            float[] scArr = new float[] { stepScaling };
            cuda.CopyHostToDevice(deltaScalingPtr, scArr);
            //cuda.Memset(deltaScalingPtr, (uint) scaling,sizeof(float));

            //cuda.CopyHostToDevice(dimPtr, problem.Elements[0].Dim);

            SetCudaParameters(sub_prob);
        }