Example #1
0
        /// <summary>Computes y = M*x on the OpenCL device when acceleration is available,
        /// falling back to a host-side multiply otherwise.
        /// Does not automatically read the result back from device memory.</summary>
        /// <param name="M">Sparse matrix</param>
        /// <param name="x">Vector to be multiplied</param>
        /// <param name="y">Result vector; receives M*x</param>
        /// <exception cref="ArgumentException">Thrown when M, x and y dimensions are not compatible</exception>
        public void Multiply(CLImgSparseMatrix M, CLImgVector x, CLImgVector y)
        {
            // Specific exception type lets callers distinguish argument errors from runtime failures.
            if (x.Length != M.MatrixDimension || y.Length != M.MatrixDimension) throw new ArgumentException("M, x and y dimensions not compatible");

            if (CLCalc.CLAcceleration == CLCalc.CLAccelerationType.UsingCL)
            {
                CLNonZeroElemsPerRow.WriteToDevice(new int[] { M.NonZeroElemsPerRow });
                CLCalc.Program.MemoryObject[] args = new CLCalc.Program.MemoryObject[] { M.CLMatrixData, M.CLColumns, x.CLVector, y.CLVector, CLNonZeroElemsPerRow };

                // Global work size is ceil(n/4): each work-item handles 4 components.
                // Ideally matrix dimension should be a multiple of 4, but OK if it's not.
                kernelSparseMatrixVecMult.Execute(args, 1 + ((M.MatrixDimension - 1) >> 2));
            }
            else
            {
                // No OpenCL acceleration: compute on the host instead.
                y.VectorData = MultiplyNoCL(M, x);
            }
        }
Example #2
0
        /// <summary>Computes the dot product of v1 and v2 on the device and leaves the
        /// final value in dotProdSum.</summary>
        private void CLDotProd(CLImgVector v1, CLImgVector v2)
        {
            // Vector length expressed in groups of 4 components (kernel convention).
            int[] hostLenBy4 = { (v1.Length >> 2) + 1 };
            vLenBy4.WriteToDevice(hostLenBy4);

            // First pass: element-wise products plus most of the partial sums.
            CLCalc.Program.MemoryObject[] kernelArgs = { v1.CLVector, v2.CLVector, dotProd, vLenBy4 };
            kernelDotProduct.Execute(kernelArgs, new int[] { GLOBALWORKSIZE }, new int[] { (int)CLCalc.CLDevices[CLCalc.Program.DefaultCQ].MaxWorkItemSizes[0] });

            // Reduction passes: halve the active range until the partial sums are folded together.
            kernelArgs = new CLCalc.Program.MemoryObject[] { dotProd };
            for (int remaining = GLOBALWORKSIZE >> 3; remaining > 0; remaining >>= 1)
            {
                kernelSum.Execute(kernelArgs, remaining);
            }

            // Final pass: collapse the remaining partials into dotProdSum.
            kernelArgs = new CLCalc.Program.MemoryObject[] { dotProd, dotProdSum };
            kernelGetDotSum.Execute(kernelArgs, 1);
        }
Example #3
0
        /// <summary>Solves linear system Mx = b using conjugate gradient method. Doesn't try to improve the solution obtained.</summary>
        /// <param name="M">Matrix M (device-resident sparse matrix)</param>
        /// <param name="b">Vector b</param>
        /// <param name="tol">Error tolerance, compared against the squared residual r.r</param>
        /// <param name="x">Initial guess; overwritten with the computed solution. Reallocated when null or of the wrong length</param>
        public void LinSolveCLStep(CLImgSparseMatrix M, CLImgVector b, float tol, ref CLImgVector x)
        {
            int n = b.Length;
            // Work-items process 4 components each: nBy4 = ceil(n / 4).
            int nBy4 = 1 + ((n - 1) >> 2);

            // Lazily create the one-element scalar buffer used to pass alpha/beta to kernels.
            if (lambda == null)
            {
                lambda = new float[1];
                CLlambda = new CLCalc.Program.Variable(lambda);
            }

            // Lazily (re)create scratch vectors when the problem size changes.
            if (r == null || r.Length != n)
            {
                r = new CLImgVector(n);
                p = new CLImgVector(n);
                //x = new CLImgVector(n);
                Ap = new CLImgVector(n);
                temp = new CLImgVector(n);
            }
            if (temp == null) temp = new CLImgVector(n);

            if (x == null || x.Length != n) x = new CLImgVector(n);

            float alpha, beta, RDotROld, RDotR;

            //Initialization: Ap = M*x, then kernelInitRP sets r and p from b and Ap
            // (presumably r = b - Ap and p = r, the standard CG setup — kernel source not visible here).
            Multiply(M, x, Ap);

            CLCalc.Program.MemoryObject[] args = new CLCalc.Program.MemoryObject[] { b.CLVector, Ap.CLVector, r.CLVector, p.CLVector };
            kernelInitRP.Execute(args, nBy4);

            //Loop
            int count = 0;

            RDotR = DotProduct(r, r);

            // Always iterate at least once; stop when the squared residual drops below tol
            // or the iteration cap n*MAXITER is hit.
            while (count<1 || ((RDotR > tol) && (count < n*MAXITER)))
            {
                RDotROld = RDotR;

                //if ((count & 0x0080) == 0)
                //{
                //    Multiply(M, x, Ap);

                //    args = new CLCalc.Program.MemoryObject[] { b.CLVector, Ap.CLVector, r.CLVector, p.CLVector };
                //    kernelInitRP.Execute(args, nBy4);
                //}

                // Ap = M*p; alpha = (r.r) / (p.Ap)
                Multiply(M, p, Ap);

                alpha = RDotROld / DotProduct(Ap, p);

                //Update x: copy x into temp, then x = temp + alpha*p
                // (kernelMultiplyAdd appears to compute dest = arg3 + lambda*arg2 — confirm against kernel source).
                kernelCopyToTemp.Execute(new CLCalc.Program.MemoryObject[] { x.CLVector, temp.CLVector }, nBy4);
                lambda[0] = alpha; CLlambda.WriteToDevice(lambda);
                kernelMultiplyAdd.Execute(new CLCalc.Program.MemoryObject[] { CLlambda, p.CLVector, temp.CLVector, x.CLVector }, nBy4);

                //Update r: copy r into temp, then r = temp - alpha*Ap (negated scalar)
                kernelCopyToTemp.Execute(new CLCalc.Program.MemoryObject[] { r.CLVector, temp.CLVector }, nBy4);
                lambda[0] = -alpha; CLlambda.WriteToDevice(lambda);
                kernelMultiplyAdd.Execute(new CLCalc.Program.MemoryObject[] { CLlambda, Ap.CLVector, temp.CLVector, r.CLVector }, nBy4);

                // beta = (new r.r) / (old r.r)
                RDotR = DotProduct(r, r);
                beta = RDotR / RDotROld;

                //Update p: copy p into temp, then p = r + beta*temp
                kernelCopyToTemp.Execute(new CLCalc.Program.MemoryObject[] { p.CLVector, temp.CLVector }, nBy4);
                lambda[0] = beta; CLlambda.WriteToDevice(lambda);
                kernelMultiplyAdd.Execute(new CLCalc.Program.MemoryObject[] { CLlambda, temp.CLVector, r.CLVector, p.CLVector }, nBy4);

                count++;
            }
        }
        /// <summary>Static initializer: compiles the OpenCL source, creates the kernels
        /// and performs a warm-up execution of KernelStart.</summary>
        static Kernels()
        {
            try
            {
                CLCalc.Program.Compile(src);

                // Argument slots for the warm-up call (unused entries remain null).
                CLCalc.Program.MemoryObject[] Args = new CLCalc.Program.MemoryObject[100];
                int globalWorkSize = 4;

                // Compile the kernels.
                KernelStart = new CLCalc.Program.Kernel("KernelStart");
                coalesced = new CLCalc.Program.Kernel("coalesced");

                // Warm-up run of KernelStart.
                KernelStart.Execute(Args, globalWorkSize);
            }
            catch (NullReferenceException nre)
            {
                // Logged rather than rethrown so the type initializer does not throw.
                // NOTE(review): only NullReferenceException is handled — any other failure here
                // surfaces as TypeInitializationException at first use of this type; confirm intended.
                System.Console.WriteLine(nre);
            }
        }
Example #5
0
        /// <summary>Computes the i-th line of the kernel matrix K[i][j] on the device and
        /// caches it in the training set. No-op when the line was already computed.</summary>
        /// <param name="problemSolution">SVM to solve</param>
        /// <param name="i">Kernel line number to compute</param>
        private static void CLComputeKernels(SVM problemSolution, int i)
        {
            // Already cached: nothing to do.
            if (problemSolution.TrainingSet.IsKernelCalculated[i]) return;
            problemSolution.TrainingSet.kernels[i] = new float[problemSolution.TrainingSet.getN];

            TrainingSet trainingSet = problemSolution.TrainingSet;

            trainingSet.IsKernelCalculated[i] = true;

            //OpenCL Kernel arguments
            CLCalc.Program.MemoryObject[] args = new CLCalc.Program.MemoryObject[]
            {
                problemSolution.CLTrainingFeatures,
                problemSolution.CLXVecLen,
                problemSolution.CLSample,
                problemSolution.CLKernelValues,
                problemSolution.CLLambda
            };

            lock (CLResource)
            {
                // Bug fix: the sample upload must happen inside the lock. HostSample and
                // CLSample are shared buffers, so a concurrent caller could overwrite them
                // between the upload and the kernel execution.
                for (int j = 0; j < trainingSet.trainingArray[i].xVector.Length; j++)
                    problemSolution.HostSample[j] = trainingSet.trainingArray[i].xVector[j];
                problemSolution.CLSample.WriteToDevice(problemSolution.HostSample);

                //OpenCL Kernel execution
                kernelComputeKernelRBF.Execute(args, trainingSet.getN);
                problemSolution.CLKernelValues.ReadFromDeviceTo(trainingSet.kernels[i]);
            }
        }
Example #6
0
        /// <summary>Classifies multiple samples stored in OpenCL memory.</summary>
        /// <param name="svm">SVM to use as classifier</param>
        /// <param name="Samples">Samples data to classify, one sample per image row</param>
        /// <returns>Classification value F(x) for each sample row</returns>
        /// <exception cref="ArgumentException">Thrown when the Samples width does not match the training feature length</exception>
        public static float[] MultiClassify(SVM svm, CLCalc.Program.Image2D Samples)
        {
            float[] resp = new float[Samples.Height];

            // Each image pixel packs 4 floats, so the effective feature length is Width*4.
            if ((Samples.Width << 2) != svm.HostVLen[0]) throw new ArgumentException("Invalid Samples width, should be the same length of training features");

            // (Re)create device buffers when missing or when problem sizes have changed.
            if (svm.CLKernelValuesMultiClassify == null || svm.CLKernelValuesMultiClassify.OriginalVarLength != svm.alphaList.Count * Samples.Height)
            {
                svm.CLKernelValuesMultiClassify = new CLCalc.Program.Variable(new float[svm.alphaList.Count * Samples.Height]);
            }

            if (svm.CLAlphas == null || svm.CLAlphas.OriginalVarLength != svm.alphaList.Count)
            {
                svm.CLAlphas = new CLCalc.Program.Variable(svm.alphaList.ToArray());

                float[] ys = new float[svm.TrainingSet.trainingArray.Count];
                for (int i = 0; i < ys.Length; i++) ys[i] = svm.TrainingSet.trainingArray[i].y;

                svm.CLys = new CLCalc.Program.Variable(ys);
            }
            if (svm.CLb == null)
            {
                svm.CLb = new CLCalc.Program.Variable(new float[] { svm.b });
                svm.CLQtdSupVecs = new CLCalc.Program.Variable(new int[] { svm.alphaList.Count });
                CLMultiClassifSums = new CLCalc.Program.Variable(new float[Samples.Height]);
            }

            // Bug fix: CLMultiClassifSums is only allocated above when CLb is null, so it can
            // still be null here; guard before reading its length to avoid a NullReferenceException.
            if (CLMultiClassifSums == null || CLMultiClassifSums.OriginalVarLength != Samples.Height)
            {
                CLMultiClassifSums = new CLCalc.Program.Variable(new float[Samples.Height]);
            }

            // First pass: RBF kernel values for every (support vector, sample) pair.
            CLCalc.Program.MemoryObject[] args = new CLCalc.Program.MemoryObject[] { svm.CLTrainingFeatures, svm.CLQtdSupVecs, svm.CLXVecLen, Samples, svm.CLKernelValuesMultiClassify, svm.CLLambda };
            kernelComputeMultiKernelRBF.Execute(args, new int[] { svm.alphaList.Count, Samples.Height });

            CLCalc.Program.Sync();

            // Second pass: weighted sum of kernel values plus bias, one result per sample.
            args = new CLCalc.Program.MemoryObject[] { svm.CLAlphas, svm.CLQtdSupVecs, svm.CLXVecLen, svm.CLys, svm.CLKernelValuesMultiClassify, svm.CLb, CLMultiClassifSums };
            kernelSumKernels.Execute(args, Samples.Height);

            CLMultiClassifSums.ReadFromDeviceTo(resp);
            return resp;
        }
Example #7
0
        /// <summary>
        /// Predicts the output of a single entry, given a previous problem, solution and correspondent training set
        /// </summary>
        /// <param name="problemSolution">Correspondent problem solution</param>
        /// <param name="untrainedUnit">Input features from which the output will be predicted</param>
        /// <returns>The y classification (true/false = positive/negative)</returns>
        public static float CLpredictOutput(SVM problemSolution, TrainingUnit untrainedUnit)
        {
            TrainingSet trainingSet = problemSolution.TrainingSet;
            ProblemConfig problemConfig = problemSolution.ProblemCfg;

            #region Compute kernel
            // kernelValues[i] = kernel(untrainedUnit, training sample i), computed on the device.
            float[] kernelValues = new float[problemSolution.TrainingSet.getN];

            CLCalc.Program.MemoryObject[] kernelArgs = new CLCalc.Program.MemoryObject[]
            {
                problemSolution.CLTrainingFeatures,
                problemSolution.CLXVecLen,
                problemSolution.CLSample,
                problemSolution.CLKernelValues,
                problemSolution.CLLambda
            };

            // Stage the sample's features in the host buffer and upload to the device.
            for (int featureIdx = 0; featureIdx < untrainedUnit.xVector.Length; featureIdx++)
                problemSolution.HostSample[featureIdx] = untrainedUnit.xVector[featureIdx];

            problemSolution.CLSample.WriteToDevice(problemSolution.HostSample);

            lock (CLResource)
            {
                kernelComputeKernelRBF.Execute(kernelArgs, problemSolution.TrainingSet.getN);
                problemSolution.CLKernelValues.ReadFromDeviceTo(kernelValues);
            }
            #endregion

            // F(x) = b + sum_i alpha_i * y_i * kernel(x, i), where y_i contributes only its sign.
            float sum = 0;
            for (int i = 0; i < trainingSet.getN; i++)
            {
                float term = problemSolution.alphaList[i] * kernelValues[i];
                if (trainingSet.trainingArray[i].y > 0)
                    sum += term;
                else
                    sum -= term;
            }

            return sum + problemSolution.b;
        }