/// <summary>
/// Computes y = M*x on the OpenCL device (host fallback when OpenCL is unavailable).
/// The result is left in device memory; it is NOT automatically read back to the host.
/// </summary>
/// <param name="M">Sparse matrix stored in OpenCL image memory</param>
/// <param name="x">Vector to be multiplied; length must equal M.MatrixDimension</param>
/// <param name="y">Result vector; length must equal M.MatrixDimension</param>
/// <exception cref="ArgumentException">Thrown when x or y length does not match M's dimension.</exception>
public void Multiply(CLImgSparseMatrix M, CLImgVector x, CLImgVector y)
{
    if (x.Length != M.MatrixDimension || y.Length != M.MatrixDimension)
        throw new ArgumentException("M, x and y dimensions not compatible");

    if (CLCalc.CLAcceleration == CLCalc.CLAccelerationType.UsingCL)
    {
        // The kernel needs the per-row non-zero count to locate each row's data.
        CLNonZeroElemsPerRow.WriteToDevice(new int[] { M.NonZeroElemsPerRow });

        CLCalc.Program.MemoryObject[] args = new CLCalc.Program.MemoryObject[]
        {
            M.CLMatrixData, M.CLColumns, x.CLVector, y.CLVector, CLNonZeroElemsPerRow
        };

        // Global size rounds the dimension up to a multiple of 4 work-items.
        // Ideally matrix dimension should be a multiple of 4, but OK if it's not.
        kernelSparseMatrixVecMult.Execute(args, 1 + ((M.MatrixDimension - 1) >> 2));
    }
    else
    {
        // CPU fallback path; result is written directly to the host-side vector data.
        y.VectorData = MultiplyNoCL(M, x);
    }
}
/// <summary>Computes the dot product of v1 and v2 on the device, leaving the
/// scalar result in dotProdSum (device memory); nothing is read back to the host.</summary>
private void CLDotProd(CLImgVector v1, CLImgVector v2)
{
    // Tell the kernel how many float4 chunks it must cover.
    int[] quarterLen = { (v1.Length >> 2) + 1 };
    vLenBy4.WriteToDevice(quarterLen);

    // Stage 1: element-wise products and first-level partial sums into dotProd.
    CLCalc.Program.MemoryObject[] stageArgs = { v1.CLVector, v2.CLVector, dotProd, vLenBy4 };
    int localSize = (int)CLCalc.CLDevices[CLCalc.Program.DefaultCQ].MaxWorkItemSizes[0];
    kernelDotProduct.Execute(stageArgs, new int[] { GLOBALWORKSIZE }, new int[] { localSize });

    // Stage 2: tree reduction over dotProd, halving the active range each pass.
    stageArgs = new CLCalc.Program.MemoryObject[] { dotProd };
    for (int remaining = GLOBALWORKSIZE >> 3; remaining > 0; remaining >>= 1)
        kernelSum.Execute(stageArgs, remaining);

    // Stage 3: move the fully reduced value into dotProdSum.
    stageArgs = new CLCalc.Program.MemoryObject[] { dotProd, dotProdSum };
    kernelGetDotSum.Execute(stageArgs, 1);
}
/// <summary>Solves linear system Mx = b using the conjugate gradient method on the
/// OpenCL device. Doesn't try to improve the solution obtained beyond the stop criterion.</summary>
/// <param name="M">Matrix M</param>
/// <param name="b">Vector b</param>
/// <param name="tol">Error tolerance; iteration stops when dot(r,r) drops to or below this
/// value — note this is the SQUARED residual norm, not the norm itself</param>
/// <param name="x">Initial guess on input, solution on output; reallocated here when null
/// or of the wrong length</param>
public void LinSolveCLStep(CLImgSparseMatrix M, CLImgVector b, float tol, ref CLImgVector x)
{
    int n = b.Length;
    // Device work size: vector components are processed 4 at a time.
    int nBy4 = 1 + ((n - 1) >> 2);

    // Lazily create the 1-element host/device scalar staging buffer.
    if (lambda == null)
    {
        lambda = new float[1];
        CLlambda = new CLCalc.Program.Variable(lambda);
    }
    // (Re)allocate CG scratch vectors when the problem size changes:
    // r = residual, p = search direction, Ap = M*p, temp = copy buffer.
    if (r == null || r.Length != n)
    {
        r = new CLImgVector(n);
        p = new CLImgVector(n);
        //x = new CLImgVector(n);
        Ap = new CLImgVector(n);
        temp = new CLImgVector(n);
    }
    if (temp == null) temp = new CLImgVector(n);
    if (x == null || x.Length != n) x = new CLImgVector(n);

    float alpha, beta, RDotROld, RDotR;

    // Initialization: Ap = M*x, then kernelInitRP fills r and p from b and Ap
    // (presumably r = b - M*x and p = r — confirm against the kernel source).
    Multiply(M, x, Ap);
    CLCalc.Program.MemoryObject[] args = new CLCalc.Program.MemoryObject[] { b.CLVector, Ap.CLVector, r.CLVector, p.CLVector };
    kernelInitRP.Execute(args, nBy4);

    //Loop: always run at least one iteration; hard cap at n*MAXITER iterations.
    int count = 0;
    RDotR = DotProduct(r, r);
    while (count<1 || ((RDotR > tol) && (count < n*MAXITER)))
    {
        RDotROld = RDotR;

        // Periodic residual recomputation (disabled):
        //if ((count & 0x0080) == 0)
        //{
        //    Multiply(M, x, Ap);
        //    args = new CLCalc.Program.MemoryObject[] { b.CLVector, Ap.CLVector, r.CLVector, p.CLVector };
        //    kernelInitRP.Execute(args, nBy4);
        //}

        // alpha = (r·r) / (p·Mp)
        Multiply(M, p, Ap);
        alpha = RDotROld / DotProduct(Ap, p);

        //Update x: x += alpha*p. The copy-to-temp step stages the read operand in a
        // separate buffer — presumably because a kernel cannot read and write the
        // same image object; confirm against the kernel source.
        kernelCopyToTemp.Execute(new CLCalc.Program.MemoryObject[] { x.CLVector, temp.CLVector }, nBy4);
        lambda[0] = alpha;
        CLlambda.WriteToDevice(lambda);
        kernelMultiplyAdd.Execute(new CLCalc.Program.MemoryObject[] { CLlambda, p.CLVector, temp.CLVector, x.CLVector }, nBy4);

        //Update r: r -= alpha*Ap
        kernelCopyToTemp.Execute(new CLCalc.Program.MemoryObject[] { r.CLVector, temp.CLVector }, nBy4);
        lambda[0] = -alpha;
        CLlambda.WriteToDevice(lambda);
        kernelMultiplyAdd.Execute(new CLCalc.Program.MemoryObject[] { CLlambda, Ap.CLVector, temp.CLVector, r.CLVector }, nBy4);

        RDotR = DotProduct(r, r);
        beta =
RDotR / RDotROld;

        //Update p: p = r + beta*p
        kernelCopyToTemp.Execute(new CLCalc.Program.MemoryObject[] { p.CLVector, temp.CLVector }, nBy4);
        lambda[0] = beta;
        CLlambda.WriteToDevice(lambda);
        kernelMultiplyAdd.Execute(new CLCalc.Program.MemoryObject[] { CLlambda, temp.CLVector, r.CLVector, p.CLVector }, nBy4);

        count++;
    }
}
/// <summary>
/// Static initializer: compiles the OpenCL program source and runs the start-up
/// kernel once so subsequent calls find the kernels ready.
/// </summary>
static Kernels()
{
    try
    {
        CLCalc.Program.Compile(src);

        // NOTE(review): Args is 100 null memory objects — presumably "KernelStart"
        // takes no real arguments; confirm against the kernel source.
        CLCalc.Program.MemoryObject[] Args = new CLCalc.Program.MemoryObject[100];
        int globalWorkSize = 4;

        // compile the kernels
        KernelStart = new CLCalc.Program.Kernel("KernelStart");
        coalesced = new CLCalc.Program.Kernel("coalesced");

        // run kernel start
        KernelStart.Execute(Args, globalWorkSize);
    }
    catch (NullReferenceException nre)
    {
        // NOTE(review): only NullReferenceException is handled here; any other
        // compile/execution failure still propagates out of the static constructor
        // (which would make the type unusable). Confirm this narrow catch is intended.
        System.Console.WriteLine("" + nre);
    }
}
/// <summary>Computes the i-th line of the kernel matrix K[i][j] on the OpenCL device
/// and caches it in the training set; returns immediately if already cached.</summary>
/// <param name="problemSolution">SVM to solve</param>
/// <param name="i">Kernel line number to compute</param>
private static void CLComputeKernels(SVM problemSolution, int i)
{
    // Already cached — nothing to do.
    if (problemSolution.TrainingSet.IsKernelCalculated[i]) return;

    problemSolution.TrainingSet.kernels[i] = new float[problemSolution.TrainingSet.getN];
    TrainingSet trainingSet = problemSolution.TrainingSet;

    // NOTE(review): the "calculated" flag is set BEFORE the kernel actually runs;
    // if Execute throws, the flag would remain set over an all-zero line — confirm intended.
    trainingSet.IsKernelCalculated[i] = true;

    // Copy the i-th sample's features into the host staging buffer, then upload.
    for (int j = 0; j < trainingSet.trainingArray[i].xVector.Length; j++)
        problemSolution.HostSample[j] = trainingSet.trainingArray[i].xVector[j];
    problemSolution.CLSample.WriteToDevice(problemSolution.HostSample);

    //OpenCL Kernel execution
    CLCalc.Program.MemoryObject[] args = new CLCalc.Program.MemoryObject[]
    {
        problemSolution.CLTrainingFeatures,
        problemSolution.CLXVecLen,
        problemSolution.CLSample,
        problemSolution.CLKernelValues,
        problemSolution.CLLambda
    };

    // Serialize device access so only one kernel line is computed at a time.
    // NOTE(review): the WriteToDevice above is OUTSIDE the lock — confirm no other
    // thread can overwrite CLSample between the upload and this Execute.
    lock (CLResource)
    {
        kernelComputeKernelRBF.Execute(args, trainingSet.getN);
        problemSolution.CLKernelValues.ReadFromDeviceTo(trainingSet.kernels[i]);
    }
}
/// <summary>Classifies multiple samples stored in OpenCL memory.</summary>
/// <param name="svm">SVM to use as classifier</param>
/// <param name="Samples">Samples data to classify, one sample per image row</param>
/// <returns>One decision value per sample row (length = Samples.Height)</returns>
/// <exception cref="ArgumentException">Thrown when the sample feature length
/// (Samples.Width * 4) does not match the training feature length.</exception>
public static float[] MultiClassify(SVM svm, CLCalc.Program.Image2D Samples)
{
    float[] resp = new float[Samples.Height];

    // Each image row holds Width*4 floats; this must match the training feature length.
    if ((Samples.Width << 2) != svm.HostVLen[0])
        throw new ArgumentException("Invalid Samples width, should be the same length of training features");

    // Lazily (re)allocate the kernel-values buffer when sample/support-vector counts change.
    if (svm.CLKernelValuesMultiClassify == null || svm.CLKernelValuesMultiClassify.OriginalVarLength != svm.alphaList.Count * Samples.Height)
    {
        svm.CLKernelValuesMultiClassify = new CLCalc.Program.Variable(new float[svm.alphaList.Count * Samples.Height]);
    }

    // Upload alphas and labels when the support-vector count changed.
    // NOTE(review): if alpha VALUES change but the count does not, stale device data
    // is reused — confirm callers always retrain through a fresh SVM instance.
    if (svm.CLAlphas == null || svm.CLAlphas.OriginalVarLength != svm.alphaList.Count)
    {
        svm.CLAlphas = new CLCalc.Program.Variable(svm.alphaList.ToArray());

        float[] ys = new float[svm.TrainingSet.trainingArray.Count];
        for (int i = 0; i < ys.Length; i++) ys[i] = svm.TrainingSet.trainingArray[i].y;
        svm.CLys = new CLCalc.Program.Variable(ys);
    }

    if (svm.CLb == null)
    {
        svm.CLb = new CLCalc.Program.Variable(new float[] { svm.b });
        svm.CLQtdSupVecs = new CLCalc.Program.Variable(new int[] { svm.alphaList.Count });
        CLMultiClassifSums = new CLCalc.Program.Variable(new float[Samples.Height]);
    }

    // BUGFIX: also guard against null — svm.CLb may already be initialized (by an
    // earlier call with a different svm, or another code path) while the shared sums
    // buffer is not, which previously threw NullReferenceException on the next line.
    if (CLMultiClassifSums == null || CLMultiClassifSums.OriginalVarLength != Samples.Height)
    {
        CLMultiClassifSums = new CLCalc.Program.Variable(new float[Samples.Height]);
    }

    // Stage 1: RBF kernel value for every (support vector, sample) pair.
    CLCalc.Program.MemoryObject[] args = new CLCalc.Program.MemoryObject[] { svm.CLTrainingFeatures, svm.CLQtdSupVecs, svm.CLXVecLen, Samples, svm.CLKernelValuesMultiClassify, svm.CLLambda };
    kernelComputeMultiKernelRBF.Execute(args, new int[] { svm.alphaList.Count, Samples.Height });
    CLCalc.Program.Sync();

    // Stage 2: alpha/label-weighted sum of kernel values plus bias, one per sample.
    args = new CLCalc.Program.MemoryObject[] { svm.CLAlphas, svm.CLQtdSupVecs, svm.CLXVecLen, svm.CLys, svm.CLKernelValuesMultiClassify, svm.CLb, CLMultiClassifSums };
    kernelSumKernels.Execute(args, Samples.Height);

    CLMultiClassifSums.ReadFromDeviceTo(resp);
    return resp;
}
/// <summary>
/// Predicts the output of a single entry, given a previous problem, solution and correspondent training set.
/// </summary>
/// <param name="problemSolution">Correspondent problem solution</param>
/// <param name="untrainedUnit">Input features from which the output will be predicted</param>
/// <returns>The raw decision value F(x) = sum + b; its sign gives the positive/negative classification</returns>
public static float CLpredictOutput(SVM problemSolution, TrainingUnit untrainedUnit)
{
    TrainingSet trainingSet = problemSolution.TrainingSet;

    #region Compute kernel
    float[] K = new float[problemSolution.TrainingSet.getN];

    CLCalc.Program.MemoryObject[] args = new CLCalc.Program.MemoryObject[]
    {
        problemSolution.CLTrainingFeatures,
        problemSolution.CLXVecLen,
        problemSolution.CLSample,
        problemSolution.CLKernelValues,
        problemSolution.CLLambda
    };

    // Copy the sample's features into the host staging buffer and upload.
    for (int j = 0; j < untrainedUnit.xVector.Length; j++)
        problemSolution.HostSample[j] = untrainedUnit.xVector[j];
    problemSolution.CLSample.WriteToDevice(problemSolution.HostSample);

    // Serialize device access, then read the whole kernel line back to the host.
    lock (CLResource)
    {
        kernelComputeKernelRBF.Execute(args, problemSolution.TrainingSet.getN);
        problemSolution.CLKernelValues.ReadFromDeviceTo(K);
    }
    #endregion

    // F(x) = sum + b
    // sum = summation of alpha_i * y_i * kernel(untrained unit, i) for all i in the
    // training set; alphas are stored unsigned, the label's sign is applied here.
    float sum = 0;
    for (int i = 0; i < trainingSet.getN; i++)
    {
        if (trainingSet.trainingArray[i].y > 0)
            sum += problemSolution.alphaList[i] * K[i];
        else
            sum -= problemSolution.alphaList[i] * K[i];
    }
    return sum + problemSolution.b;
}