/// <summary>Computes the i-th line of matrix K[i][j]</summary>
/// <remarks>
/// The row is cached in <c>TrainingSet.kernels[i]</c>; a row already flagged in
/// <c>IsKernelCalculated</c> is not recomputed. The sample upload, the kernel launch and the
/// read-back all happen under <c>CLResource</c>, because the sample and kernel-value device
/// buffers are shared by every call made on the same SVM.
/// </remarks>
/// <param name="problemSolution">SVM to solve</param>
/// <param name="i">Kernel line number to compute</param>
private static void CLComputeKernels(SVM problemSolution, int i)
{
    TrainingSet trainingSet = problemSolution.TrainingSet;

    // Fast path: row already available.
    if (trainingSet.IsKernelCalculated[i])
    {
        return;
    }

    CLCalc.Program.MemoryObject[] args = new CLCalc.Program.MemoryObject[]
    {
        problemSolution.CLTrainingFeatures,
        problemSolution.CLXVecLen,
        problemSolution.CLSample,
        problemSolution.CLKernelValues,
        problemSolution.CLLambda
    };

    lock (CLResource)
    {
        // Another caller may have produced the row while we were waiting for the lock.
        if (trainingSet.IsKernelCalculated[i])
        {
            return;
        }

        // Stage sample i in the shared host buffer and upload it. This must be inside the
        // lock: otherwise a concurrent call could overwrite the sample before the kernel runs.
        for (int j = 0; j < trainingSet.trainingArray[i].xVector.Length; j++)
        {
            problemSolution.HostSample[j] = trainingSet.trainingArray[i].xVector[j];
        }
        problemSolution.CLSample.WriteToDevice(problemSolution.HostSample);

        //OpenCL Kernel execution
        float[] kernelRow = new float[trainingSet.getN];
        kernelComputeKernelRBF.Execute(args, trainingSet.getN);
        problemSolution.CLKernelValues.ReadFromDeviceTo(kernelRow);

        // Publish the row only once it is fully populated, then mark it as available.
        trainingSet.kernels[i] = kernelRow;
        trainingSet.IsKernelCalculated[i] = true;
    }
}
/// <summary>
/// Computes the dot product of two vectors on the device and stores the result in dotProdSum.
/// </summary>
/// <param name="v1">First operand</param>
/// <param name="v2">Second operand</param>
private void CLDotProd(CLImgVector v1, CLImgVector v2)
{
    // Upload the element count divided by four (plus one) for the kernel.
    // NOTE(review): the ">> 2" suggests four components per stored element - confirm against the kernel source.
    int[] hostLenBy4 = new int[] { (v1.Length >> 2) + 1 };
    vLenBy4.WriteToDevice(hostLenBy4);

    // Stage 1: products and most of the partial sums, with an explicit local work size.
    CLCalc.Program.MemoryObject[] productArgs = new CLCalc.Program.MemoryObject[]
    {
        v1.CLVector, v2.CLVector, dotProd, vLenBy4
    };
    int[] globalSize = new int[] { GLOBALWORKSIZE };
    int[] localSize = new int[] { (int)CLCalc.CLDevices[CLCalc.Program.DefaultCQ].MaxWorkItemSizes[0] };
    kernelDotProduct.Execute(productArgs, globalSize, localSize);

    // Stage 2: sum what is left, halving the number of work items on each pass.
    CLCalc.Program.MemoryObject[] reduceArgs = new CLCalc.Program.MemoryObject[] { dotProd };
    for (int workItems = GLOBALWORKSIZE >> 3; workItems > 0; workItems >>= 1)
    {
        kernelSum.Execute(reduceArgs, workItems);
    }

    // Stage 3: a single work item moves the final value into dotProdSum.
    CLCalc.Program.MemoryObject[] resultArgs = new CLCalc.Program.MemoryObject[] { dotProd, dotProdSum };
    kernelGetDotSum.Execute(resultArgs, 1);
}
/// <summary>
/// Computes the difference between the current and previous frames on a grid reduced by a
/// factor of 8 in each direction, then brackets the moving regions into MovingRegionBoxes.
/// </summary>
private void ComputeFrameDiff()
{
    // Both images must exist and have identical dimensions.
    bool framesComparable = CLBmp != null
                            && CLBmpPrev != null
                            && CLBmp.Width == CLBmpPrev.Width
                            && CLBmp.Height == CLBmpPrev.Height;
    if (!framesComparable)
    {
        return;
    }

    // One byte per 8x8 block of pixels.
    int cellCount = (CLBmp.Height * CLBmp.Width) >> 6;
    if (frameDiff == null || frameDiff.Length != cellCount)
    {
        frameDiff = new byte[cellCount];
        CLframeDiff = new CLCalc.Program.Variable(frameDiff);
        MovingRegionBoxes = new List<int>();
    }

    int gridWidth = CLBmp.Width >> 3;
    int gridHeight = CLBmp.Height >> 3;

    CLCalc.Program.MemoryObject[] diffArgs = new CLCalc.Program.MemoryObject[]
    {
        CLBmp, CLBmpPrev, CLframeDiff
    };
    kernelComputeFrameDiff.Execute(diffArgs, new int[] { gridWidth, gridHeight });
    CLframeDiff.ReadFromDeviceTo(frameDiff);

    // Rebuild the list of moving-region boxes from scratch for this frame.
    MovingRegionBoxes.Clear();
    BracketMovingRegions(frameDiff, gridWidth, gridHeight, MovingRegionBoxes);
}
/// <summary>Applies a filter to an image</summary>
/// <remarks>
/// Pictures can be too big to process in one piece, so the bitmap is split with
/// MPOReader.SplitJPS, each of the two parts is filtered separately, and the results are
/// reassembled with MPOReader.AssembleJPS.
/// </remarks>
/// <param name="id">Filter index, CLFilters[id], to apply</param>
/// <param name="bmp">Bitmap to be processed</param>
/// <returns>The filtered bitmap</returns>
public static Bitmap ApplyFilter(int id, Bitmap bmp)
{
    List<Bitmap> halves = MPOReader.SplitJPS(bmp);
    CLCalc.Program.Image2D[] filtered = new CLCalc.Program.Image2D[2];

    for (int side = 0; side < filtered.Length; side++)
    {
        Bitmap half = halves[side];

        // Source and destination images are both created from the same half.
        CLCalc.Program.Image2D source = new CLCalc.Program.Image2D(half);
        filtered[side] = new CLCalc.Program.Image2D(half);

        CLCalc.Program.MemoryObject[] filterArgs = new CLCalc.Program.MemoryObject[]
        {
            source, filtered[side]
        };

        // The kernel runs over a grid 7 pixels smaller than the image in each direction.
        CLFilters[id].FilterKernel.Execute(filterArgs, new int[] { half.Width - 7, half.Height - 7 });
    }

    // Read back only after both halves have been queued, left first.
    Bitmap left = filtered[0].ReadBitmap();
    Bitmap right = filtered[1].ReadBitmap();
    return MPOReader.AssembleJPS(left, right);
}
/// <summary>Classifies multiple samples stored in OpenCL memory</summary>
/// <remarks>
/// Device buffers are created lazily and reused between calls. The support-vector count
/// buffer is rebuilt whenever the alpha buffer is rebuilt, so the two cannot disagree, and
/// the shared result buffer is null-checked before its length is inspected.
/// </remarks>
/// <param name="svm">SVM to use as classifier</param>
/// <param name="Samples">Samples data to classify; one sample per image row</param>
/// <returns>One classification value per row of <paramref name="Samples"/></returns>
/// <exception cref="ArgumentException">
/// Samples.Width * 4 differs from the training feature length (svm.HostVLen[0]).
/// </exception>
public static float[] MultiClassify(SVM svm, CLCalc.Program.Image2D Samples)
{
    int sampleCount = Samples.Height;
    int supportVectorCount = svm.alphaList.Count;
    float[] resp = new float[sampleCount];

    // Width is compared after multiplying by 4 against the training vector length.
    if ((Samples.Width << 2) != svm.HostVLen[0])
    {
        throw new ArgumentException("Invalid Samples width, should be the same length of training features");
    }

    // Scratch buffer: one kernel value per (support vector, sample) pair.
    if (svm.CLKernelValuesMultiClassify == null
        || svm.CLKernelValuesMultiClassify.OriginalVarLength != supportVectorCount * sampleCount)
    {
        svm.CLKernelValuesMultiClassify = new CLCalc.Program.Variable(new float[supportVectorCount * sampleCount]);
    }

    bool refreshSupportVectorCount = false;

    if (svm.CLAlphas == null || svm.CLAlphas.OriginalVarLength != supportVectorCount)
    {
        svm.CLAlphas = new CLCalc.Program.Variable(svm.alphaList.ToArray());

        float[] ys = new float[svm.TrainingSet.trainingArray.Count];
        for (int i = 0; i < ys.Length; i++)
        {
            ys[i] = svm.TrainingSet.trainingArray[i].y;
        }
        svm.CLys = new CLCalc.Program.Variable(ys);

        // The number of alphas changed (or was never uploaded): the count on the device is stale.
        refreshSupportVectorCount = true;
    }

    if (svm.CLb == null)
    {
        svm.CLb = new CLCalc.Program.Variable(new float[] { svm.b });
        refreshSupportVectorCount = true;
    }

    if (refreshSupportVectorCount || svm.CLQtdSupVecs == null)
    {
        svm.CLQtdSupVecs = new CLCalc.Program.Variable(new int[] { supportVectorCount });
    }

    // The result buffer is not per-SVM, so it may be missing even when svm.CLb is already set.
    if (CLMultiClassifSums == null || CLMultiClassifSums.OriginalVarLength != sampleCount)
    {
        CLMultiClassifSums = new CLCalc.Program.Variable(new float[sampleCount]);
    }

    // Pass 1: kernel values for every (support vector, sample) pair.
    CLCalc.Program.MemoryObject[] args = new CLCalc.Program.MemoryObject[]
    {
        svm.CLTrainingFeatures, svm.CLQtdSupVecs, svm.CLXVecLen, Samples,
        svm.CLKernelValuesMultiClassify, svm.CLLambda
    };
    kernelComputeMultiKernelRBF.Execute(args, new int[] { supportVectorCount, sampleCount });

    CLCalc.Program.Sync();

    // Pass 2: one work item per sample combines the kernel values into the final sums.
    args = new CLCalc.Program.MemoryObject[]
    {
        svm.CLAlphas, svm.CLQtdSupVecs, svm.CLXVecLen, svm.CLys,
        svm.CLKernelValuesMultiClassify, svm.CLb, CLMultiClassifSums
    };
    kernelSumKernels.Execute(args, sampleCount);

    CLMultiClassifSums.ReadFromDeviceTo(resp);
    return resp;
}
/// <summary>
/// Predicts the output of a single entry, given a previous problem, solution and correspondent training set
/// </summary>
/// <remarks>
/// The sample upload, kernel launch and read-back are performed under <c>CLResource</c>,
/// because the sample and kernel-value device buffers are shared by all calls on the same SVM.
/// </remarks>
/// <param name="problemSolution">Correspondent problem solution</param>
/// <param name="untrainedUnit">Input features from which the output will be predicted</param>
/// <returns>
/// The raw decision value F(x) = sum + b. The original documentation described the result as a
/// positive/negative classification, so the sign is presumably the predicted class.
/// </returns>
public static float CLpredictOutput(SVM problemSolution, TrainingUnit untrainedUnit)
{
    TrainingSet trainingSet = problemSolution.TrainingSet;

    // Kernel values between the untrained unit and every training sample.
    float[] K = new float[trainingSet.getN];
    CLCalc.Program.MemoryObject[] args = new CLCalc.Program.MemoryObject[]
    {
        problemSolution.CLTrainingFeatures,
        problemSolution.CLXVecLen,
        problemSolution.CLSample,
        problemSolution.CLKernelValues,
        problemSolution.CLLambda
    };

    lock (CLResource)
    {
        // Upload inside the lock so a concurrent call cannot replace the sample
        // between the write and the kernel execution.
        for (int j = 0; j < untrainedUnit.xVector.Length; j++)
        {
            problemSolution.HostSample[j] = untrainedUnit.xVector[j];
        }
        problemSolution.CLSample.WriteToDevice(problemSolution.HostSample);

        kernelComputeKernelRBF.Execute(args, trainingSet.getN);
        problemSolution.CLKernelValues.ReadFromDeviceTo(K);
    }

    // F(x) = sum + b
    // sum = summation of alpha_i * y_i * kernel(untrained unit, i) for all i in the training set.
    // Only the sign of y_i is used: positive labels add, all others subtract.
    float sum = 0;
    for (int i = 0; i < trainingSet.getN; i++)
    {
        float contribution = problemSolution.alphaList[i] * K[i];
        if (trainingSet.trainingArray[i].y > 0)
        {
            sum += contribution;
        }
        else
        {
            sum -= contribution;
        }
    }

    return sum + problemSolution.b;
}
/// <summary>
/// Computes M*x and stores the result in y. Does not automatically read the result back from
/// device memory.
/// </summary>
/// <param name="M">Sparse matrix</param>
/// <param name="x">Vector to be multiplied</param>
/// <param name="y">Result</param>
/// <exception cref="Exception">x or y does not have length M.MatrixDimension</exception>
public void Multiply(CLImgSparseMatrix M, CLImgVector x, CLImgVector y)
{
    bool sizesAgree = x.Length == M.MatrixDimension && y.Length == M.MatrixDimension;
    if (!sizesAgree)
    {
        throw new Exception("M, x and y dimensions not compatible");
    }

    // Without OpenCL acceleration, fall back to the host implementation.
    if (CLCalc.CLAcceleration != CLCalc.CLAccelerationType.UsingCL)
    {
        y.VectorData = MultiplyNoCL(M, x);
        return;
    }

    CLNonZeroElemsPerRow.WriteToDevice(new int[] { M.NonZeroElemsPerRow });

    CLCalc.Program.MemoryObject[] multiplyArgs = new CLCalc.Program.MemoryObject[]
    {
        M.CLMatrixData, M.CLColumns, x.CLVector, y.CLVector, CLNonZeroElemsPerRow
    };

    // One work item per group of four rows, rounded up; the dimension should ideally be a
    // multiple of 4, but it is OK if it is not.
    kernelSparseMatrixVecMult.Execute(multiplyArgs, 1 + ((M.MatrixDimension - 1) >> 2));
}
/// <summary>
/// Static initializer: compiles the OpenCL source, creates the kernel objects and runs
/// KernelStart once. A NullReferenceException raised along the way is written to the console
/// instead of being propagated.
/// </summary>
static Kernels()
{
    try
    {
        CLCalc.Program.Compile(src);

        // Create the kernel objects.
        KernelStart = new CLCalc.Program.Kernel("KernelStart");
        vetTransl = new CLCalc.Program.Kernel("vetTransl");

        // Run the start kernel with 100 unset argument slots over 4 work items.
        CLCalc.Program.MemoryObject[] startArgs = new CLCalc.Program.MemoryObject[100];
        int workItems = 4;
        KernelStart.Execute(startArgs, workItems);
    }
    catch (NullReferenceException failure)
    {
        System.Console.WriteLine(failure.ToString());
    }
}
/// <summary>
/// Multiplies sparse matrix M by vector x, writing the product into y. The result is left in
/// device memory; it is not read back automatically.
/// </summary>
/// <param name="M">Sparse matrix</param>
/// <param name="x">Vector to be multiplied</param>
/// <param name="y">Result</param>
/// <exception cref="Exception">x or y does not have length M.MatrixDimension</exception>
public void Multiply(CLImgSparseMatrix M, CLImgVector x, CLImgVector y)
{
    if (!(x.Length == M.MatrixDimension && y.Length == M.MatrixDimension))
    {
        throw new Exception("M, x and y dimensions not compatible");
    }

    bool useDevice = CLCalc.CLAcceleration == CLCalc.CLAccelerationType.UsingCL;
    if (!useDevice)
    {
        // Host fallback.
        y.VectorData = MultiplyNoCL(M, x);
        return;
    }

    int[] nonZerosPerRow = new int[] { M.NonZeroElemsPerRow };
    CLNonZeroElemsPerRow.WriteToDevice(nonZerosPerRow);

    CLCalc.Program.MemoryObject[] kernelArgs = new CLCalc.Program.MemoryObject[]
    {
        M.CLMatrixData,
        M.CLColumns,
        x.CLVector,
        y.CLVector,
        CLNonZeroElemsPerRow
    };

    // Ideally the matrix dimension is a multiple of 4, but the rounded-up work size copes if it is not.
    kernelSparseMatrixVecMult.Execute(kernelArgs, 1 + ((M.MatrixDimension - 1) >> 2));
}
/// <summary>Solves linear system Mx = b using conjugate gradient method. Doesn't try to improve the solution obtained.</summary>
/// <remarks>
/// At least one iteration is attempted even when the initial residual is already within
/// tolerance. Iteration stops early when the step-size denominator p.Ap or the previous
/// residual norm is exactly zero, because continuing would write NaN/Infinity into x.
/// </remarks>
/// <param name="M">Matrix M</param>
/// <param name="b">Vector b</param>
/// <param name="tol">Error tolerance, compared against the squared residual r.r</param>
/// <param name="x">Initial guess; replaced by a new zero-initialised vector when null or of the wrong length, and updated in place otherwise</param>
public void LinSolveCLStep(CLImgSparseMatrix M, CLImgVector b, float tol, ref CLImgVector x)
{
    int n = b.Length;
    int nBy4 = 1 + ((n - 1) >> 2);

    // Lazily create the scalar used to pass alpha/beta to the device.
    if (lambda == null)
    {
        lambda = new float[1];
        CLlambda = new CLCalc.Program.Variable(lambda);
    }

    // (Re)allocate the work vectors when the problem size changes.
    if (r == null || r.Length != n)
    {
        r = new CLImgVector(n);
        p = new CLImgVector(n);
        Ap = new CLImgVector(n);
        temp = new CLImgVector(n);
    }
    if (temp == null)
    {
        temp = new CLImgVector(n);
    }
    if (x == null || x.Length != n)
    {
        x = new CLImgVector(n);
    }

    //Initialization: Ap = M*x, then r and p are initialised from b and Ap
    Multiply(M, x, Ap);
    CLCalc.Program.MemoryObject[] args = new CLCalc.Program.MemoryObject[] { b.CLVector, Ap.CLVector, r.CLVector, p.CLVector };
    kernelInitRP.Execute(args, nBy4);

    //Loop
    int count = 0;
    float RDotR = DotProduct(r, r);
    while (count < 1 || ((RDotR > tol) && (count < n * MAXITER)))
    {
        float RDotROld = RDotR;

        Multiply(M, p, Ap);
        float pDotAp = DotProduct(Ap, p);

        // Exact-zero comparison is intentional: it only guards the divisions below.
        // A zero residual means x is already exact; a zero p.Ap means no step can be taken.
        if (pDotAp == 0f || RDotROld == 0f)
        {
            break;
        }

        float alpha = RDotROld / pDotAp;

        //Update x (presumably x = temp + alpha * p with temp holding the old x - confirm against kernel source)
        kernelCopyToTemp.Execute(new CLCalc.Program.MemoryObject[] { x.CLVector, temp.CLVector }, nBy4);
        lambda[0] = alpha;
        CLlambda.WriteToDevice(lambda);
        kernelMultiplyAdd.Execute(new CLCalc.Program.MemoryObject[] { CLlambda, p.CLVector, temp.CLVector, x.CLVector }, nBy4);

        //Update r (same pattern with -alpha and Ap)
        kernelCopyToTemp.Execute(new CLCalc.Program.MemoryObject[] { r.CLVector, temp.CLVector }, nBy4);
        lambda[0] = -alpha;
        CLlambda.WriteToDevice(lambda);
        kernelMultiplyAdd.Execute(new CLCalc.Program.MemoryObject[] { CLlambda, Ap.CLVector, temp.CLVector, r.CLVector }, nBy4);

        RDotR = DotProduct(r, r);
        float beta = RDotR / RDotROld;

        //Update p (same pattern with beta, the old p held in temp, and the new r)
        kernelCopyToTemp.Execute(new CLCalc.Program.MemoryObject[] { p.CLVector, temp.CLVector }, nBy4);
        lambda[0] = beta;
        CLlambda.WriteToDevice(lambda);
        kernelMultiplyAdd.Execute(new CLCalc.Program.MemoryObject[] { CLlambda, temp.CLVector, r.CLVector, p.CLVector }, nBy4);

        count++;
    }
}
/// <summary>
/// Runs the copy kernel that fills the position, color and normal buffers from the current
/// camera frames. Does nothing unless Visible or WireFrame is set.
/// </summary>
/// <remarks>
/// The three GL-shared buffers are acquired before the kernel runs and are always released
/// afterwards, even if building the arguments or executing the kernel throws.
/// </remarks>
/// <param name="app">Camera application supplying the primary device and image filter</param>
public void Process(BaseCameraApplication app)
{
    if (!(Visible || WireFrame))
    {
        return;
    }

    DepthCameraFrame depthFrame = app.GetPrimaryDevice().GetDepthImage();
    ColorCameraFrame colorFrame = app.GetPrimaryDevice().GetColorImage();
    // NOTE(review): textureFrame is not used below (the filter's texture image is used instead);
    // the fetch is kept in case GetTextureImage() has side effects - confirm and remove if not.
    TextureMapFrame textureFrame = app.GetPrimaryDevice().GetTextureImage();
    CameraDataFilter filter = (CameraDataFilter)app.GetImageFilter();

    // Nothing to copy until both frames are available.
    if (depthFrame == null || colorFrame == null)
    {
        return;
    }

    CLCalc.Program.MemoryObject[] glBuffers = new CLCalc.Program.MemoryObject[] { positionBuffer, colorBuffer, normalBuffer };

    CLGLInteropFunctions.AcquireGLElements(glBuffers);
    try
    {
        CLCalc.Program.MemoryObject[] args = new CLCalc.Program.MemoryObject[]
        {
            app.GetPrimaryDevice().GetBoundingBox(),
            filter.GetDepthImage(),
            filter.GetTextureImage(),
            colorFrame.GetMemoryObject(),
            positionBuffer,
            colorBuffer,
            normalBuffer
        };

        // One work item per depth pixel.
        kernelCopyImage.Execute(args, new int[] { depthFrame.Width, depthFrame.Height });
    }
    finally
    {
        // Hand the buffers back even when the kernel launch fails.
        CLGLInteropFunctions.ReleaseGLElements(glBuffers);
    }
}
/// <summary>Specific function when SVM contains only one object, such as faces</summary>
/// <param name="bmp">Next frame to process</param>
/// <returns>
/// Flat list of detections, three integers per detection copied from the matching SubFrames
/// entry (two coordinates followed by the window size), or null when no SVM is set.
/// </returns>
public List<int> FindSingleObj(Bitmap bmp)
{
    if (SVM == null)
    {
        return null;
    }

    // Rebuild the search grid and the device buffers whenever the frame size changes.
    bool frameSizeChanged = imgWidth != bmp.Width || imgHeight != bmp.Height;
    if (frameSizeChanged)
    {
        imgWidth = bmp.Width;
        imgHeight = bmp.Height;
        SubFramePos = 0;

        List<int> initialFrames = new List<int>();
        ComputeSubFrames(0, 0, bmp.Width, bmp.Height, initialFrames);
        SubFrames = initialFrames.ToArray();
        SubFeatures = new float[(SubFrames.Length / 3) * 364];

        if (CLCalc.CLAcceleration == CLCalc.CLAccelerationType.UsingCL)
        {
            CLSubFrames = new CLCalc.Program.Variable(SubFrames);
            // 91 texels wide per sub-frame row (364 floats / 4).
            CLSubFeatures = new CLCalc.Program.Image2D(SubFeatures, 91, SubFrames.Length / 3);
            CLBmp = new CLCalc.Program.Image2D(bmp);
            CLBmpPrev = new CLCalc.Program.Image2D(bmp);
        }
    }

    // Swap the current and previous bitmap references.
    CLCalc.Program.Image2D formerPrevious = CLBmpPrev;
    CLBmpPrev = CLBmp;
    CLBmp = formerPrevious;

    ComputeFrameDiff();

    // Replace sub-frames (ring-buffer style) with windows covering each moving region.
    // Each box takes four entries; coordinates are scaled back up by 8.
    for (int box = 0; box < MovingRegionBoxes.Count >> 2; box++)
    {
        int first = 4 * box;
        List<int> regionFrames = new List<int>();
        ComputeSubFrames(MovingRegionBoxes[first] << 3,
                         MovingRegionBoxes[first + 2] << 3,
                         MovingRegionBoxes[first + 1] << 3,
                         MovingRegionBoxes[first + 3] << 3,
                         regionFrames);

        for (int src = 0; src < regionFrames.Count; src += 3)
        {
            for (int c = 0; c < 3; c++)
            {
                SubFrames[SubFramePos + c] = regionFrames[src + c];
            }

            SubFramePos += 3;
            if (SubFramePos >= SubFrames.Length)
            {
                SubFramePos = 0;
            }
        }
    }

    CLSubFrames.WriteToDevice(SubFrames);
    CLBmp.WriteBitmap(bmp);

    // Extract features on the device, one work item per sub-frame.
    CLCalc.Program.MemoryObject[] featureArgs = new CLCalc.Program.MemoryObject[] { CLSubFrames, CLSubFeatures, CLBmp };
    kernelExtractFeatures.Execute(featureArgs, SubFrames.Length / 3);

    float[] scores = OpenCLTemplate.MachineLearning.SVM.MultiClassify(SVM.SVMs[0], CLSubFeatures);

    List<int> found = new List<int>();
    List<float> foundScores = new List<float>();

    // Visit window sizes from the last configured size to the first.
    for (int sizeIndex = Config.WINDOWSIZES.Length - 1; sizeIndex >= 0; sizeIndex--)
    {
        for (int i = scores.Length - 1; i >= 0; i--)
        {
            int i3 = 3 * i;
            if (SubFrames[i3 + 2] != Config.WINDOWSIZES[sizeIndex] || !(scores[i] > Config.REQCERTAINTY))
            {
                continue;
            }

            int winX = SubFrames[i3];
            int winY = SubFrames[i3 + 1];
            int winSize = SubFrames[i3 + 2];
            int winFarX = winX + winSize;
            int winFarY = winY + winSize;

            // Check whether a detection has already been recorded in that region.
            bool alreadyCovered = false;
            int knownCount = found.Count / 3;
            for (int k = 0; k < knownCount; k++)
            {
                int k3 = 3 * k;
                int boxX = found[k3];
                int boxY = found[k3 + 1];
                int boxSize = found[k3 + 2];
                int boxFarX = boxX + boxSize;
                int boxFarY = boxY + boxSize;

                // "Any corner of the window lies inside the recorded box" factors into
                // independent tests on the two axes.
                bool nearXInside = boxX <= winX && winX <= boxFarX;
                bool farXInside = boxX <= winFarX && winFarX <= boxFarX;
                bool nearYInside = boxY <= winY && winY <= boxFarY;
                bool farYInside = boxY <= winFarY && winFarY <= boxFarY;

                if ((nearXInside || farXInside) && (nearYInside || farYInside))
                {
                    alreadyCovered = true;

                    // Replace the recorded detection if this one scores higher at the same size.
                    if (scores[i] > foundScores[k] && winSize == boxSize)
                    {
                        found[k3] = winX;
                        found[k3 + 1] = winY;
                        found[k3 + 2] = winSize;
                        foundScores[k] = scores[i];
                    }

                    break;
                }
            }

            if (!alreadyCovered)
            {
                found.Add(winX);
                found.Add(winY);
                found.Add(winSize);
                foundScores.Add(scores[i]);
            }
        }
    }

    Random rnd = new Random();

    // Update the search region: around every promising window, add sub-frames covering a
    // slightly jittered box of the last configured window size.
    if (MovingRegionBoxes.Count > 0)
    {
        for (int i = 0; i < scores.Length; i++)
        {
            if (!(scores[i] > Config.REFINEUNCERTAINTY))
            {
                continue;
            }

            int i3 = 3 * i;
            List<int> refinedFrames = new List<int>();
            int halfWindow = SubFrames[i3 + 2] >> 1;
            int cx = SubFrames[i3] + halfWindow + rnd.Next(7) - 3;
            int cy = SubFrames[i3 + 1] + halfWindow + rnd.Next(7) - 3;
            int bigwSize = Config.WINDOWSIZES[Config.WINDOWSIZES.Length - 1];
            int reach = bigwSize >> 1;

            try
            {
                ComputeSubFrames(cx - reach, cy - reach, cx + reach, cy + reach, refinedFrames);

                for (int src = 0; src < refinedFrames.Count; src += 3)
                {
                    for (int c = 0; c < 3; c++)
                    {
                        SubFrames[SubFramePos + c] = refinedFrames[src + c];
                    }

                    SubFramePos += 3;
                    if (SubFramePos >= SubFrames.Length)
                    {
                        SubFramePos = 0;
                    }
                }
            }
            catch
            {
                // Any failure while refining is ignored; refinement is best-effort.
            }
        }
    }

    return found;
}
/// <summary>
/// Predicts the output of a single entry, given a previous problem, solution and correspondent training set
/// </summary>
/// <remarks>
/// The sample upload, kernel launch and read-back are performed under <c>CLResource</c>,
/// because the sample and kernel-value device buffers are shared by all calls on the same SVM.
/// </remarks>
/// <param name="problemSolution">Correspondent problem solution</param>
/// <param name="untrainedUnit">Input features from which the output will be predicted</param>
/// <returns>
/// The raw decision value F(x) = sum + b. The original documentation described the result as a
/// positive/negative classification, so the sign is presumably the predicted class.
/// </returns>
public static float CLpredictOutput(SVM problemSolution, TrainingUnit untrainedUnit)
{
    TrainingSet trainingSet = problemSolution.TrainingSet;

    // Kernel values between the untrained unit and every training sample.
    float[] K = new float[trainingSet.getN];
    CLCalc.Program.MemoryObject[] args = new CLCalc.Program.MemoryObject[]
    {
        problemSolution.CLTrainingFeatures,
        problemSolution.CLXVecLen,
        problemSolution.CLSample,
        problemSolution.CLKernelValues,
        problemSolution.CLLambda
    };

    lock (CLResource)
    {
        // Upload inside the lock so a concurrent call cannot replace the sample
        // between the write and the kernel execution.
        for (int j = 0; j < untrainedUnit.xVector.Length; j++)
        {
            problemSolution.HostSample[j] = untrainedUnit.xVector[j];
        }
        problemSolution.CLSample.WriteToDevice(problemSolution.HostSample);

        kernelComputeKernelRBF.Execute(args, trainingSet.getN);
        problemSolution.CLKernelValues.ReadFromDeviceTo(K);
    }

    // F(x) = sum + b
    // sum = summation of alpha_i * y_i * kernel(untrained unit, i) for all i in the training set.
    // Only the sign of y_i is used: positive labels add, all others subtract.
    float sum = 0;
    for (int i = 0; i < trainingSet.getN; i++)
    {
        float contribution = problemSolution.alphaList[i] * K[i];
        if (trainingSet.trainingArray[i].y > 0)
        {
            sum += contribution;
        }
        else
        {
            sum -= contribution;
        }
    }

    return sum + problemSolution.b;
}
/// <summary>Specific function when SVM contains only one object, such as faces</summary>
/// <param name="bmp">Next frame to process</param>
/// <returns>
/// Flat list of detections, three integers per detection copied from the matching SubFrames
/// entry (two coordinates followed by the window size), or null when no SVM is set.
/// </returns>
public List<int> FindSingleObj(Bitmap bmp)
{
    if (SVM == null)
    {
        return null;
    }

    // Rebuild the search grid and the device buffers whenever the frame size changes.
    bool frameSizeChanged = imgWidth != bmp.Width || imgHeight != bmp.Height;
    if (frameSizeChanged)
    {
        imgWidth = bmp.Width;
        imgHeight = bmp.Height;
        SubFramePos = 0;

        List<int> initialFrames = new List<int>();
        ComputeSubFrames(0, 0, bmp.Width, bmp.Height, initialFrames);
        SubFrames = initialFrames.ToArray();
        SubFeatures = new float[(SubFrames.Length / 3) * 364];

        if (CLCalc.CLAcceleration == CLCalc.CLAccelerationType.UsingCL)
        {
            CLSubFrames = new CLCalc.Program.Variable(SubFrames);
            // 91 texels wide per sub-frame row (364 floats / 4).
            CLSubFeatures = new CLCalc.Program.Image2D(SubFeatures, 91, SubFrames.Length / 3);
            CLBmp = new CLCalc.Program.Image2D(bmp);
            CLBmpPrev = new CLCalc.Program.Image2D(bmp);
        }
    }

    // Swap the current and previous bitmap references.
    CLCalc.Program.Image2D formerPrevious = CLBmpPrev;
    CLBmpPrev = CLBmp;
    CLBmp = formerPrevious;

    ComputeFrameDiff();

    // Replace sub-frames (ring-buffer style) with windows covering each moving region.
    // Each box takes four entries; coordinates are scaled back up by 8.
    for (int box = 0; box < MovingRegionBoxes.Count >> 2; box++)
    {
        int first = 4 * box;
        List<int> regionFrames = new List<int>();
        ComputeSubFrames(MovingRegionBoxes[first] << 3,
                         MovingRegionBoxes[first + 2] << 3,
                         MovingRegionBoxes[first + 1] << 3,
                         MovingRegionBoxes[first + 3] << 3,
                         regionFrames);

        for (int src = 0; src < regionFrames.Count; src += 3)
        {
            for (int c = 0; c < 3; c++)
            {
                SubFrames[SubFramePos + c] = regionFrames[src + c];
            }

            SubFramePos += 3;
            if (SubFramePos >= SubFrames.Length)
            {
                SubFramePos = 0;
            }
        }
    }

    CLSubFrames.WriteToDevice(SubFrames);
    CLBmp.WriteBitmap(bmp);

    // Extract features on the device, one work item per sub-frame.
    CLCalc.Program.MemoryObject[] featureArgs = new CLCalc.Program.MemoryObject[] { CLSubFrames, CLSubFeatures, CLBmp };
    kernelExtractFeatures.Execute(featureArgs, SubFrames.Length / 3);

    float[] scores = OpenCLTemplate.MachineLearning.SVM.MultiClassify(SVM.SVMs[0], CLSubFeatures);

    List<int> found = new List<int>();
    List<float> foundScores = new List<float>();

    // Visit window sizes from the last configured size to the first.
    for (int sizeIndex = Config.WINDOWSIZES.Length - 1; sizeIndex >= 0; sizeIndex--)
    {
        for (int i = scores.Length - 1; i >= 0; i--)
        {
            int i3 = 3 * i;
            if (SubFrames[i3 + 2] != Config.WINDOWSIZES[sizeIndex] || !(scores[i] > Config.REQCERTAINTY))
            {
                continue;
            }

            int winX = SubFrames[i3];
            int winY = SubFrames[i3 + 1];
            int winSize = SubFrames[i3 + 2];
            int winFarX = winX + winSize;
            int winFarY = winY + winSize;

            // Check whether a detection has already been recorded in that region.
            bool alreadyCovered = false;
            int knownCount = found.Count / 3;
            for (int k = 0; k < knownCount; k++)
            {
                int k3 = 3 * k;
                int boxX = found[k3];
                int boxY = found[k3 + 1];
                int boxSize = found[k3 + 2];
                int boxFarX = boxX + boxSize;
                int boxFarY = boxY + boxSize;

                // "Any corner of the window lies inside the recorded box" factors into
                // independent tests on the two axes.
                bool nearXInside = boxX <= winX && winX <= boxFarX;
                bool farXInside = boxX <= winFarX && winFarX <= boxFarX;
                bool nearYInside = boxY <= winY && winY <= boxFarY;
                bool farYInside = boxY <= winFarY && winFarY <= boxFarY;

                if ((nearXInside || farXInside) && (nearYInside || farYInside))
                {
                    alreadyCovered = true;

                    // Replace the recorded detection if this one scores higher at the same size.
                    if (scores[i] > foundScores[k] && winSize == boxSize)
                    {
                        found[k3] = winX;
                        found[k3 + 1] = winY;
                        found[k3 + 2] = winSize;
                        foundScores[k] = scores[i];
                    }

                    break;
                }
            }

            if (!alreadyCovered)
            {
                found.Add(winX);
                found.Add(winY);
                found.Add(winSize);
                foundScores.Add(scores[i]);
            }
        }
    }

    Random rnd = new Random();

    // Update the search region: around every promising window, add sub-frames covering a
    // slightly jittered box of the last configured window size.
    if (MovingRegionBoxes.Count > 0)
    {
        for (int i = 0; i < scores.Length; i++)
        {
            if (!(scores[i] > Config.REFINEUNCERTAINTY))
            {
                continue;
            }

            int i3 = 3 * i;
            List<int> refinedFrames = new List<int>();
            int halfWindow = SubFrames[i3 + 2] >> 1;
            int cx = SubFrames[i3] + halfWindow + rnd.Next(7) - 3;
            int cy = SubFrames[i3 + 1] + halfWindow + rnd.Next(7) - 3;
            int bigwSize = Config.WINDOWSIZES[Config.WINDOWSIZES.Length - 1];
            int reach = bigwSize >> 1;

            try
            {
                ComputeSubFrames(cx - reach, cy - reach, cx + reach, cy + reach, refinedFrames);

                for (int src = 0; src < refinedFrames.Count; src += 3)
                {
                    for (int c = 0; c < 3; c++)
                    {
                        SubFrames[SubFramePos + c] = refinedFrames[src + c];
                    }

                    SubFramePos += 3;
                    if (SubFramePos >= SubFrames.Length)
                    {
                        SubFramePos = 0;
                    }
                }
            }
            catch
            {
                // Any failure while refining is ignored; refinement is best-effort.
            }
        }
    }

    return found;
}
/// <summary>Solves linear system Mx = b using conjugate gradient method. Doesn't try to improve the solution obtained.</summary>
/// <remarks>
/// The solution is accumulated in the member vector x, which is reallocated together with the
/// other work vectors when the problem size changes. Iteration stops early when the step-size
/// denominator p.Ap or the previous residual norm is exactly zero, because continuing would
/// write NaN/Infinity into x.
/// </remarks>
/// <param name="M">Matrix M</param>
/// <param name="b">Vector b</param>
/// <param name="tol">Error tolerance, compared against the squared residual r.r</param>
public void LinSolveCLStep(CLImgSparseMatrix M, CLImgVector b, float tol)
{
    int n = b.Length;
    int nBy4 = 1 + ((n - 1) >> 2);

    // Lazily create the scalar used to pass alpha/beta to the device.
    if (lambda == null)
    {
        lambda = new float[1];
        CLlambda = new CLCalc.Program.Variable(lambda);
    }

    // (Re)allocate the work vectors, including x, when the problem size changes.
    if (r == null || r.Length != n)
    {
        r = new CLImgVector(n);
        p = new CLImgVector(n);
        x = new CLImgVector(n);
        Ap = new CLImgVector(n);
        temp = new CLImgVector(n);
    }
    if (temp == null)
    {
        temp = new CLImgVector(n);
    }

    //Initialization: Ap = M*x, then r and p are initialised from b and Ap
    Multiply(M, x, Ap);
    CLCalc.Program.MemoryObject[] args = new CLCalc.Program.MemoryObject[] { b.CLVector, Ap.CLVector, r.CLVector, p.CLVector };
    kernelInitRP.Execute(args, nBy4);

    //Loop
    int count = 0;
    float RDotR = DotProduct(r, r);
    while ((RDotR > tol) && (count < n * MAXITER))
    {
        float RDotROld = RDotR;

        Multiply(M, p, Ap);
        float pDotAp = DotProduct(Ap, p);

        // Exact-zero comparison is intentional: it only guards the divisions below.
        // A zero p.Ap means no step can be taken; a zero residual means there is nothing left to correct.
        if (pDotAp == 0f || RDotROld == 0f)
        {
            break;
        }

        float alpha = RDotROld / pDotAp;

        //Update x (presumably x = temp + alpha * p with temp holding the old x - confirm against kernel source)
        kernelCopyToTemp.Execute(new CLCalc.Program.MemoryObject[] { x.CLVector, temp.CLVector }, nBy4);
        lambda[0] = alpha;
        CLlambda.WriteToDevice(lambda);
        kernelMultiplyAdd.Execute(new CLCalc.Program.MemoryObject[] { CLlambda, p.CLVector, temp.CLVector, x.CLVector }, nBy4);

        //Update r (same pattern with -alpha and Ap)
        kernelCopyToTemp.Execute(new CLCalc.Program.MemoryObject[] { r.CLVector, temp.CLVector }, nBy4);
        lambda[0] = -alpha;
        CLlambda.WriteToDevice(lambda);
        kernelMultiplyAdd.Execute(new CLCalc.Program.MemoryObject[] { CLlambda, Ap.CLVector, temp.CLVector, r.CLVector }, nBy4);

        RDotR = DotProduct(r, r);
        float beta = RDotR / RDotROld;

        //Update p (same pattern with beta, the old p held in temp, and the new r)
        kernelCopyToTemp.Execute(new CLCalc.Program.MemoryObject[] { p.CLVector, temp.CLVector }, nBy4);
        lambda[0] = beta;
        CLlambda.WriteToDevice(lambda);
        kernelMultiplyAdd.Execute(new CLCalc.Program.MemoryObject[] { CLlambda, temp.CLVector, r.CLVector, p.CLVector }, nBy4);

        count++;
    }
}
/// <summary>Classifies multiple samples stored in OpenCL memory</summary>
/// <remarks>
/// Device buffers are created lazily and reused between calls. The support-vector count
/// buffer is rebuilt whenever the alpha buffer is rebuilt, so the two cannot disagree, and
/// the shared result buffer is null-checked before its length is inspected.
/// </remarks>
/// <param name="svm">SVM to use as classifier</param>
/// <param name="Samples">Samples data to classify; one sample per image row</param>
/// <returns>One classification value per row of <paramref name="Samples"/></returns>
/// <exception cref="ArgumentException">
/// Samples.Width * 4 differs from the training feature length (svm.HostVLen[0]).
/// </exception>
public static float[] MultiClassify(SVM svm, CLCalc.Program.Image2D Samples)
{
    int sampleCount = Samples.Height;
    int supportVectorCount = svm.alphaList.Count;
    float[] resp = new float[sampleCount];

    // Width is compared after multiplying by 4 against the training vector length.
    if ((Samples.Width << 2) != svm.HostVLen[0])
    {
        throw new ArgumentException("Invalid Samples width, should be the same length of training features");
    }

    // Scratch buffer: one kernel value per (support vector, sample) pair.
    if (svm.CLKernelValuesMultiClassify == null
        || svm.CLKernelValuesMultiClassify.OriginalVarLength != supportVectorCount * sampleCount)
    {
        svm.CLKernelValuesMultiClassify = new CLCalc.Program.Variable(new float[supportVectorCount * sampleCount]);
    }

    bool refreshSupportVectorCount = false;

    if (svm.CLAlphas == null || svm.CLAlphas.OriginalVarLength != supportVectorCount)
    {
        svm.CLAlphas = new CLCalc.Program.Variable(svm.alphaList.ToArray());

        float[] ys = new float[svm.TrainingSet.trainingArray.Count];
        for (int i = 0; i < ys.Length; i++)
        {
            ys[i] = svm.TrainingSet.trainingArray[i].y;
        }
        svm.CLys = new CLCalc.Program.Variable(ys);

        // The number of alphas changed (or was never uploaded): the count on the device is stale.
        refreshSupportVectorCount = true;
    }

    if (svm.CLb == null)
    {
        svm.CLb = new CLCalc.Program.Variable(new float[] { svm.b });
        refreshSupportVectorCount = true;
    }

    if (refreshSupportVectorCount || svm.CLQtdSupVecs == null)
    {
        svm.CLQtdSupVecs = new CLCalc.Program.Variable(new int[] { supportVectorCount });
    }

    // The result buffer is not per-SVM, so it may be missing even when svm.CLb is already set.
    if (CLMultiClassifSums == null || CLMultiClassifSums.OriginalVarLength != sampleCount)
    {
        CLMultiClassifSums = new CLCalc.Program.Variable(new float[sampleCount]);
    }

    // Pass 1: kernel values for every (support vector, sample) pair.
    CLCalc.Program.MemoryObject[] args = new CLCalc.Program.MemoryObject[]
    {
        svm.CLTrainingFeatures, svm.CLQtdSupVecs, svm.CLXVecLen, Samples,
        svm.CLKernelValuesMultiClassify, svm.CLLambda
    };
    kernelComputeMultiKernelRBF.Execute(args, new int[] { supportVectorCount, sampleCount });

    CLCalc.Program.Sync();

    // Pass 2: one work item per sample combines the kernel values into the final sums.
    args = new CLCalc.Program.MemoryObject[]
    {
        svm.CLAlphas, svm.CLQtdSupVecs, svm.CLXVecLen, svm.CLys,
        svm.CLKernelValuesMultiClassify, svm.CLb, CLMultiClassifSums
    };
    kernelSumKernels.Execute(args, sampleCount);

    CLMultiClassifSums.ReadFromDeviceTo(resp);
    return resp;
}
/// <summary>Equalizes image histogram using OpenCL</summary>
/// <remarks>
/// Initializes OpenCL on first use and returns without touching <paramref name="bmp"/> when
/// acceleration is not available. The device scalars holding the number of luminance levels
/// and the image dimensions are refreshed on every call, so they stay correct when the bitmap
/// size changes between calls.
/// </remarks>
/// <param name="bmp">Bitmap to equalize; replaced by the equalized bitmap read back from the device</param>
private void CLEqualizeHistogram(ref Bitmap bmp)
{
    if (CLCalc.CLAcceleration == CLCalc.CLAccelerationType.Unknown)
    {
        CLCalc.InitCL();
    }
    if (CLCalc.CLAcceleration != CLCalc.CLAccelerationType.UsingCL)
    {
        return;
    }

    float[] histLuminance = new float[NLumIntens];

    if (kernelComputeHistograms == null || CLN == null || CLHistogram == null)
    {
        CLHistogram = new CLCalc.Program.Variable(histLuminance);
        // One partial histogram per image column.
        CLPartialHistograms = new CLCalc.Program.Variable(new float[NLumIntens * bmp.Width]);
    }
    InitKernels();

    if (CLbmp == null || CLbmp.Height != bmp.Height || CLbmp.Width != bmp.Width)
    {
        // Image size changed (or first call): rebuild every size-dependent device object.
        CLbmp = new CLCalc.Program.Image2D(bmp);
        CLNewBmp = new CLCalc.Program.Image2D(bmp);
        CLPartialHistograms = new CLCalc.Program.Variable(new float[NLumIntens * bmp.Width]);
    }
    else
    {
        CLbmp.WriteBitmap(bmp);
    }

    // Refresh the scalars on both paths; previously they were only written when the image
    // buffers were reused, leaving stale dimensions after a resize.
    // NOTE(review): these are presumably created by InitKernels() - the null checks only avoid
    // introducing a new failure mode if that assumption is wrong.
    if (CLN != null)
    {
        CLN.WriteToDevice(new int[] { NLumIntens });
    }
    if (CLWidth != null)
    {
        CLWidth.WriteToDevice(new int[] { bmp.Width });
    }
    if (CLHeight != null)
    {
        CLHeight.WriteToDevice(new int[] { bmp.Height });
    }

    //Partial histograms, one work item per image column
    CLCalc.Program.MemoryObject[] args = new CLCalc.Program.MemoryObject[] { CLbmp, CLPartialHistograms, CLHeight, CLN };
    kernelComputeHistograms.Execute(args, bmp.Width);
    CLCalc.Program.Sync();

    //Consolidation, one work item per luminance level
    args = new CLCalc.Program.MemoryObject[] { CLPartialHistograms, CLHistogram, CLHeight, CLN };
    kernelConsolidateHist.Execute(args, NLumIntens);
    CLHistogram.ReadFromDeviceTo(histLuminance);

    //Perform histogram integration - better performance in CPU
    //Compute histogram integrals in-place
    for (int i = 1; i < NLumIntens; i++)
    {
        histLuminance[i] += histLuminance[i - 1];
    }

    //Scales histograms so the last entry becomes 0.9; an all-zero histogram is left untouched
    //to avoid dividing by zero and uploading NaN values
    float total = histLuminance[NLumIntens - 1];
    if (total != 0f)
    {
        float scale = 0.9f / total;
        for (int i = 0; i < NLumIntens; i++)
        {
            histLuminance[i] *= scale;
        }
    }

    //Writes histogram integral
    CLHistogram.WriteToDevice(histLuminance);

    //Computes equalized image, one work item per pixel
    args = new CLCalc.Program.MemoryObject[] { CLbmp, CLNewBmp, CLHistogram, CLN };
    kernelPerformNormalization.Execute(args, new int[] { bmp.Width, bmp.Height });

    bmp = CLNewBmp.ReadBitmap();
}
/// <summary>Computes the i-th line of matrix K[i][j]</summary>
/// <remarks>
/// The row is cached in <c>TrainingSet.kernels[i]</c>; a row already flagged in
/// <c>IsKernelCalculated</c> is not recomputed. The sample upload, the kernel launch and the
/// read-back all happen under <c>CLResource</c>, because the sample and kernel-value device
/// buffers are shared by every call made on the same SVM.
/// </remarks>
/// <param name="problemSolution">SVM to solve</param>
/// <param name="i">Kernel line number to compute</param>
private static void CLComputeKernels(SVM problemSolution, int i)
{
    TrainingSet trainingSet = problemSolution.TrainingSet;

    // Fast path: row already available.
    if (trainingSet.IsKernelCalculated[i])
    {
        return;
    }

    CLCalc.Program.MemoryObject[] args = new CLCalc.Program.MemoryObject[]
    {
        problemSolution.CLTrainingFeatures,
        problemSolution.CLXVecLen,
        problemSolution.CLSample,
        problemSolution.CLKernelValues,
        problemSolution.CLLambda
    };

    lock (CLResource)
    {
        // Another caller may have produced the row while we were waiting for the lock.
        if (trainingSet.IsKernelCalculated[i])
        {
            return;
        }

        // Stage sample i in the shared host buffer and upload it. This must be inside the
        // lock: otherwise a concurrent call could overwrite the sample before the kernel runs.
        for (int j = 0; j < trainingSet.trainingArray[i].xVector.Length; j++)
        {
            problemSolution.HostSample[j] = trainingSet.trainingArray[i].xVector[j];
        }
        problemSolution.CLSample.WriteToDevice(problemSolution.HostSample);

        //OpenCL Kernel execution
        float[] kernelRow = new float[trainingSet.getN];
        kernelComputeKernelRBF.Execute(args, trainingSet.getN);
        problemSolution.CLKernelValues.ReadFromDeviceTo(kernelRow);

        // Publish the row only once it is fully populated, then mark it as available.
        trainingSet.kernels[i] = kernelRow;
        trainingSet.IsKernelCalculated[i] = true;
    }
}
/// <summary>
/// Computes the difference between the current and previous frames on a grid reduced by a
/// factor of 8 in each direction, then brackets the moving regions into MovingRegionBoxes.
/// </summary>
private void ComputeFrameDiff()
{
    // Both images must exist and have identical dimensions.
    bool framesComparable = CLBmp != null
                            && CLBmpPrev != null
                            && CLBmp.Width == CLBmpPrev.Width
                            && CLBmp.Height == CLBmpPrev.Height;
    if (!framesComparable)
    {
        return;
    }

    // One byte per 8x8 block of pixels.
    int cellCount = (CLBmp.Height * CLBmp.Width) >> 6;
    if (frameDiff == null || frameDiff.Length != cellCount)
    {
        frameDiff = new byte[cellCount];
        CLframeDiff = new CLCalc.Program.Variable(frameDiff);
        MovingRegionBoxes = new List<int>();
    }

    int gridWidth = CLBmp.Width >> 3;
    int gridHeight = CLBmp.Height >> 3;

    CLCalc.Program.MemoryObject[] diffArgs = new CLCalc.Program.MemoryObject[]
    {
        CLBmp, CLBmpPrev, CLframeDiff
    };
    kernelComputeFrameDiff.Execute(diffArgs, new int[] { gridWidth, gridHeight });
    CLframeDiff.ReadFromDeviceTo(frameDiff);

    // Rebuild the list of moving-region boxes from scratch for this frame.
    MovingRegionBoxes.Clear();
    BracketMovingRegions(frameDiff, gridWidth, gridHeight, MovingRegionBoxes);
}
/// <summary>
/// Runs the bitmap-copy kernel that fills the positions and colors buffers from the current
/// depth, texture and color frames. Does nothing unless Visisble is set or while any of the
/// three frames is unavailable.
/// </summary>
/// <remarks>
/// The argument objects are acquired for OpenCL use before the kernel runs and are always
/// released afterwards, even if the kernel launch throws.
/// </remarks>
/// <param name="capture">Camera application supplying the primary device</param>
public void Process(BaseCameraApplication capture)
{
    if (!Visisble)
    {
        return;
    }

    DepthCameraFrame depthFrame = capture.GetPrimaryDevice().GetDepthImage();
    ColorCameraFrame colorFrame = capture.GetPrimaryDevice().GetColorImage();
    TextureMapFrame textureFrame = capture.GetPrimaryDevice().GetTextureImage();

    // The texture frame is dereferenced below, so it needs the same guard as the other two.
    if (depthFrame == null || colorFrame == null || textureFrame == null)
    {
        return;
    }

    CLCalc.Program.MemoryObject[] args = new CLCalc.Program.MemoryObject[]
    {
        depthFrame.GetMemoryObject(),
        textureFrame.GetMemoryObject(),
        colorFrame.GetMemoryObject(),
        positions,
        colors
    };

    CLGLInteropFunctions.AcquireGLElements(args);
    try
    {
        // One work item per depth pixel.
        kernelCopyBmp.Execute(args, new int[] { depthFrame.Width, depthFrame.Height });
    }
    finally
    {
        // Hand the objects back even when the kernel launch fails.
        CLGLInteropFunctions.ReleaseGLElements(args);
    }
}