/// <summary>Computes the dot product of two vectors. When OpenCL acceleration is active the
/// OpenCL images of the vectors are used; otherwise the host-side VectorData arrays are used.
/// Assumes data has been inserted to VectorData and WriteToDevice() has been called.</summary>
/// <param name="v1">First vector</param>
/// <param name="v2">Second vector</param>
/// <returns>The dot product; the same value is also left in dprod[0]</returns>
/// <exception cref="Exception">Thrown when the two vectors have different lengths</exception>
public float DotProduct(CLImgVector v1, CLImgVector v2)
{
    if (v1.Length != v2.Length)
    {
        throw new Exception("Incompatible lengths");
    }

    if (CLCalc.CLAcceleration == CLCalc.CLAccelerationType.UsingCL)
    {
        // CLDotProd leaves the reduced value in dotProdSum; read it back to the host.
        CLDotProd(v1, v2);
        float[] resp = new float[1];
        dotProdSum.ReadFromDeviceTo(resp);
        dprod[0] = resp[0];
    }
    else
    {
        // Accumulate in a fresh local: dprod outlives this call, so summing straight into
        // dprod[0] would add the previous call's result to this one.
        float sum = 0;
        for (int i = 0; i < v1.Length; i++)
        {
            sum += v1.VectorData[i] * v2.VectorData[i];
        }
        dprod[0] = sum;
    }

    return dprod[0];
}
/// <summary>Runs the OpenCL kernels that compute the dot product of two vectors and
/// stores the result in dotProdSum.</summary>
/// <param name="v1">First vector; its CLVector image is passed to the kernel</param>
/// <param name="v2">Second vector; its CLVector image is passed to the kernel</param>
private void CLDotProd(CLImgVector v1, CLImgVector v2)
{
    // Upload (length / 4) + 1 for the product kernel.
    int[] lengthByFour = new int[] { (v1.Length >> 2) + 1 };
    vLenBy4.WriteToDevice(lengthByFour);

    // Stage 1: computes products and most sums. The local size is taken from
    // MaxWorkItemSizes[0] of the device behind the default command queue.
    CLCalc.Program.MemoryObject[] productArgs = new CLCalc.Program.MemoryObject[]
    {
        v1.CLVector, v2.CLVector, dotProd, vLenBy4
    };
    int[] globalSize = new int[] { GLOBALWORKSIZE };
    int[] localSize = new int[] { (int)CLCalc.CLDevices[CLCalc.Program.DefaultCQ].MaxWorkItemSizes[0] };
    kernelDotProduct.Execute(productArgs, globalSize, localSize);

    // Stage 2: sums what is left, halving the number of work items on every pass.
    CLCalc.Program.MemoryObject[] reduceArgs = new CLCalc.Program.MemoryObject[] { dotProd };
    for (int workItems = GLOBALWORKSIZE >> 3; workItems > 0; workItems >>= 1)
    {
        kernelSum.Execute(reduceArgs, workItems);
    }

    // Stage 3: a single work item reads the final value into dotProdSum.
    CLCalc.Program.MemoryObject[] finalArgs = new CLCalc.Program.MemoryObject[] { dotProd, dotProdSum };
    kernelGetDotSum.Execute(finalArgs, 1);
}
/// <summary>Dot product of two vectors computed on the host, without OpenCL, accumulating
/// in double precision.</summary>
/// <param name="v1">First vector; its Length sets the number of terms</param>
/// <param name="v2">Second vector</param>
/// <returns>Sum of the element-wise products of the two VectorData arrays</returns>
public double ExactDotProductNoCL(CLImgVector v1, CLImgVector v2)
{
    double total = 0;
    int k = 0;
    while (k < v1.Length)
    {
        // Widen both factors before multiplying so the product is formed in double.
        double left = (double)v1.VectorData[k];
        double right = (double)v2.VectorData[k];
        total += left * right;
        k++;
    }
    return total;
}
/// <summary>Single-precision dot product of two vectors computed on the host, without OpenCL.</summary>
/// <param name="v1">First vector; its Length sets the number of terms</param>
/// <param name="v2">Second vector</param>
/// <returns>Sum of the element-wise products of the two VectorData arrays</returns>
public float DotProductNoCL(CLImgVector v1, CLImgVector v2)
{
    float total = 0;
    int k = 0;
    while (k < v1.Length)
    {
        total += v1.VectorData[k] * v2.VectorData[k];
        k++;
    }
    return total;
}
/// <summary>Computes M*x and stores the result in y. On the OpenCL path the result stays in
/// device memory and is not read back automatically.</summary>
/// <param name="M">Sparse matrix</param>
/// <param name="x">Vector to be multiplied</param>
/// <param name="y">Result</param>
/// <exception cref="Exception">Thrown when x or y does not match the matrix dimension</exception>
public void Multiply(CLImgSparseMatrix M, CLImgVector x, CLImgVector y)
{
    bool sizesMatch = (x.Length == M.MatrixDimension) && (y.Length == M.MatrixDimension);
    if (!sizesMatch)
    {
        throw new Exception("M, x and y dimensions not compatible");
    }

    // Host fallback: compute on the CPU and hand the result array to y.
    if (CLCalc.CLAcceleration != CLCalc.CLAccelerationType.UsingCL)
    {
        y.VectorData = MultiplyNoCL(M, x);
        return;
    }

    CLNonZeroElemsPerRow.WriteToDevice(new int[] { M.NonZeroElemsPerRow });

    CLCalc.Program.MemoryObject[] kernelArgs = new CLCalc.Program.MemoryObject[]
    {
        M.CLMatrixData, M.CLColumns, x.CLVector, y.CLVector, CLNonZeroElemsPerRow
    };

    // Ideally matrix dimension should be a multiple of 4, but OK if it's not:
    // the work size is the dimension divided by 4, rounded up.
    int workItems = 1 + ((M.MatrixDimension - 1) >> 2);
    kernelSparseMatrixVecMult.Execute(kernelArgs, workItems);
}
/// <summary>Computes the product of sparse matrix M and vector x on the host.</summary>
/// <param name="M">Sparse matrix, stored with NonZeroElemsPerRow slots per row</param>
/// <param name="x">Vector to be multiplied</param>
/// <returns>A new array of length x.Length holding M*x</returns>
/// <exception cref="Exception">Thrown when x does not match the matrix dimension</exception>
public float[] MultiplyNoCL(CLImgSparseMatrix M, CLImgVector x)
{
    if (x.Length != M.MatrixDimension)
    {
        throw new Exception("M and x dimensions not compatible");
    }

    float[] product = new float[x.Length];

    for (int row = 0; row < M.MatrixDimension; row++)
    {
        for (int j = 0; j < M.NonZeroElemsPerRow; j++)
        {
            // Flat index of slot j within this row.
            int slot = j + M.NonZeroElemsPerRow * row;

            // Slots whose column index is negative contribute nothing.
            if (M.Columns[slot] < 0)
            {
                continue;
            }

            product[row] += M.MatrixData[slot] * x.VectorData[M.Columns[slot]];
        }
    }

    return product;
}
/// <summary>Solves the linear system Mx = b on the host using the conjugate gradient method.</summary>
/// <param name="M">Matrix M</param>
/// <param name="b">Vector b</param>
/// <param name="tol">Error tolerance; iteration stops once r·r is no longer above it</param>
/// <returns>A new array holding the first b.Length entries of the solution vector</returns>
/// <exception cref="Exception">Thrown when b does not match the matrix dimension</exception>
public float[] LinSolveNoCL(CLImgSparseMatrix M, CLImgVector b, float tol)
{
    if (b.Length != M.MatrixDimension)
    {
        throw new Exception("M and x dimensions not compatible");
    }

    int size = b.Length;

    // The work vectors are members and are only rebuilt when the problem size changes,
    // so x keeps whatever it held after a previous solve of the same size.
    bool needWorkspace = (r == null) || (r.Length != size);
    if (needWorkspace)
    {
        r = new CLImgVector(size);
        p = new CLImgVector(size);
        x = new CLImgVector(size);
        Ap = new CLImgVector(size);
    }

    // Initialization: r = b - M*x, p = r
    Ap.VectorData = MultiplyNoCL(M, x);
    for (int k = 0; k < size; k++)
    {
        r.VectorData[k] = b.VectorData[k] - Ap.VectorData[k];
        p.VectorData[k] = r.VectorData[k];
    }

    float residualNorm = DotProductNoCL(r, r);
    for (int iteration = 0; (residualNorm > tol) && (iteration < size * MAXITER); iteration++)
    {
        float previousNorm = residualNorm;

        Ap.VectorData = MultiplyNoCL(M, p);
        float stepSize = previousNorm / DotProductNoCL(Ap, p);

        // Move along the search direction and update the residual accordingly.
        for (int k = 0; k < size; k++)
        {
            x.VectorData[k] += stepSize * p.VectorData[k];
            r.VectorData[k] -= stepSize * Ap.VectorData[k];
        }

        residualNorm = DotProductNoCL(r, r);
        float directionWeight = residualNorm / previousNorm;

        // New search direction.
        for (int k = 0; k < size; k++)
        {
            p.VectorData[k] = r.VectorData[k] + directionWeight * p.VectorData[k];
        }
    }

    // Hand back a copy so the caller does not share the solver's internal buffer.
    float[] solution = new float[size];
    for (int k = 0; k < size; k++)
    {
        solution[k] = x.VectorData[k];
    }
    return solution;
}
/// <summary>Host-side (no OpenCL) dot product of two vectors in single precision.</summary>
/// <param name="v1">First vector; the loop runs over v1.Length elements</param>
/// <param name="v2">Second vector</param>
/// <returns>Sum of v1.VectorData[i] * v2.VectorData[i] over all i below v1.Length</returns>
public float DotProductNoCL(CLImgVector v1, CLImgVector v2)
{
    float accumulator = 0;

    for (int index = 0; index < v1.Length; index++)
    {
        float term = v1.VectorData[index] * v2.VectorData[index];
        accumulator += term;
    }

    return accumulator;
}
/// <summary>Multiplies sparse matrix M by vector x on the host.</summary>
/// <param name="M">Sparse matrix, stored with NonZeroElemsPerRow slots per row</param>
/// <param name="x">Vector to be multiplied</param>
/// <returns>A new array of length x.Length holding M*x</returns>
/// <exception cref="Exception">Thrown when x does not match the matrix dimension</exception>
public float[] MultiplyNoCL(CLImgSparseMatrix M, CLImgVector x)
{
    if (x.Length != M.MatrixDimension)
    {
        throw new Exception("M and x dimensions not compatible");
    }

    float[] result = new float[x.Length];

    for (int row = 0; row < M.MatrixDimension; row++)
    {
        // First flat slot belonging to this row.
        int rowStart = M.NonZeroElemsPerRow * row;

        for (int j = 0; j < M.NonZeroElemsPerRow; j++)
        {
            // Only slots with a non-negative column index take part in the sum.
            if (M.Columns[rowStart + j] >= 0)
            {
                result[row] += M.MatrixData[rowStart + j] * x.VectorData[M.Columns[rowStart + j]];
            }
        }
    }

    return result;
}
/// <summary>Computes M*x into y. With OpenCL acceleration the product is left in device
/// memory and is not read back automatically; otherwise y.VectorData receives the host result.</summary>
/// <param name="M">Sparse matrix</param>
/// <param name="x">Vector to be multiplied</param>
/// <param name="y">Result</param>
/// <exception cref="Exception">Thrown when x or y does not match the matrix dimension</exception>
public void Multiply(CLImgSparseMatrix M, CLImgVector x, CLImgVector y)
{
    if (x.Length != M.MatrixDimension || y.Length != M.MatrixDimension)
    {
        throw new Exception("M, x and y dimensions not compatible");
    }

    bool useOpenCL = CLCalc.CLAcceleration == CLCalc.CLAccelerationType.UsingCL;
    if (useOpenCL)
    {
        int[] elemsPerRow = new int[] { M.NonZeroElemsPerRow };
        CLNonZeroElemsPerRow.WriteToDevice(elemsPerRow);

        CLCalc.Program.MemoryObject[] kernelArgs = new CLCalc.Program.MemoryObject[]
        {
            M.CLMatrixData,
            M.CLColumns,
            x.CLVector,
            y.CLVector,
            CLNonZeroElemsPerRow
        };

        // Work size is the matrix dimension divided by 4, rounded up; a dimension that is
        // not a multiple of 4 is tolerated.
        kernelSparseMatrixVecMult.Execute(kernelArgs, 1 + ((M.MatrixDimension - 1) >> 2));
    }
    else
    {
        y.VectorData = MultiplyNoCL(M, x);
    }
}
/// <summary>Conjugate gradient solver for Mx = b that runs entirely on the host.</summary>
/// <param name="M">Matrix M</param>
/// <param name="b">Vector b</param>
/// <param name="tol">Error tolerance compared against the squared residual norm r·r</param>
/// <returns>A new array with the first b.Length entries of the solution vector</returns>
/// <exception cref="Exception">Thrown when b does not match the matrix dimension</exception>
public float[] LinSolveNoCL(CLImgSparseMatrix M, CLImgVector b, float tol)
{
    if (b.Length != M.MatrixDimension)
    {
        throw new Exception("M and x dimensions not compatible");
    }

    int n = b.Length;

    // Work vectors are members; they are rebuilt only when missing or of a different size.
    if (r == null || r.Length != n)
    {
        r = new CLImgVector(n);
        p = new CLImgVector(n);
        x = new CLImgVector(n);
        Ap = new CLImgVector(n);
    }

    // Starting residual r = b - M*x and first search direction p = r.
    Ap.VectorData = MultiplyNoCL(M, x);
    int i;
    for (i = 0; i < n; i++)
    {
        r.VectorData[i] = b.VectorData[i] - Ap.VectorData[i];
        p.VectorData[i] = r.VectorData[i];
    }

    int iterations = 0;
    float rDotR = DotProductNoCL(r, r);
    while (rDotR > tol && iterations < n * MAXITER)
    {
        float rDotRBefore = rDotR;

        Ap.VectorData = MultiplyNoCL(M, p);
        float alpha = rDotRBefore / DotProductNoCL(Ap, p);

        for (i = 0; i < n; i++)
        {
            x.VectorData[i] += alpha * p.VectorData[i];
            r.VectorData[i] -= alpha * Ap.VectorData[i];
        }

        rDotR = DotProductNoCL(r, r);
        float beta = rDotR / rDotRBefore;

        for (i = 0; i < n; i++)
        {
            p.VectorData[i] = r.VectorData[i] + beta * p.VectorData[i];
        }

        iterations++;
    }

    // Return a copy rather than the solver's own buffer.
    float[] answer = new float[n];
    for (i = 0; i < n; i++)
    {
        answer[i] = x.VectorData[i];
    }
    return answer;
}
/// <summary>Solves linear system Mx = b using conjugate gradient method, running the vector
/// updates through OpenCL kernels. Doesn't try to improve the solution obtained. At least one
/// iteration is attempted even if the starting residual is already within tolerance.</summary>
/// <param name="M">Matrix M</param>
/// <param name="b">Vector b; its device image is used</param>
/// <param name="tol">Error tolerance compared against the squared residual norm r·r</param>
/// <param name="x">Initial guess, updated in place on the device. Replaced by a new vector
/// when null or of the wrong length. The result is not read back to host memory here.</param>
public void LinSolveCLStep(CLImgSparseMatrix M, CLImgVector b, float tol, ref CLImgVector x)
{
    int n = b.Length;
    // Kernels are launched with n / 4 work items, rounded up.
    int nBy4 = 1 + ((n - 1) >> 2);

    if (lambda == null)
    {
        lambda = new float[1];
        CLlambda = new CLCalc.Program.Variable(lambda);
    }

    // Work vectors are members, rebuilt only when missing or of a different size.
    if (r == null || r.Length != n)
    {
        r = new CLImgVector(n);
        p = new CLImgVector(n);
        Ap = new CLImgVector(n);
        temp = new CLImgVector(n);
    }
    if (temp == null)
    {
        temp = new CLImgVector(n);
    }
    if (x == null || x.Length != n)
    {
        x = new CLImgVector(n);
    }

    float alpha, beta, RDotROld, RDotR;

    // Initialization: Ap = M*x, then kernelInitRP fills r and p from b and Ap
    // (presumably r = b - Ap and p = r, mirroring LinSolveNoCL — confirm against the kernel source).
    Multiply(M, x, Ap);
    CLCalc.Program.MemoryObject[] args = new CLCalc.Program.MemoryObject[] { b.CLVector, Ap.CLVector, r.CLVector, p.CLVector };
    kernelInitRP.Execute(args, nBy4);

    // Loop
    int count = 0;
    RDotR = DotProduct(r, r);
    while (count < 1 || ((RDotR > tol) && (count < n * MAXITER)))
    {
        RDotROld = RDotR;

        Multiply(M, p, Ap);
        float pDotAp = DotProduct(Ap, p);

        // The forced first iteration can arrive here with a zero search direction
        // (e.g. b = 0 and x = 0). Dividing by zero would make alpha NaN or infinite
        // and poison x, so stop and leave x untouched instead.
        if (pDotAp == 0)
        {
            break;
        }
        alpha = RDotROld / pDotAp;

        // Update x. kernelMultiplyAdd presumably writes (third arg + lambda * second arg)
        // into the last argument, i.e. x = x + alpha * p — confirm against the kernel source.
        kernelCopyToTemp.Execute(new CLCalc.Program.MemoryObject[] { x.CLVector, temp.CLVector }, nBy4);
        lambda[0] = alpha;
        CLlambda.WriteToDevice(lambda);
        kernelMultiplyAdd.Execute(new CLCalc.Program.MemoryObject[] { CLlambda, p.CLVector, temp.CLVector, x.CLVector }, nBy4);

        // Update r (presumably r = r - alpha * Ap)
        kernelCopyToTemp.Execute(new CLCalc.Program.MemoryObject[] { r.CLVector, temp.CLVector }, nBy4);
        lambda[0] = -alpha;
        CLlambda.WriteToDevice(lambda);
        kernelMultiplyAdd.Execute(new CLCalc.Program.MemoryObject[] { CLlambda, Ap.CLVector, temp.CLVector, r.CLVector }, nBy4);

        RDotR = DotProduct(r, r);
        beta = RDotR / RDotROld;

        // Update p (presumably p = r + beta * p)
        kernelCopyToTemp.Execute(new CLCalc.Program.MemoryObject[] { p.CLVector, temp.CLVector }, nBy4);
        lambda[0] = beta;
        CLlambda.WriteToDevice(lambda);
        kernelMultiplyAdd.Execute(new CLCalc.Program.MemoryObject[] { CLlambda, temp.CLVector, r.CLVector, p.CLVector }, nBy4);

        count++;
    }
}
/// <summary>Solves linear system Mx = b using conjugate gradient method. Writes variables to
/// Device memory. Improves solution if accuracy is low: after each solve the residual M*x - b
/// is computed with ExactMultiply, and while it is above tolerance and still shrinking the
/// system is solved again for the residual and the correction is subtracted from the solution.</summary>
/// <param name="M">Matrix M; written to device memory by this call</param>
/// <param name="b">Vector b. Its VectorData is used as scratch space during the solve and
/// restored before returning; its device copy is not rewritten after the restore.</param>
/// <param name="tol">Error tolerance; its absolute value is used</param>
/// <returns>A new array holding the solution</returns>
/// <exception cref="Exception">Thrown when b does not match the matrix dimension</exception>
public float[] LinSolveCL(CLImgSparseMatrix M, CLImgVector b, float tol)
{
    if (b.Length != M.MatrixDimension)
    {
        throw new Exception("M and x dimensions not compatible");
    }

    int n = b.Length;
    tol = Math.Abs(tol);

    // Writes M to device memory
    M.WriteToDevice();

    // Backs up b data, because b.VectorData is overwritten with residuals below
    float[] bbkp = new float[n];
    for (int i = 0; i < n; i++)
    {
        bbkp[i] = b.VectorData[i];
    }

    // Residue variables; the huge starting values guarantee the loop runs at least once
    double ResidueSumSquares = 1E100;
    double resAnt = 1E200;
    float[] Solution = new float[n];

    // An explicit flag marks the first pass. Comparing the residue against a sentinel value
    // with == is unreliable, and a missed first pass stores the negated solution.
    bool firstPass = true;

    try
    {
        // Stop when the residue is within tolerance, has stopped changing, or got worse
        while (ResidueSumSquares > tol && Math.Abs(resAnt - ResidueSumSquares) >= tol && resAnt > ResidueSumSquares)
        {
            resAnt = ResidueSumSquares;

            b.WriteToDevice();
            LinSolveCLStep(M, b, tol, ref x);

            // Solution
            x.ReadFromDevice();

            if (firstPass)
            {
                // First pass solved Mx = b directly: copies solution
                for (int i = 0; i < n; i++)
                {
                    Solution[i] = x.VectorData[i];
                }
                firstPass = false;
            }
            else
            {
                // Later passes solved for the residual: subtract the correction and put
                // the improved solution into x's host data for the residual check below
                for (int i = 0; i < n; i++)
                {
                    Solution[i] -= x.VectorData[i];
                    x.VectorData[i] = Solution[i];
                }
            }

            // Compute residue sum of squares; the residual becomes the next right-hand side
            double[] dblResidues = ExactMultiply(M, x);
            ResidueSumSquares = 0;
            for (int i = 0; i < n; i++)
            {
                dblResidues[i] = dblResidues[i] - (double)bbkp[i];
                ResidueSumSquares += dblResidues[i] * dblResidues[i];
                b.VectorData[i] = (float)dblResidues[i];
            }
        }
    }
    finally
    {
        // Restores b data even if a solve step throws
        for (int i = 0; i < n; i++)
        {
            b.VectorData[i] = bbkp[i];
        }
    }

    return Solution;
}
/// <summary>Host-side (no OpenCL) dot product of two vectors, accumulated in double precision.</summary>
/// <param name="v1">First vector; the loop runs over v1.Length elements</param>
/// <param name="v2">Second vector</param>
/// <returns>Sum of the element-wise products, each formed in double precision</returns>
public double ExactDotProductNoCL(CLImgVector v1, CLImgVector v2)
{
    double accumulator = 0;

    for (int index = 0; index < v1.Length; index++)
    {
        // Both factors are widened before the multiply so no precision is lost in the product.
        double term = (double)v1.VectorData[index] * (double)v2.VectorData[index];
        accumulator += term;
    }

    return accumulator;
}
/// <summary>Solves linear system Mx = b using conjugate gradient method. Writes variables to
/// Device memory. Improves solution if accuracy is low: after each solve the residual M*x - b
/// is computed with ExactMultiply, and while it is above tolerance and still shrinking the
/// system is solved again for the residual and the correction is subtracted from the solution.</summary>
/// <param name="M">Matrix M; written to device memory by this call</param>
/// <param name="b">Vector b. Its VectorData is used as scratch space during the solve and
/// restored before returning; its device copy is not rewritten after the restore.</param>
/// <param name="tol">Error tolerance; its absolute value is used</param>
/// <returns>A new array holding the solution</returns>
/// <exception cref="Exception">Thrown when b does not match the matrix dimension</exception>
public float[] LinSolveCL(CLImgSparseMatrix M, CLImgVector b, float tol)
{
    if (b.Length != M.MatrixDimension)
    {
        throw new Exception("M and x dimensions not compatible");
    }

    int n = b.Length;
    tol = Math.Abs(tol);

    // Writes M to device memory
    M.WriteToDevice();

    // Backs up b data, because b.VectorData is overwritten with residuals below
    float[] bbkp = new float[n];
    for (int i = 0; i < n; i++)
    {
        bbkp[i] = b.VectorData[i];
    }

    // Residue variables; the huge starting values guarantee the loop runs at least once
    double ResidueSumSquares = 1E100;
    double resAnt = 1E200;
    float[] Solution = new float[n];

    // An explicit flag marks the first pass. Comparing the residue against a sentinel value
    // with == is unreliable, and a missed first pass stores the negated solution.
    bool firstPass = true;

    try
    {
        // Stop when the residue is within tolerance, has stopped changing, or got worse
        while (ResidueSumSquares > tol && Math.Abs(resAnt - ResidueSumSquares) >= tol && resAnt > ResidueSumSquares)
        {
            resAnt = ResidueSumSquares;

            b.WriteToDevice();
            LinSolveCLStep(M, b, tol);

            // Solution: the step leaves its result in the member vector x on the device
            x.ReadFromDevice();

            if (firstPass)
            {
                // First pass solved Mx = b directly: copies solution
                for (int i = 0; i < n; i++)
                {
                    Solution[i] = x.VectorData[i];
                }
                firstPass = false;
            }
            else
            {
                // Later passes solved for the residual: subtract the correction and put
                // the improved solution into x's host data for the residual check below
                for (int i = 0; i < n; i++)
                {
                    Solution[i] -= x.VectorData[i];
                    x.VectorData[i] = Solution[i];
                }
            }

            // Compute residue sum of squares; the residual becomes the next right-hand side
            double[] dblResidues = ExactMultiply(M, x);
            ResidueSumSquares = 0;
            for (int i = 0; i < n; i++)
            {
                dblResidues[i] = dblResidues[i] - (double)bbkp[i];
                ResidueSumSquares += dblResidues[i] * dblResidues[i];
                b.VectorData[i] = (float)dblResidues[i];
            }
        }
    }
    finally
    {
        // Restores b data even if a solve step throws
        for (int i = 0; i < n; i++)
        {
            b.VectorData[i] = bbkp[i];
        }
    }

    return Solution;
}
/// <summary>Runs one conjugate gradient solve of Mx = b through OpenCL kernels, working on the
/// member vector x. Doesn't try to improve the solution obtained and does not read the
/// result back to host memory.</summary>
/// <param name="M">Matrix M</param>
/// <param name="b">Vector b; its device image is used</param>
/// <param name="tol">Error tolerance compared against the squared residual norm r·r</param>
public void LinSolveCLStep(CLImgSparseMatrix M, CLImgVector b, float tol)
{
    int size = b.Length;
    // Kernels are launched with size / 4 work items, rounded up.
    int workItems = 1 + ((size - 1) >> 2);

    if (lambda == null)
    {
        lambda = new float[1];
        CLlambda = new CLCalc.Program.Variable(lambda);
    }

    // Work vectors are members, rebuilt only when missing or of a different size.
    bool needWorkspace = (r == null) || (r.Length != size);
    if (needWorkspace)
    {
        r = new CLImgVector(size);
        p = new CLImgVector(size);
        x = new CLImgVector(size);
        Ap = new CLImgVector(size);
        temp = new CLImgVector(size);
    }
    if (temp == null)
    {
        temp = new CLImgVector(size);
    }

    // Initialization: Ap = M*x, then kernelInitRP fills r and p from b and Ap.
    Multiply(M, x, Ap);
    CLCalc.Program.MemoryObject[] initArgs = new CLCalc.Program.MemoryObject[]
    {
        b.CLVector, Ap.CLVector, r.CLVector, p.CLVector
    };
    kernelInitRP.Execute(initArgs, workItems);

    float residualNorm = DotProduct(r, r);
    for (int iteration = 0; (residualNorm > tol) && (iteration < size * MAXITER); iteration++)
    {
        float previousNorm = residualNorm;

        Multiply(M, p, Ap);
        float stepSize = previousNorm / DotProduct(Ap, p);

        // Update x: copy x to temp, then run the multiply-add kernel with lambda = stepSize.
        CLCalc.Program.MemoryObject[] copyX = new CLCalc.Program.MemoryObject[] { x.CLVector, temp.CLVector };
        kernelCopyToTemp.Execute(copyX, workItems);
        lambda[0] = stepSize;
        CLlambda.WriteToDevice(lambda);
        CLCalc.Program.MemoryObject[] updateX = new CLCalc.Program.MemoryObject[] { CLlambda, p.CLVector, temp.CLVector, x.CLVector };
        kernelMultiplyAdd.Execute(updateX, workItems);

        // Update r: same pattern with lambda = -stepSize and Ap as the scaled vector.
        CLCalc.Program.MemoryObject[] copyR = new CLCalc.Program.MemoryObject[] { r.CLVector, temp.CLVector };
        kernelCopyToTemp.Execute(copyR, workItems);
        lambda[0] = -stepSize;
        CLlambda.WriteToDevice(lambda);
        CLCalc.Program.MemoryObject[] updateR = new CLCalc.Program.MemoryObject[] { CLlambda, Ap.CLVector, temp.CLVector, r.CLVector };
        kernelMultiplyAdd.Execute(updateR, workItems);

        residualNorm = DotProduct(r, r);
        float directionWeight = residualNorm / previousNorm;

        // Update p: the old p (held in temp) is the scaled vector and r is the added one.
        CLCalc.Program.MemoryObject[] copyP = new CLCalc.Program.MemoryObject[] { p.CLVector, temp.CLVector };
        kernelCopyToTemp.Execute(copyP, workItems);
        lambda[0] = directionWeight;
        CLlambda.WriteToDevice(lambda);
        CLCalc.Program.MemoryObject[] updateP = new CLCalc.Program.MemoryObject[] { CLlambda, temp.CLVector, r.CLVector, p.CLVector };
        kernelMultiplyAdd.Execute(updateP, workItems);
    }
}
/// <summary>Computes dot product of 2 vectors. With OpenCL acceleration the OpenCL images are
/// used; otherwise the host-side VectorData arrays are used. Assumes data has been inserted
/// to VectorData and WriteToDevice() has been called.</summary>
/// <param name="v1">First vector</param>
/// <param name="v2">Second vector</param>
/// <returns>The dot product; the same value is also left in dprod[0]</returns>
/// <exception cref="Exception">Thrown when the two vectors have different lengths</exception>
public float DotProduct(CLImgVector v1, CLImgVector v2)
{
    if (v1.Length != v2.Length)
    {
        throw new Exception("Incompatible lengths");
    }

    if (CLCalc.CLAcceleration == CLCalc.CLAccelerationType.UsingCL)
    {
        // The kernels leave the reduced value in dotProdSum; fetch it into host memory.
        CLDotProd(v1, v2);
        float[] resp = new float[1];
        dotProdSum.ReadFromDeviceTo(resp);
        dprod[0] = resp[0];
    }
    else
    {
        // dprod persists between calls, so the sum must start from zero each time;
        // otherwise the previous result would be added to the new one.
        float sum = 0;
        for (int i = 0; i < v1.Length; i++)
        {
            sum += v1.VectorData[i] * v2.VectorData[i];
        }
        dprod[0] = sum;
    }

    return dprod[0];
}