/// <summary>
///   Performs a single update step of the Iteratively Reweighted
///   Least Squares (IRLS) procedure on the regression coefficients.
/// </summary>
/// <param name="inputs">The input vectors, one per sample.</param>
/// <param name="outputs">The expected output for each input vector.</param>
/// <returns>
///   The largest ratio, taken over all coefficients, between the absolute
///   step applied in this iteration and the absolute value the coefficient
///   had before the step.
/// </returns>
///
public double Run(double[][] inputs, double[] outputs)
{
    // Estimation by Iteratively Reweighted Least Squares. Reference:
    //  - Bishop, Christopher M.; Pattern Recognition
    //    and Machine Learning. Springer; 1st ed. 2006.

    // Working storage for this iteration
    int sampleCount = inputs.Length;
    double[][] designMatrix = new double[sampleCount][];
    double[] residuals = new double[sampleCount];
    double[] sampleWeights = new double[sampleCount];
    double[] theta = this.regression.Coefficients;
    double[] step;

    // Build the design matrix: a leading 1 (intercept term)
    // followed by the values of the corresponding input vector.
    for (int s = 0; s < sampleCount; s++)
    {
        double[] designRow = new double[parameterCount];
        designMatrix[s] = designRow;
        designRow[0] = 1;

        double[] features = inputs[s];
        for (int c = 0; c < features.Length; c++)
        {
            designRow[c + 1] = features[c];
        }
    }

    // Evaluate the current model on every sample to obtain the
    // residual (prediction minus target) and the weight y * (1 - y).
    for (int s = 0; s < sampleCount; s++)
    {
        double predicted = regression.Compute(inputs[s]);
        residuals[s] = predicted - outputs[s];
        sampleWeights[s] = predicted * (1.0 - predicted);
    }

    // Clear the accumulators left over from the previous iteration
    for (int r = 0; r < gradient.Length; r++)
    {
        gradient[r] = 0;

        for (int c = 0; c < gradient.Length; c++)
        {
            hessian[r, c] = 0;
        }
    }

    // Accumulate the error gradient, one sample at a time
    for (int s = 0; s < designMatrix.Length; s++)
    {
        double[] designRow = designMatrix[s];
        double residual = residuals[s];

        for (int p = 0; p < gradient.Length; p++)
        {
            gradient[p] += designRow[p] * residual;
        }
    }

    // Accumulate the weighted "Hessian": for every sample, the outer
    // product of its design row with itself, scaled by the sample weight.
    for (int s = 0; s < sampleWeights.Length; s++)
    {
        double[] designRow = designMatrix[s];
        double weight = sampleWeights[s];

        for (int r = 0; r < designRow.Length; r++)
        {
            for (int c = 0; c < designRow.Length; c++)
            {
                hessian[r, c] += designRow[c] * designRow[r] * weight;
            }
        }
    }

    // Solve the linear system for the update step. LU is attempted first;
    // when it reports a singular matrix, a Singular Value Decomposition is
    // used instead. The SVD is very stable but considerably more expensive
    // (on average about 10-15 times the cost of LU). For other ways of
    // dealing with a singular Hessian, see:
    //
    //  - Jeff Gill & Gary King, "What to Do When Your Hessian Is Not Invertible",
    //    Sociological Methods & Research, Vol 33, No. 1, August 2004, 54-87.
    //    Available in: http://gking.harvard.edu/files/help.pdf
    //
    // The inverse is only required further below, and only when
    // standard errors have been requested.
    LuDecomposition lu = new LuDecomposition(hessian);

    if (!lu.Nonsingular)
    {
        // Singular Hessian: fall back to the pseudo-inverse solution
        decomposition = new SingularValueDecomposition(hessian);
        step = decomposition.Solve(gradient);
    }
    else
    {
        // Regular case: solve with the LU factors
        step = lu.Solve(gradient);
        decomposition = lu;
    }

    // Keep a copy of the coefficients as they were before the update
    previous = (double[])theta.Clone();

    // Apply the step in place on the regression's coefficient vector
    for (int p = 0; p < theta.Length; p++)
    {
        theta[p] = theta[p] - step[p];
    }

    if (computeStandardErrors)
    {
        // The inverse of the decomposed matrix is the information matrix;
        // the square roots of its diagonal are stored as standard errors.
        double[,] information = decomposition.Inverse();
        double[] stdErrors = regression.StandardErrors;

        for (int p = 0; p < stdErrors.Length; p++)
        {
            stdErrors[p] = Math.Sqrt(information[p, p]);
        }
    }

    // Convert each step into a change relative to the previous coefficient.
    // NOTE(review): a previous coefficient of exactly zero makes this
    // division produce Infinity or NaN - confirm callers tolerate that.
    for (int p = 0; p < step.Length; p++)
    {
        step[p] = Math.Abs(step[p]) / Math.Abs(previous[p]);
    }

    return Matrix.Max(step);
}
/// <summary>
///   Executes one Iteratively Reweighted Least Squares (IRLS) iteration,
///   updating the coefficients of the underlying regression in place.
/// </summary>
/// <param name="inputs">The input data, one vector per observation.</param>
/// <param name="outputs">The output value associated with each input vector.</param>
/// <returns>
///   The maximum, over all parameters, of the absolute update divided by
///   the absolute value the parameter had before the update.
/// </returns>
///
public double Run(double[][] inputs, double[] outputs)
{
    // Iteratively Reweighted Least Squares estimation, following:
    //  - Bishop, Christopher M.; Pattern Recognition
    //    and Machine Learning. Springer; 1st ed. 2006.

    // Per-iteration buffers
    int observations = inputs.Length;
    double[][] regressors = new double[observations][];
    double[] errorVector = new double[observations];
    double[] weightVector = new double[observations];
    double[] beta = this.regression.Coefficients;
    double[] update;

    // Regression matrix: column 0 is the constant 1 for the
    // intercept, the remaining columns copy the input vector.
    for (int n = 0; n < observations; n++)
    {
        double[] line = new double[parameterCount];
        regressors[n] = line;
        line[0] = 1;

        for (int d = 0; d < inputs[n].Length; d++)
        {
            line[d + 1] = inputs[n][d];
        }
    }

    // Error vector and diagonal of the weighting matrix
    for (int n = 0; n < observations; n++)
    {
        double estimate = regression.Compute(inputs[n]);

        errorVector[n] = estimate - outputs[n];
        weightVector[n] = estimate * (1.0 - estimate);
    }

    // Zero the gradient vector and the Hessian matrix
    for (int a = 0; a < gradient.Length; a++)
    {
        gradient[a] = 0;

        for (int b = 0; b < gradient.Length; b++)
        {
            hessian[a, b] = 0;
        }
    }

    // Recompute the error gradient
    for (int n = 0; n < regressors.Length; n++)
    {
        for (int a = 0; a < gradient.Length; a++)
        {
            gradient[a] += regressors[n][a] * errorVector[n];
        }
    }

    // Recompute the weighted "Hessian" matrix
    for (int n = 0; n < weightVector.Length; n++)
    {
        double[] line = regressors[n];

        for (int a = 0; a < line.Length; a++)
        {
            for (int b = 0; b < line.Length; b++)
            {
                hessian[a, b] += line[b] * line[a] * weightVector[n];
            }
        }
    }

    // The linear system is solved through a matrix decomposition. An LU
    // decomposition is tried first, since the Hessian is usually
    // invertible. If LU reports the matrix as singular, the more robust
    // (but on average 10-15 times more expensive) Singular Value
    // Decomposition takes over. Further reading on singular Hessians:
    //
    //  - Jeff Gill & Gary King, "What to Do When Your Hessian Is Not Invertible",
    //    Sociological Methods & Research, Vol 33, No. 1, August 2004, 54-87.
    //    Available in: http://gking.harvard.edu/files/help.pdf
    //
    // Computing the inverse is optional: it is used below solely
    // to obtain the standard errors of the regression.
    LuDecomposition lu = new LuDecomposition(hessian);
    bool invertible = lu.Nonsingular;

    if (invertible)
    {
        // LU succeeded
        update = lu.Solve(gradient);
        decomposition = lu;
    }
    else
    {
        // Singular matrix: use the pseudo-inverse solution
        decomposition = new SingularValueDecomposition(hessian);
        update = decomposition.Solve(gradient);
    }

    // Snapshot of the parameters before they are modified
    previous = (double[])beta.Clone();

    // Subtract the computed update from every coefficient
    for (int a = 0; a < beta.Length; a++)
    {
        beta[a] -= update[a];
    }

    if (computeStandardErrors)
    {
        // Standard errors are the square roots of the diagonal
        // of the inverse of the decomposed matrix.
        double[,] inverted = decomposition.Inverse();
        double[] errorsOut = regression.StandardErrors;

        for (int a = 0; a < errorsOut.Length; a++)
        {
            errorsOut[a] = Math.Sqrt(inverted[a, a]);
        }
    }

    // Relative parameter change, computed in place over the update vector.
    // NOTE(review): when a previous coefficient is exactly zero the
    // quotient is Infinity or NaN - verify this is acceptable to callers.
    for (int a = 0; a < update.Length; a++)
    {
        update[a] = Math.Abs(update[a]) / Math.Abs(previous[a]);
    }

    double largestChange = Matrix.Max(update);
    return largestChange;
}
/// <summary>
///   Performs one Lower-Bound Newton-Raphson update of the
///   regression coefficients.
/// </summary>
/// <param name="inputs">The input vectors, one per sample.</param>
/// <param name="outputs">
///   The expected output vector of each sample. Element <c>j + 1</c> is
///   compared against the j-th computed output.
/// </param>
/// <returns>
///   The largest ratio, over all parameters, between the absolute step
///   and the absolute value the parameter had before the step.
/// </returns>
private double run(double[][] inputs, double[][] outputs)
{
    // Lower-Bound Newton-Raphson estimation.
    //
    // Instead of the Hessian matrix itself, a suitable lower bound
    // is used: the Hessian is lower bounded by a negative definite
    // matrix that does not depend on w [Krishnapuram et al].
    //
    //  - http://www.lx.it.pt/~mtf/Krishnapuram_Carin_Figueiredo_Hartemink_2005.pdf
    //

    int sampleCount = inputs.Length;
    double[][] designMatrix = new double[sampleCount][];
    double[][] theta = this.regression.Coefficients;

    // Build the design matrix: a leading 1 (intercept term)
    // followed by the values of the corresponding input vector.
    for (int s = 0; s < sampleCount; s++)
    {
        double[] designRow = new double[M];
        designMatrix[s] = designRow;
        designRow[0] = 1;

        double[] features = inputs[s];
        for (int c = 0; c < features.Length; c++)
        {
            designRow[c + 1] = features[c];
        }
    }

    // The gradient is always rebuilt from scratch
    for (int p = 0; p < gradient.Length; p++)
    {
        gradient[p] = 0;
    }

    // The lower bound is cleared only when it is about to be recomputed
    if (UpdateLowerBound)
    {
        for (int r = 0; r < gradient.Length; r++)
        {
            for (int c = 0; c < gradient.Length; c++)
            {
                lowerBound[r, c] = 0;
            }
        }
    }

    // In multinomial logistic regression the objective function is
    // the log-likelihood l(w). As given by Krishnapuram et al and
    // Böhning, it is concave, with Hessian
    //
    //      H(w) = -sum(P(w) - p(w)p(w)') (x) xx'
    //      (see the referenced paper for the proper indices)
    //
    // where (x) is the Kronecker product. By the lower bound principle,
    // H(w) can be replaced by an approximation B that does not depend
    // on w (eq. 8 of the aforementioned paper):
    //
    //      B = -(1/2) [I - 11/M] (x) sum(xx')
    //
    // so this matrix needs to be computed and inverted only once.
    //

    for (int s = 0; s < sampleCount; s++)
    {
        double[] x = designMatrix[s];
        double[] expected = outputs[s];

        // Evaluate the model for this sample into the shared output buffer
        this.compute(inputs[s], output);

        // Per-sample errors (note the offset of one on the expected side)
        for (int c = 0; c < errors.Length; c++)
        {
            errors[c] = expected[c + 1] - output[c];
        }

        // The block structure of the gradient and Hessian is exploited
        // through the Kronecker product. See [Böhning, 1992].
        double[] sampleGradient = Matrix.KroneckerProduct(errors, x);

        for (int p = 0; p < sampleGradient.Length; p++)
        {
            gradient[p] += sampleGradient[p];
        }

        if (UpdateLowerBound)
        {
            // Outer product of the design row with itself
            for (int a = 0; a < x.Length; a++)
            {
                for (int b = 0; b < x.Length; b++)
                {
                    xxt[a, b] = x[a] * x[b];
                }
            }

            // Contribution of this sample to the weighted "Hessian"
            double[,] contribution = Matrix.KroneckerProduct(weights, xxt);

            for (int r = 0; r < parameterCount; r++)
            {
                for (int c = 0; c < parameterCount; c++)
                {
                    lowerBound[r, c] += contribution[r, c];
                }
            }
        }
    }

    if (!UpdateLowerBound)
    {
        // The bound is unchanged: reuse the stored decomposition
        deltas = decomposition.Solve(gradient);
    }
    else
    {
        UpdateLowerBound = false;

        // Solve the linear system for the update step. LU is attempted
        // first; when it reports a singular matrix, a Singular Value
        // Decomposition is used instead. The SVD is very stable but
        // considerably more expensive (on average about 10-15 times the
        // cost of LU). For other ways of dealing with a singular
        // Hessian, see:
        //
        //  - Jeff Gill & Gary King, "What to Do When Your Hessian Is Not Invertible",
        //    Sociological Methods & Research, Vol 33, No. 1, August 2004, 54-87.
        //    Available in: http://gking.harvard.edu/files/help.pdf
        //
        // The inverse is only required further below, and only when
        // standard errors have been requested.
        LuDecomposition lu = new LuDecomposition(lowerBound);

        if (!lu.Nonsingular)
        {
            // Singular matrix: fall back to the pseudo-inverse solution
            decomposition = new SingularValueDecomposition(lowerBound);
            deltas = decomposition.Solve(gradient);
        }
        else
        {
            // Regular case: solve with the LU factors
            deltas = lu.Solve(gradient);
            decomposition = lu;
        }
    }

    // Flattened view of the coefficients before the update
    previous = theta.Reshape(1);

    // Apply the step; deltas is consumed in row-by-row order
    int flat = 0;
    for (int r = 0; r < theta.Length; r++)
    {
        double[] coefficientRow = theta[r];

        for (int c = 0; c < coefficientRow.Length; c++)
        {
            coefficientRow[c] -= deltas[flat];
            flat++;
        }
    }

    // Flattened view of the coefficients after the update
    solution = theta.Reshape(1);

    if (computeStandardErrors)
    {
        // Standard errors: square root of the absolute value of the
        // diagonal of the inverse, laid out in the same row-by-row order.
        double[,] information = decomposition.Inverse();
        double[][] stdErrors = regression.StandardErrors;

        int diagonal = 0;
        for (int r = 0; r < stdErrors.Length; r++)
        {
            double[] errorRow = stdErrors[r];

            for (int c = 0; c < errorRow.Length; c++)
            {
                errorRow[c] = Math.Sqrt(Math.Abs(information[diagonal, diagonal]));
                diagonal++;
            }
        }
    }

    // Convert each step into a change relative to the previous parameter.
    // NOTE(review): a previous parameter of exactly zero makes this
    // division produce Infinity or NaN - confirm callers tolerate that.
    for (int p = 0; p < deltas.Length; p++)
    {
        deltas[p] = Math.Abs(deltas[p]) / Math.Abs(previous[p]);
    }

    return Matrix.Max(deltas);
}
/// <summary>
///   Runs a single iteration of the Lower-Bound Newton-Raphson
///   estimation, updating the regression coefficients in place.
/// </summary>
/// <param name="inputs">The input data, one vector per observation.</param>
/// <param name="outputs">
///   The expected output vector for each observation. The j-th computed
///   output is compared against element <c>j + 1</c> of this vector.
/// </param>
/// <returns>
///   The maximum, over all parameters, of the absolute update divided by
///   the absolute value the parameter had before the update.
/// </returns>
private double run(double[][] inputs, double[][] outputs)
{
    // Regression through Lower-Bound Newton-Raphson estimation.
    //
    // The central idea is to swap the Hessian matrix for a suitable
    // lower bound. The Hessian is bounded from below by a negative
    // definite matrix that is independent of w [Krishnapuram et al].
    //
    //  - http://www.lx.it.pt/~mtf/Krishnapuram_Carin_Figueiredo_Hartemink_2005.pdf
    //

    int observations = inputs.Length;
    double[][] regressors = new double[observations][];
    double[][] beta = this.regression.Coefficients;

    // Regression matrix: column 0 is the constant 1 for the
    // intercept, the remaining columns copy the input vector.
    for (int n = 0; n < observations; n++)
    {
        double[] line = new double[M];
        regressors[n] = line;
        line[0] = 1;

        for (int d = 0; d < inputs[n].Length; d++)
        {
            line[d + 1] = inputs[n][d];
        }
    }

    // Zero the gradient vector
    for (int a = 0; a < gradient.Length; a++)
    {
        gradient[a] = 0;
    }

    // Zero the lower bound matrix, but only if it will be rebuilt
    if (UpdateLowerBound)
    {
        for (int a = 0; a < gradient.Length; a++)
        {
            for (int b = 0; b < gradient.Length; b++)
            {
                lowerBound[a, b] = 0;
            }
        }
    }

    // For multinomial logistic regression, the objective is the
    // log-likelihood function l(w). According to Krishnapuram et al
    // and Böhning, this function is concave and its Hessian is
    //
    //      H(w) = -sum(P(w) - p(w)p(w)') (x) xx'
    //      (see the referenced paper for the proper indices)
    //
    // with (x) denoting the Kronecker product. Using the lower bound
    // principle, Krishnapuram showed that H(w) may be replaced with a
    // lower bound approximation B which does not depend on w (eq. 8
    // of the aforementioned paper):
    //
    //      B = -(1/2) [I - 11/M] (x) sum(xx')
    //
    // Hence the matrix has to be computed and inverted only once.
    //

    // Visit every observation in the data set
    for (int n = 0; n < observations; n++)
    {
        double[] regressor = regressors[n];
        double[] target = outputs[n];

        // Model outputs for this observation are written to 'output'
        this.compute(inputs[n], output);

        // Difference between targets (shifted by one) and model outputs
        for (int j = 0; j < errors.Length; j++)
        {
            errors[j] = target[j + 1] - output[j];
        }

        // Gradient and Hessian share a block structure that can be
        // expressed with the Kronecker product. See [Böhning, 1992].

        // Add this observation's share of the error gradient
        double[] partial = Matrix.KroneckerProduct(errors, regressor);

        for (int j = 0; j < partial.Length; j++)
        {
            gradient[j] += partial[j];
        }

        if (UpdateLowerBound)
        {
            // xx' for the current regressor
            for (int a = 0; a < regressor.Length; a++)
            {
                for (int b = 0; b < regressor.Length; b++)
                {
                    xxt[a, b] = regressor[a] * regressor[b];
                }
            }

            // Add this observation's share of the weighted "Hessian"
            double[,] block = Matrix.KroneckerProduct(weights, xxt);

            for (int a = 0; a < parameterCount; a++)
            {
                for (int b = 0; b < parameterCount; b++)
                {
                    lowerBound[a, b] += block[a, b];
                }
            }
        }
    }

    if (UpdateLowerBound)
    {
        UpdateLowerBound = false;

        // The linear system is solved through a matrix decomposition. An
        // LU decomposition is tried first, since the matrix is usually
        // invertible. If LU reports it as singular, the more robust (but
        // on average 10-15 times more expensive) Singular Value
        // Decomposition takes over. Further reading on singular Hessians:
        //
        //  - Jeff Gill & Gary King, "What to Do When Your Hessian Is Not Invertible",
        //    Sociological Methods & Research, Vol 33, No. 1, August 2004, 54-87.
        //    Available in: http://gking.harvard.edu/files/help.pdf
        //
        // Computing the inverse is optional: it is used below solely
        // to obtain the standard errors of the regression.
        LuDecomposition lu = new LuDecomposition(lowerBound);
        bool invertible = lu.Nonsingular;

        if (invertible)
        {
            // LU succeeded
            deltas = lu.Solve(gradient);
            decomposition = lu;
        }
        else
        {
            // Singular matrix: use the pseudo-inverse solution
            decomposition = new SingularValueDecomposition(lowerBound);
            deltas = decomposition.Solve(gradient);
        }
    }
    else
    {
        // Lower bound unchanged since the last decomposition: reuse it
        deltas = decomposition.Solve(gradient);
    }

    // Parameters, flattened, as they were before being modified
    previous = beta.Reshape(1);

    // Subtract the update from every coefficient, walking the
    // jagged coefficient array row by row alongside 'deltas'.
    int position = 0;
    for (int a = 0; a < beta.Length; a++)
    {
        for (int b = 0; b < beta[a].Length; b++)
        {
            beta[a][b] -= deltas[position];
            position++;
        }
    }

    // Parameters, flattened, after the update
    solution = beta.Reshape(1);

    if (computeStandardErrors)
    {
        // Each standard error is the square root of the absolute value
        // of the matching diagonal entry of the inverse matrix.
        double[,] inverted = decomposition.Inverse();
        double[][] errorsOut = regression.StandardErrors;

        int entry = 0;
        for (int a = 0; a < errorsOut.Length; a++)
        {
            for (int b = 0; b < errorsOut[a].Length; b++)
            {
                errorsOut[a][b] = Math.Sqrt(Math.Abs(inverted[entry, entry]));
                entry++;
            }
        }
    }

    // Relative parameter change, computed in place over 'deltas'.
    // NOTE(review): when a previous parameter is exactly zero the
    // quotient is Infinity or NaN - verify this is acceptable to callers.
    for (int a = 0; a < deltas.Length; a++)
    {
        deltas[a] = Math.Abs(deltas[a]) / Math.Abs(previous[a]);
    }

    double largestChange = Matrix.Max(deltas);
    return largestChange;
}