/// <summary>
///   Fits Cox's proportional hazards model by repeating damped Newton-Raphson
///   updates until the convergence criterion reports convergence.
/// </summary>
///
/// <param name="inputs">The input data.</param>
/// <param name="time">The time-to-event associated with each input vector.</param>
/// <param name="censor">
///   The output (event) associated with each input vector. Samples whose
///   entry is zero are skipped when accumulating the gradient and Hessian.
/// </param>
///
/// <returns>
///   The maximum relative change in the parameters after the last iteration
///   (<c>convergence.Delta</c>), or the value returned by <c>createBaseline</c>
///   when the model has no parameters to estimate.
/// </returns>
///
/// <exception cref="DimensionMismatchException">
///   <paramref name="inputs"/>, <paramref name="time"/> and
///   <paramref name="censor"/> do not all have the same length.
/// </exception>
///
public double Run(double[][] inputs, double[] time, int[] censor)
{
    if (inputs.Length != time.Length || time.Length != censor.Length)
    {
        throw new DimensionMismatchException("time",
            "The inputs, time and output vector must have the same length.");
    }

    // Unit scale by default, so the final rescaling below is a no-op
    // whenever normalization is disabled.
    double[] sdev = new double[parameterCount];
    for (int i = 0; i < sdev.Length; i++)
    {
        sdev[i] = 1;
    }

    if (normalize)
    {
        // Store means as regression centers
        double[] means = inputs.Mean();
        for (int i = 0; i < means.Length; i++)
        {
            regression.Offsets[i] = means[i];
        }

        // Convert to unit scores for increased accuracy
        sdev = Accord.Statistics.Tools.StandardDeviation(inputs);
        inputs = inputs.Subtract(means, 0).ElementwiseDivide(sdev, 0, inPlace: true);
    }

    // Sort data by time to accelerate performance
    if (!time.IsSorted(ComparerDirection.Descending))
    {
        sort(ref inputs, ref time, ref censor);
    }

    // Compute actual outputs
    double[] output = new double[inputs.Length];
    for (int i = 0; i < output.Length; i++)
    {
        output[i] = regression.Compute(inputs[i]);
    }

    // Nothing to optimize: only the baseline can be estimated
    if (parameterCount == 0)
    {
        return createBaseline(time, censor, output);
    }

    CurrentIteration = 0;

    // Damping factor applied to each Newton step; it starts at 0.1 and
    // grows by 0.1 per iteration until it reaches a full step (1.0).
    double smooth = 0.1;

    do // learning iterations until convergence
    {  // or maximum number of iterations reached

        CurrentIteration++;

        // Reset Hessian matrix and gradient
        Array.Clear(gradient, 0, gradient.Length);
        Array.Clear(hessian, 0, hessian.Length);

        // For each observation instance
        for (int i = 0; i < inputs.Length; i++)
        {
            // Skip samples flagged with a zero event indicator
            if (censor[i] == 0)
            {
                continue;
            }

            // Compute partials over every sample j with time[j] >= time[i]
            double den = 0;
            Array.Clear(partialGradient, 0, partialGradient.Length);
            Array.Clear(partialHessian, 0, partialHessian.Length);

            for (int j = 0; j < inputs.Length; j++)
            {
                if (time[j] >= time[i])
                {
                    den += output[j];
                }
            }

            for (int j = 0; j < inputs.Length; j++)
            {
                if (time[j] >= time[i])
                {
                    // Compute partial gradient
                    for (int k = 0; k < partialGradient.Length; k++)
                    {
                        partialGradient[k] += inputs[j][k] * output[j] / den;
                    }

                    // Compute partial Hessian
                    for (int ii = 0; ii < inputs[j].Length; ii++)
                    {
                        for (int jj = 0; jj < inputs[j].Length; jj++)
                        {
                            partialHessian[ii, jj] += inputs[j][ii] * inputs[j][jj] * output[j] / den;
                        }
                    }
                }
            }

            // Compute gradient vector
            for (int j = 0; j < gradient.Length; j++)
            {
                gradient[j] += inputs[i][j] - partialGradient[j];
            }

            // Compute Hessian matrix
            for (int j = 0; j < partialGradient.Length; j++)
            {
                for (int k = 0; k < partialGradient.Length; k++)
                {
                    hessian[j, k] -= partialHessian[j, k] - partialGradient[j] * partialGradient[k];
                }
            }
        }

        // The linear system is always solved through a Singular Value
        // Decomposition, so a singular Hessian is handled through the
        // pseudo-inverse solution. For a discussion of alternatives, see:
        //
        //  - Jeff Gill & Gary King, "What to Do When Your Hessian Is Not Invertible",
        //    Sociological Methods & Research, Vol 33, No. 1, August 2004, 54-87.
        //    Available in: http://gking.harvard.edu/files/help.pdf
        //
        // The decomposition is kept in a field because its inverse is
        // needed afterwards to compute the standard errors.
        decomposition = new SingularValueDecomposition(hessian);
        double[] deltas = decomposition.Solve(gradient);

        // Update coefficients using the calculated deltas
        for (int i = 0; i < regression.Coefficients.Length; i++)
        {
            regression.Coefficients[i] -= smooth * deltas[i];
        }

        smooth += 0.1;
        if (smooth > 1)
        {
            smooth = 1;
        }

        // Check relative maximum parameter change
        convergence.NewValues = regression.Coefficients;

        if (convergence.HasDiverged)
        {
            // Restore previous coefficients
            for (int i = 0; i < regression.Coefficients.Length; i++)
            {
                regression.Coefficients[i] = convergence.OldValues[i];
            }
        }

        // Recompute current outputs
        for (int i = 0; i < output.Length; i++)
        {
            double sum = 0;
            for (int j = 0; j < regression.Coefficients.Length; j++)
            {
                sum += regression.Coefficients[j] * inputs[i][j];
            }
            output[i] = Math.Exp(sum);
        }

    } while (!convergence.HasConverged);

    // Bring the coefficients back to the scale of the original inputs
    for (int i = 0; i < regression.Coefficients.Length; i++)
    {
        regression.Coefficients[i] /= sdev[i];
    }

    if (computeStandardErrors)
    {
        // Grab the regression information matrix
        double[,] inverse = decomposition.Inverse();

        // Calculate coefficients' standard errors
        double[] standardErrors = regression.StandardErrors;
        for (int i = 0; i < standardErrors.Length; i++)
        {
            standardErrors[i] = Math.Sqrt(Math.Abs(inverse[i, i])) / sdev[i];
        }
    }

    if (computeBaselineFunction)
    {
        createBaseline(time, censor, output);
    }

    return convergence.Delta;
}
private double run(double[][] inputs, double[][] outputs)
{
    // One step of Lower-Bound Newton-Raphson estimation.
    //
    // Instead of the true Hessian, a lower bound that does not depend on
    // the current coefficients is used [Krishnapuram et al]:
    //
    //  - http://www.lx.it.pt/~mtf/Krishnapuram_Carin_Figueiredo_Hartemink_2005.pdf
    //
    // For the multinomial logistic log-likelihood l(w), the Hessian is
    //
    //   H(w) = -sum(P(w) - p(w)p(w)') (x) xx'
    //
    // where (x) denotes the Kronecker product, and it can be replaced by
    // the bound (eq. 8 of the paper above)
    //
    //   B = -(1/2) [I - 11/M] (x) sum(xx')
    //
    // so the bound only has to be accumulated and decomposed while the
    // UpdateLowerBound flag is set.

    int sampleCount = inputs.Length;
    double[][] designMatrix = new double[sampleCount][];
    double[][] coefficients = this.regression.Coefficients;

    // Build the design matrix: a leading 1 (intercept) followed by the inputs
    for (int n = 0; n < inputs.Length; n++)
    {
        double[] designRow = new double[M];
        designMatrix[n] = designRow;
        designRow[0] = 1;

        for (int c = 0; c < inputs[n].Length; c++)
        {
            designRow[c + 1] = inputs[n][c];
        }
    }

    // Start from a zeroed gradient (and a zeroed bound when it is rebuilt)
    Array.Clear(gradient, 0, gradient.Length);

    if (UpdateLowerBound)
    {
        for (int r = 0; r < gradient.Length; r++)
        {
            for (int c = 0; c < gradient.Length; c++)
            {
                lowerBound[r, c] = 0;
            }
        }
    }

    // Accumulate the contribution of every sample
    for (int n = 0; n < inputs.Length; n++)
    {
        double[] features = designMatrix[n];
        double[] target = outputs[n];

        // Estimate the outputs for this sample into the shared buffer
        this.compute(inputs[n], output);

        // Per-class errors; the first entry of the target is skipped
        for (int c = 0; c < errors.Length; c++)
        {
            errors[c] = target[c + 1] - output[c];
        }

        // The gradient has a block structure that is expressed through
        // a Kronecker product. See [Böhning, 1992]
        double[] sampleGradient = Matrix.KroneckerProduct(errors, features);
        for (int c = 0; c < sampleGradient.Length; c++)
        {
            gradient[c] += sampleGradient[c];
        }

        if (UpdateLowerBound)
        {
            // Outer product of the design row with itself
            for (int r = 0; r < features.Length; r++)
            {
                for (int c = 0; c < features.Length; c++)
                {
                    xxt[r, c] = features[r] * features[c];
                }
            }

            // Weighted "Hessian" contribution of this sample
            double[,] sampleBound = Matrix.KroneckerProduct(weights, xxt);
            for (int r = 0; r < parameterCount; r++)
            {
                for (int c = 0; c < parameterCount; c++)
                {
                    lowerBound[r, c] += sampleBound[r, c];
                }
            }
        }
    }

    if (!UpdateLowerBound)
    {
        // The bound has not changed: reuse the stored decomposition
        deltas = decomposition.Solve(gradient);
    }
    else
    {
        UpdateLowerBound = false;

        // Try the cheaper LU decomposition first and fall back to the
        // Singular Value Decomposition only when the bound is singular.
        // On other ways to deal with a singular Hessian, see:
        //
        //  - Jeff Gill & Gary King, "What to Do When Your Hessian Is Not Invertible",
        //    Sociological Methods & Research, Vol 33, No. 1, August 2004, 54-87.
        //    Available in: http://gking.harvard.edu/files/help.pdf
        //
        // The decomposition is stored because its inverse is used below
        // for the standard errors and later calls reuse it for solving.
        LuDecomposition luSolver = new LuDecomposition(lowerBound);

        if (luSolver.Nonsingular)
        {
            deltas = luSolver.Solve(gradient);
            decomposition = luSolver;
        }
        else
        {
            // Singular bound: use the pseudo-inverse solution
            decomposition = new SingularValueDecomposition(lowerBound);
            deltas = decomposition.Solve(gradient);
        }
    }

    previous = coefficients.Reshape(1);

    // Apply the deltas, consuming them in row-major coefficient order
    int cursor = 0;
    foreach (double[] coefficientRow in coefficients)
    {
        for (int c = 0; c < coefficientRow.Length; c++)
        {
            coefficientRow[c] -= deltas[cursor];
            cursor++;
        }
    }

    solution = coefficients.Reshape(1);

    if (computeStandardErrors)
    {
        // Standard errors come from the diagonal of the inverse matrix
        double[,] inverse = decomposition.Inverse();
        double[][] standardErrors = regression.StandardErrors;

        int diagonal = 0;
        foreach (double[] errorRow in standardErrors)
        {
            for (int c = 0; c < errorRow.Length; c++)
            {
                errorRow[c] = Math.Sqrt(Math.Abs(inverse[diagonal, diagonal]));
                diagonal++;
            }
        }
    }

    // Turn the deltas into relative changes and report the largest one
    for (int p = 0; p < deltas.Length; p++)
    {
        deltas[p] = Math.Abs(deltas[p]) / Math.Abs(previous[p]);
    }

    return Matrix.Max(deltas);
}
/// <summary>
///   Fits Cox's proportional hazards model by repeating damped Newton-Raphson
///   updates until the convergence criterion reports convergence.
/// </summary>
///
/// <param name="inputs">The input data.</param>
/// <param name="time">The time-to-event associated with each input vector.</param>
/// <param name="censor">
///   The output (event) associated with each input vector. Samples whose
///   entry is zero are skipped when accumulating the gradient and Hessian.
/// </param>
///
/// <returns>
///   The maximum relative change in the parameters after the last iteration
///   (<c>convergence.Delta</c>), or the value returned by <c>createBaseline</c>
///   when the model has no parameters to estimate.
/// </returns>
///
/// <exception cref="DimensionMismatchException">
///   <paramref name="inputs"/>, <paramref name="time"/> and
///   <paramref name="censor"/> do not all have the same length.
/// </exception>
///
public double Run(double[][] inputs, double[] time, int[] censor)
{
    if (inputs.Length != time.Length || time.Length != censor.Length)
    {
        throw new DimensionMismatchException("time",
            "The inputs, time and output vector must have the same length.");
    }

    // Scale factors default to one so that the rescaling performed after
    // the optimization leaves the coefficients untouched when the inputs
    // are not normalized.
    double[] sdev = new double[parameterCount];
    for (int i = 0; i < sdev.Length; i++)
    {
        sdev[i] = 1;
    }

    if (normalize)
    {
        // Store means as regression centers
        double[] means = inputs.Mean();
        for (int i = 0; i < means.Length; i++)
        {
            regression.Offsets[i] = means[i];
        }

        // Convert to unit scores for increased accuracy
        sdev = Accord.Statistics.Tools.StandardDeviation(inputs);
        inputs = inputs.Subtract(means, 0).ElementwiseDivide(sdev, 0, inPlace: true);
    }

    // Sort data by time to accelerate performance
    if (!time.IsSorted(ComparerDirection.Descending))
    {
        sort(ref inputs, ref time, ref censor);
    }

    // Compute actual outputs
    double[] output = new double[inputs.Length];
    for (int i = 0; i < output.Length; i++)
    {
        output[i] = regression.Compute(inputs[i]);
    }

    // With no coefficients to fit, only the baseline is estimated
    if (parameterCount == 0)
    {
        return createBaseline(time, censor, output);
    }

    CurrentIteration = 0;

    // Step damping: 0.1 on the first iteration, increased by 0.1 after
    // every iteration and capped at 1 (a full Newton step).
    double smooth = 0.1;

    do // learning iterations until convergence
    {  // or maximum number of iterations reached

        CurrentIteration++;

        // Reset Hessian matrix and gradient
        Array.Clear(gradient, 0, gradient.Length);
        Array.Clear(hessian, 0, hessian.Length);

        // For each observation instance
        for (int i = 0; i < inputs.Length; i++)
        {
            // Samples with a zero event indicator do not contribute
            if (censor[i] == 0)
            {
                continue;
            }

            // Partials are taken over all samples j with time[j] >= time[i]
            double den = 0;
            Array.Clear(partialGradient, 0, partialGradient.Length);
            Array.Clear(partialHessian, 0, partialHessian.Length);

            for (int j = 0; j < inputs.Length; j++)
            {
                if (time[j] >= time[i])
                {
                    den += output[j];
                }
            }

            for (int j = 0; j < inputs.Length; j++)
            {
                if (time[j] >= time[i])
                {
                    // Compute partial gradient
                    for (int k = 0; k < partialGradient.Length; k++)
                    {
                        partialGradient[k] += inputs[j][k] * output[j] / den;
                    }

                    // Compute partial Hessian
                    for (int ii = 0; ii < inputs[j].Length; ii++)
                    {
                        for (int jj = 0; jj < inputs[j].Length; jj++)
                        {
                            partialHessian[ii, jj] += inputs[j][ii] * inputs[j][jj] * output[j] / den;
                        }
                    }
                }
            }

            // Compute gradient vector
            for (int j = 0; j < gradient.Length; j++)
            {
                gradient[j] += inputs[i][j] - partialGradient[j];
            }

            // Compute Hessian matrix
            for (int j = 0; j < partialGradient.Length; j++)
            {
                for (int k = 0; k < partialGradient.Length; k++)
                {
                    hessian[j, k] -= partialHessian[j, k] - partialGradient[j] * partialGradient[k];
                }
            }
        }

        // Solve the Newton system with a Singular Value Decomposition.
        // It is always used here, so a singular Hessian is handled by the
        // pseudo-inverse solution. Other ways of dealing with a singular
        // Hessian are discussed in:
        //
        //  - Jeff Gill & Gary King, "What to Do When Your Hessian Is Not Invertible",
        //    Sociological Methods & Research, Vol 33, No. 1, August 2004, 54-87.
        //    Available in: http://gking.harvard.edu/files/help.pdf
        //
        // The decomposition stays in a field: its inverse is read after
        // the loop when standard errors are requested.
        decomposition = new SingularValueDecomposition(hessian);
        double[] deltas = decomposition.Solve(gradient);

        // Update coefficients using the calculated deltas
        for (int i = 0; i < regression.Coefficients.Length; i++)
        {
            regression.Coefficients[i] -= smooth * deltas[i];
        }

        smooth += 0.1;
        if (smooth > 1)
        {
            smooth = 1;
        }

        // Check relative maximum parameter change
        convergence.NewValues = regression.Coefficients;

        if (convergence.HasDiverged)
        {
            // Restore previous coefficients
            for (int i = 0; i < regression.Coefficients.Length; i++)
            {
                regression.Coefficients[i] = convergence.OldValues[i];
            }
        }

        // Recompute current outputs
        for (int i = 0; i < output.Length; i++)
        {
            double sum = 0;
            for (int j = 0; j < regression.Coefficients.Length; j++)
            {
                sum += regression.Coefficients[j] * inputs[i][j];
            }
            output[i] = Math.Exp(sum);
        }

    } while (!convergence.HasConverged);

    // Undo the input scaling on the fitted coefficients
    for (int i = 0; i < regression.Coefficients.Length; i++)
    {
        regression.Coefficients[i] /= sdev[i];
    }

    if (computeStandardErrors)
    {
        // Grab the regression information matrix
        double[,] inverse = decomposition.Inverse();

        // Calculate coefficients' standard errors
        double[] standardErrors = regression.StandardErrors;
        for (int i = 0; i < standardErrors.Length; i++)
        {
            standardErrors[i] = Math.Sqrt(Math.Abs(inverse[i, i])) / sdev[i];
        }
    }

    if (computeBaselineFunction)
    {
        createBaseline(time, censor, output);
    }

    return convergence.Delta;
}
private double run(double[][] inputs, double[][] outputs)
{
    // One step of Lower-Bound Newton-Raphson estimation.
    //
    // The Hessian of the multinomial logistic log-likelihood l(w),
    //
    //   H(w) = -sum(P(w) - p(w)p(w)') (x) xx'
    //
    // ((x) being the Kronecker product) is replaced by a lower bound that
    // does not depend on w [Krishnapuram et al, eq. 8]:
    //
    //   B = -(1/2) [I - 11/M] (x) sum(xx')
    //
    //  - http://www.lx.it.pt/~mtf/Krishnapuram_Carin_Figueiredo_Hartemink_2005.pdf
    //
    // The bound is therefore only accumulated and decomposed while the
    // UpdateLowerBound flag is set.

    int sampleCount = inputs.Length;
    double[][] designMatrix = new double[sampleCount][];
    double[][] coefficients = this.regression.Coefficients;

    // Design matrix rows: 1 for the intercept, then the input values
    for (int n = 0; n < inputs.Length; n++)
    {
        double[] designRow = new double[M];
        designMatrix[n] = designRow;
        designRow[0] = 1;

        for (int c = 0; c < inputs[n].Length; c++)
        {
            designRow[c + 1] = inputs[n][c];
        }
    }

    // Zero the gradient, and the bound too when it is about to be rebuilt
    Array.Clear(gradient, 0, gradient.Length);

    if (UpdateLowerBound)
    {
        for (int r = 0; r < gradient.Length; r++)
        {
            for (int c = 0; c < gradient.Length; c++)
            {
                lowerBound[r, c] = 0;
            }
        }
    }

    // Visit every sample in the dataset
    for (int n = 0; n < inputs.Length; n++)
    {
        double[] features = designMatrix[n];
        double[] target = outputs[n];

        // Write the estimated outputs for this sample into the shared buffer
        this.compute(inputs[n], output);

        // Errors against the target, whose first entry is skipped
        for (int c = 0; c < errors.Length; c++)
        {
            errors[c] = target[c + 1] - output[c];
        }

        // Block-structured gradient through a Kronecker product.
        // See [Böhning, 1992]
        double[] sampleGradient = Matrix.KroneckerProduct(errors, features);
        for (int c = 0; c < sampleGradient.Length; c++)
        {
            gradient[c] += sampleGradient[c];
        }

        if (UpdateLowerBound)
        {
            // Outer product of the design row with itself
            for (int r = 0; r < features.Length; r++)
            {
                for (int c = 0; c < features.Length; c++)
                {
                    xxt[r, c] = features[r] * features[c];
                }
            }

            // Weighted "Hessian" contribution of this sample
            double[,] sampleBound = Matrix.KroneckerProduct(weights, xxt);
            for (int r = 0; r < parameterCount; r++)
            {
                for (int c = 0; c < parameterCount; c++)
                {
                    lowerBound[r, c] += sampleBound[r, c];
                }
            }
        }
    }

    if (!UpdateLowerBound)
    {
        // Bound unchanged: solve with the decomposition stored earlier
        deltas = decomposition.Solve(gradient);
    }
    else
    {
        UpdateLowerBound = false;

        // The freshly accumulated bound is always decomposed with a
        // Singular Value Decomposition, so a singular bound is handled
        // by the pseudo-inverse solution. On other ways to deal with a
        // singular Hessian, see:
        //
        //  - Jeff Gill & Gary King, "What to Do When Your Hessian Is Not Invertible",
        //    Sociological Methods & Research, Vol 33, No. 1, August 2004, 54-87.
        //    Available in: http://gking.harvard.edu/files/help.pdf
        //
        // The decomposition is stored because its inverse is used below
        // for the standard errors and later calls reuse it for solving.
        decomposition = new SingularValueDecomposition(lowerBound);
        deltas = decomposition.Solve(gradient);
    }

    previous = coefficients.Reshape(1);

    // Apply the deltas, consuming them in row-major coefficient order
    int cursor = 0;
    foreach (double[] coefficientRow in coefficients)
    {
        for (int c = 0; c < coefficientRow.Length; c++)
        {
            coefficientRow[c] -= deltas[cursor];
            cursor++;
        }
    }

    solution = coefficients.Reshape(1);

    if (computeStandardErrors)
    {
        // Standard errors come from the diagonal of the inverse matrix
        double[,] inverse = decomposition.Inverse();
        double[][] standardErrors = regression.StandardErrors;

        int diagonal = 0;
        foreach (double[] errorRow in standardErrors)
        {
            for (int c = 0; c < errorRow.Length; c++)
            {
                errorRow[c] = Math.Sqrt(Math.Abs(inverse[diagonal, diagonal]));
                diagonal++;
            }
        }
    }

    // Convert the deltas to relative changes and return the largest one
    for (int p = 0; p < deltas.Length; p++)
    {
        deltas[p] = Math.Abs(deltas[p]) / Math.Abs(previous[p]);
    }

    return Matrix.Max(deltas);
}
/// <summary>
///   Performs a single Iteratively Reweighted Least Squares update
///   of the regression coefficients.
/// </summary>
/// <param name="inputs">The input data.</param>
/// <param name="outputs">The outputs associated with each input vector.</param>
/// <returns>The maximum relative change in the parameters after the iteration.</returns>
///
public double Run(double[][] inputs, double[] outputs)
{
    // Iteratively Reweighted Least Squares estimation. Reference:
    //  - Bishop, Christopher M.; Pattern Recognition
    //    and Machine Learning. Springer; 1st ed. 2006.

    int sampleCount = inputs.Length;
    double[][] designMatrix = new double[sampleCount][];
    double[] residuals = new double[sampleCount];
    double[] sampleWeights = new double[sampleCount];
    double[] coefficients = this.regression.Coefficients;

    // Design matrix rows: 1 for the intercept, then the input values
    for (int n = 0; n < inputs.Length; n++)
    {
        double[] designRow = new double[parameterCount];
        designMatrix[n] = designRow;
        designRow[0] = 1;

        for (int c = 0; c < inputs[n].Length; c++)
        {
            designRow[c + 1] = inputs[n][c];
        }
    }

    // Residual and weight of every sample under the current model
    for (int n = 0; n < inputs.Length; n++)
    {
        double predicted = regression.Compute(inputs[n]);

        residuals[n] = predicted - outputs[n];
        sampleWeights[n] = regression.Link.Derivative2(predicted);
    }

    // Start the accumulators from zero
    Array.Clear(gradient, 0, gradient.Length);
    Array.Clear(hessian, 0, hessian.Length);

    // Error gradient: design rows scaled by their residuals
    for (int n = 0; n < designMatrix.Length; n++)
    {
        for (int p = 0; p < gradient.Length; p++)
        {
            gradient[p] += designMatrix[n][p] * residuals[n];
        }
    }

    // Weighted "Hessian": weighted outer products of the design rows
    for (int n = 0; n < sampleWeights.Length; n++)
    {
        double[] designRow = designMatrix[n];

        for (int r = 0; r < designRow.Length; r++)
        {
            for (int c = 0; c < designRow.Length; c++)
            {
                hessian[r, c] += designRow[c] * designRow[r] * sampleWeights[n];
            }
        }
    }

    // The system is always solved through a Singular Value Decomposition,
    // so a singular Hessian is handled by the pseudo-inverse solution.
    // Other ways to avoid a singular Hessian are discussed in:
    //
    //  - Jeff Gill & Gary King, "What to Do When Your Hessian Is Not Invertible",
    //    Sociological Methods & Research, Vol 33, No. 1, August 2004, 54-87.
    //    Available in: http://gking.harvard.edu/files/help.pdf
    //
    // The inverse is only requested below, for the standard errors.
    decomposition = new SingularValueDecomposition(hessian);
    double[] deltas = decomposition.Solve(gradient);

    // Keep a copy of the coefficients as they were before the update
    previous = (double[])coefficients.Clone();

    for (int p = 0; p < coefficients.Length; p++)
    {
        coefficients[p] -= deltas[p];
    }

    if (computeStandardErrors)
    {
        // Standard errors come from the diagonal of the inverse matrix
        double[,] inverse = decomposition.Inverse();
        double[] standardErrors = regression.StandardErrors;

        for (int p = 0; p < standardErrors.Length; p++)
        {
            standardErrors[p] = Math.Sqrt(inverse[p, p]);
        }
    }

    // Convert the deltas to relative changes and return the largest one
    for (int p = 0; p < deltas.Length; p++)
    {
        deltas[p] = Math.Abs(deltas[p]) / Math.Abs(previous[p]);
    }

    return Matrix.Max(deltas);
}
/// <summary>
///   Executes one update step of the Iteratively Reweighted
///   Least Squares algorithm.
/// </summary>
/// <param name="inputs">The input data.</param>
/// <param name="outputs">The outputs associated with each input vector.</param>
/// <returns>The maximum relative change in the parameters after the iteration.</returns>
///
public double Run(double[][] inputs, double[] outputs)
{
    // Estimation follows Iteratively Reweighted Least Squares as in:
    //  - Bishop, Christopher M.; Pattern Recognition
    //    and Machine Learning. Springer; 1st ed. 2006.

    int rows = inputs.Length;
    double[][] regressors = new double[rows][];
    double[] residuals = new double[rows];
    double[] curvature = new double[rows];
    double[] coefficients = this.regression.Coefficients;
    double[] step;

    // Regression matrix: intercept column of ones followed by the inputs
    for (int r = 0; r < inputs.Length; r++)
    {
        double[] line = new double[parameterCount];
        regressors[r] = line;
        line[0] = 1;

        for (int c = 0; c < inputs[r].Length; c++)
        {
            line[c + 1] = inputs[r][c];
        }
    }

    // Per-sample error and weight given the current coefficients
    for (int r = 0; r < inputs.Length; r++)
    {
        double estimate = regression.Compute(inputs[r]);

        residuals[r] = estimate - outputs[r];
        curvature[r] = regression.Link.Derivative2(estimate);
    }

    // Both accumulators are rebuilt from scratch on every call
    Array.Clear(gradient, 0, gradient.Length);
    Array.Clear(hessian, 0, hessian.Length);

    // Accumulate the error gradient
    for (int r = 0; r < regressors.Length; r++)
    {
        for (int g = 0; g < gradient.Length; g++)
        {
            gradient[g] += regressors[r][g] * residuals[r];
        }
    }

    // Accumulate the weighted "Hessian" matrix
    for (int r = 0; r < curvature.Length; r++)
    {
        double[] line = regressors[r];

        for (int a = 0; a < line.Length; a++)
        {
            for (int b = 0; b < line.Length; b++)
            {
                hessian[a, b] += line[b] * line[a] * curvature[r];
            }
        }
    }

    // A Singular Value Decomposition is always used to solve the system,
    // so a singular Hessian is handled by the pseudo-inverse solution.
    // For other ways to avoid a singular Hessian, see:
    //
    //  - Jeff Gill & Gary King, "What to Do When Your Hessian Is Not Invertible",
    //    Sociological Methods & Research, Vol 33, No. 1, August 2004, 54-87.
    //    Available in: http://gking.harvard.edu/files/help.pdf
    //
    // Its inverse is only computed when standard errors are requested.
    decomposition = new SingularValueDecomposition(hessian);
    step = decomposition.Solve(gradient);

    // Snapshot of the coefficients before they are modified
    previous = (double[])coefficients.Clone();

    for (int p = 0; p < coefficients.Length; p++)
    {
        coefficients[p] -= step[p];
    }

    if (computeStandardErrors)
    {
        // Square roots of the inverse matrix diagonal
        double[,] inverse = decomposition.Inverse();
        double[] standardErrors = regression.StandardErrors;

        for (int p = 0; p < standardErrors.Length; p++)
        {
            standardErrors[p] = Math.Sqrt(inverse[p, p]);
        }
    }

    // Relative change of each parameter; the largest one is returned
    for (int p = 0; p < step.Length; p++)
    {
        step[p] = Math.Abs(step[p]) / Math.Abs(previous[p]);
    }

    return Matrix.Max(step);
}
// Fits the proportional hazards regression with damped Newton-Raphson
// updates and returns the fitted regression object.
//
//   inputs  - the input vectors (one row per sample)
//   time    - the time-to-event of each sample
//   censor  - the outcome of each sample; samples marked as
//             SurvivalOutcome.Censored do not contribute to the
//             gradient and Hessian
//   weights - must be null; sample weights are not supported
//
// Throws ArgumentException when weights is not null, and
// DimensionMismatchException when inputs, time and censor differ in length.
//
// Cancellation through Token stops the iterations early. Both cancellation
// checks leave the loop with "break", so the coefficients are always
// converted back from the normalized scale before the model is returned.
private ProportionalHazards innerLearn(double[][] inputs, double[] time, SurvivalOutcome[] censor, double[] weights)
{
    if (weights != null)
    {
        throw new ArgumentException(Accord.Properties.Resources.NotSupportedWeights, "weights");
    }

    if (inputs.Length != time.Length || time.Length != censor.Length)
    {
        throw new DimensionMismatchException("time",
            "The inputs, time and output vector must have the same length.");
    }

    if (regression == null)
    {
        init(new ProportionalHazards(inputs.Columns()));
    }

    // Sort data by time to accelerate performance
    EmpiricalHazardDistribution.Sort(ref time, ref censor, ref inputs);

    // Unit scale by default, so the final rescaling is a no-op
    // whenever normalization is disabled.
    var sdev = new double[parameterCount];
    for (int i = 0; i < sdev.Length; i++)
    {
        sdev[i] = 1;
    }

    if (normalize)
    {
        // Store means as regression centers
        double[] means = inputs.Mean(dimension: 0);
        for (int i = 0; i < means.Length; i++)
        {
            regression.Offsets[i] = means[i];
        }

        // Convert to unit scores for increased accuracy
        sdev = Measures.StandardDeviation(inputs);
        inputs = Elementwise.Divide(inputs.Subtract(means, 0), sdev, 0);

        // Express the current coefficients in the normalized scale; this
        // is undone after the iterations by dividing by sdev again.
        for (int i = 0; i < regression.Coefficients.Length; i++)
        {
            regression.Coefficients[i] *= sdev[i];
        }
    }

    // Compute actual outputs
    var output = new double[inputs.Length];
    for (int i = 0; i < output.Length; i++)
    {
        double sum = 0;
        for (int j = 0; j < regression.Coefficients.Length; j++)
        {
            sum += regression.Coefficients[j] * inputs[i][j];
        }
        output[i] = Math.Exp(sum);
    }

    // Nothing to optimize: only the baseline can be estimated
    if (parameterCount == 0)
    {
        createBaseline(time, censor, output);
        return regression;
    }

    CurrentIteration = 0;

    // Damping factor applied to each Newton step; it starts at Lambda and
    // grows by Lambda per iteration until it reaches a full step (1.0).
    double smooth = Lambda;

    do
    {
        if (Token.IsCancellationRequested)
        {
            break;
        }

        // learning iterations until convergence
        // or maximum number of iterations reached
        CurrentIteration++;

        // Reset Hessian matrix and gradient
        Array.Clear(gradient, 0, gradient.Length);
        Array.Clear(hessian, 0, hessian.Length);

        // For each observation instance
        for (int i = 0; i < inputs.Length; i++)
        {
            // Censored samples do not contribute
            if (censor[i] == SurvivalOutcome.Censored)
            {
                continue;
            }

            // Compute partials over every sample j with time[j] >= time[i]
            double den = 0;
            Array.Clear(partialGradient, 0, partialGradient.Length);
            Array.Clear(partialHessian, 0, partialHessian.Length);

            for (int j = 0; j < inputs.Length; j++)
            {
                if (time[j] >= time[i])
                {
                    den += output[j];
                }
            }

            for (int j = 0; j < inputs.Length; j++)
            {
                if (time[j] >= time[i])
                {
                    // Compute partial gradient
                    for (int k = 0; k < partialGradient.Length; k++)
                    {
                        partialGradient[k] += inputs[j][k] * output[j] / den;
                    }

                    // Compute partial Hessian
                    for (int ii = 0; ii < inputs[j].Length; ii++)
                    {
                        for (int jj = 0; jj < inputs[j].Length; jj++)
                        {
                            partialHessian[ii, jj] += inputs[j][ii] * inputs[j][jj] * output[j] / den;
                        }
                    }
                }
            }

            // Compute gradient vector
            for (int j = 0; j < gradient.Length; j++)
            {
                gradient[j] += inputs[i][j] - partialGradient[j];
            }

            // Compute Hessian matrix
            for (int j = 0; j < partialGradient.Length; j++)
            {
                for (int k = 0; k < partialGradient.Length; k++)
                {
                    hessian[j, k] -= partialHessian[j, k] - partialGradient[j] * partialGradient[k];
                }
            }
        }

        // The linear system is always solved through a Singular Value
        // Decomposition, so a singular Hessian is handled by the
        // pseudo-inverse solution. For a discussion of alternatives, see:
        //
        //  - Jeff Gill & Gary King, "What to Do When Your Hessian Is Not Invertible",
        //    Sociological Methods & Research, Vol 33, No. 1, August 2004, 54-87.
        //    Available in: http://gking.harvard.edu/files/help.pdf
        //
        decomposition = new SingularValueDecomposition(hessian);
        double[] deltas = decomposition.Solve(gradient);

        // With neither an iteration limit nor a tolerance configured the
        // coefficients are left as they are.
        if (convergence.Iterations > 0 || convergence.Tolerance > 0)
        {
            // Update coefficients using the calculated deltas
            for (int i = 0; i < regression.Coefficients.Length; i++)
            {
                regression.Coefficients[i] -= smooth * deltas[i];
            }
        }

        smooth += Lambda;
        if (smooth > 1)
        {
            smooth = 1;
        }

        // Check relative maximum parameter change
        convergence.NewValues = regression.Coefficients;

        if (convergence.HasDiverged)
        {
            // Restore previous coefficients
            for (int i = 0; i < regression.Coefficients.Length; i++)
            {
                regression.Coefficients[i] = convergence.OldValues[i];
            }
        }

        // Recompute current outputs
        for (int i = 0; i < output.Length; i++)
        {
            double sum = 0;
            for (int j = 0; j < regression.Coefficients.Length; j++)
            {
                sum += regression.Coefficients[j] * inputs[i][j];
            }
            output[i] = Math.Exp(sum);
        }

        // Leave through the common exit path instead of returning here:
        // returning directly would skip the division by sdev below and
        // hand back coefficients that are still in the normalized scale.
        if (Token.IsCancellationRequested)
        {
            break;
        }

    } while (!convergence.HasConverged);

    // Bring the coefficients back to the scale of the original inputs
    for (int i = 0; i < regression.Coefficients.Length; i++)
    {
        regression.Coefficients[i] /= sdev[i];
    }

    if (computeStandardErrors)
    {
        // Grab the regression information matrix
        double[,] inverse = decomposition.Inverse();

        // Calculate coefficients' standard errors
        double[] standardErrors = regression.StandardErrors;
        for (int i = 0; i < standardErrors.Length; i++)
        {
            standardErrors[i] = Math.Sqrt(Math.Abs(inverse[i, i])) / sdev[i];
        }
    }

    if (computeBaselineFunction)
    {
        createBaseline(time, censor, output);
    }

    return regression;
}