/// <summary>
///   Computes class-label decisions for each vector in the given <paramref name="input"/>.
/// </summary>
///
/// <param name="input">The input vectors that should be classified into
///   one of the <see cref="ITransform.NumberOfOutputs"/> possible classes.</param>
///
public SurvivalOutcome[] Decide(double[][] input)
{
    var result = new SurvivalOutcome[input.Length];
    for (int i = 0; i < input.Length; i++)
    {
        result[i] = Decide(input[i]);
    }
    return result;
}
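// A minimal usage sketch for Decide (the patient covariates below are made up,
// and `regression` is assumed to be an already-trained model on two inputs):
//
//   double[][] patients =
//   {
//       new double[] { 52.0, 1.0 },   // e.g. age, smoker
//       new double[] { 37.0, 0.0 },
//   };
//
//   SurvivalOutcome[] outcomes = regression.Decide(patients);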
/// <summary>
///   The likelihood ratio test of the overall model, also called the model chi-square test.
/// </summary>
///
/// <param name="input">A set of input data.</param>
/// <param name="time">The time-to-event before the output occurs.</param>
/// <param name="output">The corresponding output data.</param>
///
/// <remarks>
/// <para>
///   The chi-square test, also called the likelihood ratio test or the log-likelihood test,
///   is based on the deviance of the model (-2 * log-likelihood). The log-likelihood ratio test
///   indicates whether there is evidence of the need to move from a simpler model to a more
///   complicated one (where the simpler model is nested within the complicated one).</para>
/// <para>
///   The difference between the log-likelihood ratios for the researcher's model and a
///   simpler model is often called the "model chi-square".</para>
/// </remarks>
///
public ChiSquareTest ChiSquare(double[][] input, double[] time, SurvivalOutcome[] output)
{
    // Create a freshly initialized model with the same number of
    // inputs to serve as the simpler model in the likelihood ratio
    ProportionalHazards regression = new ProportionalHazards(Inputs);

    double ratio = GetLogLikelihoodRatio(input, time, output, regression);

    return new ChiSquareTest(ratio, Coefficients.Length);
}
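// A hedged usage sketch: after fitting, the overall model significance can be
// checked with this test (the variable names below are illustrative):
//
//   ChiSquareTest test = regression.ChiSquare(inputs, time, output);
//   if (test.Significant)
//   {
//       // evidence that the full model fits better than the simpler nested model
//   }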
/// <summary>
///   Gets the Log-Likelihood Ratio between two models.
/// </summary>
///
/// <remarks>
///   The Log-Likelihood ratio is defined as 2*(LL - LL0).
/// </remarks>
///
/// <param name="input">A set of input data.</param>
/// <param name="time">The time-to-event before the output occurs.</param>
/// <param name="output">The corresponding output data.</param>
/// <param name="hazards">Another Cox Proportional Hazards model.</param>
///
/// <returns>The Log-Likelihood ratio (a measure of performance
///   between two models) calculated over the given data sets.</returns>
///
public double GetLogLikelihoodRatio(double[][] input, double[] time, SurvivalOutcome[] output, ProportionalHazards hazards)
{
    return 2.0 * (this.GetPartialLogLikelihood(input, time, output)
        - hazards.GetPartialLogLikelihood(input, time, output));
}
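// Worked sketch with hypothetical numbers: if this model has partial
// log-likelihood LL = -10.5 and a simpler nested model has LL0 = -14.2, the
// ratio is 2 * (-10.5 - (-14.2)) = 7.4, to be compared against a chi-square
// distribution with as many degrees of freedom as the extra parameters:
//
//   var simpler = new ProportionalHazards(numberOfInputs);  // illustrative null model
//   double ratio = regression.GetLogLikelihoodRatio(inputs, time, output, simpler);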
/// <summary>
///   Gets the Partial Log-Likelihood for the model.
/// </summary>
///
/// <param name="time">The time-to-event before the output occurs.</param>
/// <param name="output">The corresponding output data.</param>
///
/// <returns>
///   The Partial Log-Likelihood (a measure of performance)
///   of the model calculated over the given data set.
/// </returns>
///
public double GetPartialLogLikelihood(double[] time, SurvivalOutcome[] output)
{
    double sum2 = 0;

    for (int i = 0; i < time.Length; i++)
    {
        // Skip censored observations; only events contribute terms
        if (output[i] == SurvivalOutcome.Censored)
            continue;

        // Compute the second sum: the log of the number of observations
        // still at risk at time[i] (in this covariate-free version, the
        // first sum of the partial likelihood vanishes)
        double sum = 0;
        for (int j = 0; j < time.Length; j++)
        {
            if (time[j] >= time[i])
                sum++;
        }

        sum2 += Math.Log(sum);
    }

    return -sum2;
}
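// Worked sketch of the computation above: with times { 2, 3, 5 } and every
// outcome Failed, the risk sets at the three event times have sizes 3, 2
// and 1, so the result is -(ln 3 + ln 2 + ln 1) ≈ -1.792.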
/// <summary>
///   Gets the Partial Log-Likelihood for the model.
/// </summary>
///
/// <param name="inputs">A set of input data.</param>
/// <param name="time">The time-to-event before the output occurs.</param>
/// <param name="output">The corresponding output data.</param>
///
/// <returns>
///   The Partial Log-Likelihood (a measure of performance)
///   of the model calculated over the given data set.
/// </returns>
///
public double GetPartialLogLikelihood(double[][] inputs, double[] time, SurvivalOutcome[] output)
{
    double sum1 = 0, sum2 = 0;

    for (int i = 0; i < inputs.Length; i++)
    {
        // Skip censored observations; only events contribute terms
        if (output[i] == SurvivalOutcome.Censored)
            continue;

        // Compute the first sum: the linear predictor for observation i
        for (int j = 0; j < Coefficients.Length; j++)
            sum1 += Coefficients[j] * (inputs[i][j] - Offsets[j]);

        // Compute the second sum: the log of the total risk among
        // the observations still at risk at time[i]
        double sum = 0;
        for (int j = 0; j < inputs.Length; j++)
        {
            if (time[j] >= time[i])
            {
                double s = 0;
                for (int k = 0; k < Coefficients.Length; k++)
                    s += Coefficients[k] * (inputs[j][k] - Offsets[k]);
                sum += Math.Exp(s);
            }
        }

        sum2 += Math.Log(sum);
    }

    return sum1 - sum2;
}
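// In formula form, the method above computes the standard Cox partial
// log-likelihood (with inputs centered by the offsets):
//
//   log L(b) = sum over events i of [ b'x_i - log( sum of exp(b'x_j) over j in R(t_i) ) ]
//
// where R(t_i) = { j : t_j >= t_i } is the set of observations still at
// risk at the i-th event time.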
/// <summary>
///   Gets the Deviance for the model.
/// </summary>
///
/// <remarks>
///   The deviance is defined as -2*Log-Likelihood.
/// </remarks>
///
/// <param name="inputs">A set of input data.</param>
/// <param name="time">The time-to-event before the output occurs.</param>
/// <param name="output">The corresponding output data.</param>
///
/// <returns>
///   The deviance (a measure of performance) of the model
///   calculated over the given data sets.
/// </returns>
///
public double GetDeviance(double[][] inputs, double[] time, SurvivalOutcome[] output)
{
    return -2.0 * GetPartialLogLikelihood(inputs, time, output);
}
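// Worked sketch with a hypothetical number: a partial log-likelihood of -10.5
// corresponds to a deviance of -2 * (-10.5) = 21.0; smaller deviances indicate
// better-fitting models.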
/// <summary>
///   Initializes a new instance of the <see cref="EmpiricalHazardOptions"/> class.
/// </summary>
///
/// <param name="estimator">The hazard estimator to use.</param>
/// <param name="ties">The method for handling tied event times.</param>
/// <param name="outcome">The output (event or censoring) for each sample.</param>
///
public EmpiricalHazardOptions(HazardEstimator estimator, HazardTiesMethod ties, SurvivalOutcome[] outcome)
{
    Estimator = estimator;
    Outcome = outcome;
    Ties = ties;
}
private static void CreateExample1(out double[] times, out SurvivalOutcome[] censor)
{
    // Example from http://sas-and-r.blogspot.fr/2010/05/example-738-kaplan-meier-survival.html

    object[,] data =
    {
        //  time   event
        {   0.5,   false },
        {   1,     true  },
        {   1,     true  },
        {   2,     true  },
        {   2,     false },
        {   3,     true  },
        {   4,     true  },
        {   5,     false },
        {   6,     true  },
        {   7,     false },
        {   8,     true  },
        {   9,     true  },
        {  10,     false },
        {  12,     true  },
        {  14,     false },
        {  14,     true  },
        {  17,     false },
        {  20,     true  },
        {  21,     false },
    };

    times = data.GetColumn(0).To<double[]>();
    censor = data.GetColumn(1).To<SurvivalOutcome[]>();
}
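// A hedged sketch of how this example data could be consumed (the exact
// fitting API is an assumption, not taken from this file):
//
//   double[] times; SurvivalOutcome[] censor;
//   CreateExample1(out times, out censor);
//
//   var distribution = new EmpiricalHazardDistribution();
//   distribution.Fit(times, new EmpiricalHazardOptions(HazardEstimator.KaplanMeier, censor));
//
//   // Probability of surviving beyond t = 4:
//   double s4 = distribution.ComplementaryDistributionFunction(4);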
/// <summary>
///   Initializes a new instance of the <see cref="EmpiricalHazardOptions"/> class.
/// </summary>
///
/// <param name="estimator">The hazard estimator to use.</param>
/// <param name="output">The output (event or censoring) for each sample.</param>
///
public EmpiricalHazardOptions(HazardEstimator estimator, SurvivalOutcome[] output)
{
    Estimator = estimator;
    Outcome = output;
    Ties = DefaultTies;
}
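// Construction sketch for the two overloads above (the outcome values and the
// specific enum members are illustrative assumptions):
//
//   var outcomes = new[] { SurvivalOutcome.Failed, SurvivalOutcome.Censored };
//   var a = new EmpiricalHazardOptions(HazardEstimator.BreslowNelsonAalen, outcomes);
//   var b = new EmpiricalHazardOptions(HazardEstimator.BreslowNelsonAalen,
//       HazardTiesMethod.Efron, outcomes);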
private void createBaseline(double[] time, SurvivalOutcome[] censor, double[] output = null)
{
    if (regression.BaselineHazard == null)
        return;

    var hazard = regression.BaselineHazard as IFittableDistribution<double, EmpiricalHazardOptions>;
    if (hazard != null)
    {
        // Compute an estimate of the cumulative hazard
        // function using the Nelson-Aalen estimator
        hazard.Fit(time, output, new EmpiricalHazardOptions()
        {
            Outcome = censor,
            Estimator = Estimator,
            Ties = Ties
        });
        return;
    }

    var survival = regression.BaselineHazard as IFittableDistribution<double, SurvivalOptions>;
    if (survival != null)
    {
        // Compute an estimate of the survival function
        // using the Kaplan-Meier estimator
        survival.Fit(time, new SurvivalOptions()
        {
            Outcome = censor,
        });
    }
}
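// For reference, the Nelson-Aalen estimator used above accumulates the baseline
// cumulative hazard as H(t) = sum of d_i / n_i over the event times t_i <= t,
// where d_i is the number of events and n_i the number of subjects still at
// risk at t_i.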
/// <summary>
///   Runs learning for a model with no covariates. Only the baseline
///   hazard function is estimated; no Newton-Raphson updates are needed.
/// </summary>
///
/// <param name="time">The time-to-event for the training samples.</param>
/// <param name="censor">The output (event) associated with each sample.</param>
///
/// <returns>The partial log-likelihood of the model, calculated over the given data.</returns>
///
public double Run(double[] time, SurvivalOutcome[] censor)
{
    if (time.Length != censor.Length)
    {
        throw new DimensionMismatchException("time",
            "The time and output vectors must have the same length.");
    }

    // Sort data by time to accelerate performance
    EmpiricalHazardDistribution.Sort(ref time, ref censor);

    createBaseline(time, censor);

    return regression.GetPartialLogLikelihood(time, censor);
}
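// A hedged usage sketch for the overload above (the learner construction and
// data are illustrative):
//
//   var regression = new ProportionalHazards(inputs: 0);
//   var learner = new ProportionalHazardsNewtonRaphson(regression);
//
//   double[] time = { 5, 11, 28 };
//   var censor = new[] { SurvivalOutcome.Failed, SurvivalOutcome.Censored, SurvivalOutcome.Failed };
//   double logLikelihood = learner.Run(time, censor);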
/// <summary>
///   Runs the Newton-Raphson update for Cox's hazards learning until convergence.
/// </summary>
///
/// <param name="inputs">The input data.</param>
/// <param name="time">The time-to-event for the training samples.</param>
/// <param name="censor">The output (event) associated with each input vector.</param>
///
/// <returns>The partial log-likelihood of the model, calculated over the given data.</returns>
///
public double Run(double[][] inputs, double[] time, SurvivalOutcome[] censor)
{
    if (inputs.Length != time.Length || time.Length != censor.Length)
    {
        throw new DimensionMismatchException("time",
            "The inputs, time and output vectors must have the same length.");
    }

    // Sort data by time to accelerate performance
    EmpiricalHazardDistribution.Sort(ref time, ref censor, ref inputs);

    double[] means = new double[parameterCount];
    double[] sdev = new double[parameterCount];
    for (int i = 0; i < sdev.Length; i++)
        sdev[i] = 1;

    if (normalize)
    {
        // Store means as regression centers
        means = inputs.Mean();
        for (int i = 0; i < means.Length; i++)
            regression.Offsets[i] = means[i];

        // Convert to unit scores for increased accuracy
        sdev = Accord.Statistics.Tools.StandardDeviation(inputs);
        inputs = inputs.Subtract(means, 0).ElementwiseDivide(sdev, 0, inPlace: true);

        for (int i = 0; i < regression.Coefficients.Length; i++)
            regression.Coefficients[i] *= sdev[i];
    }

    // Compute actual outputs: the relative risk exp(b'x) for each sample
    double[] output = new double[inputs.Length];
    for (int i = 0; i < output.Length; i++)
    {
        double sum = 0;
        for (int j = 0; j < regression.Coefficients.Length; j++)
            sum += regression.Coefficients[j] * inputs[i][j];
        output[i] = Math.Exp(sum);
    }

    // Compute ties
    int[] ties = new int[inputs.Length];
    for (int i = 0; i < inputs.Length; i++)
        for (int j = 0; j < time.Length; j++)
            if (time[j] == time[i]) ties[i]++;

    if (parameterCount == 0)
    {
        createBaseline(time, censor, output);
        return regression.GetPartialLogLikelihood(inputs, time, censor);
    }

    CurrentIteration = 0;
    double smooth = Lambda;

    do
    {
        // learning iterations until convergence
        // or maximum number of iterations reached
        CurrentIteration++;

        // Reset Hessian matrix and gradient
        Array.Clear(gradient, 0, gradient.Length);
        Array.Clear(hessian, 0, hessian.Length);

        // For each observation instance
        for (int i = 0; i < inputs.Length; i++)
        {
            // Check if we should censor
            if (censor[i] == SurvivalOutcome.Censored)
                continue;

            // Compute partials
            double den = 0;
            Array.Clear(partialGradient, 0, partialGradient.Length);
            Array.Clear(partialHessian, 0, partialHessian.Length);

            for (int j = 0; j < inputs.Length; j++)
            {
                if (time[j] >= time[i])
                    den += output[j];
            }

            for (int j = 0; j < inputs.Length; j++)
            {
                if (time[j] >= time[i])
                {
                    // Compute partial gradient
                    for (int k = 0; k < partialGradient.Length; k++)
                        partialGradient[k] += inputs[j][k] * output[j] / den;

                    // Compute partial Hessian
                    for (int ii = 0; ii < inputs[j].Length; ii++)
                        for (int jj = 0; jj < inputs[j].Length; jj++)
                            partialHessian[ii, jj] += inputs[j][ii] * inputs[j][jj] * output[j] / den;
                }
            }

            // Compute gradient vector
            for (int j = 0; j < gradient.Length; j++)
                gradient[j] += inputs[i][j] - partialGradient[j];

            // Compute Hessian matrix
            for (int j = 0; j < partialGradient.Length; j++)
                for (int k = 0; k < partialGradient.Length; k++)
                    hessian[j, k] -= partialHessian[j, k] - partialGradient[j] * partialGradient[k];
        }

        // Decompose to solve the linear system. Usually the Hessian will
        // be invertible and LU will succeed. However, sometimes the Hessian
        // may be singular and a Singular Value Decomposition may be needed.
        //
        // The SVD is very stable, but is quite expensive, being on average
        // about 10-15 times more expensive than LU decomposition. There are
        // other ways to avoid a singular Hessian. For a very interesting
        // reading on the subject, please see:
        //
        //  - Jeff Gill & Gary King, "What to Do When Your Hessian Is Not Invertible",
        //    Sociological Methods & Research, Vol 33, No. 1, August 2004, 54-87.
        //    Available in: http://gking.harvard.edu/files/help.pdf
        //
        decomposition = new SingularValueDecomposition(hessian);
        double[] deltas = decomposition.Solve(gradient);

        if (convergence.Iterations > 0 || convergence.Tolerance > 0)
        {
            // Update coefficients using the calculated deltas
            for (int i = 0; i < regression.Coefficients.Length; i++)
                regression.Coefficients[i] -= smooth * deltas[i];
        }

        // Increase the step size towards the full Newton step
        smooth += Lambda;
        if (smooth > 1)
            smooth = 1;

        // Check relative maximum parameter change
        convergence.NewValues = regression.Coefficients;

        if (convergence.HasDiverged)
        {
            // Restore previous coefficients
            for (int i = 0; i < regression.Coefficients.Length; i++)
                regression.Coefficients[i] = convergence.OldValues[i];
        }

        // Recompute current outputs
        for (int i = 0; i < output.Length; i++)
        {
            double sum = 0;
            for (int j = 0; j < regression.Coefficients.Length; j++)
                sum += regression.Coefficients[j] * inputs[i][j];
            output[i] = Math.Exp(sum);
        }

    } while (!convergence.HasConverged);

    // Revert the scaling so the coefficients apply to the original inputs
    for (int i = 0; i < regression.Coefficients.Length; i++)
        regression.Coefficients[i] /= sdev[i];

    if (computeStandardErrors)
    {
        // Grab the regression information matrix
        double[,] inverse = decomposition.Inverse();

        // Calculate coefficients' standard errors
        double[] standardErrors = regression.StandardErrors;
        for (int i = 0; i < standardErrors.Length; i++)
            standardErrors[i] = Math.Sqrt(Math.Abs(inverse[i, i])) / sdev[i];
    }

    if (computeBaselineFunction)
        createBaseline(time, censor, output);

    return regression.GetPartialLogLikelihood(inputs, time, censor);
}
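// A fuller, hedged usage sketch for the learning procedure above, using made-up
// data with a single covariate (all values are illustrative only):
//
//   double[][] inputs =
//   {
//       new double[] { 50.0 },
//       new double[] { 70.0 },
//       new double[] { 45.0 },
//   };
//   double[] time = { 11, 5, 28 };
//   var censor = new[]
//   {
//       SurvivalOutcome.Censored, SurvivalOutcome.Failed, SurvivalOutcome.Failed,
//   };
//
//   var regression = new ProportionalHazards(inputs: 1);
//   var target = new ProportionalHazardsNewtonRaphson(regression);
//   double logLikelihood = target.Run(inputs, time, censor);
//
//   // The estimated coefficient and its corresponding hazard ratio:
//   double beta = regression.Coefficients[0];
//   double hazardRatio = Math.Exp(beta);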
/// <summary>
///   Runs the Newton-Raphson update for Cox's hazards learning until convergence,
///   assuming the event of interest was observed for every sample (no censoring).
/// </summary>
///
/// <param name="inputs">The input data.</param>
/// <param name="time">The time-to-event for the training samples.</param>
///
/// <returns>The partial log-likelihood of the model, calculated over the given data.</returns>
///
public double Run(double[][] inputs, double[] time)
{
    // Mark every observation as a failure (the event was observed),
    // rather than relying on the enum's default numeric value
    var censor = new SurvivalOutcome[time.Length];
    for (int i = 0; i < censor.Length; i++)
        censor[i] = SurvivalOutcome.Failed;

    return Run(inputs, time, censor);
}