public void PredictTest1()
{
    // Configures a single-variable proportional hazards model by hand
    // (coefficient, standard error, offset and empirical baseline hazard)
    // and checks Compute(input, time) for every row of the data set.

    // Data from: http://www.sph.emory.edu/~cdckms/CoxPH/prophaz2.html
    double[,] table =
    {
        { 50,  1, 0 },
        { 70,  2, 1 },
        { 45,  3, 0 },
        { 35,  5, 0 },
        { 62,  7, 1 },
        { 50, 11, 0 },
        { 45,  4, 0 },
        { 57,  6, 0 },
        { 32,  8, 0 },
        { 57,  9, 1 },
        { 60, 10, 1 },
    };

    // Baseline hazard values and the time points they belong to
    double[] baselineHazards =
    {
        0, 0.0351683340828711, 0.0267358118285064, 0, 0.0103643094219679,
        0, 0, 0, 0, 0.000762266794052363, 0
    };
    double[] baselineTimes = { 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 };

    var model = new ProportionalHazards(1,
        new EmpiricalHazardDistribution(baselineTimes, baselineHazards));

    model.Coefficients[0] = 0.37704239281494084;
    model.StandardErrors[0] = 0.25415755113043753;
    model.Offsets[0] = 51.181818;

    // Column 0 feeds the inputs, column 1 the observation times
    double[][] covariates = table.GetColumn(0).ToArray();
    double[] times = table.GetColumn(1);

    double[] expected =
    {
        0.000000000000, 0.919466527073, 0.000074105451, 0.000001707560,
        0.657371730925, 0.046771996036, 0.000074105451, 0.006836271860,
        0.000008042445, 0.339562971888, 2.029832541310
    };

    // First evaluate every prediction...
    var predicted = new double[covariates.Length];
    for (int row = 0; row < covariates.Length; row++)
    {
        predicted[row] = model.Compute(covariates[row], times[row]);
    }

    // ...then compare them against the reference values
    for (int row = 0; row < predicted.Length; row++)
    {
        Assert.AreEqual(expected[row], predicted[row], 1e-6);
        Assert.IsFalse(double.IsNaN(predicted[row]));
    }
}
public void RunTest()
{
    // Fits a single-variable proportional hazards model with Newton-Raphson
    // and checks the coefficient, its standard error and the per-row output
    // of Compute(input) against reference values.

    // Data from: http://www.sph.emory.edu/~cdckms/CoxPH/prophaz2.html
    double[,] table =
    {
        { 50,  1, 0 },
        { 70,  2, 1 },
        { 45,  3, 0 },
        { 35,  5, 0 },
        { 62,  7, 1 },
        { 50, 11, 0 },
        { 45,  4, 0 },
        { 57,  6, 0 },
        { 32,  8, 0 },
        { 57,  9, 1 },
        { 60, 10, 1 },
    };

    // Computed using R's predict(fit,type="risk")
    double[] expected =
    {
        0.640442743, 1206.226657448, 0.097217211, 0.002240107,
        59.081223025, 0.640442743, 0.097217211, 8.968345353,
        0.000722814, 8.968345353, 27.794227993
    };

    var model = new ProportionalHazards(1);

    // Column 0: inputs, column 1: times, column 2: outputs passed to Run
    double[][] covariates = table.GetColumn(0).ToArray();
    double[] times = table.GetColumn(1);
    int[] outcomes = table.GetColumn(2).ToInt32();

    var teacher = new ProportionalHazardsNewtonRaphson(model);

    double error = teacher.Run(covariates, times, outcomes);
    double log = -2 * model.GetPartialLogLikelihood(covariates, times, outcomes);

    double coefficient = model.Coefficients[0];
    Assert.AreEqual(0.3770, coefficient, 1e-4);
    Assert.IsFalse(double.IsNaN(model.Coefficients[0]));

    double standardError = model.StandardErrors[0];
    Assert.AreEqual(0.2542, standardError, 1e-4);
    Assert.IsFalse(double.IsNaN(model.StandardErrors[0]));

    // Evaluate the fitted model on every training input
    var predicted = new double[covariates.Length];
    for (int row = 0; row < predicted.Length; row++)
    {
        predicted[row] = model.Compute(covariates[row]);
    }

    for (int row = 0; row < predicted.Length; row++)
    {
        Assert.AreEqual(expected[row], predicted[row], 1e-3);
        Assert.IsFalse(double.IsNaN(predicted[row]));
    }
}
public void PredictTest1()
{
    // Fits a single-variable proportional hazards model with Newton-Raphson
    // and checks the time-dependent predictions Compute(input, time).

    // Data from: http://www.sph.emory.edu/~cdckms/CoxPH/prophaz2.html
    double[,] table =
    {
        { 50,  1, 0 },
        { 70,  2, 1 },
        { 45,  3, 0 },
        { 35,  5, 0 },
        { 62,  7, 1 },
        { 50, 11, 0 },
        { 45,  4, 0 },
        { 57,  6, 0 },
        { 32,  8, 0 },
        { 57,  9, 1 },
        { 60, 10, 1 },
    };

    double[] expected =
    {
        0.000000000000, 0.919466527073, 0.000074105451, 0.000001707560,
        0.657371730925, 0.046771996036, 0.000074105451, 0.006836271860,
        0.000008042445, 0.339562971888, 2.029832541310
    };

    var model = new ProportionalHazards(1);

    // Column 0: inputs, column 1: times, column 2: outputs passed to Run
    double[][] covariates = table.GetColumn(0).ToArray();
    double[] times = table.GetColumn(1);
    int[] outcomes = table.GetColumn(2).ToInt32();

    var teacher = new ProportionalHazardsNewtonRaphson(model);
    double error = teacher.Run(covariates, times, outcomes);

    // First evaluate every prediction...
    var predicted = new double[covariates.Length];
    for (int row = 0; row < covariates.Length; row++)
    {
        predicted[row] = model.Compute(covariates[row], times[row]);
    }

    // ...then compare them against the reference values
    for (int row = 0; row < predicted.Length; row++)
    {
        Assert.AreEqual(expected[row], predicted[row], 1e-6);
        Assert.IsFalse(double.IsNaN(predicted[row]));
    }
}
private void computeInformation()
{
    var model = regression;

    // Model-wide statistics, computed from inputData, timeData and censorData
    this.result = model.Compute(inputData, timeData);
    this.deviance = model.GetDeviance(inputData, timeData, censorData);
    this.logLikelihood = model.GetPartialLogLikelihood(inputData, timeData, censorData);
    this.chiSquare = model.ChiSquare(inputData, timeData, censorData);

    // Per-coefficient statistics, copied from the regression one index at a time
    for (int index = 0; index < model.Coefficients.Length; index++)
    {
        this.standardErrors[index] = model.StandardErrors[index];
        this.waldTests[index] = model.GetWaldTest(index);
        this.coefficients[index] = model.Coefficients[index];
        this.confidences[index] = model.GetConfidenceInterval(index);
        this.hazardRatios[index] = model.GetHazardRatio(index);
    }
}
/// <summary>
///   Runs the Newton-Raphson learning loop for Cox's proportional hazards
///   regression, repeating the update until the convergence criterion
///   reports that it has converged.
/// </summary>
///
/// <param name="inputs">The input data, one vector per observation.</param>
/// <param name="time">The time associated with each input vector.</param>
/// <param name="censor">
///   The event indicator associated with each input vector. Observations
///   whose indicator is zero are skipped when accumulating the gradient
///   and the Hessian.
/// </param>
///
/// <returns>
///   The <c>Delta</c> reported by the convergence criterion after the last
///   iteration. When the model has no parameters, the value returned by the
///   baseline estimation is returned instead.
/// </returns>
///
/// <exception cref="DimensionMismatchException">
///   Thrown when <paramref name="inputs"/>, <paramref name="time"/> and
///   <paramref name="censor"/> do not all have the same length.
/// </exception>
///
public double Run(double[][] inputs, double[] time, int[] censor)
{
    if (inputs.Length != time.Length || time.Length != censor.Length)
    {
        throw new DimensionMismatchException("time",
            "The inputs, time and output vector must have the same length.");
    }

    // Without normalization every scale factor is one, so the final
    // rescaling of the coefficients and standard errors is a no-op.
    double[] sdev = new double[parameterCount];
    for (int i = 0; i < sdev.Length; i++)
    {
        sdev[i] = 1;
    }

    if (normalize)
    {
        // Store means as regression centers
        double[] means = inputs.Mean();
        for (int i = 0; i < means.Length; i++)
        {
            regression.Offsets[i] = means[i];
        }

        // Convert to unit scores for increased accuracy. Subtract is not asked to
        // work in place; only its result is divided in place.
        sdev = Accord.Statistics.Tools.StandardDeviation(inputs);
        inputs = inputs.Subtract(means, 0).ElementwiseDivide(sdev, 0, inPlace: true);
    }

    // Sort data by time to accelerate performance
    if (!time.IsSorted(ComparerDirection.Descending))
    {
        sort(ref inputs, ref time, ref censor);
    }

    // Compute actual outputs
    // NOTE(review): this first pass goes through regression.Compute, whereas the
    // passes inside the loop evaluate exp(w . x) directly - confirm that both
    // agree when Offsets have been set and the coefficients are not zero.
    double[] output = new double[inputs.Length];
    for (int i = 0; i < output.Length; i++)
    {
        output[i] = regression.Compute(inputs[i]);
    }

    // Nothing to optimize: only the baseline has to be estimated
    if (parameterCount == 0)
    {
        return createBaseline(time, censor, output);
    }

    CurrentIteration = 0;

    // Step damping factor: starts at 0.1 and grows by 0.1 per iteration up to 1
    double smooth = 0.1;

    do // learning iterations until convergence
    {  // or maximum number of iterations reached

        CurrentIteration++;

        // Reset Hessian matrix and gradient
        Array.Clear(gradient, 0, gradient.Length);
        Array.Clear(hessian, 0, hessian.Length);

        // For each observation instance
        for (int i = 0; i < inputs.Length; i++)
        {
            // Check if we should censor
            if (censor[i] == 0)
            {
                continue;
            }

            // Compute partials
            double den = 0;
            Array.Clear(partialGradient, 0, partialGradient.Length);
            Array.Clear(partialHessian, 0, partialHessian.Length);

            // Denominator: total output over the observations whose
            // time is not smaller than the time of observation i
            for (int j = 0; j < inputs.Length; j++)
            {
                if (time[j] >= time[i])
                {
                    den += output[j];
                }
            }

            for (int j = 0; j < inputs.Length; j++)
            {
                if (time[j] >= time[i])
                {
                    // Compute partial gradient
                    for (int k = 0; k < partialGradient.Length; k++)
                    {
                        partialGradient[k] += inputs[j][k] * output[j] / den;
                    }

                    // Compute partial Hessian
                    for (int ii = 0; ii < inputs[j].Length; ii++)
                    {
                        for (int jj = 0; jj < inputs[j].Length; jj++)
                        {
                            partialHessian[ii, jj] += inputs[j][ii] * inputs[j][jj] * output[j] / den;
                        }
                    }
                }
            }

            // Compute gradient vector
            for (int j = 0; j < gradient.Length; j++)
            {
                gradient[j] += inputs[i][j] - partialGradient[j];
            }

            // Compute Hessian matrix
            for (int j = 0; j < partialGradient.Length; j++)
            {
                for (int k = 0; k < partialGradient.Length; k++)
                {
                    hessian[j, k] -= partialHessian[j, k] - partialGradient[j] * partialGradient[k];
                }
            }
        }

        // Decompose to solve the linear system. The Hessian will usually be
        // invertible, but it may occasionally be singular, so this code always
        // goes through a Singular Value Decomposition instead of attempting an
        // LU decomposition first. The SVD is very stable, but is quite
        // expensive, being on average about 10-15 times more expensive than
        // LU decomposition. There are other ways to avoid a singular Hessian.
        // For a very interesting reading on the subject, please see:
        //
        //  - Jeff Gill & Gary King, "What to Do When Your Hessian Is Not Invertible",
        //    Sociological Methods & Research, Vol 33, No. 1, August 2004, 54-87.
        //    Available in: http://gking.harvard.edu/files/help.pdf
        //
        // Moreover, the computation of the inverse is optional, as it will
        // be used only to compute the standard errors of the regression.
        decomposition = new SingularValueDecomposition(hessian);
        double[] deltas = decomposition.Solve(gradient);

        // Update coefficients using the calculated deltas
        for (int i = 0; i < regression.Coefficients.Length; i++)
        {
            regression.Coefficients[i] -= smooth * deltas[i];
        }

        smooth += 0.1;
        if (smooth > 1)
        {
            smooth = 1;
        }

        // Check relative maximum parameter change
        convergence.NewValues = regression.Coefficients;

        if (convergence.HasDiverged)
        {
            // Restore previous coefficients
            for (int i = 0; i < regression.Coefficients.Length; i++)
            {
                regression.Coefficients[i] = convergence.OldValues[i];
            }
        }

        // Recompute current outputs
        for (int i = 0; i < output.Length; i++)
        {
            double sum = 0;
            for (int j = 0; j < regression.Coefficients.Length; j++)
            {
                sum += regression.Coefficients[j] * inputs[i][j];
            }

            output[i] = Math.Exp(sum);
        }

    } while (!convergence.HasConverged);

    // Undo the unit-score scaling applied to the inputs
    for (int i = 0; i < regression.Coefficients.Length; i++)
    {
        regression.Coefficients[i] /= sdev[i];
    }

    if (computeStandardErrors)
    {
        // Grab the regression information matrix
        double[,] inverse = decomposition.Inverse();

        // Calculate coefficients' standard errors
        double[] standardErrors = regression.StandardErrors;
        for (int i = 0; i < standardErrors.Length; i++)
        {
            standardErrors[i] = Math.Sqrt(Math.Abs(inverse[i, i])) / sdev[i];
        }
    }

    if (computeBaselineFunction)
    {
        createBaseline(time, censor, output);
    }

    return convergence.Delta;
}
public void BaselineHazardTest()
{
    // Checks the baseline survival estimated during Newton-Raphson learning
    // and verifies that a standalone EmpiricalHazardDistribution estimate
    // built from the model outputs gives the same values.

    double[,] table =
    {
        // t   c   in
        {  8, 0, -1.2372626521865966 },
        {  4, 1,  0.22623087329625477 },
        { 12, 0, -0.8288458543774289 },
        {  6, 0,  0.49850873850236665 },
        { 10, 0, -0.38639432341749696 },
        {  8, 1,  1.0430644689145904 },
        {  5, 0, -1.6797141831465285 },
        {  5, 0,  1.0770992020653544 },
        {  3, 1,  1.0770992020653544 },
        { 14, 1, -0.38639432341749696 },
        {  8, 0, -0.8969153206789568 },
        { 11, 0,  1.6897243987791061 },
        {  7, 0, -1.2712973853373605 },
        {  7, 0, -0.38639432341749696 },
        {  7, 1, -0.45446378971902495 },
        { 12, 0,  0.4644740053516027 },
        {  8, 0,  1.4514812667237584 },
    };

    double[] time = table.GetColumn(0);
    SurvivalOutcome[] censor = table.GetColumn(1).To<SurvivalOutcome[]>();
    double[][] inputs = table.GetColumn(2).ToJagged();

    var model = new ProportionalHazards(1);

    var teacher = new ProportionalHazardsNewtonRaphson(model);
    teacher.Normalize = false;
    teacher.Lambda = 0;

    model.Coefficients[0] = 0.47983261821350764;

    double error = teacher.Run(inputs, time, censor);

    /* Tested against http://statpages.org/prophaz2.html
     *   13,  8, 0
     *   56,  4, 1
     *   25, 12, 0
     *   64,  6, 0
     *   38, 10, 0
     *   80,  8, 1
     *    0,  5, 0
     *   81,  5, 0
     *   81,  3, 1
     *   38, 14, 1
     *   23,  8, 0
     *   99, 11, 0
     *   12,  7, 0
     *   38,  7, 0
     *   36,  7, 1
     *   63, 12, 0
     *   92,  8, 0
     */

    // Reference survival values at times 3, 4, 7, 8, 12 and 14
    double[] expected = { 0.9465, 0.8919, 0.8231, 0.7436, 0.7436, 0.0000 };

    double[] baseline =
    {
        model.Survival(3),
        model.Survival(4),
        model.Survival(7),
        model.Survival(8),
        model.Survival(12),
        model.Survival(14),
    };

    for (int k = 0; k < expected.Length; k++)
    {
        Assert.AreEqual(expected[k], baseline[k], 1e-4);
    }

    // The value of the baseline must be exactly the same whether it was computed
    // after the Newton-Raphson or in a standalone EmpiricalHazard computation
    double[] outputs = inputs.Apply(row => model.Compute(row));

    var empirical = EmpiricalHazardDistribution.Estimate(time, censor, outputs);

    baseline = new[]
    {
        empirical.ComplementaryDistributionFunction(3),
        empirical.ComplementaryDistributionFunction(4),
        empirical.ComplementaryDistributionFunction(7),
        empirical.ComplementaryDistributionFunction(8),
        empirical.ComplementaryDistributionFunction(12),
        empirical.ComplementaryDistributionFunction(14),
    };

    for (int k = 0; k < expected.Length; k++)
    {
        Assert.AreEqual(expected[k], baseline[k], 1e-4);
    }
}
public void BaselineHazardTestR()
{
    // Fits a single-variable proportional hazards model and compares the
    // offset, coefficient, standard error, Wald test p-value, partial
    // log-likelihoods and per-row outputs against values obtained from GNU R.

    double[,] table =
    {
        // t   c  in
        {  8, 0, 13 },
        {  4, 1, 56 },
        { 12, 0, 25 },
        {  6, 0, 64 },
        { 10, 0, 38 },
        {  8, 1, 80 },
        {  5, 0,  0 },
        {  5, 0, 81 },
        {  3, 1, 81 },
        { 14, 1, 38 },
        {  8, 0, 23 },
        { 11, 0, 99 },
        {  7, 0, 12 },
        {  7, 1, 36 },
        { 12, 0, 63 },
        {  8, 0, 92 },
        {  7, 0, 38 },
    };

    double[] time = table.GetColumn(0);
    SurvivalOutcome[] censor = table.GetColumn(1).To<SurvivalOutcome[]>();
    double[][] inputs = table.GetColumn(2).ToJagged();

    var model = new ProportionalHazards(1);
    var teacher = new ProportionalHazardsNewtonRaphson(model);

    double error = teacher.Run(inputs, time, censor);

    // Assert.AreEqual(-10.257417973830666, error, 1e-8);

    /*
     library('survival')
     options(digits=17)
     time <- c(8, 4, 12, 6, 10, 8, 5, 5, 3, 14, 8, 11, 7, 7, 12, 8, 7)
     x <- c(13, 56, 25, 64, 38, 80, 0, 81, 81, 38, 23, 99, 12, 36, 63, 92, 38)
     c <- c(0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0)

     fit <- coxph(Surv(time, c) ~ x, ties="breslow")

     predict(fit,type="risk")

     fit$loglik

             coef           exp(coef)      se(coef)         z                p
     x 0.01633097532122 1.016465054586 0.01711960930183 0.9539338797573 0.340117112635

     Likelihood ratio test=0.94  on 1 df, p=0.332836850925  n= 17, number of events= 5
    */

    // Tested against GNU R
    Assert.AreEqual(49.352941176470587, model.Offsets[0]);
    Assert.AreEqual(0.01633097532122, model.Coefficients[0], 1e-10);
    Assert.AreEqual(0.01711960930183, model.StandardErrors[0], 1e-10);
    Assert.AreEqual(0.340117112635, model.GetWaldTest(0).PValue, 1e-5);
    Assert.AreEqual(-10.2879332934202168, model.GetPartialLogLikelihood(time, censor));
    Assert.AreEqual(-9.8190189050165948, model.GetPartialLogLikelihood(inputs, time, censor));

    double[] actual = inputs.Apply(row => model.Compute(row));

    /*
     predict(r,type="risk")
         [1] 0.55229166964915244 1.11466393245000361 0.67185866444081555 1.27023351821156782 0.83076808526813917 1.64953983529334769 0.44664925161695829 1.67669959872327912
         [9] 1.67669959872327912 0.83076808526813917 0.65026895029003673 2.24967304521214029 0.54334545703992021 0.80407192663266613 1.24965783376477391 2.00665280971219540
         [17] 0.83076808526813917
    */
    double[] expected =
    {
        0.55229166964915244, 1.11466393245000361, 0.67185866444081555, 1.27023351821156782,
        0.83076808526813917, 1.64953983529334769, 0.44664925161695829, 1.67669959872327912,
        1.67669959872327912, 0.83076808526813917, 0.65026895029003673, 2.24967304521214029,
        0.54334545703992021, 0.80407192663266613, 1.24965783376477391, 2.00665280971219540,
        0.83076808526813917
    };

    for (int row = 0; row < actual.Length; row++)
    {
        Assert.AreEqual(expected[row], actual[row], 0.025);
    }
}
public void PredictTest1()
{
    // Fits a single-variable proportional hazards model with Newton-Raphson and
    // checks the coefficient, standard error, offset, baseline survival and the
    // time-dependent predictions Compute(input, time) against reference values.

    // Data from: http://statpages.org/prophaz2.html
    double[,] data =
    {
        { 50,  1, 0 },
        { 70,  2, 1 },
        { 45,  3, 0 },
        { 35,  5, 0 },
        { 62,  7, 1 },
        { 50, 11, 0 },
        { 45,  4, 0 },
        { 57,  6, 0 },
        { 32,  8, 0 },
        { 57,  9, 1 },
        { 60, 10, 1 },
    };

    var regression = new ProportionalHazards(1);

    // Column 0: inputs, column 1: times, column 2: censor flags passed to Run
    double[][] inputs = data.GetColumn(0).ToJagged();
    double[] time = data.GetColumn(1);
    int[] censor = data.GetColumn(2).ToInt32();

    var target = new ProportionalHazardsNewtonRaphson(regression);

    // The returned value is not part of what this test verifies
    target.Run(inputs, time, censor);

    // Tested against http://statpages.org/prophaz2.html
    // NaN is checked explicitly next to each tolerance comparison, since a
    // tolerance comparison against NaN is not guaranteed to fail in every
    // version of the test framework.
    Assert.AreEqual(0.3770, regression.Coefficients[0], 1e-4);
    Assert.IsFalse(double.IsNaN(regression.Coefficients[0]));
    Assert.AreEqual(0.2542, regression.StandardErrors[0], 1e-4);
    Assert.IsFalse(double.IsNaN(regression.StandardErrors[0]));
    Assert.AreEqual(51.18181818181818, regression.Offsets[0]);

    // Baseline survivor function at predictor means
    double[] baseline =
    {
        regression.Survival(2),
        regression.Survival(7),
        regression.Survival(9),
        regression.Survival(10),
    };

    // Tested against http://statpages.org/prophaz2.html
    double[] expectedBaseline = { 0.9979, 0.9820, 0.9525, 0.8310 };
    for (int i = 0; i < expectedBaseline.Length; i++)
    {
        Assert.AreEqual(expectedBaseline[i], baseline[i], 1e-4);
        Assert.IsFalse(double.IsNaN(baseline[i]));
    }

    double[] expected =
    {
        0, 2.51908236823927, 0.000203028311170645, 4.67823782106946E-06,
        1.07100164957025, 0.118590728553659, 0.000203028311170645,
        0.0187294821517496, 1.31028937819308E-05, 0.436716853556834,
        5.14665484304978
    };

    for (int i = 0; i < inputs.Length; i++)
    {
        double a = regression.Compute(inputs[i], time[i]);
        double e = expected[i];

        Assert.AreEqual(e, a, 1e-3);
        Assert.IsFalse(double.IsNaN(a));
    }
}