/// <summary>
///   Verifies all common measures and functions of a hazard distribution
///   constructed directly from a fixed set of (time, hazard value) pairs.
/// </summary>
public void EmpiricalHazardConstructorTest3()
{
    double[] times = { 11, 10, 9, 8, 6, 5, 4, 2 };
    double[] values = { 0.22, 0.67, 1.00, 0.18, 1.00, 1.00, 1.00, 0.55 };

    var distribution = new EmpiricalHazardDistribution(times, values);

    double mean = distribution.Mean;                                      // 0.93696461879063664
    double median = distribution.Median;                                  // 3.9999999151458066
    double var = distribution.Variance;                                   // 2.0441627748096289
    double chf = distribution.CumulativeHazardFunction(x: 4.2);           // 1.55
    double cdf = distribution.DistributionFunction(x: 4.2);               // 0.7877520261732569
    double pdf = distribution.ProbabilityDensityFunction(x: 4.2);         // 0.046694554241883471
    double lpdf = distribution.LogProbabilityDensityFunction(x: 4.2);     // -3.0641277326297756
    double hf = distribution.HazardFunction(x: 4.2);                      // 0.22
    double ccdf = distribution.ComplementaryDistributionFunction(x: 4.2); // 0.21224797382674304
    double icdf = distribution.InverseDistributionFunction(p: cdf);       // 4.3483975243778978
    string str = distribution.ToString();                                 // H(x; v, t)

    Assert.AreEqual(0.93696461879063664, mean);
    Assert.AreEqual(3.9999999151458066, median, 1e-6);
    Assert.AreEqual(2.0441627748096289, var);
    Assert.AreEqual(1.55, chf);
    Assert.AreEqual(0.7877520261732569, cdf);
    Assert.AreEqual(0.046694554241883471, pdf);
    Assert.AreEqual(-3.0641277326297756, lpdf);
    Assert.AreEqual(0.22, hf);
    Assert.AreEqual(0.21224797382674304, ccdf);
    Assert.AreEqual(4.3483975243778978, icdf, 1e-8);
    Assert.AreEqual("H(x; v, t)", str);
}
/// <summary>
///   Checks the Fleming-Harrington survival curve produced by the
///   Breslow-Nelson-Aalen hazard estimator against published values.
/// </summary>
public void LeukemiaExampleCensoring_FlemingHarrington_NelsonAalen()
{
    // Times of remission (weeks) for 21 leukemia patients receiving
    // control treatment (Table 1.1 of Cox & Oakes):
    // http://www-personal.umich.edu/~yili/lect2notes.pdf
    double[] t = { 6, 6, 6, 6, 7, 9, 10, 10, 11, 13, 16, 17, 19, 20, 22, 23, 25, 32, 32, 34, 35 };
    int[] c = { 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0 };

    var distribution = EmpiricalHazardDistribution.Estimate(t, c,
        SurvivalEstimator.FlemingHarrington, HazardEstimator.BreslowNelsonAalen);

    int[] intervals = { 6, 7, 9, 10, 11, 13, 16, 17, 19, 20, 22, 23, 25, 32, 34, 35 };

    double[] expected =
    {
        0.8571, 0.8067, 0.8067, 0.7529, 0.7529, 0.6902, 0.6275, 0.6275,
        0.6275, 0.6275, 0.5378, 0.4482, 0.4482, 0.4482, 0.4482, 0.4482
    };

    for (int i = 0; i < intervals.Length; i++)
    {
        double survival = distribution.ComplementaryDistributionFunction(intervals[i]);
        Assert.AreEqual(expected[i], survival, 0.1);
    }
}
/// <summary>
///   Checks the Fleming-Harrington survival curve obtained by fitting
///   the distribution from censored observations via SurvivalOptions.
/// </summary>
public void LeukemiaExampleCensoring_KaplanMeier_FlemingHarrington()
{
    // Times of remission (weeks) for 21 leukemia patients receiving
    // control treatment (Table 1.1 of Cox & Oakes):
    double[] t = { 6, 6, 6, 6, 7, 9, 10, 10, 11, 13, 16, 17, 19, 20, 22, 23, 25, 32, 32, 34, 35 };
    int[] c = { 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0 };

    var distribution = new EmpiricalHazardDistribution(SurvivalEstimator.FlemingHarrington);

    distribution.Fit(t, new SurvivalOptions { Outcome = c.To<SurvivalOutcome[]>() });

    int[] intervals = { 6, 7, 9, 10, 11, 13, 16, 17, 19, 20, 22, 23, 25, 32, 34, 35 };

    double[] expected =
    {
        0.8571, 0.8067, 0.8067, 0.7529, 0.7529, 0.6902, 0.6275, 0.6275,
        0.6275, 0.6275, 0.5378, 0.4482, 0.4482, 0.4482, 0.4482, 0.4482
    };

    for (int i = 0; i < intervals.Length; i++)
    {
        double survival = distribution.ComplementaryDistributionFunction(intervals[i]);
        Assert.AreEqual(expected[i], survival, 0.1);
    }
}
/// <summary>
///   Checks the Kaplan-Meier survival estimates against the values
///   published in the referenced SAS/R example.
/// </summary>
public void KaplanMeierTest1()
{
    // Example from
    // http://sas-and-r.blogspot.fr/2010/05/example-738-kaplan-meier-survival.html
    double[] times;
    SurvivalOutcome[] censor;
    CreateExample1(out times, out censor);

    var distribution = new EmpiricalHazardDistribution(SurvivalEstimator.KaplanMeier);
    Assert.AreEqual(SurvivalEstimator.KaplanMeier, distribution.Estimator);

    distribution.Fit(times, new EmpiricalHazardOptions(HazardEstimator.KaplanMeier, censor));

    int[] t = { 1, 2, 3, 4, 6, 8, 9, 12, 14, 20 };
    double[] e = { 0.889, 0.833, 0.774, 0.714, 0.649, 0.577, 0.505, 0.421, 0.337, 0.168 };

    double[] actual = t.ToDouble().Apply(distribution.ComplementaryDistributionFunction);

    for (int i = 0; i < e.Length; i++)
        Assert.AreEqual(e[i], actual[i], 1e-3);

    // Assert.AreEqual(11.177, distribution.Mean);
    Assert.AreEqual(12, distribution.Median, 1e-5);
}
/// <summary>
///   Checks the Nelson-Aalen cumulative hazard estimates using both the
///   Breslow and the (default) Efron methods for handling ties.
/// </summary>
public void NelsonAalenTest1()
{
    // Example from
    // http://sas-and-r.blogspot.fr/2010/05/example-738-kaplan-meier-survival.html
    // http://sas-and-r.blogspot.fr/2010/05/example-739-nelson-aalen-estimate-of.html
    double[] times;
    SurvivalOutcome[] censor;
    CreateExample1(out times, out censor);

    // Test with the Breslow method
    {
        var distribution = EmpiricalHazardDistribution.Estimate(times, censor, HazardTiesMethod.Breslow);

        double[] expectedCHF =
        {
            0.0000000, 0.1111111, 0.1111111, 0.1736111, 0.1736111,
            0.2450397, 0.3219628, 0.3219628, 0.4128719, 0.4128719,
            0.5239830, 0.6489830, 0.6489830, 0.8156496, 1.0156496,
            1.0156496, 1.0156496, 1.5156496, 1.5156496
        };

        double[] actualCHF = times.Apply(distribution.CumulativeHazardFunction);

        for (int i = 0; i < actualCHF.Length; i++)
            Assert.AreEqual(expectedCHF[i], actualCHF[i], 1e-6);

        //Assert.AreEqual(11.177, distribution.Mean);
        Assert.AreEqual(12, distribution.Median, 1e-5);
    }

    // Test with the Efron method (the default)
    {
        var distribution = EmpiricalHazardDistribution.Estimate(times, censor);

        double[] expectedCHF =
        {
            0.0000000, 0.1111111, 0.1111111, 0.1756496, 0.1756496,
            0.2497576, 0.3298003, 0.3298003, 0.4251104, 0.4251104,
            0.5428935, 0.6764249, 0.6764249, 0.8587464, 1.0818900,
            1.0818900, 1.0818900, 1.7750372, 1.7750372
        };

        double[] actualCHF = times.Apply(distribution.CumulativeHazardFunction);

        for (int i = 0; i < actualCHF.Length; i++)
            Assert.AreEqual(expectedCHF[i], actualCHF[i], 1e-6);

        //Assert.AreEqual(11.177, distribution.Mean);
        Assert.AreEqual(12, distribution.Median, 1e-5);
    }
}
/// <summary>
///   Builds a distribution from survival values and verifies the standard
///   relations Ho = -log(So) and So = exp(-Ho) between the cumulative
///   hazard and the survival function.
/// </summary>
public void DistributionFunctionTest()
{
    double[] values =
    {
        1.0000000000000000, 0.8724284533876597, 0.9698946958777951,
        1.0000000000000000, 0.9840887140861863, 1.0000000000000000,
        1.0000000000000000, 1.0000000000000000, 1.0000000000000000,
        0.9979137773216293, 1.0000000000000000
    };

    double[] times = { 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 };

    EmpiricalHazardDistribution target = EmpiricalHazardDistribution
        .FromSurvivalValues(times, values);

    // Data from: http://www.sph.emory.edu/~cdckms/CoxPH/prophaz2.html
    double[] expectedBaselineSurvivalFunction =
    {
        1.0000, 0.9979, 0.9979, 0.9979, 0.9979, 0.9979,
        0.9820, 0.9820, 0.9525, 0.8310, 0.8310,
    };

    int n = expectedBaselineSurvivalFunction.Length;
    double[] hazardFunction = new double[n];
    double[] survivalFunction = new double[n];

    for (int i = 0; i < 11; i++)
        hazardFunction[i] = target.CumulativeHazardFunction(i + 1);

    for (int i = 0; i < 11; i++)
        survivalFunction[i] = target.ComplementaryDistributionFunction(i + 1);

    for (int i = 0; i < n; i++)
    {
        Assert.AreEqual(expectedBaselineSurvivalFunction[i], survivalFunction[i], 0.01);

        // Ho = -log(So)
        Assert.AreEqual(hazardFunction[i], -Math.Log(survivalFunction[i]), 0.01);

        // So = exp(-Ho)
        Assert.AreEqual(survivalFunction[i], Math.Exp(-hazardFunction[i]), 0.01);
    }
}
/// <summary>
///   Runs the Newton-Raphson update for Cox's hazards learning until convergence.
/// </summary>
///
/// <param name="time">The time-to-event for the non-censored training samples.</param>
/// <param name="censor">The output (event) associated with each input vector.</param>
///
/// <returns>The maximum relative change in the parameters after the iteration.</returns>
///
public double Run(double[] time, SurvivalOutcome[] censor)
{
    if (time.Length != censor.Length)
    {
        throw new DimensionMismatchException("time",
            "The time and output vector must have the same length.");
    }

    // Sort data by time to accelerate performance
    EmpiricalHazardDistribution.Sort(ref time, ref censor);

    // No covariates are given in this overload, so there is nothing to
    // optimize: estimate the baseline hazard directly from the sorted data.
    createBaseline(time, censor);

    return(regression.GetPartialLogLikelihood(time, censor));
}
/// <summary>
///   Checks that the inverse CDF is consistent with the CDF over the whole
///   support of the distribution, including the boundary percentiles 0 and 1.
/// </summary>
public void inverse_cdf()
{
    // Consider the following hazard rates, occurring at the given time steps
    double[] times = { 0.5, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 17, 20, 21 };

    double[] hazards =
    {
        0, 0.111111111111111, 0.0625, 0.0714285714285714,
        0.0769230769230769, 0, 0.0909090909090909, 0,
        0.111111111111111, 0.125, 0, 0.166666666666667,
        0.2, 0, 0.5, 0
    };

    var distribution = new EmpiricalHazardDistribution(times, hazards);

    Assert.AreEqual(0, distribution.Support.Min);
    Assert.AreEqual(22, distribution.Support.Max);

    Assert.AreEqual(0, distribution.InverseDistributionFunction(0));
    Assert.AreEqual(22, distribution.InverseDistributionFunction(1));
    Assert.AreEqual(22, distribution.InverseDistributionFunction(0.999));

    Assert.AreEqual(0, distribution.DistributionFunction(0));
    Assert.AreEqual(0.1051606831856301d, distribution.DistributionFunction(1));
    Assert.AreEqual(0.1593762566654946d, distribution.DistributionFunction(2));
    Assert.AreEqual(0.78033456236530996d, distribution.DistributionFunction(20));
    Assert.AreEqual(0.78033456236530996d, distribution.DistributionFunction(21));
    Assert.AreEqual(0.78033456236530996d, distribution.InnerDistributionFunction(21));
    Assert.AreEqual(1.0, distribution.DistributionFunction(22));
    Assert.AreEqual(1.0, distribution.InnerDistributionFunction(22));
    Assert.AreEqual(1.0, distribution.InnerDistributionFunction(23));
    Assert.AreEqual(1.0, distribution.InnerDistributionFunction(24));
    Assert.AreEqual(1.0, distribution.DistributionFunction(22));

    // Round-tripping each percentile through icdf then cdf should recover it
    double[] percentiles = Vector.Interval(0.0, 1.0, stepSize: 0.1);

    foreach (double p in percentiles)
    {
        double icdf = distribution.InverseDistributionFunction(p);
        double cdf = distribution.DistributionFunction(icdf);
        Assert.AreEqual(cdf, p, 0.1);
    }
}
/// <summary>
///   Checks the first few Kaplan-Meier survival values for the classical
///   uncensored leukemia remission data set.
/// </summary>
public void LeukemiaExample_KaplanMeier()
{
    // Times of remission (weeks) for 21 leukemia patients receiving
    // control treatment (Table 1.1 of Cox & Oakes):
    // http://www-personal.umich.edu/~yili/lect2notes.pdf
    double[] t = { 1, 1, 2, 2, 3, 4, 4, 5, 5, 8, 8, 8, 8, 11, 11, 12, 12, 15, 17, 22, 23 };

    var distribution = new EmpiricalHazardDistribution(SurvivalEstimator.KaplanMeier);

    distribution.Fit(t, new EmpiricalHazardOptions { Estimator = HazardEstimator.KaplanMeier });

    Assert.AreEqual(1, distribution.Survivals[0]);
    Assert.AreEqual(0.905, distribution.Survivals[1], 1e-3);
    Assert.AreEqual(0.809, distribution.Survivals[2], 1e-3);
    Assert.AreEqual(0.762, distribution.Survivals[3], 1e-3);

    // The reference values above can be reproduced at
    // http://statpages.org/prophaz2.html using the (time, event) pairs:
    // 1,1  1,1  2,1  2,1  3,1  4,1  4,1  5,1  5,1  8,1  8,1
    // 8,1  8,1  11,1  11,1  12,1  12,1  15,1  17,1  22,1  23,1
}
/// <summary>
///   The median must equal the time at which the CDF reaches one half.
/// </summary>
public void MedianTest()
{
    double[] values =
    {
        0.0000000000000000, 0.0351683340828711, 0.0267358118285064,
        0.0000000000000000, 0.0103643094219679, 0.0000000000000000,
        0.0000000000000000, 0.0000000000000000, 0.0000000000000000,
        0.000762266794052363, 0.000000000000000
    };

    double[] times = { 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 };

    var target = new EmpiricalHazardDistribution(times, values);

    Assert.AreEqual(target.Median, target.InverseDistributionFunction(0.5));
}
/// <summary>
///   Rebuilding a distribution from the hazards evaluated at the distinct
///   event times must reproduce the same hazard, cumulative hazard and
///   density functions.
/// </summary>
public void ConstructorTest1()
{
    double[] times;
    SurvivalOutcome[] censor;
    CreateExample1(out times, out censor);

    var distribution = EmpiricalHazardDistribution.Estimate(times, censor,
        SurvivalEstimator.FlemingHarrington, HazardEstimator.BreslowNelsonAalen);

    double[] t = distribution.Times;
    double[] s = distribution.Survivals;
    double[] h = distribution.Hazards;

    double[] nt = distribution.Times.Distinct();
    double[] nh = nt.Apply(distribution.HazardFunction);

    var target = new EmpiricalHazardDistribution(nt, nh, SurvivalEstimator.FlemingHarrington);

    for (int i = 0; i < times.Length; i++)
        Assert.AreEqual(distribution.HazardFunction(times[i]),
                        target.HazardFunction(times[i]));

    for (int i = 0; i < times.Length; i++)
        Assert.AreEqual(distribution.CumulativeHazardFunction(times[i]),
                        target.CumulativeHazardFunction(times[i]), 1e-5);

    for (int i = 0; i < times.Length; i++)
        Assert.AreEqual(distribution.ProbabilityDensityFunction(times[i]),
                        target.ProbabilityDensityFunction(times[i]), 1e-5);
}
/// <summary>
///   The hazard distribution's measures must agree with those computed
///   numerically by wrapping it in a GeneralContinuousDistribution.
/// </summary>
public void MeasuresTest_KaplanMeier()
{
    double[] values =
    {
        0.0000000000000000, 0.0351683340828711, 0.0267358118285064,
        0.0000000000000000, 0.0103643094219679, 0.9000000000000000,
        0.0000000000000000, 0.0000000000000000, 0.0000000000000000,
        0.000762266794052363, 0.000000000000000
    };

    double[] times = { 11, 1, 9, 8, 7, 3, 6, 5, 4, 2, 10 };

    var target = new EmpiricalHazardDistribution(times, values, SurvivalEstimator.KaplanMeier);
    var general = new GeneralContinuousDistribution(target);

    //Assert.AreEqual(general.Mean, target.Mean);
    //Assert.AreEqual(general.Variance, target.Variance);
    Assert.AreEqual(general.Median, target.Median);

    for (int i = -10; i < 10; i++)
    {
        double x = i;
        Assert.AreEqual(general.CumulativeHazardFunction(x),
                        target.CumulativeHazardFunction(x), 1e-4);
    }

    for (int i = -10; i < 10; i++)
    {
        double x = i;
        Assert.AreEqual(general.HazardFunction(x),
                        target.HazardFunction(x), 1e-5);
    }
}
/// <summary>
///   Median and survival-function boundary checks for the
///   Kaplan-Meier flavored distribution.
/// </summary>
public void MedianTest_KaplanMeier()
{
    double[] values =
    {
        0.0000000000000000, 0.0351683340828711, 0.0267358118285064,
        0.0000000000000000, 0.0103643094219679, 0.0000000000000000,
        0.0000000000000000, 0.0000000000000000, 0.0000000000000000,
        0.000762266794052363, 0.000000000000000
    };

    double[] times = { 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 };

    var target = new EmpiricalHazardDistribution(times, values, SurvivalEstimator.KaplanMeier);

    // Median is the time where the CDF reaches one half
    Assert.AreEqual(target.Median, target.InverseDistributionFunction(0.5));

    // Survival starts at 1 and decays to 0
    Assert.AreEqual(1, target.ComplementaryDistributionFunction(0));
    Assert.AreEqual(0, target.ComplementaryDistributionFunction(Double.PositiveInfinity));
}
/// <summary>
///   Runs the Newton-Raphson update for Cox's hazards learning until convergence.
/// </summary>
///
/// <param name="inputs">The input data.</param>
/// <param name="time">The time-to-event for the non-censored training samples.</param>
/// <param name="censor">The output (event) associated with each input vector.</param>
///
/// <returns>The maximum relative change in the parameters after the iteration.</returns>
///
public double Run(double[][] inputs, double[] time, SurvivalOutcome[] censor)
{
    if (inputs.Length != time.Length || time.Length != censor.Length)
    {
        throw new DimensionMismatchException("time",
            "The inputs, time and output vector must have the same length.");
    }

    // Sort data by time to accelerate performance
    EmpiricalHazardDistribution.Sort(ref time, ref censor, ref inputs);

    double[] means = new double[parameterCount];
    double[] sdev = new double[parameterCount];
    for (int i = 0; i < sdev.Length; i++)
        sdev[i] = 1;

    if (normalize)
    {
        // Store means as regression centers
        means = inputs.Mean();
        for (int i = 0; i < means.Length; i++)
            regression.Offsets[i] = means[i];

        // Convert to unit scores for increased accuracy
        sdev = BestCS.Statistics.Tools.StandardDeviation(inputs);
        inputs = inputs.Subtract(means, 0).ElementwiseDivide(sdev, 0, inPlace: true);

        // Rescale existing coefficients to match the standardized inputs
        for (int i = 0; i < regression.Coefficients.Length; i++)
            regression.Coefficients[i] *= sdev[i];
    }

    // Compute actual outputs: the exponentiated linear predictor exp(x'b)
    double[] output = new double[inputs.Length];
    for (int i = 0; i < output.Length; i++)
    {
        double sum = 0;
        for (int j = 0; j < regression.Coefficients.Length; j++)
            sum += regression.Coefficients[j] * inputs[i][j];
        output[i] = Math.Exp(sum);
    }

    // NOTE(review): a per-sample tie count was previously computed here in
    // O(n^2) time but never read anywhere in this method, so that dead
    // computation has been removed.

    if (parameterCount == 0)
    {
        // No covariates: just estimate the baseline hazard function
        createBaseline(time, censor, output);
        return regression.GetPartialLogLikelihood(inputs, time, censor);
    }

    CurrentIteration = 0;
    double smooth = Lambda;

    do
    {
        // learning iterations until convergence
        // or maximum number of iterations reached
        CurrentIteration++;

        // Reset Hessian matrix and gradient
        Array.Clear(gradient, 0, gradient.Length);
        Array.Clear(hessian, 0, hessian.Length);

        // For each observation instance
        for (int i = 0; i < inputs.Length; i++)
        {
            // Check if we should censor
            if (censor[i] == SurvivalOutcome.Censored)
                continue;

            // Compute partials over the risk set (samples still at risk
            // at time[i], i.e. those with time[j] >= time[i])
            double den = 0;
            Array.Clear(partialGradient, 0, partialGradient.Length);
            Array.Clear(partialHessian, 0, partialHessian.Length);

            for (int j = 0; j < inputs.Length; j++)
            {
                if (time[j] >= time[i])
                    den += output[j];
            }

            for (int j = 0; j < inputs.Length; j++)
            {
                if (time[j] >= time[i])
                {
                    // Compute partial gradient
                    for (int k = 0; k < partialGradient.Length; k++)
                        partialGradient[k] += inputs[j][k] * output[j] / den;

                    // Compute partial Hessian
                    for (int ii = 0; ii < inputs[j].Length; ii++)
                        for (int jj = 0; jj < inputs[j].Length; jj++)
                            partialHessian[ii, jj] += inputs[j][ii] * inputs[j][jj] * output[j] / den;
                }
            }

            // Compute gradient vector
            for (int j = 0; j < gradient.Length; j++)
                gradient[j] += inputs[i][j] - partialGradient[j];

            // Compute Hessian matrix
            for (int j = 0; j < partialGradient.Length; j++)
                for (int k = 0; k < partialGradient.Length; k++)
                    hessian[j, k] -= partialHessian[j, k] - partialGradient[j] * partialGradient[k];
        }

        // Decompose to solve the linear system. Usually the Hessian will
        // be invertible and LU will succeed. However, sometimes the Hessian
        // may be singular and a Singular Value Decomposition may be needed.
        // The SVD is very stable, but is quite expensive, being on average
        // about 10-15 times more expensive than LU decomposition. There are
        // other ways to avoid a singular Hessian. For a very interesting
        // reading on the subject, please see:
        //
        //  - Jeff Gill & Gary King, "What to Do When Your Hessian Is Not Invertible",
        //    Sociological Methods & Research, Vol 33, No. 1, August 2004, 54-87.
        //    Available in: http://gking.harvard.edu/files/help.pdf
        //
        decomposition = new SingularValueDecomposition(hessian);
        double[] deltas = decomposition.Solve(gradient);

        if (convergence.Iterations > 0 || convergence.Tolerance > 0)
        {
            // Update coefficients using the calculated deltas
            for (int i = 0; i < regression.Coefficients.Length; i++)
                regression.Coefficients[i] -= smooth * deltas[i];
        }

        // Gradually increase the step size up to a full Newton step
        smooth += Lambda;
        if (smooth > 1)
            smooth = 1;

        // Check relative maximum parameter change
        convergence.NewValues = regression.Coefficients;

        if (convergence.HasDiverged)
        {
            // Restore previous coefficients
            for (int i = 0; i < regression.Coefficients.Length; i++)
                regression.Coefficients[i] = convergence.OldValues[i];
        }

        // Recompute current outputs
        for (int i = 0; i < output.Length; i++)
        {
            double sum = 0;
            for (int j = 0; j < regression.Coefficients.Length; j++)
                sum += regression.Coefficients[j] * inputs[i][j];
            output[i] = Math.Exp(sum);
        }

    } while (!convergence.HasConverged);

    // Undo the coefficient scaling applied during normalization
    // (sdev is all ones when normalization is disabled)
    for (int i = 0; i < regression.Coefficients.Length; i++)
        regression.Coefficients[i] /= sdev[i];

    if (computeStandardErrors)
    {
        // Grab the regression information matrix
        double[,] inverse = decomposition.Inverse();

        // Calculate coefficients' standard errors
        double[] standardErrors = regression.StandardErrors;
        for (int i = 0; i < standardErrors.Length; i++)
            standardErrors[i] = Math.Sqrt(Math.Abs(inverse[i, i])) / sdev[i];
    }

    if (computeBaselineFunction)
        createBaseline(time, censor, output);

    return regression.GetPartialLogLikelihood(inputs, time, censor);
}
/// <summary>
///   Documentation example exercising the Kaplan-Meier flavored hazard
///   distribution: common measures, distribution functions and ranges.
/// </summary>
public void DocumentationExample_KaplanMeier()
{
    // Consider the following hazard rates, occurring at the given time steps
    double[] times = { 0.5, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 17, 20, 21 };

    double[] hazards =
    {
        0, 0.111111111111111, 0.0625, 0.0714285714285714,
        0.0769230769230769, 0, 0.0909090909090909, 0,
        0.111111111111111, 0.125, 0, 0.166666666666667,
        0.2, 0, 0.5, 0
    };

    // Create a new distribution given the observations and event times
    var distribution = new EmpiricalHazardDistribution(times, hazards, SurvivalEstimator.KaplanMeier);

    // Common measures
    double mean = distribution.Mean;     // 5.49198237428757
    double median = distribution.Median; // 11.999999704601453
    double var = distribution.Variance;  // 39.83481657555663

    // Cumulative distribution functions
    double cdf = distribution.DistributionFunction(x: 4);
    double ccdf = distribution.ComplementaryDistributionFunction(x: 4);
    double icdf = distribution.InverseDistributionFunction(p: cdf);

    // Probability density functions
    double pdf = distribution.ProbabilityDensityFunction(x: 4);
    double lpdf = distribution.LogProbabilityDensityFunction(x: 4);

    // Hazard (failure rate) functions
    double hf = distribution.HazardFunction(x: 4);
    double chf = distribution.CumulativeHazardFunction(x: 4);

    string str = distribution.ToString(); // H(x; v, t)

    // The mode is not defined for this distribution
    try { double mode = distribution.Mode; Assert.Fail(); }
    catch { }

    Assert.AreEqual(SurvivalEstimator.KaplanMeier, distribution.Estimator);
    Assert.AreEqual(1, distribution.ComplementaryDistributionFunction(0));
    Assert.AreEqual(0, distribution.ComplementaryDistributionFunction(Double.PositiveInfinity));

    Assert.AreEqual(5.49198237428757, mean);
    Assert.AreEqual(11.999999704601453, median, 1e-6);
    Assert.AreEqual(39.83481657555663, var);
    Assert.AreEqual(0.33647223662121273, chf);
    Assert.AreEqual(0.28571428571428559, cdf);
    Assert.AreEqual(0.054945054945054937, pdf);
    Assert.AreEqual(-2.9014215940827497, lpdf);
    Assert.AreEqual(0.0769230769230769, hf);
    Assert.AreEqual(0.71428571428571441, ccdf);
    Assert.AreEqual(5.8785425101214548, icdf, 1e-8);
    Assert.AreEqual("H(x; v, t)", str);

    var range1 = distribution.GetRange(0.95);
    var range2 = distribution.GetRange(0.99);
    var range3 = distribution.GetRange(0.01);

    Assert.AreEqual(1, range1.Min, 1e-3);
    Assert.AreEqual(20.562, range1.Max, 1e-3);
    Assert.AreEqual(1, range2.Min, 1e-3);
    Assert.AreEqual(20.562, range2.Max, 1e-3);
    Assert.AreEqual(1, range3.Min, 1e-3);
    Assert.AreEqual(20.562, range3.Max, 1e-3);

    // The hazard function must interpolate the given rates exactly
    for (int i = 0; i < hazards.Length; i++)
        Assert.AreEqual(hazards[i], distribution.HazardFunction(times[i]));
}
/// <summary>
///   Checks the first few Kaplan-Meier survival values for the classical
///   uncensored leukemia remission data set.
/// </summary>
public void LeukemiaExample_KaplanMeier()
{
    // Times of remission (weeks) for 21 leukemia patients receiving
    // control treatment (Table 1.1 of Cox & Oakes):
    // http://www-personal.umich.edu/~yili/lect2notes.pdf
    double[] t = { 1, 1, 2, 2, 3, 4, 4, 5, 5, 8, 8, 8, 8, 11, 11, 12, 12, 15, 17, 22, 23 };

    var distribution = new EmpiricalHazardDistribution(SurvivalEstimator.KaplanMeier);
    distribution.Fit(t, new EmpiricalHazardOptions { Estimator = HazardEstimator.KaplanMeier });

    Assert.AreEqual(1, distribution.Survivals[0]);
    Assert.AreEqual(0.905, distribution.Survivals[1], 1e-3);
    Assert.AreEqual(0.809, distribution.Survivals[2], 1e-3);
    Assert.AreEqual(0.762, distribution.Survivals[3], 1e-3);

    /*
       The reference values above can be reproduced at
       http://statpages.org/prophaz2.html using the (time, event) pairs:
       1,1  1,1  2,1  2,1  3,1  4,1  4,1  5,1  5,1  8,1  8,1
       8,1  8,1  11,1  11,1  12,1  12,1  15,1  17,1  22,1  23,1
    */
}
/// <summary>
///   Documentation example exercising the default (Fleming-Harrington /
///   Nelson-Aalen) hazard distribution: measures, functions and ranges.
/// </summary>
public void DocumentationExample_Aalen()
{
    // Consider the following hazard rates, occurring at the given time steps
    double[] times = { 0.5, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 17, 20, 21 };

    double[] hazards =
    {
        0, 0.111111111111111, 0.0625, 0.0714285714285714,
        0.0769230769230769, 0, 0.0909090909090909, 0,
        0.111111111111111, 0.125, 0, 0.166666666666667,
        0.2, 0, 0.5, 0
    };

    // Create a new distribution given the observations and event times
    var distribution = new EmpiricalHazardDistribution(times, hazards);

    // Common measures
    double mean = distribution.Mean;     // 6.1658527179584119
    double median = distribution.Median; // 11.999999704601453
    double var = distribution.Variance;  // 44.101147497430993

    // Cumulative distribution functions
    double cdf = distribution.DistributionFunction(x: 4);               // 0.275274821017619
    double ccdf = distribution.ComplementaryDistributionFunction(x: 4); // 0.724725178982381
    double icdf = distribution.InverseDistributionFunction(p: cdf);     // 4.4588994137113307

    // Probability density functions
    double pdf = distribution.ProbabilityDensityFunction(x: 4);     // 0.055748090690952365
    double lpdf = distribution.LogProbabilityDensityFunction(x: 4); // -2.8869121169242962

    // Hazard (failure rate) functions
    double hf = distribution.HazardFunction(x: 4);            // 0.0769230769230769
    double chf = distribution.CumulativeHazardFunction(x: 4); // 0.32196275946275932

    string str = distribution.ToString(); // H(x; v, t)

    // The mode is not defined for this distribution
    try { double mode = distribution.Mode; Assert.Fail(); }
    catch { }

    Assert.AreEqual(SurvivalEstimator.FlemingHarrington, distribution.Estimator);
    Assert.AreEqual(1, distribution.ComplementaryDistributionFunction(0));
    Assert.AreEqual(0, distribution.ComplementaryDistributionFunction(Double.PositiveInfinity));

    Assert.AreEqual(6.1658527179584119, mean);
    Assert.AreEqual(11.999999704601453, median, 1e-6);
    Assert.AreEqual(44.101147497430993, var);
    Assert.AreEqual(0.32196275946275932, chf);
    Assert.AreEqual(0.275274821017619, cdf);
    Assert.AreEqual(0.055748090690952365, pdf);
    Assert.AreEqual(-2.8869121169242962, lpdf);
    Assert.AreEqual(0.0769230769230769, hf);
    Assert.AreEqual(0.724725178982381, ccdf);
    Assert.AreEqual(4.4588994137113307, icdf, 1e-8);
    Assert.AreEqual("H(x; v, t)", str);

    var range1 = distribution.GetRange(0.95);
    var range2 = distribution.GetRange(0.99);
    var range3 = distribution.GetRange(0.01);

    Assert.AreEqual(1, range1.Min, 1e-3);
    Assert.AreEqual(20.562, range1.Max, 1e-3);
    Assert.AreEqual(1, range2.Min, 1e-3);
    Assert.AreEqual(20.562, range2.Max, 1e-3);
    Assert.AreEqual(1, range3.Min, 1e-3);
    Assert.AreEqual(20.562, range3.Max, 1e-3);

    // The hazard function must interpolate the given rates exactly
    for (int i = 0; i < hazards.Length; i++)
        Assert.AreEqual(hazards[i], distribution.HazardFunction(times[i]));
}
/// <summary>
///   Checks the Fleming-Harrington survival curve obtained by fitting the
///   distribution from censored observations via SurvivalOptions.
/// </summary>
public void LeukemiaExampleCensoring_KaplanMeier_FlemingHarrington()
{
    // Times of remission (weeks) for 21 leukemia patients receiving
    // control treatment (Table 1.1 of Cox & Oakes):
    double[] t = { 6, 6, 6, 6, 7, 9, 10, 10, 11, 13, 16, 17, 19, 20, 22, 23, 25, 32, 32, 34, 35 };
    int[] c = { 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0 };

    var distribution = new EmpiricalHazardDistribution(SurvivalEstimator.FlemingHarrington);
    distribution.Fit(t, new SurvivalOptions { Outcome = c.To<SurvivalOutcome[]>() });

    int[] intervals = { 6, 7, 9, 10, 11, 13, 16, 17, 19, 20, 22, 23, 25, 32, 34, 35 };

    double[] expected =
    {
        0.8571, 0.8067, 0.8067, 0.7529, 0.7529, 0.6902, 0.6275, 0.6275,
        0.6275, 0.6275, 0.5378, 0.4482, 0.4482, 0.4482, 0.4482, 0.4482
    };

    for (int i = 0; i < intervals.Length; i++)
    {
        double survival = distribution.ComplementaryDistributionFunction(intervals[i]);
        Assert.AreEqual(expected[i], survival, 0.1);
    }
}
/// <summary>
///   Checks the Kaplan-Meier survival estimates against the values
///   published in the referenced SAS/R example.
/// </summary>
public void KaplanMeierTest1()
{
    // Example from
    // http://sas-and-r.blogspot.fr/2010/05/example-738-kaplan-meier-survival.html
    double[] times;
    SurvivalOutcome[] censor;
    CreateExample1(out times, out censor);

    var distribution = new EmpiricalHazardDistribution(SurvivalEstimator.KaplanMeier);
    Assert.AreEqual(SurvivalEstimator.KaplanMeier, distribution.Estimator);

    distribution.Fit(times, new EmpiricalHazardOptions(HazardEstimator.KaplanMeier, censor));

    int[] t = { 1, 2, 3, 4, 6, 8, 9, 12, 14, 20 };
    double[] e = { 0.889, 0.833, 0.774, 0.714, 0.649, 0.577, 0.505, 0.421, 0.337, 0.168 };

    double[] actual = t.ToDouble().Apply(distribution.ComplementaryDistributionFunction);

    for (int i = 0; i < e.Length; i++)
    {
        Assert.AreEqual(e[i], actual[i], 1e-3);
    }

    // Assert.AreEqual(11.177, distribution.Mean);
    Assert.AreEqual(12, distribution.Median, 1e-5);
}
/// <summary>
///   Documentation example exercising the default (Fleming-Harrington /
///   Nelson-Aalen) hazard distribution: measures, functions and ranges.
/// </summary>
public void DocumentationExample_Aalen()
{
    // Consider the following hazard rates, occurring at the given time steps
    double[] times = { 0.5, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 17, 20, 21 };

    double[] hazards =
    {
        0, 0.111111111111111, 0.0625, 0.0714285714285714,
        0.0769230769230769, 0, 0.0909090909090909, 0,
        0.111111111111111, 0.125, 0, 0.166666666666667,
        0.2, 0, 0.5, 0
    };

    // Create a new distribution given the observations and event times
    var distribution = new EmpiricalHazardDistribution(times, hazards);

    // Common measures
    double mean = distribution.Mean;     // 6.1658527179584119
    double median = distribution.Median; // 11.999999704601453
    double var = distribution.Variance;  // 44.101147497430993

    // Cumulative distribution functions
    double cdf = distribution.DistributionFunction(x: 4);               // 0.275274821017619
    double ccdf = distribution.ComplementaryDistributionFunction(x: 4); // 0.724725178982381
    double icdf = distribution.InverseDistributionFunction(p: cdf);     // 4.4588994137113307

    // Probability density functions
    double pdf = distribution.ProbabilityDensityFunction(x: 4);     // 0.055748090690952365
    double lpdf = distribution.LogProbabilityDensityFunction(x: 4); // -2.8869121169242962

    // Hazard (failure rate) functions
    double hf = distribution.HazardFunction(x: 4);            // 0.0769230769230769
    double chf = distribution.CumulativeHazardFunction(x: 4); // 0.32196275946275932

    string str = distribution.ToString(); // H(x; v, t)

    // The mode is not defined for this distribution
    try { double mode = distribution.Mode; Assert.Fail(); }
    catch { }

    Assert.AreEqual(SurvivalEstimator.FlemingHarrington, distribution.Estimator);
    Assert.AreEqual(1, distribution.ComplementaryDistributionFunction(0));
    Assert.AreEqual(0, distribution.ComplementaryDistributionFunction(Double.PositiveInfinity));

    Assert.AreEqual(6.1658527179584119, mean);
    Assert.AreEqual(11.999999704601453, median, 1e-6);
    Assert.AreEqual(44.101147497430993, var);
    Assert.AreEqual(0.32196275946275932, chf);
    Assert.AreEqual(0.275274821017619, cdf);
    Assert.AreEqual(0.055748090690952365, pdf);
    Assert.AreEqual(-2.8869121169242962, lpdf);
    Assert.AreEqual(0.0769230769230769, hf);
    Assert.AreEqual(0.724725178982381, ccdf);
    Assert.AreEqual(4.4588994137113307, icdf, 1e-8);
    Assert.AreEqual("H(x; v, t)", str);

    var range1 = distribution.GetRange(0.95);
    var range2 = distribution.GetRange(0.99);
    var range3 = distribution.GetRange(0.01);

    Assert.AreEqual(1, range1.Min, 1e-3);
    Assert.AreEqual(20.562, range1.Max, 1e-3);
    Assert.AreEqual(1, range2.Min, 1e-3);
    Assert.AreEqual(20.562, range2.Max, 1e-3);
    Assert.AreEqual(1, range3.Min, 1e-3);
    Assert.AreEqual(20.562, range3.Max, 1e-3);

    // The hazard function must interpolate the given rates exactly
    for (int i = 0; i < hazards.Length; i++)
    {
        Assert.AreEqual(hazards[i], distribution.HazardFunction(times[i]));
    }
}
public void DocumentationExample_KaplanMeier()
{
    // Same data as the Aalen documentation example, but using the
    // Kaplan-Meier survival estimator; mean/variance and the measures
    // derived from the survival function change accordingly.

    // Consider the following hazard rates, occurring at the given time steps
    double[] times = { 0.5, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 17, 20, 21 };

    double[] hazards =
    {
        0, 0.111111111111111, 0.0625, 0.0714285714285714,
        0.0769230769230769, 0, 0.0909090909090909, 0,
        0.111111111111111, 0.125, 0, 0.166666666666667,
        0.2, 0, 0.5, 0
    };

    // Create a new distribution given the observations and event times
    var distribution = new EmpiricalHazardDistribution(times, hazards, SurvivalEstimator.KaplanMeier);

    // Common measures
    double mean = distribution.Mean;     // 5.49198237428757
    double median = distribution.Median; // 11.999999704601453
    double var = distribution.Variance;  // 39.83481657555663

    // Cumulative distribution functions
    // (inline expected values below were stale copies from the Aalen
    //  example; they now match the assertions at the end of the test)
    double cdf = distribution.DistributionFunction(x: 4);               // 0.28571428571428559
    double ccdf = distribution.ComplementaryDistributionFunction(x: 4); // 0.71428571428571441
    double icdf = distribution.InverseDistributionFunction(p: cdf);     // 5.8785425101214548

    // Probability density functions
    double pdf = distribution.ProbabilityDensityFunction(x: 4);     // 0.054945054945054937
    double lpdf = distribution.LogProbabilityDensityFunction(x: 4); // -2.9014215940827497

    // Hazard (failure rate) functions
    double hf = distribution.HazardFunction(x: 4);            // 0.0769230769230769
    double chf = distribution.CumulativeHazardFunction(x: 4); // 0.33647223662121273

    string str = distribution.ToString(); // H(x; v, t)

    // Accessing Mode must throw. The original pattern
    // "try { ...; Assert.Fail(); } catch { }" also swallowed the exception
    // thrown by Assert.Fail() itself, so the test could never fail even if
    // Mode returned normally. Track the outcome explicitly instead.
    bool modeThrew = false;
    try { double mode = distribution.Mode; }
    catch { modeThrew = true; }
    Assert.IsTrue(modeThrew, "Accessing Mode was expected to throw.");

    Assert.AreEqual(SurvivalEstimator.KaplanMeier, distribution.Estimator);

    // Survival function boundary conditions: S(0) = 1 and S(+inf) = 0.
    Assert.AreEqual(1, distribution.ComplementaryDistributionFunction(0));
    Assert.AreEqual(0, distribution.ComplementaryDistributionFunction(Double.PositiveInfinity));

    Assert.AreEqual(5.49198237428757, mean);
    Assert.AreEqual(11.999999704601453, median, 1e-6);
    Assert.AreEqual(39.83481657555663, var);
    Assert.AreEqual(0.33647223662121273, chf);
    Assert.AreEqual(0.28571428571428559, cdf);
    Assert.AreEqual(0.054945054945054937, pdf);
    Assert.AreEqual(-2.9014215940827497, lpdf);
    Assert.AreEqual(0.0769230769230769, hf);
    Assert.AreEqual(0.71428571428571441, ccdf);
    Assert.AreEqual(5.8785425101214548, icdf, 1e-8);
    Assert.AreEqual("H(x; v, t)", str);

    var range1 = distribution.GetRange(0.95);
    var range2 = distribution.GetRange(0.99);
    var range3 = distribution.GetRange(0.01);

    Assert.AreEqual(1, range1.Min, 1e-3);
    Assert.AreEqual(20.562, range1.Max, 1e-3);
    Assert.AreEqual(1, range2.Min, 1e-3);
    Assert.AreEqual(20.562, range2.Max, 1e-3);
    Assert.AreEqual(1, range3.Min, 1e-3);
    Assert.AreEqual(20.562, range3.Max, 1e-3);

    // The hazard function must reproduce the input rates exactly at each knot.
    for (int i = 0; i < hazards.Length; i++)
    {
        Assert.AreEqual(hazards[i], distribution.HazardFunction(times[i]));
    }
}
/// <summary>
///   Creates a new Cox Proportional-Hazards Model.
/// </summary>
///
public ProportionalHazards()
{
    // Initialize the baseline with a default (parameterless) empirical
    // hazard distribution. NOTE(review): presumably re-estimated when the
    // model is fit by a learning algorithm — confirm with the trainer code.
    BaselineHazard = new EmpiricalHazardDistribution();
}
public void BaselineHazardTest()
{
    // Each row holds: time-to-event (t), censoring flag (c), covariate (in).
    double[,] samples =
    {
        //  t   c   in
        {  8, 0, 13 },
        {  4, 1, 56 },
        { 12, 0, 25 },
        {  6, 0, 64 },
        { 10, 0, 38 },
        {  8, 1, 80 },
        {  5, 0,  0 },
        {  5, 0, 81 },
        {  3, 1, 81 },
        { 14, 1, 38 },
        {  8, 0, 23 },
        { 11, 0, 99 },
        {  7, 0, 12 },
        {  7, 1, 36 },
        { 12, 0, 63 },
        {  8, 0, 92 },
        {  7, 0, 38 },
    };

    double[] eventTimes = samples.GetColumn(0);
    int[] censoring = samples.GetColumn(1).ToInt32();
    double[][] covariates = samples.GetColumn(2).ToArray();

    // Fit a single-covariate Cox model with Newton-Raphson, without
    // normalizing the inputs.
    var model = new ProportionalHazards(1);
    var teacher = new ProportionalHazardsNewtonRaphson(model) { Normalize = false };

    double fitError = teacher.Run(covariates, eventTimes, censoring);
    double deviance = -2 * model.GetPartialLogLikelihood(covariates, eventTimes, censoring);

    var hazard = model.BaselineHazard as EmpiricalHazardDistribution;

    // Sample the cumulative baseline hazard at each integer time point
    // inside the distribution's support.
    double[] computed = new double[(int)hazard.Support.Max];
    for (int t = (int)hazard.Support.Min; t < hazard.Support.Max; t++)
        computed[t] = hazard.CumulativeHazardFunction(t);

    Assert.AreEqual(14, computed.Length);

    double[] reference =
    {
        0, 0, 0,
        0.025000345517572315,
        0.052363663484639708,
        0.052363663484639708,
        0.052363663484639708,
        0.16317880290786446,
        0.34217461190678861,
        0.34217461190678861,
        0.34217461190678861,
        0.34217461190678861,
        0.34217461190678861,
        0.34217461190678861
    };

    for (int t = 0; t < computed.Length; t++)
        Assert.AreEqual(reference[t], computed[t], 0.025);
}
public void DistributionFunctionTest2()
{
    // Checks that the survival function S and cumulative hazard H satisfy
    // the identities H = -log(S) and S = exp(-H) at every integer time.

    double[] values =
    {
        0.0000000000000000, 0.0351683340828711, 0.0267358118285064,
        0.0000000000000000, 0.0103643094219679, 0.0000000000000000,
        0.0000000000000000, 0.0000000000000000, 0.0000000000000000,
        0.000762266794052363, 0.000000000000000
    };

    double[] times = { 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 };

    EmpiricalHazardDistribution target = new EmpiricalHazardDistribution(times, values);

    double[] expected =
    {
        1.000000000000000, 0.999238023657475, 0.999238023657475,
        0.999238023657475, 0.999238023657475, 0.999238023657475,
        0.98893509519066469, 0.98893509519066469, 0.96284543081744489,
        0.92957227114936058, 0.92957227114936058,
    };

    // NOTE: an unused scratch array ("complementaryDistribution") that was
    // allocated but never read has been removed. Loop bounds now follow
    // expected.Length instead of a hard-coded 11.
    double[] hazardFunction = new double[expected.Length];
    double[] survivalFunction = new double[expected.Length];

    for (int i = 0; i < expected.Length; i++)
        hazardFunction[i] = target.CumulativeHazardFunction(i + 1);

    for (int i = 0; i < expected.Length; i++)
        survivalFunction[i] = target.ComplementaryDistributionFunction(i + 1);

    for (int i = 0; i < expected.Length; i++)
    {
        Assert.AreEqual(expected[i], survivalFunction[i], 1e-5);

        // Ho = -log(So)
        Assert.AreEqual(hazardFunction[i], -Math.Log(survivalFunction[i]), 1e-5);

        // So = exp(-Ho)
        Assert.AreEqual(survivalFunction[i], Math.Exp(-hazardFunction[i]), 1e-5);
    }
}
public void DistributionFunctionTest2_KaplanMeier()
{
    // Same consistency check as DistributionFunctionTest2, but with the
    // Kaplan-Meier survival estimator: H = -log(S) and S = exp(-H).

    double[] hazardRates =
    {
        0.0000000000000000, 0.0351683340828711, 0.0267358118285064,
        0.0000000000000000, 0.0103643094219679, 0.0000000000000000,
        0.0000000000000000, 0.0000000000000000, 0.0000000000000000,
        0.000762266794052363, 0.000000000000000
    };

    double[] eventTimes = { 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 };

    var distribution = new EmpiricalHazardDistribution(eventTimes, hazardRates, SurvivalEstimator.KaplanMeier);

    double[] expectedSurvival =
    {
        1.000000000000000, 0.999238023657475, 0.999238023657475,
        0.999238023657475, 0.999238023657475, 0.999238023657475,
        0.98893509519066469, 0.98893509519066469, 0.96284543081744489,
        0.92957227114936058, 0.92957227114936058,
    };

    // Single pass: query both functions at t = 1..11 and verify each pair.
    for (int t = 1; t <= expectedSurvival.Length; t++)
    {
        double cumulativeHazard = distribution.CumulativeHazardFunction(t);
        double survival = distribution.ComplementaryDistributionFunction(t);

        Assert.AreEqual(expectedSurvival[t - 1], survival, 1e-3);

        // Ho = -log(So)
        Assert.AreEqual(cumulativeHazard, -Math.Log(survival), 1e-5);

        // So = exp(-Ho)
        Assert.AreEqual(survival, Math.Exp(-cumulativeHazard), 1e-5);
    }

    // Survival function boundary conditions: S(0) = 1 and S(+inf) = 0.
    Assert.AreEqual(1, distribution.ComplementaryDistributionFunction(0));
    Assert.AreEqual(0, distribution.ComplementaryDistributionFunction(Double.PositiveInfinity));
}
public void BaselineHazardTest()
{
    // Checks that the baseline survival computed through the fitted Cox
    // model matches an external reference (statpages.org), and that the
    // same values are obtained from a standalone EmpiricalHazardDistribution
    // estimated with the model's outputs.
    // Each row holds: time-to-event (t), censoring flag (c), covariate (in).
    // NOTE(review): the covariates appear to be standardized versions of the
    // raw values listed in the reference comment below — confirm.
    double[,] data =
    {
        //  t  c  in
        {  8, 0, -1.2372626521865966  },
        {  4, 1,  0.22623087329625477 },
        { 12, 0, -0.8288458543774289  },
        {  6, 0,  0.49850873850236665 },
        { 10, 0, -0.38639432341749696 },
        {  8, 1,  1.0430644689145904  },
        {  5, 0, -1.6797141831465285  },
        {  5, 0,  1.0770992020653544  },
        {  3, 1,  1.0770992020653544  },
        { 14, 1, -0.38639432341749696 },
        {  8, 0, -0.8969153206789568  },
        { 11, 0,  1.6897243987791061  },
        {  7, 0, -1.2712973853373605  },
        {  7, 0, -0.38639432341749696 },
        {  7, 1, -0.45446378971902495 },
        { 12, 0,  0.4644740053516027  },
        {  8, 0,  1.4514812667237584  },
    };

    double[] time = data.GetColumn(0);
    SurvivalOutcome[] censor = data.GetColumn(1).To<SurvivalOutcome[]>();
    double[][] inputs = data.GetColumn(2).ToJagged();

    // Fit a single-covariate Cox model by Newton-Raphson, without input
    // normalization or regularization, starting from a given coefficient.
    var regression = new ProportionalHazards(1);
    var target = new ProportionalHazardsNewtonRaphson(regression);
    target.Normalize = false;
    target.Lambda = 0;
    regression.Coefficients[0] = 0.47983261821350764;

    double error = target.Run(inputs, time, censor);

    /* Tested against http://statpages.org/prophaz2.html
     * 13, 8, 0
     * 56, 4, 1
     * 25, 12, 0
     * 64, 6, 0
     * 38, 10, 0
     * 80, 8, 1
     * 0 , 5, 0
     * 81, 5, 0
     * 81, 3, 1
     * 38, 14, 1
     * 23, 8, 0
     * 99, 11, 0
     * 12, 7, 0
     * 38, 7, 0
     * 36, 7, 1
     * 63, 12, 0
     * 92, 8, 0
     */

    // Baseline survival at selected times, from the fitted model.
    double[] baseline =
    {
        regression.Survival(3),  // 0.9465
        regression.Survival(4),  // 0.8919
        regression.Survival(7),  // 0.8231
        regression.Survival(8),  // 0.7436
        regression.Survival(12), // 0.7436
        regression.Survival(14), // 0.0000
    };

    Assert.AreEqual(0.9465, baseline[0], 1e-4);
    Assert.AreEqual(0.8919, baseline[1], 1e-4);
    Assert.AreEqual(0.8231, baseline[2], 1e-4);
    Assert.AreEqual(0.7436, baseline[3], 1e-4);
    Assert.AreEqual(0.7436, baseline[4], 1e-4);
    Assert.AreEqual(0.0000, baseline[5], 1e-4);

    // The value of the baseline must be exact the same if it was computed
    // after the Newton-Raphson or in a standalone EmpiricalHazard computation
    double[] outputs = inputs.Apply(x => regression.Compute(x));

    var empirical = EmpiricalHazardDistribution.Estimate(time, censor, outputs);

    baseline = new[]
    {
        empirical.ComplementaryDistributionFunction(3),  // 0.9465
        empirical.ComplementaryDistributionFunction(4),  // 0.8919
        empirical.ComplementaryDistributionFunction(7),  // 0.8231
        empirical.ComplementaryDistributionFunction(8),  // 0.7436
        empirical.ComplementaryDistributionFunction(12), // 0.7436
        empirical.ComplementaryDistributionFunction(14), // 0.0000
    };

    Assert.AreEqual(0.9465, baseline[0], 1e-4);
    Assert.AreEqual(0.8919, baseline[1], 1e-4);
    Assert.AreEqual(0.8231, baseline[2], 1e-4);
    Assert.AreEqual(0.7436, baseline[3], 1e-4);
    Assert.AreEqual(0.7436, baseline[4], 1e-4);
    Assert.AreEqual(0.0000, baseline[5], 1e-4);
}