Example #1
0
        public void PredictTest1()
        {
            // Data from: http://www.sph.emory.edu/~cdckms/CoxPH/prophaz2.html

            double[,] data =
            {
                { 50,  1, 0 },
                { 70,  2, 1 },
                { 45,  3, 0 },
                { 35,  5, 0 },
                { 62,  7, 1 },
                { 50, 11, 0 },
                { 45,  4, 0 },
                { 57,  6, 0 },
                { 32,  8, 0 },
                { 57,  9, 1 },
                { 60, 10, 1 },
            };

            double[] distHazards =
            {
                0,                  0.0351683340828711, 0.0267358118285064, 0,
                0.0103643094219679,                  0,                  0, 0,0, 0.000762266794052363, 0
            };

            double[] distTimes =
            {
                11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1
            };

            ProportionalHazards regression = new ProportionalHazards(1,
                                                                     new EmpiricalHazardDistribution(distTimes, distHazards));

            regression.Coefficients[0]   = 0.37704239281494084;
            regression.StandardErrors[0] = 0.25415755113043753;
            regression.Offsets[0]        = 51.181818;

            double[][] inputs = data.GetColumn(0).ToArray();
            double[]   time   = data.GetColumn(1);


            double[] expected =
            {
                0.000000000000, 0.919466527073, 0.000074105451, 0.000001707560,
                0.657371730925, 0.046771996036, 0.000074105451, 0.006836271860,
                0.000008042445, 0.339562971888, 2.029832541310
            };

            double[] actual = new double[inputs.Length];
            for (int i = 0; i < inputs.Length; i++)
            {
                actual[i] = regression.Compute(inputs[i], time[i]);
            }

            for (int i = 0; i < actual.Length; i++)
            {
                Assert.AreEqual(expected[i], actual[i], 1e-6);
                Assert.IsFalse(Double.IsNaN(actual[i]));
            }
        }
        public void RunTest()
        {
            // Data from: http://www.sph.emory.edu/~cdckms/CoxPH/prophaz2.html

            double[,] data =
            {
                { 50,  1, 0 },
                { 70,  2, 1 },
                { 45,  3, 0 },
                { 35,  5, 0 },
                { 62,  7, 1 },
                { 50, 11, 0 },
                { 45,  4, 0 },
                { 57,  6, 0 },
                { 32,  8, 0 },
                { 57,  9, 1 },
                { 60, 10, 1 },
            };

            ProportionalHazards regression = new ProportionalHazards(1);

            double[][] inputs = data.GetColumn(0).ToArray();
            double[]   time   = data.GetColumn(1);
            int[]      output = data.GetColumn(2).ToInt32();

            ProportionalHazardsNewtonRaphson target = new ProportionalHazardsNewtonRaphson(regression);

            double error = target.Run(inputs, time, output);

            double log = -2 * regression.GetPartialLogLikelihood(inputs, time, output);


            Assert.AreEqual(0.3770, regression.Coefficients[0], 1e-4);
            Assert.IsFalse(Double.IsNaN(regression.Coefficients[0]));

            Assert.AreEqual(0.2542, regression.StandardErrors[0], 1e-4);
            Assert.IsFalse(Double.IsNaN(regression.StandardErrors[0]));


            double[] actual = new double[inputs.Length];
            for (int i = 0; i < actual.Length; i++)
            {
                actual[i] = regression.Compute(inputs[i]);
            }

            double[] expected =
            {
                // Computed using R's predict(fit,type="risk")
                0.640442743,  1206.226657448, 0.097217211, 0.002240107,
                59.081223025,    0.640442743, 0.097217211, 8.968345353,
                0.000722814,     8.968345353, 27.794227993
            };

            for (int i = 0; i < actual.Length; i++)
            {
                Assert.AreEqual(expected[i], actual[i], 1e-3);
                Assert.IsFalse(Double.IsNaN(actual[i]));
            }
        }
        public void PredictTest1()
        {
            // Data from: http://www.sph.emory.edu/~cdckms/CoxPH/prophaz2.html

            double[,] data =
            {
                { 50,  1, 0 },
                { 70,  2, 1 },
                { 45,  3, 0 },
                { 35,  5, 0 },
                { 62,  7, 1 },
                { 50, 11, 0 },
                { 45,  4, 0 },
                { 57,  6, 0 },
                { 32,  8, 0 },
                { 57,  9, 1 },
                { 60, 10, 1 },
            };

            ProportionalHazards regression = new ProportionalHazards(1);

            double[][] inputs = data.GetColumn(0).ToArray();
            double[]   time   = data.GetColumn(1);
            int[]      output = data.GetColumn(2).ToInt32();


            ProportionalHazardsNewtonRaphson target = new ProportionalHazardsNewtonRaphson(regression);

            double error = target.Run(inputs, time, output);


            double[] expected =
            {
                0.000000000000, 0.919466527073, 0.000074105451, 0.000001707560,
                0.657371730925, 0.046771996036, 0.000074105451, 0.006836271860,
                0.000008042445, 0.339562971888, 2.029832541310
            };

            double[] actual = new double[inputs.Length];
            for (int i = 0; i < inputs.Length; i++)
            {
                actual[i] = regression.Compute(inputs[i], time[i]);
            }

            for (int i = 0; i < actual.Length; i++)
            {
                Assert.AreEqual(expected[i], actual[i], 1e-6);
                Assert.IsFalse(Double.IsNaN(actual[i]));
            }
        }
Example #4
0
        private void computeInformation()
        {
            // Store model information
            this.result        = regression.Compute(inputData, timeData);
            this.deviance      = regression.GetDeviance(inputData, timeData, censorData);
            this.logLikelihood = regression.GetPartialLogLikelihood(inputData, timeData, censorData);
            this.chiSquare     = regression.ChiSquare(inputData, timeData, censorData);

            // Store coefficient information
            for (int i = 0; i < regression.Coefficients.Length; i++)
            {
                this.standardErrors[i] = regression.StandardErrors[i];

                this.waldTests[i]    = regression.GetWaldTest(i);
                this.coefficients[i] = regression.Coefficients[i];
                this.confidences[i]  = regression.GetConfidenceInterval(i);
                this.hazardRatios[i] = regression.GetHazardRatio(i);
            }
        }
        /// <summary>
        ///   Runs one iteration of the Newton-Raphson update for Cox's hazards learning.
        /// </summary>
        ///
        /// <param name="inputs">The input data.</param>
        /// <param name="censor">The output (event) associated with each input vector.</param>
        /// <param name="time">The time-to-event for the non-censored training samples.</param>
        ///
        /// <returns>The maximum relative change in the parameters after the iteration.</returns>
        ///
        public double Run(double[][] inputs, double[] time, int[] censor)
        {
            if (inputs.Length != time.Length || time.Length != censor.Length)
            {
                throw new DimensionMismatchException("time",
                                                     "The inputs, time and output vector must have the same length.");
            }

            double[] means = new double[parameterCount];
            double[] sdev  = new double[parameterCount];
            for (int i = 0; i < sdev.Length; i++)
            {
                sdev[i] = 1;
            }

            if (normalize)
            {
                // Store means as regression centers
                means = inputs.Mean();
                for (int i = 0; i < means.Length; i++)
                {
                    regression.Offsets[i] = means[i];
                }

                // Convert to unit scores for increased accuracy
                sdev   = Accord.Statistics.Tools.StandardDeviation(inputs);
                inputs = inputs.Subtract(means, 0).ElementwiseDivide(sdev, 0, inPlace: true);
            }

            // Sort data by time to accelerate performance
            if (!time.IsSorted(ComparerDirection.Descending))
            {
                sort(ref inputs, ref time, ref censor);
            }

            // Compute actual outputs
            double[] output = new double[inputs.Length];
            for (int i = 0; i < output.Length; i++)
            {
                output[i] = regression.Compute(inputs[i]);
            }

            // Compute ties
            int[] ties = new int[inputs.Length];
            for (int i = 0; i < inputs.Length; i++)
            {
                for (int j = 0; j < time.Length; j++)
                {
                    if (time[j] == time[i])
                    {
                        ties[i]++;
                    }
                }
            }

            if (parameterCount == 0)
            {
                return(createBaseline(time, censor, output));
            }

            CurrentIteration = 0;
            double smooth = 0.1;


            do // learning iterations until convergence
            {  // or maximum number of iterations reached
                CurrentIteration++;

                // Reset Hessian matrix and gradient
                Array.Clear(gradient, 0, gradient.Length);
                Array.Clear(hessian, 0, hessian.Length);

                // For each observation instance
                for (int i = 0; i < inputs.Length; i++)
                {
                    // Check if we should censor
                    if (censor[i] == 0)
                    {
                        continue;
                    }

                    // Compute partials
                    double den = 0;
                    Array.Clear(partialGradient, 0, partialGradient.Length);
                    Array.Clear(partialHessian, 0, partialHessian.Length);

                    for (int j = 0; j < inputs.Length; j++)
                    {
                        if (time[j] >= time[i])
                        {
                            den += output[j];
                        }
                    }

                    for (int j = 0; j < inputs.Length; j++)
                    {
                        if (time[j] >= time[i])
                        {
                            // Compute partial gradient
                            for (int k = 0; k < partialGradient.Length; k++)
                            {
                                partialGradient[k] += inputs[j][k] * output[j] / den;
                            }

                            // Compute partial Hessian
                            for (int ii = 0; ii < inputs[j].Length; ii++)
                            {
                                for (int jj = 0; jj < inputs[j].Length; jj++)
                                {
                                    partialHessian[ii, jj] += inputs[j][ii] * inputs[j][jj] * output[j] / den;
                                }
                            }
                        }
                    }

                    // Compute gradient vector
                    for (int j = 0; j < gradient.Length; j++)
                    {
                        gradient[j] += inputs[i][j] - partialGradient[j];
                    }

                    // Compute Hessian matrix
                    for (int j = 0; j < partialGradient.Length; j++)
                    {
                        for (int k = 0; k < partialGradient.Length; k++)
                        {
                            hessian[j, k] -= partialHessian[j, k] - partialGradient[j] * partialGradient[k];
                        }
                    }
                }


                // Decompose to solve the linear system. Usually the Hessian will
                // be invertible and LU will succeed. However, sometimes the Hessian
                // may be singular and a Singular Value Decomposition may be needed.

                // The SVD is very stable, but is quite expensive, being on average
                // about 10-15 times more expensive than LU decomposition. There are
                // other ways to avoid a singular Hessian. For a very interesting
                // reading on the subject, please see:
                //
                //  - Jeff Gill & Gary King, "What to Do When Your Hessian Is Not Invertible",
                //    Sociological Methods & Research, Vol 33, No. 1, August 2004, 54-87.
                //    Available in: http://gking.harvard.edu/files/help.pdf
                //

                // Moreover, the computation of the inverse is optional, as it will
                // be used only to compute the standard errors of the regression.


                // Hessian Matrix is singular, try pseudo-inverse solution
                decomposition = new SingularValueDecomposition(hessian);
                double[] deltas = decomposition.Solve(gradient);


                // Update coefficients using the calculated deltas
                for (int i = 0; i < regression.Coefficients.Length; i++)
                {
                    regression.Coefficients[i] -= smooth * deltas[i];
                }

                smooth += 0.1;
                if (smooth > 1)
                {
                    smooth = 1;
                }

                // Check relative maximum parameter change
                convergence.NewValues = regression.Coefficients;


                if (convergence.HasDiverged)
                {
                    // Restore previous coefficients
                    for (int i = 0; i < regression.Coefficients.Length; i++)
                    {
                        regression.Coefficients[i] = convergence.OldValues[i];
                    }
                }


                // Recompute current outputs
                for (int i = 0; i < output.Length; i++)
                {
                    double sum = 0;
                    for (int j = 0; j < regression.Coefficients.Length; j++)
                    {
                        sum += regression.Coefficients[j] * inputs[i][j];
                    }
                    output[i] = Math.Exp(sum);
                }
            } while (!convergence.HasConverged);


            for (int i = 0; i < regression.Coefficients.Length; i++)
            {
                regression.Coefficients[i] /= sdev[i];
            }

            if (computeStandardErrors)
            {
                // Grab the regression information matrix
                double[,] inverse = decomposition.Inverse();

                // Calculate coefficients' standard errors
                double[] standardErrors = regression.StandardErrors;
                for (int i = 0; i < standardErrors.Length; i++)
                {
                    standardErrors[i] = Math.Sqrt(Math.Abs(inverse[i, i])) / sdev[i];
                }
            }

            if (computeBaselineFunction)
            {
                createBaseline(time, censor, output);
            }

            return(convergence.Delta);
        }
Example #6
0
        public void BaselineHazardTest()
        {
            double[,] data =
            {
                // t   c  in
                {  8, 0,  -1.2372626521865966 },
                {  4, 1,  0.22623087329625477 },
                { 12, 0,  -0.8288458543774289 },
                {  6, 0,  0.49850873850236665 },
                { 10, 0, -0.38639432341749696 },
                {  8, 1,   1.0430644689145904 },
                {  5, 0,  -1.6797141831465285 },
                {  5, 0,   1.0770992020653544 },
                {  3, 1,   1.0770992020653544 },
                { 14, 1, -0.38639432341749696 },
                {  8, 0,  -0.8969153206789568 },
                { 11, 0,   1.6897243987791061 },
                {  7, 0,  -1.2712973853373605 },
                {  7, 0, -0.38639432341749696 },
                {  7, 1, -0.45446378971902495 },
                { 12, 0,   0.4644740053516027 },
                {  8, 0,   1.4514812667237584 },
            };

            double[]          time   = data.GetColumn(0);
            SurvivalOutcome[] censor = data.GetColumn(1).To <SurvivalOutcome[]>();
            double[][]        inputs = data.GetColumn(2).ToJagged();

            var regression = new ProportionalHazards(1);

            var target = new ProportionalHazardsNewtonRaphson(regression);

            target.Normalize           = false;
            target.Lambda              = 0;
            regression.Coefficients[0] = 0.47983261821350764;

            double error = target.Run(inputs, time, censor);

            /* Tested against http://statpages.org/prophaz2.html
             *  13, 8,  0
             *  56, 4,  1
             *  25, 12, 0
             *  64, 6,  0
             *  38, 10, 0
             *  80, 8,  1
             *  0 , 5,  0
             *  81, 5,  0
             *  81, 3,  1
             *  38, 14, 1
             *  23, 8,  0
             *  99, 11, 0
             *  12, 7,  0
             *  38, 7,  0
             *  36, 7,  1
             *  63, 12, 0
             *  92, 8,  0
             */

            double[] baseline =
            {
                regression.Survival(3),  // 0.9465
                regression.Survival(4),  // 0.8919
                regression.Survival(7),  // 0.8231
                regression.Survival(8),  // 0.7436
                regression.Survival(12), // 0.7436
                regression.Survival(14), // 0.0000
            };

            Assert.AreEqual(0.9465, baseline[0], 1e-4);
            Assert.AreEqual(0.8919, baseline[1], 1e-4);
            Assert.AreEqual(0.8231, baseline[2], 1e-4);
            Assert.AreEqual(0.7436, baseline[3], 1e-4);
            Assert.AreEqual(0.7436, baseline[4], 1e-4);
            Assert.AreEqual(0.0000, baseline[5], 1e-4);

            // The value of the baseline must be exact the same if it was computed
            // after the Newton-Raphson or in a standalone EmpiricalHazard computation
            double[] outputs   = inputs.Apply(x => regression.Compute(x));
            var      empirical = EmpiricalHazardDistribution.Estimate(time, censor, outputs);

            baseline = new[]
            {
                empirical.ComplementaryDistributionFunction(3),  // 0.9465
                empirical.ComplementaryDistributionFunction(4),  // 0.8919
                empirical.ComplementaryDistributionFunction(7),  // 0.8231
                empirical.ComplementaryDistributionFunction(8),  // 0.7436
                empirical.ComplementaryDistributionFunction(12), // 0.7436
                empirical.ComplementaryDistributionFunction(14), // 0.0000
            };

            Assert.AreEqual(0.9465, baseline[0], 1e-4);
            Assert.AreEqual(0.8919, baseline[1], 1e-4);
            Assert.AreEqual(0.8231, baseline[2], 1e-4);
            Assert.AreEqual(0.7436, baseline[3], 1e-4);
            Assert.AreEqual(0.7436, baseline[4], 1e-4);
            Assert.AreEqual(0.0000, baseline[5], 1e-4);
        }
Example #7
0
        public void BaselineHazardTestR()
        {
            double[,] data =
            {
                // t   c  in
                {  8, 0, 13 },
                {  4, 1, 56 },
                { 12, 0, 25 },
                {  6, 0, 64 },
                { 10, 0, 38 },
                {  8, 1, 80 },
                {  5, 0,  0 },
                {  5, 0, 81 },
                {  3, 1, 81 },
                { 14, 1, 38 },
                {  8, 0, 23 },
                { 11, 0, 99 },
                {  7, 0, 12 },
                {  7, 1, 36 },
                { 12, 0, 63 },
                {  8, 0, 92 },
                {  7, 0, 38 },
            };


            double[]          time   = data.GetColumn(0);
            SurvivalOutcome[] censor = data.GetColumn(1).To <SurvivalOutcome[]>();
            double[][]        inputs = data.GetColumn(2).ToJagged();

            var regression = new ProportionalHazards(1);

            var target = new ProportionalHazardsNewtonRaphson(regression);

            double error = target.Run(inputs, time, censor);

            // Assert.AreEqual(-10.257417973830666, error, 1e-8);

            /*
             * library('survival')
             * options(digits=17)
             * time <- c(8, 4, 12, 6, 10, 8, 5, 5, 3, 14, 8, 11, 7, 7, 12, 8, 7)
             * x <- c(13, 56, 25, 64, 38, 80, 0, 81, 81, 38, 23, 99, 12, 36, 63, 92, 38)
             * c <- c(0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0)
             *
             * fit <- coxph(Surv(time, c) ~ x, ties="breslow")
             *
             * predict(fit,type="risk")
             *
             * fit$loglik
             *
             *      coef           exp(coef)          se(coef)               z              p
             * x 0.01633097532122  1.016465054586   0.01711960930183    0.9539338797573   0.340117112635
             *
             * Likelihood ratio test=0.94  on 1 df, p=0.332836850925  n= 17, number of events= 5
             */

            // Tested against GNU R
            Assert.AreEqual(49.352941176470587, regression.Offsets[0]);
            Assert.AreEqual(0.01633097532122, regression.Coefficients[0], 1e-10);
            Assert.AreEqual(0.01711960930183, regression.StandardErrors[0], 1e-10);
            Assert.AreEqual(0.340117112635, regression.GetWaldTest(0).PValue, 1e-5);
            Assert.AreEqual(-10.2879332934202168, regression.GetPartialLogLikelihood(time, censor));
            Assert.AreEqual(-9.8190189050165948, regression.GetPartialLogLikelihood(inputs, time, censor));

            double[] actual = inputs.Apply(x => regression.Compute(x));

            /*
             * predict(r,type="risk")
             *  [1] 0.55229166964915244 1.11466393245000361 0.67185866444081555 1.27023351821156782 0.83076808526813917 1.64953983529334769 0.44664925161695829 1.67669959872327912
             *  [9] 1.67669959872327912 0.83076808526813917 0.65026895029003673 2.24967304521214029 0.54334545703992021 0.80407192663266613 1.24965783376477391 2.00665280971219540
             *  [17] 0.83076808526813917
             */

            double[] expected =
            {
                0.55229166964915244, 1.11466393245000361, 0.67185866444081555, 1.27023351821156782,
                0.83076808526813917, 1.64953983529334769, 0.44664925161695829, 1.67669959872327912,
                1.67669959872327912, 0.83076808526813917, 0.65026895029003673, 2.24967304521214029,
                0.54334545703992021, 0.80407192663266613, 1.24965783376477391, 2.00665280971219540,
                0.83076808526813917
            };

            for (int i = 0; i < actual.Length; i++)
            {
                Assert.AreEqual(expected[i], actual[i], 0.025);
            }
        }
Example #8
0
        public void PredictTest1()
        {
            // Data from: http://statpages.org/prophaz2.html

            double[,] data =
            {
                { 50,  1, 0 },
                { 70,  2, 1 },
                { 45,  3, 0 },
                { 35,  5, 0 },
                { 62,  7, 1 },
                { 50, 11, 0 },
                { 45,  4, 0 },
                { 57,  6, 0 },
                { 32,  8, 0 },
                { 57,  9, 1 },
                { 60, 10, 1 },
            };

            var regression = new ProportionalHazards(1);

            double[][] inputs = data.GetColumn(0).ToJagged();
            double[]   time   = data.GetColumn(1);
            int[]      censor = data.GetColumn(2).ToInt32();


            var target = new ProportionalHazardsNewtonRaphson(regression);

            double error = target.Run(inputs, time, censor);

            // Tested against http://statpages.org/prophaz2.html
            Assert.AreEqual(0.3770, regression.Coefficients[0], 1e-4);
            Assert.AreEqual(0.2542, regression.StandardErrors[0], 1e-4);
            Assert.AreEqual(51.18181818181818, regression.Offsets[0]);

            double mean = regression.Offsets[0];

            // Baseline survivor function at predictor means
            double[] baseline =
            {
                regression.Survival(2),
                regression.Survival(7),
                regression.Survival(9),
                regression.Survival(10),
            };

            // Tested against http://statpages.org/prophaz2.html
            Assert.AreEqual(0.9979, baseline[0], 1e-4);
            Assert.AreEqual(0.9820, baseline[1], 1e-4);
            Assert.AreEqual(0.9525, baseline[2], 1e-4);
            Assert.AreEqual(0.8310, baseline[3], 1e-4);

            double[] expected =
            {
                0,                     2.51908236823927, 0.000203028311170645, 4.67823782106946E-06, 1.07100164957025,
                0.118590728553659, 0.000203028311170645,   0.0187294821517496, 1.31028937819308E-05,
                0.436716853556834, 5.14665484304978
            };

            double[] actual = new double[inputs.Length];
            for (int i = 0; i < inputs.Length; i++)
            {
                double a = actual[i] = regression.Compute(inputs[i], time[i]);
                double e = expected[i];

                Assert.AreEqual(e, a, 1e-3);
            }
            // string exStr = actual.ToString(CSharpArrayFormatProvider.InvariantCulture);
        }