示例#1
0
        public double Run(double[][] inputs, double[] time)
        {
            // No outcome vector was supplied: allocate one whose entries all hold
            // the default (zero) value of SurvivalOutcome, one entry per sample.
            var censor = new SurvivalOutcome[time.Length];

            // The array above is only meaningful if the default value is 'Failed'.
            // Check the default directly rather than reading censor[0], which would
            // throw an IndexOutOfRangeException in debug builds when 'time' is empty.
            System.Diagnostics.Debug.Assert(default(SurvivalOutcome) == SurvivalOutcome.Failed);

            // Delegate to the overload that takes explicit outcomes.
            return Run(inputs, time, censor);
        }
        /// <summary>
        ///   Computes class-label decisions for each vector in the given <paramref name="input"/>.
        /// </summary>
        ///
        /// <param name="input">The input vectors that should be classified into
        ///   one of the <see cref="ITransform.NumberOfOutputs"/> possible classes.</param>
        ///
        public SurvivalOutcome[] Decide(double[][] input)
        {
            int count = input.Length;
            var decisions = new SurvivalOutcome[count];

            // Classify every row on its own through the single-vector overload,
            // keeping the decisions in the same order as the rows.
            for (int row = 0; row < count; row++)
                decisions[row] = Decide(input[row]);

            return decisions;
        }
        /// <summary>
        ///   The likelihood ratio test of the overall model, also called the model chi-square test.
        /// </summary>
        /// 
        /// <param name="input">A set of input data.</param>
        /// <param name="time">The time-to-event before the output occurs.</param>
        /// <param name="output">The corresponding output data.</param>
        /// 
        /// <remarks>
        ///   <para>
        ///   The Chi-square test, also called the likelihood ratio test or the log-likelihood test
        ///   is based on the deviance of the model (-2*log-likelihood). The log-likelihood ratio test 
        ///   indicates whether there is evidence of the need to move from a simpler model to a more
        ///   complicated one (where the simpler model is nested within the complicated one).</para>
        ///   <para>
        ///   The difference between the log-likelihood ratios for the researcher's model and a
        ///   simpler model is often called the "model chi-square".</para>
        /// </remarks>
        /// 
        public ChiSquareTest ChiSquare(double[][] input, double[] time, SurvivalOutcome[] output)
        {
            // Reference model: a freshly constructed regression with the same number of inputs.
            var reference = new ProportionalHazards(Inputs);

            // Log-likelihood ratio of this model against the reference, over the given data.
            double statistic = GetLogLikelihoodRatio(input, time, output, reference);

            // The test is built from the ratio and the number of coefficients of this model.
            int coefficientCount = Coefficients.Length;
            return new ChiSquareTest(statistic, coefficientCount);
        }
 /// <summary>
 ///   Gets the Log-Likelihood Ratio between two models.
 /// </summary>
 /// 
 /// <remarks>
 ///   The Log-Likelihood ratio is defined as 2*(LL - LL0).
 /// </remarks>
 /// 
 /// <param name="input">A set of input data.</param>
 /// <param name="time">The time-to-event before the output occurs.</param>
 /// <param name="output">The corresponding output data.</param>
 /// <param name="hazards">Another Cox Proportional Hazards model.</param>
 /// 
 /// <returns>The Log-Likelihood ratio (a measure of performance
 /// between two models) calculated over the given data sets.</returns>
 /// 
 public double GetLogLikelihoodRatio(double[][] input, double[] time, SurvivalOutcome[] output, ProportionalHazards hazards)
 {
     // Partial log-likelihood of this model, then of the model it is compared against,
     // both evaluated over the same data.
     double current = this.GetPartialLogLikelihood(input, time, output);
     double other = hazards.GetPartialLogLikelihood(input, time, output);

     // Ratio as defined for this method: 2 * (LL - LL0).
     return 2.0 * (current - other);
 }
        /// <summary>
        ///   Gets the Partial Log-Likelihood for the model.
        /// </summary>
        /// 
        /// <param name="time">The time-to-event before the output occurs.</param>
        /// <param name="output">The corresponding output data.</param>
        ///
        /// <returns>
        ///   The Partial Log-Likelihood (a measure of performance)
        ///   of the model calculated over the given data set.
        /// </returns>
        /// 
        public double GetPartialLogLikelihood(double[] time, SurvivalOutcome[] output)
        {
            int samples = time.Length;
            double logTotal = 0;

            for (int current = 0; current < samples; current++)
            {
                // Only samples whose outcome is not the zero-valued enum member contribute.
                if (output[current] != 0)
                {
                    // Count the samples whose time is at least the current sample's time.
                    double atRisk = 0;
                    foreach (double t in time)
                    {
                        if (t >= time[current])
                            atRisk++;
                    }

                    logTotal += Math.Log(atRisk);
                }
            }

            // No covariates are involved here, so the result is minus the sum of log-counts.
            return -logTotal;
        }
        /// <summary>
        ///   Gets the Partial Log-Likelihood for the model.
        /// </summary>
        /// 
        /// <param name="inputs">A set of input data.</param>
        /// <param name="time">The time-to-event before the output occurs.</param>
        /// <param name="output">The corresponding output data.</param>
        ///
        /// <returns>
        ///   The Partial Log-Likelihood (a measure of performance)
        ///   of the model calculated over the given data set.
        /// </returns>
        /// 
        public double GetPartialLogLikelihood(double[][] inputs, double[] time, SurvivalOutcome[] output)
        {
            // exp(coefficients . (x - offsets)) of a sample does not depend on which
            // event is being processed, so compute it once per sample up front instead
            // of recomputing it for every (event, sample) pair in the nested loop below.
            // The arithmetic per sample is unchanged, so the summed values are the same.
            double[] risk = new double[inputs.Length];
            for (int j = 0; j < inputs.Length; j++)
            {
                double s = 0;
                for (int k = 0; k < Coefficients.Length; k++)
                    s += Coefficients[k] * (inputs[j][k] - Offsets[k]);
                risk[j] = Math.Exp(s);
            }

            double sum1 = 0, sum2 = 0;
            for (int i = 0; i < inputs.Length; i++)
            {
                // Samples whose outcome is the zero-valued enum member contribute no term.
                if (output[i] == 0) continue;

                // First sum: linear predictor of the current sample
                for (int j = 0; j < Coefficients.Length; j++)
                    sum1 += Coefficients[j] * (inputs[i][j] - Offsets[j]);

                // Second sum: log of the summed risks of every sample
                // whose time is at least the current sample's time
                double sum = 0;
                for (int j = 0; j < inputs.Length; j++)
                {
                    if (time[j] >= time[i])
                        sum += risk[j];
                }
                sum2 += Math.Log(sum);
            }

            return sum1 - sum2;
        }
 /// <summary>
 ///   Gets the Deviance for the model.
 /// </summary>
 /// 
 /// <remarks>
 ///   The deviance is defined as -2*Log-Likelihood.
 /// </remarks>
 /// 
 /// <param name="inputs">A set of input data.</param>
 /// <param name="time">The time-to-event before the output occurs.</param>
 /// <param name="output">The corresponding output data.</param>
 /// 
 /// <returns>
 ///   The deviance (a measure of performance) of the model
 ///   calculated over the given data sets.
 /// </returns>
 /// 
 public double GetDeviance(double[][] inputs, double[] time, SurvivalOutcome[] output)
 {
     // Deviance is -2 times the partial log-likelihood over the same data.
     double logLikelihood = GetPartialLogLikelihood(inputs, time, output);
     return -2.0 * logLikelihood;
 }
示例#8
0
 /// <summary>
 ///   Initializes a new instance of the <see cref="EmpiricalHazardOptions"/> class.
 /// </summary>
 /// 
 public EmpiricalHazardOptions(HazardEstimator estimator, HazardTiesMethod ties, SurvivalOutcome[] outcome)
 {
     // Store each argument in the option of the same name.
     this.Estimator = estimator;
     this.Outcome = outcome;
     this.Ties = ties;
 }
        private static void CreateExample1(out double[] times, out SurvivalOutcome[] censor)
        {
            // Data set taken from the Kaplan-Meier example published at
            // http://sas-and-r.blogspot.fr/2010/05/example-738-kaplan-meier-survival.html
            //
            // Each entry is { time, event }, listed in the same order as the source table.
            object[,] table =
            {
                { 0.5, false }, { 1, true }, { 1, true }, { 2, true },
                { 2, false }, { 3, true }, { 4, true }, { 5, false },
                { 6, true }, { 7, false }, { 8, true }, { 9, true },
                { 10, false }, { 12, true }, { 14, false }, { 14, true },
                { 17, false }, { 20, true }, { 21, false },
            };

            // First column holds the times, converted to doubles.
            var timeColumn = table.GetColumn(0);
            times = timeColumn.To<double[]>();

            // Second column holds the event flags, converted to survival outcomes.
            var eventColumn = table.GetColumn(1);
            censor = eventColumn.To<SurvivalOutcome[]>();
        }
示例#10
0
 /// <summary>
 ///   Initializes a new instance of the <see cref="EmpiricalHazardOptions"/> class.
 /// </summary>
 /// 
 public EmpiricalHazardOptions(HazardEstimator estimator, SurvivalOutcome[] output)
 {
     // Store the estimator and outcomes; no tie-handling method was
     // given, so fall back to DefaultTies.
     this.Estimator = estimator;
     this.Outcome = output;
     this.Ties = DefaultTies;
 }
        private void createBaseline(double[] time, SurvivalOutcome[] censor, double[] output = null)
        {
            // Nothing to fit when the regression has no baseline distribution.
            var baseline = regression.BaselineHazard;
            if (baseline == null)
                return;

            var empirical = baseline as IFittableDistribution<double, EmpiricalHazardOptions>;
            if (empirical == null)
            {
                // Not an empirical-hazard baseline: try a distribution that is
                // fitted with survival options instead, using only the times
                // and the outcomes.
                var survival = baseline as IFittableDistribution<double, SurvivalOptions>;
                if (survival != null)
                {
                    var survivalOptions = new SurvivalOptions();
                    survivalOptions.Outcome = censor;

                    survival.Fit(time, survivalOptions);
                }

                return;
            }

            // Empirical-hazard baseline: fit it from the times, with 'output' passed
            // as the second argument, using the learner's Estimator and Ties settings.
            var hazardOptions = new EmpiricalHazardOptions();
            hazardOptions.Outcome = censor;
            hazardOptions.Estimator = Estimator;
            hazardOptions.Ties = Ties;

            empirical.Fit(time, output, hazardOptions);
        }
        /// <summary>
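        ///   Sorts the samples by time, fits the baseline hazard of the regression (when one is defined)
        ///   from the time-to-event data alone, and evaluates the resulting partial log-likelihood.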
        /// </summary>
        /// 
        /// <param name="censor">The output (event) associated with each input vector.</param>
        /// <param name="time">The time-to-event for the non-censored training samples.</param>
        /// 
        /// <returns>The partial log-likelihood of the regression, computed over the given data.</returns>
        /// 
        public double Run(double[] time, SurvivalOutcome[] censor)
        {
            // Both vectors describe the same samples, so they must be equally long.
            bool sameLength = time.Length == censor.Length;
            if (!sameLength)
            {
                throw new DimensionMismatchException("time",
                    "The time and output vector must have the same length.");
            }

            // Order the samples by time; Sort receives both arrays by reference.
            EmpiricalHazardDistribution.Sort(ref time, ref censor);

            // No covariates here: only the baseline is fitted from the sorted data.
            createBaseline(time, censor);

            double logLikelihood = regression.GetPartialLogLikelihood(time, censor);
            return logLikelihood;
        }
        /// <summary>
        ///   Runs the Newton-Raphson update for Cox's hazards learning until convergence.
        /// </summary>
        /// 
        /// <param name="inputs">The input data.</param>
        /// <param name="censor">The output (event) associated with each input vector.</param>
        /// <param name="time">The time-to-event for the non-censored training samples.</param>
        /// 
        /// <returns>The partial log-likelihood of the fitted regression, computed over the given data.</returns>
        /// 
        public double Run(double[][] inputs, double[] time, SurvivalOutcome[] censor)
        {
            if (inputs.Length != time.Length || time.Length != censor.Length)
            {
                throw new DimensionMismatchException("time",
                    "The inputs, time and output vector must have the same length.");
            }

            // Sort data by time to accelerate performance
            EmpiricalHazardDistribution.Sort(ref time, ref censor, ref inputs);

            // Keep a reference to the sorted inputs in their original scale. 'inputs' is
            // rebound to standardized values below when normalization is enabled, but the
            // final likelihood has to be evaluated with data in the same scale as the
            // final coefficients (see the end of this method).
            double[][] rawInputs = inputs;

            // Per-coefficient scale factors; all ones unless normalization is enabled
            double[] sdev = new double[parameterCount];
            for (int i = 0; i < sdev.Length; i++)
                sdev[i] = 1;

            if (normalize)
            {
                // Store means as regression centers
                double[] means = inputs.Mean();
                for (int i = 0; i < means.Length; i++)
                    regression.Offsets[i] = means[i];

                // Convert to unit scores for increased accuracy
                sdev = Accord.Statistics.Tools.StandardDeviation(inputs);
                inputs = inputs.Subtract(means, 0).ElementwiseDivide(sdev, 0, inPlace: true);

                // Express the starting coefficients in the standardized scale
                for (int i = 0; i < regression.Coefficients.Length; i++)
                    regression.Coefficients[i] *= sdev[i];
            }

            // Compute actual outputs: exp(coefficients . inputs) for each sample
            double[] output = new double[inputs.Length];
            for (int i = 0; i < output.Length; i++)
            {
                double sum = 0;
                for (int j = 0; j < regression.Coefficients.Length; j++)
                    sum += regression.Coefficients[j] * inputs[i][j];
                output[i] = Math.Exp(sum);
            }

            // Without parameters there is nothing to optimize: only the baseline is fitted
            if (parameterCount == 0)
            {
                createBaseline(time, censor, output);
                return regression.GetPartialLogLikelihood(inputs, time, censor);
            }

            CurrentIteration = 0;
            double smooth = Lambda;

            do
            {
                // learning iterations until convergence
                // or maximum number of iterations reached

                CurrentIteration++;

                // Reset Hessian matrix and gradient
                Array.Clear(gradient, 0, gradient.Length);
                Array.Clear(hessian, 0, hessian.Length);

                // For each observation instance
                for (int i = 0; i < inputs.Length; i++)
                {
                    // Censored observations contribute no term of their own
                    if (censor[i] == SurvivalOutcome.Censored)
                        continue;

                    // Compute partials
                    double den = 0;
                    Array.Clear(partialGradient, 0, partialGradient.Length);
                    Array.Clear(partialHessian, 0, partialHessian.Length);

                    // Denominator: summed outputs of every sample whose
                    // time is at least the time of the current sample
                    for (int j = 0; j < inputs.Length; j++)
                    {
                        if (time[j] >= time[i])
                            den += output[j];
                    }

                    for (int j = 0; j < inputs.Length; j++)
                    {
                        if (time[j] >= time[i])
                        {
                            // Compute partial gradient
                            for (int k = 0; k < partialGradient.Length; k++)
                                partialGradient[k] += inputs[j][k] * output[j] / den;

                            // Compute partial Hessian
                            for (int ii = 0; ii < inputs[j].Length; ii++)
                                for (int jj = 0; jj < inputs[j].Length; jj++)
                                    partialHessian[ii, jj] += inputs[j][ii] * inputs[j][jj] * output[j] / den;
                        }
                    }

                    // Compute gradient vector
                    for (int j = 0; j < gradient.Length; j++)
                        gradient[j] += inputs[i][j] - partialGradient[j];

                    // Compute Hessian matrix
                    for (int j = 0; j < partialGradient.Length; j++)
                        for (int k = 0; k < partialGradient.Length; k++)
                            hessian[j, k] -= partialHessian[j, k] - partialGradient[j] * partialGradient[k];
                }


                // Decompose to solve the linear system. The Hessian may be singular,
                // so a Singular Value Decomposition is always used here: it is very
                // stable, although considerably more expensive than an LU decomposition.
                // There are other ways to avoid a singular Hessian. For a very
                // interesting reading on the subject, please see:
                //
                //  - Jeff Gill & Gary King, "What to Do When Your Hessian Is Not Invertible",
                //    Sociological Methods & Research, Vol 33, No. 1, August 2004, 54-87.
                //    Available in: http://gking.harvard.edu/files/help.pdf
                //

                decomposition = new SingularValueDecomposition(hessian);
                double[] deltas = decomposition.Solve(gradient);

                if (convergence.Iterations > 0 || convergence.Tolerance > 0)
                {
                    // Update coefficients using the calculated deltas
                    for (int i = 0; i < regression.Coefficients.Length; i++)
                        regression.Coefficients[i] -= smooth * deltas[i];
                }

                // Grow the step multiplier by Lambda each iteration, capped at a full step
                smooth += Lambda;
                if (smooth > 1)
                    smooth = 1;

                // Check relative maximum parameter change
                convergence.NewValues = regression.Coefficients;


                if (convergence.HasDiverged)
                {
                    // Restore previous coefficients
                    for (int i = 0; i < regression.Coefficients.Length; i++)
                        regression.Coefficients[i] = convergence.OldValues[i];
                }

                // Recompute current outputs
                for (int i = 0; i < output.Length; i++)
                {
                    double sum = 0;
                    for (int j = 0; j < regression.Coefficients.Length; j++)
                        sum += regression.Coefficients[j] * inputs[i][j];
                    output[i] = Math.Exp(sum);
                }

            } while (!convergence.HasConverged);


            // Convert the coefficients back to the scale of the original inputs
            for (int i = 0; i < regression.Coefficients.Length; i++)
                regression.Coefficients[i] /= sdev[i];

            if (computeStandardErrors)
            {
                // Grab the regression information matrix
                double[,] inverse = decomposition.Inverse();

                // Calculate coefficients' standard errors
                double[] standardErrors = regression.StandardErrors;
                for (int i = 0; i < standardErrors.Length; i++)
                    standardErrors[i] = Math.Sqrt(Math.Abs(inverse[i, i])) / sdev[i];
            }

            // 'output' was computed from the standardized inputs and coefficients
            // before the rescaling above, so it is still consistent here.
            if (computeBaselineFunction)
                createBaseline(time, censor, output);

            // The coefficients are now in the original input scale, so the likelihood is
            // evaluated on the original (sorted) inputs. Evaluating it on the standardized
            // inputs would mix the two scales whenever normalization is enabled; when it
            // is disabled, 'rawInputs' and 'inputs' are the same array.
            return regression.GetPartialLogLikelihood(rawInputs, time, censor);
        }
        public double Run(double[][] inputs, double[] time)
        {
            // No outcome vector was supplied: allocate one whose entries all hold
            // the default (zero) value of SurvivalOutcome, one entry per sample.
            var censor = new SurvivalOutcome[time.Length];

            // The array above is only meaningful if the default value is 'Failed'.
            // Check the default directly rather than reading censor[0], which would
            // throw an IndexOutOfRangeException in debug builds when 'time' is empty.
            System.Diagnostics.Debug.Assert(default(SurvivalOutcome) == SurvivalOutcome.Failed);

            // Delegate to the overload that takes explicit outcomes.
            return Run(inputs, time, censor);
        }