        /// <summary>
        ///   Runs the Newton-Raphson update for Cox's hazards learning until convergence.
        /// </summary>
        ///
        /// <param name="inputs">The input data.</param>
        /// <param name="time">The time-to-event (or censoring time) for each training sample.</param>
        /// <param name="censor">The censoring status (event observed or censored) associated with each input vector.</param>
        ///
        /// <returns>The maximum relative change in the parameters after the last iteration.</returns>
        ///
        public double Run(double[][] inputs, double[] time, int[] censor)
        {
            if (inputs.Length != time.Length || time.Length != censor.Length)
            {
                throw new DimensionMismatchException("time",
                                                     "The inputs, time and output vector must have the same length.");
            }

            double[] means = new double[parameterCount];
            double[] sdev  = new double[parameterCount];
            for (int i = 0; i < sdev.Length; i++)
            {
                sdev[i] = 1;
            }

            if (normalize)
            {
                // Store means as regression centers
                means = inputs.Mean();
                for (int i = 0; i < means.Length; i++)
                {
                    regression.Offsets[i] = means[i];
                }

                // Convert to unit scores for increased accuracy
                sdev   = Accord.Statistics.Tools.StandardDeviation(inputs);
                inputs = inputs.Subtract(means, 0).ElementwiseDivide(sdev, 0, inPlace: true);
            }

            // Sort data in descending time order to speed up the computations below
            if (!time.IsSorted(ComparerDirection.Descending))
            {
                sort(ref inputs, ref time, ref censor);
            }

            // Compute actual outputs
            double[] output = new double[inputs.Length];
            for (int i = 0; i < output.Length; i++)
            {
                output[i] = regression.Compute(inputs[i]);
            }

            // Count ties (samples sharing an event time). The counts are not
            // used below: ties are handled implicitly by the inclusive (>=)
            // risk-set comparison, i.e. Breslow's approximation.
            int[] ties = new int[inputs.Length];
            for (int i = 0; i < inputs.Length; i++)
            {
                for (int j = 0; j < time.Length; j++)
                {
                    if (time[j] == time[i])
                    {
                        ties[i]++;
                    }
                }
            }

            if (parameterCount == 0)
            {
                return(createBaseline(time, censor, output));
            }

            CurrentIteration = 0;
            double smooth = 0.1;


            do // learning iterations until convergence
            {  // or maximum number of iterations reached
                CurrentIteration++;

                // Reset Hessian matrix and gradient
                Array.Clear(gradient, 0, gradient.Length);
                Array.Clear(hessian, 0, hessian.Length);

                // For each observation instance
                for (int i = 0; i < inputs.Length; i++)
                {
                    // Check if we should censor
                    if (censor[i] == 0)
                    {
                        continue;
                    }

                    // Compute partials
                    double den = 0;
                    Array.Clear(partialGradient, 0, partialGradient.Length);
                    Array.Clear(partialHessian, 0, partialHessian.Length);

                    for (int j = 0; j < inputs.Length; j++)
                    {
                        if (time[j] >= time[i])
                        {
                            den += output[j];
                        }
                    }

                    for (int j = 0; j < inputs.Length; j++)
                    {
                        if (time[j] >= time[i])
                        {
                            // Compute partial gradient
                            for (int k = 0; k < partialGradient.Length; k++)
                            {
                                partialGradient[k] += inputs[j][k] * output[j] / den;
                            }

                            // Compute partial Hessian
                            for (int ii = 0; ii < inputs[j].Length; ii++)
                            {
                                for (int jj = 0; jj < inputs[j].Length; jj++)
                                {
                                    partialHessian[ii, jj] += inputs[j][ii] * inputs[j][jj] * output[j] / den;
                                }
                            }
                        }
                    }

                    // Compute gradient vector
                    for (int j = 0; j < gradient.Length; j++)
                    {
                        gradient[j] += inputs[i][j] - partialGradient[j];
                    }

                    // Compute Hessian matrix
                    for (int j = 0; j < partialGradient.Length; j++)
                    {
                        for (int k = 0; k < partialGradient.Length; k++)
                        {
                            hessian[j, k] -= partialHessian[j, k] - partialGradient[j] * partialGradient[k];
                        }
                    }
                }
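
                // The loop above accumulates the score (gradient) and Hessian of
                // Cox's log partial likelihood over the risk sets
                // R_i = { j : time[j] >= time[i] }:
                //
                //     gradient[k]   =  sum over events i of ( x_ik - A_ik )
                //     hessian[k, m] = -sum over events i of ( B_ikm - A_ik A_im )
                //
                // where A_i and B_i are the risk-set-weighted first and second
                // moments of x, computed above as partialGradient and partialHessian.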


                // Decompose to solve the linear system. Usually the Hessian will
                // be invertible and LU will succeed. However, sometimes the Hessian
                // may be singular and a Singular Value Decomposition may be needed.

                // The SVD is very stable, but is quite expensive, being on average
                // about 10-15 times more expensive than LU decomposition. There are
                // other ways to avoid a singular Hessian. For a very interesting
                // reading on the subject, please see:
                //
                //  - Jeff Gill & Gary King, "What to Do When Your Hessian Is Not Invertible",
                //    Sociological Methods & Research, Vol 33, No. 1, August 2004, 54-87.
                //    Available at: http://gking.harvard.edu/files/help.pdf
                //

                // Moreover, the computation of the inverse is optional, as it will
                // be used only to compute the standard errors of the regression.
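
                // A sketch of the cheaper LU-first strategy described above, as
                // used by the lower-bound learner later in this file; this
                // method opts for the unconditionally stable SVD instead:
                //
                //     var lu = new LuDecomposition(hessian);
                //     double[] step = lu.Nonsingular
                //         ? lu.Solve(gradient)
                //         : new SingularValueDecomposition(hessian).Solve(gradient);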


                // Solve with the SVD-based pseudo-inverse, which stays
                // well-defined even when the Hessian is singular
                decomposition = new SingularValueDecomposition(hessian);
                double[] deltas = decomposition.Solve(gradient);


                // Update coefficients using the calculated deltas
                for (int i = 0; i < regression.Coefficients.Length; i++)
                {
                    regression.Coefficients[i] -= smooth * deltas[i];
                }

                // Ramp the damping factor from 0.1 toward a full Newton step
                smooth += 0.1;
                if (smooth > 1)
                {
                    smooth = 1;
                }

                // Check relative maximum parameter change
                convergence.NewValues = regression.Coefficients;


                if (convergence.HasDiverged)
                {
                    // Restore previous coefficients
                    for (int i = 0; i < regression.Coefficients.Length; i++)
                    {
                        regression.Coefficients[i] = convergence.OldValues[i];
                    }
                }


                // Recompute current outputs
                for (int i = 0; i < output.Length; i++)
                {
                    double sum = 0;
                    for (int j = 0; j < regression.Coefficients.Length; j++)
                    {
                        sum += regression.Coefficients[j] * inputs[i][j];
                    }
                    output[i] = Math.Exp(sum);
                }
            } while (!convergence.HasConverged);


            for (int i = 0; i < regression.Coefficients.Length; i++)
            {
                regression.Coefficients[i] /= sdev[i];
            }

            if (computeStandardErrors)
            {
                // Grab the regression information matrix
                double[,] inverse = decomposition.Inverse();

                // Calculate coefficients' standard errors
                double[] standardErrors = regression.StandardErrors;
                for (int i = 0; i < standardErrors.Length; i++)
                {
                    standardErrors[i] = Math.Sqrt(Math.Abs(inverse[i, i])) / sdev[i];
                }
            }

            if (computeBaselineFunction)
            {
                createBaseline(time, censor, output);
            }

            return(convergence.Delta);
        }
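
        // A minimal usage sketch (variable names are illustrative; the model and
        // teacher types follow the Accord.NET conventions this code assumes):
        //
        //     var regression = new ProportionalHazards(2);
        //     var teacher = new ProportionalHazardsNewtonRaphson(regression);
        //     double delta = teacher.Run(inputs, time, censor); // runs to convergence
        //     double relativeHazard = regression.Compute(inputs[0]);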
        private double run(double[][] inputs, double[][] outputs)
        {
            // Regress using Lower-Bound Newton-Raphson estimation
            //
            // The main idea is to replace the Hessian matrix with a
            //   suitable lower bound. Indeed, the Hessian is lower
            //   bounded by a negative definite matrix that does not
            //   even depend on w [Krishnapuram et al].
            //
            //   - http://www.lx.it.pt/~mtf/Krishnapuram_Carin_Figueiredo_Hartemink_2005.pdf
            //


            // Initial definitions and memory allocations
            int N = inputs.Length;

            double[][] design       = new double[N][];
            double[][] coefficients = this.regression.Coefficients;

            // Compute the regression matrix
            for (int i = 0; i < inputs.Length; i++)
            {
                double[] row = design[i] = new double[M];

                row[0] = 1; // for intercept
                for (int j = 0; j < inputs[i].Length; j++)
                {
                    row[j + 1] = inputs[i][j];
                }
            }


            // Reset Hessian matrix and gradient
            for (int i = 0; i < gradient.Length; i++)
            {
                gradient[i] = 0;
            }

            if (UpdateLowerBound)
            {
                for (int i = 0; i < gradient.Length; i++)
                {
                    for (int j = 0; j < gradient.Length; j++)
                    {
                        lowerBound[i, j] = 0;
                    }
                }
            }

            // In the multinomial logistic regression, the objective
            // function is the log-likelihood function l(w). As given
            // by Krishnapuram et al and Böhning, this is a concave
            // function with Hessian given by:
            //
            //       H(w) = -sum(P(w) - p(w)p(w)')  (x)  xx'
            //      (see referenced paper for proper indices)
            //
            // In which (x) denotes the Kronecker product. By using
            // the lower bound principle, Krishnapuram has shown that
            // we can replace H(w) with a lower bound approximation B
            // which does not depend on w (eq. 8 in the aforementioned paper):
            //
            //      B = -(1/2) [I - 11'/M]  (x)  sum(xx')
            //
            // Thus we can compute and invert this matrix only once.
            //
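
            // As a worked instance of the formula above: for M = 3 classes,
            // following the paper's (M - 1)-class parameterization, the
            // block factor [I - 11'/M] / 2 is the 2x2 matrix
            //
            //     |  1/3  -1/6 |
            //     | -1/6   1/3 |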


            // For each input sample in the dataset
            for (int i = 0; i < inputs.Length; i++)
            {
                // Grab variables related to the sample
                double[] x = design[i];
                double[] y = outputs[i];

                // Compute and estimate outputs
                this.compute(inputs[i], output);

                // Compute errors for the sample
                for (int j = 0; j < errors.Length; j++)
                {
                    errors[j] = y[j + 1] - output[j];
                }


                // Compute current gradient and Hessian
                //   We can take advantage of the block structure of the
                //   Hessian matrix and gradient vector by employing the
                //   Kronecker product. See [Böhning, 1992]

                // (Re-) Compute error gradient
                double[] g = Matrix.KroneckerProduct(errors, x);
                for (int j = 0; j < g.Length; j++)
                {
                    gradient[j] += g[j];
                }

                if (UpdateLowerBound)
                {
                    // Compute xxt matrix
                    for (int k = 0; k < x.Length; k++)
                    {
                        for (int j = 0; j < x.Length; j++)
                        {
                            xxt[k, j] = x[k] * x[j];
                        }
                    }

                    // (Re-) Compute weighted "Hessian" matrix
                    double[,] h = Matrix.KroneckerProduct(weights, xxt);
                    for (int j = 0; j < parameterCount; j++)
                    {
                        for (int k = 0; k < parameterCount; k++)
                        {
                            lowerBound[j, k] += h[j, k];
                        }
                    }
                }
            }


            if (UpdateLowerBound)
            {
                UpdateLowerBound = false;

                // Decompose to solve the linear system. Usually the hessian will
                // be invertible and LU will succeed. However, sometimes the hessian
                // may be singular and a Singular Value Decomposition may be needed.

                LuDecomposition lu = new LuDecomposition(lowerBound);

                // The SVD is very stable, but is quite expensive, being on average
                // about 10-15 times more expensive than LU decomposition. There are
                // other ways to avoid a singular Hessian. For a very interesting
                // reading on the subject, please see:
                //
                //  - Jeff Gill & Gary King, "What to Do When Your Hessian Is Not Invertible",
                //    Sociological Methods & Research, Vol 33, No. 1, August 2004, 54-87.
                //    Available at: http://gking.harvard.edu/files/help.pdf
                //

                // Moreover, the computation of the inverse is optional, as it will
                // be used only to compute the standard errors of the regression.

                if (lu.Nonsingular)
                {
                    // Solve using LU decomposition
                    deltas        = lu.Solve(gradient);
                    decomposition = lu;
                }
                else
                {
                    // Hessian Matrix is singular, try pseudo-inverse solution
                    decomposition = new SingularValueDecomposition(lowerBound);
                    deltas        = decomposition.Solve(gradient);
                }
            }
            else
            {
                deltas = decomposition.Solve(gradient);
            }


            previous = coefficients.Reshape(1);

            // Update coefficients using the calculated deltas
            for (int i = 0, k = 0; i < coefficients.Length; i++)
            {
                for (int j = 0; j < coefficients[i].Length; j++)
                {
                    coefficients[i][j] -= deltas[k++];
                }
            }

            solution = coefficients.Reshape(1);


            if (computeStandardErrors)
            {
                // Grab the regression information matrix
                double[,] inverse = decomposition.Inverse();

                // Calculate coefficients' standard errors
                double[][] standardErrors = regression.StandardErrors;
                for (int i = 0, k = 0; i < standardErrors.Length; i++)
                {
                    for (int j = 0; j < standardErrors[i].Length; j++, k++)
                    {
                        standardErrors[i][j] = Math.Sqrt(Math.Abs(inverse[k, k]));
                    }
                }
            }



            // Return the relative maximum parameter change
            for (int i = 0; i < deltas.Length; i++)
            {
                deltas[i] = Math.Abs(deltas[i]) / Math.Abs(previous[i]);
            }

            return(Matrix.Max(deltas));
        }
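
        // A hedged sketch of preparing the one-hot outputs this routine expects
        // (hypothetical variable names; note errors[j] reads y[j + 1], so the
        // matrix spans all M classes while only M - 1 are estimated):
        //
        //     double[][] outputs = new double[labels.Length][];
        //     for (int i = 0; i < labels.Length; i++)
        //     {
        //         outputs[i] = new double[numberOfClasses];
        //         outputs[i][labels[i]] = 1;
        //     }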
        /// <summary>
        ///   Runs one iteration of the Reweighted Least Squares algorithm.
        /// </summary>
        /// <param name="inputs">The input data.</param>
        /// <param name="outputs">The outputs associated with each input vector.</param>
        /// <returns>The maximum relative change in the parameters after the iteration.</returns>
        ///
        public double Run(double[][] inputs, double[] outputs)
        {
            // Regress using Iteratively Reweighted Least Squares estimation.

            // References:
            //  - Bishop, Christopher M.; Pattern Recognition
            //    and Machine Learning. Springer; 1st ed. 2006.


            // Initial definitions and memory allocations
            int N = inputs.Length;

            double[][] design       = new double[N][];
            double[]   errors       = new double[N];
            double[]   weights      = new double[N];
            double[]   coefficients = this.regression.Coefficients;
            double[]   deltas;

            // Compute the regression matrix
            for (int i = 0; i < inputs.Length; i++)
            {
                double[] row = design[i] = new double[parameterCount];

                row[0] = 1; // for intercept
                for (int j = 0; j < inputs[i].Length; j++)
                {
                    row[j + 1] = inputs[i][j];
                }
            }


            // Compute errors and weighting matrix
            for (int i = 0; i < inputs.Length; i++)
            {
                double y = regression.Compute(inputs[i]);

                // Calculate error vector
                errors[i] = y - outputs[i];

                // Calculate weighting matrix
                weights[i] = regression.Link.Derivative2(y);
            }


            // Reset Hessian matrix and gradient
            Array.Clear(gradient, 0, gradient.Length);
            Array.Clear(hessian, 0, hessian.Length);


            // (Re-) Compute error gradient
            for (int j = 0; j < design.Length; j++)
            {
                for (int i = 0; i < gradient.Length; i++)
                {
                    gradient[i] += design[j][i] * errors[j];
                }
            }

            // (Re-) Compute weighted "Hessian" matrix
            for (int k = 0; k < weights.Length; k++)
            {
                double[] row = design[k];
                for (int j = 0; j < row.Length; j++)
                {
                    for (int i = 0; i < row.Length; i++)
                    {
                        hessian[j, i] += row[i] * row[j] * weights[k];
                    }
                }
            }
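
            // In matrix form the two loops above assemble g = X'e and
            // H = X'WX, so the update below is the classical IRLS step
            // b <- b - (X'WX)^(-1) X'e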


            // Decompose to solve the linear system. Usually the Hessian will
            // be invertible and LU will succeed. However, sometimes the Hessian
            // may be singular and a Singular Value Decomposition may be needed.

            // The SVD is very stable, but is quite expensive, being on average
            // about 10-15 times more expensive than LU decomposition. There are
            // other ways to avoid a singular Hessian. For a very interesting
            // reading on the subject, please see:
            //
            //  - Jeff Gill & Gary King, "What to Do When Your Hessian Is Not Invertible",
            //    Sociological Methods & Research, Vol 33, No. 1, August 2004, 54-87.
            //    Available at: http://gking.harvard.edu/files/help.pdf
            //

            // Moreover, the computation of the inverse is optional, as it will
            // be used only to compute the standard errors of the regression.

            // Solve with the SVD-based pseudo-inverse, which stays
            // well-defined even when the Hessian is singular
            decomposition = new SingularValueDecomposition(hessian);
            deltas        = decomposition.Solve(gradient);


            previous = (double[])coefficients.Clone();

            // Update coefficients using the calculated deltas
            for (int i = 0; i < coefficients.Length; i++)
            {
                coefficients[i] -= deltas[i];
            }


            if (computeStandardErrors)
            {
                // Grab the regression information matrix
                double[,] inverse = decomposition.Inverse();

                // Calculate coefficients' standard errors
                double[] standardErrors = regression.StandardErrors;
                for (int i = 0; i < standardErrors.Length; i++)
                {
                    standardErrors[i] = Math.Sqrt(inverse[i, i]);
                }
            }


            // Return the relative maximum parameter change
            for (int i = 0; i < deltas.Length; i++)
            {
                deltas[i] = Math.Abs(deltas[i]) / Math.Abs(previous[i]);
            }

            return(Matrix.Max(deltas));
        }
        private ProportionalHazards innerLearn(double[][] inputs, double[] time, SurvivalOutcome[] censor, double[] weights)
        {
            if (weights != null)
            {
                throw new ArgumentException(Accord.Properties.Resources.NotSupportedWeights, "weights");
            }

            if (inputs.Length != time.Length || time.Length != censor.Length)
            {
                throw new DimensionMismatchException("time",
                                                     "The inputs, time and output vector must have the same length.");
            }

            if (regression == null)
            {
                init(new ProportionalHazards(inputs.Columns()));
            }

            // Sort data by time to speed up the computations below
            EmpiricalHazardDistribution.Sort(ref time, ref censor, ref inputs);


            var means = new double[parameterCount];
            var sdev  = new double[parameterCount];

            for (int i = 0; i < sdev.Length; i++)
            {
                sdev[i] = 1;
            }

            if (normalize)
            {
                // Store means as regression centers
                means = inputs.Mean(dimension: 0);
                for (int i = 0; i < means.Length; i++)
                {
                    regression.Offsets[i] = means[i];
                }

                // Convert to unit scores for increased accuracy
                sdev   = Measures.StandardDeviation(inputs);
                inputs = Elementwise.Divide(inputs.Subtract(means, 0), sdev, 0);

                for (int i = 0; i < regression.Coefficients.Length; i++)
                {
                    regression.Coefficients[i] *= sdev[i];
                }
            }

            // Compute actual outputs
            var output = new double[inputs.Length];

            for (int i = 0; i < output.Length; i++)
            {
                double sum = 0;
                for (int j = 0; j < regression.Coefficients.Length; j++)
                {
                    sum += regression.Coefficients[j] * inputs[i][j];
                }
                output[i] = Math.Exp(sum);
            }

            // Count ties (samples sharing an event time). The counts are not
            // used below: ties are handled implicitly by the inclusive (>=)
            // risk-set comparison, i.e. Breslow's approximation.
            int[] ties = new int[inputs.Length];
            for (int i = 0; i < inputs.Length; i++)
            {
                for (int j = 0; j < time.Length; j++)
                {
                    if (time[j] == time[i])
                    {
                        ties[i]++;
                    }
                }
            }

            if (parameterCount == 0)
            {
                createBaseline(time, censor, output);
                return(regression);
            }

            CurrentIteration = 0;
            double smooth = Lambda;

            do
            {
                if (Token.IsCancellationRequested)
                {
                    break;
                }

                // learning iterations until convergence
                // or maximum number of iterations reached

                CurrentIteration++;

                // Reset Hessian matrix and gradient
                Array.Clear(gradient, 0, gradient.Length);
                Array.Clear(hessian, 0, hessian.Length);

                // For each observation instance
                for (int i = 0; i < inputs.Length; i++)
                {
                    // Check if we should censor
                    if (censor[i] == SurvivalOutcome.Censored)
                    {
                        continue;
                    }

                    // Compute partials
                    double den = 0;
                    Array.Clear(partialGradient, 0, partialGradient.Length);
                    Array.Clear(partialHessian, 0, partialHessian.Length);

                    for (int j = 0; j < inputs.Length; j++)
                    {
                        if (time[j] >= time[i])
                        {
                            den += output[j];
                        }
                    }

                    for (int j = 0; j < inputs.Length; j++)
                    {
                        if (time[j] >= time[i])
                        {
                            // Compute partial gradient
                            for (int k = 0; k < partialGradient.Length; k++)
                            {
                                partialGradient[k] += inputs[j][k] * output[j] / den;
                            }

                            // Compute partial Hessian
                            for (int ii = 0; ii < inputs[j].Length; ii++)
                            {
                                for (int jj = 0; jj < inputs[j].Length; jj++)
                                {
                                    partialHessian[ii, jj] += inputs[j][ii] * inputs[j][jj] * output[j] / den;
                                }
                            }
                        }
                    }

                    // Compute gradient vector
                    for (int j = 0; j < gradient.Length; j++)
                    {
                        gradient[j] += inputs[i][j] - partialGradient[j];
                    }

                    // Compute Hessian matrix
                    for (int j = 0; j < partialGradient.Length; j++)
                    {
                        for (int k = 0; k < partialGradient.Length; k++)
                        {
                            hessian[j, k] -= partialHessian[j, k] - partialGradient[j] * partialGradient[k];
                        }
                    }
                }


                // Decompose to solve the linear system. Usually the Hessian will
                // be invertible and LU will succeed. However, sometimes the Hessian
                // may be singular and a Singular Value Decomposition may be needed.

                // The SVD is very stable, but is quite expensive, being on average
                // about 10-15 times more expensive than LU decomposition. There are
                // other ways to avoid a singular Hessian. For a very interesting
                // reading on the subject, please see:
                //
                //  - Jeff Gill & Gary King, "What to Do When Your Hessian Is Not Invertible",
                //    Sociological Methods & Research, Vol 33, No. 1, August 2004, 54-87.
                //    Available at: http://gking.harvard.edu/files/help.pdf
                //

                decomposition = new SingularValueDecomposition(hessian);
                double[] deltas = decomposition.Solve(gradient);

                if (convergence.Iterations > 0 || convergence.Tolerance > 0)
                {
                    // Update coefficients using the calculated deltas
                    for (int i = 0; i < regression.Coefficients.Length; i++)
                    {
                        regression.Coefficients[i] -= smooth * deltas[i];
                    }
                }

                // Ramp the damping factor by Lambda toward a full Newton step
                smooth += Lambda;
                if (smooth > 1)
                {
                    smooth = 1;
                }

                // Check relative maximum parameter change
                convergence.NewValues = regression.Coefficients;


                if (convergence.HasDiverged)
                {
                    // Restore previous coefficients
                    for (int i = 0; i < regression.Coefficients.Length; i++)
                    {
                        regression.Coefficients[i] = convergence.OldValues[i];
                    }
                }

                // Recompute current outputs
                for (int i = 0; i < output.Length; i++)
                {
                    double sum = 0;
                    for (int j = 0; j < regression.Coefficients.Length; j++)
                    {
                        sum += regression.Coefficients[j] * inputs[i][j];
                    }
                    output[i] = Math.Exp(sum);
                }

                if (Token.IsCancellationRequested)
                {
                    return(regression);
                }
            } while (!convergence.HasConverged);


            for (int i = 0; i < regression.Coefficients.Length; i++)
            {
                regression.Coefficients[i] /= sdev[i];
            }

            if (computeStandardErrors)
            {
                // Grab the regression information matrix
                double[,] inverse = decomposition.Inverse();

                // Calculate coefficients' standard errors
                double[] standardErrors = regression.StandardErrors;
                for (int i = 0; i < standardErrors.Length; i++)
                {
                    standardErrors[i] = Math.Sqrt(Math.Abs(inverse[i, i])) / sdev[i];
                }
            }

            if (computeBaselineFunction)
            {
                createBaseline(time, censor, output);
            }

            return(regression);
        }
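
        // In the newer Accord.NET learning API this private routine backs a
        // public Learn method; a hedged usage sketch (property and method
        // names assumed from that convention, not shown in this excerpt):
        //
        //     var teacher = new ProportionalHazardsNewtonRaphson()
        //     {
        //         MaxIterations = 100,
        //         Tolerance = 1e-8
        //     };
        //     ProportionalHazards ph = teacher.Learn(inputs, time, censor);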