Example #1
        /// <summary>
        ///   Runs one iteration of the Reweighted Least Squares algorithm.
        /// </summary>
        /// <param name="inputs">The input data.</param>
        /// <param name="outputs">The outputs associated with each input vector.</param>
        /// <returns>The maximum relative change in the parameters after the iteration.</returns>
        ///
        public double Run(double[][] inputs, double[] outputs)
        {
            // Regress using Iteratively Reweighted Least Squares estimation.

            // References:
            //  - Bishop, Christopher M.; Pattern Recognition
            //    and Machine Learning. Springer; 1st ed. 2006.


            // Initial definitions and memory allocations
            int N = inputs.Length;

            double[][] design       = new double[N][];
            double[]   errors       = new double[N];
            double[]   weights      = new double[N];
            double[]   coefficients = this.regression.Coefficients;
            double[]   deltas;

            // Compute the regression matrix
            for (int i = 0; i < inputs.Length; i++)
            {
                double[] row = design[i] = new double[parameterCount];

                row[0] = 1; // for intercept
                for (int j = 0; j < inputs[i].Length; j++)
                {
                    row[j + 1] = inputs[i][j];
                }
            }


            // Compute errors and weighting matrix
            for (int i = 0; i < inputs.Length; i++)
            {
                double y = regression.Compute(inputs[i]);

                // Calculate error vector
                errors[i] = y - outputs[i];

                // Calculate weighting matrix
                weights[i] = y * (1.0 - y);
            }
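
            // Note: y * (1 - y) is the variance of the Bernoulli output of
            // the logistic model, so the loop above fills the diagonal of
            // the weighting matrix W. The steps below then assemble one
            // Newton-Raphson step: solve (X' W X) delta = X' (y_hat - y),
            // i.e. H delta = g, and update the coefficients as w <- w - delta.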


            // Reset Hessian matrix and gradient
            for (int i = 0; i < gradient.Length; i++)
            {
                gradient[i] = 0;
                for (int j = 0; j < gradient.Length; j++)
                {
                    hessian[i, j] = 0;
                }
            }


            // (Re-) Compute error gradient
            for (int j = 0; j < design.Length; j++)
            {
                for (int i = 0; i < gradient.Length; i++)
                {
                    gradient[i] += design[j][i] * errors[j];
                }
            }

            // (Re-) Compute weighted "Hessian" matrix
            for (int k = 0; k < weights.Length; k++)
            {
                double[] rk = design[k];

                for (int j = 0; j < rk.Length; j++)
                {
                    for (int i = 0; i < rk.Length; i++)
                    {
                        hessian[j, i] += rk[i] * rk[j] * weights[k];
                    }
                }
            }


            // Decompose to solve the linear system. Usually the Hessian will
            // be invertible and LU will succeed. However, sometimes the Hessian
            // may be singular and a Singular Value Decomposition may be needed.

            LuDecomposition lu = new LuDecomposition(hessian);

            // The SVD is very stable, but is quite expensive, being on average
            // about 10-15 times more expensive than LU decomposition. There are
            // other ways to avoid a singular Hessian. For a very interesting
            // reading on the subject, please see:
            //
            //  - Jeff Gill & Gary King, "What to Do When Your Hessian Is Not Invertible",
            //    Sociological Methods & Research, Vol 33, No. 1, August 2004, 54-87.
            //    Available at: http://gking.harvard.edu/files/help.pdf
            //

            // Moreover, the computation of the inverse is optional, as it will
            // be used only to compute the standard errors of the regression.

            if (lu.Nonsingular)
            {
                // Solve using LU decomposition
                deltas        = lu.Solve(gradient);
                decomposition = lu;
            }
            else
            {
                // Hessian Matrix is singular, try pseudo-inverse solution
                decomposition = new SingularValueDecomposition(hessian);
                deltas        = decomposition.Solve(gradient);
            }

            previous = (double[])coefficients.Clone();

            // Update coefficients using the calculated deltas
            for (int i = 0; i < coefficients.Length; i++)
            {
                coefficients[i] -= deltas[i];
            }


            if (computeStandardErrors)
            {
                // Grab the regression information matrix
                double[,] inverse = decomposition.Inverse();
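
                // (The inverse of the Hessian approximates the asymptotic
                // covariance of the maximum-likelihood estimates; the square
                // roots of its diagonal entries give the standard errors.)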

                // Calculate coefficients' standard errors
                double[] standardErrors = regression.StandardErrors;
                for (int i = 0; i < standardErrors.Length; i++)
                {
                    standardErrors[i] = Math.Sqrt(inverse[i, i]);
                }
            }


            // Return the relative maximum parameter change
            for (int i = 0; i < deltas.Length; i++)
            {
                deltas[i] = Math.Abs(deltas[i]) / Math.Abs(previous[i]);
            }

            return Matrix.Max(deltas);
        }
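
For reference, a minimal usage sketch of the Run method above. The class and constructor names (LogisticRegression, IterativeReweightedLeastSquares) follow the Accord.NET convention this code appears to come from, but are assumptions here; adapt them to the actual enclosing class.

        // Usage sketch; class names are illustrative assumptions, not
        // verified against this code base.
        double[][] inputs =
        {
            new double[] { 0.0, 1.0 },
            new double[] { 1.0, 1.0 },
            new double[] { 2.0, 0.0 },
            new double[] { 3.0, 0.0 },
        };
        double[] outputs = { 0, 0, 1, 1 };

        LogisticRegression regression = new LogisticRegression(inputs: 2);
        IterativeReweightedLeastSquares teacher =
            new IterativeReweightedLeastSquares(regression);

        double delta;
        do
        {
            // One Newton-Raphson step; returns the maximum relative
            // change in the coefficients since the previous iteration.
            delta = teacher.Run(inputs, outputs);
        } while (delta > 1e-8);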
Example #3
        private double run(double[][] inputs, double[][] outputs)
        {
            // Regress using Lower-Bound Newton-Raphson estimation
            //
            // The main idea is to replace the Hessian matrix with a
            //   suitable lower bound. Indeed, the Hessian is lower
            //   bounded by a negative definite matrix that does not
            //   even depend on w [Krishnapuram et al.].
            //
            //   - http://www.lx.it.pt/~mtf/Krishnapuram_Carin_Figueiredo_Hartemink_2005.pdf
            //


            // Initial definitions and memory allocations
            int N = inputs.Length;

            double[][] design       = new double[N][];
            double[][] coefficients = this.regression.Coefficients;

            // Compute the regression matrix
            for (int i = 0; i < inputs.Length; i++)
            {
                double[] row = design[i] = new double[M];

                row[0] = 1; // for intercept
                for (int j = 0; j < inputs[i].Length; j++)
                {
                    row[j + 1] = inputs[i][j];
                }
            }


            // Reset Hessian matrix and gradient
            for (int i = 0; i < gradient.Length; i++)
            {
                gradient[i] = 0;
            }

            if (UpdateLowerBound)
            {
                for (int i = 0; i < gradient.Length; i++)
                {
                    for (int j = 0; j < gradient.Length; j++)
                    {
                        lowerBound[i, j] = 0;
                    }
                }
            }

            // In the multinomial logistic regression, the objective
            // function is the log-likelihood function l(w). As given
            // by Krishnapuram et al. and Böhning, this is a concave
            // function with Hessian given by:
            //
            //       H(w) = -sum( diag(p(w)) - p(w)p(w)' )  (x)  xx'
            //      (see the referenced paper for the proper indices)
            //
            // Here (x) denotes the Kronecker product. By using the
            // lower bound principle, Krishnapuram et al. have shown
            // that H(w) can be replaced with a lower bound B which
            // does not depend on w (eq. 8 in the aforementioned paper):
            //
            //      B = -(1/2) [I - 11'/M]  (x)  sum(xx')
            //
            // Thus we can compute and invert this matrix only once.
            //
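            // In this implementation the constant factor -(1/2) [I - 11'/M]
            // is assumed to be precomputed into the 'weights' matrix (set up
            // outside this method), so the loop below only accumulates the
            // sum(xx') blocks through Kronecker products; the UpdateLowerBound
            // flag then ensures B is decomposed once and reused afterwards.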


            // For each input sample in the dataset
            for (int i = 0; i < inputs.Length; i++)
            {
                // Grab variables related to the sample
                double[] x = design[i];
                double[] y = outputs[i];

                // Compute and estimate outputs
                this.compute(inputs[i], output);

                // Compute errors for the sample
                for (int j = 0; j < errors.Length; j++)
                {
                    errors[j] = y[j + 1] - output[j];
                }


                // Compute current gradient and Hessian
                //   We can take advantage of the block structure of the
                //   Hessian matrix and gradient vector by employing the
                //   Kronecker product. See [Böhning, 1992]

                // (Re-) Compute error gradient
                double[] g = Matrix.KroneckerProduct(errors, x);
                for (int j = 0; j < g.Length; j++)
                {
                    gradient[j] += g[j];
                }

                if (UpdateLowerBound)
                {
                    // Compute xxt matrix
                    for (int k = 0; k < x.Length; k++)
                    {
                        for (int j = 0; j < x.Length; j++)
                        {
                            xxt[k, j] = x[k] * x[j];
                        }
                    }

                    // (Re-) Compute weighted "Hessian" matrix
                    double[,] h = Matrix.KroneckerProduct(weights, xxt);
                    for (int j = 0; j < parameterCount; j++)
                    {
                        for (int k = 0; k < parameterCount; k++)
                        {
                            lowerBound[j, k] += h[j, k];
                        }
                    }
                }
            }


            if (UpdateLowerBound)
            {
                UpdateLowerBound = false;

                // Decompose to solve the linear system. Usually the Hessian will
                // be invertible and LU will succeed. However, sometimes the Hessian
                // may be singular and a Singular Value Decomposition may be needed.

                LuDecomposition lu = new LuDecomposition(lowerBound);

                // The SVD is very stable, but is quite expensive, being on average
                // about 10-15 times more expensive than LU decomposition. There are
                // other ways to avoid a singular Hessian. For a very interesting
                // reading on the subject, please see:
                //
                //  - Jeff Gill & Gary King, "What to Do When Your Hessian Is Not Invertible",
                //    Sociological Methods & Research, Vol 33, No. 1, August 2004, 54-87.
                //    Available at: http://gking.harvard.edu/files/help.pdf
                //

                // Moreover, the computation of the inverse is optional, as it will
                // be used only to compute the standard errors of the regression.

                if (lu.Nonsingular)
                {
                    // Solve using LU decomposition
                    deltas        = lu.Solve(gradient);
                    decomposition = lu;
                }
                else
                {
                    // Hessian Matrix is singular, try pseudo-inverse solution
                    decomposition = new SingularValueDecomposition(lowerBound);
                    deltas        = decomposition.Solve(gradient);
                }
            }
            else
            {
                deltas = decomposition.Solve(gradient);
            }


            previous = coefficients.Reshape(1);

            // Update coefficients using the calculated deltas
            for (int i = 0, k = 0; i < coefficients.Length; i++)
            {
                for (int j = 0; j < coefficients[i].Length; j++)
                {
                    coefficients[i][j] -= deltas[k++];
                }
            }

            solution = coefficients.Reshape(1);


            if (computeStandardErrors)
            {
                // Grab the regression information matrix
                double[,] inverse = decomposition.Inverse();

                // Calculate coefficients' standard errors
                double[][] standardErrors = regression.StandardErrors;
                for (int i = 0, k = 0; i < standardErrors.Length; i++)
                {
                    for (int j = 0; j < standardErrors[i].Length; j++, k++)
                    {
                        standardErrors[i][j] = Math.Sqrt(Math.Abs(inverse[k, k]));
                    }
                }
            }



            // Return the relative maximum parameter change
            for (int i = 0; i < deltas.Length; i++)
            {
                deltas[i] = Math.Abs(deltas[i]) / Math.Abs(previous[i]);
            }

            return Matrix.Max(deltas);
        }
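
The same driver pattern applies to the multinomial case. Below is a sketch, again under the assumption that this private run method belongs to an Accord.NET-style LowerBoundNewtonRaphson trainer paired with a MultinomialLogisticRegression model; a public Run overload would validate its arguments and delegate here.

        // Usage sketch; class names are assumptions in the Accord.NET style.
        double[][] inputs =
        {
            new double[] { 0.0, 1.0 },
            new double[] { 1.0, 1.0 },
            new double[] { 2.0, 0.0 },
        };
        double[][] outputs =   // one-hot encoded targets for 3 categories
        {
            new double[] { 1, 0, 0 },
            new double[] { 0, 1, 0 },
            new double[] { 0, 0, 1 },
        };

        MultinomialLogisticRegression mlr =
            new MultinomialLogisticRegression(inputs: 2, categories: 3);
        LowerBoundNewtonRaphson teacher = new LowerBoundNewtonRaphson(mlr);

        double delta;
        do
        {
            // The first iteration pays for the decomposition of the constant
            // lower-bound matrix B; subsequent iterations reuse it and only
            // solve for the new gradient.
            delta = teacher.Run(inputs, outputs);
        } while (delta > 1e-8);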