Beispiel #1
0
        /// <summary>
        /// Assembles the design matrix for a multi-variable regression and passes it,
        /// together with the response and the weights, to compute_coefficients.
        /// </summary>
        /// <param name="x">predictor table; dimension 0 is used as the row count, dimension 1 as the column count</param>
        /// <param name="y">response values, forwarded unchanged</param>
        /// <param name="w">weights, forwarded unchanged</param>
        private void compute_regression(double[,] x, double[] y, double[] w)
        {
            int cols = x.GetLength(1);
            int rows = x.GetLength(0);

            //without an intercept the predictor table is wrapped as is
            if (!m_intercept)
            {
                compute_coefficients(new Linalg.Matrix(x, false), y, w);
                return;
            }

            //with an intercept one extra column of ones is appended after the predictors
            Linalg.Matrix design = new Linalg.Matrix(rows, cols + 1);

            for (int r = 0; r < rows; r++)
            {
                int c = 0;
                while (c < cols)
                {
                    design[r, c] = x[r, c];
                    c++;
                }
                design[r, cols] = 1.0;
            }

            compute_coefficients(design, y, w);
        }
Beispiel #2
0
        /// <summary>
        /// 1D constructor: regresses y on a single predictor x, optionally with an intercept term.
        /// </summary>
        /// <param name="x">predictor values, one per observation</param>
        /// <param name="y">response values, must have the same length as x</param>
        /// <param name="w">optional weights (may be null); when given must have the same length as y</param>
        /// <param name="intercept">true to append a column of ones to the design matrix</param>
        /// <exception cref="ArgumentException">x or y is null, or the array lengths do not match</exception>
        public LinearRegression(double[] x, double[] y, double[] w, bool intercept)
        {
            if (x == null || y == null || x.Length != y.Length)
            {
                throw new ArgumentException("LinearRegression: check input arguments");
            }

            bool use_weights = (w != null);

            if (use_weights && w.Length != y.Length)
            {
                throw new ArgumentException("LinearRegression: array of weights should have the same length");
            }

            m_intercept = intercept;

            //init variable names
            m_names    = new string[1 + (m_intercept ? 1 : 0)];
            m_names[0] = "c1";

            if (m_intercept)
            {
                m_names[1] = "(intercept)";
            }

            Math.Linalg.Matrix A = null;

            int n = x.Length;

            if (m_intercept)
            {
                int m = 1;

                A = new Linalg.Matrix(n, m + 1);

                for (int i = 0; i < n; i++)
                {
                    //each row takes its own observation; using x[0] here would make the predictor column constant
                    A[i, 0] = x[i];
                    A[i, 1] = 1.0; //intercept
                }
            }
            else
            {
                A = new Linalg.Matrix(x, false);
            }

            compute_coefficients(A, y, w);
        }
Beispiel #3
0
        /// <summary>
        /// Solves the least squares system A * coeffs = y through a singular value decomposition
        /// and stores the coefficients, their standard errors and the fit statistics
        /// (m_observations, m_coeffs, m_coeffs_stderr, m_summary).
        /// </summary>
        /// <param name="A">design matrix, one row per observation</param>
        /// <param name="y">response values</param>
        /// <param name="w">weights; in this method they are only handed to SumOfSquaredDev</param>
        private void compute_coefficients(Linalg.Matrix A, double[] y, double[] w)
        {
            Linalg.Matrix rhs = new Linalg.Matrix(y, false);

            //A is decomposed directly when it has more rows than columns, otherwise its transpose is decomposed
            bool use_transpose = !(A.Rows > A.Columns);

            Linalg.SvDecomposition svd        = new Linalg.SvDecomposition(use_transpose ? A.Transpose() : A);
            Linalg.Matrix          solution   = svd.Solve(rhs, use_transpose);
            Linalg.Matrix          covariance = svd.Cov();

            Linalg.Matrix fitted = A * solution;
            Linalg.Matrix resid  = fitted - rhs;

            //model and total sums of squares: deviations from the mean with an intercept, raw squares without
            double mss;
            double tss;

            if (m_intercept)
            {
                mss = Math.Stats.SumOfSquaredDev(fitted.RowPackedData(), w);
                tss = Math.Stats.SumOfSquaredDev(y, w);
            }
            else
            {
                mss = Math.Stats.SumOfSquares(fitted.RowPackedData());
                tss = Math.Stats.SumOfSquares(y);
            }

            //residuals do not sum to zero when there is no intercept, so raw squares are used
            double rss = Math.Stats.SumOfSquares(resid.RowPackedData());

            m_observations = A.Rows;
            m_coeffs       = solution.RowPackedData();

            int count       = m_coeffs.Length;
            int obs         = m_observations;
            int dof         = obs - count;
            int k_intercept = m_intercept ? 1 : 0;

            //residual standard error
            double sigma = System.Math.Sqrt(rss / dof);

            //coefficient standard errors
            m_coeffs_stderr = new double[count];

            for (int k = 0; k < count; k++)
            {
                m_coeffs_stderr[k] = sigma * System.Math.Sqrt(covariance[k, k]);
            }

            double r2     = mss / (mss + rss);
            double r2_adj = 1.0 - (obs - k_intercept) * (1.0 - r2) / dof;
            double fvalue = (mss / (count - k_intercept)) / (rss / dof);
            double aic    = obs + obs * System.Math.Log(2 * System.Math.PI) + obs * System.Math.Log(rss / obs) + 2 * (m_coeffs.Length + 1);

            //generate summary
            m_summary = new Dictionary <string, double>
            {
                { "n",      m_observations  },
                { "p",      m_coeffs.Length },
                { "dof",    dof             },
                { "stderr", sigma           },
                { "rss",    rss             },
                { "tss",    tss             },
                { "mss",    mss             },
                { "r2",     r2              },
                { "r2_adj", r2_adj          },
                { "fvalue", fvalue          },
                { "aic",    aic             }
            };

            //per coefficient: estimate, standard error, t statistic and p-value (via Math.Special.incbet)
            for (int k = 0; k < count; k++)
            {
                string label    = m_names[k];
                double estimate = m_coeffs[k];
                double se       = m_coeffs_stderr[k];
                double tvalue   = estimate / se;
                double pvalue   = Math.Special.incbet(0.5 * dof, 0.5, dof / (dof + tvalue * tvalue));

                m_summary[label]             = estimate;
                m_summary[label + "_se"]     = se;
                m_summary[label + "_tvalue"] = tvalue;
                m_summary[label + "_pvalue"] = pvalue;
            }

            //flag telling whether the model was fitted with an intercept
            if (m_intercept)
            {
                m_summary["intercept"] = 1.0;
            }
            else
            {
                m_summary["intercept"] = 0.0;
            }
        }
Beispiel #4
0
        List <double> m_gamma;  //step size (gamma) taken on each iteration of the Lars constructor

        /// <summary>
        /// Computes a least angle regression (LARS) path for y on the columns of x.
        /// After every step the full coefficient vector is appended to m_beta and the
        /// step size to m_gamma.
        /// NOTE(review): the original summary said "includes intercept", but no centering or
        /// intercept column is handled here; presumably x and y are expected to be centered by
        /// the caller - confirm.
        /// </summary>
        /// <param name="x">predictors, one row per observation and one column per variable</param>
        /// <param name="y">response, one value per observation</param>
        /// <exception cref="System.ArgumentException">x or y is null, or y does not have one value per row of x</exception>
        public Lars(double[,] x, double[] y)
        {
            if (x == null || y == null || x.GetLength(0) != y.Length)
            {
                throw new System.ArgumentException("Lars: check input arguments (x and y can't be null and must have the same number of observations)");
            }

            int n = x.GetLength(0);                  //number of observations
            int p = x.GetLength(1);                  //number of variables

            int maxvars = System.Math.Min(n - 1, p); //maximum number of variables
            int maxit   = 8 * maxvars;

            m_beta  = new List <double[]>(); //coefficients after each step
            m_gamma = new List <double>();   //step size of each step

            SortedSet <int> c_set = new SortedSet <int>(); //candidate set
            SortedSet <int> a_set = new SortedSet <int>(); //active set

            //initialize candidate set with all available variables
            for (int i = 0; i < p; i++)
            {
                c_set.Add(i);
            }

            //compute Gram matrix (gram = x' * x); it is symmetric, so only the upper triangle is summed
            Linalg.Matrix full_gram = new Linalg.Matrix(p, p);

            for (int i = 0; i < p; i++)
            {
                for (int j = i; j < p; j++)
                {
                    double sum = 0.0;
                    for (int k = 0; k < n; k++)
                    {
                        sum += x[k, i] * x[k, j];
                    }
                    full_gram[i, j] = sum;
                    full_gram[j, i] = sum;
                }
            }

            double[] mu = new double[n]; //lars regression vector
            double[] c  = new double[p]; //correlations

            //NOTE: the LASSO modification (dropping variables whose coefficient crosses zero) is not implemented

            for (int it = 0; it < maxit; it++)
            {
                //compute correlations of every variable with the current residual
                for (int j = 0; j < p; j++)
                {
                    double sum = 0.0;
                    for (int i = 0; i < n; i++)
                    {
                        sum += x[i, j] * (y[i] - mu[i]);
                    }
                    c[j] = sum;
                }

                //find abs max corr from candidate set
                double max_abs_c       = 0.0;
                int    max_abs_c_index = -1;

                foreach (int i in c_set)
                {
                    double abs_c = System.Math.Abs(c[i]);

                    if (abs_c > max_abs_c)
                    {
                        max_abs_c       = abs_c;
                        max_abs_c_index = i;
                    }
                }

                //exit if there is no correlation (also covers an empty candidate set)
                if (max_abs_c < ACQ.Math.Const.epsilon)
                {
                    break;
                }

                a_set.Add(max_abs_c_index);
                c_set.Remove(max_abs_c_index);

                //active variable indices in ascending order; position k below always refers to active_indices[k]
                int[] active_indices = a_set.ToArray();
                int   vars           = active_indices.Length;

                //signs of the correlations of the active variables.
                //s is indexed by position in the active set, not by variable index
                //(indexing by variable index overruns the array once a variable index is >= vars)
                double[] s = new double[vars];

                for (int k = 0; k < vars; k++)
                {
                    s[k] = ACQ.Math.Utils.Sign(c[active_indices[k]]);
                }

                //compute partial Gram matrix, Gram = X(active_columns)' * X(active_columns)
                Linalg.Matrix gram                     = full_gram.Submatrix(active_indices, active_indices);
                Linalg.CholeskyDecomposition gram_chol = new Linalg.CholeskyDecomposition(gram);
                Linalg.Matrix inv_gram                 = gram_chol.Solve(s); //inv(Gram) * s

                //normalisation constant: scale = (s' * inv(Gram) * s)^(-1/2)
                double norm = 0.0;

                for (int i = 0; i < vars; i++)
                {
                    norm += s[i] * inv_gram[i, 0];
                }

                double scale = 1.0 / System.Math.Sqrt(norm);

                //coefficients of the equiangular vector with respect to the (unsigned) columns of x.
                //The sign factor belongs only in the normalisation above; because the columns of x
                //are not multiplied by s, the weights are scale * inv(Gram) * s
                double[] w = new double[vars];

                for (int i = 0; i < vars; i++)
                {
                    w[i] = scale * inv_gram[i, 0];
                }

                //compute equiangular vector
                double[] u = new double[n];

                for (int i = 0; i < n; i++)
                {
                    double sum = 0.0;
                    for (int j = 0; j < vars; j++)
                    {
                        sum += x[i, active_indices[j]] * w[j];
                    }
                    u[i] = sum;
                }

                double gamma = max_abs_c / scale; // set gamma to the largest value (i.e. use regular least squares)

                //correlation (angle) between equiangular vector and all remaining variables;
                //the step is shortened to the first point where a candidate becomes as correlated as the active set
                foreach (int i in c_set)
                {
                    double angle = 0.0;
                    for (int j = 0; j < n; j++)
                    {
                        angle += x[j, i] * u[j];
                    }

                    double t1 = (max_abs_c - c[i]) / (scale - angle);
                    double t2 = (max_abs_c + c[i]) / (scale + angle);

                    if (t1 > 0)
                    {
                        gamma = System.Math.Min(t1, gamma);
                    }

                    if (t2 > 0)
                    {
                        gamma = System.Math.Min(t2, gamma);
                    }
                }

                //update coefficients: start from the previous step (inactive variables stay at zero)
                double[] beta = new double[p];

                if (m_beta.Count > 0)
                {
                    double[] prev_beta = m_beta[m_beta.Count - 1];

                    for (int i = 0; i < vars; i++)
                    {
                        int index = active_indices[i];
                        beta[index] = prev_beta[index];
                    }
                }

                for (int i = 0; i < vars; i++)
                {
                    beta[active_indices[i]] += gamma * w[i];
                }

                m_beta.Add(beta);
                m_gamma.Add(gamma);

                //update lars vector
                for (int i = 0; i < n; i++)
                {
                    mu[i] += gamma * u[i];
                }
            }
        }
Beispiel #5
0
        /// <summary>
        /// Single-predictor constructor: validates the inputs, builds the design matrix
        /// (with an extra column of ones when an intercept is requested) and fits the model.
        /// </summary>
        /// <param name="x">predictor values, one per observation; NaN is rejected</param>
        /// <param name="y">response values, same length as x; NaN is rejected</param>
        /// <param name="w">optional weights (null means unweighted); same length as y, NaN is rejected</param>
        /// <param name="intercept">true to fit an intercept term</param>
        public LinearRegression(double[] x, double[] y, double[] w, bool intercept)
        {
            bool bad_xy = (x == null) || (y == null) || (x.Length != y.Length);

            if (bad_xy)
            {
                throw new ArgumentException("LinearRegression: check input arguments (x and y can't be null and must have the same size)");
            }

            m_weighted = (w != null);

            if (m_weighted && w.Length != y.Length)
            {
                throw new ArgumentException("LinearRegression: array of weights should have the same length as x and y");
            }

            m_intercept = intercept;

            //variable names: the slope first, then the intercept when present
            if (m_intercept)
            {
                m_names = new string[] { "c1", "(intercept)" };
            }
            else
            {
                m_names = new string[] { "c1" };
            }

            int count = x.Length;

            //reject NaN in any of the inputs
            for (int k = 0; k < count; k++)
            {
                if (Double.IsNaN(x[k]) || Double.IsNaN(y[k]))
                {
                    throw new ArgumentException("LinearRegression: there should not be NaN values in x or y");
                }

                if (m_weighted && Double.IsNaN(w[k]))
                {
                    throw new ArgumentException("LinearRegression: weights vector should not have NaN values");
                }
            }

            Linalg.Matrix design;

            if (!m_intercept)
            {
                design = new Linalg.Matrix(x);
            }
            else
            {
                //two columns: the predictor and a constant one for the intercept
                design = new Linalg.Matrix(count, 2);

                for (int k = 0; k < count; k++)
                {
                    design[k, 0] = x[k];
                    design[k, 1] = 1.0;
                }
            }

            compute_coefficients(design, y, w);
        }
Beispiel #6
0
        /// <summary>
        /// Solves the (optionally weighted) least squares system A * coeffs = y through a singular
        /// value decomposition and stores the coefficients, their standard errors and the fit
        /// statistics in m_observations, m_coeffs, m_coeffs_stderr and m_summary.
        /// When m_weighted is set, A is modified in place (its rows are scaled by sqrt(w[i])).
        /// </summary>
        /// <param name="A">design matrix, one row per observation; overwritten when weights are used</param>
        /// <param name="y">response values</param>
        /// <param name="w">weights; indexed only when m_weighted is set, but always passed to the Math.Stats.Utils sums</param>
        private void compute_coefficients(Linalg.Matrix A, double[] y, double[] w)
        {
            Linalg.Matrix b = new Linalg.Matrix(y, false);

            //weighted case: scale every row of the system by sqrt(w[i]), so that the ordinary
            //least squares solution of the scaled system minimizes sum(w[i] * residual[i]^2)
            double[] w_sqrt = null;
            if (m_weighted)
            {
                w_sqrt = new double[b.Rows];
                for (int i = 0; i < b.Rows; i++)
                {
                    w_sqrt[i] = System.Math.Sqrt(w[i]);

                    //original comment: "here we assume that specified weights are sigmas (i.e. not sigma square)"
                    //NOTE(review): scaling by sqrt(w) makes w multiply the squared residual - confirm the intended meaning of the weights
                    b[i, 0] = b[i, 0] * w_sqrt[i];

                    for (int j = 0; j < A.Columns; j++)
                    {
                        A[i, j] = A[i, j] * w_sqrt[i];
                    }
                }
            }

            //A is decomposed directly when it has more rows than columns, otherwise its transpose is decomposed
            Linalg.Matrix          coeffs;
            Linalg.Matrix          cov;
            Linalg.SvDecomposition sv;
            if (A.Rows > A.Columns)
            {
                sv     = new Linalg.SvDecomposition(A);
                coeffs = sv.Solve(b, false);
            }
            else
            {
                sv     = new Linalg.SvDecomposition(A.Transpose());
                coeffs = sv.Solve(b, true);
            }
            cov = sv.Cov();

            //in the weighted case these are fit and residuals of the scaled system
            Linalg.Matrix fit       = A * coeffs;
            Linalg.Matrix residuals = fit - b;

            double aic_weight_correction = 0;        //sum of log(w[i]), stays zero without weights
            double weight_sum            = fit.Rows; //NOTE(review): assigned here and below but not read anywhere else in this method

            if (m_weighted)
            {
                //undo the row scaling so that fit and residuals are on the scale of y
                //NOTE(review): a zero weight makes w_sqrt[i] zero, so these divisions produce NaN or Infinity
                for (int i = 0; i < fit.Rows; i++)
                {
                    fit[i, 0]       = fit[i, 0] / w_sqrt[i];
                    residuals[i, 0] = fit[i, 0] - b[i, 0] / w_sqrt[i];

                    aic_weight_correction += System.Math.Log(w[i]);
                }
                weight_sum = Math.Stats.Utils.Sum(w);
            }

            //model, residual and total sums of squares; w is null here when the fit is unweighted
            double mss = Math.Stats.Utils.SumOfSquaredDev(fit.RowPackedData(), w);
            double rss = Math.Stats.Utils.SumOfSquares(residuals.RowPackedData(), w); //without intercept residuals dont add up to zero
            double tss = Math.Stats.Utils.SumOfSquaredDev(y, w);


            m_observations = A.Rows;
            m_coeffs       = coeffs.RowPackedData();

            int p             = m_coeffs.Length;         //number of coefficients
            int n             = m_observations;          //number of observations
            int dof           = n - p;                   //residual degrees of freedom
            int dof_intercept = m_intercept ? 1 : 0;     //one degree of freedom is taken by the intercept

            m_coeffs_stderr = new double[p];             //coefficient standard errors

            double stderr = System.Math.Sqrt(rss / dof); //residual standard error: sqrt(rss / dof)

            for (int i = 0; i < p; i++)
            {
                m_coeffs_stderr[i] = stderr * System.Math.Sqrt(cov[i, i]);
            }

            double r2      = mss / (mss + rss);
            double r2_adj  = 1.0 - (n - dof_intercept) * (1.0 - r2) / dof;
            double fvalue  = (mss / (p - dof_intercept)) / (rss / dof);
            //part shared by AIC and BIC; the two differ only in the penalty on (number of coefficients + 1)
            double aic_bic = n + n * System.Math.Log(2 * System.Math.PI) + n * System.Math.Log(rss / n) - aic_weight_correction;
            double aic     = aic_bic + 2 * (m_coeffs.Length + 1);
            double bic     = aic_bic + System.Math.Log(m_observations) * (m_coeffs.Length + 1);

            //generate summary
            m_summary = new Dictionary <string, double>();

            m_summary["n"]      = m_observations;
            m_summary["p"]      = m_coeffs.Length;
            m_summary["dof"]    = dof;
            m_summary["stderr"] = stderr;
            m_summary["rss"]    = rss;
            m_summary["tss"]    = tss;
            m_summary["mss"]    = mss;
            m_summary["r2"]     = r2;
            m_summary["r2_adj"] = r2_adj;
            m_summary["fvalue"] = fvalue;
            m_summary["aic"]    = aic;
            m_summary["bic"]    = bic;

            //per coefficient: estimate, standard error, t statistic and p-value, keyed by the name in m_names
            for (int i = 0; i < p; i++)
            {
                double coef     = m_coeffs[i];
                double coef_std = m_coeffs_stderr[i];
                double tvalue   = coef / coef_std;
                double pvalue   = Math.Special.incbet(0.5 * dof, 0.5, dof / (dof + tvalue * tvalue));

                m_summary[m_names[i]]             = coef;
                m_summary[m_names[i] + "_se"]     = coef_std;
                m_summary[m_names[i] + "_tvalue"] = tvalue;
                m_summary[m_names[i] + "_pvalue"] = pvalue;
            }

            //flag telling whether the model was fitted with an intercept
            m_summary["intercept"] = m_intercept ? 1.0 : 0.0;
        }