Example #1
        internal override OneWayAnovaResult CreateAnova()
        {
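            // Fit (model) row: 1 degree of freedom for the slope; residual row: n - 2
            // (n points minus the two fitted parameters a and b); total row: n - 1.
            // By construction SST = SSF + SSR and (n - 1) = 1 + (n - 2).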
            AnovaRow          fit      = new AnovaRow(SSF, 1);
            AnovaRow          residual = new AnovaRow(SSR, n - 2);
            AnovaRow          total    = new AnovaRow(SST, n - 1);
            OneWayAnovaResult anova    = new OneWayAnovaResult(fit, residual, total);

            return(anova);
        }
Example #2
 internal OneWayAnovaResult(AnovaRow factor, AnovaRow residual, AnovaRow total)
 {
     Debug.Assert(factor != null);
     Debug.Assert(residual != null);
     Debug.Assert(total != null);
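     // The factor and residual degrees of freedom must partition the total.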
     Debug.Assert(factor.DegreesOfFreedom + residual.DegreesOfFreedom == total.DegreesOfFreedom);
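     // The factor row is wrapped in an AnovaTestRow; since it receives a reference to the
     // whole result, it can presumably form the F statistic F = (SSF / \nu_F) / (SSR / \nu_R)
     // from the factor and residual rows.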
     this.Factor   = new AnovaTestRow(factor.SumOfSquares, factor.DegreesOfFreedom, this);
     this.Residual = residual;
     this.Total    = total;
 }
        internal override OneWayAnovaResult CreateAnova()
        {
            // Use sums-of-squares to do ANOVA
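            // Fit row: m degrees of freedom (the m non-constant polynomial terms); residual:
            // n - (m + 1) (n points minus the m + 1 fitted coefficients); total: n - 1.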
            AnovaRow          fit      = new AnovaRow(SSF, m);
            AnovaRow          residual = new AnovaRow(SSR, n - (m + 1));
            AnovaRow          total    = new AnovaRow(SST, n - 1);
            OneWayAnovaResult anova    = new OneWayAnovaResult(fit, residual, total);

            return(anova);
        }
Example #4
 internal TwoWayAnovaResult(AnovaRow row, AnovaRow column, AnovaRow interaction, AnovaRow residual)
 {
     Debug.Assert(row != null);
     Debug.Assert(column != null);
     Debug.Assert(interaction != null);
     Debug.Assert(residual != null);
     this.row         = row;
     this.column      = column;
     this.interaction = interaction;
     this.residual    = residual;
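     // The total row is reconstructed from the components: its sum of squares and its degrees
     // of freedom are each the sum of the row, column, interaction, and residual contributions.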
     this.total       = new AnovaRow(
         row.SumOfSquares + column.SumOfSquares + interaction.SumOfSquares + residual.SumOfSquares,
         row.DegreesOfFreedom + column.DegreesOfFreedom + interaction.DegreesOfFreedom + residual.DegreesOfFreedom
         );
 }
Example #5
 internal OneWayAnovaResult(AnovaRow factor, AnovaRow residual, AnovaRow total)
 {
     this.Factor   = new AnovaTestRow(factor.SumOfSquares, factor.DegreesOfFreedom, this);
     this.Residual = residual;
     this.Total    = total;
 }
        /// <summary>
        /// Computes the polynomial of given degree which best fits the data.
        /// </summary>
        /// <param name="m">The degree, which must be non-negative.</param>
        /// <returns>The fit result.</returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="m"/> is negative.</exception>
        /// <exception cref="InsufficientDataException">There are fewer data points than coefficients to be fit.</exception>
        public PolynomialRegressionResult PolynomialRegression(int m)
        {
            if (m < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(m));
            }

            int n = Count;

            if (n < (m + 1))
            {
                throw new InsufficientDataException();
            }

            // Construct the n X (m+1) design matrix X_{ij} = x_{i}^{j}
            RectangularMatrix X = new RectangularMatrix(n, m + 1);
            ColumnVector      y = new ColumnVector(n);

            for (int i = 0; i < n; i++)
            {
                double x = xData[i];
                X[i, 0] = 1.0;
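                // Build successive powers incrementally: X[i, j] = x^j = X[i, j - 1] * x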
                for (int j = 1; j <= m; j++)
                {
                    X[i, j] = X[i, j - 1] * x;
                }
                y[i] = yData[i];
            }

            // Use X = QR to solve X b = y and compute C
            ColumnVector    b;
            SymmetricMatrix C;

            QRDecomposition.SolveLinearSystem(X, y, out b, out C);
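            // b now holds the least-squares coefficients; C is the unscaled covariance (X^T X)^{-1},
            // which is multiplied by \sigma^2 below to give the covariance matrix of the coefficients.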

            // Compute residuals
            double       SSR       = 0.0;
            double       SSF       = 0.0;
            ColumnVector yHat      = X * b;
            Sample       residuals = new Sample();

            for (int i = 0; i < n; i++)
            {
                double z = yData[i] - yHat[i];
                residuals.Add(z);
                SSR += z * z;
                SSF += MoreMath.Sqr(yHat[i] - yData.Mean);
            }
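            // Unbiased estimate of the noise variance: SSR over the residual degrees of freedom n - (m + 1)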
            double sigma2 = SSR / (n - (m + 1));

            // Scale up C by \sigma^2
            // (It sure would be great to be able to overload *=.)
            for (int i = 0; i <= m; i++)
            {
                for (int j = i; j <= m; j++)
                {
                    C[i, j] = C[i, j] * sigma2;
                }
            }

            // Compute the remaining sum of squares
            double SST = yData.Variance * n;

            // Use sums-of-squares to do ANOVA
            AnovaRow          fit      = new AnovaRow(SSF, m);
            AnovaRow          residual = new AnovaRow(SSR, n - (m + 1));
            AnovaRow          total    = new AnovaRow(SST, n - 1);
            OneWayAnovaResult anova    = new OneWayAnovaResult(fit, residual, total);

            string[] names = new string[m + 1];
            names[0] = "1";
            if (m > 0)
            {
                names[1] = "x";
            }
            for (int i = 2; i <= m; i++)
            {
                names[i] = $"x^{i}";
            }
            ParameterCollection parameters = new ParameterCollection(names, b, C);

            return(new PolynomialRegressionResult(parameters, anova, residuals));
        }
        /// <summary>
        /// Computes the best-fit linear regression from the data.
        /// </summary>
        /// <returns>The result of the fit.</returns>
        /// <remarks>
        /// <para>Linear regression assumes that the data have been generated by a function y = a + b x + e, where e is
        /// normally distributed noise, and determines the values of a and b that best fit the data. It also
        /// determines an error matrix on the parameters a and b, and does an F-test to assess the quality of the fit.</para>
        /// <para>The fit result is two-dimensional. The first parameter is the intercept a, the second is the slope b.
        /// The goodness-of-fit test is an F-test comparing the variance accounted for by the model to the remaining,
        /// unexplained variance.</para>
        /// </remarks>
        /// <exception cref="InsufficientDataException">There are fewer than three data points.</exception>
        public LinearRegressionResult LinearRegression()
        {
            int n = this.Count;

            if (n < 3)
            {
                throw new InsufficientDataException();
            }

            // The means and covariances are the inputs to most of the regression formulas.
            double mx  = xData.Mean;
            double my  = yData.Mean;
            double cxx = xData.Variance;
            double cyy = yData.Variance;
            double cxy = this.Covariance;

            Debug.Assert(cxx >= 0.0);
            Debug.Assert(cyy >= 0.0);

            // Compute the best-fit parameters
            double b = cxy / cxx;
            double a = my - b * mx;
            // Since cov(x,y) = (n S_xy - S_x S_y)/n^2 and var(x) = (n S_xx - S_x^2) / n^2,
            // these formulas are equivalent to the usual formulas for a and b involving sums,
            // but they are more stable against round-off.
            ColumnVector v = new ColumnVector(a, b);

            v.IsReadOnly = true;

            // Compute Pearson r value
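            // r = cov(x,y) / sqrt(var(x) var(y)); the two-tailed test compares r against the
            // Pearson R distribution for n points, i.e. its distribution under the null
            // hypothesis of no correlation.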
            double     r     = cxy / Math.Sqrt(cxx * cyy);
            TestResult rTest = new TestResult("r", r, TestType.TwoTailed, new Distributions.PearsonRDistribution(n));

            // Compute residuals and other sum-of-squares
            double SSR       = 0.0;
            double SSF       = 0.0;
            Sample residuals = new Sample();

            foreach (XY point in this)
            {
                double y = a + b * point.X;
                double z = point.Y - y;
                SSR += z * z;
                residuals.Add(z);
                SSF += MoreMath.Sqr(y - my);
            }
            double SST = cyy * n;
            // Note SST = SSF + SSR because \sum_{i} ( y_i - \bar{y})^2 = \sum_i (y_i - f_i)^2 + \sum_i (f_i - \bar{y})^2

            // Use sums-of-squares to do ANOVA
            AnovaRow          fit      = new AnovaRow(SSF, 1);
            AnovaRow          residual = new AnovaRow(SSR, n - 2);
            AnovaRow          total    = new AnovaRow(SST, n - 1);
            OneWayAnovaResult anova    = new OneWayAnovaResult(fit, residual, total);

            // Compute covariance of parameters matrix
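            // For y = a + b x: Var(b) = \sigma^2 / (n var(x)), Cov(a,b) = -\bar{x} Var(b), and
            // Var(a) = \sigma^2 (1/n + \bar{x}^2 / (n var(x))) = (var(x) + \bar{x}^2) Var(b)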
            double s2  = SSR / (n - 2);
            double cbb = s2 / cxx / n;
            double cab = -mx * cbb;
            double caa = (cxx + mx * mx) * cbb;

            SymmetricMatrix C = new SymmetricMatrix(2);

            C[0, 0]      = caa;
            C[1, 1]      = cbb;
            C[0, 1]      = cab;
            C.IsReadOnly = true;

            // Package the parameters
            ParameterCollection parameters = new ParameterCollection(
                new string[] { "Intercept", "Slope" }, v, C
                );

            // Prepare the prediction function
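            // The variance of a predicted y at a new x combines the noise variance s2 with the
            // uncertainty of the fitted line there: s2 (1 + 1/n + (x - \bar{x})^2 / (n var(x)))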
            Func <double, UncertainValue> predict = (double x) => {
                double y = a + b * x;
                return(new UncertainValue(y, Math.Sqrt(s2 * (1.0 + (1.0 + MoreMath.Sqr(x - mx) / cxx) / n))));
            };

            return(new LinearRegressionResult(parameters, rTest, anova, residuals, predict));
        }
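        // A minimal usage sketch (not part of the library source above), assuming this method is
        // exposed on a bivariate sample type such as Meta.Numerics' BivariateSample, whose Add(x, y)
        // calls populate the xData/yData used here; the data points are invented for illustration.
        //
        //     BivariateSample sample = new BivariateSample();
        //     sample.Add(1.0, 2.2);
        //     sample.Add(2.0, 3.9);
        //     sample.Add(3.0, 6.1);
        //     sample.Add(4.0, 7.8);
        //     LinearRegressionResult fit = sample.LinearRegression();
        //     // Per the remarks above, the first fitted parameter is the intercept a, the second the slope b.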
        // the internal linear regression routine, which assumes inputs are entirely valid

        private MultiLinearRegressionResult LinearRegression_Internal(int outputIndex)
        {
            // To do a fit, we need more data than parameters.
            if (Count <= Dimension)
            {
                throw new InsufficientDataException();
            }

            // Compute the design matrix X.
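            // The column at outputIndex is re-used as the intercept column of 1s; every other
            // column j holds the values of input variable j.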
            int n = Count;
            int m = Dimension;
            RectangularMatrix X = new RectangularMatrix(n, m);
            ColumnVector      y = new ColumnVector(n);

            for (int i = 0; i < n; i++)
            {
                for (int j = 0; j < m; j++)
                {
                    if (j == outputIndex)
                    {
                        X[i, j] = 1.0;
                    }
                    else
                    {
                        X[i, j] = storage[j][i];
                    }
                }
                y[i] = storage[outputIndex][i];
            }

            // Use X = QR to solve X b = y and compute C.
            ColumnVector    b;
            SymmetricMatrix C;

            QRDecomposition.SolveLinearSystem(X, y, out b, out C);

            // Compute residuals
            double       SSR       = 0.0;
            double       SSF       = 0.0;
            ColumnVector yHat      = X * b;
            Sample       residuals = new Sample();

            for (int i = 0; i < n; i++)
            {
                double z = storage[outputIndex][i] - yHat[i];
                residuals.Add(z);
                SSR += z * z;
                SSF += MoreMath.Sqr(yHat[i] - storage[outputIndex].Mean);
            }
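            // Unbiased estimate of the noise variance: SSR over the residual degrees of freedom,
            // n points minus the m fitted coefficients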
            double sigma2 = SSR / (n - m);


            // Scale up C by \sigma^2
            // (It sure would be great to be able to overload *=.)
            for (int i = 0; i < m; i++)
            {
                for (int j = i; j < m; j++)
                {
                    C[i, j] = C[i, j] * sigma2;
                }
            }

            // Compute the remaining sum of squares
            double SST = storage[outputIndex].Variance * n;

            // Use sums-of-squares to do ANOVA
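            // Fit row: m - 1 degrees of freedom (the coefficients other than the intercept);
            // residual: n - m; total: n - 1.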
            AnovaRow          fit      = new AnovaRow(SSF, m - 1);
            AnovaRow          residual = new AnovaRow(SSR, n - m);
            AnovaRow          total    = new AnovaRow(SST, n - 1);
            OneWayAnovaResult anova    = new OneWayAnovaResult(fit, residual, total);

            string[] names = new string[m];
            for (int j = 0; j < m; j++)
            {
                if (j == outputIndex)
                {
                    names[j] = "Intercept";
                }
                else
                {
                    names[j] = $"[{j}]";
                }
            }
            ParameterCollection parameters = new ParameterCollection(names, b, C);

            return(new MultiLinearRegressionResult(parameters, anova, residuals));
        }
Example #9
 internal OneWayAnovaResult(AnovaRow factor, AnovaRow residual, AnovaRow total)
 {
     this.Factor = new AnovaTestRow(factor.SumOfSquares, factor.DegreesOfFreedom, this);
     this.Residual = residual;
     this.Total = total;
 }