/// <summary>
/// Initializes a regression result from its already-computed pieces.
/// </summary>
/// <param name="parameters">The parameter collection to store; expected to be non-null.</param>
/// <param name="anova">The ANOVA result to store; expected to be non-null.</param>
/// <param name="residuals">The sample of residuals to store; expected to be non-null.</param>
internal RegressionResult(
    ParameterCollection parameters,
    OneWayAnovaResult anova,
    Sample residuals
) {
    // Debug.Assert is only active in debug builds, so these are sanity
    // checks rather than argument validation.
    Debug.Assert(parameters != null);
    Debug.Assert(anova != null);
    Debug.Assert(residuals != null);

    this.parameters = parameters;
    this.anova = anova;
    this.residuals = residuals;
}
/// <summary>
/// Builds the one-way ANOVA result from the SSF, SSR and SST sums of squares.
/// </summary>
/// <returns>
/// An ANOVA result whose fit, residual and total rows are paired with
/// 1, n - 2 and n - 1 respectively.
/// </returns>
internal override OneWayAnovaResult CreateAnova() {
    AnovaRow fitRow = new AnovaRow(SSF, 1);
    AnovaRow residualRow = new AnovaRow(SSR, n - 2);
    AnovaRow totalRow = new AnovaRow(SST, n - 1);
    return new OneWayAnovaResult(fitRow, residualRow, totalRow);
}
/// <summary>
/// Builds the one-way ANOVA result from the SSF, SSR and SST sums of squares
/// for a fit with m + 1 coefficients.
/// </summary>
/// <returns>
/// An ANOVA result whose fit, residual and total rows are paired with
/// m, n - (m + 1) and n - 1 respectively.
/// </returns>
internal override OneWayAnovaResult CreateAnova() {
    // What is left over after the m + 1 coefficients are accounted for.
    int residualDegreesOfFreedom = n - (m + 1);

    AnovaRow fitRow = new AnovaRow(SSF, m);
    AnovaRow residualRow = new AnovaRow(SSR, residualDegreesOfFreedom);
    AnovaRow totalRow = new AnovaRow(SST, n - 1);
    return new OneWayAnovaResult(fitRow, residualRow, totalRow);
}
/// <summary>
/// Initializes a linear regression result.
/// </summary>
/// <param name="parameters">The parameter collection, forwarded to the base constructor.</param>
/// <param name="rTest">The test result to store as the r test.</param>
/// <param name="anova">The ANOVA result, forwarded to the base constructor.</param>
/// <param name="residuals">The residuals, forwarded to the base constructor.</param>
/// <param name="predict">The function to store for mapping an x value to an uncertain predicted value.</param>
internal LinearRegressionResult(
    ParameterCollection parameters,
    TestResult rTest,
    OneWayAnovaResult anova,
    Sample residuals,
    Func<double, UncertainValue> predict
) : base(parameters, anova, residuals) {
    // Only the pieces specific to this result type are kept here;
    // everything else lives in the base class.
    this.rTest = rTest;
    this.predict = predict;
}
/// <summary>
/// Initializes a polynomial regression result; all arguments are forwarded
/// unchanged to the base constructor.
/// </summary>
/// <param name="parameters">The parameter collection.</param>
/// <param name="anova">The ANOVA result.</param>
/// <param name="residuals">The residuals.</param>
internal PolynomialRegressionResult(
    ParameterCollection parameters,
    OneWayAnovaResult anova,
    Sample residuals
) : base(parameters, anova, residuals) {
    // No additional state beyond what the base class holds.
}
/// <summary>
/// Initializes a multi-linear regression result; all arguments are forwarded
/// unchanged to the base constructor.
/// </summary>
/// <param name="parameters">The parameter collection.</param>
/// <param name="anova">The ANOVA result.</param>
/// <param name="residuals">The residuals.</param>
internal MultiLinearRegressionResult(
    ParameterCollection parameters,
    OneWayAnovaResult anova,
    Sample residuals
) : base(parameters, anova, residuals) {
    // No additional state beyond what the base class holds.
}
/// <summary>
/// Initializes an ANOVA row that keeps a reference to the ANOVA result it is associated with.
/// </summary>
/// <param name="sumOfSquares">The sum of squares, forwarded to the base constructor.</param>
/// <param name="degreesOfFreedom">The degrees of freedom, forwarded to the base constructor.</param>
/// <param name="result">The ANOVA result to store on this row.</param>
internal AnovaTestRow(
    double sumOfSquares,
    int degreesOfFreedom,
    OneWayAnovaResult result
) : base(sumOfSquares, degreesOfFreedom) {
    this.result = result;
}
/// <summary>
/// Fits a polynomial of the given degree to the data by least squares.
/// </summary>
/// <param name="m">The degree of the polynomial, which must be non-negative.</param>
/// <returns>The fit result.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="m"/> is negative.</exception>
/// <exception cref="InsufficientDataException">There are fewer data points than coefficients to be fit.</exception>
public PolynomialRegressionResult PolynomialRegression(int m) {

    if (m < 0) {
        throw new ArgumentOutOfRangeException(nameof(m));
    }

    // A degree-m polynomial has m + 1 coefficients.
    int coefficientCount = m + 1;

    int n = Count;
    if (n < coefficientCount) {
        throw new InsufficientDataException();
    }

    // Construct the n x (m + 1) design matrix X_{ij} = x_{i}^{j}.
    // Each row is built by repeated multiplication: 1, x, x^2, ..., x^m.
    RectangularMatrix design = new RectangularMatrix(n, coefficientCount);
    ColumnVector observed = new ColumnVector(n);
    for (int row = 0; row < n; row++) {
        double x = xData[row];
        double power = 1.0;
        design[row, 0] = power;
        for (int column = 1; column <= m; column++) {
            power *= x;
            design[row, column] = power;
        }
        observed[row] = yData[row];
    }

    // Use X = QR to solve X b = y and compute the (unscaled) covariance C.
    ColumnVector coefficients;
    SymmetricMatrix covariance;
    QRDecomposition.SolveLinearSystem(design, observed, out coefficients, out covariance);

    // Compute the residuals, the residual sum of squares (SSR)
    // and the fit sum of squares (SSF).
    ColumnVector fitted = design * coefficients;
    Sample residuals = new Sample();
    double yMean = yData.Mean;
    double SSR = 0.0;
    double SSF = 0.0;
    for (int i = 0; i < n; i++) {
        double deviation = yData[i] - fitted[i];
        residuals.Add(deviation);
        SSR += deviation * deviation;
        SSF += MoreMath.Sqr(fitted[i] - yMean);
    }

    // NOTE(review): when n == m + 1 the residual degrees of freedom are zero,
    // so this division yields NaN or Infinity; the guard above only rejects n < m + 1.
    int residualDegreesOfFreedom = n - coefficientCount;
    double sigma2 = SSR / residualDegreesOfFreedom;

    // Scale up C by \sigma^2. Only the upper triangle is visited because C is symmetric.
    for (int i = 0; i < coefficientCount; i++) {
        for (int j = i; j < coefficientCount; j++) {
            covariance[i, j] *= sigma2;
        }
    }

    // Compute the remaining sum of squares.
    double SST = yData.Variance * n;

    // Use sums-of-squares to do ANOVA.
    AnovaRow fitRow = new AnovaRow(SSF, m);
    AnovaRow residualRow = new AnovaRow(SSR, residualDegreesOfFreedom);
    AnovaRow totalRow = new AnovaRow(SST, n - 1);
    OneWayAnovaResult anova = new OneWayAnovaResult(fitRow, residualRow, totalRow);

    // Name the coefficients "1", "x", "x^2", ..., one per power.
    string[] names = new string[coefficientCount];
    for (int k = 0; k < coefficientCount; k++) {
        switch (k) {
            case 0:
                names[k] = "1";
                break;
            case 1:
                names[k] = "x";
                break;
            default:
                names[k] = $"x^{k}";
                break;
        }
    }

    ParameterCollection parameters = new ParameterCollection(names, coefficients, covariance);
    return new PolynomialRegressionResult(parameters, anova, residuals);
}
/// <summary>
/// Computes the best-fit linear regression from the data.
/// </summary>
/// <returns>The result of the fit.</returns>
/// <remarks>
/// <para>Linear regression assumes that the data have been generated by a function y = a + b x + e, where e is
/// normally distributed noise, and determines the values of a and b that best fit the data. It also
/// determines an error matrix on the parameters a and b, and does an F-test to assess the goodness of fit.</para>
/// <para>The fit result is two-dimensional. The first parameter is the intercept a, the second is the slope b.
/// The goodness-of-fit test is an F-test comparing the variance accounted for by the model to the remaining,
/// unexplained variance.</para>
/// </remarks>
/// <exception cref="InsufficientDataException">There are fewer than three data points.</exception>
public LinearRegressionResult LinearRegression() {

    int n = this.Count;
    if (n < 3) {
        throw new InsufficientDataException();
    }

    // The means and covariances are the inputs to most of the regression formulas.
    double xMean = xData.Mean;
    double yMean = yData.Mean;
    double xVariance = xData.Variance;
    double yVariance = yData.Variance;
    double xyCovariance = this.Covariance;

    Debug.Assert(xVariance >= 0.0);
    Debug.Assert(yVariance >= 0.0);

    // Compute the best-fit parameters.
    // Since cov(x,y) = (n S_xy - S_x S_y)/n^2 and var(x) = (n S_xx - S_x^2) / n^2,
    // these formulas are equivalent to the usual formulas for a and b involving sums,
    // but they are more stable against round-off.
    double slope = xyCovariance / xVariance;
    double intercept = yMean - slope * xMean;

    ColumnVector best = new ColumnVector(intercept, slope);
    best.IsReadOnly = true;

    // Compute the Pearson r value and its two-tailed test.
    double r = xyCovariance / Math.Sqrt(xVariance * yVariance);
    TestResult rTest = new TestResult(
        "r", r, TestType.TwoTailed, new Distributions.PearsonRDistribution(n)
    );

    // Compute the residuals and the residual (SSR) and fit (SSF) sums of squares.
    Sample residuals = new Sample();
    double SSR = 0.0;
    double SSF = 0.0;
    foreach (XY point in this) {
        double fitted = intercept + slope * point.X;
        double deviation = point.Y - fitted;
        SSR += deviation * deviation;
        residuals.Add(deviation);
        SSF += MoreMath.Sqr(fitted - yMean);
    }

    // Note SST = SSF + SSR because
    // \sum_i (y_i - \bar{y})^2 = \sum_i (y_i - f_i)^2 + \sum_i (f_i - \bar{y})^2
    double SST = yVariance * n;

    // Use sums-of-squares to do ANOVA.
    int residualDegreesOfFreedom = n - 2;
    AnovaRow fitRow = new AnovaRow(SSF, 1);
    AnovaRow residualRow = new AnovaRow(SSR, residualDegreesOfFreedom);
    AnovaRow totalRow = new AnovaRow(SST, n - 1);
    OneWayAnovaResult anova = new OneWayAnovaResult(fitRow, residualRow, totalRow);

    // Compute the covariance matrix of the parameters (index 0 = intercept, 1 = slope).
    double s2 = SSR / residualDegreesOfFreedom;
    double slopeVariance = s2 / xVariance / n;
    double interceptSlopeCovariance = -xMean * slopeVariance;
    double interceptVariance = (xVariance + xMean * xMean) * slopeVariance;

    SymmetricMatrix C = new SymmetricMatrix(2);
    C[0, 0] = interceptVariance;
    C[1, 1] = slopeVariance;
    C[0, 1] = interceptSlopeCovariance;
    C.IsReadOnly = true;

    // Package the parameters.
    string[] names = new string[] { "Intercept", "Slope" };
    ParameterCollection parameters = new ParameterCollection(names, best, C);

    // Prepare the prediction function. It captures the fitted line and the
    // quantities needed for the uncertainty of a predicted value at x.
    Func<double, UncertainValue> predict = (double x) => {
        double y = intercept + slope * x;
        double leverage = MoreMath.Sqr(x - xMean) / xVariance;
        double uncertainty = Math.Sqrt(s2 * (1.0 + (1.0 + leverage) / n));
        return new UncertainValue(y, uncertainty);
    };

    return new LinearRegressionResult(parameters, rTest, anova, residuals, predict);
}
// The internal linear regression routine, which assumes inputs are entirely valid.
// Fits the column at outputIndex as a linear function of all the other columns
// plus an intercept. In the design matrix the output column's slot is replaced by
// a column of ones, so the coefficient at position outputIndex is the intercept
// and every other position holds the coefficient of the corresponding input column.
// Throws InsufficientDataException when there are not more data points than parameters.
private MultiLinearRegressionResult LinearRegression_Internal(int outputIndex) {

    int n = Count;
    int m = Dimension;

    // To do a fit, we need more data than parameters. With n == m the residual
    // degrees of freedom (n - m) would be zero and sigma2 below would be computed
    // as a division by zero, giving NaN or Infinity, so that case is rejected too.
    if (n <= m) {
        throw new InsufficientDataException();
    }

    // Compute the n x m design matrix X.
    RectangularMatrix X = new RectangularMatrix(n, m);
    ColumnVector y = new ColumnVector(n);
    for (int i = 0; i < n; i++) {
        for (int j = 0; j < m; j++) {
            if (j == outputIndex) {
                // The intercept takes the place of the output column.
                X[i, j] = 1.0;
            } else {
                X[i, j] = storage[j][i];
            }
        }
        y[i] = storage[outputIndex][i];
    }

    // Use X = QR to solve X b = y and compute C.
    ColumnVector b;
    SymmetricMatrix C;
    QRDecomposition.SolveLinearSystem(X, y, out b, out C);

    // Compute the residuals, the residual sum of squares (SSR)
    // and the fit sum of squares (SSF).
    double yMean = storage[outputIndex].Mean;
    double SSR = 0.0;
    double SSF = 0.0;
    ColumnVector yHat = X * b;
    Sample residuals = new Sample();
    for (int i = 0; i < n; i++) {
        double z = storage[outputIndex][i] - yHat[i];
        residuals.Add(z);
        SSR += z * z;
        SSF += MoreMath.Sqr(yHat[i] - yMean);
    }

    // n - m > 0 is guaranteed by the guard at the top.
    int residualDegreesOfFreedom = n - m;
    double sigma2 = SSR / residualDegreesOfFreedom;

    // Scale up C by \sigma^2. Only the upper triangle is visited because C is symmetric.
    for (int i = 0; i < m; i++) {
        for (int j = i; j < m; j++) {
            C[i, j] *= sigma2;
        }
    }

    // Compute the remaining sum of squares.
    double SST = storage[outputIndex].Variance * n;

    // Use sums-of-squares to do ANOVA.
    AnovaRow fit = new AnovaRow(SSF, m - 1);
    AnovaRow residual = new AnovaRow(SSR, residualDegreesOfFreedom);
    AnovaRow total = new AnovaRow(SST, n - 1);
    OneWayAnovaResult anova = new OneWayAnovaResult(fit, residual, total);

    // Name each coefficient: "Intercept" in the output column's slot,
    // "[j]" for the coefficient of input column j.
    string[] names = new string[m];
    for (int j = 0; j < m; j++) {
        if (j == outputIndex) {
            names[j] = "Intercept";
        } else {
            names[j] = $"[{j}]";
        }
    }

    ParameterCollection parameters = new ParameterCollection(names, b, C);
    return new MultiLinearRegressionResult(parameters, anova, residuals);
}