/// <summary>
/// Fits the data to a linear combination of the given functions, scaling each data point
/// by the inverse of its y-uncertainty.
/// </summary>
/// <param name="functions">The component functions.</param>
/// <returns>A fit result containing the best-fit coefficients of the component functions and a χ<sup>2</sup> test
/// of the quality of the fit.</returns>
/// <exception cref="ArgumentNullException"><paramref name="functions"/> is null.</exception>
/// <exception cref="InsufficientDataException">There are fewer data points than fit parameters.</exception>
public UncertainMeasurementFitResult FitToLinearFunction(Func<T, double>[] functions) {
    if (functions == null) {
        throw new ArgumentNullException(nameof(functions));
    }
    if (data.Count < functions.Length) {
        throw new InsufficientDataException();
    }

    int pointCount = data.Count;
    int termCount = functions.Length;

    // Fill the design matrix and the right-hand side row by row; every row is
    // divided by that point's y-uncertainty.
    RectangularMatrix design = new RectangularMatrix(pointCount, termCount);
    ColumnVector rhs = new ColumnVector(pointCount);
    for (int row = 0; row < pointCount; row++) {
        for (int col = 0; col < termCount; col++) {
            design[row, col] = functions[col](data[row].X) / data[row].Y.Uncertainty;
        }
        rhs[row] = data[row].Y.Value / data[row].Y.Uncertainty;
    }

    // Solve the scaled system via QR; both outputs are handed to the parameter collection below.
    ColumnVector coefficients;
    SymmetricMatrix covariance;
    QRDecomposition.SolveLinearSystem(design, rhs, out coefficients, out covariance);

    // Accumulate chi^2 as the sum of squared, uncertainty-scaled deviations from the fitted curve.
    double chi2 = 0.0;
    for (int i = 0; i < pointCount; i++) {
        double predicted = 0.0;
        for (int j = 0; j < termCount; j++) {
            predicted += functions[j](data[i].X) * coefficients[j];
        }
        double scaledDeviation = (data[i].Y.Value - predicted) / data[i].Y.Uncertainty;
        chi2 += Math.Pow(scaledDeviation, 2);
    }
    ChiSquaredDistribution nullDistribution = new ChiSquaredDistribution(pointCount - termCount);
    TestResult test = new TestResult("χ²", chi2, nullDistribution, TestType.RightTailed);

    // Parameters are named by the index of their component function.
    string[] names = new string[termCount];
    for (int k = 0; k < names.Length; k++) {
        names[k] = k.ToString();
    }

    return new UncertainMeasurementFitResult(new ParameterCollection(names, coefficients, covariance), test);
}
/// <summary>
/// Computes the polynomial of given degree which best fits the data.
/// </summary>
/// <param name="m">The degree, which must be non-negative.</param>
/// <returns>A fit result holding the m + 1 polynomial coefficients (constant term first), the matrix
/// produced by the QR solve scaled by the residual variance estimate, and an F-test of the fit.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="m"/> is negative.</exception>
/// <exception cref="InsufficientDataException">There are fewer data points than coefficients to be fit.</exception>
public FitResult PolynomialRegression(int m) {
    if (m < 0) {
        throw new ArgumentOutOfRangeException(nameof(m));
    }
    int n = Count;
    if (n < m + 1) {
        throw new InsufficientDataException();
    }

    // Construct the n x (m + 1) design matrix A_{ij} = x_{i}^{j}.
    // Each power is obtained from the previous column by one multiplication.
    RectangularMatrix A = new RectangularMatrix(n, m + 1);
    ColumnVector y = new ColumnVector(n);
    for (int i = 0; i < n; i++) {
        double x = xData[i];
        A[i, 0] = 1.0;
        for (int j = 1; j <= m; j++) {
            A[i, j] = A[i, j - 1] * x;
        }
        y[i] = yData[i];
    }

    ColumnVector a;
    SymmetricMatrix C;
    QRDecomposition.SolveLinearSystem(A, y, out a, out C);

    // Degrees of freedom left over after fitting m + 1 coefficients.
    // NOTE(review): this is zero when n == m + 1, which makes ss2 below a division by zero;
    // confirm whether that case should be rejected up front.
    int residualDof = n - (m + 1);

    // Compute the residual vector r and s^2 = r^2 / dof
    ColumnVector r = A * a - y;
    double V = r.Transpose() * r;
    double ss2 = V / residualDof;

    // Scale up the covariance by s^2 (upper triangle only; C is symmetric)
    for (int i = 0; i <= m; i++) {
        for (int j = i; j <= m; j++) {
            C[i, j] = C[i, j] * ss2;
        }
    }

    // Compute F-statistic
    // total variance dof = n - 1, explained variance dof = m, unexplained variance dof = n - (m + 1)
    // NOTE(review): for m == 0 the explained dof is zero; presumably the F-test is not meaningful there - confirm.
    double totalVarianceSum = yData.Variance * n;
    double unexplainedVarianceSum = V;
    double explainedVarianceSum = totalVarianceSum - unexplainedVarianceSum;
    double unexplainedVarianceDof = residualDof;
    double explainedVarianceDof = m;
    double F = (explainedVarianceSum / explainedVarianceDof) / (unexplainedVarianceSum / unexplainedVarianceDof);
    TestResult test = new TestResult("F", F, TestType.RightTailed, new FisherDistribution(explainedVarianceDof, unexplainedVarianceDof));

    return new FitResult(a, C, test);
}
/// <summary>
/// Fits the data to a linear combination of fit functions.
/// </summary>
/// <param name="functions">The component functions.</param>
/// <returns>A fit result containing the best-fit coefficients of the component functions and a χ<sup>2</sup> test
/// of the quality of the fit.</returns>
/// <exception cref="ArgumentNullException"><paramref name="functions"/> is null.</exception>
/// <exception cref="InsufficientDataException">There are fewer data points than fit parameters.</exception>
public FitResult FitToLinearFunction(Func<T, double>[] functions) {
    if (functions == null) {
        throw new ArgumentNullException(nameof(functions));
    }
    if (functions.Length > data.Count) {
        throw new InsufficientDataException();
    }

    // Construct the design matrix; each row is divided by that point's y-uncertainty.
    RectangularMatrix A = new RectangularMatrix(data.Count, functions.Length);
    for (int r = 0; r < data.Count; r++) {
        for (int c = 0; c < functions.Length; c++) {
            A[r, c] = functions[c](data[r].X) / data[r].Y.Uncertainty;
        }
    }

    // Construct the right-hand side, scaled the same way as the matrix rows.
    ColumnVector b = new ColumnVector(data.Count);
    for (int r = 0; r < data.Count; r++) {
        b[r] = data[r].Y.Value / data[r].Y.Uncertainty;
    }

    // Solve the system via QR.
    ColumnVector a;
    SymmetricMatrix C;
    QRDecomposition.SolveLinearSystem(A, b, out a, out C);

    // Do a chi^2 test: sum the squared, uncertainty-scaled deviations of the data from the fitted curve.
    double chi2 = 0.0;
    for (int i = 0; i < data.Count; i++) {
        double f = 0.0;
        for (int j = 0; j < functions.Length; j++) {
            f += functions[j](data[i].X) * a[j];
        }
        chi2 += Math.Pow((data[i].Y.Value - f) / data[i].Y.Uncertainty, 2);
    }
    // Degrees of freedom = number of data points minus number of fitted coefficients.
    TestResult test = new TestResult("ChiSquare", chi2, TestType.RightTailed, new ChiSquaredDistribution(data.Count - functions.Length));

    // Return the results.
    FitResult fit = new FitResult(a, C, test);
    return fit;
}
/// <summary>
/// Builds a multi-linear regression result from a response column and a set of predictor columns.
/// </summary>
/// <param name="yColumn">The response values.</param>
/// <param name="xColumns">The predictor columns; a null entry stands for the intercept column of ones.</param>
/// <param name="xNames">The names matching <paramref name="xColumns"/>; the intercept entry is named "Intercept".</param>
/// <exception cref="InsufficientDataException">The number of rows does not exceed the number of columns.</exception>
/// <exception cref="DimensionMismatchException">A predictor column's length differs from the response's.</exception>
internal MultiLinearRegressionResult(IReadOnlyList<double> yColumn, IReadOnlyList<IReadOnlyList<double>> xColumns, IReadOnlyList<string> xNames) : base() {
    Debug.Assert(yColumn != null);
    Debug.Assert(xColumns != null);
    Debug.Assert(xColumns.Count > 0);
    Debug.Assert(xNames.Count == xColumns.Count);

    n = yColumn.Count;
    m = xColumns.Count;
    if (n <= m) {
        throw new InsufficientDataException();
    }

    // Assemble the n x m design matrix, one column at a time.
    interceptIndex = -1;
    RectangularMatrix design = new RectangularMatrix(n, m);
    for (int col = 0; col < m; col++) {
        IReadOnlyList<double> source = xColumns[col];

        if (source != null) {
            // Ordinary predictor column: copy its values.
            Debug.Assert(xNames[col] != null);
            if (source.Count != n) {
                throw new DimensionMismatchException();
            }
            for (int row = 0; row < n; row++) {
                design[row, col] = source[row];
            }
            continue;
        }

        // A null column is the intercept placeholder; only one is expected.
        Debug.Assert(xNames[col] == "Intercept");
        Debug.Assert(interceptIndex < 0);
        for (int row = 0; row < n; row++) {
            design[row, col] = 1.0;
        }
        interceptIndex = col;
    }
    Debug.Assert(interceptIndex >= 0);

    // QR-based solve of (design) b = y, which also produces C.
    ColumnVector response = new ColumnVector(yColumn);
    QRDecomposition.SolveLinearSystem(design, response, out b, out C);

    // Moments of y feed the ANOVA: the mean is used below, the third output is stored as SST.
    int sampleCount;
    double yMean;
    Univariate.ComputeMomentsUpToSecond(yColumn, out sampleCount, out yMean, out SST);

    // Residuals, residual sum of squares (SSR) and fit sum of squares (SSF).
    SSR = 0.0;
    SSF = 0.0;
    ColumnVector fitted = design * b;
    residuals = new List<double>(n);
    for (int row = 0; row < n; row++) {
        double residual = yColumn[row] - fitted[row];
        residuals.Add(residual);
        SSR += residual * residual;
        SSF += MoreMath.Sqr(fitted[row] - yMean);
    }
    sigma2 = SSR / (n - m);

    // Multiply the upper triangle of C by sigma^2, entry by entry.
    for (int row = 0; row < m; row++) {
        for (int col = row; col < m; col++) {
            C[row, col] *= sigma2;
        }
    }

    names = xNames;
}
/// <summary>
/// Builds a polynomial regression result of the given degree from paired x and y values.
/// </summary>
/// <param name="x">The abscissa values.</param>
/// <param name="y">The ordinate values; asserted to have the same count as <paramref name="x"/>.</param>
/// <param name="degree">The polynomial degree; asserted to be non-negative.</param>
/// <exception cref="InsufficientDataException">There are fewer data points than coefficients to be fit.</exception>
internal PolynomialRegressionResult(IReadOnlyList<double> x, IReadOnlyList<double> y, int degree) : base() {
    Debug.Assert(x != null);
    Debug.Assert(y != null);
    Debug.Assert(x.Count == y.Count);
    Debug.Assert(degree >= 0);

    m = degree;
    n = x.Count;
    if (n < (m + 1)) {
        throw new InsufficientDataException();
    }

    // Design matrix with n rows and m + 1 columns: entry (row, p) holds x_row raised to the power p.
    RectangularMatrix design = new RectangularMatrix(n, m + 1);
    ColumnVector response = new ColumnVector(n);
    for (int row = 0; row < n; row++) {
        double abscissa = x[row];
        design[row, 0] = 1.0;
        for (int power = 1; power <= m; power++) {
            // Each power comes from the previous column times x.
            design[row, power] = design[row, power - 1] * abscissa;
        }
        response[row] = y[row];
    }

    // QR-based solve of (design) b = y, which also produces C.
    QRDecomposition.SolveLinearSystem(design, response, out b, out C);

    // Reuse the Univariate helper for the mean of y; its count output is written back into n
    // and its third output is stored as SST.
    double yMean;
    Univariate.ComputeMomentsUpToSecond(y, out n, out yMean, out SST);

    // Residuals, residual sum of squares (SSR) and fit sum of squares (SSF).
    SSR = 0.0;
    SSF = 0.0;
    ColumnVector fitted = design * b;
    residuals = new List<double>(n);
    for (int row = 0; row < n; row++) {
        double residual = y[row] - fitted[row];
        residuals.Add(residual);
        SSR += residual * residual;
        SSF += MoreMath.Sqr(fitted[row] - yMean);
    }
    sigma2 = SSR / (n - (m + 1));

    // Multiply the upper triangle of C by sigma^2, entry by entry.
    for (int row = 0; row <= m; row++) {
        for (int col = row; col <= m; col++) {
            C[row, col] *= sigma2;
        }
    }
}
/// <summary>
/// Computes the polynomial of given degree which best fits the data.
/// </summary>
/// <param name="m">The degree, which must be non-negative.</param>
/// <returns>The fit result, carrying the m + 1 coefficients (named "1", "x", "x^2", ...),
/// an ANOVA table built from the fit, residual, and total sums of squares, and the residuals.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="m"/> is negative.</exception>
/// <exception cref="InsufficientDataException">There are fewer data points than coefficients to be fit.</exception>
public PolynomialRegressionResult PolynomialRegression(int m) {
    if (m < 0) {
        throw new ArgumentOutOfRangeException(nameof(m));
    }
    int n = Count;
    if (n < (m + 1)) {
        throw new InsufficientDataException();
    }

    // Construct the n x (m + 1) design matrix X_{ij} = x_{i}^{j}.
    // Each power is obtained from the previous column by one multiplication.
    RectangularMatrix X = new RectangularMatrix(n, m + 1);
    ColumnVector y = new ColumnVector(n);
    for (int i = 0; i < n; i++) {
        double x = xData[i];
        X[i, 0] = 1.0;
        for (int j = 1; j <= m; j++) {
            X[i, j] = X[i, j - 1] * x;
        }
        y[i] = yData[i];
    }

    // Use X = QR to solve X b = y and compute C
    ColumnVector b;
    SymmetricMatrix C;
    QRDecomposition.SolveLinearSystem(X, y, out b, out C);

    // Degrees of freedom left over after fitting m + 1 coefficients.
    // NOTE(review): this is zero when n == m + 1, which makes sigma2 below a division by zero;
    // confirm whether that case should be rejected up front.
    int residualDof = n - (m + 1);

    // The mean of y does not change inside the loop, so read it once.
    double yMean = yData.Mean;

    // Compute residuals, the residual sum of squares (SSR), and the fit sum of squares (SSF)
    double SSR = 0.0;
    double SSF = 0.0;
    ColumnVector yHat = X * b;
    Sample residuals = new Sample();
    for (int i = 0; i < n; i++) {
        double z = yData[i] - yHat[i];
        residuals.Add(z);
        SSR += z * z;
        SSF += MoreMath.Sqr(yHat[i] - yMean);
    }
    double sigma2 = SSR / residualDof;

    // Scale up C by \sigma^2 (upper triangle only; C is symmetric)
    // (It sure would be great to be able to overload *=.)
    for (int i = 0; i <= m; i++) {
        for (int j = i; j <= m; j++) {
            C[i, j] = C[i, j] * sigma2;
        }
    }

    // Compute remaining sums-of-squares
    double SST = yData.Variance * n;

    // Use sums-of-squares to do ANOVA
    AnovaRow fit = new AnovaRow(SSF, m);
    AnovaRow residual = new AnovaRow(SSR, residualDof);
    AnovaRow total = new AnovaRow(SST, n - 1);
    OneWayAnovaResult anova = new OneWayAnovaResult(fit, residual, total);

    // Name the coefficients after the monomial they multiply: 1, x, x^2, ...
    string[] names = new string[m + 1];
    names[0] = "1";
    if (m > 0) {
        names[1] = "x";
    }
    for (int i = 2; i <= m; i++) {
        names[i] = $"x^{i}";
    }
    ParameterCollection parameters = new ParameterCollection(names, b, C);

    return new PolynomialRegressionResult(parameters, anova, residuals);
}
// The internal linear regression routine, which assumes inputs are entirely valid.
// Regresses the column at outputIndex on all other columns plus an intercept: in the design
// matrix the output column's slot is filled with ones, so the coefficient at outputIndex is
// the intercept. Returns the named parameters, an ANOVA table, and the residuals.
// Throws InsufficientDataException unless there are strictly more entries than columns.
private MultiLinearRegressionResult LinearRegression_Internal(int outputIndex) {

    // To do a fit, we need more data than parameters. With Count == Dimension the residual
    // degrees of freedom (n - m) would be zero and sigma2 below would be a division by zero,
    // so the equality case is rejected as well.
    if (Count <= Dimension) {
        throw new InsufficientDataException();
    }

    int n = Count;
    int m = Dimension;

    // The output column is read many times below; look it up once.
    var output = storage[outputIndex];

    // Compute the design matrix X.
    RectangularMatrix X = new RectangularMatrix(n, m);
    ColumnVector y = new ColumnVector(n);
    for (int i = 0; i < n; i++) {
        for (int j = 0; j < m; j++) {
            if (j == outputIndex) {
                // The output column's slot carries the intercept term.
                X[i, j] = 1.0;
            } else {
                X[i, j] = storage[j][i];
            }
        }
        y[i] = output[i];
    }

    // Use X = QR to solve X b = y and compute C.
    ColumnVector b;
    SymmetricMatrix C;
    QRDecomposition.SolveLinearSystem(X, y, out b, out C);

    // The mean of the output column does not change inside the loop, so read it once.
    double yMean = output.Mean;

    // Compute residuals, the residual sum of squares (SSR), and the fit sum of squares (SSF)
    double SSR = 0.0;
    double SSF = 0.0;
    ColumnVector yHat = X * b;
    Sample residuals = new Sample();
    for (int i = 0; i < n; i++) {
        double z = output[i] - yHat[i];
        residuals.Add(z);
        SSR += z * z;
        SSF += MoreMath.Sqr(yHat[i] - yMean);
    }
    double sigma2 = SSR / (n - m);

    // Scale up C by \sigma^2 (upper triangle only; C is symmetric)
    // (It sure would be great to be able to overload *=.)
    for (int i = 0; i < m; i++) {
        for (int j = i; j < m; j++) {
            C[i, j] = C[i, j] * sigma2;
        }
    }

    // Compute remaining sums-of-squares
    double SST = output.Variance * n;

    // Use sums-of-squares to do ANOVA
    AnovaRow fit = new AnovaRow(SSF, m - 1);
    AnovaRow residual = new AnovaRow(SSR, n - m);
    AnovaRow total = new AnovaRow(SST, n - 1);
    OneWayAnovaResult anova = new OneWayAnovaResult(fit, residual, total);

    // Name each coefficient after its column index, except the intercept.
    string[] names = new string[m];
    for (int j = 0; j < m; j++) {
        if (j == outputIndex) {
            names[j] = "Intercept";
        } else {
            names[j] = $"[{j}]";
        }
    }
    ParameterCollection parameters = new ParameterCollection(names, b, C);

    return new MultiLinearRegressionResult(parameters, anova, residuals);
}