/// <summary>
/// Performs a linear logistic regression analysis.
/// </summary>
/// <param name="outputIndex">The index of the column to predict.</param>
/// <returns>A fit result containing the best-fit coefficients of the linear logit model.</returns>
/// <remarks>Logistic linear regression is suited to situations where multiple input variables, either continuous or binary indicators, are used to predict
/// the value of a binary output variable. Like a linear regression, a logistic linear regression tries to find a model that predicts the output variable using
/// a linear combination of input variables. Unlike a simple linear regression, the model does not assume that this linear
/// function predicts the output directly; instead it assumes that this function value is then fed into a logit link function, which
/// maps the real numbers into the interval (0, 1), and interprets the value of this link function as the probability of obtaining success value
/// for the output variable.</remarks>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="outputIndex"/> is not a valid column index.</exception>
/// <exception cref="InvalidOperationException">The column to be predicted contains values other than 0 and 1.</exception>
/// <exception cref="InsufficientDataException">There are not more rows in the sample than columns.</exception>
public FitResult LogisticLinearRegression(int outputIndex) {

    if ((outputIndex < 0) || (outputIndex >= this.Dimension)) throw new ArgumentOutOfRangeException(nameof(outputIndex));
    if (this.Count <= this.Dimension) throw new InsufficientDataException();

    // Define the log likelihood as a function of the parameter set.
    // The coefficient slot at outputIndex doubles as the intercept term,
    // since that column is the one being predicted and contributes no input value.
    Func<IList<double>, double> logLikelihood = (IList<double> a) => {
        double L = 0.0;
        for (int k = 0; k < this.Count; k++) {

            // z is the linear predictor for row k.
            double z = 0.0;
            for (int i = 0; i < this.storage.Length; i++) {
                if (i == outputIndex) {
                    z += a[i];
                } else {
                    z += a[i] * this.storage[i][k];
                }
            }
            double ez = Math.Exp(z);

            double y = this.storage[outputIndex][k];
            if (y == 0.0) {
                // P(y = 0) = 1 / (1 + e^z), so the log-likelihood contribution is -log(1 + e^z).
                L -= Math.Log(1.0 + ez);
            } else if (y == 1.0) {
                // P(y = 1) = e^z / (1 + e^z), so the contribution is -log(1 + e^{-z}).
                L -= Math.Log(1.0 + 1.0 / ez);
            } else {
                // The output column must be strictly binary.
                throw new InvalidOperationException();
            }

        }
        return (L);
    };

    // Start from all-zero coefficients, i.e. predicted probability 1/2 for every row.
    double[] start = new double[this.Dimension];

    MultiExtremum maximum = MultiFunctionMath.FindLocalMaximum(logLikelihood, start);

    // Invert the Hessian at the maximum to obtain the covariance matrix.
    // As in the other fit methods, a failed Cholesky decomposition indicates a
    // degenerate curvature matrix; previously this would have surfaced as a
    // NullReferenceException.
    CholeskyDecomposition CD = maximum.HessianMatrix.CholeskyDecomposition();
    if (CD == null) throw new DivideByZeroException();
    FitResult result = new FitResult(maximum.Location, CD.Inverse(), null);

    return (result);
}
/// <summary>
/// Fits the data to an arbitrary parameterized function.
/// </summary>
/// <param name="function">The fit function.</param>
/// <param name="start">An initial guess at the parameters.</param>
/// <returns>A fit result containing the best-fitting function parameters
/// and a &#x3C7;<sup>2</sup> test of the quality of the fit.</returns>
/// <exception cref="ArgumentNullException"><paramref name="function"/> or <paramref name="start"/> are <see langword="null"/>.</exception>
/// <exception cref="InsufficientDataException">There are fewer data points than fit parameters.</exception>
/// <exception cref="DivideByZeroException">The curvature matrix is singular, indicating that the data is independent of
/// one or more parameters, or that two or more parameters are linearly dependent.</exception>
public FitResult FitToFunction(Func<double[], T, double> function, double[] start) {

    if (function == null) throw new ArgumentNullException(nameof(function));
    if (start == null) throw new ArgumentNullException(nameof(start));

    // A fit requires at least as many data points as parameters.
    if (this.Count < start.Length) throw new InsufficientDataException();

    // Build a chi^2 fit metric over the data and minimize it over parameter space.
    FitMetric<T> chiSquaredMetric = new FitMetric<T>(this, function);
    SpaceExtremum extremum = FunctionMath.FindMinimum(new Func<double[], double>(chiSquaredMetric.Evaluate), start);

    // The covariance matrix is the inverse of half the curvature at the minimum.
    // A failed Cholesky decomposition means we were not at a genuine minimum.
    SymmetricMatrix halfCurvature = 0.5 * extremum.Curvature();
    CholeskyDecomposition cholesky = halfCurvature.CholeskyDecomposition();
    if (cholesky == null) throw new DivideByZeroException();
    SymmetricMatrix covariance = cholesky.Inverse();

    // The minimum chi^2 value itself provides the goodness-of-fit statistic.
    TestResult goodnessOfFit = new TestResult(
        "ChiSquare", extremum.Value, TestType.RightTailed,
        new ChiSquaredDistribution(this.Count - extremum.Dimension)
    );

    return (new FitResult(extremum.Location(), covariance, goodnessOfFit));
}
/// <summary>
/// Finds the parameterized function that best fits the data.
/// </summary>
/// <param name="f">The parameterized function.</param>
/// <param name="start">An initial guess for the parameters.</param>
/// <returns>The fit result.</returns>
/// <remarks>
/// <para>
/// In the returned <see cref="FitResult"/>, the parameters appear in the same order as in
/// the supplied fit function and initial guess vector. No goodness-of-fit test is returned.
/// </para>
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="f"/> or <paramref name="start"/> is null.</exception>
/// <exception cref="InsufficientDataException">There are not more data points than fit parameters.</exception>
/// <exception cref="DivideByZeroException">The curvature matrix is singular, indicating that the data is independent of
/// one or more parameters, or that two or more parameters are linearly dependent.</exception>
public FitResult NonlinearRegression(Func<IList<double>, double, double> f, IList<double> start) {

    if (f == null) throw new ArgumentNullException(nameof(f));
    if (start == null) throw new ArgumentNullException(nameof(start));

    int count = this.Count;
    int dimension = start.Count;
    if (count <= dimension) throw new InsufficientDataException();

    // Least-squares objective: the sum of squared residuals as a function of the parameters.
    Func<IList<double>, double> sumOfSquaredResiduals = (IList<double> parameters) => {
        double total = 0.0;
        for (int i = 0; i < count; i++) {
            double residual = yData[i] - f(parameters, xData[i]);
            total += residual * residual;
        }
        return (total);
    };
    MultiExtremum minimum = MultiFunctionMath.FindLocalMinimum(sumOfSquaredResiduals, start);

    // Invert the Hessian at the minimum; a failed decomposition indicates a degenerate fit.
    CholeskyDecomposition decomposition = minimum.HessianMatrix.CholeskyDecomposition();
    if (decomposition == null) throw new DivideByZeroException();

    // Scale the inverse Hessian by the residual variance estimate to obtain the covariance matrix.
    SymmetricMatrix covariance = decomposition.Inverse();
    covariance = (2.0 * minimum.Value / (count - dimension)) * covariance;

    return (new FitResult(minimum.Location, covariance, null));
}
/// <summary>
/// Fits the data to a linear combination of fit functions.
/// </summary>
/// <param name="functions">The component functions.</param>
/// <returns>A fit result containing the best-fit coefficients of the component functions and a &#x3C7;<sup>2</sup> test
/// of the quality of the fit.</returns>
/// <exception cref="ArgumentNullException"><paramref name="functions"/> is null.</exception>
/// <exception cref="InsufficientDataException">There are fewer data points than fit parameters.</exception>
public FitResult FitToLinearFunction(Func<T, double>[] functions) {

    if (functions == null) throw new ArgumentNullException(nameof(functions));
    if (functions.Length > data.Count) throw new InsufficientDataException();

    int rows = data.Count;
    int cols = functions.Length;

    // Build the uncertainty-weighted design matrix and right-hand side in one pass.
    RectangularMatrix design = new RectangularMatrix(rows, cols);
    ColumnVector rhs = new ColumnVector(rows);
    for (int r = 0; r < rows; r++) {
        double uncertainty = data[r].Y.Uncertainty;
        for (int c = 0; c < cols; c++) {
            design[r, c] = functions[c](data[r].X) / uncertainty;
        }
        rhs[r] = data[r].Y.Value / uncertainty;
    }

    // Solve the weighted least-squares system via QR decomposition, which also
    // yields the covariance matrix of the coefficients.
    ColumnVector coefficients;
    SymmetricMatrix covariance;
    QRDecomposition.SolveLinearSystem(design, rhs, out coefficients, out covariance);

    // Measure goodness-of-fit of the best-fit linear combination via chi^2.
    double chi2 = 0.0;
    for (int i = 0; i < rows; i++) {
        double predicted = 0.0;
        for (int j = 0; j < cols; j++) {
            predicted += functions[j](data[i].X) * coefficients[j];
        }
        chi2 += Math.Pow((data[i].Y.Value - predicted) / data[i].Y.Uncertainty, 2);
    }
    TestResult test = new TestResult("ChiSquare", chi2, TestType.RightTailed, new ChiSquaredDistribution(rows - cols));

    return (new FitResult(coefficients, covariance, test));
}
/// <summary>
/// Fits an MA(1) model to the time series.
/// </summary>
/// <returns>The fit with parameters lag-1 coefficient, mean, and standard deviation.</returns>
public FitResult FitToMA1() {

    if (data.Count < 4) throw new InsufficientDataException();

    // The MA(1) model is
    //   y_t - \mu = u_t + \beta u_{t-1}
    // where u_t ~ N(0, \sigma) are IID. It's easy to show that
    //   m = E(y_t) = \mu
    //   c_0 = V(y_t) = E((y_t - m)^2) = (1 + \beta^2) \sigma^2
    //   c_1 = V(y_t, y_{t-1}) = \beta \sigma^2
    // so the method of moments gives
    //   \beta = \frac{1 - \sqrt{1 - (2 g_1)^2}}{2}
    //   \mu = m
    //   \sigma^2 = \frac{c_0}{1 + \beta^2}
    // These turn out to be very poor (high bias, high variance) estimators,
    // but they illustrate a basic requirement that g_1 < 1/2.
    // We use the MLE instead: condition on u_0 = 0 and minimize the sum of
    // squared innovations as a function of the lag-1 coefficient.

    int n = data.Count;
    double mean = data.Mean;

    Func<double, double> sumOfSquaredInnovations = (double theta) => {
        double total = 0.0;
        double previousInnovation = 0.0;
        for (int i = 0; i < data.Count; i++) {
            double innovation = data[i] - mean - theta * previousInnovation;
            total += innovation * innovation;
            previousInnovation = innovation;
        }
        return (total);
    };
    Extremum minimum = FunctionMath.FindMinimum(sumOfSquaredInnovations, Interval.FromEndpoints(-1.0, 1.0));

    double beta = minimum.Location;
    double sigma2 = minimum.Value / (data.Count - 3);

    // While there is significant evidence that the MLE value for \beta is biased
    // for small n, I know of no analytic correction.
    double[] parameters = new double[] { beta, mean, Math.Sqrt(sigma2) };

    // The calculation of the variance for \mu can be improved over the MLE
    // result by plugging the values for \gamma_0 and \gamma_1 into the
    // exact formula.
    SymmetricMatrix covariances = new SymmetricMatrix(3);
    if (minimum.Curvature > 0.0) {
        covariances[0, 0] = sigma2 / minimum.Curvature;
    } else {
        // Fall back to the asymptotic variance if the numerical curvature is unusable.
        covariances[0, 0] = MoreMath.Sqr(1.0 - beta * beta) / n;
    }
    covariances[1, 1] = sigma2 * (MoreMath.Sqr(1.0 + beta) - 2.0 * beta / n) / n;
    covariances[2, 2] = sigma2 / 2.0 / n;

    // Reconstruct the innovation series and test it for leftover autocorrelation.
    TimeSeries residuals = new TimeSeries();
    double previous = 0.0;
    for (int i = 0; i < data.Count; i++) {
        double current = data[i] - mean - beta * previous;
        residuals.Add(current);
        previous = current;
    }
    TestResult test = residuals.LjungBoxTest();

    return (new FitResult(parameters, covariances, test));
}