Example #1
        /// <summary>
        /// Performs a linear logistic regression analysis.
        /// </summary>
        /// <param name="outputIndex">The index of the column to predict.</param>
        /// <returns>A fit result containing the maximum-likelihood estimates of the regression coefficients and their covariances.</returns>
        /// <remarks>Logistic linear regression is suited to situations where multiple input variables, either continuous or binary indicators, are used to predict
        /// the value of a binary output variable. Like a linear regression, a logistic linear regression tries to find a model that predicts the output variable using
        /// a linear combination of input variables. Unlike a simple linear regression, the model does not assume that this linear
        /// function predicts the output directly; instead it assumes that the function value is fed into the logistic function (the inverse of the logit link), which
        /// maps the real numbers into the interval (0, 1), and it interprets the resulting value as the probability of obtaining the success value (1)
        /// for the output variable.</remarks>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="outputIndex"/> is not a valid column index.</exception>
        /// <exception cref="InvalidOperationException">The column to be predicted contains values other than 0 and 1.</exception>
        /// <exception cref="InsufficientDataException">The sample does not contain more rows than columns.</exception>
        public FitResult LogisticLinearRegression(int outputIndex)
        {
            if ((outputIndex < 0) || (outputIndex >= this.Dimension))
            {
                throw new ArgumentOutOfRangeException(nameof(outputIndex));
            }
            if (this.Count <= this.Dimension)
            {
                throw new InsufficientDataException();
            }

            // Define the log likelihood as a function of the parameter set
            Func <IList <double>, double> logLikelihood = (IList <double> a) => {
                double L = 0.0;
                for (int k = 0; k < this.Count; k++)
                {
                    double z = 0.0;
                    for (int i = 0; i < this.storage.Length; i++)
                    {
                        if (i == outputIndex)
                        {
                            z += a[i];
                        }
                        else
                        {
                            z += a[i] * this.storage[i][k];
                        }
                    }
                    double ez = Math.Exp(z);

                    // The model probability of success is p = e^z / (1 + e^z), so the log-likelihood contribution
                    // is ln(1 - p) = -ln(1 + e^z) for y = 0 and ln(p) = -ln(1 + e^{-z}) for y = 1.
                    double y = this.storage[outputIndex][k];
                    if (y == 0.0)
                    {
                        L -= Math.Log(1.0 + ez);
                    }
                    else if (y == 1.0)
                    {
                        L -= Math.Log(1.0 + 1.0 / ez);
                    }
                    else
                    {
                        throw new InvalidOperationException();
                    }
                }
                return(L);
            };

            double[] start = new double[this.Dimension];
            //for (int i = 0; i < start.Length; i++) {
            //    if (i != outputIndex) start[i] = this.TwoColumns(i, outputIndex).Covariance / this.Column(i).Variance / this.Column(outputIndex).Variance;
            //}

            // Maximize the log likelihood; the covariance matrix of the estimates is obtained
            // from the curvature of the log likelihood at its maximum.
            MultiExtremum maximum = MultiFunctionMath.FindLocalMaximum(logLikelihood, start);

            FitResult result = new FitResult(maximum.Location, maximum.HessianMatrix.CholeskyDecomposition().Inverse(), null);

            return(result);
        }
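
        // Illustrative usage sketch (not part of the original source). It assumes this method lives on a
        // multivariate sample class with an Add(params double[]) row method and that FitResult exposes a
        // Parameter(int) accessor returning an uncertain value; adjust these names to the actual API.
        private static void LogisticRegressionUsageSketch()
        {
            MultivariateSample sample = new MultivariateSample(2);
            sample.Add(1.2, 0.0);
            sample.Add(2.1, 0.0);
            sample.Add(3.4, 1.0);
            sample.Add(4.7, 1.0);

            // Predict column 1 (the 0/1 outcome) from column 0.
            FitResult logit = sample.LogisticLinearRegression(1);

            // In this parameterization, the slot of the output column holds the intercept.
            double slope     = logit.Parameter(0).Value;
            double intercept = logit.Parameter(1).Value;
        }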
        /// <summary>
        /// Fits the data to an arbitrary parameterized function.
        /// </summary>
        /// <param name="function">The fit function.</param>
        /// <param name="start">An initial guess at the parameters.</param>
        /// <returns>A fit result containing the best-fitting function parameters
        /// and a &#x3C7;<sup>2</sup> test of the quality of the fit.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="function"/> or <paramref name="start"/> are <see langword="null"/>.</exception>
        /// <exception cref="InsufficientDataException">There are fewer data points than fit parameters.</exception>
        /// <exception cref="DivideByZeroException">The curvature matrix is singular, indicating that the data is independent of
        /// one or more parameters, or that two or more parameters are linearly dependent.</exception>
        public FitResult FitToFunction(Func <double[], T, double> function, double[] start)
        {
            if (function == null)
            {
                throw new ArgumentNullException(nameof(function));
            }
            if (start == null)
            {
                throw new ArgumentNullException(nameof(start));
            }

            // you can't do a fit with less data than parameters
            if (this.Count < start.Length)
            {
                throw new InsufficientDataException();
            }

            /*
             * Func<IList<double>, double> function0 = (IList<double> x0) => {
             *  double[] x = new double[x0.Count];
             *  x0.CopyTo(x, 0);
             *  return(function(x));
             * };
             * MultiExtremum minimum0 = MultiFunctionMath.FindMinimum(function0, start);
             */

            // create a chi^2 fit metric and minimize it
            FitMetric <T> metric  = new FitMetric <T>(this, function);
            SpaceExtremum minimum = FunctionMath.FindMinimum(new Func <double[], double>(metric.Evaluate), start);

            // The covariance matrix is the inverse of the curvature matrix (one half the Hessian of chi^2 at the minimum).
            SymmetricMatrix       A  = 0.5 * minimum.Curvature();
            CholeskyDecomposition CD = A.CholeskyDecomposition(); // should not return null if we were at a minimum

            if (CD == null)
            {
                throw new DivideByZeroException();
            }
            SymmetricMatrix C = CD.Inverse();

            // package up the results and return them
            TestResult test = new TestResult("ChiSquare", minimum.Value, TestType.RightTailed, new ChiSquaredDistribution(this.Count - minimum.Dimension));
            FitResult  fit  = new FitResult(minimum.Location(), C, test);

            return(fit);
        }
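
        // Illustrative usage sketch (not part of the original source). It assumes a measurement sample
        // class with an Add(x, y, dy) method; the exponential-decay model and starting values are invented
        // for the example.
        private static void FitToFunctionUsageSketch()
        {
            UncertainMeasurementSample<double> data = new UncertainMeasurementSample<double>();
            data.Add(1.0, 8.2, 0.3);
            data.Add(2.0, 6.6, 0.3);
            data.Add(3.0, 5.5, 0.3);
            data.Add(4.0, 4.4, 0.3);

            // Model: y = a[0] * exp(-x / a[1]), where a[0] is the amplitude and a[1] the decay constant.
            Func<double[], double, double> model = (double[] a, double x) => a[0] * Math.Exp(-x / a[1]);

            // The returned fit carries the best-fit parameters, their covariances, and a chi^2 test.
            FitResult fit = data.FitToFunction(model, new double[] { 10.0, 3.0 });
        }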
        /// <summary>
        /// Finds the parameterized function that best fits the data.
        /// </summary>
        /// <param name="f">The parameterized function.</param>
        /// <param name="start">An initial guess for the parameters.</param>
        /// <returns>The fit result.</returns>
        /// <remarks>
        /// <para>
        /// In the returned <see cref="FitResult"/>, the parameters appear in the same order as in
        /// the supplied fit function and initial guess vector. No goodness-of-fit test is returned.
        /// </para>
        /// </remarks>
        /// <exception cref="ArgumentNullException"><paramref name="f"/> or <paramref name="start"/> is null.</exception>
        /// <exception cref="InsufficientDataException">There are not more data points than fit parameters.</exception>
        /// <exception cref="DivideByZeroException">The curvature matrix is singular, indicating that the data is independent of
        /// one or more parameters, or that two or more parameters are linearly dependent.</exception>
        public FitResult NonlinearRegression(Func <IList <double>, double, double> f, IList <double> start)
        {
            if (f == null)
            {
                throw new ArgumentNullException(nameof(f));
            }
            if (start == null)
            {
                throw new ArgumentNullException(nameof(start));
            }

            int n = this.Count;
            int d = start.Count;

            if (n <= d)
            {
                throw new InsufficientDataException();
            }

            // Minimize the sum of squared residuals over the parameter space.
            MultiExtremum min = MultiFunctionMath.FindLocalMinimum((IList <double> a) => {
                double ss = 0.0;
                for (int i = 0; i < n; i++)
                {
                    double r = yData[i] - f(a, xData[i]);
                    ss      += r * r;
                }
                return(ss);
            }, start);

            // The parameter covariance matrix is the inverse of the Hessian of the sum of squares,
            // scaled by twice the estimated residual variance, 2 * SS_min / (n - d).
            CholeskyDecomposition cholesky = min.HessianMatrix.CholeskyDecomposition();

            if (cholesky == null)
            {
                throw new DivideByZeroException();
            }
            SymmetricMatrix covariance = cholesky.Inverse();

            covariance = (2.0 * min.Value / (n - d)) * covariance;

            FitResult result = new FitResult(min.Location, covariance, null);

            return(result);
        }
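
        // Illustrative usage sketch (not part of the original source). It assumes this method lives on a
        // paired-sample class (e.g. BivariateSample) filled via Add(x, y); the sigmoid model and starting
        // guesses are invented for the example.
        private static void NonlinearRegressionUsageSketch()
        {
            BivariateSample sample = new BivariateSample();
            sample.Add(0.0, 0.1);
            sample.Add(1.0, 0.3);
            sample.Add(2.0, 0.7);
            sample.Add(3.0, 0.9);

            // Model: y = 1 / (1 + exp(-(x - a[0]) / a[1])).
            Func<IList<double>, double, double> model =
                (IList<double> a, double x) => 1.0 / (1.0 + Math.Exp(-(x - a[0]) / a[1]));

            FitResult fit = sample.NonlinearRegression(model, new double[] { 1.5, 1.0 });
        }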
        /// <summary>
        /// Fits the data to a linear combination of fit functions.
        /// </summary>
        /// <param name="functions">The component functions.</param>
        /// <returns>A fit result containing the best-fit coefficients of the component functions and a &#x3C7;<sup>2</sup> test
        /// of the quality of the fit.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="functions"/> is null.</exception>
        /// <exception cref="InsufficientDataException">There are fewer data points than fit parameters.</exception>
        public FitResult FitToLinearFunction(Func <T, double>[] functions)
        {
            if (functions == null)
            {
                throw new ArgumentNullException(nameof(functions));
            }
            if (functions.Length > data.Count)
            {
                throw new InsufficientDataException();
            }

            // Construct the design matrix; each row is weighted by 1/sigma so that the
            // least-squares solution of A a = b minimizes chi^2.
            RectangularMatrix A = new RectangularMatrix(data.Count, functions.Length);

            for (int r = 0; r < data.Count; r++)
            {
                for (int c = 0; c < functions.Length; c++)
                {
                    A[r, c] = functions[c](data[r].X) / data[r].Y.Uncertainty;
                }
            }

            // construct the right-hand side (the data values, likewise weighted by 1/sigma)
            ColumnVector b = new ColumnVector(data.Count);

            for (int r = 0; r < data.Count; r++)
            {
                b[r] = data[r].Y.Value / data[r].Y.Uncertainty;
            }

            // Solve the system via QR
            ColumnVector    a;
            SymmetricMatrix C;

            QRDecomposition.SolveLinearSystem(A, b, out a, out C);

            /*
             * // construct the data matrix
             * SymmetricMatrix A = new SymmetricMatrix(functions.Length);
             * for (int i = 0; i < A.Dimension; i++) {
             *  for (int j = 0; j <= i; j++) {
             *      double Aij = 0.0;
             *      for (int k = 0; k < data.Count; k++) {
             *          Aij += functions[i](data[k].X) * functions[j](data[k].X) / Math.Pow(data[k].Y.Uncertainty, 2);
             *      }
             *      A[i, j] = Aij;
             *  }
             * }
             *
             * // construct the rhs
             * double[] b = new double[functions.Length];
             * for (int i = 0; i < b.Length; i++) {
             *  b[i] = 0.0;
             *  for (int j = 0; j < data.Count; j++) {
             *      b[i] += data[j].Y.Value * functions[i](data[j].X) / Math.Pow(data[j].Y.Uncertainty, 2);
             *  }
             * }
             *
             * // solve the system
             * CholeskyDecomposition CD = A.CholeskyDecomposition();
             * if (CD == null) throw new InvalidOperationException();
             * Debug.Assert(CD != null);
             * SymmetricMatrix C = CD.Inverse();
             * ColumnVector a = CD.Solve(b);
             */

            // do a chi^2 test
            double chi2 = 0.0;

            for (int i = 0; i < data.Count; i++)
            {
                double f = 0.0;
                for (int j = 0; j < functions.Length; j++)
                {
                    f += functions[j](data[i].X) * a[j];
                }
                chi2 += Math.Pow((data[i].Y.Value - f) / data[i].Y.Uncertainty, 2);
            }
            TestResult test = new TestResult("ChiSquare", chi2, TestType.RightTailed, new ChiSquaredDistribution(data.Count - functions.Length));

            // return the results
            FitResult fit = new FitResult(a, C, test);

            return(fit);
        }
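
        // Illustrative usage sketch (not part of the original source). It fits a quadratic as a linear
        // combination of the basis functions {1, x, x^2}; the sample class and its Add(x, y, dy) method
        // are assumed.
        private static void FitToLinearFunctionUsageSketch()
        {
            UncertainMeasurementSample<double> data = new UncertainMeasurementSample<double>();
            data.Add(1.0, 2.1, 0.1);
            data.Add(2.0, 4.9, 0.1);
            data.Add(3.0, 10.2, 0.1);
            data.Add(4.0, 16.8, 0.1);

            Func<double, double>[] basis = new Func<double, double>[] {
                x => 1.0,   // constant term
                x => x,     // linear term
                x => x * x  // quadratic term
            };

            // The i-th fitted parameter is the coefficient of basis[i].
            FitResult fit = data.FitToLinearFunction(basis);
        }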
Example #5
        /// <summary>
        /// Fits an MA(1) model to the time series.
        /// </summary>
        /// <returns>The fit result, with parameters in the order: lag-1 coefficient, mean, standard deviation.</returns>
        /// <exception cref="InsufficientDataException">The time series contains fewer than four values.</exception>
        public FitResult FitToMA1()
        {
            if (data.Count < 4)
            {
                throw new InsufficientDataException();
            }

            // MA(1) model is
            //   y_t - \mu = u_t + \beta u_{t-1}
            // where u_t ~ N(0, \sigma) are IID.

            // It's easy to show that
            //   m = E(y_t) = \mu
            //   c_0 = V(y_t) = E((y_t - m)^2) = (1 + \beta^2) \sigma^2
            //   c_1 = C(y_t, y_{t-1}) = \beta \sigma^2
            // so the lag-1 autocorrelation is g_1 = c_1 / c_0 = \beta / (1 + \beta^2).
            // Inverting these relations, the method of moments gives
            //   \beta = \frac{1 - \sqrt{1 - (2 g_1)^2}}{2 g_1}
            //   \mu = m
            //   \sigma^2 = \frac{c_0}{1 + \beta^2}
            // It turns out these are very poor (high bias, high variance) estimators,
            // but they illustrate a basic requirement that |g_1| < 1/2.

            // The MLE estimator

            int n = data.Count;

            double m = data.Mean;

            // The conditional sum of squared innovations as a function of the MA(1) coefficient;
            // its minimizer over theta is the (conditional) maximum-likelihood estimate.
            Func <double, double> fnc = (double theta) => {
                double s         = 0.0;
                double uPrevious = 0.0;
                for (int i = 0; i < data.Count; i++)
                {
                    double u = data[i] - m - theta * uPrevious;
                    s        += u * u;
                    uPrevious = u;
                }
                return(s);
            };

            Extremum minimum = FunctionMath.FindMinimum(fnc, Interval.FromEndpoints(-1.0, 1.0));

            double beta   = minimum.Location;
            double sigma2 = minimum.Value / (data.Count - 3);

            // While there is significant evidence that the MLE value for \beta is biased
            // for small n, I know of no analytic correction.

            double[] parameters = new double[] { beta, m, Math.Sqrt(sigma2) };

            // The calculation of the variance for \mu can be improved over the MLE
            // result by plugging the values for \gamma_0 and \gamma_1 into the
            // exact formula.

            SymmetricMatrix covariances = new SymmetricMatrix(3);

            if (minimum.Curvature > 0.0)
            {
                covariances[0, 0] = sigma2 / minimum.Curvature;
            }
            else
            {
                covariances[0, 0] = MoreMath.Sqr(1.0 - beta * beta) / n;
            }
            covariances[1, 1] = sigma2 * (MoreMath.Sqr(1.0 + beta) - 2.0 * beta / n) / n;
            covariances[2, 2] = sigma2 / 2.0 / n;

            // Compute the fitted innovations and test them for remaining autocorrelation.
            TimeSeries residuals = new TimeSeries();
            double     u1        = 0.0;

            for (int i = 0; i < data.Count; i++)
            {
                double u0 = data[i] - m - beta * u1;
                residuals.Add(u0);
                u1 = u0;
            }
            TestResult test = residuals.LjungBoxTest();

            FitResult result = new FitResult(
                parameters, covariances, test
                );

            return(result);
        }
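
        // Illustrative usage sketch (not part of the original source). It fits an MA(1) model to a
        // synthetic series with beta = 0.4 and mu = 5; the Parameter(int) accessor on FitResult is assumed.
        private static void FitToMA1UsageSketch()
        {
            Random rng = new Random(1);
            TimeSeries series = new TimeSeries();
            double uPrevious = 0.0;
            for (int t = 0; t < 100; t++)
            {
                // Generate u_t ~ N(0, 1) via Box-Muller, then y_t = mu + u_t + beta * u_{t-1}.
                double u = Math.Sqrt(-2.0 * Math.Log(1.0 - rng.NextDouble())) * Math.Cos(2.0 * Math.PI * rng.NextDouble());
                series.Add(5.0 + u + 0.4 * uPrevious);
                uPrevious = u;
            }

            FitResult ma1 = series.FitToMA1();
            // Parameters are ordered: lag-1 coefficient, mean, standard deviation.
            double beta = ma1.Parameter(0).Value;
        }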