Example #1
        /// <summary>
        /// Fits an AR(1) model to the time series.
        /// </summary>
        /// <returns>The fit with parameters lag-1 coefficient, mean, and standard deviation.</returns>
        public FitResult FitToAR1()
        {
            // AR1 model is
            //   (x_t - \mu) = \alpha (x_{t-1} - \mu) + u_{t}
            // where u_{t} \sim N(0, \sigma) are IID

            // It's easy to show
            //   m = E(x_t) = \mu
            //   c_0 = V(x_t) = E((x_t - m)^2) = \frac{\sigma^2}{1 - \alpha^2}
            //   c_1 = C(x_t, x_{t-1}) = E((x_t - m)(x_{t-1} - m)) = \alpha c_0
            // which gives a way to get parameters via the method of moments. In particular,
            //   \alpha = c_1 / c_0
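            // Solving the remaining moment equations gives
            //   \mu = m
            //   \sigma^2 = (1 - \alpha^2) c_0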

            // For maximum likelihood estimation (MLE), we need
            //   \log L = -\frac{1}{2} \sum_i \left[ \log (2 \pi \sigma^2)
            //            + \left(\frac{(x_i - \mu) - \alpha (x_{i-1} - \mu)}{\sigma}\right)^2 \right]

            // Treatment of the first value is a bit subtle. We could treat it as normally
            // distributed per m and c_0, but then it enters differently than all the other
            // values, which significantly complicates the equations. Or we could regard it as
            // given and compute the log likelihood conditional on it; then all values enter in
            // the same way, but the sum begins with the second value. We do the latter, which
            // is called the "conditional MLE" in the literature.

            // Differentiating once, writing r_i = (x_i - \mu) - \alpha (x_{i-1} - \mu),
            //   \frac{\partial L}{\partial \alpha} = \frac{1}{\sigma^2} \sum_i r_i (x_{i-1} - \mu)
            //   \frac{\partial L}{\partial \mu} = \frac{1 - \alpha}{\sigma^2} \sum_i r_i
            //   \frac{\partial L}{\partial \sigma} = \sum_i \left[ \frac{r_i^2}{\sigma^3} - \frac{1}{\sigma} \right]
            // Setting these equal to zero gives equations for \mu, \alpha, and \sigma. The
            // first two give a 2x2 linear system that can be solved for \mu and \alpha; the
            // third then gives \sigma. The third equation just says
            //   \sum_i r_i^2 = \sum_i \sigma^2
            // i.e. that \sigma is the rms of the residuals. If we play a little fast and loose
            // with index ranges (e.g. ignoring the difference between quantities computed over
            // the first n-1 and the last n-1 values), then the other two give the same results
            // as the method of moments.

            // Differentiating twice
            //   \frac{\partial^2 L}{\partial \alpha^2} = \frac{-1}{\sigma^2} \sum_i (x_{i-1} - \mu)^2 = \frac{-n c_0}{\sigma^2}
            //   \frac{\partial^2 L}{\partial \mu^2} = \frac{-(1 - \alpha)^2}{\sigma^2} \sum_i 1 = \frac{-n (1 - \alpha)^2}{\sigma^2}
            //   \frac{\partial^2 L}{\partial \sigma^2} = \frac{-2 n}{\sigma^2}
            // Mixed second derivatives vanish at the maximum because of the first derivative conditions.
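            // Inverting the (diagonal) negative second derivative matrix gives the
            // leading-order variance estimates used below:
            //   V(\hat{\alpha}) \approx \frac{\sigma^2}{n c_0} = \frac{1 - \alpha^2}{n}
            //   V(\hat{\mu}) \approx \frac{\sigma^2}{n (1 - \alpha)^2}
            //   V(\hat{\sigma}) \approx \frac{\sigma^2}{2 n}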

            if (data.Count < 4)
            {
                throw new InsufficientDataException();
            }

            int n = data.Count;

            // Compute the sample mean and the (unnormalized) lag-0 and lag-1 autocovariance
            // sums over the last n-1 values.

            double m = data.Mean;

            double c0 = 0.0;

            for (int i = 1; i < data.Count; i++)
            {
                c0 += MoreMath.Sqr(data[i] - m);
            }

            double c1 = 0.0;

            for (int i = 1; i < data.Count; i++)
            {
                c1 += (data[i] - m) * (data[i - 1] - m);
            }

            double alpha = c1 / c0;

            // As an MLE, this estimator of alpha is guaranteed to be asymptotically unbiased,
            // but it is known to be biased at finite n, and in fact the bias is known.

            // See http://www.alexchinco.com/bias-in-time-series-regressions/ for simulations and explanation.
            // He cites Kendall, "Note on Bias in the Estimation of Autocorrelation", Biometrika (1954) 41 (3-4) 403-404

            // See Shaman and Stine, "The Bias of Autoregressive Coefficient Estimators",
            // Journal of the American Statistical Association (1988) Vol. 83, No. 403, pp. 842-848
            // (http://www-stat.wharton.upenn.edu/~steele/Courses/956/Resource/YWSourceFiles/ShamanStine88.pdf)
            // for derivation and formulas for AR(1)-AR(6).

            // For AR(1), the MLE systematically underestimates alpha:
            //   E(\hat{\alpha}) = \alpha - \frac{1 + 3 \alpha}{n}
            // I have confirmed the accuracy of this formula via my own simulations.
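            // Inverting that relation to leading order in 1/n gives the correction
            // applied below: \alpha \to \hat{\alpha} + (1 + 3 \hat{\alpha}) / n.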

            alpha = alpha + (1.0 + 3.0 * alpha) / n;

            double     sigma2    = 0.0;
            TimeSeries residuals = new TimeSeries();

            for (int i = 1; i < data.Count; i++)
            {
                double r = (data[i] - m) - alpha * (data[i - 1] - m);
                residuals.Add(r);
                sigma2 += MoreMath.Sqr(r);
            }
            sigma2 = sigma2 / (data.Count - 3);

            // The MLE solution says the denominator should be n-1, but (i) Fuller says to use
            // n-3, (ii) simulations show n-3 is a better estimate, and (iii) n-3 makes intuitive
            // sense because there are 3 parameters. I would prefer a more rigorous argument,
            // but that's good enough for now.

            // The formulas for the variances of alpha and sigma follow straightforwardly from
            // the second derivatives of the likelihood function. For the variance of the mean
            // we use the exact formula with the \gamma_k for an AR(1) model with the fitted
            // alpha. After quite a bit of manipulation, that is
            //   v = \frac{\sigma^2}{(1-\alpha)^2} \left[ 1 -
            //         \frac{2\alpha}{n} \frac{1 - \alpha^n}{1 - \alpha^2} \right]
            // which gives a finite-n correction to the MLE result. Near \alpha \approx \pm 1,
            // we should use a series expansion to preserve accuracy.

            double[] parameters = new double[] { alpha, m, Math.Sqrt(sigma2) };

            SymmetricMatrix covariances = new SymmetricMatrix(3);

            covariances[0, 0] = (1.0 - alpha * alpha) / n;
            covariances[1, 1] = sigma2 / MoreMath.Sqr(1.0 - alpha) * (1.0 - 2.0 * alpha * (1.0 - MoreMath.Pow(alpha, n)) / (1.0 - alpha * alpha) / n) / n;
            covariances[2, 2] = sigma2 / 2.0 / n;

            TestResult test = residuals.LjungBoxTest();

            return new FitResult(
                parameters,
                covariances,
                test
            );
        }
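For context, here is a minimal usage sketch: it generates a synthetic AR(1) series and fits it. This driver is an illustration, not part of the library; it uses only the TimeSeries members that appear above (Add, FitToAR1) plus System.Random and System.Math, and it assumes "using System;" is in scope.

        public static void Ar1Demo()
        {
            // True parameters of the synthetic AR(1) process
            double alpha = 0.6, mu = 5.0, sigma = 1.5;
            Random rng = new Random(1);

            TimeSeries series = new TimeSeries();
            double x = mu;
            for (int t = 0; t < 500; t++)
            {
                // Box-Muller transform for a standard normal deviate
                double a = 1.0 - rng.NextDouble();
                double b = rng.NextDouble();
                double z = Math.Sqrt(-2.0 * Math.Log(a)) * Math.Cos(2.0 * Math.PI * b);
                // (x_t - \mu) = \alpha (x_{t-1} - \mu) + u_t
                x = mu + alpha * (x - mu) + sigma * z;
                series.Add(x);
            }

            // Per the doc comment, the fitted parameters are ordered
            // (lag-1 coefficient, mean, standard deviation).
            FitResult fit = series.FitToAR1();
        }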
Example #2
        /// <summary>
        /// Fits an MA(1) model to the time series.
        /// </summary>
        /// <returns>The fit with parameters lag-1 coefficient, mean, and standard deviation.</returns>
        public FitResult FitToMA1()
        {
            if (data.Count < 4)
            {
                throw new InsufficientDataException();
            }

            // MA(1) model is
            //   y_t - \mu = u_t + \beta u_{t-1}
            // where u_t ~ N(0, \sigma) are IID.

            // It's easy to show that
            //   m = E(y_t) = \mu
            //   c_0 = V(y_t) = E((y_t - m)^2) = (1 + \beta^2) \sigma^2
            //   c_1 = C(y_t, y_{t-1}) = \beta \sigma^2
            // So, writing g_1 = c_1 / c_0 for the lag-1 autocorrelation, the method of moments gives
            //   \beta = \frac{1 - \sqrt{1 - (2 g_1)^2}}{2 g_1}
            //   \mu = m
            //   \sigma^2 = \frac{c_0}{1 + \beta^2}
            // It turns out these are very poor (high bias, high variance) estimators,
            // but they illustrate a basic requirement that |g_1| < 1/2.

            // For the conditional MLE, take u_0 = 0, generate the residuals recursively via
            //   u_t = (y_t - m) - \beta u_{t-1}
            // and minimize the sum of squared residuals with respect to \beta. Since \sigma
            // enters only as a scale, this one-dimensional minimization is equivalent to
            // maximizing the conditional likelihood.

            int n = data.Count;

            double m = data.Mean;

            Func<double, double> fnc = (double theta) => {
                double s = 0.0;
                double uPrevious = 0.0;
                for (int i = 0; i < data.Count; i++)
                {
                    double u = data[i] - m - theta * uPrevious;
                    s += u * u;
                    uPrevious = u;
                }
                return s;
            };

            Extremum minimum = FunctionMath.FindMinimum(fnc, Interval.FromEndpoints(-1.0, 1.0));

            double beta   = minimum.Location;
            double sigma2 = minimum.Value / (data.Count - 3);

            // While there is significant evidence that the MLE value for \beta is biased
            // at small n, I know of no analytic correction.

            double[] parameters = new double[] { beta, m, Math.Sqrt(sigma2) };

            // The calculation of the variance for \mu can be improved over the MLE
            // result by plugging the values for \gamma_0 and \gamma_1 into the
            // exact formula.

            SymmetricMatrix covariances = new SymmetricMatrix(3);

            if (minimum.Curvature > 0.0)
            {
                covariances[0, 0] = sigma2 / minimum.Curvature;
            }
            else
            {
                covariances[0, 0] = MoreMath.Sqr(1.0 - beta * beta) / n;
            }
            covariances[1, 1] = sigma2 * (MoreMath.Sqr(1.0 + beta) - 2.0 * beta / n) / n;
            covariances[2, 2] = sigma2 / 2.0 / n;

            TimeSeries residuals = new TimeSeries();
            double     u1        = 0.0;

            for (int i = 0; i < data.Count; i++)
            {
                double u0 = data[i] - m - beta * u1;
                residuals.Add(u0);
                u1 = u0;
            }
            TestResult test = residuals.LjungBoxTest();

            FitResult result = new FitResult(
                parameters, covariances, test
                );

            return result;
        }
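A parallel sketch for the MA(1) fit, under the same assumptions (an illustrative driver, not part of the library; only TimeSeries.Add and FitToMA1 from the listing above, with "using System;" in scope):

        public static void Ma1Demo()
        {
            // True parameters of the synthetic MA(1) process
            double beta = 0.4, mu = 2.0, sigma = 1.0;
            Random rng = new Random(2);

            TimeSeries series = new TimeSeries();
            double uPrevious = 0.0;
            for (int t = 0; t < 500; t++)
            {
                // Box-Muller transform for a normal deviate with standard deviation sigma
                double a = 1.0 - rng.NextDouble();
                double b = rng.NextDouble();
                double u = sigma * Math.Sqrt(-2.0 * Math.Log(a)) * Math.Cos(2.0 * Math.PI * b);
                // y_t - \mu = u_t + \beta u_{t-1}
                series.Add(mu + u + beta * uPrevious);
                uPrevious = u;
            }

            FitResult fit = series.FitToMA1();
        }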