Exemplo n.º 1
0
        // Variance estimate used for the coefficient of regressor column j.
        //
        // Column j of x is regressed on all remaining columns of x (an auxiliary regression); the result is
        //     Variance(errors) / (Variance(column j) * (1 - R^2 of the auxiliary regression)).
        //
        // j: zero-based index of the column in x whose coefficient variance is requested.
        // NOTE(review): Variance, x, errors and intercept are declared outside this block; whether Variance
        // already carries the degrees-of-freedom scaling expected by callers should be confirmed there.
        public double VarSEbyJ(int j)
        {
            // Split the design matrix: column j becomes the dependent variable, the rest the regressors
            var regressorJ      = DenseVector.OfVector(x.Column(j));
            var otherRegressors = DenseMatrix.OfMatrix(x.RemoveColumn(j));

            // here x already has a constant column, so the regression model should not add another
            var auxiliaryModel = new MultipleRegressionModel();
            auxiliaryModel.Fit(regressorJ, otherRegressors, !intercept);

            var residualVariance  = Variance(errors);
            var regressorVariance = Variance(regressorJ);
            var unexplainedShare  = 1 - auxiliaryModel.r_squared;

            return residualVariance / (regressorVariance * unexplainedShare);
        }
Exemplo n.º 2
0
        // Engle-Granger two step test for cointegration.
        //
        // Step 1: regress values1 on values2 (values1 = beta_0 + beta_1 * values2 + error).
        // Step 2: run the Augmented Dickey Fuller test on the residuals of that regression and return its result.
        //
        // values1: dependent series
        // values2: explanatory series, must have the same length as values1
        // lag:     number of lags handed on to ADF
        // returns: the value returned by ADF for the regression residuals
        // throws:  ArgumentNullException if either series is null,
        //          ArgumentException if the two series differ in length
        public float EngelGrangerRegression(double[] values1, double[] values2, int lag)
        {
            if (values1 == null)
            {
                throw new ArgumentNullException(nameof(values1));
            }
            if (values2 == null)
            {
                throw new ArgumentNullException(nameof(values2));
            }
            if (values1.Length != values2.Length)
            {
                throw new ArgumentException($"EngelGranger test requires bot inputs to be of same lenght, the provided inputs are of length {values1.Length} and {values2.Length}");
            }

            // simple regression of values1 = beta_0 + beta_1 * values2 + error
            var dependent  = DenseVector.OfArray(values1);
            var regressors = DenseMatrix.OfColumnArrays(values2);
            var regModel   = new MultipleRegressionModel();

            regModel.Fit(dependent, regressors);

            // ADF works on float input, so the residuals are narrowed before being tested for stationarity
            float[] residuals = regModel.errors.Select(e => (float)e).ToArray();
            return ADF(residuals, lag);
        }
Exemplo n.º 3
0
        // Perform the Augmented Dickey Fuller Test on a timeseries with a given number of lags.
        //
        // The fitted regression follows https://nwfsc-timeseries.github.io/atsa-labs/sec-boxjenkins-aug-dickey-fuller.html :
        //     dy_t = alpha + beta * t + gamma * y_{t-1} + delta_1 * dy_{t-1} + ... + delta_p * dy_{t-p} + e_t
        // where dy_t = y_t - y_{t-1} and p = lag. The test statistic is the t-ratio of gamma.
        //
        // values:  the observed series in time order
        // lag:     number of lagged differences (p); values above sqrt(values.Length) are cut back to that bound
        // returns: the Student-t CDF evaluated at the test statistic; the test is one sided, so a sufficiently
        //          low value implies stationarity
        // throws:  ArgumentNullException if values is null,
        //          ArgumentOutOfRangeException if lag is negative,
        //          ArgumentException if the series is too short to leave any observation after lagging
        public float ADF(float[] values, int lag = 1)
        {
            if (values == null)
            {
                throw new ArgumentNullException(nameof(values));
            }
            if (lag < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(lag), lag, "ADF test requires a non-negative lag");
            }

            // To keep the regression feasible in terms of the proportion of features and observations, the lag is restricted to the square root of all observations.
            // This relation was chosen arbitrarily and could be changed -> however a decreasing growth rate for the lag in the number of observations is desirable for run time efficiency and computational precision.
            // Convert.ToInt32 rounds to the nearest integer, so the effective cap can sit just above the exact square root.
            if (lag > Math.Sqrt(values.Length))
            {
                lag = Convert.ToInt32(Math.Sqrt(values.Length)); // A warning could be added
            }

            // One observation is lost to differencing and a further <lag> observations to the lagged differences
            int length = values.Length - 1 - lag;
            if (length <= 0)
            {
                throw new ArgumentException($"ADF test with lag {lag} requires more than {lag + 1} observations, the provided input is of length {values.Length}", nameof(values));
            }

            // First differences: YDifferences[k] = values[k + 1] - values[k]
            double[] YDifferences = new double[values.Length - 1];
            for (int k = 0; k < YDifferences.Length; k++)
            {
                YDifferences[k] = (double)values[k + 1] - values[k];
            }

            // Window of <length> differences moved back by <shift> periods -> shift 0 yields the dependent variable
            double[] YDifferencesWithLag(int shift)
            {
                double[] yDiff = new double[length];
                Array.Copy(YDifferences, lag - shift, yDiff, 0, length);
                return yDiff;
            }

            // Y_t - Y_{t-1} -> the dependent variable of the regression
            var y = DenseVector.OfArray(YDifferencesWithLag(0));

            // Feature columns; alpha is generated by letting the regression model add its own intercept
            var x_values = new Vector <double> [lag + 2]; // plus 2 for the time index and y_{t-1}

            // beta is the coefficient of t, thus the regression needs a count variable for the observations
            x_values[0] = new DenseVector(Enumerable.Range(0, length).Select(t => Convert.ToDouble(t)).ToArray());

            // y_{t-1}: the level preceding each dependent difference. The offset is <lag> and not <lag - 1>
            // because the differences are calculated looking forward (YDifferences[k] ends at values[k + 1]).
            double[] y_1 = new double[length];
            for (int m = 0; m < length; m++)
            {
                y_1[m] = values[lag + m];
            }
            x_values[1] = new DenseVector(y_1);

            // Lagged differences dy_{t-1} ... dy_{t-lag}. These have to be taken from YDifferences,
            // not from the level series, otherwise the columns are just further lags of y itself.
            for (int i = 0; i < lag; i++)
            {
                x_values[i + 2] = new DenseVector(YDifferencesWithLag(i + 1));
            }
            var x = DenseMatrix.OfColumns(x_values);

            // Perform the regression
            var rm = new MultipleRegressionModel();

            rm.Fit(y, x);

            // Test statistic: coefficient at index 2 divided by its standard error.
            // NOTE(review): index 2 is the y_{t-1} coefficient only if the model places its intercept in front of the supplied columns - confirm in MultipleRegressionModel.
            // cf. for ADF statistic with multiple regressors (not shown in the link above) https://en.wikipedia.org/wiki/Augmented_Dickey%E2%80%93Fuller_test
            float testStatistic = (float)(((float)rm.betas.ToArray()[2]) / Math.Sqrt(rm.VarSEbyJ(2)));

            // The test is one sided, testing for t-values beyond the critical threshold -> a sufficiently low CDF value here implies stationarity
            // NOTE(review): x.ColumnCount does not include the intercept added by the model - verify the intended degrees of freedom.
            return (float)StudentT.CDF(0, 1, length - x.ColumnCount, testStatistic);
        }