// Computes a variance estimate for the coefficient that belongs to column j of the design matrix x.
// Column j is regressed on all remaining columns of x (auxiliary regression); the result is
//     Variance(errors) / (Variance(column j) * (1 - R^2 of the auxiliary regression)).
// j: zero-based index of the column of x whose coefficient variance is requested.
// NOTE(review): whether this value still needs a degrees-of-freedom scaling depends on how Variance is
// defined elsewhere in this class - confirm against its implementation.
public double VarSEbyJ(int j)
{
    // Column j becomes the dependent variable, every other column of x a regressor.
    var targetColumn = DenseVector.OfVector(x.Column(j));
    var otherColumns = DenseMatrix.OfMatrix(x.RemoveColumn(j));

    // here x already has a constant column, so the regression model should not add another
    var auxiliaryModel = new MultipleRegressionModel();
    auxiliaryModel.Fit(targetColumn, otherColumns, !intercept);

    var residualVariance = Variance(errors);
    var columnVariance = Variance(targetColumn);
    var unexplainedShare = 1 - auxiliaryModel.r_squared;

    return residualVariance / (columnVariance * unexplainedShare);
}
// Engle-Granger two-step test for cointegration.
// Step 1: regress values1 on values2 (values1 = beta_0 + beta_1 * values2 + error).
// Step 2: run the Augmented Dickey Fuller test on the residuals of that regression and return its result;
//         stationary residuals indicate that the two series are cointegrated.
// values1: dependent series. values2: explanatory series, must have the same length as values1.
// lag:     number of lags handed to ADF.
// Throws ArgumentNullException if either series is null and ArgumentException if their lengths differ.
public float EngelGrangerRegression(double[] values1, double[] values2, int lag)
{
    if (values1 == null)
    {
        throw new ArgumentNullException(nameof(values1));
    }

    if (values2 == null)
    {
        throw new ArgumentNullException(nameof(values2));
    }

    if (values1.Length != values2.Length)
    {
        throw new ArgumentException($"EngelGranger test requires both inputs to be of same length, the provided inputs are of length {values1.Length} and {values2.Length}");
    }

    // Simple regression of values1 on values2; called with the model's default arguments.
    var regModel = new MultipleRegressionModel();
    var y = DenseVector.OfArray(values1);
    var x = DenseMatrix.OfColumnArrays(values2);
    regModel.Fit(y, x);

    // ADF works on float input, so the residuals are narrowed from the model's error values.
    float[] residuals = regModel.errors.Select(e => (float)e).ToArray();

    // Return the ADF result for the residuals.
    return ADF(residuals, lag);
}
// Performs the Augmented Dickey Fuller test on a time series with a given number of lags.
// The regression follows https://nwfsc-timeseries.github.io/atsa-labs/sec-boxjenkins-aug-dickey-fuller.html :
//     dy_t = alpha + beta * t + gamma * y_{t-1} + delta_1 * dy_{t-1} + ... + delta_p * dy_{t-p} + e_t
// values: the observed series (levels), at least two observations.
// lag:    number of lagged differences p; capped at roughly sqrt(values.Length) (see below).
// Returns the Student t CDF evaluated at the t-statistic of gamma; a sufficiently low value implies stationarity.
// Throws ArgumentNullException for a null series, ArgumentOutOfRangeException for a negative lag and
// ArgumentException when fewer than two observations are supplied.
public float ADF(float[] values, int lag = 1)
{
    if (values == null)
    {
        throw new ArgumentNullException(nameof(values));
    }

    if (lag < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(lag), lag, "The number of lags must not be negative.");
    }

    if (values.Length < 2)
    {
        throw new ArgumentException("The ADF test requires at least two observations.", nameof(values));
    }

    // To keep the regression feasible in terms of the proportion of features and observations, the lag is
    // restricted to the square root of the number of observations. This relation was chosen arbitrarily and
    // could be changed; a decreasing growth rate of the lag in the number of observations is desirable for
    // run time efficiency and computational precision. A warning could be added here.
    if (lag > Math.Sqrt(values.Length))
    {
        lag = Convert.ToInt32(Math.Sqrt(values.Length));
    }

    // First differences: YDifferences[k] = values[k + 1] - values[k].
    double[] YDifferences = new double[values.Length - 1];
    for (int k = 0; k < YDifferences.Length; k++)
    {
        YDifferences[k] = (double)values[k + 1] - values[k];
    }

    // Because the differences are regressed on their own lags, only the last
    // (YDifferences.Length - lag) differences can serve as dependent observations.
    int length = YDifferences.Length - lag;

    // Returns the differences shifted back by <shift> periods, aligned with the dependent variable.
    // Shift 0 yields the dependent variable itself, shift i yields dy_{t-i}.
    double[] YDifferencesWithLag(int shift)
    {
        double[] yDiff = new double[length];
        Array.Copy(YDifferences, lag - shift, yDiff, 0, length);
        return yDiff;
    }

    // dy_t = y_t - y_{t-1}: the dependent variable of the regression.
    var y = DenseVector.OfArray(YDifferencesWithLag(0));

    // Regressors: trend counter t, lagged level y_{t-1} and <lag> lagged differences.
    // alpha is obtained by letting the regression model add the constant itself.
    var x_values = new Vector<double>[lag + 2]; // plus 2 for the trend and y_{t-1}

    // beta is the coefficient of t, so the regression needs a running counter of the observations.
    x_values[0] = new DenseVector(Enumerable.Range(0, length).Select(t => Convert.ToDouble(t)).ToArray());

    // y_{t-1}: the level one period before each dependent observation. The offset is <lag> (not lag - 1)
    // because the differences are calculated looking forward: YDifferences[lag + m] ends at values[lag + m + 1].
    x_values[1] = new DenseVector(values.Skip(lag).Take(length).Select(v => (double)v).ToArray());

    // Lagged differences dy_{t-1} ... dy_{t-lag}. These must be taken from YDifferences, not from the raw
    // levels: with level lags the coefficient of y_{t-1} would no longer be the gamma of the ADF regression.
    for (int i = 0; i < lag; i++)
    {
        x_values[i + 2] = new DenseVector(YDifferencesWithLag(i + 1));
    }

    var x = DenseMatrix.OfColumns(x_values);

    // Perform the regression.
    var rm = new MultipleRegressionModel();
    rm.Fit(y, x);

    // t-statistic of gamma, the coefficient of y_{t-1}. Index 2 assumes the model places its constant at
    // index 0, followed by the trend at index 1 - confirm against MultipleRegressionModel.
    // cf. the ADF statistic with multiple regressors: https://en.wikipedia.org/wiki/Augmented_Dickey%E2%80%93Fuller_test
    float testStatistic = (float)(((float)rm.betas.ToArray()[2]) / Math.Sqrt(rm.VarSEbyJ(2)));

    // The test is one-sided: a sufficiently low CDF value here implies stationarity.
    // NOTE(review): under the unit-root null the ADF statistic follows the Dickey-Fuller distribution rather
    // than Student t, so this value is only an approximation of the p-value.
    // NOTE(review): x.ColumnCount does not include the constant that the model is said to add; confirm
    // whether the degrees of freedom should be one lower.
    return (float)StudentT.CDF(0, 1, length - x.ColumnCount, testStatistic);
}