/// <summary>
///   Learns a multiple linear regression by ordinary least squares on the
///   "linreg" resource data set and checks the fitted coefficients, the
///   coefficient standard errors, and the confidence and prediction intervals
///   computed for a single new observation.
/// </summary>
public void prediction_test()
{
    // Example from http://www.real-statistics.com/multiple-regression/confidence-and-prediction-intervals/
    var dt = Accord.IO.CsvReader.FromText(Resources.linreg, true).ToTable();

    double[] y = dt.Columns["Poverty"].ToArray();
    double[][] x = dt.ToArray("Infant Mort", "White", "Crime");

    // Use Ordinary Least Squares to learn the regression
    OrdinaryLeastSquares ols = new OrdinaryLeastSquares();

    // Use OLS to learn the multiple linear regression
    MultipleLinearRegression regression = ols.Learn(x, y);

    Assert.AreEqual(3, regression.NumberOfInputs);
    Assert.AreEqual(1, regression.NumberOfOutputs);

    Assert.AreEqual(0.443650703716698, regression.Intercept, 1e-5);
    Assert.AreEqual(1.2791842411083394, regression.Weights[0], 1e-5);
    Assert.AreEqual(0.036259242392669415, regression.Weights[1], 1e-5);
    Assert.AreEqual(0.0014225014835705938, regression.Weights[2], 1e-5);

    // All assertions below use the (expected, actual, delta) argument order so
    // that a failure message reports the two values under the right labels.
    double rse = regression.GetStandardError(x, y);
    Assert.AreEqual(2.4703520840798507, rse, 1e-5);

    // The value returned by GetStandardError(x, y) is fed, together with the
    // information matrix, into every standard-error and interval computation below.
    double[][] im = ols.GetInformationMatrix();
    double mse = regression.GetStandardError(x, y);

    double[] se = regression.GetStandardErrors(mse, im);
    Assert.AreEqual(0.30063086032754965, se[0], 1e-10);
    Assert.AreEqual(0.033603448179240082, se[1], 1e-10);
    Assert.AreEqual(0.0022414548866296342, se[2], 1e-10);
    Assert.AreEqual(3.9879881671805824, se[3], 1e-10);

    // New observation for which the intervals are computed
    double[] x0 = new double[] { 7, 80, 400 };

    double y0 = regression.Transform(x0);
    Assert.AreEqual(12.867680376316864, y0, 1e-5);

    double actual = regression.GetStandardError(x0, mse, im);
    Assert.AreEqual(0.35902764658470271, actual, 1e-10);

    DoubleRange ci = regression.GetConfidenceInterval(x0, mse, x.Length, im);
    Assert.AreEqual(12.144995206616116, ci.Min, 1e-5);
    Assert.AreEqual(13.590365546017612, ci.Max, 1e-5);

    actual = regression.GetPredictionStandardError(x0, mse, im);
    Assert.AreEqual(2.4963053239397244, actual, 1e-10);

    DoubleRange pi = regression.GetPredictionInterval(x0, mse, x.Length, im);
    Assert.AreEqual(7.8428783761994554, pi.Min, 1e-5);
    Assert.AreEqual(17.892482376434273, pi.Max, 1e-5);
}
/// <summary>
///   Fits the regression to the given samples by ordinary least squares and
///   populates the analysis results: sums of squares, R-squared and adjusted
///   R-squared, degrees of freedom, mean squares, the ANOVA table, the
///   coefficient standard errors, and the per-coefficient and whole-model
///   hypothesis tests.
/// </summary>
/// <param name="x">The input vectors, one per sample.</param>
/// <param name="y">The expected output value for each sample.</param>
private void compute(double[][] x, double[] y)
{
    int sampleCount = x.Length;
    int inputCount = NumberOfInputs;

    // Reset the accumulators before the new fit
    SSt = 0;
    SSe = 0;
    outputMean = 0.0;
    NumberOfSamples = sampleCount;

    // Fit the regression and keep the information matrix of the fit
    OrdinaryLeastSquares.Token = Token;
    regression = OrdinaryLeastSquares.Learn(x, y);
    informationMatrix = OrdinaryLeastSquares.GetInformationMatrix();

    // Mean of the expected outputs
    outputMean = y.Mean();

#pragma warning disable 612, 618
    // Outputs produced by the fitted model for the training inputs
    results = regression.Transform(x);

    // Accumulate the error (SSe) and total (SSt) sums of squares
    for (int row = 0; row < sampleCount; row++)
    {
        double residual = y[row] - results[row];
        SSe += residual * residual;

        double deviation = y[row] - outputMean;
        SSt += deviation * deviation;
    }

    // Regression sum of squares
    SSr = SSt - SSe;

    // R-squared (defined as 1 when the outputs have no variation at all)
    if (SSt != 0)
    {
        rSquared = 1.0 - (SSe / SSt);
    }
    else
    {
        rSquared = 1.0;
    }

    // Adjusted R-squared
    if (rSquared == 1)
    {
        rAdjusted = 1;
    }
    else if (sampleCount - inputCount == 1)
    {
        // The correction factor would divide by zero here
        rAdjusted = double.NaN;
    }
    else
    {
        rAdjusted = 1.0 - (1.0 - rSquared) * ((sampleCount - 1.0) / (sampleCount - inputCount - 1.0));
    }

    // Degrees of freedom
    DFr = inputCount;
    DFe = sampleCount - (inputCount + 1);
    DFt = DFr + DFe;

    // Mean squares
    MSe = SSe / DFe;
    MSr = SSr / DFr;
    MSt = SSt / DFt;

    // F statistic of the whole regression and overall standard error
    ftest = new FTest(MSr / MSe, DFr, DFe);
    stdError = Math.Sqrt(MSe);

    // ANOVA table
    var sources = new List<AnovaVariationSource>
    {
        new AnovaVariationSource(this, "Regression", SSr, DFr, MSr, ftest),
        new AnovaVariationSource(this, "Error", SSe, DFe, MSe, null),
        new AnovaVariationSource(this, "Total", SSt, DFt, MSt, null)
    };
    this.anovaTable = new AnovaSourceCollection(sources);

    // Coefficient standard errors, taken from the diagonal of the information matrix
    standardErrors = new double[NumberOfInputs + 1];
    for (int k = 0; k < informationMatrix.Length; k++)
    {
        standardErrors[k] = Math.Sqrt(MSe * informationMatrix[k][k]);
    }

    // Per-coefficient t-test, F-test and confidence interval
    for (int k = 0; k < CoefficientValues.Length; k++)
    {
        double estimate = CoefficientValues[k];
        double tStatistic = estimate / standardErrors[k];

        ttests[k] = new TTest(estimatedValue: estimate,
            standardError: standardErrors[k], degreesOfFreedom: DFe);
        ftests[k] = new FTest(tStatistic * tStatistic, 1, DFe);
        confidences[k] = ttests[k].GetConfidenceInterval(confidencePercent);
    }

    // Whole-model performance tests
    ttest = new TTest(results, outputMean);
    ztest = new ZTest(results, outputMean);
    chiSquareTest = new ChiSquareTest(y, results, sampleCount - inputCount - 1);
#pragma warning restore 612, 618
}
/// <summary>
///   Learns a multivariate linear regression (with a single output column) by
///   ordinary least squares on the "linreg" resource data set and checks the
///   fitted coefficients, the coefficient standard errors, and the confidence
///   and prediction intervals computed for a single new observation.
/// </summary>
public void prediction_test()
{
    // Example from http://www.real-statistics.com/multiple-regression/confidence-and-prediction-intervals/
    var dt = Accord.IO.CsvReader.FromText(Resources.linreg, true).ToTable();

    double[][] y = dt.ToArray("Poverty");
    double[][] x = dt.ToArray("Infant Mort", "White", "Crime");

    // Use Ordinary Least Squares to learn the regression
    OrdinaryLeastSquares ols = new OrdinaryLeastSquares();

    // Use OLS to learn the multiple linear regression
    MultivariateLinearRegression regression = ols.Learn(x, y);

    Assert.AreEqual(3, regression.NumberOfInputs);
    Assert.AreEqual(1, regression.NumberOfOutputs);

    Assert.AreEqual(0.443650703716698, regression.Intercepts[0], 1e-5);
    Assert.AreEqual(1.2791842411083394, regression.Weights[0][0], 1e-5);
    Assert.AreEqual(0.036259242392669415, regression.Weights[1][0], 1e-5);
    Assert.AreEqual(0.0014225014835705938, regression.Weights[2][0], 1e-5);

    // All assertions below use the (expected, actual, delta) argument order so
    // that a failure message reports the two values under the right labels.
    double rse = regression.GetStandardError(x, y)[0];
    Assert.AreEqual(2.4703520840798507, rse, 1e-5);

    // The values returned by GetStandardError(x, y) are fed, together with the
    // information matrix, into every standard-error and interval computation below.
    double[][] im = ols.GetInformationMatrix();
    double[] mse = regression.GetStandardError(x, y);

    double[][] se = regression.GetStandardErrors(mse, im);
    Assert.AreEqual(0.30063086032754965, se[0][0], 1e-10);
    Assert.AreEqual(0.033603448179240082, se[0][1], 1e-10);
    Assert.AreEqual(0.0022414548866296342, se[0][2], 1e-10);
    Assert.AreEqual(3.9879881671805824, se[0][3], 1e-10);

    // New observation for which the intervals are computed
    double[] x0 = new double[] { 7, 80, 400 };

    double y0 = regression.Transform(x0)[0];
    Assert.AreEqual(12.867680376316864, y0, 1e-5);

    double actual = regression.GetStandardError(x0, mse, im)[0];
    Assert.AreEqual(0.35902764658470271, actual, 1e-10);

    DoubleRange ci = regression.GetConfidenceInterval(x0, mse, x.Length, im)[0];
    Assert.AreEqual(12.144995206616116, ci.Min, 1e-5);
    Assert.AreEqual(13.590365546017612, ci.Max, 1e-5);

    actual = regression.GetPredictionStandardError(x0, mse, im)[0];
    Assert.AreEqual(2.4963053239397244, actual, 1e-10);

    DoubleRange pi = regression.GetPredictionInterval(x0, mse, x.Length, im)[0];
    Assert.AreEqual(7.8428783761994554, pi.Min, 1e-5);
    Assert.AreEqual(17.892482376434273, pi.Max, 1e-5);
}
/// <summary>
///   Fits an ordinary least squares regression without an intercept term and
///   packages the fit statistics (standard error, R-squared, adjusted R-squared,
///   coefficients and their standard errors, t-scores and p-values) into a
///   <see cref="RegressionResult"/>.
/// </summary>
/// <param name="yVal">The dependent variable, one value per sample.</param>
/// <param name="xVars">The names of the independent variables; stored joined by ';'.</param>
/// <param name="xVals">The independent variable values, one row per sample.</param>
/// <returns>The populated regression result.</returns>
private static RegressionResult PerformOls(double[] yVal, string[] xVars, double[][] xVals)
{
    var ols = new OrdinaryLeastSquares()
    {
        // The intercept would represent the return when the inputs have no impact (mean daily return).
        // It is forced to 0 because we assume every price move is explained by the independent variables.
        // In reality there is likely an unexplainable drift, for example daily bleed (MER, funding etc).
        UseIntercept = false
    };

    MultipleLinearRegression regression = ols.Learn(xVals, yVal);

    RegressionResult r = new RegressionResult();
    r.ModelType = "OLS";
    r.StandardError = regression.GetStandardError(xVals, yVal);
    r.RSquared = regression.CoefficientOfDetermination(xVals, yVal, false);
    r.AdjRSquared = regression.CoefficientOfDetermination(xVals, yVal, true);
    r.Betas = SgtStringUtils.DoubleArrayToRoundedDelimitedString(regression.Weights);
    r.xVars = string.Join(";", xVars);

    // TODO: the coefficient statistics below still need validating against a reference implementation.
    double[] coeffStandardErrors = regression.GetStandardErrors(r.StandardError, ols.GetInformationMatrix());
    double[] coeffTScores = DoubleDivide(regression.Weights, coeffStandardErrors);
    double[] coeffPVals = new double[coeffTScores.Length];

    // Residual degrees of freedom: the model has no intercept, so it estimates
    // exactly one parameter per input and leaves (samples - inputs) degrees of
    // freedom. Using (samples - 1) would understate the p-values whenever there
    // is more than one input.
    double df = xVals.Length - regression.NumberOfInputs;

    for (int i = 0; i < coeffTScores.Length; i++)
    {
        // Two-sided test of each coefficient against zero
        TTest tTest = new TTest(coeffTScores[i], df, OneSampleHypothesis.ValueIsDifferentFromHypothesis);
        coeffPVals[i] = tTest.PValue;
    }

    r.CoeffStandardErrors = SgtStringUtils.DoubleArrayToRoundedDelimitedString(coeffStandardErrors);
    r.CoeffTScores = SgtStringUtils.DoubleArrayToRoundedDelimitedString(coeffTScores);
    r.CoeffPVals = SgtStringUtils.DoubleArrayToRoundedDelimitedString(coeffPVals);

    r.xVarCount = regression.NumberOfInputs;
    r.SamplesCount = yVal.Length;

    // NOTE(review): the intercept is not fitted here (UseIntercept = false), so this
    // presumably always reports the regression's default intercept — confirm intent.
    r.Mean = regression.Intercept;

    return r;
}