public void LinearRegressionVariances() {
    // Perform many linear regression fits on synthetic data. Verify not only that the
    // fit parameters are distributed as they should be, but that the variances and
    // covariances the fit reports agree with the observed spread of those parameters.
    Random rng = new Random(314159);

    // True line parameters.
    double interceptTrue = 2.0;
    double slopeTrue = -1.0;

    // Do a lot of fits, recording the results of each in a data frame.
    FrameTable data = new FrameTable();
    data.AddColumns<double>("a", "va", "b", "vb", "abCov", "p", "dp");

    for (int fit = 0; fit < 128; fit++) {

        // The x's may be drawn from any distribution; the noise must be normal.
        ContinuousDistribution xDistribution = new LogisticDistribution();
        ContinuousDistribution noiseDistribution = new NormalDistribution(0.0, 2.0);

        // Generate a synthetic data set from the true line plus noise.
        BivariateSample sample = new BivariateSample();
        for (int i = 0; i < 12; i++) {
            double x = xDistribution.GetRandomValue(rng);
            double y = interceptTrue + slopeTrue * x + noiseDistribution.GetRandomValue(rng);
            sample.Add(x, y);
        }

        // Do the regression.
        LinearRegressionResult result = sample.LinearRegression();

        // Record the fitted parameters, their claimed (co)variances, and the
        // prediction (with claimed uncertainty) at a new point.
        UncertainValue prediction = result.Predict(12.0);
        data.AddRow(new Dictionary<string, object>() {
            { "a", result.Intercept.Value },
            { "va", result.Parameters.VarianceOf("Intercept") },
            { "b", result.Slope.Value },
            { "vb", result.Parameters.VarianceOf("Slope") },
            { "abCov", result.Parameters.CovarianceOf("Slope", "Intercept") },
            { "p", prediction.Value },
            { "dp", prediction.Uncertainty }
        });
    }

    // The observed spread of each parameter should agree with its claimed variance/covariance.
    Assert.IsTrue(data["a"].As<double>().PopulationVariance().ConfidenceInterval(0.99).ClosedContains(data["va"].As<double>().Median()));
    Assert.IsTrue(data["b"].As<double>().PopulationVariance().ConfidenceInterval(0.99).ClosedContains(data["vb"].As<double>().Median()));
    Assert.IsTrue(data["a"].As<double>().PopulationCovariance(data["b"].As<double>()).ConfidenceInterval(0.99).ClosedContains(data["abCov"].As<double>().Median()));

    // The observed spread of the prediction should agree with its claimed uncertainty.
    Assert.IsTrue(data["p"].As<double>().PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(data["dp"].As<double>().Median()));
}
public static void LinearRegression() {
    // Example: fit a line to a small data set and walk through the quantities
    // the regression result exposes.
    List<double> x = new List<double>() { -1.1, 2.2, 1.4, 0.5, 3.7, 2.8 };
    List<double> y = new List<double>() { -2.9, 3.4, 0.9, 0.1, 6.8, 5.7 };

    LinearRegressionResult result = y.LinearRegression(x);
    Console.WriteLine($"y = ({result.Intercept}) + ({result.Slope}) x");
    Console.WriteLine($"Fit explains {result.RSquared * 100.0}% of the variance");
    Console.WriteLine($"Probability of no dependence {result.R.Probability}.");

    // The ANOVA decomposition of the fit into factor, residual, and total parts.
    OneWayAnovaResult anova = result.Anova;
    Console.WriteLine("Fit dof = {0} SS = {1}", anova.Factor.DegreesOfFreedom, anova.Factor.SumOfSquares);
    Console.WriteLine("Residual dof = {0} SS = {1}", anova.Residual.DegreesOfFreedom, anova.Residual.SumOfSquares);
    Console.WriteLine("Total dof = {0} SS = {1}", anova.Total.DegreesOfFreedom, anova.Total.SumOfSquares);
    Console.WriteLine($"Probability of no dependence {anova.Result.Probability}.");

    // Print a 95% confidence interval on the slope
    Console.WriteLine($"slope is in {result.Slope.ConfidenceInterval(0.95)} with 95% confidence");

    // Other quantities the result exposes: residuals, the parameter vector,
    // and the full parameter covariance matrix.
    IReadOnlyList<double> residuals = result.Residuals;
    ColumnVector parameters = result.Parameters.ValuesVector;
    SymmetricMatrix covariance = result.Parameters.CovarianceMatrix;
    // Capture the intercept-slope covariance in a local; the original call
    // discarded this return value, invoking a pure getter for no effect.
    double interceptSlopeCovariance = result.Parameters.CovarianceOf("Intercept", "Slope");

    // Predict y at a new x, with uncertainty.
    double x1 = 3.0;
    UncertainValue y1 = result.Predict(x1);
    Console.WriteLine($"Predicted y({x1}) = {y1}.");
}
public void LinearRegressionSimple() {
    // Fit a line to synthetic data y = a + b x + e and verify that the regression
    // result reports correct and internally consistent values.
    double a = -1.0;
    double b = 2.0;
    ContinuousDistribution xDistribution = new CauchyDistribution();
    ContinuousDistribution eDistribution = new NormalDistribution();

    // Generate the synthetic data set.
    int n = 16;
    Random rng = new Random(1);
    double[] x = new double[n];
    double[] y = new double[n];
    // Loop bound uses n (was a hard-coded 16) so the sample size is set in one place.
    for (int i = 0; i < n; i++) {
        x[i] = xDistribution.GetRandomValue(rng);
        y[i] = a + b * x[i] + eDistribution.GetRandomValue(rng);
    }

    LinearRegressionResult result = y.LinearRegression(x);

    // Parameters should be right
    Assert.IsTrue(result.Intercept.ConfidenceInterval(0.95).ClosedContains(a));
    Assert.IsTrue(result.Slope.ConfidenceInterval(0.95).ClosedContains(b));

    // Reported values should be consistent across the different accessors.
    Assert.IsTrue(result.Intercept == result.Parameters["Intercept"].Estimate);
    Assert.IsTrue(result.Intercept.Value == result.Parameters.ValuesVector[result.Parameters.IndexOf("Intercept")]);
    Assert.IsTrue(TestUtilities.IsNearlyEqual(result.Intercept.Uncertainty, Math.Sqrt(result.Parameters.VarianceOf("Intercept"))));
    Assert.IsTrue(result.Slope == result.Parameters["Slope"].Estimate);
    Assert.IsTrue(result.Slope.Value == result.Parameters.ValuesVector[result.Parameters.IndexOf("Slope")]);
    Assert.IsTrue(TestUtilities.IsNearlyEqual(result.Slope.Uncertainty, Math.Sqrt(result.Parameters.VarianceOf("Slope"))));

    // Residuals should agree with definition
    for (int i = 0; i < x.Length; i++) {
        double yp = result.Predict(x[i]).Value;
        Assert.IsTrue(TestUtilities.IsNearlyEqual(result.Residuals[i], y[i] - yp));
    }

    // R and R-squared agree
    Assert.IsTrue(TestUtilities.IsNearlyEqual(result.RSquared, MoreMath.Sqr(result.R.Statistic.Value)));

    // F-test and R-test agree
    Assert.IsTrue(TestUtilities.IsNearlyEqual(result.F.Probability, result.R.Probability));

    // ANOVA's sums of squares are correct
    double SST = y.Variance() * y.Length;
    Assert.IsTrue(TestUtilities.IsNearlyEqual(SST, result.Anova.Total.SumOfSquares));
    double SSR = 0.0;
    foreach (double z in result.Residuals) {
        SSR += z * z;
    }
    Assert.IsTrue(TestUtilities.IsNearlyEqual(SSR, result.Anova.Residual.SumOfSquares));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(SSR, result.SumOfSquaredResiduals));

    // R is same as correlation coefficient
    Assert.IsTrue(TestUtilities.IsNearlyEqual(x.CorrelationCoefficient(y), result.R.Statistic.Value));
}
public void BivariateLinearRegression() {
    // Perform many linear regression fits on synthetic data. Verify not only that
    // the fit parameters are what they should be, but also that the reported
    // variance/covariance estimates agree with the observed spread of the fits.
    Random rng = new Random(314159);

    // True line parameters.
    double interceptTrue = 2.0;
    double slopeTrue = -1.0;

    // Keep a sample of the returned intercept (X) and slope (Y) fit parameters.
    BivariateSample parameterSample = new BivariateSample();

    // Also keep track of the returned covariance estimates; since these vary
    // slightly from fit to fit, average them over all the fits.
    double caa = 0.0;
    double cbb = 0.0;
    double cab = 0.0;

    // Record predictions (with claimed uncertainties) for a new point.
    double x0 = 12.0;
    Sample ySample = new Sample();
    double ySigma = 0.0;

    // Do 128 fits.
    for (int fit = 0; fit < 128; fit++) {

        // The x's may be drawn from any distribution; the noise must be normal.
        ContinuousDistribution xDistribution = new LogisticDistribution();
        ContinuousDistribution noiseDistribution = new NormalDistribution(0.0, 2.0);

        // Generate a synthetic data set from the true line plus noise.
        BivariateSample sample = new BivariateSample();
        for (int i = 0; i < 16; i++) {
            double x = xDistribution.GetRandomValue(rng);
            double y = interceptTrue + slopeTrue * x + noiseDistribution.GetRandomValue(rng);
            sample.Add(x, y);
        }

        // Do the regression.
        LinearRegressionResult result = sample.LinearRegression();

        // Test consistency among the different ways the results are reported.
        Assert.IsTrue(result.Intercept == result.Parameters[0].Estimate);
        Assert.IsTrue(result.Intercept.Value == result.Parameters.Best[0]);
        Assert.IsTrue(TestUtilities.IsNearlyEqual(result.Intercept.Uncertainty, Math.Sqrt(result.Parameters.Covariance[0, 0])));
        Assert.IsTrue(result.Slope == result.Parameters[1].Estimate);
        Assert.IsTrue(result.Slope.Value == result.Parameters.Best[1]);
        Assert.IsTrue(TestUtilities.IsNearlyEqual(result.Slope.Uncertainty, Math.Sqrt(result.Parameters.Covariance[1, 1])));
        Assert.IsTrue(TestUtilities.IsNearlyEqual(result.R.Statistic, sample.CorrelationCoefficient));

        // Record the best fit parameters.
        double a = result.Parameters.Best[0];
        double b = result.Parameters.Best[1];
        parameterSample.Add(a, b);

        // Record the estimated covariances.
        caa += result.Parameters.Covariance[0, 0];
        cbb += result.Parameters.Covariance[1, 1];
        cab += result.Parameters.Covariance[0, 1];

        // Record the prediction at the new point and its claimed uncertainty.
        UncertainValue yPredict = result.Predict(x0);
        ySample.Add(yPredict.Value);
        ySigma += yPredict.Uncertainty;

        // Check the ANOVA sums of squares against their definitions.
        double SST = 0.0;
        foreach (double y in sample.Y) {
            SST += MoreMath.Sqr(y - sample.Y.Mean);
        }
        Assert.IsTrue(TestUtilities.IsNearlyEqual(SST, result.Anova.Total.SumOfSquares));
        double SSR = 0.0;
        foreach (double z in result.Residuals) {
            SSR += z * z;
        }
        Assert.IsTrue(TestUtilities.IsNearlyEqual(SSR, result.Anova.Residual.SumOfSquares));
    }
    caa /= parameterSample.Count;
    cbb /= parameterSample.Count;
    cab /= parameterSample.Count;
    ySigma /= parameterSample.Count;

    // The mean parameter estimates should be the underlying population parameters.
    Assert.IsTrue(parameterSample.X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(interceptTrue));
    Assert.IsTrue(parameterSample.Y.PopulationMean.ConfidenceInterval(0.95).ClosedContains(slopeTrue));

    Console.WriteLine("{0} {1}", caa, parameterSample.X.PopulationVariance);
    Console.WriteLine("{0} {1}", cbb, parameterSample.Y.PopulationVariance);

    // The observed parameter covariances should match the reported covariance estimates.
    Assert.IsTrue(parameterSample.X.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(caa));
    Assert.IsTrue(parameterSample.Y.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(cbb));
    Assert.IsTrue(parameterSample.PopulationCovariance.ConfidenceInterval(0.95).ClosedContains(cab));

    // Check that the predicted ys conform to the model and the asserted uncertainty.
    Assert.IsTrue(ySample.PopulationMean.ConfidenceInterval(0.95).ClosedContains(interceptTrue + x0 * slopeTrue));
    //Assert.IsTrue(ySample.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(ySigma));
}