public void MultivariateLinearRegressionAgreement2()
{
    // Purpose: a multivariate linear regression that has just one x-column should report
    // the same intercept as a bivariate linear regression of the same data.

    // Noisy straight-line model: y = trueIntercept + trueSlope * x + noise.
    const double trueIntercept = 1.0;
    const double trueSlope = -2.0;
    ContinuousDistribution noiseDistribution = new NormalDistribution(0.0, 3.0);
    UniformDistribution abscissaDistribution = new UniformDistribution(Interval.FromEndpoints(-2.0, 3.0));
    Random generator = new Random(1111111);

    // Draw ten (x, y) points. For every point x is drawn before the noise term,
    // so the sequence of random draws is fixed by the seed.
    MultivariateSample points = new MultivariateSample("x", "y");
    for (int remaining = 10; remaining > 0; remaining--)
    {
        double x = abscissaDistribution.GetRandomValue(generator);
        double noise = noiseDistribution.GetRandomValue(generator);
        points.Add(x, trueIntercept + trueSlope * x + noise);
    }

    // Fit through the old multivariate regression entry point (column 1 is y).
    MultiLinearRegressionResult oldMultiFit = points.LinearRegression(1);

    // Fit through the bivariate regression entry point and compare intercepts.
    LinearRegressionResult bivariateFit = points.TwoColumns(0, 1).LinearRegression();
    Assert.IsTrue(TestUtilities.IsNearlyEqual(
        oldMultiFit.Parameters["Intercept"].Estimate,
        bivariateFit.Parameters["Intercept"].Estimate));

    // Fit through the new list-based multivariate regression entry point and compare intercepts.
    var yValues = points.Column(1).ToList();
    var xValues = points.Column(0).ToList();
    MultiLinearRegressionResult newMultiFit = yValues.MultiLinearRegression(xValues);
    Assert.IsTrue(TestUtilities.IsNearlyEqual(
        oldMultiFit.Parameters["Intercept"].Estimate,
        newMultiFit.Parameters["Intercept"].Estimate));
}
public void MultivariateLinearRegressionVariances()
{
    // Purpose: fit the same two-regressor model to 64 independent samples of 16 points each,
    // then check that the uncertainties each fit claims are consistent with the scatter
    // actually observed across the fits. (Agreement of the estimates themselves with the
    // model coefficients is not asserted here.)

    // Model: y = a + b0 * x0 + b1 * x1 + noise
    const double a = -3.0;
    const double b0 = 2.0;
    const double b1 = -1.0;
    ContinuousDistribution x0Source = new LaplaceDistribution();
    ContinuousDistribution x1Source = new CauchyDistribution();
    ContinuousDistribution noiseSource = new NormalDistribution(0.0, 4.0);

    // One row per fit: each estimate next to its claimed uncertainty.
    FrameTable fits = new FrameTable();
    fits.AddColumns<double>("a", "da", "b0", "db0", "b1", "db1", "ab1Cov", "p", "dp");

    Random generator = new Random(4);
    for (int trial = 0; trial < 64; trial++)
    {
        // Draw a sample from the model; per point the draw order is x0, x1, noise.
        List<double> x0Values = new List<double>();
        List<double> x1Values = new List<double>();
        List<double> yValues = new List<double>();
        for (int point = 0; point < 16; point++)
        {
            double x0 = x0Source.GetRandomValue(generator);
            double x1 = x1Source.GetRandomValue(generator);
            double noise = noiseSource.GetRandomValue(generator);
            x0Values.Add(x0);
            x1Values.Add(x1);
            yValues.Add(a + b0 * x0 + b1 * x1 + noise);
        }

        // Fit y against the two named regressors.
        Dictionary<string, IReadOnlyList<double>> regressors = new Dictionary<string, IReadOnlyList<double>>();
        regressors.Add("x0", x0Values);
        regressors.Add("x1", x1Values);
        MultiLinearRegressionResult fit = yValues.MultiLinearRegression(regressors);

        // Record the estimates, their claimed uncertainties, one claimed covariance,
        // and the prediction at the fixed point (-5, 6).
        UncertainValue prediction = fit.Predict(-5.0, 6.0);
        var interceptEstimate = fit.Intercept;
        var x0Coefficient = fit.CoefficientOf("x0");
        var x1Coefficient = fit.CoefficientOf("x1");
        fits.AddRow(
            interceptEstimate.Value, interceptEstimate.Uncertainty,
            x0Coefficient.Value, x0Coefficient.Uncertainty,
            x1Coefficient.Value, x1Coefficient.Uncertainty,
            fit.Parameters.CovarianceOf("Intercept", "x1"),
            prediction.Value, prediction.Uncertainty
        );
    }

    // The observed scatter of each estimate should be compatible (99% interval)
    // with the average uncertainty the fits claimed for it.
    var interceptScatter = fits["a"].As<double>().PopulationStandardDeviation();
    var interceptClaimed = fits["da"].As<double>().Mean();
    Assert.IsTrue(interceptScatter.ConfidenceInterval(0.99).ClosedContains(interceptClaimed));

    var b0Scatter = fits["b0"].As<double>().PopulationStandardDeviation();
    var b0Claimed = fits["db0"].As<double>().Mean();
    Assert.IsTrue(b0Scatter.ConfidenceInterval(0.99).ClosedContains(b0Claimed));

    var b1Scatter = fits["b1"].As<double>().PopulationStandardDeviation();
    var b1Claimed = fits["db1"].As<double>().Mean();
    Assert.IsTrue(b1Scatter.ConfidenceInterval(0.99).ClosedContains(b1Claimed));

    // Same check for the covariance between the intercept and the x1 coefficient.
    var interceptB1Covariance = fits["a"].As<double>().PopulationCovariance(fits["b1"].As<double>());
    var covarianceClaimed = fits["ab1Cov"].As<double>().Mean();
    Assert.IsTrue(interceptB1Covariance.ConfidenceInterval(0.99).ClosedContains(covarianceClaimed));

    // For the prediction, the claimed uncertainty is summarized by its median rather than its mean.
    var predictionScatter = fits["p"].As<double>().PopulationStandardDeviation();
    var predictionClaimed = fits["dp"].As<double>().Median();
    Assert.IsTrue(predictionScatter.ConfidenceInterval(0.99).ClosedContains(predictionClaimed));
}
public void MultivariateLinearRegressionSimple()
{
    // Purpose: fit one 100-point sample through both the old (MultivariateSample) and the
    // new (column-based) regression entry points, and check the parameter count, the
    // agreement of the fitted parameters with the generating model, and the residuals.

    // Model: y = a + b0 * x0 + b1 * x1 + noise
    const double a = 1.0;
    const double b0 = -2.0;
    const double b1 = 3.0;
    ContinuousDistribution x0Source = new CauchyDistribution(10.0, 5.0);
    ContinuousDistribution x1Source = new UniformDistribution(Interval.FromEndpoints(-10.0, 20.0));
    ContinuousDistribution noiseSource = new NormalDistribution(0.0, 10.0);

    // Draw the sample, storing every point in both containers.
    // Per point the draw order is x0, x1, noise.
    Random generator = new Random(1);
    MultivariateSample legacySample = new MultivariateSample("x0", "x1", "y");
    FrameTable frame = new FrameTable();
    frame.AddColumns<double>("x0", "x1", "y");
    for (int drawn = 0; drawn < 100; drawn++)
    {
        double x0 = x0Source.GetRandomValue(generator);
        double x1 = x1Source.GetRandomValue(generator);
        double noise = noiseSource.GetRandomValue(generator);
        double y = a + b0 * x0 + b1 * x1 + noise;
        legacySample.Add(x0, x1, y);
        frame.AddRow(x0, x1, y);
    }

    // Old entry point: regress column 2 (y) on the remaining columns.
    ParameterCollection legacyParameters = legacySample.LinearRegression(2).Parameters;

    // New entry point: regress the y column on the x0 and x1 columns.
    var yColumn = frame["y"].As<double>();
    var x0Column = frame["x0"].As<double>();
    var x1Column = frame["x1"].As<double>();
    MultiLinearRegressionResult fit = yColumn.MultiLinearRegression(x0Column, x1Column);

    // Both results should hold three parameters.
    Assert.IsTrue(legacyParameters.Count == 3);
    Assert.IsTrue(fit.Parameters.Count == 3);

    // Old result: parameters are checked by position as b0, b1, a (90% intervals).
    Assert.IsTrue(legacyParameters[0].Estimate.ConfidenceInterval(0.90).ClosedContains(b0));
    Assert.IsTrue(legacyParameters[1].Estimate.ConfidenceInterval(0.90).ClosedContains(b1));
    Assert.IsTrue(legacyParameters[2].Estimate.ConfidenceInterval(0.90).ClosedContains(a));

    // New result: coefficients are looked up by index and by name (99% intervals).
    Assert.IsTrue(fit.CoefficientOf(0).ConfidenceInterval(0.99).ClosedContains(b0));
    Assert.IsTrue(fit.CoefficientOf("x1").ConfidenceInterval(0.99).ClosedContains(b1));
    Assert.IsTrue(fit.Intercept.ConfidenceInterval(0.99).ClosedContains(a));

    // Each reported residual should equal the observed y minus the predicted y for that row.
    int rowCount = frame.Rows.Count;
    for (int index = 0; index < rowCount; index++)
    {
        FrameRow row = frame.Rows[index];
        double rowX0 = (double)row["x0"];
        double rowX1 = (double)row["x1"];
        double predicted = fit.Predict(rowX0, rowX1).Value;
        double observed = (double)row["y"];
        Assert.IsTrue(TestUtilities.IsNearlyEqual(fit.Residuals[index], observed - predicted));
    }
}
public void TestMultivariateRegression()
{
    // Purpose: collect r^2 values from many small multivariate linear regressions of data
    // that has no real dependence on x or y (cx = cy = 0), and check with a
    // Kolmogorov-Smirnov test that they follow the reference Beta distribution.

    const int fitCount = 500;       // number of independent regressions
    const int pointsPerFit = 12;    // data points in each regression
    const int parameterCount = 3;   // fitted parameters: x coefficient, y coefficient, intercept

    // Model: z = cx * x + cy * y + cz + noise
    double cz = 1.0;
    double cx = 0.0;
    double cy = 0.0;

    Random rng = new Random(1001110000);
    ContinuousDistribution xDistribution = new UniformDistribution(Interval.FromEndpoints(-4.0, 8.0));
    ContinuousDistribution yDistribution = new UniformDistribution(Interval.FromEndpoints(-8.0, 4.0));
    ContinuousDistribution eDistribution = new NormalDistribution();

    Sample r2Sample = new Sample();
    for (int i = 0; i < fitCount; i++)
    {
        // Draw one sample; per point the draw order is x, y, noise.
        MultivariateSample xyzSample = new MultivariateSample(3);
        for (int k = 0; k < pointsPerFit; k++)
        {
            double x = xDistribution.GetRandomValue(rng);
            double y = yDistribution.GetRandomValue(rng);
            double z = cx * x + cy * y + cz + eDistribution.GetRandomValue(rng);
            xyzSample.Add(x, y, z);
        }

        // Regress column 2 (z) on the other columns and read back the fitted parameters.
        MultiLinearRegressionResult fit = xyzSample.LinearRegression(2);
        double fcx = fit.Parameters.Best[0];
        double fcy = fit.Parameters.Best[1];
        double fcz = fit.Parameters.Best[2];

        // The mean of z does not change while summing, so compute it once per sample
        // instead of once per row.
        double zMean = xyzSample.Column(2).Mean;

        // r^2 = 1 - (residual sum of squares) / (total sum of squares about the mean).
        double ss2 = 0.0;
        double ss1 = 0.0;
        foreach (double[] xyz in xyzSample)
        {
            ss2 += MoreMath.Sqr(xyz[2] - (fcx * xyz[0] + fcy * xyz[1] + fcz));
            ss1 += MoreMath.Sqr(xyz[2] - zMean);
        }
        double r2 = 1.0 - ss2 / ss1;
        r2Sample.Add(r2);
    }

    // Diagnostic output: summary statistics of the observed r^2 values.
    Console.WriteLine("{0} {1} {2} {3} {4}", r2Sample.Count, r2Sample.PopulationMean, r2Sample.StandardDeviation, r2Sample.Minimum, r2Sample.Maximum);

    // Reference distribution: Beta((p - 1) / 2, (n - p) / 2) with p fitted parameters and n points per fit.
    ContinuousDistribution r2Distribution = new BetaDistribution((parameterCount - 1) / 2.0, (pointsPerFit - parameterCount) / 2.0);
    Console.WriteLine("{0} {1}", r2Distribution.Mean, r2Distribution.StandardDeviation);

    TestResult ks = r2Sample.KolmogorovSmirnovTest(r2Distribution);
    Console.WriteLine(ks.RightProbability);
    Console.WriteLine(ks.Probability);

    // Without an assertion this test could never fail; require compatibility with the reference distribution.
    Assert.IsTrue(ks.Probability > 0.05);
}
public void TestMultivariateRegression()
{
    // Purpose: collect r^2 values from many small multivariate linear regressions of data
    // that has no real dependence on x or y (cx = cy = 0), and check with a
    // Kolmogorov-Smirnov test that they follow the reference Beta distribution.

    const int fitCount = 500;       // number of independent regressions
    const int pointsPerFit = 12;    // data points in each regression
    const int parameterCount = 3;   // parameters of the model below: cx, cy and cz

    // Model: z = cx * x + cy * y + cz + noise
    double cz = 1.0;
    double cx = 0.0;
    double cy = 0.0;

    Random rng = new Random(1001110000);
    ContinuousDistribution xDistribution = new UniformDistribution(Interval.FromEndpoints(-4.0, 8.0));
    ContinuousDistribution yDistribution = new UniformDistribution(Interval.FromEndpoints(-8.0, 4.0));
    ContinuousDistribution eDistribution = new NormalDistribution();

    List<double> r2Sample = new List<double>();
    for (int i = 0; i < fitCount; i++)
    {
        // Draw one sample; per point the draw order is x, y, noise.
        MultivariateSample xyzSample = new MultivariateSample(3);
        for (int k = 0; k < pointsPerFit; k++)
        {
            double x = xDistribution.GetRandomValue(rng);
            double y = yDistribution.GetRandomValue(rng);
            double z = cx * x + cy * y + cz + eDistribution.GetRandomValue(rng);
            xyzSample.Add(x, y, z);
        }

        // Regress column 2 (z) on the other columns; only the fit's r^2 is needed,
        // so the individual fitted parameters are no longer read into unused locals.
        MultiLinearRegressionResult fit = xyzSample.LinearRegression(2);
        r2Sample.Add(fit.RSquared);
    }

    // r^2 values should be distributed as Beta((p - 1) / 2, (n - p) / 2)
    // with p model parameters and n points per fit.
    ContinuousDistribution r2Distribution = new BetaDistribution((parameterCount - 1) / 2.0, (pointsPerFit - parameterCount) / 2.0);
    TestResult ks = r2Sample.KolmogorovSmirnovTest(r2Distribution);
    Assert.IsTrue(ks.Probability > 0.05);
}