public void MultivariateLinearRegressionAgreement() {

    // A linear regression should give the same answer however the data is
    // packaged: regressing column 0 on column 1 of a multivariate sample,
    // regressing with the columns swapped, and regressing via a bivariate
    // sample must all agree (with parameters permuted as appropriate).

    Random rng = new Random(1);
    MultivariateSample sample = new MultivariateSample(2);
    for (int i = 0; i < 10; i++) {
        sample.Add(rng.NextDouble(), rng.NextDouble());
    }

    // Regress column 0 on column 1.
    RegressionResult forward = sample.LinearRegression(0);
    ColumnVector forwardParameters = forward.Parameters.Best;
    SymmetricMatrix forwardCovariance = forward.Parameters.Covariance;

    // Swap the columns and regress the same variable (now column 1) on the other.
    MultivariateSample swapped = sample.Columns(1, 0);
    RegressionResult reversed = swapped.LinearRegression(1);
    ColumnVector reversedParameters = reversed.Parameters.Best;
    SymmetricMatrix reversedCovariance = reversed.Parameters.Covariance;

    // Parameters and covariances must agree up to the index permutation.
    Assert.IsTrue(TestUtilities.IsNearlyEqual(forwardParameters[0], reversedParameters[1]));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(forwardParameters[1], reversedParameters[0]));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(forwardCovariance[0, 0], reversedCovariance[1, 1]));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(forwardCovariance[0, 1], reversedCovariance[1, 0]));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(forwardCovariance[1, 1], reversedCovariance[0, 0]));

    // The same regression done through a BivariateSample must agree directly.
    BivariateSample bivariate = sample.TwoColumns(1, 0);
    RegressionResult bivariateResult = bivariate.LinearRegression();
    ColumnVector bivariateParameters = bivariateResult.Parameters.Best;
    SymmetricMatrix bivariateCovariance = bivariateResult.Parameters.Covariance;
    Assert.IsTrue(TestUtilities.IsNearlyEqual(forwardParameters, bivariateParameters));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(forwardCovariance, bivariateCovariance));
}
public void BivariateLinearRegressionGoodnessOfFitDistribution() {

    // Create uncorrelated x and y values.
    // The distribution of F-test statistics returned by linear fits
    // should follow the expected F-distribution.

    Random rng = new Random(987654321);
    NormalDistribution xDistribution = new NormalDistribution(1.0, 2.0);
    NormalDistribution yDistribution = new NormalDistribution(-3.0, 4.0);

    Sample statistics = new Sample();
    for (int i = 0; i < 127; i++) {
        // Each fit uses 7 independent (x, y) points.
        BivariateSample points = new BivariateSample();
        for (int j = 0; j < 7; j++) {
            points.Add(xDistribution.GetRandomValue(rng), yDistribution.GetRandomValue(rng));
        }
        statistics.Add(points.LinearRegression().GoodnessOfFit.Statistic);
    }

    // With 7 points and 2 fit parameters, F should follow F(1, 5).
    Distribution expected = new FisherDistribution(1, 5);
    Console.WriteLine("{0} v. {1}", statistics.PopulationMean, expected.Mean);
    TestResult t = statistics.KolmogorovSmirnovTest(expected);
    Console.WriteLine(t.LeftProbability);
    Assert.IsTrue(t.LeftProbability < 0.95);
}
public void TestBivariateRegression() {
    // Do a bunch of linear regressions on uncorrelated data (true slope b0 = 0).
    // Under the null hypothesis, r^2 should follow the expected Beta distribution.
    double a0 = 1.0;
    double b0 = 0.0;
    Random rng = new Random(1001110000);
    ContinuousDistribution xDistribution = new UniformDistribution(Interval.FromEndpoints(-2.0, 4.0));
    ContinuousDistribution eDistribution = new NormalDistribution();
    List<double> r2Sample = new List<double>();
    for (int i = 0; i < 500; i++) {
        // Generate a 10-point data set in which y is independent of x.
        BivariateSample xySample = new BivariateSample();
        for (int k = 0; k < 10; k++) {
            double x = xDistribution.GetRandomValue(rng);
            double y = a0 + b0 * x + eDistribution.GetRandomValue(rng);
            xySample.Add(x, y);
        }
        LinearRegressionResult fit = xySample.LinearRegression();
        // (Previously the intercept and slope were copied into unused locals; removed.)
        r2Sample.Add(fit.RSquared);
    }
    // For m = 2 fit parameters and n = 10 points, r^2 ~ Beta((m-1)/2, (n-m)/2).
    ContinuousDistribution r2Distribution = new BetaDistribution((2 - 1) / 2.0, (10 - 2) / 2.0);
    TestResult ks = r2Sample.KolmogorovSmirnovTest(r2Distribution);
    Assert.IsTrue(ks.Probability > 0.05);
}
public void BivariateLinearRegressionNullDistribution() {
    // Create uncorrelated x and y values and do linear fits.
    // The r and F test statistics returned by the fits should agree with each
    // other and should follow their claimed null distributions.
    Random rng = new Random(987654321);
    NormalDistribution xd = new NormalDistribution(1.0, 2.0);
    NormalDistribution yd = new NormalDistribution(-3.0, 4.0);
    Sample rSample = new Sample();
    ContinuousDistribution rDistribution = null;
    Sample fSample = new Sample();
    ContinuousDistribution fDistribution = null;
    for (int i = 0; i < 127; i++) {
        BivariateSample sample = new BivariateSample();
        for (int j = 0; j < 7; j++) {
            sample.Add(xd.GetRandomValue(rng), yd.GetRandomValue(rng));
        }
        LinearRegressionResult result = sample.LinearRegression();
        // Record the r and F statistics and their claimed distributions.
        // (Previously a second Sample collected the same F values; consolidated
        // onto fSample, which is used for both distribution tests below.)
        rSample.Add(result.R.Statistic);
        rDistribution = result.R.Distribution;
        fSample.Add(result.F.Statistic);
        fDistribution = result.F.Distribution;
        // The regression F statistic must equal the ANOVA F statistic.
        Assert.IsTrue(result.F.Statistic == result.Anova.Result.Statistic);
        // The r-test and F-test must assign the same significance.
        Assert.IsTrue(TestUtilities.IsNearlyEqual(
            result.R.Probability, result.F.Probability,
            new EvaluationSettings() { RelativePrecision = 1.0E-14, AbsolutePrecision = 1.0E-16 }
        ));
    }
    // With 7 points and 2 fit parameters, F should follow F(1, 5).
    ContinuousDistribution fd = new FisherDistribution(1, 5);
    Console.WriteLine("{0} v. {1}", fSample.PopulationMean, fd.Mean);
    TestResult t = fSample.KolmogorovSmirnovTest(fd);
    Console.WriteLine(t.LeftProbability);
    Assert.IsTrue(t.LeftProbability < 0.95);
    // Both statistics should conform to the distributions the results claim.
    Assert.IsTrue(rSample.KuiperTest(rDistribution).Probability > 0.05);
    Assert.IsTrue(fSample.KuiperTest(fDistribution).Probability > 0.05);
}
// Not fixing this bug; use Polynomial interpolation for this scenario instead
//[TestMethod]
public void Bug6392() {
    // The bug report requests that regression work when the number of points
    // equals the number of fit parameters, i.e. an exact (interpolating) fit.
    var biSample = new BivariateSample();
    biSample.Add(0, 1);
    biSample.Add(1, -1);
    var fitResult = biSample.LinearRegression();
}
public void LinearRegressionVariances() {
    // Do a set of linear regression fits.
    // (Comment previously said "logistic"; the model below is the line y = a0 + b0 x.)
    // Make sure not only that the fit parameters are what they should be,
    // but that their variances/covariances are as returned.
    Random rng = new Random(314159);
    // define line parameters
    double a0 = 2.0; double b0 = -1.0;
    // do a lot of fits, recording results of each
    FrameTable data = new FrameTable();
    data.AddColumns<double>("a", "va", "b", "vb", "abCov", "p", "dp");
    for (int k = 0; k < 128; k++) {
        // we should be able to draw x's from any distribution;
        // noise should be drawn from a normal distribution
        ContinuousDistribution xd = new LogisticDistribution();
        ContinuousDistribution nd = new NormalDistribution(0.0, 2.0);
        // generate a synthetic data set
        BivariateSample sample = new BivariateSample();
        for (int i = 0; i < 12; i++) {
            double x = xd.GetRandomValue(rng);
            double y = a0 + b0 * x + nd.GetRandomValue(rng);
            sample.Add(x, y);
        }
        // do the regression
        LinearRegressionResult result = sample.LinearRegression();
        // record the fit parameters, their claimed (co)variances, and a
        // prediction (with claimed uncertainty) at x = 12
        UncertainValue p = result.Predict(12.0);
        data.AddRow(new Dictionary<string, object>() {
            { "a", result.Intercept.Value },
            { "va", result.Parameters.VarianceOf("Intercept") },
            { "b", result.Slope.Value },
            { "vb", result.Parameters.VarianceOf("Slope") },
            { "abCov", result.Parameters.CovarianceOf("Slope", "Intercept") },
            { "p", p.Value },
            { "dp", p.Uncertainty }
        });
    }
    // variances of parameters should agree with predictions: the observed
    // scatter of each parameter across fits should match the reported variance
    Assert.IsTrue(data["a"].As<double>().PopulationVariance().ConfidenceInterval(0.99).ClosedContains(data["va"].As<double>().Median()));
    Assert.IsTrue(data["b"].As<double>().PopulationVariance().ConfidenceInterval(0.99).ClosedContains(data["vb"].As<double>().Median()));
    Assert.IsTrue(data["a"].As<double>().PopulationCovariance(data["b"].As<double>()).ConfidenceInterval(0.99).ClosedContains(data["abCov"].As<double>().Median()));
    // variance of prediction should agree with claim
    Assert.IsTrue(data["p"].As<double>().PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(data["dp"].As<double>().Median()));
}
public void TestBivariateRegression() {

    // Fit many uncorrelated data sets (true slope b0 = 0) and examine the
    // resulting distribution of hand-computed r^2 values against the
    // expected Beta distribution. (Diagnostic output only; no assertion.)

    double a0 = 1.0;
    double b0 = 0.0;
    Random rng = new Random(1001110000);
    Distribution xDistribution = new UniformDistribution(Interval.FromEndpoints(-2.0, 4.0));
    Distribution eDistribution = new NormalDistribution();

    Sample r2Sample = new Sample();
    for (int i = 0; i < 500; i++) {

        // Generate a 10-point data set in which y is independent of x.
        BivariateSample xySample = new BivariateSample();
        for (int k = 0; k < 10; k++) {
            double x = xDistribution.GetRandomValue(rng);
            double y = a0 + b0 * x + eDistribution.GetRandomValue(rng);
            xySample.Add(x, y);
        }
        FitResult fit = xySample.LinearRegression();
        double a = fit.Parameters[0];
        double b = fit.Parameters[1];

        // Compute r^2 by hand from residual and total sums of squares.
        double ssResidual = 0.0;
        double ssTotal = 0.0;
        foreach (XY xy in xySample) {
            ssResidual += MoreMath.Sqr(xy.Y - (a + b * xy.X));
            ssTotal += MoreMath.Sqr(xy.Y - xySample.Y.Mean);
        }
        double r2 = 1.0 - ssResidual / ssTotal;
        r2Sample.Add(r2);
    }

    Console.WriteLine("{0} {1} {2} {3} {4}", r2Sample.Count, r2Sample.PopulationMean, r2Sample.StandardDeviation, r2Sample.Minimum, r2Sample.Maximum);

    // For m = 2 fit parameters and n = 10 points, r^2 ~ Beta((m-1)/2, (n-m)/2).
    Distribution r2Distribution = new BetaDistribution((2 - 1) / 2.0, (10 - 2) / 2.0);
    //Distribution r2Distribution = new BetaDistribution((10 - 2) / 2.0, (2 - 1) / 2.0);
    Console.WriteLine("{0} {1}", r2Distribution.Mean, r2Distribution.StandardDeviation);

    TestResult ks = r2Sample.KolmogorovSmirnovTest(r2Distribution);
    Console.WriteLine(ks.RightProbability);
    Console.WriteLine(ks.Probability);
}
public void BivariateLinearPolynomialRegressionAgreement() {
    // A degree-1 polynomial fit should give the same answer as a linear fit.
    BivariateSample sample = new BivariateSample();
    double[] xValues = { 0.0, 3.0, 1.0, 4.0, 2.0 };
    double[] yValues = { 5.0, 6.0, 7.0, 8.0, 9.0 };
    for (int i = 0; i < xValues.Length; i++) {
        sample.Add(xValues[i], yValues[i]);
    }
    GeneralLinearRegressionResult polynomialFit = sample.PolynomialRegression(1);
    GeneralLinearRegressionResult linearFit = sample.LinearRegression();
    // Both parameter values and their covariances must agree.
    Assert.IsTrue(TestUtilities.IsNearlyEqual(polynomialFit.Parameters.ValuesVector, linearFit.Parameters.ValuesVector));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(polynomialFit.Parameters.CovarianceMatrix, linearFit.Parameters.CovarianceMatrix));
}
public void BivariateLinearRegressionNullDistribution() {

    // Create uncorrelated x and y values and do a linear fit.
    // The r-test and F-test statistics returned by the linear fits
    // should agree and both test statistics should follow their
    // claimed distributions.

    Random rng = new Random(987654321);
    NormalDistribution xDistribution = new NormalDistribution(1.0, 2.0);
    NormalDistribution yDistribution = new NormalDistribution(-3.0, 4.0);

    Sample rValues = new Sample();
    ContinuousDistribution rDistribution = null;
    Sample fValues = new Sample();
    ContinuousDistribution fDistribution = null;

    for (int i = 0; i < 127; i++) {
        BivariateSample points = new BivariateSample();
        for (int j = 0; j < 7; j++) {
            points.Add(xDistribution.GetRandomValue(rng), yDistribution.GetRandomValue(rng));
        }
        LinearRegressionResult result = points.LinearRegression();
        rValues.Add(result.R.Statistic.Value);
        rDistribution = result.R.Statistic.Distribution;
        fValues.Add(result.F.Statistic.Value);
        fDistribution = result.F.Statistic.Distribution;
        // The regression F statistic must equal the ANOVA F statistic.
        Assert.IsTrue(result.F.Statistic.Value == result.Anova.Result.Statistic.Value);
        // The r-test and F-test must assign the same significance.
        Assert.IsTrue(TestUtilities.IsNearlyEqual(
            result.R.Probability, result.F.Probability,
            new EvaluationSettings() { RelativePrecision = 1.0E-13, AbsolutePrecision = 1.0E-16 }
        ));
    }

    // Both statistics should conform to the distributions the results claim.
    Assert.IsTrue(rValues.KuiperTest(rDistribution).Probability > 0.05);
    Assert.IsTrue(fValues.KuiperTest(fDistribution).Probability > 0.05);
}
public void BivariateLinearPolynomialRegressionAgreement() {
    // A degree-1 polynomial fit should give the same answer as a linear fit.
    BivariateSample sample = new BivariateSample();
    double[] xValues = { 0.0, 3.0, 1.0, 4.0, 2.0 };
    double[] yValues = { 5.0, 6.0, 7.0, 8.0, 9.0 };
    for (int i = 0; i < xValues.Length; i++) {
        sample.Add(xValues[i], yValues[i]);
    }
    FitResult polynomialFit = sample.PolynomialRegression(1);
    FitResult linearFit = sample.LinearRegression();
    // Parameters, covariances, and goodness-of-fit statistics must all agree.
    Assert.IsTrue(TestUtilities.IsNearlyEqual(polynomialFit.Parameters, linearFit.Parameters));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(polynomialFit.CovarianceMatrix, linearFit.CovarianceMatrix));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(polynomialFit.GoodnessOfFit.Statistic, linearFit.GoodnessOfFit.Statistic));
}
public void BivariateLinearRegression() {
    // Do a set of linear regression fits.
    // (Comment previously said "logistic"; the model below is the line y = a0 + b0 x.)
    // Make sure not only that the fit parameters are what they should be,
    // but that their variances/covariances are as returned.
    Random rng = new Random(314159);
    // define line parameters
    double a0 = 2.0; double b0 = -1.0;
    // keep track of sample of returned a and b fit parameters
    BivariateSample ps = new BivariateSample();
    // also keep track of returned covariance estimates;
    // since these vary slightly from fit to fit, we will average them
    double caa = 0.0;
    double cbb = 0.0;
    double cab = 0.0;
    // also keep track of test statistics
    Sample fs = new Sample();
    // do 100 fits
    for (int k = 0; k < 100; k++) {
        // we should be able to draw x's from any distribution;
        // noise should be drawn from a normal distribution
        Distribution xd = new LogisticDistribution();
        Distribution nd = new NormalDistribution(0.0, 2.0);
        // generate a synthetic data set
        BivariateSample s = new BivariateSample();
        for (int i = 0; i < 25; i++) {
            double x = xd.GetRandomValue(rng);
            double y = a0 + b0 * x + nd.GetRandomValue(rng);
            s.Add(x, y);
        }
        // do the regression
        FitResult r = s.LinearRegression();
        // record best fit parameters
        double a = r.Parameter(0).Value;
        double b = r.Parameter(1).Value;
        ps.Add(a, b);
        // record estimated covariances
        caa += r.Covariance(0, 0);
        cbb += r.Covariance(1, 1);
        cab += r.Covariance(0, 1);
        // record the fit statistic
        fs.Add(r.GoodnessOfFit.Statistic);
        Console.WriteLine("F={0}", r.GoodnessOfFit.Statistic);
    }
    // average the recorded covariance estimates over all fits
    caa /= ps.Count;
    cbb /= ps.Count;
    cab /= ps.Count;
    // check that mean parameter estimates are what they should be: the underlying population parameters
    Assert.IsTrue(ps.X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(a0));
    Assert.IsTrue(ps.Y.PopulationMean.ConfidenceInterval(0.95).ClosedContains(b0));
    Console.WriteLine("{0} {1}", caa, ps.X.PopulationVariance);
    Console.WriteLine("{0} {1}", cbb, ps.Y.PopulationVariance);
    // check that parameter covariances are what they should be: the reported covariance estimates
    Assert.IsTrue(ps.X.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(caa));
    Assert.IsTrue(ps.Y.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(cbb));
    Assert.IsTrue(ps.PopulationCovariance.ConfidenceInterval(0.95).ClosedContains(cab));
    // check that F is distributed as it should be (diagnostic output only)
    Console.WriteLine(fs.KolmogorovSmirnovTest(new FisherDistribution(2, 48)).LeftProbability);
}
public void BivariateLinearRegression() {
    // Do a set of linear regression fits.
    // (Comment previously said "logistic"; the model below is the line y = a0 + b0 x.)
    // Make sure not only that the fit parameters are what they should be,
    // but that their variances/covariances are as returned.
    Random rng = new Random(314159);
    // define line parameters
    double a0 = 2.0; double b0 = -1.0;
    // keep track of sample of returned a and b fit parameters
    BivariateSample pSample = new BivariateSample();
    // also keep track of returned covariance estimates;
    // since these vary slightly from fit to fit, we will average them
    double caa = 0.0;
    double cbb = 0.0;
    double cab = 0.0;
    // record predictions (and claimed uncertainties) for a new point
    double x0 = 12.0;
    Sample ySample = new Sample();
    double ySigma = 0.0;
    // do 128 fits (comment previously said 100; the loop runs 128)
    for (int k = 0; k < 128; k++) {
        // we should be able to draw x's from any distribution;
        // noise should be drawn from a normal distribution
        ContinuousDistribution xd = new LogisticDistribution();
        ContinuousDistribution nd = new NormalDistribution(0.0, 2.0);
        // generate a synthetic data set
        BivariateSample sample = new BivariateSample();
        for (int i = 0; i < 16; i++) {
            double x = xd.GetRandomValue(rng);
            double y = a0 + b0 * x + nd.GetRandomValue(rng);
            sample.Add(x, y);
        }
        // do the regression
        LinearRegressionResult result = sample.LinearRegression();
        // test consistency of the different ways the result exposes the parameters
        Assert.IsTrue(result.Intercept == result.Parameters[0].Estimate);
        Assert.IsTrue(result.Intercept.Value == result.Parameters.Best[0]);
        Assert.IsTrue(TestUtilities.IsNearlyEqual(result.Intercept.Uncertainty, Math.Sqrt(result.Parameters.Covariance[0, 0])));
        Assert.IsTrue(result.Slope == result.Parameters[1].Estimate);
        Assert.IsTrue(result.Slope.Value == result.Parameters.Best[1]);
        Assert.IsTrue(TestUtilities.IsNearlyEqual(result.Slope.Uncertainty, Math.Sqrt(result.Parameters.Covariance[1, 1])));
        Assert.IsTrue(TestUtilities.IsNearlyEqual(result.R.Statistic, sample.CorrelationCoefficient));
        // record best fit parameters
        double a = result.Parameters.Best[0];
        double b = result.Parameters.Best[1];
        pSample.Add(a, b);
        // record estimated covariances
        caa += result.Parameters.Covariance[0, 0];
        cbb += result.Parameters.Covariance[1, 1];
        cab += result.Parameters.Covariance[0, 1];
        // record the prediction at x0 and its claimed uncertainty
        UncertainValue yPredict = result.Predict(x0);
        ySample.Add(yPredict.Value);
        ySigma += yPredict.Uncertainty;
        // the ANOVA decomposition should reproduce the total and residual sums of squares
        double SST = 0.0;
        foreach (double y in sample.Y) { SST += MoreMath.Sqr(y - sample.Y.Mean); }
        Assert.IsTrue(TestUtilities.IsNearlyEqual(SST, result.Anova.Total.SumOfSquares));
        double SSR = 0.0;
        foreach (double z in result.Residuals) { SSR += z * z; }
        Assert.IsTrue(TestUtilities.IsNearlyEqual(SSR, result.Anova.Residual.SumOfSquares));
    }
    // average the recorded estimates over all fits
    caa /= pSample.Count;
    cbb /= pSample.Count;
    cab /= pSample.Count;
    ySigma /= pSample.Count;
    // check that mean parameter estimates are what they should be: the underlying population parameters
    Assert.IsTrue(pSample.X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(a0));
    Assert.IsTrue(pSample.Y.PopulationMean.ConfidenceInterval(0.95).ClosedContains(b0));
    Console.WriteLine("{0} {1}", caa, pSample.X.PopulationVariance);
    Console.WriteLine("{0} {1}", cbb, pSample.Y.PopulationVariance);
    // check that parameter covariances are what they should be: the reported covariance estimates
    Assert.IsTrue(pSample.X.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(caa));
    Assert.IsTrue(pSample.Y.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(cbb));
    Assert.IsTrue(pSample.PopulationCovariance.ConfidenceInterval(0.95).ClosedContains(cab));
    // Check that the predicted ys conform to the model and the asserted uncertainty.
    Assert.IsTrue(ySample.PopulationMean.ConfidenceInterval(0.95).ClosedContains(a0 + x0 * b0));
    //Assert.IsTrue(ySample.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(ySigma));
}