public void LinearLogisticRegressionSimple() { Polynomial m = Polynomial.FromCoefficients(-1.0, 2.0); FrameTable table = new FrameTable(); table.AddColumn <double>("x"); table.AddColumn <string>("z"); Random rng = new Random(2); ContinuousDistribution xDistribution = new CauchyDistribution(4.0, 2.0); for (int i = 0; i < 24; i++) { double x = xDistribution.GetRandomValue(rng); double y = m.Evaluate(x); double p = 1.0 / (1.0 + Math.Exp(-y)); bool z = (rng.NextDouble() < p); table.AddRow(x, z.ToString()); } LinearLogisticRegressionResult fit = table["z"].As((string s) => Boolean.Parse(s)).LinearLogisticRegression(table["x"].As <double>()); Assert.IsTrue(fit.Intercept.ConfidenceInterval(0.99).ClosedContains(m.Coefficient(0))); Assert.IsTrue(fit.Slope.ConfidenceInterval(0.99).ClosedContains(m.Coefficient(1))); }
public void MultivariateLinearRegressionVariances() { // define model y = a + b0 * x0 + b1 * x1 + noise double a = -3.0; double b0 = 2.0; double b1 = -1.0; ContinuousDistribution x0distribution = new LaplaceDistribution(); ContinuousDistribution x1distribution = new CauchyDistribution(); ContinuousDistribution eDistribution = new NormalDistribution(0.0, 4.0); FrameTable data = new FrameTable(); data.AddColumns <double>("a", "da", "b0", "db0", "b1", "db1", "ab1Cov", "p", "dp"); // draw a sample from the model Random rng = new Random(4); for (int j = 0; j < 64; j++) { List <double> x0s = new List <double>(); List <double> x1s = new List <double>(); List <double> ys = new List <double>(); for (int i = 0; i < 16; i++) { double x0 = x0distribution.GetRandomValue(rng); double x1 = x1distribution.GetRandomValue(rng); double e = eDistribution.GetRandomValue(rng); double y = a + b0 * x0 + b1 * x1 + e; x0s.Add(x0); x1s.Add(x1); ys.Add(y); } // do a linear regression fit on the model MultiLinearRegressionResult result = ys.MultiLinearRegression( new Dictionary <string, IReadOnlyList <double> > { { "x0", x0s }, { "x1", x1s } } ); UncertainValue pp = result.Predict(-5.0, 6.0); data.AddRow( result.Intercept.Value, result.Intercept.Uncertainty, result.CoefficientOf("x0").Value, result.CoefficientOf("x0").Uncertainty, result.CoefficientOf("x1").Value, result.CoefficientOf("x1").Uncertainty, result.Parameters.CovarianceOf("Intercept", "x1"), pp.Value, pp.Uncertainty ); } // The estimated parameters should agree with the model that generated the data. // The variances of the estimates should agree with the claimed variances Assert.IsTrue(data["a"].As <double>().PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(data["da"].As <double>().Mean())); Assert.IsTrue(data["b0"].As <double>().PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(data["db0"].As <double>().Mean())); Assert.IsTrue(data["b1"].As <double>().PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(data["db1"].As <double>().Mean())); Assert.IsTrue(data["a"].As <double>().PopulationCovariance(data["b1"].As <double>()).ConfidenceInterval(0.99).ClosedContains(data["ab1Cov"].As <double>().Mean())); Assert.IsTrue(data["p"].As <double>().PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(data["dp"].As <double>().Median())); }
public void SpearmanNullDistributionTest() { // Pick independent distributions for x and y, which needn't be normal and needn't be related. ContinuousDistribution xDistrubtion = new UniformDistribution(); ContinuousDistribution yDistribution = new CauchyDistribution(); Random rng = new Random(1); // Generate bivariate samples of various sizes foreach (int n in TestUtilities.GenerateIntegerValues(4, 64, 8)) { Sample testStatistics = new Sample(); ContinuousDistribution testDistribution = null; for (int i = 0; i < 128; i++) { BivariateSample sample = new BivariateSample(); for (int j = 0; j < n; j++) { sample.Add(xDistrubtion.GetRandomValue(rng), yDistribution.GetRandomValue(rng)); } TestResult result = sample.SpearmanRhoTest(); testStatistics.Add(result.Statistic); testDistribution = result.Distribution; } TestResult r2 = testStatistics.KolmogorovSmirnovTest(testDistribution); Assert.IsTrue(r2.Probability > 0.05); Assert.IsTrue(testStatistics.PopulationMean.ConfidenceInterval(0.99).ClosedContains(testDistribution.Mean)); Assert.IsTrue(testStatistics.PopulationVariance.ConfidenceInterval(0.99).ClosedContains(testDistribution.Variance)); } }
public void MultivariateLinearRegressionSimple() { // define model y = a + b0 * x0 + b1 * x1 + noise double a = 1.0; double b0 = -2.0; double b1 = 3.0; ContinuousDistribution x0distribution = new CauchyDistribution(10.0, 5.0); ContinuousDistribution x1distribution = new UniformDistribution(Interval.FromEndpoints(-10.0, 20.0)); ContinuousDistribution noise = new NormalDistribution(0.0, 10.0); // draw a sample from the model Random rng = new Random(1); MultivariateSample sample = new MultivariateSample("x0", "x1", "y"); FrameTable table = new FrameTable(); table.AddColumns <double>("x0", "x1", "y"); for (int i = 0; i < 100; i++) { double x0 = x0distribution.GetRandomValue(rng); double x1 = x1distribution.GetRandomValue(rng); double eps = noise.GetRandomValue(rng); double y = a + b0 * x0 + b1 * x1 + eps; sample.Add(x0, x1, y); table.AddRow(x0, x1, y); } // do a linear regression fit on the model ParameterCollection oldResult = sample.LinearRegression(2).Parameters; MultiLinearRegressionResult newResult = table["y"].As <double>().MultiLinearRegression( table["x0"].As <double>(), table["x1"].As <double>() ); // the result should have the appropriate dimension Assert.IsTrue(oldResult.Count == 3); Assert.IsTrue(newResult.Parameters.Count == 3); // The parameters should match the model Assert.IsTrue(oldResult[0].Estimate.ConfidenceInterval(0.90).ClosedContains(b0)); Assert.IsTrue(oldResult[1].Estimate.ConfidenceInterval(0.90).ClosedContains(b1)); Assert.IsTrue(oldResult[2].Estimate.ConfidenceInterval(0.90).ClosedContains(a)); Assert.IsTrue(newResult.CoefficientOf(0).ConfidenceInterval(0.99).ClosedContains(b0)); Assert.IsTrue(newResult.CoefficientOf("x1").ConfidenceInterval(0.99).ClosedContains(b1)); Assert.IsTrue(newResult.Intercept.ConfidenceInterval(0.99).ClosedContains(a)); // The residuals should be compatible with the model predictions for (int i = 0; i < table.Rows.Count; i++) { FrameRow row = table.Rows[i]; double x0 = (double)row["x0"]; double x1 = (double)row["x1"]; double yp = newResult.Predict(x0, x1).Value; double y = (double)row["y"]; Assert.IsTrue(TestUtilities.IsNearlyEqual(newResult.Residuals[i], y - yp)); } }
public void BivariateAssociationDiscreteNullDistribution() { Random rng = new Random(1); // Pick very non-normal distributions for our non-parameteric tests ContinuousDistribution xd = new FrechetDistribution(1.0); ContinuousDistribution yd = new CauchyDistribution(); // Pick small sample sizes to get exact distributions foreach (int n in TestUtilities.GenerateIntegerValues(4, 24, 4)) { // Do a bunch of test runs, recording reported statistic for each. List <int> spearmanStatistics = new List <int>(); List <int> kendallStatistics = new List <int>(); DiscreteDistribution spearmanDistribution = null; DiscreteDistribution kendallDistribution = null; for (int i = 0; i < 512; i++) { List <double> x = new List <double>(); List <double> y = new List <double>(); for (int j = 0; j < n; j++) { x.Add(xd.GetRandomValue(rng)); y.Add(yd.GetRandomValue(rng)); } DiscreteTestStatistic spearman = Bivariate.SpearmanRhoTest(x, y).UnderlyingStatistic; if (spearman != null) { spearmanStatistics.Add(spearman.Value); spearmanDistribution = spearman.Distribution; } DiscreteTestStatistic kendall = Bivariate.KendallTauTest(x, y).UnderlyingStatistic; if (kendall != null) { kendallStatistics.Add(kendall.Value); kendallDistribution = kendall.Distribution; } } // Test whether statistics are actually distributed as claimed if (spearmanDistribution != null) { TestResult spearmanChiSquared = spearmanStatistics.ChiSquaredTest(spearmanDistribution); Assert.IsTrue(spearmanChiSquared.Probability > 0.01); } if (kendallDistribution != null) { TestResult kendallChiSquared = kendallStatistics.ChiSquaredTest(kendallDistribution); Assert.IsTrue(kendallChiSquared.Probability > 0.01); } } }
public void BivariateNullAssociation() { Random rng = new Random(31415926); // Create a data structure to hold the results of Pearson, Spearman, and Kendall tests. FrameTable data = new FrameTable(); data.AddColumn <double>("r"); data.AddColumn <double>("ρ"); data.AddColumn <double>("τ"); // Create variables to hold the claimed distribution of each test statistic. ContinuousDistribution PRD = null; ContinuousDistribution SRD = null; ContinuousDistribution KTD = null; // Generate a large number of bivariate samples and conduct our three tests on each. ContinuousDistribution xDistribution = new LognormalDistribution(); ContinuousDistribution yDistribution = new CauchyDistribution(); for (int j = 0; j < 100; j++) { List <double> x = new List <double>(); List <double> y = new List <double>(); for (int i = 0; i < 100; i++) { x.Add(xDistribution.GetRandomValue(rng)); y.Add(yDistribution.GetRandomValue(rng)); } TestResult PR = Bivariate.PearsonRTest(x, y); TestResult SR = Bivariate.SpearmanRhoTest(x, y); TestResult KT = Bivariate.KendallTauTest(x, y); PRD = PR.Statistic.Distribution; SRD = SR.Statistic.Distribution; KTD = KT.Statistic.Distribution; data.AddRow(new Dictionary <string, object>() { { "r", PR.Statistic.Value }, { "ρ", SR.Statistic.Value }, { "τ", KT.Statistic.Value } }); } Assert.IsTrue(data["r"].As <double>().KolmogorovSmirnovTest(PRD).Probability > 0.05); Assert.IsTrue(data["ρ"].As <double>().KolmogorovSmirnovTest(SRD).Probability > 0.05); Assert.IsTrue(data["τ"].As <double>().KolmogorovSmirnovTest(KTD).Probability > 0.05); }
public void BivariatePolynomialRegressionCovariance() { // do a set of polynomial regression fits // make sure not only that the fit parameters are what they should be, but that their variances/covariances are as claimed Random rng = new Random(271828); // define logistic parameters double[] a = new double[] { 0.0, -1.0, 2.0, -3.0 }; // keep track of sample of returned a and b fit parameters MultivariateSample A = new MultivariateSample(a.Length); // also keep track of returned covariance estimates // since these vary slightly from fit to fit, we will average them SymmetricMatrix C = new SymmetricMatrix(a.Length); // also keep track of test statistics Sample F = new Sample(); // do 100 fits for (int k = 0; k < 100; k++) { // we should be able to draw x's from any distribution; noise should be drawn from a normal distribution ContinuousDistribution xd = new CauchyDistribution(); ContinuousDistribution nd = new NormalDistribution(0.0, 4.0); // generate a synthetic data set BivariateSample s = new BivariateSample(); for (int j = 0; j < 20; j++) { double x = xd.GetRandomValue(rng); double y = nd.GetRandomValue(rng); for (int i = 0; i < a.Length; i++) { y += a[i] * MoreMath.Pow(x, i); } s.Add(x, y); } // do the regression PolynomialRegressionResult r = s.PolynomialRegression(a.Length - 1); ColumnVector ps = r.Parameters.ValuesVector; // record best fit parameters A.Add(ps); // record estimated covariances C += r.Parameters.CovarianceMatrix; // record the fit statistic F.Add(r.F.Statistic.Value); } C = (1.0 / A.Count) * C; // allow matrix division by real numbers // check that mean parameter estimates are what they should be: the underlying population parameters for (int i = 0; i < A.Dimension; i++) { Assert.IsTrue(A.Column(i).PopulationMean.ConfidenceInterval(0.95).ClosedContains(a[i])); } // check that parameter covarainces are what they should be: the reported covariance estimates for (int i = 0; i < A.Dimension; i++) { for (int j = i; j < A.Dimension; j++) { Assert.IsTrue(A.TwoColumns(i, j).PopulationCovariance.ConfidenceInterval(0.95).ClosedContains(C[i, j])); } } // check that F is distributed as it should be //Console.WriteLine(fs.KolmogorovSmirnovTest(new FisherDistribution(2, 48)).LeftProbability); }
public void LinearRegressionSimple() { double a = -1.0; double b = 2.0; ContinuousDistribution xDistribution = new CauchyDistribution(); ContinuousDistribution eDistribution = new NormalDistribution(); int n = 16; Random rng = new Random(1); double[] x = new double[n]; double[] y = new double[n]; for (int i = 0; i < 16; i++) { x[i] = xDistribution.GetRandomValue(rng); y[i] = a + b * x[i] + eDistribution.GetRandomValue(rng); } LinearRegressionResult result = y.LinearRegression(x); // Parameters should be right Assert.IsTrue(result.Intercept.ConfidenceInterval(0.95).ClosedContains(a)); Assert.IsTrue(result.Slope.ConfidenceInterval(0.95).ClosedContains(b)); // Reported values should be consistent Assert.IsTrue(result.Intercept == result.Parameters["Intercept"].Estimate); Assert.IsTrue(result.Intercept.Value == result.Parameters.ValuesVector[result.Parameters.IndexOf("Intercept")]); Assert.IsTrue(TestUtilities.IsNearlyEqual(result.Intercept.Uncertainty, Math.Sqrt(result.Parameters.VarianceOf("Intercept")))); Assert.IsTrue(result.Slope == result.Parameters["Slope"].Estimate); Assert.IsTrue(result.Slope.Value == result.Parameters.ValuesVector[result.Parameters.IndexOf("Slope")]); Assert.IsTrue(TestUtilities.IsNearlyEqual(result.Slope.Uncertainty, Math.Sqrt(result.Parameters.VarianceOf("Slope")))); // Residuals should agree with definition for (int i = 0; i < x.Length; i++) { double yp = result.Predict(x[i]).Value; Assert.IsTrue(TestUtilities.IsNearlyEqual(result.Residuals[i], y[i] - yp)); } // R and R-squared agree Assert.IsTrue(TestUtilities.IsNearlyEqual(result.RSquared, MoreMath.Sqr(result.R.Statistic.Value))); // F-test and R-test agree Assert.IsTrue(TestUtilities.IsNearlyEqual(result.F.Probability, result.R.Probability)); // ANOVA's sums of squares are correct double SST = y.Variance() * y.Length; Assert.IsTrue(TestUtilities.IsNearlyEqual(SST, result.Anova.Total.SumOfSquares)); double SSR = 0.0; foreach (double z in result.Residuals) { SSR += z * z; } Assert.IsTrue(TestUtilities.IsNearlyEqual(SSR, result.Anova.Residual.SumOfSquares)); Assert.IsTrue(TestUtilities.IsNearlyEqual(SSR, result.SumOfSquaredResiduals)); // R is same as correlation coefficient Assert.IsTrue(TestUtilities.IsNearlyEqual(x.CorrelationCoefficient(y), result.R.Statistic.Value)); }