public void BivariatePolynomialRegression() {

    // Fit a cubic to many independent synthetic data sets and verify that
    //   (1) the fitted coefficients are consistent with the coefficients used to generate the data, and
    //   (2) the scatter of the fitted coefficients is consistent with the covariance the fits report.

    Random rng = new Random(271828);

    // polynomial coefficients used to generate the data; entry i multiplies x^i
    double[] trueCoefficients = new double[] { 0.0, -1.0, 2.0, -3.0 };
    int parameterCount = trueCoefficients.Length;

    // one best-fit coefficient vector is recorded per fit
    MultivariateSample fittedCoefficients = new MultivariateSample(parameterCount);

    // reported covariance matrices are summed here and turned into an average after the loop,
    // because the reported estimate varies slightly from fit to fit
    SymmetricMatrix reportedCovariance = new SymmetricMatrix(parameterCount);

    // goodness-of-fit statistic of every fit (collected, but no assertion is made on it below)
    Sample fitStatistics = new Sample();

    const int fitCount = 100;
    const int pointsPerFit = 20;

    for (int fit = 0; fit < fitCount; fit++) {

        // the x values may come from any distribution; the noise is drawn from a normal distribution
        Distribution abscissaDistribution = new CauchyDistribution();
        Distribution noiseDistribution = new NormalDistribution(0.0, 4.0);

        // build the synthetic data set: y = noise + sum_i a_i x^i
        BivariateSample data = new BivariateSample();
        for (int point = 0; point < pointsPerFit; point++) {
            double x = abscissaDistribution.GetRandomValue(rng);
            // y starts as the noise term; the polynomial terms are accumulated on top, lowest order first
            double y = noiseDistribution.GetRandomValue(rng);
            for (int order = 0; order < parameterCount; order++) {
                y += trueCoefficients[order] * MoreMath.Pow(x, order);
            }
            data.Add(x, y);
        }

        // fit a polynomial of the generating degree
        FitResult fitResult = data.PolynomialRegression(parameterCount - 1);

        // record the best-fit coefficients, the reported covariance, and the fit statistic
        ColumnVector bestFit = fitResult.Parameters;
        fittedCoefficients.Add(bestFit);
        reportedCovariance += fitResult.CovarianceMatrix;
        fitStatistics.Add(fitResult.GoodnessOfFit.Statistic);

    }

    // convert the summed covariance matrices into their average
    reportedCovariance = (1.0 / fittedCoefficients.Count) * reportedCovariance;

    // the mean of each fitted coefficient should agree with the generating coefficient
    for (int i = 0; i < fittedCoefficients.Dimension; i++) {
        var meanEstimate = fittedCoefficients.Column(i).PopulationMean;
        Console.WriteLine("{0} {1}", meanEstimate, trueCoefficients[i]);
        Assert.IsTrue(meanEstimate.ConfidenceInterval(0.95).ClosedContains(trueCoefficients[i]));
    }

    // the observed covariance of each coefficient pair should agree with the averaged reported covariance
    for (int i = 0; i < fittedCoefficients.Dimension; i++) {
        for (int j = i; j < fittedCoefficients.Dimension; j++) {
            var observedCovariance = fittedCoefficients.TwoColumns(i, j).PopulationCovariance;
            Console.WriteLine("{0} {1} {2} {3}", i, j, reportedCovariance[i, j], observedCovariance);
            Assert.IsTrue(observedCovariance.ConfidenceInterval(0.95).ClosedContains(reportedCovariance[i, j]));
        }
    }

    // NOTE: the distribution of the collected fit statistics is not checked here.

}
public void MultivariateLinearRegressionAgreement() {

    // Run a linear regression on the same two-column data through three different entry points
    // and verify that the parameters, covariances, and fit statistics agree.

    Random rng = new Random(1);

    // ten rows of two uniform random values each
    MultivariateSample original = new MultivariateSample(2);
    for (int row = 0; row < 10; row++) {
        double first = rng.NextDouble();
        double second = rng.NextDouble();
        original.Add(first, second);
    }

    // (a) regression on the original sample, with index 0 passed to LinearRegression
    FitResult originalFit = original.LinearRegression(0);
    ColumnVector originalParameters = originalFit.Parameters;
    SymmetricMatrix originalCovariance = originalFit.CovarianceMatrix;

    // (b) regression on the sample with columns requested in the order (1, 0), with index 1 passed
    MultivariateSample swapped = original.Columns(1, 0);
    FitResult swappedFit = swapped.LinearRegression(1);
    ColumnVector swappedParameters = swappedFit.Parameters;
    SymmetricMatrix swappedCovariance = swappedFit.CovarianceMatrix;

    // entries of (a) are compared against the index-swapped entries of (b)
    Assert.IsTrue(TestUtilities.IsNearlyEqual(originalParameters[0], swappedParameters[1]));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(originalParameters[1], swappedParameters[0]));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(originalCovariance[0, 0], swappedCovariance[1, 1]));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(originalCovariance[0, 1], swappedCovariance[1, 0]));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(originalCovariance[1, 1], swappedCovariance[0, 0]));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(originalFit.GoodnessOfFit.Statistic, swappedFit.GoodnessOfFit.Statistic));

    // (c) regression through the bivariate view built from columns (1, 0)
    BivariateSample bivariate = original.TwoColumns(1, 0);
    FitResult bivariateFit = bivariate.LinearRegression();
    ColumnVector bivariateParameters = bivariateFit.Parameters;
    SymmetricMatrix bivariateCovariance = bivariateFit.CovarianceMatrix;

    // (a) and (c) are compared directly, without any index swapping
    Assert.IsTrue(TestUtilities.IsNearlyEqual(originalParameters, bivariateParameters));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(originalCovariance, bivariateCovariance));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(originalFit.GoodnessOfFit.Statistic, bivariateFit.GoodnessOfFit.Statistic));

}
public void BivariateLogisticRegression() {

    // Generate binary outcomes from a logistic model, store them in both a bivariate and a
    // multivariate sample, run the logistic fits, and print the resulting parameters side by side.
    // This method only writes to the console; it makes no assertions.

    // generating model: z = coefficients[0] * x + coefficients[1]
    double[] coefficients = new double[] { -0.1, 1.0 };

    Random rng = new Random(1);
    UniformDistribution xDistribution = new UniformDistribution(Interval.FromEndpoints(-4.0, 4.0));

    BivariateSample bivariateData = new BivariateSample();
    MultivariateSample multivariateData = new MultivariateSample(2);

    for (int point = 0; point < 1000; point++) {

        double x = xDistribution.GetRandomValue(rng);

        // logistic probability p = e^z / (1 + e^z)
        double z = coefficients[0] * x + coefficients[1];
        double expZ = Math.Exp(z);
        double probability = expZ / (1.0 + expZ);

        // outcome is 1 with that probability, otherwise 0
        double y;
        if (rng.NextDouble() < probability) {
            y = 1.0;
        } else {
            y = 0.0;
        }

        // the identical point goes into both containers
        bivariateData.Add(x, y);
        multivariateData.Add(x, y);

    }

    // diagnostic output: two ratios built from the sample covariance, variances, and mean
    Console.WriteLine(bivariateData.Covariance / bivariateData.X.Variance / bivariateData.Y.Mean / (1.0 - bivariateData.Y.Mean));
    Console.WriteLine(bivariateData.Covariance / bivariateData.X.Variance / bivariateData.Y.Variance);

    // fit via the bivariate sample, via a bivariate view of the multivariate sample, and via the multivariate sample
    FitResult bivariateFit = bivariateData.LinearLogisticRegression();
    // this fit is executed but its result is not inspected below
    FitResult bivariateViewFit = multivariateData.TwoColumns(0, 1).LinearLogisticRegression();
    FitResult multivariateFit = multivariateData.LogisticLinearRegression(1);

    // print each parameter of the bivariate fit next to the multivariate fit's parameter at the same index
    for (int index = 0; index < bivariateFit.Dimension; index++) {
        Console.WriteLine("{0} {1} {2}", index, bivariateFit.Parameter(index), multivariateFit.Parameter(index));
    }

}
public void MultivariateMoments() {

    // Verify that moments obtained from the multivariate sample agree with the
    // mean, variance, and covariance reported by its column views.

    const int dimension = 3;
    const int rowCount = 10;

    // one distribution per column
    Distribution normal = new NormalDistribution();
    Distribution exponential = new ExponentialDistribution();
    Distribution uniform = new UniformDistribution();

    Random rng = new Random(1);

    // fill the sample; the three values of a row are drawn in column order
    MultivariateSample sample = new MultivariateSample(dimension);
    for (int row = 0; row < rowCount; row++) {
        double v0 = normal.GetRandomValue(rng);
        double v1 = exponential.GetRandomValue(rng);
        double v2 = uniform.GetRandomValue(rng);
        sample.Add(v0, v1, v2);
    }

    for (int column = 0; column < dimension; column++) {

        // exponent vector with a single non-zero entry, at this column
        int[] powers = new int[dimension];

        // exponent 1: the raw moment is compared with the column mean
        powers[column] = 1;
        Assert.IsTrue(TestUtilities.IsNearlyEqual(sample.Column(column).Mean, sample.Moment(powers)));

        // exponent 2: the moment about the mean is compared with the column variance
        powers[column] = 2;
        Assert.IsTrue(TestUtilities.IsNearlyEqual(sample.Column(column).Variance, sample.MomentAboutMean(powers)));

        // exponent 1 in two different columns: the moment about the mean is compared with the pair's covariance
        for (int other = 0; other < column; other++) {
            int[] mixedPowers = new int[dimension];
            mixedPowers[column] = 1;
            mixedPowers[other] = 1;
            Assert.IsTrue(TestUtilities.IsNearlyEqual(sample.TwoColumns(column, other).Covariance, sample.MomentAboutMean(mixedPowers)));
        }

    }

}