Exemplo n.º 1
0
        // Verifies that the uncertainties attached to the population moment estimates of a
        // sample (the mean and the 2nd, 3rd and 4th moments about the mean) agree with the
        // variances those estimates actually exhibit over many generated samples.
        public void SamplePopulationMomentEstimateVariances()
        {
            Distribution population = new LognormalDistribution();

            // repeat the comparison for a range of sample sizes
            foreach (int size in TestUtilities.GenerateIntegerValues(4, 32, 8)) {

                Console.WriteLine("n={0}", size);

                // one row per generated sample: the estimated values go into one container,
                // MoreMath.Sqr of the claimed uncertainties (treated below as the claimed
                // variances) into the other
                MultivariateSample estimatorValues = new MultivariateSample("M1", "C2", "C3", "C4");
                MultivariateSample claimedVariances = new MultivariateSample("M1", "C2", "C3", "C4");

                int trial = 0;
                while (trial < 256) {

                    // the third argument takes a distinct value for every (size, trial) pair;
                    // presumably it seeds the generator -- confirm against TestUtilities.CreateSample
                    Sample drawn = TestUtilities.CreateSample(population, size, 512 * size + trial + 1);

                    UncertainValue mean = drawn.PopulationMean;
                    UncertainValue second = drawn.PopulationVariance;
                    UncertainValue third = drawn.PopulationMomentAboutMean(3);
                    UncertainValue fourth = drawn.PopulationMomentAboutMean(4);

                    estimatorValues.Add(mean.Value, second.Value, third.Value, fourth.Value);
                    claimedVariances.Add(
                        MoreMath.Sqr(mean.Uncertainty),
                        MoreMath.Sqr(second.Uncertainty),
                        MoreMath.Sqr(third.Uncertainty),
                        MoreMath.Sqr(fourth.Uncertainty)
                    );

                    trial++;
                }

                // for each estimator, the mean claimed variance must fall inside the 95% confidence
                // interval of the variance measured across the generated samples
                for (int column = 0; column < estimatorValues.Dimension; column++) {
                    Console.WriteLine(
                        "{0} {1} {2}",
                        estimatorValues.Column(column).Name,
                        estimatorValues.Column(column).PopulationVariance,
                        claimedVariances.Column(column).Mean
                    );
                    Assert.IsTrue(
                        estimatorValues.Column(column).PopulationVariance.ConfidenceInterval(0.95).ClosedContains(
                            claimedVariances.Column(column).Mean
                        )
                    );
                }

            }
        }
        // Runs a series of polynomial regression fits on synthetic data and checks two things:
        // that the fitted coefficients scatter around the true coefficients, and that the
        // covariance matrix reported by the fits matches the covariance actually observed
        // among the fitted coefficients.
        public void BivariatePolynomialRegression()
        {
            Random rng = new Random(271828);

            // true polynomial coefficients, indexed by power of x
            double[] a = new double[] { 0.0, -1.0, 2.0, -3.0 };

            // the best-fit coefficients returned by each fit
            MultivariateSample fittedCoefficients = new MultivariateSample(a.Length);

            // running sum of the covariance matrices reported by the fits; these differ
            // slightly from fit to fit, so they are averaged after the loop
            SymmetricMatrix reportedCovariance = new SymmetricMatrix(a.Length);

            // the goodness-of-fit statistic of each fit (collected, but not tested below)
            Sample fitStatistics = new Sample();

            int fit = 0;
            while (fit < 100) {

                // abscissas may come from any distribution; the noise is normal
                Distribution abscissaDistribution = new CauchyDistribution();
                Distribution noiseDistribution = new NormalDistribution(0.0, 4.0);

                // build a synthetic data set of 20 points: noise plus the polynomial value
                BivariateSample data = new BivariateSample();
                for (int point = 0; point < 20; point++) {
                    double x = abscissaDistribution.GetRandomValue(rng);
                    double y = noiseDistribution.GetRandomValue(rng);
                    for (int power = 0; power < a.Length; power++) {
                        y += a[power] * MoreMath.Pow(x, power);
                    }
                    data.Add(x, y);
                }

                // fit a polynomial of the same order as the one used to generate the data
                FitResult result = data.PolynomialRegression(a.Length - 1);

                // remember the fitted coefficients, the reported covariances and the fit statistic
                ColumnVector best = result.Parameters;
                fittedCoefficients.Add(best);
                reportedCovariance += result.CovarianceMatrix;
                fitStatistics.Add(result.GoodnessOfFit.Statistic);

                fit++;
            }

            // turn the summed covariance matrices into their average
            reportedCovariance = (1.0 / fittedCoefficients.Count) * reportedCovariance;

            // the mean of each fitted coefficient should be consistent with the true coefficient
            for (int p = 0; p < fittedCoefficients.Dimension; p++) {
                Console.WriteLine("{0} {1}", fittedCoefficients.Column(p).PopulationMean, a[p]);
                Assert.IsTrue(fittedCoefficients.Column(p).PopulationMean.ConfidenceInterval(0.95).ClosedContains(a[p]));
            }

            // the covariance observed between each pair of fitted coefficients should be
            // consistent with the averaged covariance reported by the fits
            for (int p = 0; p < fittedCoefficients.Dimension; p++) {
                for (int q = p; q < fittedCoefficients.Dimension; q++) {
                    Console.WriteLine(
                        "{0} {1} {2} {3}",
                        p, q, reportedCovariance[p, q], fittedCoefficients.TwoColumns(p, q).PopulationCovariance
                    );
                    Assert.IsTrue(
                        fittedCoefficients.TwoColumns(p, q).PopulationCovariance.ConfidenceInterval(0.95).ClosedContains(
                            reportedCovariance[p, q]
                        )
                    );
                }
            }

            // NOTE: the distribution of the collected fit statistics is not checked here
        }
Exemplo n.º 3
0
        // Checks that the parameter uncertainties reported by a Weibull fit are meaningful:
        // averaged over many fits, each should match the standard deviation of the
        // corresponding fitted parameter value across those fits.
        public void WeibullFitUncertainties()
        {
            // the population every sample is drawn from
            Distribution population = new WeibullDistribution(2.5, 1.5);

            // fitted parameter values, one (first, second) pair per fit
            BivariateSample fitted = new BivariateSample();

            // per fit: uncertainty of parameter 0, uncertainty of parameter 1, and
            // their reported covariance (the covariance column is recorded but not checked)
            MultivariateSample reported = new MultivariateSample(3);

            int run = 0;
            while (run < 50) {
                Sample drawn = CreateSample(population, 10, run);
                FitResult result = WeibullDistribution.FitToSample(drawn);

                UncertainValue first = result.Parameter(0);
                UncertainValue second = result.Parameter(1);

                fitted.Add(first.Value, second.Value);
                reported.Add(first.Uncertainty, second.Uncertainty, result.Covariance(0,1));

                run++;
            }

            // the mean reported uncertainty of each parameter must lie inside the 95% confidence
            // interval of the standard deviation of that parameter's fitted values
            Assert.IsTrue(
                fitted.X.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(reported.Column(0).Mean)
            );
            Assert.IsTrue(
                fitted.Y.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(reported.Column(1).Mean)
            );
        }
        // Diagnostic comparison of a singular value decomposition and a principal component
        // analysis of the same 1000-row, two-column data set. The method only writes to the
        // console; it makes no assertions.
        public void PC()
        {
            Random rng = new Random(1);
            double s = 1.0 / Math.Sqrt(2.0);

            // Build the data twice, as a matrix (for the SVD) and as a sample (for the PCA).
            // r1 and r2 are uniform on [-1, 1); x and y mix them with weights 4 and 9.
            MultivariateSample MS = new MultivariateSample(2);
            RectangularMatrix R = new RectangularMatrix(1000, 2);
            for (int i = 0; i < 1000; i++) {
                double r1 = 2.0 * rng.NextDouble() - 1.0;
                double r2 = 2.0 * rng.NextDouble() - 1.0;
                double x = r1 * 4.0 * s - r2 * 9.0 * s;
                double y = r1 * 4.0 * s + r2 * 9.0 * s;
                R[i, 0] = x; R[i, 1] = y;
                MS.Add(x, y);
            }

            Console.WriteLine("x {0} {1}", MS.Column(0).Mean, MS.Column(0).Variance);
            Console.WriteLine("y {0} {1}", MS.Column(1).Mean, MS.Column(1).Variance);

            Console.WriteLine("SVD");

            // print each singular value together with its right singular vector
            SingularValueDecomposition SVD = R.SingularValueDecomposition();
            for (int i = 0; i < SVD.Dimension; i++) {
                Console.WriteLine("{0} {1}", i, SVD.SingularValue(i));
                ColumnVector v = SVD.RightSingularVector(i);
                Console.WriteLine("  {0} {1}", v[0], v[1]);
            }

            Console.WriteLine("PCA");

            // print each principal component's index, weight, variance fractions and direction
            PrincipalComponentAnalysis PCA = MS.PrincipalComponentAnalysis();
            Console.WriteLine("Dimension = {0} Count = {1}", PCA.Dimension, PCA.Count);
            for (int i = 0; i < PCA.Dimension; i++) {
                PrincipalComponent component = PCA.Component(i);
                Console.WriteLine("  {0} {1} {2} {3}", component.Index, component.Weight, component.VarianceFraction, component.CumulativeVarianceFraction);
                RowVector v = component.NormalizedVector();
                Console.WriteLine("  {0} {1}", v[0], v[1]);
            }

            // reconstruct the entries of rows 0 and 100 of R from U, the singular values and V,
            // and print each reconstructed entry next to the original one
            SquareMatrix U = SVD.LeftTransformMatrix();
            SquareMatrix V = SVD.RightTransformMatrix();
            double x1 = U[0, 0] * SVD.SingularValue(0) * V[0, 0] + U[0, 1] * SVD.SingularValue(1) * V[0, 1];
            Console.WriteLine("x1 = {0} {1}", x1, R[0, 0]);
            double y1 = U[0, 0] * SVD.SingularValue(0) * V[1, 0] + U[0, 1] * SVD.SingularValue(1) * V[1, 1];
            Console.WriteLine("y1 = {0} {1}", y1, R[0, 1]);
            double x100 = U[100, 0] * SVD.SingularValue(0) * V[0, 0] + U[100, 1] * SVD.SingularValue(1) * V[0, 1];
            Console.WriteLine("x100 = {0} {1}", x100, R[100, 0]);
            double y100 = U[100, 0] * SVD.SingularValue(0) * V[1, 0] + U[100, 1] * SVD.SingularValue(1) * V[1, 1];
            Console.WriteLine("y100 = {0} {1}", y100, R[100, 1]);

            // the same reconstruction, expressed through the right singular vectors
            ColumnVector d1 = U[0,0] * SVD.SingularValue(0) * SVD.RightSingularVector(0) +
                U[0, 1] * SVD.SingularValue(1) * SVD.RightSingularVector(1);
            Console.WriteLine("d1 = ({0} {1})", d1[0], d1[1]);
            ColumnVector d100 = U[100, 0] * SVD.SingularValue(0) * SVD.RightSingularVector(0) +
                U[100, 1] * SVD.SingularValue(1) * SVD.RightSingularVector(1);
            Console.WriteLine("d100 = ({0} {1})", d100[0], d100[1]);

            // print the first entry of the PCA-transformed sample next to the first row of U;
            // the using statement disposes the enumerator even if reading or printing throws
            Console.WriteLine("compare");
            MultivariateSample RS = PCA.TransformedSample();
            using (IEnumerator<double[]> RSE = RS.GetEnumerator()) {
                RSE.MoveNext();
                double[] dv1 = RSE.Current;
                Console.WriteLine("{0} {1}", dv1[0], dv1[1]);
                Console.WriteLine("{0} {1}", U[0, 0], U[0, 1]);
            }
        }
 // Returns the sum of the Variance values of all columns of the given sample,
 // accumulated in column order starting from column 0.
 private double GetTotalVariance(MultivariateSample sample)
 {
     double sum = 0.0;
     int column = 0;
     while (column < sample.Dimension) {
         sum += sample.Column(column).Variance;
         column++;
     }
     return sum;
 }
        // Exercises the principal component analysis of a small three-column sample: checks the
        // bookkeeping of the components (index, vectors, variance fractions) and then verifies,
        // by rebuilding the sample from its leading components, that the cumulative variance
        // fractions are the fractions of total variance actually reproduced.
        public void PrincipalComponentAnalysis()
        {
            int D = 3;
            int N = 10;

            // build the sample from uniform deviates, scaled and shifted differently per column
            Random rng = new Random(1);
            MultivariateSample sample = new MultivariateSample(D);
            for (int row = 0; row < N; row++) {
                double first = 1.0 * rng.NextDouble() - 1.0;
                double second = 4.0 * rng.NextDouble() - 2.0;
                double third = 9.0 * rng.NextDouble() - 3.0;
                sample.Add(first, second, third);
            }

            // vector of column means, used later as the origin of the reconstruction
            RowVector mu = new RowVector(D);
            for (int column = 0; column < D; column++) {
                mu[column] = sample.Column(column).Mean;
            }

            // total variance of the original sample
            double tVariance = GetTotalVariance(sample);
            Console.WriteLine(tVariance);

            // run the analysis; it must report the sample's own dimension and count
            PrincipalComponentAnalysis pca = sample.PrincipalComponentAnalysis();
            Assert.IsTrue(pca.Dimension == sample.Dimension);
            Assert.IsTrue(pca.Count == sample.Count);

            // component-by-component consistency checks
            for (int k = 0; k < pca.Dimension; k++) {
                PrincipalComponent current = pca.Component(k);

                // index matches position, scaled vector is weight times normalized vector,
                // and the variance fraction is a proper fraction
                Assert.IsTrue(current.Index == k);
                Assert.IsTrue(TestUtilities.IsNearlyEqual(current.Weight * current.NormalizedVector(), current.ScaledVector()));
                Assert.IsTrue((0.0 <= current.VarianceFraction) && (current.VarianceFraction <= 1.0));

                if (k > 0) {
                    // variance fractions do not increase, and the cumulative fraction
                    // grows by exactly this component's fraction
                    PrincipalComponent previous = pca.Component(k - 1);
                    Assert.IsTrue(current.VarianceFraction <= previous.VarianceFraction);
                    Assert.IsTrue(TestUtilities.IsNearlyEqual(previous.CumulativeVarianceFraction + current.VarianceFraction, current.CumulativeVarianceFraction));
                } else {
                    // for the first component the cumulative fraction is its own fraction
                    Assert.IsTrue(current.VarianceFraction == current.CumulativeVarianceFraction);
                }
            }

            // the sample expressed in terms of the principal components
            MultivariateSample csample = pca.TransformedSample();

            // rebuild the sample from its first 1, 2, ..., D components and compare the variance
            // reproduced each time with the claimed cumulative variance fraction
            for (int kept = 1; kept <= D; kept++) {
                MultivariateSample rebuilt = new MultivariateSample(D);
                foreach (double[] coordinates in csample) {
                    // start at the mean and add the contribution of each retained component
                    RowVector point = mu.Copy();
                    for (int k = 0; k < kept; k++) {
                        PrincipalComponent component = pca.Component(k);
                        point += (coordinates[k] * component.Weight) * component.NormalizedVector();
                    }
                    rebuilt.Add(point);
                }
                double rVariance = GetTotalVariance(rebuilt);
                Console.WriteLine("{0} {1}", kept, rVariance);
                Assert.IsTrue(TestUtilities.IsNearlyEqual(rVariance / tVariance, pca.Component(kept-1).CumulativeVarianceFraction));
            }
        }
        // Checks that the multivariate moment methods of a three-column sample agree with the
        // per-column mean and variance and with the pairwise covariance.
        public void MultivariateMoments()
        {
            // fill a three-column sample, drawing each column from a different distribution
            MultivariateSample data = new MultivariateSample(3);
            Distribution normal = new NormalDistribution();
            Distribution exponential = new ExponentialDistribution();
            Distribution uniform = new UniformDistribution();
            Random rng = new Random(1);
            int count = 10;
            for (int row = 0; row < count; row++) {
                data.Add(normal.GetRandomValue(rng), exponential.GetRandomValue(rng), uniform.GetRandomValue(rng));
            }

            for (int column = 0; column < 3; column++) {

                // powers vector selecting a single column
                int[] powers = new int[3];

                // power 1 in one column: the raw moment must equal that column's mean
                powers[column] = 1;
                Assert.IsTrue(TestUtilities.IsNearlyEqual(data.Column(column).Mean, data.Moment(powers)));

                // power 2 in one column: the moment about the mean must equal that column's variance
                powers[column] = 2;
                Assert.IsTrue(TestUtilities.IsNearlyEqual(data.Column(column).Variance, data.MomentAboutMean(powers)));

                // power 1 in two columns: the moment about the mean must equal their covariance
                for (int other = 0; other < column; other++) {
                    int[] mixed = new int[3];
                    mixed[column] = 1;
                    mixed[other] = 1;
                    Assert.IsTrue(TestUtilities.IsNearlyEqual(data.TwoColumns(column, other).Covariance, data.MomentAboutMean(mixed)));
                }
            }
        }