// Verifies that the uncertainties attached to sample-based estimates of the population mean
// and of the 2nd, 3rd and 4th population moments about the mean are meaningful: across many
// samples, the average claimed variance must be compatible with the observed scatter of
// the estimates themselves.
public void SamplePopulationMomentEstimateVariances() {

    Distribution population = new LognormalDistribution();

    // repeat the whole experiment for each sample size supplied by the test utilities
    foreach (int n in TestUtilities.GenerateIntegerValues(4, 32, 8)) {

        Console.WriteLine("n={0}", n);

        // one row per generated sample: the estimates, and the variances claimed for them
        MultivariateSample estimates = new MultivariateSample("M1", "C2", "C3", "C4");
        MultivariateSample variances = new MultivariateSample("M1", "C2", "C3", "C4");

        int trial = 0;
        while (trial < 256) {

            // the last argument is different for every (n, trial) combination
            Sample draw = TestUtilities.CreateSample(population, n, 512 * n + trial + 1);

            UncertainValue[] moments = new UncertainValue[] {
                draw.PopulationMean,
                draw.PopulationVariance,
                draw.PopulationMomentAboutMean(3),
                draw.PopulationMomentAboutMean(4)
            };

            double[] values = new double[moments.Length];
            double[] claimed = new double[moments.Length];
            for (int k = 0; k < moments.Length; k++) {
                values[k] = moments[k].Value;
                // an uncertainty is a standard deviation, so square it to get a variance
                claimed[k] = MoreMath.Sqr(moments[k].Uncertainty);
            }

            estimates.Add(values);
            variances.Add(claimed);

            trial++;
        }

        // for each estimator, the mean claimed variance must lie inside the 95% confidence
        // interval of the variance actually observed among the estimates
        for (int column = 0; column < estimates.Dimension; column++) {
            UncertainValue observed = estimates.Column(column).PopulationVariance;
            double meanClaimed = variances.Column(column).Mean;
            Console.WriteLine("{0} {1} {2}", estimates.Column(column).Name, observed, meanClaimed);
            Assert.IsTrue(observed.ConfidenceInterval(0.95).ClosedContains(meanClaimed));
        }

    }

}
// Runs a series of polynomial regression fits on synthetic data and checks not only that
// the fitted coefficients average out to the true ones, but also that their observed
// variances and covariances agree with the covariance matrix reported by the fits.
public void BivariatePolynomialRegression() {

    Random rng = new Random(271828);

    // true polynomial coefficients; the array index is the power of x
    double[] trueCoefficients = new double[] { 0.0, -1.0, 2.0, -3.0 };

    // fitted coefficient vectors, one row per fit
    MultivariateSample fitted = new MultivariateSample(trueCoefficients.Length);

    // reported covariance matrices differ slightly from fit to fit, so they are
    // summed here and averaged after the loop
    SymmetricMatrix meanCovariance = new SymmetricMatrix(trueCoefficients.Length);

    // goodness-of-fit statistics (collected, but not asserted on below)
    Sample statistics = new Sample();

    for (int fit = 0; fit < 100; fit++) {

        // the x values may come from any distribution; the noise must be normal
        Distribution abscissas = new CauchyDistribution();
        Distribution noise = new NormalDistribution(0.0, 4.0);

        // build a synthetic data set of 20 points: y = noise + sum_p a[p] x^p
        BivariateSample data = new BivariateSample();
        for (int point = 0; point < 20; point++) {
            double x = abscissas.GetRandomValue(rng);
            double y = noise.GetRandomValue(rng);
            for (int power = 0; power < trueCoefficients.Length; power++) {
                y += trueCoefficients[power] * MoreMath.Pow(x, power);
            }
            data.Add(x, y);
        }

        // fit a polynomial of the same degree as the true one
        FitResult result = data.PolynomialRegression(trueCoefficients.Length - 1);
        ColumnVector parameters = result.Parameters;

        // record the best-fit parameters, the reported covariances, and the fit statistic
        fitted.Add(parameters);
        meanCovariance += result.CovarianceMatrix;
        statistics.Add(result.GoodnessOfFit.Statistic);

    }

    // turn the accumulated sum into an average (scaling by the reciprocal of the fit count)
    meanCovariance = (1.0 / fitted.Count) * meanCovariance;

    // the mean of each fitted coefficient should be the underlying population coefficient
    for (int i = 0; i < fitted.Dimension; i++) {
        UncertainValue mean = fitted.Column(i).PopulationMean;
        Console.WriteLine("{0} {1}", mean, trueCoefficients[i]);
        Assert.IsTrue(mean.ConfidenceInterval(0.95).ClosedContains(trueCoefficients[i]));
    }

    // the covariances among fitted coefficients should be the reported covariance estimates;
    // only the upper triangle (column >= row) is visited
    for (int row = 0; row < fitted.Dimension; row++) {
        for (int column = row; column < fitted.Dimension; column++) {
            double reported = meanCovariance[row, column];
            Console.WriteLine("{0} {1} {2} {3}", row, column, reported, fitted.TwoColumns(row, column).PopulationCovariance);
            Assert.IsTrue(fitted.TwoColumns(row, column).PopulationCovariance.ConfidenceInterval(0.95).ClosedContains(reported));
        }
    }

    // NOTE: the distribution of the collected fit statistics is not verified here

}
// Checks that the parameter uncertainties reported by a Weibull fit are meaningful: they
// should match the standard deviation of the fitted parameter values observed over a
// collection of many independent fits.
public void WeibullFitUncertainties() {

    // the population every sample is drawn from
    Distribution population = new WeibullDistribution(2.5, 1.5);

    // fitted parameter values (one pair per fit) ...
    BivariateSample fitted = new BivariateSample();
    // ... and what each fit reported: both parameter uncertainties plus the (0,1) covariance
    MultivariateSample reported = new MultivariateSample(3);

    int trial = 0;
    while (trial < 50) {

        Sample draw = CreateSample(population, 10, trial);
        FitResult result = WeibullDistribution.FitToSample(draw);

        UncertainValue first = result.Parameter(0);
        UncertainValue second = result.Parameter(1);

        fitted.Add(first.Value, second.Value);
        reported.Add(first.Uncertainty, second.Uncertainty, result.Covariance(0, 1));

        trial++;
    }

    // the mean reported error of each parameter must fall inside the 95% confidence
    // interval of the standard deviation of that parameter's fitted values
    double meanFirstError = reported.Column(0).Mean;
    Assert.IsTrue(fitted.X.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(meanFirstError));

    double meanSecondError = reported.Column(1).Mean;
    Assert.IsTrue(fitted.Y.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(meanSecondError));

    // NOTE: the reported covariance (third column) is recorded but not yet compared
    // against the observed covariance of the fitted values

}
// Exploratory diagnostic that prints, side by side, a singular value decomposition and a
// principal component analysis of the same 1000 two-dimensional points. It contains no
// assertions; everything is written to the console for manual comparison.
public void PC() {

    Random rng = new Random(1);
    double s = 1.0 / Math.Sqrt(2.0);

    // store the same points both as a multivariate sample and as a 1000 x 2 matrix
    MultivariateSample MS = new MultivariateSample(2);
    RectangularMatrix R = new RectangularMatrix(1000, 2);
    for (int i = 0; i < 1000; i++) {
        // two independent uniform variates on [-1, 1), scaled by 4 and 9 and then
        // mixed with coefficients +-1/sqrt(2)
        double r1 = 2.0 * rng.NextDouble() - 1.0;
        double r2 = 2.0 * rng.NextDouble() - 1.0;
        double x = r1 * 4.0 * s - r2 * 9.0 * s;
        double y = r1 * 4.0 * s + r2 * 9.0 * s;
        R[i, 0] = x;
        R[i, 1] = y;
        MS.Add(x, y);
    }

    Console.WriteLine("x {0} {1}", MS.Column(0).Mean, MS.Column(0).Variance);
    Console.WriteLine("y {0} {1}", MS.Column(1).Mean, MS.Column(1).Variance);

    // singular values and right singular vectors of the data matrix
    Console.WriteLine("SVD");
    SingularValueDecomposition SVD = R.SingularValueDecomposition();
    for (int i = 0; i < SVD.Dimension; i++) {
        Console.WriteLine("{0} {1}", i, SVD.SingularValue(i));
        ColumnVector v = SVD.RightSingularVector(i);
        Console.WriteLine(" {0} {1}", v[0], v[1]);
    }

    // principal components of the same data
    Console.WriteLine("PCA");
    PrincipalComponentAnalysis PCA = MS.PrincipalComponentAnalysis();
    Console.WriteLine("Dimension = {0} Count = {1}", PCA.Dimension, PCA.Count);
    for (int i = 0; i < PCA.Dimension; i++) {
        PrincipalComponent component = PCA.Component(i);
        Console.WriteLine(" {0} {1} {2} {3}", component.Index, component.Weight, component.VarianceFraction, component.CumulativeVarianceFraction);
        RowVector v = component.NormalizedVector();
        Console.WriteLine(" {0} {1}", v[0], v[1]);
    }

    // reconstruct individual matrix entries (rows 0 and 100) from the SVD factors and
    // print each next to the original entry
    SquareMatrix U = SVD.LeftTransformMatrix();
    SquareMatrix V = SVD.RightTransformMatrix();

    double x1 = U[0, 0] * SVD.SingularValue(0) * V[0, 0] + U[0, 1] * SVD.SingularValue(1) * V[0, 1];
    Console.WriteLine("x1 = {0} {1}", x1, R[0, 0]);
    double y1 = U[0, 0] * SVD.SingularValue(0) * V[1, 0] + U[0, 1] * SVD.SingularValue(1) * V[1, 1];
    Console.WriteLine("y1 = {0} {1}", y1, R[0, 1]);

    double x100 = U[100, 0] * SVD.SingularValue(0) * V[0, 0] + U[100, 1] * SVD.SingularValue(1) * V[0, 1];
    Console.WriteLine("x100 = {0} {1}", x100, R[100, 0]);
    double y100 = U[100, 0] * SVD.SingularValue(0) * V[1, 0] + U[100, 1] * SVD.SingularValue(1) * V[1, 1];
    Console.WriteLine("y100 = {0} {1}", y100, R[100, 1]);

    // the same reconstruction, expressed with the right singular vectors
    ColumnVector d1 = U[0, 0] * SVD.SingularValue(0) * SVD.RightSingularVector(0) + U[0, 1] * SVD.SingularValue(1) * SVD.RightSingularVector(1);
    Console.WriteLine("d1 = ({0} {1})", d1[0], d1[1]);
    ColumnVector d100 = U[100, 0] * SVD.SingularValue(0) * SVD.RightSingularVector(0) + U[100, 1] * SVD.SingularValue(1) * SVD.RightSingularVector(1);
    Console.WriteLine("d100 = ({0} {1})", d100[0], d100[1]);

    // print the first entry of the PCA-transformed sample next to the first row of U
    Console.WriteLine("compare");
    MultivariateSample RS = PCA.TransformedSample();
    // the using statement disposes the enumerator even if reading or printing throws
    using (IEnumerator<double[]> RSE = RS.GetEnumerator()) {
        RSE.MoveNext();
        double[] dv1 = RSE.Current;
        Console.WriteLine("{0} {1}", dv1[0], dv1[1]);
        Console.WriteLine("{0} {1}", U[0, 0], U[0, 1]);
    }

}
// Returns the sum of the Variance of every column of the given sample,
// accumulated in column order starting from column 0.
private double GetTotalVariance(MultivariateSample sample) {
    double sum = 0.0;
    int column = 0;
    while (column < sample.Dimension) {
        sum += sample.Column(column).Variance;
        column++;
    }
    return sum;
}
// Checks the internal consistency of a principal component analysis of a small
// three-dimensional sample: bookkeeping of the components (index, scaled vector, variance
// fractions) and the claim that the first k components together explain the stated
// cumulative fraction of the total variance.
public void PrincipalComponentAnalysis() {

    int dimension = 3;
    int count = 10;

    // build the sample; the three coordinates are drawn in x, y, z order for every entry
    Random rng = new Random(1);
    MultivariateSample sample = new MultivariateSample(dimension);
    for (int entry = 0; entry < count; entry++) {
        double x = 1.0 * rng.NextDouble() - 1.0;
        double y = 4.0 * rng.NextDouble() - 2.0;
        double z = 9.0 * rng.NextDouble() - 3.0;
        sample.Add(x, y, z);
    }

    // column means, needed later to rebuild entries from their component scores
    RowVector mu = new RowVector(dimension);
    for (int column = 0; column < dimension; column++) {
        mu[column] = sample.Column(column).Mean;
    }

    // total variance of the original sample
    double totalVariance = GetTotalVariance(sample);
    Console.WriteLine(totalVariance);

    // run the analysis; it must report the same shape as the input
    PrincipalComponentAnalysis pca = sample.PrincipalComponentAnalysis();
    Assert.IsTrue(pca.Dimension == sample.Dimension);
    Assert.IsTrue(pca.Count == sample.Count);

    // per-component sanity checks
    for (int i = 0; i < pca.Dimension; i++) {

        PrincipalComponent current = pca.Component(i);
        Assert.IsTrue(current.Index == i);

        // the scaled vector is the normalized vector times the weight
        Assert.IsTrue(TestUtilities.IsNearlyEqual(current.Weight * current.NormalizedVector(), current.ScaledVector()));

        // a variance fraction lies in [0, 1]
        bool isFraction = (0.0 <= current.VarianceFraction) && (current.VarianceFraction <= 1.0);
        Assert.IsTrue(isFraction);

        if (i > 0) {
            // later components never explain more than earlier ones, and the
            // cumulative fraction grows by exactly this component's share
            PrincipalComponent previous = pca.Component(i - 1);
            Assert.IsTrue(current.VarianceFraction <= previous.VarianceFraction);
            Assert.IsTrue(TestUtilities.IsNearlyEqual(previous.CumulativeVarianceFraction + current.VarianceFraction, current.CumulativeVarianceFraction));
        } else {
            // for the first component the cumulative fraction is its own fraction
            Assert.IsTrue(current.VarianceFraction == current.CumulativeVarianceFraction);
        }

    }

    // the sample expressed in terms of principal components
    MultivariateSample transformed = pca.TransformedSample();

    // rebuild the sample using only the first `kept` components and verify that the
    // retained share of the variance equals the claimed cumulative fraction
    for (int kept = 1; kept <= dimension; kept++) {

        MultivariateSample rebuilt = new MultivariateSample(dimension);
        foreach (double[] scores in transformed) {
            RowVector point = mu.Copy();
            int k = 0;
            while (k < kept) {
                PrincipalComponent component = pca.Component(k);
                point += (scores[k] * component.Weight) * component.NormalizedVector();
                k++;
            }
            rebuilt.Add(point);
        }

        double rebuiltVariance = GetTotalVariance(rebuilt);
        Console.WriteLine("{0} {1}", kept, rebuiltVariance);
        Assert.IsTrue(TestUtilities.IsNearlyEqual(rebuiltVariance / totalVariance, pca.Component(kept - 1).CumulativeVarianceFraction));

    }

}
// Checks that the general multivariate moment functions agree with the dedicated
// single-column and two-column statistics: first raw moment vs. column mean, second
// central moment vs. column variance, and mixed (1,1) central moment vs. covariance.
public void MultivariateMoments() {

    // three columns, each filled from a different distribution
    MultivariateSample M = new MultivariateSample(3);
    Distribution[] marginals = new Distribution[] {
        new NormalDistribution(),
        new ExponentialDistribution(),
        new UniformDistribution()
    };

    Random rng = new Random(1);
    int n = 10;
    for (int entry = 0; entry < n; entry++) {
        // values are drawn in column order so the random stream is consumed deterministically
        double[] row = new double[3];
        for (int k = 0; k < 3; k++) {
            row[k] = marginals[k].GetRandomValue(rng);
        }
        M.Add(row);
    }

    for (int i = 0; i < 3; i++) {

        // exponent 1 on column i only: the raw moment is the column mean
        int[] first = new int[3];
        first[i] = 1;
        Assert.IsTrue(TestUtilities.IsNearlyEqual(M.Column(i).Mean, M.Moment(first)));

        // exponent 2 on column i only: the central moment is the column variance
        int[] second = new int[3];
        second[i] = 2;
        Assert.IsTrue(TestUtilities.IsNearlyEqual(M.Column(i).Variance, M.MomentAboutMean(second)));

        // exponent 1 on columns i and j (j < i): the central moment is their covariance
        for (int j = 0; j < i; j++) {
            int[] mixed = new int[3];
            mixed[i] = 1;
            mixed[j] = 1;
            Assert.IsTrue(TestUtilities.IsNearlyEqual(M.TwoColumns(i, j).Covariance, M.MomentAboutMean(mixed)));
        }

    }

}