/// <summary>
/// Exercises principal component analysis on a small, reproducible three-column sample.
/// </summary>
/// <remarks>
/// The test checks three things: that the analysis reports the same dimension and count
/// as the sample, that each component's index, owner, vectors, and variance fractions are
/// mutually consistent, and that rebuilding the sample from the leading components yields
/// the fraction of total variance that the analysis claims for them.
/// </remarks>
public void PrincipalComponentAnalysis()
{
    int dimension = 3;
    int entryCount = 10;

    // Fill the sample from a fixed-seed generator so every run sees the same data.
    // Each column is scaled differently so the columns have different spreads.
    Random generator = new Random(1);
    MultivariateSample sample = new MultivariateSample(dimension);
    int added = 0;
    while (added < entryCount)
    {
        double first = 1.0 * generator.NextDouble() - 1.0;
        double second = 4.0 * generator.NextDouble() - 2.0;
        double third = 9.0 * generator.NextDouble() - 3.0;
        sample.Add(first, second, third);
        added++;
    }

    // Record the mean of each column; the reconstruction below starts from these.
    RowVector means = new RowVector(dimension);
    for (int column = 0; column < dimension; column++)
    {
        means[column] = sample.Column(column).Mean;
    }

    // Total variance of the untouched sample is the reference for the explained fractions.
    double totalVariance = GetTotalVariance(sample);
    Console.WriteLine(totalVariance);

    // Run the analysis and confirm it describes the same data set.
    PrincipalComponentAnalysis pca = sample.PrincipalComponentAnalysis();
    Assert.IsTrue(pca.Dimension == sample.Dimension);
    Assert.IsTrue(pca.Count == sample.Count);

    // Validate each principal component in order.
    for (int index = 0; index < pca.Dimension; index++)
    {
        PrincipalComponent component = pca.Component(index);
        Assert.IsTrue(component.Index == index);
        Assert.IsTrue(component.Analysis == pca);

        // The scaled vector must equal the normalized vector times the weight.
        Assert.IsTrue(
            TestUtilities.IsNearlyEqual(
                component.Weight * component.NormalizedVector(),
                component.ScaledVector()));

        // A variance fraction must lie in [0, 1].
        Assert.IsTrue(0.0 <= component.VarianceFraction);
        Assert.IsTrue(component.VarianceFraction <= 1.0);

        if (index != 0)
        {
            // Later components never explain more than their predecessor, and the
            // cumulative fraction grows by exactly this component's own fraction.
            PrincipalComponent predecessor = pca.Component(index - 1);
            Assert.IsTrue(component.VarianceFraction <= predecessor.VarianceFraction);
            Assert.IsTrue(
                TestUtilities.IsNearlyEqual(
                    predecessor.CumulativeVarianceFraction + component.VarianceFraction,
                    component.CumulativeVarianceFraction));
        }
        else
        {
            // For the first component the cumulative fraction is its own fraction.
            Assert.IsTrue(component.VarianceFraction == component.CumulativeVarianceFraction);
        }
    }

    // Express the sample in terms of principal components.
    MultivariateSample transformed = pca.TransformedSample();

    // Rebuild the sample from the first `kept` components only and verify that the
    // retained share of the variance matches the claimed cumulative fraction.
    for (int kept = 1; kept <= dimension; kept++)
    {
        MultivariateSample reconstructed = new MultivariateSample(dimension);
        foreach (double[] scores in transformed)
        {
            RowVector point = means.Copy();
            for (int k = 0; k < kept; k++)
            {
                PrincipalComponent component = pca.Component(k);
                point += (scores[k] * component.Weight) * component.NormalizedVector();
            }
            reconstructed.Add(point);
        }

        double reducedVariance = GetTotalVariance(reconstructed);
        Console.WriteLine("{0} {1}", kept, reducedVariance);

        double explainedFraction = reducedVariance / totalVariance;
        Assert.IsTrue(
            TestUtilities.IsNearlyEqual(
                explainedFraction,
                pca.Component(kept - 1).CumulativeVarianceFraction));
    }
}
/// <summary>
/// Exercises principal component analysis on a small, reproducible three-column sample,
/// reaching the components through the analysis' <c>Components</c> collection.
/// </summary>
/// <remarks>
/// The test checks that the analysis matches the sample's dimension and count, that the
/// component collection is consistent under both indexing and enumeration, that variance
/// fractions are bounded, non-increasing, and accumulate correctly, and that rebuilding
/// the sample from the leading components yields the claimed share of the total variance.
/// </remarks>
public void PrincipalComponentAnalysis()
{
    int dimension = 3;
    int entryCount = 10;

    // Fill the sample from a fixed-seed generator so every run sees the same data.
    // Each column is scaled differently so the columns have different spreads.
    Random generator = new Random(1);
    MultivariateSample sample = new MultivariateSample(dimension);
    int added = 0;
    while (added < entryCount)
    {
        double first = 1.0 * generator.NextDouble() - 1.0;
        double second = 4.0 * generator.NextDouble() - 2.0;
        double third = 9.0 * generator.NextDouble() - 3.0;
        sample.Add(first, second, third);
        added++;
    }

    // Record the mean of each column; the reconstruction below starts from these.
    RowVector means = new RowVector(dimension);
    for (int column = 0; column < dimension; column++)
    {
        means[column] = sample.Column(column).Mean;
    }

    // Total variance of the untouched sample is the reference for the explained fractions.
    double totalVariance = GetTotalVariance(sample);
    Console.WriteLine(totalVariance);

    // Run the analysis and confirm it describes the same data set.
    PrincipalComponentAnalysis pca = sample.PrincipalComponentAnalysis();
    Assert.IsTrue(pca.Dimension == sample.Dimension);
    Assert.IsTrue(pca.Count == sample.Count);

    // Validate the components through the indexer.
    Assert.IsTrue(pca.Components.Count == pca.Dimension);
    for (int index = 0; index < pca.Dimension; index++)
    {
        PrincipalComponent component = pca.Components[index];
        Assert.IsTrue(component.Index == index);
        Assert.IsTrue(component.Analysis == pca);

        // The scaled vector must equal the normalized vector times the weight.
        Assert.IsTrue(
            TestUtilities.IsNearlyEqual(
                component.Weight * component.NormalizedVector,
                component.ScaledVector()));

        // Asking for exactly this component's cumulative fraction must require
        // this component and all the ones before it, i.e. index + 1 dimensions.
        Assert.IsTrue(pca.MinimumDimension(component.CumulativeVarianceFraction) == index + 1);
    }

    // Check enumerator, and verify that variance fractions behave as expected.
    int visited = 0;
    double runningTotal = 0.0;
    double lastFraction = double.PositiveInfinity;
    foreach (PrincipalComponent component in pca.Components)
    {
        // Enumeration order must agree with the component indices.
        Assert.IsTrue(component.Index == visited);
        visited++;

        // A variance fraction must lie in [0, 1].
        Assert.IsTrue(0.0 <= component.VarianceFraction);
        Assert.IsTrue(component.VarianceFraction <= 1.0);

        // Fractions must not increase from one component to the next.
        Assert.IsTrue(component.VarianceFraction <= lastFraction);
        lastFraction = component.VarianceFraction;

        // The reported cumulative fraction must match the running sum.
        runningTotal += component.VarianceFraction;
        Assert.IsTrue(TestUtilities.IsNearlyEqual(runningTotal, component.CumulativeVarianceFraction));
    }
    Assert.IsTrue(visited == pca.Components.Count);

    // Express the sample in terms of principal components.
    MultivariateSample transformed = pca.TransformedSample();

    // Rebuild the sample from the first `kept` components only and verify that the
    // retained share of the variance matches the claimed cumulative fraction.
    for (int kept = 1; kept <= dimension; kept++)
    {
        MultivariateSample reconstructed = new MultivariateSample(dimension);
        foreach (double[] scores in transformed)
        {
            RowVector point = means.Copy();
            for (int k = 0; k < kept; k++)
            {
                PrincipalComponent component = pca.Components[k];
                point += (scores[k] * component.Weight) * component.NormalizedVector;
            }
            reconstructed.Add(point);
        }

        double reducedVariance = GetTotalVariance(reconstructed);
        Console.WriteLine("{0} {1}", kept, reducedVariance);

        double explainedFraction = reducedVariance / totalVariance;
        Assert.IsTrue(
            TestUtilities.IsNearlyEqual(
                explainedFraction,
                pca.Components[kept - 1].CumulativeVarianceFraction));
    }
}