/// <summary>
/// Runs a principal component analysis on a small, reproducible three-column sample and
/// verifies (a) the bookkeeping properties of each component and (b) that rebuilding the
/// sample from the leading components reproduces the cumulative variance fraction each
/// component reports.
/// </summary>
public void PrincipalComponentAnalysis()
{
    int dimension = 3;
    int count = 10;

    // Fixed seed, so every run works on the same data.
    Random random = new Random(1);
    MultivariateSample sample = new MultivariateSample(dimension);
    for (int row = 0; row < count; row++)
    {
        // Each column gets its own scale and offset; the draw order (x, y, z) matters
        // because all three values come from the same generator.
        double x = 1.0 * random.NextDouble() - 1.0;
        double y = 4.0 * random.NextDouble() - 2.0;
        double z = 9.0 * random.NextDouble() - 3.0;
        sample.Add(x, y, z);
    }

    // Column means: the origin from which reconstructed points are built further below.
    RowVector means = new RowVector(dimension);
    for (int column = 0; column < dimension; column++)
    {
        means[column] = sample.Column(column).Mean;
    }

    // Total variance of the untouched sample: the denominator for explained fractions.
    double totalVariance = GetTotalVariance(sample);
    Console.WriteLine(totalVariance);

    // Perform the analysis; it must report the same shape as its input.
    PrincipalComponentAnalysis analysis = sample.PrincipalComponentAnalysis();
    Assert.IsTrue(analysis.Dimension == sample.Dimension);
    Assert.IsTrue(analysis.Count == sample.Count);

    // Per-component consistency checks.
    for (int index = 0; index < analysis.Dimension; index++)
    {
        PrincipalComponent current = analysis.Component(index);
        Assert.IsTrue(current.Index == index);
        Assert.IsTrue(current.Analysis == analysis);

        // The scaled vector must be the normalized vector multiplied by the weight.
        Assert.IsTrue(TestUtilities.IsNearlyEqual(current.Weight * current.NormalizedVector(), current.ScaledVector()));

        // A variance fraction has to lie in [0, 1].
        Assert.IsTrue((current.VarianceFraction >= 0.0) && (current.VarianceFraction <= 1.0));

        if (index > 0)
        {
            // Later components never explain more than earlier ones, and the cumulative
            // fraction is the running sum of the individual fractions.
            PrincipalComponent previous = analysis.Component(index - 1);
            Assert.IsTrue(previous.VarianceFraction >= current.VarianceFraction);
            double expectedCumulative = previous.CumulativeVarianceFraction + current.VarianceFraction;
            Assert.IsTrue(TestUtilities.IsNearlyEqual(expectedCumulative, current.CumulativeVarianceFraction));
        }
        else
        {
            // For the first component the cumulative fraction is its own fraction.
            Assert.IsTrue(current.VarianceFraction == current.CumulativeVarianceFraction);
        }
    }

    // The sample expressed in principal-component coordinates.
    MultivariateSample transformed = analysis.TransformedSample();

    // Rebuild the sample from only the first `kept` components and compare the variance
    // retained against the cumulative fraction claimed by component `kept - 1`.
    for (int kept = 1; kept <= dimension; kept++)
    {
        MultivariateSample reconstructed = new MultivariateSample(dimension);
        foreach (double[] coordinates in transformed)
        {
            RowVector point = means.Copy();
            for (int j = 0; j < kept; j++)
            {
                PrincipalComponent component = analysis.Component(j);
                point += (coordinates[j] * component.Weight) * component.NormalizedVector();
            }
            reconstructed.Add(point);
        }

        double reconstructedVariance = GetTotalVariance(reconstructed);
        Console.WriteLine("{0} {1}", kept, reconstructedVariance);

        double explainedFraction = reconstructedVariance / totalVariance;
        Assert.IsTrue(TestUtilities.IsNearlyEqual(explainedFraction, analysis.Component(kept - 1).CumulativeVarianceFraction));
    }
}
/// <summary>
/// Diagnostic routine that prints, side by side, the singular value decomposition of a
/// 1000 x 2 data matrix and the principal component analysis of the same points held in
/// a <c>MultivariateSample</c>. It makes no assertions; all results go to the console.
/// </summary>
/// <remarks>
/// The points are produced from two independent uniform draws r1, r2 in [-1, 1) that are
/// scaled by 4 and 9 respectively and then rotated by 45 degrees (s = 1/sqrt(2)).
/// </remarks>
public void PC()
{
    const int count = 1000;

    // Fixed seed so the printed numbers are reproducible between runs.
    Random rng = new Random(1);
    double s = 1.0 / Math.Sqrt(2.0);

    // The same points are stored twice: as a sample (for PCA) and as a matrix (for SVD).
    MultivariateSample sample = new MultivariateSample(2);
    RectangularMatrix data = new RectangularMatrix(count, 2);
    for (int i = 0; i < count; i++)
    {
        double r1 = 2.0 * rng.NextDouble() - 1.0;
        double r2 = 2.0 * rng.NextDouble() - 1.0;
        double x = r1 * 4.0 * s - r2 * 9.0 * s;
        double y = r1 * 4.0 * s + r2 * 9.0 * s;
        data[i, 0] = x;
        data[i, 1] = y;
        sample.Add(x, y);
    }

    Console.WriteLine("x {0} {1}", sample.Column(0).Mean, sample.Column(0).Variance);
    Console.WriteLine("y {0} {1}", sample.Column(1).Mean, sample.Column(1).Variance);

    // Singular values and right singular vectors of the raw data matrix.
    Console.WriteLine("SVD");
    SingularValueDecomposition svd = data.SingularValueDecomposition();
    for (int i = 0; i < svd.Dimension; i++)
    {
        Console.WriteLine("{0} {1}", i, svd.SingularValue(i));
        ColumnVector v = svd.RightSingularVector(i);
        Console.WriteLine(" {0} {1}", v[0], v[1]);
    }

    // Weights, variance fractions and directions reported by the PCA of the sample.
    Console.WriteLine("PCA");
    PrincipalComponentAnalysis pca = sample.PrincipalComponentAnalysis();
    Console.WriteLine("Dimension = {0} Count = {1}", pca.Dimension, pca.Count);
    for (int i = 0; i < pca.Dimension; i++)
    {
        // Named `component` rather than `PC` so the local does not shadow this method.
        PrincipalComponent component = pca.Component(i);
        Console.WriteLine(" {0} {1} {2} {3}", component.Index, component.Weight, component.VarianceFraction, component.CumulativeVarianceFraction);
        RowVector v = component.NormalizedVector();
        Console.WriteLine(" {0} {1}", v[0], v[1]);
    }

    // Reconstruct individual data entries from the SVD factors (row of U, singular
    // values, row of V) and print them next to the stored matrix entries.
    SquareMatrix U = svd.LeftTransformMatrix();
    SquareMatrix V = svd.RightTransformMatrix();
    double x1 = U[0, 0] * svd.SingularValue(0) * V[0, 0] + U[0, 1] * svd.SingularValue(1) * V[0, 1];
    Console.WriteLine("x1 = {0} {1}", x1, data[0, 0]);
    double y1 = U[0, 0] * svd.SingularValue(0) * V[1, 0] + U[0, 1] * svd.SingularValue(1) * V[1, 1];
    Console.WriteLine("y1 = {0} {1}", y1, data[0, 1]);
    double x100 = U[100, 0] * svd.SingularValue(0) * V[0, 0] + U[100, 1] * svd.SingularValue(1) * V[0, 1];
    Console.WriteLine("x100 = {0} {1}", x100, data[100, 0]);
    double y100 = U[100, 0] * svd.SingularValue(0) * V[1, 0] + U[100, 1] * svd.SingularValue(1) * V[1, 1];
    Console.WriteLine("y100 = {0} {1}", y100, data[100, 1]);

    // The same reconstruction written as a combination of the right singular vectors.
    ColumnVector d1 = U[0, 0] * svd.SingularValue(0) * svd.RightSingularVector(0) + U[0, 1] * svd.SingularValue(1) * svd.RightSingularVector(1);
    Console.WriteLine("d1 = ({0} {1})", d1[0], d1[1]);
    ColumnVector d100 = U[100, 0] * svd.SingularValue(0) * svd.RightSingularVector(0) + U[100, 1] * svd.SingularValue(1) * svd.RightSingularVector(1);
    Console.WriteLine("d100 = ({0} {1})", d100[0], d100[1]);

    // Print the first PCA-transformed entry next to the first row of U.
    Console.WriteLine("compare");
    MultivariateSample transformed = pca.TransformedSample();

    // `using` guarantees the enumerator is disposed even if printing throws, and Current
    // is only read after MoveNext() has confirmed that an entry exists.
    using (IEnumerator<double[]> entries = transformed.GetEnumerator())
    {
        if (entries.MoveNext())
        {
            double[] dv1 = entries.Current;
            Console.WriteLine("{0} {1}", dv1[0], dv1[1]);
        }
    }
    Console.WriteLine("{0} {1}", U[0, 0], U[0, 1]);
}