// Runs a principal component analysis over the mean-centered points and returns
// the mean point together with the normalized vector of principal component 0.
public static Tuple<Point, Vector> Compute(Point[] points)
        {
            var meanX = points.Average(p => p.X);
            var meanY = points.Average(p => p.Y);
            var mean = new Vector(meanX, meanY);

            // Feed the analysis with coordinates relative to the mean point.
            var centered = new MultivariateSample(2);
            foreach (var point in points)
            {
                var offset = point - mean;
                centered.Add(offset.X, offset.Y);
            }

            var direction = centered.PrincipalComponentAnalysis().Component(0).NormalizedVector();

            var origin = new Point(meanX, meanY);
            var axis = new Vector(direction[0], direction[1]);
            return Tuple.Create(origin, axis);
        }
Example #2
0
 // Regression check: a principal component analysis of the two entries
 // (0, 1) and (0, -1) once caused a NonConvergenceException.
 public void Bug6391()
 {
     var sample = new MultivariateSample(2);
     sample.Add(0, 1);
     sample.Add(0, -1);

     // Nothing is asserted; the analysis only has to complete without throwing.
     sample.PrincipalComponentAnalysis();
 }
        // Exploratory console dump: builds 1000 two-column rows from a generator seeded
        // with 1, then prints the singular value decomposition of the data matrix next to
        // the principal component analysis of the same rows. Nothing is asserted.
        public void PC()
        {
            Random rng = new Random(1);
            double s = 1.0 / Math.Sqrt(2.0);

            // The same (x, y) rows go into both the sample and the matrix.
            MultivariateSample sample = new MultivariateSample(2);
            RectangularMatrix data = new RectangularMatrix(1000, 2);
            for (int i = 0; i < 1000; i++) {
                double r1 = 2.0 * rng.NextDouble() - 1.0;
                double r2 = 2.0 * rng.NextDouble() - 1.0;
                double x = r1 * 4.0 * s - r2 * 9.0 * s;
                double y = r1 * 4.0 * s + r2 * 9.0 * s;
                data[i, 0] = x;
                data[i, 1] = y;
                sample.Add(x, y);
            }

            Console.WriteLine("x {0} {1}", sample.Column(0).Mean, sample.Column(0).Variance);
            Console.WriteLine("y {0} {1}", sample.Column(1).Mean, sample.Column(1).Variance);

            Console.WriteLine("SVD");

            SingularValueDecomposition svd = data.SingularValueDecomposition();
            for (int i = 0; i < svd.Dimension; i++) {
                Console.WriteLine("{0} {1}", i, svd.SingularValue(i));
                ColumnVector v = svd.RightSingularVector(i);
                Console.WriteLine("  {0} {1}", v[0], v[1]);
            }

            Console.WriteLine("PCA");

            PrincipalComponentAnalysis pca = sample.PrincipalComponentAnalysis();
            Console.WriteLine("Dimension = {0} Count = {1}", pca.Dimension, pca.Count);
            for (int i = 0; i < pca.Dimension; i++) {
                PrincipalComponent component = pca.Component(i);
                Console.WriteLine("  {0} {1} {2} {3}", component.Index, component.Weight, component.VarianceFraction, component.CumulativeVarianceFraction);
                RowVector v = component.NormalizedVector();
                Console.WriteLine("  {0} {1}", v[0], v[1]);
            }

            // reconstruct rows 0 and 100 of the data matrix from the SVD factors
            // and print each rebuilt value beside the stored one
            SquareMatrix left = svd.LeftTransformMatrix();
            SquareMatrix right = svd.RightTransformMatrix();
            double sigma0 = svd.SingularValue(0);
            double sigma1 = svd.SingularValue(1);
            double x1 = left[0, 0] * sigma0 * right[0, 0] + left[0, 1] * sigma1 * right[0, 1];
            Console.WriteLine("x1 = {0} {1}", x1, data[0, 0]);
            double y1 = left[0, 0] * sigma0 * right[1, 0] + left[0, 1] * sigma1 * right[1, 1];
            Console.WriteLine("y1 = {0} {1}", y1, data[0, 1]);
            double x100 = left[100, 0] * sigma0 * right[0, 0] + left[100, 1] * sigma1 * right[0, 1];
            Console.WriteLine("x100 = {0} {1}", x100, data[100, 0]);
            double y100 = left[100, 0] * sigma0 * right[1, 0] + left[100, 1] * sigma1 * right[1, 1];
            Console.WriteLine("y100 = {0} {1}", y100, data[100, 1]);

            // the same reconstruction expressed with the right singular vectors
            ColumnVector d1 = left[0, 0] * sigma0 * svd.RightSingularVector(0) +
                left[0, 1] * sigma1 * svd.RightSingularVector(1);
            Console.WriteLine("d1 = ({0} {1})", d1[0], d1[1]);
            ColumnVector d100 = left[100, 0] * sigma0 * svd.RightSingularVector(0) +
                left[100, 1] * sigma1 * svd.RightSingularVector(1);
            Console.WriteLine("d100 = ({0} {1})", d100[0], d100[1]);

            Console.WriteLine("compare");
            MultivariateSample transformed = pca.TransformedSample();

            // The using statement disposes the enumerator even if a print below throws;
            // only the first transformed entry is needed, and it is skipped if absent.
            using (IEnumerator<double[]> entries = transformed.GetEnumerator()) {
                if (entries.MoveNext()) {
                    double[] first = entries.Current;
                    Console.WriteLine("{0} {1}", first[0], first[1]);
                }
            }
            Console.WriteLine("{0} {1}", left[0, 0], left[0, 1]);
        }
        // Runs a principal component analysis on a 10-entry, 3-column sample drawn from a
        // generator seeded with 1 and checks the bookkeeping of each component, then rebuilds
        // the sample from the leading components and compares the retained share of total
        // variance with the reported cumulative variance fraction.
        public void PrincipalComponentAnalysis()
        {
            int dimension = 3;
            int entryCount = 10;

            // build the sample
            Random rng = new Random(1);
            MultivariateSample sample = new MultivariateSample(dimension);
            for (int n = 0; n < entryCount; n++) {
                double x = 1.0 * rng.NextDouble() - 1.0;
                double y = 4.0 * rng.NextDouble() - 2.0;
                double z = 9.0 * rng.NextDouble() - 3.0;
                sample.Add(x, y, z);
            }

            // record the mean of every column
            RowVector means = new RowVector(dimension);
            for (int column = 0; column < dimension; column++) {
                means[column] = sample.Column(column).Mean;
            }

            // total variance of the original sample
            double totalVariance = GetTotalVariance(sample);
            Console.WriteLine(totalVariance);

            // run the analysis; it must report the sample's own dimension and count
            PrincipalComponentAnalysis pca = sample.PrincipalComponentAnalysis();
            Assert.IsTrue(pca.Dimension == sample.Dimension);
            Assert.IsTrue(pca.Count == sample.Count);

            // per-component checks
            for (int index = 0; index < pca.Dimension; index++) {
                PrincipalComponent current = pca.Component(index);
                Assert.IsTrue(current.Index == index);

                // the scaled vector is the normalized vector times the weight
                Assert.IsTrue(TestUtilities.IsNearlyEqual(current.Weight * current.NormalizedVector(), current.ScaledVector()));

                // the variance fraction lies in [0, 1]
                Assert.IsTrue(0.0 <= current.VarianceFraction);
                Assert.IsTrue(current.VarianceFraction <= 1.0);

                if (index == 0) {
                    // for the first component the cumulative fraction is its own fraction
                    Assert.IsTrue(current.VarianceFraction == current.CumulativeVarianceFraction);
                    continue;
                }

                // later components explain no more than their predecessor,
                // and the cumulative fraction accumulates
                PrincipalComponent previous = pca.Component(index - 1);
                Assert.IsTrue(current.VarianceFraction <= previous.VarianceFraction);
                Assert.IsTrue(TestUtilities.IsNearlyEqual(previous.CumulativeVarianceFraction + current.VarianceFraction, current.CumulativeVarianceFraction));
            }

            // the sample expressed in principal-component coordinates
            MultivariateSample transformed = pca.TransformedSample();

            // rebuild the sample from the first `kept` components and verify the explained variance
            for (int kept = 1; kept <= dimension; kept++) {
                MultivariateSample rebuilt = new MultivariateSample(dimension);
                foreach (double[] coordinates in transformed) {
                    RowVector point = means.Copy();
                    for (int k = 0; k < kept; k++) {
                        PrincipalComponent pc = pca.Component(k);
                        double coefficient = coordinates[k] * pc.Weight;
                        point += coefficient * pc.NormalizedVector();
                    }
                    rebuilt.Add(point);
                }

                double rebuiltVariance = GetTotalVariance(rebuilt);
                Console.WriteLine("{0} {1}", kept, rebuiltVariance);

                double explainedFraction = rebuiltVariance / totalVariance;
                Assert.IsTrue(TestUtilities.IsNearlyEqual(explainedFraction, pca.Component(kept - 1).CumulativeVarianceFraction));
            }
        }