Code Example #1
        // Protected methods (called by the public methods in the parent class)
        protected override void DoTrain(Bitmap[] charImgs)
        {
            double[][] input = convertDataToPCAInputFormat(charImgs);

            pca = new PrincipalComponentAnalysis(input, AnalysisMethod.Center);
            pca.Compute();
        }
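The helper convertDataToPCAInputFormat is not shown in this snippet. A minimal sketch of what such a helper might look like, assuming each character image is flattened into one row of grayscale intensities (the method name and the luminance weights below are illustrative, not from the original project):

        // Hypothetical helper: flatten each bitmap into one row of
        // grayscale values in [0, 1] (requires System.Drawing).
        private static double[][] ConvertBitmapsToRows(Bitmap[] charImgs)
        {
            double[][] rows = new double[charImgs.Length][];
            for (int i = 0; i < charImgs.Length; i++)
            {
                Bitmap img = charImgs[i];
                double[] row = new double[img.Width * img.Height];
                for (int y = 0; y < img.Height; y++)
                {
                    for (int x = 0; x < img.Width; x++)
                    {
                        Color c = img.GetPixel(x, y);
                        // Standard luminance weighting
                        row[y * img.Width + x] =
                            (0.299 * c.R + 0.587 * c.G + 0.114 * c.B) / 255.0;
                    }
                }
                rows[i] = row;
            }
            return rows;
        }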
Code Example #2
        private PrincipalComponentAnalysis computePCA(double[][][] sequences)
        {
            // Combine all sequences into one matrix for computing PCA

            int numTotalRows = sequences.Select(e => e.GetLength(0)).Sum();

            double[][] pcaCombined = new double[numTotalRows][];
            int total = 0;
            for (int i = 0; i < sequences.GetLength(0); i++)
            {
                for (int j = 0; j < sequences[i].GetLength(0); j++)
                {
                    pcaCombined[total + j] = sequences[i][j];
                }
                total += sequences[i].GetLength(0);
            }

            // PCA
            double[,] pcaCombinedMulti = jaggedToMulti(pcaCombined);
            var pca = new PrincipalComponentAnalysis(pcaCombinedMulti);
            pca.Compute();

            return pca;
        }
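The flattening loop above can also be written with LINQ; a minimal equivalent sketch, assuming the same jagged layout and a using System.Linq directive:

            // Equivalent flattening: concatenate every row of every
            // sequence, in order, into one jagged matrix for PCA.
            double[][] pcaCombined = sequences.SelectMany(seq => seq).ToArray();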
Code Example #3
 public static double[,] PCA(double[,] sourceMatrix)
 {
     // Create the Principal Component Analysis of the given source
     var pca = new PrincipalComponentAnalysis(sourceMatrix, AnalysisMethod.Center);

     // Compute the Principal Component Analysis
     pca.Compute();

     // Project onto all principal components; the second argument of
     // Transform is the number of components to keep, which can be at
     // most the number of source columns. A variance threshold could
     // be used instead, e.g.:
     //   pca.Transform(sourceMatrix, 0.8f, true); // keep 80% of the variance
     int components = sourceMatrix.GetLength(1);
     return pca.Transform(sourceMatrix, components);
 }
Code Example #4
File: MainForm.cs Project: accord-net/framework
        void btnCompute_Click(object sender, EventArgs e)
        {
            dataGridView2.Rows.Clear();


            // Extract feature vectors
            double[][] hands = extract();

            // Create a new Principal Component Analysis object
            pca = new PrincipalComponentAnalysis()
            {
                Method = PrincipalComponentMethod.Center,
                ExplainedVariance = 0.95
            };

            // Compute it
            pca.Learn(hands);

            // Now we will plot the Eigenvectors as images
            ArrayToImage reverse = new ArrayToImage(32, 32);


            // For each Principal Component
            for (int i = 0; i < pca.Components.Count; i++)
            {
                // We will extract its Eigenvector
                double[] vector = pca.Components[i].Eigenvector;

                // Normalize its values
                reverse.Max = vector.Max();
                reverse.Min = vector.Min();

                // Then arrange each vector value as if it was a pixel
                Bitmap eigenHand; reverse.Convert(vector, out eigenHand);

                // This will give the Eigenhands
                dataGridView2.Rows.Add(eigenHand, pca.Components[i].Proportion);
            }

            // Populate components overview with analysis data
            dgvPrincipalComponents.DataSource = pca.Components;
            distributionView.DataSource = pca.Components;
            cumulativeView.DataSource = pca.Components;

            btnCreateProjection.Enabled = true;
        }
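Once Learn has been called, the same analysis can project individual feature vectors; a short usage sketch, assuming the single-vector Transform overload is available in the framework version used (hypothetical continuation, not part of the original form):

            // Project one hand onto the retained principal components
            double[] projection = pca.Transform(hands[0]);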
Code Example #5
        public void transform_more_columns_than_samples_new_interface()
        {
            // Lindsay's tutorial data
            var datat = data.Transpose().ToJagged();

            var target = new PrincipalComponentAnalysis();

            // Compute
            var regression = target.Learn(datat);

            // Transform
            double[][] actual = target.Transform(datat);

            // In the new interface, the Result property is no longer populated
            Assert.IsNull(target.Result);

            double[,] expected = 
            {
                {  0.50497524691810358, -0.00000000000000044408920985006262 },
                { -0.504975246918104,   -0.00000000000000035735303605122226 }
            };

            Assert.IsTrue(Matrix.IsEqual(expected, actual, 0.01));

            actual = target.Transform(datat);
            Assert.IsTrue(Matrix.IsEqual(expected, actual, 0.01));
        }
Code Example #6
        public void ConstructorTest2()
        {
            // Reproducing Lindsay Smith's "Tutorial on Principal Component Analysis"
            // using the paper's original method. The tutorial can be found online
            // at http://www.sccg.sk/~haladova/principal_components.pdf

            // Step 1. Get some data
            // ---------------------

            double[,] data =
            {
                { 2.5, 2.4 },
                { 0.5, 0.7 },
                { 2.2, 2.9 },
                { 1.9, 2.2 },
                { 3.1, 3.0 },
                { 2.3, 2.7 },
                { 2.0, 1.6 },
                { 1.0, 1.1 },
                { 1.5, 1.6 },
                { 1.1, 0.9 }
            };


            // Step 2. Subtract the mean
            // -------------------------
            //   Note: The framework does this automatically
            //   when computing the covariance matrix. In this
            //   step we will only compute the mean vector.

            double[] mean = Accord.Statistics.Tools.Mean(data);


            // Step 3. Compute the covariance matrix
            // -------------------------------------

            double[,] covariance = Accord.Statistics.Tools.Covariance(data, mean);

            // Create the analysis using the covariance matrix
            var pca = PrincipalComponentAnalysis.FromCovarianceMatrix(mean, covariance);

            // Compute it
            pca.Compute();


            // Step 4. Compute the eigenvectors and eigenvalues of the covariance matrix
            //--------------------------------------------------------------------------

            // Those are the expected eigenvalues, in descending order:
            double[] eigenvalues = { 1.28402771, 0.0490833989 };

            // And this will be their proportion:
            double[] proportion = eigenvalues.Divide(eigenvalues.Sum());

            // Those are the expected eigenvectors,
            // in descending order of eigenvalues:
            double[,] eigenvectors =
            {
                { -0.677873399, -0.735178656 },
                { -0.735178656,  0.677873399 }
            };

            // Now, here is the place most users get confused. The fact is that
            // the eigenvalue decomposition (EVD) is not unique, and the SVD
            // and EVD routines used by the framework produce results that are
            // numerically different from packages such as STATA or MATLAB,
            // yet both are correct.

            // If v is an eigenvector, a multiple of this eigenvector (such as a*v, with
            // a being a scalar) will also be an eigenvector. In the Lindsay case, the
            // framework produces a first eigenvector with inverted signs. This is the same
            // as considering a=-1 and taking a*v. The result is still correct.

            // Retrieve the first expected eigenvector
            double[] v = eigenvectors.GetColumn(0);

            // Multiply by a scalar and store it back
            eigenvectors.SetColumn(0, v.Multiply(-1));

            // Everything is alright (up to the 9 decimal places shown in the tutorial)
            Assert.IsTrue(eigenvectors.IsEqual(pca.ComponentMatrix, threshold: 1e-9));
            Assert.IsTrue(proportion.IsEqual(pca.ComponentProportions, threshold: 1e-9));
            Assert.IsTrue(eigenvalues.IsEqual(pca.Eigenvalues, threshold: 1e-8));


            // Step 5. Deriving the new data set
            // ---------------------------------

            double[,] actual = pca.Transform(data);

            // transformedData shown in pg. 18
            double[,] expected = new double[, ]
            {
                { 0.827970186, -0.175115307 },
                { -1.77758033, 0.142857227 },
                { 0.992197494, 0.384374989 },
                { 0.274210416, 0.130417207 },
                { 1.67580142, -0.209498461 },
                { 0.912949103, 0.175282444 },
                { -0.099109437, -0.349824698 },
                { -1.14457216, 0.046417258 },
                { -0.438046137, 0.017764629 },
                { -1.22382056, -0.162675287 },
            };

            // Everything is correct (up to 8 decimal places)
            Assert.IsTrue(expected.IsEqual(actual, threshold: 1e-8));
        }
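The sign ambiguity discussed in the comments above can also be handled with a generic convention instead of flipping a known column; a minimal sketch (illustrative helper, assuming the convention that the largest-magnitude entry of each eigenvector is made positive):

        // Flip each eigenvector (column) so its largest-magnitude entry
        // is positive, making results comparable across packages.
        private static void NormalizeSigns(double[,] vectors)
        {
            int rows = vectors.GetLength(0), cols = vectors.GetLength(1);
            for (int j = 0; j < cols; j++)
            {
                int argmax = 0;
                for (int i = 1; i < rows; i++)
                    if (Math.Abs(vectors[i, j]) > Math.Abs(vectors[argmax, j]))
                        argmax = i;
                if (vectors[argmax, j] < 0)
                    for (int i = 0; i < rows; i++)
                        vectors[i, j] = -vectors[i, j];
            }
        }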
Code Example #7
        public void learn_whiten_success()
        {
            #region doc_learn_1
            // Below is the same data used in the excellent paper "Tutorial
            //   On Principal Component Analysis", by Lindsay Smith (2002).
            double[][] data = 
            {
                new double[] { 2.5,  2.4 },
                new double[] { 0.5,  0.7 },
                new double[] { 2.2,  2.9 },
                new double[] { 1.9,  2.2 },
                new double[] { 3.1,  3.0 },
                new double[] { 2.3,  2.7 },
                new double[] { 2.0,  1.6 },
                new double[] { 1.0,  1.1 },
                new double[] { 1.5,  1.6 },
                new double[] { 1.1,  0.9 }
            };

            // Let's create an analysis with centering (covariance method)
            // but no standardization (correlation method) and whitening:
            var pca = new PrincipalComponentAnalysis()
            {
                Method = PrincipalComponentMethod.Center,
                Whiten = true
            };

            // Now we can learn the linear projection from the data
            MultivariateLinearRegression transform = pca.Learn(data);

            // Finally, we can project all the data
            double[][] output1 = pca.Transform(data);

            // Or just its first components by setting 
            // NumberOfOutputs to the desired components:
            pca.NumberOfOutputs = 1;

            // And then calling transform again:
            double[][] output2 = pca.Transform(data);

            // We can also limit to 80% of explained variance:
            pca.ExplainedVariance = 0.8;

            // And then call transform again:
            double[][] output3 = pca.Transform(data);
            #endregion

            double[] eigenvalues = { 1.28402771, 0.0490833989 };
            double[] proportion = eigenvalues.Divide(eigenvalues.Sum());
            double[,] eigenvectors =
            {
                { 0.19940687993951403, -1.1061252858739095 },
                { 0.21626410214440508,  1.0199057073792104 }
            };

            // Everything is alright (up to the 9 decimal places shown in the tutorial)
            Assert.IsTrue(eigenvectors.IsEqual(pca.ComponentMatrix, rtol: 1e-9));
            Assert.IsTrue(proportion.IsEqual(pca.ComponentProportions, rtol: 1e-9));
            Assert.IsTrue(eigenvalues.IsEqual(pca.Eigenvalues, rtol: 1e-5));

            pca.ExplainedVariance = 1.0;
            double[][] actual = pca.Transform(data);

            double[][] expected = 
            {
                new double[] {  0.243560157209023,  -0.263472650637184  },
                new double[] { -0.522902576315494,   0.214938218565977  },
                new double[] {  0.291870144299372,   0.578317788814594  },
                new double[] {  0.0806632088164338,  0.19622137941132   },
                new double[] {  0.492962746459375,  -0.315204397734004  },
                new double[] {  0.268558011864442,   0.263724118751361  },
                new double[] { -0.0291545644762578, -0.526334573603598  },
                new double[] { -0.336693495487974,   0.0698378585807067 },
                new double[] { -0.128858004446015,   0.0267280693333571 },
                new double[] { -0.360005627922904,  -0.244755811482527  } 
            };

            // var str = actual.ToString(CSharpJaggedMatrixFormatProvider.InvariantCulture);

            // Everything is correct (up to 8 decimal places)
            Assert.IsTrue(expected.IsEqual(actual, atol: 1e-8));
            Assert.IsTrue(expected.IsEqual(output1, atol: 1e-8));
            Assert.IsTrue(expected.Get(null, 0, 1).IsEqual(output2, atol: 1e-8));
            Assert.IsTrue(expected.Get(null, 0, 1).IsEqual(output3, atol: 1e-8));

            actual = transform.Transform(data);
            Assert.IsTrue(expected.IsEqual(actual, atol: 1e-8));
        }
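For reference, whitening rescales each principal-component score so that it has unit variance. With eigenvectors v_i, eigenvalues \lambda_i, and mean vector \mu, the whitened projection of a sample x is (standard PCA algebra, not specific to this framework):

    z_i = \frac{v_i^\top (x - \mu)}{\sqrt{\lambda_i}}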
Code Example #8
        public void PC()
        {
            Random rng = new Random(1);
            double s   = 1.0 / Math.Sqrt(2.0);

            MultivariateSample MS = new MultivariateSample(2);
            RectangularMatrix  R  = new RectangularMatrix(1000, 2);

            for (int i = 0; i < 1000; i++)
            {
                double r1 = 2.0 * rng.NextDouble() - 1.0;
                double r2 = 2.0 * rng.NextDouble() - 1.0;
                double x  = r1 * 4.0 * s - r2 * 9.0 * s;
                double y  = r1 * 4.0 * s + r2 * 9.0 * s;
                R[i, 0] = x; R[i, 1] = y;
                MS.Add(x, y);
            }

            Console.WriteLine("x {0} {1}", MS.Column(0).Mean, MS.Column(0).Variance);
            Console.WriteLine("y {0} {1}", MS.Column(1).Mean, MS.Column(1).Variance);

            Console.WriteLine("SVD");

            SingularValueDecomposition SVD = R.SingularValueDecomposition();

            for (int i = 0; i < SVD.Dimension; i++)
            {
                Console.WriteLine("{0} {1}", i, SVD.SingularValue(i));
                ColumnVector v = SVD.RightSingularVector(i);
                Console.WriteLine("  {0} {1}", v[0], v[1]);
            }

            Console.WriteLine("PCA");

            PrincipalComponentAnalysis PCA = MS.PrincipalComponentAnalysis();

            Console.WriteLine("Dimension = {0} Count = {1}", PCA.Dimension, PCA.Count);
            for (int i = 0; i < PCA.Dimension; i++)
            {
                PrincipalComponent PC = PCA.Component(i);
                Console.WriteLine("  {0} {1} {2} {3}", PC.Index, PC.Weight, PC.VarianceFraction, PC.CumulativeVarianceFraction);
                RowVector v = PC.NormalizedVector();
                Console.WriteLine("  {0} {1}", v[0], v[1]);
            }

            // reconstruct
            SquareMatrix U  = SVD.LeftTransformMatrix();
            SquareMatrix V  = SVD.RightTransformMatrix();
            double       x1 = U[0, 0] * SVD.SingularValue(0) * V[0, 0] + U[0, 1] * SVD.SingularValue(1) * V[0, 1];

            Console.WriteLine("x1 = {0} {1}", x1, R[0, 0]);
            double y1 = U[0, 0] * SVD.SingularValue(0) * V[1, 0] + U[0, 1] * SVD.SingularValue(1) * V[1, 1];

            Console.WriteLine("y1 = {0} {1}", y1, R[0, 1]);
            double x100 = U[100, 0] * SVD.SingularValue(0) * V[0, 0] + U[100, 1] * SVD.SingularValue(1) * V[0, 1];

            Console.WriteLine("x100 = {0} {1}", x100, R[100, 0]);
            double y100 = U[100, 0] * SVD.SingularValue(0) * V[1, 0] + U[100, 1] * SVD.SingularValue(1) * V[1, 1];

            Console.WriteLine("y100 = {0} {1}", y100, R[100, 1]);

            ColumnVector d1 = U[0, 0] * SVD.SingularValue(0) * SVD.RightSingularVector(0) +
                              U[0, 1] * SVD.SingularValue(1) * SVD.RightSingularVector(1);

            Console.WriteLine("d1 = ({0} {1})", d1[0], d1[1]);
            ColumnVector d100 = U[100, 0] * SVD.SingularValue(0) * SVD.RightSingularVector(0) +
                                U[100, 1] * SVD.SingularValue(1) * SVD.RightSingularVector(1);

            Console.WriteLine("d100 = ({0} {1})", d100[0], d100[1]);

            Console.WriteLine("compare");
            MultivariateSample     RS  = PCA.TransformedSample();
            IEnumerator <double[]> RSE = RS.GetEnumerator();

            RSE.MoveNext();
            double[] dv1 = RSE.Current;
            Console.WriteLine("{0} {1}", dv1[0], dv1[1]);
            Console.WriteLine("{0} {1}", U[0, 0], U[0, 1]);
            RSE.Dispose();
        }
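The reconstruction checks above rely on the standard SVD identity (plain linear algebra, not specific to Meta.Numerics): each entry of the data matrix is recovered from the singular triplets as

    R_{ij} = \sum_{k} U_{ik} \, \sigma_k \, V_{jk}

which is exactly what the x1/y1 and x100/y100 lines compute term by term.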
Code Example #9
        public void PrincipalComponentAnalysis()
        {
            int D = 3;
            int N = 10;

            // construct a sample
            Random             rng    = new Random(1);
            MultivariateSample sample = new MultivariateSample(D);

            for (int i = 0; i < N; i++)
            {
                double x = 1.0 * rng.NextDouble() - 1.0;
                double y = 4.0 * rng.NextDouble() - 2.0;
                double z = 9.0 * rng.NextDouble() - 3.0;
                sample.Add(x, y, z);
            }

            // get its column means
            RowVector mu = new RowVector(D);

            for (int i = 0; i < D; i++)
            {
                mu[i] = sample.Column(i).Mean;
            }

            // get total variance
            double tVariance = GetTotalVariance(sample);

            Console.WriteLine(tVariance);

            // do a principal component analysis
            PrincipalComponentAnalysis pca = sample.PrincipalComponentAnalysis();

            Assert.IsTrue(pca.Dimension == sample.Dimension);
            Assert.IsTrue(pca.Count == sample.Count);

            // check that the PCs behave as expected
            Assert.IsTrue(pca.Components.Count == pca.Dimension);
            for (int i = 0; i < pca.Dimension; i++)
            {
                PrincipalComponent pc = pca.Components[i];
                Assert.IsTrue(pc.Index == i);
                Assert.IsTrue(pc.Analysis == pca);
                Assert.IsTrue(TestUtilities.IsNearlyEqual(pc.Weight * pc.NormalizedVector, pc.ScaledVector()));
                Assert.IsTrue(pca.MinimumDimension(pc.CumulativeVarianceFraction) == i + 1);
            }

            // Check enumerator, and verify that variance fractions behave as expected.
            int    count      = 0;
            double cumulative = 0.0;
            double previous   = Double.PositiveInfinity;

            foreach (PrincipalComponent pc in pca.Components)
            {
                Assert.IsTrue(pc.Index == count);
                count++;
                Assert.IsTrue((0.0 <= pc.VarianceFraction) && (pc.VarianceFraction <= 1.0));
                Assert.IsTrue(pc.VarianceFraction <= previous);
                previous    = pc.VarianceFraction;
                cumulative += pc.VarianceFraction;
                Assert.IsTrue(TestUtilities.IsNearlyEqual(cumulative, pc.CumulativeVarianceFraction));
            }
            Assert.IsTrue(count == pca.Components.Count);

            // express the sample in terms of principal components
            MultivariateSample csample = pca.TransformedSample();

            // check that the explained variances are as claimed
            for (int rD = 1; rD <= D; rD++)
            {
                MultivariateSample rSample = new MultivariateSample(D);
                foreach (double[] cEntry in csample)
                {
                    RowVector x = mu.Copy();
                    for (int i = 0; i < rD; i++)
                    {
                        PrincipalComponent pc = pca.Components[i];
                        x += (cEntry[i] * pc.Weight) * pc.NormalizedVector;
                    }
                    rSample.Add(x);
                }
                double rVariance = GetTotalVariance(rSample);
                Console.WriteLine("{0} {1}", rD, rVariance);
                Assert.IsTrue(TestUtilities.IsNearlyEqual(rVariance / tVariance, pca.Components[rD - 1].CumulativeVarianceFraction));
            }
        }
Code Example #10
        public void ExceptionTest()
        {
            double[,] data = 
            {
                { 1, 2 },
                { 5, 2 },
                { 2, 2 },
                { 4, 2 },
            };


            PrincipalComponentAnalysis pca = new PrincipalComponentAnalysis(data, AnalysisMethod.Standardize);

            bool thrown = false;

            try { pca.Compute(); }
            catch (ArithmeticException ex)
            {
                ex.ToString();
                thrown = true;
            }

            // Assert that an appropriate exception has been
            //   thrown in the case of a constant variable.
            Assert.IsTrue(thrown);
        }
Code Example #11
 private double[][][] getProjectedSequences(double[][][] sequences, PrincipalComponentAnalysis pca)
 {
     int nseqs = sequences.GetLength(0);
     double[][][] projSeqs = new double[nseqs][][];
     for (int i = 0; i < nseqs; i++)
     {
         projSeqs[i] = getProjectedSequence(sequences[i], pca);
     }
     return projSeqs;
 }
Code Example #12
        public void covariance_new_interface()
        {
            double[] mean = Measures.Mean(data, dimension: 0);
            double[][] cov = Measures.Covariance(data.ToJagged());

            #region doc_learn_3
            // Create the Principal Component Analysis 
            // specifying the CovarianceMatrix method:
            var pca = new PrincipalComponentAnalysis()
            {
                Method = PrincipalComponentMethod.CovarianceMatrix,
                Means = mean // pass the original data mean vectors
            };

            // Learn the PCA projection by passing the covariance matrix
            MultivariateLinearRegression transform = pca.Learn(cov);

            // Now, we can transform data as usual
            double[,] actual = pca.Transform(data);
            #endregion

            double[,] expected = new double[,]
            {
                {  0.827970186, -0.175115307 },
                { -1.77758033,   0.142857227 },
                {  0.992197494,  0.384374989 },
                {  0.274210416,  0.130417207 },
                {  1.67580142,  -0.209498461 },
                {  0.912949103,  0.175282444 },
                { -0.099109437, -0.349824698 },
                { -1.14457216,   0.046417258 },
                { -0.438046137,  0.017764629 },
                { -1.22382056,  -0.162675287 },
            };

            // Verify both are equal with 0.01 tolerance value
            Assert.IsTrue(Matrix.IsEqual(actual, expected, 0.01));

            // Transform
            double[,] image = pca.Transform(data);

            // Reverse
            double[,] reverse = pca.Revert(image);

            // Verify both are equal with 0.01 tolerance value
            Assert.IsTrue(Matrix.IsEqual(reverse, data, 1e-5));

            actual = transform.Transform(data.ToJagged()).ToMatrix();
            Assert.IsTrue(Matrix.IsEqual(actual, expected, 1e-5));
        }
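For reference, Revert maps scores back to the original space through the projection and the stored means; with eigenvector matrix V, score vector z, and mean \mu this is the standard PCA inverse map (not shown in the snippet):

    x \approx V z + \mu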
Code Example #13
        public void TransformTest2()
        {
            // Lindsay's tutorial data
            double[,] datat = data.Transpose();

            PrincipalComponentAnalysis target = new PrincipalComponentAnalysis(datat);

            // Compute
            target.Compute();

            // Transform
            double[,] actual = target.Transform(datat);

            // Assert the scores equals the transformation of the input

            double[,] result = target.Result;
            Assert.IsTrue(Matrix.IsEqual(result, actual, 0.01));
        }
Code Example #14
        internal void PCCompute(List<SpikeEvent> spikes)
        {
            // Matrix dimensions
            int numObs = spikes.Count;
            int wavelength = spikes[0].Waveform.Length;

            // Create waveform matrix
            double[,] waveforms = new double[numObs, wavelength];

            for (int i = 0; i < numObs; ++i)
            {
                for (int j = 0; j < wavelength; ++j)
                {
                    waveforms[i, j] = spikes[i].Waveform[j];
                }
            }

            // Make PCA object
            pca = new PrincipalComponentAnalysis(waveforms, AnalysisMethod.Standardize);

            // PC Decomp.
            pca.Compute();

            // Project
            currentProjection = new double[numObs][];
            double[,] tmp = pca.Transform(waveforms);
            for (int i = 0; i < tmp.GetLength(0); ++i)
            {
                currentProjection[i] = new double[projectionDimension];
                for (int j = 0; j < projectionDimension; ++j)
                    currentProjection[i][j] = tmp[i, j];
            }

            //// Create projection matrix
            //double maxPC = double.MinValue;

            //currentProjection = new double[numObs][];
            //for (int i = 0; i < numObs; ++i)
            //{
            //    currentProjection[i] = new double[projectionDimension];

            //    for (int j = 0; j < projectionDimension; ++j)
            //    {
            //        currentProjection[i][j] = pca.ComponentMatrix[i, j];
            //        if (currentProjection[i][j] > maxPC)
            //        {
            //            maxPC = currentProjection[i][j];
            //        }
            //    }
            //}

            //// Normalize projection
            //for (int i = 0; i < numObs; ++i)
            //{
            //    for (int j = 0; j < projectionDimension; ++j)
            //    {
            //        currentProjection[i][j] = 10000 * (currentProjection[i][j] / maxPC);
            //    }
            //}
        }
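The copy loop above keeps only the first projectionDimension score columns. In this same (older) Accord API, the dimension can likely be passed to Transform directly, as Code Example #25 does; a sketch, assuming that overload is available in the version used:

            // Project and keep only the first projectionDimension components
            double[,] tmp = pca.Transform(waveforms, projectionDimension);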
Code Example #15
 public ChannelModel(SerializationInfo info, StreamingContext ctxt)
 {
     this.kVals = (int[])info.GetValue("kVals", typeof(int[]));
     this.logLike = (double[])info.GetValue("logLike", typeof(double[]));
     this.rissanen = (double[])info.GetValue("rissanen", typeof(double[]));
     this.mdl = (double[])info.GetValue("mdl", typeof(double[]));
     this.channelNumber = (int)info.GetValue("channelNumber", typeof(int));
     this.K = (int)info.GetValue("K", typeof(int));
     this.projectionDimension = (int)info.GetValue("projectionDimension", typeof(int));
     this.currentProjection = (double[][])info.GetValue("currentProjection", typeof(double[][]));
     this.maxK = (int)info.GetValue("maxK", typeof(int));
     this.gmm = (GaussianMixtureModel)info.GetValue("gmm", typeof(GaussianMixtureModel));
     this.pca = (PrincipalComponentAnalysis)info.GetValue("pca", typeof(PrincipalComponentAnalysis));
     this.unitStartIndex = (int)info.GetValue("unitStartIndex", typeof(int));
     this.pValue = (double)info.GetValue("pValue", typeof(double));
 }
Code Example #16
        static void Main(string[] args)
        {
            // Ensure '.' is used as the decimal separator regardless of locale
            System.Globalization.CultureInfo customCulture = (System.Globalization.CultureInfo)System.Threading.Thread.CurrentThread.CurrentCulture.Clone();
            customCulture.NumberFormat.NumberDecimalSeparator    = ".";
            System.Threading.Thread.CurrentThread.CurrentCulture = customCulture;

            // This program demonstrates machine learning by classifying
            // the spectra of light sources using .NET


            // Read the data (on Linux, adjust the file paths accordingly)
            string    trainCsvFilePath = @"data\train.csv";
            string    testCsvFilePath  = @"data\test.csv";
            DataTable trainTable       = new CsvReader(trainCsvFilePath, true).ToTable();
            DataTable testTable        = new CsvReader(testCsvFilePath, true).ToTable();


            // Convert the DataTable to input and output vectors (train and test)
            int[] trainOutputs = trainTable.Columns["label"].ToArray <int>();
            trainTable.Columns.Remove("label");
            double[][] trainInputs = trainTable.ToJagged <double>();
            int[]      testOutputs = testTable.Columns["label"].ToArray <int>();
            testTable.Columns.Remove("label");
            double[][] testInputs = testTable.ToJagged <double>();

            // Train a multiclass SVM classifier
            var teacher = new MulticlassSupportVectorLearning <Gaussian>()
            {
                // Configure the learning algorithm to use SMO to train the
                //  underlying SVMs in each of the binary class subproblems.
                Learner = (param) => new SequentialMinimalOptimization <Gaussian>()
                {
                    // Estimate a suitable guess for the Gaussian kernel's parameters.
                    // This estimate can serve as a starting point for a grid search.
                    UseKernelEstimation = true
                }
            };

            // Learn a machine
            var machine = teacher.Learn(trainInputs, trainOutputs);

            // Obtain class predictions for each sample
            int[] predicted = machine.Decide(testInputs);

            // print result
            int i = 0;

            Console.WriteLine("results - (predict ,real labels)");
            foreach (int pred in predicted)
            {
                Console.Write("({0},{1} )", pred, testOutputs[i]);
                i++;
            }

            // Calculate the accuracy
            double error = new ZeroOneLoss(testOutputs).Loss(predicted);

            Console.WriteLine("\n accuracy: {0}", 1 - error);

            // Reduce the feature dimensionality with PCA for visualization
            var pca = new PrincipalComponentAnalysis()
            {
                Method = PrincipalComponentMethod.Center,
                Whiten = true
            };

            pca.NumberOfOutputs = 2;
            MultivariateLinearRegression transform = pca.Learn(trainInputs);

            double[][] outputPCA = pca.Transform(trainInputs);

            // print it on the scatter plot
            ScatterplotBox.Show(outputPCA, trainOutputs).Hold();

            Console.ReadLine();
        }
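The scatter plot above shows only the training inputs; the learned projection applies equally to unseen data. A short usage sketch (hypothetical continuation of the program):

            // Project the test set with the same learned transform
            double[][] testPCA = pca.Transform(testInputs);
            ScatterplotBox.Show(testPCA, testOutputs).Hold();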
Code Example #17
 //Deserialization constructor
 public FeatureExtractionPCA(SerializationInfo info, StreamingContext context)
     : base(info, context)
 {
     pca = (PrincipalComponentAnalysis)info.GetValue("pca", typeof(PrincipalComponentAnalysis));
     numDimensions = (int?)info.GetValue("numDimensions", typeof(int?));
 }
Code Example #18
        public void ExceptionTest()
        {
            double[,] data = 
            {
                { 1, 2 },
                { 5, 2 },
                { 2, 2 },
                { 4, 2 },
            };


            var pca = new PrincipalComponentAnalysis(data, AnalysisMethod.Standardize);

            bool thrown = false;

            try { pca.Compute(); }
            catch (ArithmeticException ex)
            {
                ex.ToString();
                thrown = true;
            }

            // Default behavior changed: now an exception is not thrown anymore.
            // Instead, a small constant is added when computing standard deviations.
            Assert.IsFalse(thrown);

            var str1 = pca.SingularValues.ToCSharp();
            var str2 = pca.ComponentVectors.ToCSharp();

            Assert.IsTrue(pca.SingularValues.IsEqual(new double[] { 1.73205080756888, 0 }, 1e-7));
            Assert.IsTrue(pca.ComponentVectors.IsEqual(new double[][] {
                new double[] { 1, 0 },
                new double[] { 0, -1 }
            }, 1e-7));
        }
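If near-degenerate components are undesirable, a common workaround is to drop constant columns before standardizing, since their standard deviation is zero; a minimal sketch (illustrative helper, not part of the test):

        // Keep only columns whose values actually vary
        // (requires System.Collections.Generic).
        private static double[,] RemoveConstantColumns(double[,] m)
        {
            int rows = m.GetLength(0), cols = m.GetLength(1);
            var keep = new List<int>();
            for (int j = 0; j < cols; j++)
            {
                bool constant = true;
                for (int i = 1; i < rows && constant; i++)
                    constant = (m[i, j] == m[0, j]);
                if (!constant)
                    keep.Add(j);
            }
            double[,] result = new double[rows, keep.Count];
            for (int i = 0; i < rows; i++)
                for (int j = 0; j < keep.Count; j++)
                    result[i, j] = m[i, keep[j]];
            return result;
        }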
Code Example #19
        static void Main(string[] args)
        {
            Console.SetWindowSize(100, 60);

            // Read in the Credit Card Fraud dataset
            // TODO: change the path to point to your data directory
            string dataDirPath = @"\\Mac\Home\Documents\c-sharp-machine-learning\ch.10\input-data";

            // Load the data into a data frame
            string dataPath = Path.Combine(dataDirPath, "creditcard.csv");

            Console.WriteLine("Loading {0}\n\n", dataPath);
            var df = Frame.ReadCsv(
                dataPath,
                hasHeaders: true,
                inferTypes: true
                );

            Console.WriteLine("* Shape: {0}, {1}\n\n", df.RowCount, df.ColumnCount);

            string[] featureCols = df.ColumnKeys.Where(
                x => !x.Equals("Time") && !x.Equals("Class")
                ).ToArray();

            var noFraudData = df.Rows[
                df["Class"].Where(x => x.Value == 0.0).Keys
                              ].Columns[featureCols];

            double[][] data = BuildJaggedArray(
                noFraudData.ToArray2D <double>(), noFraudData.RowCount, featureCols.Length
                );

            double[][] wholeData = BuildJaggedArray(
                df.Columns[featureCols].ToArray2D <double>(), df.RowCount, featureCols.Length
                );
            int[] labels = df.GetColumn <int>("Class").ValuesAll.ToArray();

            var pca = new PrincipalComponentAnalysis(
                PrincipalComponentMethod.Standardize
                );

            pca.Learn(data);

            double[][] transformed      = pca.Transform(wholeData);
            double[][] first2Components = transformed.Select(x => x.Where((y, i) => i < 2).ToArray()).ToArray();
            ScatterplotBox.Show("Component #1 vs. Component #2", first2Components, labels);
            double[][] next2Components = transformed.Select(
                x => x.Where((y, i) => i >= 1 && i <= 2).ToArray()
                ).ToArray();
            ScatterplotBox.Show("Component #2 vs. Component #3", next2Components, labels);
            next2Components = transformed.Select(
                x => x.Where((y, i) => i >= 2 && i <= 3).ToArray()
                ).ToArray();
            ScatterplotBox.Show("Component #3 vs. Component #4", next2Components, labels);
            next2Components = transformed.Select(
                x => x.Where((y, i) => i >= 3 && i <= 4).ToArray()
                ).ToArray();
            ScatterplotBox.Show("Component #4 vs. Component #5", next2Components, labels);

            DataSeriesBox.Show(
                pca.Components.Select((x, i) => (double)i),
                pca.Components.Select(x => x.CumulativeProportion)
                ).SetTitle("Explained Variance");
            System.IO.File.WriteAllLines(
                Path.Combine(dataDirPath, "explained-variance.csv"),
                pca.Components.Select((x, i) => String.Format("{0},{1:0.0000}", i + 1, x.CumulativeProportion))
                );

            Console.WriteLine("exporting train set...");

            System.IO.File.WriteAllLines(
                Path.Combine(dataDirPath, "pca-features.csv"),
                transformed.Select((x, i) => String.Format("{0},{1}", String.Join(",", x), labels[i]))
                );


            Console.WriteLine("\n\n\n\n\nDONE!!!");
            Console.ReadKey();
        }
Code Example #20
        public void FromCorrelationConstructorTest()
        {
            double[] mean = Accord.Statistics.Tools.Mean(data);
            double[] stdDev = Accord.Statistics.Tools.StandardDeviation(data);
            double[,] cov = Accord.Statistics.Tools.Correlation(data);

            var actual = PrincipalComponentAnalysis.FromCorrelationMatrix(mean, stdDev, cov);
            var expected = new PrincipalComponentAnalysis(data, AnalysisMethod.Standardize);

            // Compute
            actual.Compute();
            expected.Compute();

            // Transform
            double[,] actualTransform = actual.Transform(data);
            double[,] expectedTransform = expected.Transform(data);


            // Verify both are equal with 0.01 tolerance value
            Assert.IsTrue(Matrix.IsEqual(actualTransform, expectedTransform, 0.01));

            // Transform
            double[,] image = actual.Transform(data);
            double[,] reverse = actual.Revert(image);

            // Verify both are equal with 0.01 tolerance value
            Assert.IsTrue(Matrix.IsEqual(reverse, data, 0.01));
        }
Code Example #21
 //initialize item
 void init()
 {
     pca       = new PrincipalComponentAnalysis();
     totalData = Directory.GetDirectories(savedDirectoryName).Length;
     Console.WriteLine($"Total Data : {totalData}");
 }
Code Example #22
        public void TransformTest1()
        {
            PrincipalComponentAnalysis target = new PrincipalComponentAnalysis(data);

            // Compute
            target.Compute();

            // Transform
            double[][] actual = target.Transform(data.ToArray());

            // Note: the first column has an inverted sign relative to the
            // tutorial values (eigenvector sign ambiguity; see Code Example #6)
            double[][] expected = new double[][]
            {
                new double[] {  0.827970186, -0.175115307 },
                new double[] { -1.77758033,   0.142857227 },
                new double[] {  0.992197494,  0.384374989 },
                new double[] {  0.274210416,  0.130417207 },
                new double[] {  1.67580142,  -0.209498461 },
                new double[] {  0.912949103,  0.175282444 },
                new double[] { -0.099109437, -0.349824698 },
                new double[] { -1.14457216,   0.046417258 },
                new double[] { -0.438046137,  0.017764629 },
                new double[] { -1.22382056,  -0.162675287 },
            };

            // Verify both are equal with 0.01 tolerance value
            Assert.IsTrue(Matrix.IsEqual(actual, expected, 0.01));

        }
Code Example #23
        public void LoadSamples(int percentDataTraining)
        {
            var samples = File.ReadAllLines(pathFile)
                          .Select(x => x.Split(' ')
                                  .ToList())
                          .OrderBy(x => Guid.NewGuid())
                          .ToList();

            var position = samples.Count() * percentDataTraining / 100;

            Console.WriteLine($"position: {position}");

            TestSamples = samples.Skip(position).ToList();

            File.WriteAllLines("Datas/test.txt", TestSamples
                               .Select(x => x
                                       .Aggregate((y, z) => y + z))
                               .ToArray());


            Samples = samples.Take(position).ToList();

            File.WriteAllLines("Datas/train.txt", Samples
                               .Select(x => x
                                       .Aggregate((y, z) => y + z))
                               .ToArray());

            Labels = samples.Select(x => x.Last()).ToList();

            Attributes = Enumerable.Range(0, samples.First().Count() - 1).Select(x => x.ToString()).ToList();

            var data = Samples.Select(x => x.Select(y => double.Parse(y)).ToArray()).ToArray();

            var principalComponentAnalysis = new PrincipalComponentAnalysis()
            {
                Method = PrincipalComponentMethod.Center,
                Whiten = true
            };

            var transform = principalComponentAnalysis.Learn(data);


            var newdata = principalComponentAnalysis.Transform(data);

            newdata.ToList().ForEach(x =>
            {
                x.ToList().ForEach(y => Console.Write(y + " "));
                Console.WriteLine();
            });

            principalComponentAnalysis.NumberOfOutputs = 1;

            newdata = principalComponentAnalysis.Transform(data);

            newdata.ToList().ForEach(x =>
            {
                x.ToList().ForEach(y => Console.Write(y + " "));
                Console.WriteLine();
            });

            principalComponentAnalysis.ExplainedVariance = 0.8;

            newdata = principalComponentAnalysis.Transform(data);

            newdata.ToList().ForEach(x =>
            {
                x.ToList().ForEach(y => Console.Write(y + " "));
                Console.WriteLine();
            });

        }
Code Example #24
File: SimpleCommunicator.cs Project: afcarl/IESoR

        List <PCAData2D> runPCA(List <NeatGenome> bestGenome, bool firstBehavior = true, int xBins = 0, int yBins = 0)
        {
            var totalStopWatch = System.Diagnostics.Stopwatch.StartNew();

            // Create new stopwatch
            var stopwatch = System.Diagnostics.Stopwatch.StartNew();

            List <long> uIDs = bestGenome.Select(x => x.GenomeId).ToList();
            //make sure we have the right fitness!
            //TODO: Check multi-objective code to see what value has absolute fitness
            List <double> absoluteFitness = bestGenome.Select(x => x.RealFitness).ToList();

            if (bestGenome.Count == 0)
            {
                return(null);
            }

            //we know topBody > 0 by above check
            int componentCount = Math.Min(80, (firstBehavior ? bestGenome[0].Behavior.behaviorList.Count : bestGenome[0].SecondBehavior.behaviorList.Count));

            //double componentCount = (double)fn.Json.Args[1];

            //create our double array that's going to be condensed
            double[,] collectedData = new double[bestGenome.Count, componentCount];

            int xyIndex = 0;

            foreach (IGenome genome in bestGenome)
            {
                //need to grab the behavior objects from the genome, and enter them as data
                var behaviorList = (firstBehavior ? genome.Behavior.behaviorList : genome.SecondBehavior.behaviorList);
                for (var ix = 0; ix < componentCount; ix++)
                {
                    collectedData[xyIndex, ix] = (double)behaviorList[ix];
                }

                xyIndex++;
            }

            try
            {
                stopwatch.Stop();
                Console.WriteLine("Time before kernel: " + stopwatch.ElapsedMilliseconds);
                stopwatch = System.Diagnostics.Stopwatch.StartNew();

                //higher gaussian seemed better at spreading out behavior
                //might try polynomial of 3rd or 4th degree, constant = 0 by default

                //        IKernel kernel = new Polynomial(3, 0);//new Gaussian(1.9);//new Polynomial((int)numDegree.Value, (double)numConstant.Value);

                //        KernelPrincipalComponentAnalysis kpca = new KernelPrincipalComponentAnalysis(collectedData, kernel,
                //(PrincipalComponentAnalysis.AnalysisMethod.Correlation));

                PrincipalComponentAnalysis kpca = new PrincipalComponentAnalysis(collectedData,
                                                                                 (PrincipalComponentAnalysis.AnalysisMethod.Correlation));
                try
                {
                    kpca.Compute();
                }
                catch (Exception e)
                {
                    Console.WriteLine(e.Message);
                    return(null);
                }

                stopwatch.Stop();
                Console.WriteLine("Time During PCA: " + stopwatch.ElapsedMilliseconds);
                stopwatch = System.Diagnostics.Stopwatch.StartNew();

                double[,] transform = kpca.Transform(collectedData, 2);

                stopwatch.Stop();
                Console.WriteLine("Time During Transform: " + stopwatch.ElapsedMilliseconds);
                stopwatch = System.Diagnostics.Stopwatch.StartNew();

                List <PCAData2D> uidAndPoints = binAllPoints(transform, uIDs, absoluteFitness, xBins, yBins);

                stopwatch.Stop();
                Console.WriteLine("Time During Binning: " + stopwatch.ElapsedMilliseconds);


                //List<PCAData2D> uidAndPoints = new List<PCAData2D>();
                //for (int ix = 0; ix < bestGenome.Count; ix++)
                //{
                //    uidAndPoints.Add(new PCAData2D() { uid = uIDs[ix], x = mappedResults[ix, 0], y = mappedResults[ix, 1] });
                //}

                totalStopWatch.Stop();
                Console.WriteLine("Total Time For PCA: " + totalStopWatch.ElapsedMilliseconds);

                return(uidAndPoints);
            }
            catch (Exception e)
            {
                totalStopWatch.Stop();
                Console.WriteLine("Total Time For (failed) PCA: " + totalStopWatch.ElapsedMilliseconds);

                Console.WriteLine("Failed to run PCA");
                return(null);
            }
        }
Code Example #25
        static void Main(string[] args)
        {
            Console.SetWindowSize(100, 60);

            // Read in the Cyber Attack dataset
            // TODO: change the path to point to your data directory
            string dataDirPath = @"\\Mac\Home\Documents\c-sharp-machine-learning\ch.9\input-data";

            // Load the data into a data frame
            string dataPath = Path.Combine(dataDirPath, "data.csv");

            Console.WriteLine("Loading {0}\n\n", dataPath);
            var rawDF = Frame.ReadCsv(
                dataPath,
                hasHeaders: true,
                inferTypes: true
                );

            // Encode Categorical Variables
            string[] categoricalVars =
            {
                "protocol_type", "service", "flag", "land"
            };
            // Encode Target Variables
            IDictionary <string, int> targetVarEncoding = new Dictionary <string, int>
            {
                { "normal", 0 },
                { "dos", 1 },
                { "probe", 2 },
                { "r2l", 3 },
                { "u2r", 4 }
            };

            var featuresDF = Frame.CreateEmpty <int, string>();

            foreach (string col in rawDF.ColumnKeys)
            {
                if (col.Equals("attack_type"))
                {
                    continue;
                }
                else if (col.Equals("attack_category"))
                {
                    featuresDF.AddColumn(
                        col,
                        rawDF.GetColumn <string>(col).Select(x => targetVarEncoding[x.Value])
                        );
                }
                else if (categoricalVars.Contains(col))
                {
                    var categoryDF = EncodeOneHot(rawDF.GetColumn <string>(col), col);

                    foreach (string newCol in categoryDF.ColumnKeys)
                    {
                        featuresDF.AddColumn(newCol, categoryDF.GetColumn <int>(newCol));
                    }
                }
                else
                {
                    featuresDF.AddColumn(
                        col,
                        rawDF[col].Select((x, i) => double.IsNaN(x.Value) ? 0.0 : x.Value)
                        );
                }
            }
            Console.WriteLine("* Shape: {0}, {1}\n\n", featuresDF.RowCount, featuresDF.ColumnCount);
            Console.WriteLine("* Exporting feature set...");
            featuresDF.SaveCsv(Path.Combine(dataDirPath, "features.csv"));

            // Build PCA with only normal data
            var rnd = new Random();

            int[] normalIdx = featuresDF["attack_category"]
                              .Where(x => x.Value == 0)
                              .Keys
                              .OrderBy(x => rnd.Next())
                              .Take(90000).ToArray();
            int[] attackIdx = featuresDF["attack_category"]
                              .Where(x => x.Value > 0)
                              .Keys
                              .OrderBy(x => rnd.Next())
                              .Take(10000).ToArray();
            int[] totalIdx = normalIdx.Concat(attackIdx).ToArray();

            var normalSet = featuresDF.Rows[normalIdx];

            string[] nonZeroValueCols = normalSet.ColumnKeys.Where(
                x => !x.Equals("attack_category") && normalSet[x].Max() != normalSet[x].Min()
                ).ToArray();

            double[][] normalData = BuildJaggedArray(
                normalSet.Columns[nonZeroValueCols].ToArray2D <double>(),
                normalSet.RowCount,
                nonZeroValueCols.Length
                );
            double[][] wholeData = BuildJaggedArray(
                featuresDF.Rows[totalIdx].Columns[nonZeroValueCols].ToArray2D <double>(),
                totalIdx.Length,
                nonZeroValueCols.Length
                );
            int[] labels = featuresDF
                           .Rows[totalIdx]
                           .GetColumn <int>("attack_category")
                           .ValuesAll.ToArray();

            var pca = new PrincipalComponentAnalysis(
                PrincipalComponentMethod.Standardize
                );

            pca.Learn(normalData);

            double[][] transformed      = pca.Transform(wholeData);
            double[][] first2Components = transformed.Select(
                x => x.Where((y, i) => i < 2).ToArray()
                ).ToArray();
            ScatterplotBox.Show("Component #1 vs. Component #2", first2Components, labels);
            double[][] next2Components = transformed.Select(
                x => x.Where((y, i) => i < 3 && i >= 1).ToArray()
                ).ToArray();
            ScatterplotBox.Show("Component #2 vs. Component #3", next2Components, labels);
            next2Components = transformed.Select(
                x => x.Where((y, i) => i < 4 && i >= 2).ToArray()
                ).ToArray();
            ScatterplotBox.Show("Component #3 vs. Component #4", next2Components, labels);
            next2Components = transformed.Select(
                x => x.Where((y, i) => i < 5 && i >= 3).ToArray()
                ).ToArray();
            ScatterplotBox.Show("Component #4 vs. Component #5", next2Components, labels);
            next2Components = transformed.Select(
                x => x.Where((y, i) => i < 6 && i >= 4).ToArray()
                ).ToArray();
            ScatterplotBox.Show("Component #5 vs. Component #6", next2Components, labels);

            double[] explainedVariance = pca.Components
                                         .Select(x => x.CumulativeProportion)
                                         .Where(x => x < 1)
                                         .ToArray();

            DataSeriesBox.Show(
                explainedVariance.Select((x, i) => (double)i),
                explainedVariance
                ).SetTitle("Explained Variance");
            System.IO.File.WriteAllLines(
                Path.Combine(dataDirPath, "explained-variance.csv"),
                explainedVariance.Select((x, i) => String.Format("{0},{1:0.0000}", i, x))
                );

            Console.WriteLine("* Exporting pca-transformed feature set...");
            System.IO.File.WriteAllLines(
                Path.Combine(
                    dataDirPath,
                    "pca-transformed-features.csv"
                    ),
                transformed.Select(x => String.Join(",", x))
                );
            System.IO.File.WriteAllLines(
                Path.Combine(
                    dataDirPath,
                    "pca-transformed-labels.csv"
                    ),
                labels.Select(x => x.ToString())
                );


            Console.WriteLine("\n\n\n\n\nDONE!!!");
            Console.ReadKey();
        }
Code Example #26
File: Factorize.cs Project: riffsircar/LegendOfBayes
        public static Tuple <Dictionary <string, Dictionary <int, List <Room> > >, Dictionary <string, double[, ]> > Factorize(List <Room> rooms, int dimensionReduction)
        {
            Dictionary <string, double[, ]> matrices = new Dictionary <string, double[, ]>();
            int column = 0;
            int size   = 0;

            foreach (var room in rooms)
            {
                foreach (var layer in room.objects)
                {
                    if (!matrices.ContainsKey(layer.Key))
                    {
                        size = layer.Value.GetLength(0) * layer.Value.GetLength(1);
                        matrices[layer.Key] = new double[rooms.Count, layer.Value.GetLength(0) * layer.Value.GetLength(1)];
                    }
                    matrices[layer.Key].FillColumn(column, layer.Value);
                }
                column++;
            }
            Dictionary <string, double[, ]> Ws         = new Dictionary <string, double[, ]>();
            Dictionary <string, double[, ]> Hs         = new Dictionary <string, double[, ]>();
            Dictionary <string, double[, ]> components = new Dictionary <string, double[, ]>();

            foreach (var mat in matrices)
            {
                PrincipalComponentAnalysis pca = new PrincipalComponentAnalysis(mat.Value);
                pca.Compute();
                for (int ii = 0; ii < dimensionReduction; ii++)
                {
                    pca.ComponentMatrix.rowToMatrix(ii, 12, 10).matToBitmap(0, 0).Save("pca" + mat.Key + ii + ".png");
                }
                components[mat.Key] = pca.ComponentMatrix;
                for (int jj = 0; jj < rooms.Count; jj++)
                {
                    rooms[jj].setCoefficients(mat.Key, pca.Result, jj, dimensionReduction);
                }

                /*
                 * NMF nmf = new NMF(mat.Value, dimensionReduction, 2000);
                 * Ws[mat.Key] = nmf.LeftNonnegativeFactors;
                 * Hs[mat.Key] = nmf.RightNonnegativeFactors;
                 * for (int ii = 0; ii < rooms.Count; ii++) {
                 *  rooms[ii].setCoefficients(mat.Key,nmf.RightNonnegativeFactors, ii);
                 * }
                 * string str = "";
                 * for (int xx = 0; xx < nmf.RightNonnegativeFactors.GetLength(1); xx++) {
                 *  for (int jj = 0; jj < nmf.RightNonnegativeFactors.GetLength(0); jj++) {
                 *      str += nmf.RightNonnegativeFactors[jj, xx] + ",";
                 *  }
                 *  str += "\n";
                 * }
                 * System.IO.File.WriteAllText(mat.Key + "W.txt", str);
                 * str = "";
                 * for (int xx = 0; xx < nmf.LeftNonnegativeFactors.GetLength(1); xx++) {
                 *  for (int jj = 0; jj < nmf.LeftNonnegativeFactors.GetLength(0); jj++) {
                 *      str += nmf.LeftNonnegativeFactors[jj, xx] + ",";
                 *  }
                 *  str += "\n";
                 * }
                 * System.IO.File.WriteAllText(mat.Key + "H.txt", str);
                 * for (int ii = 0; ii < nmf.LeftNonnegativeFactors.GetLength(1); ii++) {
                 *  double[,] W = nmf.LeftNonnegativeFactors.rowToMatrix(ii, 12, 10);
                 *  W.matToBitmap(0, 25).Save(mat.Key + ii + "W.png");
                 * }
                 * if (mat.Key == "blocks") {
                 *  double[,] reconstructed = new double[12, 10];
                 *  for (int ii = 0; ii < rooms[0].coefficients["blocks"].Length; ii++) {
                 *      double w = rooms[0].coefficients["blocks"][ii];
                 *      int counter = 0;
                 *      for (int xx = 0; xx < 12; xx++) {
                 *          for (int yy = 0; yy < 10; yy++) {
                 *              reconstructed[xx, yy] += w * Ws["blocks"][counter, ii];
                 *              counter++;
                 *          }
                 *      }
                 *  }
                 *  reconstructed.matToBitmap(0, 1).Save("room0Reconstructed.png");
                 * }
                 * */
            }
            int counter = 0;

            //   double[,] xy = new double[rooms.Count,2];
            double[][] clusterData = new double[rooms.Count][];
            foreach (var room in rooms)
            {
                int      compCounter = 0;
                double[] coeffs      = new double[room.coefficients.Count * dimensionReduction];
                foreach (var comp in room.coefficients)
                {
                    foreach (var coef in comp.Value)
                    {
                        coeffs[compCounter] = coef;
                        compCounter++;
                    }
                }
                clusterData[counter] = coeffs;
                Room reconstructed = room.reconstruct(components, 1);
                reconstructed.toBitmap().Save("room" + counter + "Reconstructed.png");
                counter++;
            }
            int    numberofClusters = 25;
            KMeans kmeans           = new KMeans(numberofClusters);

            kmeans.Tolerance = 0.5;
            int[] clusters = kmeans.Compute(clusterData);
            Dictionary<string, SortedSet<int>> clusteredRooms        = new Dictionary<string, SortedSet<int>>();
            Dictionary<int, SortedSet<string>> roomClusters          = new Dictionary<int, SortedSet<string>>();
            Dictionary<string, Dictionary<int, List<Room>>> output   = new Dictionary<string, Dictionary<int, List<Room>>>();

            int[] clusterCounts = new int[numberofClusters];
            for (int ii = 0; ii < rooms.Count; ii++)
            {
                rooms[ii].setType();
                if (!clusteredRooms.ContainsKey(rooms[ii].roomType))
                {
                    output[rooms[ii].roomType]         = new Dictionary<int, List<Room>>();
                    clusteredRooms[rooms[ii].roomType] = new SortedSet<int>();
                }
                if (!output[rooms[ii].roomType].ContainsKey(clusters[ii]))
                {
                    output[rooms[ii].roomType][clusters[ii]] = new List<Room>();
                }
                if (!roomClusters.ContainsKey(clusters[ii]))
                {
                    roomClusters[clusters[ii]] = new SortedSet<string>();
                }
                output[rooms[ii].roomType][clusters[ii]].Add(rooms[ii]);
                roomClusters[clusters[ii]].Add(rooms[ii].roomType);
                clusterCounts[clusters[ii]]++;
                clusteredRooms[rooms[ii].roomType].Add(clusters[ii]);
                //   Console.WriteLine(ii + " " + clusters[ii]);
            }
            for (int ii = 0; ii < clusterCounts.Length; ii++)
            {
                // skip empty clusters, which never receive a roomClusters entry
                if (!roomClusters.ContainsKey(ii)) continue;
                string str = "";
                foreach (var roomtype in roomClusters[ii])
                {
                    str += roomtype + " ";
                }
                //   Console.WriteLine("Cluster "+ ii + " = " +clusterCounts[ii] + " : " + str);
            }
            foreach (var roomType in clusteredRooms)
            {
                string str = "";
                foreach (var cluster in roomType.Value)
                {
                    str += cluster + " ";
                }
                //   Console.WriteLine(roomType.Key + " " + str);
            }
            return new Tuple<Dictionary<string, Dictionary<int, List<Room>>>, Dictionary<string, double[,]>>(output, components);
        }
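The method above projects each layer matrix with PCA, concatenates the per-layer coefficients into one feature vector per room, and clusters those vectors with k-means. Below is a minimal sketch of that pipeline using the same classic Accord.NET API (PrincipalComponentAnalysis.Compute plus KMeans.Compute); the class name and the 6x4 sample matrix are invented for illustration.

// Minimal sketch: PCA projection followed by k-means clustering,
// mirroring the pipeline above. The 6x4 matrix is invented.
using Accord.MachineLearning;
using Accord.Statistics.Analysis;

class PcaKMeansSketch
{
    static void Main()
    {
        double[,] samples =
        {
            { 1.0, 0.0, 0.0, 1.0 },
            { 0.9, 0.1, 0.0, 1.0 },
            { 0.0, 1.0, 1.0, 0.0 },
            { 0.1, 0.9, 1.0, 0.1 },
            { 0.5, 0.5, 0.5, 0.5 },
            { 0.4, 0.6, 0.5, 0.4 }
        };

        // Project each sample onto the first two principal components
        var pca = new PrincipalComponentAnalysis(samples, AnalysisMethod.Center);
        pca.Compute();
        double[,] projected = pca.Transform(samples, 2);

        // KMeans.Compute expects jagged arrays, so copy the rows over
        double[][] features = new double[projected.GetLength(0)][];
        for (int i = 0; i < features.Length; i++)
            features[i] = new[] { projected[i, 0], projected[i, 1] };

        // Cluster the projected coefficients into two groups
        var kmeans = new KMeans(2) { Tolerance = 0.5 };
        int[] labels = kmeans.Compute(features);
    }
}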
Code example #28
0
        public void PrincipalComponentAnalysis()
        {
            int D = 3;
            int N = 10;

            // construct a sample
            Random             rng    = new Random(1);
            MultivariateSample sample = new MultivariateSample(D);

            for (int i = 0; i < N; i++)
            {
                double x = 1.0 * rng.NextDouble() - 1.0;
                double y = 4.0 * rng.NextDouble() - 2.0;
                double z = 9.0 * rng.NextDouble() - 3.0;
                sample.Add(x, y, z);
            }

            // get its column means
            RowVector mu = new RowVector(D);

            for (int i = 0; i < D; i++)
            {
                mu[i] = sample.Column(i).Mean;
            }

            // get total variance
            double tVariance = GetTotalVariance(sample);

            Console.WriteLine(tVariance);

            // do a principal component analysis
            PrincipalComponentAnalysis pca = sample.PrincipalComponentAnalysis();

            Assert.IsTrue(pca.Dimension == sample.Dimension);
            Assert.IsTrue(pca.Count == sample.Count);

            // check that the PCs behave as expected
            for (int i = 0; i < pca.Dimension; i++)
            {
                PrincipalComponent pc = pca.Component(i);
                Assert.IsTrue(pc.Index == i);
                Assert.IsTrue(TestUtilities.IsNearlyEqual(pc.Weight * pc.NormalizedVector(), pc.ScaledVector()));
                Assert.IsTrue((0.0 <= pc.VarianceFraction) && (pc.VarianceFraction <= 1.0));
                if (i == 0)
                {
                    Assert.IsTrue(pc.VarianceFraction == pc.CumulativeVarianceFraction);
                }
                else
                {
                    PrincipalComponent ppc = pca.Component(i - 1);
                    Assert.IsTrue(pc.VarianceFraction <= ppc.VarianceFraction);
                    Assert.IsTrue(TestUtilities.IsNearlyEqual(ppc.CumulativeVarianceFraction + pc.VarianceFraction, pc.CumulativeVarianceFraction));
                }
            }

            // express the sample in terms of principal components
            MultivariateSample csample = pca.TransformedSample();

            // check that the explained variances are as claimed
            for (int rD = 1; rD <= D; rD++)
            {
                MultivariateSample rSample = new MultivariateSample(D);
                foreach (double[] cEntry in csample)
                {
                    RowVector x = mu.Copy();
                    for (int i = 0; i < rD; i++)
                    {
                        PrincipalComponent pc = pca.Component(i);
                        x += (cEntry[i] * pc.Weight) * pc.NormalizedVector();
                    }
                    rSample.Add(x);
                }
                double rVariance = GetTotalVariance(rSample);
                Console.WriteLine("{0} {1}", rD, rVariance);
                Assert.IsTrue(TestUtilities.IsNearlyEqual(rVariance / tVariance, pca.Component(rD - 1).CumulativeVarianceFraction));
            }
        }
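Unlike the Accord.NET snippets elsewhere on this page, the test above uses Meta.Numerics. Below is a bare-bones sketch of that API, restricted to the members the test actually exercises; the class name and sample points are invented.

// Bare-bones Meta.Numerics PCA; only members used by the test above appear here.
using System;
using Meta.Numerics.Statistics;

class MetaNumericsPcaSketch
{
    static void Main()
    {
        // A three-dimensional sample with a few invented points
        MultivariateSample sample = new MultivariateSample(3);
        sample.Add(1.0, 2.0, 3.0);
        sample.Add(2.0, 1.0, 4.0);
        sample.Add(0.5, 2.5, 2.0);
        sample.Add(1.5, 1.5, 3.5);
        sample.Add(2.5, 0.5, 1.0);

        PrincipalComponentAnalysis pca = sample.PrincipalComponentAnalysis();

        // Components are ordered by decreasing variance fraction
        for (int i = 0; i < pca.Dimension; i++)
        {
            PrincipalComponent pc = pca.Component(i);
            Console.WriteLine("{0}: {1:F3} (cumulative {2:F3})",
                pc.Index, pc.VarianceFraction, pc.CumulativeVarianceFraction);
        }

        // The sample re-expressed in the principal component basis
        MultivariateSample transformed = pca.TransformedSample();
    }
}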
Code example #29
0
File: Program.cs Project: erisonliang/HdbscanSharp
        static void Main(string[] args)
        {
            // Specify which files to use.
            var projectDir = Directory.GetParent(Directory.GetCurrentDirectory()).Parent.Parent.FullName;
            var pathFiles  = Directory.EnumerateFiles(projectDir + @"\DocumentClusteringExample\Samples").ToList();

            // Hyperparameters.

            // This option prevents overfitting on missing words.
            var replaceMissingValueWithRandomValue = false;

            var usePCA            = false;
            var numberOfOutputPCA = 100;
            var distanceFunction  = new PearsonCorrelation();

            var strategy              = ValueStrategy.Freq;
            var minVectorElements     = 2;
            var freqMin               = 2;
            var minWordCount          = 1;
            var maxWordCount          = 3;
            var minGroupOfWordsLength = 3;
            var minWordLength         = 1;
            var firstWordMinLength    = 1;
            var lastWordMinLength     = 1;
            var maxComposition        = int.MaxValue;
            var badWords              = File.ReadLines(projectDir + @"\DocumentClusteringExample\stop-words-english.txt")
                                        .Where(m => !string.IsNullOrWhiteSpace(m))
                                        .ToArray();
            var badPatternList = new string[]
            {
            };

            // Files -> List of expressions (Our dictionary based on files)
            var expressions = ExtractExpressionFromTextFiles.ExtractExpressions(
                pathFiles,
                new ExtractExpressionFromTextFilesOption
            {
                BadPatternList           = badPatternList,
                BadWords                 = badWords,
                FirstWordMinLength       = firstWordMinLength,
                LastWordMinLength        = lastWordMinLength,
                MaxExpressionComposition = maxComposition,
                MaxWordCount             = maxWordCount,
                MinGroupOfWordsLength    = minGroupOfWordsLength,
                MinWordCount             = minWordCount,
                MinWordFrequency         = freqMin,
                MinWordLength            = minWordLength
            });

            Console.WriteLine("Expressions: " + expressions.Count);

            // Files -> Vectors
            var expressionVectorOption = new TextFileToExpressionVectorOption
            {
                MinVectorElements = minVectorElements,
                BadPatternList    = badPatternList,
                MaxWordCount      = maxWordCount,
                MinWordCount      = minWordCount,
                Strategy          = strategy,
                ReplaceMissingValueWithRandomValue = replaceMissingValueWithRandomValue
            };
            List<Tuple<string, double[]>> filesToVector = new List<Tuple<string, double[]>>();

            foreach (var pathFile in pathFiles)
            {
                filesToVector.Add(
                    new Tuple<string, double[]>(
                        pathFile,
                        TextFileToExpressionVector.GenerateExpressionVector(
                            expressions,
                            pathFile,
                            expressionVectorOption)
                        )
                    );
            }
            var vectors = filesToVector
                          .Select(m => m.Item2)
                          .ToList();

            Console.WriteLine("vectors count: " + vectors.Count);

            // Remove non-representative vectors
            for (int i = 0; i < vectors.Count; i++)
            {
                var vector = vectors[i];
                if (vector.Sum() < minVectorElements)
                {
                    vectors.RemoveAt(i);
                    pathFiles.RemoveAt(i);
                    i--;
                }
            }
            Console.WriteLine("vectors count (after removing non-representative vectors): " + vectors.Count);

            // Reduce the vector size with PCA.
            if (usePCA)
            {
                Console.WriteLine("Reducing vector size with PCA");
                Stopwatch stopwatch = new Stopwatch();
                stopwatch.Start();
                PrincipalComponentAnalysis pca = new PrincipalComponentAnalysis();
                pca.NumberOfOutputs = numberOfOutputPCA;
                var trainingVector = vectors.ToArray();
                Shuffle(trainingVector);
                trainingVector = trainingVector.Take(600).ToArray();
                var pcaResult             = pca.Learn(trainingVector);
                var reducedVectorsWithPCA = pcaResult.Transform(vectors.ToArray());
                stopwatch.Stop();
                Console.WriteLine("PCA duration: " + stopwatch.Elapsed.ToString());

                vectors = reducedVectorsWithPCA.ToList();
            }


            // Run HDBSCAN algo.
            Console.WriteLine("HDBSCAN starting...");

            var contraintsList = new List<HdbscanConstraint>();

            if (usePCA)
            {
                for (int i = 1; i < numberOfOutputPCA; i++)
                {
                    contraintsList.Add(new HdbscanConstraint(i - 1, i, HdbscanConstraintType.CannotLink));
                }
            }

            var watch  = Stopwatch.StartNew();
            var result = HdbscanRunner.Run(new HdbscanParameters
            {
                DataSet           = vectors.ToArray(),
                MinPoints         = 5,
                MinClusterSize    = 5,
                DistanceFunction  = distanceFunction,
                Constraints       = contraintsList,
                UseMultipleThread = true
            });

            watch.Stop();
            Console.WriteLine("HDBSCAN done " + watch.Elapsed);

            // Read results.
            var labels = result.Labels;
            int n      = labels.Max();

            Console.WriteLine("\n\n");

            int clusterId = 0;

            for (int iCluster = 1; iCluster <= n; iCluster++)
            {
                Dictionary<string, int> categories = new Dictionary<string, int>();
                bool anyFound = false;
                for (int i = 0; i < labels.Length; i++)
                {
                    if (labels[i] == iCluster)
                    {
                        var fileName = Path.GetFileNameWithoutExtension(pathFiles[i]);
                        var category = fileName.Split('-')[0].Trim();

                        if (categories.ContainsKey(category))
                        {
                            var count = categories[category];
                            categories.Remove(category);
                            categories.Add(category, count + 1);
                        }
                        else
                        {
                            categories.Add(category, 1);
                        }

                        anyFound = true;
                    }
                }
                if (anyFound)
                {
                    clusterId++;
                    Console.WriteLine("Cluster #" + clusterId);

                    Console.WriteLine();
                    foreach (var category in categories)
                    {
                        Console.WriteLine(category.Key + ": " + category.Value);
                    }
                    Console.ReadLine();
                }
            }

            Console.WriteLine("Press any key to continue...");
            Console.ReadLine();
        }
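Isolated from the rest of the pipeline, the optional PCA step above reduces to Accord.NET's newer Learn/Transform interface. A minimal sketch, with an invented class name and invented vectors:

// Minimal sketch of the PCA reduction step above: learn a projection,
// then transform the data set. The four vectors are invented.
using Accord.Statistics.Analysis;

class PcaReductionSketch
{
    static void Main()
    {
        double[][] vectors =
        {
            new double[] { 1.0, 0.0, 0.0, 1.0 },
            new double[] { 0.9, 0.1, 0.0, 1.0 },
            new double[] { 0.0, 1.0, 1.0, 0.0 },
            new double[] { 0.1, 0.9, 1.0, 0.1 }
        };

        var pca = new PrincipalComponentAnalysis();
        pca.NumberOfOutputs = 2; // keep only the first two components

        // Learn returns the linear map; Transform applies it to any data
        var map = pca.Learn(vectors);
        double[][] reduced = map.Transform(vectors);
    }
}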
Code example #30
0
 public void buildModel(string modelPath)
 {
     outmodelpath = modelPath;
     using (System.IO.StreamReader sr = new System.IO.StreamReader(outmodelpath))
     {
         dataPrepBase.modelTypes mType = (dataPrepBase.modelTypes)Enum.Parse(typeof(dataPrepBase.modelTypes), sr.ReadLine());
         if (mType != dataPrepBase.modelTypes.PCA)
         {
             egVec = new double[1, 1];
             System.Windows.Forms.MessageBox.Show("Not a PCA Model!!", "Error", System.Windows.Forms.MessageBoxButtons.OK, System.Windows.Forms.MessageBoxIcon.Error);
             return;
         }
         inpath = sr.ReadLine();
         VariableFieldNames = sr.ReadLine().Split(new char[] { ',' });
         corr = new double[VariableFieldNames.Length, VariableFieldNames.Length];
         egVec = new double[VariableFieldNames.Length, VariableFieldNames.Length];
         n = System.Convert.ToInt32(sr.ReadLine());
         meanVector = (from string s in sr.ReadLine().Split(new char[] { ',' }) select System.Convert.ToDouble(s)).ToArray();
         stdVector = (from string s in sr.ReadLine().Split(new char[] { ',' }) select System.Convert.ToDouble(s)).ToArray();
         string[] corrLg = sr.ReadLine().Split(new char[] { ',' });
         prop = (from string s in sr.ReadLine().Split(new char[] { ',' }) select System.Convert.ToDouble(s)).ToArray();
         egVal = (from string s in sr.ReadLine().Split(new char[] { ',' }) select System.Convert.ToDouble(s)).ToArray();
         string[] egVecLg = sr.ReadLine().Split(new char[] { ',' });
         for (int i = 0; i < VariableFieldNames.Length; i++)
         {
             for (int j = 0; j < VariableFieldNames.Length; j++)
             {
                 int indexVl = (i * VariableFieldNames.Length) + j;
                 corr[i, j] = System.Convert.ToDouble(corrLg[indexVl]);
                 egVec[i, j] = System.Convert.ToDouble(egVecLg[indexVl]);
             }
         }
         sr.Close();
     }
     pca = PrincipalComponentAnalysis.FromCorrelationMatrix(meanVector, stdVector, corr);
     pca.Compute();
 }
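buildModel rebuilds the analysis from statistics persisted to disk rather than from raw samples. Below is a stripped-down sketch of just that reconstruction step, with the values that buildModel parses from the model file replaced by invented two-variable statistics:

// Stripped-down version of the reconstruction in buildModel: rebuild a PCA
// from saved means, standard deviations and correlations. Values are invented.
using Accord.Statistics.Analysis;

class PcaFromSavedStatsSketch
{
    static void Main()
    {
        double[] means = { 1.81, 1.91 };
        double[] stdDevs = { 0.79, 0.85 };
        double[,] correlation =
        {
            { 1.00, 0.93 },
            { 0.93, 1.00 }
        };

        var pca = PrincipalComponentAnalysis.FromCorrelationMatrix(means, stdDevs, correlation);
        pca.Compute();

        // The eigenstructure is now available, just as buildModel reads it back
        double[,] eigenvectors = pca.ComponentMatrix;
        double[] proportions = pca.ComponentProportions;
    }
}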
Code example #31
0
        public void ConstructorTest()
        {
            // Reproducing Lindsay Smith's "Tutorial on Principal Component Analysis"
            // using the framework's default method. The tutorial can be found online
            // at http://www.sccg.sk/~haladova/principal_components.pdf

            // Step 1. Get some data
            // ---------------------

            double[,] data =
            {
                { 2.5, 2.4 },
                { 0.5, 0.7 },
                { 2.2, 2.9 },
                { 1.9, 2.2 },
                { 3.1, 3.0 },
                { 2.3, 2.7 },
                { 2.0, 1.6 },
                { 1.0, 1.1 },
                { 1.5, 1.6 },
                { 1.1, 0.9 }
            };


            // Step 2. Subtract the mean
            // -------------------------
            //   Note: The framework does this automatically. By default, the framework
            //   uses the "Center" method, which only subtracts the mean. However, it is
            //   also possible to remove the mean *and* divide by the standard deviation
            //   (thus performing the correlation method) by specifying "Standardize"
            //   instead of "Center" as the AnalysisMethod.

            AnalysisMethod method = AnalysisMethod.Center; // AnalysisMethod.Standardize


            // Step 3. Compute the covariance matrix
            // -------------------------------------
            //   Note: Accord.NET does not need to compute the covariance
            //   matrix in order to compute PCA. The framework uses the SVD
            //   method which is more numerically stable, but may require
            //   more processing or memory. In order to replicate the tutorial
            //   using covariance matrices, please see the next unit test.

            // Create the analysis using the selected method
            var pca = new PrincipalComponentAnalysis(data, method);

            // Compute it
            pca.Compute();


            // Step 4. Compute the eigenvectors and eigenvalues of the covariance matrix
            // -------------------------------------------------------------------------
            //   Note: Since Accord.NET uses the SVD method rather than the Eigendecomposition
            //   method, the Eigenvalues are computed from the singular values. However, it is
            //   not the Eigenvalues themselves which are important, but rather their proportion:

            // Those are the expected eigenvalues, in descending order:
            double[] eigenvalues = { 1.28402771, 0.0490833989 };

            // And this will be their proportion:
            double[] proportion = eigenvalues.Divide(eigenvalues.Sum());

            // Those are the expected eigenvectors,
            // in descending order of eigenvalues:
            double[,] eigenvectors =
            {
                { -0.677873399, -0.735178656 },
                { -0.735178656,  0.677873399 }
            };

            // Now, here is the place most users get confused. The fact is that
            // the Eigenvalue decomposition (EVD) is not unique, and both the SVD
            // and EVD routines used by the framework produce results which are
            // numerically different from packages such as STATA or MATLAB, but
            // those are correct.

            // If v is an eigenvector, a multiple of this eigenvector (such as a*v, with
            // a being a scalar) will also be an eigenvector. In the Lindsay case, the
            // framework produces a first eigenvector with inverted signs. This is the same
            // as considering a=-1 and taking a*v. The result is still correct.

            // Retrieve the first expected eigenvector
            double[] v = eigenvectors.GetColumn(0);

            // Multiply by a scalar and store it back
            eigenvectors.SetColumn(0, v.Multiply(-1));

            // Everything is alright (up to the 9 decimal places shown in the tutorial)
            Assert.IsTrue(eigenvectors.IsEqual(pca.ComponentMatrix, threshold: 1e-9));
            Assert.IsTrue(proportion.IsEqual(pca.ComponentProportions, threshold: 1e-9));
            Assert.IsTrue(eigenvalues.IsEqual(pca.Eigenvalues, threshold: 1e-5));

            // Step 5. Deriving the new data set
            // ---------------------------------

            double[,] actual = pca.Transform(data);

            // transformedData shown in pg. 18
            double[,] expected = new double[,]
            {
                { 0.827970186, -0.175115307 },
                { -1.77758033, 0.142857227 },
                { 0.992197494, 0.384374989 },
                { 0.274210416, 0.130417207 },
                { 1.67580142, -0.209498461 },
                { 0.912949103, 0.175282444 },
                { -0.099109437, -0.349824698 },
                { -1.14457216, 0.046417258 },
                { -0.438046137, 0.017764629 },
                { -1.22382056, -0.162675287 },
            };

            // Everything is correct (up to 8 decimal places)
            Assert.IsTrue(expected.IsEqual(actual, threshold: 1e-8));
        }
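As the Step 2 comment points out, swapping Center for Standardize turns this covariance-based analysis into the correlation method. A sketch of that variant against the same Lindsay Smith data (the class name is invented):

// Sketch: the same analysis with the correlation method, obtained by
// standardizing (mean-subtract and divide by standard deviation).
using Accord.Statistics.Analysis;

class CorrelationMethodSketch
{
    static void Main()
    {
        double[,] data =
        {
            { 2.5, 2.4 }, { 0.5, 0.7 }, { 2.2, 2.9 }, { 1.9, 2.2 }, { 3.1, 3.0 },
            { 2.3, 2.7 }, { 2.0, 1.6 }, { 1.0, 1.1 }, { 1.5, 1.6 }, { 1.1, 0.9 }
        };

        var pca = new PrincipalComponentAnalysis(data, AnalysisMethod.Standardize);
        pca.Compute();

        // Scores now derive from the correlation rather than the covariance matrix
        double[,] scores = pca.Transform(data);
    }
}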
Code example #32
0
 private void buildModel()
 {
     if (varCov == null) getCov();
     pca = PrincipalComponentAnalysis.FromCorrelationMatrix(MeanVector, StdVector, CorralationMatrix);
     pca.Compute();
     egVec = pca.ComponentMatrix;
     prop = pca.ComponentProportions;
     egVal = pca.Eigenvalues;
     //Console.WriteLine("PCA method = " + pca.Method.ToString());
 }
Code example #33
0
        public void transform_more_columns_than_samples()
        {
            // Lindsay's tutorial data
            double[,] datat = data.Transpose();

            var target = new PrincipalComponentAnalysis(datat);

            // Compute
            target.Compute();

            // Transform
            double[,] actual = target.Transform(datat);

            // Assert the scores equals the transformation of the input

            double[,] result = target.Result;
            Assert.IsTrue(Matrix.IsEqual(result, actual, 0.01));
            Assert.AreEqual(2, result.Rows());
            Assert.AreEqual(2, result.Columns());
            Assert.IsTrue(result.IsSquare());
        }
Code example #34
0
        public void learn_weights()
        {
            double[][] raw =
            {
                new[] {   2.5, 2.4,    1 },
                new[] {   0.5, 0.7,    1 },
                new[] {   2.2, 2.9,  0.5 },
                new[] {   2.2, 2.9,  0.5 },
                new[] {   1.9, 2.2,    1 },
                new[] {   3.1, 3.0,    1 },
                new[] {   2.3, 2.7,    1 },
                new[] {   2.0, 1.6,    1 },
                new[] {   1.0, 1.1, 0.25 },
                new[] {   1.0, 1.1, 0.25 },
                new[] {   1.0, 1.1, 0.25 },
                new[] {   1.0, 1.1, 0.25 },
                new[] {   1.5, 1.6,    1 },
                new[] {  42.5, 7.6,    0 },
                new[] { 743.5, 5.6,    0 },
                new[] {   1.5,  16,    0 },
                new[] {   1.1, 0.9,    1 }
            };

            double[][] data    = raw.GetColumns(0, 1);
            double[]   weights = raw.GetColumn(2);

            var method = PrincipalComponentMethod.Center;
            var pca    = new PrincipalComponentAnalysis(method);

            pca.Learn(data, weights);

            double[] eigenvalues = { 1.28402771, 0.0490833989 };

            double[] proportion = eigenvalues.Divide(eigenvalues.Sum());

            double[,] eigenvectors =
            {
                { -0.677873399, -0.735178656 },
                { -0.735178656,  0.677873399 }
            };

            double[] v = eigenvectors.GetColumn(0);
            eigenvectors.SetColumn(0, v.Multiply(-1));

            Assert.IsTrue(eigenvectors.IsEqual(pca.ComponentMatrix, rtol: 1e-9));
            Assert.IsTrue(proportion.IsEqual(pca.ComponentProportions, rtol: 1e-9));
            Assert.IsTrue(eigenvalues.IsEqual(pca.Eigenvalues, rtol: 0.1));

            double[][] actual = pca.Transform(data);
            string     a      = actual.ToCSharp();

            /*
             * double[,] expected = new double[,]
             * {
             *  {  0.827970186, -0.175115307 },
             *  { -1.77758033,   0.142857227 },
             *  {  0.992197494,  0.384374989 },
             *  {  0.274210416,  0.130417207 },
             *  {  1.67580142,  -0.209498461 },
             *  {  0.912949103,  0.175282444 },
             *  { -0.099109437, -0.349824698 },
             *  { -1.14457216,   0.046417258 },
             *  { -0.438046137,  0.017764629 },
             *  { -1.22382056,  -0.162675287 },
             * };
             */

            double[][] expected =
            {
                new double[] {   0.827970186201088, -0.175115307046916 },
                new double[] {   -1.77758032528043,  0.142857226544281 },
                new double[] {   0.992197494414889,  0.384374988880413 }, // weight is 0.5
                new double[] {   0.992197494414889,  0.384374988880413 }, // weight is 0.5
                new double[] {     0.2742104159754,  0.130417206574127 },
                new double[] {    1.67580141864454, -0.209498461256753 },
                new double[] {   0.912949103158809,   0.17528244362037 },
                new double[] { -0.0991094374984439, -0.349824698097121 },
                new double[] {   -1.14457216379866, 0.0464172581832816 }, // weight is 0.25
                new double[] {   -1.14457216379866, 0.0464172581832816 }, // weight is 0.25
                new double[] {   -1.14457216379866, 0.0464172581832816 }, // weight is 0.25
                new double[] {   -1.14457216379866, 0.0464172581832816 }, // weight is 0.25
                new double[] {   -0.43804613676245, 0.0177646296750834 },
                new double[] {    31.7658351361525,  -26.0573198564776 }, // weight is 0
                new double[] {      505.4847301932,  -542.773304190164 }, // weight is 0
                new double[] {     10.148526503077,   9.77914156847845 }, // weight is 0
                new double[] {   -1.22382055505474, -0.162675287076762 }
            };

            Assert.IsTrue(expected.IsEqual(actual, atol: 1e-8));
        }
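The test above exercises the weighted Learn overload: fractional weights act like repeated or partial rows, and zero-weight rows are excluded from the fit yet can still be transformed. A minimal sketch of the overload, with invented data and weights:

// Minimal sketch of weighted PCA via Learn(data, weights); data is invented.
using Accord.Statistics.Analysis;

class WeightedPcaSketch
{
    static void Main()
    {
        double[][] data =
        {
            new double[] { 2.5, 2.4 },
            new double[] { 0.5, 0.7 },
            new double[] { 2.2, 2.9 }
        };

        // One weight per sample; a weight of 0 excludes a row from the fit
        double[] weights = { 1.0, 0.5, 0.0 };

        var pca = new PrincipalComponentAnalysis(PrincipalComponentMethod.Center);
        pca.Learn(data, weights);

        // All rows, including the zero-weight one, can still be projected
        double[][] projected = pca.Transform(data);
    }
}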
Code example #35
0
        public void correlation_new_interface()
        {
            double[] mean = Measures.Mean(data, dimension: 0);
            double[] stdDev = Measures.StandardDeviation(data);
            double[][] cov = Measures.Correlation(data.ToJagged());

            var actual = PrincipalComponentAnalysis.FromCorrelationMatrix(mean, stdDev, cov.ToMatrix());
            var expected = new PrincipalComponentAnalysis(PrincipalComponentMethod.CorrelationMatrix)
            {
                Means = mean,
                StandardDeviations = stdDev
            };

            // Compute
            actual.Compute();
            var transform = expected.Learn(cov);

            // Transform
            double[,] actualTransform = actual.Transform(data);
            double[,] expectedTransform = expected.Transform(data);

            // Verify both are equal with 0.01 tolerance value
            Assert.IsTrue(Matrix.IsEqual(actualTransform, expectedTransform, 0.01));

            // Transform
            double[,] image = actual.Transform(data);
            double[,] reverse = actual.Revert(image);

            // Verify both are equal with 0.01 tolerance value
            Assert.IsTrue(Matrix.IsEqual(reverse, data, 1e-6));

            // Transform
            double[][] image2 = transform.Transform(data.ToJagged());
            double[][] reverse2 = transform.Inverse().Transform(image2);
            Assert.IsTrue(Matrix.IsEqual(reverse, reverse2, 1e-6));
            Assert.IsTrue(Matrix.IsEqual(reverse2, data, 1e-6));

            // Transform
            double[][] reverse3 = actual.Revert(image2);
            Assert.IsTrue(Matrix.IsEqual(reverse, reverse3, 1e-6));
            Assert.IsTrue(Matrix.IsEqual(reverse3, data, 1e-6));

            var a = transform.Transform(data.ToJagged()).ToMatrix();
            Assert.IsTrue(Matrix.IsEqual(a, expectedTransform, 0.01));
        }
Code example #36
0
        public void learn_whiten_success()
        {
            #region doc_learn_1
            // Below is the same data used in the excellent paper "Tutorial
            //   On Principal Component Analysis", by Lindsay Smith (2002).
            double[][] data =
            {
                new double[] { 2.5, 2.4 },
                new double[] { 0.5, 0.7 },
                new double[] { 2.2, 2.9 },
                new double[] { 1.9, 2.2 },
                new double[] { 3.1, 3.0 },
                new double[] { 2.3, 2.7 },
                new double[] { 2.0, 1.6 },
                new double[] { 1.0, 1.1 },
                new double[] { 1.5, 1.6 },
                new double[] { 1.1, 0.9 }
            };

            // Let's create an analysis with centering (covariance method)
            // but no standardization (correlation method) and whitening:
            var pca = new PrincipalComponentAnalysis()
            {
                Method = PrincipalComponentMethod.Center,
                Whiten = true
            };

            // Now we can learn the linear projection from the data
            MultivariateLinearRegression transform = pca.Learn(data);

            // Finally, we can project all the data
            double[][] output1 = pca.Transform(data);

            // Or just its first components by setting
            // NumberOfOutputs to the desired components:
            pca.NumberOfOutputs = 1;

            // And then calling transform again:
            double[][] output2 = pca.Transform(data);

            // We can also limit to 80% of explained variance:
            pca.ExplainedVariance = 0.8;

            // And then call transform again:
            double[][] output3 = pca.Transform(data);
            #endregion

            double[] eigenvalues = { 1.28402771, 0.0490833989 };
            double[] proportion  = eigenvalues.Divide(eigenvalues.Sum());
            double[,] eigenvectors =
            {
                { 0.19940687993951403, -1.1061252858739095 },
                { 0.21626410214440508,  1.0199057073792104 }
            };

            // Everything is alright (up to the 9 decimal places shown in the tutorial)
            Assert.IsTrue(eigenvectors.IsEqual(pca.ComponentMatrix, rtol: 1e-9));
            Assert.IsTrue(proportion.IsEqual(pca.ComponentProportions, rtol: 1e-9));
            Assert.IsTrue(eigenvalues.IsEqual(pca.Eigenvalues, rtol: 1e-5));

            pca.ExplainedVariance = 1.0;
            double[][] actual = pca.Transform(data);

            double[][] expected =
            {
                new double[] {   0.243560157209023, -0.263472650637184 },
                new double[] {  -0.522902576315494,  0.214938218565977 },
                new double[] {   0.291870144299372,  0.578317788814594 },
                new double[] {  0.0806632088164338,   0.19622137941132 },
                new double[] {   0.492962746459375, -0.315204397734004 },
                new double[] {   0.268558011864442,  0.263724118751361 },
                new double[] { -0.0291545644762578, -0.526334573603598 },
                new double[] {  -0.336693495487974, 0.0698378585807067 },
                new double[] {  -0.128858004446015, 0.0267280693333571 },
                new double[] {  -0.360005627922904, -0.244755811482527 }
            };

            // var str = actual.ToString(CSharpJaggedMatrixFormatProvider.InvariantCulture);

            // Everything is correct (up to 8 decimal places)
            Assert.IsTrue(expected.IsEqual(actual, atol: 1e-8));
            Assert.IsTrue(expected.IsEqual(output1, atol: 1e-8));
            Assert.IsTrue(expected.Get(null, 0, 1).IsEqual(output2, atol: 1e-8));
            Assert.IsTrue(expected.Get(null, 0, 1).IsEqual(output3, atol: 1e-8));

            actual = transform.Transform(data);
            Assert.IsTrue(expected.IsEqual(actual, atol: 1e-8));
        }
Code example #37
0
        public void Revert_new_method()
        {
            var target = new PrincipalComponentAnalysis();

            // Compute
            var transform = target.Learn(data.ToJagged());

            // Transform
            double[][] image = target.Transform(data.ToJagged());

            // Reverse
            double[][] actual = target.Revert(image);

            // Verify both are equal with 0.01 tolerance value
            Assert.IsTrue(Matrix.IsEqual(actual, data, 0.01));

            // Reverse
            double[][] actual2 = transform.Inverse().Transform(image);

            // Verify both are equal with 0.01 tolerance value
            Assert.IsTrue(Matrix.IsEqual(actual2, data, 0.01));
            Assert.IsTrue(Matrix.IsEqual(actual2, actual, 1e-5));
        }
Code example #38
0
        public void learn_standardize()
        {
            double[][] data =
            {
                new double[] { 2.5, 2.4 },
                new double[] { 0.5, 0.7 },
                new double[] { 2.2, 2.9 },
                new double[] { 1.9, 2.2 },
                new double[] { 3.1, 3.0 },
                new double[] { 2.3, 2.7 },
                new double[] { 2.0, 1.6 },
                new double[] { 1.0, 1.1 },
                new double[] { 1.5, 1.6 },
                new double[] { 1.1, 0.9 }
            };

            var pca = new PrincipalComponentAnalysis()
            {
                Method = PrincipalComponentMethod.Standardize,
                Whiten = false
            };

            MultivariateLinearRegression transform = pca.Learn(data);

            double[][] output1 = pca.Transform(data);

            double[] eigenvalues = { 1.925929272692245, 0.074070727307754519 };
            double[] proportion  = eigenvalues.Divide(eigenvalues.Sum());
            double[,] eigenvectors =
            {
                { 0.70710678118654791, -0.70710678118654791 },
                { 0.70710678118654791,  0.70710678118654791 }
            };

            Assert.IsTrue(eigenvectors.IsEqual(pca.ComponentMatrix, rtol: 1e-9));
            Assert.IsTrue(proportion.IsEqual(pca.ComponentProportions, rtol: 1e-9));
            Assert.IsTrue(eigenvalues.IsEqual(pca.Eigenvalues, rtol: 1e-5));

            pca.ExplainedVariance = 1.0;
            double[][] actual = pca.Transform(data);
            //      var str = actual.ToCSharp();
            double[][] expected =
            {
                new double[] {    1.03068028963519, -0.212053139513466 },
                new double[] {   -2.19045015647317,  0.168942295968493 },
                new double[] {    1.17818776184333,   0.47577321493322 },
                new double[] {   0.323294642065681,  0.161198977394117 },
                new double[] {    2.07219946786664, -0.251171725759119 },
                new double[] {    1.10117414355213,  0.218653302562498 },
                new double[] { -0.0878525068874546, -0.430054465638535 },
                new double[] {   -1.40605089061245, 0.0528100914316325 },
                new double[] {  -0.538118242086245, 0.0202112695602547 },
                new double[] {   -1.48306450890365, -0.204309820939091 }
            };

            Assert.IsTrue(expected.IsEqual(actual, atol: 1e-8));
            Assert.IsTrue(expected.IsEqual(output1, atol: 1e-8));

            actual = transform.Transform(data);
            Assert.IsTrue(expected.IsEqual(actual, atol: 1e-8));
        }
Code example #39
0
File: MainForm.cs Project: accord-net/framework
        /// <summary>
        ///   Launched when the user clicks the "Run analysis" button.
        /// </summary>
        /// 
        private void btnCompute_Click(object sender, EventArgs e)
        {
            // Save any pending changes 
            dgvAnalysisSource.EndEdit();

            if (dgvAnalysisSource.DataSource == null)
            {
                MessageBox.Show("Please load some data using File > Open!");
                return;
            }


            // Create a matrix from the source data table
            double[][] sourceMatrix = (dgvAnalysisSource.DataSource as DataTable).ToArray(out columnNames);

            // Create and compute a new Simple Descriptive Analysis
            sda = new DescriptiveAnalysis(columnNames).Learn(sourceMatrix);

            // Show the descriptive analysis on the screen
            dgvDistributionMeasures.DataSource = sda.Measures;

            // Populates statistics overview tab with analysis data
            dgvStatisticCenter.DataSource = new ArrayDataView(sda.DeviationScores, columnNames);
            dgvStatisticStandard.DataSource = new ArrayDataView(sda.StandardScores, columnNames);

            dgvStatisticCovariance.DataSource = new ArrayDataView(sda.CovarianceMatrix, columnNames);
            dgvStatisticCorrelation.DataSource = new ArrayDataView(sda.CorrelationMatrix, columnNames);


            var method = (PrincipalComponentMethod)cbMethod.SelectedValue;

            // Create the Principal Component Analysis of the data 
            pca = new PrincipalComponentAnalysis(method);


            pca.Learn(sourceMatrix);  // Finally, compute the analysis!


            // Populate components overview with analysis data
            dgvFeatureVectors.DataSource = new ArrayDataView(pca.ComponentVectors);
            dgvPrincipalComponents.DataSource = pca.Components;
            dgvProjectionComponents.DataSource = pca.Components;
            distributionView.DataSource = pca.Components;
            cumulativeView.DataSource = pca.Components;

            numComponents.Maximum = pca.Components.Count;
            numComponents.Value = 1;
            numThreshold.Value = (decimal)pca.Components[0].CumulativeProportion * 100;
        }
Code example #40
0
        public void ComputeRanking(List <ICoordinate> points, bool[] pointLabels, List <string> identities = null, PrincipalComponentAnalysis pca = null)
        {
            var         mapping = new Dictionary<ICoordinate, Tuple<int, bool, double, string>>(); // original idx, 1/0 label, distance, string name (for debugging)
            ICoordinate remappedCenter;

            if (pca != null)
            {
                var reverted = pca.Revert(new[] { new[] { CenterOfMass.X, CenterOfMass.Y } });
                remappedCenter = new Coordinate3D(reverted[0][0], reverted[0][1], reverted[0][2]);
            }
            else
            {
                remappedCenter = CenterOfMass;
            }
            for (var i = 0; i < points.Count; i++)
            {
                mapping.Add(points[i],
                            new Tuple<int, bool, double, string>(i, pointLabels[i], points[i].EuclideanDistance(remappedCenter),
                                                                  identities != null ? identities[i] : ""));
            }
            var rankedMap = mapping.OrderBy(pt => pt.Value.Item3).ToList();

            PointRanks = rankedMap
                         .Select((pt, idx) => new { id = pt.Value.Item1, rank = idx })
                         .OrderBy(t => t.id).Select(t => t.rank)
                         .ToArray();
            var namedLabelVector = rankedMap.Select(pt => pt.Value.Item4).ToArray();

            InducedLabledVector = rankedMap.Select(pt => pt.Value.Item2).ToArray();
        }
Code example #41
0
        public void TransformTest3()
        {
            PrincipalComponentAnalysis target = new PrincipalComponentAnalysis(data);

            // Compute
            target.Compute();

            bool thrown = false;
            try
            {
                // requesting more components (3) than the 2D data supports should throw
                double[,] actual = target.Transform(data, 3);
            }
            catch { thrown = true; }

            Assert.IsTrue(thrown);
        }
Code example #42
0
        //            testHold.WaitOne(5000);
        //IEndPointClient logger;
        List<PCAData2D> runPCA(List<NeatGenome> bestGenome, bool firstBehavior = true, int xBins = 0, int yBins = 0)
        {
            var totalStopWatch = System.Diagnostics.Stopwatch.StartNew();

            // Create new stopwatch
            var stopwatch = System.Diagnostics.Stopwatch.StartNew();

            List<long> uIDs = bestGenome.Select(x => x.GenomeId).ToList();
            //make sure we have the right fitness!
            //TODO: Check multi-objective code to see what value has absolute fitness
            List<double> absoluteFitness = bestGenome.Select(x => x.RealFitness).ToList();

            if (bestGenome.Count == 0)
                return null;

            //we know topBody > 0 by above check
            int componentCount = Math.Min(80, (firstBehavior ? bestGenome[0].Behavior.behaviorList.Count : bestGenome[0].SecondBehavior.behaviorList.Count));
            //double componentCount = (double)fn.Json.Args[1];

            //create our double array that's going to be condensed
            double[,] collectedData = new double[bestGenome.Count, componentCount];

            int xyIndex = 0;
            foreach (IGenome genome in bestGenome)
            {
                //need to grab the behavior objects from the genome, and enter them as data
                var behaviorList = (firstBehavior ? genome.Behavior.behaviorList : genome.SecondBehavior.behaviorList);
                for (var ix = 0; ix < componentCount; ix++)
                {
                    collectedData[xyIndex, ix] = (double)behaviorList[ix];
                }

                xyIndex++;
            }

            try
            {
                stopwatch.Stop();
                Console.WriteLine("Time before kernel: " + stopwatch.ElapsedMilliseconds);
                stopwatch = System.Diagnostics.Stopwatch.StartNew();

                //higher gaussian seemed better at spreading out behavior
                //might try polynomial of 3rd or 4th degree, constant = 0 by default

                //IKernel kernel = new Polynomial(3, 0);//new Gaussian(1.9);//new Polynomial((int)numDegree.Value, (double)numConstant.Value);

                //KernelPrincipalComponentAnalysis kpca = new KernelPrincipalComponentAnalysis(collectedData, kernel,
                //    (PrincipalComponentAnalysis.AnalysisMethod.Correlation));

                PrincipalComponentAnalysis kpca = new PrincipalComponentAnalysis(collectedData,
                    (PrincipalComponentAnalysis.AnalysisMethod.Correlation));
                try
                {
                    kpca.Compute();
                }
                catch (Exception e)
                {
                    Console.WriteLine(e.Message);
                    return null;
                }

                stopwatch.Stop();
                Console.WriteLine("Time During PCA: " + stopwatch.ElapsedMilliseconds);
                stopwatch = System.Diagnostics.Stopwatch.StartNew();

                double[,] transform = kpca.Transform(collectedData, 2);

                stopwatch.Stop();
                Console.WriteLine("Time During Transform: " + stopwatch.ElapsedMilliseconds);
                stopwatch = System.Diagnostics.Stopwatch.StartNew();

                List<PCAData2D> uidAndPoints = binAllPoints(transform, uIDs, absoluteFitness, xBins, yBins);

                stopwatch.Stop();
                Console.WriteLine("Time During Binning: " + stopwatch.ElapsedMilliseconds);

                //List<PCAData2D> uidAndPoints = new List<PCAData2D>();
                //for (int ix = 0; ix < bestGenome.Count; ix++)
                //{
                //    uidAndPoints.Add(new PCAData2D() { uid = uIDs[ix], x = mappedResults[ix, 0], y = mappedResults[ix, 1] });
                //}

                totalStopWatch.Stop();
                Console.WriteLine("Total Time For PCA: " + totalStopWatch.ElapsedMilliseconds);

                return uidAndPoints;
            }
            catch (Exception e)
            {
                totalStopWatch.Stop();
                Console.WriteLine("Total Time For (failed) PCA: " + totalStopWatch.ElapsedMilliseconds);

                Console.WriteLine("Failed to run PCA");
                return null;
            }
        }
Code example #43
0
        public void Revert()
        {
            PrincipalComponentAnalysis target = new PrincipalComponentAnalysis(data);

            // Compute
            target.Compute();

            // Transform
            double[,] image = target.Transform(data);

            // Reverse
            double[,] actual = target.Revert(image);

            // Verify both are equal with 0.01 tolerance value
            Assert.IsTrue(Matrix.IsEqual(actual, data, 0.01));
        }
Code example #44
0
File: MainForm.cs Project: natepan/framework
        private void btnRunAnalysis_Click(object sender, EventArgs e)
        {
            if (dgvAnalysisSource.DataSource == null)
            {
                MessageBox.Show("Please load some data first.");
                return;
            }


            // Finishes and saves any pending changes to the given data
            dgvAnalysisSource.EndEdit();

            // Creates a matrix from the source data table
            double[,] sourceMatrix = (dgvAnalysisSource.DataSource as DataTable).ToMatrix(out sourceColumnNames);

            // Creates the Simple Descriptive Analysis of the given source
            sda = new DescriptiveAnalysis(sourceMatrix, sourceColumnNames);

            sda.Compute();


            // Populates statistics overview tab with analysis data
            dgvStatisticCenter.DataSource = new ArrayDataView(sda.DeviationScores, sourceColumnNames);
            dgvStatisticStandard.DataSource = new ArrayDataView(sda.StandardScores, sourceColumnNames);

            dgvStatisticCovariance.DataSource = new ArrayDataView(sda.CovarianceMatrix, sourceColumnNames);
            dgvStatisticCorrelation.DataSource = new ArrayDataView(sda.CorrelationMatrix, sourceColumnNames);
            dgvDistributionMeasures.DataSource = sda.Measures;


            // Creates the Principal Component Analysis of the given source
            pca = new PrincipalComponentAnalysis(sda.Source,
                (AnalysisMethod)cbMethod.SelectedValue);


            // Compute the Principal Component Analysis
            pca.Compute();

            // Populates components overview with analysis data
            dgvFeatureVectors.DataSource = new ArrayDataView(pca.ComponentMatrix);

            dgvPrincipalComponents.DataSource = pca.Components;

            dgvProjectionComponents.DataSource = pca.Components;
            numComponents.Maximum = pca.Components.Count;
            numComponents.Value = 1;
            numThreshold.Value = (decimal)pca.Components[0].CumulativeProportion * 100;

            CreateComponentCumulativeDistributionGraph(graphCurve);
            CreateComponentDistributionGraph(graphShare);

        }
Code example #45
0
        public void adjustTest()
        {
            double[,] data = (double[,])PrincipalComponentAnalysisTest.data.Clone();

            PrincipalComponentAnalysis target = new PrincipalComponentAnalysis(data, AnalysisMethod.Standardize);

            double[,] expected =
            {
                {  0.87874523495823,   0.578856809114491 },
                { -1.66834240260186,  -1.42942191638476  },
                {  0.496682089324217,  1.16952702249663  },
                {  0.114618943690204,  0.342588723761638 },
                {  1.64287152622626,   1.28766106517305  },
                {  0.624036471202221,  0.933258937143772 },
                {  0.241973325568208, -0.366215532296923 },
                { -1.03157049321184,  -0.956885745679056 },
                { -0.394798583821814, -0.366215532296923 },
                { -0.904216111333831, -1.19315383103191  }
            };


            double[,] actual = target.Adjust(data, false);

            // inPlace: false returns a new, standardized copy of the data
            Assert.IsTrue(expected.IsEqual(actual, 0.00001));
            Assert.AreNotEqual(data, actual);

            actual = target.Adjust(data, true);
            // inPlace: true overwrites and returns the original array
            Assert.IsTrue(expected.IsEqual(actual, 0.00001));
            Assert.AreEqual(data, actual);
        }
Code example #46
0
        public static Output Whitening(double[,] matrix)
        {
            if (matrix == null)
            {
                throw new ArgumentNullException(nameof(matrix));
            }

            // Step 1: convert matrix to a jagged array
            double[][] jaggedArray = matrix.ToJagged();

            // Step 2: do PCA whitening
            var pca = new PrincipalComponentAnalysis()
            {
                // the "Center" method only subtracts the mean.
                Method = PrincipalComponentMethod.Center,
                Whiten = true,
            };

            pca.Learn(jaggedArray);

            // note: the result of this first Transform call is discarded;
            // only the call below, after setting ExplainedVariance, is used
            pca.Transform(jaggedArray);

            pca.ExplainedVariance = 0.95;

            double[][] transformedData = pca.Transform(jaggedArray);
            double[,] projectedData = transformedData.ToMatrix();
            double[,] eigenVectors  = pca.ComponentVectors.ToMatrix();
            int components = pca.Components.Count;

            // double[] eigneValues = pca.Eigenvalues; //sorted
            // int rows = projectedData.GetLength(0);
            int columns = projectedData.GetLength(1); //this is actually the number of output vectors before reversion

            // Step 3: revert a set of projected data into its original space.
            // The output of Accord's "Revert(Double[][])" method did not make sense,
            // so a custom Revert with the same shape of API is used here instead.
            double[,] reversion = Revert(projectedData, eigenVectors, components);

            // Build Projection Matrix
            // To do so, we need eigenVectors, and the number of columns of the projected data
            double[,] projectionMatrix = GetProjectionMatrix(eigenVectors, columns);

            // write the projection matrix to disk

            /*
             * // FIRST STEP: sort the eigenvectors based on the eigenvalue
             * var eigPairs = new List<Tuple<double, double[]>>();
             *
             * for (int i = 0; i < eigneValues.GetLength(0); i++)
             * {
             *  eigPairs.Add(Tuple.Create(Math.Abs(eigneValues[i]), GetColumn(eigenVectors, i)));
             * }
             *
             * // sort in descending order based on the eigenvalues
             * eigPairs.Sort((x, y) => y.Item1.CompareTo(x.Item1));
             */
            var output = new Output()
            {
                ProjectionMatrix = projectionMatrix,
                Reversion        = reversion,
                EigenVectors     = eigenVectors,
                Components       = components,
            };

            return(output);
        }
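A hedged usage sketch for the method above; it assumes the call site lives in the same class, so that Whitening, Output and the helpers they depend on are in scope, and the 4x3 input matrix is invented:

 // Hypothetical caller; assumes it sits in the same class as Whitening above
 public static void WhiteningUsageSketch()
 {
     // invented 4x3 input matrix
     double[,] input =
     {
         { 1.0, 2.0, 3.0 },
         { 2.0, 4.0, 1.0 },
         { 3.0, 1.0, 2.0 },
         { 4.0, 3.0, 4.0 }
     };

     Output whitened = Whitening(input);
     System.Console.WriteLine("Components kept: " + whitened.Components);
 }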
Code example #48
0
        private double[][] getProjectedSequence(double[][] sequence, PrincipalComponentAnalysis pca)
        {
            if (pca == null)
                return sequence;

            int numComponents = pca.GetNumberOfComponents(1.0f);

            double[,] data = jaggedToMulti(sequence);
            // debug dump: write the raw sequence to a randomly named file
            string fn = System.IO.Path.GetRandomFileName();
            using (StreamWriter sr = new StreamWriter("Z:/WindowsFolders/Desktop/" + fn))
            {
                for (int i = 0; i < data.GetLength(0); i++)
                {
                    for (int j = 0; j < data.GetLength(1); j++)
                    {
                        sr.Write(data[i, j] + " ");
                    }
                    sr.WriteLine();
                }
            }
            double[,] projectedData = pca.Transform(data, numComponents);
            double[][] projTrainSeq = multiToJagged(projectedData);

            return projTrainSeq;
        }