// Protected methods (called by the public methods in parent class)
protected override void DoTrain(Bitmap[] charImgs)
{
    double[][] input = convertDataToPCAInputFormat(charImgs);
    pca = new PrincipalComponentAnalysis(input, AnalysisMethod.Center);
    pca.Compute();
}
private PrincipalComponentAnalysis computePCA(double[][][] sequences)
{
    PrincipalComponentAnalysis pca;

    // Create combined array for computing PCA
    int numTotalRows = sequences.Select(e => e.GetLength(0)).Sum();
    double[][] pcaCombined = new double[numTotalRows][];
    int total = 0;
    for (int i = 0; i < sequences.GetLength(0); i++)
    {
        for (int j = 0; j < sequences[i].GetLength(0); j++)
        {
            pcaCombined[total + j] = sequences[i][j];
        }
        total += sequences[i].GetLength(0);
    }

    // PCA
    double[,] pcaCombinedMulti = jaggedToMulti(pcaCombined);
    pca = new PrincipalComponentAnalysis(pcaCombinedMulti);
    pca.Compute();
    return pca;
}
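The `jaggedToMulti` helper called above is not part of this listing. A minimal sketch of what such a conversion could look like (an assumed helper, and assuming the jagged input is rectangular, i.e. every row has the same length):

// Assumed helper: copies a rectangular jagged array into a multidimensional
// array, which the older PrincipalComponentAnalysis constructor expects.
private static double[,] jaggedToMulti(double[][] source)
{
    int rows = source.Length;
    int cols = source[0].Length;
    double[,] result = new double[rows, cols];
    for (int i = 0; i < rows; i++)
        for (int j = 0; j < cols; j++)
            result[i, j] = source[i][j];
    return result;
}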
public static double[,] PCA(double[,] sourceMatrix)
{
    // Creates the Principal Component Analysis of the given source
    var pca = new PrincipalComponentAnalysis(sourceMatrix, AnalysisMethod.Center);

    // Compute the Principal Component Analysis
    pca.Compute();

    var length1 = sourceMatrix.GetLength(0);
    var length2 = sourceMatrix.GetLength(1);
    // int sercount = rawSeries.Count();

    // Creates a projection considering 80% of the information
    // pca.Transform(sourceMatrix, 0.8f, true);
    return pca.Transform(sourceMatrix, length1);
}
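The commented-out call above hints at keeping only enough components to explain 80% of the variance. With the newer Learn/Transform interface used in other examples in this section, that idea could be sketched as follows (a sketch only; `ExplainedVariance` and `ToJagged` are taken from the other samples here, not from this method):

// Sketch: retain only as many components as needed to explain ~80% of the variance.
var pca80 = new PrincipalComponentAnalysis()
{
    Method = PrincipalComponentMethod.Center,
    ExplainedVariance = 0.8
};
pca80.Learn(sourceMatrix.ToJagged());                           // learn the projection
double[][] reduced = pca80.Transform(sourceMatrix.ToJagged());  // reduced representation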
void btnCompute_Click(object sender, EventArgs e) { dataGridView2.Rows.Clear(); // Extract feature vectors double[][] hands = extract(); // Create a new Principal Component Analysis object pca = new PrincipalComponentAnalysis() { Method = PrincipalComponentMethod.Center, ExplainedVariance = 0.95 }; // Compute it pca.Learn(hands); // Now we will plot the Eigenvectors as images ArrayToImage reverse = new ArrayToImage(32, 32); // For each Principal Component for (int i = 0; i < pca.Components.Count; i++) { // We will extract its Eigenvector double[] vector = pca.Components[i].Eigenvector; // Normalize its values reverse.Max = vector.Max(); reverse.Min = vector.Min(); // Then arrange each vector value as if it was a pixel Bitmap eigenHand; reverse.Convert(vector, out eigenHand); // This will give the Eigenhands dataGridView2.Rows.Add(eigenHand, pca.Components[i].Proportion); } // Populate components overview with analysis data dgvPrincipalComponents.DataSource = pca.Components; distributionView.DataSource = pca.Components; cumulativeView.DataSource = pca.Components; btnCreateProjection.Enabled = true; }
public void transform_more_columns_than_samples_new_interface()
{
    // Lindsay's tutorial data
    var datat = data.Transpose().ToJagged();

    var target = new PrincipalComponentAnalysis();

    // Compute
    var regression = target.Learn(datat);

    // Transform
    double[][] actual = target.Transform(datat);

    // Assert the scores equal the transformation of the input
    Assert.IsNull(target.Result);

    double[,] expected =
    {
        {  0.50497524691810358, -0.00000000000000044408920985006262 },
        { -0.504975246918104,   -0.00000000000000035735303605122226 }
    };

    Assert.IsTrue(Matrix.IsEqual(expected, actual, 0.01));

    actual = target.Transform(datat);
    Assert.IsTrue(Matrix.IsEqual(expected, actual, 0.01));
}
public void ConstructorTest2() { // Reproducing Lindsay Smith's "Tutorial on Principal Component Analysis" // using the paper's original method. The tutorial can be found online // at http://www.sccg.sk/~haladova/principal_components.pdf // Step 1. Get some data // --------------------- double[,] data = { { 2.5, 2.4 }, { 0.5, 0.7 }, { 2.2, 2.9 }, { 1.9, 2.2 }, { 3.1, 3.0 }, { 2.3, 2.7 }, { 2.0, 1.6 }, { 1.0, 1.1 }, { 1.5, 1.6 }, { 1.1, 0.9 } }; // Step 2. Subtract the mean // ------------------------- // Note: The framework does this automatically // when computing the covariance matrix. In this // step we will only compute the mean vector. double[] mean = Accord.Statistics.Tools.Mean(data); // Step 3. Compute the covariance matrix // ------------------------------------- double[,] covariance = Accord.Statistics.Tools.Covariance(data, mean); // Create the analysis using the covariance matrix var pca = PrincipalComponentAnalysis.FromCovarianceMatrix(mean, covariance); // Compute it pca.Compute(); // Step 4. Compute the eigenvectors and eigenvalues of the covariance matrix //-------------------------------------------------------------------------- // Those are the expected eigenvalues, in descending order: double[] eigenvalues = { 1.28402771, 0.0490833989 }; // And this will be their proportion: double[] proportion = eigenvalues.Divide(eigenvalues.Sum()); // Those are the expected eigenvectors, // in descending order of eigenvalues: double[,] eigenvectors = { { -0.677873399, -0.735178656 }, { -0.735178656, 0.677873399 } }; // Now, here is the place most users get confused. The fact is that // the Eigenvalue decomposition (EVD) is not unique, and both the SVD // and EVD routines used by the framework produces results which are // numerically different from packages such as STATA or MATLAB, but // those are correct. // If v is an eigenvector, a multiple of this eigenvector (such as a*v, with // a being a scalar) will also be an eigenvector. In the Lindsay case, the // framework produces a first eigenvector with inverted signs. This is the same // as considering a=-1 and taking a*v. The result is still correct. // Retrieve the first expected eigenvector double[] v = eigenvectors.GetColumn(0); // Multiply by a scalar and store it back eigenvectors.SetColumn(0, v.Multiply(-1)); // Everything is alright (up to the 9 decimal places shown in the tutorial) Assert.IsTrue(eigenvectors.IsEqual(pca.ComponentMatrix, threshold: 1e-9)); Assert.IsTrue(proportion.IsEqual(pca.ComponentProportions, threshold: 1e-9)); Assert.IsTrue(eigenvalues.IsEqual(pca.Eigenvalues, threshold: 1e-8)); // Step 5. Deriving the new data set // --------------------------------- double[,] actual = pca.Transform(data); // transformedData shown in pg. 18 double[,] expected = new double[, ] { { 0.827970186, -0.175115307 }, { -1.77758033, 0.142857227 }, { 0.992197494, 0.384374989 }, { 0.274210416, 0.130417207 }, { 1.67580142, -0.209498461 }, { 0.912949103, 0.175282444 }, { -0.099109437, -0.349824698 }, { -1.14457216, 0.046417258 }, { -0.438046137, 0.017764629 }, { -1.22382056, -0.162675287 }, }; // Everything is correct (up to 8 decimal places) Assert.IsTrue(expected.IsEqual(actual, threshold: 1e-8)); }
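For reference, the quantities this test checks follow the standard PCA definitions (plain textbook math, independent of the framework): the covariance matrix, its eigendecomposition, and the projection of the centered data,

\Sigma = \frac{1}{n-1} \sum_{i=1}^{n} (x_i - \bar{x})(x_i - \bar{x})^{\top}, \qquad \Sigma v_k = \lambda_k v_k, \qquad t_i = V^{\top}(x_i - \bar{x}),

with the proportion of variance explained by component k given by \lambda_k / \sum_j \lambda_j.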
public void learn_whiten_success() { #region doc_learn_1 // Below is the same data used on the excellent paper "Tutorial // On Principal Component Analysis", by Lindsay Smith (2002). double[][] data = { new double[] { 2.5, 2.4 }, new double[] { 0.5, 0.7 }, new double[] { 2.2, 2.9 }, new double[] { 1.9, 2.2 }, new double[] { 3.1, 3.0 }, new double[] { 2.3, 2.7 }, new double[] { 2.0, 1.6 }, new double[] { 1.0, 1.1 }, new double[] { 1.5, 1.6 }, new double[] { 1.1, 0.9 } }; // Let's create an analysis with centering (covariance method) // but no standardization (correlation method) and whitening: var pca = new PrincipalComponentAnalysis() { Method = PrincipalComponentMethod.Center, Whiten = true }; // Now we can learn the linear projection from the data MultivariateLinearRegression transform = pca.Learn(data); // Finally, we can project all the data double[][] output1 = pca.Transform(data); // Or just its first components by setting // NumberOfOutputs to the desired components: pca.NumberOfOutputs = 1; // And then calling transform again: double[][] output2 = pca.Transform(data); // We can also limit to 80% of explained variance: pca.ExplainedVariance = 0.8; // And then call transform again: double[][] output3 = pca.Transform(data); #endregion double[] eigenvalues = { 1.28402771, 0.0490833989 }; double[] proportion = eigenvalues.Divide(eigenvalues.Sum()); double[,] eigenvectors = { { 0.19940687993951403, -1.1061252858739095 }, { 0.21626410214440508, 1.0199057073792104 } }; // Everything is alright (up to the 9 decimal places shown in the tutorial) Assert.IsTrue(eigenvectors.IsEqual(pca.ComponentMatrix, rtol: 1e-9)); Assert.IsTrue(proportion.IsEqual(pca.ComponentProportions, rtol: 1e-9)); Assert.IsTrue(eigenvalues.IsEqual(pca.Eigenvalues, rtol: 1e-5)); pca.ExplainedVariance = 1.0; double[][] actual = pca.Transform(data); double[][] expected = { new double[] { 0.243560157209023, -0.263472650637184 }, new double[] { -0.522902576315494, 0.214938218565977 }, new double[] { 0.291870144299372, 0.578317788814594 }, new double[] { 0.0806632088164338, 0.19622137941132 }, new double[] { 0.492962746459375, -0.315204397734004 }, new double[] { 0.268558011864442, 0.263724118751361 }, new double[] { -0.0291545644762578, -0.526334573603598 }, new double[] { -0.336693495487974, 0.0698378585807067 }, new double[] { -0.128858004446015, 0.0267280693333571 }, new double[] { -0.360005627922904, -0.244755811482527 } }; // var str = actual.ToString(CSharpJaggedMatrixFormatProvider.InvariantCulture); // Everything is correct (up to 8 decimal places) Assert.IsTrue(expected.IsEqual(actual, atol: 1e-8)); Assert.IsTrue(expected.IsEqual(output1, atol: 1e-8)); Assert.IsTrue(expected.Get(null, 0, 1).IsEqual(output2, atol: 1e-8)); Assert.IsTrue(expected.Get(null, 0, 1).IsEqual(output3, atol: 1e-8)); actual = transform.Transform(data); Assert.IsTrue(expected.IsEqual(actual, atol: 1e-8)); }
public void PC() { Random rng = new Random(1); double s = 1.0 / Math.Sqrt(2.0); MultivariateSample MS = new MultivariateSample(2); RectangularMatrix R = new RectangularMatrix(1000, 2); for (int i = 0; i < 1000; i++) { double r1 = 2.0 * rng.NextDouble() - 1.0; double r2 = 2.0 * rng.NextDouble() - 1.0; double x = r1 * 4.0 * s - r2 * 9.0 * s; double y = r1 * 4.0 * s + r2 * 9.0 * s; R[i, 0] = x; R[i, 1] = y; MS.Add(x, y); } Console.WriteLine("x {0} {1}", MS.Column(0).Mean, MS.Column(0).Variance); Console.WriteLine("y {0} {1}", MS.Column(1).Mean, MS.Column(1).Variance); Console.WriteLine("SVD"); SingularValueDecomposition SVD = R.SingularValueDecomposition(); for (int i = 0; i < SVD.Dimension; i++) { Console.WriteLine("{0} {1}", i, SVD.SingularValue(i)); ColumnVector v = SVD.RightSingularVector(i); Console.WriteLine(" {0} {1}", v[0], v[1]); } Console.WriteLine("PCA"); PrincipalComponentAnalysis PCA = MS.PrincipalComponentAnalysis(); Console.WriteLine("Dimension = {0} Count = {1}", PCA.Dimension, PCA.Count); for (int i = 0; i < PCA.Dimension; i++) { PrincipalComponent PC = PCA.Component(i); Console.WriteLine(" {0} {1} {2} {3}", PC.Index, PC.Weight, PC.VarianceFraction, PC.CumulativeVarianceFraction); RowVector v = PC.NormalizedVector(); Console.WriteLine(" {0} {1}", v[0], v[1]); } // reconstruct SquareMatrix U = SVD.LeftTransformMatrix(); SquareMatrix V = SVD.RightTransformMatrix(); double x1 = U[0, 0] * SVD.SingularValue(0) * V[0, 0] + U[0, 1] * SVD.SingularValue(1) * V[0, 1]; Console.WriteLine("x1 = {0} {1}", x1, R[0, 0]); double y1 = U[0, 0] * SVD.SingularValue(0) * V[1, 0] + U[0, 1] * SVD.SingularValue(1) * V[1, 1]; Console.WriteLine("y1 = {0} {1}", y1, R[0, 1]); double x100 = U[100, 0] * SVD.SingularValue(0) * V[0, 0] + U[100, 1] * SVD.SingularValue(1) * V[0, 1]; Console.WriteLine("x100 = {0} {1}", x100, R[100, 0]); double y100 = U[100, 0] * SVD.SingularValue(0) * V[1, 0] + U[100, 1] * SVD.SingularValue(1) * V[1, 1]; Console.WriteLine("y100 = {0} {1}", y100, R[100, 1]); ColumnVector d1 = U[0, 0] * SVD.SingularValue(0) * SVD.RightSingularVector(0) + U[0, 1] * SVD.SingularValue(1) * SVD.RightSingularVector(1); Console.WriteLine("d1 = ({0} {1})", d1[0], d1[1]); ColumnVector d100 = U[100, 0] * SVD.SingularValue(0) * SVD.RightSingularVector(0) + U[100, 1] * SVD.SingularValue(1) * SVD.RightSingularVector(1); Console.WriteLine("d100 = ({0} {1})", d100[0], d100[1]); Console.WriteLine("compare"); MultivariateSample RS = PCA.TransformedSample(); IEnumerator <double[]> RSE = RS.GetEnumerator(); RSE.MoveNext(); double[] dv1 = RSE.Current; Console.WriteLine("{0} {1}", dv1[0], dv1[1]); Console.WriteLine("{0} {1}", U[0, 0], U[0, 1]); RSE.Dispose(); }
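The reconstruction block in this test relies on the usual singular value decomposition identity (standard linear algebra, stated here for clarity):

R_{ij} = \sum_{k} U_{ik} \, \sigma_k \, V_{jk},

so row i of R can be rebuilt as \sum_k U_{ik} \, \sigma_k \, v_k, where \sigma_k is the k-th singular value and v_k the k-th right singular vector; the `d1` and `d100` vectors are exactly this sum for rows 0 and 100.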
public void PrincipalComponentAnalysis() { int D = 3; int N = 10; // construct a sample Random rng = new Random(1); MultivariateSample sample = new MultivariateSample(D); for (int i = 0; i < N; i++) { double x = 1.0 * rng.NextDouble() - 1.0; double y = 4.0 * rng.NextDouble() - 2.0; double z = 9.0 * rng.NextDouble() - 3.0; sample.Add(x, y, z); } // get its column means RowVector mu = new RowVector(D); for (int i = 0; i < D; i++) { mu[i] = sample.Column(i).Mean; } // get total variance double tVariance = GetTotalVariance(sample); Console.WriteLine(tVariance); // do a principal component analysis PrincipalComponentAnalysis pca = sample.PrincipalComponentAnalysis(); Assert.IsTrue(pca.Dimension == sample.Dimension); Assert.IsTrue(pca.Count == sample.Count); // check that the PCs behave as expected Assert.IsTrue(pca.Components.Count == pca.Dimension); for (int i = 0; i < pca.Dimension; i++) { PrincipalComponent pc = pca.Components[i]; Assert.IsTrue(pc.Index == i); Assert.IsTrue(pc.Analysis == pca); Assert.IsTrue(TestUtilities.IsNearlyEqual(pc.Weight * pc.NormalizedVector, pc.ScaledVector())); Assert.IsTrue(pca.MinimumDimension(pc.CumulativeVarianceFraction) == i + 1); } // Check enumerator, and verify that variance fractions behave as expected. int count = 0; double cumulative = 0.0; double previous = Double.PositiveInfinity; foreach (PrincipalComponent pc in pca.Components) { Assert.IsTrue(pc.Index == count); count++; Assert.IsTrue((0.0 <= pc.VarianceFraction) && (pc.VarianceFraction <= 1.0)); Assert.IsTrue(pc.VarianceFraction <= previous); previous = pc.VarianceFraction; cumulative += pc.VarianceFraction; Assert.IsTrue(TestUtilities.IsNearlyEqual(cumulative, pc.CumulativeVarianceFraction)); } Assert.IsTrue(count == pca.Components.Count); // express the sample in terms of principal components MultivariateSample csample = pca.TransformedSample(); // check that the explained variances are as claimed for (int rD = 1; rD <= D; rD++) { MultivariateSample rSample = new MultivariateSample(D); foreach (double[] cEntry in csample) { RowVector x = mu.Copy(); for (int i = 0; i < rD; i++) { PrincipalComponent pc = pca.Components[i]; x += (cEntry[i] * pc.Weight) * pc.NormalizedVector; } rSample.Add(x); } double rVariance = GetTotalVariance(rSample); Console.WriteLine("{0} {1}", rD, rVariance); Assert.IsTrue(TestUtilities.IsNearlyEqual(rVariance / tVariance, pca.Components[rD - 1].CumulativeVarianceFraction)); } }
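The inner loop above rebuilds each observation from its first r_D principal components. In symbols (standard PCA reconstruction, matching the Meta.Numerics names used here):

\hat{x} = \mu + \sum_{i=1}^{r_D} c_i \, w_i \, v_i,

where c_i is the transformed coordinate, w_i the component weight, and v_i the normalized component vector; the test then asserts that \operatorname{Var}(\hat{X}) / \operatorname{Var}(X) equals the cumulative variance fraction of component r_D.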
public void ExceptionTest()
{
    double[,] data =
    {
        { 1, 2 },
        { 5, 2 },
        { 2, 2 },
        { 4, 2 },
    };

    PrincipalComponentAnalysis pca = new PrincipalComponentAnalysis(data, AnalysisMethod.Standardize);

    bool thrown = false;

    try
    {
        pca.Compute();
    }
    catch (ArithmeticException ex)
    {
        ex.ToString();
        thrown = true;
    }

    // Assert that an appropriate exception has been
    // thrown in the case of a constant variable.
    Assert.IsTrue(thrown);
}
private double[][][] getProjectedSequences(double[][][] sequences, PrincipalComponentAnalysis pca)
{
    int nseqs = sequences.GetLength(0);
    double[][][] projSeqs = new double[nseqs][][];
    for (int i = 0; i < nseqs; i++)
    {
        projSeqs[i] = getProjectedSequence(sequences[i], pca);
    }
    return projSeqs;
}
public void covariance_new_interface() { double[] mean = Measures.Mean(data, dimension: 0); double[][] cov = Measures.Covariance(data.ToJagged()); #region doc_learn_3 // Create the Principal Component Analysis // specifying the CovarianceMatrix method: var pca = new PrincipalComponentAnalysis() { Method = PrincipalComponentMethod.CovarianceMatrix, Means = mean // pass the original data mean vectors }; // Learn the PCA projection using passing the cov matrix MultivariateLinearRegression transform = pca.Learn(cov); // Now, we can transform data as usual double[,] actual = pca.Transform(data); #endregion double[,] expected = new double[,] { { 0.827970186, -0.175115307 }, { -1.77758033, 0.142857227 }, { 0.992197494, 0.384374989 }, { 0.274210416, 0.130417207 }, { 1.67580142, -0.209498461 }, { 0.912949103, 0.175282444 }, { -0.099109437, -0.349824698 }, { -1.14457216, 0.046417258 }, { -0.438046137, 0.017764629 }, { -1.22382056, -0.162675287 }, }; // Verify both are equal with 0.01 tolerance value Assert.IsTrue(Matrix.IsEqual(actual, expected, 0.01)); // Transform double[,] image = pca.Transform(data); // Reverse double[,] reverse = pca.Revert(image); // Verify both are equal with 0.01 tolerance value Assert.IsTrue(Matrix.IsEqual(reverse, data, 1e-5)); actual = transform.Transform(data.ToJagged()).ToMatrix(); Assert.IsTrue(Matrix.IsEqual(actual, expected, 1e-5)); }
public void TransformTest2()
{
    // Lindsay's tutorial data
    double[,] datat = data.Transpose();

    PrincipalComponentAnalysis target = new PrincipalComponentAnalysis(datat);

    // Compute
    target.Compute();

    // Transform
    double[,] actual = target.Transform(datat);

    // Assert the scores equal the transformation of the input
    double[,] result = target.Result;
    Assert.IsTrue(Matrix.IsEqual(result, actual, 0.01));
}
internal void PCCompute(List<SpikeEvent> spikes) { // Matrix dimensions int numObs = spikes.Count; int wavelength = spikes[0].Waveform.Length; // Create waveform matrix double[,] waveforms = new double[numObs, wavelength]; for (int i = 0; i < numObs; ++i) { for (int j = 0; j < wavelength; ++j) { waveforms[i, j] = spikes[i].Waveform[j]; } } // Make PCA object pca = new PrincipalComponentAnalysis(waveforms, AnalysisMethod.Standardize); // PC Decomp. pca.Compute(); // Project currentProjection = new double[numObs][]; double[,] tmp = pca.Transform(waveforms); for (int i = 0; i < tmp.GetLength(0); ++i) { currentProjection[i] = new double[projectionDimension]; for (int j = 0; j < projectionDimension; ++j) currentProjection[i][j] = tmp[i, j]; } //// Create projection matrix //double maxPC = double.MinValue; //currentProjection = new double[numObs][]; //for (int i = 0; i < numObs; ++i) //{ // currentProjection[i] = new double[projectionDimension]; // for (int j = 0; j < projectionDimension; ++j) // { // currentProjection[i][j] = pca.ComponentMatrix[i, j]; // if (currentProjection[i][j] > maxPC) // { // maxPC = currentProjection[i][j]; // } // } //} //// Normalize projection //for (int i = 0; i < numObs; ++i) //{ // for (int j = 0; j < projectionDimension; ++j) // { // currentProjection[i][j] = 10000 * (currentProjection[i][j] / maxPC); // } //} }
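The manual column-copy loop above keeps only the first `projectionDimension` scores. With the newer Accord.NET interface used elsewhere in this section, the same truncation can be requested from the analysis itself; a sketch under that assumption (reusing the `waveforms` and `projectionDimension` names from this method):

// Sketch: let the analysis truncate to the desired number of components.
var pcaNew = new PrincipalComponentAnalysis()
{
    Method = PrincipalComponentMethod.Standardize,
    NumberOfOutputs = projectionDimension   // keep only the leading components
};
pcaNew.Learn(waveforms.ToJagged());
double[][] projection = pcaNew.Transform(waveforms.ToJagged());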
public ChannelModel(SerializationInfo info, StreamingContext ctxt)
{
    this.kVals = (int[])info.GetValue("kVals", typeof(int[]));
    this.logLike = (double[])info.GetValue("logLike", typeof(double[]));
    this.rissanen = (double[])info.GetValue("rissanen", typeof(double[]));
    this.mdl = (double[])info.GetValue("mdl", typeof(double[]));
    this.channelNumber = (int)info.GetValue("channelNumber", typeof(int));
    this.K = (int)info.GetValue("K", typeof(int));
    this.projectionDimension = (int)info.GetValue("projectionDimension", typeof(int));
    this.currentProjection = (double[][])info.GetValue("currentProjection", typeof(double[][]));
    this.maxK = (int)info.GetValue("maxK", typeof(int));
    this.gmm = (GaussianMixtureModel)info.GetValue("gmm", typeof(GaussianMixtureModel));
    this.pca = (PrincipalComponentAnalysis)info.GetValue("pca", typeof(PrincipalComponentAnalysis));
    this.unitStartIndex = (int)info.GetValue("unitStartIndex", typeof(int));
    this.pValue = (double)info.GetValue("pValue", typeof(double));
}
static void Main(string[] args) { //for correct symbol of float point System.Globalization.CultureInfo customCulture = (System.Globalization.CultureInfo)System.Threading.Thread.CurrentThread.CurrentCulture.Clone(); customCulture.NumberFormat.NumberDecimalSeparator = "."; System.Threading.Thread.CurrentThread.CurrentCulture = customCulture; //This is a program for demonstrating machine //learning and classifying the spectrum of light sources using .net //read data (If you use linux do not forget to correct the path to the files) string trainCsvFilePath = @"data\train.csv"; string testCsvFilePath = @"data\test.csv"; DataTable trainTable = new CsvReader(trainCsvFilePath, true).ToTable(); DataTable testTable = new CsvReader(testCsvFilePath, true).ToTable(); // Convert the DataTable to input and output vectors (train and test) int[] trainOutputs = trainTable.Columns["label"].ToArray <int>(); trainTable.Columns.Remove("label"); double[][] trainInputs = trainTable.ToJagged <double>(); int[] testOutputs = testTable.Columns["label"].ToArray <int>(); testTable.Columns.Remove("label"); double[][] testInputs = testTable.ToJagged <double>(); // training model SVM classifier var teacher = new MulticlassSupportVectorLearning <Gaussian>() { // Configure the learning algorithm to use SMO to train the // underlying SVMs in each of the binary class subproblems. Learner = (param) => new SequentialMinimalOptimization <Gaussian>() { // Estimate a suitable guess for the Gaussian kernel's parameters. // This estimate can serve as a starting point for a grid search. UseKernelEstimation = true } }; // Learn a machine var machine = teacher.Learn(trainInputs, trainOutputs); // Obtain class predictions for each sample int[] predicted = machine.Decide(testInputs); // print result int i = 0; Console.WriteLine("results - (predict ,real labels)"); foreach (int pred in predicted) { Console.Write("({0},{1} )", pred, testOutputs[i]); i++; } //calculate the accuracy double error = new ZeroOneLoss(testOutputs).Loss(predicted); Console.WriteLine("\n accuracy: {0}", 1 - error); // consider the decrease in the dimension of features using PCA var pca = new PrincipalComponentAnalysis() { Method = PrincipalComponentMethod.Center, Whiten = true }; pca.NumberOfOutputs = 2; MultivariateLinearRegression transform = pca.Learn(trainInputs); double[][] outputPCA = pca.Transform(trainInputs); // print it on the scatter plot ScatterplotBox.Show(outputPCA, trainOutputs).Hold(); Console.ReadLine(); }
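The scatter plot above only shows the training set. Because `pca` has already been learned, the held-out test inputs can be projected into the same two-component space for a side-by-side look; a small sketch (the `testPCA` name is just illustrative):

// Sketch: project the test set with the PCA learned on the training set,
// so both sets are plotted in the same component space.
double[][] testPCA = pca.Transform(testInputs);
ScatterplotBox.Show(testPCA, testOutputs).Hold();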
// Deserialization constructor
public FeatureExtractionPCA(SerializationInfo info, StreamingContext context)
    : base(info, context)
{
    pca = (PrincipalComponentAnalysis)info.GetValue("pca", typeof(PrincipalComponentAnalysis));
    numDimensions = (int?)info.GetValue("numDimensions", typeof(int?));
}
public void ExceptionTest()
{
    double[,] data =
    {
        { 1, 2 },
        { 5, 2 },
        { 2, 2 },
        { 4, 2 },
    };

    var pca = new PrincipalComponentAnalysis(data, AnalysisMethod.Standardize);

    bool thrown = false;

    try
    {
        pca.Compute();
    }
    catch (ArithmeticException ex)
    {
        ex.ToString();
        thrown = true;
    }

    // Default behavior changed: now an exception is not thrown anymore.
    // Instead, a small constant is added when computing standard deviations.
    Assert.IsFalse(thrown);

    var str1 = pca.SingularValues.ToCSharp();
    var str2 = pca.ComponentVectors.ToCSharp();

    Assert.IsTrue(pca.SingularValues.IsEqual(new double[] { 1.73205080756888, 0 }, 1e-7));
    Assert.IsTrue(pca.ComponentVectors.IsEqual(new double[][]
    {
        new double[] { 1, 0 },
        new double[] { 0, -1 }
    }, 1e-7));
}
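When a column is constant, as in this test, an alternative to relying on the framework's small-constant workaround is to drop zero-variance columns before the analysis, much like the larger examples below do with a `Max() != Min()` check. A minimal sketch over a jagged array (assumes `System.Linq`):

// Sketch: keep only the columns whose values actually vary.
static double[][] RemoveConstantColumns(double[][] data)
{
    int cols = data[0].Length;
    int[] keep = Enumerable.Range(0, cols)
        .Where(j => data.Select(row => row[j]).Distinct().Count() > 1)
        .ToArray();
    return data.Select(row => keep.Select(j => row[j]).ToArray()).ToArray();
}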
static void Main(string[] args) { Console.SetWindowSize(100, 60); // Read in the Credit Card Fraud dataset // TODO: change the path to point to your data directory string dataDirPath = @"\\Mac\Home\Documents\c-sharp-machine-learning\ch.10\input-data"; // Load the data into a data frame string dataPath = Path.Combine(dataDirPath, "creditcard.csv"); Console.WriteLine("Loading {0}\n\n", dataPath); var df = Frame.ReadCsv( dataPath, hasHeaders: true, inferTypes: true ); Console.WriteLine("* Shape: {0}, {1}\n\n", df.RowCount, df.ColumnCount); string[] featureCols = df.ColumnKeys.Where( x => !x.Equals("Time") && !x.Equals("Class") ).ToArray(); var noFraudData = df.Rows[ df["Class"].Where(x => x.Value == 0.0).Keys ].Columns[featureCols]; double[][] data = BuildJaggedArray( noFraudData.ToArray2D <double>(), noFraudData.RowCount, featureCols.Length ); double[][] wholeData = BuildJaggedArray( df.Columns[featureCols].ToArray2D <double>(), df.RowCount, featureCols.Length ); int[] labels = df.GetColumn <int>("Class").ValuesAll.ToArray(); var pca = new PrincipalComponentAnalysis( PrincipalComponentMethod.Standardize ); pca.Learn(data); double[][] transformed = pca.Transform(wholeData); double[][] first2Components = transformed.Select(x => x.Where((y, i) => i < 2).ToArray()).ToArray(); ScatterplotBox.Show("Component #1 vs. Component #2", first2Components, labels); double[][] next2Components = transformed.Select( x => x.Where((y, i) => i >= 1 && i <= 2).ToArray() ).ToArray(); ScatterplotBox.Show("Component #2 vs. Component #3", next2Components, labels); next2Components = transformed.Select( x => x.Where((y, i) => i >= 2 && i <= 3).ToArray() ).ToArray(); ScatterplotBox.Show("Component #3 vs. Component #4", next2Components, labels); next2Components = transformed.Select( x => x.Where((y, i) => i >= 3 && i <= 4).ToArray() ).ToArray(); ScatterplotBox.Show("Component #4 vs. Component #5", next2Components, labels); DataSeriesBox.Show( pca.Components.Select((x, i) => (double)i), pca.Components.Select(x => x.CumulativeProportion) ).SetTitle("Explained Variance"); System.IO.File.WriteAllLines( Path.Combine(dataDirPath, "explained-variance.csv"), pca.Components.Select((x, i) => String.Format("{0},{1:0.0000}", i + 1, x.CumulativeProportion)) ); Console.WriteLine("exporting train set..."); System.IO.File.WriteAllLines( Path.Combine(dataDirPath, "pca-features.csv"), transformed.Select((x, i) => String.Format("{0},{1}", String.Join(",", x), labels[i])) ); Console.WriteLine("\n\n\n\n\nDONE!!!"); Console.ReadKey(); }
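`BuildJaggedArray` is referenced here but not defined in this listing. A plausible sketch of such a helper, consistent with how it is called above (a hypothetical implementation, not the original project's code), might be:

// Hypothetical helper: copy a rowCount x colCount 2D export into a jagged array,
// mapping missing values to 0.0 so the PCA can consume it.
private static double[][] BuildJaggedArray(double[,] matrix, int rowCount, int colCount)
{
    double[][] result = new double[rowCount][];
    for (int i = 0; i < rowCount; i++)
    {
        result[i] = new double[colCount];
        for (int j = 0; j < colCount; j++)
            result[i][j] = double.IsNaN(matrix[i, j]) ? 0.0 : matrix[i, j];
    }
    return result;
}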
public void FromCorrelationConstructorTest() { double[] mean = Accord.Statistics.Tools.Mean(data); double[] stdDev = Accord.Statistics.Tools.StandardDeviation(data); double[,] cov = Accord.Statistics.Tools.Correlation(data); var actual = PrincipalComponentAnalysis.FromCorrelationMatrix(mean, stdDev, cov); var expected = new PrincipalComponentAnalysis(data, AnalysisMethod.Standardize); // Compute actual.Compute(); expected.Compute(); // Transform double[,] actualTransform = actual.Transform(data); double[,] expectedTransform = expected.Transform(data); // Verify both are equal with 0.01 tolerance value Assert.IsTrue(Matrix.IsEqual(actualTransform, expectedTransform, 0.01)); // Transform double[,] image = actual.Transform(data); double[,] reverse = actual.Revert(image); // Verify both are equal with 0.01 tolerance value Assert.IsTrue(Matrix.IsEqual(reverse, data, 0.01)); }
// initialize item
void init()
{
    pca = new PrincipalComponentAnalysis();
    totalData = Directory.GetDirectories(savedDirectoryName).Length;
    Console.WriteLine($"Total Data : {totalData}");
}
public void TransformTest1() { PrincipalComponentAnalysis target = new PrincipalComponentAnalysis(data); // Compute target.Compute(); // Transform double[][] actual = target.Transform(data.ToArray()); // first inversed.. ? double[][] expected = new double[][] { new double[] { 0.827970186, -0.175115307 }, new double[] { -1.77758033, 0.142857227 }, new double[] { 0.992197494, 0.384374989 }, new double[] { 0.274210416, 0.130417207 }, new double[] { 1.67580142, -0.209498461 }, new double[] { 0.912949103, 0.175282444 }, new double[] { -0.099109437, -0.349824698 }, new double[] { -1.14457216, 0.046417258 }, new double[] { -0.438046137, 0.017764629 }, new double[] { -1.22382056, -0.162675287 }, }; // Verify both are equal with 0.01 tolerance value Assert.IsTrue(Matrix.IsEqual(actual, expected, 0.01)); }
public void LoadSamples(int percentDataTraining) { var samples = File.ReadAllLines(pathFile) .Select(x => x.Split(' ') .ToList()) .OrderBy(x => Guid.NewGuid()) .ToList(); var position = samples.Count() * percentDataTraining / 100; Console.WriteLine($"position: {position}"); TestSamples = samples.Skip(position).ToList(); File.WriteAllLines("Datas/test.txt", TestSamples .Select(x => x .Aggregate((y, z) => y + z)) .ToArray()); Samples = samples.Take(position).ToList(); File.WriteAllLines("Datas/train.txt", Samples .Select(x => x .Aggregate((y, z) => y + z)) .ToArray()); Labels = samples.Select(x => x.Last()).ToList(); Attributes = Enumerable.Range(0, samples.First().Count() - 1).Select(x => x.ToString()).ToList(); var data = Samples.Select(x => x.Select(y => double.Parse(y)).ToArray()).ToArray(); var principalComponentAnalysis = new PrincipalComponentAnalysis() { Method = PrincipalComponentMethod.Center, Whiten = true }; var transform = principalComponentAnalysis.Learn(data); var newdata = principalComponentAnalysis.Transform(data); newdata.ToList().ForEach(x => { x.ToList().ForEach(y => Console.Write(y + " ")); Console.WriteLine(); }); principalComponentAnalysis.NumberOfOutputs = 1; newdata = principalComponentAnalysis.Transform(data); newdata.ToList().ForEach(x => { x.ToList().ForEach(y => Console.Write(y + " ")); Console.WriteLine(); }); principalComponentAnalysis.ExplainedVariance = 0.8; newdata = principalComponentAnalysis.Transform(data); newdata.ToList().ForEach(x => { x.ToList().ForEach(y => Console.Write(y + " ")); Console.WriteLine(); }); var q = 1; }
// testHold.WaitOne(5000); //IEndPointClient logger; List <PCAData2D> runPCA(List <NeatGenome> bestGenome, bool firstBehavior = true, int xBins = 0, int yBins = 0) { var totalStopWatch = System.Diagnostics.Stopwatch.StartNew(); // Create new stopwatch var stopwatch = System.Diagnostics.Stopwatch.StartNew(); List <long> uIDs = bestGenome.Select(x => x.GenomeId).ToList(); //make sure we have the right fitness! //TODO: Check multi-objective code to see what value has absolute fitness List <double> absoluteFitness = bestGenome.Select(x => x.RealFitness).ToList(); if (bestGenome.Count == 0) { return(null); } //we know topBody > 0 by above check int componentCount = Math.Min(80, (firstBehavior ? bestGenome[0].Behavior.behaviorList.Count : bestGenome[0].SecondBehavior.behaviorList.Count)); //double componentCount = (double)fn.Json.Args[1]; //create our double array that's going to be condensed double[,] collectedData = new double[bestGenome.Count, componentCount]; int xyIndex = 0; foreach (IGenome genome in bestGenome) { //need to grab the behavior objects from the genome, and enter them as data var behaviorList = (firstBehavior ? genome.Behavior.behaviorList : genome.SecondBehavior.behaviorList); for (var ix = 0; ix < componentCount; ix++) { collectedData[xyIndex, ix] = (double)behaviorList[ix]; } xyIndex++; } try { stopwatch.Stop(); Console.WriteLine("Time before kernel: " + stopwatch.ElapsedMilliseconds); stopwatch = System.Diagnostics.Stopwatch.StartNew(); //higher gaussian seemed better at spreading out behavior //might try polynomial of 3rd or 4th degree, constant = 0 by default // IKernel kernel = new Polynomial(3, 0);//new Gaussian(1.9);//new Polynomial((int)numDegree.Value, (double)numConstant.Value); // KernelPrincipalComponentAnalysis kpca = new KernelPrincipalComponentAnalysis(collectedData, kernel, //(PrincipalComponentAnalysis.AnalysisMethod.Correlation)); PrincipalComponentAnalysis kpca = new PrincipalComponentAnalysis(collectedData, (PrincipalComponentAnalysis.AnalysisMethod.Correlation)); try { kpca.Compute(); } catch (Exception e) { Console.WriteLine(e.Message); return(null); } stopwatch.Stop(); Console.WriteLine("Time During PCA: " + stopwatch.ElapsedMilliseconds); stopwatch = System.Diagnostics.Stopwatch.StartNew(); double[,] transform = kpca.Transform(collectedData, 2); stopwatch.Stop(); Console.WriteLine("Time During Transform: " + stopwatch.ElapsedMilliseconds); stopwatch = System.Diagnostics.Stopwatch.StartNew(); List <PCAData2D> uidAndPoints = binAllPoints(transform, uIDs, absoluteFitness, xBins, yBins); stopwatch.Stop(); Console.WriteLine("Time During Binning: " + stopwatch.ElapsedMilliseconds); //List<PCAData2D> uidAndPoints = new List<PCAData2D>(); //for (int ix = 0; ix < bestGenome.Count; ix++) //{ // uidAndPoints.Add(new PCAData2D() { uid = uIDs[ix], x = mappedResults[ix, 0], y = mappedResults[ix, 1] }); //} totalStopWatch.Stop(); Console.WriteLine("Total Time For PCA: " + totalStopWatch.ElapsedMilliseconds); return(uidAndPoints); } catch (Exception e) { totalStopWatch.Stop(); Console.WriteLine("Total Time For (failed) PCA: " + totalStopWatch.ElapsedMilliseconds); Console.WriteLine("Failed to run PCA"); return(null); } }
static void Main(string[] args)
{
    Console.SetWindowSize(100, 60);

    // Read in the Cyber Attack dataset
    // TODO: change the path to point to your data directory
    string dataDirPath = @"\\Mac\Home\Documents\c-sharp-machine-learning\ch.9\input-data";

    // Load the data into a data frame
    string dataPath = Path.Combine(dataDirPath, "data.csv");
    Console.WriteLine("Loading {0}\n\n", dataPath);
    var rawDF = Frame.ReadCsv(
        dataPath,
        hasHeaders: true,
        inferTypes: true
    );

    // Encode Categorical Variables
    string[] categoricalVars = { "protocol_type", "service", "flag", "land" };
    // Encode Target Variables
    IDictionary<string, int> targetVarEncoding = new Dictionary<string, int>
    {
        { "normal", 0 },
        { "dos", 1 },
        { "probe", 2 },
        { "r2l", 3 },
        { "u2r", 4 }
    };

    var featuresDF = Frame.CreateEmpty<int, string>();

    foreach (string col in rawDF.ColumnKeys)
    {
        if (col.Equals("attack_type"))
        {
            continue;
        }
        else if (col.Equals("attack_category"))
        {
            featuresDF.AddColumn(
                col,
                rawDF.GetColumn<string>(col).Select(x => targetVarEncoding[x.Value])
            );
        }
        else if (categoricalVars.Contains(col))
        {
            var categoryDF = EncodeOneHot(rawDF.GetColumn<string>(col), col);

            foreach (string newCol in categoryDF.ColumnKeys)
            {
                featuresDF.AddColumn(newCol, categoryDF.GetColumn<int>(newCol));
            }
        }
        else
        {
            featuresDF.AddColumn(
                col,
                rawDF[col].Select((x, i) => double.IsNaN(x.Value) ? 0.0 : x.Value)
            );
        }
    }

    Console.WriteLine("* Shape: {0}, {1}\n\n", featuresDF.RowCount, featuresDF.ColumnCount);
    Console.WriteLine("* Exporting feature set...");
    featuresDF.SaveCsv(Path.Combine(dataDirPath, "features.csv"));

    // Build PCA with only normal data
    var rnd = new Random();
    int[] normalIdx = featuresDF["attack_category"]
        .Where(x => x.Value == 0)
        .Keys
        .OrderBy(x => rnd.Next())
        .Take(90000).ToArray();
    int[] attackIdx = featuresDF["attack_category"]
        .Where(x => x.Value > 0)
        .Keys
        .OrderBy(x => rnd.Next())
        .Take(10000).ToArray();
    int[] totalIdx = normalIdx.Concat(attackIdx).ToArray();

    var normalSet = featuresDF.Rows[normalIdx];

    string[] nonZeroValueCols = normalSet.ColumnKeys.Where(
        x => !x.Equals("attack_category") && normalSet[x].Max() != normalSet[x].Min()
    ).ToArray();

    double[][] normalData = BuildJaggedArray(
        normalSet.Columns[nonZeroValueCols].ToArray2D<double>(),
        normalSet.RowCount,
        nonZeroValueCols.Length
    );
    double[][] wholeData = BuildJaggedArray(
        featuresDF.Rows[totalIdx].Columns[nonZeroValueCols].ToArray2D<double>(),
        totalIdx.Length,
        nonZeroValueCols.Length
    );
    int[] labels = featuresDF
        .Rows[totalIdx]
        .GetColumn<int>("attack_category")
        .ValuesAll.ToArray();

    var pca = new PrincipalComponentAnalysis(
        PrincipalComponentMethod.Standardize
    );
    pca.Learn(normalData);

    double[][] transformed = pca.Transform(wholeData);

    double[][] first2Components = transformed.Select(
        x => x.Where((y, i) => i < 2).ToArray()
    ).ToArray();
    ScatterplotBox.Show("Component #1 vs. Component #2", first2Components, labels);

    double[][] next2Components = transformed.Select(
        x => x.Where((y, i) => i < 3 && i >= 1).ToArray()
    ).ToArray();
    ScatterplotBox.Show("Component #2 vs. Component #3", next2Components, labels);

    next2Components = transformed.Select(
        x => x.Where((y, i) => i < 4 && i >= 2).ToArray()
    ).ToArray();
    ScatterplotBox.Show("Component #3 vs. Component #4", next2Components, labels);

    next2Components = transformed.Select(
        x => x.Where((y, i) => i < 5 && i >= 3).ToArray()
    ).ToArray();
    ScatterplotBox.Show("Component #4 vs. Component #5", next2Components, labels);

    next2Components = transformed.Select(
        x => x.Where((y, i) => i < 6 && i >= 4).ToArray()
    ).ToArray();
    ScatterplotBox.Show("Component #5 vs. Component #6", next2Components, labels);

    double[] explainedVariance = pca.Components
        .Select(x => x.CumulativeProportion)
        .Where(x => x < 1)
        .ToArray();

    DataSeriesBox.Show(
        explainedVariance.Select((x, i) => (double)i),
        explainedVariance
    ).SetTitle("Explained Variance");
    System.IO.File.WriteAllLines(
        Path.Combine(dataDirPath, "explained-variance.csv"),
        explainedVariance.Select((x, i) => String.Format("{0},{1:0.0000}", i, x))
    );

    Console.WriteLine("* Exporting pca-transformed feature set...");
    System.IO.File.WriteAllLines(
        Path.Combine(
            dataDirPath, "pca-transformed-features.csv"
        ),
        transformed.Select(x => String.Join(",", x))
    );
    System.IO.File.WriteAllLines(
        Path.Combine(
            dataDirPath, "pca-transformed-labels.csv"
        ),
        labels.Select(x => x.ToString())
    );

    Console.WriteLine("\n\n\n\n\nDONE!!!");
    Console.ReadKey();
}
public static Tuple<Dictionary<string, Dictionary<int, List<Room>>>, Dictionary<string, double[,]>> Factorize(List<Room> rooms, int dimensionReduction)
{
    Dictionary<string, double[,]> matrices = new Dictionary<string, double[,]>();
    int column = 0;
    int size = 0;
    foreach (var room in rooms)
    {
        foreach (var layer in room.objects)
        {
            if (!matrices.ContainsKey(layer.Key))
            {
                size = layer.Value.GetLength(0) * layer.Value.GetLength(1);
                matrices[layer.Key] = new double[rooms.Count, layer.Value.GetLength(0) * layer.Value.GetLength(1)];
            }
            matrices[layer.Key].FillColumn(column, layer.Value);
        }
        column++;
    }

    Dictionary<string, double[,]> Ws = new Dictionary<string, double[,]>();
    Dictionary<string, double[,]> Hs = new Dictionary<string, double[,]>();
    Dictionary<string, double[,]> components = new Dictionary<string, double[,]>();
    foreach (var mat in matrices)
    {
        PrincipalComponentAnalysis pca = new PrincipalComponentAnalysis(mat.Value);
        pca.Compute();
        for (int ii = 0; ii < dimensionReduction; ii++)
        {
            pca.ComponentMatrix.rowToMatrix(ii, 12, 10).matToBitmap(0, 0).Save("pca" + mat.Key + ii + ".png");
        }
        components[mat.Key] = pca.ComponentMatrix;
        for (int jj = 0; jj < rooms.Count; jj++)
        {
            rooms[jj].setCoefficients(mat.Key, pca.Result, jj, dimensionReduction);
        }
        /*
         * NMF nmf = new NMF(mat.Value, dimensionReduction, 2000);
         * Ws[mat.Key] = nmf.LeftNonnegativeFactors;
         * Hs[mat.Key] = nmf.RightNonnegativeFactors;
         * for (int ii = 0; ii < rooms.Count; ii++) {
         *     rooms[ii].setCoefficients(mat.Key, nmf.RightNonnegativeFactors, ii);
         * }
         * string str = "";
         * for (int xx = 0; xx < nmf.RightNonnegativeFactors.GetLength(1); xx++) {
         *     for (int jj = 0; jj < nmf.RightNonnegativeFactors.GetLength(0); jj++) {
         *         str += nmf.RightNonnegativeFactors[jj, xx] + ",";
         *     }
         *     str += "\n";
         * }
         * System.IO.File.WriteAllText(mat.Key + "W.txt", str);
         * str = "";
         * for (int xx = 0; xx < nmf.LeftNonnegativeFactors.GetLength(1); xx++) {
         *     for (int jj = 0; jj < nmf.LeftNonnegativeFactors.GetLength(0); jj++) {
         *         str += nmf.LeftNonnegativeFactors[jj, xx] + ",";
         *     }
         *     str += "\n";
         * }
         * System.IO.File.WriteAllText(mat.Key + "H.txt", str);
         * for (int ii = 0; ii < nmf.LeftNonnegativeFactors.GetLength(1); ii++) {
         *     double[,] W = nmf.LeftNonnegativeFactors.rowToMatrix(ii, 12, 10);
         *     W.matToBitmap(0, 25).Save(mat.Key + ii + "W.png");
         * }
         * if (mat.Key == "blocks") {
         *     double[,] reconstructed = new double[12, 10];
         *     for (int ii = 0; ii < rooms[0].coefficients["blocks"].Length; ii++) {
         *         double w = rooms[0].coefficients["blocks"][ii];
         *         int counter = 0;
         *         for (int xx = 0; xx < 12; xx++) {
         *             for (int yy = 0; yy < 10; yy++) {
         *                 reconstructed[xx, yy] += w * Ws["blocks"][counter, ii];
         *                 counter++;
         *             }
         *         }
         *     }
         *     reconstructed.matToBitmap(0, 1).Save("room0Reconstructed.png");
         * }
         */
    }

    int counter = 0;
    // double[,] xy = new double[rooms.Count, 2];
    double[][] clusterData = new double[rooms.Count][];
    foreach (var room in rooms)
    {
        int compCounter = 0;
        double[] coeffs = new double[room.coefficients.Count * dimensionReduction];
        foreach (var comp in room.coefficients)
        {
            foreach (var coef in comp.Value)
            {
                coeffs[compCounter] = coef;
                compCounter++;
            }
        }
        clusterData[counter] = coeffs;
        Room reconstructed = room.reconstruct(components, 1);
        reconstructed.toBitmap().Save("room" + counter + "Reconstructed.png");
        counter++;
    }

    int numberofClusters = 25;
    KMeans kmeans = new KMeans(numberofClusters);
    kmeans.Tolerance = 0.5;
    int[] clusters = kmeans.Compute(clusterData);

    Dictionary<string, SortedSet<int>> clusteredRooms = new Dictionary<string, SortedSet<int>>();
    Dictionary<int, SortedSet<string>> roomClusters = new Dictionary<int, SortedSet<string>>();
    Dictionary<string, Dictionary<int, List<Room>>> output = new Dictionary<string, Dictionary<int, List<Room>>>();
    int[] clusterCounts = new int[numberofClusters];
    for (int ii = 0; ii < rooms.Count; ii++)
    {
        rooms[ii].setType();
        if (!clusteredRooms.ContainsKey(rooms[ii].roomType))
        {
            output[rooms[ii].roomType] = new Dictionary<int, List<Room>>();
            clusteredRooms[rooms[ii].roomType] = new SortedSet<int>();
        }
        if (!output[rooms[ii].roomType].ContainsKey(clusters[ii]))
        {
            output[rooms[ii].roomType][clusters[ii]] = new List<Room>();
        }
        if (!roomClusters.ContainsKey(clusters[ii]))
        {
            roomClusters[clusters[ii]] = new SortedSet<string>();
        }
        output[rooms[ii].roomType][clusters[ii]].Add(rooms[ii]);
        roomClusters[clusters[ii]].Add(rooms[ii].roomType);
        clusterCounts[clusters[ii]]++;
        clusteredRooms[rooms[ii].roomType].Add(clusters[ii]);
        // Console.WriteLine(ii + " " + clusters[ii]);
    }

    for (int ii = 0; ii < clusterCounts.Length; ii++)
    {
        string str = "";
        foreach (var roomtype in roomClusters[ii])
        {
            str += roomtype + " ";
        }
        // Console.WriteLine("Cluster " + ii + " = " + clusterCounts[ii] + " : " + str);
    }
    foreach (var roomType in clusteredRooms)
    {
        string str = "";
        foreach (var cluster in roomType.Value)
        {
            str += cluster + " ";
        }
        // Console.WriteLine(roomType.Key + " " + str);
    }

    return new Tuple<Dictionary<string, Dictionary<int, List<Room>>>, Dictionary<string, double[,]>>(output, components);
}
public void PrincipalComponentAnalysis() { int D = 3; int N = 10; // construct a sample Random rng = new Random(1); MultivariateSample sample = new MultivariateSample(D); for (int i = 0; i < N; i++) { double x = 1.0 * rng.NextDouble() - 1.0; double y = 4.0 * rng.NextDouble() - 2.0; double z = 9.0 * rng.NextDouble() - 3.0; sample.Add(x, y, z); } // get its column means RowVector mu = new RowVector(D); for (int i = 0; i < D; i++) { mu[i] = sample.Column(i).Mean; } // get total variance double tVariance = GetTotalVariance(sample); Console.WriteLine(tVariance); // do a principal component analysis PrincipalComponentAnalysis pca = sample.PrincipalComponentAnalysis(); Assert.IsTrue(pca.Dimension == sample.Dimension); Assert.IsTrue(pca.Count == sample.Count); // check that the PCs behave as expected for (int i = 0; i < pca.Dimension; i++) { PrincipalComponent pc = pca.Component(i); Assert.IsTrue(pc.Index == i); Assert.IsTrue(TestUtilities.IsNearlyEqual(pc.Weight * pc.NormalizedVector(), pc.ScaledVector())); Assert.IsTrue((0.0 <= pc.VarianceFraction) && (pc.VarianceFraction <= 1.0)); if (i == 0) { Assert.IsTrue(pc.VarianceFraction == pc.CumulativeVarianceFraction); } else { PrincipalComponent ppc = pca.Component(i - 1); Assert.IsTrue(pc.VarianceFraction <= ppc.VarianceFraction); Assert.IsTrue(TestUtilities.IsNearlyEqual(ppc.CumulativeVarianceFraction + pc.VarianceFraction, pc.CumulativeVarianceFraction)); } } // express the sample in terms of principal components MultivariateSample csample = pca.TransformedSample(); // check that the explained variances are as claimed for (int rD = 1; rD <= D; rD++) { MultivariateSample rSample = new MultivariateSample(D); foreach (double[] cEntry in csample) { RowVector x = mu.Copy(); for (int i = 0; i < rD; i++) { PrincipalComponent pc = pca.Component(i); x += (cEntry[i] * pc.Weight) * pc.NormalizedVector(); } rSample.Add(x); } double rVariance = GetTotalVariance(rSample); Console.WriteLine("{0} {1}", rD, rVariance); Assert.IsTrue(TestUtilities.IsNearlyEqual(rVariance / tVariance, pca.Component(rD - 1).CumulativeVarianceFraction)); } }
static void Main(string[] args) { // Specify which files to use. var projectDir = Directory.GetParent(Directory.GetCurrentDirectory()).Parent.Parent.FullName; var pathFiles = Directory.EnumerateFiles(projectDir + @"\DocumentClusteringExample\Samples").ToList(); // Hyper parameters. // This option prevent overfitting on missing words. var replaceMissingValueWithRandomValue = false; var usePCA = false; var numberOfOutputPCA = 100; var distanceFunction = new PearsonCorrelation(); var strategy = ValueStrategy.Freq; var minVectorElements = 2; var freqMin = 2; var minWordCount = 1; var maxWordCount = 3; var minGroupOfWordsLength = 3; var minWordLength = 1; var firstWordMinLength = 1; var lastWordMinLength = 1; var maxComposition = int.MaxValue; var badWords = File.ReadLines(projectDir + @"\DocumentClusteringExample\stop-words-english.txt") .Where(m => !string.IsNullOrWhiteSpace(m)) .ToArray(); var badPatternList = new string[] { }; // Files -> List of expressions (Our dictionary based on files) var expressions = ExtractExpressionFromTextFiles.ExtractExpressions( pathFiles, new ExtractExpressionFromTextFilesOption { BadPatternList = badPatternList, BadWords = badWords, FirstWordMinLength = firstWordMinLength, LastWordMinLength = lastWordMinLength, MaxExpressionComposition = maxComposition, MaxWordCount = maxWordCount, MinGroupOfWordsLength = minGroupOfWordsLength, MinWordCount = minWordCount, MinWordFrequency = freqMin, MinWordLength = minWordLength }); Console.WriteLine("Expressions: " + expressions.Count); // Files -> Vectors var expressionVectorOption = new TextFileToExpressionVectorOption { MinVectorElements = minVectorElements, BadPatternList = badPatternList, MaxWordCount = maxWordCount, MinWordCount = minWordCount, Strategy = strategy, ReplaceMissingValueWithRandomValue = replaceMissingValueWithRandomValue }; List <Tuple <string, double[]> > filesToVector = new List <Tuple <string, double[]> >(); foreach (var pathFile in pathFiles) { filesToVector.Add( new Tuple <string, double[]>( pathFile, TextFileToExpressionVector.GenerateExpressionVector( expressions, pathFile, expressionVectorOption) ) ); } var vectors = filesToVector .Select(m => m.Item2) .ToList(); Console.WriteLine("vectors count: " + vectors.Count); // Remove non-representative vectors for (int i = 0; i < vectors.Count; i++) { var vector = vectors[i]; if (vector.Sum() < minVectorElements) { vectors.RemoveAt(i); pathFiles.RemoveAt(i); i--; } } Console.WriteLine("vectors count (after removing non-representative vectors): " + vectors.Count); // Reduce the vector size with PCA. if (usePCA) { Console.WriteLine("Reducing vector size with PCA"); Stopwatch stopwatch = new Stopwatch(); stopwatch.Start(); PrincipalComponentAnalysis pca = new PrincipalComponentAnalysis(); pca.NumberOfOutputs = numberOfOutputPCA; var trainingVector = vectors.ToArray(); Shuffle(trainingVector); trainingVector = trainingVector.Take(600).ToArray(); var pcaResult = pca.Learn(trainingVector); var reducedVectorsWithPCA = pcaResult.Transform(vectors.ToArray()); stopwatch.Stop(); Console.WriteLine("PCA duration: " + stopwatch.Elapsed.ToString()); vectors = reducedVectorsWithPCA.ToList(); } // Run HDBSCAN algo. 
Console.WriteLine("HDBSCAN starting..."); var contraintsList = new List <HdbscanConstraint>(); if (usePCA) { for (int i = 1; i < numberOfOutputPCA; i++) { contraintsList.Add(new HdbscanConstraint(i - 1, i, HdbscanConstraintType.CannotLink)); } } var watch = Stopwatch.StartNew(); var result = HdbscanRunner.Run(new HdbscanParameters { DataSet = vectors.ToArray(), MinPoints = 5, MinClusterSize = 5, DistanceFunction = distanceFunction, Constraints = contraintsList, UseMultipleThread = true }); watch.Stop(); Console.WriteLine("HDBSCAN done " + watch.Elapsed); // Read results. var labels = result.Labels; int n = labels.Max(); Console.WriteLine("\n\n"); int clusterId = 0; for (int iCluster = 1; iCluster <= n; iCluster++) { Dictionary <string, int> categories = new Dictionary <string, int>(); bool anyFound = false; for (int i = 0; i < labels.Length; i++) { if (labels[i] == iCluster) { var fileName = Path.GetFileNameWithoutExtension(pathFiles[i]); var category = fileName.Split('-')[0].Trim(); if (categories.ContainsKey(category)) { var count = categories[category]; categories.Remove(category); categories.Add(category, count + 1); } else { categories.Add(category, 1); } anyFound = true; } } if (anyFound) { clusterId++; Console.WriteLine("Cluster #" + clusterId); Console.WriteLine(); foreach (var category in categories) { Console.WriteLine(category.Key + ": " + category.Value); } Console.ReadLine(); } } Console.WriteLine("Press any key to continue..."); Console.ReadLine(); }
public void buildModel(string modelPath) { outmodelpath = modelPath; using (System.IO.StreamReader sr = new System.IO.StreamReader(outmodelpath)) { dataPrepBase.modelTypes mType = (dataPrepBase.modelTypes)Enum.Parse(typeof(dataPrepBase.modelTypes), sr.ReadLine()); if (mType != dataPrepBase.modelTypes.PCA) { egVec = new double[1, 1]; System.Windows.Forms.MessageBox.Show("Not a PCA Model!!", "Error", System.Windows.Forms.MessageBoxButtons.OK, System.Windows.Forms.MessageBoxIcon.Error); return; } inpath = sr.ReadLine(); VariableFieldNames = sr.ReadLine().Split(new char[] { ',' }); corr = new double[VariableFieldNames.Length, VariableFieldNames.Length]; egVec = new double[VariableFieldNames.Length, VariableFieldNames.Length]; n = System.Convert.ToInt32(sr.ReadLine()); meanVector = (from string s in sr.ReadLine().Split(new char[] { ',' }) select System.Convert.ToDouble(s)).ToArray(); stdVector = (from string s in sr.ReadLine().Split(new char[] { ',' }) select System.Convert.ToDouble(s)).ToArray(); string[] corrLg = sr.ReadLine().Split(new char[] { ',' }); prop = (from string s in sr.ReadLine().Split(new char[] { ',' }) select System.Convert.ToDouble(s)).ToArray(); egVal = (from string s in sr.ReadLine().Split(new char[] { ',' }) select System.Convert.ToDouble(s)).ToArray(); string[] egVecLg = sr.ReadLine().Split(new char[] { ',' }); for (int i = 0; i < VariableFieldNames.Length; i++) { for (int j = 0; j < VariableFieldNames.Length; j++) { int indexVl = (i * VariableFieldNames.Length) + j; corr[i, j] = System.Convert.ToDouble(corrLg[indexVl]); egVec[i, j] = System.Convert.ToDouble(egVecLg[indexVl]); } } sr.Close(); } pca = PrincipalComponentAnalysis.FromCorrelationMatrix(meanVector, stdVector, corr); pca.Compute(); }
public void ConstructorTest()
{
    // Reproducing Lindsay Smith's "Tutorial on Principal Component Analysis"
    // using the framework's default method. The tutorial can be found online
    // at http://www.sccg.sk/~haladova/principal_components.pdf

    // Step 1. Get some data
    // ---------------------

    double[,] data =
    {
        { 2.5, 2.4 }, { 0.5, 0.7 }, { 2.2, 2.9 }, { 1.9, 2.2 }, { 3.1, 3.0 },
        { 2.3, 2.7 }, { 2.0, 1.6 }, { 1.0, 1.1 }, { 1.5, 1.6 }, { 1.1, 0.9 }
    };

    // Step 2. Subtract the mean
    // -------------------------
    // Note: The framework does this automatically. By default, the framework
    // uses the "Center" method, which only subtracts the mean. However, it is
    // also possible to remove the mean *and* divide by the standard deviation
    // (thus performing the correlation method) by specifying "Standardize"
    // instead of "Center" as the AnalysisMethod.

    AnalysisMethod method = AnalysisMethod.Center; // AnalysisMethod.Standardize

    // Step 3. Compute the covariance matrix
    // -------------------------------------
    // Note: Accord.NET does not need to compute the covariance
    // matrix in order to compute PCA. The framework uses the SVD
    // method which is more numerically stable, but may require
    // more processing or memory. In order to replicate the tutorial
    // using covariance matrices, please see the next unit test.

    // Create the analysis using the selected method
    var pca = new PrincipalComponentAnalysis(data, method);

    // Compute it
    pca.Compute();

    // Step 4. Compute the eigenvectors and eigenvalues of the covariance matrix
    // -------------------------------------------------------------------------
    // Note: Since Accord.NET uses the SVD method rather than the Eigendecomposition
    // method, the Eigenvalues are computed from the singular values. However, it is
    // not the Eigenvalues themselves which are important, but rather their proportion:

    // Those are the expected eigenvalues, in descending order:
    double[] eigenvalues = { 1.28402771, 0.0490833989 };

    // And this will be their proportion:
    double[] proportion = eigenvalues.Divide(eigenvalues.Sum());

    // Those are the expected eigenvectors,
    // in descending order of eigenvalues:
    double[,] eigenvectors =
    {
        { -0.677873399, -0.735178656 },
        { -0.735178656,  0.677873399 }
    };

    // Now, here is the place most users get confused. The fact is that
    // the Eigenvalue decomposition (EVD) is not unique, and both the SVD
    // and EVD routines used by the framework produces results which are
    // numerically different from packages such as STATA or MATLAB, but
    // those are correct.

    // If v is an eigenvector, a multiple of this eigenvector (such as a*v, with
    // a being a scalar) will also be an eigenvector. In the Lindsay case, the
    // framework produces a first eigenvector with inverted signs. This is the same
    // as considering a=-1 and taking a*v. The result is still correct.

    // Retrieve the first expected eigenvector
    double[] v = eigenvectors.GetColumn(0);

    // Multiply by a scalar and store it back
    eigenvectors.SetColumn(0, v.Multiply(-1));

    // Everything is alright (up to the 9 decimal places shown in the tutorial)
    Assert.IsTrue(eigenvectors.IsEqual(pca.ComponentMatrix, threshold: 1e-9));
    Assert.IsTrue(proportion.IsEqual(pca.ComponentProportions, threshold: 1e-9));
    Assert.IsTrue(eigenvalues.IsEqual(pca.Eigenvalues, threshold: 1e-5));

    // Step 5. Deriving the new data set
    // ---------------------------------

    double[,] actual = pca.Transform(data);

    // transformedData shown in pg. 18
    double[,] expected = new double[,]
    {
        {  0.827970186, -0.175115307 },
        { -1.77758033,   0.142857227 },
        {  0.992197494,  0.384374989 },
        {  0.274210416,  0.130417207 },
        {  1.67580142,  -0.209498461 },
        {  0.912949103,  0.175282444 },
        { -0.099109437, -0.349824698 },
        { -1.14457216,   0.046417258 },
        { -0.438046137,  0.017764629 },
        { -1.22382056,  -0.162675287 },
    };

    // Everything is correct (up to 8 decimal places)
    Assert.IsTrue(expected.IsEqual(actual, threshold: 1e-8));
}
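As the comments above note, eigenvector signs are arbitrary. If reproducible signs are needed when comparing against another package, one common convention (an assumption here, not something the framework applies for you) is to flip each column so that its largest-magnitude entry is positive; a sketch:

// Sketch: fix eigenvector signs so each column's largest-magnitude entry is positive.
static void NormalizeSigns(double[,] vectors)
{
    int rows = vectors.GetLength(0), cols = vectors.GetLength(1);
    for (int j = 0; j < cols; j++)
    {
        int argmax = 0;
        for (int i = 1; i < rows; i++)
            if (Math.Abs(vectors[i, j]) > Math.Abs(vectors[argmax, j]))
                argmax = i;
        if (vectors[argmax, j] < 0)
            for (int i = 0; i < rows; i++)
                vectors[i, j] = -vectors[i, j];
    }
}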
private void buildModel()
{
    if (varCov == null)
        getCov();
    pca = PrincipalComponentAnalysis.FromCorrelationMatrix(MeanVector, StdVector, CorralationMatrix);
    pca.Compute();
    egVec = pca.ComponentMatrix;
    prop = pca.ComponentProportions;
    egVal = pca.Eigenvalues;
    //Console.WriteLine("PCA method = " + pca.Method.ToString());
}
public void transform_more_columns_than_samples()
{
    // Lindsay's tutorial data
    double[,] datat = data.Transpose();

    var target = new PrincipalComponentAnalysis(datat);

    // Compute
    target.Compute();

    // Transform
    double[,] actual = target.Transform(datat);

    // Assert the scores equal the transformation of the input
    double[,] result = target.Result;

    Assert.IsTrue(Matrix.IsEqual(result, actual, 0.01));
    Assert.AreEqual(2, result.Rows());
    Assert.AreEqual(2, result.Columns());
    Assert.IsTrue(result.IsSquare());
}
public void learn_weights() { double[][] raw = { new[] { 2.5, 2.4, 1 }, new[] { 0.5, 0.7, 1 }, new[] { 2.2, 2.9, 0.5 }, new[] { 2.2, 2.9, 0.5 }, new[] { 1.9, 2.2, 1 }, new[] { 3.1, 3.0, 1 }, new[] { 2.3, 2.7, 1 }, new[] { 2.0, 1.6, 1 }, new[] { 1.0, 1.1, 0.25 }, new[] { 1.0, 1.1, 0.25 }, new[] { 1.0, 1.1, 0.25 }, new[] { 1.0, 1.1, 0.25 }, new[] { 1.5, 1.6, 1 }, new[] { 42.5, 7.6, 0 }, new[] { 743.5, 5.6, 0 }, new[] { 1.5, 16, 0 }, new[] { 1.1, 0.9, 1 } }; double[][] data = raw.GetColumns(0, 1); double[] weights = raw.GetColumn(2); var method = PrincipalComponentMethod.Center; var pca = new PrincipalComponentAnalysis(method); pca.Learn(data, weights); double[] eigenvalues = { 1.28402771, 0.0490833989 }; double[] proportion = eigenvalues.Divide(eigenvalues.Sum()); double[,] eigenvectors = { { -0.677873399, -0.735178656 }, { -0.735178656, 0.677873399 } }; double[] v = eigenvectors.GetColumn(0); eigenvectors.SetColumn(0, v.Multiply(-1)); Assert.IsTrue(eigenvectors.IsEqual(pca.ComponentMatrix, rtol: 1e-9)); Assert.IsTrue(proportion.IsEqual(pca.ComponentProportions, rtol: 1e-9)); Assert.IsTrue(eigenvalues.IsEqual(pca.Eigenvalues, rtol: 0.1)); double[][] actual = pca.Transform(data); string a = actual.ToCSharp(); /* * double[,] expected = new double[,] * { * { 0.827970186, -0.175115307 }, * { -1.77758033, 0.142857227 }, * { 0.992197494, 0.384374989 }, * { 0.274210416, 0.130417207 }, * { 1.67580142, -0.209498461 }, * { 0.912949103, 0.175282444 }, * { -0.099109437, -0.349824698 }, * { -1.14457216, 0.046417258 }, * { -0.438046137, 0.017764629 }, * { -1.22382056, -0.162675287 }, * }; */ double[][] expected = { new double[] { 0.827970186201088, -0.175115307046916 }, new double[] { -1.77758032528043, 0.142857226544281 }, new double[] { 0.992197494414889, 0.384374988880413 }, // weight is 0.5 new double[] { 0.992197494414889, 0.384374988880413 }, // weight is 0.5 new double[] { 0.2742104159754, 0.130417206574127 }, new double[] { 1.67580141864454, -0.209498461256753 }, new double[] { 0.912949103158809, 0.17528244362037 }, new double[] { -0.0991094374984439, -0.349824698097121 }, new double[] { -1.14457216379866, 0.0464172581832816 }, // weight is 0.25 new double[] { -1.14457216379866, 0.0464172581832816 }, // weight is 0.25 new double[] { -1.14457216379866, 0.0464172581832816 }, // weight is 0.25 new double[] { -1.14457216379866, 0.0464172581832816 }, // weight is 0.25 new double[] { -0.43804613676245, 0.0177646296750834 }, new double[] { 31.7658351361525, -26.0573198564776 }, // weight is 0 new double[] { 505.4847301932, -542.773304190164 }, // weight is 0 new double[] { 10.148526503077, 9.77914156847845 }, // weight is 0 new double[] { -1.22382055505474, -0.162675287076762 } }; Assert.IsTrue(expected.IsEqual(actual, atol: 1e-8)); }
public void correlation_new_interface() { double[] mean = Measures.Mean(data, dimension: 0); double[] stdDev = Measures.StandardDeviation(data); double[][] cov = Measures.Correlation(data.ToJagged()); var actual = PrincipalComponentAnalysis.FromCorrelationMatrix(mean, stdDev, cov.ToMatrix()); var expected = new PrincipalComponentAnalysis(PrincipalComponentMethod.CorrelationMatrix) { Means = mean, StandardDeviations = stdDev }; // Compute actual.Compute(); var transform = expected.Learn(cov); // Transform double[,] actualTransform = actual.Transform(data); double[,] expectedTransform = expected.Transform(data); // Verify both are equal with 0.01 tolerance value Assert.IsTrue(Matrix.IsEqual(actualTransform, expectedTransform, 0.01)); // Transform double[,] image = actual.Transform(data); double[,] reverse = actual.Revert(image); // Verify both are equal with 0.01 tolerance value Assert.IsTrue(Matrix.IsEqual(reverse, data, 1e-6)); // Transform double[][] image2 = transform.Transform(data.ToJagged()); double[][] reverse2 = transform.Inverse().Transform(image2); Assert.IsTrue(Matrix.IsEqual(reverse, reverse2, 1e-6)); Assert.IsTrue(Matrix.IsEqual(reverse2, data, 1e-6)); // Transform double[][] reverse3 = actual.Revert(image2); Assert.IsTrue(Matrix.IsEqual(reverse, reverse3, 1e-6)); Assert.IsTrue(Matrix.IsEqual(reverse3, data, 1e-6)); var a = transform.Transform(data.ToJagged()).ToMatrix(); Assert.IsTrue(Matrix.IsEqual(a, expectedTransform, 0.01)); }
public void Revert_new_method() { var target = new PrincipalComponentAnalysis(); // Compute var transform = target.Learn(data.ToJagged()); // Transform double[][] image = target.Transform(data.ToJagged()); // Reverse double[][] actual = target.Revert(image); // Verify both are equal with 0.01 tolerance value Assert.IsTrue(Matrix.IsEqual(actual, data, 0.01)); // Reverse double[][] actual2 = transform.Inverse().Transform(image); // Verify both are equal with 0.01 tolerance value Assert.IsTrue(Matrix.IsEqual(actual2, data, 0.01)); Assert.IsTrue(Matrix.IsEqual(actual2, actual, 1e-5)); }
public void learn_standardize() { double[][] data = { new double[] { 2.5, 2.4 }, new double[] { 0.5, 0.7 }, new double[] { 2.2, 2.9 }, new double[] { 1.9, 2.2 }, new double[] { 3.1, 3.0 }, new double[] { 2.3, 2.7 }, new double[] { 2.0, 1.6 }, new double[] { 1.0, 1.1 }, new double[] { 1.5, 1.6 }, new double[] { 1.1, 0.9 } }; var pca = new PrincipalComponentAnalysis() { Method = PrincipalComponentMethod.Standardize, Whiten = false }; MultivariateLinearRegression transform = pca.Learn(data); double[][] output1 = pca.Transform(data); double[] eigenvalues = { 1.925929272692245, 0.074070727307754519 }; double[] proportion = eigenvalues.Divide(eigenvalues.Sum()); double[,] eigenvectors = { { 0.70710678118654791, -0.70710678118654791 }, { 0.70710678118654791, 0.70710678118654791 } }; Assert.IsTrue(eigenvectors.IsEqual(pca.ComponentMatrix, rtol: 1e-9)); Assert.IsTrue(proportion.IsEqual(pca.ComponentProportions, rtol: 1e-9)); Assert.IsTrue(eigenvalues.IsEqual(pca.Eigenvalues, rtol: 1e-5)); pca.ExplainedVariance = 1.0; double[][] actual = pca.Transform(data); // var str = actual.ToCSharp(); double[][] expected = { new double[] { 1.03068028963519, -0.212053139513466 }, new double[] { -2.19045015647317, 0.168942295968493 }, new double[] { 1.17818776184333, 0.47577321493322 }, new double[] { 0.323294642065681, 0.161198977394117 }, new double[] { 2.07219946786664, -0.251171725759119 }, new double[] { 1.10117414355213, 0.218653302562498 }, new double[] { -0.0878525068874546, -0.430054465638535 }, new double[] { -1.40605089061245, 0.0528100914316325 }, new double[] { -0.538118242086245, 0.0202112695602547 }, new double[] { -1.48306450890365, -0.204309820939091 } }; Assert.IsTrue(expected.IsEqual(actual, atol: 1e-8)); Assert.IsTrue(expected.IsEqual(output1, atol: 1e-8)); actual = transform.Transform(data); Assert.IsTrue(expected.IsEqual(actual, atol: 1e-8)); }
/// <summary> /// Launched when the user clicks the "Run analysis" button. /// </summary> /// private void btnCompute_Click(object sender, EventArgs e) { // Save any pending changes dgvAnalysisSource.EndEdit(); if (dgvAnalysisSource.DataSource == null) { MessageBox.Show("Please load some data using File > Open!"); return; } // Create a matrix from the source data table double[][] sourceMatrix = (dgvAnalysisSource.DataSource as DataTable).ToArray(out columnNames); // Create and compute a new Simple Descriptive Analysis sda = new DescriptiveAnalysis(columnNames).Learn(sourceMatrix); // Show the descriptive analysis on the screen dgvDistributionMeasures.DataSource = sda.Measures; // Populates statistics overview tab with analysis data dgvStatisticCenter.DataSource = new ArrayDataView(sda.DeviationScores, columnNames); dgvStatisticStandard.DataSource = new ArrayDataView(sda.StandardScores, columnNames); dgvStatisticCovariance.DataSource = new ArrayDataView(sda.CovarianceMatrix, columnNames); dgvStatisticCorrelation.DataSource = new ArrayDataView(sda.CorrelationMatrix, columnNames); var method = (PrincipalComponentMethod)cbMethod.SelectedValue; // Create the Principal Component Analysis of the data pca = new PrincipalComponentAnalysis(method); pca.Learn(sourceMatrix); // Finally, compute the analysis! // Populate components overview with analysis data dgvFeatureVectors.DataSource = new ArrayDataView(pca.ComponentVectors); dgvPrincipalComponents.DataSource = pca.Components; dgvProjectionComponents.DataSource = pca.Components; distributionView.DataSource = pca.Components; cumulativeView.DataSource = pca.Components; numComponents.Maximum = pca.Components.Count; numComponents.Value = 1; numThreshold.Value = (decimal)pca.Components[0].CumulativeProportion * 100; }
public void ComputeRanking(List <ICoordinate> points, bool[] pointLabels, List <string> identities = null, PrincipalComponentAnalysis pca = null) { var mapping = new Dictionary <ICoordinate, Tuple <int, bool, double, string> >(); //original idx, 1/0 label, distance, string name (for debugging) ICoordinate remappedCenter; if (pca != null) { var reverted = pca.Revert(new[] { new[] { CenterOfMass.X, CenterOfMass.Y } }); remappedCenter = new Coordinate3D(reverted[0][0], reverted[0][1], reverted[0][2]); } else { remappedCenter = CenterOfMass; } for (var i = 0; i < points.Count; i++) { mapping.Add(points[i], new Tuple <int, bool, double, string>(i, pointLabels[i], points[i].EuclideanDistance(remappedCenter), identities != null ? identities[i] : "")); } var rankedMap = mapping.OrderBy(pt => pt.Value.Item3).ToList(); PointRanks = rankedMap .Select((pt, idx) => new { id = pt.Value.Item1, rank = idx }) .OrderBy(t => t.id).Select(t => t.rank) .ToArray(); var namedLabelVector = rankedMap.Select(pt => pt.Value.Item4).ToArray(); InducedLabledVector = rankedMap.Select(pt => pt.Value.Item2).ToArray(); }
public void TransformTest3() { PrincipalComponentAnalysis target = new PrincipalComponentAnalysis(data); // Compute target.Compute(); bool thrown = false; try { double[,] actual = target.Transform(data, 3); } catch { thrown = true; } Assert.IsTrue(thrown); }
// testHold.WaitOne(5000); //IEndPointClient logger; List<PCAData2D> runPCA(List<NeatGenome> bestGenome, bool firstBehavior = true, int xBins =0 , int yBins =0) { var totalStopWatch = System.Diagnostics.Stopwatch.StartNew(); // Create new stopwatch var stopwatch = System.Diagnostics.Stopwatch.StartNew(); List<long> uIDs = bestGenome.Select(x => x.GenomeId).ToList(); //make sure we have the right fitness! //TODO: Check multi-objective code to see what value has absolute fitness List<double> absoluteFitness = bestGenome.Select(x => x.RealFitness).ToList(); if(bestGenome.Count == 0) return null; //we know topBody > 0 by above check int componentCount = Math.Min(80, (firstBehavior ? bestGenome[0].Behavior.behaviorList.Count : bestGenome[0].SecondBehavior.behaviorList.Count)); //double componentCount = (double)fn.Json.Args[1]; //create our double array that's going to be condensed double[,] collectedData = new double[bestGenome.Count, componentCount]; int xyIndex = 0; foreach (IGenome genome in bestGenome) { //need to grab the behavior objects from the genome, and enter them as data var behaviorList = (firstBehavior ? genome.Behavior.behaviorList : genome.SecondBehavior.behaviorList); for (var ix = 0; ix < componentCount; ix++) { collectedData[xyIndex, ix] = (double)behaviorList[ix]; } xyIndex++; } try { stopwatch.Stop(); Console.WriteLine("Time before kernel: " + stopwatch.ElapsedMilliseconds); stopwatch = System.Diagnostics.Stopwatch.StartNew(); //higher gaussian seemed better at spreading out behavior //might try polynomial of 3rd or 4th degree, constant = 0 by default // IKernel kernel = new Polynomial(3, 0);//new Gaussian(1.9);//new Polynomial((int)numDegree.Value, (double)numConstant.Value); // KernelPrincipalComponentAnalysis kpca = new KernelPrincipalComponentAnalysis(collectedData, kernel, //(PrincipalComponentAnalysis.AnalysisMethod.Correlation)); PrincipalComponentAnalysis kpca = new PrincipalComponentAnalysis(collectedData, (PrincipalComponentAnalysis.AnalysisMethod.Correlation)); try { kpca.Compute(); } catch (Exception e) { Console.WriteLine(e.Message); return null; } stopwatch.Stop(); Console.WriteLine("Time During PCA: " + stopwatch.ElapsedMilliseconds); stopwatch = System.Diagnostics.Stopwatch.StartNew(); double[,] transform = kpca.Transform(collectedData, 2); stopwatch.Stop(); Console.WriteLine("Time During Transform: " + stopwatch.ElapsedMilliseconds); stopwatch = System.Diagnostics.Stopwatch.StartNew(); List<PCAData2D> uidAndPoints = binAllPoints(transform, uIDs, absoluteFitness, xBins, yBins); stopwatch.Stop(); Console.WriteLine("Time During Binning: " + stopwatch.ElapsedMilliseconds); //List<PCAData2D> uidAndPoints = new List<PCAData2D>(); //for (int ix = 0; ix < bestGenome.Count; ix++) //{ // uidAndPoints.Add(new PCAData2D() { uid = uIDs[ix], x = mappedResults[ix, 0], y = mappedResults[ix, 1] }); //} totalStopWatch.Stop(); Console.WriteLine("Total Time For PCA: " + totalStopWatch.ElapsedMilliseconds); return uidAndPoints; } catch (Exception e) { totalStopWatch.Stop(); Console.WriteLine("Total Time For (failed) PCA: " + totalStopWatch.ElapsedMilliseconds); Console.WriteLine("Failed to run PCA"); return null; } }
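The commented-out lines in runPCA hint at a kernel PCA variant. The fragment below is a sketch of what that variant could look like using the same classic Accord.NET API the snippet already uses; Gaussian(1.9) and Polynomial(3, 0) are the kernels mentioned in the comments, and the exact location of the AnalysisMethod enum depends on the Accord version.

// Sketch only: kernel PCA over the same collectedData matrix, following the
// commented-out code above. The kernel types live in Accord.Statistics.Kernels.
IKernel kernel = new Gaussian(1.9);   // or: new Polynomial(3, 0) for a cubic kernel
KernelPrincipalComponentAnalysis kpca = new KernelPrincipalComponentAnalysis(
    collectedData, kernel, PrincipalComponentAnalysis.AnalysisMethod.Correlation);

kpca.Compute();                                           // can be slow for large populations
double[,] transform = kpca.Transform(collectedData, 2);   // keep the first two components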
public void Revert() { PrincipalComponentAnalysis target = new PrincipalComponentAnalysis(data); // Compute target.Compute(); // Transform double[,] image = target.Transform(data); // Reverse double[,] actual = target.Revert(image); // Verify both are equal with 0.01 tolerance value Assert.IsTrue(Matrix.IsEqual(actual, data, 0.01)); }
private void btnRunAnalysis_Click(object sender, EventArgs e) { if (dgvAnalysisSource.DataSource == null) { MessageBox.Show("Please load some data first."); return; } // Finishes and save any pending changes to the given data dgvAnalysisSource.EndEdit(); // Creates a matrix from the source data table double[,] sourceMatrix = (dgvAnalysisSource.DataSource as DataTable).ToMatrix(out sourceColumnNames); // Creates the Simple Descriptive Analysis of the given source sda = new DescriptiveAnalysis(sourceMatrix, sourceColumnNames); sda.Compute(); // Populates statistics overview tab with analysis data dgvStatisticCenter.DataSource = new ArrayDataView(sda.DeviationScores, sourceColumnNames); dgvStatisticStandard.DataSource = new ArrayDataView(sda.StandardScores, sourceColumnNames); dgvStatisticCovariance.DataSource = new ArrayDataView(sda.CovarianceMatrix, sourceColumnNames); dgvStatisticCorrelation.DataSource = new ArrayDataView(sda.CorrelationMatrix, sourceColumnNames); dgvDistributionMeasures.DataSource = sda.Measures; // Creates the Principal Component Analysis of the given source pca = new PrincipalComponentAnalysis(sda.Source, (AnalysisMethod)cbMethod.SelectedValue); // Compute the Principal Component Analysis pca.Compute(); // Populates components overview with analysis data dgvFeatureVectors.DataSource = new ArrayDataView(pca.ComponentMatrix); dgvPrincipalComponents.DataSource = pca.Components; dgvProjectionComponents.DataSource = pca.Components; numComponents.Maximum = pca.Components.Count; numComponents.Value = 1; numThreshold.Value = (decimal)pca.Components[0].CumulativeProportion * 100; CreateComponentCumulativeDistributionGraph(graphCurve); CreateComponentDistributionGraph(graphShare); }
public void adjustTest() { double[,] data = (double[,])PrincipalComponentAnalysisTest.data.Clone(); PrincipalComponentAnalysis target = new PrincipalComponentAnalysis(data, AnalysisMethod.Standardize); double[,] expected = { { 0.87874523495823, 0.578856809114491 }, { -1.66834240260186, -1.42942191638476 }, { 0.496682089324217, 1.16952702249663 }, { 0.114618943690204, 0.342588723761638 }, { 1.64287152622626, 1.28766106517305 }, { 0.624036471202221, 0.933258937143772 }, { 0.241973325568208, -0.366215532296923 }, { -1.03157049321184, -0.956885745679056 }, { -0.394798583821814, -0.366215532296923 }, { -0.904216111333831, -1.19315383103191 } }; double[,] actual = target.Adjust(data, false); Assert.IsTrue(expected.IsEqual(actual, 0.00001)); Assert.AreNotEqual(data, actual); actual = target.Adjust(data, true); Assert.IsTrue(expected.IsEqual(actual, 0.00001)); Assert.AreEqual(data, actual); }
public static Output Whitening(double[,] matrix) { if (matrix == null) { throw new ArgumentNullException(nameof(matrix)); } // Step 1: convert matrix to a jagged array double[][] jaggedArray = matrix.ToJagged(); // Step 2: do PCA whitening var pca = new PrincipalComponentAnalysis() { // the "Center" method only subtracts the mean. Method = PrincipalComponentMethod.Center, Whiten = true, }; pca.Learn(jaggedArray); pca.Transform(jaggedArray); // note: this first Transform call is redundant; its result is discarded pca.ExplainedVariance = 0.95; double[][] transformedData = pca.Transform(jaggedArray); double[,] projectedData = transformedData.ToMatrix(); double[,] eigenVectors = pca.ComponentVectors.ToMatrix(); int components = pca.Components.Count; // double[] eigenValues = pca.Eigenvalues; //sorted // int rows = projectedData.GetLength(0); int columns = projectedData.GetLength(1); // number of principal components retained in the projection // Step 3: revert the projected data back into its original space. // Note: Accord's own Revert(Double[][]) method did not produce the expected output here, // so the reversion is performed by the local Revert helper using the eigenvectors instead. double[,] reversion = Revert(projectedData, eigenVectors, components); // Build the projection matrix. // To do so, we need the eigenvectors and the number of columns of the projected data. double[,] projectionMatrix = GetProjectionMatrix(eigenVectors, columns); // write the projection matrix to disk /* * // FIRST STEP: sort the eigenvectors based on the eigenvalue * var eigPairs = new List<Tuple<double, double[]>>(); * * for (int i = 0; i < eigenValues.GetLength(0); i++) * { * eigPairs.Add(Tuple.Create(Math.Abs(eigenValues[i]), GetColumn(eigenVectors, i))); * } * * // sort in descending order based on the eigenvalues * eigPairs.Sort((x, y) => y.Item1.CompareTo(x.Item1)); */ var output = new Output() { ProjectionMatrix = projectionMatrix, Reversion = reversion, EigenVectors = eigenVectors, Components = components, }; return output; }
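Since the analysis above sets Whiten = true, each retained component of the projected data should come out with approximately unit variance. The fragment below is a quick sanity check sketched against the pca and jaggedArray variables from the Whitening method; it is not part of the original code and assumes System.Linq is available.

// Sketch only: verify that whitening yields roughly unit variance per component.
double[][] whitened = pca.Transform(jaggedArray);
int dims = whitened[0].Length;
for (int j = 0; j < dims; j++)
{
    double mean = whitened.Average(row => row[j]);
    double variance = whitened.Sum(row => (row[j] - mean) * (row[j] - mean)) / (whitened.Length - 1);
    Console.WriteLine($"variance of component {j + 1}: {variance:F4}");   // expected to be close to 1.0
}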
public void ConstructorTest() { // Reproducing Lindsay Smith's "Tutorial on Principal Component Analysis" // using the framework's default method. The tutorial can be found online // at http://www.sccg.sk/~haladova/principal_components.pdf // Step 1. Get some data // --------------------- double[,] data = { { 2.5, 2.4 }, { 0.5, 0.7 }, { 2.2, 2.9 }, { 1.9, 2.2 }, { 3.1, 3.0 }, { 2.3, 2.7 }, { 2.0, 1.6 }, { 1.0, 1.1 }, { 1.5, 1.6 }, { 1.1, 0.9 } }; // Step 2. Subtract the mean // ------------------------- // Note: The framework does this automatically. By default, the framework // uses the "Center" method, which only subtracts the mean. However, it is // also possible to remove the mean *and* divide by the standard deviation // (thus performing the correlation method) by specifying "Standardize" // instead of "Center" as the AnalysisMethod. AnalysisMethod method = AnalysisMethod.Center; // AnalysisMethod.Standardize // Step 3. Compute the covariance matrix // ------------------------------------- // Note: Accord.NET does not need to compute the covariance // matrix in order to compute PCA. The framework uses the SVD // method which is more numerically stable, but may require // more processing or memory. In order to replicate the tutorial // using covariance matrices, please see the next unit test. // Create the analysis using the selected method var pca = new PrincipalComponentAnalysis(data, method); // Compute it pca.Compute(); // Step 4. Compute the eigenvectors and eigenvalues of the covariance matrix // ------------------------------------------------------------------------- // Note: Since Accord.NET uses the SVD method rather than the Eigendecomposition // method, the Eigenvalues are computed from the singular values. However, it is // not the Eigenvalues themselves which are important, but rather their proportion: // Those are the expected eigenvalues, in descending order: double[] eigenvalues = { 1.28402771, 0.0490833989 }; // And this will be their proportion: double[] proportion = eigenvalues.Divide(eigenvalues.Sum()); // Those are the expected eigenvectors, // in descending order of eigenvalues: double[,] eigenvectors = { { -0.677873399, -0.735178656 }, { -0.735178656, 0.677873399 } }; // Now, here is the place most users get confused. The fact is that // the Eigenvalue decomposition (EVD) is not unique, and both the SVD // and EVD routines used by the framework produce results which are // numerically different from packages such as STATA or MATLAB, but // those are correct. // If v is an eigenvector, a multiple of this eigenvector (such as a*v, with // a being a scalar) will also be an eigenvector. In the Lindsay case, the // framework produces a first eigenvector with inverted signs. This is the same // as considering a=-1 and taking a*v. The result is still correct. // Retrieve the first expected eigenvector double[] v = eigenvectors.GetColumn(0); // Multiply by a scalar and store it back eigenvectors.SetColumn(0, v.Multiply(-1)); // Everything is alright (up to the 9 decimal places shown in the tutorial) Assert.IsTrue(eigenvectors.IsEqual(pca.ComponentMatrix, threshold: 1e-9)); Assert.IsTrue(proportion.IsEqual(pca.ComponentProportions, threshold: 1e-9)); Assert.IsTrue(eigenvalues.IsEqual(pca.Eigenvalues, threshold: 1e-5)); // Step 5. Deriving the new data set // --------------------------------- double[,] actual = pca.Transform(data); // transformedData shown in pg. 18 double[,] expected = new double[,] { { 0.827970186, -0.175115307 }, { -1.77758033, 0.142857227 }, { 0.992197494, 0.384374989 }, { 0.274210416, 0.130417207 }, { 1.67580142, -0.209498461 }, { 0.912949103, 0.175282444 }, { -0.099109437, -0.349824698 }, { -1.14457216, 0.046417258 }, { -0.438046137, 0.017764629 }, { -1.22382056, -0.162675287 }, }; // Everything is correct (up to 8 decimal places) Assert.IsTrue(expected.IsEqual(actual, threshold: 1e-8)); }
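The sign-flip performed in the constructor tests above can be generalized. The helper below is a sketch (not part of the original tests) of a deterministic sign convention: flip each eigenvector so that its largest-magnitude entry is positive, which removes the a*v ambiguity discussed in the comments.

// Sketch only: normalize eigenvector signs before comparing component matrices.
// Note: the helper modifies the given array in place and returns it.
static double[,] NormalizeSigns(double[,] vectors)
{
    int rows = vectors.GetLength(0), cols = vectors.GetLength(1);
    for (int j = 0; j < cols; j++)
    {
        // locate the entry with the largest absolute value in column j
        int argmax = 0;
        for (int i = 1; i < rows; i++)
            if (Math.Abs(vectors[i, j]) > Math.Abs(vectors[argmax, j]))
                argmax = i;

        // flip the whole column if that entry is negative
        if (vectors[argmax, j] < 0)
            for (int i = 0; i < rows; i++)
                vectors[i, j] = -vectors[i, j];
    }
    return vectors;
}

// Usage (sketch): compare sign-normalized matrices instead of hand-flipping one column.
// Assert.IsTrue(NormalizeSigns(eigenvectors).IsEqual(NormalizeSigns(pca.ComponentMatrix), threshold: 1e-9));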
private double[][] getProjectedSequence(double[][] sequence, PrincipalComponentAnalysis pca) { if (pca == null) return sequence; int numComponents = pca.GetNumberOfComponents(1.0f); double[,] data = jaggedToMulti(sequence); string fn = System.IO.Path.GetRandomFileName(); using (StreamWriter sr = new StreamWriter("Z:/WindowsFolders/Desktop/" + fn)) { for (int i = 0; i < data.GetLength(0); i++) { for (int j = 0; j < data.GetLength(1); j++) { sr.Write(data[i, j] + " "); } sr.WriteLine(); } } double[,] projectedData = pca.Transform(data, numComponents); double[][] projTrainSeq = multiToJagged(projectedData); return projTrainSeq; }
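Several snippets above (computePCA, getProjectedSequence) call jaggedToMulti and multiToJagged without showing them. The helpers below are a minimal sketch of what those conversions could look like, assuming rectangular jagged input; Accord.NET's ToMatrix()/ToJagged() extension methods could be used instead.

// Sketch only: convert a rectangular jagged array to a multidimensional array.
private static double[,] jaggedToMulti(double[][] source)
{
    int rows = source.Length, cols = source[0].Length;
    var result = new double[rows, cols];
    for (int i = 0; i < rows; i++)
        for (int j = 0; j < cols; j++)
            result[i, j] = source[i][j];
    return result;
}

// Sketch only: convert a multidimensional array back to a jagged array.
private static double[][] multiToJagged(double[,] source)
{
    int rows = source.GetLength(0), cols = source.GetLength(1);
    var result = new double[rows][];
    for (int i = 0; i < rows; i++)
    {
        result[i] = new double[cols];
        for (int j = 0; j < cols; j++)
            result[i][j] = source[i, j];
    }
    return result;
}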