/// <summary>
/// Builds a factorization result holding the products of an eigenvalue decomposition.
/// </summary>
/// <param name="factorizationType">Name of the factorization performed</param>
/// <param name="rank">Numerical rank of the decomposed matrix</param>
/// <param name="determinant">Determinant of the decomposed matrix</param>
/// <param name="eigenvalues">Vector of eigenvalues</param>
/// <param name="eigenvectors">Matrix whose columns are the eigenvectors</param>
/// <param name="D">Diagonal matrix of the eigenvalues</param>
public MatrixFactorization(string factorizationType, int rank, double determinant, 
    InsightVector eigenvalues, InsightMatrix eigenvectors, InsightMatrix D)
{
    FactorizationType = factorizationType;
    Rank = rank;
    Determinant = determinant;
    Eigenvalues = eigenvalues;
    Eigenvectors = eigenvectors;
    EigenvaluesDiagonal = D;
}
 /// <summary>
 /// Builds a factorization result holding the products of a singular value decomposition.
 /// </summary>
 /// <param name="factorizationType">Name of the factorization performed</param>
 /// <param name="rank">Numerical rank of the decomposed matrix</param>
 /// <param name="l2Norm">L2 norm of the decomposed matrix</param>
 /// <param name="S">Vector of singular values</param>
 /// <param name="U">Matrix of left singular vectors</param>
 /// <param name="VT">Matrix of right singular vectors (transposed)</param>
 /// <param name="W">Diagonal matrix of the singular values</param>
 public MatrixFactorization(string factorizationType, int rank, double l2Norm, 
     InsightVector S, InsightMatrix U, InsightMatrix VT, InsightMatrix W)
 {
     FactorizationType = factorizationType;
     Rank = rank;
     L2Norm = l2Norm;
     SingularValues = S;
     LeftSingularVectors = U;
     RightSingularVectors = VT;
     SingularValuesDiagonal = W;
 }
        /// <summary>
        /// Performs principal component analysis (PCA) on the input data set.  The extra
        /// parameters specify the criteria used to limit the number of features in the
        /// transformed data set; at most one of them should be specified.
        /// </summary>
        /// <param name="matrix">Input matrix (instances in rows, features in columns)</param>
        /// <param name="featureLimit">Maximum number of features in the new data set</param>
        /// <param name="percentThreshold">Specifies the percent of the concept variance to use
        /// in limiting the number of features selected for the new data set (range 0-1)</param>
        /// <returns>Transformed matrix with reduced number of dimensions</returns>
        private InsightMatrix PerformPCA(InsightMatrix matrix, int? featureLimit, double? percentThreshold)
        {
            // Center each feature and calculate the covariance matrix
            InsightMatrix covariance = matrix.Center().CovarianceMatrix(true);

            // Perform eigenvalue decomposition on the covariance matrix; eigenvalues
            // below a small tolerance are treated as zero when estimating the rank
            MatrixFactorization evd = covariance.EigenvalueDecomposition();
            int rank = evd.Eigenvalues.Where(x => x > 0.001).Count();

            // Determine the number of features to keep for the final data set
            if (featureLimit != null)
            {
                // Enforce a raw numeric feature limit
                if (rank > featureLimit)
                    rank = featureLimit.Value;
            }
            else if (percentThreshold != null)
            {
                // Limit to a percent of the variance in the data set
                // (represented by the sum of the eigenvalues)
                double totalVariance = evd.Eigenvalues.Sum() * percentThreshold.Value;
                double accumulatedVariance = 0;
                rank = 0;

                // BUG FIX: also bound the loop by the eigenvalue count -- with a
                // threshold at or near 1.0, floating-point rounding could otherwise
                // walk the index past the end of the eigenvalue vector
                while (rank < evd.Eigenvalues.Count && accumulatedVariance < totalVariance)
                {
                    accumulatedVariance += evd.Eigenvalues[rank];
                    rank++;
                }
            }

            // Extract the principal components in order of decreasing eigenvalue.
            // Entries of evd.Eigenvalues are destructively zeroed as a selection
            // mechanism; this is safe because evd is local to this call.
            InsightMatrix featureVectors = new InsightMatrix(evd.Eigenvalues.Count, rank);
            for (int i = 0; i < rank; i++)
            {
                // Find the largest remaining eigenvalue
                int index = evd.Eigenvalues.MaxIndex();
                featureVectors.SetColumn(i, evd.Eigenvectors.Column(index));

                // Set this position to zero so the next iteration captures the
                // next-largest eigenvalue
                evd.Eigenvalues[index] = 0;
            }

            // Project the data onto the selected components and return the reduced data set
            InsightMatrix result = (featureVectors.Transpose() * matrix.Transpose()).Transpose();

            return result;
        }
 /// <summary>
 /// Extracts the most important features from a data set using PCA.
 /// </summary>
 /// <param name="matrix">Input matrix</param>
 /// <param name="featureLimit">Maximum number of features in the new data set</param>
 /// <returns>Transformed matrix with reduced number of dimensions</returns>
 public InsightMatrix ExtractFeatures(InsightMatrix matrix, int featureLimit)
 {
     // Delegate to PCA with a hard feature-count cap and no variance threshold
     var reduced = PerformPCA(matrix, featureLimit, null);
     return reduced;
 }
        /// <summary>
        /// Performs regularized linear regression on the input data using batch
        /// gradient descent.
        /// </summary>
        /// <param name="data">Training data (target variable in the last column)</param>
        /// <param name="alpha">The learning rate for the algorithm</param>
        /// <param name="lambda">The regularization weight for the algorithm</param>
        /// <param name="iters">The number of training iterations to run</param>
        /// <returns>Tuple containing the parameter and error vectors</returns>
        private Tuple<InsightVector, InsightVector> PerformLinearRegression(InsightMatrix data, double alpha,
            double lambda, int iters)
        {
            // First add a ones column for the intercept term
            data = data.InsertColumn(0, 1);

            // Split the data into training data and the target variable
            var X = data.RemoveColumn(data.ColumnCount - 1);
            var y = data.Column(data.ColumnCount - 1);

            // Initialize several variables needed for the computation
            var theta = new InsightVector(X.ColumnCount);
            var temp = new InsightVector(X.ColumnCount);
            var error = new InsightVector(iters);

            // Perform gradient descent on the parameters theta
            for (int i = 0; i < iters; i++)
            {
                // Residuals of the current hypothesis
                var delta = (X * theta.ToColumnMatrix()) - y.ToColumnMatrix();

                for (int j = 0; j < theta.Count; j++)
                {
                    var inner = delta.Multiply(X.SubMatrix(0, X.RowCount, j, 1));

                    if (j == 0)
                    {
                        // The intercept term is never regularized
                        temp[j] = theta[j] - ((alpha / X.RowCount) * inner.Column(0).Sum());
                    }
                    else
                    {
                        // BUG FIX: the L2 penalty must be scaled by the learning rate
                        // and SUBTRACTED.  The gradient of ComputeError's penalty term
                        // (lambda * theta_j^2) is 2 * lambda * theta_j; the previous
                        // version ADDED the raw term, pushing theta away from zero
                        // instead of shrinking it toward zero.
                        var reg = alpha * (2 * lambda) * theta[j];
                        temp[j] = theta[j] - ((alpha / X.RowCount) * inner.Column(0).Sum()) - reg;
                    }
                }

                theta = temp.Clone();
                error[i] = ComputeError(X, y, theta, lambda);
            }

            return new Tuple<InsightVector, InsightVector>(theta, error);
        }
        /// <summary>
        /// Trains the model using the supplied data.  Any hyper-parameter passed as
        /// null keeps the model's current value.
        /// </summary>
        /// <param name="data">Training data</param>
        /// <param name="alpha">The learning rate for the algorithm</param>
        /// <param name="lambda">The regularization weight for the algorithm</param>
        /// <param name="iters">The number of training iterations to run</param>
        public void Train(InsightMatrix data, double? alpha, double? lambda, int? iters)
        {
            // Overwrite only the hyper-parameters that were explicitly provided
            if (alpha.HasValue) Alpha = alpha.Value;
            if (lambda.HasValue) Lambda = lambda.Value;
            if (iters.HasValue) Iterations = iters.Value;

            Train(data);
        }
 /// <summary>
 /// Predicts the target for a new batch of instances using the algorithm's trained model.
 /// </summary>
 /// <param name="instances">New instances (one per row)</param>
 /// <returns>Predicted target value for each instance</returns>
 public List<double> Predict(InsightMatrix instances)
 {
     // Prepend the intercept column, then apply the learned parameters
     var augmented = instances.InsertColumn(0, 1);
     var predictions = augmented * Theta.ToColumnMatrix();
     return predictions.Column(0).ToList();
 }
        /// <summary>
        /// Runs the K-Means clustering algorithm several times and keeps the solution
        /// with the lowest total distortion.  Unspecified parameters fall back to defaults.
        /// </summary>
        /// <param name="matrix">Input matrix</param>
        /// <param name="distanceMethod">Distance measure used to compare instances</param>
        /// <param name="clusters">Number of desired clusters</param>
        /// <param name="iterations">Number of independent K-Means runs to perform</param>
        /// <returns>Result set that includes cluster centroids, cluster assignments, and total distortion</returns>
        private IClusteringResults PerformMetaKMeansClustering(InsightMatrix matrix, 
            DistanceMethod? distanceMethod, int? clusters, int? iterations)
        {
            // Default to sum of squared error (equivalent to Euclidean distance)
            var method = distanceMethod ?? DistanceMethod.EuclideanDistance;

            // Default cluster count; TODO: derive a good value from the data itself
            var k = clusters ?? 3;

            // Default to 10 independent runs
            var runs = iterations ?? 10;

            // Seed the search with one run, then keep the lowest-distortion solution
            var best = new KMeansClustering().Cluster(matrix, method, k);

            for (int run = 1; run < runs; run++)
            {
                var candidate = new KMeansClustering().Cluster(matrix, method, k);

                if (candidate.Distortion < best.Distortion)
                {
                    best = candidate;
                }
            }

            return best;
        }
Esempio n. 9
0
 /// <summary>
 /// Element-wise divides the original matrix by the provided matrix.
 /// </summary>
 /// <param name="matrix">Divisor matrix (same dimensions)</param>
 /// <returns>New matrix holding the pointwise quotient</returns>
 public InsightMatrix Divide(InsightMatrix matrix)
 {
     // Pointwise (Hadamard-style) division of the underlying storage
     var quotient = this.Data.PointwiseDivide(matrix.Data);
     return new InsightMatrix(quotient);
 }
Esempio n. 10
0
        /// <summary>
        /// Calculates the sample covariance matrix of the data set's columns.
        /// </summary>
        /// <param name="isCentered">Indicates if the data set is already centered;
        /// when true, the covariance reduces to a dot product of the two columns</param>
        /// <returns>Covariance matrix (columns x columns, symmetric)</returns>
        public InsightMatrix CovarianceMatrix(bool isCentered = false)
        {
            int columns = this.Data.ColumnCount, rows = this.Data.RowCount;
            InsightMatrix cov = new InsightMatrix(columns);

            // PERF FIX: pre-compute the column means once -- the previous version
            // recomputed both means for every (i, j) pair, an O(columns^2 * rows)
            // redundancy
            double[] means = new double[columns];
            if (!isCentered)
            {
                for (int i = 0; i < columns; i++)
                {
                    means[i] = this.Data.Column(i).Mean();
                }
            }

            // The covariance matrix is symmetric, so compute the upper triangle
            // (including the diagonal) and mirror it into the lower triangle
            for (int i = 0; i < columns; i++)
            {
                for (int j = i; j < columns; j++)
                {
                    if (isCentered)
                    {
                        // Since each dimension is already centered, the numerator
                        // is simply the dot product of the 2 vectors
                        cov.Data[i, j] = (this.Data.Column(i) * this.Data.Column(j)) / (rows - 1);
                    }
                    else
                    {
                        double covariance = 0;
                        for (int k = 0; k < rows; k++)
                        {
                            covariance += (this.Data[k, i] - means[i]) * (this.Data[k, j] - means[j]);
                        }

                        cov.Data[i, j] = covariance / (rows - 1);
                    }

                    // Mirror into the lower triangle (identical products in either order)
                    cov.Data[j, i] = cov.Data[i, j];
                }
            }

            return cov;
        }
Esempio n. 11
0
        /// <summary>
        /// Calculates the correlation matrix by dividing each covariance by the product
        /// of the two features' standard deviations.
        /// </summary>
        /// <param name="isCentered">Indicates if the data set is already centered</param>
        /// <returns>Correlation matrix</returns>
        public InsightMatrix CorrelationMatrix(bool isCentered = false)
        {
            // BUG FIX: the isCentered flag was previously ignored -- CovarianceMatrix()
            // was always called with its default (false), so centered data never took
            // the cheaper dot-product path
            var cov = this.CovarianceMatrix(isCentered);
            int columns = this.Data.ColumnCount, rows = this.Data.RowCount;
            var cor = new InsightMatrix(columns);

            var stds = new List<double>();

            for (int i = 0; i < columns; i++)
            {
                // Calculate the sample standard deviation for each feature
                double std = 0;
                double mean = this.Data.Column(i).Mean();

                for (int j = 0; j < rows; j++)
                {
                    std += Math.Pow((this.Data[j, i] - mean), 2);
                }

                std = Math.Sqrt(std / (rows - 1));
                stds.Add(std);
            }

            for (int i = 0; i < columns; i++)
            {
                for (int j = 0; j < columns; j++)
                {
                    // Calculate the correlation by dividing the covariance by the product
                    // of the standard deviations of the two features
                    cor.Data[i, j] = cov.Data[i, j] / (stds[i] * stds[j]);
                }
            }

            return cor;
        }
Esempio n. 12
0
        /// <summary>
        /// Centers each column in the matrix by subtracting the corresponding
        /// pre-computed mean from every value in the column.
        /// </summary>
        /// <param name="meanVector">Vector with pre-computed column means</param>
        /// <returns>Column-centered copy of the matrix</returns>
        public InsightMatrix Center(InsightVector meanVector)
        {
            // Work on a copy so the original matrix is left untouched
            var centered = new InsightMatrix(this.Data);
            int columns = centered.Data.ColumnCount;
            int rows = centered.Data.RowCount;

            for (int col = 0; col < columns; col++)
            {
                for (int row = 0; row < rows; row++)
                {
                    centered.Data[row, col] -= meanVector.Data[col];
                }
            }

            return centered;
        }
Esempio n. 13
0
        /// <summary>
        /// Centers each column in the matrix by subtracting the column mean from
        /// every value in the column.  The original matrix is left unmodified.
        /// </summary>
        /// <returns>Column-centered copy of the matrix</returns>
        public InsightMatrix Center()
        {
            var matrix = new InsightMatrix(this.Data);
            int colLength = matrix.Data.ColumnCount;

            for (int i = 0; i < colLength; i++)
            {
                int length = matrix.Data.RowCount;
                double mean = matrix.Data.Column(i).Mean();

                for (int j = 0; j < length; j++)
                {
                    // BUG FIX: previously wrote to this.Data, which mutated the
                    // SOURCE matrix and returned the copy UNcentered -- callers such
                    // as PerformPCA then computed the covariance of uncentered data
                    matrix.Data[j, i] = matrix.Data[j, i] - mean;
                }
            }

            return matrix;
        }
Esempio n. 14
0
 /// <summary>
 /// Concatenates the current matrix (left) with the given matrix (right).
 /// </summary>
 /// <param name="matrix">Right matrix</param>
 /// <returns>New horizontally concatenated matrix</returns>
 public InsightMatrix Append(InsightMatrix matrix)
 {
     // Delegate to the underlying storage's horizontal concatenation
     var combined = this.Data.Append(matrix.Data);
     return new InsightMatrix(combined);
 }
Esempio n. 15
0
        /// <summary>
        /// Performs the K-Means clustering algorithm on the data set using the provided
        /// parameters.  Unspecified parameters fall back to defaults.
        /// </summary>
        /// <param name="matrix">Input matrix (one instance per row)</param>
        /// <param name="distanceMethod">Distance measure used to compare instances</param>
        /// <param name="clusters">Number of desired clusters</param>
        /// <returns>Result set that includes cluster centroids, cluster assignments, and total distortion</returns>
        private IClusteringResults PerformKMeansClustering(InsightMatrix matrix, 
            DistanceMethod? distanceMethod, int? clusters)
        {
            if (distanceMethod == null)
            {
                // Default to sum of squared error (equivalent to Euclidean distance)
                distanceMethod = DistanceMethod.EuclideanDistance;
            }

            if (clusters == null)
            {
                // Need to add some type of intelligent way to figure out a good number
                // of clusters to use based on an analysis of the data
                clusters = 3;
            }

            var assignments = new InsightVector(matrix.RowCount);
            var centroids = new InsightMatrix(clusters.Value, matrix.ColumnCount);
            var random = new Random();
            double distortion = -1;

            // Initialize means via random selection of DISTINCT instances.
            // BUG FIX: the sample list must be declared outside the loop -- it was
            // previously re-created on every iteration, so the duplicate check could
            // never fire.  Also, Random.Next's upper bound is exclusive, so the old
            // range (0, RowCount - 1) could never select the last row.
            var samples = new List<int>();
            for (int i = 0; i < clusters; i++)
            {
                int sample = random.Next(0, matrix.RowCount);

                // Make sure we don't use the same instance more than once
                while (samples.Contains(sample))
                {
                    sample = random.Next(0, matrix.RowCount);
                }

                samples.Add(sample);
                centroids.SetRow(i, matrix.Row(sample));
            }

            // Keep going until convergence point is reached
            while (true)
            {
                // Re-initialize the distortion (total error)
                distortion = 0;

                // Assign each point to the nearest mean
                for (int i = 0; i < matrix.RowCount; i++)
                {
                    // Compute the proximity to each centroid to find the closest match
                    double closestProximity = -1;
                    for (int j = 0; j < clusters; j++)
                    {
                        double proximity = matrix.Row(i).DistanceFrom(centroids.Row(j), distanceMethod.Value);

                        if (j == 0)
                        {
                            closestProximity = proximity;
                            assignments[i] = j;
                        }
                        else if (proximity < closestProximity)
                        {
                            closestProximity = proximity;
                            assignments[i] = j;
                        }
                    }

                    // Add the proximity value to the total distortion for this solution
                    distortion += closestProximity;
                }

                // Calculate the new means for each centroid
                var newCentroids = new InsightMatrix(clusters.Value, matrix.ColumnCount);
                bool converged = true;

                for (int i = 0; i < clusters; i++)
                {
                    int instanceCount = assignments.Where(x => x == i).Count();

                    // Compute the means for each instance assigned to the current cluster
                    for (int j = 0; j < newCentroids.ColumnCount; j++)
                    {
                        double sum = 0;
                        for (int k = 0; k < matrix.RowCount; k++)
                        {
                            if (assignments[k] == i) sum += matrix[k, j];
                        }

                        if (instanceCount > 0)
                            newCentroids[i, j] = Math.Round(sum / instanceCount, 2);
                        else
                            newCentroids[i, j] = centroids[i, j];   // an empty cluster keeps its old centroid

                        if (newCentroids[i, j] != centroids[i, j])
                            converged = false;
                    }

                    centroids.SetRow(i, newCentroids.Row(i));
                }

                // If the new centroid means did not change then we've reached the final result
                if (converged) break;
            }

            return new ClusteringResults(centroids, assignments, distortion);
        }
 /// <summary>
 /// Clusters the data set into groups of similar instances using default parameters.
 /// </summary>
 /// <param name="matrix">Input matrix</param>
 /// <returns>Result set that includes cluster centroids, cluster assignments, and total distortion</returns>
 public IClusteringResults Cluster(InsightMatrix matrix)
 {
     // All nulls -> the meta algorithm supplies its own defaults
     return PerformMetaKMeansClustering(matrix, null, null, null);
 }
 /// <summary>
 /// Clusters the data set into groups of similar instances using the supplied parameters.
 /// </summary>
 /// <param name="matrix">Input matrix</param>
 /// <param name="comparisonMethod">Distance measure used to compare instances</param>
 /// <param name="clusters">Number of desired clusters</param>
 /// <param name="iterations">Number of times to run the algorithm</param>
 /// <returns>Result set that includes cluster centroids, cluster assignments, and total distortion</returns>
 public IClusteringResults Cluster(InsightMatrix matrix, DistanceMethod comparisonMethod, int clusters, int iterations)
 {
     var results = PerformMetaKMeansClustering(matrix, comparisonMethod, clusters, iterations);
     return results;
 }
Esempio n. 18
0
 /// <summary>
 /// Element-wise multiplies the two matrices together (not matrix multiplication).
 /// </summary>
 /// <param name="matrix">2nd matrix (same dimensions)</param>
 /// <returns>New matrix holding the pointwise product</returns>
 public InsightMatrix Multiply(InsightMatrix matrix)
 {
     // Pointwise (Hadamard-style) product of the underlying storage
     var product = this.Data.PointwiseMultiply(matrix.Data);
     return new InsightMatrix(product);
 }
        /// <summary>
        /// Performs linear discriminant analysis (LDA) on the input data set.  The extra
        /// parameters specify the criteria used to limit the number of features in the
        /// transformed data set; at most one of them should be specified.
        /// </summary>
        /// <param name="matrix">Input matrix (class label in the last column)</param>
        /// <param name="featureLimit">Maximum number of features in the new data set</param>
        /// <param name="percentThreshold">Specifies the percent of the concept variance to use
        /// in limiting the number of features selected for the new data set (range 0-1)</param>
        /// <returns>Transformed matrix with reduced number of dimensions</returns>
        private InsightMatrix PerformLDA(InsightMatrix matrix, int? featureLimit, double? percentThreshold)
        {
            // Calculate the mean vector for the entire data set (skipping the class column)
            int columnCount = matrix.ColumnCount - 1;
            InsightVector totalMean = new InsightVector(columnCount);
            for (int i = 0; i < columnCount; i++)
            {
                totalMean[i] = matrix.Column(i).Mean();
            }

            // Derive a sub-matrix for each class in the data set
            List<InsightMatrix> classes = matrix.Decompose(columnCount);

            // Calculate the mean and covariance matrix for each class
            var meanVectors = new List<KeyValuePair<int, InsightVector>>();
            var covariances = new List<InsightMatrix>();
            foreach (var classMatrix in classes)
            {
                InsightVector means = new InsightVector(columnCount);
                for (int i = 0; i < columnCount; i++)
                {
                    means[i] = classMatrix.Column(i).Mean();
                }

                // Using a key/value pair to keep the number of samples in the class in
                // addition to the mean vector - we'll need both later on
                meanVectors.Add(new KeyValuePair<int, InsightVector>(classMatrix.RowCount, means));

                // Drop the class column then compute the covariance matrix for this class
                InsightMatrix covariance = classMatrix.SubMatrix(0, classMatrix.RowCount, 0, classMatrix.ColumnCount - 1);
                covariance = covariance.Center().CovarianceMatrix(true);
                covariances.Add(covariance);
            }

            // Calculate the within-class scatter matrix (sum of the class covariances)
            InsightMatrix withinClassScatter = covariances.Aggregate((x, y) => new InsightMatrix((x + y)));

            // Calculate the between-class scatter matrix (sample-count-weighted outer
            // products of each class mean's deviation from the total mean)
            InsightMatrix betweenClassScatter = meanVectors.Aggregate(
                new InsightMatrix(totalMean.Count), (x, y) =>
                    x + (y.Key * (y.Value - totalMean).ToColumnMatrix() *
                    (y.Value - totalMean).ToColumnMatrix().Transpose()));

            // Compute the LDA projection and perform eigenvalue decomposition on the projected matrix
            InsightMatrix projection = new InsightMatrix(
                (withinClassScatter.Inverse() * betweenClassScatter));
            MatrixFactorization evd = projection.EigenvalueDecomposition();
            int rank = evd.Eigenvalues.Where(x => x > 0.001).Count();

            // Determine the number of features to keep for the final data set
            if (featureLimit != null)
            {
                // Enforce a raw numeric feature limit
                if (rank > featureLimit)
                    rank = featureLimit.Value;
            }
            else if (percentThreshold != null)
            {
                // Limit to a percent of the variance in the data set (represented by the sum of the eigenvalues)
                double totalVariance = evd.Eigenvalues.Sum() * percentThreshold.Value;
                double accumulatedVariance = 0;
                rank = 0;

                // BUG FIX: also bound the loop by the eigenvalue count -- with a
                // threshold at or near 1.0, floating-point rounding could otherwise
                // walk the index past the end of the eigenvalue vector
                while (rank < evd.Eigenvalues.Count && accumulatedVariance < totalVariance)
                {
                    accumulatedVariance += evd.Eigenvalues[rank];
                    rank++;
                }
            }

            // Extract the most important vectors (in order by eigenvalue size).  Entries
            // of evd.Eigenvalues are destructively zeroed as a selection mechanism,
            // which is safe because evd is local to this call.
            InsightMatrix projectionVectors = new InsightMatrix(evd.Eigenvalues.Count, rank);
            for (int i = 0; i < rank; i++)
            {
                // Find the largest remaining eigenvalue
                int index = evd.Eigenvalues.MaxIndex();
                projectionVectors.SetColumn(i, evd.Eigenvectors.Column(index));

                // Set this position to zero so the next iteration captures the next-largest eigenvalue
                evd.Eigenvalues[index] = 0;
            }

            // Multiply each class matrix by the projection vectors
            for (int i = 0; i < classes.Count; i++)
            {
                // Save the class vector (NOTE(review): this assumes Decompose places the
                // class label in column 0 of each sub-matrix -- verify against Decompose)
                InsightVector classVector = classes[i].Column(0);

                // Create a new class matrix using the projection vectors
                classes[i] = (projectionVectors.Transpose() *
                    classes[i].SubMatrix(0, classes[i].RowCount, 1, classes[i].ColumnCount - 1)
                    .Transpose()).Transpose();

                // Insert the class vector back into the matrix
                classes[i] = classes[i].InsertColumn(0, classVector);
            }

            // Concatenate back into a single matrix
            InsightMatrix result = classes.Aggregate((x, y) => x.Stack(y));

            return result;
        }
Esempio n. 20
0
        /// <summary>
        /// Scales each column in the matrix by dividing each value in the column by the
        /// square root of the squared sum.  For a column that is already centered, this
        /// is equivalent to dividing by the standard deviation.  The original matrix is
        /// left unmodified.
        /// </summary>
        /// <returns>Column-scaled copy of the matrix</returns>
        public InsightMatrix Scale()
        {
            var matrix = new InsightMatrix(this.Data);
            int columns = matrix.Data.ColumnCount;
            int rows = matrix.Data.RowCount;

            for (int i = 0; i < columns; i++)
            {
                // Accumulate the squared sum for this column
                double ss = 0;
                for (int j = 0; j < rows; j++)
                {
                    ss += matrix.Data[j, i] * matrix.Data[j, i];
                }

                double norm = Math.Sqrt(ss);

                // ROBUSTNESS FIX: an all-zero column has a zero norm; dividing by it
                // would fill the column with NaNs, so leave the column unchanged instead
                if (norm == 0) continue;

                for (int j = 0; j < rows; j++)
                {
                    matrix.Data[j, i] = matrix.Data[j, i] / norm;
                }
            }

            return matrix;
        }
Esempio n. 21
0
 /// <summary>
 /// Trains the model using the supplied data and the model's current
 /// (default) training parameters.
 /// </summary>
 /// <param name="data">Training data</param>
 public void Train(InsightMatrix data)
 {
     // Fit via gradient descent and store the learned parameters and error history
     var fit = PerformLinearRegression(data, Alpha, Lambda, Iterations);
     Theta = fit.Item1;
     Error = fit.Item2;
 }
Esempio n. 22
0
        /// <summary>
        /// Sorts the rows of a matrix using the values in the designated column for
        /// comparison.  The sort is stable and the original matrix is left unmodified.
        /// </summary>
        /// <param name="columnIndex">Column to use for sorting</param>
        /// <returns>Row-sorted copy of the matrix</returns>
        public InsightMatrix Sort(int columnIndex)
        {
            var matrix = new InsightMatrix(this.Data);

            // Compute the row order once.  OrderBy is a stable sort, so rows with
            // equal keys keep their relative order.  (The previous implementation
            // built two extra key lists -- one of them an identity transformation --
            // and applied the permutation via in-place swaps with IndexOf fix-ups;
            // gathering the rows directly is equivalent and far simpler.)
            var order = Enumerable.Range(0, this.Data.RowCount)
                .OrderBy(x => this.Data[x, columnIndex])
                .ToList();

            for (int i = 0; i < order.Count; i++)
            {
                // Read each row from the untouched source and write it into the copy
                matrix.Data.SetRow(i, this.Data.Row(order[i]));
            }

            return matrix;
        }
Esempio n. 23
0
 /// <summary>
 /// Computes the total regularized error of the solution with parameters theta.
 /// </summary>
 /// <param name="X">Training data (intercept column included)</param>
 /// <param name="y">Target variable</param>
 /// <param name="theta">Model parameters</param>
 /// <param name="lambda">Regularization weight</param>
 /// <returns>Solution error</returns>
 private double ComputeError(InsightMatrix X, InsightVector y, InsightVector theta, double lambda)
 {
     // Squared residuals of the current hypothesis
     var residuals = ((X * theta.ToColumnMatrix()) - y.ToColumnMatrix()).Power(2);

     // L2 penalty over all parameters except the intercept term
     var weights = theta.SubVector(1, theta.Count - 1);
     var penalty = lambda * weights.Multiply(weights).Sum();

     return (residuals.Column(0).Sum() / (2 * X.RowCount)) + penalty;
 }
Esempio n. 24
0
 /// <summary>
 /// Stacks the current matrix (top) on top of the provided matrix (bottom).
 /// </summary>
 /// <param name="matrix">Lower matrix</param>
 /// <returns>New vertically stacked matrix</returns>
 public InsightMatrix Stack(InsightMatrix matrix)
 {
     // Delegate to the underlying storage's vertical concatenation
     var combined = this.Data.Stack(matrix.Data);
     return new InsightMatrix(combined);
 }
 /// <summary>
 /// Extracts the most important features from a data set using PCA, with no
 /// explicit limit on the number of features kept.
 /// </summary>
 /// <param name="matrix">Input matrix</param>
 /// <returns>Transformed matrix with reduced number of dimensions</returns>
 public InsightMatrix ExtractFeatures(InsightMatrix matrix)
 {
     // No feature limit and no variance threshold
     var reduced = PerformPCA(matrix, null, null);
     return reduced;
 }
Esempio n. 26
0
 /// <summary>
 /// Creates a new matrix as a copy of the given matrix.
 /// </summary>
 /// <param name="matrix">Matrix to copy</param>
 public InsightMatrix(InsightMatrix matrix)
 {
     // Copy the underlying storage so the two matrices are independent
     this.Data = DenseMatrix.OfMatrix(matrix.Data);
 }
 /// <summary>
 /// Extracts the most important features from a data set using PCA, keeping enough
 /// components to account for the requested share of the variance.
 /// </summary>
 /// <param name="matrix">Input matrix</param>
 /// <param name="percentThreshold">Specifies the percent of the concept variance to use
 /// in limiting the number of features selected for the new data set (range 0-1)</param>
 /// <returns>Transformed matrix with reduced number of dimensions</returns>
 public InsightMatrix ExtractFeatures(InsightMatrix matrix, double percentThreshold)
 {
     // Variance threshold only -- no hard feature-count cap
     var reduced = PerformPCA(matrix, null, percentThreshold);
     return reduced;
 }
        /// <summary>
        /// Performs singular value decomposition on the input data set.  Extra parameters 
        /// are used to specify the criteria or methodology used to limit the number of features 
        /// in the transformed data set.  Only one extra parameter should be specified.
        /// </summary>
        /// <param name="matrix">Input matrix</param>
        /// <param name="featureLimit">Maximum number of features in the new data set</param>
        /// <param name="percentThreshold">Specifies the percent of the concept variance to use
        /// in limiting the number of features selected for the new data set (range 0-1)</param>
        /// <returns>Transformed matrix with reduced number of dimensions</returns>
        private InsightMatrix PerformSVD(InsightMatrix matrix, int? featureLimit, double? percentThreshold)
        {
            // Perform singular value decomposition on the matrix
            // and retrieve the rank (number of singular values)
            MatrixFactorization svd = matrix.SingularValueDecomposition();
            int rows = matrix.RowCount, columns = matrix.ColumnCount;
            int rank = svd.Rank;

            // Determine the number of features to keep for the final data set
            // (default will use all available singular values)
            if (featureLimit != null)
            {
                // Enforce a raw numeric feature limit
                if (rank > featureLimit)
                    rank = featureLimit.Value;
            }
            else if (percentThreshold != null)
            {
                // Limit to a percent of the variance in the data set
                // (represented by the sum of the singular values)
                double totalVariance = svd.SingularValues.Sum() * percentThreshold.Value;
                double accumulatedVariance = 0;
                rank = 0;
                // Accumulate singular values (largest first) until the requested
                // share of total variance is covered; 'rank' ends at the count kept.
                // NOTE(review): if percentThreshold is 1.0, floating-point rounding
                // could in principle push 'rank' past the last singular value and
                // index out of range — confirm callers pass values strictly < 1.
                while (accumulatedVariance < totalVariance)
                {
                    accumulatedVariance += svd.SingularValues[rank];
                    rank++;
                }
            }

            // Re-compose the original matrix using a sub-set of the features:
            // U (rows x rows) * W (rows x rank) * VT (rank x rank) => rows x rank result.
            InsightMatrix result = svd.LeftSingularVectors.SubMatrix(0, rows, 0, rows) *
                svd.SingularValuesDiagonal.SubMatrix(0, rows, 0, rank) *
                svd.RightSingularVectors.SubMatrix(0, rank, 0, rank);

            return result;
        }
Esempio n. 29
0
        /// <summary>
        /// Demo entry point: walks through similarity/distance measures, covariance
        /// and correlation, feature extraction (PCA/SVD/LDA), hill-climbing
        /// optimization, CSV data loading, k-means clustering, and linear/logistic
        /// regression, printing the results of each section to the console and
        /// pausing (Console.ReadKey) between sections.
        /// </summary>
        /// <param name="args">Command-line arguments (unused)</param>
        static void Main(string[] args)
        {
            // --- Similarity & distance measures on two sample vectors ---
            Console.WriteLine("Similarity & Distance Examples");
            Console.WriteLine("------------------------------");
            Console.WriteLine(Environment.NewLine);

            InsightVector u = new InsightVector(new double[] { 1, 2, 3, 4, 5 });
            Console.WriteLine("Vector u:");
            Console.WriteLine(u.ToString());
            Console.WriteLine(Environment.NewLine);

            InsightVector v = new InsightVector(new double[] { 5, 4, 3, 2, 1 });
            Console.WriteLine("Vector v:");
            Console.WriteLine(v.ToString());
            Console.WriteLine(Environment.NewLine);

            double distance = u.DistanceFrom(v);
            Console.WriteLine("Euclidean distance (u, v) = {0}", distance.ToString("F4"));
            Console.WriteLine(Environment.NewLine);

            distance = u.DistanceFrom(v, DistanceMethod.HammingDistance);
            Console.WriteLine("Hamming distance (u, v) = {0}", distance.ToString("F4"));
            Console.WriteLine(Environment.NewLine);

            distance = u.DistanceFrom(v, DistanceMethod.ManhattanDistance);
            Console.WriteLine("Manhattan distance (u, v) = {0}", distance.ToString("F4"));
            Console.WriteLine(Environment.NewLine);

            double similarity = u.SimilarityTo(v);
            Console.WriteLine("Cosine similarity (u, v) = {0}", similarity.ToString("F4"));
            Console.WriteLine(Environment.NewLine);

            similarity = u.SimilarityTo(v, SimilarityMethod.JaccardCoefficient);
            Console.WriteLine("Jaccard coefficient (u, v) = {0}", similarity.ToString("F4"));
            Console.WriteLine(Environment.NewLine);

            similarity = u.SimilarityTo(v, SimilarityMethod.PearsonCorrelation);
            Console.WriteLine("Pearson correlation (u, v) = {0}", similarity.ToString("F4"));
            Console.WriteLine(Environment.NewLine);

            Console.ReadKey();

            // --- Covariance & correlation matrices ---
            Console.WriteLine(Environment.NewLine);
            Console.WriteLine("Covariance & Correlation Examples");
            Console.WriteLine("------------------------------");
            Console.WriteLine(Environment.NewLine);

            InsightMatrix matrix = new InsightMatrix(new double[,] {
                { 2.1, 8 }, { 2.5, 12 }, { 4.0, 14 }, { 3.6, 10 }
            });

            Console.WriteLine("Example matrix:");
            Console.WriteLine(matrix.ToString());
            Console.WriteLine(Environment.NewLine);

            var cov = matrix.CovarianceMatrix();
            Console.WriteLine("Covariance matrix:");
            Console.WriteLine(cov.ToString());
            Console.WriteLine(Environment.NewLine);

            var cor = matrix.CorrelationMatrix();
            Console.WriteLine("Correlation matrix:");
            Console.WriteLine(cor.ToString());
            Console.WriteLine(Environment.NewLine);

            Console.ReadKey();

            // --- Feature extraction: PCA, SVD, and LDA ---
            Console.WriteLine(Environment.NewLine);
            Console.WriteLine("Feature Extraction Examples");
            Console.WriteLine("------------------------------");
            Console.WriteLine(Environment.NewLine);

            InsightMatrix matrix2 = new InsightMatrix(new double[,] {
                { 2.5, 2.4 }, { 0.5, 0.7 }, { 2.2, 2.9 }, { 1.9, 2.2 }, { 3.1, 3.0 }, { 2.3, 2.7 },
                { 2.0, 1.6 }, { 1.0, 1.1 }, { 1.5, 1.6 }, { 1.1, 0.9 }
            });
            Console.WriteLine("First test matrix:");
            Console.WriteLine(matrix2.ToString());
            Console.WriteLine(Environment.NewLine);

            var pca = matrix2.ExtractFeatures(ExtractionMethod.PrincipalComponentAnalysis, 1);
            Console.WriteLine("Result of principal components analysis:");
            Console.WriteLine(pca.ToString());
            Console.WriteLine(Environment.NewLine);

            var svd = matrix2.ExtractFeatures(ExtractionMethod.SingularValueDecomposition, 1);
            Console.WriteLine("Result of singular value decomposition:");
            Console.WriteLine(svd.ToString());
            Console.WriteLine(Environment.NewLine);

            // Third column of this matrix is the class label used by LDA.
            InsightMatrix matrix3 = new InsightMatrix(new double[,] {
                { 4, 2, 1 }, { 2, 4, 1 }, { 2, 3, 1 }, { 3, 6, 1 }, { 4, 4, 1 },
                { 9, 10, 2 }, { 6, 8, 2 }, { 9, 5, 2 }, { 8, 7, 2 }, { 10, 8, 2 }
            });
            Console.WriteLine("Second test matrix:");
            Console.WriteLine(matrix3.ToString());
            Console.WriteLine(Environment.NewLine);

            var lda = matrix3.ExtractFeatures(ExtractionMethod.LinearDiscriminantAnalysis);
            Console.WriteLine("Result of linear discriminant analysis:");
            Console.WriteLine(lda.ToString());
            Console.WriteLine(Environment.NewLine);

            Console.ReadKey();

            // --- Hill-climbing optimizers maximizing f(x) = |x| - x^2 from x = 0 ---
            Console.WriteLine(Environment.NewLine);
            Console.WriteLine("Optimization Examples");
            Console.WriteLine("------------------------------");
            Console.WriteLine(Environment.NewLine);

            var simpleHillClimber = new SimpleHillClimbing<double>();
            // Candidate moves the climbers may apply at each step.
            var transforms = new List<Func<double, double>>();
            transforms.Add(x => x + 1);
            transforms.Add(x => x - 1);
            transforms.Add(x => x + 0.3);
            transforms.Add(x => x - 0.3);
            transforms.Add(x => x + 0.1);
            transforms.Add(x => x - 0.1);
            var solution = simpleHillClimber.FindMaxima(
                0,
                transforms,
                x => Math.Abs(x) - (x * x));
            Console.WriteLine("Simple hill climber solution = {0}", solution.Solution);
            Console.WriteLine("Simple hill climber score = {0}", solution.Score);
            Console.WriteLine(Environment.NewLine);

            var steepestAscentHillClimber = new SteepestAscentHillClimbing<double>();
            var solution2 = steepestAscentHillClimber.FindMaxima(
                0,
                transforms,
                x => Math.Abs(x) - (x * x));
            Console.WriteLine("Steepest ascent hill climber solution = {0}", solution2.Solution);
            Console.WriteLine("Steepest ascent hill climber score = {0}", solution2.Score);
            Console.WriteLine(Environment.NewLine);

            var stochasticHillClimber = new StochasticHillClimbing<double>();
            var solution3 = stochasticHillClimber.FindMaxima(
                0,
                transforms,
                x => Math.Abs(x) - (x * x));
            Console.WriteLine("Stocastic hill climber solution = {0}", solution3.Solution);
            Console.WriteLine("Stocastic hill climber score = {0}", solution3.Score);
            Console.WriteLine(Environment.NewLine);

            Console.ReadKey();

            // --- CSV data loading (iris data set) ---
            Console.WriteLine(Environment.NewLine);
            Console.WriteLine("Data Loading Examples");
            Console.WriteLine("------------------------------");
            Console.WriteLine(Environment.NewLine);

            InsightMatrix iris = DataLoader.ImportFromCSV("../../../data/iris.data", ',', false, true);

            Console.WriteLine("Iris data set:");
            Console.WriteLine(iris.ToString());

            Console.ReadKey();

            // --- K-means clustering on the iris data ---
            Console.WriteLine(Environment.NewLine);
            Console.WriteLine("Clustering Examples");
            Console.WriteLine("------------------------------");
            Console.WriteLine(Environment.NewLine);

            var clusterResults = iris.Cluster(ClusteringMethod.KMeans);
            Console.WriteLine("K-Means");
            Console.WriteLine("Distortion = {0}", clusterResults.Distortion);
            Console.WriteLine("Centroids:");
            Console.WriteLine(clusterResults.Centroids.ToString());

            Console.ReadKey();

            // Best-of-10 run; report the second result set
            // (previously this section printed the first run's results by mistake).
            var clusterResults2 = iris.Cluster(ClusteringMethod.KMeans, DistanceMethod.EuclideanDistance, 3, 10);
            Console.WriteLine("K-Means (best of 10)");
            Console.WriteLine("Distortion = {0}", clusterResults2.Distortion);
            Console.WriteLine("Centroids:");
            Console.WriteLine(clusterResults2.Centroids.ToString());

            Console.ReadKey();

            // --- Linear regression on sample data ---
            Console.WriteLine(Environment.NewLine);
            Console.WriteLine("Linear Regression");
            Console.WriteLine("------------------------------");
            Console.WriteLine(Environment.NewLine);

            Console.WriteLine("Loading regression sample data...");
            InsightMatrix data1 = DataLoader.ImportFromCSV("../../../data/ex1data1.txt", ',', false, false);

            Console.WriteLine("Training model...");
            var model1 = new LinearRegression();
            model1.Train(data1);
            Console.WriteLine("Model training complete.  Parameters:");
            Console.WriteLine(model1.Theta.ToString());
            Console.WriteLine(model1.Error.ToString());

            Console.WriteLine("Predicting output for first data point...");
            // Drop the target column before predicting.
            data1 = data1.RemoveColumn(data1.ColumnCount - 1);
            var prediction = model1.Predict(data1.Row(0));
            Console.WriteLine("Prediction = {0}", prediction);

            Console.ReadKey();

            // --- Logistic regression on sample data ---
            Console.WriteLine(Environment.NewLine);
            Console.WriteLine("Logistic Regression");
            Console.WriteLine("------------------------------");
            Console.WriteLine(Environment.NewLine);

            Console.WriteLine("Loading classification sample data...");
            InsightMatrix data2 = DataLoader.ImportFromCSV("../../../data/ex2data1.txt", ',', false, false);

            Console.WriteLine("Training model...");
            var model2 = new LogisticRegression();
            model2.Train(data2);
            Console.WriteLine("Model training complete.  Parameters:");
            Console.WriteLine(model2.Theta.ToString());
            Console.WriteLine(model2.Error.ToString());

            Console.WriteLine("Predicting output for first data point...");
            // Drop the target column before classifying.
            data2 = data2.RemoveColumn(data2.ColumnCount - 1);
            var classification = model2.Classify(data2.Row(0));
            Console.WriteLine("Classification = {0}", classification);

            Console.ReadKey();
        }
 /// <summary>
 /// Performs independent component analysis on the input data set.
 /// Not yet implemented — always throws <see cref="NotImplementedException"/>.
 /// </summary>
 /// <param name="matrix">Input matrix</param>
 /// <param name="featureLimit">Maximum number of features in the new data set</param>
 /// <param name="percentThreshold">Specifies the percent of the concept variance to use
 /// in limiting the number of features selected for the new data set (range 0-1)</param>
 /// <returns>Transformed matrix with reduced number of dimensions</returns>
 private InsightMatrix PerformICA(InsightMatrix matrix, int? featureLimit, double? percentThreshold)
 {
     // TODO
     throw new NotImplementedException();
 }