/// <summary>
 /// Matrix factorization constructor for eigenvalue decomposition.
 /// </summary>
 /// <param name="factorizationType">Factorization type</param>
 /// <param name="rank">Rank</param>
 /// <param name="determinant">Determinant</param>
 /// <param name="eigenvalues">Eigenvalues</param>
 /// <param name="eigenvectors">Eigenvectors</param>
 /// <param name="D">Diagonal eigenvalues matrix</param>
 public MatrixFactorization(string factorizationType, int rank, double determinant, 
     InsightVector eigenvalues, InsightMatrix eigenvectors, InsightMatrix D)
 {
     this.FactorizationType = factorizationType;
     this.Rank = rank;
     this.Determinant = determinant;
     this.Eigenvalues = eigenvalues;
     this.Eigenvectors = eigenvectors;
     this.EigenvaluesDiagonal = D;
 }
 /// <summary>
 /// Matrix factorization constructor for singular value decomposition.
 /// </summary>
 /// <param name="factorizationType">Factorization type</param>
 /// <param name="rank">Rank</param>
 /// <param name="l2Norm">L2 norm</param>
 /// <param name="S">Singular values</param>
 /// <param name="U">Left singular vectors</param>
 /// <param name="VT">Right singular vectors</param>
 /// <param name="W">Diagonal singular values matrix</param>
 public MatrixFactorization(string factorizationType, int rank, double l2Norm, 
     InsightVector S, InsightMatrix U, InsightMatrix VT, InsightMatrix W)
 {
     this.FactorizationType = factorizationType;
     this.Rank = rank;
     this.L2Norm = l2Norm;
     this.SingularValues = S;
     this.LeftSingularVectors = U;
     this.RightSingularVectors = VT;
     this.SingularValuesDiagonal = W;
 }
 /// <summary>
 /// Calculates the distance between two vectors.
 /// </summary>
 /// <param name="u">1st vector</param>
 /// <param name="v">2nd vector</param>
 /// <param name="distanceMethod">Algorithm to use for the distance calculation</param>
 /// <returns>Distance between the two vectors</returns>
 public static double DistanceFrom(this InsightVector u, InsightVector v, DistanceMethod distanceMethod)
 {
     switch (distanceMethod)
     {
         case DistanceMethod.EuclideanDistance:
             return new EuclideanDistance().CalculateDistance(u, v);
         case DistanceMethod.HammingDistance:
             return new HammingDistance().CalculateDistance(u, v);
         default:
             return new ManhattanDistance().CalculateDistance(u, v);
     }
 }
 /// <summary>
 /// Calculates the similarity between two vectors.
 /// </summary>
 /// <param name="u">1st vector</param>
 /// <param name="v">2nd vector</param>
 /// <param name="similarityMethod">Algorithm to use for the similarity calculation</param>
 /// <returns>Similarity between the two vectors</returns>
 public static double SimilarityTo(this InsightVector u, InsightVector v, SimilarityMethod similarityMethod)
 {
     switch (similarityMethod)
     {
         case SimilarityMethod.CosineSimilarity:
             return new CosineSimilarity().CalculateSimilarity(u, v);
         case SimilarityMethod.JaccardCoefficient:
             return new JaccardCoefficient().CalculateSimilarity(u, v);
         default:
             return new PearsonCorrelation().CalculateSimilarity(u, v);
     }
 }
        /// <summary>
        /// Calculates the distance between two vectors using Manhattan distance.
        /// </summary>
        /// <remarks>Range is 0 to infinity</remarks>
        /// <param name="u">1st vector</param>
        /// <param name="v">2nd vector</param>
        /// <returns>Distance between the two vectors</returns>
        public double CalculateDistance(InsightVector u, InsightVector v)
        {
            if (u.Count != v.Count)
                throw new Exception("Vector lengths must be equal.");

            int length = u.Count;
            double distance = 0;
            for (int i = 0; i < length; i++)
            {
                distance += Math.Abs(u[i] - v[i]);
            }

            return distance;
        }
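        // Worked example (illustration only, not part of the original class): for u = (1, 2, 3)
        // and v = (3, 1, 5) the Manhattan (L1) distance computed above is
        //     |1 - 3| + |2 - 1| + |3 - 5| = 2 + 1 + 2 = 5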
        /// <summary>
        /// Calculates the distance between two vectors using Euclidean distance.
        /// </summary>
        /// <remarks>Range is 0 to infinity</remarks>
        /// <param name="u">1st vector</param>
        /// <param name="v">2nd vector</param>
        /// <returns>Distance between the two vectors</returns>
        public double CalculateDistance(InsightVector u, InsightVector v)
        {
            if (u.Count != v.Count)
                throw new Exception("Vector lengths must be equal.");

            int length = u.Count;
            double sumOfSquares = 0;
            for (int i = 0; i < length; i++)
            {
                sumOfSquares += (u[i] - v[i]) * (u[i] - v[i]);
            }

            return Math.Sqrt(sumOfSquares);
        }
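        // Worked example (illustration only): the Euclidean (L2) distance is
        // sqrt(sum_i (u[i] - v[i])^2), so for u = (0, 0) and v = (3, 4) it equals sqrt(9 + 16) = 5.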
        /// <summary>
        /// Calculates the similarity between two vectors using the extended Jaccard coefficient.
        /// </summary>
        /// <remarks>Range is 0 to 1</remarks>
        /// <param name="u">1st vector</param>
        /// <param name="v">2nd vector</param>
        /// <returns>Similarity between the two vectors</returns>
        public double CalculateSimilarity(InsightVector u, InsightVector v)
        {
            if (u.Count != v.Count)
                throw new Exception("Vector lengths must be equal.");

            int length = u.Count;
            double uSumSquared = 0, vSumSquared = 0, productSum = 0;
            for (int i = 0; i < length; i++)
            {
                uSumSquared += u[i] * u[i];
                vSumSquared += v[i] * v[i];
                productSum += u[i] * v[i];
            }

            double numerator = productSum;
            double denominator = uSumSquared + vSumSquared - productSum;

            return denominator == 0 ? 0 : numerator / denominator;
        }
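        // For reference, the quantity computed above is the extended (Tanimoto) Jaccard coefficient
        //     T(u, v) = (u . v) / (|u|^2 + |v|^2 - u . v)
        // e.g. u = (1, 1) and v = (1, 0) give 1 / (2 + 1 - 1) = 0.5; the denominator check guards
        // against two all-zero vectors.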
 /// <summary>
 /// Normalizes each column in the matrix by centering and then scaling the values
 /// in each column of the original matrix.
 /// </summary>
 /// <param name="meanVector">Vector with pre-computed column means</param>
 /// <returns>Column-normalized matrix</returns>
 public InsightMatrix Normalize(InsightVector meanVector)
 {
     return this.Center(meanVector).Scale();
 }
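 // Usage sketch (hypothetical helper, not part of the original class): build the per-column
 // mean vector with Column(i).Mean() - the same pattern the LDA code below uses - and pass it
 // to Normalize so the pre-computed means can be reused for other data later.
 public static InsightMatrix NormalizeColumns(InsightMatrix matrix)
 {
     var means = new InsightVector(matrix.ColumnCount);
     for (int i = 0; i < matrix.ColumnCount; i++)
     {
         means[i] = matrix.Column(i).Mean();
     }

     return matrix.Normalize(means);
 }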
        /// <summary>
         /// Performs linear discriminant analysis on the input data set.  Extra parameters 
         /// are used to specify the criteria or methodology used to limit the number of features 
         /// in the transformed data set.  Only one of the extra parameters should be specified.
        /// </summary>
        /// <param name="matrix">Input matrix</param>
        /// <param name="featureLimit">Maximum number of features in the new data set</param>
         /// <param name="percentThreshold">Specifies the percent of the total variance (sum of the
         /// eigenvalues) to retain when limiting the number of features in the new data set (range 0-1)</param>
        /// <returns>Transformed matrix with reduced number of dimensions</returns>
        private InsightMatrix PerformLDA(InsightMatrix matrix, int? featureLimit, double? percentThreshold)
        {
            // Calculate the mean vector for the entire data set (skipping the class column)
            int columnCount = matrix.ColumnCount - 1;
            InsightVector totalMean = new InsightVector(columnCount);
            for (int i = 0; i < columnCount; i++)
            {
                totalMean[i] = matrix.Column(i).Mean();
            }

            // Derive a sub-matrix for each class in the data set
            List<InsightMatrix> classes = matrix.Decompose(columnCount);

            // Calculate the mean and covariance matrix for each class
            var meanVectors = new List<KeyValuePair<int, InsightVector>>();
            var covariances = new List<InsightMatrix>();
            foreach (var classMatrix in classes)
            {
                InsightVector means = new InsightVector(columnCount);
                for (int i = 0; i < columnCount; i++)
                {
                    means[i] = classMatrix.Column(i).Mean();
                }

                 // Using a key/value pair to keep the number of samples in the class in
                 // addition to the mean vector - we'll need both later on
                meanVectors.Add(new KeyValuePair<int, InsightVector>(classMatrix.RowCount, means));

                // Drop the class column then compute the covariance matrix for this class
                InsightMatrix covariance = classMatrix.SubMatrix(0, classMatrix.RowCount, 0, classMatrix.ColumnCount - 1);
                covariance = covariance.Center().CovarianceMatrix(true);
                covariances.Add(covariance);
            }

            // Calculate the within-class scatter matrix
            InsightMatrix withinClassScatter = covariances.Aggregate((x, y) => new InsightMatrix((x + y)));

            // Calculate the between-class scatter matrix
            InsightMatrix betweenClassScatter = meanVectors.Aggregate(
                new InsightMatrix(totalMean.Count), (x, y) =>
                    x + (y.Key * (y.Value - totalMean).ToColumnMatrix() *
                    (y.Value - totalMean).ToColumnMatrix().Transpose()));

            // Compute the LDA projection and perform eigenvalue decomposition on the projected matrix
            InsightMatrix projection = new InsightMatrix(
                (withinClassScatter.Inverse() * betweenClassScatter));
            MatrixFactorization evd = projection.EigenvalueDecomposition();
            int rank = evd.Eigenvalues.Where(x => x > 0.001).Count();

            // Determine the number of features to keep for the final data set
            if (featureLimit != null)
            {
                // Enforce a raw numeric feature limit
                if (rank > featureLimit)
                    rank = featureLimit.Value;
            }
            else if (percentThreshold != null)
            {
                // Limit to a percent of the variance in the data set (represented by the sum of the eigenvalues)
                double totalVariance = evd.Eigenvalues.Sum() * percentThreshold.Value;
                double accumulatedVariance = 0;
                rank = 0;
                while (accumulatedVariance < totalVariance)
                {
                    accumulatedVariance += evd.Eigenvalues[rank];
                    rank++;
                }
            }

            // Extract the most important vectors (in order by eigenvalue size)
            InsightMatrix projectionVectors = new InsightMatrix(evd.Eigenvalues.Count, rank);
            for (int i = 0; i < rank; i++)
            {
                // Find the largest remaining eigenvalue
                int index = evd.Eigenvalues.MaxIndex();
                projectionVectors.SetColumn(i, evd.Eigenvectors.Column(index));

                // Set this position to zero so the next iteration captures the next-largest eigenvalue
                evd.Eigenvalues[index] = 0;
            }

            // Multiply each class matrix by the projection vectors
            for (int i = 0; i < classes.Count; i++)
            {
                // Save the class vector
                InsightVector classVector = classes[i].Column(0);

                // Create a new class matrix using the projection vectors
                classes[i] = (projectionVectors.Transpose() *
                    classes[i].SubMatrix(0, classes[i].RowCount, 1, classes[i].ColumnCount - 1)
                    .Transpose()).Transpose();

                // Insert the class vector back into the matrix
                classes[i] = classes[i].InsertColumn(0, classVector);
            }

            // Concatenate back into a single matrix
            InsightMatrix result = classes.Aggregate((x, y) => x.Stack(y));

            return result;
        }
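        // For reference, the scatter matrices built above follow the standard LDA formulation:
        //     within-class scatter:   Sw = sum over classes c of Cov(Xc)
        //     between-class scatter:  Sb = sum over classes c of n_c * (mean_c - mean) * (mean_c - mean)^T
        // The projection vectors are the leading eigenvectors of Sw^-1 * Sb (ordered by eigenvalue),
        // and each class sub-matrix is projected onto them before being stacked back together.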
 /// <summary>
 /// Computes the total error of the solution with parameters theta.
 /// </summary>
 /// <param name="X">Training data</param>
 /// <param name="y">Target variable</param>
 /// <param name="theta">Model parameters</param>
  /// <param name="lambda">Regularization weight</param>
 /// <returns>Solution error</returns>
 private double ComputeError(InsightMatrix X, InsightVector y, InsightVector theta, double lambda)
 {
      var h = Sigmoid((X * theta.ToColumnMatrix()).Column(0));
      var first = y.Multiply(h.Log());
      var second = (1 - y).Multiply((1 - h).Log());
      var thetaSub = theta.SubVector(1, theta.Count - 1);
      var reg = (lambda / (2 * X.RowCount)) * thetaSub.Power(2).Sum();
      return -(first.Sum() + second.Sum()) / X.RowCount + reg;
 }
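 // For reference, with h_i = sigmoid(x_i * theta) and m = X.RowCount, the value returned above is
 // the regularized cross-entropy cost (theta[0] is excluded from the penalty):
 //     J(theta) = -(1/m) * sum_i [ y_i * log(h_i) + (1 - y_i) * log(1 - h_i) ]
 //                + (lambda / (2m)) * sum_{j >= 1} theta_j^2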
 /// <summary>
  /// Returns the sigmoid (logistic function) applied element-wise to the original vector.
  /// </summary>
  /// <param name="values">Original vector</param>
 /// <returns>Sigmoid vector</returns>
 private InsightVector Sigmoid(InsightVector values)
 {
     return new InsightVector(values.Select(x => 1 / (1 + Math.Exp(-x))).ToList());
 }
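 // The sigmoid 1 / (1 + e^-x) squashes any real input into (0, 1); for example it maps
 // 0 to 0.5, and large positive or negative inputs approach 1 or 0 respectively.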
 /// <summary>
 /// Calculates the similarity between two vectors.  Uses cosine similarity by default.
 /// </summary>
 /// <param name="u">1st vector</param>
 /// <param name="v">2nd vector</param>
 /// <returns>Similarity between the two vectors</returns>
 public static double SimilarityTo(this InsightVector u, InsightVector v)
 {
     return new CosineSimilarity().CalculateSimilarity(u, v);
 }
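 // For reference, cosine similarity is (u . v) / (||u|| * ||v||), i.e. the cosine of the angle
 // between the two vectors: 1 for identical directions, 0 for orthogonal vectors.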
 /// <summary>
 /// Classifies a new instance of the data using the algorithm's trained model.
 /// </summary>
 /// <param name="instance">New instance</param>
 /// <returns>Classification</returns>
 public int Classify(InsightVector instance)
 {
     return Classify(instance.ToRowMatrix())[0];
 }
 /// <summary>
  /// Calculates the distance between two vectors.  Uses Euclidean distance by default.
 /// </summary>
 /// <param name="u">1st vector</param>
 /// <param name="v">2nd vector</param>
 /// <returns>Distance between the two vectors</returns>
 public static double DistanceFrom(this InsightVector u, InsightVector v)
 {
     return new EuclideanDistance().CalculateDistance(u, v);
 }
 /// <summary>
 /// Computes the total error of the solution with parameters theta.
 /// </summary>
 /// <param name="X">Training data</param>
 /// <param name="y">Target variable</param>
 /// <param name="theta">Model parameters</param>
  /// <param name="lambda">Regularization weight</param>
 /// <returns>Solution error</returns>
 private double ComputeError(InsightMatrix X, InsightVector y, InsightVector theta, double lambda)
 {
     var inner = ((X * theta.ToColumnMatrix()) - y.ToColumnMatrix()).Power(2);
     var thetaSub = theta.SubVector(1, theta.Count - 1);
     var reg = lambda * thetaSub.Multiply(thetaSub).Sum();
     return (inner.Column(0).Sum() / (2 * X.RowCount)) + reg;
 }
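 // For reference, with m = X.RowCount this evaluates the regularized least-squares cost
 //     J(theta) = (1 / (2m)) * sum_i (x_i * theta - y_i)^2 + lambda * sum_{j >= 1} theta_j^2
 // where the intercept term theta[0] is excluded from the penalty.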
        static void Main(string[] args)
        {
            Console.WriteLine("Similarity & Distance Examples");
            Console.WriteLine("------------------------------");
            Console.WriteLine(Environment.NewLine);

            InsightVector u = new InsightVector(new double[] { 1, 2, 3, 4, 5 });
            Console.WriteLine("Vector u:");
            Console.WriteLine(u.ToString());
            Console.WriteLine(Environment.NewLine);

            InsightVector v = new InsightVector(new double[] { 5, 4, 3, 2, 1 });
            Console.WriteLine("Vector v:");
            Console.WriteLine(v.ToString());
            Console.WriteLine(Environment.NewLine);

            double distance = u.DistanceFrom(v);
            Console.WriteLine("Euclidean distance (u, v) = {0}", distance.ToString("F4"));
            Console.WriteLine(Environment.NewLine);

            distance = u.DistanceFrom(v, DistanceMethod.HammingDistance);
            Console.WriteLine("Hamming distance (u, v) = {0}", distance.ToString("F4"));
            Console.WriteLine(Environment.NewLine);

            distance = u.DistanceFrom(v, DistanceMethod.ManhattanDistance);
            Console.WriteLine("Manhattan distance (u, v) = {0}", distance.ToString("F4"));
            Console.WriteLine(Environment.NewLine);

            double similarity = u.SimilarityTo(v);
            Console.WriteLine("Cosine similarity (u, v) = {0}", similarity.ToString("F4"));
            Console.WriteLine(Environment.NewLine);

            similarity = u.SimilarityTo(v, SimilarityMethod.JaccardCoefficient);
            Console.WriteLine("Jaccard coefficient (u, v) = {0}", similarity.ToString("F4"));
            Console.WriteLine(Environment.NewLine);

            similarity = u.SimilarityTo(v, SimilarityMethod.PearsonCorrelation);
            Console.WriteLine("Pearson correlation (u, v) = {0}", similarity.ToString("F4"));
            Console.WriteLine(Environment.NewLine);

            Console.ReadKey();

            Console.WriteLine(Environment.NewLine);
            Console.WriteLine("Covariance & Correlation Examples");
            Console.WriteLine("------------------------------");
            Console.WriteLine(Environment.NewLine);

            InsightMatrix matrix = new InsightMatrix(new double[,] {
                { 2.1, 8 }, { 2.5, 12 }, { 4.0, 14 }, { 3.6, 10 }
            });

            Console.WriteLine("Example matrix:");
            Console.WriteLine(matrix.ToString());
            Console.WriteLine(Environment.NewLine);

            var cov = matrix.CovarianceMatrix();
            Console.WriteLine("Covariance matrix:");
            Console.WriteLine(cov.ToString());
            Console.WriteLine(Environment.NewLine);

            var cor = matrix.CorrelationMatrix();
            Console.WriteLine("Correlation matrix:");
            Console.WriteLine(cor.ToString());
            Console.WriteLine(Environment.NewLine);

            Console.ReadKey();

            Console.WriteLine(Environment.NewLine);
            Console.WriteLine("Feature Extraction Examples");
            Console.WriteLine("------------------------------");
            Console.WriteLine(Environment.NewLine);

            InsightMatrix matrix2 = new InsightMatrix(new double[,] {
                { 2.5, 2.4 }, { 0.5, 0.7 }, { 2.2, 2.9 }, { 1.9, 2.2 }, { 3.1, 3.0 }, { 2.3, 2.7 },
                { 2.0, 1.6 }, { 1.0, 1.1 }, { 1.5, 1.6 }, { 1.1, 0.9 }
            });
            Console.WriteLine("First test matrix:");
            Console.WriteLine(matrix2.ToString());
            Console.WriteLine(Environment.NewLine);

            var pca = matrix2.ExtractFeatures(ExtractionMethod.PrincipalComponentAnalysis, 1);
            Console.WriteLine("Result of principal components analysis:");
            Console.WriteLine(pca.ToString());
            Console.WriteLine(Environment.NewLine);

            var svd = matrix2.ExtractFeatures(ExtractionMethod.SingularValueDecomposition, 1);
            Console.WriteLine("Result of singular value decomposition:");
            Console.WriteLine(svd.ToString());
            Console.WriteLine(Environment.NewLine);

            InsightMatrix matrix3 = new InsightMatrix(new double[,] {
                { 4, 2, 1 }, { 2, 4, 1 }, { 2, 3, 1 }, { 3, 6, 1 }, { 4, 4, 1 },
                { 9, 10, 2 }, { 6, 8, 2 }, { 9, 5, 2 }, { 8, 7, 2 }, { 10, 8, 2 }
            });
            Console.WriteLine("Second test matrix:");
            Console.WriteLine(matrix3.ToString());
            Console.WriteLine(Environment.NewLine);

            var lda = matrix3.ExtractFeatures(ExtractionMethod.LinearDiscriminantAnalysis);
            Console.WriteLine("Result of linear discriminant analysis:");
            Console.WriteLine(lda.ToString());
            Console.WriteLine(Environment.NewLine);

            Console.ReadKey();

            Console.WriteLine(Environment.NewLine);
            Console.WriteLine("Optimization Examples");
            Console.WriteLine("------------------------------");
            Console.WriteLine(Environment.NewLine);

            var simpleHillClimber = new SimpleHillClimbing<double>();
            var transforms = new List<Func<double, double>>();
            transforms.Add(x => x + 1);
            transforms.Add(x => x - 1);
            transforms.Add(x => x + 0.3);
            transforms.Add(x => x - 0.3);
            transforms.Add(x => x + 0.1);
            transforms.Add(x => x - 0.1);
            var solution = simpleHillClimber.FindMaxima(
                0,
                transforms,
                x => Math.Abs(x) - (x * x));
            Console.WriteLine("Simple hill climber solution = {0}", solution.Solution);
            Console.WriteLine("Simple hill climber score = {0}", solution.Score);
            Console.WriteLine(Environment.NewLine);

            var steepestAscentHillClimber = new SteepestAscentHillClimbing<double>();
            var solution2 = steepestAscentHillClimber.FindMaxima(
                0,
                transforms,
                x => Math.Abs(x) - (x * x));
            Console.WriteLine("Steepest ascent hill climber solution = {0}", solution2.Solution);
            Console.WriteLine("Steepest ascent hill climber score = {0}", solution2.Score);
            Console.WriteLine(Environment.NewLine);

             var stochasticHillClimber = new StochasticHillClimbing<double>();
             var solution3 = stochasticHillClimber.FindMaxima(
                 0,
                 transforms,
                 x => Math.Abs(x) - (x * x));
             Console.WriteLine("Stochastic hill climber solution = {0}", solution3.Solution);
             Console.WriteLine("Stochastic hill climber score = {0}", solution3.Score);
            Console.WriteLine(Environment.NewLine);

            Console.ReadKey();

            Console.WriteLine(Environment.NewLine);
            Console.WriteLine("Data Loading Examples");
            Console.WriteLine("------------------------------");
            Console.WriteLine(Environment.NewLine);

            InsightMatrix iris = DataLoader.ImportFromCSV("../../../data/iris.data", ',', false, true);

            Console.WriteLine("Iris data set:");
            Console.WriteLine(iris.ToString());

            Console.ReadKey();

            Console.WriteLine(Environment.NewLine);
            Console.WriteLine("Clustering Examples");
            Console.WriteLine("------------------------------");
            Console.WriteLine(Environment.NewLine);

            var clusterResults = iris.Cluster(ClusteringMethod.KMeans);
            Console.WriteLine("K-Means");
            Console.WriteLine("Distortion = {0}", clusterResults.Distortion);
            Console.WriteLine("Centroids:");
            Console.WriteLine(clusterResults.Centroids.ToString());

            Console.ReadKey();

             var clusterResults2 = iris.Cluster(ClusteringMethod.KMeans, DistanceMethod.EuclideanDistance, 3, 10);
             Console.WriteLine("K-Means (best of 10)");
             Console.WriteLine("Distortion = {0}", clusterResults2.Distortion);
             Console.WriteLine("Centroids:");
             Console.WriteLine(clusterResults2.Centroids.ToString());

            Console.ReadKey();

            Console.WriteLine(Environment.NewLine);
            Console.WriteLine("Linear Regression");
            Console.WriteLine("------------------------------");
            Console.WriteLine(Environment.NewLine);

            Console.WriteLine("Loading regression sample data...");
            InsightMatrix data1 = DataLoader.ImportFromCSV("../../../data/ex1data1.txt", ',', false, false);

            Console.WriteLine("Training model...");
            var model1 = new LinearRegression();
            model1.Train(data1);
            Console.WriteLine("Model training complete.  Parameters:");
            Console.WriteLine(model1.Theta.ToString());
            Console.WriteLine(model1.Error.ToString());

            Console.WriteLine("Predicting output for first data point...");
            data1 = data1.RemoveColumn(data1.ColumnCount - 1);
            var prediction = model1.Predict(data1.Row(0));
            Console.WriteLine("Prediction = {0}", prediction);

            Console.ReadKey();

            Console.WriteLine(Environment.NewLine);
            Console.WriteLine("Logistic Regression");
            Console.WriteLine("------------------------------");
            Console.WriteLine(Environment.NewLine);

            Console.WriteLine("Loading classification sample data...");
            InsightMatrix data2 = DataLoader.ImportFromCSV("../../../data/ex2data1.txt", ',', false, false);

            Console.WriteLine("Training model...");
            var model2 = new LogisticRegression();
            model2.Train(data2);
            Console.WriteLine("Model training complete.  Parameters:");
            Console.WriteLine(model2.Theta.ToString());
            Console.WriteLine(model2.Error.ToString());

            Console.WriteLine("Predicting output for first data point...");
            data2 = data2.RemoveColumn(data2.ColumnCount - 1);
            var classification = model2.Classify(data2.Row(0));
            Console.WriteLine("Classification = {0}", classification);

            Console.ReadKey();
        }
 /// <summary>
 /// Predicts the target for a new instance of the data using the algorithm's trained model.
 /// </summary>
 /// <param name="instance">New instance</param>
 /// <returns>Prediction</returns>
 public double Predict(InsightVector instance)
 {
     return Predict(instance.ToRowMatrix())[0];
 }
 /// <summary>
 /// Creates a new matrix with the provided row inserted at the given index.
 /// </summary>
 /// <param name="index">Row index</param>
 /// <param name="vector">Row vector</param>
 /// <returns>Result matrix</returns>
 public InsightMatrix InsertRow(int index, InsightVector vector)
 {
     return new InsightMatrix(this.Data.InsertRow(index, vector.Data));
 }
        /// <summary>
        /// Creates a new matrix with the provided value inserted as a row at the given index.
        /// </summary>
        /// <param name="index">Row index</param>
        /// <param name="value">Row value</param>
        /// <returns>Result matrix</returns>
        public InsightMatrix InsertRow(int index, double value)
        {
            var vector = new InsightVector(this.ColumnCount);
            for (int i = 0; i < vector.Count; i++)
            {
                vector[i] = value;
            }

            return new InsightMatrix(this.Data.InsertRow(index, vector.Data));
        }
        /// <summary>
         /// Centers each column in the matrix by subtracting the column mean from each value in the column.
        /// </summary>
        /// <param name="meanVector">Vector with pre-computed column means</param>
        /// <returns>Column-centered matrix</returns>
        public InsightMatrix Center(InsightVector meanVector)
        {
            var matrix = new InsightMatrix(this.Data);
            int colLength = matrix.Data.ColumnCount;

            for (int i = 0; i < colLength; i++)
            {
                int length = matrix.Data.RowCount;

                for (int j = 0; j < length; j++)
                {
                    matrix.Data[j, i] = matrix.Data[j, i] - meanVector.Data[i];
                }
            }

            return matrix;
        }
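         // Worked example (illustration only): a column containing { 2, 4, 6 } with a pre-computed
         // mean of 4 becomes { -2, 0, 2 } after centering.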
        /// <summary>
        /// Performs the K-Means clustering algorithm on the data set using the provided parameters.
        /// </summary>
        /// <param name="matrix">Input matrix</param>
        /// <param name="distanceMethod">Distance measure used to compare instances</param>
        /// <param name="clusters">Number of desired clusters</param>
        /// <returns>Result set that includes cluster centroids, cluster assignments, and total distortion</returns>
        private IClusteringResults PerformKMeansClustering(InsightMatrix matrix, 
            DistanceMethod? distanceMethod, int? clusters)
        {
            if (distanceMethod == null)
            {
                // Default to sum of squared error (equivalent to Euclidean distance)
                distanceMethod = DistanceMethod.EuclideanDistance;
            }

            if (clusters == null)
            {
                // Need to add some type of intelligent way to figure out a good number
                // of clusters to use based on an analysis of the data
                clusters = 3;
            }

            var assignments = new InsightVector(matrix.RowCount);
            var centroids = new InsightMatrix(clusters.Value, matrix.ColumnCount);
            var random = new Random();
            double distortion = -1;

             // Initialize means via random selection
             var samples = new List<int>();
             for (int i = 0; i < clusters; i++)
             {
                 int sample = random.Next(0, matrix.RowCount);

                 // Make sure we don't use the same instance more than once
                 while (samples.Exists(x => x == sample))
                 {
                     sample = random.Next(0, matrix.RowCount);
                 }

                 samples.Add(sample);
                 centroids.SetRow(i, matrix.Row(sample));
             }

            // Keep going until convergence point is reached
            while (true)
            {
                // Re-initialize the distortion (total error)
                distortion = 0;

                // Assign each point to the nearest mean
                for (int i = 0; i < matrix.RowCount; i++)
                {
                    // Compute the proximity to each centroid to find the closest match
                    double closestProximity = -1;
                    for (int j = 0; j < clusters; j++)
                    {
                        double proximity = matrix.Row(i).DistanceFrom(centroids.Row(j), distanceMethod.Value);

                        if (j == 0)
                        {
                            closestProximity = proximity;
                            assignments[i] = j;
                        }
                        else if (proximity < closestProximity)
                        {
                            closestProximity = proximity;
                            assignments[i] = j;
                        }
                    }

                    // Add the proximity value to the total distortion for this solution
                    distortion += closestProximity;
                }

                // Calculate the new means for each centroid
                var newCentroids = new InsightMatrix(clusters.Value, matrix.ColumnCount);
                bool converged = true;

                for (int i = 0; i < clusters; i++)
                {
                    int instanceCount = assignments.Where(x => x == i).Count();

                    // Compute the means for each instance assigned to the current cluster
                    for (int j = 0; j < newCentroids.ColumnCount; j++)
                    {
                        double sum = 0;
                        for (int k = 0; k < matrix.RowCount; k++)
                        {
                            if (assignments[k] == i) sum += matrix[k, j];
                        }

                        if (instanceCount > 0)
                            newCentroids[i, j] = Math.Round(sum / instanceCount, 2);
                        else
                            newCentroids[i, j] = centroids[i, j];

                        if (newCentroids[i, j] != centroids[i, j])
                            converged = false;
                    }

                    centroids.SetRow(i, newCentroids.Row(i));
                }

                // If the new centroid means did not change then we've reached the final result
                if (converged) break;
            }

            return new ClusteringResults(centroids, assignments, distortion);
        }
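         // For reference, the distortion returned above is the sum over all instances of the distance
         // from each instance to its assigned centroid; a smaller distortion means tighter clusters,
         // which makes it a convenient score for comparing runs started from different random centroids.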
 /// <summary>
 /// Trains the model using the supplied data.  Uses the default training
 /// parameters for the model.
 /// </summary>
 /// <param name="data">Training data</param>
 public void Train(InsightMatrix data)
 {
     var results = PerformLinearRegression(data, Alpha, Lambda, Iterations);
     Theta = results.Item1;
     Error = results.Item2;
 }
        /// <summary>
        /// Copies the provided value into each column of the specified row.
        /// </summary>
        /// <param name="index">Row index</param>
        /// <param name="value">Row value</param>
        public void SetRow(int index, double value)
        {
            var vector = new InsightVector(this.ColumnCount);
            for (int i = 0; i < vector.Count; i++)
            {
                vector[i] = value;
            }

            this.Data.SetRow(index, vector.Data);
        }
        /// <summary>
        /// Performs linear regression on the input data.
        /// </summary>
        /// <param name="data">Training data</param>
        /// <param name="alpha">The learning rate for the algorithm</param>
        /// <param name="lambda">The regularization weight for the algorithm</param>
        /// <param name="iters">The number of training iterations to run</param>
        /// <returns>Tuple containing the parameter and error vectors</returns>
        private Tuple<InsightVector, InsightVector> PerformLinearRegression(InsightMatrix data, double alpha,
            double lambda, int iters)
        {
            // First add a ones column for the intercept term
            data = data.InsertColumn(0, 1);

            // Split the data into training data and the target variable
            var X = data.RemoveColumn(data.ColumnCount - 1);
            var y = data.Column(data.ColumnCount - 1);

            // Initialize several variables needed for the computation
            var theta = new InsightVector(X.ColumnCount);
            var temp = new InsightVector(X.ColumnCount);
            var error = new InsightVector(iters);

            // Perform gradient descent on the parameters theta
            for (int i = 0; i < iters; i++)
            {
                var delta = (X * theta.ToColumnMatrix()) - y.ToColumnMatrix();

                for (int j = 0; j < theta.Count; j++)
                {
                    var inner = delta.Multiply(X.SubMatrix(0, X.RowCount, j, 1));

                    if (j == 0)
                    {
                        temp[j] = theta[j] - ((alpha / X.RowCount) * inner.Column(0).Sum());
                    }
                    else
                    {
                        // Apply the regularization penalty to every parameter except the intercept
                        var reg = (alpha * lambda / X.RowCount) * theta[j];
                        temp[j] = theta[j] - ((alpha / X.RowCount) * inner.Column(0).Sum()) - reg;
                    }
                }

                theta = temp.Clone();
                error[i] = ComputeError(X, y, theta, lambda);
            }

            return new Tuple<InsightVector, InsightVector>(theta, error);
        }
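         // For reference, with m = X.RowCount each iteration above applies the regularized gradient
         // descent update
         //     theta_j := theta_j - (alpha / m) * [ sum_i (x_i * theta - y_i) * x_ij + lambda * theta_j ]
         // with no penalty applied to the intercept term theta_0.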
 /// <summary>
 /// Copies the values of the given vector to the specified row.
 /// </summary>
 /// <param name="index">Row index</param>
 /// <param name="vector">Row vector</param>
 public void SetRow(int index, InsightVector vector)
 {
     this.Data.SetRow(index, vector.Data);
 }
        /// <summary>
        /// Sorts the rows of a matrix using the values in the designated column for comparison.
        /// </summary>
        /// <param name="columnIndex">Column to use for sorting</param>
        /// <returns>Row-sorted matrix</returns>
        public InsightMatrix Sort(int columnIndex)
        {
            var matrix = new InsightMatrix(this.Data);

             // Original row indices ordered by the values in the sort column
             var sortKeys = Enumerable.Range(0, matrix.Data.RowCount)
                 .OrderBy(x => matrix.Data[x, columnIndex])
                 .ToList();

            for (int i = 0; i < matrix.Data.RowCount; i++)
            {
                // Save the row at the current index
                InsightVector temp = new InsightVector(matrix.Data.Row(i));

                 // Copy the row from the new index to the current index
                 matrix.Data.SetRow(i, matrix.Data.Row(sortKeys[i]));

                 // Copy the saved row to the new index
                 matrix.Data.SetRow(sortKeys[i], temp.Data);

                 // Update the index to show the row previously at position i is now at sortKeys[i]
                 int position = sortKeys.IndexOf(i, i);
                 sortKeys[position] = sortKeys[i];
            }

            return matrix;
        }
 /// <summary>
 /// Default constructor.
 /// </summary>
 /// <param name="centroids">Matrix of the centroids for each cluster</param>
 /// <param name="assignments">Cluster assignments</param>
 /// <param name="distortion">Total distortion (error) of the clustering solution</param>
 public ClusteringResults(InsightMatrix centroids, InsightVector assignments, double distortion)
 {
     Centroids = centroids;
     ClusterAssignments = assignments;
     Distortion = distortion;
 }