Exemple #1
0
 public Cluster(int id, Cluster left, Cluster right )
 {
     Id = id;
     Children = new Cluster[] { left, right };
     Points = left.Points.Concat(right.Points);
     // maybe only need item at leaves
     Members = left.Members.Concat(right.Members).ToArray();
 }
Exemple #2
0
        /// <summary>Generates a clustering.</summary>
        /// <param name="X">The Matrix to process.</param>
        /// <param name="linker">The linker.</param>
        /// <param name="data">(Optional) the data.</param>
        /// <returns>The clustering.</returns>
        private Cluster GenerateClustering(Matrix X, ILinker linker, object[] data = null)
        {
            // Initialize
            Linker = linker;

            var clusters = new List<Cluster>();
            var distances = new Dictionary<Tuple<int, int>, double>();

            // Create a new cluster for each data point
            for (int i = 0; i < X.Rows; i++)
                clusters.Add(new Cluster
                {
                    Id = i,
                    Points = new Vector[] { (Vector)X.Row(i) },
                    Members = data != null ? new object[] { data[i] } : new object[] { X.Row(i) }
                });

            // Set the current closest distance/pair to the first pair of clusters
            var key = new Tuple<int, int>(0, 0);
            var distance = 0.0;

            var clusterId = X.Rows;

            while (clusters.Count > 1)
            {
                var closestClusters = new Tuple<int, int>(0, 1);
                var smallestDistance = Linker.Distance(clusters[0].Points, clusters[1].Points);

                // this needs to be parallelized....
                // Loop through each of the clusters looking for the two closest
                for (int i = 0; i < clusters.Count; i++)
                {
                    for (int j = i + 1; j < clusters.Count; j++)
                    {
                        key = new Tuple<int, int>(clusters[i].Id, clusters[j].Id);

                        // Cache the distance if it hasn't been calculated yet
                        if (!distances.ContainsKey(key))
                            distances.Add(key, Linker.Distance(clusters[i].Points, clusters[j].Points));

                        // Update closest clusters and distance if necessary
                        distance = distances[key];

                        if (distance < smallestDistance)
                        {
                            smallestDistance = distance;
                            closestClusters = new Tuple<int, int>(i, j);
                        }
                    }
                }

                // order clusters by distance
                var min = System.Math.Min(closestClusters.Item1, closestClusters.Item2);
                var max = System.Math.Max(closestClusters.Item1, closestClusters.Item2);

                var newCluster = new Cluster(clusterId, clusters[min], clusters[max]);

                // Remove the merged clusters
                clusters.RemoveAt(min);
                clusters.RemoveAt(max - 1);

                // Add new cluster
                clusters.Add(newCluster);
                clusterId++;
            }

            return clusters.Single();
        }