/// <summary>
        /// Performs hierarchical clustering based on a matrix of distances.
        /// </summary>
        /// <param name="distMatrix">The matrix of distances. It is lower triangular, excluding the diagonal.</param>
        /// <param name="linkage">Specifies the linkage for the clustering.</param>
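        /// <param name="preserveOrder">If true, the <c>*Linear</c> variant of the selected linkage routine is used; otherwise the regular variant.</param>
        /// <param name="periodic">Passed to the <c>*Linear</c> routines; has no effect when <paramref name="preserveOrder"/> is false.</param>
        /// <param name="nthreads">Passed to the regular routines; has no effect when <paramref name="preserveOrder"/> is true.</param>
        /// <param name="progress">Not used by this overload.</param>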
        /// <returns>An array of cluster nodes defining the resulting tree.</returns>
        public HierarchicalClusterNode[] TreeCluster(MatrixIndexer distMatrix, HierarchicalClusterLinkage linkage,
                                                     bool preserveOrder, bool periodic, int nthreads, Action <int> progress)
        {
            // Dispatches to one of six linkage routines, chosen by `linkage` and `preserveOrder`.
            // `progress` is accepted for signature compatibility but is not used here.

            // Computed for every call; only the regular (non-Linear) routines receive it.
            double avDist = CalcAverageDistance(distMatrix);

            switch (linkage)
            {
            case HierarchicalClusterLinkage.Average:
                // preserveOrder selects the *Linear routine, which takes `periodic`
                // instead of `nthreads` and the average distance.
                return preserveOrder
                    ? AverageLinkageClusterLinear(distMatrix, periodic)
                    : AverageLinkageCluster(distMatrix, nthreads, avDist);

            case HierarchicalClusterLinkage.Maximum:
                return preserveOrder
                    ? MaximumLinkageClusterLinear(distMatrix, periodic)
                    : MaximumLinkageCluster(distMatrix, nthreads, avDist);

            case HierarchicalClusterLinkage.Single:
                return preserveOrder
                    ? SingleLinkageClusterLinear(distMatrix, periodic)
                    : SingleLinkageCluster(distMatrix, nthreads, avDist);

            default:
                // ArgumentOutOfRangeException derives from ArgumentException, so callers that
                // catch ArgumentException keep working; the failure now names the parameter
                // and the unsupported value instead of carrying no information.
                throw new ArgumentOutOfRangeException("linkage", linkage,
                                                      "Linkage method " + linkage + " not implemented");
            }
        }
        /// <summary>
        /// Performs hierarchical clustering based on a matrix of distances.
        /// </summary>
        /// <param name="distMatrix">The matrix of distances. It is lower triangular, excluding the diagonal.</param>
        /// <param name="linkage">Specifies the linkage for the clustering.</param>
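        /// <param name="preserveOrder">If true, the <c>*Linear</c> variant of the selected linkage routine is used; otherwise the regular variant.</param>
        /// <param name="periodic">Passed to the <c>*Linear</c> routines; has no effect when <paramref name="preserveOrder"/> is false.</param>
        /// <param name="nthreads">Passed to the regular routines; has no effect when <paramref name="preserveOrder"/> is true.</param>
        /// <param name="progress">Not used by this overload.</param>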
        /// <returns>An array of cluster nodes defining the resulting tree.</returns>
        public HierarchicalClusterNode[] TreeCluster(float[,] distMatrix, HierarchicalClusterLinkage linkage,
                                                     bool preserveOrder, bool periodic, int nthreads, Action <int> progress)
        {
            // Picks one of six linkage routines from `linkage` and `preserveOrder`.
            // `progress` is not used by this overload.

            // Evaluated on every call, although only the regular routines take it.
            double meanDistance = CalcAverageDistance(distMatrix);

            if (preserveOrder)
            {
                // *Linear routines receive only the matrix and the periodic flag.
                switch (linkage)
                {
                    case HierarchicalClusterLinkage.Average:
                        return AverageLinkageClusterLinear(distMatrix, periodic);

                    case HierarchicalClusterLinkage.Maximum:
                        return MaximumLinkageClusterLinear(distMatrix, periodic);

                    case HierarchicalClusterLinkage.Single:
                        return SingleLinkageClusterLinear(distMatrix, periodic);
                }
            }
            else
            {
                // Regular routines receive the thread count and the average distance.
                switch (linkage)
                {
                    case HierarchicalClusterLinkage.Average:
                        return AverageLinkageCluster(distMatrix, nthreads, meanDistance);

                    case HierarchicalClusterLinkage.Maximum:
                        return MaximumLinkageCluster(distMatrix, nthreads, meanDistance);

                    case HierarchicalClusterLinkage.Single:
                        return SingleLinkageCluster(distMatrix, nthreads, meanDistance);
                }
            }

            // Reached only for a linkage value that none of the cases above handle.
            throw new NotImplementedException($"Linkage method {linkage} not implemented");
        }
        /// <summary>
        /// Performs a hierarchical clustering on the given data matrix.
        /// </summary>
        /// <param name="data">Data matrix that is going to be clustered.</param>
        /// <param name="access">Specifies whether rows or columns are to be clustered</param>
        /// <param name="distance">Defines the distance between two elements</param>
        /// <param name="linkage">Specifies the linkage for the clustering.</param>
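        /// <param name="preserveOrder">Forwarded to the distance-matrix overload of <c>TreeCluster</c>.</param>
        /// <param name="periodic">Forwarded to the distance-matrix overload of <c>TreeCluster</c>.</param>
        /// <param name="nthreads">Forwarded to the distance-matrix overload of <c>TreeCluster</c>.</param>
        /// <param name="progress">Forwarded to the distance-matrix overload of <c>TreeCluster</c>.</param>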
        /// <returns>An array of cluster nodes defining the resulting tree.</returns>
        public HierarchicalClusterNode[] TreeCluster(MatrixIndexer data, MatrixAccess access, IDistance distance,
            HierarchicalClusterLinkage linkage, bool preserveOrder, bool periodic, int nthreads, Action<int> progress)
        {
            // Number of items to cluster: the row count or the column count, depending on `access`.
            int itemCount;
            if (access == MatrixAccess.Rows){
                itemCount = data.RowCount;
            } else{
                itemCount = data.ColumnCount;
            }
            // With two or more items, build the distance matrix and delegate to the
            // distance-matrix overload.
            if (itemCount >= 2){
                float[,] distances = DistanceMatrix(data, distance, access);
                return TreeCluster(distances, linkage, preserveOrder, periodic, nthreads, progress);
            }
            // Fewer than two items: return an empty node array.
            return new HierarchicalClusterNode[0];
        }
        /// <summary>
        /// Performs a hierarchical clustering on the given data matrix.
        /// </summary>
        /// <param name="data">Data matrix that is going to be clustered.</param>
        /// <param name="access">Specifies whether rows or columns are to be clustered</param>
        /// <param name="distance">Defines the distance between two elements</param>
        /// <param name="linkage">Specifies the linkage for the clustering.</param>
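        /// <param name="preserveOrder">Forwarded to the distance-matrix overload of <c>TreeCluster</c>.</param>
        /// <param name="periodic">Forwarded to the distance-matrix overload of <c>TreeCluster</c>.</param>
        /// <param name="nthreads">Forwarded to the distance-matrix overload of <c>TreeCluster</c>.</param>
        /// <param name="progress">Forwarded to the distance-matrix overload of <c>TreeCluster</c>.</param>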
        /// <returns>An array of cluster nodes defining the resulting tree.</returns>
        public HierarchicalClusterNode[] TreeCluster(MatrixIndexer data, MatrixAccess access, IDistance distance,
                                                     HierarchicalClusterLinkage linkage, bool preserveOrder, bool periodic, int nthreads, Action <int> progress)
        {
            // Rows or columns are the items to cluster, as selected by `access`.
            bool clusterRows = access == MatrixAccess.Rows;
            int count = clusterRows ? data.RowCount : data.ColumnCount;

            bool tooFew = count < 2;
            if (!tooFew)
            {
                // Pairwise distances first, then the distance-matrix overload does the clustering.
                float[,] pairwise = DistanceMatrix(data, distance, access);
                return TreeCluster(pairwise, linkage, preserveOrder, periodic, nthreads, progress);
            }

            // Fewer than two items: the result is an empty node array.
            return new HierarchicalClusterNode[0];
        }
예제 #5
0
        public void TestKmeansClusteringPreculusteringWithDuplicateRows(
            [ValueSource(nameof(_data))] float[,] values,
            [ValueSource(nameof(_linkages))] HierarchicalClusterLinkage linkage,
            [ValueSource(nameof(_distances))] IDistance distance)
        {
            // Settings handed to TreeClusterKmeans, named here instead of passed as bare literals.
            const bool preserveOrder = false;
            const bool periodic = false;
            const int threadCount = 1;
            const int meanCount = 2;
            const int restartCount = 1;
            const int iterationLimit = 1000;

            var clustering = new HierarchicalClustering();
            var columnData = new FloatMatrixIndexer(values);

            // Cluster the columns of the input; the progress callback does nothing.
            var tree = clustering.TreeClusterKmeans(
                columnData, MatrixAccess.Columns, distance, linkage,
                preserveOrder, periodic, threadCount,
                meanCount, restartCount, iterationLimit,
                step => { });

            // The returned tree is expected to contain exactly three nodes.
            Assert.AreEqual(3, tree.Length);
        }
        public HierarchicalClusterNode[] TreeClusterKmeans(MatrixIndexer data, MatrixAccess access, IDistance distance,
                                                           HierarchicalClusterLinkage linkage, bool preserveOrder, bool periodic, int nthreads, int nmeans, int restarts,
                                                           int maxIter, Action <int> progress)
        {
            // Two-stage clustering: k-means first, then a tree over the k-means output `c`,
            // then the tree is expanded to cover all original elements.
            int nelements = (access == MatrixAccess.Rows) ? data.RowCount : data.ColumnCount;

            // With no more elements than requested means, fall back to plain tree clustering.
            if (nelements <= nmeans)
            {
                return TreeCluster(data, access, distance, linkage, preserveOrder, periodic, nthreads, progress);
            }

            float[,] c;
            int[] inds;
            // Column clustering runs k-means on the transposed matrix.
            if (access == MatrixAccess.Rows)
            {
                KmeansClustering.GenerateClusters(data, nmeans, maxIter, restarts, progress, out c, out inds);
            }
            else
            {
                KmeansClustering.GenerateClusters(data.Transpose(), nmeans, maxIter, restarts, progress, out c, out inds);
            }

            // Tree over the rows of `c`.
            float[,] distMatrix = DistanceMatrix(new FloatMatrixIndexer(c), distance, MatrixAccess.Rows);
            HierarchicalClusterNode[] nodes = TreeCluster(distMatrix, linkage, preserveOrder, periodic, nthreads, progress);

            Dictionary <int, int[]> clusters;
            Dictionary <int, int>   singletons;
            RearrangeClusters(inds, c.GetLength(0), out clusters, out singletons);

            // The result has nelements - 1 slots. The tree nodes go to the tail, starting at
            // `fill`; the slots from 0 upward receive branches from FillTerminalBranch.
            HierarchicalClusterNode[] newNodes = new HierarchicalClusterNode[nelements - 1];
            int fill = nelements - nmeans;
            Array.Copy(nodes, 0, newNodes, fill, nodes.Length);

            // Next free slot at the front of newNodes; advanced by each copied branch.
            int pos = 0;
            for (int i = fill; i < newNodes.Length; i++)
            {
                // NOTE(review): the edits below reach newNodes[i] only if HierarchicalClusterNode
                // is a reference type — confirm against its declaration.
                HierarchicalClusterNode node = newNodes[i];
                // Left is resolved before right because both share and advance `pos`.
                node.left  = ResolveKmeansChild(node.left, fill, clusters, singletons, newNodes, ref pos);
                node.right = ResolveKmeansChild(node.right, fill, clusters, singletons, newNodes, ref pos);
            }
            return newNodes;
        }

        // Maps one child reference of a k-means tree node to its value in the expanded tree,
        // copying a terminal branch into newNodes at `pos` when the child is a multi-member cluster.
        private int ResolveKmeansChild(int child, int fill, Dictionary <int, int[]> clusters,
                                       Dictionary <int, int> singletons, HierarchicalClusterNode[] newNodes, ref int pos)
        {
            if (child < 0)
            {
                // Negative values are shifted by `fill`, matching the offset at which the tree
                // nodes were copied (presumably they reference other tree nodes — confirm).
                return child - fill;
            }

            int single;
            if (singletons.TryGetValue(child, out single))
            {
                return single;
            }

            int[] members;
            if (clusters.TryGetValue(child, out members))
            {
                HierarchicalClusterNode[] branch = FillTerminalBranch(members, pos);
                Array.Copy(branch, 0, newNodes, pos, branch.Length);
                pos += branch.Length;
                // The child becomes the negated position just past the copied branch.
                return -pos;
            }

            // Found in neither dictionary: leave the reference as it was.
            return child;
        }
        /// <summary>
        /// Performs hierarchical clustering based on a matrix of distances.
        /// </summary>
        /// <param name="distMatrix">The matrix of distances. It is lower triangular, excluding the diagonal.</param>
        /// <param name="linkage">Specifies the linkage for the clustering.</param>
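        /// <param name="preserveOrder">If true, the <c>*Linear</c> variant of the selected linkage routine is used; otherwise the regular variant.</param>
        /// <param name="periodic">Passed to the <c>*Linear</c> routines; has no effect when <paramref name="preserveOrder"/> is false.</param>
        /// <param name="nthreads">Passed to the regular routines; has no effect when <paramref name="preserveOrder"/> is true.</param>
        /// <param name="progress">Not used by this overload.</param>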
        /// <returns>An array of cluster nodes defining the resulting tree.</returns>
        public HierarchicalClusterNode[] TreeCluster(float[,] distMatrix, HierarchicalClusterLinkage linkage,
            bool preserveOrder, bool periodic, int nthreads, Action<int> progress)
        {
            // Selects a linkage routine from `linkage` and `preserveOrder`; `progress` is unused here.
            // The average distance is computed for every call but only the regular routines take it.
            double meanDist = CalcAverageDistance(distMatrix);
            HierarchicalClusterNode[] tree;
            switch (linkage){
                case HierarchicalClusterLinkage.Average:
                    if (preserveOrder){
                        tree = AverageLinkageClusterLinear(distMatrix, periodic);
                    } else{
                        tree = AverageLinkageCluster(distMatrix, nthreads, meanDist);
                    }
                    break;
                case HierarchicalClusterLinkage.Maximum:
                    if (preserveOrder){
                        tree = MaximumLinkageClusterLinear(distMatrix, periodic);
                    } else{
                        tree = MaximumLinkageCluster(distMatrix, nthreads, meanDist);
                    }
                    break;
                case HierarchicalClusterLinkage.Single:
                    if (preserveOrder){
                        tree = SingleLinkageClusterLinear(distMatrix, periodic);
                    } else{
                        tree = SingleLinkageCluster(distMatrix, nthreads, meanDist);
                    }
                    break;
                default:
                    // Any other linkage value is unsupported.
                    throw new NotImplementedException($"Linkage method {linkage} not implemented");
            }
            return tree;
        }
        public HierarchicalClusterNode[] TreeClusterKmeans(MatrixIndexer data, MatrixAccess access, IDistance distance,
            HierarchicalClusterLinkage linkage, bool preserveOrder, bool periodic, int nthreads, int nmeans, int restarts,
            int maxIter, Action<int> progress)
        {
            // Runs k-means, builds a tree over the k-means output `c`, then expands that tree
            // so that it covers all original elements.
            int nelements = (access == MatrixAccess.Rows) ? data.RowCount : data.ColumnCount;
            // With no more elements than requested means, use plain tree clustering instead.
            if (nelements <= nmeans){
                return TreeCluster(data, access, distance, linkage, preserveOrder, periodic, nthreads, progress);
            }
            float[,] c;
            int[] inds;
            // Column clustering runs k-means on the transposed matrix.
            if (access == MatrixAccess.Rows){
                KmeansClustering.GenerateClusters(data, nmeans, maxIter, restarts, progress, out c, out inds);
            } else{
                KmeansClustering.GenerateClusters(data.Transpose(), nmeans, maxIter, restarts, progress, out c, out inds);
            }
            // Tree over the rows of `c`.
            float[,] distMatrix = DistanceMatrix(new FloatMatrixIndexer(c), distance, MatrixAccess.Rows);
            HierarchicalClusterNode[] nodes = TreeCluster(distMatrix, linkage, preserveOrder, periodic, nthreads, progress);
            Dictionary<int, int[]> clusters;
            Dictionary<int, int> singletons;
            RearrangeClusters(inds, c.GetLength(0), out clusters, out singletons);
            // The tree nodes go to the tail of the result, starting at `fill`; the slots from 0
            // upward receive the branches produced by FillTerminalBranch.
            HierarchicalClusterNode[] newNodes = new HierarchicalClusterNode[nelements - 1];
            int fill = nelements - nmeans;
            Array.Copy(nodes, 0, newNodes, fill, nodes.Length);
            // Next free slot at the front of newNodes; advanced by each copied branch.
            int pos = 0;
            // Targets for TryGetValue, so each dictionary is probed once per child instead of
            // a ContainsKey call followed by an indexer lookup.
            int single;
            int[] members;
            for (int i = fill; i < newNodes.Length; i++){
                // NOTE(review): the edits below reach newNodes[i] only if HierarchicalClusterNode
                // is a reference type — confirm against its declaration.
                HierarchicalClusterNode node = newNodes[i];
                // Left child. Negative values are shifted by `fill`, matching the copy offset above.
                if (node.left < 0){
                    node.left -= fill;
                } else if (singletons.TryGetValue(node.left, out single)){
                    node.left = single;
                } else if (clusters.TryGetValue(node.left, out members)){
                    HierarchicalClusterNode[] branch = FillTerminalBranch(members, pos);
                    Array.Copy(branch, 0, newNodes, pos, branch.Length);
                    pos += branch.Length;
                    // The child becomes the negated position just past the copied branch.
                    node.left = -pos;
                }
                // Right child: same rules, handled after the left because both advance `pos`.
                if (node.right < 0){
                    node.right -= fill;
                } else if (singletons.TryGetValue(node.right, out single)){
                    node.right = single;
                } else if (clusters.TryGetValue(node.right, out members)){
                    HierarchicalClusterNode[] branch = FillTerminalBranch(members, pos);
                    Array.Copy(branch, 0, newNodes, pos, branch.Length);
                    pos += branch.Length;
                    node.right = -pos;
                }
            }
            return newNodes;
        }