/// <summary>
/// Hierarchically clusters either the rows or the columns of a data matrix.
/// </summary>
/// <remarks>
/// With fewer than two elements there is nothing to merge and an empty array is returned.
/// Otherwise the pairwise distance matrix is computed and handed to the overload of
/// <c>TreeCluster</c> that takes a distance matrix.
/// </remarks>
/// <param name="data">Data matrix whose rows or columns are clustered.</param>
/// <param name="access">Selects whether rows or columns are the elements being clustered.</param>
/// <param name="distance">Distance measure applied to each pair of elements.</param>
/// <param name="linkage">Linkage forwarded to the clustering step.</param>
/// <param name="preserveOrder">Forwarded unchanged to the distance-matrix overload.</param>
/// <param name="periodic">Forwarded unchanged to the distance-matrix overload.</param>
/// <param name="nthreads">Forwarded unchanged to the distance-matrix overload.</param>
/// <param name="progress">Forwarded unchanged to the distance-matrix overload.</param>
/// <returns>
/// An array of cluster nodes defining the resulting tree; an empty array when fewer than two
/// elements are available.
/// </returns>
public HierarchicalClusterNode[] TreeCluster(MatrixIndexer data, MatrixAccess access, IDistance distance,
    HierarchicalClusterLinkage linkage, bool preserveOrder, bool periodic, int nthreads, Action<int> progress) {
    int count;
    if (access == MatrixAccess.Rows) {
        count = data.RowCount;
    } else {
        count = data.ColumnCount;
    }
    if (count >= 2) {
        float[,] pairwise = DistanceMatrix(data, distance, access);
        return TreeCluster(pairwise, linkage, preserveOrder, periodic, nthreads, progress);
    }
    // Zero or one element: no merges are possible.
    return new HierarchicalClusterNode[0];
}
/// <summary>
/// Create distance matrix from <see cref="IDistance"/>.
/// </summary>
/// <remarks>
/// Only pairs of rows (i, j) with j greater than i are evaluated. The results are stored
/// consecutively in a flat array of N * (N - 1) / 2 entries, ordered by i and then by j.
/// </remarks>
/// <param name="data">Matrix whose rows are compared pairwise.</param>
/// <param name="distance">Distance applied to each pair of rows.</param>
public GenericDistanceMatrix(MatrixIndexer data, IDistance distance) {
    N = data.RowCount;
    // Evaluate the pair count in 64-bit arithmetic. The 32-bit product N * (N - 1) wraps around
    // once N exceeds 46341, even though the halved result still fits an array length for a range
    // of larger N; for even larger N the wrapped value can be positive and would silently
    // allocate an array of the wrong size.
    _distances = new double[(long) N * (N - 1) / 2];
    int k = 0;
    for (int i = 0; i < N; i++) {
        // Fetch row i once and reuse it for every partner row j.
        var xi = data.GetRow(i);
        for (int j = i + 1; j < N; j++) {
            _distances[k++] = distance.Get(xi, data.GetRow(j));
        }
    }
}
/// <summary>
/// Fetches one row or one column of the matrix, depending on the access mode.
/// </summary>
/// <param name="data">Matrix to read from.</param>
/// <param name="index">Index of the row or column to fetch.</param>
/// <param name="access">
/// <c>MatrixAccess.Rows</c> selects <c>GetRow</c>; any other value selects <c>GetColumn</c>.
/// </param>
/// <returns>The vector returned by the selected accessor.</returns>
private static BaseVector GetVector(MatrixIndexer data, int index, MatrixAccess access) {
    if (access != MatrixAccess.Rows) {
        return data.GetColumn(index);
    }
    return data.GetRow(index);
}
/// <summary>
/// Computes the pairwise distances between the rows or the columns of a matrix.
/// </summary>
/// <remarks>
/// Only entries whose column index is below the row index are filled. The diagonal and the
/// upper triangle keep the default value of zero.
/// </remarks>
/// <param name="data">Matrix providing the vectors to compare.</param>
/// <param name="distance">Distance applied to each pair of vectors; its result is cast to float.</param>
/// <param name="access">Selects whether rows or columns are compared.</param>
/// <returns>A square matrix with one row and one column per compared element.</returns>
private static float[,] DistanceMatrix(MatrixIndexer data, IDistance distance, MatrixAccess access) {
    int nelements = (access == MatrixAccess.Rows) ? data.RowCount : data.ColumnCount;
    float[,] result = new float[nelements, nelements];
    // Element 0 has no partner with a smaller index, so the outer loop starts at 1.
    for (int i = 1; i < nelements; i++) {
        // Vector i is the same for the whole inner loop; fetch it once instead of once per pair.
        BaseVector xi = GetVector(data, i, access);
        for (int j = 0; j < i; j++) {
            result[i, j] = (float) distance.Get(xi, GetVector(data, j, access));
        }
    }
    return result;
}
/// <summary>
/// Hierarchical clustering that first groups the elements with k-means and then builds the
/// tree on the k-means centers.
/// </summary>
/// <remarks>
/// If the number of elements does not exceed <paramref name="nmeans"/>, the call falls back to
/// <c>TreeCluster</c> on the data itself. Otherwise the tree computed on the centers is copied
/// to the tail of the result array and every leaf of that tree is replaced either by the single
/// original element it stands for or by a branch produced by <c>FillTerminalBranch</c>, which is
/// written to the front of the result array.
/// </remarks>
/// <param name="data">Data matrix whose rows or columns are clustered.</param>
/// <param name="access">Selects rows or columns; for columns the transposed matrix is given to k-means.</param>
/// <param name="distance">Distance used between the k-means centers (and in the fallback).</param>
/// <param name="linkage">Linkage forwarded to <c>TreeCluster</c>.</param>
/// <param name="preserveOrder">Forwarded unchanged to <c>TreeCluster</c>.</param>
/// <param name="periodic">Forwarded unchanged to <c>TreeCluster</c>.</param>
/// <param name="nthreads">Forwarded unchanged to <c>TreeCluster</c>.</param>
/// <param name="nmeans">Cluster count passed to <c>KmeansClustering.GenerateClusters</c>.</param>
/// <param name="restarts">Forwarded to <c>KmeansClustering.GenerateClusters</c>.</param>
/// <param name="maxIter">Forwarded to <c>KmeansClustering.GenerateClusters</c>.</param>
/// <param name="progress">Forwarded to both the k-means step and <c>TreeCluster</c>.</param>
/// <returns>An array of cluster nodes; in the k-means path it has one entry less than there are elements.</returns>
public HierarchicalClusterNode[] TreeClusterKmeans(MatrixIndexer data, MatrixAccess access, IDistance distance,
    HierarchicalClusterLinkage linkage, bool preserveOrder, bool periodic, int nthreads, int nmeans, int restarts,
    int maxIter, Action<int> progress) {
    int nelements = (access == MatrixAccess.Rows) ? data.RowCount : data.ColumnCount;
    if (nelements <= nmeans) {
        return TreeCluster(data, access, distance, linkage, preserveOrder, periodic, nthreads, progress);
    }
    float[,] c;
    int[] inds;
    if (access == MatrixAccess.Rows) {
        KmeansClustering.GenerateClusters(data, nmeans, maxIter, restarts, progress, out c, out inds);
    } else {
        KmeansClustering.GenerateClusters(data.Transpose(), nmeans, maxIter, restarts, progress, out c, out inds);
    }
    float[,] distMatrix = DistanceMatrix(new FloatMatrixIndexer(c), distance, MatrixAccess.Rows);
    HierarchicalClusterNode[] nodes = TreeCluster(distMatrix, linkage, preserveOrder, periodic, nthreads, progress);
    Dictionary<int, int[]> clusters;
    Dictionary<int, int> singletons;
    RearrangeClusters(inds, c.GetLength(0), out clusters, out singletons);
    HierarchicalClusterNode[] newNodes = new HierarchicalClusterNode[nelements - 1];
    // The center tree occupies the tail of newNodes; the first 'fill' slots are left for the
    // branches that expand the k-means clusters.
    // NOTE(review): assumes nodes.Length == nmeans - 1 so the copy ends exactly at the array end - confirm.
    int fill = nelements - nmeans;
    Array.Copy(nodes, 0, newNodes, fill, nodes.Length);
    int pos = 0;
    for (int i = fill; i < newNodes.Length; i++) {
        // NOTE(review): the updates below rely on HierarchicalClusterNode being a reference type - confirm.
        HierarchicalClusterNode node = newNodes[i];
        // Left is resolved before right because each expansion advances 'pos'.
        node.left = ResolveKmeansChild(node.left, fill, singletons, clusters, newNodes, ref pos);
        node.right = ResolveKmeansChild(node.right, fill, singletons, clusters, newNodes, ref pos);
    }
    return newNodes;
}

/// <summary>
/// Translates one child reference of a center-tree node into a reference valid in the final tree.
/// </summary>
/// <param name="child">Child value as stored in the center tree.</param>
/// <param name="fill">Offset at which the center tree was copied into <paramref name="newNodes"/>.</param>
/// <param name="singletons">Replacement values for children found in this map.</param>
/// <param name="clusters">Member lists for children that are expanded into a branch.</param>
/// <param name="newNodes">Result array that receives the expanded branches.</param>
/// <param name="pos">Next free slot at the front of <paramref name="newNodes"/>; advanced by each expansion.</param>
/// <returns>The translated child value; the input value when none of the cases applies.</returns>
private int ResolveKmeansChild(int child, int fill, Dictionary<int, int> singletons,
    Dictionary<int, int[]> clusters, HierarchicalClusterNode[] newNodes, ref int pos) {
    if (child < 0) {
        // Negative references are shifted by the offset the center tree was copied to.
        return child - fill;
    }
    int element;
    if (singletons.TryGetValue(child, out element)) {
        return element;
    }
    int[] members;
    if (clusters.TryGetValue(child, out members)) {
        HierarchicalClusterNode[] branch = FillTerminalBranch(members, pos);
        Array.Copy(branch, 0, newNodes, pos, branch.Length);
        pos += branch.Length;
        // -pos is the negative, one-based reference to the last node just copied.
        // NOTE(review): presumably that last node is the root of the branch - confirm in FillTerminalBranch.
        return -pos;
    }
    return child;
}