/// <summary>
/// Compares this node with another object. Two nodes are considered equal when they have the
/// same runtime type, reference the same pair of children (in either order) and their
/// distances differ by less than 0.0001.
/// </summary>
/// <param name="obj">The object to compare with; <c>null</c> is never equal.</param>
/// <returns><c>true</c> if the nodes match as described above, otherwise <c>false</c>.</returns>
public override bool Equals(object obj) {
    if (obj == null) {
        return false;
    }
    if (obj.GetType() != GetType()) {
        return false;
    }
    HierarchicalClusterNode that = (HierarchicalClusterNode) obj;
    // Children may be stored in swapped order and still describe the same merge.
    bool sameChildren = left == that.left && right == that.right;
    bool swappedChildren = left == that.right && right == that.left;
    // Absolute tolerance comparison; a NaN distance never compares as close.
    bool distanceMatches = Math.Abs(distance - that.distance) < 0.0001;
    return (sameChildren || swappedChildren) && distanceMatches;
}
/// <summary>
/// Computes the number of leaves below node <paramref name="i"/> and below every internal node
/// reachable from it, storing each count in <paramref name="sizes"/> at the node's index.
/// A non-negative child value counts as one leaf; a negative child value <c>c</c> refers to the
/// internal node with index <c>-c - 1</c>. References outside the node list contribute 0.
/// Implemented with an explicit stack so that very deep (chain-like) trees cannot exhaust the
/// call stack.
/// </summary>
/// <param name="nodes">The cluster nodes.</param>
/// <param name="sizes">Receives the leaf count per node; must be at least as long as <paramref name="nodes"/>.</param>
/// <param name="i">Index of the node to start from.</param>
/// <returns>The leaf count of node <paramref name="i"/>, or 0 if the index is out of range.</returns>
private static int CalcSizes(IList<HierarchicalClusterNode> nodes, IList<int> sizes, int i) {
    if (i < 0 || i >= nodes.Count) {
        return 0;
    }
    int nodeCount = nodes.Count;
    Stack<int> pending = new Stack<int>();
    // Marks nodes whose children have already been scheduled (post-order bookkeeping).
    bool[] childrenScheduled = new bool[nodeCount];
    pending.Push(i);
    while (pending.Count > 0) {
        int current = pending.Peek();
        HierarchicalClusterNode node = nodes[current];
        int leftIndex = -node.left - 1;
        int rightIndex = -node.right - 1;
        bool leftIsNode = node.left < 0 && leftIndex >= 0 && leftIndex < nodeCount;
        bool rightIsNode = node.right < 0 && rightIndex >= 0 && rightIndex < nodeCount;
        if (!childrenScheduled[current]) {
            childrenScheduled[current] = true;
            // Push right first so the left subtree is processed first, as in the recursive form.
            if (rightIsNode) {
                pending.Push(rightIndex);
            }
            if (leftIsNode) {
                pending.Push(leftIndex);
            }
            continue;
        }
        // Both subtrees are finished: combine their sizes.
        pending.Pop();
        int leftSize = node.left >= 0 ? 1 : (leftIsNode ? sizes[leftIndex] : 0);
        int rightSize = node.right >= 0 ? 1 : (rightIsNode ? sizes[rightIndex] : 0);
        sizes[current] = leftSize + rightSize;
    }
    return sizes[i];
}
/// <summary>
/// Utility format for reading clustering results from R.
/// Each input child value <c>v</c> is converted as follows: a negative value becomes
/// <c>-v - 1</c> (a non-negative index), any other value becomes <c>-v</c>.
/// </summary>
/// <param name="left">first column of <code>hclust$merge</code></param>
/// <param name="right">second column of <code>hclust$merge</code></param>
/// <param name="distance"><code>hclust$height</code></param>
/// <returns>One node per entry of <paramref name="distance"/>, in the same order.</returns>
public static HierarchicalClusterNode[] FromRFormat(int[] left, int[] right, double[] distance) {
    HierarchicalClusterNode[] result = new HierarchicalClusterNode[distance.Length];
    for (int step = 0; step < result.Length; step++) {
        HierarchicalClusterNode merged = new HierarchicalClusterNode();
        merged.distance = distance[step];
        int l = left[step];
        if (l < 0) {
            merged.left = -l - 1;
        } else {
            merged.left = -l;
        }
        int r = right[step];
        if (r < 0) {
            merged.right = -r - 1;
        } else {
            merged.right = -r;
        }
        result[step] = merged;
    }
    return result;
}
/// <summary>
/// Builds a chain of nodes joining all items in <paramref name="inds"/>: the first node joins
/// the first two items, and every following node joins the next item (left) with the node
/// created just before it (right). The distance of the created nodes is left at its default.
/// </summary>
/// <param name="inds">Item indices to join; at least two are required.</param>
/// <param name="firstInd">Index that the first created node will have in the final node array.</param>
/// <returns>The <c>inds.Count - 1</c> nodes of the chain.</returns>
private static HierarchicalClusterNode[] FillTerminalBranch(IList<int> inds, int firstInd) {
    HierarchicalClusterNode[] chain = new HierarchicalClusterNode[inds.Count - 1];
    HierarchicalClusterNode seed = new HierarchicalClusterNode();
    seed.left = inds[0];
    seed.right = inds[1];
    chain[0] = seed;
    // Global index of the most recently created chain node.
    int previousNode = firstInd;
    for (int member = 2; member < inds.Count; member++) {
        HierarchicalClusterNode link = new HierarchicalClusterNode();
        link.left = inds[member];
        // Negative values reference internal nodes: node k is encoded as -1 - k.
        link.right = -1 - previousNode;
        chain[member - 1] = link;
        previousNode++;
    }
    return chain;
}
/// <summary>
/// Derives the layout arrays of a cluster tree whose root is the last element of
/// <paramref name="nodes"/>. If <paramref name="nodes"/> is <c>null</c>, all outputs are <c>null</c>.
/// </summary>
/// <param name="nodes">The cluster nodes serve as input here.</param>
/// <param name="sizes">Per node, the value computed by <c>CalcSizes</c> (number of leaves below the node).</param>
/// <param name="start">Per node, the start position assigned by <c>CalcStartEnd</c>; the root starts at 0.</param>
/// <param name="end">Per node, the end position assigned by <c>CalcStartEnd</c>; the root ends at <c>nodes.Length + 1</c>.</param>
/// <param name="itemOrder">Array of length <c>nodes.Length + 1</c> filled by <c>CalcItemOrder</c>.</param>
/// <param name="itemOrderInv">Result of <c>InvertOrder</c> applied to <paramref name="itemOrder"/>.</param>
public static void CalcTree(HierarchicalClusterNode[] nodes, out int[] sizes, out int[] start, out int[] end,
    out int[] itemOrder, out int[] itemOrderInv) {
    if (nodes == null) {
        sizes = start = end = itemOrder = itemOrderInv = null;
        return;
    }
    int nodeCount = nodes.Length;
    int root = nodeCount - 1;
    int leafCount = nodeCount + 1;
    sizes = new int[nodeCount];
    start = new int[nodeCount];
    end = new int[nodeCount];
    itemOrder = new int[leafCount];
    int count = 0;
    CalcItemOrder(nodes, itemOrder, root, ref count);
    CalcSizes(nodes, sizes, root);
    // Sizes must be known before positions can be assigned.
    CalcStartEnd(nodes, sizes, start, end, root, 0, leafCount);
    itemOrderInv = InvertOrder(itemOrder);
}
/// <summary>
/// Assigns the position range <c>[s, e)</c> to node <paramref name="i"/> and, top-down, a
/// sub-range to every descendant that contains more than one leaf: the left child receives the
/// first <c>leftSize</c> positions of its parent's range and the right child the remainder.
/// Implemented with an explicit stack so that very deep (chain-like) trees cannot exhaust the
/// call stack.
/// </summary>
/// <param name="nodes">The cluster nodes.</param>
/// <param name="sizes">Leaf count per node, as produced by <c>CalcSizes</c>.</param>
/// <param name="start">Receives the start position per node.</param>
/// <param name="end">Receives the end position per node.</param>
/// <param name="i">Index of the node to start from; -1 means nothing to do.</param>
/// <param name="s">Start position for node <paramref name="i"/>.</param>
/// <param name="e">End position for node <paramref name="i"/>.</param>
private static void CalcStartEnd(IList<HierarchicalClusterNode> nodes, IList<int> sizes, IList<int> start,
    IList<int> end, int i, int s, int e) {
    // Each work item is {node index, range start, range end}.
    Stack<int[]> pending = new Stack<int[]>();
    pending.Push(new int[] {i, s, e});
    while (pending.Count > 0) {
        int[] item = pending.Pop();
        int index = item[0];
        if (index == -1) {
            continue;
        }
        int from = item[1];
        int to = item[2];
        start[index] = from;
        end[index] = to;
        HierarchicalClusterNode node = nodes[index];
        int size = sizes[index];
        // A non-negative child is a single leaf; a negative child c is node -1 - c.
        int leftSize = node.left >= 0 ? 1 : sizes[-1 - node.left];
        int rightSize = size - leftSize;
        int split = from + leftSize;
        // Push right first so the left subtree is handled first, as in the recursive form.
        if (rightSize > 1) {
            pending.Push(new int[] {-1 - node.right, split, to});
        }
        if (leftSize > 1) {
            pending.Push(new int[] {-1 - node.left, from, split});
        }
    }
}
/// <summary>
/// Hierarchical clustering accelerated by a preceding k-means step. If there are no more
/// elements than <paramref name="nmeans"/>, plain <c>TreeCluster</c> is applied to the data.
/// Otherwise the elements are first grouped by <c>KmeansClustering.GenerateClusters</c>, the
/// resulting centers are clustered hierarchically, and every center leaf of that tree is then
/// replaced by its member elements (a single element directly, several elements via the chain
/// produced by <c>FillTerminalBranch</c>).
/// </summary>
/// <param name="data">The data matrix.</param>
/// <param name="access">Whether rows or columns are the elements being clustered.</param>
/// <param name="distance">Distance used between k-means centers.</param>
/// <param name="linkage">Linkage passed through to <c>TreeCluster</c>.</param>
/// <param name="preserveOrder">Passed through to <c>TreeCluster</c>.</param>
/// <param name="periodic">Passed through to <c>TreeCluster</c>.</param>
/// <param name="nthreads">Passed through to <c>TreeCluster</c>.</param>
/// <param name="nmeans">Number of k-means clusters requested.</param>
/// <param name="restarts">Passed through to the k-means step.</param>
/// <param name="maxIter">Passed through to the k-means step.</param>
/// <param name="progress">Progress callback passed to the k-means and tree clustering steps.</param>
/// <returns>The nodes of the full tree over all elements (<c>nelements - 1</c> nodes when k-means is used).</returns>
public HierarchicalClusterNode[] TreeClusterKmeans(MatrixIndexer data, MatrixAccess access, IDistance distance,
    HierarchicalClusterLinkage linkage, bool preserveOrder, bool periodic, int nthreads, int nmeans, int restarts,
    int maxIter, Action<int> progress) {
    int nelements = (access == MatrixAccess.Rows) ? data.RowCount : data.ColumnCount;
    if (nelements <= nmeans) {
        return TreeCluster(data, access, distance, linkage, preserveOrder, periodic, nthreads, progress);
    }
    float[,] c;
    int[] inds;
    if (access == MatrixAccess.Rows) {
        KmeansClustering.GenerateClusters(data, nmeans, maxIter, restarts, progress, out c, out inds);
    } else {
        KmeansClustering.GenerateClusters(data.Transpose(), nmeans, maxIter, restarts, progress, out c, out inds);
    }
    float[,] distMatrix = DistanceMatrix(new FloatMatrixIndexer(c), distance, MatrixAccess.Rows);
    HierarchicalClusterNode[] nodes = TreeCluster(distMatrix, linkage, preserveOrder, periodic, nthreads, progress);
    Dictionary<int, int[]> clusters;
    Dictionary<int, int> singletons;
    RearrangeClusters(inds, c.GetLength(0), out clusters, out singletons);
    HierarchicalClusterNode[] newNodes = new HierarchicalClusterNode[nelements - 1];
    // The tree over the centers goes to the end; the first 'fill' slots receive the
    // terminal branches that expand multi-member clusters.
    int fill = nelements - nmeans;
    Array.Copy(nodes, 0, newNodes, fill, nodes.Length);
    int pos = 0;
    for (int i = fill; i < newNodes.Length; i++) {
        HierarchicalClusterNode node = newNodes[i];
        node.left = RemapKmeansChild(node.left, fill, clusters, singletons, newNodes, ref pos);
        node.right = RemapKmeansChild(node.right, fill, clusters, singletons, newNodes, ref pos);
    }
    return newNodes;
}

/// <summary>
/// Translates one child reference of the center tree into the full tree: internal node
/// references are shifted by <paramref name="fill"/>, singleton clusters become their single
/// member, and multi-member clusters are expanded into a terminal branch written to
/// <paramref name="newNodes"/> at <paramref name="pos"/> (which is advanced accordingly).
/// A child found in neither dictionary is returned unchanged.
/// </summary>
private static int RemapKmeansChild(int child, int fill, Dictionary<int, int[]> clusters,
    Dictionary<int, int> singletons, HierarchicalClusterNode[] newNodes, ref int pos) {
    if (child < 0) {
        return child - fill;
    }
    int single;
    if (singletons.TryGetValue(child, out single)) {
        return single;
    }
    int[] members;
    if (clusters.TryGetValue(child, out members)) {
        HierarchicalClusterNode[] branch = FillTerminalBranch(members, pos);
        Array.Copy(branch, 0, newNodes, pos, branch.Length);
        pos += branch.Length;
        // -pos encodes node index pos - 1, the last node of the branch just written.
        return -pos;
    }
    return child;
}
/// <summary>
/// Creates the nodes that merge all items of one cluster into a single chain-shaped branch.
/// Node 0 merges the first two items; node k (k &gt; 0) merges item k + 1 with node k - 1.
/// Distances of the created nodes are not set.
/// </summary>
/// <param name="inds">Indices of the items to merge; needs at least two entries.</param>
/// <param name="firstInd">Position in the final node array at which node 0 of this branch will be stored.</param>
/// <returns>An array of <c>inds.Count - 1</c> nodes.</returns>
private static HierarchicalClusterNode[] FillTerminalBranch(IList<int> inds, int firstInd) {
    int branchLength = inds.Count - 1;
    HierarchicalClusterNode[] branch = new HierarchicalClusterNode[branchLength];
    branch[0] = new HierarchicalClusterNode{left = inds[0], right = inds[1]};
    int k = 1;
    while (k < branchLength) {
        // Final array position of the node created in the previous step.
        int previous = firstInd + k - 1;
        int item = inds[k + 1];
        branch[k] = new HierarchicalClusterNode{left = item, right = -1 - previous};
        k++;
    }
    return branch;
}
/// <summary>
/// Builds a cluster tree over all elements using k-means as a pre-clustering step.
/// When the number of elements does not exceed <paramref name="nmeans"/>, the call is forwarded
/// to <c>TreeCluster</c> on the raw data. Otherwise k-means centers are computed, a tree is
/// built over the centers, and each center leaf in that tree is substituted by the elements
/// assigned to it: one element directly, several elements as a branch from
/// <c>FillTerminalBranch</c>.
/// </summary>
/// <param name="data">The data matrix.</param>
/// <param name="access">Selects rows or columns as the elements to cluster.</param>
/// <param name="distance">Distance used to compare k-means centers.</param>
/// <param name="linkage">Forwarded to <c>TreeCluster</c>.</param>
/// <param name="preserveOrder">Forwarded to <c>TreeCluster</c>.</param>
/// <param name="periodic">Forwarded to <c>TreeCluster</c>.</param>
/// <param name="nthreads">Forwarded to <c>TreeCluster</c>.</param>
/// <param name="nmeans">Requested number of k-means clusters.</param>
/// <param name="restarts">Forwarded to <c>KmeansClustering.GenerateClusters</c>.</param>
/// <param name="maxIter">Forwarded to <c>KmeansClustering.GenerateClusters</c>.</param>
/// <param name="progress">Progress callback forwarded to both clustering steps.</param>
/// <returns>The tree nodes; <c>nelements - 1</c> of them when the k-means path is taken.</returns>
public HierarchicalClusterNode[] TreeClusterKmeans(MatrixIndexer data, MatrixAccess access, IDistance distance,
    HierarchicalClusterLinkage linkage, bool preserveOrder, bool periodic, int nthreads, int nmeans, int restarts,
    int maxIter, Action<int> progress) {
    int nelements = (access == MatrixAccess.Rows) ? data.RowCount : data.ColumnCount;
    if (nelements <= nmeans){
        return TreeCluster(data, access, distance, linkage, preserveOrder, periodic, nthreads, progress);
    }
    float[,] c;
    int[] inds;
    if (access == MatrixAccess.Rows){
        KmeansClustering.GenerateClusters(data, nmeans, maxIter, restarts, progress, out c, out inds);
    } else{
        KmeansClustering.GenerateClusters(data.Transpose(), nmeans, maxIter, restarts, progress, out c, out inds);
    }
    float[,] distMatrix = DistanceMatrix(new FloatMatrixIndexer(c), distance, MatrixAccess.Rows);
    HierarchicalClusterNode[] nodes = TreeCluster(distMatrix, linkage, preserveOrder, periodic, nthreads, progress);
    Dictionary<int, int[]> clusters;
    Dictionary<int, int> singletons;
    RearrangeClusters(inds, c.GetLength(0), out clusters, out singletons);
    HierarchicalClusterNode[] newNodes = new HierarchicalClusterNode[nelements - 1];
    // Center-tree nodes occupy the tail of the array; the leading 'fill' slots are
    // reserved for the branches that expand clusters with several members.
    int fill = nelements - nmeans;
    Array.Copy(nodes, 0, newNodes, fill, nodes.Length);
    int pos = 0;
    for (int i = fill; i < newNodes.Length; i++){
        HierarchicalClusterNode node = newNodes[i];
        int single;
        int[] members;
        if (node.left < 0){
            // Reference to another center-tree node: shift by the number of inserted nodes.
            node.left -= fill;
        } else if (singletons.TryGetValue(node.left, out single)){
            node.left = single;
        } else if (clusters.TryGetValue(node.left, out members)){
            HierarchicalClusterNode[] branch = FillTerminalBranch(members, pos);
            Array.Copy(branch, 0, newNodes, pos, branch.Length);
            pos += branch.Length;
            // -pos encodes node index pos - 1, the last node of the branch just written.
            node.left = -pos;
        }
        if (node.right < 0){
            node.right -= fill;
        } else if (singletons.TryGetValue(node.right, out single)){
            node.right = single;
        } else if (clusters.TryGetValue(node.right, out members)){
            HierarchicalClusterNode[] branch = FillTerminalBranch(members, pos);
            Array.Copy(branch, 0, newNodes, pos, branch.Length);
            pos += branch.Length;
            node.right = -pos;
        }
    }
    return newNodes;
}