/// <summary>
/// Groups every node of the subtree rooted at this node by its <c>levelDist</c> value.
/// </summary>
/// <returns>
/// A dictionary whose keys are the distinct <c>levelDist</c> values found in the subtree and whose
/// values list the nodes carrying that distance, in the order they were visited.
/// </returns>
public Dictionary<double, List<HClusterNode>> GetClustersByLevels()
{
    var byLevel = new Dictionary<double, List<HClusterNode>>();
    var pending = new Stack<HClusterNode>();
    pending.Push(this);

    while (pending.Count > 0)
    {
        HClusterNode visited = pending.Pop();

        // Fetch the bucket for this distance, creating it on first use.
        List<HClusterNode> bucket;
        if (!byLevel.TryGetValue(visited.levelDist, out bucket))
        {
            bucket = new List<HClusterNode>();
            byLevel.Add(visited.levelDist, bucket);
        }
        bucket.Add(visited);

        if (visited.joined == null)
        {
            continue;
        }
        foreach (HClusterNode child in visited.joined)
        {
            pending.Push(child);
        }
    }

    return byLevel;
}
/// <summary>
/// Walks the tree below <paramref name="rRoot"/> breadth-first and collects the first nodes on
/// each branch whose <c>levelDist</c> is smaller than <paramref name="dist"/>.
/// </summary>
/// <param name="rRoot">Node at which the walk starts; the root itself is never returned.</param>
/// <param name="dist">Distance limit; children strictly below it are collected, others are expanded.</param>
/// <returns>The collected nodes in breadth-first order.</returns>
public List<HClusterNode> RearangeDendrogram(HClusterNode rRoot, double dist)
{
    var collected = new List<HClusterNode>();
    var frontier = new Queue<HClusterNode>();
    frontier.Enqueue(rRoot);

    while (frontier.Count > 0)
    {
        HClusterNode parent = frontier.Dequeue();
        if (parent.joined == null)
        {
            continue;
        }

        foreach (HClusterNode child in parent.joined)
        {
            if (child.levelDist < dist)
            {
                // Below the limit: keep the child and do not descend further.
                collected.Add(child);
            }
            else
            {
                frontier.Enqueue(child);
            }
        }
    }

    return collected;
}
/// <summary>
/// Runs hierarchical clustering on the structures of every entry in <c>leaves</c>, copies the
/// resulting distance, reference structure and children onto that entry and appends the entry
/// to <c>dendrogList</c>. Progress fields are updated after each entry.
/// </summary>
/// <param name="linkage">Not read by this method.</param>
private void MakeDendrogs(AglomerativeType linkage)
{
    hierarchicalCluster dendrog = new hierarchicalCluster(dMeasure, input, dirName);
    double progressAtStart = currentProgress;

    currentV = 0;
    maxV = leaves.Count + 1;

    for (int idx = 0; idx < leaves.Count; idx++)
    {
        HClusterNode leaf = leaves[idx];

        // The first structure of the leaf is handed to the clusterer as the required reference.
        dendrog.mustRefStructure = leaf.setStruct[0];
        ClusterOutput clustered = dendrog.HierarchicalClustering(leaf.setStruct);
        dendrogList.Add(leaf);

        HClusterNode subRoot = clustered.hNode;
        leaf.levelDist = subRoot.levelDist;
        leaf.realDist = dMeasure.GetRealValue(leaf.levelDist);
        leaf.refStructure = subRoot.refStructure;
        if (subRoot.joined != null)
        {
            // Copy the child list so the leaf owns its own list instance.
            leaf.joined = new List<HClusterNode>(subRoot.joined);
        }

        currentV++;
        currentProgress = progressAtStart + 1.0 / maxProgress * (double)currentV / maxV;
    }

    // Restore the progress value this step started from.
    maxV = currentV;
    currentProgress = progressAtStart;
}
/// <summary>
/// Cuts the subtree rooted at this node at <paramref name="distThreshold"/>.
/// </summary>
/// <param name="distThreshold">Nodes whose <c>levelDist</c> is at or below this value are selected.</param>
/// <returns>
/// The selected nodes mapped to red. For each selected node, its direct children that have exactly
/// the same <c>levelDist</c> are included as well; the subtree below a selected node is not explored.
/// </returns>
public Dictionary<HClusterNode, System.Drawing.Color> CutDendrog(int distThreshold)
{
    var selected = new Dictionary<HClusterNode, System.Drawing.Color>();
    var pending = new Stack<HClusterNode>();
    pending.Push(this);

    while (pending.Count > 0)
    {
        HClusterNode node = pending.Pop();
        bool withinCut = node.levelDist <= distThreshold;

        if (withinCut)
        {
            selected.Add(node, System.Drawing.Color.Red);
        }
        if (node.joined == null)
        {
            continue;
        }

        foreach (HClusterNode child in node.joined)
        {
            if (!withinCut)
            {
                // Still above the cut: keep descending.
                pending.Push(child);
            }
            else if (child.levelDist == node.levelDist)
            {
                selected.Add(child, System.Drawing.Color.Red);
            }
        }
    }

    return selected;
}
/// <summary>
/// Collects the leaves of the subtree rooted at this node.
/// </summary>
/// <returns>All nodes in the subtree whose <c>joined</c> list is <c>null</c>.</returns>
public List<HClusterNode> GetLeaves()
{
    var leafNodes = new List<HClusterNode>();
    var pending = new Stack<HClusterNode>();
    pending.Push(this);

    while (pending.Count > 0)
    {
        HClusterNode node = pending.Pop();
        if (node.joined == null)
        {
            leafNodes.Add(node);
            continue;
        }

        foreach (HClusterNode child in node.joined)
        {
            pending.Push(child);
        }
    }

    return leafNodes;
}
/// <summary>
/// Tells whether <paramref name="node"/> is this node or is reachable from it through
/// <c>joined</c> links.
/// </summary>
/// <param name="node">Node to look for.</param>
/// <returns><c>true</c> when the node is found in the subtree, otherwise <c>false</c>.</returns>
public bool IsVisible(HClusterNode node)
{
    if (this == node)
    {
        return true;
    }

    var pending = new Stack<HClusterNode>();
    pending.Push(this);

    while (pending.Count > 0)
    {
        HClusterNode parent = pending.Pop();
        if (parent.joined == null)
        {
            continue;
        }

        foreach (HClusterNode child in parent.joined)
        {
            if (child == node)
            {
                return true;
            }
            pending.Push(child);
        }
    }

    return false;
}
/// <summary>
/// Recolors the internal nodes of the subtree rooted at this node: red when the node's
/// <c>setStruct</c> contains <paramref name="structMark"/>, green otherwise.
/// Nodes whose <c>joined</c> list is <c>null</c> are left untouched.
/// </summary>
/// <param name="structMark">Structure name to look for in each node.</param>
/// <param name="color">Not read by this method.</param>
public void ColorNode(string structMark, int color)
{
    var pending = new Stack<HClusterNode>();
    pending.Push(this);

    while (pending.Count > 0)
    {
        HClusterNode node = pending.Pop();
        if (node.joined == null)
        {
            continue;
        }

        node.color = node.setStruct.Contains(structMark) ? Color.Red : Color.Green;

        foreach (HClusterNode child in node.joined)
        {
            pending.Push(child);
        }
    }
}
/// <summary>
/// Finds the largest <c>levelDist</c> (truncated to <c>int</c>) in the subtree rooted at this node.
/// As a side effect the <c>kMin</c> field is set to the smallest truncated <c>levelDist</c> found.
/// </summary>
/// <returns>The largest truncated <c>levelDist</c> in the subtree.</returns>
public int SearchMaxDist()
{
    Stack<HClusterNode> st = new Stack<HClusterNode>();

    // Seed both extremes with this node's own distance. The previous seed for the maximum was
    // the negated distance, which produced a positive result no node actually has whenever the
    // root distance was negative.
    int kMax = (int)this.levelDist;
    kMin = (int)this.levelDist;

    st.Push(this);
    while (st.Count != 0)
    {
        HClusterNode current = st.Pop();

        if (current.levelDist > kMax)
        {
            kMax = (int)current.levelDist;
        }
        if (current.levelDist < kMin)
        {
            kMin = (int)current.levelDist;
        }

        if (current.joined != null)
        {
            foreach (var item in current.joined)
            {
                st.Push(item);
            }
        }
    }

    return kMax;
}
/// <summary>
/// Selects the leaves of the subtree rooted at this node that contain at least one of the
/// structures in <paramref name="toMark"/>.
/// </summary>
/// <param name="toMark">Structure names to look for.</param>
/// <param name="color">Color stored for every selected leaf.</param>
/// <returns>
/// The selected leaves mapped to <paramref name="color"/>. A leaf here is a node whose
/// <c>joined</c> list is <c>null</c> or empty.
/// </returns>
public Dictionary<HClusterNode, System.Drawing.Color> MarkNodes(List<string> toMark, System.Drawing.Color color)
{
    var marked = new Dictionary<HClusterNode, System.Drawing.Color>();
    var pending = new Stack<HClusterNode>();
    pending.Push(this);

    while (pending.Count > 0)
    {
        HClusterNode node = pending.Pop();
        bool isLeaf = node.joined == null || node.joined.Count == 0;

        if (!isLeaf)
        {
            foreach (HClusterNode child in node.joined)
            {
                pending.Push(child);
            }
            continue;
        }

        foreach (string wanted in toMark)
        {
            if (node.setStruct.Contains(wanted))
            {
                // One match is enough; the leaf is added at most once.
                marked.Add(node, color);
                break;
            }
        }
    }

    return marked;
}
/// <summary>
/// Recursively assigns distances below <paramref name="node"/>: every child receives the
/// parent's <c>levelDist</c> minus one, and its <c>realDist</c> is recomputed from that value.
/// </summary>
/// <param name="node">Node whose descendants are updated; the node itself is not modified.</param>
private void AddDistance(HClusterNode node)
{
    if (node.joined == null)
    {
        return;
    }

    foreach (HClusterNode child in node.joined)
    {
        child.levelDist = node.levelDist - 1;
        child.realDist = dMeasure.GetRealValue(child.levelDist);

        bool hasChildren = child.joined != null && child.joined.Count > 0;
        if (hasChildren)
        {
            AddDistance(child);
        }
    }
}
/// <summary>
/// Splits the structures of <paramref name="parent"/> into clusters. When no split is produced
/// the parent itself is added to <c>leaves</c>. Otherwise each cluster becomes a new node:
/// clusters with more than 20 structures are attached to the parent and queued in <c>st</c>,
/// smaller ones are added to <c>leaves</c> only.
/// </summary>
/// <param name="parent">Node whose <c>setStruct</c> is divided.</param>
void FastCluster(HClusterNode parent)
{
    // Earlier experiments (DivideSpaceKmeans, a distance-threshold test) were disabled in the
    // original code; only the two strategies below are active.
    ClusterOutput split;
    if (!hConcensus)
    {
        split = DivideSpace1DJury(parent.setStruct);
    }
    else
    {
        split = DivideSpaceHamming(parent.setStruct);
    }

    bool cannotSplit = split == null || split.clusters.Count <= 1;
    if (cannotSplit)
    {
        leaves.Add(parent);
        return;
    }

    parent.joined = new List<HClusterNode>();
    foreach (var members in split.clusters)
    {
        HClusterNode child = new HClusterNode();
        child.setStruct = members;

        if (child.setStruct.Count > 20)
        {
            // Large enough to be split again later.
            parent.joined.Add(child);
            st.Add(child);
        }
        else
        {
            leaves.Add(child);
        }
    }
}
/// <summary>
/// Creates a parent node for <paramref name="nodes"/>.
/// With several nodes, the parent's <c>setStruct</c> is the concatenation of the children's,
/// each child's <c>parent</c> is set to the new node, and <c>stateFreq</c> is cleared on every
/// child except the first. With a single node, its <c>setStruct</c>, <c>stateFreq</c>,
/// <c>refStructure</c> and <c>consistency</c> are reused directly.
/// </summary>
/// <param name="nodes">Children to join; must contain at least one node. The list instance itself becomes <c>joined</c>.</param>
/// <returns>The new parent node, with <c>parent</c> set to <c>null</c>.</returns>
HClusterNode JoinNodes(List<HClusterNode> nodes)
{
    HClusterNode merged = new HClusterNode();
    HClusterNode first = nodes[0];

    merged.setStruct = new List<string>();
    merged.stateFreq = first.stateFreq;

    if (nodes.Count <= 1)
    {
        merged.setStruct = first.setStruct;
        merged.refStructure = first.refStructure;
        merged.consistency = first.consistency;
    }
    else
    {
        foreach (HClusterNode member in nodes)
        {
            merged.setStruct.AddRange(member.setStruct);
            member.parent = merged;
        }

        // Only the first child's state frequencies are carried by the parent; the others are
        // released. (Merging the frequencies and recomputing the reference structure with the
        // jury were disabled in the original code.)
        for (int i = 1; i < nodes.Count; i++)
        {
            nodes[i].stateFreq = null;
        }

        merged.refStructure = first.refStructure;
    }

    merged.joined = nodes;
    merged.parent = null;
    return merged;
}
/// <summary>
/// Returns the largest <c>levelNum</c> found in the subtree rooted at <paramref name="hNode"/>.
/// </summary>
/// <param name="hNode">Root of the subtree to inspect.</param>
/// <returns>The maximum <c>levelNum</c> over the node and all of its descendants.</returns>
public int SearchKmax(HClusterNode hNode)
{
    int kMax = hNode.levelNum;
    if (hNode.joined == null)
    {
        return kMax;
    }

    foreach (var item in hNode.joined)
    {
        // Keep the maximum of the whole child subtree. Storing item.levelNum instead (as the
        // previous version did) dropped any larger value found deeper than the direct child.
        int childMax = SearchKmax(item);
        if (childMax > kMax)
        {
            kMax = childMax;
        }
    }

    return kMax;
}
/// <summary>
/// Computes the smallest pairwise distance among <paramref name="levelNodes"/>, using
/// <c>dMeasure.FindMinimalDistance</c> with the current <c>linkageType</c>.
/// </summary>
/// <param name="levelNodes">Nodes to compare pairwise.</param>
/// <returns>The smallest distance, or <c>Int32.MaxValue</c> when there are fewer than two nodes.</returns>
private int LMinimalDist(List<HClusterNode> levelNodes)
{
    int smallest = Int32.MaxValue;

    for (int first = 0; first < levelNodes.Count; first++)
    {
        HClusterNode anchor = levelNodes[first];
        for (int second = first + 1; second < levelNodes.Count; second++)
        {
            int pairDist = dMeasure.FindMinimalDistance(anchor, levelNodes[second], linkageType).Key;
            if (pairDist < smallest)
            {
                smallest = pairDist;
            }
        }
    }

    return smallest;
}
/// <summary>
/// Resets the color of every internal node in the subtree rooted at this node to green.
/// Nodes whose <c>joined</c> list is <c>null</c> are left untouched.
/// </summary>
public void ClearColors()
{
    var pending = new Stack<HClusterNode>();
    pending.Push(this);

    while (pending.Count > 0)
    {
        HClusterNode node = pending.Pop();
        if (node.joined == null)
        {
            continue;
        }

        node.color = Color.Green;
        foreach (HClusterNode child in node.joined)
        {
            pending.Push(child);
        }
    }
}
/* Disabled legacy code, kept for reference.
 * private void CheckRefDistances()
 * {
 *     HClusterNode current;
 *     hierarchicalCluster dendrog = new hierarchicalCluster(dMeasure);
 *     float dist;
 *     Stack <HClusterNode> localSt=new Stack<HClusterNode>();
 *     st.Clear();
 *     st.Push(root);
 *     current = root;
 *     while (st.Count != 0)
 *     {
 *         current=st.Pop();
 *         st.Push(current.joined[0]);
 *         st.Push(current.joined[1]);
 *
 *         localSt.Push(current);
 *     }
 *     while(localSt.Count!=0)
 *     {
 *         float dist2;
 *
 *         current=localSt.Pop();
 *         dist = dMeasure.GetDistance(current.setStruct[0], current.joined[0].setStruct[0]);
 *         dist2 = dMeasure.GetDistance(current.setStruct[0], current.joined[1].setStruct[0]);
 *         //current.levelDist = (dist > dist2) ? dist : dist2;
 *         //current.levelDist = (dist + current.joined[0].levelDist + dist2 + current.joined[1].levelDist) / 2;
 *         current.levelDist = dMeasure.GetDistance(current.joined[0].setStruct[0], current.joined[1].setStruct[0]);
 *
 *     }
 * }
 */

/// <summary>
/// Creates a parent node for <paramref name="nodes"/>. The parent's <c>joined</c> list is a copy
/// of the given nodes, its <c>setStruct</c> is the concatenation of theirs, and its reference
/// structure is obtained from <c>dMeasure.GetReferenceStructure</c> using the children's
/// reference structures. If the reference structure occurs in <c>setStruct</c>, it is swapped
/// into the first position.
/// </summary>
/// <param name="nodes">Children to join.</param>
/// <returns>The new parent node.</returns>
private HClusterNode JoinNodes(List<HClusterNode> nodes)
{
    HClusterNode merged = new HClusterNode();
    merged.joined = new List<HClusterNode>();
    merged.setStruct = new List<string>();

    foreach (HClusterNode member in nodes)
    {
        merged.joined.Add(member);
        foreach (string name in member.setStruct)
        {
            merged.setStruct.Add(name);
        }
    }

    List<string> childRefs = new List<string>();
    foreach (HClusterNode member in merged.joined)
    {
        childRefs.Add(member.refStructure);
    }

    string reference = dMeasure.GetReferenceStructure(merged.setStruct, childRefs);
    merged.refStructure = reference;

    // Move the first occurrence of the reference structure to the front of the list.
    List<string> all = merged.setStruct;
    for (int pos = 0; pos < all.Count; pos++)
    {
        if (reference != all[pos])
        {
            continue;
        }

        string head = all[0];
        all[0] = all[pos];
        all[pos] = head;
        break;
    }

    return merged;
}
/// <summary>
/// Builds a dendrogram over <paramref name="structures"/> by repeatedly joining the groups
/// returned by <c>LevelMinimalDist</c> until a single node remains.
/// </summary>
/// <param name="structures">
/// Structure names to cluster. With zero or one entry no clustering is done and a single node
/// is returned (an empty list fails on the access to the first element).
/// </param>
/// <returns>
/// A <see cref="ClusterOutput"/> whose <c>hNode</c> is the root of the dendrogram. On the
/// multi-structure path <c>clusters</c> and <c>juryLike</c> are set to <c>null</c>.
/// </returns>
public ClusterOutput HierarchicalClustering(List<string> structures)
{
    ClusterOutput outCl = new ClusterOutput();

    if (structures.Count <= 1)
    {
        HClusterNode single = new HClusterNode();
        outCl.hNode = single;
        single.setStruct = structures;
        single.refStructure = structures[0];
        single.levelDist = 0;
        single.joined = null;
        return outCl;
    }

    progressRead = 1;
    dMeasure.CalcDistMatrix(structures);

    // Level 0: one node per input structure.
    List<HClusterNode> levelNodes = new List<HClusterNode>();
    foreach (string name in structures)
    {
        HClusterNode leaf = new HClusterNode();
        leaf.refStructure = name;
        leaf.joined = null;
        leaf.setStruct.Add(name);
        leaf.levelNum = 0;
        leaf.levelDist = dMeasure.maxSimilarity;
        leaf.realDist = dMeasure.GetRealValue(leaf.levelDist);
        levelNodes.Add(leaf);
    }
    maxV = levelNodes.Count + 1;

    List<List<HClusterNode>> level = new List<List<HClusterNode>>();
    level.Add(levelNodes);

    bool finished = false;
    while (!finished)
    {
        List<HClusterNode> previous = level[level.Count - 1];
        levelNodes = new List<HClusterNode>();

        // `min` is a member read right after the call — presumably set by LevelMinimalDist;
        // confirm against that method.
        List<List<HClusterNode>> groups = LevelMinimalDist(previous);
        foreach (List<HClusterNode> group in groups)
        {
            HClusterNode parent = new HClusterNode();
            parent.joined = group;
            parent.levelDist = min;
            parent.realDist = dMeasure.GetRealValue(min);
            parent.levelNum = level.Count;

            foreach (HClusterNode member in group)
            {
                parent.setStruct.AddRange(member.setStruct);
                member.fNode = true; // mark as consumed by a merge on this level
            }

            // NOTE(review): this result is overwritten with null a few lines below. The call is
            // kept because its side effects, if any, are not visible here.
            parent.refStructure = dMeasure.GetReferenceStructure(parent.setStruct);

            List<string> childRefs = new List<string>();
            foreach (HClusterNode member in parent.joined)
            {
                childRefs.Add(member.refStructure);
            }

            // Prefer the mandatory reference structure when one of the children already has it.
            parent.refStructure = null;
            if (mustRefStructure != null)
            {
                foreach (var childRef in childRefs)
                {
                    if (childRef == mustRefStructure)
                    {
                        parent.refStructure = mustRefStructure;
                    }
                }
            }
            if (parent.refStructure == null)
            {
                parent.refStructure = dMeasure.GetReferenceStructure(parent.setStruct, childRefs);
            }

            levelNodes.Add(parent);
        }

        if (levelNodes.Count > 0)
        {
            level.Add(levelNodes);

            // Carry over the nodes of the previous level that were not merged.
            foreach (HClusterNode carried in previous)
            {
                if (!carried.fNode)
                {
                    levelNodes.Add(carried);
                }
            }
            currentV = maxV - levelNodes.Count;
        }

        finished = level[level.Count - 1].Count == 1;
    }

    HClusterNode top = level[level.Count - 1][0];
    outCl.hNode = top;
    top.levelNum = 0;

    // A node can appear on several levels. Flag them all first, then convert each distance
    // exactly once, clearing the flag as we go.
    foreach (List<HClusterNode> nodesOnLevel in level)
    {
        foreach (HClusterNode n in nodesOnLevel)
        {
            n.fNode = true;
        }
    }
    foreach (List<HClusterNode> nodesOnLevel in level)
    {
        foreach (HClusterNode n in nodesOnLevel)
        {
            if (!n.fNode)
            {
                continue;
            }
            n.levelDist = Math.Abs(n.levelDist - dMeasure.maxSimilarity);
            n.realDist = dMeasure.GetRealValue(n.levelDist);
            n.fNode = false;
        }
    }

    // At the end the level numbers must be set properly: depth from the root, breadth-first.
    Queue<HClusterNode> queue = new Queue<HClusterNode>();
    queue.Enqueue(top);
    while (queue.Count > 0)
    {
        HClusterNode h = queue.Dequeue();
        if (h.joined == null)
        {
            continue;
        }
        foreach (HClusterNode child in h.joined)
        {
            child.levelNum = h.levelNum + 1;
            queue.Enqueue(child);
        }
    }

    outCl.hNode.dirName = dirName;
    outCl.clusters = null;
    outCl.juryLike = null;
    currentV = maxV;
    outCl.runParameters = hierOpt.GetVitalParameters();
    return outCl;
}
/// <summary>
/// Exports the tree rooted at <paramref name="node"/> to two text files: the leaf clusters go
/// to "&lt;file name without extension&gt;_leafs.dat" (no directory part, so relative to the
/// current directory), and the tree itself goes to <paramref name="fileName"/> as lines of
/// "parent child1 child2 realDist", one per pair of siblings.
/// As a side effect the <c>parent</c> field of every descendant is set.
/// </summary>
/// <param name="node">Root of the tree to export.</param>
/// <param name="fileName">Path of the tree file to write.</param>
public static void ExportToAtr(HClusterNode node, string fileName)
{
    List<HClusterNode> leafs = node.GetLeaves();
    List<List<string>> clusters = new List<List<string>>();
    foreach (var cl in leafs)
    {
        clusters.Add(cl.setStruct);
    }

    string newName = Path.GetFileNameWithoutExtension(fileName) + "_leafs.dat";
    // `using` guarantees the file handle is released even when Save throws.
    using (StreamWriter leafStream = new StreamWriter(newName))
    {
        Save(clusters, leafStream, true);
    }

    using (StreamWriter stream = new StreamWriter(fileName))
    {
        Queue<HClusterNode> ww = new Queue<HClusterNode>();
        Dictionary<HClusterNode, string> nodeName = new Dictionary<HClusterNode, string>();
        Dictionary<string, int> leafNum = new Dictionary<string, int>();

        // First pass: record the parent of every node.
        ww.Enqueue(node);
        while (ww.Count != 0)
        {
            HClusterNode aux = ww.Dequeue();
            if (aux.joined != null)
            {
                foreach (var item in aux.joined)
                {
                    ww.Enqueue(item);
                    item.parent = aux;
                }
            }
        }

        // Number the leaves by their reference structure, starting from 1.
        int counter = 1;
        foreach (var item in leafs)
        {
            leafNum.Add(item.refStructure, counter++);
        }

        // Second pass: name the nodes and write one line per pair of siblings.
        int nodeCounter = 0;
        ww.Enqueue(node);
        while (ww.Count != 0)
        {
            HClusterNode aux = ww.Dequeue();

            string parentNode;
            if (!nodeName.TryGetValue(aux, out parentNode))
            {
                parentNode = "Node_" + (++nodeCounter);
                nodeName.Add(aux, parentNode);
            }

            if (aux.joined == null)
            {
                continue;
            }

            List<HClusterNode> hList = aux.joined;
            for (int i = 0; i < hList.Count; i++)
            {
                string name1 = ResolveAtrName(hList[i], nodeName, leafNum, ref nodeCounter);
                for (int j = i + 1; j < hList.Count; j++)
                {
                    string name2 = ResolveAtrName(hList[j], nodeName, leafNum, ref nodeCounter);
                    stream.WriteLine(parentNode + " " + name1 + " " + name2 + " " + aux.realDist);
                }
                ww.Enqueue(hList[i]);
            }
        }
    }
}

/// <summary>
/// Returns the export name of <paramref name="item"/>, creating and caching it on first use.
/// Internal nodes get the next "Node_n" name; leaves get "refStructure_leafNumber".
/// </summary>
private static string ResolveAtrName(HClusterNode item, Dictionary<HClusterNode, string> nodeName, Dictionary<string, int> leafNum, ref int nodeCounter)
{
    string name;
    if (nodeName.TryGetValue(item, out name))
    {
        return name;
    }

    if (item.joined != null)
    {
        name = "Node_" + (++nodeCounter);
    }
    else
    {
        // Only leaves are looked up in leafNum. The previous code did the lookup for every
        // node, which fails for an internal node whose reference structure is not a leaf's.
        name = item.refStructure + "_" + leafNum[item.refStructure];
    }

    nodeName.Add(item, name);
    return name;
}
/// <summary>
/// Repeatedly joins the entries of <c>dendrogList</c> that are closest to each other (distance
/// between their reference structures) until at most two entries remain or no distance below
/// the sentinel is found, then joins whatever is left under a single root.
/// <c>dendrogList</c> is modified in place.
/// </summary>
/// <param name="linkage">Not read by this method.</param>
/// <returns>The root node of the connected dendrogram.</returns>
private HClusterNode ConnectDendrogs(AglomerativeType linkage)
{
    // Upper bound used both as the start value of the minimum search and as the
    // "nothing found" marker.
    const int noDistance = 1000000000;

    List<Dictionary<int, int>> groups = new List<Dictionary<int, int>>();
    int closest = noDistance - 1;

    while (closest != noDistance && dendrogList.Count > 2)
    {
        int count = dendrogList.Count;
        int[,] pairDist = new int[count, count];

        // Upper-triangle distance matrix and its minimum.
        closest = noDistance;
        for (int i = 0; i < count; i++)
        {
            for (int j = i + 1; j < count; j++)
            {
                pairDist[i, j] = dMeasure.GetDistance(dendrogList[i].refStructure, dendrogList[j].refStructure);
                if (pairDist[i, j] < closest)
                {
                    closest = pairDist[i, j];
                }
            }
        }
        if (closest == noDistance)
        {
            break;
        }

        // One candidate group per entry: the entry plus every later entry at minimal distance.
        // The dictionaries are used as index sets; the values are always 0.
        groups.Clear();
        for (int i = 0; i < count; i++)
        {
            Dictionary<int, int> group = new Dictionary<int, int>();
            group.Add(i, 0);
            for (int j = i + 1; j < count; j++)
            {
                if (pairDist[i, j] == closest)
                {
                    group.Add(j, 0);
                }
            }
            if (group.Keys.Count > 1)
            {
                groups.Add(group);
            }
        }

        // Union groups that share an index. After each union the scan restarts from the
        // beginning (i = -1 becomes 0 after the loop increment; j is pushed past the end).
        for (int i = 0; i < groups.Count; i++)
        {
            for (int j = i + 1; j < groups.Count; j++)
            {
                foreach (var index in groups[j].Keys)
                {
                    if (!groups[i].ContainsKey(index))
                    {
                        continue;
                    }

                    foreach (var other in groups[j].Keys)
                    {
                        if (!groups[i].ContainsKey(other))
                        {
                            groups[i].Add(other, 0);
                        }
                    }
                    groups.RemoveAt(j);
                    i = -1;
                    j = groups.Count;
                    break;
                }
            }
        }

        // Replace every group by one joined node, stored at the group's smallest index;
        // the remaining indices are removed afterwards, highest first.
        List<HClusterNode> members = new List<HClusterNode>();
        List<int> obsolete = new List<int>();
        for (int n = groups.Count - 1; n >= 0; n--)
        {
            members.Clear();
            foreach (var index in groups[n].Keys)
            {
                HClusterNode candidate = dendrogList[index];
                if (!members.Contains(candidate))
                {
                    members.Add(candidate);
                }
            }

            HClusterNode joinedNode = JoinNodes(members);
            joinedNode.levelDist = closest;
            joinedNode.realDist = dMeasure.GetRealValue(closest);

            List<int> keys = new List<int>(groups[n].Keys);
            keys.Sort();
            dendrogList[keys[0]] = joinedNode;
            for (int k = keys.Count - 1; k >= 1; k--)
            {
                if (!obsolete.Contains(keys[k]))
                {
                    obsolete.Add(keys[k]);
                }
            }
        }

        obsolete.Sort();
        for (int k = obsolete.Count - 1; k >= 0; k--)
        {
            dendrogList.RemoveAt(obsolete[k]);
        }
    }

    return dendrogList.Count > 1 ? JoinNodes(dendrogList) : dendrogList[0];
}
/// <summary>
/// Runs the whole fast-dendrogram pipeline on <paramref name="structs"/>: prepares the
/// consensus (when <c>hConcensus</c> is set) and the jury, splits the root with
/// <c>FastCluster</c> until enough nodes exist, builds a dendrogram per leaf, and connects
/// them into a single tree. Progress fields are advanced after each stage.
/// </summary>
/// <param name="structs">Structure names to cluster; assigned to the root's <c>setStruct</c>.</param>
/// <returns>A <see cref="ClusterOutput"/> whose <c>hNode</c> is the root of the final tree.</returns>
public ClusterOutput Run(List<string> structs)
{
    maxProgress = 5;
    currentProgress = 0;

    if (hConcensus)
    {
        // The consensus preparation counts as one extra progress stage.
        maxProgress++;
        consensus = new HammingConsensus(dMeasure.dirName, null, false, input.consensusProfile);
        progressObject = consensus;
        consensus.InitMeasure();
        currentProgress += 1.0 / maxProgress;
    }

    jury = new jury1D();
    progressObject = jury;
    currentProgress += 1.0 / maxProgress;
    progressObject = null;
    jury.PrepareJury(dMeasure.dirName, dMeasure.alignFile, input.jury1DProfileFast);
    currentProgress += 1.0 / maxProgress;

    root.setStruct = structs;
    FastCluster(root);
    maxV = initNodesNum;

    // Keep splitting the largest pending node until the requested number of nodes is reached.
    while (st.Count > 0 && (leaves.Count + st.Count) < initNodesNum)
    {
        st.Sort((left, right) => right.setStruct.Count.CompareTo(left.setStruct.Count));

        HClusterNode largest = st[0];
        st.RemoveAt(0);
        FastCluster(largest);
        currentV += leaves.Count + st.Count;
    }
    currentV = maxV;
    currentProgress += 1.0 / maxProgress;

    // Whatever is still pending becomes a leaf, in its current order.
    foreach (HClusterNode pendingNode in st)
    {
        leaves.Add(pendingNode);
    }
    st.Clear();

    MakeDendrogs(linkage);
    currentProgress += 1.0 / maxProgress;

    PrepareList();
    root = ConnectDendrogs(linkage);
    root.levelDist = root.SearchMaxDist();
    root.realDist = dMeasure.GetRealValue(root.levelDist);

    ClusterOutput clOut = new ClusterOutput();
    clOut.hNode = root;
    currentProgress += 1.0 / maxProgress;
    return clOut;
}
/// <summary>
/// Recursively builds a tree node for <paramref name="sNames"/>: the structures are clustered
/// with <c>FindClustersNumber</c> and every cluster larger than <c>threshold</c> becomes a
/// child built the same way. Progress and statistics fields (<c>hcurrentV</c>, <c>maxDist</c>,
/// <c>avrIterNumber</c>, <c>all</c>) are updated along the way.
/// </summary>
/// <param name="sNames">Structure names belonging to the node.</param>
/// <param name="levelNum">Depth of the parent; the new node gets this value plus one.</param>
/// <returns>The new node, or <c>null</c> when <paramref name="sNames"/> is empty.</returns>
private HClusterNode MakeNodes(List<string> sNames, int levelNum)
{
    if (sNames.Count == 0)
    {
        return null;
    }

    int depth = levelNum + 1;

    HClusterNode node = new HClusterNode();
    node.setStruct = sNames;
    node.iterNum = loopCounter;
    node.num = depth;
    if (node.num > maxDist)
    {
        maxDist = node.num;
    }

    KeyValuePair<float, List<List<string>>> found = FindClustersNumber(sNames);

    // The split is always accepted for the full structure set; otherwise only when its score
    // is below BMIndex.
    bool accepted = sNames.Count == dMeasure.structNames.Count || found.Key < BMIndex;
    List<List<string>> clusters = accepted ? found.Value : null;

    if (clusters == null)
    {
        // No split: all structures of this node count as processed.
        hcurrentV += sNames.Count;
        return node;
    }
    if (clusters.Count == 0)
    {
        return node;
    }

    for (int i = 0; i < clusters.Count; i++)
    {
        if (clusters[i].Count > threshold)
        {
            HClusterNode child = MakeNodes(clusters[i], depth);
            if (child != null)
            {
                if (node.joined == null)
                {
                    node.joined = new List<HClusterNode>();
                }
                node.joined.Add(child);
            }
        }
        else
        {
            // Too small to split further; it is not attached as a child.
            hcurrentV += clusters[i].Count;
        }
    }

    avrIterNumber += depth;
    all++;
    return node;
}
/// <summary>
/// Builds a tree from the hash clusters of <c>hCluster</c>. Every hash cluster becomes a
/// ground-level node; then, level by level, nodes whose keys share the same prefix (the key
/// with its last character removed) are joined, until the key length is exhausted.
/// <c>currentV</c> is advanced from 20 to 100 as a progress indicator.
/// </summary>
/// <returns>A <see cref="ClusterOutput"/> whose <c>hNode</c> is the root of the tree.</returns>
public ClusterOutput RunHTree()
{
    HClusterNode root = null;

    // The return value is not used here; dicFinal is read right after the call.
    hCluster.RunHashCluster();
    Dictionary<string, List<int>> clusters = hCluster.dicFinal;
    juryLocal.PrepareJury(hCluster.al);

    ClusterOutput output;
    List<HClusterNode> groundLevel = new List<HClusterNode>();
    foreach (var item in clusters)
    {
        HClusterNode aux = new HClusterNode();
        aux.parent = null;
        aux.joined = null;
        aux.setStruct = new List<string>(item.Value.Count + 1);
        foreach (var index in item.Value)
        {
            aux.setStruct.Add(hCluster.structNames[index]);
        }

        output = juryLocal.JuryOptWeights(aux.setStruct);
        aux.stateFreq = juryLocal.columns;
        aux.refStructure = output.juryLike[0].Key;
        aux.realDist = 0;
        aux.levelDist = 0;
        aux.dirName = item.Key;
        aux.consistency = hCluster.CalcClusterConsistency(aux.setStruct);
        groundLevel.Add(aux);
    }

    currentV = 20;
    int size = groundLevel[0].dirName.Length;
    double step = 80.0 / size;

    for (int i = 1; i < size; i++)
    {
        // Group the current level by key prefix.
        Dictionary<string, List<HClusterNode>> join = new Dictionary<string, List<HClusterNode>>();
        foreach (var item in groundLevel)
        {
            string key = item.dirName;
            key = key.Remove(key.Length - 1);

            List<HClusterNode> sameKey;
            if (!join.TryGetValue(key, out sameKey))
            {
                sameKey = new List<HClusterNode>();
                join.Add(key, sameKey);
            }
            sameKey.Add(item);
        }

        groundLevel = new List<HClusterNode>();
        foreach (var item in join)
        {
            HClusterNode aux = JoinNodes(item.Value);
            aux.levelDist = aux.levelNum = i;
            aux.realDist = i;
            aux.dirName = item.Key;
            groundLevel.Add(aux);
        }

        // Derive progress from the un-truncated step. Adding (int)step each round, as before,
        // adds 0 whenever the key is longer than 80 characters, so progress never moved.
        currentV = 20 + (int)(step * i);
    }

    if (groundLevel.Count == 1)
    {
        root = groundLevel[0];
    }
    else
    {
        root = JoinNodes(groundLevel);
        root.levelDist = size;
        root.realDist = size;
    }
    currentV = 100;

    ClusterOutput outHTree = new ClusterOutput();
    outHTree.hNode = root;
    return outHTree;
}