/// <summary>
/// Builds a cluster hierarchy over <paramref name="structures"/>: every structure starts
/// as its own leaf node, then the groups returned by <c>LevelMinimalDist</c> are merged
/// level by level until the top level contains exactly one node.
/// </summary>
/// <param name="structures">Names of the structures to cluster.</param>
/// <returns>
/// A <c>ClusterOutput</c> whose <c>hNode</c> is the root of the hierarchy. For zero or one
/// input structure the root is a single node holding the input list itself (its
/// <c>refStructure</c> is <c>null</c> when the list is empty).
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="structures"/> is null.</exception>
public ClusterOutput HierarchicalClustering(List<string> structures)
{
    if (structures == null)
    {
        throw new ArgumentNullException("structures");
    }

    ClusterOutput outCl = new ClusterOutput();

    // Nothing to merge: return a single node. An empty list has no element to use as
    // the reference structure, so it is left null instead of indexing structures[0].
    if (structures.Count <= 1)
    {
        outCl.hNode = new HClusterNode();
        outCl.hNode.setStruct = structures;
        outCl.hNode.refStructure = structures.Count == 1 ? structures[0] : null;
        outCl.hNode.levelDist = 0;
        outCl.hNode.joined = null;
        return(outCl);
    }

    progressRead = 1;
    dMeasure.CalcDistMatrix(structures);

    List<List<HClusterNode>> level = new List<List<HClusterNode>>();
    List<HClusterNode> levelNodes = new List<HClusterNode>();
    HClusterNode node;

    // Level 0: one leaf per input structure.
    for (int i = 0; i < structures.Count; i++)
    {
        node = new HClusterNode();
        node.refStructure = structures[i];
        node.joined = null;
        node.setStruct.Add(structures[i]);
        node.levelNum = 0;
        node.levelDist = dMeasure.maxSimilarity;
        node.realDist = dMeasure.GetRealValue(node.levelDist);
        levelNodes.Add(node);
    }

    maxV = levelNodes.Count + 1;
    level.Add(levelNodes);

    // NOTE(review): if LevelMinimalDist keeps returning no groups while more than one
    // node remains, this loop does not terminate - confirm that cannot happen.
    bool end = false;
    while (!end)
    {
        levelNodes = new List<HClusterNode>();
        List<List<HClusterNode>> rowList = LevelMinimalDist(level[level.Count - 1]);

        foreach (var item in rowList)
        {
            node = new HClusterNode();
            node.joined = item;
            // 'min' is a member read here; presumably updated by LevelMinimalDist - TODO confirm.
            node.levelDist = min;
            node.realDist = dMeasure.GetRealValue(min);
            node.levelNum = level.Count;

            for (int m = 0; m < item.Count; m++)
            {
                node.setStruct.AddRange(item[m].setStruct);
                // Mark the child as merged so it is not carried over to the next level.
                item[m].fNode = true;
            }

            // NOTE(review): this result is overwritten with null a few lines below; the
            // call is kept only in case GetReferenceStructure has side effects - confirm
            // and remove if it does not.
            node.refStructure = dMeasure.GetReferenceStructure(node.setStruct);

            List<string> refList = new List<string>();
            foreach (var itemJoined in node.joined)
            {
                refList.Add(itemJoined.refStructure);
            }

            // Prefer the mandatory reference structure when one of the merged children
            // already uses it; otherwise let the distance measure pick among the children.
            node.refStructure = null;
            if (mustRefStructure != null)
            {
                foreach (var itemRef in refList)
                {
                    if (itemRef == mustRefStructure)
                    {
                        node.refStructure = mustRefStructure;
                        break;
                    }
                }
            }

            if (node.refStructure == null)
            {
                node.refStructure = dMeasure.GetReferenceStructure(node.setStruct, refList);
            }

            levelNodes.Add(node);
        }

        if (levelNodes.Count > 0)
        {
            List<HClusterNode> previous = level[level.Count - 1];
            level.Add(levelNodes);

            // Nodes of the previous level that were not merged move up unchanged, so the
            // same node object can appear in several levels.
            foreach (var carried in previous)
            {
                if (!carried.fNode)
                {
                    levelNodes.Add(carried);
                }
            }

            currentV = maxV - levelNodes.Count;
        }

        if (level[level.Count - 1].Count == 1)
        {
            end = true;
        }
    }

    outCl.hNode = level[level.Count - 1][0];
    outCl.hNode.levelNum = 0;

    // A node may be listed in more than one level; flag every node first and clear the
    // flag on conversion so each levelDist is transformed exactly once.
    for (int i = 0; i < level.Count; i++)
    {
        for (int j = 0; j < level[i].Count; j++)
        {
            level[i][j].fNode = true;
        }
    }

    for (int i = 0; i < level.Count; i++)
    {
        for (int j = 0; j < level[i].Count; j++)
        {
            HClusterNode current = level[i][j];
            if (current.fNode)
            {
                current.levelDist = Math.Abs(current.levelDist - dMeasure.maxSimilarity);
                current.realDist = dMeasure.GetRealValue(current.levelDist);
                current.fNode = false;
            }
        }
    }

    // At the end level numbers must be set properly: walk the tree breadth-first from
    // the root, giving each child its parent's levelNum + 1.
    Queue<HClusterNode> pending = new Queue<HClusterNode>();
    pending.Enqueue(outCl.hNode);
    while (pending.Count != 0)
    {
        HClusterNode parent = pending.Dequeue();
        if (parent.joined != null)
        {
            foreach (var child in parent.joined)
            {
                child.levelNum = parent.levelNum + 1;
                pending.Enqueue(child);
            }
        }
    }

    outCl.hNode.dirName = dirName;
    outCl.clusters = null;
    outCl.juryLike = null;
    currentV = maxV;
    outCl.runParameters = hierOpt.GetVitalParameters();
    return(outCl);
}
/// <summary>
/// Groups <paramref name="structures"/> into key-based clusters, picks one representative
/// per cluster, builds a dendrogram over the representatives with the configured distance
/// measure, and finally attaches the remaining cluster members to the dendrogram leaves.
/// </summary>
/// <param name="structures">Names of the structures to cluster; cluster members are stored as indices into this list.</param>
/// <returns>The hierarchical clustering result with its run parameters filled in.</returns>
/// <exception cref="Exception">
/// Thrown when the HAMMING measure is selected without a 1D-jury profile, or when a
/// dendrogram leaf cannot be mapped back to its cluster.
/// </exception>
public ClusterOutput DendrogUsingMeasures(List<string> structures)
{
    jury1D juryLocal = new jury1D();
    juryLocal.PrepareJury(al);

    maxV = refPoints * 20 * 4;
    currentV = 0;

    Dictionary<string, List<int>> dic = PrepareKeys(structures, false);

    // Too many clusters: reduce them with the method selected by input.combine.
    if (dic.Count > input.relClusters)
    {
        if (!input.combine)
        {
            dic = HashEntropyCombine(dic, structures, input.relClusters);
        }
        else
        {
            dic = Rpart(dic, structures, false);
        }
    }

    Dictionary<string, int> xx = ReadLeafs();
    dic = SelectClusters(xx, dic);

    maxV = 3;
    currentV = 1;
    DebugClass.WriteMessage("Entropy ready");
    DebugClass.WriteMessage("dic size" + dic.Count);
    currentV++;
    DebugClass.WriteMessage("Combine Keys ready");

    Dictionary<string, string> translateToCluster = new Dictionary<string, string>(dic.Count);
    List<string> structuresToDendrogram = new List<string>(dic.Count);
    List<string> structuresFullPath = new List<string>(dic.Count);
    DebugClass.WriteMessage("Number of clusters: " + dic.Count);

    // Largest clusters first; equal-sized clusters are ordered by key, with '0' sorting
    // before any other character. The comparison never indexes past the shorter key and
    // is antisymmetric, which List.Sort requires of its comparison.
    List<string> order = new List<string>(dic.Keys);
    order.Sort(delegate(string a, string b)
    {
        int bySize = dic[b].Count.CompareTo(dic[a].Count);
        if (bySize != 0)
        {
            return(bySize);
        }

        int shared = Math.Min(a.Length, b.Length);
        for (int i = 0; i < shared; i++)
        {
            if (a[i] != b[i])
            {
                if (a[i] == '0')
                {
                    return(-1);
                }
                if (b[i] == '0')
                {
                    return(1);
                }
                return(a[i].CompareTo(b[i]));
            }
        }

        return(a.Length.CompareTo(b.Length));
    });

    foreach (var item in order)
    {
        List<int> members = dic[item];
        string representative;

        if (members.Count > 2)
        {
            // Clusters with more than two members: the first entry of the jury result
            // is used as the representative.
            List<string> cluster = new List<string>(members.Count);
            foreach (var str in members)
            {
                cluster.Add(structures[str]);
            }

            ClusterOutput output = juryLocal.JuryOptWeights(cluster);
            representative = output.juryLike[0].Key;
        }
        else
        {
            // Small clusters are represented by their first member.
            representative = structures[members[0]];
        }

        structuresToDendrogram.Add(representative);
        if (alignFile == null)
        {
            structuresFullPath.Add(dirName + Path.DirectorySeparatorChar + representative);
        }
        else
        {
            structuresFullPath.Add(representative);
        }

        translateToCluster.Add(representative, item);
    }

    currentV++;
    DebugClass.WriteMessage("Jury finished");

    // NOTE(review): there is no default case, so for any other measure 'dist' keeps
    // whatever value it had before this call - confirm that is intended.
    switch (dMeasure)
    {
        case DistanceMeasures.HAMMING:
            if (refJuryProfile == null || !jury1d)
            {
                throw new Exception("Sorry but for jury measure you have to define 1djury profile to find reference structure");
            }
            else
            {
                dist = new JuryDistance(structuresFullPath, alignFile, true, profileName, refJuryProfile);
            }
            break;

        case DistanceMeasures.COSINE:
            dist = new CosineDistance(structuresFullPath, alignFile, jury1d, profileName, refJuryProfile);
            break;

        case DistanceMeasures.RMSD:
            dist = new Rmsd(structuresFullPath, "", jury1d, atoms, refJuryProfile);
            break;

        case DistanceMeasures.MAXSUB:
            dist = new MaxSub(structuresFullPath, "", jury1d, refJuryProfile);
            break;
    }

    DebugClass.WriteMessage("Start hierarchical");
    currentV = maxV;
    hk = new hierarchicalCluster(dist, hier, dirName);
    dist.InitMeasure();

    ClusterOutput outC = hk.HierarchicalClustering(structuresToDendrogram);
    DebugClass.WriteMessage("Stop hierarchical");

    // Now just add structures to the leaves: each leaf holds one representative, and
    // the other members of its cluster are appended after it.
    List<HClusterNode> hLeaves = outC.hNode.GetLeaves();
    foreach (var item in hLeaves)
    {
        string leafStructure = item.setStruct[0];
        string clusterKey;
        if (!translateToCluster.TryGetValue(leafStructure, out clusterKey))
        {
            throw new Exception("Cannot add structure. Something is wrong");
        }

        foreach (var str in dic[clusterKey])
        {
            if (leafStructure != structures[str])
            {
                item.setStruct.Add(structures[str]);
            }
        }

        item.consistency = CalcClusterConsistency(item.setStruct);
    }

    outC.hNode.RedoSetStructures();
    outC.runParameters = hier.GetVitalParameters();
    outC.runParameters += input.GetVitalParameters();
    return(outC);
}