private void RunFastHCluster(string name, string dirName, string alignFile = null, DCDFile dcd = null)
{
    DateTime cpuPart1 = DateTime.Now;
    ClusterOutput clustOut = null;
    DistanceMeasure distance = null;

    if (dcd == null)
        distance = CreateMeasure(name, dirName, opt.hierarchical.distance, opt.hierarchical.atoms,
                                 opt.hierarchical.reference1DjuryFast, alignFile,
                                 opt.hierarchical.hammingProfile, opt.hierarchical.jury1DProfileFast);
    else
        distance = CreateMeasureForDCD(dcd, opt.hierarchical.distance, opt.hierarchical.atoms,
                                       opt.hierarchical.reference1DjuryFast, opt.hierarchical.alignmentFileName,
                                       opt.hierarchical.hammingProfile, opt.hierarchical.jury1DProfileFast);

    FastDendrog km = new FastDendrog(distance, opt.hierarchical, dirName);

    if (beginJob != null)
        beginJob(currentProcessName, km.ToString(), dirName, distance.ToString());

    progressDic.Add(name, km);
    distance.InitMeasure();

    DateTime cpuPart2 = DateTime.Now;
    clType = km.ToString();
    clustOut = km.Run(new List<string>(distance.structNames.Keys));
    UpdateOutput(name, dirName, alignFile, clustOut, distance.ToString(), cpuPart1, cpuPart2, km);
}
static public ClusterOutput LoadExternalPconsD(string fileName, string dirName)
{
    ClusterOutput aux = new ClusterOutput();
    List<KeyValuePair<string, double>> auxList = new List<KeyValuePair<string, double>>();

    using (StreamReader r = new StreamReader(fileName))
    {
        string line;
        // Process every line, including the last one.
        while ((line = r.ReadLine()) != null)
        {
            string[] tmp = line.Split(' ');
            if (tmp.Length > 1)   // need both a model name and a score
                auxList.Add(new KeyValuePair<string, double>(tmp[0],
                    Convert.ToDouble(tmp[1], System.Globalization.CultureInfo.InvariantCulture)));
        }
    }

    aux.dirName = dirName;
    aux.name = Path.GetFileName(fileName);
    aux.clusters = null;
    aux.juryLike = auxList;
    return aux;
}
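// Hedged usage sketch (not from the original source): judging from the parser above,
// LoadExternalPconsD expects a plain score file with one "modelName score" pair per
// space-separated line, e.g.
//
//   model_001.pdb 0.8731
//   model_002.pdb 0.6512
//
// The enclosing class is not shown here, so the call below is illustrative only:
//
//   ClusterOutput scores = LoadExternalPconsD(@"pconsd_scores.txt", @"models\T0644");
//   foreach (var pair in scores.juryLike)
//       Console.WriteLine(pair.Key + " " + pair.Value);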
private void UpdateOutput(string name, string dirName, string alignFile, ClusterOutput output,
                          string distStr, DateTime cpuPart1, DateTime cpuPart2, object obj)
{
    output.clusterType = obj.ToString();
    output.measure = distStr;

    // DateTime is a value type, so both timestamps are always present.
    DateTime cc = DateTime.Now;
    TimeSpan preprocess = cpuPart2.Subtract(cpuPart1);
    TimeSpan cluster = cc.Subtract(cpuPart2);

    output.time = "Prep=" + String.Format("{0:F2}", preprocess.TotalMinutes);
    output.time += " Clust=" + String.Format("{0:F2}", cluster.TotalMinutes);
    output.name = name;
    output.dirName = dirName;
    output.alignFile = alignFile;
    output.peekMemory = Process.GetCurrentProcess().PeakWorkingSet64;
    Process.GetCurrentProcess().Refresh();
    progressDic.Remove(name);
    clOutput.Add(output.name, output);
    UpadateJobInfo(name, false, false);
}
private void RunBakerCluster(string name, string dirName, string alignFile = null, DCDFile dcd = null)
{
    DateTime cpuPart1 = DateTime.Now;
    ClusterOutput output = null;
    DistanceMeasure distance = null;

    if (dcd == null)
        distance = CreateMeasure(name, dirName, opt.threshold.hDistance, opt.threshold.hAtoms,
                                 opt.threshold.reference1Djury, alignFile,
                                 opt.threshold.hammingProfile, null);
    else
        distance = CreateMeasureForDCD(dcd, opt.threshold.hDistance, opt.threshold.hAtoms,
                                       opt.threshold.reference1Djury, opt.threshold.alignmentFileName,
                                       opt.threshold.hammingProfile, null);

    ThresholdCluster bk = new ThresholdCluster(distance, opt.threshold.distThresh, opt.threshold.bakerNumberofStruct);

    if (beginJob != null)
        beginJob(currentProcessName, bk.ToString(), dirName, distance.ToString());

    progressDic.Add(name, bk);
    distance.InitMeasure();

    DateTime cpuPart2 = DateTime.Now;
    clType = bk.ToString();
    output = bk.OrgClustering();
    UpdateOutput(name, dirName, alignFile, output, distance.ToString(), cpuPart1, cpuPart2, bk);
}
void RunHTree(string name, string dirName, string alignmentFile = null, DCDFile dcd = null)
{
    DateTime cpuPart1 = DateTime.Now;
    HashCluster hCluster;

    if (dcd != null)
        hCluster = new HashCluster(dcd, opt.hash);
    else if (alignmentFile != null)
        hCluster = new HashCluster("", alignmentFile, opt.hash);
    else
        hCluster = new HashCluster(dirName, null, opt.hash);

    HTree h = new HTree(dirName, alignmentFile, hCluster);
    if (beginJob != null)
        beginJob(currentProcessName, h.ToString(), dirName, "HAMMING");
    progressDic.Add(name, h);
    hCluster.InitHashCluster();

    DateTime cpuPart2 = DateTime.Now;
    ClusterOutput output = h.RunHTree();
    UpdateOutput(name, dirName, alignmentFile, output, "NONE", cpuPart1, cpuPart2, h);
}
List<KeyValuePair<string, double>> CLusterRepresentJury(string dirName, List<string> targets, string profileName)
{
    List<string> fileNames = new List<string>(targets.Count);
    foreach (var item in targets)
        fileNames.Add(dirName + Path.DirectorySeparatorChar + item);

    jury1D jury = new jury1D();
    jury.PrepareJury(fileNames, null, profileName);
    ClusterOutput outJury = jury.JuryOptWeights(targets);
    return outJury.juryLike;
}
public ClusterOutput Run3DJury()
{
    ClusterOutput output = new ClusterOutput();
    List<KeyValuePair<string, double>> li = new List<KeyValuePair<string, double>>();
    long[] distTab = new long[dMeasure.structNames.Count];

    progressRead = 1;
    dMeasure.CalcDistMatrix(new List<string>(dMeasure.structNames.Keys));
    maxV = dMeasure.structNames.Count + 1;

    // For every structure, sum its similarity to all other structures.
    for (int i = 0; i < dMeasure.structNames.Count; i++)
    {
        long sum = 0;
        for (int j = 0; j < dMeasure.structNames.Count; j++)
            sum += dMeasure.GetDistance(i, j);
        distTab[i] = sum;
        currentV++;
    }

    List<string> structKeys = new List<string>(dMeasure.structNames.Keys);
    for (int m = 0; m < structKeys.Count; m++)
        li.Add(new KeyValuePair<string, double>(structKeys[m],
            distTab[m] / (100.0 * dMeasure.structNames.Count)));

    // Sort best-first; the direction depends on whether the measure is a similarity or a distance.
    if (dMeasure.order == false)
        li.Sort((firstPair, nextPair) => nextPair.Value.CompareTo(firstPair.Value));
    else
        li.Sort((firstPair, nextPair) => firstPair.Value.CompareTo(nextPair.Value));

    output.juryLike = li;
    currentV = maxV;
    output.runParameters = "Distance measure: " + this.dMeasure;
    return output;
}
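// A minimal standalone sketch of the 3D-jury score computed in Run3DJury above, assuming
// (as the measures in this codebase appear to) that pairwise values are similarities scaled
// by 100. 'Jury3DScores' and its parameters are illustrative names, not part of the original
// API; the method relies only on the file's existing using directives.
static List<KeyValuePair<string, double>> Jury3DScores(string[] names, long[][] sim)
{
    var scores = new List<KeyValuePair<string, double>>(names.Length);
    for (int i = 0; i < names.Length; i++)
    {
        long sum = 0;                      // total similarity of model i to all models
        for (int j = 0; j < names.Length; j++)
            sum += sim[i][j];
        // Average similarity, undoing the fixed-point scaling by 100.
        scores.Add(new KeyValuePair<string, double>(names[i], sum / (100.0 * names.Length)));
    }
    scores.Sort((a, b) => b.Value.CompareTo(a.Value));   // highest average similarity first
    return scores;
}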
public void SaveOutput(string fileName)
{
    using (StreamWriter w = new StreamWriter(fileName))
    {
        int count = 0;
        foreach (var item in clOutput.Keys)
        {
            // Each result goes to its own numbered file; the index file lists those names.
            string name = fileName + count++;
            w.WriteLine(name);
            ClusterOutput.Save(name, clOutput[item]);
        }
    }
}
private void RunSift(string name, string dirName, DCDFile dcd = null)
{
    DateTime cpuStart = DateTime.Now;
    ClusterOutput output = null;
    Sift s = new Sift();

    if (beginJob != null)
        beginJob(currentProcessName, s.ToString(), dirName, "NONE");

    progressDic.Add(name, s);
    clType = s.ToString();
    output = s.RunSift(dirName);
    UpdateOutput(name, dirName, null, output, "Sift", cpuStart, DateTime.Now, s);
}
ClusterOutput Shape()
{
    PrepareSift();
    MakeHistogram();
    FindMaxMin();
    // Rank the field best-first.
    field.Sort((firstPair, nextPair) => nextPair.Value.CompareTo(firstPair.Value));
    ClusterOutput siftOut = new ClusterOutput();
    siftOut.juryLike = field;
    currentV = maxV;
    return siftOut;
}
public ClusterOutput kMeansL(int k, int maxIter, List<string> allStruct)
{
    ClusterOutput clustOut, remClust = new ClusterOutput();
    float cost, remCost = 100;

    // A single pass is made here; raise the loop bound to restart k-means several times
    // and keep the run with the best (lowest) Davies-Bouldin index.
    for (int r = 0; r < 1; r++)
    {
        clustOut = kMeansLevel(k, maxIter, allStruct);
        cost = CalculateDaviesBouldinIndex(clustOut.clusters);
        if (remCost > cost)
        {
            remCost = cost;
            remClust = clustOut;
        }
    }
    return remClust;
}
private double CalcStandDev(ClusterOutput outC)
{
    if (outC.juryLike == null)
        return -1;

    double sum = 0, avr, dev = 0;
    for (int i = 0; i < outC.juryLike.Count; i++)
        sum += outC.juryLike[i].Value;
    avr = sum / outC.juryLike.Count;

    for (int i = 0; i < outC.juryLike.Count; i++)
    {
        double aux = avr - outC.juryLike[i].Value;
        dev += aux * aux;
    }
    dev /= outC.juryLike.Count;   // population variance
    return Math.Sqrt(dev);
}
static public ClusterOutput LoadExternal(string fileName, string dirName)
{
    ClusterOutput aux = new ClusterOutput();
    string line;
    aux.clusters = new List<List<string>>();
    aux.dirName = dirName;

    using (StreamReader r = new StreamReader(fileName))
    {
        while (!r.EndOfStream)
        {
            line = r.ReadLine();
            if (!line.Contains("#") && line.Contains(":"))
            {
                // Collapse repeated spaces so the line splits cleanly.
                line = line.Replace("  ", " ");
                line = line.Replace("  ", " ");
                line = line.Replace(" : ", ":");
                line = line.TrimEnd(' ');
                string[] tmp = line.Split(' ');
                List<string> auxList = new List<string>();
                string[] tmp2 = tmp[1].Split(':');
                string[] loc = tmp2[1].Split('/');
                // Note: the target directory is hard-wired to a local CASP10 layout.
                aux.dirName = "F:\\casp10\\" + loc[loc.Length - 2];
                aux.name = loc[loc.Length - 2];
                auxList.Add(loc[loc.Length - 1]);
                for (int i = 4; i < tmp.Length; i++)
                {
                    loc = tmp[i].Split('/');
                    if (!auxList.Contains(loc[loc.Length - 1]))
                        auxList.Add(loc[loc.Length - 1]);
                }
                aux.clusters.Add(auxList);
            }
        }
    }
    return aux;
}
public void LoadOutput(string fileName)
{
    if (!File.Exists(fileName))
        return;

    clOutput.Clear();
    using (StreamReader r = new StreamReader(fileName))
    {
        string line;
        while (!r.EndOfStream)
        {
            line = r.ReadLine();
            if (File.Exists(line))
            {
                ClusterOutput outP = ClusterOutput.Load(line);
                clOutput.Add(outP.name, outP);
            }
        }
    }
}
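// Hedged usage note (assumption, not from the original source): SaveOutput writes an index
// file listing one result file per line, and LoadOutput replays that index, so a round trip
// looks like:
//
//   SaveOutput(@"session.idx");   // writes session.idx0, session.idx1, ... plus the index
//   LoadOutput(@"session.idx");   // restores clOutput from those files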
private void RunHKMeans(string name, string dirName, string alignFile = null, DCDFile dcd = null)
{
    DateTime cpuPart1 = DateTime.Now;
    ClusterOutput clustOut = null;
    DistanceMeasure distance = null;

    if (dcd == null)
        distance = CreateMeasure(name, dirName, opt.hierarchical.distance, opt.hierarchical.atoms,
                                 opt.hierarchical.reference1DjuryKmeans, alignFile,
                                 opt.hierarchical.hammingProfile, opt.hierarchical.jury1DProfileKmeans);
    else
        distance = CreateMeasureForDCD(dcd, opt.hierarchical.distance, opt.hierarchical.atoms,
                                       opt.hierarchical.reference1DjuryKmeans, opt.hierarchical.alignmentFileName,
                                       opt.hierarchical.hammingProfile, opt.hierarchical.jury1DProfileKmeans);

    kMeans km = new kMeans(distance, true);

    if (beginJob != null)
        beginJob(currentProcessName, km.ToString(), dirName, distance.ToString());

    progressDic.Add(name, km);
    DateTime cpuPart2 = DateTime.Now;
    distance.InitMeasure();
    clType = km.ToString();
    km.BMIndex = opt.hierarchical.indexDB;
    km.threshold = opt.hierarchical.numberOfStruct;
    km.maxRepeat = opt.hierarchical.repeatTime;
    km.maxK = opt.hierarchical.maxK;
    clustOut = km.HierarchicalKMeans();
    UpdateOutput(name, dirName, alignFile, clustOut, distance.ToString(), cpuPart1, cpuPart2, km);
}
ClusterOutput DivideSpace1DJury(List<string> list)
{
    ClusterOutput outC, finalOut = new ClusterOutput();
    outC = jury.JuryOptWeights(list);

    List<string> clust1 = new List<string>();
    List<string> clust2 = new List<string>();

    if (useKMeans)
    {
        // Seed k-means with the best and the worst jury-ranked structures.
        clust1.Add(outC.juryLike[0].Key);
        clust1.Add(outC.juryLike[outC.juryLike.Count - 1].Key);
        kMeans km = new kMeans(dMeasure);
        return km.kMeansRun(kMeansIter, list, clust1);
    }

    // Otherwise split the jury ranking in half: top half and bottom half.
    for (int i = 0; i < outC.juryLike.Count; i++)
    {
        if (i < outC.juryLike.Count / 2)
            clust1.Add(outC.juryLike[i].Key);
        else
            clust2.Add(outC.juryLike[i].Key);
    }
    finalOut.clusters = new List<List<string>>();
    finalOut.clusters.Add(clust1);
    finalOut.clusters.Add(clust2);
    return finalOut;
}
public ClusterOutput HierarchicalKMeans()
{
    HClusterNode node;
    maxDist = 0;
    currentV = 0;

    List<string> availStruct = new List<string>(dMeasure.structNames.Keys);
    hmaxV = availStruct.Count;
    node = MakeNodes(availStruct, 0);
    hcurrentV = hmaxV;
    node.levelDist = maxDist;
    node.realDist = dMeasure.GetRealValue(maxDist);
    AddDistance(node);

    ClusterOutput outClust = new ClusterOutput();
    outClust.hNode = node;
    clusterName = "H-Kmeans";
    return outClust;
}
public ClusterFileVis(ClusterOutput output) : base(output) { }
public ClusterOutput RunHTree()
{
    HClusterNode root = null;
    // RunHashCluster populates hCluster.dicFinal; its direct return value is not needed here.
    hCluster.RunHashCluster();
    Dictionary<string, List<int>> clusters = hCluster.dicFinal;
    juryLocal.PrepareJury(hCluster.al);
    ClusterOutput output;
    List<HClusterNode> groundLevel = new List<HClusterNode>();

    // Build one leaf node per hash cluster, with a 1D-jury representative.
    foreach (var item in clusters)
    {
        HClusterNode aux = new HClusterNode();
        aux.parent = null;
        aux.joined = null;
        aux.setStruct = new List<string>(item.Value.Count + 1);
        foreach (var index in item.Value)
            aux.setStruct.Add(hCluster.structNames[index]);
        output = juryLocal.JuryOptWeights(aux.setStruct);
        aux.stateFreq = juryLocal.columns;
        aux.refStructure = output.juryLike[0].Key;
        aux.realDist = 0;
        aux.levelDist = 0;
        aux.dirName = item.Key;
        aux.consistency = hCluster.CalcClusterConsistency(aux.setStruct);
        groundLevel.Add(aux);
    }

    currentV = 20;
    int size = groundLevel[0].dirName.Length;
    double step = 80.0 / size;

    // Repeatedly shorten the hash key by one character and join nodes that share the prefix.
    for (int i = 1; i < size; i++)
    {
        Dictionary<string, List<HClusterNode>> join = new Dictionary<string, List<HClusterNode>>();
        foreach (var item in groundLevel)
        {
            string key = item.dirName.Remove(item.dirName.Length - 1);
            if (join.ContainsKey(key))
                join[key].Add(item);
            else
            {
                List<HClusterNode> aux = new List<HClusterNode>();
                aux.Add(item);
                join.Add(key, aux);
            }
        }
        groundLevel = new List<HClusterNode>();
        foreach (var item in join)
        {
            HClusterNode aux = JoinNodes(item.Value);
            aux.levelDist = aux.levelNum = i;
            aux.realDist = i;
            aux.dirName = item.Key;
            groundLevel.Add(aux);
        }
        currentV += (int)step;
    }

    if (groundLevel.Count == 1)
        root = groundLevel[0];
    else
    {
        root = JoinNodes(groundLevel);
        root.levelDist = size;
        root.realDist = size;
    }
    currentV = 100;
    ClusterOutput outHTree = new ClusterOutput();
    outHTree.hNode = root;
    return outHTree;
}
public ClusterOutput ConsensusJury(List<string> structNames)
{
    List<KeyValuePair<string, double>> distCons = new List<KeyValuePair<string, double>>();
    byte locState;
    List<Dictionary<byte, int>> cons = new List<Dictionary<byte, int>>(structNames.Count);
    List<byte> finalCons = new List<byte>();

    // Allocate one state-count dictionary per alignment column, sized from the first structure.
    foreach (string name in structNames)
    {
        for (int i = 0; i < stateAlign[name].Count; i++)
            cons.Add(new Dictionary<byte, int>());
        break;
    }

    // Count how often each state occurs in every column.
    foreach (string name in structNames)
    {
        if (!stateAlign.ContainsKey(name))
            continue;
        for (int i = 0; i < stateAlign[name].Count; i++)
        {
            locState = stateAlign[name][i];
            if (cons[i].ContainsKey(locState))
                cons[i][locState]++;
            else
                cons[i].Add(locState, 1);
        }
    }

    // The consensus keeps the most frequent state of each column.
    foreach (var item in cons)
    {
        var items = from pair in item orderby pair.Value descending select pair;
        foreach (KeyValuePair<byte, int> pair in items)
        {
            finalCons.Add(pair.Key);
            break;
        }
    }

    // Score every structure by its agreement with the consensus.
    foreach (string name in structNames)
    {
        if (!stateAlign.ContainsKey(name))
            continue;
        float dist = 0;
        for (int i = 0; i < stateAlign[name].Count; i++)
            if (stateAlign[name][i] == finalCons[i])
                dist++;
        distCons.Add(new KeyValuePair<string, double>(name, dist));
    }

    distCons.Sort((firstPair, nextPair) => firstPair.Value.CompareTo(nextPair.Value));
    ClusterOutput juryRes = new ClusterOutput();
    juryRes.juryLike = distCons;
    return juryRes;
}
public ClusterOutput JuryOptWeights(List<string> structNames, Dictionary<byte, int>[] locColumns = null)
{
    res1Djury = new List<KeyValuePair<string, double>>(structNames.Count);
    Dictionary<byte, int>[] columns = null;
    List<string> aux = new List<string>(structNames);

    // Drop structures that have no state string in the alignment.
    foreach (var item in structNames)
        if (!stateAlign.ContainsKey(item) || stateAlign[item].Count == 0)
            aux.Remove(item);

    if (locColumns == null)
        columns = MakeColumns(aux);
    else
    {
        columns = locColumns;
        this.columns = locColumns;
    }
    currentV++;
    if (columns == null)
        return null;

    allStructures = new List<string>(aux);
    if (weights == null || weights.Count == 0)
        return null;

    // Split the scoring work evenly across the worker threads.
    resetEvents = new ManualResetEvent[threadNumbers];
    for (int n = 0; n < threadNumbers; n++)
    {
        ThreadParam pp = new ThreadParam();
        pp.num = n;
        pp.start = (int)(n * allStructures.Count / Convert.ToDouble(threadNumbers));
        pp.stop = (int)((n + 1) * allStructures.Count / Convert.ToDouble(threadNumbers));
        resetEvents[n] = new ManualResetEvent(false);
        ThreadPool.QueueUserWorkItem(new WaitCallback(ThreadingScoreCalc), (object)pp);
    }
    for (int n = 0; n < threadNumbers; n++)
        resetEvents[n].WaitOne();
    currentV++;

    res1Djury.Sort((firstPair, nextPair) => nextPair.Value.CompareTo(firstPair.Value));
    ClusterOutput juryRes = new ClusterOutput();
    juryRes.runParameters = "Profile: " + this.currentProfile;
    juryRes.juryLike = res1Djury;
    currentV = maxV;
    return juryRes;
}
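// Simplified, unweighted sketch of the 1D-jury score that ThreadingScoreCalc parallelises
// above: a structure is rewarded for matching frequent states in each alignment column.
// 'Score1DJuryUnweighted' is an illustrative helper, not the original (weighted) routine,
// and it relies only on the file's existing using directives.
static double Score1DJuryUnweighted(byte[] states, Dictionary<byte, int>[] columns)
{
    double score = 0;
    for (int i = 0; i < states.Length && i < columns.Length; i++)
    {
        int votes;
        if (columns[i].TryGetValue(states[i], out votes))
            score += votes;   // column agreement counts as votes for this structure
    }
    return score;
}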
public ClusterOutput RunHashDendrogCombine()
{
    return DendrogUsingMeasures(stateAlignKeys);
}
public ClusterOutput DendrogUsingMeasures(List<string> structures)
{
    jury1D juryLocal = new jury1D();
    juryLocal.PrepareJury(al);
    ClusterOutput outC = null;
    Dictionary<string, List<int>> dic;

    maxV = refPoints * 20 * 4;
    currentV = 0;
    dic = PrepareKeys(structures, false);

    // Reduce the number of hash clusters until it fits the requested limit.
    if (dic.Count > input.relClusters)
    {
        if (!input.combine)
            dic = HashEntropyCombine(dic, structures, input.relClusters);
        else
            dic = Rpart(dic, structures, false);
    }

    Dictionary<string, int> xx = ReadLeafs();
    dic = SelectClusters(xx, dic);
    maxV = 3;
    currentV = 1;
    DebugClass.WriteMessage("Entropy ready");
    // TODO (author's note): an alternative UQlust-Tree start-up via FastCombineKeys is unfinished.
    DebugClass.WriteMessage("dic size" + dic.Count);
    currentV++;
    DebugClass.WriteMessage("Combine Keys ready");

    Dictionary<string, string> translateToCluster = new Dictionary<string, string>(dic.Count);
    List<string> structuresToDendrogram = new List<string>(dic.Count);
    List<string> structuresFullPath = new List<string>(dic.Count);
    DebugClass.WriteMessage("Number of clusters: " + dic.Count);

    // Order clusters by size, ties broken lexicographically on the key ('0' first).
    List<string> order = new List<string>(dic.Keys);
    order.Sort(delegate(string a, string b)
    {
        if (dic[b].Count == dic[a].Count)
        {
            for (int i = 0; i < a.Length; i++)
                if (a[i] != b[i])
                    return a[i] == '0' ? -1 : 1;
        }
        return dic[b].Count.CompareTo(dic[a].Count);
    });

    // Pick one representative per cluster: the 1D-jury winner for clusters of size > 2,
    // otherwise simply the first structure.
    foreach (var item in order)
    {
        if (dic[item].Count > 2)
        {
            List<string> cluster = new List<string>(dic[item].Count);
            foreach (var str in dic[item])
                cluster.Add(structures[str]);
            ClusterOutput output = juryLocal.JuryOptWeights(cluster);
            structuresToDendrogram.Add(output.juryLike[0].Key);
            if (alignFile == null)
                structuresFullPath.Add(dirName + Path.DirectorySeparatorChar + output.juryLike[0].Key);
            else
                structuresFullPath.Add(output.juryLike[0].Key);
            translateToCluster.Add(output.juryLike[0].Key, item);
        }
        else
        {
            structuresToDendrogram.Add(structures[dic[item][0]]);
            if (alignFile == null)
                structuresFullPath.Add(dirName + Path.DirectorySeparatorChar + structures[dic[item][0]]);
            else
                structuresFullPath.Add(structures[dic[item][0]]);
            translateToCluster.Add(structures[dic[item][0]], item);
        }
    }
    currentV++;
    DebugClass.WriteMessage("Jury finished");

    switch (dMeasure)
    {
        case DistanceMeasures.HAMMING:
            if (refJuryProfile == null || !jury1d)
                throw new Exception("Sorry but for jury measure you have to define 1djury profile to find reference structure");
            dist = new JuryDistance(structuresFullPath, alignFile, true, profileName, refJuryProfile);
            break;
        case DistanceMeasures.COSINE:
            dist = new CosineDistance(structuresFullPath, alignFile, jury1d, profileName, refJuryProfile);
            break;
        case DistanceMeasures.RMSD:
            dist = new Rmsd(structuresFullPath, "", jury1d, atoms, refJuryProfile);
            break;
        case DistanceMeasures.MAXSUB:
            dist = new MaxSub(structuresFullPath, "", jury1d, refJuryProfile);
            break;
    }

    DebugClass.WriteMessage("Start hierarchical");
    currentV = maxV;
    hk = new hierarchicalCluster(dist, hier, dirName);
    dist.InitMeasure();
    // Cluster the representatives, then put the remaining structures back into the leaves.
    outC = hk.HierarchicalClustering(structuresToDendrogram);
    DebugClass.WriteMessage("Stop hierarchical");

    List<HClusterNode> hLeaves = outC.hNode.GetLeaves();
    foreach (var item in hLeaves)
    {
        if (translateToCluster.ContainsKey(item.setStruct[0]))
        {
            foreach (var str in dic[translateToCluster[item.setStruct[0]]])
                if (item.setStruct[0] != structures[str])
                    item.setStruct.Add(structures[str]);
            item.consistency = CalcClusterConsistency(item.setStruct);
        }
        else
            throw new Exception("Cannot add structure. Something is wrong");
    }
    outC.hNode.RedoSetStructures();
    outC.runParameters = hier.GetVitalParameters();
    outC.runParameters += input.GetVitalParameters();
    return outC;
}
public ClusterVis(ClusterOutput output) { this.output = output; }
public ClusterOutput Run(List<string> structs)
{
    maxProgress = 5;
    currentProgress = 0;

    if (hConcensus)
    {
        maxProgress++;
        consensus = new HammingConsensus(dMeasure.dirName, null, false, input.consensusProfile);
        progressObject = consensus;
        consensus.InitMeasure();
        currentProgress += 1.0 / maxProgress;
    }

    jury = new jury1D();
    progressObject = jury;
    currentProgress += 1.0 / maxProgress;
    progressObject = null;
    jury.PrepareJury(dMeasure.dirName, dMeasure.alignFile, input.jury1DProfileFast);
    currentProgress += 1.0 / maxProgress;

    ClusterOutput clOut = new ClusterOutput();
    root.setStruct = structs;
    FastCluster(root);
    maxV = initNodesNum;

    // Keep splitting the largest pending node until enough initial nodes exist.
    while (st.Count > 0 && (leaves.Count + st.Count) < initNodesNum)
    {
        st.Sort(delegate(HClusterNode p1, HClusterNode p2)
        {
            return p2.setStruct.Count.CompareTo(p1.setStruct.Count);
        });
        HClusterNode node = st[0];
        st.RemoveAt(0);
        FastCluster(node);
        currentV += leaves.Count + st.Count;
    }
    currentV = maxV;
    currentProgress += 1.0 / maxProgress;

    // Whatever is still pending becomes a leaf.
    while (st.Count > 0)
    {
        HClusterNode node = st[0];
        st.RemoveAt(0);
        leaves.Add(node);
    }

    MakeDendrogs(linkage);
    currentProgress += 1.0 / maxProgress;
    PrepareList();
    root = ConnectDendrogs(linkage);
    root.levelDist = root.SearchMaxDist();
    root.realDist = dMeasure.GetRealValue(root.levelDist);
    clOut.hNode = root;
    currentProgress += 1.0 / maxProgress;
    return clOut;
}
static public ClusterOutput LoadExternalPleiades(string fileName, string dirName)
{
    ClusterOutput aux = new ClusterOutput();
    string line;
    aux.clusters = new List<List<string>>();
    List<KeyValuePair<int, int>> clustSize = new List<KeyValuePair<int, int>>();
    aux.dirName = dirName;
    List<string> auxList = null;
    int clSize = 0, index = 0;

    using (StreamReader r = new StreamReader(fileName))
    {
        while (!r.EndOfStream)
        {
            line = r.ReadLine();
            if (line.Contains("Cluster"))
            {
                // A new cluster header: flush the previous cluster first.
                if (auxList != null)
                {
                    aux.clusters.Add(auxList);
                    clustSize.Add(new KeyValuePair<int, int>(clSize, index));
                    index++;
                }
                string[] aa = line.Split(',');
                aa = aa[0].Split(':');
                clSize = Convert.ToInt32(aa[1], System.Globalization.CultureInfo.InvariantCulture);
                auxList = new List<string>();
            }
            else if (line.Contains("/") && !line.Contains("#"))
            {
                line = line.TrimStart(' ');
                line = line.Replace("\t", " ");
                // Collapse repeated spaces so the line splits cleanly.
                line = line.Replace("  ", " ");
                line = line.Replace("  ", " ");
                string[] tmp = line.Split(' ');
                string[] loc = tmp[1].Split('/');
                aux.name = loc[loc.Length - 2];
                auxList.Add(loc[loc.Length - 1]);
            }
        }
    }

    // Flush the final cluster, guarding against files with no "Cluster" header at all.
    if (auxList != null && auxList.Count > 0)
    {
        aux.clusters.Add(auxList);
        clustSize.Add(new KeyValuePair<int, int>(clSize, index));
    }

    // Emit clusters largest-first.
    clustSize.Sort(delegate(KeyValuePair<int, int> first, KeyValuePair<int, int> second)
    {
        return second.Key.CompareTo(first.Key);
    });
    ClusterOutput final = new ClusterOutput();
    final.clusters = new List<List<string>>();
    final.name = aux.name;
    final.dirName = aux.dirName;
    foreach (var item in clustSize)
        final.clusters.Add(aux.clusters[item.Value]);
    return final;
}
public ClusterOutput kMeansRun(int iterNum, List<string> allStruct, List<string> refStruct)
{
    // k-means needs at least two reference structures; check before any allocation uses them.
    if (refStruct == null || refStruct.Count <= 1)
        return null;

    ClusterOutput clustOut;
    int currentBest = allStruct.Count;
    int bestCounter = 0;
    bool end = false;
    int index = 0, changeCounter = 1000;
    int[][] refDist;
    List<List<string>> clusters = new List<List<string>>(refStruct.Count);
    int[] status = new int[allStruct.Count];
    Random rand = new Random();

    for (int j = 0; j < refStruct.Count; j++)
        clusters.Add(new List<string>());
    for (int i = 0; i < status.Length; i++)
        status[i] = -1;

    loopCounter = 0;
    while (!end)
    {
        // Every cluster starts with its reference structure.
        for (int i = 0; i < refStruct.Count; i++)
            clusters[i].Add(refStruct[i]);

        refDist = dMeasure.GetDistance(refStruct, allStruct);
        changeCounter = 0;
        // Assignment step: each structure joins its nearest reference.
        for (int i = 0; i < allStruct.Count; i++)
        {
            int min = refDist[0][i];
            index = 0;
            for (int j = 1; j < refStruct.Count; j++)
            {
                if (refDist[j][i] < min)
                {
                    min = refDist[j][i];
                    index = j;
                }
            }
            if (allStruct[i].Contains(clusters[index][0]))
                continue;
            clusters[index].Add(allStruct[i]);
            if (status[i] != index)
            {
                status[i] = index;
                changeCounter++;
            }
        }

        if (currentBest <= changeCounter)
            bestCounter++;
        else
        {
            bestCounter = 0;
            currentBest = changeCounter;
        }

        // Stop when assignments are stable (under 1% changed), the iteration limit is hit,
        // or no improvement was seen in five consecutive rounds.
        if (changeCounter <= allStruct.Count / 100 || loopCounter > iterNum || bestCounter >= 5)
            end = true;
        else
        {
            loopCounter++;
            // Update step: recompute each reference; reseed empty clusters randomly.
            for (int i = 0; i < refStruct.Count; i++)
            {
                if (clusters[i].Count > 0)
                    refStruct[i] = dMeasure.GetReferenceStructure(clusters[i]);
                else
                    refStruct[i] = allStruct[rand.Next(0, allStruct.Count)];
                clusters[i].Clear();
            }
            currentV++;
        }
    }

    // Put every reference first in its cluster and drop duplicates.
    List<List<string>> finalClusters = new List<List<string>>();
    for (int i = 0; i < clusters.Count; i++)
    {
        List<string> aux = new List<string>();
        aux.Add(refStruct[i]);
        foreach (var item in clusters[i])
            if (item != refStruct[i])
                aux.Add(item);
        if (aux.Count >= 1)
            finalClusters.Add(aux);
    }
    clustOut = new ClusterOutput();
    clustOut.clusters = finalClusters;
    currentV = maxV;
    return clustOut;
}
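// The assignment rule inside kMeansRun, isolated for clarity (an illustrative helper under
// the same distance-matrix layout as above, not part of the original API): a structure joins
// whichever reference has the smallest distance to it.
static int NearestReference(int structIndex, int[][] refDist)
{
    int best = 0;
    for (int j = 1; j < refDist.Length; j++)
        if (refDist[j][structIndex] < refDist[best][structIndex])
            best = j;
    return best;
}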
ClusterOutput DivideSpaceHamming(List<string> list)
{
    ClusterOutput output = new ClusterOutput();
    Dictionary<string, int> aux = new Dictionary<string, int>();
    ClusterOutput outC = jury.JuryOptWeights(list);
    if (outC == null)
        return null;

    List<string> clust1 = new List<string>();
    List<string> clust2 = new List<string>();
    consensus.ToConsensusStates(list, outC.juryLike[0].Key);
    foreach (var item in list)
        aux.Add(item, consensus.distanceOrdered[item]);

    if (useKMeans)
    {
        // Seed one centre with the jury winner...
        clust1.Add(outC.juryLike[0].Key);
        // ...and collect the structures farthest from the consensus.
        var ordered = aux.OrderByDescending(j => j.Value);
        int dist = ordered.First().Value;
        foreach (var ll in ordered)
        {
            if (dist != ll.Value)
                break;
            clust2.Add(ll.Key);
        }
        // Among those, the one with the lowest jury score becomes the second seed.
        Dictionary<string, double> dic = new Dictionary<string, double>();
        foreach (var it in outC.juryLike)
            dic.Add(it.Key, it.Value);
        double min = Double.MaxValue;
        string rem = "";
        foreach (var it in clust2)
        {
            if (dic[it] < min)
            {
                min = dic[it];
                rem = it;
            }
        }
        clust1.Add(rem);
        kMeans km = new kMeans(dMeasure);
        return km.kMeansRun(kMeansIter, list, clust1);
    }

    // Otherwise split by consensus distance: closer half and farther half.
    int i = 0;
    foreach (var item in aux.OrderBy(j => j.Value))
    {
        if (i < list.Count() / 2)
            clust1.Add(item.Key);
        else
            clust2.Add(item.Key);
        i++;
    }
    output.clusters = new List<List<string>>();
    output.clusters.Add(clust1);
    output.clusters.Add(clust2);
    return output;
}
public ClusterOutput HierarchicalClustering(List<string> structures)
{
    List<List<HClusterNode>> level = new List<List<HClusterNode>>();
    List<HClusterNode> levelNodes = new List<HClusterNode>();
    ClusterOutput outCl = new ClusterOutput();
    int levelCount = 0;
    bool end = false;
    HClusterNode node;

    // Trivial case: a single structure is its own dendrogram.
    if (structures.Count <= 1)
    {
        outCl.hNode = new HClusterNode();
        outCl.hNode.setStruct = structures;
        outCl.hNode.refStructure = structures[0];
        outCl.hNode.levelDist = 0;
        outCl.hNode.joined = null;
        return outCl;
    }

    progressRead = 1;
    dMeasure.CalcDistMatrix(structures);

    // Start with one leaf node per structure.
    for (int i = 0; i < structures.Count; i++)
    {
        node = new HClusterNode();
        node.refStructure = structures[i];
        node.joined = null;
        node.setStruct.Add(structures[i]);
        node.levelNum = levelCount;
        node.levelDist = dMeasure.maxSimilarity;
        node.realDist = dMeasure.GetRealValue(node.levelDist);
        levelNodes.Add(node);
    }
    maxV = levelNodes.Count + 1;
    level.Add(levelNodes);

    // Merge the closest nodes level by level until one root remains.
    while (!end)
    {
        levelNodes = new List<HClusterNode>();
        List<List<HClusterNode>> rowList = LevelMinimalDist(level[level.Count - 1]);
        if (rowList.Count > 0)
        {
            foreach (var item in rowList)
            {
                node = new HClusterNode();
                node.joined = item;
                node.levelDist = min;
                node.realDist = dMeasure.GetRealValue(min);
                node.levelNum = level.Count;
                for (int m = 0; m < item.Count; m++)
                {
                    node.setStruct.AddRange(item[m].setStruct);
                    item[m].fNode = true;
                }
                // Prefer the user-pinned reference structure if one of the joined nodes holds it.
                List<string> refList = new List<string>();
                foreach (var itemJoined in node.joined)
                    refList.Add(itemJoined.refStructure);
                node.refStructure = null;
                if (mustRefStructure != null)
                    foreach (var itemRef in refList)
                        if (itemRef == mustRefStructure)
                            node.refStructure = mustRefStructure;
                if (node.refStructure == null)
                    node.refStructure = dMeasure.GetReferenceStructure(node.setStruct, refList);
                levelNodes.Add(node);
            }
        }
        if (levelNodes.Count > 0)
        {
            level.Add(levelNodes);
            // Carry over the nodes that were not merged at this level.
            for (int i = 0; i < level[level.Count - 2].Count; i++)
                if (!level[level.Count - 2][i].fNode)
                    level[level.Count - 1].Add(level[level.Count - 2][i]);
            currentV = maxV - levelNodes.Count;
        }
        if (level[level.Count - 1].Count == 1)
            end = true;
    }

    outCl.hNode = level[level.Count - 1][0];
    outCl.hNode.levelNum = 0;

    // Convert similarities to distances, then renumber the levels top-down.
    Queue<HClusterNode> qq = new Queue<HClusterNode>();
    HClusterNode h;
    for (int i = 0; i < level.Count; i++)
        for (int j = 0; j < level[i].Count; j++)
            level[i][j].fNode = true;
    for (int i = 0; i < level.Count; i++)
    {
        for (int j = 0; j < level[i].Count; j++)
        {
            if (level[i][j].fNode)
            {
                level[i][j].levelDist = Math.Abs(level[i][j].levelDist - dMeasure.maxSimilarity);
                level[i][j].realDist = dMeasure.GetRealValue(level[i][j].levelDist);
                level[i][j].fNode = false;
            }
        }
    }
    qq.Enqueue(level[level.Count - 1][0]);
    while (qq.Count != 0)
    {
        h = qq.Dequeue();
        if (h.joined != null)
        {
            foreach (var item in h.joined)
            {
                item.levelNum = h.levelNum + 1;
                qq.Enqueue(item);
            }
        }
    }

    outCl.hNode.dirName = dirName;
    outCl.clusters = null;
    outCl.juryLike = null;
    currentV = maxV;
    outCl.runParameters = hierOpt.GetVitalParameters();
    return outCl;
}
public ClusterOutput OrgClustering()
{
    int[] count;
    int[] index;
    bool end;
    ClusterOutput output = new ClusterOutput();
    List<List<string>> clusters = new List<List<string>>();
    List<string> items;

    pointMark = new bool[dMeasure.structNames.Count];
    for (int i = 0; i < pointMark.Length; i++)
        pointMark[i] = false;

    progressRead = 1;
    dMeasure.CalcDistMatrix(new List<string>(dMeasure.structNames.Keys));
    maxV = dMeasure.structNames.Count;
    count = new int[dMeasure.structNames.Count];
    index = new int[dMeasure.structNames.Count];
    end = false;

    while (!end)
    {
        // Count, for every unused structure, how many unused neighbours lie within the threshold.
        for (int i = 0; i < pointMark.Length; i++)
        {
            count[i] = 0;
            index[i] = i;   // rebuild the identity permutation every round
            if (pointMark[i])
                continue;
            for (int j = 0; j < pointMark.Length; j++)
                if (!pointMark[j] && dMeasure.GetDistance(i, j) < threshold)
                    count[i]++;
        }
        // The structure with the most neighbours seeds the next cluster.
        Array.Sort<int>(index, (a, b) => count[b].CompareTo(count[a]));
        if (count[index[0]] < minCluster)
        {
            end = true;
            break;
        }
        items = CreateCluster(index[0]);
        if (items.Count > minCluster)
            clusters.Add(items);
        else
            end = true;
        currentV += items.Count;
    }
    output.clusters = clusters;
    currentV = maxV;
    return output;
}
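// Compact standalone sketch of the thresholding idea in OrgClustering (illustrative helper,
// not the original API; assumes a symmetric integer distance matrix): repeatedly pick the
// structure with the most neighbours within 'threshold', emit that neighbourhood as a
// cluster, and mark its members as used.
static List<List<int>> ThresholdClusters(int[,] dist, int threshold, int minCluster)
{
    int n = dist.GetLength(0);
    bool[] used = new bool[n];
    List<List<int>> clusters = new List<List<int>>();
    while (true)
    {
        int best = -1, bestCount = -1;
        for (int i = 0; i < n; i++)
        {
            if (used[i]) continue;
            int count = 0;
            for (int j = 0; j < n; j++)
                if (!used[j] && dist[i, j] < threshold)
                    count++;
            if (count > bestCount) { bestCount = count; best = i; }
        }
        if (bestCount < minCluster)
            break;   // the densest remaining neighbourhood is too small to form a cluster
        List<int> cluster = new List<int>();
        for (int j = 0; j < n; j++)
            if (!used[j] && dist[best, j] < threshold) { cluster.Add(j); used[j] = true; }
        clusters.Add(cluster);
    }
    return clusters;
}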