/// <summary>
/// Counts, for every ordered pair of clusters, the graph edges that lead from a node
/// of the first cluster to a node of the second. The tally is built but not used yet:
/// the partition is handed back unchanged.
/// </summary>
/// <param name="partition">Partition whose clusters are tallied.</param>
/// <param name="minK">Not read by this implementation.</param>
/// <returns>The same <paramref name="partition"/> instance, unmodified.</returns>
public Partition combineClusters(Partition partition, int minK)
{
    int clusterCount = partition.Clusters.Count;
    int[,] connections = new int[clusterCount, clusterCount];
    LightWeightGraph graph = (LightWeightGraph)_data;

    // Lookup table: node id -> cluster id recorded on the clustered item.
    int nodeCount = graph.Nodes.Count();
    int[] nodeToCluster = new int[nodeCount];
    foreach (var cluster in partition.Clusters)
    {
        foreach (var point in cluster.Points)
        {
            nodeToCluster[point.Id] = point.ClusterId;
        }
    }

    // Walk every adjacency list and bump the (source cluster, target cluster) cell.
    for (int node = 0; node < nodeCount; node++)
    {
        int fromCluster = nodeToCluster[node];
        foreach (int neighbour in graph.Nodes[node].Edge)
        {
            int toCluster = nodeToCluster[neighbour];
            connections[fromCluster, toCluster]++;
        }
    }

    return partition;
}
/// <summary>
/// Loads the point set from <c>pointSetFile</c>, builds its distance matrix and sorted
/// distance list, finds the minimum-connectivity neighbour count and initialises the
/// labels and track bars from those values.
/// </summary>
/// <param name="distType">Distance measure used to build the distance matrix.</param>
private void calculateNearestNeighborGraph(KPoint.DistType distType)
{
    points = new PointSet(pointSetFile);

    // Prefix = file name up to its first '.', or the whole name when it has no dot
    // (Substring(0, -1) would throw for an extension-less file).
    int dotIndex = pointSetFileShort.IndexOf('.');
    String baseName = dotIndex >= 0 ? pointSetFileShort.Substring(0, dotIndex) : pointSetFileShort;
    graphPrefix = baseName + "_" + distType.ToString() + "_KNN_";

    // Distance matrix and the sorted list of distances
    distMatrix = points.GetDistanceMatrix(distType);
    distances = distMatrix.GetSortedDistanceList();

    minConnectIndex = LightWeightGraph.BinSearchKNNMinConnectivity(2, points.Count - 1, points.Count, distMatrix);
    label1.Text = String.Format("Minimum Connectivity:({0} Neighbors)", minConnectIndex);

    float sum = distances.Cast<float>().Sum();
    sum /= distances.Count;
    label4.Text = "Mean Dist:" + sum;

    // Track bar 1 covers [2, minConnectIndex], track bar 2 covers [minConnectIndex, points.Count - 1];
    // both start at the minimum-connectivity value.
    trackBar1.Minimum = 2;
    trackBar1.Maximum = minConnectIndex;
    trackBar2.Minimum = minConnectIndex;
    trackBar2.Maximum = points.Count - 1;
    trackBar1.Value = trackBar2.Value = minConnectIndex;

    distMin.Text = String.Format("Min:({0} Neighbors)", trackBar1.Value);
    distMax.Text = String.Format("Max:({0} Neighbors)", trackBar2.Value);
}
/// <summary>
/// Turns the connected components of the attacked (and reassigned) graph into clusters
/// and wraps them in a partition whose metadata lists the removed nodes.
/// </summary>
/// <returns>A partition with one cluster per component.</returns>
public Partition GetPartition()
{
    LightWeightGraph attacked = VATResult.GetAttackedGraphWithReassignment();
    List<List<int>> components = attacked.GetComponents();

    // One cluster per component, numbered in component order.
    List<Cluster> clusters = new List<Cluster>();
    int clusterId = 0;
    foreach (List<int> component in components)
    {
        Cluster cluster = new Cluster(clusterId);
        foreach (var node in component)
        {
            cluster.AddPoint(new ClusteredItem(node));
        }
        clusters.Add(cluster);
        clusterId++;
    }

    // Metadata: removal count followed by the comma-separated removal order prefix.
    String header = "VATClust: \nRemoved Count:" + VATResult.numNodesRemoved + "\n";
    String removedNodes = String.Join(",", VATResult.nodeRemovalOrder.GetRange(0, VATResult.numNodesRemoved));
    String meta = header + removedNodes;

    return new Partition(clusters, _distanceMatrix, meta);
}
/// <summary>
/// Creates a partition over a graph dataset.
/// </summary>
/// <param name="clusters">Clusters that make up the partition.</param>
/// <param name="graph">Graph the clusters refer to.</param>
/// <param name="m">Free-form metadata text; empty by default.</param>
public Partition(List<Cluster> clusters, LightWeightGraph graph, String m = "")
{
    this.Clusters = clusters;
    this.MetaData = m;
    this.Graph = graph;

    // Mark the partition as graph-backed.
    this.PartitionDataType = DataType.Graph;
}
/// <summary>
/// Builds the minimum KNN graph of "iris.txt", computes edge betweenness centrality and
/// prints one "source target = value" line for every adjacency entry that has an index
/// in the edge map. Waits for a key press before exiting.
/// </summary>
static void Main(string[] args)
{
    PointSet irisPoints = new PointSet("iris.txt");
    LightWeightGraph irisGraph = LightWeightGraph.GetMinKnnGraph(irisPoints.GetDistanceMatrix());

    var edgeIndex = irisGraph.GetEdgeIndexMap();
    float[] edgeCentrality = NetMining.Graphs.BetweenessCentrality.BrandesBcEdges(irisGraph);

    for (int node = 0; node < irisGraph.NumNodes; node++)
    {
        foreach (int neighbour in irisGraph.Nodes[node].Edge)
        {
            KeyValuePair<int, int> edge = new KeyValuePair<int, int>(node, neighbour);

            // Only pairs present in the edge map have a centrality slot.
            if (!edgeIndex.ContainsKey(edge))
            {
                continue;
            }

            Console.WriteLine("{0} {1} = {2}", edge.Key, edge.Value, edgeCentrality[edgeIndex[edge]]);
        }
    }

    Console.ReadKey();
}
/// <summary>
/// Reads a .cluster file into a partition.
/// </summary>
/// <remarks>
/// Layout as consumed by this constructor:
/// line 1 is "&lt;dataType&gt; &lt;dataFileName&gt;" (dataType is Points, DistanceMatrix or Graph);
/// line 2 is split on spaces and its second token is parsed as the cluster count;
/// then, per cluster, one line with the item count and one line with space-separated item ids.
/// For a Graph whose extension is neither "gml" nor "graph", Data is left unassigned.
/// </remarks>
/// <param name="filename">Path of the .cluster file; relative paths and forward slashes are accepted.</param>
/// <exception cref="InvalidDataException">The data type on the first line is not recognised.</exception>
public Partition(String filename)
{
    Clusters = new List<Cluster>();

    using (StreamReader sr = new StreamReader(filename))
    {
        String dataString = sr.ReadLine();
        int separator = dataString.IndexOf(' ');
        String dataType = dataString.Substring(0, separator);
        String dataFileName = dataString.Substring(separator + 1);

        // The former unused "folder" local (Substring up to the last '\\') was removed:
        // it threw ArgumentOutOfRangeException for any path without a backslash.

        // Load the dataset the clusters refer to
        switch (dataType)
        {
            case "Points":
                Data = new PointSet(dataFileName);
                break;
            case "DistanceMatrix":
                Data = new DistanceMatrix(dataFileName);
                break;
            case "Graph":
                String extension = dataFileName.Substring(dataFileName.LastIndexOf('.') + 1);
                if (extension == "gml")
                {
                    Data = LightWeightGraph.GetGraphFromGML(dataFileName);
                }
                else if (extension == "graph")
                {
                    Data = LightWeightGraph.GetGraphFromFile(dataFileName);
                }
                break;
            default:
                throw new InvalidDataException("dataType");
        }

        // Parse the clusters
        String line = sr.ReadLine();
        int numClusters = int.Parse(line.Split(' ')[1]);
        for (int i = 0; i < numClusters; i++)
        {
            Cluster cluster = new Cluster(i);
            int numItems = int.Parse(sr.ReadLine());
            line = sr.ReadLine();
            String[] split = line.Split(' ');
            for (int k = 0; k < numItems; k++)
            {
                int pointIndex = int.Parse(split[k]);
                cluster.AddPoint(new ClusteredItem(pointIndex));
            }
            Clusters.Add(cluster);
        }
    }
}
/// <summary>
/// Stores the clustering settings, builds the minimum KNN graph of the distance matrix
/// and runs VAT on it.
/// </summary>
/// <param name="distanceMatrix">Distance matrix the graph is generated from.</param>
/// <param name="useWeights">Copied to the generated graph's IsWeighted flag.</param>
/// <param name="knnGraph">Stored in KNNGraph; the graph built here is always the minimum KNN graph.</param>
/// <param name="knnOffset">Offset passed to GetMinKnnGraph.</param>
/// <param name="alpha">Alpha value forwarded to VAT.</param>
/// <param name="beta">Beta value forwarded to VAT.</param>
public VATClust(DistanceMatrix distanceMatrix, Boolean useWeights, Boolean knnGraph, int knnOffset = 0, float alpha = 1.0f, float beta = 0.0f)
{
    // Remember the configuration
    _distanceMatrix = distanceMatrix;
    _alpha = alpha;
    _beta = beta;
    UseWeights = useWeights;
    KNNGraph = knnGraph;

    // Graph construction
    LightWeightGraph minKnnGraph = LightWeightGraph.GetMinKnnGraph(_distanceMatrix, knnOffset);
    minKnnGraph.IsWeighted = UseWeights;

    // VAT run
    VATResult = new VAT(minKnnGraph, _alpha, _beta);
}
// Lets the user pick a point-set file, then recomputes the distance data and
// re-initialises the CAST tab's labels and track bars from it.
private void button16_Click(object sender, EventArgs e)
{
    // Nothing to do unless the dialog was confirmed.
    if (openFileDialog1.ShowDialog() != System.Windows.Forms.DialogResult.OK)
    {
        return;
    }

    // Lock the controls while their ranges are being rebuilt.
    trackBar3.Enabled = false;
    trackBar4.Enabled = false;
    button17.Enabled = false;

    // Mirror the chosen path into every file text box.
    String chosenFile = openFileDialog1.FileName;
    textBox6.Text = chosenFile;
    textBox4.Text = chosenFile;
    textBox3.Text = chosenFile;
    textBox2.Text = chosenFile;
    textBox1.Text = chosenFile;

    pointSetFile = textBox6.Text;
    pointSetFileShort = openFileDialog1.SafeFileName;
    points = new PointSet(pointSetFile);

    // Distance matrix and sorted distance list
    distMatrix = points.GetDistanceMatrix();
    distances = distMatrix.GetSortedDistanceList();

    // Index of the minimum-connectivity distance
    int lastIndex = distances.Count - 1;
    minConnectIndex = LightWeightGraph.BinSearchGeoMinConnectivity(0, lastIndex, distMatrix.Count, distMatrix, distances);
    label25.Text = String.Format("Minimum Connectivity:({0})={1}", minConnectIndex, distances[minConnectIndex]);

    float meanDistance = distances.Sum();
    meanDistance /= distances.Count;
    label22.Text = "Mean Dist:" + meanDistance;

    // Track bar 4 spans [0, minConnectIndex]; track bar 3 spans [minConnectIndex, last index].
    trackBar4.Minimum = 0;
    trackBar4.Maximum = minConnectIndex;
    trackBar3.Minimum = minConnectIndex;
    trackBar3.Maximum = distances.Count - 1;
    trackBar4.Value = trackBar3.Value = minConnectIndex;

    label24.Text = String.Format("Min:({0}) {1}", trackBar4.Value, distances[trackBar4.Value]);
    label23.Text = String.Format("Max:({0}) {1}", trackBar3.Value, distances[trackBar3.Value]);

    trackBar3.Enabled = true;
    trackBar4.Enabled = true;
    button17.Enabled = true;
}
/// <summary>
/// Builds one output record per node of both graphs, flags the records whose name also
/// occurs in <paramref name="outData"/> with group "1" (all others get "0"), and returns
/// them sorted by group (descending) and then by name.
/// </summary>
/// <param name="healthy">Graph whose nodes are listed first.</param>
/// <param name="infected">Graph whose nodes are listed second.</param>
/// <param name="outData">Records naming the bacteria that belong to the selected group.</param>
/// <returns>The combined, sorted record list.</returns>
private static List<DataOutStruct> CombineOuts(LightWeightGraph healthy, LightWeightGraph infected, List<DataOutStruct> outData)
{
    // Collect the selected names once so each node is checked with a hash lookup
    // instead of a scan over outData. Null names are skipped: they can never match a node.
    HashSet<string> selectedNames = new HashSet<string>(StringComparer.Ordinal);
    foreach (DataOutStruct entry in outData)
    {
        if (entry.Bacteria != null)
        {
            selectedNames.Add(entry.Bacteria);
        }
    }

    // Creates the record list for one graph, already flagged.
    List<DataOutStruct> BuildRecords(LightWeightGraph graph)
    {
        List<DataOutStruct> records = new List<DataOutStruct>();
        foreach (var node in graph.Nodes)
        {
            string name = node.sharedName;
            bool isSelected = name != null && selectedNames.Contains(name);
            records.Add(new DataOutStruct
            {
                GroupNum = isSelected ? "1" : "0",
                Bacteria = name,
                ClusterType = ""
            });
        }
        return records;
    }

    List<DataOutStruct> healthyNodes = BuildRecords(healthy);
    List<DataOutStruct> infectedNodes = BuildRecords(infected);

    // Union already removes duplicates, so no extra Distinct() is needed.
    return healthyNodes.Union(infectedNodes)
                       .OrderByDescending(x => x.GroupNum)
                       .ThenBy(x => x.Bacteria)
                       .ToList();
}
/// <summary>
/// Builds a partition with one cluster per connected component of the graph.
/// Each clustered item is created from the label of the corresponding node.
/// </summary>
/// <param name="lwg">Graph to split into components.</param>
/// <returns>A graph-backed partition over <paramref name="lwg"/>.</returns>
public static Partition GetPartition(LightWeightGraph lwg)
{
    List<List<int>> components = lwg.GetComponents();
    List<Cluster> clusters = new List<Cluster>();

    int clusterId = 0;
    foreach (List<int> component in components)
    {
        Cluster cluster = new Cluster(clusterId);
        foreach (var nodeIndex in component)
        {
            var label = lwg[nodeIndex].Label;
            cluster.AddPoint(new ClusteredItem(label));
        }
        clusters.Add(cluster);
        clusterId++;
    }

    return new Partition(clusters, lwg);
}
/// <summary>
/// Clusters the healthy and the infected graph with the integrity, tenacity and VAT
/// methods (each asked for one cluster more than the graph currently has), saves every
/// resulting partition under "&lt;working dir&gt;/Data" and allocates the per-method count arrays.
/// </summary>
/// <param name="healthy">Healthy network.</param>
/// <param name="infected">Infected network.</param>
/// <param name="healthyClusters">Current cluster count of the healthy network.</param>
/// <param name="infectedClusters">Current cluster count of the infected network.</param>
/// <param name="healthyfile">Base file name used for the healthy partitions.</param>
/// <param name="infectedfile">Base file name used for the infected partitions.</param>
/// <returns>The populated <c>GeneralCluster</c>.</returns>
private static GeneralCluster ReturnClusterAndPartition(LightWeightGraph healthy, LightWeightGraph infected, int healthyClusters, int infectedClusters, String healthyfile, String infectedfile)
{
    String workingDir = Directory.GetCurrentDirectory();
    GeneralCluster cluster = new GeneralCluster();

    // NOTE(review): Main saves the .graph files into "<working dir>/Data", while the
    // graph paths passed to SavePartition below omit "/Data" - confirm which is intended.

    // Integrity
    cluster.Int0.Cluster = new HIntegrityClust(healthy, healthyClusters + 1, false, 1, 0, false, false);
    cluster.Int0.Partition = cluster.Int0.Cluster.GetPartition();
    cluster.Int0.Partition.SavePartition($"{workingDir}/Data/{healthyfile}_INT.cluster", $"{workingDir}/{healthyfile}.graph");
    cluster.Int1.Cluster = new HIntegrityClust(infected, infectedClusters + 1, false, 1, 0, false, false);
    cluster.Int1.Partition = cluster.Int1.Cluster.GetPartition();
    cluster.Int1.Partition.SavePartition($"{workingDir}/Data/{infectedfile}_INT.cluster", $"{workingDir}/{infectedfile}.graph");
    cluster.HealthyIntCount = new int[cluster.Int0.Partition.DataCount];
    cluster.InfectedIntCount = new int[cluster.Int1.Partition.DataCount];

    // Tenacity
    cluster.Ten0.Cluster = new HTenacityClust(healthy, healthyClusters + 1, false, 1, 0, false, false);
    cluster.Ten0.Partition = cluster.Ten0.Cluster.GetPartition();
    cluster.Ten0.Partition.SavePartition($"{workingDir}/Data/{healthyfile}_TEN.cluster", $"{workingDir}/{healthyfile}.graph");
    cluster.Ten1.Cluster = new HTenacityClust(infected, infectedClusters + 1, false, 1, 0, false, false);
    cluster.Ten1.Partition = cluster.Ten1.Cluster.GetPartition();
    cluster.Ten1.Partition.SavePartition($"{workingDir}/Data/{infectedfile}_TEN.cluster", $"{workingDir}/{infectedfile}.graph");
    cluster.HealthyTenCount = new int[cluster.Ten0.Partition.DataCount];
    cluster.InfectedTenCount = new int[cluster.Ten1.Partition.DataCount];

    // VAT. The healthy partition is written under the healthy file name; it used to be
    // saved under the infected name and was then overwritten by the infected partition.
    cluster.Vat0.Cluster = new HVATClust(healthy, healthyClusters + 1, false, 1, 0, false, false);
    cluster.Vat0.Partition = cluster.Vat0.Cluster.GetPartition();
    cluster.Vat0.Partition.SavePartition($"{workingDir}/Data/{healthyfile}_VAT.cluster", $"{workingDir}/{healthyfile}.graph");
    cluster.Vat1.Cluster = new HVATClust(infected, infectedClusters + 1, false, 1, 0, false, false);
    cluster.Vat1.Partition = cluster.Vat1.Cluster.GetPartition();
    cluster.Vat1.Partition.SavePartition($"{workingDir}/Data/{infectedfile}_VAT.cluster", $"{workingDir}/{infectedfile}.graph");
    cluster.HealthyVatCount = new int[cluster.Vat0.Partition.DataCount];
    cluster.InfectedVatCount = new int[cluster.Vat1.Partition.DataCount];

    return cluster;
}
// Generates one graph per step between the two track bar values and saves each as
// .gml and .graph next to the point-set file.
private void button2_Click(object sender, EventArgs e)
{
    // A step below 1 would divide by zero here and never advance the loop below,
    // so the step is clamped to at least 1.
    int step = Math.Max(1, (int)numericUpDown1.Value);
    int numGraphs = (trackBar2.Value - trackBar1.Value) / step;

    // Sanity check on the GUI: ask before generating a large batch.
    if (numGraphs > 20 && MessageBox.Show("Are you sure you want to generate " + numGraphs + " different graphs?", "Generate Graphs", MessageBoxButtons.YesNo) == System.Windows.Forms.DialogResult.No)
    {
        return;
    }

    for (int i = trackBar1.Value; i <= trackBar2.Value; i += step)
    {
        LightWeightGraph lwg = null;
        if (embeddingComboBox.SelectedIndex == 0)
        {
            // Geometric graph: i indexes the sorted distance list.
            lwg = LightWeightGraph.GetGeometricGraph(distMatrix, distances[i]);
        }
        else if (embeddingComboBox.SelectedIndex == 1)
        {
            // KNN graph: i is the neighbour count.
            lwg = LightWeightGraph.GetKNNGraph(distMatrix, i);
        }
        else
        {
            // NOTE(review): this passes trackBar1.Value rather than i, so every
            // iteration builds the same stacked MST - confirm this is intended.
            lwg = LightWeightGraph.GetStackedMST(distMatrix, (int)trackBar1.Value);
        }

        // Save GML
        String folder = pointSetFile.Substring(0, pointSetFile.LastIndexOf('\\'));
        lwg.SaveGML(folder + "\\" + graphPrefix + i + ".gml");
        openFileDialog2.InitialDirectory = folder;

        // Save Graph format
        lwg.SaveGraph(folder + "\\" + graphPrefix + i + ".graph");
    }

    MessageBox.Show("Graphs have been Generated!");
}
/// <summary>
/// Loads the point set from <c>pointSetFile</c>, builds its distance matrix and sorted
/// distance list, finds the index of the minimum-connectivity distance and initialises
/// the labels and track bars from those values.
/// </summary>
/// <param name="distType">Distance measure used to build the distance matrix.</param>
private void CalculateMinConnectivityGeoGraph(KPoint.DistType distType)
{
    points = new PointSet(pointSetFile);

    // Prefix = file name up to its first '.', or the whole name when it has no dot
    // (Substring(0, -1) would throw for an extension-less file).
    int dotIndex = pointSetFileShort.IndexOf('.');
    String baseName = dotIndex >= 0 ? pointSetFileShort.Substring(0, dotIndex) : pointSetFileShort;
    graphPrefix = baseName + "_" + distType.ToString() + "_";

    // Distance matrix and the sorted list of distances
    distMatrix = points.GetDistanceMatrix(distType);
    distances = distMatrix.GetSortedDistanceList();

    // Find minimum connectivity
    int pointCount = points.Count;
    minConnectIndex = LightWeightGraph.BinSearchGeoMinConnectivity(0, distances.Count - 1, pointCount, distMatrix, distances);
    label1.Text = String.Format("Minimum Connectivity:({0})={1}", minConnectIndex, distances[minConnectIndex]);

    // Mean distance, accumulated in single precision.
    float sum = 0;
    foreach (float dist in distances)
    {
        sum += dist;
    }
    sum /= distances.Count;
    label4.Text = "Mean Dist:" + sum;

    // Track bar 1 spans [0, minConnectIndex]; track bar 2 spans [minConnectIndex, last index].
    trackBar1.Minimum = 0;
    trackBar1.Maximum = minConnectIndex;
    trackBar2.Minimum = minConnectIndex;
    trackBar2.Maximum = distances.Count - 1;
    trackBar1.Value = trackBar2.Value = minConnectIndex;

    distMin.Text = String.Format("Min:({0}) {1}", trackBar1.Value, distances[trackBar1.Value]);
    distMax.Text = String.Format("Max:({0}) {1}", trackBar2.Value, distances[trackBar2.Value]);
}
/// <summary>
/// Entry point of the IBD automation: loads a healthy and an infected network from GML,
/// stores them (plus a node-name list each) under "&lt;working dir&gt;/Data", determines the
/// initial cluster count of each graph, builds the requested output group and writes
/// one "name group" line per bacterium to the output file.
/// </summary>
/// <param name="args">Healthy GML path, infected GML path, output file, output group.</param>
static void Main(string[] args)
{
    if (args.Length != 4)
    {
        Console.WriteLine(
            "Usage: Program.cs <Healthyfile> <Infectedfile> <Outputfile> <Group> ");
        Environment.Exit(0);
    }

    // AUTOMATING IBD
    // We need both a healthy network and an IBD network
    String healthyfile = BackSlashRemover(args[0]);
    String infectedfile = BackSlashRemover(args[1]);

    String workingDir = Directory.GetCurrentDirectory();
    String datapath = workingDir + "/Data";
    datapath = BackSlashRemover(datapath);
    if (!Directory.Exists(datapath))
    {
        Directory.CreateDirectory(datapath);
    }

    // An output name without any directory part is placed in the working directory.
    String outPath = BackSlashRemover(args[2]);
    if (outPath.Split('/').Length == 1)
    {
        outPath = $"{workingDir}/{outPath}";
    }

    // Base name = last path segment up to its first '.'. This is derived for every input:
    // previously a path without '/' left the name empty, so both graphs were written
    // to the same ".graph" / ".txt" files.
    String healthyFileName = healthyfile.Split('/').Last().Split('.').First();
    String infectedFileName = infectedfile.Split('/').Last().Split('.').First();

    // Convert from gml to graph
    LightWeightGraph healthy = LightWeightGraph.GetGraphFromGML(healthyfile);
    LightWeightGraph infected = LightWeightGraph.GetGraphFromGML(infectedfile);
    healthy.SaveGraph($"{datapath}/{healthyFileName}.graph");
    infected.SaveGraph($"{datapath}/{infectedFileName}.graph");

    // Makes a list of what the nodes reference (the files are opened in append mode)
    using (StreamWriter sw = new StreamWriter($"{datapath}/{healthyFileName}.txt", true))
    {
        for (int i = 0; i < healthy.Nodes.Length; i++)
        {
            sw.WriteLine(healthy.Nodes[i].sharedName);
        }
    }
    using (StreamWriter sw = new StreamWriter($"{datapath}/{infectedFileName}.txt", true))
    {
        for (int i = 0; i < infected.Nodes.Length; i++)
        {
            sw.WriteLine(infected.Nodes[i].sharedName);
        }
    }

    // We don't actually know the number of clusters in each graph - we want to cluster
    // for 1 more than we start with, so cluster for 1 just to get the current count.
    HVATClust healthyClust1 = new HVATClust(healthy, 1, false, 1, 0, false, false);
    Partition t1 = healthyClust1.GetPartition();
    int healthyClusters = t1.Clusters.Count;

    HVATClust infectedClust1 = new HVATClust(infected, 1, false, 1, 0, false, false);
    Partition t2 = infectedClust1.GetPartition();
    int infectedClusters = t2.Clusters.Count;

    // Now we know the initial number of clusters, do the actual clustering.
    // args.Length is always 4 here, so the old re-check and its else branch were unreachable.
    List<DataOutStruct> outData = ConstructList(args[3], healthy, infected, healthyFileName, infectedFileName, healthyClusters, infectedClusters);

    // Arguments follow CombineOuts' (healthy, infected, ...) parameter order.
    outData = CombineOuts(healthy, infected, outData);

    using (StreamWriter sw = new StreamWriter(outPath))
    {
        for (int i = 0; i < outData.Count; i++)
        {
            sw.WriteLine($"{outData[i].Bacteria} {outData[i].GroupNum}");
        }
    }

    Console.WriteLine("Done.");
    Console.WriteLine($"Output in: {outPath}");
}
/// <summary>
/// combineClusters is used when the partitioning achieved has too many clusters.
/// It repeatedly merges the pair of clusters with the highest connection score
/// (edges from the first cluster to the second, divided by the product of the two
/// cluster sizes) until <paramref name="minK"/> clusters remain or no pair of clusters
/// is connected any more.
/// </summary>
/// <remarks>
/// The partition is modified in place. After every merge the points of each cluster are
/// sorted and their ClusterId is set to the cluster's position in the list.
/// </remarks>
/// <param name="partition">A partitioning of a graph with any number of clusters</param>
/// <param name="minK">The desired number of clusters</param>
/// <returns>The same partition instance, merged down as far as possible towards minK clusters</returns>
public Partition combineClusters(Partition partition, int minK)
{
    // we want to do (partition.Clusters.Count - minK) merges
    int mergesWanted = partition.Clusters.Count - minK;
    if (mergesWanted <= 0)
    {
        return partition;
    }

    LightWeightGraph g = (LightWeightGraph)_data;
    int nodeCount = g.Nodes.Count();

    for (int numMerges = 0; numMerges < mergesWanted; numMerges++)
    {
        int clusterCount = partition.Clusters.Count;

        // For quick reference, record which cluster each node is in. The cluster's list
        // position is used (not the stored ClusterId) because the scoring below indexes
        // clusters by position.
        int[] clustAssignments = new int[nodeCount];
        for (int i = 0; i < clusterCount; i++)
        {
            foreach (var point in partition.Clusters[i].Points)
            {
                clustAssignments[point.Id] = i;
            }
        }

        // Go through each node and count its edges out to each cluster.
        int[,] connections = new int[clusterCount, clusterCount];
        for (int i = 0; i < nodeCount; i++)
        {
            int currentCluster = clustAssignments[i];
            foreach (int adjacentNode in g.Nodes[i].Edge)
            {
                connections[currentCluster, clustAssignments[adjacentNode]]++;
            }
        }

        // Find the pair (i < j) with the largest size-normalised connection count.
        int largestI = 0;
        int largestJ = 0;
        double largestValue = 0;
        for (int i = 0; i < clusterCount; i++)
        {
            int sizeI = partition.Clusters[i].Points.Count;
            for (int j = i + 1; j < clusterCount; j++)
            {
                int sizeJ = partition.Clusters[j].Points.Count;

                // The product is formed in double so that large clusters cannot overflow int.
                double score = connections[i, j] / ((double)sizeI * sizeJ);
                if (score > largestValue)
                {
                    largestValue = score;
                    largestI = i;
                    largestJ = j;
                }
            }
        }

        // If everything's zero no clusters are connected; further passes would find the
        // same result, so stop instead of repeating them.
        if (largestValue == 0)
        {
            break;
        }

        // We want to merge the smaller cluster into the larger one. This is decided once,
        // for the winning pair (it used to be toggled for every pair that was examined).
        if (partition.Clusters[largestI].Points.Count < partition.Clusters[largestJ].Points.Count)
        {
            int temp = largestI;
            largestI = largestJ;
            largestJ = temp;
        }

        // Add the points of cluster largestJ to cluster largestI, then drop largestJ.
        for (int i = 0; i < partition.Clusters[largestJ].Points.Count; i++)
        {
            partition.Clusters[largestI].Points.Add(partition.Clusters[largestJ].Points[i]);
        }
        partition.Clusters.RemoveAt(largestJ);

        // Renumber the clusters
        for (int i = 0; i < partition.Clusters.Count; i++)
        {
            partition.Clusters[i].Points.Sort();
            for (int j = 0; j < partition.Clusters[i].Points.Count; j++)
            {
                partition.Clusters[i].Points[j].ClusterId = i;
            }
        }
    }

    return partition;
}
/// <summary>
/// GetGPartition is different from GetPartition in 2 ways:
/// 1. It does not require a connected graph.
/// 2. If there are too many clusters, it combines them such that the desired number of clusters is returned
/// </summary>
/// <remarks>
/// Starts from the connected components of the graph. While there are fewer than _minK
/// clusters, a VatABC is computed for every cluster that has none yet, the cluster with
/// the smallest MinVat is split and its sub-partition is merged back into the partition.
/// Progress is written to the console and accumulated in <c>meta</c>.
/// </remarks>
/// <returns>A partitioning of the graph</returns>
public Partition GetGPartition()
{
    // Distance matrix used by the non-graph branch below; stays null for graph data.
    DistanceMatrix mat = null;
    if (_data.Type == AbstractDataset.DataType.DistanceMatrix)
    {
        mat = (DistanceMatrix)_data;
    }
    else if (_data.Type == AbstractDataset.DataType.PointSet)
    {
        mat = ((PointSet)_data).GetDistanceMatrix();
    }

    //get the actual partition (if graph not necessarily connected)
    // NOTE(review): _data is cast to LightWeightGraph unconditionally here, although the
    // branches above and below also handle DistanceMatrix / PointSet data - confirm.
    Partition partition = Partition.GetPartition((LightWeightGraph)_data);

    //Dictionary to hold VAT (keyed by cluster id)
    var vatMap = new Dictionary <int, VatABC>();

    //Dictionary to hold subset array (keyed by cluster id; ids of the points in that cluster)
    var subsetMap = new Dictionary <int, int[]>();

    while (partition.Clusters.Count < _minK)
    {
        Console.WriteLine("Count = " + partition.Clusters.Count);
        Console.WriteLine("mink = " + _minK);

        //Calculate the VAT for every cluster that does not have an entry in vatMap yet
        foreach (var c in partition.Clusters.Where(c => !vatMap.ContainsKey(c.ClusterId)))
        {
            //We must calculate a graph for this subset of data
            List <int> clusterSubset = c.Points.Select(p => p.Id).ToList();

            //Now calculate Vat
            LightWeightGraph lwg;
            if (_data.Type == AbstractDataset.DataType.Graph)
            {
                // Flag every node as excluded, then clear the flag for this cluster's points.
                bool[] exclusion = new bool[_data.Count];
                for (int i = 0; i < _data.Count; i++)
                {
                    exclusion[i] = true;
                }
                foreach (var p in c.Points)
                {
                    exclusion[p.Id] = false;
                }
                lwg = new LightWeightGraph((LightWeightGraph)_data, exclusion);
            }
            else //Distance matrix or Pointset
            {
                Debug.Assert(mat != null, "mat != null");
                var subMatrix = mat.GetReducedDataSet(clusterSubset);

                //Generate our graph
                lwg = _graphGen.GenerateGraph(subMatrix.Mat);
            }

            subsetMap.Add(c.ClusterId, clusterSubset.ToArray());
            lwg.IsWeighted = _weighted;
            VatABC v = new VatABC(lwg, _M, _k, _reassignNodes, _alpha, _beta);

            // These fields are overwritten for every cluster, so they end up holding the
            // values of the most recently computed VatABC.
            _vatNodeRemovalOrder = v.NodeRemovalOrder;
            _vatNumNodesRemoved = v.NumNodesRemoved;

            // Hill climbing (v.HillClimb()) is currently disabled.
            vatMap.Add(c.ClusterId, v);
            Console.WriteLine("Calculated Vat for cluster " + c.ClusterId);
        }

        meta.AppendLine("All calculated VATs:");

        //Now find the minimum vat value
        int minVatCluster = 0;
        double minVatValue = double.MaxValue;
        foreach (var c in vatMap)
        {
            meta.Append(String.Format("{0} ", c.Value.MinVat));
            if (c.Value.MinVat < minVatValue)
            {
                minVatCluster = c.Key;
                minVatValue = c.Value.MinVat;
            }
        }
        meta.AppendLine();

        //now merge the partition into the cluster
        var minVAT = vatMap[minVatCluster];
        var subPartition = minVAT.GetPartition();

        // nodeIndexMap translates indices used in the removal order back to the stored point ids.
        var nodeIndexMap = subsetMap[minVatCluster];
        meta.AppendFormat("Vat: MinVat={0}\r\n", minVAT.MinVat);
        meta.AppendFormat("Removed Count:{0}\r\n", minVAT.NumNodesRemoved);
        meta.AppendLine(String.Join(",", minVAT.NodeRemovalOrder.GetRange(0, minVAT.NumNodesRemoved).Select(c => nodeIndexMap[c])));
        partition.MergeSubPartition(subPartition, nodeIndexMap, minVatCluster);

        // Drop the entries of the cluster that was just split.
        vatMap.Remove(minVatCluster);
        subsetMap.Remove(minVatCluster);
        Console.WriteLine("Found min cluster");
        Console.WriteLine(meta);
    }

    partition.MetaData = meta.ToString();

    // The idea is now that we have partitions, combine them so that partition.Clusters.Count == minK
    // (the return value of combineClusters is not used here)
    if (partition.Clusters.Count > _minK)
    {
        combineClusters(partition, _minK);
    }

    return(partition);
}
/// <summary>
/// Creates a clusterer over an existing graph. All settings are forwarded to the
/// settings constructor with no graph generator (null); the graph itself is kept as the dataset.
/// </summary>
/// <param name="data">Graph to cluster.</param>
/// <param name="minK">Forwarded as the first argument of the settings constructor.</param>
/// <param name="myM">Forwarded as the third argument of the settings constructor.</param>
/// <param name="myK">Forwarded as the fourth argument of the settings constructor.</param>
/// <param name="weighted">Forwarded as the second argument of the settings constructor.</param>
/// <param name="alpha">Forwarded unchanged.</param>
/// <param name="beta">Forwarded unchanged.</param>
/// <param name="reassignNodes">Forwarded unchanged.</param>
/// <param name="hillClimb">Forwarded unchanged.</param>
public HVatABCClust(LightWeightGraph data, int minK, int myM, int myK, bool weighted, double alpha = 1.0f, double beta = 0.0f, bool reassignNodes = true, bool hillClimb = true)
    : this(minK, weighted, myM, myK, null, alpha, beta, reassignNodes, hillClimb)
{
    this._data = data;
}
/// <summary>
/// Loads the partition "&lt;saveLocation&gt;&lt;clusterfileName&gt;.cluster", repeatedly merges the
/// pair of clusters with the highest connection score (edges from the first cluster to
/// the second, divided by the product of the two cluster sizes) until
/// <paramref name="minK"/> clusters remain or no pair of clusters is connected any more,
/// and saves the result as "&lt;saveLocation&gt;&lt;clusterfileName&gt;&lt;minK&gt;.cluster".
/// </summary>
/// <param name="saveLocation">Path prefix of the cluster file; it is concatenated as-is.</param>
/// <param name="clusterfileName">Cluster file name without the ".cluster" extension.</param>
/// <param name="minK">The desired number of clusters.</param>
public static void combineClusters(String saveLocation, String clusterfileName, int minK)
{
    String clusterPath = saveLocation + clusterfileName + ".cluster";

    // Get the partition file; its data is expected to be a graph.
    Partition partition = new Partition(clusterPath);
    LightWeightGraph g = (LightWeightGraph)partition.Data;

    // Get the name of the graph file from the partition file: everything after the
    // first six characters of the header line (assumes it starts with "Graph ").
    String graphFile = "";
    using (StreamReader sr = new StreamReader(clusterPath))
    {
        String dataString = sr.ReadLine();
        graphFile = dataString.Substring(6);
    }

    // we want to do (partition.Clusters.Count - minK) merges
    int mergesWanted = partition.Clusters.Count - minK;
    int nodeCount = g.Nodes.Count();

    for (int numMerges = 0; numMerges < mergesWanted; numMerges++)
    {
        int clusterCount = partition.Clusters.Count;

        // For quick reference, record which cluster each node is in. The cluster's list
        // position is used (not the stored ClusterId) because the scoring below indexes
        // clusters by position.
        int[] clustAssignments = new int[nodeCount];
        for (int i = 0; i < clusterCount; i++)
        {
            foreach (var point in partition.Clusters[i].Points)
            {
                clustAssignments[point.Id] = i;
            }
        }

        // Go through each node and count its edges out to each cluster.
        int[,] connections = new int[clusterCount, clusterCount];
        for (int i = 0; i < nodeCount; i++)
        {
            int currentCluster = clustAssignments[i];
            foreach (int adjacentNode in g.Nodes[i].Edge)
            {
                connections[currentCluster, clustAssignments[adjacentNode]]++;
            }
        }

        // Find the pair (i < j) with the largest size-normalised connection count.
        int largestI = 0;
        int largestJ = 0;
        double largestValue = 0;
        for (int i = 0; i < clusterCount; i++)
        {
            int sizeI = partition.Clusters[i].Points.Count;
            for (int j = i + 1; j < clusterCount; j++)
            {
                int sizeJ = partition.Clusters[j].Points.Count;

                // The product is formed in double so that large clusters cannot overflow int.
                double score = connections[i, j] / ((double)sizeI * sizeJ);
                if (score > largestValue)
                {
                    largestValue = score;
                    largestI = i;
                    largestJ = j;
                }
            }
        }

        // If everything's zero no clusters are connected; further passes would find the
        // same result, so stop instead of repeating them.
        if (largestValue == 0)
        {
            break;
        }

        // We want to merge the smaller cluster into the larger one. This is decided once,
        // for the winning pair (it used to be toggled for every pair that was examined).
        if (partition.Clusters[largestI].Points.Count < partition.Clusters[largestJ].Points.Count)
        {
            int temp = largestI;
            largestI = largestJ;
            largestJ = temp;
        }

        // Add the points of cluster largestJ to cluster largestI, then drop largestJ.
        for (int i = 0; i < partition.Clusters[largestJ].Points.Count; i++)
        {
            partition.Clusters[largestI].Points.Add(partition.Clusters[largestJ].Points[i]);
        }
        partition.Clusters.RemoveAt(largestJ);

        // Renumber the clusters
        for (int i = 0; i < partition.Clusters.Count; i++)
        {
            partition.Clusters[i].Points.Sort();
            for (int j = 0; j < partition.Clusters[i].Points.Count; j++)
            {
                partition.Clusters[i].Points[j].ClusterId = i;
            }
        }
    }

    partition.SavePartition(saveLocation + clusterfileName + minK + ".cluster", graphFile);
}
/// <summary>
/// Creates a clusterer over an existing graph. All settings are forwarded to the
/// settings constructor with no graph generator (null); the graph itself is kept as the dataset.
/// </summary>
/// <param name="overlaps">Forwarded unchanged to the settings constructor.</param>
/// <param name="data">Graph to cluster.</param>
/// <param name="k">Forwarded unchanged.</param>
/// <param name="weighted">Forwarded unchanged.</param>
/// <param name="alpha">Forwarded unchanged.</param>
/// <param name="beta">Forwarded unchanged.</param>
/// <param name="reassignNodes">Forwarded unchanged.</param>
/// <param name="hillClimb">Forwarded unchanged.</param>
public HyperVATClust(List<List<int>> overlaps, LightWeightGraph data, int k, bool weighted, double alpha = 1.0f, double beta = 0.0f, bool reassignNodes = true, bool hillClimb = true)
    : this(overlaps, k, weighted, null, alpha, beta, reassignNodes, hillClimb)
{
    this._data = data;
}
/// <summary>
/// Builds the result list for the cluster type named by <paramref name="args"/>.
/// </summary>
/// <remarks>
/// The twelve basic types (G1I .. G4V) run one of G1..G4 on one clustering (Int, Ten or Vat).
/// The composite types (G13 .. G25) evaluate three basic results d1, d2, d3 in that order and return
/// the elements of (d1 union d2) that are not contained in d3, ordered by Bacteria.
/// </remarks>
/// <param name="args">Name of a <c>ClusterType</c> member; parsed case-insensitively.</param>
/// <param name="healthy">Passed to ReturnClusterAndPartition.</param>
/// <param name="infected">Passed to ReturnClusterAndPartition.</param>
/// <param name="healthyfile">Passed to ReturnClusterAndPartition and to every GroupInitializer call.</param>
/// <param name="infectedfile">Passed to ReturnClusterAndPartition and to every GroupInitializer call.</param>
/// <param name="healthyClusters">Passed to ReturnClusterAndPartition.</param>
/// <param name="infectedClusters">Passed to ReturnClusterAndPartition.</param>
/// <returns>
/// The computed list; an empty list when <paramref name="args"/> does not parse as a ClusterType or
/// parses to a value that has no case below.
/// </returns>
private static List<DataOutStruct> ConstructList(string args, LightWeightGraph healthy, LightWeightGraph infected, String healthyfile, String infectedfile, int healthyClusters, int infectedClusters)
{
    List<List<DataOutStruct>> outList = new List<List<DataOutStruct>>();
    List<DataOutStruct> dataOut = new List<DataOutStruct>();

    if (!Enum.TryParse<ClusterType>(args, ignoreCase: true, result: out var userOut))
    {
        return dataOut;
    }

    GeneralCluster cluster = ReturnClusterAndPartition(healthy, infected, healthyClusters, infectedClusters, healthyfile, infectedfile);

    switch (userOut)
    {
        case ClusterType.G1I:
        case ClusterType.G1T:
        case ClusterType.G1V:
        case ClusterType.G2I:
        case ClusterType.G2T:
        case ClusterType.G2V:
        case ClusterType.G3I:
        case ClusterType.G3T:
        case ClusterType.G3V:
        case ClusterType.G4I:
        case ClusterType.G4T:
        case ClusterType.G4V:
            dataOut = Basic(userOut);
            break;

        /////////////////////////////////////////////////////////////////////////////////////////////////
        // start of G13 - G25: (d1 union d2) without d3
        /////////////////////////////////////////////////////////////////////////////////////////////////
        case ClusterType.G13:
            dataOut = Combine(Basic(ClusterType.G1V), Basic(ClusterType.G2I), Basic(ClusterType.G2V));
            break;
        case ClusterType.G14:
            dataOut = Combine(Basic(ClusterType.G1V), Basic(ClusterType.G2T), Basic(ClusterType.G2I));
            break;
        case ClusterType.G15:
            // d1 used to be initialised from (Ten0, Ten0); every other call pairs Ten0 with Ten1,
            // so this now goes through the same G1T path as everything else.
            dataOut = Combine(Basic(ClusterType.G1T), Basic(ClusterType.G2T), Basic(ClusterType.G2I));
            break;
        case ClusterType.G16:
            // d1 used to be initialised from (Int0, Int0); see G15.
            dataOut = Combine(Basic(ClusterType.G1I), Basic(ClusterType.G2I), Basic(ClusterType.G2V));
            break;
        case ClusterType.G17:
            dataOut = Combine(Basic(ClusterType.G1V), Basic(ClusterType.G3I), Basic(ClusterType.G3V));
            break;
        case ClusterType.G18:
            dataOut = Combine(Basic(ClusterType.G1I), Basic(ClusterType.G3I), Basic(ClusterType.G3V));
            break;
        case ClusterType.G19:
            dataOut = Combine(Basic(ClusterType.G1T), Basic(ClusterType.G3I), Basic(ClusterType.G3V));
            break;
        case ClusterType.G20:
            // NOTE(review): identical to G19. By analogy with G23/G24 the second operand may have been
            // meant to be G3T - confirm against the specification before changing.
            dataOut = Combine(Basic(ClusterType.G1T), Basic(ClusterType.G3I), Basic(ClusterType.G3V));
            break;
        case ClusterType.G21:
            dataOut = Combine(Basic(ClusterType.G4V), Basic(ClusterType.G3I), Basic(ClusterType.G3V));
            break;
        case ClusterType.G22:
            dataOut = Combine(Basic(ClusterType.G4I), Basic(ClusterType.G3I), Basic(ClusterType.G3V));
            break;
        case ClusterType.G23:
            dataOut = Combine(Basic(ClusterType.G4T), Basic(ClusterType.G3I), Basic(ClusterType.G3V));
            break;
        case ClusterType.G24:
            dataOut = Combine(Basic(ClusterType.G4T), Basic(ClusterType.G3T), Basic(ClusterType.G3V));
            break;
        case ClusterType.G25:
            dataOut = Combine(Basic(ClusterType.G4V), Basic(ClusterType.G3T), Basic(ClusterType.G3V));
            break;
    }

    return dataOut;

    // Runs one of G1..G4 on one clustering (Int, Ten or Vat), selected by a basic ClusterType.
    List<DataOutStruct> Basic(ClusterType kind)
    {
        switch (kind)
        {
            case ClusterType.G1I:
            case ClusterType.G2I:
            case ClusterType.G3I:
            case ClusterType.G4I:
            {
                var init = GroupInitializer(cluster.Int0.Partition, cluster.Int1.Partition, cluster.HealthyIntCount, cluster.InfectedIntCount, healthyfile, infectedfile, OutType.Int, outList);
                switch (kind)
                {
                    case ClusterType.G1I: return G1(init);
                    case ClusterType.G2I: return G2(init, cluster.Int0.Partition, cluster.Int1.Partition, OutType.Int);
                    case ClusterType.G3I: return G3(init, cluster.Int0.Partition, cluster.Int1.Partition, OutType.Int);
                    default: return G4(init);
                }
            }

            case ClusterType.G1T:
            case ClusterType.G2T:
            case ClusterType.G3T:
            case ClusterType.G4T:
            {
                var init = GroupInitializer(cluster.Ten0.Partition, cluster.Ten1.Partition, cluster.HealthyTenCount, cluster.InfectedTenCount, healthyfile, infectedfile, OutType.Ten, outList);
                switch (kind)
                {
                    case ClusterType.G1T: return G1(init);
                    case ClusterType.G2T: return G2(init, cluster.Ten0.Partition, cluster.Ten1.Partition, OutType.Ten);
                    case ClusterType.G3T: return G3(init, cluster.Ten0.Partition, cluster.Ten1.Partition, OutType.Ten);
                    default: return G4(init);
                }
            }

            case ClusterType.G1V:
            case ClusterType.G2V:
            case ClusterType.G3V:
            case ClusterType.G4V:
            {
                var init = GroupInitializer(cluster.Vat0.Partition, cluster.Vat1.Partition, cluster.HealthyVatCount, cluster.InfectedVatCount, healthyfile, infectedfile, OutType.Vat, outList);
                switch (kind)
                {
                    case ClusterType.G1V: return G1(init);
                    case ClusterType.G2V: return G2(init, cluster.Vat0.Partition, cluster.Vat1.Partition, OutType.Vat);
                    case ClusterType.G3V: return G3(init, cluster.Vat0.Partition, cluster.Vat1.Partition, OutType.Vat);
                    default: return G4(init);
                }
            }

            default:
                throw new ArgumentOutOfRangeException(nameof(kind), kind, "Not a basic cluster type.");
        }
    }

    // (d1 union d2) without the elements of d3, ordered by Bacteria.
    List<DataOutStruct> Combine(List<DataOutStruct> d1, List<DataOutStruct> d2, List<DataOutStruct> d3)
    {
        return d1.Union(d2).Where(x => !d3.Contains(x)).OrderBy(x => x.Bacteria).Distinct().ToList();
    }
}
/// <summary>
/// Creates an HIntegrityClust that works on a graph.
/// </summary>
/// <remarks>
/// All settings are forwarded to the sibling constructor, whose third argument is passed as null;
/// afterwards <paramref name="data"/> is stored in <c>_data</c>.
/// </remarks>
/// <param name="data">The graph assigned to <c>_data</c>.</param>
/// <param name="k">Forwarded unchanged to the sibling constructor.</param>
/// <param name="weighted">Forwarded unchanged to the sibling constructor.</param>
/// <param name="alpha">Forwarded unchanged to the sibling constructor (default 1.0).</param>
/// <param name="beta">Forwarded unchanged to the sibling constructor (default 0.0).</param>
/// <param name="reassignNodes">Forwarded unchanged to the sibling constructor (default true).</param>
/// <param name="hillClimb">Forwarded unchanged to the sibling constructor (default true).</param>
public HIntegrityClust(
    LightWeightGraph data,
    int k,
    bool weighted,
    double alpha = 1.0f,
    double beta = 0.0f,
    bool reassignNodes = true,
    bool hillClimb = true)
    : this(k, weighted, null, alpha, beta, reassignNodes, hillClimb)
{
    this._data = data;
}
/// <summary>
/// combineClustersOld is used when the partitioning achieved has too many clusters.
/// </summary>
/// <remarks>
/// The cluster-to-cluster edge counts are computed once from the graph in <c>_data</c>. Up to
/// (cluster count - minK) pairs are then selected by descending connections[i, j] / (size(i) * size(j)),
/// and all selected merges are applied afterwards. Sizes and counts are not refreshed between selections.
/// </remarks>
/// <param name="partition">A partitioning of a graph with any number of clusters</param>
/// <param name="minK">The desired number of clusters</param>
/// <returns>The same partition instance, modified in place</returns>
public Partition combineClustersOld(Partition partition, int minK)
{
    int clusterCount = partition.Clusters.Count;
    int[,] connections = new int[clusterCount, clusterCount];
    LightWeightGraph g = (LightWeightGraph)_data;
    int nodeCount = g.Nodes.Length;

    // for quick reference let's make a list of which nodes are in which clusters.
    // The position of the cluster in partition.Clusters is used because the matrix is read back
    // by list position below.
    int[] clustAssignments = new int[nodeCount];
    for (int i = 0; i < clusterCount; i++)
    {
        foreach (var point in partition.Clusters[i].Points)
        {
            clustAssignments[point.Id] = i;
        }
    }

    // now go through each node and count its edges out to each cluster
    // add these edges to the connections[] matrix
    for (int i = 0; i < nodeCount; i++)
    {
        int currentCluster = clustAssignments[i];
        foreach (int adjacentNode in g.Nodes[i].Edge)
        {
            connections[currentCluster, clustAssignments[adjacentNode]]++;
        }
    }

    // we want to do (partition.Clusters.Count - minK) merges
    // keep a list of which partitions will be merged (pairs: target, absorbed)
    List<int> merges = new List<int>();
    for (int numMerges = 0; numMerges < clusterCount - minK; numMerges++)
    {
        // find the largest normalised connections[i,j] with i < j
        int largestI = 0;
        int largestJ = 0;
        double largestValue = 0;
        for (int i = 0; i < clusterCount; i++)
        {
            int sizeI = partition.Clusters[i].Points.Count;
            for (int j = i + 1; j < clusterCount; j++)
            {
                int sizeJ = partition.Clusters[j].Points.Count;

                // multiply as double so two large clusters cannot overflow int
                double score = connections[i, j] / ((double)sizeI * sizeJ);
                if (score > largestValue)
                {
                    largestValue = score;
                    largestI = i;
                    largestJ = j;
                }
            }
        }

        // if everything's zero, there is no hope ;-)
        // The matrix does not change in that case, so later passes would find the same thing.
        if (largestValue == 0)
        {
            break;
        }

        merges.Add(largestI);
        merges.Add(largestJ);

        // it is possible to merge J multiple times, if its nodes are split between clusters.
        // we only want to merge J once, so we need to zero out all largestJ
        for (int i = 0; i < clusterCount; i++)
        {
            connections[i, largestJ] = 0;
        }
    }

    // now we have the list *merges*; take 2 numbers off it at a time, the first is smaller than the
    // second, and merge the second into the first.
    // Only the column of an absorbed cluster was zeroed above, so an absorbed cluster can still be
    // selected as the *first* of a later pair. Its row is kept on purpose (the pair selection stays as
    // it was), but the points are redirected to the cluster that absorbed it; adding them to the
    // absorbed cluster would drop them when that cluster is removed below.
    int[] absorbedBy = new int[clusterCount];
    for (int i = 0; i < clusterCount; i++)
    {
        absorbedBy[i] = i;
    }

    int[] toRemove = new int[merges.Count / 2];
    for (int numMerges = 0; numMerges < merges.Count / 2; numMerges++)
    {
        int secondCluster = merges[(numMerges * 2) + 1];

        // follow the chain to the surviving cluster; it always ends at an index below secondCluster
        int firstCluster = merges[numMerges * 2];
        while (absorbedBy[firstCluster] != firstCluster)
        {
            firstCluster = absorbedBy[firstCluster];
        }

        // adds the points of the second cluster to the first cluster
        foreach (var point in partition.Clusters[secondCluster].Points)
        {
            partition.Clusters[firstCluster].Points.Add(point);
        }

        absorbedBy[secondCluster] = firstCluster;
        toRemove[numMerges] = secondCluster;
    }

    // remove all the second clusters (count from the bottom
    // so that the numbering doesn't get messed up...)
    Array.Sort(toRemove);
    for (int i = toRemove.Length - 1; i >= 0; i--)
    {
        partition.Clusters.RemoveAt(toRemove[i]);
    }

    // renumber the clusters, and the points they hold so moved points do not keep a stale id
    for (int i = 0; i < partition.Clusters.Count; i++)
    {
        partition.Clusters[i].Points.Sort();
        partition.Clusters[i].ClusterId = i;
        for (int j = 0; j < partition.Clusters[i].Points.Count; j++)
        {
            partition.Clusters[i].Points[j].ClusterId = i;
        }
    }

    return partition;
}
/// <summary>
/// Builds a weighted graph consisting of the MST of <paramref name="d"/> plus randomly chosen extra edges.
/// </summary>
/// <remarks>
/// The cutoff is the heaviest MST edge multiplied by <c>_cutoffProp</c>. Every node pair that is not an
/// MST edge and is not at or beyond the cutoff is added with probability
/// <c>_normAlpha / exp(_xScale * dist / cutoff)</c>; one random number is drawn per such pair.
/// </remarks>
/// <param name="d">Distance matrix the graph is built from.</param>
/// <returns>A new graph whose edge weights are the distances taken from <paramref name="d"/> and the MST.</returns>
public LightWeightGraph GetNormalizedRandomGraph(DistanceMatrix d)
{
    int nodeCount = d.Count;

    LightWeightGraph spanningTree = LightWeightGraph.GetStackedMST(d, 1);
    LightWeightGraph.LightWeightNode[] treeNodes = spanningTree.Nodes;

    // Per-node adjacency/weight lists, seeded with the MST edges so extra edges can be appended later.
    // The heaviest MST edge is tracked on the way and becomes the basis of the cutoff.
    var adjacency = new List<int>[nodeCount];
    var edgeWeights = new List<double>[nodeCount];
    double cutoff = 0.0;
    for (int n = 0; n < nodeCount; n++)
    {
        adjacency[n] = new List<int>();
        edgeWeights[n] = new List<double>();
        for (int k = 0; k < treeNodes[n].Edge.Length; k++)
        {
            cutoff = Math.Max(cutoff, spanningTree[n].EdgeWeights[k]);
            adjacency[n].Add(treeNodes[n].Edge[k]);
            edgeWeights[n].Add(treeNodes[n].EdgeWeights[k]);
        }
    }
    cutoff *= _cutoffProp;

    // Collect every candidate edge (a < b) together with its acceptance probability.
    var candidates = new List<oneNode>();
    for (int a = 0; a < nodeCount - 1; a++)
    {
        for (int b = a + 1; b < nodeCount; b++)
        {
            double dist = d[a, b];
            bool atOrBeyondCutoff = dist >= cutoff;
            if (!atOrBeyondCutoff && !treeNodes[a].Edge.Contains(b))
            {
                //Probability function goes here
                double addlProb = _normAlpha * (1.0 / Math.Exp(_xScale * dist / cutoff));
                candidates.Add(new oneNode { prob = addlProb, fromNode = a, toNode = b, alreadyExists = false });
            }
        }
    }

    // One draw per candidate, in collection order; accepted candidates become undirected edges.
    foreach (oneNode candidate in candidates)
    {
        double rand = Utility.Util.Rng.NextDouble();
        if (candidate.prob > rand && !candidate.alreadyExists)
        {
            int from = candidate.fromNode;
            int to = candidate.toNode;
            double dist = d[from, to];

            adjacency[from].Add(to);
            edgeWeights[from].Add(dist);
            adjacency[to].Add(from);
            edgeWeights[to].Add(dist);
            candidate.alreadyExists = true;
        }
    }

    var nodes = new LightWeightGraph.LightWeightNode[nodeCount];
    for (int n = 0; n < nodeCount; n++)
    {
        nodes[n] = new LightWeightGraph.LightWeightNode(n, true, adjacency[n], edgeWeights[n]);
    }

    return new LightWeightGraph(nodes, true);
}
/// <summary>
/// Splits the data set until the partition holds at least <c>_minK</c> clusters.
/// </summary>
/// <remarks>
/// Starts from a single cluster containing every point. On each round an <c>Integrity</c> object is
/// computed for every cluster that does not have one yet, the cluster with the smallest MinVat is
/// selected, and its sub-partition is merged back into the overall partition. A log of each round is
/// appended to <c>meta</c> and stored in the partition's MetaData.
/// </remarks>
/// <returns>The resulting partition.</returns>
public Partition GetPartition()
{
    // A distance matrix only exists for DistanceMatrix and PointSet data; for other data it stays null.
    DistanceMatrix mat = null;
    switch (_data.Type)
    {
        case AbstractDataset.DataType.DistanceMatrix:
            mat = (DistanceMatrix)_data;
            break;
        case AbstractDataset.DataType.PointSet:
            mat = ((PointSet)_data).GetDistanceMatrix();
            break;
    }

    //Setup our partition with a single cluster, with all points
    var clusterList = new List<Cluster> { new Cluster(0, Enumerable.Range(0, _data.Count).ToList()) };
    var partition = new Partition(clusterList, _data);

    // Integrity result per cluster id, and the original node ids each cluster's sub-graph was built from
    var integrityByCluster = new Dictionary<int, Integrity>();
    var nodeIdsByCluster = new Dictionary<int, int[]>();

    while (clusterList.Count < _minK)
    {
        // Compute the Integrity for every cluster that has none yet
        foreach (var cluster in partition.Clusters)
        {
            if (integrityByCluster.ContainsKey(cluster.ClusterId))
            {
                continue;
            }

            //We must calculate a graph for this subset of data
            List<int> memberIds = cluster.Points.Select(p => p.Id).ToList();

            LightWeightGraph subGraph;
            if (_data.Type != AbstractDataset.DataType.Graph)
            {
                //Distance matrix or Pointset
                Debug.Assert(mat != null, "mat != null");
                var subMatrix = mat.GetReducedDataSet(memberIds);

                //Generate our graph
                subGraph = _graphGen.GenerateGraph(subMatrix.Mat);
            }
            else
            {
                // Mark every node as excluded, then clear the flag for the members of this cluster
                bool[] exclusion = Enumerable.Repeat(true, _data.Count).ToArray();
                foreach (int id in memberIds)
                {
                    exclusion[id] = false;
                }
                subGraph = new LightWeightGraph((LightWeightGraph)_data, exclusion);
            }

            nodeIdsByCluster.Add(cluster.ClusterId, memberIds.ToArray());
            subGraph.IsWeighted = _weighted;

            Integrity integrity = new Integrity(subGraph, _reassignNodes, _alpha, _beta);

            // Captured before any hill climbing takes place
            _vatNodeRemovalOrder = integrity.NodeRemovalOrder;
            _vatNumNodesRemoved = integrity.NumNodesRemoved;
            if (_hillClimb)
            {
                integrity.HillClimb();
            }

            integrityByCluster.Add(cluster.ClusterId, integrity);
        }

        meta.AppendLine("All calculated Integritys:");

        //Now find the minimum vat value (logging every value on the way)
        int minVatCluster = 0;
        double minVatValue = double.MaxValue;
        foreach (var entry in integrityByCluster)
        {
            meta.AppendFormat("{0} ", entry.Value.MinVat);
            if (entry.Value.MinVat < minVatValue)
            {
                minVatCluster = entry.Key;
                minVatValue = entry.Value.MinVat;
            }
        }
        meta.AppendLine();

        //now merge the partition into the cluster
        var minVAT = integrityByCluster[minVatCluster];
        var subPartition = minVAT.GetPartition();
        var nodeIndexMap = nodeIdsByCluster[minVatCluster];

        meta.AppendFormat("Integrity: MinIntegrity={0}\r\n", minVAT.MinVat);
        meta.AppendFormat("Removed Count:{0} \r\n", minVAT.NumNodesRemoved);

        // Removed nodes are logged with their ids in the full data set, not their sub-graph indices
        var removedIds = minVAT.NodeRemovalOrder.GetRange(0, minVAT.NumNodesRemoved).Select(c => nodeIndexMap[c]);
        meta.AppendLine(String.Join(",", removedIds));

        partition.MergeSubPartition(subPartition, nodeIndexMap, minVatCluster);

        // The split cluster no longer exists in this form; drop its cached results
        integrityByCluster.Remove(minVatCluster);
        nodeIdsByCluster.Remove(minVatCluster);
    }

    partition.MetaData = meta.ToString();
    return partition;
}
/// <summary>
/// Creates a random graph, based on an mst.
/// </summary>
/// <remarks>
/// Every node pair that is not an MST edge is a candidate with weight 1/d^expP. Candidates are drawn
/// with probability proportional to their weight; when the drawn candidate has already been added, the
/// next not-yet-added candidate after it is taken instead.
/// </remarks>
/// <param name="d">distance matrix used to construct the graph</param>
/// <param name="alpha">
/// edge budget: (alpha * numNodes) - (numNodes - 1) edges are added on top of the MST, capped at the
/// number of candidate edges that exist
/// </param>
/// <param name="expP">The probability of adding an edge depends on its distance: 1/d^expP</param>
/// <returns>A new graph holding the MST edges plus the randomly added edges, weighted by distance</returns>
public static LightWeightGraph GetRandomGraph(DistanceMatrix d, int alpha, double expP)
{
    int numNodes = d.Count;
    var nodes = new LightWeightGraph.LightWeightNode[numNodes];

    LightWeightGraph mst = LightWeightGraph.GetStackedMST(d, 1);
    LightWeightGraph.LightWeightNode[] mstNodes = mst.Nodes;

    //Create lists to hold edge values. The weight lists are allocated and filled as well; they used to
    //be left null and were handed to the node constructor that way.
    List<int>[] edges = new List<int>[numNodes];
    List<double>[] weights = new List<double>[numNodes];
    for (int i = 0; i < numNodes; i++)
    {
        edges[i] = new List<int>();
        weights[i] = new List<double>();
    }

    // add edges from the mst to the edges list, to facilitate adding additional edges later on
    for (int i = 0; i < numNodes; i++)
    {
        for (int j = 0; j < mstNodes[i].Edge.Length; j++)
        {
            edges[i].Add(mstNodes[i].Edge[j]);
            weights[i].Add(mstNodes[i].EdgeWeights[j]);
        }
    }

    // cycle through each possible edge
    // if the edge exists in the mst, continue
    // otherwise, add the edge to the candidates, and add its weight to the cumulative total.
    // A growable list is used so the array below holds exactly the candidates found, whatever the
    // number of MST edges turns out to be.
    // NOTE(review): a zero distance between two nodes that are not MST neighbours makes addlProb
    // infinite (for expP > 0) and the sampling loop below cannot make progress then - confirm that
    // inputs never contain such pairs.
    List<oneNode> candidates = new List<oneNode>();
    double myDistancesTotal = 0;
    for (int i = 0; i < numNodes - 1; i++)
    {
        for (int j = i + 1; j < numNodes; j++)
        {
            if (mstNodes[i].Edge.Contains(j))
            {
                continue;
            }

            double addlProb = 1.0 / Math.Pow(d[i, j], expP);
            myDistancesTotal += addlProb;

            // prob holds the running total, i.e. the upper bound of this candidate's interval
            oneNode nd = new oneNode { prob = myDistancesTotal, fromNode = i, toNode = j, alreadyExists = false };
            candidates.Add(nd);
        }
    }
    oneNode[] myDistances = candidates.ToArray();

    // how many edges do we want to add? Never more than there are candidates, otherwise the loop
    // below could not terminate (e.g. for a single node, or a large alpha on a small graph).
    int desiredNewEdges = Math.Min((alpha * numNodes) - (numNodes - 1), myDistances.Length);

    Random rnd = Utility.Util.Rng;

    // add edges randomly until we have added the desired number of edges
    while (desiredNewEdges > 0)
    {
        // generate a random number between 0 and myDistancesTotal
        double rand = rnd.NextDouble() * myDistancesTotal;

        // walk through the array until you find the random number
        for (int m = 0; m < myDistances.Length; m++)
        {
            if (myDistances[m].prob > rand)
            {
                // we have found the edge to add.
                // add the edge if it does not already exist (otherwise keep walking to the next one)
                if (!myDistances[m].alreadyExists)
                {
                    int from = myDistances[m].fromNode;
                    int to = myDistances[m].toNode;
                    double dist = d[from, to];

                    edges[from].Add(to);
                    weights[from].Add(dist);
                    edges[to].Add(from);
                    weights[to].Add(dist);

                    myDistances[m].alreadyExists = true;
                    desiredNewEdges--;
                    break;
                }
            }
        }
    }

    for (int i = 0; i < numNodes; i++)
    {
        nodes[i] = new LightWeightGraph.LightWeightNode(i, true, edges[i], weights[i]);
    }

    return new LightWeightGraph(nodes, true);
}