Example #1
        public Partition combineClusters(Partition partition, int minK)
        {
            int[,] connections = new int[partition.Clusters.Count, partition.Clusters.Count];
            LightWeightGraph g = (LightWeightGraph)_data;

            // for quick reference let's make a list of which nodes are in which clusters
            int[] clustAssignments = new int[g.Nodes.Count()];
            for (int i = 0; i < partition.Clusters.Count; i++)
            {
                for (int j = 0; j < partition.Clusters[i].Points.Count; j++)
                {
                    clustAssignments[partition.Clusters[i].Points[j].Id] = partition.Clusters[i].Points[j].ClusterId;
                }
            }
            // now go through each node and count its edges out to each cluster
            // add these edges to the connections[] matrix
            for (int i = 0; i < g.Nodes.Count(); i++)
            {
                int currentCluster = clustAssignments[i];
                for (int e = 0; e < g.Nodes[i].Edge.Count(); e++)
                {
                    int adjacentNode    = g.Nodes[i].Edge[e];
                    int adjacentCluster = clustAssignments[adjacentNode];
                    connections[currentCluster, adjacentCluster]++;
                }
            }

            return(partition);
        }
Example #2
        //This calculates the distance list (and works)
        private void calculateNearestNeighborGraph(KPoint.DistType distType)
        {
            points = new PointSet(pointSetFile);

            graphPrefix = pointSetFileShort.Substring(0, pointSetFileShort.IndexOf('.')) + "_" + distType.ToString() + "_KNN_";

            //Now we set the Distance
            //Distance matrix
            distMatrix = points.GetDistanceMatrix(distType);
            distances  = distMatrix.GetSortedDistanceList();


            minConnectIndex = LightWeightGraph.BinSearchKNNMinConnectivity(2, points.Count - 1, points.Count, distMatrix);

            label1.Text = String.Format("Minimum Connectivity:({0} Neighbors)", minConnectIndex);

            float sum = distances.Cast <float>().Sum();

            sum        /= distances.Count;
            label4.Text = "Mean Dist:" + sum;

            //Set the track bars
            trackBar1.Minimum = 2;
            trackBar1.Maximum = minConnectIndex;
            trackBar2.Minimum = minConnectIndex;
            trackBar2.Maximum = points.Count - 1;

            trackBar1.Value = trackBar2.Value = minConnectIndex;
            distMin.Text    = String.Format("Min:({0} Neighbors)", trackBar1.Value);
            distMax.Text    = String.Format("Max:({0} Neighbors)", trackBar2.Value);
        }
Example #3
        public Partition GetPartition()
        {
            //Get our graph
            LightWeightGraph lwg = VATResult.GetAttackedGraphWithReassignment();

            //Get our cluster Assignment
            List <List <int> > componentList = lwg.GetComponents();

            //Setup our Clusters
            List <Cluster> clusterList = new List <Cluster>();

            for (int i = 0; i < componentList.Count; i++)
            {
                Cluster c = new Cluster(i);
                foreach (var n in componentList[i])
                {
                    c.AddPoint(new ClusteredItem(n));
                }
                clusterList.Add(c);
            }

            String meta = "VATClust: \nRemoved Count:" + VATResult.numNodesRemoved + "\n"
                          + String.Join(",", VATResult.nodeRemovalOrder.GetRange(0, VATResult.numNodesRemoved));

            return(new Partition(clusterList, _distanceMatrix, meta));
        }
Example #4
 public Partition(List <Cluster> clusters, LightWeightGraph graph, String m = "")
 {
     Clusters          = clusters;
     MetaData          = m;
     Graph             = graph;
     PartitionDataType = DataType.Graph;
 }
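A minimal usage sketch for this constructor, assuming the Cluster, ClusteredItem, and LightWeightGraph types shown in the other examples; the file name and point ids are purely illustrative.

        // Hypothetical sketch: build two small clusters by hand and wrap them,
        // together with the graph they partition, in a graph-backed Partition.
        LightWeightGraph g = LightWeightGraph.GetGraphFromFile("toy.graph"); // hypothetical file

        List<Cluster> clusters = new List<Cluster>();
        Cluster c0 = new Cluster(0);
        c0.AddPoint(new ClusteredItem(0));
        c0.AddPoint(new ClusteredItem(1));
        Cluster c1 = new Cluster(1);
        c1.AddPoint(new ClusteredItem(2));
        clusters.Add(c0);
        clusters.Add(c1);

        Partition p = new Partition(clusters, g, "hand-built partition");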
Example #5
        static void Main(string[] args)
        {
            PointSet         swissPoints = new PointSet("iris.txt");
            LightWeightGraph minIris     = LightWeightGraph.GetMinKnnGraph(swissPoints.GetDistanceMatrix());
            var map = minIris.GetEdgeIndexMap();

            float[] BCEdge = NetMining.Graphs.BetweenessCentrality.BrandesBcEdges(minIris);

            for (int n = 0; n < minIris.NumNodes; n++)
            {
                foreach (int e in minIris.Nodes[n].Edge)
                {
                    KeyValuePair <int, int> edge = new KeyValuePair <int, int>(n, e);
                    if (map.ContainsKey(edge))
                    {
                        Console.WriteLine("{0} {1} = {2}", edge.Key, edge.Value, BCEdge[map[edge]]);
                    }
                }
            }
            //minSwiss.SaveGML("iris.gml");
            //minSwiss.SaveGraph("iris.graph");

            /*
             * minSwiss.SaveGML("SwissRoll.gml");
             * HVATClust vClust = new HVATClust(swissPoints, 4, false, true, 1);
             * Partition p = vClust.GetPartition();
             * p.SavePartition("swissRoll", "SwissRoll.txt", p.MetaData);
             * //LightWeightGraph lwg = LightWeightGraph.GetGraphFromFile("g.graph");
             *
             *
             * PointSet points = new PointSet("iris.txt");
             * var distMatrix = points.GetDistanceMatrix();
             *
             * var lwg = LightWeightGraph.GetMinKnnGraph(distMatrix, 1);
             * lwg.IsWeighted = true;
             *
             * VAT v = new VAT(lwg);
             * var nlwg = v.GetAttackedGraphWithReassignment();
             * List<List<int>> components = nlwg.GetComponents();
             *
             * var dist2_0 = distMatrix.GetReducedDataSet(components[0]);
             * var lwg2_0 = LightWeightGraph.GetMinKnnGraph(dist2_0.Mat, 1);
             * bool lwg2_0C = lwg2_0.isConnected();
             * lwg2_0.IsWeighted = true;
             * var dist2_1 = distMatrix.GetReducedDataSet(components[1]);
             * var lwg2_1 = LightWeightGraph.GetMinKnnGraph(dist2_1.Mat, 1);
             * bool lwg2_1C = lwg2_1.isConnected();
             * lwg2_1.IsWeighted = true;
             *
             * VAT v2_0 = new VAT(lwg2_0);
             * List<List<int>> components2_0 = v2_0.GetAttackedGraphWithReassignment().GetComponents();
             * VAT v2_1 = new VAT(lwg2_1);
             * List<List<int>> components2_1 = v2_1.GetAttackedGraphWithReassignment().GetComponents();
             *
             */

            Console.ReadKey();
        }
        /// <summary>
        /// Reads a .cluster file into a partition
        /// </summary>
        /// <param name="filename">Path to the .cluster file to read</param>
        public Partition(String filename)
        {
            Clusters = new List <Cluster>();

            using (StreamReader sr = new StreamReader(filename))
            {
                String dataString   = sr.ReadLine();
                String dataType     = dataString.Substring(0, dataString.IndexOf(' '));
                String dataFileName = dataString.Substring(dataString.IndexOf(' ') + 1);
                String folder       = filename.Substring(0, filename.LastIndexOf('\\'));

                //Get the DataPoints
                switch (dataType)
                {
                case "Points":
                    Data = new PointSet(dataFileName);
                    break;

                case "DistanceMatrix":
                    Data = new DistanceMatrix(dataFileName);
                    break;

                case "Graph":
                    String extension = dataFileName.Substring(dataFileName.LastIndexOf('.') + 1);
                    if (extension == "gml")
                    {
                        Data = LightWeightGraph.GetGraphFromGML(dataFileName);
                    }
                    else if (extension == "graph")
                    {
                        Data = LightWeightGraph.GetGraphFromFile(dataFileName);
                    }
                    break;

                default:
                    throw new InvalidDataException("dataType");
                }


                //Parse the Clusters
                String line        = sr.ReadLine();
                int    numClusters = int.Parse(line.Split(' ')[1]);

                for (int i = 0; i < numClusters; i++)
                {
                    Cluster C        = new Cluster(i);
                    int     numItems = int.Parse(sr.ReadLine());
                    line = sr.ReadLine();
                    String[] split = line.Split(' ');
                    for (int k = 0; k < numItems; k++)
                    {
                        int pointIndex = int.Parse(split[k]);
                        C.AddPoint(new ClusteredItem(pointIndex));
                    }
                    Clusters.Add(C);
                }
            }
        }
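For reference, here is a hypothetical .cluster file that this constructor would accept, reconstructed from the parsing logic above. Only the second token of the second line is actually read, so the leading "Clusters" keyword is an assumption.

        // Hypothetical .cluster file layout (reconstructed from the parser above):
        //
        //   Graph C:\Data\iris.graph    <- data type ("Points", "DistanceMatrix" or "Graph") and data file
        //   Clusters 2                  <- only the second token (the cluster count) is parsed
        //   3                           <- number of items in cluster 0
        //   0 1 2                       <- point indices of cluster 0
        //   2                           <- number of items in cluster 1
        //   3 4                         <- point indices of cluster 1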
Example #7
        public VATClust(DistanceMatrix distanceMatrix, Boolean useWeights, Boolean knnGraph, int knnOffset = 0, float alpha = 1.0f, float beta = 0.0f)
        {
            _distanceMatrix = distanceMatrix;
            UseWeights      = useWeights;
            KNNGraph        = knnGraph;
            _alpha          = alpha;
            _beta           = beta;

            //Now compute a graph
            LightWeightGraph lwg = LightWeightGraph.GetMinKnnGraph(_distanceMatrix, knnOffset);

            lwg.IsWeighted = UseWeights;

            //Run VAT on it
            VATResult = new VAT(lwg, _alpha, _beta);
        }
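A minimal usage sketch, assuming the PointSet, GetDistanceMatrix, GetPartition and SavePartition calls shown in the other examples; the file names are illustrative.

        // Hypothetical sketch: VAT clustering over the minimum-connectivity kNN graph of a point set.
        PointSet points = new PointSet("iris.txt");
        DistanceMatrix dist = points.GetDistanceMatrix();

        VATClust clust = new VATClust(dist, useWeights: true, knnGraph: true);
        Partition p = clust.GetPartition();                  // as in Example #3
        p.SavePartition("iris_VAT.cluster", "iris.graph");   // hypothetical output names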
Example #8
        //This will set the track bars on the CAST tab
        private void button16_Click(object sender, EventArgs e)
        {
            if (openFileDialog1.ShowDialog() == System.Windows.Forms.DialogResult.OK)
            {
                trackBar3.Enabled = false;
                trackBar4.Enabled = false;
                button17.Enabled  = false;

                textBox6.Text     = openFileDialog1.FileName;
                textBox4.Text     = openFileDialog1.FileName;
                textBox3.Text     = openFileDialog1.FileName;
                textBox2.Text     = openFileDialog1.FileName;
                textBox1.Text     = openFileDialog1.FileName;
                pointSetFile      = textBox6.Text;
                pointSetFileShort = openFileDialog1.SafeFileName;

                points = new PointSet(pointSetFile);

                //Now we set the Distance
                //Distance matrix
                distMatrix = points.GetDistanceMatrix();
                distances  = distMatrix.GetSortedDistanceList();

                //Find minimum connectivity using binary search
                minConnectIndex = LightWeightGraph.BinSearchGeoMinConnectivity(0, distances.Count - 1, distMatrix.Count, distMatrix, distances);

                label25.Text = String.Format("Minimum Connectivity:({0})={1}", minConnectIndex, distances[minConnectIndex]);

                float sum = distances.Sum();
                sum         /= distances.Count;
                label22.Text = "Mean Dist:" + sum;

                //Set the track bars
                trackBar4.Minimum = 0;
                trackBar4.Maximum = minConnectIndex;
                trackBar3.Minimum = minConnectIndex;
                trackBar3.Maximum = distances.Count - 1;

                trackBar4.Value = trackBar3.Value = minConnectIndex;
                label24.Text    = String.Format("Min:({0}) {1}", trackBar4.Value, distances[trackBar4.Value]);
                label23.Text    = String.Format("Max:({0}) {1}", trackBar3.Value, distances[trackBar3.Value]);

                trackBar3.Enabled = true;
                trackBar4.Enabled = true;
                button17.Enabled  = true;
            }
        }
Example #9
        private static List <DataOutStruct> CombineOuts(LightWeightGraph healthy, LightWeightGraph infected, List <DataOutStruct> outData)
        {
            List <DataOutStruct> finalOut      = new List <DataOutStruct>();
            List <DataOutStruct> healthyNodes  = new List <DataOutStruct>();
            List <DataOutStruct> infectedNodes = new List <DataOutStruct>();

            foreach (var node in healthy.Nodes)
            {
                DataOutStruct singleNode = new DataOutStruct
                {
                    GroupNum = "0", Bacteria = node.sharedName, ClusterType = ""
                };
                healthyNodes.Add(singleNode);
            }
            foreach (var node in infected.Nodes)
            {
                DataOutStruct singleNode = new DataOutStruct
                {
                    GroupNum = "0", Bacteria = node.sharedName, ClusterType = ""
                };
                infectedNodes.Add(singleNode);
            }
            foreach (DataOutStruct bacteria in outData)
            {
                foreach (DataOutStruct healthyNode in healthyNodes)
                {
                    if (bacteria.Bacteria.Equals(healthyNode.Bacteria))
                    {
                        healthyNode.GroupNum = "1";
                    }
                }
                foreach (DataOutStruct infectedNode in infectedNodes)
                {
                    if (bacteria.Bacteria.Equals(infectedNode.Bacteria))
                    {
                        infectedNode.GroupNum = "1";
                    }
                }
            }

            finalOut = healthyNodes.Union(infectedNodes).Distinct().OrderByDescending(x => x.GroupNum).ThenBy(x => x.Bacteria).ToList();
            return(finalOut);
        }
Example #10
        public static Partition GetPartition(LightWeightGraph lwg)
        {
            //Get our cluster Assignment
            List <List <int> > componentList = lwg.GetComponents();

            //Setup our Clusters
            List <Cluster> clusterList = new List <Cluster>();

            for (int i = 0; i < componentList.Count; i++)
            {
                Cluster c = new Cluster(i);
                foreach (var n in componentList[i])
                {
                    c.AddPoint(new ClusteredItem(lwg[n].Label));
                }
                clusterList.Add(c);
            }


            return(new Partition(clusterList, lwg));
        }
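A minimal usage sketch of this static helper, assuming a graph loaded with the GetGraphFromGML call used elsewhere in these examples; the file name is illustrative.

        // Hypothetical sketch: partition a (possibly disconnected) graph by its connected components.
        LightWeightGraph lwg = LightWeightGraph.GetGraphFromGML("network.gml"); // hypothetical file
        Partition byComponent = Partition.GetPartition(lwg);
        Console.WriteLine("Components found: {0}", byComponent.Clusters.Count);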
Example #11
        private static GeneralCluster ReturnClusterAndPartition(LightWeightGraph healthy,
                                                                LightWeightGraph infected, int healthyClusters, int infectedClusters, String healthyfile,
                                                                String infectedfile)
        {
            String         workingDir = Directory.GetCurrentDirectory();
            GeneralCluster cluster    = new GeneralCluster();

            cluster.Int0.Cluster =
                new HIntegrityClust(healthy, healthyClusters + 1, false, 1, 0, false, false);
            cluster.Int0.Partition = cluster.Int0.Cluster.GetPartition();
            cluster.Int0.Partition.SavePartition($"{workingDir}/Data/{healthyfile}_INT.cluster", $"{workingDir}/{healthyfile}.graph");
            cluster.Int1.Cluster =
                new HIntegrityClust(infected, infectedClusters + 1, false, 1, 0, false, false);
            cluster.Int1.Partition = cluster.Int1.Cluster.GetPartition();
            cluster.Int1.Partition.SavePartition($"{workingDir}/Data/{infectedfile}_INT.cluster", $"{workingDir}/{infectedfile}.graph");
            cluster.HealthyIntCount  = new int[cluster.Int0.Partition.DataCount];
            cluster.InfectedIntCount = new int[cluster.Int1.Partition.DataCount];
            cluster.Ten0.Cluster     =
                new HTenacityClust(healthy, healthyClusters + 1, false, 1, 0, false, false);
            cluster.Ten0.Partition = cluster.Ten0.Cluster.GetPartition();
            cluster.Ten0.Partition.SavePartition($"{workingDir}/Data/{healthyfile}_TEN.cluster", $"{workingDir}/{healthyfile}.graph");
            cluster.Ten1.Cluster =
                new HTenacityClust(infected, infectedClusters + 1, false, 1, 0, false, false);
            cluster.Ten1.Partition = cluster.Ten1.Cluster.GetPartition();
            cluster.Ten1.Partition.SavePartition($"{workingDir}/Data/{infectedfile}_TEN.cluster", $"{workingDir}/{infectedfile}.graph");
            cluster.HealthyTenCount  = new int[cluster.Ten0.Partition.DataCount];
            cluster.InfectedTenCount = new int[cluster.Ten1.Partition.DataCount];
            cluster.Vat0.Cluster     = new HVATClust(healthy, healthyClusters + 1, false, 1, 0, false, false);
            cluster.Vat0.Partition   = cluster.Vat0.Cluster.GetPartition();
            cluster.Vat0.Partition.SavePartition($"{workingDir}/Data/{healthyfile}_VAT.cluster", $"{workingDir}/{healthyfile}.graph");
            cluster.Vat1.Cluster =
                new HVATClust(infected, infectedClusters + 1, false, 1, 0, false, false);
            cluster.Vat1.Partition = cluster.Vat1.Cluster.GetPartition();
            cluster.Vat1.Partition.SavePartition($"{workingDir}/Data/{infectedfile}_VAT.cluster", $"{workingDir}/{infectedfile}.graph");
            cluster.HealthyVatCount  = new int[cluster.Vat0.Partition.DataCount];
            cluster.InfectedVatCount = new int[cluster.Vat1.Partition.DataCount];

            return(cluster);
        }
Example #12
        //This generates graphs when clicked
        private void button2_Click(object sender, EventArgs e)
        {
            int numGraphs = ((int)trackBar2.Value - (int)trackBar1.Value) / (int)numericUpDown1.Value;

            //sanity check on gui
            if (numGraphs > 20 && MessageBox.Show("Are you sure you want to generate " + numGraphs + " different graphs?", "Generate Graphs", MessageBoxButtons.YesNo) == System.Windows.Forms.DialogResult.No)
            {
                return;
            }

            for (int i = trackBar1.Value; i <= trackBar2.Value; i += (int)numericUpDown1.Value)
            {
                LightWeightGraph lwg = null;
                if (embeddingComboBox.SelectedIndex == 0)
                {
                    lwg = LightWeightGraph.GetGeometricGraph(distMatrix, distances[i]);
                }
                else if (embeddingComboBox.SelectedIndex == 1)
                {
                    lwg = LightWeightGraph.GetKNNGraph(distMatrix, i);
                }
                else
                {
                    lwg = LightWeightGraph.GetStackedMST(distMatrix, (int)trackBar1.Value);
                }

                //Save GML
                String folder = pointSetFile.Substring(0, pointSetFile.LastIndexOf('\\'));
                lwg.SaveGML(folder + "\\" + graphPrefix + i + ".gml");

                openFileDialog2.InitialDirectory = folder;

                //Save Graph format
                lwg.SaveGraph(folder + "\\" + graphPrefix + i + ".graph");
            }

            MessageBox.Show("Graphs have been Generated!");
        }
Example #13
        //this calculates the distance list
        private void CalculateMinConnectivityGeoGraph(KPoint.DistType distType)
        {
            points = new PointSet(pointSetFile);

            graphPrefix = pointSetFileShort.Substring(0, pointSetFileShort.IndexOf('.')) + "_" + distType.ToString() + "_";

            //Now we set the Distance
            //Distance matrix
            distMatrix = points.GetDistanceMatrix(distType);
            distances  = distMatrix.GetSortedDistanceList();

            //Find minimum connectivity using binary search
            int pointCount = points.Count;

            minConnectIndex = LightWeightGraph.BinSearchGeoMinConnectivity(0, distances.Count - 1, pointCount, distMatrix, distances);

            label1.Text = String.Format("Minimum Connectivity:({0})={1}", minConnectIndex, distances[minConnectIndex]);

            float sum = 0;

            foreach (float dist in distances)
            {
                sum += dist;
            }
            sum        /= distances.Count;
            label4.Text = "Mean Dist:" + sum;

            //Set the track bars
            trackBar1.Minimum = 0;
            trackBar1.Maximum = minConnectIndex;
            trackBar2.Minimum = minConnectIndex;
            trackBar2.Maximum = distances.Count - 1;

            trackBar1.Value = trackBar2.Value = minConnectIndex;
            distMin.Text    = String.Format("Min:({0}) {1}", trackBar1.Value, distances[trackBar1.Value]);
            distMax.Text    = String.Format("Max:({0}) {1}", trackBar2.Value, distances[trackBar2.Value]);
        }
Example #14
        static void Main(string[] args)
        {
            if (args.Length != 4)
            {
                Console.WriteLine(
                    "Usage: Program.cs <Healthyfile> <Infectedfile> <Outputfile> <Group> ");
                Environment.Exit(0);
            }

            // AUTOMATING IBD
            // We need both a healthy network and an IBD network
            // COMMAND LINE: clusteringanalysis.exe healthyNet infectedNet outputFile group


            //convert from gml to graph

            String healthyfile  = BackSlashRemover(args[0]);
            String infectedfile = BackSlashRemover(args[1]);

            String healthyFileName  = "";
            String infectedFileName = "";

            String workingDir = Directory.GetCurrentDirectory();
            String datapath   = workingDir + "/Data";

            datapath = BackSlashRemover(datapath);

            if (!Directory.Exists(datapath))
            {
                Directory.CreateDirectory(datapath);
            }

            String outPath = BackSlashRemover(args[2]);

            if (outPath.Split('/').Length == 1)
            {
                outPath = $"{workingDir}/{outPath}";
            }

            if (healthyfile.Contains("/"))
            {
                healthyFileName = healthyfile.Split('/').Last().Split('.').First();
            }

            if (infectedfile.Contains("/"))
            {
                infectedFileName = infectedfile.Split('/').Last().Split('.').First();
            }

            LightWeightGraph healthy  = LightWeightGraph.GetGraphFromGML(healthyfile);
            LightWeightGraph infected = LightWeightGraph.GetGraphFromGML(infectedfile);

            healthy.SaveGraph($"{datapath}/{healthyFileName}.graph");
            infected.SaveGraph($"{datapath}/{infectedFileName}.graph");
            // Makes a list of what the nodes reference
            using (StreamWriter sw = new StreamWriter($"{datapath}/{healthyFileName}.txt", true))
            {
                for (int i = 0; i < healthy.Nodes.Length; i++)
                {
                    sw.WriteLine(healthy.Nodes[i].sharedName);
                }
            }

            using (StreamWriter sw = new StreamWriter($"{datapath}/{infectedFileName}.txt", true))
            {
                for (int i = 0; i < infected.Nodes.Length; i++)
                {
                    sw.WriteLine(infected.Nodes[i].sharedName);
                }
            }

            //we don't actually know the number of clusters in each graph - we want to cluster for 1 more than we start with
            //so cluster for 1 just to get the file.
            //HVATClust clust1 = new HVATClust(lwg2, K, useweights, 1, 0, reassign, hillclimb);

            HVATClust healthyClust1    = new HVATClust(healthy, 1, false, 1, 0, false, false);
            Partition t1               = healthyClust1.GetPartition();
            int       healthyClusters  = t1.Clusters.Count;
            HVATClust infectedClust1   = new HVATClust(infected, 1, false, 1, 0, false, false);
            Partition t2               = infectedClust1.GetPartition();
            int       infectedClusters = t2.Clusters.Count;

            // Now we know the initial number of clusters, do the actual clustering
            //HVATClust clust1 = new HVATClust(lwg2, K, useweights, 1, 0, reassign, hillclimb);

            // This checks whether the input cluster type can be parsed as the enum;
            // if so, a switch statement decides which clustering to run.
            if (args.Length == 4)
            {
                List <DataOutStruct> outData = ConstructList(args[3], healthy, infected, healthyFileName, infectedFileName,
                                                             healthyClusters, infectedClusters);
                outData = CombineOuts(infected, healthy, outData);
                using (StreamWriter sw = new StreamWriter(outPath))
                {
                    for (int i = 0; i < outData.Count(); i++)
                    {
                        sw.WriteLine($"{outData[i].Bacteria} {outData[i].GroupNum}");
                    }
                }
                Console.WriteLine("Done.");
                Console.WriteLine($"Output in: {outPath}");
            }
            else
            {
                Console.WriteLine(
                    "Please enter 4 parameters, in this order:\n " +
                    "Healthy data path(.gml)\n " +
                    "Unhealthy data path(.gml)\n " +
                    "Output file path\n " +
                    "Desired Output Group(listed in Readme)\n");
            }
        }
Example #15
        /// <summary>
        /// combineClusters is used when the partitioning achieved has too many clusters.
        /// </summary>
        /// <param name="partition">A partitioning of a graph with any number of clusters</param>
        /// <param name="minK">The desired number of clusters</param>
        /// <returns>A new partitioning with the desired number of clusters</returns>
        public Partition combineClusters(Partition partition, int minK)
        {
            // we want to do (partition.Clusters.count - minK) merges
            int startPartitions = partition.Clusters.Count;

            for (int numMerges = 0; numMerges < startPartitions - minK; numMerges++)
            {
                int[,] connections = new int[partition.Clusters.Count, partition.Clusters.Count];
                LightWeightGraph g = (LightWeightGraph)_data;

                // for quick reference let's make a list of which nodes are in which clusters
                int[] clustAssignments = new int[g.Nodes.Count()];
                for (int i = 0; i < partition.Clusters.Count; i++)
                {
                    for (int j = 0; j < partition.Clusters[i].Points.Count; j++)
                    {
                        clustAssignments[partition.Clusters[i].Points[j].Id] = partition.Clusters[i].Points[j].ClusterId;
                    }
                }
                // now go through each node and count its edges out to each cluster
                // add these edges to the connections[] matrix
                for (int i = 0; i < g.Nodes.Count(); i++)
                {
                    int currentCluster = clustAssignments[i];
                    for (int e = 0; e < g.Nodes[i].Edge.Count(); e++)
                    {
                        int adjacentNode    = g.Nodes[i].Edge[e];
                        int adjacentCluster = clustAssignments[adjacentNode];
                        connections[currentCluster, adjacentCluster]++;
                    }
                }

                // keep a list of which partitions will be merged
                // List<int> merges = new List<int>();

                // find the largest connections[i,j] and merge clusters i and j
                int    largestI     = 0;
                int    largestJ     = 0;
                double largestValue = 0;
                for (int i = 0; i < partition.Clusters.Count; i++)
                {
                    for (int j = 0; j < partition.Clusters.Count; j++)
                    {
                        if (j <= i)
                        {
                            continue;
                        }
                        int    sizeI = partition.Clusters[i].Points.Count;
                        int    sizeJ = partition.Clusters[j].Points.Count;
                        double score = ((double)connections[i, j]) / (sizeI * sizeJ);
                        //double score = connections[i, j];
                        if (score > largestValue)
                        {
                            largestValue = score;
                            largestI     = i;
                            largestJ     = j;
                            // we want to merge smaller into larger clusters,
                            // so keep the larger of the winning pair in largestI
                            if (sizeI < sizeJ)
                            {
                                int temp = largestI;
                                largestI = largestJ;
                                largestJ = temp;
                            }
                        }
                    }
                }
                // if everything's zero, there is no hope ;-)
                if (largestValue == 0)
                {
                    continue;
                }


                // now we want to merge cluster largestJ into cluster largestI,
                // remove cluster largestJ, and renumber all clusters after the first
                // adds the points of the second cluster to the first cluster
                for (int i = 0; i < partition.Clusters[largestJ].Points.Count; i++)
                {
                    partition.Clusters[largestI].Points.Add(partition.Clusters[largestJ].Points[i]);
                }


                // remove largestJ cluster
                partition.Clusters.RemoveAt(largestJ);


                // renumber the clusters
                for (int i = 0; i < partition.Clusters.Count; i++)
                {
                    partition.Clusters[i].Points.Sort();
                    for (int j = 0; j < partition.Clusters[i].Points.Count; j++)
                    {
                        partition.Clusters[i].Points[j].ClusterId = i;
                    }
                }
            }
            return(partition);
        }
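A minimal usage sketch, assuming the same _data field used above; the target of 4 clusters is arbitrary.

            // Hypothetical sketch: start from the component partition of the underlying graph,
            // then merge the most strongly connected cluster pairs until only 4 clusters remain.
            Partition partition = Partition.GetPartition((LightWeightGraph)_data);
            Partition merged    = combineClusters(partition, minK: 4);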
        /// <summary>
        /// GetGPartition is different from GetPartition in 2 ways:
        /// 1. It does not require a connected graph.
        /// 2. If there are too many clusters, it combines them such that the desired number of clusters is returned
        /// </summary>
        /// <returns>A partitioning of the graph</returns>
        public Partition GetGPartition()
        {
            DistanceMatrix mat = null;

            if (_data.Type == AbstractDataset.DataType.DistanceMatrix)
            {
                mat = (DistanceMatrix)_data;
            }
            else if (_data.Type == AbstractDataset.DataType.PointSet)
            {
                mat = ((PointSet)_data).GetDistanceMatrix();
            }

            //get the initial partition (the graph is not necessarily connected)
            Partition partition = Partition.GetPartition((LightWeightGraph)_data);

            //Dictionary to hold VAT
            var vatMap = new Dictionary <int, VatABC>();

            //Dictionary to hold subset array
            var subsetMap = new Dictionary <int, int[]>();

            while (partition.Clusters.Count < _minK)
            //while (clusterList.Count < _minK)
            {
                Console.WriteLine("Count = " + partition.Clusters.Count);
                Console.WriteLine("mink = " + _minK);
                //Calculate the VAT for all values
                foreach (var c in partition.Clusters.Where(c => !vatMap.ContainsKey(c.ClusterId)))
                {
                    //We must calculate a graph for this subset of data
                    List <int> clusterSubset = c.Points.Select(p => p.Id).ToList();

                    //Now calculate Vat
                    LightWeightGraph lwg;
                    if (_data.Type == AbstractDataset.DataType.Graph)
                    {
                        bool[] exclusion = new bool[_data.Count];
                        for (int i = 0; i < _data.Count; i++)
                        {
                            exclusion[i] = true;
                        }
                        foreach (var p in c.Points)
                        {
                            exclusion[p.Id] = false;
                        }
                        lwg = new LightWeightGraph((LightWeightGraph)_data, exclusion);
                    }
                    else //Distance matrix or Pointset
                    {
                        Debug.Assert(mat != null, "mat != null");
                        var subMatrix = mat.GetReducedDataSet(clusterSubset);

                        //Generate our graph
                        lwg = _graphGen.GenerateGraph(subMatrix.Mat);
                    }

                    subsetMap.Add(c.ClusterId, clusterSubset.ToArray());
                    lwg.IsWeighted = _weighted;
                    VatABC v = new VatABC(lwg, _M, _k, _reassignNodes, _alpha, _beta);
                    _vatNodeRemovalOrder = v.NodeRemovalOrder;
                    _vatNumNodesRemoved  = v.NumNodesRemoved;
                    //if (_hillClimb)
                    //    v.HillClimb();
                    ////VATClust v = new VATClust(subMatrix.Mat, _weighted, _useKnn, _kNNOffset, _alpha, _beta);
                    vatMap.Add(c.ClusterId, v);
                    Console.WriteLine("Calculated Vat for cluster " + c.ClusterId);
                }

                meta.AppendLine("All calculated VATs:");
                //Now find the minimum vat value
                int    minVatCluster = 0;
                double minVatValue   = double.MaxValue;
                foreach (var c in vatMap)
                {
                    meta.Append(String.Format("{0} ", c.Value.MinVat));
                    if (c.Value.MinVat < minVatValue)
                    {
                        minVatCluster = c.Key;
                        minVatValue   = c.Value.MinVat;
                    }
                }
                meta.AppendLine();

                //now merge the partition into the cluster
                var minVAT       = vatMap[minVatCluster];
                var subPartition = minVAT.GetPartition();
                var nodeIndexMap = subsetMap[minVatCluster];

                meta.AppendFormat("Vat: MinVat={0}\r\n", minVAT.MinVat);
                meta.AppendFormat("Removed Count:{0}\r\n", minVAT.NumNodesRemoved);
                meta.AppendLine(String.Join(",",
                                            minVAT.NodeRemovalOrder.GetRange(0, minVAT.NumNodesRemoved).Select(c => nodeIndexMap[c])));

                partition.MergeSubPartition(subPartition, nodeIndexMap, minVatCluster);
                vatMap.Remove(minVatCluster);
                subsetMap.Remove(minVatCluster);
                Console.WriteLine("Found min cluster");
                Console.WriteLine(meta);
            }
            partition.MetaData = meta.ToString();
            // Now that we have a partition, merge clusters until partition.Clusters.Count == _minK
            if (partition.Clusters.Count > _minK)
            {
                combineClusters(partition, _minK);
            }
            return(partition);
        }
        //public HVatABCClust(AbstractDataset data, int minK, IPointGraphGenerator graphGen, bool weighted = true, double alpha = 1.0f, double beta = 0.0f, bool reassignNodes = true, bool hillClimb = true)
        //    : this(minK, M, K, weighted, graphGen, alpha, beta, reassignNodes, hillClimb)
        // {
        //    _data = data;
        // }

        public HVatABCClust(LightWeightGraph data, int minK, int myM, int myK, bool weighted, double alpha = 1.0f, double beta = 0.0f, bool reassignNodes = true, bool hillClimb = true)
            : this(minK, weighted, myM, myK, null, alpha, beta, reassignNodes, hillClimb)
        {
            _data = data;
        }
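A minimal usage sketch for this constructor, assuming the GetGPartition and SavePartition calls shown above; the numeric arguments and file names are illustrative only.

        // Hypothetical sketch: hierarchical VAT-ABC clustering of a graph down to 3 clusters.
        LightWeightGraph graph = LightWeightGraph.GetGraphFromFile("example.graph"); // hypothetical file
        HVatABCClust clust = new HVatABCClust(graph, 3, 2, 1, weighted: true);       // minK=3, myM=2, myK=1
        Partition result = clust.GetGPartition();
        result.SavePartition("example_ABC.cluster", "example.graph");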
Example #18
        public static void combineClusters(String saveLocation, String clusterfileName, int minK)
        {
            //get the Partition file
            Partition partition = new Partition(saveLocation + clusterfileName + ".cluster");
            // we want to do (partition.Clusters.count - minK) merges
            int startPartitions = partition.Clusters.Count;
            LightWeightGraph g  = (LightWeightGraph)partition.Data;
            //get the name of the graph file from the partition file
            String graphFile = "";

            using (StreamReader sr = new StreamReader(saveLocation + clusterfileName + ".cluster"))
            {
                String dataString = sr.ReadLine();
                graphFile = dataString.Substring(6);
            }

            for (int numMerges = 0; numMerges < startPartitions - minK; numMerges++)
            {
                int[,] connections = new int[partition.Clusters.Count, partition.Clusters.Count];


                // for quick reference let's make a list of which nodes are in which clusters
                int[] clustAssignments = new int[g.Nodes.Count()];
                for (int i = 0; i < partition.Clusters.Count; i++)
                {
                    for (int j = 0; j < partition.Clusters[i].Points.Count; j++)
                    {
                        clustAssignments[partition.Clusters[i].Points[j].Id] = partition.Clusters[i].Points[j].ClusterId;
                    }
                }
                // now go through each node and count its edges out to each cluster
                // add these edges to the connections[] matrix
                for (int i = 0; i < g.Nodes.Count(); i++)
                {
                    int currentCluster = clustAssignments[i];
                    for (int e = 0; e < g.Nodes[i].Edge.Count(); e++)
                    {
                        int adjacentNode    = g.Nodes[i].Edge[e];
                        int adjacentCluster = clustAssignments[adjacentNode];
                        connections[currentCluster, adjacentCluster]++;
                    }
                }

                // keep a list of which partitions will be merged
                // List<int> merges = new List<int>();

                // find the largest connections[i,j] and merge clusters i and j
                int    largestI     = 0;
                int    largestJ     = 0;
                double largestValue = 0;
                for (int i = 0; i < partition.Clusters.Count; i++)
                {
                    for (int j = 0; j < partition.Clusters.Count; j++)
                    {
                        if (j <= i)
                        {
                            continue;
                        }
                        int    sizeI = partition.Clusters[i].Points.Count;
                        int    sizeJ = partition.Clusters[j].Points.Count;
                        double score = ((double)connections[i, j]) / (sizeI * sizeJ);
                        //double score = connections[i, j];
                        //if (sizeI > 40 || sizeJ > 40) score = 0;
                        if (score > largestValue)
                        {
                            largestValue = score;
                            largestI     = i;
                            largestJ     = j;
                            // we want to merge smaller into larger clusters,
                            // so keep the larger of the winning pair in largestI
                            if (sizeI < sizeJ)
                            {
                                int temp = largestI;
                                largestI = largestJ;
                                largestJ = temp;
                            }
                        }
                    }
                }
                // if everything's zero, there is no hope ;-)
                if (largestValue == 0)
                {
                    continue;
                }



                // now we want to merge cluster largestJ into cluster largestI,
                // remove cluster largestJ, and renumber all clusters after the first
                // adds the points of the second cluster to the first cluster
                for (int i = 0; i < partition.Clusters[largestJ].Points.Count; i++)
                {
                    partition.Clusters[largestI].Points.Add(partition.Clusters[largestJ].Points[i]);
                }


                // remove largestJ cluster
                partition.Clusters.RemoveAt(largestJ);


                // renumber the clusters
                for (int i = 0; i < partition.Clusters.Count; i++)
                {
                    partition.Clusters[i].Points.Sort();
                    for (int j = 0; j < partition.Clusters[i].Points.Count; j++)
                    {
                        partition.Clusters[i].Points[j].ClusterId = i;
                    }
                }
            }
            partition.SavePartition(saveLocation + clusterfileName + minK + ".cluster", graphFile);
        }
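A minimal usage sketch with purely illustrative paths; per the SavePartition call above, the merged result is written next to the input as <clusterfileName><minK>.cluster.

        // Hypothetical sketch: load C:\results\myData.cluster, merge down to 3 clusters,
        // and save the result as C:\results\myData3.cluster.
        combineClusters(@"C:\results\", "myData", 3);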
 public HyperVATClust(List <List <int> > overlaps, LightWeightGraph data, int k, bool weighted, double alpha = 1.0f, double beta = 0.0f, bool reassignNodes = true, bool hillClimb = true)
     : this(overlaps, k, weighted, null, alpha, beta, reassignNodes, hillClimb)
 {
     _data = data;
 }
Example #20
        private static List <DataOutStruct> ConstructList(string args, LightWeightGraph healthy,
                                                          LightWeightGraph infected, String healthyfile, String infectedfile, int healthyClusters,
                                                          int infectedClusters)
        {
            List <List <DataOutStruct> > outList = new List <List <DataOutStruct> >();
            List <DataOutStruct>         dataOut = new List <DataOutStruct>();

            if (Enum.TryParse <ClusterType>(args, ignoreCase: true, result: out var userOut))
            {
                GeneralCluster cluster = ReturnClusterAndPartition(healthy, infected, healthyClusters, infectedClusters,
                                                                   healthyfile, infectedfile);
                List <DataOutStruct> d1;
                List <DataOutStruct> d2;
                List <DataOutStruct> d3;
                switch (userOut)
                {
                case ClusterType.G1I:
                    dataOut = G1(GroupInitializer(cluster.Int0.Partition, cluster.Int1.Partition,
                                                  cluster.HealthyIntCount, cluster.InfectedIntCount, healthyfile, infectedfile, OutType.Int,
                                                  outList));
                    break;

                case ClusterType.G1T:
                    dataOut = G1(GroupInitializer(cluster.Ten0.Partition, cluster.Ten1.Partition,
                                                  cluster.HealthyTenCount, cluster.InfectedTenCount, healthyfile, infectedfile, OutType.Ten,
                                                  outList));
                    break;

                case ClusterType.G1V:
                    dataOut = G1(GroupInitializer(cluster.Vat0.Partition, cluster.Vat1.Partition,
                                                  cluster.HealthyVatCount, cluster.InfectedVatCount, healthyfile, infectedfile, OutType.Vat,
                                                  outList));
                    break;

                case ClusterType.G2I:
                    dataOut = G2(
                        GroupInitializer(cluster.Int0.Partition, cluster.Int1.Partition, cluster.HealthyIntCount,
                                         cluster.InfectedIntCount, healthyfile, infectedfile, OutType.Int, outList),
                        cluster.Int0.Partition, cluster.Int1.Partition, OutType.Int);
                    break;

                case ClusterType.G2T:
                    dataOut = G2(
                        GroupInitializer(cluster.Ten0.Partition, cluster.Ten1.Partition, cluster.HealthyTenCount,
                                         cluster.InfectedTenCount, healthyfile, infectedfile, OutType.Ten, outList),
                        cluster.Ten0.Partition, cluster.Ten1.Partition, OutType.Ten);
                    break;

                case ClusterType.G2V:
                    dataOut = G2(
                        GroupInitializer(cluster.Vat0.Partition, cluster.Vat1.Partition, cluster.HealthyVatCount,
                                         cluster.InfectedVatCount, healthyfile, infectedfile, OutType.Vat, outList),
                        cluster.Vat0.Partition, cluster.Vat1.Partition, OutType.Vat);
                    break;

                case ClusterType.G3I:
                    dataOut = G3(
                        GroupInitializer(cluster.Int0.Partition, cluster.Int1.Partition, cluster.HealthyIntCount,
                                         cluster.InfectedIntCount, healthyfile, infectedfile, OutType.Int, outList),
                        cluster.Int0.Partition, cluster.Int1.Partition, OutType.Int);
                    break;

                case ClusterType.G3T:
                    dataOut = G3(
                        GroupInitializer(cluster.Ten0.Partition, cluster.Ten1.Partition, cluster.HealthyTenCount,
                                         cluster.InfectedTenCount, healthyfile, infectedfile, OutType.Ten, outList),
                        cluster.Ten0.Partition, cluster.Ten1.Partition, OutType.Ten);
                    break;

                case ClusterType.G3V:
                    dataOut = G3(
                        GroupInitializer(cluster.Vat0.Partition, cluster.Vat1.Partition,
                                         cluster.HealthyVatCount, cluster.InfectedVatCount, healthyfile, infectedfile,
                                         OutType.Vat, outList), cluster.Vat0.Partition, cluster.Vat1.Partition, OutType.Vat);
                    break;

                case ClusterType.G4I:
                    dataOut = G4(GroupInitializer(cluster.Int0.Partition, cluster.Int1.Partition,
                                                  cluster.HealthyIntCount, cluster.InfectedIntCount, healthyfile, infectedfile, OutType.Int,
                                                  outList));
                    break;

                case ClusterType.G4T:
                    dataOut = G4(GroupInitializer(cluster.Ten0.Partition, cluster.Ten1.Partition,
                                                  cluster.HealthyTenCount, cluster.InfectedTenCount, healthyfile, infectedfile, OutType.Ten,
                                                  outList));
                    break;

                case ClusterType.G4V:
                    dataOut = G4(GroupInitializer(cluster.Vat0.Partition, cluster.Vat1.Partition,
                                                  cluster.HealthyVatCount, cluster.InfectedVatCount, healthyfile, infectedfile, OutType.Vat,
                                                  outList));
                    break;

                /////////////////////////////////////////////////////////////////////////////////////////////////////////////////
                //                                            start of G13 - G25                                               //
                /////////////////////////////////////////////////////////////////////////////////////////////////////////////////
                case ClusterType.G13:
                    d1 = G1(GroupInitializer(cluster.Vat0.Partition,
                                             cluster.Vat1.Partition, cluster.HealthyVatCount, cluster.InfectedVatCount, healthyfile,
                                             infectedfile, OutType.Vat, outList));
                    d2 = G2(
                        GroupInitializer(cluster.Int0.Partition, cluster.Int1.Partition, cluster.HealthyIntCount,
                                         cluster.InfectedIntCount, healthyfile, infectedfile, OutType.Int, outList),
                        cluster.Int0.Partition, cluster.Int1.Partition, OutType.Int);
                    d3 = G2(
                        GroupInitializer(cluster.Vat0.Partition, cluster.Vat1.Partition, cluster.HealthyVatCount,
                                         cluster.InfectedVatCount, healthyfile, infectedfile, OutType.Vat, outList),
                        cluster.Vat0.Partition, cluster.Vat1.Partition, OutType.Vat);
                    dataOut = d1.Union(d2).Where(x => !d3.Contains(x)).OrderBy(x => x.Bacteria).Distinct().ToList();
                    break;

                case ClusterType.G14:
                    d1 = G1(GroupInitializer(cluster.Vat0.Partition,
                                             cluster.Vat1.Partition, cluster.HealthyVatCount, cluster.InfectedVatCount, healthyfile,
                                             infectedfile, OutType.Vat, outList));
                    d2 = G2(
                        GroupInitializer(cluster.Ten0.Partition, cluster.Ten1.Partition, cluster.HealthyTenCount,
                                         cluster.InfectedTenCount, healthyfile, infectedfile, OutType.Ten, outList),
                        cluster.Ten0.Partition, cluster.Ten1.Partition, OutType.Ten);
                    d3 = G2(
                        GroupInitializer(cluster.Int0.Partition, cluster.Int1.Partition, cluster.HealthyIntCount,
                                         cluster.InfectedIntCount, healthyfile, infectedfile, OutType.Int, outList),
                        cluster.Int0.Partition, cluster.Int1.Partition, OutType.Int);
                    dataOut = d1.Union(d2).Where(x => !d3.Contains(x)).OrderBy(x => x.Bacteria).Distinct().ToList();
                    break;

                case ClusterType.G15:
                    d1 = G1(GroupInitializer(cluster.Ten0.Partition,
                                             cluster.Ten0.Partition, cluster.HealthyTenCount, cluster.InfectedTenCount, healthyfile,
                                             infectedfile, OutType.Ten, outList));
                    d2 = G2(
                        GroupInitializer(cluster.Ten0.Partition, cluster.Ten1.Partition, cluster.HealthyTenCount,
                                         cluster.InfectedTenCount, healthyfile, infectedfile, OutType.Ten, outList),
                        cluster.Ten0.Partition, cluster.Ten1.Partition, OutType.Ten);
                    d3 = G2(
                        GroupInitializer(cluster.Int0.Partition, cluster.Int1.Partition, cluster.HealthyIntCount,
                                         cluster.InfectedIntCount, healthyfile, infectedfile, OutType.Int, outList),
                        cluster.Int0.Partition, cluster.Int1.Partition, OutType.Int);
                    dataOut = d1.Union(d2).Where(x => !d3.Contains(x)).OrderBy(x => x.Bacteria).Distinct().ToList();
                    break;

                case ClusterType.G16:
                    d1 = G1(GroupInitializer(cluster.Int0.Partition,
                                             cluster.Int0.Partition, cluster.HealthyIntCount, cluster.InfectedIntCount, healthyfile,
                                             infectedfile, OutType.Int, outList));
                    d2 = G2(
                        GroupInitializer(cluster.Int0.Partition, cluster.Int1.Partition, cluster.HealthyIntCount,
                                         cluster.InfectedIntCount, healthyfile, infectedfile, OutType.Int, outList),
                        cluster.Int0.Partition, cluster.Int1.Partition, OutType.Int);
                    d3 = G2(
                        GroupInitializer(cluster.Vat0.Partition, cluster.Vat1.Partition, cluster.HealthyVatCount,
                                         cluster.InfectedVatCount, healthyfile, infectedfile, OutType.Vat, outList),
                        cluster.Vat0.Partition, cluster.Vat1.Partition, OutType.Vat);
                    dataOut = d1.Union(d2).Where(x => !d3.Contains(x)).OrderBy(x => x.Bacteria).Distinct().ToList();
                    break;

                case ClusterType.G17:
                    d1 = G1(GroupInitializer(cluster.Vat0.Partition,
                                             cluster.Vat1.Partition, cluster.HealthyVatCount, cluster.InfectedVatCount, healthyfile,
                                             infectedfile, OutType.Vat, outList));
                    d2 = G3(
                        GroupInitializer(cluster.Int0.Partition, cluster.Int1.Partition,
                                         cluster.HealthyIntCount, cluster.InfectedIntCount, healthyfile, infectedfile,
                                         OutType.Int, outList), cluster.Int0.Partition, cluster.Int1.Partition,
                        OutType.Int);
                    d3 = G3(
                        GroupInitializer(cluster.Vat0.Partition, cluster.Vat1.Partition,
                                         cluster.HealthyVatCount, cluster.InfectedVatCount, healthyfile, infectedfile,
                                         OutType.Vat, outList), cluster.Vat0.Partition, cluster.Vat1.Partition,
                        OutType.Vat);
                    dataOut = d1.Union(d2).Where(x => !d3.Contains(x)).OrderBy(x => x.Bacteria).Distinct().ToList();
                    break;

                case ClusterType.G18:
                    d1 = G1(GroupInitializer(cluster.Int0.Partition,
                                             cluster.Int1.Partition, cluster.HealthyIntCount, cluster.InfectedIntCount, healthyfile,
                                             infectedfile, OutType.Int, outList));
                    d2 = G3(
                        GroupInitializer(cluster.Int0.Partition, cluster.Int1.Partition,
                                         cluster.HealthyIntCount, cluster.InfectedIntCount, healthyfile, infectedfile,
                                         OutType.Int, outList), cluster.Int0.Partition, cluster.Int1.Partition,
                        OutType.Int);
                    d3 = G3(
                        GroupInitializer(cluster.Vat0.Partition, cluster.Vat1.Partition,
                                         cluster.HealthyVatCount, cluster.InfectedVatCount, healthyfile, infectedfile,
                                         OutType.Vat, outList), cluster.Vat0.Partition, cluster.Vat1.Partition,
                        OutType.Vat);
                    dataOut = d1.Union(d2).Where(x => !d3.Contains(x)).OrderBy(x => x.Bacteria).Distinct().ToList();
                    break;

                case ClusterType.G19:
                    d1 = G1(GroupInitializer(cluster.Ten0.Partition,
                                             cluster.Ten1.Partition, cluster.HealthyTenCount, cluster.InfectedTenCount, healthyfile,
                                             infectedfile, OutType.Ten, outList));
                    d2 = G3(
                        GroupInitializer(cluster.Int0.Partition, cluster.Int1.Partition,
                                         cluster.HealthyIntCount, cluster.InfectedIntCount, healthyfile, infectedfile,
                                         OutType.Int, outList), cluster.Int0.Partition, cluster.Int1.Partition,
                        OutType.Int);
                    d3 = G3(
                        GroupInitializer(cluster.Vat0.Partition, cluster.Vat1.Partition,
                                         cluster.HealthyVatCount, cluster.InfectedVatCount, healthyfile, infectedfile,
                                         OutType.Vat, outList), cluster.Vat0.Partition, cluster.Vat1.Partition,
                        OutType.Vat);
                    dataOut = d1.Union(d2).Where(x => !d3.Contains(x)).OrderBy(x => x.Bacteria).Distinct().ToList();
                    break;

                case ClusterType.G20:
                    d1 = G1(GroupInitializer(cluster.Ten0.Partition,
                                             cluster.Ten1.Partition, cluster.HealthyTenCount, cluster.InfectedTenCount, healthyfile,
                                             infectedfile, OutType.Ten, outList));
                    d2 = G3(
                        GroupInitializer(cluster.Int0.Partition, cluster.Int1.Partition,
                                         cluster.HealthyIntCount, cluster.InfectedIntCount, healthyfile, infectedfile,
                                         OutType.Int, outList), cluster.Int0.Partition, cluster.Int1.Partition,
                        OutType.Int);
                    d3 = G3(
                        GroupInitializer(cluster.Vat0.Partition, cluster.Vat1.Partition,
                                         cluster.HealthyVatCount, cluster.InfectedVatCount, healthyfile, infectedfile,
                                         OutType.Vat, outList), cluster.Vat0.Partition, cluster.Vat1.Partition,
                        OutType.Vat);
                    dataOut = d1.Union(d2).Where(x => !d3.Contains(x)).OrderBy(x => x.Bacteria).Distinct().ToList();
                    break;

                case ClusterType.G21:
                    d1 = G4(GroupInitializer(cluster.Vat0.Partition,
                                             cluster.Vat1.Partition, cluster.HealthyVatCount, cluster.InfectedVatCount, healthyfile,
                                             infectedfile, OutType.Vat, outList));
                    d2 = G3(
                        GroupInitializer(cluster.Int0.Partition, cluster.Int1.Partition,
                                         cluster.HealthyIntCount, cluster.InfectedIntCount, healthyfile, infectedfile,
                                         OutType.Int, outList), cluster.Int0.Partition, cluster.Int1.Partition,
                        OutType.Int);
                    d3 = G3(
                        GroupInitializer(cluster.Vat0.Partition, cluster.Vat1.Partition,
                                         cluster.HealthyVatCount, cluster.InfectedVatCount, healthyfile, infectedfile,
                                         OutType.Vat, outList), cluster.Vat0.Partition, cluster.Vat1.Partition,
                        OutType.Vat);
                    dataOut = d1.Union(d2).Where(x => !d3.Contains(x)).OrderBy(x => x.Bacteria).Distinct().ToList();
                    break;

                case ClusterType.G22:
                    d1 = G4(GroupInitializer(cluster.Int0.Partition,
                                             cluster.Int1.Partition, cluster.HealthyIntCount, cluster.InfectedIntCount, healthyfile,
                                             infectedfile, OutType.Int, outList));
                    d2 = G3(
                        GroupInitializer(cluster.Int0.Partition, cluster.Int1.Partition,
                                         cluster.HealthyIntCount, cluster.InfectedIntCount, healthyfile, infectedfile,
                                         OutType.Int, outList), cluster.Int0.Partition, cluster.Int1.Partition,
                        OutType.Int);
                    d3 = G3(
                        GroupInitializer(cluster.Vat0.Partition, cluster.Vat1.Partition,
                                         cluster.HealthyVatCount, cluster.InfectedVatCount, healthyfile, infectedfile,
                                         OutType.Vat, outList), cluster.Vat0.Partition, cluster.Vat1.Partition,
                        OutType.Vat);
                    dataOut = d1.Union(d2).Where(x => !d3.Contains(x)).OrderBy(x => x.Bacteria).Distinct().ToList();
                    break;

                case ClusterType.G23:
                    d1 = G4(GroupInitializer(cluster.Ten0.Partition,
                                             cluster.Ten1.Partition, cluster.HealthyTenCount, cluster.InfectedTenCount, healthyfile,
                                             infectedfile, OutType.Ten, outList));
                    d2 = G3(
                        GroupInitializer(cluster.Int0.Partition, cluster.Int1.Partition,
                                         cluster.HealthyIntCount, cluster.InfectedIntCount, healthyfile, infectedfile,
                                         OutType.Int, outList), cluster.Int0.Partition, cluster.Int1.Partition,
                        OutType.Int);
                    d3 = G3(
                        GroupInitializer(cluster.Vat0.Partition, cluster.Vat1.Partition,
                                         cluster.HealthyVatCount, cluster.InfectedVatCount, healthyfile, infectedfile,
                                         OutType.Vat, outList), cluster.Vat0.Partition, cluster.Vat1.Partition,
                        OutType.Vat);
                    dataOut = d1.Union(d2).Where(x => !d3.Contains(x)).OrderBy(x => x.Bacteria).Distinct().ToList();
                    break;

                case ClusterType.G24:
                    d1 = G4(GroupInitializer(cluster.Ten0.Partition,
                                             cluster.Ten1.Partition, cluster.HealthyTenCount, cluster.InfectedTenCount, healthyfile,
                                             infectedfile, OutType.Ten, outList));
                    d2 = G3(
                        GroupInitializer(cluster.Ten0.Partition, cluster.Ten1.Partition,
                                         cluster.HealthyTenCount, cluster.InfectedTenCount, healthyfile, infectedfile,
                                         OutType.Ten, outList), cluster.Ten0.Partition, cluster.Ten1.Partition,
                        OutType.Ten);
                    d3 = G3(
                        GroupInitializer(cluster.Vat0.Partition, cluster.Vat1.Partition, cluster.HealthyVatCount,
                                         cluster.InfectedVatCount, healthyfile, infectedfile, OutType.Vat, outList),
                        cluster.Vat0.Partition, cluster.Vat1.Partition, OutType.Vat);
                    dataOut = d1.Union(d2).Where(x => !d3.Contains(x)).OrderBy(x => x.Bacteria).Distinct().ToList();
                    break;

                case ClusterType.G25:
                    d1 = G4(GroupInitializer(cluster.Vat0.Partition,
                                             cluster.Vat1.Partition, cluster.HealthyVatCount, cluster.InfectedVatCount, healthyfile,
                                             infectedfile, OutType.Vat, outList));
                    d2 = G3(
                        GroupInitializer(cluster.Ten0.Partition, cluster.Ten1.Partition, cluster.HealthyTenCount,
                                         cluster.InfectedTenCount, healthyfile, infectedfile, OutType.Ten, outList),
                        cluster.Ten0.Partition, cluster.Ten1.Partition, OutType.Ten);
                    d3 = G3(
                        GroupInitializer(cluster.Vat0.Partition, cluster.Vat1.Partition, cluster.HealthyVatCount,
                                         cluster.InfectedVatCount, healthyfile, infectedfile, OutType.Vat, outList),
                        cluster.Vat0.Partition, cluster.Vat1.Partition, OutType.Vat);
                    dataOut = d1.Union(d2).Where(x => !d3.Contains(x)).OrderBy(x => x.Bacteria).Distinct().ToList();
                    break;
                }
            }

            return(dataOut);
        }
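Every case above finishes with the same composition step: take d1 ∪ d2, drop anything that also appears in d3, then order by Bacteria and deduplicate. The sketch below expresses that step once as a generic helper; the class and parameter names (SetComposition, orderKey) are placeholders and not part of the library above. Note that Enumerable.Union already yields distinct elements, so the trailing Distinct() in the original lines is effectively redundant.

        using System;
        using System.Collections.Generic;
        using System.Linq;

        static class SetComposition
        {
            // (d1 ∪ d2) \ d3, ordered by a caller-supplied key.
            // Union already removes duplicates, so no extra Distinct() is needed.
            public static List<T> Compose<T, TKey>(
                IEnumerable<T> d1, IEnumerable<T> d2, IEnumerable<T> d3,
                Func<T, TKey> orderKey)
            {
                var exclude = new HashSet<T>(d3);           // O(1) membership tests
                return d1.Union(d2)
                         .Where(x => !exclude.Contains(x))  // subtract d3
                         .OrderBy(orderKey)
                         .ToList();
            }
        }

        // Hypothetical usage, mirroring the pattern in the switch above:
        // dataOut = SetComposition.Compose(d1, d2, d3, x => x.Bacteria);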
Exemple #21
0
 public HIntegrityClust(LightWeightGraph data, int k, bool weighted, double alpha = 1.0f, double beta = 0.0f, bool reassignNodes = true, bool hillClimb = true)
     : this(k, weighted, null, alpha, beta, reassignNodes, hillClimb)
 {
     _data = data;
 }
        /// <summary>
        /// combineClusters is used when the partitioning achieved has too many clusters.
        /// </summary>
        /// <param name="partition">A partitioning of a graph with any number of clusters</param>
        /// <param name="minK">The desired number of clusters</param>
        /// <returns>A new partitioning with the desired number of clusters</returns>
        public Partition combineClustersOld(Partition partition, int minK)
        {
            int[,] connections = new int[partition.Clusters.Count, partition.Clusters.Count];
            LightWeightGraph g = (LightWeightGraph)_data;

            // for quick reference let's make a list of which nodes are in which clusters
            int[] clustAssignments = new int[g.Nodes.Count()];
            for (int i = 0; i < partition.Clusters.Count; i++)
            {
                for (int j = 0; j < partition.Clusters[i].Points.Count; j++)
                {
                    clustAssignments[partition.Clusters[i].Points[j].Id] = partition.Clusters[i].Points[j].ClusterId;
                }
            }
            // now go through each node and count its edges out to each cluster
            // add these edges to the connections[] matrix
            for (int i = 0; i < g.Nodes.Count(); i++)
            {
                int currentCluster = clustAssignments[i];
                for (int e = 0; e < g.Nodes[i].Edge.Count(); e++)
                {
                    int adjacentNode    = g.Nodes[i].Edge[e];
                    int adjacentCluster = clustAssignments[adjacentNode];
                    connections[currentCluster, adjacentCluster]++;
                }
            }
            // we want to do (partition.Clusters.count - minK) merges
            // keep a list of which partitions will be merged
            List <int> merges = new List <int>();

            for (int numMerges = 0; numMerges < partition.Clusters.Count - minK; numMerges++)
            {
                // find the largest connections[i,j] and merge clusters i and j
                int    largestI     = 0;
                int    largestJ     = 0;
                double largestValue = 0;
                for (int i = 0; i < partition.Clusters.Count; i++)
                {
                    for (int j = 0; j < partition.Clusters.Count; j++)
                    {
                        if (j <= i)
                        {
                            continue;
                        }
                        int    sizeI = partition.Clusters[i].Points.Count;
                        int    sizeJ = partition.Clusters[j].Points.Count;
                        double score = ((double)connections[i, j]) / (sizeI * sizeJ);
                        if (score > largestValue)
                        {
                            largestValue = score;
                            largestI     = i;
                            largestJ     = j;
                        }
                    }
                }
                // if everything's zero, there is no hope ;-)
                if (largestValue == 0)
                {
                    continue;
                }
                merges.Add(largestI);
                merges.Add(largestJ);
                // it is possible to select J for merging multiple times, if its nodes are split between clusters.
                // we only want to merge J once, so we zero out the largestJ column
                for (int i = 0; i < partition.Clusters.Count; i++)
                {
                    connections[i, largestJ] = 0;
                }
            }
            // now we have the list *merges*, the idea is to take 2 numbers off it,
            // the first is smaller than the second.  We need to merge the second into the first,
            // remove the second, and renumber all clusters after the first
            for (int numMerges = 0; numMerges < merges.Count / 2; numMerges++)
            {
                int firstCluster  = merges[numMerges * 2];
                int secondCluster = merges[(numMerges * 2) + 1];

                // adds the points of the second cluster to the first cluster
                for (int i = 0; i < partition.Clusters[secondCluster].Points.Count; i++)
                {
                    partition.Clusters[firstCluster].Points.Add(partition.Clusters[secondCluster].Points[i]);
                }
            }
            // remove all the second clusters (count from the bottom
            // so that the numbering doesn't get messed up...)
            int[] toRemove = new int[merges.Count / 2];
            for (int numMerges = 0; numMerges < merges.Count / 2; numMerges++)
            {
                int firstCluster  = merges[numMerges * 2];
                int secondCluster = merges[(numMerges * 2) + 1];

                toRemove[numMerges] = secondCluster;
            }
            Array.Sort(toRemove);
            for (int i = toRemove.Length - 1; i >= 0; i--)
            {
                partition.Clusters.RemoveAt(toRemove[i]);
            }

            // renumber the clusters
            for (int i = 0; i < partition.Clusters.Count; i++)
            {
                partition.Clusters[i].Points.Sort();
                partition.Clusters[i].ClusterId = i;
            }
            return(partition);
        }
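The greedy merge above scores each pair of clusters by the number of edges running between them, normalised by the product of the two cluster sizes, and repeatedly merges the highest-scoring pair. A minimal sketch of that scoring rule follows; the helper names and the clusterSizes parameter are assumptions made for illustration, not part of the library.

        using System.Collections.Generic;

        static class ClusterMerging
        {
            // Merge score used in combineClustersOld: inter-cluster edge count
            // normalised by the product of the two cluster sizes.
            public static double MergeScore(int[,] connections, IReadOnlyList<int> clusterSizes, int i, int j)
                => (double)connections[i, j] / (clusterSizes[i] * clusterSizes[j]);

            // Finds the best pair (i, j) with i < j, or (-1, -1) if every score is zero,
            // mirroring the "no hope" case in the method above.
            public static (int i, int j) BestPair(int[,] connections, IReadOnlyList<int> clusterSizes)
            {
                int bestI = -1, bestJ = -1;
                double best = 0;
                for (int i = 0; i < clusterSizes.Count; i++)
                    for (int j = i + 1; j < clusterSizes.Count; j++)
                    {
                        double s = MergeScore(connections, clusterSizes, i, j);
                        if (s > best) { best = s; bestI = i; bestJ = j; }
                    }
                return (bestI, bestJ);
            }
        }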
Exemple #23
0
        public LightWeightGraph GetNormalizedRandomGraph(DistanceMatrix d)
        {
            int numNodes = d.Count;
            var nodes    = new LightWeightGraph.LightWeightNode[numNodes];
            // make a list to hold all candidate edges that are not in the mst
            List <oneNode> myDistances = new List <oneNode>();

            LightWeightGraph mst = LightWeightGraph.GetStackedMST(d, 1);

            LightWeightGraph.LightWeightNode[] mstNodes = mst.Nodes;

            //Create a list to hold edge values
            List <int>[]    edges   = new List <int> [numNodes];
            List <double>[] weights = new List <double> [numNodes];
            for (int i = 0; i < numNodes; i++)
            {
                edges[i]   = new List <int>();
                weights[i] = new List <double>();
            }

            double largestMSTEdge = 0.0;

            // add edges from the mst to the edges list, to facilitate adding additional edges later on
            // Also find the largest edge to use as a cutoff
            for (int i = 0; i < numNodes; i++)
            {
                for (int j = 0; j < mstNodes[i].Edge.Length; j++)
                {
                    largestMSTEdge = Math.Max(largestMSTEdge, mstNodes[i].EdgeWeights[j]);
                    edges[i].Add(mstNodes[i].Edge[j]);
                    weights[i].Add(mstNodes[i].EdgeWeights[j]);
                }
            }
            largestMSTEdge *= _cutoffProp;
            // cycle through each possible edge
            // if the edge is at or beyond the cutoff, or already exists in the mst, skip it
            // otherwise, compute its acceptance probability and add it to the candidate list
            for (int i = 0; i < numNodes - 1; i++)
            {
                for (int j = i + 1; j < numNodes; j++)
                {
                    double dist = d[i, j];
                    if (dist >= largestMSTEdge || mstNodes[i].Edge.Contains(j))
                    {
                        continue;
                    }
                    else
                    {
                        //Probability function goes here

                        double  addlProb = _normAlpha * (1.0 / Math.Exp(_xScale * dist / largestMSTEdge));
                        oneNode nd       = new oneNode {
                            prob = addlProb, fromNode = i, toNode = j, alreadyExists = false
                        };
                        myDistances.Add(nd);
                    }
                }
            }

            // for each candidate edge, draw a random number and keep the edge if its probability exceeds the draw
            for (int m = 0; m < myDistances.Count; m++)
            {
                double rand = Utility.Util.Rng.NextDouble();
                if (myDistances[m].prob > rand)
                {
                    // we have found the edge to add.
                    // add the edge if it does not already exist
                    if (!myDistances[m].alreadyExists)
                    {
                        int    from = myDistances[m].fromNode;
                        int    to   = myDistances[m].toNode;
                        double dist = d[from, to];
                        edges[from].Add(to);
                        weights[from].Add(dist);
                        edges[to].Add(from);
                        weights[to].Add(dist);
                        myDistances[m].alreadyExists = true;
                    }
                }
            }

            for (int i = 0; i < numNodes; i++)
            {
                nodes[i] = new LightWeightGraph.LightWeightNode(i, true, edges[i], weights[i]);
            }

            return(new LightWeightGraph(nodes, true));
        }
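In the method above, each non-MST edge shorter than the cutoff is kept independently with probability _normAlpha * exp(-_xScale * dist / cutoff), so edges close to the MST scale are the most likely additions. A self-contained sketch of that acceptance test follows; passing the field values as parameters is an assumption made so the snippet stands alone.

        using System;

        static class NormalizedEdgeSampling
        {
            // Acceptance test matching the probability used in GetNormalizedRandomGraph:
            // p = normAlpha * exp(-xScale * dist / cutoff); edges at or beyond the
            // cutoff are never added.
            public static bool KeepEdge(double dist, double cutoff,
                                        double normAlpha, double xScale, Random rng)
            {
                if (dist >= cutoff) return false;
                double p = normAlpha * Math.Exp(-xScale * dist / cutoff);
                return p > rng.NextDouble();   // independent draw per candidate edge
            }
        }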
Exemple #24
0
        public Partition GetPartition()
        {
            DistanceMatrix mat = null;

            if (_data.Type == AbstractDataset.DataType.DistanceMatrix)
            {
                mat = (DistanceMatrix)_data;
            }
            else if (_data.Type == AbstractDataset.DataType.PointSet)
            {
                mat = ((PointSet)_data).GetDistanceMatrix();
            }

            //Setup our partition with a single cluster, with all points
            List <Cluster> clusterList = new List <Cluster> {
                new Cluster(0, Enumerable.Range(0, _data.Count).ToList())
            };
            Partition partition = new Partition(clusterList, _data);

            //Dictionary to hold VAT
            var vatMap = new Dictionary <int, Integrity>();

            //Dictionary to hold subset array
            var subsetMap = new Dictionary <int, int[]>();

            while (clusterList.Count < _minK)
            {
                //Calculate the VAT for all values
                foreach (var c in partition.Clusters.Where(c => !vatMap.ContainsKey(c.ClusterId)))
                {
                    //We must calculate a graph for this subset of data
                    List <int> clusterSubset = c.Points.Select(p => p.Id).ToList();

                    //Now calculate Vat
                    LightWeightGraph lwg;
                    if (_data.Type == AbstractDataset.DataType.Graph)
                    {
                        bool[] exclusion = new bool[_data.Count];
                        for (int i = 0; i < _data.Count; i++)
                        {
                            exclusion[i] = true;
                        }
                        foreach (var p in c.Points)
                        {
                            exclusion[p.Id] = false;
                        }
                        lwg = new LightWeightGraph((LightWeightGraph)_data, exclusion);
                    }
                    else //Distance matrix or Pointset
                    {
                        Debug.Assert(mat != null, "mat != null");
                        var subMatrix = mat.GetReducedDataSet(clusterSubset);

                        //Generate our graph
                        lwg = _graphGen.GenerateGraph(subMatrix.Mat);
                    }

                    subsetMap.Add(c.ClusterId, clusterSubset.ToArray());
                    lwg.IsWeighted = _weighted;
                    Integrity v = new Integrity(lwg, _reassignNodes, _alpha, _beta);
                    _vatNodeRemovalOrder = v.NodeRemovalOrder;
                    _vatNumNodesRemoved  = v.NumNodesRemoved;
                    if (_hillClimb)
                    {
                        v.HillClimb();
                    }
                    ////VATClust v = new VATClust(subMatrix.Mat, _weighted, _useKnn, _kNNOffset, _alpha, _beta);
                    vatMap.Add(c.ClusterId, v);
                }

                meta.AppendLine("All calculated Integritys:");
                //Now find the minimum vat value
                int    minVatCluster = 0;
                double minVatValue   = double.MaxValue;
                foreach (var c in vatMap)
                {
                    meta.Append(String.Format("{0} ", c.Value.MinVat));
                    if (c.Value.MinVat < minVatValue)
                    {
                        minVatCluster = c.Key;
                        minVatValue   = c.Value.MinVat;
                    }
                }
                meta.AppendLine();

                //now merge the chosen cluster's sub-partition back into the overall partition
                var minVAT       = vatMap[minVatCluster];
                var subPartition = minVAT.GetPartition();
                var nodeIndexMap = subsetMap[minVatCluster];

                meta.AppendFormat("Integrity: MinIntegrity={0}\r\n", minVAT.MinVat);
                meta.AppendFormat("Removed Count:{0} \r\n", minVAT.NumNodesRemoved);
                meta.AppendLine(String.Join(",",
                                            minVAT.NodeRemovalOrder.GetRange(0, minVAT.NumNodesRemoved).Select(c => nodeIndexMap[c])));

                partition.MergeSubPartition(subPartition, nodeIndexMap, minVatCluster);
                vatMap.Remove(minVatCluster);
                subsetMap.Remove(minVatCluster);
            }
            partition.MetaData = meta.ToString();
            return(partition);
        }
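For graph-typed data, GetPartition extracts the subgraph of a single cluster by building an exclusion mask: every node starts excluded, then the cluster's points are re-included. The same step can be written as a small standalone helper (the class and method names here are assumed for illustration):

        using System.Collections.Generic;

        static class SubgraphMasks
        {
            // Builds the exclusion mask used in GetPartition: true = excluded.
            public static bool[] BuildExclusionMask(int totalNodes, IEnumerable<int> includedIds)
            {
                var exclude = new bool[totalNodes];
                for (int i = 0; i < totalNodes; i++)
                {
                    exclude[i] = true;            // exclude everything by default
                }
                foreach (int id in includedIds)
                {
                    exclude[id] = false;          // keep the cluster's points
                }
                return exclude;
            }
        }

        // The mask is then handed to the LightWeightGraph copy constructor, as in the method above.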
Exemple #25
0
        /// <summary>
        /// Creates a random graph, based on an mst.
        /// </summary>
        /// <param name="d">distance matrix used to construct the graph</param>
        /// <param name="alpha">the resulting average degree of the graph</param>
        /// /// <param name="expP">The probability of adding an edge depends on its distance: 1/d^expP</param>
        /// <returns></returns>
        public static LightWeightGraph GetRandomGraph(DistanceMatrix d, int alpha, double expP)
        {
            int numNodes = d.Count;

            var nodes = new LightWeightGraph.LightWeightNode[numNodes];

            // make an array to hold all possible edges, less the edges in the mst
            oneNode[] myDistances = new oneNode[numNodes * (numNodes - 1) / 2 - (numNodes - 1)];

            LightWeightGraph mst = LightWeightGraph.GetStackedMST(d, 1);

            LightWeightGraph.LightWeightNode[] mstNodes = mst.Nodes;
            int    myDistancesIndex = 0;
            double myDistancesTotal = 0;

            //Create lists to hold edge values
            //note: only the edge lists are initialized here; the weight lists are left null,
            //so the nodes below are built without edge weights
            List <int>[]    edges   = new List <int> [numNodes];
            List <double>[] weights = new List <double> [numNodes];
            for (int i = 0; i < numNodes; i++)
            {
                edges[i] = new List <int>();
            }

            // add edges from the mst to the edges list, to facilitate adding additional edges later on
            for (int i = 0; i < numNodes; i++)
            {
                for (int j = 0; j < mstNodes[i].Edge.Length; j++)
                {
                    edges[i].Add(mstNodes[i].Edge[j]);
                }
            }


            // cycle through each possible edge
            // if the edge exists in the mst, continue
            // otherwise, add the edge to the distances array and add its probability weight to the cumulative total
            for (int i = 0; i < numNodes - 1; i++)
            {
                for (int j = i + 1; j < numNodes; j++)
                {
                    if (mstNodes[i].Edge.Contains(j))
                    {
                        continue;
                    }
                    else
                    {
                        double addlProb = 1.0 / Math.Pow(d[i, j], expP);
                        myDistancesTotal += addlProb;
                        oneNode nd = new oneNode {
                            prob = myDistancesTotal, fromNode = i, toNode = j, alreadyExists = false
                        };
                        myDistances[myDistancesIndex] = nd;
                        myDistancesIndex++;
                    }
                }
            }

            // how many edges do we want to add?
            int desiredNewEdges = (alpha * numNodes) - (numNodes - 1);

            Random rnd = Utility.Util.Rng;

            // add edges randomly until we have added the desired number of edges
            while (desiredNewEdges > 0)
            {
                // generate a random number between 0 and myDistancesTotal
                double rand = rnd.NextDouble() * myDistancesTotal;

                // walk through the array until you find the random number
                for (int m = 0; m < myDistances.Length; m++)
                {
                    if (myDistances[m].prob > rand)
                    {
                        // we have found the edge to add.
                        // add the edge if it does not already exist
                        if (!myDistances[m].alreadyExists)
                        {
                            edges[myDistances[m].fromNode].Add(myDistances[m].toNode);
                            edges[myDistances[m].toNode].Add(myDistances[m].fromNode);
                            myDistances[m].alreadyExists = true;
                            desiredNewEdges--;
                            break;
                        }
                    }
                }
            }
            for (int i = 0; i < numNodes; i++)
            {
                nodes[i] = new LightWeightGraph.LightWeightNode(i, true, edges[i], weights[i]);
            }

            return(new LightWeightGraph(nodes, true));
        }
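GetRandomGraph accumulates a running total into prob, so each entry of myDistances holds a cumulative weight; sampling an edge then means drawing rand in [0, total) and finding the first entry whose cumulative value exceeds it. The linear scan above does exactly that. The sketch below shows the same roulette-wheel selection using a binary search instead; it is an alternative formulation for illustration, not the library's code.

        using System;
        using System.Collections.Generic;

        static class RouletteWheel
        {
            // Picks an index with probability proportional to each item's weight,
            // given the weights already accumulated into an ascending cumulative array.
            public static int Sample(IReadOnlyList<double> cumulative, Random rng)
            {
                double r = rng.NextDouble() * cumulative[cumulative.Count - 1];
                int lo = 0, hi = cumulative.Count - 1;
                while (lo < hi)
                {
                    int mid = (lo + hi) / 2;
                    if (cumulative[mid] > r) hi = mid;   // first entry exceeding r is at or left of mid
                    else lo = mid + 1;
                }
                return lo;
            }
        }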