Exemplo n.º 1
0
        /// <summary>
        /// Click handler that evaluates a clustering against a label column and shows the
        /// text returned by <c>ExternalEval.GreedyErrorEval</c> in the result box.
        /// </summary>
        /// <param name="sender">Control that raised the event (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void externalEvalCalculateButton_Click(object sender, EventArgs e)
        {
            // Parse the label file, then ask which column to use as the label.
            // The prompt receives 1 and the column count of the first row (presumably the
            // allowed range - confirm against Prompt.ShowDialog); the answer is shifted by
            // one before it is handed to GetColumn.
            DelimitedFile delimitedLabelFile = new DelimitedFile(externalEvalLabelText.Text);
            int           labelCol           = Prompt.ShowDialog("Enter the Column to use", "Select Attribute", 1, delimitedLabelFile.Data[0].Length);
            LabelList     labels             = new LabelList(delimitedLabelFile.GetColumn(labelCol - 1));

            // Load the partition (found clusters)
            Partition clusterFile = new Partition(externalEvalClusterText.Text);

            // Count mapping: [actual label index, found cluster id] -> number of points.
            // NOTE(review): ClusterId is used directly as the column index, so it is assumed
            // to lie in 0..Clusters.Count-1 - confirm against Partition.
            int[,] clusterMatching = new int[labels.UniqueLabels.Count, clusterFile.Clusters.Count];
            foreach (Cluster c in clusterFile.Clusters)
            {
                foreach (ClusteredItem k in c.Points)
                {
                    clusterMatching[labels.LabelIndices[k.Id], k.ClusterId]++;
                }
            }

            // One-to-one (greedy) mapping evaluation
            externalEvalResultText.Text = ExternalEval.GreedyErrorEval(clusterFile, labels, clusterMatching);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Computes the Rand index between the labels in the last column of a delimited
        /// label file and the cluster assignments of a partition file: the fraction of
        /// point pairs that are either grouped together in both or separated in both.
        /// </summary>
        /// <param name="labelFile">Path of the delimited label file; its last column is used as the label.</param>
        /// <param name="clusterFileName">Path of the partition file.</param>
        /// <returns>
        /// (a + b) / (n * (n - 1) / 2), where n is the number of labels. With fewer than two
        /// points the denominator is zero and the result is <c>double.NaN</c>.
        /// </returns>
        public static double RandIndex(String labelFile, String clusterFileName)
        {
            //start by parsing label file (last column holds the label)
            DelimitedFile delimitedLabelFile = new DelimitedFile(labelFile);
            int           labelCol           = delimitedLabelFile.Data[0].Length;
            LabelList     labels             = new LabelList(delimitedLabelFile.GetColumn(labelCol - 1));

            //get the Partition file
            Partition clusterFile = new Partition(clusterFileName);

            var   labelIndices = labels.LabelIndices;
            int[] assignments  = new int[labelIndices.Length];

            // Points that appear in no cluster keep the array default of 0.
            for (int cluster = 0; cluster < clusterFile.Clusters.Count; cluster++)
            {
                var points = clusterFile.Clusters[cluster].Points;
                for (int j = 0; j < points.Count; j++)
                {
                    assignments[points[j].Id] = points[j].ClusterId;
                }
            }

            // Compare the two groupings pair by pair.
            // 64-bit counters: the number of pairs exceeds Int32.MaxValue from roughly 65k points.
            long a = 0; // pairs placed together by both the labels and the clustering
            long b = 0; // pairs separated by both the labels and the clustering

            for (int i = 0; i < assignments.Length; i++)
            {
                for (int j = i + 1; j < assignments.Length; j++)
                {
                    bool sameLabel   = labelIndices[i] == labelIndices[j];
                    bool sameCluster = assignments[i] == assignments[j];

                    if (sameLabel && sameCluster)
                    {
                        a++;
                    }
                    else if (!sameLabel && !sameCluster)
                    {
                        b++;
                    }
                }
            }

            // n * (n - 1) overflows a 32-bit int from about 46k points, so compute it in 64 bits.
            long   n           = assignments.Length;
            long   denominator = n * (n - 1) / 2;
            double randIndex   = (a + b) / (double)denominator;

            return(randIndex);
        }
        /// <summary>
        /// Click handler: reads the label file and the partition file named in the two text
        /// boxes, builds an <c>ExternalEval</c> from them and displays its <c>TextResults</c>.
        /// The user is asked which label-file column to use before the partition is loaded.
        /// </summary>
        /// <param name="sender">Control that raised the event (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void externalEvalCalculateButton_Click(object sender, EventArgs e)
        {
            // Label file first: its first row determines how many columns can be offered
            DelimitedFile labelSource  = new DelimitedFile(externalEvalLabelText.Text);
            int           columnCount  = labelSource.Data[0].Length;
            int           pickedColumn = Prompt.ShowDialog("Enter the Column to use", "Select Attribute", 1, columnCount);

            // The prompt answer is shifted down by one before it is passed to GetColumn
            LabelList truth = new LabelList(labelSource.GetColumn(pickedColumn - 1));

            // Partition (found clusters) to be scored
            Partition partition = new Partition(externalEvalClusterText.Text);

            // Run the evaluation and publish its textual summary
            ExternalEval evaluation = new ExternalEval(partition, truth);
            externalEvalResultText.Text = evaluation.TextResults;
        }
Exemplo n.º 4
0
        /// <summary>
        /// Click handler: trains a hexagonal self-organizing map on the point set named in
        /// <c>textBox4</c>, asks which column of the label file to use, converts the labels
        /// to integer indices and saves the images returned by <c>GetUMatrix</c> as .bmp files.
        /// </summary>
        /// <param name="sender">Control that raised the event (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void button19_Click_1(object sender, EventArgs e)
        {
            int mapSize = (int)somWidth.Value;

            // Build and train the map before any label handling takes place
            NetMining.Data.PointSet    points = new PointSet(textBox4.Text);
            HexagonalSelfOrganizingMap som    = new HexagonalSelfOrganizingMap(points, mapSize, 0.3);

            som.runLargeEpochs(0.2, 1);
            som.runLargeEpochs(0.05, 2);
            som.runLargeEpochs(0.01, 2);

            // Read the label file and let the user choose the label column;
            // the prompt answer is shifted down by one before it is used as a column index
            DelimitedFile labelFile   = new DelimitedFile(SOMLabelTextbox.Text);
            int           columnCount = labelFile.Data[0].Length;
            int           labelColumn = Prompt.ShowDialog("Select Label Index", "Label File", 1, columnCount) - 1;
            String[]      rawLabels   = labelFile.GetColumn(labelColumn);

            // Give every distinct label an index in order of first appearance
            List <String> distinctLabels = new List <string>();
            int[]         labelOfPoint   = new int[points.Count];
            for (int row = 0; row < points.Count; row++)
            {
                String current  = rawLabels[row];
                int    position = distinctLabels.IndexOf(current);
                if (position < 0)
                {
                    position = distinctLabels.Count;
                    distinctLabels.Add(current);
                }
                labelOfPoint[row] = position;
            }

            var images = som.GetUMatrix(10, distinctLabels.Count, labelOfPoint);

            // The first two images get fixed names; each remaining one is numbered from 1
            String sizeSuffix = mapSize.ToString();
            images[0].Save("test" + sizeSuffix + ".bmp");
            images[1].Save("count" + sizeSuffix + ".bmp");
            for (int classNumber = 1; classNumber + 1 < images.Count; classNumber++)
            {
                images[classNumber + 1].Save("count" + sizeSuffix + "class_" + classNumber.ToString() + ".bmp");
            }
            MessageBox.Show("Done!");
        }
Exemplo n.º 5
0
        /// <summary>
        /// Computes the purity of a clustering: for every cluster the size of its most
        /// frequent label is taken, these sizes are summed and the sum is divided by the
        /// number of labelled points.
        /// </summary>
        /// <param name="labelFile">Path of the delimited label file; its last column is used as the label.</param>
        /// <param name="clusterFileName">Path of the partition file.</param>
        /// <returns>
        /// Sum of per-cluster majority counts divided by the number of labels. With zero
        /// labels the division is 0 / 0 in floating point and yields <c>double.NaN</c>.
        /// </returns>
        public static double Purity(String labelFile, String clusterFileName)
        {
            //start by parsing label file (last column holds the label)
            DelimitedFile delimitedLabelFile = new DelimitedFile(labelFile);
            int           labelCol           = delimitedLabelFile.Data[0].Length;
            LabelList     labels             = new LabelList(delimitedLabelFile.GetColumn(labelCol - 1));

            //get the Partition file
            Partition clusterFile = new Partition(clusterFileName);

            var labelIndices = labels.LabelIndices;
            int labelKinds   = labels.UniqueLabels.Count;

            int total = 0;

            for (int cluster = 0; cluster < clusterFile.Clusters.Count; cluster++)
            {
                var   points      = clusterFile.Clusters[cluster].Points;
                int[] labelCounts = new int[labelKinds];

                // Counts only ever grow, so tracking the largest count seen so far
                // gives the same value as scanning labelCounts afterwards.
                int majority = 0;
                for (int j = 0; j < points.Count; j++)
                {
                    int count = ++labelCounts[labelIndices[points[j].Id]];
                    if (count > majority)
                    {
                        majority = count;
                    }
                }
                total += majority;
            }

            return((double)total / labelIndices.Length);
        }
        /// <summary>
        /// Builds a comma-separated report with one row per cluster. Each row holds the file
        /// name, the cluster index, an all-noise flag, ns/ms/cs and six scores (internal
        /// density, average degree, expansion, cut ratio, conductance, separability). Every
        /// score is followed by its minimum, its size-weighted average and its maximum over
        /// all clusters. ns, ms and cs follow Yang and Leskovec, ICDM 2012.
        /// </summary>
        /// <param name="labelFile">Path of the delimited label file; only the number of labels in its last column is used.</param>
        /// <param name="clusterFileName">Path of the partition file; the text after its last '\' starts every row.</param>
        /// <returns>
        /// The report, one "\n"-terminated row per cluster, or an empty string when the
        /// partition has no clusters. Scores whose divisor is zero come out as NaN or
        /// Infinity because the divisions are done in floating point.
        /// </returns>
        public static String CheckForNoise(String labelFile, String clusterFileName)
        {
            //start by parsing label file (last column holds the label)
            DelimitedFile delimitedLabelFile = new DelimitedFile(labelFile);
            int           labelCol           = delimitedLabelFile.Data[0].Length;
            LabelList     labels             = new LabelList(delimitedLabelFile.GetColumn(labelCol - 1));

            //get the Partition file
            Partition clusterFile  = new Partition(clusterFileName);
            int       clusterCount = clusterFile.Clusters.Count;

            // Start every assignment at -1: nodes that have been removed as part of a
            // critical attack set appear in no cluster and therefore stay at -1.
            int[] assignments = new int[labels.LabelIndices.Length];
            for (int i = 0; i < assignments.Length; i++)
            {
                assignments[i] = -1;
            }

            // Point ids at or above this threshold count as noise when the all-noise flag is computed.
            // NOTE(review): the thresholds are tied to specific data-set sizes (220 / 440 / other) - confirm.
            int noiseThreshold;
            if (assignments.Length == 220)
            {
                noiseThreshold = 200;
            }
            else if (assignments.Length == 440)
            {
                noiseThreshold = 400;
            }
            else
            {
                noiseThreshold = 800;
            }

            // Record the cluster id of every clustered point
            for (int cluster = 0; cluster < clusterCount; cluster++)
            {
                var points = clusterFile.Clusters[cluster].Points;
                for (int j = 0; j < points.Count; j++)
                {
                    assignments[points[j].Id] = points[j].ClusterId;
                }
            }

            int[]     ns         = new int[clusterCount]; // points in the cluster
            int[]     ms         = new int[clusterCount]; // adjacency entries that stay inside the cluster
            int[]     cs         = new int[clusterCount]; // adjacency entries that leave the cluster
            Boolean[] isAllNoise = new Boolean[clusterCount];
            int       nodes      = 0;                     // total number of clustered points

            for (int cluster = 0; cluster < clusterCount; cluster++)
            {
                var points = clusterFile.Clusters[cluster].Points;
                ns[cluster]         = points.Count;
                isAllNoise[cluster] = true;
                nodes              += points.Count;

                for (int j = 0; j < points.Count; j++) // for each vertex in this cluster
                {
                    int id = points[j].Id;
                    if (id < noiseThreshold)
                    {
                        isAllNoise[cluster] = false;
                    }

                    // NOTE(review): every (vertex, neighbour) entry is counted, so an edge stored on
                    // both endpoints adds 2 to ms - confirm whether Graph adjacency is symmetric.
                    // The cluster's list index is compared with the stored ClusterId, which assumes
                    // the two coincide - confirm against Partition.
                    var neighbours = clusterFile.Graph.Nodes[id].Edge;
                    for (int k = 0; k < neighbours.Length; k++) // for each edge k adjacent to j
                    {
                        if (cluster == assignments[neighbours[k]])
                        {
                            ms[cluster]++;
                        }
                        else
                        {
                            cs[cluster]++;
                        }
                    }
                }
            }

            // Nothing to report; also keeps Min()/Max() below from being called on empty arrays.
            if (clusterCount == 0)
            {
                return("");
            }

            double[] internalDensity   = new double[clusterCount];
            double[] averageDegree     = new double[clusterCount];
            double[] expansion         = new double[clusterCount];
            double[] cutRatio          = new double[clusterCount];
            double[] conductance       = new double[clusterCount];
            double[] separability      = new double[clusterCount];
            double   WAinternalDensity = 0;
            double   WAaverageDegree   = 0;
            double   WAexpansion       = 0;
            double   WAcutRatio        = 0;
            double   WAconductance     = 0;
            double   WAseparability    = 0;

            for (int cluster = 0; cluster < clusterCount; cluster++)
            {
                // 64-bit products: n * (n - 1) and n * (N - n) overflow a 32-bit int for large clusters.
                long   n                          = ns[cluster];
                double totalPossibleInternalEdges = n * (n - 1) / 2;

                internalDensity[cluster] = totalPossibleInternalEdges == 0 ? 0 : (double)ms[cluster] / totalPossibleInternalEdges;
                averageDegree[cluster]   = 2.0 * ms[cluster] / ns[cluster];
                expansion[cluster]       = (double)cs[cluster] / ns[cluster];
                cutRatio[cluster]        = (double)cs[cluster] / (n * (assignments.Length - n));
                conductance[cluster]     = (double)cs[cluster] / (2.0 * ms[cluster] + cs[cluster]);
                separability[cluster]    = (double)ms[cluster] / cs[cluster];

                // Accumulate the size-weighted sums; they are divided by the node total below.
                WAinternalDensity += internalDensity[cluster] * ns[cluster];
                WAaverageDegree   += averageDegree[cluster] * ns[cluster];
                WAexpansion       += expansion[cluster] * ns[cluster];
                WAcutRatio        += cutRatio[cluster] * ns[cluster];
                WAconductance     += conductance[cluster] * ns[cluster];
                WAseparability    += separability[cluster] * ns[cluster];
            }

            WAinternalDensity /= (double)nodes;
            WAaverageDegree   /= (double)nodes;
            WAexpansion       /= (double)nodes;
            WAcutRatio        /= (double)nodes;
            WAconductance     /= (double)nodes;
            WAseparability    /= (double)nodes;

            // Per-score columns in report order, with the row-independent values computed once.
            double[][] scores   = { internalDensity, averageDegree, expansion, cutRatio, conductance, separability };
            double[]   weighted = { WAinternalDensity, WAaverageDegree, WAexpansion, WAcutRatio, WAconductance, WAseparability };
            double[]   minima   = new double[scores.Length];
            double[]   maxima   = new double[scores.Length];
            for (int s = 0; s < scores.Length; s++)
            {
                minima[s] = scores[s].Min();
                maxima[s] = scores[s].Max();
            }

            String fileName = clusterFileName.Substring(clusterFileName.LastIndexOf('\\') + 1);

            // NOTE(review): numbers are formatted with the current culture (as before); a culture
            // with a decimal comma would break the comma-separated layout - confirm whether
            // invariant formatting is wanted.
            System.Text.StringBuilder report = new System.Text.StringBuilder();
            for (int cluster = 0; cluster < clusterCount; cluster++)
            {
                report.Append(fileName).Append(',').Append(cluster)
                      .Append(',').Append(isAllNoise[cluster] ? 1 : 0)
                      .Append(',').Append(ns[cluster])
                      .Append(',').Append(ms[cluster])
                      .Append(',').Append(cs[cluster]);

                for (int s = 0; s < scores.Length; s++)
                {
                    report.Append(',').Append(scores[s][cluster])
                          .Append(',').Append(minima[s])
                          .Append(',').Append(weighted[s])
                          .Append(',').Append(maxima[s]);
                }
                report.Append('\n');
            }
            return(report.ToString());
        }
 /// <summary>
 /// Builds a LabelList from one column of a delimited file by forwarding
 /// the result of <c>f.GetColumn(col)</c> to the constructor overload that accepts it.
 /// </summary>
 /// <param name="f">Delimited file that contains the label column</param>
 /// <param name="col">0-based index of the label column, passed unchanged to GetColumn</param>
 public LabelList(DelimitedFile f, int col)
     : this(f.GetColumn(col))
 {
 }