//This will calculate the errors
//first it must ask what column to use as a label
private void externalEvalCalculateButton_Click(object sender, EventArgs e)
{
    //start by parsing the label file; the user picks which (1-based) column holds the labels
    DelimitedFile delimitedLabelFile = new DelimitedFile(externalEvalLabelText.Text);
    int labelCol = Prompt.ShowDialog("Enter the Column to use", "Select Attribute", 1, delimitedLabelFile.Data[0].Length);
    LabelList labels = new LabelList(delimitedLabelFile.GetColumn(labelCol - 1));

    //get the Partion file
    Partition clusterFile = new Partition(externalEvalClusterText.Text);

    //create a count mapping
    //[actual cluster label, number in found clusters]
    int[,] clusterMatching = new int[labels.UniqueLabels.Count, clusterFile.Clusters.Count];
    foreach (Cluster c in clusterFile.Clusters)
    {
        foreach (ClusteredItem k in c.Points)
        {
            //assumes k.Id indexes LabelIndices and k.ClusterId < Clusters.Count -- TODO confirm
            int actualMatching = labels.LabelIndices[k.Id];
            int foundMatching = k.ClusterId;
            clusterMatching[actualMatching, foundMatching]++;
        }
    }

    //One-To-One Mapping like Darla's
    String greedyError = ExternalEval.GreedyErrorEval(clusterFile, labels, clusterMatching);
    externalEvalResultText.Text = greedyError;
}
/// <summary>
/// Computes the Rand index between the ground-truth labeling (last column of
/// <paramref name="labelFile"/>) and the clustering in <paramref name="clusterFileName"/>.
/// </summary>
/// <param name="labelFile">Delimited file whose last column holds the true labels.</param>
/// <param name="clusterFileName">Partition (cluster) file to evaluate.</param>
/// <returns>Rand index in [0,1]; NaN when there are fewer than two points.</returns>
public static double RandIndex(String labelFile, String clusterFileName)
{
    //CALCULATING THE RAND INDEX
    //start by parsing label file (labels come from the last column)
    DelimitedFile delimitedLabelFile = new DelimitedFile(labelFile);
    int labelCol = delimitedLabelFile.Data[0].Length;
    LabelList labels = new LabelList(delimitedLabelFile.GetColumn(labelCol - 1));

    //get the Partion file and flatten it into a point-id -> cluster-id array
    Partition clusterFile = new Partition(clusterFileName);
    int[] assignments = new int[labels.LabelIndices.Length];
    for (int cluster = 0; cluster < clusterFile.Clusters.Count; cluster++)
    {
        for (int j = 0; j < clusterFile.Clusters[cluster].Points.Count; j++)
        {
            int clusterid = clusterFile.Clusters[cluster].Points[j].ClusterId;
            int id = clusterFile.Clusters[cluster].Points[j].Id;
            assignments[id] = clusterid;
        }
    }

    //compare all unordered pairs of points:
    //  a = pairs grouped together in BOTH partitions
    //  b = pairs separated in BOTH partitions
    //64-bit counters: the pair count n*(n-1)/2 overflows int for n >= ~65,536
    long a = 0;
    long b = 0;
    for (int i = 0; i < assignments.Length; i++)
    {
        for (int j = i + 1; j < assignments.Length; j++)
        {
            bool sameLabel = labels.LabelIndices[i] == labels.LabelIndices[j];
            bool sameCluster = assignments[i] == assignments[j];
            if (sameLabel && sameCluster)
            {
                a++;
            }
            else if (!sameLabel && !sameCluster)
            {
                b++;
            }
        }
    }
    long n = assignments.Length;
    long denominator = n * (n - 1) / 2; //total number of unordered pairs
    double randIndex = (a + b) / (double)denominator;
    //return "Group A: " + a + " Group B: " + b + " RandIndex: " + randIndex;
    return (randIndex);
}
//Handles the Calculate button: prompts the user for the label column,
//runs the external evaluation, and shows the textual report.
private void externalEvalCalculateButton_Click(object sender, EventArgs e)
{
    //parse the label file and ask which 1-based column holds the class labels
    var labelFile = new DelimitedFile(externalEvalLabelText.Text);
    int chosenColumn = Prompt.ShowDialog("Enter the Column to use", "Select Attribute", 1, labelFile.Data[0].Length);
    var labelList = new LabelList(labelFile.GetColumn(chosenColumn - 1));

    //load the partition (cluster) file
    var partition = new Partition(externalEvalClusterText.Text);

    //run the evaluation and display its results
    var evaluation = new ExternalEval(partition, labelList);
    externalEvalResultText.Text = evaluation.TextResults;
}
//Trains a hexagonal self-organizing map on the point set, labels it from a
//user-selected column, and saves the U-matrix / count / per-class bitmaps.
private void button19_Click_1(object sender, EventArgs e)
{
    int somDim = (int)somWidth.Value;
    PointSet data = new PointSet(textBox4.Text);

    //train the map: one coarse pass followed by two progressively finer ones
    HexagonalSelfOrganizingMap hSOM = new HexagonalSelfOrganizingMap(data, somDim, 0.3);
    hSOM.runLargeEpochs(0.2, 1);
    hSOM.runLargeEpochs(0.05, 2);
    hSOM.runLargeEpochs(0.01, 2);

    //Setup out labels
    DelimitedFile f = new DelimitedFile(SOMLabelTextbox.Text);
    int labelIndex = Prompt.ShowDialog("Select Label Index", "Label File", 1, f.Data[0].Length) - 1;
    String[] labels = f.GetColumn(labelIndex);

    //Now build our array of indicies: map each distinct label string to a
    //dense index in first-seen order. A dictionary lookup replaces the old
    //List.Contains + IndexOf pair, which scanned the list twice per point.
    List<String> labelNames = new List<string>();
    Dictionary<String, int> labelLookup = new Dictionary<string, int>();
    int[] labelIndexArr = new int[data.Count];
    for (int i = 0; i < data.Count; i++)
    {
        int idx;
        if (!labelLookup.TryGetValue(labels[i], out idx))
        {
            idx = labelNames.Count;
            labelLookup.Add(labels[i], idx);
            labelNames.Add(labels[i]);
        }
        labelIndexArr[i] = idx;
    }

    //render and save the U-matrix [0], the hit-count map [1], and one map per class
    var bmp = hSOM.GetUMatrix(10, labelNames.Count, labelIndexArr);
    bmp[0].Save("test" + somDim.ToString() + ".bmp");
    bmp[1].Save("count" + somDim.ToString() + ".bmp");
    for (int i = 2; i < bmp.Count; i++)
    {
        bmp[i].Save("count" + somDim.ToString() + "class_" + (i - 1).ToString() + ".bmp");
    }
    MessageBox.Show("Done!");
}
/// <summary>
/// Computes cluster purity: each cluster votes for its majority true label,
/// and purity is the fraction of all points covered by those majorities.
/// </summary>
/// <param name="labelFile">Delimited file whose last column holds the true labels.</param>
/// <param name="clusterFileName">Partition (cluster) file to evaluate.</param>
/// <returns>Purity in [0,1].</returns>
public static double Purity(String labelFile, String clusterFileName)
{
    //start by parsing label file (labels come from the last column)
    DelimitedFile delimitedLabelFile = new DelimitedFile(labelFile);
    int labelCol = delimitedLabelFile.Data[0].Length;
    LabelList labels = new LabelList(delimitedLabelFile.GetColumn(labelCol - 1));

    //get the Partion file
    Partition clusterFile = new Partition(clusterFileName);

    int[] majority = new int[clusterFile.Clusters.Count];
    for (int cluster = 0; cluster < clusterFile.Clusters.Count; cluster++)
    {
        //count how many points of each true label fall in this cluster
        int[] assignments = new int[labels.UniqueLabels.Count];
        for (int j = 0; j < clusterFile.Clusters[cluster].Points.Count; j++)
        {
            int id = clusterFile.Clusters[cluster].Points[j].Id;
            assignments[labels.LabelIndices[id]]++;
        }
        //majority vote: size of the largest single-label group in this cluster
        int maxAssign = 0;
        for (int k = 0; k < assignments.Length; k++)
        {
            maxAssign = Math.Max(maxAssign, assignments[k]);
        }
        majority[cluster] = maxAssign;
    }
    //purity = (sum of per-cluster majorities) / (total number of points)
    int total = 0;
    for (int i = 0; i < majority.Length; i++)
    {
        total += majority[i];
    }
    return ((double)total / labels.LabelIndices.Length);
}
/// <summary>
/// Computes per-cluster connectivity statistics (ns = node count, ms = internal
/// edge-endpoint count, cs = cross/boundary edge-endpoint count) and the derived
/// community-quality metrics from Yang and Leskovec (ICDM 2012), then returns
/// them as one CSV row per cluster.
/// </summary>
/// <param name="labelFile">Delimited file whose last column holds the true labels
/// (used only to size the assignments array).</param>
/// <param name="clusterFileName">Partition (cluster) file, including its graph.</param>
/// <returns>CSV report: one line per cluster with ns/ms/cs and each metric's
/// value, min, size-weighted average, and max.</returns>
public static String CheckForNoise(String labelFile, String clusterFileName)
{
    // need to calculate ns, ms and cs, as described in Yang and Leskovec ICDM2012
    //start by parsing label file
    DelimitedFile delimitedLabelFile = new DelimitedFile(labelFile);
    int labelCol = delimitedLabelFile.Data[0].Length;
    LabelList labels = new LabelList(delimitedLabelFile.GetColumn(labelCol - 1));
    //get the Partion file
    Partition clusterFile = new Partition(clusterFileName);
    int[] assignments = new int[labels.LabelIndices.Length];
    // initialize assignments array to -1
    // ultimately, nodes that have been removed as part of a critical attack set will stay at -1 assignment
    for (int i = 0; i < assignments.Length; i++)
    {
        assignments[i] = -1;
    }
    int noiseThreshold;
    //if (assignments.Length == 550) noiseThreshold = 500;
    //else if (assignments.Length == 770) noiseThreshold = 700;
    //else noiseThreshold = 1100;
    // NOTE(review): dataset-size-specific magic thresholds; node ids >= threshold
    // appear to be synthetic noise nodes -- confirm against the data generator
    if (assignments.Length == 220)
    {
        noiseThreshold = 200;
    }
    else if (assignments.Length == 440)
    {
        noiseThreshold = 400;
    }
    else
    {
        noiseThreshold = 800;
    }
    // flatten the partition into a point-id -> cluster-id map
    for (int cluster = 0; cluster < clusterFile.Clusters.Count; cluster++)
    {
        for (int j = 0; j < clusterFile.Clusters[cluster].Points.Count; j++)
        {
            int clusterid = clusterFile.Clusters[cluster].Points[j].ClusterId;
            int id = clusterFile.Clusters[cluster].Points[j].Id;
            assignments[id] = clusterid;
        }
    }
    int[] ns = new int[clusterFile.Clusters.Count]; // nodes per cluster
    int[] ms = new int[clusterFile.Clusters.Count]; // same-cluster edge endpoints
    int[] cs = new int[clusterFile.Clusters.Count]; // cross-cluster edge endpoints
    Boolean[] isAllNoise = new Boolean[clusterFile.Clusters.Count];
    // if we're doing this without reassign, we need new nodes and edges values
    int edges = 0;
    int nodes = 0;
    for (int cluster = 0; cluster < clusterFile.Clusters.Count; cluster++)
    {
        ns[cluster] = clusterFile.Clusters[cluster].Points.Count;
        isAllNoise[cluster] = true;
        for (int j = 0; j < clusterFile.Clusters[cluster].Points.Count; j++) // for each vertex in this cluster
        {
            nodes++;
            // any node id below the threshold means this cluster is not pure noise
            if (clusterFile.Clusters[cluster].Points[j].Id < noiseThreshold)
            {
                isAllNoise[cluster] = false;
            }
            for (int k = 0; k < clusterFile.Graph.Nodes[clusterFile.Clusters[cluster].Points[j].Id].Edge.Length; k++) // for each edge k adjacent to j
            {
                edges++;
                int edge = clusterFile.Graph.Nodes[clusterFile.Clusters[cluster].Points[j].Id].Edge[k];
                // NOTE(review): each undirected internal edge is seen from both
                // endpoints, so ms likely counts it twice -- confirm whether the
                // metrics below expect endpoint counts or edge counts
                if (cluster == assignments[edge])
                {
                    ms[cluster]++;
                    //if (cluster == 7) Console.WriteLine("ms " + edge);
                }
                else
                {
                    cs[cluster]++;
                    //if (cluster == 7) Console.WriteLine("cs " + edge);
                }
            }
        }
    }
    String report = "";
    double[] internalDensity = new double[clusterFile.Clusters.Count];
    double[] averageDegree = new double[clusterFile.Clusters.Count];
    double[] expansion = new double[clusterFile.Clusters.Count];
    double[] cutRatio = new double[clusterFile.Clusters.Count];
    double[] conductance = new double[clusterFile.Clusters.Count];
    double[] separability = new double[clusterFile.Clusters.Count];
    // WA* = size-weighted averages across all clusters
    double WAinternalDensity = 0;
    double WAaverageDegree = 0;
    double WAexpansion = 0;
    double WAcutRatio = 0;
    double WAconductance = 0;
    double WAseparability = 0;
    for (int cluster = 0; cluster < clusterFile.Clusters.Count; cluster++)
    {
        // ns*(ns-1)/2 possible internal edges; guarded against singleton clusters
        double totalPossibleInternalEdges = ((ns[cluster] * (ns[cluster] - 1)) / 2);
        internalDensity[cluster] = totalPossibleInternalEdges == 0 ?
            0 : (double)ms[cluster] / totalPossibleInternalEdges;
        averageDegree[cluster] = 2.0 * ms[cluster] / ns[cluster];
        expansion[cluster] = (double)cs[cluster] / ns[cluster];
        cutRatio[cluster] = (double)cs[cluster] / (ns[cluster] * (assignments.Length - ns[cluster]));
        // NOTE(review): conductance yields NaN and separability Infinity when a
        // cluster has no edges / no boundary edges (double division by zero)
        conductance[cluster] = (double)cs[cluster] / (2 * ms[cluster] + cs[cluster]);
        separability[cluster] = (double)ms[cluster] / cs[cluster];
    }
    // accumulate size-weighted sums, then normalize by the total node count
    for (int cluster = 0; cluster < clusterFile.Clusters.Count; cluster++)
    {
        WAinternalDensity += internalDensity[cluster] * ns[cluster];
        WAaverageDegree += averageDegree[cluster] * ns[cluster];
        WAexpansion += expansion[cluster] * ns[cluster];
        WAcutRatio += cutRatio[cluster] * ns[cluster];
        WAconductance += conductance[cluster] * ns[cluster];
        WAseparability += separability[cluster] * ns[cluster];
    }
    WAinternalDensity /= (double)nodes;
    WAaverageDegree /= (double)nodes;
    WAexpansion /= (double)nodes;
    WAcutRatio /= (double)nodes;
    WAconductance /= (double)nodes;
    WAseparability /= (double)nodes;
    // emit one CSV row per cluster:
    // file,cluster,isAllNoise,ns,ms,cs, then value/min/weighted-avg/max per metric
    for (int cluster = 0; cluster < clusterFile.Clusters.Count; cluster++)
    {
        report += clusterFileName.Substring(clusterFileName.LastIndexOf('\\') + 1) + "," + cluster + "," + (isAllNoise[cluster] ?
            1 : 0) + "," + ns[cluster] + "," + ms[cluster] + "," + cs[cluster] +
            "," + internalDensity[cluster] + "," + internalDensity.Min() + "," + WAinternalDensity + "," + internalDensity.Max() +
            //"," + averageDegree[cluster] + "," + averageDegree.Min() + "," + averageDegree.Average() + "," + averageDegree.Max() +
            "," + averageDegree[cluster] + "," + averageDegree.Min() + "," + WAaverageDegree + "," + averageDegree.Max() +
            "," + expansion[cluster] + "," + expansion.Min() + "," + WAexpansion + "," + expansion.Max() +
            "," + cutRatio[cluster] + "," + cutRatio.Min() + "," + WAcutRatio + "," + cutRatio.Max() +
            "," + conductance[cluster] + "," + conductance.Min() + "," + WAconductance + "," + conductance.Max() +
            "," + separability[cluster] + "," + separability.Min() + "," + WAseparability + "," + separability.Max() + "\n";
    }
    return (report);
}
/// <summary>
/// Creates a LabelList from one column of a delimited file by delegating to
/// the string-array constructor.
/// </summary>
/// <param name="f">File holding the label column</param>
/// <param name="col">0-Based index of the label column</param>
public LabelList(DelimitedFile f, int col) : this(f.GetColumn(col))
{
}