/// <summary>
/// Runs k-means clustering over a rectangular data matrix and returns the
/// fitted cluster centroids, one row per cluster, ordered by ascending
/// centroid mean.
/// </summary>
/// <param name="Data">Observations: one row per point, one column per feature.</param>
/// <param name="NumClusters">Number of clusters to fit.</param>
/// <returns>A [clusters x features] matrix of centroid coordinates.</returns>
public static object KCluster(double[,] Data, int NumClusters)
{
    int nRows = Data.GetLength(0), nCols = Data.GetLength(1);

    // Accord's KMeans.Learn wants a jagged array, so copy the rectangular input over.
    double[][] data = new double[nRows][];
    for (int r = 0; r < nRows; ++r)
    {
        data[r] = new double[nCols];
        for (int c = 0; c < nCols; ++c)
        {
            data[r][c] = Data[r, c];
        }
    }

    // NOTE: initial cluster centers cannot be set manually here, which means
    // repeated calls can return different clusters unless the RNG seed is
    // fixed elsewhere (e.g. Accord.Math.Random.Generator.Seed).
    aml.KMeans km = new aml.KMeans(NumClusters);
    aml.KMeansClusterCollection kcc = km.Learn(data);

    int nClusters = kcc.Count;
    double[,] ret = new double[nClusters, nCols];

    // Emit each centroid's coordinates, rows ordered by the centroid's mean.
    // FIX (was the 19Apr20 TODO): the previous code wrote the scalar
    // Centroid.Mean() into every column, losing the per-dimension centroid
    // coordinates; write the component Centroid[c] instead.
    int x = 0;
    foreach (KMeansClusterCollection.KMeansCluster cc in kcc.Clusters.OrderBy(xx => xx.Centroid.Mean()))
    {
        for (int c = 0; c < nCols; ++c)
        {
            ret[x, c] = cc.Centroid[c];
        }
        x++;
    }
    return ret;
}
/// <summary>
/// Bisects <paramref name="cluster"/> with a 2-means split: re-randomizes the
/// centroids, learns the model, then partitions the points by their assigned
/// label (0 vs. non-zero).
/// </summary>
/// <param name="cluster">Points to split, one row per observation.</param>
/// <param name="kmeans">A 2-cluster KMeans instance to (re)fit.</param>
/// <returns>The two resulting sub-clusters as a tuple of arrays.</returns>
private static Tuple<double[][], double[][]> split(double[][] cluster, KMeans kmeans)
{
    kmeans.Randomize(cluster);
    int[] assignments = kmeans.Learn(cluster).Decide(cluster);

    var first = new List<double[]>();
    var second = new List<double[]>();
    for (int i = 0; i < assignments.Length; i++)
    {
        // Label 0 goes to the first half, anything else to the second.
        (assignments[i] == 0 ? first : second).Add(cluster[i]);
    }

    return Tuple.Create(first.ToArray(), second.ToArray());
}
/// <summary>
/// Grasshopper solve: reads the cluster count (input 0), optional weights
/// (input 1) and one numeric series per additional input, runs weighted
/// k-means over the transposed observations, and outputs the labels both as a
/// flat list and as a tree grouped by cluster.
/// </summary>
/// <param name="DA">Grasshopper data-access object for this solve pass.</param>
protected override void SolveInstance(IGH_DataAccess DA)
{
    int n = 0;
    DA.GetData(0, ref n);

    // Collect every non-empty numeric series from inputs 2..N; each series is
    // one dimension of the observations.
    List<List<double>> data = new List<List<double>>();
    for (int i = 2; i < Params.Input.Count; i++)
    {
        List<double> d = new List<double>();
        DA.GetDataList(i, d);
        if (d.Count > 0)
        {
            data.Add(d);
        }
    }

    // ROBUSTNESS FIX: the original dereferenced data[0] unconditionally, which
    // throws when no data inputs are wired up or all of them are empty.
    if (data.Count == 0 || data[0].Count == 0)
    {
        return;
    }

    // Transpose: observation i is the i-th value of every input series.
    double[][] observations = new double[data[0].Count][];
    for (int i = 0; i < data[0].Count; i++)
    {
        List<double> num = new List<double>();
        for (int j = 0; j < data.Count; j++)
        {
            num.Add(data[j][i]);
        }
        observations[i] = num.ToArray();
    }

    // Per-observation weights; fall back to uniform weights when the supplied
    // list does not match the observation count.
    List<double> weights = new List<double>();
    DA.GetDataList(1, weights);
    if (weights.Count != data[0].Count)
    {
        weights = Enumerable.Repeat(1.0, data[0].Count).ToList();
    }

    // Fixed seed so re-solves of the definition are deterministic.
    Accord.Math.Random.Generator.Seed = 0;

    Accord.MachineLearning.KMeans kmeans = new Accord.MachineLearning.KMeans(n);
    KMeansClusterCollection clusters = kmeans.Learn(observations, weights.ToArray());
    int[] labels = clusters.Decide(observations);

    base.Message = "Weights " + weights.Count.ToString() + "\r\n"
        + "Dimensions " + observations.Length.ToString()
        + " of length " + observations[0].Length.ToString();

    // Output 0: flat label list. Output 1: indices grouped into one branch per cluster.
    DA.SetDataList(0, labels.ToList());
    DataTree<int> dataTree = new DataTree<int>();
    for (int i = 0; i < labels.Length; i++)
    {
        dataTree.Add(i, new GH_Path(labels[i]));
    }
    DA.SetDataTree(1, dataTree);
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Spectral-clustering pipeline: builds the adjacency matrix from the input,
/// forms the Laplacian L = D - A, extracts and normalizes the k largest
/// eigenvectors into Y, then k-means clusters the rows of Y and maps every
/// node name to its cluster label.
/// </summary>
/// <returns>The adjacency matrix paired with the node -&gt; cluster-id map.</returns>
public Tuple<double[,], Dictionary<string, int>> StartClustering()
{
    convert_input();                // 1. build adjacency matrix
    diagonalMatrix();               // 2.1 build diagonal matrix
    buildLmat();                    // 2.2 build L = D - adjacency_matrix
    calculate_eigendecomposition(); // 3. find k largest eigenvectors
    normalizeVectors();             // 4. normalize eigenvectors

    labels = new int[inputSize];

    // Copy the rectangular eigenvector matrix Y into the jagged layout Accord expects.
    int rows = Y.GetLength(0);
    int cols = Y.GetLength(1);
    observations = new double[rows][];
    for (int r = 0; r < rows; r++)
    {
        observations[r] = new double[cols];
        for (int c = 0; c < cols; c++)
        {
            observations[r][c] = Y[r, c];
        }
    }

    // K-means over the embedded points.
    Accord.MachineLearning.KMeans kmeans = new Accord.MachineLearning.KMeans(k: NumofClusters);
    var clusters = kmeans.Learn(observations);
    labels = clusters.Decide(observations);

    // Pair each node name with the cluster its row landed in.
    words_cluster = new Dictionary<string, int>();
    for (int i = 0; i < labels.Length; i++)
    {
        words_cluster.Add(nodes[i], labels[i]);
    }

    return new Tuple<double[,], Dictionary<string, int>>(adjacency_matrix, words_cluster);
}// end StartClustering
}// end normalizeVectors()

/*
 * Extract one vector from BoW.
 * input: BoW matrix, number of Vector to extract.
 * output: Extracted vector.
 */
// Parses the tab-separated edge list in input_file, filters edges via a
// 2-means split of the edge weights, writes nodes/edges CSV files, rebuilds
// the node list from the full word table, and allocates the matrices
// (adjacency_matrix, D, Y, L) used by the spectral-clustering pipeline, then
// normalizes the edge groups and builds the adjacency matrix.
private void convert_input()
{
    string[] lines = input_file.ToArray<string>();

    // Assign a 1-based id to every distinct word appearing in columns 0/1,
    // and collect each line's weight (column 2) both as text and as a double.
    Dictionary<string, int> words = new Dictionary<string, int>();
    List<string> val = new List<string>();
    double[] vector = new double[lines.Length];
    int k = 1;
    for (int i = 0; i < lines.Length; i++)
    {
        string[] line = lines[i].Split('\t');
        if (!words.ContainsKey(line[0]))
            words.Add(line[0], k++);
        if (!words.ContainsKey(line[1]))
            words.Add(line[1], k++);
        val.Add(line[2]);
        vector[i] = double.Parse(line[2]);
    }

    // 2-means over the one-dimensional edge weights; Decide yields label 0 or 1.
    double[][] observ = new double[vector.Length][];
    for (int i = 0; i < vector.Length; i++)
        observ[i] = new double[] { vector[i] };
    Accord.MachineLearning.KMeans kmeans = new Accord.MachineLearning.KMeans(k:2);
    var clusters = kmeans.Learn(observ);
    int[] labels = clusters.Decide(observ);
    ////double min = 0, max = 0;
    ////Sigma(vector,ref min,ref max);

    nodes = new List<string>();// { "Id" };
    List<string> edges = new List<string>();// { "Source; Target" };
    HashSet<string> tmpnodes = new HashSet<string>();
    for (int i = 0; i < lines.Length; i++)
    {
        // Keep only edges whose weight fell in cluster 0.
        // NOTE(review): labels come from a k=2 fit, so they are 0 or 1 and the
        // labels[i] == 2 test can never be true — confirm that dropping only
        // cluster 1 is the intended filter.
        if (labels[i] == 2 || labels[i] == 1)//if (vector[i] < min || vector[i] > max)
            continue;
        string[] tmp = lines[i].Split('\t');
        //newlines.Add(words[tmp[0]].ToString() + "," + words[tmp[1]].ToString());// + "\t" + val[i]);
        edges.Add(tmp[0] + "\t" + tmp[1] + "\t" + val[i]);
        //if (!nodes.Contains(tmp[0]))
        tmpnodes.Add(tmp[0]);
        //if (!nodes.Contains(tmp[1]))
        tmpnodes.Add(tmp[1]);
    }

    // Persist the filtered graph; tmpnodes holds only endpoints of kept edges.
    nodes.AddRange(tmpnodes.ToList());
    File.WriteAllLines("nodes.csv", nodes);
    File.WriteAllLines("edges.csv", edges);

    // Rebuild 'nodes' from the full word table (every word, not just those on
    // surviving edges) — this is the node set used to size the matrices below.
    nodes = new List<string>();
    foreach (var item in words)
    {
        nodes.Add(item.Key);
    }
    File.WriteAllLines("nodes.txt", nodes);

    inputSize = nodes.Count;
    adjacency_matrix = new double[inputSize, inputSize]; // init affinity matrix
    D = new double[inputSize, inputSize]; // init diagonal matrix
    Y = new double[inputSize, NumofClusters];
    // NOTE(review): D is allocated a second time here — redundant with the
    // assignment two lines up; harmless but worth cleaning up.
    D = new double[inputSize, inputSize];
    L = new double[inputSize, inputSize];

    // Group the surviving edges by feature id (always 0 here, so a single
    // group), normalize each group, then concatenate the normalized results
    // and build the adjacency matrix from them.
    SortedDictionary<int, List<string>> lst = new SortedDictionary<int, List<string>>();
    List<string> all_temp = new List<string>();
    string[] file = edges.ToArray();
    for (int i = 0; i < file.Length; i++)
    {
        string[] row = file[i].Split('\t');
        int feat = 0;
        if (!lst.ContainsKey(feat))
            lst.Add(feat, new List<string>());
        lst[feat].Add(row[0] + "\t" + row[1] + "\t" + row[2]);
    }
    for (int i = 0; i < lst.Count; i++)
        normalize(lst[i], i.ToString());
    List<string> all_results = new List<string>();
    for (int i = 0; i < lst.Count; i++)
    {
        string[] files = all_res[i].ToArray<string>();
        for (int j = 0; j < files.Length; j++)
            all_results.Add(files[j]);
    }
    build_adjmat(all_results);
}