/// <summary>
/// Clusters the rows of <paramref name="Data"/> with K-Means and returns the cluster centroids.
/// </summary>
/// <param name="Data">Rectangular matrix with one observation per row and one feature per column.</param>
/// <param name="NumClusters">Number of clusters passed to the K-Means constructor.</param>
/// <returns>
/// A <c>double[nClusters, nCols]</c> matrix (returned as <c>object</c>) with one centroid per row.
/// Rows are ordered by ascending mean of the centroid's coordinates.
/// </returns>
public static object KCluster(double[,] Data, int NumClusters)
{
    int nRows = Data.GetLength(0), nCols = Data.GetLength(1);

    // The learner is fed a jagged array, so copy the rectangular input row by row.
    double[][] data = new double[nRows][];
    for (int r = 0; r < nRows; ++r)
    {
        data[r] = new double[nCols];
        for (int c = 0; c < nCols; ++c)
        {
            data[r][c] = Data[r, c];
        }
    }

    // Unfortunately, you can't set the initial cluster centers manually here, which
    // means that every time this function is called, it will return different clusters.
    aml.KMeans km = new aml.KMeans(NumClusters);
    aml.KMeansClusterCollection kcc = km.Learn(data);

    int nClusters = kcc.Count;
    double[,] ret = new double[nClusters, nCols];

    // Sorting by the centroid's mean gives the rows a stable order for a given set of centroids.
    int x = 0;
    foreach (KMeansClusterCollection.KMeansCluster cc in kcc.Clusters.OrderBy(xx => xx.Centroid.Mean()))
    {
        for (int c = 0; c < nCols; ++c)
        {
            // Store the c-th coordinate of the centroid. Writing the scalar mean of the
            // centroid here would make every column of the row identical.
            ret[x, c] = cc.Centroid[c];
        }
        x++;
    }

    return ret;
}
/// <summary>
///   Splits a set of points in two using the supplied K-Means instance.
/// </summary>
///
/// <param name="cluster">The points to be divided.</param>
/// <param name="kmeans">The K-Means instance that is randomized (without seeding) and run on the points.</param>
/// <param name="threshold">The threshold value forwarded to <c>kmeans.Compute</c>.</param>
///
/// <returns>
///   A pair whose first item holds the points labelled <c>0</c> and whose second item
///   holds every other point, both in their original relative order.
/// </returns>
///
private static Tuple<double[][], double[][]> split(double[][] cluster, KMeans kmeans, double threshold)
{
    kmeans.Randomize(cluster, useSeeding: false);
    int[] assignments = kmeans.Compute(cluster, threshold, false);

    var first = new List<double[]>();
    var second = new List<double[]>();

    for (int p = 0; p < assignments.Length; p++)
    {
        // Label 0 goes to the first half; anything else goes to the second.
        List<double[]> target = (assignments[p] == 0) ? first : second;
        target.Add(cluster[p]);
    }

    return Tuple.Create(first.ToArray(), second.ToArray());
}
/// <summary>
///   Initializes the model with initial values obtained
///   through a run of the K-Means clustering algorithm.
/// </summary>
///
/// <param name="kmeans">The K-Means instance forwarded to the cluster collection's own <c>Initialize</c>.</param>
///
public void Initialize(KMeans kmeans)
{
    // All of the work is delegated to the cluster collection held by this model.
    this.clusters.Initialize(kmeans);
}
/// <summary>
///   Divides the input data into K clusters.
/// </summary>
///
/// <param name="data">The data where to compute the algorithm.</param>
/// <param name="weights">The weight associated with each data point.</param>
///
/// <returns>The value of <c>Clusters.Nearest(data)</c> once the centroids have been computed.</returns>
///
/// <exception cref="ArgumentNullException"><paramref name="data"/> or <paramref name="weights"/> is null.</exception>
/// <exception cref="ArgumentException">
///   There are fewer points than K, the weights vector has a different length
///   than the data, or the weights do not sum to a positive value.
/// </exception>
/// <exception cref="DimensionMismatchException">The rows of <paramref name="data"/> do not all have the same length.</exception>
///
public override int[] Compute(double[][] data, double[] weights)
{
    // Initial argument checking
    if (data == null)
    {
        throw new ArgumentNullException("data");
    }

    if (data.Length < K)
    {
        throw new ArgumentException("Not enough points. There should be more points than the number K of clusters.");
    }

    if (weights == null)
    {
        throw new ArgumentNullException("weights");
    }

    if (data.Length != weights.Length)
    {
        throw new ArgumentException("Data weights vector must be the same length as data samples.");
    }

    // NOTE(review): within this method the weights are only validated; they are not
    // passed on to the splitting step below.
    double weightSum = weights.Sum();
    if (weightSum <= 0)
    {
        // NOTE(review): this message is shared with the point-count check above.
        throw new ArgumentException("Not enough points. There should be more points than the number K of clusters.");
    }

    // Every row must have the same length as the first one. The check has to look
    // at row i; comparing data[0] against itself can never detect a ragged matrix.
    int cols = data[0].Length;
    for (int i = 0; i < data.Length; i++)
    {
        if (data[i].Length != cols)
        {
            throw new DimensionMismatchException("data",
                "The points matrix should be rectangular. The vector at position " + i
                + " has a different length than previous ones.");
        }
    }

    int k = Clusters.Count;

    // Two-cluster K-Means used to split one cluster at a time. It inherits the
    // distance and stopping settings of this instance.
    KMeans kmeans = new KMeans(2)
    {
        Distance = (IDistance<double[]>)Clusters.Distance,
        ComputeError = false,
        ComputeCovariances = false,
        UseSeeding = UseSeeding,
        Tolerance = Tolerance,
        MaxIterations = MaxIterations,
    };

    double[][] centroids = Clusters.Centroids;
    double[][][] clusters = new double[k][][];
    double[] distortions = new double[k];

    // 1. Start with all data points in one cluster
    clusters[0] = data;

    // 2. Repeat steps 3 to 6 (k-1) times to obtain K centroids
    for (int current = 1; current < k; current++)
    {
        // 3. Choose cluster with largest distortion
        int choosen;
        distortions.Max(current, out choosen);

        // 4. Split cluster into two sub-clusters
        var splits = split(clusters[choosen], kmeans);

        clusters[choosen] = splits.Item1;
        clusters[current] = splits.Item2;

        // 5. Replace chosen centroid and add a new one
        centroids[choosen] = kmeans.Clusters.Centroids[0];
        centroids[current] = kmeans.Clusters.Centroids[1];

        // Recompute distortions for the updated clusters
        distortions[choosen] = kmeans.Clusters[0].Distortion(clusters[choosen]);
        distortions[current] = kmeans.Clusters[1].Distortion(clusters[current]);

        // 6. Increment cluster count (current = current + 1)
    }

    return Clusters.Nearest(data);
}
/// <summary>
///   Creates a cluster collection backed by an existing list of clusters.
/// </summary>
///
/// <param name="owner">The K-Means instance recorded as the owner of this collection.</param>
/// <param name="list">The list of clusters handed to the base-class constructor.</param>
///
internal KMeansClusterCollection(KMeans owner, IList<KMeansCluster> list)
    : base(list)
{
    this.owner = owner;
}
/// <summary>
///   Creates a cluster that records its owner and its index.
/// </summary>
///
/// <param name="owner">The K-Means instance stored as this cluster's owner.</param>
/// <param name="index">The index value stored for this cluster.</param>
///
internal KMeansCluster(KMeans owner, int index)
{
    this.index = index;
    this.owner = owner;
}
/// <summary>
/// Runs a weighted K-Means over the component inputs and outputs the cluster label of each observation.
/// </summary>
/// <remarks>
/// Input 0 is the number of clusters, input 1 the optional per-observation weights, and every
/// input from index 2 onwards is one coordinate list (empty lists are ignored). Observation
/// <c>i</c> is built from the <c>i</c>-th value of each coordinate list. Output 0 receives the
/// flat list of labels; output 1 receives a tree that stores each observation index under the
/// path of its label.
/// </remarks>
/// <param name="DA">Data access object used to read the inputs and write the outputs.</param>
protected override void SolveInstance(IGH_DataAccess DA)
{
    int n = 0;
    DA.GetData(0, ref n);

    // Collect the non-empty coordinate lists (inputs 2..N).
    List<List<double>> data = new List<List<double>>();
    for (int i = 2; i < Params.Input.Count; i++)
    {
        List<double> d = new List<double>();
        DA.GetDataList(i, d);
        if (d.Count > 0)
        {
            data.Add(d);
        }
    }

    // Without at least one coordinate list there is nothing to cluster; indexing
    // data[0] below would otherwise throw.
    if (data.Count == 0)
    {
        AddRuntimeMessage(GH_RuntimeMessageLevel.Warning, "No data supplied; nothing to cluster.");
        return;
    }

    // The first list defines the number of observations. A shorter list would be
    // indexed past its end while the observations are assembled.
    int count = data[0].Count;
    for (int j = 1; j < data.Count; j++)
    {
        if (data[j].Count < count)
        {
            AddRuntimeMessage(GH_RuntimeMessageLevel.Error,
                "All data lists must contain at least as many values as the first list.");
            return;
        }
    }

    // Declare some observations: one row per index, one column per coordinate list.
    double[][] observations = new double[count][];
    for (int i = 0; i < count; i++)
    {
        double[] row = new double[data.Count];
        for (int j = 0; j < data.Count; j++)
        {
            row[j] = data[j][i];
        }
        observations[i] = row;
    }

    //Get Weights; fall back to uniform weights when the count does not match.
    List<double> weights = new List<double>();
    DA.GetDataList(1, weights);
    if (weights.Count != count)
    {
        weights = Enumerable.Repeat(1.0, count).ToList();
    }

    //Seed (fixed at 0 before every run)
    Accord.Math.Random.Generator.Seed = 0;

    // Create a new K-Means algorithm with n clusters
    Accord.MachineLearning.KMeans kmeans = new Accord.MachineLearning.KMeans(n);
    KMeansClusterCollection clusters = kmeans.Learn(observations, weights.ToArray());
    int[] labels = clusters.Decide(observations);

    //Message
    base.Message = "Weights " + weights.Count.ToString() + "\r\n" + "Dimensions " + observations.Length.ToString() + " of length " + observations[0].Length.ToString();

    //Output
    DA.SetDataList(0, labels.ToList());

    DataTree<int> dataTree = new DataTree<int>();
    for (int i = 0; i < labels.Length; i++)
    {
        dataTree.Add(i, new GH_Path(labels[i]));
    }
    DA.SetDataTree(1, dataTree);
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/*
 * Runs the whole clustering pipeline.
 * input: none (works on the fields prepared by the helper steps).
 * output: the adjacency matrix together with a map from each node name to its cluster label.
 */
public Tuple<double[,], Dictionary<string, int>> StartClustering()
{
    convert_input();                    // 1. build adjacency matrix.
    diagonalMatrix();                   // 2.1 build diagonal matrix.
    buildLmat();                        // 2.2 build L = D - adjacency_matrix.
    calculate_eigendecomposition();     // 3. find k largest eigenvectors.
    normalizeVectors();                 // 4. normalize eigenvectors.

    labels = new int[inputSize];

    // Copy the rectangular matrix Y into the jagged layout that K-Means is given.
    int rowCount = Y.GetLength(0);
    int colCount = Y.GetLength(1);
    observations = new double[rowCount][];
    for (int r = 0; r < rowCount; r++)
    {
        double[] row = new double[colCount];
        for (int c = 0; c < colCount; c++)
        {
            row[c] = Y[r, c];
        }
        observations[r] = row;
    }

    // K-Means over the rows of Y; each row receives one label.
    Accord.MachineLearning.KMeans kmeans = new Accord.MachineLearning.KMeans(k: NumofClusters);
    var model = kmeans.Learn(observations);
    labels = model.Decide(observations);

    // Pair the i-th node name with the i-th label.
    words_cluster = new Dictionary<string, int>();
    int position = 0;
    foreach (int label in labels)
    {
        words_cluster.Add(nodes[position], label);
        position++;
    }

    return new Tuple<double[,], Dictionary<string, int>>(adjacency_matrix, words_cluster);
}// end StartClustering
}// end normalizeVectors()

/*
 * Reads the tab-separated edge list in input_file and prepares the clustering state.
 * Each line is split into source, target and weight. The weights are clustered
 * with a two-cluster K-Means and only the edges that pass the label filter are kept.
 * input: input_file (field).
 * output: none; writes nodes.csv, edges.csv and nodes.txt, sets nodes and inputSize,
 *         allocates adjacency_matrix, D, Y and L, then calls normalize and build_adjmat.
 */
private void convert_input()
{
    string[] lines = input_file.ToArray<string>();

    // Split every line once; the fields are reused when the edge list is built.
    string[][] fields = new string[lines.Length][];
    Dictionary<string, int> words = new Dictionary<string, int>();
    double[] vector = new double[lines.Length];
    int k = 1;
    for (int i = 0; i < lines.Length; i++)
    {
        string[] line = lines[i].Split('\t');
        fields[i] = line;

        // Record each endpoint the first time it is seen.
        if (!words.ContainsKey(line[0]))
            words.Add(line[0], k++);
        if (!words.ContainsKey(line[1]))
            words.Add(line[1], k++);

        vector[i] = double.Parse(line[2]);
    }

    // One-dimensional observations: the edge weights.
    double[][] observ = new double[vector.Length][];
    for (int i = 0; i < vector.Length; i++)
        observ[i] = new double[] { vector[i] };

    Accord.MachineLearning.KMeans kmeans = new Accord.MachineLearning.KMeans(k: 2);
    var clusters = kmeans.Learn(observ);
    int[] labels = clusters.Decide(observ);

    nodes = new List<string>();
    List<string> edges = new List<string>();
    HashSet<string> tmpnodes = new HashSet<string>();
    for (int i = 0; i < lines.Length; i++)
    {
        // NOTE(review): edges labelled 1 or 2 are dropped. Confirm that the label
        // numbering of the K-Means result matches the intended filter.
        if (labels[i] == 2 || labels[i] == 1)
            continue;

        string[] tmp = fields[i];
        edges.Add(tmp[0] + "\t" + tmp[1] + "\t" + tmp[2]);
        tmpnodes.Add(tmp[0]);
        tmpnodes.Add(tmp[1]);
    }

    // nodes.csv / edges.csv hold only the nodes and edges that survived the filter.
    nodes.AddRange(tmpnodes.ToList());
    File.WriteAllLines("nodes.csv", nodes);
    File.WriteAllLines("edges.csv", edges);

    // From here on, nodes holds every endpoint seen in the input, filtered or not.
    nodes = new List<string>();
    foreach (var item in words)
    {
        nodes.Add(item.Key);
    }
    File.WriteAllLines("nodes.txt", nodes);

    inputSize = nodes.Count;
    adjacency_matrix = new double[inputSize, inputSize];    // init affinity matrix
    D = new double[inputSize, inputSize];                   // init diagonal matrix
    Y = new double[inputSize, NumofClusters];
    L = new double[inputSize, inputSize];

    // Group the kept edges by feature id; only feature 0 is used at present.
    SortedDictionary<int, List<string>> lst = new SortedDictionary<int, List<string>>();
    string[] file = edges.ToArray();
    for (int i = 0; i < file.Length; i++)
    {
        string[] row = file[i].Split('\t');
        int feat = 0;
        if (!lst.ContainsKey(feat))
            lst.Add(feat, new List<string>());
        lst[feat].Add(row[0] + "\t" + row[1] + "\t" + row[2]);
    }

    for (int i = 0; i < lst.Count; i++)
        normalize(lst[i], i.ToString());

    // Collect the entries of all_res for every group and build the adjacency matrix from them.
    List<string> all_results = new List<string>();
    for (int i = 0; i < lst.Count; i++)
    {
        string[] files = all_res[i].ToArray<string>();
        for (int j = 0; j < files.Length; j++)
            all_results.Add(files[j]);
    }

    build_adjmat(all_results);
}
/// <summary>
/// Learns a model that can map the given inputs to the desired outputs.
/// </summary>
/// <param name="x">The model inputs.</param>
/// <param name="weights">The weight of importance for each input sample. When null, a vector of ones is used.</param>
/// <returns>A model that has learned how to produce suitable outputs
///   given the input data <paramref name="x" />.</returns>
/// <exception cref="ArgumentNullException"><paramref name="x"/> is null.</exception>
/// <exception cref="ArgumentException">
///   There are fewer points than K, the weights vector has a different length
///   than the data, or the weights do not sum to a positive value.
/// </exception>
/// <exception cref="DimensionMismatchException">The rows of <paramref name="x"/> do not all have the same length.</exception>
public override KMeansClusterCollection Learn(double[][] x, double[] weights = null)
{
    // Initial argument checking
    if (x == null)
    {
        throw new ArgumentNullException("x");
    }

    if (x.Length < K)
    {
        throw new ArgumentException("Not enough points. There should be more points than the number K of clusters.");
    }

    if (weights == null)
    {
        weights = Vector.Ones(x.Length);
    }

    if (x.Length != weights.Length)
    {
        throw new ArgumentException("Data weights vector must be the same length as data samples.");
    }

    // NOTE(review): within this method the weights are only validated; they are not
    // passed on to the splitting step below.
    double weightSum = weights.Sum();
    if (weightSum <= 0)
    {
        // NOTE(review): this message is shared with the point-count check above.
        throw new ArgumentException("Not enough points. There should be more points than the number K of clusters.");
    }

    // Every row must have the same number of columns. The exception names the
    // actual parameter and the offending row.
    int cols = x.Columns();
    for (int i = 0; i < x.Length; i++)
    {
        if (x[i].Length != cols)
        {
            throw new DimensionMismatchException("x",
                "The points matrix should be rectangular. The vector at position " + i
                + " has a different length than previous ones.");
        }
    }

    int k = Clusters.Count;

    // Two-cluster K-Means used to split one cluster at a time. It inherits the
    // distance and stopping settings of this instance.
    KMeans kmeans = new KMeans(2)
    {
        Distance = (IDistance<double[]>)Clusters.Distance,
        ComputeError = false,
        ComputeCovariances = false,
        UseSeeding = UseSeeding,
        Tolerance = Tolerance,
        MaxIterations = MaxIterations,
    };

    var centroids = Clusters.Centroids;
    var clusters = new double[k][][];
    var distortions = new double[k];

    // 1. Start with all data points in one cluster
    clusters[0] = x;

    // 2. Repeat steps 3 to 6 (k-1) times to obtain K centroids
    for (int current = 1; current < k; current++)
    {
        // 3. Choose cluster with largest distortion
        int choosen;
        distortions.Max(current, out choosen);

        // 4. Split cluster into two sub-clusters
        var splits = split(clusters[choosen], kmeans);

        clusters[choosen] = splits.Item1;
        clusters[current] = splits.Item2;

        // 5. Replace chosen centroid and add a new one
        centroids[choosen] = kmeans.Clusters.Centroids[0];
        centroids[current] = kmeans.Clusters.Centroids[1];

        // Recompute distortions for the updated clusters
        distortions[choosen] = kmeans.Clusters[0].Distortion(clusters[choosen]);
        distortions[current] = kmeans.Clusters[1].Distortion(clusters[current]);

        // 6. Increment cluster count (current = current + 1)
    }

    Clusters.NumberOfInputs = cols;

    Accord.Diagnostics.Debug.Assert(Clusters.NumberOfClasses == K);
    Accord.Diagnostics.Debug.Assert(Clusters.NumberOfOutputs == K);
    Accord.Diagnostics.Debug.Assert(Clusters.NumberOfInputs == x[0].Length);

    if (ComputeProportions)
    {
        // Proportions are the label histogram divided by the number of samples.
        int[] y = Clusters.Decide(x);
        int[] counts = y.Histogram();
        counts.Divide(y.Length, result: Clusters.Proportions);

        ComputeInformation(x, y);
    }
    else
    {
        ComputeInformation(x);
    }

    return Clusters;
}