예제 #1
0
        /// <summary>
        ///   Clusters the rows of <paramref name="Data"/> with K-Means and returns the
        ///   centroid of every cluster found.
        /// </summary>
        /// <param name="Data">Rectangular matrix whose rows are copied into the observations passed to K-Means.</param>
        /// <param name="NumClusters">Number of clusters passed to the K-Means constructor.</param>
        /// <returns>
        ///   A <c>double[,]</c> (returned as <c>object</c>) with one row per cluster and one column
        ///   per column of <paramref name="Data"/>. Row <c>x</c> holds the coordinates of a centroid;
        ///   rows are ordered by ascending mean of the centroid coordinates.
        /// </returns>
        public static object KCluster(double[,] Data, int NumClusters)
        {
            int nRows = Data.GetLength(0), nCols = Data.GetLength(1);

            // Learn() takes a jagged array, so copy the rectangular input row by row.
            double[][] data = new double[nRows][];
            for (int r = 0; r < nRows; ++r)
            {
                data[r] = new double[nCols];
                for (int c = 0; c < nCols; ++c)
                {
                    data[r][c] = Data[r, c];
                }
            }

            // The initial cluster centers are not set explicitly here, so (per the original
            // author's note) repeated calls may return different clusters.
            aml.KMeans km = new aml.KMeans(NumClusters);
            aml.KMeansClusterCollection kcc = km.Learn(data);

            int nClusters = kcc.Count;

            double[,] ret = new double[nClusters, nCols];

            int x = 0;

            // Sorting by the mean of each centroid gives the output rows a stable order
            // that does not depend on the arbitrary cluster indices.
            foreach (KMeansClusterCollection.KMeansCluster cc in kcc.Clusters.OrderBy(xx => xx.Centroid.Mean()))
            {
                var centroid = cc.Centroid;

                for (int c = 0; c < nCols; ++c)
                {
                    // Store the c-th coordinate of the centroid. Previously every column
                    // received the same scalar (the mean over all coordinates).
                    ret[x, c] = centroid[c];
                }

                x++;
            }

            return(ret);
        }
예제 #2
0
        /// <summary>
        ///   Splits a set of points in two according to the labels computed by
        ///   <paramref name="kmeans"/>.
        /// </summary>
        /// <param name="cluster">The points to divide.</param>
        /// <param name="kmeans">The K-Means instance that is randomized and then run on <paramref name="cluster"/>.</param>
        /// <param name="threshold">Passed unchanged as the second argument of <c>kmeans.Compute</c>.</param>
        /// <returns>
        ///   A tuple whose first item holds the points labelled <c>0</c> and whose
        ///   second item holds every other point, both in their original order.
        /// </returns>
        private static Tuple <double[][], double[][]> split(double[][] cluster,
                                                            KMeans kmeans, double threshold)
        {
            kmeans.Randomize(cluster, useSeeding: false);

            int[] labels = kmeans.Compute(cluster, threshold, false);

            var zeroLabelled = new List <double[]>();
            var remaining    = new List <double[]>();

            for (int n = 0; n < labels.Length; n++)
            {
                // Label 0 goes to the first half, any other label to the second.
                List <double[]> target = (labels[n] == 0) ? zeroLabelled : remaining;
                target.Add(cluster[n]);
            }

            double[][] first  = zeroLabelled.ToArray();
            double[][] second = remaining.ToArray();

            return(Tuple.Create(first, second));
        }
예제 #3
0
 /// <summary>
 ///   Seeds this model from a K-Means instance by forwarding
 ///   it to <c>clusters.Initialize</c>.
 /// </summary>
 ///
 /// <param name="kmeans">The K-Means instance handed on to <c>clusters.Initialize</c>.</param>
 ///
 public void Initialize(KMeans kmeans)
 {
     // Pure delegation: all the work happens in the clusters object.
     this.clusters.Initialize(kmeans);
 }
예제 #4
0
        /// <summary>
        ///   Divides the input data into K clusters by repeatedly splitting the
        ///   cluster with the largest distortion in two.
        /// </summary>
        ///
        /// <param name="data">The data where to compute the algorithm. Must be rectangular.</param>
        /// <param name="weights">The weight associated with each data point. In this method
        ///   the weights are only validated (length and positive sum).</param>
        ///
        /// <returns>The result of <c>Clusters.Nearest(data)</c> after the centroids have been updated.</returns>
        ///
        /// <exception cref="ArgumentNullException"><paramref name="data"/> or <paramref name="weights"/> is null.</exception>
        /// <exception cref="ArgumentException">There are fewer points than K, the weights vector has a
        ///   different length than the data, or the weights do not sum to a positive value.</exception>
        /// <exception cref="DimensionMismatchException">A row of <paramref name="data"/> has a different
        ///   length than the first row.</exception>
        ///
        public override int[] Compute(double[][] data, double[] weights)
        {
            // Initial argument checking
            if (data == null)
            {
                throw new ArgumentNullException("data");
            }

            if (data.Length < K)
            {
                throw new ArgumentException("Not enough points. There should be more points than the number K of clusters.");
            }

            if (weights == null)
            {
                throw new ArgumentNullException("weights");
            }

            if (data.Length != weights.Length)
            {
                throw new ArgumentException("Data weights vector must be the same length as data samples.");
            }

            double weightSum = weights.Sum();

            if (weightSum <= 0)
            {
                throw new ArgumentException("The sum of the data weights must be greater than zero.");
            }

            int cols = data[0].Length;

            for (int i = 0; i < data.Length; i++)
            {
                // Check row i itself; comparing data[0] against its own length can never fail.
                if (data[i].Length != cols)
                {
                    throw new DimensionMismatchException("data", string.Format(
                        "The points matrix should be rectangular. The vector at position {0} has a different length than previous ones.", i));
                }
            }


            int k = Clusters.Count;

            // Helper 2-means instance used for every split; it inherits this object's settings.
            KMeans kmeans = new KMeans(2)
            {
                Distance           = (IDistance <double[]>)Clusters.Distance,
                ComputeError       = false,
                ComputeCovariances = false,
                UseSeeding         = UseSeeding,
                Tolerance          = Tolerance,
                MaxIterations      = MaxIterations,
            };

            // NOTE(review): centroids is presumably the array backing Clusters, so the
            // assignments below update the model in place -- confirm.
            double[][]   centroids   = Clusters.Centroids;
            double[][][] clusters    = new double[k][][];
            double[]     distortions = new double[k];

            // 1. Start with all data points in one cluster
            clusters[0] = data;

            // 2. Repeat steps 3 to 6 (k-1) times to obtain K centroids
            for (int current = 1; current < k; current++)
            {
                // 3. Choose cluster with largest distortion
                //    (presumably only the first `current` entries are considered -- confirm)
                int choosen; distortions.Max(current, out choosen);

                // 4. Split cluster into two sub-clusters
                var splits = split(clusters[choosen], kmeans);

                clusters[choosen] = splits.Item1;
                clusters[current] = splits.Item2;

                // 5. Replace chosen centroid and add a new one
                centroids[choosen] = kmeans.Clusters.Centroids[0];
                centroids[current] = kmeans.Clusters.Centroids[1];

                // Recompute distortions for the updated clusters
                distortions[choosen] = kmeans.Clusters[0].Distortion(clusters[choosen]);
                distortions[current] = kmeans.Clusters[1].Distortion(clusters[current]);

                // 6. Increment cluster count (current = current + 1)
            }


            return(Clusters.Nearest(data));
        }
예제 #5
0
 /// <summary>
 ///   Creates the collection over an existing list of clusters and
 ///   records the K-Means instance passed in as its owner.
 /// </summary>
 /// <param name="owner">The K-Means instance stored in the <c>owner</c> field.</param>
 /// <param name="list">The clusters, forwarded unchanged to the base-class constructor.</param>
 internal KMeansClusterCollection(KMeans owner, IList <KMeansCluster> list)
     : base(list)
 {
     this.owner = owner;
 }
예제 #6
0
 /// <summary>
 ///   Creates a cluster that remembers the K-Means instance it was
 ///   created for and the index it was given.
 /// </summary>
 /// <param name="owner">The K-Means instance stored in the <c>owner</c> field.</param>
 /// <param name="index">The value stored in the <c>index</c> field.</param>
 internal KMeansCluster(KMeans owner, int index)
 {
     this.owner = owner;
     this.index = index;
 }
예제 #7
0
        /// <summary>
        ///   Reads the cluster count (input 0), optional weights (input 1) and one value list per
        ///   remaining input, runs weighted K-Means on the transposed values and outputs the
        ///   cluster label of every observation.
        /// </summary>
        /// <param name="DA">Data access object used to read the inputs and to write the outputs.</param>
        /// <remarks>
        ///   Output 0 receives the flat list of labels. Output 1 receives a tree in which the index
        ///   of every observation is stored on the path named after its label. When the inputs
        ///   cannot be clustered, a runtime message is added and no output is set.
        /// </remarks>
        protected override void SolveInstance(IGH_DataAccess DA)
        {
            int n = 0;

            if (!DA.GetData(0, ref n) || n < 1)
            {
                AddRuntimeMessage(GH_RuntimeMessageLevel.Error, "The number of clusters must be at least 1.");
                return;
            }

            // Inputs from index 2 onwards each provide one dimension; empty lists are ignored.
            List <List <double> > data = new List <List <double> >();

            for (int i = 2; i < Params.Input.Count; i++)
            {
                List <double> d = new List <double>();
                DA.GetDataList(i, d);
                if (d.Count > 0)
                {
                    data.Add(d);
                }
            }

            // Without at least one non-empty list there is nothing to index below.
            if (data.Count == 0)
            {
                AddRuntimeMessage(GH_RuntimeMessageLevel.Warning, "No values were supplied to cluster.");
                return;
            }

            int count = data[0].Count;

            // The first list fixes the number of observations; a shorter list would be
            // indexed out of range while transposing.
            if (data.Any(d => d.Count < count))
            {
                AddRuntimeMessage(GH_RuntimeMessageLevel.Error, "Every value list must contain at least as many items as the first one.");
                return;
            }

            if (n > count)
            {
                AddRuntimeMessage(GH_RuntimeMessageLevel.Error, "The number of clusters cannot exceed the number of observations.");
                return;
            }

            // Transpose: observation i collects the i-th item of every value list.
            double[][] observations = new double[count][];

            for (int i = 0; i < count; i++)
            {
                double[] row = new double[data.Count];
                for (int j = 0; j < data.Count; j++)
                {
                    row[j] = data[j][i];
                }
                observations[i] = row;
            }

            //Get Weights
            List <double> weights = new List <double>();

            DA.GetDataList(1, weights);

            // Fall back to uniform weights when the supplied list does not match the observations.
            if (weights.Count != count)
            {
                weights = Enumerable.Repeat(1.0, count).ToList();
            }


            //Seed (fixed so that repeated solutions use the same generator seed)
            Accord.Math.Random.Generator.Seed = 0;

            // Create a new K-Means algorithm with n clusters
            Accord.MachineLearning.KMeans kmeans = new Accord.MachineLearning.KMeans(n);

            KMeansClusterCollection clusters = kmeans.Learn(observations, weights.ToArray());

            int[] labels = clusters.Decide(observations);

            //Message
            base.Message = "Weights " + weights.Count.ToString() + "\r\n" + "Dimensions " + observations.Length.ToString() + " of length " + observations[0].Length.ToString();

            //Output
            DA.SetDataList(0, labels.ToList());

            // Group observation indices by label: one branch per cluster.
            DataTree <int> dataTree = new DataTree <int>();

            for (int i = 0; i < labels.Length; i++)
            {
                dataTree.Add(i, new GH_Path(labels[i]));
            }

            DA.SetDataTree(1, dataTree);
        }
예제 #8
0
        ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

        /// <summary>
        ///   Runs the whole clustering pipeline: prepares the matrices through the helper
        ///   steps, copies <c>Y</c> into jagged observations, clusters them with K-Means and
        ///   maps every node name to its cluster label.
        /// </summary>
        /// <returns>
        ///   A tuple holding <c>adjacency_matrix</c> and the newly built
        ///   <c>words_cluster</c> dictionary (node name to label).
        /// </returns>
        public Tuple<double[,], Dictionary<string, int>> StartClustering()
        {
            // 1. build adjacency matrix.
            convert_input();
            // 2.1 build diagonal matrix.
            diagonalMatrix();
            // 2.2 build L = D - adjacency_matrix.
            buildLmat();
            // 3. find k largest eigenvectors.
            calculate_eigendecomposition();
            // 4. normalize eigenvectors.
            normalizeVectors();

            labels = new int[inputSize];

            // K-Means takes jagged arrays, so copy Y row by row.
            int rowCount = Y.GetLength(0);
            int colCount = Y.GetLength(1);

            observations = new double[rowCount][];
            for (int r = 0; r < rowCount; r++)
            {
                double[] row = new double[colCount];
                for (int c = 0; c < colCount; c++)
                {
                    row[c] = Y[r, c];
                }
                observations[r] = row;
            }

            // K-Means: learn the clusters, then label the same observations.
            Accord.MachineLearning.KMeans kmeans = new Accord.MachineLearning.KMeans(k: NumofClusters);
            labels = kmeans.Learn(observations).Decide(observations);

            // Pair the i-th node name with the i-th label.
            words_cluster = new Dictionary<string, int>();
            int position = 0;
            foreach (int label in labels)
            {
                words_cluster.Add(nodes[position], label);
                position++;
            }

            return new Tuple<double[,], Dictionary<string, int>>(adjacency_matrix, words_cluster);
        }// end StartClustering
예제 #9
0
        }// end normalizeVectors()        
                
        /*
         *  Reads the tab-separated lines of input_file and prepares the matrices used for clustering.
         *  input: none (reads the input_file and NumofClusters members).
         *  output: none; it (re)assigns nodes, inputSize, adjacency_matrix, D, Y and L, writes
         *          "nodes.csv", "edges.csv" and "nodes.txt" (relative paths), and finally calls
         *          normalize(...) and build_adjmat(...).
         *  Each line is expected to have at least three tab-separated fields: two names followed
         *  by a value that double.Parse can read.
         */        
        
        private void convert_input()
        {
            string[] lines = input_file.ToArray<string>();
            
            // Maps every distinct name found in the first two fields to a running id starting at 1.
            Dictionary<string, int> words = new Dictionary<string, int>();
            
            // val keeps the third field as text, vector keeps it parsed as a number.
            List<string> val = new List<string>();
            double[] vector = new double[lines.Length];

            int k = 1;
            for (int i = 0; i < lines.Length; i++)
            {
                string[] line = lines[i].Split('\t');
                if (!words.ContainsKey(line[0]))
                    words.Add(line[0], k++);
                if (!words.ContainsKey(line[1]))
                    words.Add(line[1], k++);
                val.Add(line[2]);
                // NOTE(review): double.Parse without a culture argument uses the current culture -- confirm the file format matches.
                vector[i] = double.Parse(line[2]);
            }

            // Wrap every value in a one-element array so the values can be clustered as 1-D observations.
            double[][] observ = new double[vector.Length][];
            for (int i = 0; i < vector.Length; i++)
                observ[i] = new double[] { vector[i] };

            // Split the values into two clusters; the labels decide below which lines are kept.
            Accord.MachineLearning.KMeans kmeans = new Accord.MachineLearning.KMeans(k:2);
            var clusters = kmeans.Learn(observ);
            int[] labels = clusters.Decide(observ);

            ////double min = 0, max = 0;
            ////Sigma(vector,ref min,ref max);
            nodes = new List<string>();// { "Id" };
            List<string> edges = new List<string>();// { "Source; Target" };
            HashSet<string> tmpnodes = new HashSet<string>();
            for (int i = 0; i < lines.Length; i++)
            {
                // Lines labelled 1 (or 2) are skipped, so only lines labelled otherwise become edges.
                // NOTE(review): with k = 2 the labels are presumably only 0 or 1, which would make the
                // `== 2` test unreachable; which value range ends up as label 0 is not fixed here -- confirm.
                if (labels[i] == 2 || labels[i] == 1)//if (vector[i] < min || vector[i] > max)
                    continue;
                string[] tmp = lines[i].Split('\t');
                //newlines.Add(words[tmp[0]].ToString() + "," + words[tmp[1]].ToString());// + "\t" + val[i]);
                edges.Add(tmp[0] + "\t" + tmp[1] + "\t" + val[i]);
                                                 //if (!nodes.Contains(tmp[0]))
                tmpnodes.Add(tmp[0]);
                //if (!nodes.Contains(tmp[1]))
                tmpnodes.Add(tmp[1]);
            }

            nodes.AddRange(tmpnodes.ToList());

            // Dump the names and edges that survived the filter.
            File.WriteAllLines("nodes.csv", nodes);
            File.WriteAllLines("edges.csv", edges);

            // From here on nodes holds every name seen in the input, including names
            // that only occur on skipped lines.
            nodes = new List<string>();
            foreach (var item in words)
            {
                nodes.Add(item.Key);
            }

            File.WriteAllLines("nodes.txt", nodes);
            inputSize = nodes.Count;
            adjacency_matrix = new double[inputSize, inputSize]; // init affinity matrix
            D = new double[inputSize, inputSize]; // init diagonal matrix
            // NOTE(review): D is allocated a second time on the next line; the first allocation is redundant.
            Y = new double[inputSize, NumofClusters]; D = new double[inputSize, inputSize];
            L = new double[inputSize, inputSize];

            // Groups the edges by a feature id; feat is always 0 here, so at most one group (key 0) exists.
            SortedDictionary<int, List<string>> lst = new SortedDictionary<int, List<string>>();
            // NOTE(review): all_temp is never used in this method.
            List<string> all_temp = new List<string>();
            string[] file = edges.ToArray();
            for (int i = 0; i < file.Length; i++)
            {
                string[] row = file[i].Split('\t');
                int feat = 0;
                if (!lst.ContainsKey(feat))
                    lst.Add(feat, new List<string>());
                lst[feat].Add(row[0] + "\t" + row[1] + "\t" + row[2]);
            }

            // Indexing lst by 0..Count-1 works because the only key that can exist is 0.
            for (int i = 0; i < lst.Count; i++)
                normalize(lst[i], i.ToString());

            // Collect the per-group results from all_res (presumably filled by normalize -- confirm)
            // and build the adjacency matrix from them.
            List<string> all_results = new List<string>();
            for (int i = 0; i < lst.Count; i++)
            {
                string[] files = all_res[i].ToArray<string>();                
                for (int j = 0; j < files.Length; j++)
                    all_results.Add(files[j]);
            }
            build_adjmat(all_results);

        }
예제 #10
0
        /// <summary>
        /// Learns a model that can map the given inputs to the desired outputs, by repeatedly
        /// splitting the cluster with the largest distortion in two until K clusters exist.
        /// </summary>
        /// <param name="x">The model inputs. Must be rectangular.</param>
        /// <param name="weights">The weight of importance for each input sample. When null, a vector
        /// of ones is used. In this method the weights are only validated (length and positive sum).</param>
        /// <returns>A model that has learned how to produce suitable outputs
        /// given the input data <paramref name="x" />.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="x"/> is null.</exception>
        /// <exception cref="ArgumentException">There are fewer points than K, the weights vector has a
        /// different length than the data, or the weights do not sum to a positive value.</exception>
        /// <exception cref="DimensionMismatchException">A row of <paramref name="x"/> has a different
        /// length than the others.</exception>
        public override KMeansClusterCollection Learn(double[][] x, double[] weights = null)
        {
            // Initial argument checking
            if (x == null)
            {
                throw new ArgumentNullException("x");
            }

            if (x.Length < K)
            {
                throw new ArgumentException("Not enough points. There should be more points than the number K of clusters.");
            }

            if (weights == null)
            {
                weights = Vector.Ones(x.Length);
            }

            if (x.Length != weights.Length)
            {
                throw new ArgumentException("Data weights vector must be the same length as data samples.");
            }

            double weightSum = weights.Sum();

            if (weightSum <= 0)
            {
                throw new ArgumentException("The sum of the data weights must be greater than zero.");
            }

            int cols = x.Columns();

            for (int i = 0; i < x.Length; i++)
            {
                if (x[i].Length != cols)
                {
                    // Report the real parameter name and the offending row index.
                    throw new DimensionMismatchException("x", string.Format(
                        "The points matrix should be rectangular. The vector at position {0} has a different length than previous ones.", i));
                }
            }

            int k = Clusters.Count;

            // Helper 2-means instance used for every split; it inherits this object's settings.
            KMeans kmeans = new KMeans(2)
            {
                Distance           = (IDistance <double[]>)Clusters.Distance,
                ComputeError       = false,
                ComputeCovariances = false,
                UseSeeding         = UseSeeding,
                Tolerance          = Tolerance,
                MaxIterations      = MaxIterations,
            };

            // NOTE(review): centroids is presumably the array backing Clusters, so the
            // assignments below update the model in place -- confirm.
            var centroids   = Clusters.Centroids;
            var clusters    = new double[k][][];
            var distortions = new double[k];

            // 1. Start with all data points in one cluster
            clusters[0] = x;

            // 2. Repeat steps 3 to 6 (k-1) times to obtain K centroids
            for (int current = 1; current < k; current++)
            {
                // 3. Choose cluster with largest distortion
                //    (presumably only the first `current` entries are considered -- confirm)
                int choosen; distortions.Max(current, out choosen);

                // 4. Split cluster into two sub-clusters
                var splits = split(clusters[choosen], kmeans);

                clusters[choosen] = splits.Item1;
                clusters[current] = splits.Item2;

                // 5. Replace chosen centroid and add a new one
                centroids[choosen] = kmeans.Clusters.Centroids[0];
                centroids[current] = kmeans.Clusters.Centroids[1];

                // Recompute distortions for the updated clusters
                distortions[choosen] = kmeans.Clusters[0].Distortion(clusters[choosen]);
                distortions[current] = kmeans.Clusters[1].Distortion(clusters[current]);

                // 6. Increment cluster count (current = current + 1)
            }

            Clusters.NumberOfInputs = cols;

            // Sanity checks on the resulting model.
            Accord.Diagnostics.Debug.Assert(Clusters.NumberOfClasses == K);
            Accord.Diagnostics.Debug.Assert(Clusters.NumberOfOutputs == K);
            Accord.Diagnostics.Debug.Assert(Clusters.NumberOfInputs == x[0].Length);

            if (ComputeProportions)
            {
                // Proportions are the fraction of samples assigned to each cluster.
                int[] y      = Clusters.Decide(x);
                int[] counts = y.Histogram();
                counts.Divide(y.Length, result: Clusters.Proportions);

                ComputeInformation(x, y);
            }
            else
            {
                ComputeInformation(x);
            }

            return(Clusters);
        }