/// <summary>
        /// Trains a k-means model on the supplied observations and keeps the
        /// resulting cluster collection in the <c>_clusters</c> field.
        /// </summary>
        /// <param name="observations">Samples handed to <c>KMeans.Learn</c>.</param>
        /// <param name="amountOfClusters">Cluster count passed to the <c>KMeans</c> constructor.</param>
        public void Learn(double[][] observations, int amountOfClusters)
        {
            _clusters = new KMeans(amountOfClusters).Learn(observations);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Loads <c>features.csv</c>, standardizes the selected feature columns, runs k-means
        /// for several cluster counts and prints centroids, per-cluster item listings and the
        /// silhouette score of every run, followed by a summary of all silhouette scores.
        /// </summary>
        /// <param name="args">
        /// Optional command-line arguments. When a non-empty first argument is given it is used
        /// as the directory that contains <c>features.csv</c>; otherwise the built-in default
        /// directory is used.
        /// </param>
        static void Main(string[] args)
        {
            Console.SetWindowSize(100, 50);

            // Read in the Online Retail feature dataset.
            // The data directory may be supplied as the first command-line argument;
            // without one, the original hard-coded location is used.
            string dataDirPath = (args != null && args.Length > 0 && !String.IsNullOrEmpty(args[0]))
                ? args[0]
                : @"\\Mac\Home\Documents\c-sharp-machine-learning\ch.6\input-data";

            // Load the data into a data frame
            string dataPath = Path.Combine(dataDirPath, "features.csv");

            // Fail with a readable message instead of an unhandled exception from the CSV reader.
            if (!File.Exists(dataPath))
            {
                Console.Error.WriteLine("Input file not found: {0}", dataPath);
                Console.Error.WriteLine("Pass the directory that contains features.csv as the first argument.");
                Environment.ExitCode = 1;
                return;
            }

            Console.WriteLine("Loading {0}\n\n", dataPath);
            var ecommerceDF = Frame.ReadCsv(
                dataPath,
                hasHeaders: true,
                inferTypes: true
                );

            Console.WriteLine("* Shape: {0}, {1}", ecommerceDF.RowCount, ecommerceDF.ColumnCount);

            string[] features = new string[] { "NetRevenuePercentile", "AvgUnitPricePercentile", "AvgQuantityPercentile" };
            Console.WriteLine("* Features: {0}\n\n", String.Join(", ", features));

            // Standardize every feature column: (value - column average) / column standard deviation.
            var normalizedDf = Frame.CreateEmpty<int, string>();
            var average = ecommerceDF.Columns[features].Sum() / ecommerceDF.RowCount;

            foreach (string feature in features)
            {
                normalizedDf.AddColumn(feature, (ecommerceDF[feature] - average[feature]) / ecommerceDF[feature].StdDev());
            }

            // KMeans.Learn takes a jagged array, so convert the 2D feature matrix first.
            double[][] sampleSet = BuildJaggedArray(
                normalizedDf.Columns[features].ToArray2D<double>(),
                normalizedDf.RowCount,
                features.Length
                );

            // Fix the random seed before creating any K-Means instance.
            Accord.Math.Random.Generator.Seed = 0;

            int[] numClusters = new int[] { 4, 5, 6, 7, 8 };
            const string clusterColumnFormat = "Cluster-{0}";
            List<double> silhouetteScores = new List<double>(numClusters.Length);

            foreach (int k in numClusters)
            {
                // Create a new K-Means algorithm with k clusters
                KMeans kmeans = new KMeans(k);
                KMeansClusterCollection clusters = kmeans.Learn(sampleSet);
                int[] labels = clusters.Decide(sampleSet);

                // The label column is added to both frames: the normalized one is used for
                // visualization and scoring, the raw one for the per-cluster item listing.
                string colname = String.Format(clusterColumnFormat, k);

                normalizedDf.AddColumn(colname, labels);
                ecommerceDF.AddColumn(colname, labels);

                Console.WriteLine("\n\n\n#####################    {0}    ###########################", colname);

                Console.WriteLine("\n\n* Centroids for {0} clusters:", k);

                PrintCentroidsInfo(clusters.Centroids, features);
                Console.WriteLine("\n");

                VisualizeClusters(normalizedDf, colname, "NetRevenuePercentile", "AvgUnitPricePercentile");
                VisualizeClusters(normalizedDf, colname, "AvgUnitPricePercentile", "AvgQuantityPercentile");
                VisualizeClusters(normalizedDf, colname, "NetRevenuePercentile", "AvgQuantityPercentile");

                for (int cluster = 0; cluster < k; cluster++)
                {
                    GetTopNItemsPerCluster(ecommerceDF, cluster, colname);
                }

                double silhouetteScore = CalculateSilhouetteScore(normalizedDf, features, k, colname);
                Console.WriteLine("\n\n* Silhouette Score: {0}", silhouetteScore.ToString("0.0000"));

                silhouetteScores.Add(silhouetteScore);
                Console.WriteLine("\n\n##############################################################\n\n\n");
            }

            // Summary: one line per run, in the order the runs were executed.
            for (int i = 0; i < numClusters.Length; i++)
            {
                Console.WriteLine(
                    "- Silhouette Score for {0}: {1}",
                    String.Format(clusterColumnFormat, numClusters[i]),
                    silhouetteScores[i].ToString("0.0000"));
            }

            Console.WriteLine("\n\n\nDONE!!");
            Console.ReadKey();
        }