/// <summary>
/// Trains a k-means model on the supplied observations and stores the
/// resulting cluster collection in <c>_clusters</c>.
/// </summary>
/// <param name="observations">Jagged array of samples handed to <c>KMeans.Learn</c>.</param>
/// <param name="amountOfClusters">Number of clusters passed to the <c>KMeans</c> constructor.</param>
public void Learn(double[][] observations, int amountOfClusters)
{
    // Build the algorithm and fit it in one expression; only the learned clusters are kept.
    _clusters = new KMeans(amountOfClusters).Learn(observations);
}
/// <summary>
/// Entry point of the clustering demo. Loads <c>features.csv</c> into a data frame,
/// standardizes three percentile features, runs k-means for 4 through 8 clusters,
/// and prints centroids, per-cluster details and a silhouette score for each run.
/// </summary>
/// <param name="args">
/// Optional command-line arguments. When <c>args[0]</c> is present and non-blank it is
/// used as the directory containing <c>features.csv</c>; otherwise the built-in default
/// directory is used.
/// </param>
static void Main(string[] args)
{
    // Console.SetWindowSize throws ArgumentOutOfRangeException when the requested size
    // exceeds the largest window the current screen/font allows, so clamp the request.
    Console.SetWindowSize(
        Math.Min(100, Console.LargestWindowWidth),
        Math.Min(50, Console.LargestWindowHeight)
    );

    // Read in the Online Retail feature dataset.
    // The data directory can be supplied as the first command-line argument;
    // the original hard-coded location remains the fallback.
    string dataDirPath = @"\\Mac\Home\Documents\c-sharp-machine-learning\ch.6\input-data";
    if (args != null && args.Length > 0 && !String.IsNullOrWhiteSpace(args[0]))
    {
        dataDirPath = args[0];
    }

    // Load the data into a data frame
    string dataPath = Path.Combine(dataDirPath, "features.csv");
    Console.WriteLine("Loading {0}\n\n", dataPath);

    var ecommerceDF = Frame.ReadCsv(
        dataPath,
        hasHeaders: true,
        inferTypes: true
    );
    Console.WriteLine("* Shape: {0}, {1}", ecommerceDF.RowCount, ecommerceDF.ColumnCount);

    string[] features = new string[]
    {
        "NetRevenuePercentile",
        "AvgUnitPricePercentile",
        "AvgQuantityPercentile"
    };
    Console.WriteLine("* Features: {0}\n\n", String.Join(", ", features));

    // Standardize each feature column: (value - average) / standard deviation.
    // The average is computed as column sum divided by the frame's row count.
    // NOTE(review): this may differ from a per-column mean if a column has missing
    // values - confirm against the dataset.
    var normalizedDf = Frame.CreateEmpty<int, string>();
    var average = ecommerceDF.Columns[features].Sum() / ecommerceDF.RowCount;
    foreach (string feature in features)
    {
        normalizedDf.AddColumn(
            feature,
            (ecommerceDF[feature] - average[feature]) / ecommerceDF[feature].StdDev()
        );
    }

    // Convert the standardized frame into the jagged array layout passed to KMeans.Learn.
    double[][] sampleSet = BuildJaggedArray(
        normalizedDf.Columns[features].ToArray2D<double>(),
        normalizedDf.RowCount,
        features.Length
    );

    // Create a new K-Means algorithm with n clusters.
    // The Accord random generator seed is pinned (presumably so runs are repeatable).
    Accord.Math.Random.Generator.Seed = 0;

    int[] numClusters = new int[] { 4, 5, 6, 7, 8 };
    List<string> clusterNames = new List<string>();
    List<double> silhouetteScores = new List<double>();

    for (int i = 0; i < numClusters.Length; i++)
    {
        int clusterCount = numClusters[i];

        KMeans kmeans = new KMeans(clusterCount);
        KMeansClusterCollection clusters = kmeans.Learn(sampleSet);
        int[] labels = clusters.Decide(sampleSet);

        // Store the labels under a per-run column name in both frames so the
        // helpers below can look them up by that name.
        string colname = String.Format("Cluster-{0}", clusterCount);
        clusterNames.Add(colname);

        normalizedDf.AddColumn(colname, labels);
        ecommerceDF.AddColumn(colname, labels);

        Console.WriteLine("\n\n\n##################### {0} ###########################", colname);

        Console.WriteLine("\n\n* Centroids for {0} clusters:", clusterCount);
        PrintCentroidsInfo(clusters.Centroids, features);
        Console.WriteLine("\n");

        // One call per pair of features.
        VisualizeClusters(normalizedDf, colname, "NetRevenuePercentile", "AvgUnitPricePercentile");
        VisualizeClusters(normalizedDf, colname, "AvgUnitPricePercentile", "AvgQuantityPercentile");
        VisualizeClusters(normalizedDf, colname, "NetRevenuePercentile", "AvgQuantityPercentile");

        for (int j = 0; j < clusterCount; j++)
        {
            GetTopNItemsPerCluster(ecommerceDF, j, colname);
        }

        double silhouetteScore = CalculateSilhouetteScore(normalizedDf, features, clusterCount, colname);
        Console.WriteLine("\n\n* Silhouette Score: {0}", silhouetteScore.ToString("0.0000"));

        silhouetteScores.Add(silhouetteScore);
        Console.WriteLine("\n\n##############################################################\n\n\n");
    }

    // Summary: one silhouette score line per cluster count tried.
    for (int i = 0; i < clusterNames.Count; i++)
    {
        Console.WriteLine("- Silhouette Score for {0}: {1}", clusterNames[i], silhouetteScores[i].ToString("0.0000"));
    }

    Console.WriteLine("\n\n\nDONE!!");
    Console.ReadKey();
}