static void runKMeans(DistanceObj[] groups)
{
    int numGroups = groups.Length;

    // Declare and initialize the observation array for K-Means
    double[][] observations = new double[numGroups][];
    for (int i = 0; i < observations.Length; i++)
    {
        observations[i] = new double[2];
        observations[i][0] = groups[i].coords[0];
        observations[i][1] = groups[i].coords[1];
    }

    KMeans km = new KMeans(7);
    KMeansClusterCollection clusters = km.Learn(observations);
    int[] labels = clusters.Decide(observations);
    for (int i = 0; i < labels.Length; i++)
    {
        Console.WriteLine(groups[i].address + ": " + labels[i]);
    }
}
public static void AddClusterInfo(ref SortedDictionary<double, RunOutput> dict)
{
    if (dict.Count <= 3)
    {
        return;
    }

    Accord.Math.Random.Generator.Seed = 0;

    double[][] metrics = new double[dict.Count][];
    int i = 0;
    foreach (RunOutput t in dict.Values)
    {
        metrics[i++] = t.metricDepths.ToArray();
    }

    // Create a new K-Means algorithm
    KMeans kmeans = new KMeans(k: dict.Count / 3);

    // Compute and retrieve the data centroids
    KMeansClusterCollection clusters = kmeans.Learn(metrics);

    // Use the centroids to partition all the data
    int[] labels = clusters.Decide(metrics);

    int j = 0;
    foreach (RunOutput v in dict.Values)
    {
        v.cluster = labels[j++];
    }
}
public void addPoint(int x, int y)
{
    Double[][] d = { new Double[] { x, y } };
    int[] cl = clusters.Decide(d);
    Clusters[x][y] = cl[0] + 1;
    // Parentheses matter here: without them, "+ 1" is concatenated as text (e.g. "#01").
    Console.WriteLine("New point in cluster #" + (cl[0] + 1));
}
public void Train(List<Person> trainingPeople, int skillSetSize)
{
    double[][] inputs = _dataPointService.GenerateDataPointsFromPeople(trainingPeople, skillSetSize);
    KMeans kMeans = new KMeans(2);
    _clustersCollection = kMeans.Learn(inputs);
    trainingPredictions = _clustersCollection.Decide(inputs);
}
private int[] clusterKMeans(double[][] observations)
{
    Accord.Math.Random.Generator.Seed = 0;
    KMeans kmeans = new KMeans(9);
    KMeansClusterCollection clusters = kmeans.Learn(observations);
    int[] labels = clusters.Decide(observations);
    return labels;
}
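// The helper above returns only the labels and discards the trained model. For
// context, a minimal standalone sketch (class name and sample values are
// illustrative, not taken from the code above) showing that the learned
// KMeansClusterCollection can also be kept to label points that were not in
// the training set:
using System;
using Accord.MachineLearning;

static class KMeansReuseSketch
{
    static void Main()
    {
        Accord.Math.Random.Generator.Seed = 0;
        double[][] observations =
        {
            new double[] { 0, 0 }, new double[] { 0, 1 },
            new double[] { 9, 9 }, new double[] { 10, 9 },
        };
        KMeans kmeans = new KMeans(2);
        KMeansClusterCollection clusters = kmeans.Learn(observations);
        int[] labels = clusters.Decide(observations);           // labels for the training points
        int newLabel = clusters.Decide(new double[] { 8, 8 });  // label for an unseen point
        Console.WriteLine(string.Join(",", labels) + " | " + newLabel);
    }
}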
public bool isImageEmpty(Bitmap src)
{
    bool ret = false;
    Bitmap g = Grayscale.CommonAlgorithms.BT709.Apply(src);
    ImageStatistics stat = new ImageStatistics(g);
    double[][] ds = { new double[] { stat.Gray.Mean, stat.Gray.Median, stat.Gray.StdDev } };
    Program.logIt(string.Format("{0},{1},{2}", ds[0][0], ds[0][1], ds[0][2]));
    int[] res = clusters.Decide(ds);
    ret = !output[res[0]];
    return ret;
}
private int[] CreateKClusters(int k, double[][] locations)
{
    Accord.Math.Random.Generator.Seed = 0;

    // Create a new K-Means algorithm with k clusters
    KMeans kmeans = new KMeans(k);
    KMeansClusterCollection clusters = kmeans.Learn(locations);
    int[] labels = clusters.Decide(locations);
    return labels;
}
// Called by LandMap.GetZones(), returns the number of subregions
public int ClusterLocationsAccordKMeans(MapPoint[,] points, TerrainVerticesDatabase vertDatabase)
{
    // K-means cluster algorithm to separate locations in the regions
    int regionId = 0;
    for (int isleId = 0; isleId < regions.Count; isleId++)
    {
        MapRegion region = regions[isleId];
        double[][] tileLocations = new double[region.turf.Count][];
        for (int i = 0; i < tileLocations.Length; i++)
        {
            tileLocations[i] = new double[3];
            TerrainVertData vertData = vertDatabase.GetVertDataFromRegionTile(region.turf[i], isleId);
            //LoggerTool.Post ("Requesting " + region.turf[i].ToString ());
            if (vertData != null)
            {
                tileLocations[i][0] = region.turf[i].x;
                tileLocations[i][1] = vertData.inlandPosition;
                tileLocations[i][2] = region.turf[i].y;
            }
            else
            {
                LoggerTool.Post("Null from VertDB for " + region.turf[i].ToString());
                tileLocations[i][0] = 0;
                tileLocations[i][1] = 0;
                tileLocations[i][2] = 0;
            }
        }

        int k = InitializeNumOfK(tileLocations.Length);
        Debug.Log(k + " centroid(s)");

        KMeans kmeans = new KMeans(k);
        KMeansClusterCollection clusters = kmeans.Learn(tileLocations);
        int[] labels = clusters.Decide(tileLocations);
        Debug.Log("Number of labeled observations = " + labels.Length); // one label per tile, not per cluster

        for (int i = 0; i < labels.Length; i++)
        {
            points[(int)tileLocations[i][0], (int)tileLocations[i][2]].areaValue = regionId + labels[i];
        }
        regionId += k;
    }
    return regionId;
}
public void Engine(double[][] observations, int k, ref int[] labels)
{
    Accord.Math.Random.Generator.Seed = 0;
    KMeans kmeans = new KMeans(k);
    kmeans.UseSeeding = Seeding.Uniform;
    kmeans.MaxIterations = 0; // no limit
    KMeansClusterCollection clusters = kmeans.Learn(observations);
    double[][] centroids = kmeans.Centroids;
    labels = clusters.Decide(observations);
    double err = kmeans.Error; // final within-cluster sum of squared distances, kept for inspection
}
public void binary_split_new_method()
{
    #region doc_sample1
    // Use a fixed seed for reproducibility
    Accord.Math.Random.Generator.Seed = 0;

    // Declare some data to be clustered
    double[][] input =
    {
        new double[] { -5, -2, -1 },
        new double[] { -5, -5, -6 },
        new double[] {  2,  1,  1 },
        new double[] {  1,  1,  2 },
        new double[] {  1,  2,  2 },
        new double[] {  3,  1,  2 },
        new double[] { 11,  5,  4 },
        new double[] { 15,  5,  6 },
        new double[] { 10,  5,  6 },
    };

    // Create a new binary split with 3 clusters
    BinarySplit binarySplit = new BinarySplit(3);

    // Learn a data partitioning using the Binary Split algorithm
    KMeansClusterCollection clustering = binarySplit.Learn(input);

    // Predict group labels for each point
    int[] output = clustering.Decide(input);

    // As a result, the first two observations should belong to the
    // same cluster (thus having the same label). The same should
    // happen to the next four observations and to the last three.
    #endregion

    Assert.AreEqual(output[0], output[1]);
    Assert.AreEqual(output[2], output[3]);
    Assert.AreEqual(output[2], output[4]);
    Assert.AreEqual(output[2], output[5]);
    Assert.AreEqual(output[6], output[7]);
    Assert.AreEqual(output[6], output[8]);
    Assert.AreNotEqual(output[0], output[2]);
    Assert.AreNotEqual(output[2], output[6]);
    Assert.AreNotEqual(output[0], output[6]);

    int[] labels2 = binarySplit.Clusters.Nearest(input);
    Assert.IsTrue(output.IsEqual(labels2));
}
/// <summary>
///   Initializes the Gaussian Mixture Models using K-Means
///   parameters as an initial parameter guess.
/// </summary>
private void btnInitialize_Click(object sender, EventArgs e)
{
    // Creates and computes a new
    // K-Means clustering algorithm:
    kmeans = new KMeans(k);
    KMeansClusterCollection clustering = kmeans.Learn(observations);

    // Classify all instances in mixture data
    int[] classifications = clustering.Decide(observations);

    // Draw the classifications
    updateGraph(classifications);
}
public void AccordKMeans()
{
    // Stopwatch.StartNew() already starts the timer, so no extra Start() call is needed.
    Stopwatch stopwatch = Stopwatch.StartNew();
    KMeans kmeans = new KMeans(50);
    double[][] v = Input.ToRowArrays();
    KMeansClusterCollection clusters = kmeans.Learn(v);
    AssignedClusters = clusters.Decide(v).ToList();
    BestClustering = AssignedClusters;
    Centroids = clusters.Centroids.Select(x => CreateVector.Dense(x)).ToList();
    stopwatch.Stop();
    timer = stopwatch.ElapsedTicks;
}
private static double[][][] ClusterPoints(double[][] dataset, int k, KMeansClusterCollection clusters)
{
    // points in each cluster
    double[][][] clusterData = new double[k][][];
    for (int i = 0; i < dataset.Length; i++)
    {
        int decision = clusters.Decide(dataset[i]);
        if (clusterData[decision] == null)
        {
            clusterData[decision] = new double[][] { };
        }
        Array.Resize(ref clusterData[decision], clusterData[decision].Length + 1);
        clusterData[decision][clusterData[decision].Length - 1] = dataset[i];
    }
    return clusterData;
}
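// ClusterPoints above grows each bucket with repeated Array.Resize calls, which
// reallocates on every insertion. A sketch of an equivalent LINQ grouping, assuming
// it sits in the same class (requires using System.Linq); the one behavioral
// difference, to verify against callers, is that empty clusters come back as empty
// arrays rather than null:
private static double[][][] ClusterPointsLinq(double[][] dataset, int k, KMeansClusterCollection clusters)
{
    // Decide all assignments in one call, then bucket the points by label.
    int[] labels = clusters.Decide(dataset);
    return Enumerable.Range(0, k)
        .Select(c => dataset.Where((point, i) => labels[i] == c).ToArray())
        .ToArray();
}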
private static void splitCluster(int index)
{
    // Run k-means with k = 2 on clusters[index].members. Members labeled 1 are moved
    // to a temp list and deleted from clusters[index]; the temp list becomes a new
    // Cluster appended to the end of clusters (the loop in findLargeClusters will
    // adapt to it and check it later for > 15 members).
    int numGroups = clusters[index].members.Count;
    double[][] observations = new double[numGroups][];
    for (int i = 0; i < observations.Length; i++)
    {
        observations[i] = new double[2];
        observations[i][0] = clusters[index].members[i].destination.coords[0];
        observations[i][1] = clusters[index].members[i].destination.coords[1];
    }

    KMeans km = new KMeans(2);
    KMeansClusterCollection clust = km.Learn(observations);
    int[] clustArr = clust.Decide(observations);

    // If a group falls in the second of the two clusters, move it to a new list and
    // delete it from the old one (iterating backwards so RemoveAt stays valid).
    List<Group> forNewCluster = new List<Group>();
    for (int i = clustArr.Length - 1; i >= 0; i--)
    {
        if (clustArr[i] == 1)
        {
            forNewCluster.Add(clusters[index].members[i]);
            clusters[index].members.RemoveAt(i);
        }
    }

    Cluster newCluster = new Cluster(forNewCluster);
    clusters.Add(newCluster);

    // Update the cluster attribute in each group of the new cluster
    foreach (Group g in clusters[clusters.Count() - 1].members)
    {
        g.cluster = clusters.Count() - 1;
    }
}
void train()
{
    clusters = kmeans.Learn(inputList.ToArray());

    // Make an empty (all-black) picture and use its statistics to identify
    // which cluster corresponds to "empty" images.
    {
        Bitmap mask = new Bitmap(300, 300);
        using (Graphics g = Graphics.FromImage(mask))
        {
            g.FillRectangle(new SolidBrush(Color.Black), new Rectangle(0, 0, mask.Width, mask.Height));
        }
        //Bitmap g = Grayscale.CommonAlgorithms.BT709.Apply(mask);
        ImageStatistics stat = new ImageStatistics(Grayscale.CommonAlgorithms.BT709.Apply(mask));
        double[][] ds = { new double[] { stat.Gray.Mean, stat.Gray.Median, stat.Gray.StdDev } };
        //Program.logIt(string.Format("{0},{1},{2}", ds[0][0], ds[0][1], ds[0][2]));
        int[] res = clusters.Decide(ds);
        output[0] = true;
        output[1] = true;
        output[res[0]] = false;
    }
}
static void Main(string[] args)
{
    // sample input
    var sampleSet = new double[][]
    {
        new double[] { 1, 9 },
        new double[] { 2, 8 },
        new double[] { 3, 7 },
        new double[] { 4, 6 },
        new double[] { 5, 5 }
    };

    KMeans kmeans = new KMeans(2);
    KMeansClusterCollection clusters = kmeans.Learn(sampleSet);

    Console.WriteLine("\n\n* Clusters: {0}", String.Join(",", clusters.Decide(sampleSet)));

    Console.WriteLine("\n\n\n\nDONE!!");
    Console.ReadKey();
}
// Sets the values in kmeansArr[]
public static void runKMeans(ref Group[] gs)
{
    int numGroups = gs.Count();

    // Declare and initialize the observation array for K-Means
    double[][] observations = new double[numGroups][];
    for (int i = 0; i < observations.Length; i++)
    {
        observations[i] = new double[2];
        observations[i][0] = gs[i].destination.coords[0];
        observations[i][1] = gs[i].destination.coords[1];
    }

    int numClusters = (gs.Count() / 6) + 1; // heuristic: roughly one cluster per six groups
    KMeans km = new KMeans(numClusters);
    KMeansClusterCollection clust = km.Learn(observations);
    kmeansArr = clust.Decide(observations);
}
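// The cluster count above is a fixed heuristic. If that choice ever needs
// validating, one option is an elbow-style scan over k using KMeans.Error, the
// within-cluster sum of squared distances Accord exposes after Learn (see the
// Engine snippet earlier). PickK and the 10% tolerance are illustrative
// assumptions, not part of the code above:
using System;
using Accord.MachineLearning;

static class KSelectionSketch
{
    static int PickK(double[][] observations, int maxK, double tolerance = 0.10)
    {
        double previousError = double.MaxValue;
        for (int k = 1; k <= maxK; k++)
        {
            KMeans km = new KMeans(k);
            km.Learn(observations);
            // Stop once an extra cluster no longer shrinks the error by at least `tolerance`.
            if (previousError - km.Error < tolerance * previousError)
                return Math.Max(1, k - 1);
            previousError = km.Error;
        }
        return maxK;
    }
}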
// Called by LandMap.GetZones(), returns the number of subregions
public int ClusterLocationsAccordKMeans(MapPoint[,] points)
{
    // K-means cluster algorithm to separate locations in the regions
    int regionId = 0;
    for (int isleId = 0; isleId < regions.Count; isleId++)
    {
        MapRegion region = regions[isleId];
        double[][] tileLocations = new double[region.turf.Count][];
        for (int i = 0; i < tileLocations.Length; i++)
        {
            tileLocations[i] = new double[2];
            tileLocations[i][0] = region.turf[i].x;
            tileLocations[i][1] = region.turf[i].y;
        }

        int k = InitializeNumOfK(region.turf.Count);
        Debug.Log(k + " centroid(s)");

        KMeans kmeans = new KMeans(k);
        KMeansClusterCollection clusters = kmeans.Learn(tileLocations);
        int[] labels = clusters.Decide(tileLocations);
        Debug.Log("Number of labeled observations = " + labels.Length); // one label per tile, not per cluster

        for (int i = 0; i < labels.Length; i++)
        {
            points[(int)tileLocations[i][0], (int)tileLocations[i][1]].areaValue = regionId + labels[i];
        }
        regionId += k;
    }
    return regionId;
}
/**
 * Gets a related post for each post.
 * Uses the k-means clustering algorithm.
 */
public void PostetsRelatedAi()
{
    // Get all posts from db
    var posts = (from p in db.Posts
                 where p.Content.Length > 50 // avoid unnecessary outliers
                 select p).ToList();

    // Create an array of all posts' content
    string[] documents = (from p in db.Posts
                          where p.Content.Length > 50 // avoid unnecessary outliers
                          select p.Content).ToArray();

    // Apply TF*IDF on the documents and get the resulting vectors.
    double[][] inputs = TFIDFEX.TFIDF.Transform(documents);
    inputs = TFIDFEX.TFIDF.Normalize(inputs);

    // Create a new K-Means algorithm with Posts/2 clusters (create couples)
    KMeans kmeans = new KMeans(Convert.ToInt32(posts.Count() / 2));

    // Compute the algorithm, retrieving an integer array
    // containing the labels for each of the observations
    KMeansClusterCollection clusters = kmeans.Learn(inputs);
    int[] labels = clusters.Decide(inputs);

    // Create a more handy list of clusters and their vectors
    var clustersList = new List<List<int>>();
    for (int j = 0; j < Convert.ToInt32(posts.Count() / 2); j++)
    {
        clustersList.Add(labels.Select((s, i) => new { i, s })
                               .Where(t => t.s == j)
                               .Select(t => t.i)
                               .ToList());
    }

    // Adjust all posts and their related posts according to the clustering results
    foreach (var clusetr in clustersList)
    {
        // Handle clusters with 2 or more vectors
        if (clusetr.Count() >= 2)
        {
            for (int i = 1; i < clusetr.Count(); i++)
            {
                // Attach each post in the cluster to its neighbor
                posts[clusetr[i - 1]].relatedPost = posts[clusetr[i]].Title;
            }
            // Attach the last post to the first one in the list
            posts[clusetr.Last()].relatedPost = posts[clusetr.First()].Title;
        }
        // Handle clusters with only one vector
        else if (clusetr.Count() > 0)
        {
            // In case no match was found
            posts[clusetr.First()].relatedPost = null;
        }
    }

    // Update changes in DB
    foreach (var p in posts)
    {
        db.Entry(p).State = EntityState.Modified;
    }
    db.SaveChanges();
}
public void dayWorkSchedule(DateTime date)
{
    Func<WorkSchedule, bool> predicat = (w => w.workSchedule_date == date);
    List<WorkSchedule> workSchedules = selectWorkSchedule(predicat);
    List<Distributors> alldistributors = new List<Distributors>();
    List<Distributors> ezerList = new List<Distributors>();
    foreach (WorkSchedule item in workSchedules)
    {
        ezerList = selectDistributors(d => d.distributors_id == item.distributor_id);
        if (ezerList.Count != 1)
        {
            throw new Exception("Error searching for this distributor");
        }
        alldistributors.Add(ezerList[0]);
    }

    List<Recipients> allrecipients = recipientsPackageByDay(date);
    double[][] Coordinates = new double[allrecipients.Count][];
    int i = 0;
    foreach (Recipients item in allrecipients)
    {
        Coordinates[i] = getLatLongFromAddress(item.recipients_address);
        i++;
    }

    // Create a new K-Means algorithm with 3 clusters
    KMeans kmeans = new KMeans(3);

    // Compute the algorithm, retrieving an integer array
    // containing the labels for each of the observations
    KMeansClusterCollection clusters = kmeans.Learn(Coordinates);

    // Label each recipient coordinate with its cluster
    int[] labels = clusters.Decide(Coordinates);

    List<Recipients> group0 = new List<Recipients>();
    List<Recipients> group1 = new List<Recipients>();
    List<Recipients> group2 = new List<Recipients>();
    for (int k = 0; k < labels.Length; k++)
    {
        if (labels[k] == 0) { group0.Add(allrecipients[k]); }
        if (labels[k] == 1) { group1.Add(allrecipients[k]); }
        if (labels[k] == 2) { group2.Add(allrecipients[k]); }
    }

    // Average the coordinates of each group to get its center
    double lat = 0;
    double lon = 0;
    double[] d0 = new double[2];
    double[] d1 = new double[2];
    double[] d2 = new double[2];
    foreach (var item in group0)
    {
        d0 = getLatLongFromAddress(item.recipients_address);
        lat += d0[0];
        lon += d0[1];
    }
    d0[0] = lat / group0.Count;
    d0[1] = lon / group0.Count;
    lat = 0;
    lon = 0;
    foreach (var item in group1)
    {
        d1 = getLatLongFromAddress(item.recipients_address);
        lat += d1[0];
        lon += d1[1];
    }
    d1[0] = lat / group1.Count;
    d1[1] = lon / group1.Count;
    lat = 0;
    lon = 0;
    foreach (var item in group2)
    {
        d2 = getLatLongFromAddress(item.recipients_address);
        lat += d2[0];
        lon += d2[1];
    }
    d2[0] = lat / group2.Count;
    d2[1] = lon / group2.Count;

    // double[] d0 = getLatLongFromAddress(group0[0].recipients_address);
    // double[] d1 = getLatLongFromAddress(group1[0].recipients_address);
    // double[] d2 = getLatLongFromAddress(group2[0].recipients_address);

    double[,] distance = new double[3, 3];
    for (i = 0; i < 3; i++)
    {
        double[] d3 = getLatLongFromAddress(alldistributors[i].distributors_address);
        for (int j = 0; j < 3; j++)
        {
            if (j == 0) { distance[i, j] = dalImp.addressCalculations.calculateDistance(d0, d3); }
            if (j == 1) { distance[i, j] = dalImp.addressCalculations.calculateDistance(d1, d3); }
            if (j == 2) { distance[i, j] = dalImp.addressCalculations.calculateDistance(d2, d3); }
        }
    }

    int[,] minIndex = new int[3, 3];
    for (i = 0; i < 3; i++)
    {
        for (int j = 0; j < 3; j++)
        {
            minIndex[i, j] = findMinDist(distance[i, 0], distance[i, 1], distance[i, 2], j);
        }
    }

    if ((minIndex[0, 0] != minIndex[2, 0]) && (minIndex[0, 0] != minIndex[1, 0]) && (minIndex[2, 0] != minIndex[1, 0]))
    {
        if (minIndex[0, 0] == 0)
        {
            UpdateWorkSchedule(new WorkSchedule(alldistributors[0].distributors_id, date, group0));
            if (minIndex[1, 0] == 1)
            {
                UpdateWorkSchedule(new WorkSchedule(alldistributors[1].distributors_id, date, group1));
                UpdateWorkSchedule(new WorkSchedule(alldistributors[2].distributors_id, date, group2));
            }
            else if (minIndex[1, 0] == 2)
            {
                UpdateWorkSchedule(new WorkSchedule(alldistributors[1].distributors_id, date, group2));
                UpdateWorkSchedule(new WorkSchedule(alldistributors[2].distributors_id, date, group1));
            }
        }
        else if (minIndex[0, 0] == 1)
        {
            UpdateWorkSchedule(new WorkSchedule(alldistributors[0].distributors_id, date, group1));
            if (minIndex[1, 0] == 0)
            {
                UpdateWorkSchedule(new WorkSchedule(alldistributors[1].distributors_id, date, group0));
                UpdateWorkSchedule(new WorkSchedule(alldistributors[2].distributors_id, date, group2));
            }
            else if (minIndex[1, 0] == 2)
            {
                UpdateWorkSchedule(new WorkSchedule(alldistributors[1].distributors_id, date, group2));
                UpdateWorkSchedule(new WorkSchedule(alldistributors[2].distributors_id, date, group0));
            }
        }
        else if (minIndex[0, 0] == 2)
        {
            UpdateWorkSchedule(new WorkSchedule(alldistributors[0].distributors_id, date, group2));
            if (minIndex[1, 0] == 0)
            {
                UpdateWorkSchedule(new WorkSchedule(alldistributors[1].distributors_id, date, group0));
                UpdateWorkSchedule(new WorkSchedule(alldistributors[2].distributors_id, date, group1));
            }
            else if (minIndex[1, 0] == 1)
            {
                UpdateWorkSchedule(new WorkSchedule(alldistributors[1].distributors_id, date, group1));
                UpdateWorkSchedule(new WorkSchedule(alldistributors[2].distributors_id, date, group0));
            }
        }
    }
    else
    {
        if ((minIndex[0, 0] == minIndex[2, 0]) && (minIndex[0, 0] == minIndex[1, 0]) && (minIndex[2, 0] == minIndex[1, 0]))
        {
            UpdateWorkSchedule(new WorkSchedule(alldistributors[minIndex[0, 0]].distributors_id, date, group0));
            if ((minIndex[1, 1] == minIndex[2, 1]))
            {
                UpdateWorkSchedule(new WorkSchedule(alldistributors[minIndex[1, 1]].distributors_id, date, group1));
                UpdateWorkSchedule(new WorkSchedule(alldistributors[minIndex[2, 2]].distributors_id, date, group2));
            }
            else
            {
                UpdateWorkSchedule(new WorkSchedule(alldistributors[minIndex[1, 1]].distributors_id, date, group1));
                UpdateWorkSchedule(new WorkSchedule(alldistributors[minIndex[2, 1]].distributors_id, date, group2));
            }
        }
        else
        {
            int place;
            if (minIndex[1, 0] == minIndex[2, 0])
            {
                UpdateWorkSchedule(new WorkSchedule(alldistributors[minIndex[0, 0]].distributors_id, date, group0));
                place = minIndex[1, 1] + minIndex[1, 1] % 3;
                UpdateWorkSchedule(new WorkSchedule(alldistributors[minIndex[1, 0]].distributors_id, date, group1));
                UpdateWorkSchedule(new WorkSchedule(alldistributors[place].distributors_id, date, group2));
            }
            else
            {
                if (minIndex[0, 0] == minIndex[2, 0])
                {
                    UpdateWorkSchedule(new WorkSchedule(alldistributors[minIndex[1, 0]].distributors_id, date, group1));
                    place = minIndex[0, 0] + minIndex[1, 0] % 3;
                    UpdateWorkSchedule(new WorkSchedule(alldistributors[minIndex[0, 0]].distributors_id, date, group1));
                    UpdateWorkSchedule(new WorkSchedule(alldistributors[place].distributors_id, date, group2));
                }
                else
                {
                    UpdateWorkSchedule(new WorkSchedule(alldistributors[minIndex[2, 0]].distributors_id, date, group2));
                    place = minIndex[0, 0] + minIndex[2, 0] % 3;
                    UpdateWorkSchedule(new WorkSchedule(alldistributors[minIndex[1, 1]].distributors_id, date, group0));
                    UpdateWorkSchedule(new WorkSchedule(alldistributors[place].distributors_id, date, group1));
                }
            }
        }
    }
}
public void learn_test()
{
    #region doc_learn
    Accord.Math.Random.Generator.Seed = 0;

    // Declare some observations
    double[][] observations =
    {
        new double[] { -5, -2, -1 },
        new double[] { -5, -5, -6 },
        new double[] {  2,  1,  1 },
        new double[] {  1,  1,  2 },
        new double[] {  1,  2,  2 },
        new double[] {  3,  1,  2 },
        new double[] { 11,  5,  4 },
        new double[] { 15,  5,  6 },
        new double[] { 10,  5,  6 },
    };

    double[][] orig = observations.MemberwiseClone();

    // Create a new K-Means algorithm with 3 clusters
    BalancedKMeans kmeans = new BalancedKMeans(3)
    {
        // Note: in balanced k-means the chances of the algorithm oscillating
        // between two solutions increase considerably. For this reason, we
        // set a max-iterations limit to avoid iterating indefinitely.
        MaxIterations = 100
    };

    // Compute the algorithm, retrieving an integer array
    // containing the labels for each of the observations
    KMeansClusterCollection clusters = kmeans.Learn(observations);

    // As a result, the first two observations should belong to the
    // same cluster (thus having the same label). The same should
    // happen to the next four observations and to the last three.
    int[] labels = clusters.Decide(observations);
    #endregion

    Assert.AreEqual(labels[0], labels[1]);
    Assert.AreEqual(labels[2], labels[3]);
    Assert.AreEqual(labels[2], labels[4]);
    Assert.AreEqual(labels[2], labels[5]);
    Assert.AreEqual(labels[6], labels[7]);
    Assert.AreEqual(labels[6], labels[8]);
    Assert.AreNotEqual(labels[0], labels[2]);
    Assert.AreNotEqual(labels[2], labels[6]);
    Assert.AreNotEqual(labels[0], labels[6]);

    int[] labels2 = kmeans.Clusters.Decide(observations);
    Assert.IsTrue(labels.IsEqual(labels2));

    // the data must not have changed!
    Assert.IsTrue(orig.IsEqual(observations));

    var c = new KMeansClusterCollection.KMeansCluster[clusters.Count];
    int i = 0;
    foreach (var cluster in clusters)
    {
        c[i++] = cluster;
    }
    for (i = 0; i < c.Length; i++)
    {
        Assert.AreSame(c[i], clusters[i]);
    }
}
public void Test(List<Person> testingPeople, int skillSetSize)
{
    var inputs = _dataPointService.GenerateDataPointsFromPeople(testingPeople, skillSetSize);
    testPredictions = _clustersCollection.Decide(inputs);
}
/// <summary>
///   Computes the silhouette width for the given set of clusters and observations.
/// </summary>
/// <param name="clusters">The clusters in the dataset.</param>
/// <param name="observations">The observation vectors.</param>
/// <param name="isTwoDimensionalObservations">
///   Indicates whether or not the observation vector consists of flattened,
///   two-dimensional observations (which is how agent trajectories are stored), prompting
///   special consideration for Euclidean distance calculation.
/// </param>
/// <returns>The silhouette width for the given set of clusters and observations.</returns>
private static double ComputeSilhouetteWidth(KMeansClusterCollection clusters, double[][] observations,
    bool isTwoDimensionalObservations)
{
    var lockObj = new object();
    double totalSilhouetteWidth = 0;

    // Get cluster assignments for all of the observations
    var clusterAssignments = clusters.Decide(observations);

    Parallel.For(0, observations.Length, observationIdx =>
    {
        double obsIntraclusterDissimilarity = 0;

        // Get the cluster assignment of the current observation
        var curObsClusterAssignment = clusterAssignments[observationIdx];

        // Only add observation silhouette width if it is NOT the sole member of its assigned cluster
        if (clusterAssignments.Count(ca => ca == curObsClusterAssignment) > 1)
        {
            // Setup list to hold average distance from current observation to every other neighboring cluster
            var neighboringClusterDistances = new List<double>(clusters.Count);

            for (var clusterIdx = 0; clusterIdx < clusters.Count; clusterIdx++)
            {
                // Handle the case where the current cluster is the cluster of which the observation is a member
                if (clusterIdx == curObsClusterAssignment)
                {
                    // Sum the distance between current observation and every other observation in the same cluster
                    for (var caIdx = 0; caIdx < clusterAssignments.Length; caIdx++)
                    {
                        if (curObsClusterAssignment == clusterAssignments[caIdx])
                        {
                            obsIntraclusterDissimilarity += isTwoDimensionalObservations
                                ? ComputeEuclideanTrajectoryDifference(observations[observationIdx], observations[caIdx])
                                : ComputeEuclideanObservationDifference(observations[observationIdx], observations[caIdx]);
                        }
                    }
                }
                // Otherwise, handle the case where we're on a neighboring cluster
                else
                {
                    // Create new variable to hold sum of dissimilarities between observation and
                    // neighboring cluster observations
                    double curObsNeighboringClusterDissimilarity = 0;

                    // Sum the distance between the current observation and every observation
                    // assigned to this neighboring cluster
                    for (var caIdx = 0; caIdx < clusterAssignments.Length; caIdx++)
                    {
                        if (clusterIdx == clusterAssignments[caIdx])
                        {
                            curObsNeighboringClusterDissimilarity += isTwoDimensionalObservations
                                ? ComputeEuclideanTrajectoryDifference(observations[observationIdx], observations[caIdx])
                                : ComputeEuclideanObservationDifference(observations[observationIdx], observations[caIdx]);
                        }
                    }

                    // Compute the average intercluster dissimilarity for the current neighboring
                    // cluster and add to the list of average neighboring cluster distances
                    neighboringClusterDistances.Add(curObsNeighboringClusterDissimilarity /
                        clusterAssignments.Count(ca => ca == clusterIdx));
                }
            }

            // Compute the average intracluster dissimilarity (local variance)
            obsIntraclusterDissimilarity = obsIntraclusterDissimilarity /
                clusterAssignments.Count(ca => ca == curObsClusterAssignment);

            // Get the minimum intercluster dissimilarity (0 if there are no centroid differences)
            var obsInterClusterDissimilarity = neighboringClusterDistances.Any()
                ? neighboringClusterDistances.Min()
                : 0;

            // Compute the silhouette width for the current observation
            // If it's the only point in the cluster, then the silhouette width is 0
            var curSilhouetteWidth = Math.Abs(obsIntraclusterDissimilarity) < 0.0000001
                ? 0
                : (obsInterClusterDissimilarity - obsIntraclusterDissimilarity) /
                  Math.Max(obsIntraclusterDissimilarity, obsInterClusterDissimilarity);

            lock (lockObj)
            {
                // Add the silhouette width for the current observation
                totalSilhouetteWidth += curSilhouetteWidth;
            }
        }
    });

    // Return the silhouette width
    return totalSilhouetteWidth / observations.Length;
}
/// <summary>
///   This is the method that actually does the work.
/// </summary>
/// <param name="DA">The DA object is used to retrieve from inputs and store in outputs.</param>
protected override void SolveInstance(IGH_DataAccess DA)
{
    int i, j, k;
    bool IsPointData = false;
    GH_Structure<IGH_Goo> data = new GH_Structure<IGH_Goo>();
    GH_Structure<IGH_GeometricGoo> geo = new GH_Structure<IGH_GeometricGoo>();
    List<int> numCluster = new List<int>();

    if (!DA.GetDataTree(0, out data)) { return; }
    if (!DA.GetDataTree(1, out geo)) { return; }
    if (!DA.GetDataList(2, numCluster)) { return; }

    data.Simplify(GH_SimplificationMode.CollapseAllOverlaps);

    DataTree<IGH_Goo> outputData = new DataTree<IGH_Goo>();
    DataTree<IGH_GeometricGoo> outputGeo = new DataTree<IGH_GeometricGoo>();
    DataTree<Point3d> outputCentroids = new DataTree<Point3d>();

    for (i = 0; i < data.Branches.Count; i++)
    {
        double[] x = new double[data.Branches[i].Count];
        double[] y = new double[data.Branches[i].Count];
        double[] z = new double[data.Branches[i].Count];
        for (j = 0; j < data.Branches[i].Count; j++)
        {
            if (data.Branches[i][j] is GH_Point)
            {
                IsPointData = true;
                GH_Point target = new GH_Point();
                if (GH_Convert.ToGHPoint(data.Branches[i][j], GH_Conversion.Both, ref target))
                {
                    x[j] = target.Value.X;
                    y[j] = target.Value.Y;
                    z[j] = target.Value.Z;
                }
            }
            else
            {
                break;
            }
        }

        if (IsPointData)
        {
            List<double[]> datalist = new List<double[]> { x, y, z };
            double[][] _data = ArrayConvert.To2DArray(datalist);
            KMeans m = new KMeans(numCluster[i]);
            KMeansClusterCollection cluster = m.Learn(_data);
            int[] labels = cluster.Decide(_data);
            double[][] centroids = m.Centroids;
            for (j = 0; j < data.Branches[i].Count; j++)
            {
                GH_Path path = new GH_Path(i, labels[j]);
                outputData.Add(data.Branches[i][j], path);
                outputGeo.Add(geo.Branches[i][j], path);
            }
            for (k = 0; k < centroids.Length; k++)
            {
                outputCentroids.Add(new Point3d(centroids.ElementAt(k).ElementAt(0),
                    centroids.ElementAt(k).ElementAt(1), centroids.ElementAt(k).ElementAt(2)), new GH_Path(k));
            }
        }
        else
        {
            break;
        }
    }

    if (!IsPointData)
    {
        GH_Path oldPath = new GH_Path();
        GH_Path newPath = new GH_Path();
        int DataGroupCount = 0;
        for (i = 0; i < data.PathCount; i++)
        {
            if (data.Paths[i].Indices.Length == 1)
            {
                DataGroupCount = 1;
                break;
            }
            else
            {
                int[] pp = new int[data.Paths[i].Indices.Length - 1];
                for (j = 0; j < data.Paths[i].Indices.Length - 1; j++)
                {
                    pp[j] = data.Paths[i].Indices[j];
                }
                newPath.Indices = pp;
                if (newPath != oldPath)
                {
                    DataGroupCount++;
                    oldPath = newPath;
                }
                newPath = new GH_Path();
            }
        }

        for (i = 0; i < DataGroupCount; i++)
        {
            List<double[]> datalist = new List<double[]>();
            for (j = 0; j < data.Branches.Count / DataGroupCount; j++)
            {
                double[] values = new double[data.Branches[DataGroupCount * i + j].Count];
                for (k = 0; k < data.Branches[DataGroupCount * i + j].Count; k++)
                {
                    if (data.Branches[DataGroupCount * i + j][k] is GH_Number)
                    {
                        if (GH_Convert.ToDouble(data.Branches[DataGroupCount * i + j][k], out double value, GH_Conversion.Both))
                        {
                            values[k] = value;
                        }
                    }
                    else
                    {
                        break;
                    }
                }
                datalist.Add(values);
            }

            double[][] _data = ArrayConvert.ToDoubleArray(datalist);
            KMeans m = new KMeans(numCluster[0]);
            KMeansClusterCollection cluster = m.Learn(_data);
            int[] labels = cluster.Decide(_data);
            for (j = 0; j < labels.Length; j++)
            {
                List<IGH_Goo> numbers = new List<IGH_Goo>();
                List<IGH_GeometricGoo> geos = new List<IGH_GeometricGoo>();
                for (k = 0; k < data.Branches[DataGroupCount * i + j].Count; k++)
                {
                    numbers.Add(data.Branches[DataGroupCount * i + j][k]);
                    geos.Add(geo.Branches[DataGroupCount * i + j][k]);
                }
                GH_Path path = new GH_Path(i, j, labels[j]);
                outputData.AddRange(numbers, path);
                outputGeo.AddRange(geos, path);
            }
        }
    }

    DA.SetDataTree(0, outputData);
    DA.SetDataTree(1, outputGeo);
    DA.SetDataTree(2, outputCentroids);
}
public void objectClustering()
{
    int applyClusterNum = clusterNum;
    if (applyClusterNum > objectidList.Count)
    {
        applyClusterNum = objectidList.Count;
    }

    var kmeans = new KMeans(k: applyClusterNum);
    double[][] points = new double[objectidList.Count][];
    for (int i = 0; i < objectidList.Count; i++)
    {
        int id = objectidList[i];
        points[i] = ObjList[idxbyObjid[id]].getStartingPoint();
    }
    KMeansClusterCollection clusters = kmeans.Learn(points);
    int[] output = clusters.Decide(points);

    setStartingGroup();
    int maxClusterLength = objectidList.Count / applyClusterNum;
    for (int i = 0; i < objectidList.Count; i++)
    {
        int id = objectidList[i];
        //startingGroup[output[i]].Add(id);
        if (startingGroup[output[i]].idList.Count < maxClusterLength)
        {
            startingGroup[output[i]].Add(id);
        }
        else
        {
            // Assigned group is full: spill over into the next group with free capacity.
            for (int j = output[i]; j < applyClusterNum + output[i]; j++)
            {
                if (startingGroup[(j + 1) % applyClusterNum].idList.Count < maxClusterLength)
                {
                    startingGroup[(j + 1) % applyClusterNum].Add(id);
                    break;
                }
            }
        }
    }

    // Sort each group and find the longest frame length.
    int maxFrameLength = 0;
    for (int k = 0; k < startingGroup.Count; k++)
    {
        startingGroup[k].sort(ref ObjList, ref idxbyObjid);
        int curFrameLength = startingGroup[k].getFrameLength();
        if (maxFrameLength < curFrameLength)
        {
            maxFrameLength = curFrameLength;
        }
    }

    // Set the max frame.
    //MessageBox.Show(overlayFrameNum.ToString());
    outputFrameNum = trackingBar.Maximum = maxFrameLength - 1;
    if (videoInfo3Flag == true)
    {
        strVideoInfo3 += "\nresult Frame: " + outputFrameNum;
    }
    videoInfo3Flag = false;
    labelVideoInfo3.Text = strVideoInfo3;
    labelProgress.Text = "Detection 100%\nTracking 100%\nOverlay 100%";
    int min = outputFrameNum / analysisFPS / 60;
    int sec = outputFrameNum / analysisFPS % 60;
    labelEndTime.Text = min.ToString("00") + ":" + sec.ToString("00");
}
/// <summary>
///   Computes the silhouette width for the given set of clusters and observations.
/// </summary>
/// <param name="clusters">The clusters in the dataset.</param>
/// <param name="observations">The observation vectors.</param>
/// <returns>The silhouette width for the given set of clusters and observations.</returns>
private static double ComputeSilhouetteWidth(KMeansClusterCollection clusters, double[][] observations)
{
    Object lockObj = new object();
    double totalSilhouetteWidth = 0;

    // Get cluster assignments for all of the observations
    int[] clusterAssignments = clusters.Decide(observations);

    Parallel.For(0, observations.Length, observationIdx =>
    {
        double obsIntraclusterDissimilarity = 0;
        double obsInterClusterDissimilarity = 0;

        // Sum the distance between current observation and every other observation in the same cluster
        for (int caIdx = 0; caIdx < clusterAssignments.Length; caIdx++)
        {
            if (clusterAssignments[caIdx] == clusterAssignments[observationIdx])
            {
                obsIntraclusterDissimilarity +=
                    ComputeEuclideanTrajectoryDifference(observations[observationIdx], observations[caIdx]);
            }
        }

        // Compute the average intracluster dissimilarity (local variance)
        obsIntraclusterDissimilarity = obsIntraclusterDissimilarity /
            clusterAssignments.Where(ca => ca == clusterAssignments[observationIdx]).Count();

        // Setup list to hold distance from current observation to every other cluster centroid
        List<double> centroidDistances = new List<double>(clusters.Count);

        // Sum the distance between current observation and cluster centroids to which the current
        // observation is NOT assigned
        for (int idx = 0; idx < clusters.Count; idx++)
        {
            // Only compute distance when observation is not assigned to the current cluster
            if (idx != clusterAssignments[observationIdx])
            {
                centroidDistances.Add(ComputeEuclideanTrajectoryDifference(observations[observationIdx],
                    clusters[idx].Centroid));
            }
        }

        // Get the minimum intercluster dissimilarity (0 if there are no centroid differences)
        obsInterClusterDissimilarity = centroidDistances.Any() ? centroidDistances.Min() : 0;

        // Compute the silhouette width for the current observation
        var curSilhouetteWidth = (Math.Abs(obsIntraclusterDissimilarity) < 0.0000001 &&
                                  Math.Abs(obsInterClusterDissimilarity) < 0.0000001)
            ? 0
            : (obsInterClusterDissimilarity - obsIntraclusterDissimilarity) /
              Math.Max(obsIntraclusterDissimilarity, obsInterClusterDissimilarity);

        lock (lockObj)
        {
            totalSilhouetteWidth += curSilhouetteWidth;
        }
    });

    // Return the silhouette width
    return totalSilhouetteWidth / observations.Length;
}
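// Both silhouette implementations above reduce a clustering to a single width in
// [-1, 1]. A sketch of how such a score is typically consumed: scan candidate
// cluster counts and keep the k with the widest silhouette. ChooseKBySilhouette is
// an illustrative name, and the sketch assumes it lives in the same class as the
// private ComputeSilhouetteWidth defined above:
private static int ChooseKBySilhouette(double[][] observations, int minK = 2, int maxK = 10)
{
    int bestK = minK;
    double bestWidth = double.MinValue;
    for (int k = minK; k <= maxK && k < observations.Length; k++)
    {
        KMeansClusterCollection clusters = new KMeans(k).Learn(observations);
        double width = ComputeSilhouetteWidth(clusters, observations);
        if (width > bestWidth)
        {
            bestWidth = width;
            bestK = k;
        }
    }
    return bestK;
}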
protected override void SolveInstance(IGH_DataAccess DA)
{
    int n = 0;
    DA.GetData(0, ref n);

    List<List<double>> data = new List<List<double>>();
    for (int i = 2; i < Params.Input.Count; i++)
    {
        List<double> d = new List<double>();
        DA.GetDataList(i, d);
        if (d.Count > 0)
        {
            data.Add(d);
        }
    }

    // Declare some observations
    double[][] observations = new double[data[0].Count][];
    for (int i = 0; i < data[0].Count; i++)
    {
        List<double> num = new List<double>();
        for (int j = 0; j < data.Count; j++)
        {
            num.Add(data[j][i]);
        }
        observations[i] = num.ToArray();
    }

    // Get weights
    List<double> weights = new List<double>();
    DA.GetDataList(1, weights);
    if (weights.Count != data[0].Count)
    {
        weights = Enumerable.Repeat(1.0, data[0].Count).ToList();
    }

    // Seed
    Accord.Math.Random.Generator.Seed = 0;

    // Create a new K-Means algorithm with n clusters
    Accord.MachineLearning.KMeans kmeans = new Accord.MachineLearning.KMeans(n);
    KMeansClusterCollection clusters = kmeans.Learn(observations, weights.ToArray());
    int[] labels = clusters.Decide(observations);

    // Message
    base.Message = "Weights " + weights.Count.ToString() + "\r\n" +
                   "Dimensions " + observations.Length.ToString() + " of length " + observations[0].Length.ToString();

    // Output
    DA.SetDataList(0, labels.ToList());
    DataTree<int> dataTree = new DataTree<int>();
    for (int i = 0; i < labels.Length; i++)
    {
        dataTree.Add(i, new GH_Path(labels[i]));
    }
    DA.SetDataTree(1, dataTree);
}
static void Main(string[] args)
{
    Console.SetWindowSize(100, 50);

    // Read in the Online Retail feature dataset
    // TODO: change the path to point to your data directory
    string dataDirPath = @"\\Mac\Home\Documents\c-sharp-machine-learning\ch.6\input-data";

    // Load the data into a data frame
    string dataPath = Path.Combine(dataDirPath, "features.csv");
    Console.WriteLine("Loading {0}\n\n", dataPath);
    var ecommerceDF = Frame.ReadCsv(
        dataPath,
        hasHeaders: true,
        inferTypes: true
    );
    Console.WriteLine("* Shape: {0}, {1}", ecommerceDF.RowCount, ecommerceDF.ColumnCount);

    string[] features = new string[] { "NetRevenuePercentile", "AvgUnitPricePercentile", "AvgQuantityPercentile" };
    Console.WriteLine("* Features: {0}\n\n", String.Join(", ", features));

    var normalizedDf = Frame.CreateEmpty<int, string>();
    var average = ecommerceDF.Columns[features].Sum() / ecommerceDF.RowCount;
    foreach (string feature in features)
    {
        normalizedDf.AddColumn(feature, (ecommerceDF[feature] - average[feature]) / ecommerceDF[feature].StdDev());
    }

    double[][] sampleSet = BuildJaggedArray(
        normalizedDf.Columns[features].ToArray2D<double>(),
        normalizedDf.RowCount,
        features.Length
    );

    // Create a new K-Means algorithm with n clusters
    Accord.Math.Random.Generator.Seed = 0;

    int[] numClusters = new int[] { 4, 5, 6, 7, 8 };
    List<string> clusterNames = new List<string>();
    List<double> silhouetteScores = new List<double>();
    for (int i = 0; i < numClusters.Length; i++)
    {
        KMeans kmeans = new KMeans(numClusters[i]);
        KMeansClusterCollection clusters = kmeans.Learn(sampleSet);
        int[] labels = clusters.Decide(sampleSet);

        string colname = String.Format("Cluster-{0}", numClusters[i]);
        clusterNames.Add(colname);

        normalizedDf.AddColumn(colname, labels);
        ecommerceDF.AddColumn(colname, labels);

        Console.WriteLine("\n\n\n##################### {0} ###########################", colname);
        Console.WriteLine("\n\n* Centroids for {0} clusters:", numClusters[i]);
        PrintCentroidsInfo(clusters.Centroids, features);
        Console.WriteLine("\n");

        VisualizeClusters(normalizedDf, colname, "NetRevenuePercentile", "AvgUnitPricePercentile");
        VisualizeClusters(normalizedDf, colname, "AvgUnitPricePercentile", "AvgQuantityPercentile");
        VisualizeClusters(normalizedDf, colname, "NetRevenuePercentile", "AvgQuantityPercentile");

        for (int j = 0; j < numClusters[i]; j++)
        {
            GetTopNItemsPerCluster(ecommerceDF, j, colname);
        }

        double silhouetteScore = CalculateSilhouetteScore(normalizedDf, features, numClusters[i], colname);
        Console.WriteLine("\n\n* Silhouette Score: {0}", silhouetteScore.ToString("0.0000"));
        silhouetteScores.Add(silhouetteScore);
        Console.WriteLine("\n\n##############################################################\n\n\n");
    }

    for (int i = 0; i < clusterNames.Count; i++)
    {
        Console.WriteLine("- Silhouette Score for {0}: {1}", clusterNames[i], silhouetteScores[i].ToString("0.0000"));
    }

    Console.WriteLine("\n\n\nDONE!!");
    Console.ReadKey();
}