/// <summary>
/// Runs k-means for k = 1 up to 19 (never more than the number of points) and returns the
/// clustering with the smallest evaluation value <c>Fk</c>.
/// </summary>
/// <remarks>
/// For each k, <c>Sk</c> is the sum of the values stored in the clustering's <c>dist_error</c> map.
/// <c>Fk</c> is 100 for k = 1, 1 when the previous k's <c>Sk</c> is 0, and otherwise
/// <c>Sk / (ak * previous Sk)</c>, where ak is <c>Ak(k, Nd)</c> when Nd &gt; 1 and 1 otherwise.
/// NOTE(review): this looks like the f(K) criterion of Pham, Dimov and Nguyen - confirm.
/// </remarks>
/// <param name="allPoints">Cases to cluster; must contain at least one case.</param>
/// <returns>The clustering with the lowest <c>Fk</c>; the lowest k wins ties.</returns>
private PointClusters GetBestKMeans(List<Case> allPoints)
{
    const int maxClusters = 19;

    // Number of attributes: every feature except "id" and "cluster".
    int Nd = allPoints[0].GetFeatures().Count - 2;

    // GetKMeans re-seeds until it obtains exactly k non-empty clusters, which can never
    // happen once k exceeds the number of points, so cap the range.
    int lastK = Math.Min(maxClusters, allPoints.Count);

    // Baseline run with a single cluster. The iteration counter is reset so that a value
    // left over from an earlier call cannot cut this run short.
    iter = 0;
    List<Case> seedPoints = GetSeedPoints(allPoints, 1);
    PointClusters best = GetKMeans(allPoints, seedPoints, 1);

    double previousSk = 0;
    foreach (KeyValuePair<Case, double> pair in best.dist_error)
    {
        previousSk += pair.Value;
    }
    best.Sk = previousSk;
    best.Fk = 100; // sentinel: any later k with Fk below 100 replaces the k = 1 result

    for (int k = 2; k <= lastK; k++)
    {
        iter = 0;
        seedPoints = GetSeedPoints(allPoints, k);
        PointClusters candidate = GetKMeans(allPoints, seedPoints, k);

        // Sk has to be computed for every k: Fk below is a ratio of this run's Sk to the
        // previous run's Sk.
        double sk = 0;
        foreach (KeyValuePair<Case, double> pair in candidate.dist_error)
        {
            sk += pair.Value;
        }
        candidate.Sk = sk;

        if (previousSk == 0)
        {
            candidate.Fk = 1;
        }
        else
        {
            double ak = 1;
            if (Nd > 1)
            {
                ak = Convert.ToDouble(Ak(k, Nd));
            }
            candidate.Fk = sk / (ak * previousSk);
        }

        // Strict comparison keeps the earliest k on ties.
        if (candidate.Fk < best.Fk)
        {
            best = candidate;
        }
        previousSk = sk;
    }

    return best;
}
/// <summary>
/// Builds one replacement centre per cluster by aggregating each feature over the cluster's members.
/// cf. http://en.wikipedia.org/wiki/Centroid
/// Consider also: Medoid cf. http://en.wikipedia.org/wiki/Medoids
/// </summary>
/// <remarks>
/// Integer features take the integer-divided mean, float features the mean, boolean features the
/// logical OR, and string/categorical features the most frequent value (the first value seen wins
/// ties). The "id" feature is set to the zero-based position of the cluster in enumeration order
/// and the "cluster" feature is set to -1. Features of any other type are not copied.
/// </remarks>
/// <param name="pcs">Clustering whose <c>PC</c> values are the member lists to aggregate.</param>
/// <returns>One new centre per cluster, in the enumeration order of <c>pcs.PC.Values</c>.</returns>
private List<Case> GetCentroid(PointClusters pcs)
{
    List<Case> centres = new List<Case>(pcs.PC.Count);
    List<string> labels = new List<string>();
    List<int> labelCounts = new List<int>();
    Feature current = new Feature();
    int clusterIndex = 0;

    foreach (List<Case> members in pcs.PC.Values)
    {
        Case first = members[0];
        Case centre = new Case(first.GetCaseID(), first.GetCaseName(), first.GetCaseDescription());

        for (int j = 0; j < first.GetFeatures().Count; j++)
        {
            // Fresh accumulators for every feature.
            int intTotal = 0;
            double floatTotal = 0;
            bool anyTrue = false;
            labels.Clear();
            labelCounts.Clear();

            foreach (Case member in members)
            {
                current = (Feature)member.GetFeatures()[j];
                var memberFeatureName = current.GetFeatureName();
                if (memberFeatureName == "id" || memberFeatureName == "cluster")
                {
                    // Bookkeeping features are not aggregated.
                    break;
                }

                var memberKind = current.GetFeatureType();
                if (memberKind == FeatureType.TYPE_FEATURE_INT)
                {
                    intTotal += Convert.ToInt32(current.GetFeatureValue());
                }
                else if (memberKind == FeatureType.TYPE_FEATURE_FLOAT)
                {
                    floatTotal += Convert.ToDouble(current.GetFeatureValue());
                }
                else if (memberKind == FeatureType.TYPE_FEATURE_BOOL)
                {
                    anyTrue = anyTrue || Convert.ToBoolean(current.GetFeatureValue());
                }
                else if (memberKind == FeatureType.TYPE_FEATURE_STRING || memberKind == FeatureType.TYPE_FEATURE_CATEGORICAL)
                {
                    // Tally occurrences, keeping labels in first-seen order.
                    string label = current.GetFeatureValue().ToString();
                    int slot = labels.IndexOf(label);
                    if (slot < 0)
                    {
                        labels.Add(label);
                        labelCounts.Add(1);
                    }
                    else
                    {
                        labelCounts[slot] = labelCounts[slot] + 1;
                    }
                }
            }

            // "current" is the last feature read above; it supplies the metadata for the centre.
            var featureName = current.GetFeatureName();
            var kind = current.GetFeatureType();
            if (featureName == "cluster")
            {
                centre.AddFeature(current.GetFeatureName(), current.GetFeatureType(), -1, current.GetWeight(), current.GetIsKey(), current.GetIsIndex(), current.GetFeatureUnit());
            }
            else if (featureName == "id")
            {
                centre.AddFeature(current.GetFeatureName(), current.GetFeatureType(), clusterIndex, current.GetWeight(), current.GetIsKey(), current.GetIsIndex(), current.GetFeatureUnit());
            }
            else if (kind == FeatureType.TYPE_FEATURE_INT)
            {
                centre.AddFeature(current.GetFeatureName(), current.GetFeatureType(), intTotal / members.Count, current.GetWeight(), current.GetIsKey(), current.GetIsIndex(), current.GetFeatureUnit());
            }
            else if (kind == FeatureType.TYPE_FEATURE_FLOAT)
            {
                centre.AddFeature(current.GetFeatureName(), current.GetFeatureType(), floatTotal / members.Count, current.GetWeight(), current.GetIsKey(), current.GetIsIndex(), current.GetFeatureUnit());
            }
            else if (kind == FeatureType.TYPE_FEATURE_BOOL)
            {
                centre.AddFeature(current.GetFeatureName(), current.GetFeatureType(), anyTrue, current.GetWeight(), current.GetIsKey(), current.GetIsIndex(), current.GetFeatureUnit());
            }
            else if (kind == FeatureType.TYPE_FEATURE_STRING || kind == FeatureType.TYPE_FEATURE_CATEGORICAL)
            {
                // Most frequent label; IndexOf returns the first one that reaches the maximum.
                int modeSlot = labelCounts.IndexOf(labelCounts.Max());
                string mode = labels[modeSlot];
                centre.AddFeature(current.GetFeatureName(), current.GetFeatureType(), mode, current.GetWeight(), current.GetIsKey(), current.GetIsIndex(), current.GetFeatureUnit());
            }
        } // end feature

        centres.Add(centre);
        clusterIndex++;
    } // end clusters

    return centres;
}
/// <summary>
/// Bog standard k-means: assigns every point to its nearest centre, recomputes the centres with
/// <c>GetCentroid</c>, and repeats until the centres stop changing or the shared <c>iter</c>
/// counter reaches 1000.
/// </summary>
/// <remarks>
/// If an assignment pass does not produce exactly <paramref name="k"/> clusters, new seeds are
/// drawn with <c>GetSeedPoints</c> and the pass is repeated; this does not advance <c>iter</c>,
/// so the method only terminates when k clusters can actually be formed.
/// </remarks>
/// <param name="allPoints">The points to cluster.</param>
/// <param name="seedPoints">Initial cluster centres.</param>
/// <param name="k">Required number of clusters.</param>
/// <returns>The clustering from the final assignment pass, keyed by the centres used in it.</returns>
private PointClusters GetKMeans(List<Case> allPoints, List<Case> seedPoints, int k)
{
    const int maxIterations = 1000;
    List<Case> nearestSeeds = new List<Case>();

    while (true)
    {
        PointClusters cluster = new PointClusters();

        // Cycle through all points and assign each to its nearest centre.
        foreach (Case p in allPoints)
        {
            double minD = double.MaxValue;
            nearestSeeds.Clear();

            foreach (Case sPoint in seedPoints)
            {
                double dist = GetEuclideanD(p, sPoint);
                if (dist < minD)
                {
                    nearestSeeds.Clear();
                    minD = dist;
                    nearestSeeds.Add(sPoint);
                }
                else if (dist == minD && !nearestSeeds.Contains(sPoint))
                {
                    nearestSeeds.Add(sPoint);
                }
            }

            // Several centres at exactly the same distance: pick one at random.
            Case keyPoint;
            if (nearestSeeds.Count > 1)
            {
                keyPoint = nearestSeeds[GetRandNumCrypto(0, nearestSeeds.Count)];
            }
            else
            {
                keyPoint = nearestSeeds[0];
            }

            if (!cluster.PC.ContainsKey(keyPoint))
            {
                List<Case> newCluster = new List<Case>();
                newCluster.Add(p);
                cluster.PC.Add(keyPoint, newCluster);
            }
            else
            {
                cluster.PC[keyPoint].Add(p);
            }
        }

        // Bulletproof check - if it comes out of the wash incorrect then re-seed.
        if (cluster.PC.Count != k)
        {
            seedPoints = GetSeedPoints(allPoints, k);
            continue;
        }

        List<Case> newSeeds = GetCentroid(cluster);

        // Converged when every recomputed centre has the same feature values as one of the
        // centres used in this pass. The centroids are newly created objects, so a dictionary
        // key lookup would compare identities rather than values and never report a match
        // (unless Case defines value equality - verify).
        bool converged = true;
        foreach (Case newSeed in newSeeds)
        {
            bool matched = false;
            foreach (KeyValuePair<Case, List<Case>> item in cluster.PC)
            {
                bool same = true;
                foreach (Feature f in item.Key.GetFeatures())
                {
                    Feature seedFeature = newSeed.GetFeature(f.GetFeatureName());
                    if (ReferenceEquals(seedFeature, null)
                        || Convert.ToString(f.GetFeatureValue()) != Convert.ToString(seedFeature.GetFeatureValue()))
                    {
                        same = false;
                        break;
                    }
                }
                if (same)
                {
                    matched = true;
                    break;
                }
            }
            if (!matched)
            {
                converged = false;
                break;
            }
        }

        if (converged || iter >= maxIterations)
        {
            return cluster;
        }

        iter++;
        seedPoints = newSeeds;
    }
}
/// <summary>
/// K-means: assigns every point to its nearest centre, recomputes the centres with
/// <c>GetCentroid</c>, and repeats until the centres stop changing or the shared <c>iter</c>
/// counter reaches 1000.
/// </summary>
/// <remarks>
/// Alongside the point lists in <c>PC</c>, the returned clustering's <c>dist_error</c> holds, per
/// centre, the sum of the distances from that centre to the points assigned to it.
/// If an assignment pass does not produce exactly <paramref name="k"/> clusters, new seeds are
/// drawn with <c>GetSeedPoints</c> and the pass is repeated; this does not advance <c>iter</c>,
/// so the method only terminates when k clusters can actually be formed.
/// </remarks>
/// <param name="allPoints">The points to cluster.</param>
/// <param name="seedPoints">Initial cluster centres.</param>
/// <param name="k">Required number of clusters.</param>
/// <returns>The clustering from the final assignment pass, keyed by the centres used in it.</returns>
private PointClusters GetKMeans(List<Case> allPoints, List<Case> seedPoints, int k)
{
    const int maxIterations = 1000;
    List<Case> nearestSeeds = new List<Case>();

    while (true)
    {
        PointClusters cluster = new PointClusters();

        // Cycle through all points and assign each to its nearest centre.
        foreach (Case p in allPoints)
        {
            double minD = double.MaxValue;
            nearestSeeds.Clear();

            foreach (Case sPoint in seedPoints)
            {
                double dist = GetEuclideanD(p, sPoint);
                if (dist < minD)
                {
                    nearestSeeds.Clear();
                    minD = dist;
                    nearestSeeds.Add(sPoint);
                }
                else if (dist == minD && !nearestSeeds.Contains(sPoint))
                {
                    nearestSeeds.Add(sPoint);
                }
            }

            // Several centres at exactly the same distance: pick one at random.
            Case keyPoint;
            if (nearestSeeds.Count > 1)
            {
                keyPoint = nearestSeeds[GetRandNumCrypto(0, nearestSeeds.Count)];
            }
            else
            {
                keyPoint = nearestSeeds[0];
            }

            if (!cluster.PC.ContainsKey(keyPoint))
            {
                // First point for this centre.
                List<Case> newCluster = new List<Case>();
                newCluster.Add(p);
                cluster.PC.Add(keyPoint, newCluster);
                cluster.dist_error.Add(keyPoint, minD);
            }
            else
            {
                cluster.PC[keyPoint].Add(p);
                cluster.dist_error[keyPoint] += minD;
            }
        }

        // Bulletproof check - if it comes out of the wash incorrect then re-seed.
        if (cluster.PC.Count != k)
        {
            seedPoints = GetSeedPoints(allPoints, k);
            continue;
        }

        List<Case> newSeeds = GetCentroid(cluster);

        // Converged when every recomputed centre has the same feature values (compared as
        // strings) as one of the centres used in this pass. A key lookup is not usable here
        // because the centroids are newly created objects.
        bool converged = true;
        foreach (Case newSeed in newSeeds)
        {
            bool matched = false;
            foreach (KeyValuePair<Case, List<Case>> item in cluster.PC)
            {
                bool same = true;
                foreach (Feature f in item.Key.GetFeatures())
                {
                    Feature seedFeature = newSeed.GetFeature(f.GetFeatureName());
                    // A feature missing from the new centre counts as a difference.
                    if (ReferenceEquals(seedFeature, null)
                        || Convert.ToString(f.GetFeatureValue()) != Convert.ToString(seedFeature.GetFeatureValue()))
                    {
                        same = false;
                        break;
                    }
                }
                if (same)
                {
                    matched = true;
                    break;
                }
            }
            if (!matched)
            {
                converged = false;
                break;
            }
        }

        if (converged || iter >= maxIterations)
        {
            return cluster;
        }

        iter++;
        seedPoints = newSeeds;
    }
}
/// <summary>
/// Get the centroid of each cluster of points.
/// cf. http://en.wikipedia.org/wiki/Centroid
/// Consider also: Medoid cf. http://en.wikipedia.org/wiki/Medoids
/// </summary>
/// <remarks>
/// Each feature is aggregated on its own over the members of a cluster: integer features take
/// the integer-divided mean, float features the mean, boolean features the logical OR, and
/// string features a value picked at random from the members' values. The "id" feature is not
/// aggregated, so it receives the empty aggregate for its type (0 / 0.0 / false), or the first
/// member's value when it is string-typed. Features of any other type are not copied.
/// Every added feature gets weight 1.0, no key/index flags and an empty unit.
/// </remarks>
/// <param name="pcs">Clustering whose <c>PC</c> values are the member lists to aggregate.</param>
/// <returns>One new centre per cluster, in the enumeration order of <c>pcs.PC.Values</c>.</returns>
private List<Case> GetCentroid(PointClusters pcs)
{
    List<Case> newSeeds = new List<Case>(pcs.PC.Count);

    // One generator for the whole call: instances created in quick succession can be seeded
    // identically on some runtimes and would then all return the same value.
    Random rd = new Random();

    foreach (List<Case> cluster in pcs.PC.Values)
    {
        Case newSeed = new Case(cluster[0].GetCaseID(), cluster[0].GetCaseName(), cluster[0].GetCaseDescription());
        int featureCount = cluster[0].GetFeatures().Count;

        for (int j = 0; j < featureCount; j++)
        {
            // Accumulators are per feature; carrying them over would mix the totals of
            // earlier features into this one.
            int sumf = 0;
            double sumd = 0;
            bool sumb = false;
            List<string> sumst = new List<string>();
            Feature f = new Feature();

            foreach (Case p in cluster)
            {
                f = (Feature)p.GetFeatures()[j];
                if (f.GetFeatureName() == "id")
                {
                    break;
                }
                if (f.GetFeatureType() == FeatureType.TYPE_FEATURE_INT)
                {
                    sumf += Convert.ToInt32(f.GetFeatureValue());
                }
                else if (f.GetFeatureType() == FeatureType.TYPE_FEATURE_FLOAT)
                {
                    sumd += Convert.ToDouble(f.GetFeatureValue());
                }
                else if (f.GetFeatureType() == FeatureType.TYPE_FEATURE_BOOL)
                {
                    sumb = (sumb || Convert.ToBoolean(f.GetFeatureValue()));
                }
                else if (f.GetFeatureType() == FeatureType.TYPE_FEATURE_STRING)
                {
                    sumst.Add(f.GetFeatureValue().ToString());
                }
            }

            if (f.GetFeatureType() == FeatureType.TYPE_FEATURE_INT)
            {
                newSeed.AddFeature(f.GetFeatureName(), f.GetFeatureType(), sumf / cluster.Count, 1.0, false, false, "");
            }
            else if (f.GetFeatureType() == FeatureType.TYPE_FEATURE_FLOAT)
            {
                newSeed.AddFeature(f.GetFeatureName(), f.GetFeatureType(), sumd / cluster.Count, 1.0, false, false, "");
            }
            else if (f.GetFeatureType() == FeatureType.TYPE_FEATURE_BOOL)
            {
                newSeed.AddFeature(f.GetFeatureName(), f.GetFeatureType(), sumb, 1.0, false, false, "");
            }
            else if (f.GetFeatureType() == FeatureType.TYPE_FEATURE_STRING)
            {
                // No samples are collected for the skipped "id" feature; fall back to the
                // value of the member it was read from.
                string picked = sumst.Count > 0
                    ? sumst[rd.Next(sumst.Count)]
                    : Convert.ToString(f.GetFeatureValue());
                newSeed.AddFeature(f.GetFeatureName(), f.GetFeatureType(), picked, 1.0, false, false, "");
            }
        }

        newSeeds.Add(newSeed);
    }

    return newSeeds;
}