/// <summary>
/// Runs <c>KMeans.DoKMeans</c> 20 times on fresh clones of <paramref name="PList"/> and keeps the
/// attempt whose cluster sizes are the most balanced (or the least balanced, depending on
/// <paramref name="minVariance"/>).
/// </summary>
/// <param name="numClusters">Number of clusters requested from every attempt.</param>
/// <param name="PList">Input points. On return it refers to a new list holding clones of the points of the selected attempt.</param>
/// <param name="CList">On return it refers to a new list holding clones of the cluster centers of the selected attempt.</param>
/// <param name="maxError">Not used by this method: the literal 0.01 is passed to <c>KMeans.DoKMeans</c> instead.</param>
/// <param name="maxIterations">Not used by this method: the literal 100 is passed to <c>KMeans.DoKMeans</c> instead.</param>
/// <param name="minVariance"><c>true</c> keeps the attempt with the smallest spread of cluster sizes; <c>false</c> keeps the one with the largest spread.</param>
/// <returns>The sum of the values returned by <c>KMeans.DoKMeans</c> over all attempts (presumably iteration counts - confirm against KMeans).</returns>
public static int DoKMeansWithMinVariance(int numClusters, ref List<MyPoint> PList, ref List<ClusterCenterPoint> CList, double maxError, int maxIterations, bool minVariance)
{
    const int Attempts = 20;

    // Start from the worst possible score so that the first attempt always becomes the current best.
    double bestSpread = minVariance ? double.MaxValue : double.MinValue;
    List<MyPoint> bestPoints = new List<MyPoint>();
    List<ClusterCenterPoint> bestCenters = new List<ClusterCenterPoint>();
    int totalIterations = 0;

    for (int attempt = 0; attempt < Attempts; attempt++)
    {
        // Every attempt clusters its own clones so the caller's points are never touched.
        List<MyPoint> points = new List<MyPoint>(PList.Count);
        foreach (MyPoint source in PList)
        {
            points.Add((MyPoint)source.Clone());
        }

        List<ClusterCenterPoint> centers = null;

        // The trailing "true" requests k-means++ (per the original author's note).
        totalIterations += KMeans.DoKMeans(numClusters, ref points, ref centers, 0.01, 100, true);

        // Count how many points ended up in each cluster.
        int[] clusterSizes = new int[numClusters];
        foreach (MyPoint point in points)
        {
            clusterSizes[point.ClusterId] += 1;
        }

        // Spread = square root of the summed squared deviations from the mean cluster size.
        // It is not divided by the cluster count, which does not matter for ranking attempts.
        double meanSize = points.Count / (double)numClusters;
        double squaredDeviations = 0;
        for (int i = 0; i < numClusters; i++)
        {
            double deviation = clusterSizes[i] - meanSize;
            squaredDeviations += deviation * deviation;
        }

        double spread = Math.Sqrt(squaredDeviations);

        // Strict comparison: on a tie the earlier attempt is kept.
        bool improved = minVariance ? spread < bestSpread : spread > bestSpread;
        if (!improved)
        {
            continue;
        }

        bestSpread = spread;

        bestPoints.Clear();
        foreach (MyPoint point in points)
        {
            bestPoints.Add((MyPoint)point.Clone());
        }

        bestCenters.Clear();
        foreach (ClusterCenterPoint center in centers)
        {
            bestCenters.Add((ClusterCenterPoint)center.Clone());
        }
    }

    CList = bestCenters;
    PList = bestPoints;
    return totalIterations;
}
/// <summary>
/// Runs <c>KMeans.MDoKMeans</c> 20 times on fresh clones of <paramref name="aoPList"/> and keeps the
/// attempt whose cluster sizes are the most balanced (or the least balanced, depending on
/// <paramref name="abMinVariance"/>).
/// </summary>
/// <param name="aiNumClusters">Number of clusters requested from every attempt.</param>
/// <param name="aoPList">Input points. On return it refers to a new list holding clones of the points of the selected attempt.</param>
/// <param name="aoCL">On return it refers to a new list holding clones of the cluster centers of the selected attempt.</param>
/// <param name="adMaxError">Not used by this method: the literal 0.01 is passed to <c>KMeans.MDoKMeans</c> instead.</param>
/// <param name="aiMaxIterations">Not used by this method: the literal 100 is passed to <c>KMeans.MDoKMeans</c> instead.</param>
/// <param name="abMinVariance"><c>true</c> keeps the attempt with the smallest spread of cluster sizes; <c>false</c> keeps the one with the largest spread.</param>
/// <returns>The sum of the values returned by <c>KMeans.MDoKMeans</c> over all attempts (presumably iteration counts - confirm against KMeans).</returns>
public static int MDoKMeansWithMinVariance(int aiNumClusters, ref List<MyPoint> aoPList, ref List<ClusterCenterPoint> aoCL, double adMaxError, int aiMaxIterations, bool abMinVariance)
{
    const int kiAttempts = 20;

    // Start from the worst possible score so that the first attempt always becomes the current best.
    double kdBestSpread = abMinVariance ? double.MaxValue : double.MinValue;
    int kiTotalIter = 0;
    List<MyPoint> koPListBest = new List<MyPoint>();
    List<ClusterCenterPoint> koCListBest = new List<ClusterCenterPoint>();

    // Reused across attempts; zeroed at the start of each one.
    int[] kiCCount = new int[aiNumClusters];

    // Pick the best clustering (most or least balanced) out of a fixed number of attempts at clustering.
    for (int kiAttempt = 0; kiAttempt < kiAttempts; kiAttempt++)
    {
        // Every attempt clusters its own clones so the caller's points are never touched.
        List<MyPoint> koPListCopy = new List<MyPoint>(aoPList.Count);
        foreach (MyPoint koMP in aoPList)
        {
            koPListCopy.Add((MyPoint)koMP.Clone());
        }

        List<ClusterCenterPoint> koCListCopy = null;

        // The trailing "true" requests k-means++ (per the original author's note).
        kiTotalIter += KMeans.MDoKMeans(aiNumClusters, ref koPListCopy, ref koCListCopy, 0.01, 100, true);

        // Count how many points ended up in each cluster.
        Array.Clear(kiCCount, 0, kiCCount.Length);
        foreach (MyPoint koMP in koPListCopy)
        {
            kiCCount[koMP.ViClusterId] += 1;
        }

        // Spread = square root of the summed squared deviations from the mean cluster size.
        // It is not divided by the cluster count, which does not matter for ranking attempts.
        double kdMean = koPListCopy.Count / (double)aiNumClusters;
        double kdSumSquares = 0;
        for (int i = 0; i < aiNumClusters; i++)
        {
            double kdDelta = kiCCount[i] - kdMean;
            kdSumSquares += kdDelta * kdDelta;
        }

        double kdSpread = Math.Sqrt(kdSumSquares);

        // Strict comparison: on a tie the earlier attempt is kept.
        bool kbImproved = abMinVariance ? kdSpread < kdBestSpread : kdSpread > kdBestSpread;
        if (!kbImproved)
        {
            continue;
        }

        kdBestSpread = kdSpread;

        koPListBest.Clear();
        foreach (MyPoint koMP in koPListCopy)
        {
            koPListBest.Add((MyPoint)koMP.Clone());
        }

        koCListBest.Clear();
        foreach (ClusterCenterPoint koCP in koCListCopy)
        {
            koCListBest.Add((ClusterCenterPoint)koCP.Clone());
        }
    }

    aoCL = koCListBest;
    aoPList = koPListBest;
    return kiTotalIter;
}
/// <summary>
/// Runs <c>KMeans.DoKMeans</c> 20 times in parallel on fresh clones of <paramref name="PList"/> and keeps
/// the attempt whose cluster sizes are the most balanced (or the least balanced, depending on
/// <paramref name="minVariance"/>).
/// </summary>
/// <param name="numClusters">Number of clusters requested from every attempt.</param>
/// <param name="PList">Input points. On return it refers to a new list holding clones of the points of the selected attempt.</param>
/// <param name="CList">On return it refers to a new list holding clones of the cluster centers of the selected attempt.</param>
/// <param name="maxError">Not used by this method: the literal 0.01 is passed to <c>KMeans.DoKMeans</c> instead.</param>
/// <param name="maxIterations">Not used by this method: the literal 100 is passed to <c>KMeans.DoKMeans</c> instead.</param>
/// <param name="minVariance"><c>true</c> keeps the attempt with the smallest spread of cluster sizes; <c>false</c> keeps the one with the largest spread.</param>
/// <returns>The sum of the values returned by <c>KMeans.DoKMeans</c> over all attempts (presumably iteration counts - confirm against KMeans).</returns>
/// <remarks>
/// Attempts finish in no particular order, so when two attempts have exactly the same spread the one
/// that is kept may differ from run to run.
/// NOTE(review): this assumes KMeans.DoKMeans may be called concurrently on independent lists - confirm.
/// </remarks>
public static int DoKMeansWithMinVariance(int numClusters, ref List<MyPoint> PList, ref List<ClusterCenterPoint> CList, double maxError, int maxIterations, bool minVariance)
{
    const int Attempts = 20;

    // A ref parameter cannot be captured by a lambda, so the workers read from this snapshot instead.
    List<MyPoint> snapshot = new List<MyPoint>(PList);

    // Guards every piece of state shared between workers: the running total and the current best.
    object gate = new object();

    // Start from the worst possible score so that the first finished attempt always becomes the best.
    double bestSpread = minVariance ? double.MaxValue : double.MinValue;
    List<MyPoint> bestPoints = new List<MyPoint>();
    List<ClusterCenterPoint> bestCenters = new List<ClusterCenterPoint>();
    int totalIterations = 0;

    Parallel.For(0, Attempts, attempt =>
    {
        // Every attempt clusters its own clones; the snapshot itself is only read.
        List<MyPoint> points = new List<MyPoint>(snapshot.Count);
        for (int i = 0; i < snapshot.Count; i++)
        {
            points.Add((MyPoint)snapshot[i].Clone());
        }

        List<ClusterCenterPoint> centers = null;

        // The trailing "true" requests k-means++ (per the original author's note).
        // The result is kept in a local and added to the shared total under the lock below;
        // an unsynchronised "+=" on the captured total would lose updates.
        int attemptIterations = KMeans.DoKMeans(numClusters, ref points, ref centers, 0.01, 100, true);

        // Count how many points ended up in each cluster.
        int[] clusterSizes = new int[numClusters];
        foreach (MyPoint point in points)
        {
            clusterSizes[point.ClusterId] += 1;
        }

        // Spread = square root of the summed squared deviations from the mean cluster size.
        // It is not divided by the cluster count, which does not matter for ranking attempts.
        double meanSize = points.Count / (double)numClusters;
        double squaredDeviations = 0;
        for (int i = 0; i < numClusters; i++)
        {
            double deviation = clusterSizes[i] - meanSize;
            squaredDeviations += deviation * deviation;
        }

        double spread = Math.Sqrt(squaredDeviations);

        // The comparison and the update must happen atomically: otherwise a worker could pass the
        // comparison, be pre-empted while another worker stores a better result, and then overwrite
        // that better result with its own.
        lock (gate)
        {
            totalIterations += attemptIterations;

            bool improved = minVariance ? spread < bestSpread : spread > bestSpread;
            if (improved)
            {
                bestSpread = spread;

                bestPoints.Clear();
                foreach (MyPoint point in points)
                {
                    bestPoints.Add((MyPoint)point.Clone());
                }

                bestCenters.Clear();
                foreach (ClusterCenterPoint center in centers)
                {
                    bestCenters.Add((ClusterCenterPoint)center.Clone());
                }
            }
        }
    });

    CList = bestCenters;
    PList = bestPoints;
    return totalIterations;
}