Exemple #1
0
        /// <summary>
        /// Runs k-means 20 times, each time on fresh clones of <paramref name="PList"/>, and keeps the
        /// attempt whose cluster sizes are the most balanced (or, when <paramref name="minVariance"/>
        /// is false, the least balanced).
        /// </summary>
        /// <remarks>
        /// Balance is scored as the square root of the summed squared deviation of every cluster's
        /// member count from the ideal count (number of points / number of clusters).
        /// NOTE(review): <paramref name="maxError"/> and <paramref name="maxIterations"/> are not
        /// forwarded; every inner run calls KMeans.DoKMeans with the fixed values 0.01 and 100, exactly
        /// as before. Confirm with callers before wiring the parameters through.
        /// </remarks>
        /// <param name="numClusters">Number of clusters requested from each k-means run.</param>
        /// <param name="PList">Input points; replaced on return by clones of the winning attempt's points.</param>
        /// <param name="CList">Replaced on return by clones of the winning attempt's cluster centers.</param>
        /// <param name="maxError">Currently unused (see remarks).</param>
        /// <param name="maxIterations">Currently unused (see remarks).</param>
        /// <param name="minVariance">True to keep the lowest score, false to keep the highest.</param>
        /// <returns>The sum of the values returned by KMeans.DoKMeans over all attempts.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="PList"/> is null.</exception>
        public static int DoKMeansWithMinVariance(int numClusters, ref List<MyPoint> PList, ref List<ClusterCenterPoint> CList, double maxError, int maxIterations, bool minVariance)
        {
            if (PList == null)
            {
                throw new ArgumentNullException("PList");
            }

            const int Attempts = 20;
            // Fixed settings handed to every inner run (see the NOTE in the remarks).
            const double InnerMaxError = 0.01;
            const int InnerMaxIterations = 100;

            // Seed with the worst possible score so the first attempt is always accepted.
            double bestSpread = minVariance ? double.MaxValue : double.MinValue;

            List<MyPoint> PListBest = new List<MyPoint>();
            List<ClusterCenterPoint> CListBest = new List<ClusterCenterPoint>();
            int iter = 0;

            for (int attempt = 0; attempt < Attempts; attempt++)
            {
                // Each attempt works on its own clones so it starts from the caller's points.
                List<MyPoint> PListCopy = new List<MyPoint>(PList.Count);
                foreach (MyPoint mp in PList)
                {
                    PListCopy.Add((MyPoint)mp.Clone());
                }

                List<ClusterCenterPoint> CListCopy = null;
                iter += KMeans.DoKMeans(numClusters, ref PListCopy, ref CListCopy, InnerMaxError, InnerMaxIterations, true); // true means do kmeansplusplus

                // Count how many points ended up in each cluster (new int[] is zero-initialized).
                int[] CCount = new int[numClusters];
                foreach (MyPoint mp in PListCopy)
                {
                    CCount[mp.ClusterId] += 1;
                }

                // Summed squared deviation of the cluster sizes from the ideal, perfectly even size.
                double idealCount = PListCopy.Count / (double)numClusters;
                double sumSquares = 0;
                for (int i = 0; i < numClusters; i++)
                {
                    double deviation = CCount[i] - idealCount;
                    sumSquares += deviation * deviation;
                }
                double spread = Math.Sqrt(sumSquares);

                bool improved = minVariance ? spread < bestSpread : spread > bestSpread;
                if (!improved)
                {
                    continue;
                }

                // New best attempt: snapshot its points and centers.
                bestSpread = spread;
                PListBest.Clear();
                foreach (MyPoint mp in PListCopy)
                {
                    PListBest.Add((MyPoint)mp.Clone());
                }

                CListBest.Clear();
                if (CListCopy != null) // the inner run may have left the center list unset
                {
                    foreach (ClusterCenterPoint cp in CListCopy)
                    {
                        CListBest.Add((ClusterCenterPoint)cp.Clone());
                    }
                }
            }

            CList = CListBest;
            PList = PListBest;
            return iter;
        }
Exemple #2
0
        /// <summary>
        /// Runs k-means 20 times, each time on fresh clones of <paramref name="aoPList"/>, and keeps
        /// the attempt whose cluster sizes are the most balanced (or, when
        /// <paramref name="abMinVariance"/> is false, the least balanced).
        /// </summary>
        /// <remarks>
        /// Balance is scored as the square root of the summed squared deviation of every cluster's
        /// member count from the ideal count (number of points / number of clusters).
        /// NOTE(review): <paramref name="adMaxError"/> and <paramref name="aiMaxIterations"/> are not
        /// forwarded; every inner run calls KMeans.MDoKMeans with the fixed values 0.01 and 100,
        /// exactly as before. Confirm with callers before wiring the parameters through.
        /// </remarks>
        /// <param name="aiNumClusters">Number of clusters requested from each k-means run.</param>
        /// <param name="aoPList">Input points; replaced on return by clones of the winning attempt's points.</param>
        /// <param name="aoCL">Replaced on return by clones of the winning attempt's cluster centers.</param>
        /// <param name="adMaxError">Currently unused (see remarks).</param>
        /// <param name="aiMaxIterations">Currently unused (see remarks).</param>
        /// <param name="abMinVariance">True to keep the lowest score, false to keep the highest.</param>
        /// <returns>The sum of the values returned by KMeans.MDoKMeans over all attempts.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="aoPList"/> is null.</exception>
        public static int MDoKMeansWithMinVariance(int aiNumClusters,
                                                   ref List<MyPoint> aoPList,
                                                   ref List<ClusterCenterPoint> aoCL,
                                                   double adMaxError, int aiMaxIterations, bool abMinVariance)
        {
            if (aoPList == null)
            {
                throw new ArgumentNullException("aoPList");
            }

            const int    kiAttempts          = 20;
            // Fixed settings handed to every inner run (see the NOTE in the remarks).
            const double kdInnerMaxError     = 0.01;
            const int    kiInnerMaxIterations = 100;

            // Seed with the worst possible score so the first attempt is always accepted.
            double kdBestSpread = abMinVariance ? double.MaxValue : double.MinValue;
            int    kiIter       = 0;

            List<MyPoint>            koPListBest = new List<MyPoint>();
            List<ClusterCenterPoint> koCListBest = new List<ClusterCenterPoint>();

            // Per-cluster member counts; allocated once and reset for every attempt.
            int[] kiCCount = new int[aiNumClusters];

            for (int kiAttempt = 0; kiAttempt < kiAttempts; kiAttempt++)
            {
                // Each attempt works on its own clones so it starts from the caller's points.
                List<MyPoint> koPListCopy = new List<MyPoint>(aoPList.Count);
                foreach (MyPoint koMP in aoPList)
                {
                    koPListCopy.Add((MyPoint)koMP.Clone());
                }

                List<ClusterCenterPoint> koCListCopy = null;
                kiIter += KMeans.MDoKMeans(aiNumClusters, ref koPListCopy, ref koCListCopy, kdInnerMaxError, kiInnerMaxIterations, true); // true means do kmeansplusplus

                // Count how many points ended up in each cluster.
                Array.Clear(kiCCount, 0, kiCCount.Length);
                foreach (MyPoint koMP in koPListCopy)
                {
                    kiCCount[koMP.ViClusterId] += 1;
                }

                // Summed squared deviation of the cluster sizes from the ideal, perfectly even size.
                double kdIdealCount = koPListCopy.Count / (double)aiNumClusters;
                double kdSumSquares = 0;
                for (int i = 0; i < aiNumClusters; i++)
                {
                    double kdDeviation = kiCCount[i] - kdIdealCount;
                    kdSumSquares += kdDeviation * kdDeviation;
                }
                double kdSpread = Math.Sqrt(kdSumSquares);

                bool kbImproved = abMinVariance ? kdSpread < kdBestSpread : kdSpread > kdBestSpread;
                if (!kbImproved)
                {
                    continue;
                }

                // New best attempt: snapshot its points and centers.
                kdBestSpread = kdSpread;
                koPListBest.Clear();
                foreach (MyPoint koMP in koPListCopy)
                {
                    koPListBest.Add((MyPoint)koMP.Clone());
                }

                koCListBest.Clear();
                if (koCListCopy != null) // the inner run may have left the center list unset
                {
                    foreach (ClusterCenterPoint koCP in koCListCopy)
                    {
                        koCListBest.Add((ClusterCenterPoint)koCP.Clone());
                    }
                }
            }

            aoCL    = koCListBest;
            aoPList = koPListBest;

            return kiIter;
        }
Exemple #3
0
        /// <summary>
        /// Runs 20 k-means attempts in parallel, each on fresh clones of <paramref name="PList"/>, and
        /// keeps the attempt whose cluster sizes are the most balanced (or, when
        /// <paramref name="minVariance"/> is false, the least balanced).
        /// </summary>
        /// <remarks>
        /// Balance is scored as the square root of the summed squared deviation of every cluster's
        /// member count from the ideal count (number of points / number of clusters).
        /// Attempts finish in no fixed order, so when several attempts tie on the score the one that
        /// is kept may differ between calls.
        /// NOTE(review): <paramref name="maxError"/> and <paramref name="maxIterations"/> are not
        /// forwarded; every inner run calls KMeans.DoKMeans with the fixed values 0.01 and 100, exactly
        /// as before. Confirm with callers before wiring the parameters through.
        /// NOTE(review): this assumes KMeans.DoKMeans is safe to call concurrently - verify.
        /// </remarks>
        /// <param name="numClusters">Number of clusters requested from each k-means run.</param>
        /// <param name="PList">Input points; replaced on return by clones of the winning attempt's points.</param>
        /// <param name="CList">Replaced on return by clones of the winning attempt's cluster centers.</param>
        /// <param name="maxError">Currently unused (see remarks).</param>
        /// <param name="maxIterations">Currently unused (see remarks).</param>
        /// <param name="minVariance">True to keep the lowest score, false to keep the highest.</param>
        /// <returns>The sum of the values returned by KMeans.DoKMeans over all attempts.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="PList"/> is null.</exception>
        public static int DoKMeansWithMinVariance(int numClusters, ref List<MyPoint> PList, ref List<ClusterCenterPoint> CList, double maxError, int maxIterations, bool minVariance)
        {
            if (PList == null)
            {
                throw new ArgumentNullException("PList");
            }

            const int Attempts = 20;
            // Fixed settings handed to every inner run (see the NOTE in the remarks).
            const double InnerMaxError = 0.01;
            const int InnerMaxIterations = 100;

            // ref parameters cannot be captured by a lambda, so the workers read this snapshot instead.
            List<MyPoint> source = new List<MyPoint>(PList);
            object gate = new object();

            // Seed with the worst possible score so the first finished attempt is always accepted.
            double bestSpread = minVariance ? double.MaxValue : double.MinValue;

            List<MyPoint> PListBest = new List<MyPoint>();
            List<ClusterCenterPoint> CListBest = new List<ClusterCenterPoint>();
            int iter = 0;

            Parallel.For(0, Attempts, attempt =>
            {
                // Each attempt works on its own clones so workers never share point objects.
                List<MyPoint> PListCopy = new List<MyPoint>(source.Count);
                for (int i = 0; i < source.Count; i++)
                {
                    PListCopy.Add((MyPoint)source[i].Clone());
                }

                List<ClusterCenterPoint> CListCopy = null;
                int iterations = KMeans.DoKMeans(numClusters, ref PListCopy, ref CListCopy, InnerMaxError, InnerMaxIterations, true); // true means do kmeansplusplus

                // Count how many points ended up in each cluster (new int[] is zero-initialized).
                int[] CCount = new int[numClusters];
                foreach (MyPoint mp in PListCopy)
                {
                    CCount[mp.ClusterId] += 1;
                }

                // Summed squared deviation of the cluster sizes from the ideal, perfectly even size.
                double idealCount = PListCopy.Count / (double)numClusters;
                double sumSquares = 0;
                for (int i = 0; i < numClusters; i++)
                {
                    double deviation = CCount[i] - idealCount;
                    sumSquares += deviation * deviation;
                }
                double spread = Math.Sqrt(sumSquares);

                // All shared state (iteration total, best score, best lists) is touched only under
                // the lock, so the compare-and-replace is atomic and no iteration counts are lost.
                lock (gate)
                {
                    iter += iterations;

                    bool improved = minVariance ? spread < bestSpread : spread > bestSpread;
                    if (improved)
                    {
                        bestSpread = spread;
                        PListBest.Clear();
                        foreach (MyPoint mp in PListCopy)
                        {
                            PListBest.Add((MyPoint)mp.Clone());
                        }

                        CListBest.Clear();
                        if (CListCopy != null) // the inner run may have left the center list unset
                        {
                            foreach (ClusterCenterPoint cp in CListCopy)
                            {
                                CListBest.Add((ClusterCenterPoint)cp.Clone());
                            }
                        }
                    }
                }
            });

            CList = CListBest;
            PList = PListBest;
            return iter;
        }