Beispiel #1
0
        /// <summary>
        /// Clusters <paramref name="allPoints"/> with k-means for k = 1 .. 19 (never more than the
        /// number of points) and returns the clustering with the smallest evaluation value Fk.
        /// </summary>
        /// <remarks>
        /// For every k the distortion Sk is the sum of the values stored in <c>dist_error</c>.
        /// Fk is fixed to 100 for k = 1. For larger k it is 1 when the previous Sk is zero and
        /// Sk / (ak * previous Sk) otherwise, where ak = Ak(k, Nd) when Nd &gt; 1 and 1 otherwise.
        /// NOTE(review): this looks like the f(K) criterion of Pham, Dimov and Nguyen - confirm.
        /// </remarks>
        /// <param name="allPoints">Cases to cluster. Must hold at least one case, because the first
        /// one is used to count the features.</param>
        /// <returns>The clustering with the lowest Fk; the smallest k wins a tie.</returns>
        private PointClusters GetBestKMeans(List <Case> allPoints)
        {
            const int maxK     = 19; // largest number of clusters that is tried
            const int runsPerK = 1;  // independent k-means runs per k; the best run is kept

            // Number of attributes, not counting the id and cluster entries.
            int Nd = allPoints[0].GetFeatures().Count - 2;

            // k = 1 is the baseline every other k is compared against.
            // Reset the shared iteration counter first: a value left over from an earlier
            // call would otherwise shorten this run.
            iter = 0;
            PointClusters best = GetKMeans(allPoints, GetSeedPoints(allPoints, 1), 1);
            best.Sk = SumDistError(best);
            best.Fk = 100;
            double previousSk = best.Sk;

            // GetKMeans re-seeds until it ends up with exactly k clusters, which cannot
            // happen once k exceeds the number of points - so never ask for more.
            int lastK = Math.Min(maxK, allPoints.Count);

            for (int k = 2; k <= lastK; k++)
            {
                PointClusters bestRun   = null;
                double        bestRunFk = Double.MaxValue;

                for (int run = 0; run < runsPerK; run++)
                {
                    List <Case> seedPoints = GetSeedPoints(allPoints, k);
                    iter = 0;
                    PointClusters candidate = GetKMeans(allPoints, seedPoints, k);

                    // Sk has to be filled in here as well (it used to be computed for k = 1
                    // only); derive it from dist_error exactly as for the baseline.
                    candidate.Sk = SumDistError(candidate);

                    if (previousSk == 0)
                    {
                        candidate.Fk = 1;
                    }
                    else
                    {
                        double ak = 1;
                        if (Nd > 1)
                        {
                            ak = Convert.ToDouble(Ak(k, Nd));
                        }
                        candidate.Fk = candidate.Sk / (ak * previousSk);
                    }

                    if (bestRun == null)
                    {
                        bestRun = candidate; // default to the first run
                    }
                    if (candidate.Fk < bestRunFk)
                    {
                        bestRun   = candidate;
                        bestRunFk = candidate.Fk;
                    }
                }

                previousSk = bestRun.Sk;

                // Strict comparison: an earlier (smaller) k keeps the lead on equal Fk.
                if (bestRun.Fk < best.Fk)
                {
                    best = bestRun;
                }
            }

            return(best);
        }

        /// <summary>Adds up every value stored in the clustering's <c>dist_error</c> table.</summary>
        /// <param name="clustering">Clustering whose per-centre errors are summed.</param>
        /// <returns>The sum of all <c>dist_error</c> values; 0 when the table is empty.</returns>
        private static double SumDistError(PointClusters clustering)
        {
            double total = 0;
            foreach (KeyValuePair <Case, double> pair in clustering.dist_error)
            {
                total += pair.Value;
            }
            return(total);
        }
        /// <summary>
        /// Builds one new centre per cluster by aggregating its member cases feature by feature.
        /// cf. http://en.wikipedia.org/wiki/Centroid
        /// Consider also: Medoid, cf. http://en.wikipedia.org/wiki/Medoids
        /// </summary>
        /// <remarks>
        /// Aggregation per feature type: INT - sum divided by the member count (integer division);
        /// FLOAT - arithmetic mean; BOOL - logical OR of all members; STRING / CATEGORICAL - the
        /// most frequent value (the first one encountered wins a tie).
        /// The "id" feature is set to the running cluster index and "cluster" to -1.
        /// Features of any other type are not copied to the new centre.
        /// </remarks>
        /// <param name="pcs">Clustering whose <c>PC</c> values are the member lists to summarise.</param>
        /// <returns>One new case per cluster, in the enumeration order of <c>pcs.PC.Values</c>.</returns>
        private List <Case> GetCentroid(PointClusters pcs)
        {
            List <Case> centres      = new List <Case>(pcs.PC.Count);
            int         clusterIndex = 0;

            foreach (List <Case> members in pcs.PC.Values)
            {
                Case first  = members[0];
                Case centre = new Case(first.GetCaseID(), first.GetCaseName(), first.GetCaseDescription());
                int  featureCount = first.GetFeatures().Count;

                for (int idx = 0; idx < featureCount; idx++)
                {
                    // Accumulators live for exactly one feature.
                    int           intTotal   = 0;
                    double        floatTotal = 0;
                    bool          anyTrue    = false;
                    List <string> labels     = new List <string>();
                    List <int>    labelHits  = new List <int>();
                    Feature       current    = null;

                    foreach (Case member in members)
                    {
                        current = (Feature)member.GetFeatures()[idx];
                        string memberFeatureName = current.GetFeatureName();
                        if (memberFeatureName == "id" || memberFeatureName == "cluster")
                        {
                            // Bookkeeping features are not aggregated.
                            break;
                        }

                        var memberKind = current.GetFeatureType();
                        if (memberKind == FeatureType.TYPE_FEATURE_INT)
                        {
                            intTotal += Convert.ToInt32(current.GetFeatureValue());
                        }
                        else if (memberKind == FeatureType.TYPE_FEATURE_FLOAT)
                        {
                            floatTotal += Convert.ToDouble(current.GetFeatureValue());
                        }
                        else if (memberKind == FeatureType.TYPE_FEATURE_BOOL)
                        {
                            anyTrue = (anyTrue || Convert.ToBoolean(current.GetFeatureValue()));
                        }
                        else if (memberKind == FeatureType.TYPE_FEATURE_STRING || memberKind == FeatureType.TYPE_FEATURE_CATEGORICAL)
                        {
                            // Tally how often each distinct value occurs.
                            string label = current.GetFeatureValue().ToString();
                            int    slot  = labels.IndexOf(label);
                            if (slot < 0)
                            {
                                labels.Add(label);
                                labelHits.Add(1);
                            }
                            else
                            {
                                labelHits[slot] = labelHits[slot] + 1;
                            }
                        }
                    }

                    // 'current' is now the feature of the last member visited; its metadata
                    // (type, weight, key/index flags, unit) is copied to the new centre.
                    string featureName = current.GetFeatureName();
                    var    featureKind = current.GetFeatureType();

                    if (featureName == "cluster")
                    {
                        centre.AddFeature(current.GetFeatureName(), current.GetFeatureType(), -1, current.GetWeight(), current.GetIsKey(), current.GetIsIndex(), current.GetFeatureUnit());
                    }
                    else if (featureName == "id")
                    {
                        centre.AddFeature(current.GetFeatureName(), current.GetFeatureType(), clusterIndex, current.GetWeight(), current.GetIsKey(), current.GetIsIndex(), current.GetFeatureUnit());
                    }
                    else if (featureKind == FeatureType.TYPE_FEATURE_INT)
                    {
                        centre.AddFeature(current.GetFeatureName(), current.GetFeatureType(), intTotal / members.Count, current.GetWeight(), current.GetIsKey(), current.GetIsIndex(), current.GetFeatureUnit());
                    }
                    else if (featureKind == FeatureType.TYPE_FEATURE_FLOAT)
                    {
                        centre.AddFeature(current.GetFeatureName(), current.GetFeatureType(), floatTotal / members.Count, current.GetWeight(), current.GetIsKey(), current.GetIsIndex(), current.GetFeatureUnit());
                    }
                    else if (featureKind == FeatureType.TYPE_FEATURE_BOOL)
                    {
                        centre.AddFeature(current.GetFeatureName(), current.GetFeatureType(), anyTrue, current.GetWeight(), current.GetIsKey(), current.GetIsIndex(), current.GetFeatureUnit());
                    }
                    else if (featureKind == FeatureType.TYPE_FEATURE_STRING || featureKind == FeatureType.TYPE_FEATURE_CATEGORICAL)
                    {
                        // Position of the first value with the highest tally.
                        int    winner     = labelHits.IndexOf(labelHits.Max());
                        string modalLabel = labels[winner];
                        centre.AddFeature(current.GetFeatureName(), current.GetFeatureType(), modalLabel, current.GetWeight(), current.GetIsKey(), current.GetIsIndex(), current.GetFeatureUnit());
                    }
                } // end feature

                centres.Add(centre);
                clusterIndex++;
            } // end clusters

            return(centres);
        }
        /// <summary>
        /// Bog standard k-means: assigns every point to its nearest centre, recomputes the
        /// centres with GetCentroid and repeats until the centres stop changing or the shared
        /// iteration counter <c>iter</c> reaches 1000.
        /// </summary>
        /// <remarks>
        /// A point that is equally close to several centres is given to one of them at random.
        /// If an assignment pass does not yield exactly <paramref name="k"/> clusters, new seed
        /// points are drawn with GetSeedPoints and the pass is repeated.
        /// <c>iter</c> is declared outside this method and is only incremented here, so callers
        /// are expected to reset it before starting a run.
        /// </remarks>
        /// <param name="allPoints">Cases to cluster.</param>
        /// <param name="seedPoints">Initial cluster centres.</param>
        /// <param name="k">Required number of clusters.</param>
        /// <returns>The clustering, keyed by the centres used in the final assignment pass.</returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="k"/> is larger than the
        /// number of points, so k non-empty clusters can never be formed.</exception>
        private PointClusters GetKMeans(List <Case> allPoints, List <Case> seedPoints, int k)
        {
            const int maxIterations = 1000;

            // Every cluster holds at least one point, so this request could only re-seed forever.
            if (k > allPoints.Count)
            {
                throw new ArgumentOutOfRangeException("k", "k must not exceed the number of points to cluster.");
            }

            while (true)
            {
                PointClusters cluster = new PointClusters();
                List <Case>   nearest = new List <Case>();

                //Cycle thru all points in ensemble and assign to nearest centre
                foreach (Case p in allPoints)
                {
                    double minD = double.MaxValue;
                    nearest.Clear();

                    foreach (Case sPoint in seedPoints)
                    {
                        double dist = GetEuclideanD(p, sPoint);
                        if (dist < minD)
                        {
                            nearest.Clear();
                            minD = dist;
                            nearest.Add(sPoint);
                        }
                        else if (dist == minD && !nearest.Contains(sPoint))
                        {
                            // Exact tie: remember every equally close centre.
                            nearest.Add(sPoint);
                        }
                    }

                    //Extract nearest central point, breaking ties at random.
                    Case keyPoint;
                    if (nearest.Count > 1)
                    {
                        keyPoint = nearest[GetRandNumCrypto(0, nearest.Count)];
                    }
                    else
                    {
                        keyPoint = nearest[0];
                    }

                    //Assign ensemble point to correct central point cluster
                    if (!cluster.PC.ContainsKey(keyPoint))
                    {
                        List <Case> newCluster = new List <Case>();
                        newCluster.Add(p);
                        cluster.PC.Add(keyPoint, newCluster);
                    }
                    else
                    {
                        cluster.PC[keyPoint].Add(p);
                    }
                }

                //Bulletproof check - if it comes out of the wash incorrect then re-seed.
                if (cluster.PC.Count != k)
                {
                    seedPoints = GetSeedPoints(allPoints, k);
                    continue;
                }

                List <Case> newSeeds = GetCentroid(cluster);

                //Determine exit: every new centre must equal one of the current centres.
                // The centres are compared by feature value. A ContainsKey(newSeed) lookup
                // depends on how Case defines equality, and the centroids are freshly created
                // objects - with default reference equality it could never succeed, so every
                // run would last until the iteration cap.
                bool converged = true;
                foreach (Case newSeed in newSeeds)
                {
                    bool matched = false;
                    foreach (KeyValuePair <Case, List <Case> > item in cluster.PC)
                    {
                        bool same = true;
                        foreach (Feature f in item.Key.GetFeatures())
                        {
                            Feature seedf = newSeed.GetFeature(f.GetFeatureName());
                            // A feature missing from the new centre counts as a difference.
                            if (seedf == null || f.GetFeatureValue().ToString() != seedf.GetFeatureValue().ToString())
                            {
                                same = false;
                                break;
                            }
                        }
                        if (same)
                        {
                            matched = true;
                            break;
                        }
                    }
                    if (!matched)
                    {
                        converged = false;
                        break;
                    }
                }

                // ">=" rather than "==" so that a counter that is already past the cap
                // still stops the run.
                if (converged || iter >= maxIterations)
                {
                    return(cluster);
                }

                iter++;
                seedPoints = newSeeds;
            }
        }
        /// <summary>
        /// K-means that also records the assignment error: every point is assigned to its
        /// nearest centre, the centres are recomputed with GetCentroid, and this repeats until
        /// the centres stop changing or the shared iteration counter <c>iter</c> reaches 1000.
        /// </summary>
        /// <remarks>
        /// For each centre, <c>dist_error</c> accumulates the distances of the points assigned
        /// to it in the final pass.
        /// A point that is equally close to several centres is given to one of them at random.
        /// If a pass does not yield exactly <paramref name="k"/> clusters, new seed points are
        /// drawn with GetSeedPoints and the pass is repeated.
        /// <c>iter</c> is declared outside this method and is only incremented here, so callers
        /// are expected to reset it before starting a run.
        /// </remarks>
        /// <param name="allPoints">Cases to cluster.</param>
        /// <param name="seedPoints">Initial cluster centres.</param>
        /// <param name="k">Required number of clusters.</param>
        /// <returns>The clustering, keyed by the centres used in the final assignment pass.</returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="k"/> is larger than the
        /// number of points, so k non-empty clusters can never be formed.</exception>
        private PointClusters GetKMeans(List <Case> allPoints, List <Case> seedPoints, int k)
        {
            const int iterationCap = 1000;

            // Every cluster holds at least one point, so this request could only re-seed forever.
            if (k > allPoints.Count)
            {
                throw new ArgumentOutOfRangeException("k", "k must not exceed the number of points to cluster.");
            }

            for (;;)
            {
                PointClusters cluster    = new PointClusters();
                List <Case>   sameDPoint = new List <Case>();

                //Cycle thru all points in ensemble and assign to nearest centre
                foreach (Case p in allPoints)
                {
                    double minD = double.MaxValue;
                    sameDPoint.Clear();

                    foreach (Case sPoint in seedPoints)
                    {
                        double dist = GetEuclideanD(p, sPoint);
                        if (dist < minD)
                        {
                            sameDPoint.Clear();
                            minD = dist;
                            sameDPoint.Add(sPoint);
                        }
                        else if (dist == minD && !sameDPoint.Contains(sPoint))
                        {
                            // Exact tie: remember every equally close centre.
                            sameDPoint.Add(sPoint);
                        }
                    }

                    //Extract nearest central point, breaking ties at random.
                    Case keyPoint = sameDPoint.Count > 1
                        ? sameDPoint[GetRandNumCrypto(0, sameDPoint.Count)]
                        : sameDPoint[0];

                    //Assign ensemble point to correct central point cluster
                    if (!cluster.PC.ContainsKey(keyPoint))
                    {
                        //New cluster centre
                        List <Case> newCluster = new List <Case>();
                        newCluster.Add(p);
                        cluster.PC.Add(keyPoint, newCluster);
                        cluster.dist_error.Add(keyPoint, minD);
                    }
                    else
                    {
                        //Existing cluster centre
                        cluster.PC[keyPoint].Add(p);
                        cluster.dist_error[keyPoint] += minD;
                    }
                }

                //Bulletproof check - if it comes out of the wash incorrect then re-seed.
                if (cluster.PC.Count != k)
                {
                    seedPoints = GetSeedPoints(allPoints, k);
                    continue;
                }

                List <Case> newSeeds = GetCentroid(cluster);

                //Determine exit
                // The centres are unchanged when every new centre has the same feature values
                // as one of the current centres. Cases are compared by value because a
                // dictionary lookup with the new centre is wrong for equal objects.
                bool exit = true;
                foreach (Case newSeed in newSeeds)                                 // new centres
                {
                    bool found = false;
                    foreach (KeyValuePair <Case, List <Case> > item in cluster.PC) // current centres
                    {
                        bool identical = true;
                        foreach (Feature f in item.Key.GetFeatures())
                        {
                            Feature seedf = newSeed.GetFeature(f.GetFeatureName());
                            // A feature missing from the new centre counts as a difference.
                            if (seedf == null || f.GetFeatureValue().ToString() != seedf.GetFeatureValue().ToString())
                            {
                                identical = false;
                                break;
                            }
                        }
                        if (identical)
                        {
                            found = true;
                            break;
                        }
                    }
                    if (!found)
                    {
                        // One moved centre is enough; no need to check the rest.
                        exit = false;
                        break;
                    }
                }

                // ">=" rather than "==" so that a counter that is already past the cap
                // still stops the run.
                if (exit || iter >= iterationCap)
                {
                    return(cluster);
                }

                iter++;
                seedPoints = newSeeds;
            }
        }
        /// <summary>
        /// Builds one new centre per cluster by aggregating its member cases feature by feature.
        /// cf. http://en.wikipedia.org/wiki/Centroid
        /// Consider also: Medoid, cf. http://en.wikipedia.org/wiki/Medoids
        /// </summary>
        /// <remarks>
        /// Aggregation per feature type: INT - sum divided by the member count (integer division);
        /// FLOAT - arithmetic mean; BOOL - logical OR of all members; STRING - the value of a
        /// randomly chosen member. Features of any other type are not copied.
        /// The "id" feature is not aggregated: a numeric id comes out as 0, a boolean id as
        /// false, and a string id takes the value of the cluster's first member.
        /// Every copied feature gets weight 1.0, no key/index flag and an empty unit.
        /// </remarks>
        /// <param name="pcs">Clustering whose <c>PC</c> values are the member lists to summarise.</param>
        /// <returns>One new case per cluster, in the enumeration order of <c>pcs.PC.Values</c>.</returns>
        private List <Case> GetCentroid(PointClusters pcs)
        {
            List <Case> newSeeds = new List <Case>(pcs.PC.Count);

            // One generator for the whole call: creating a new Random for every feature can
            // hand out identical sequences when the instances are created in quick succession.
            Random rd = new Random();

            foreach (List <Case> cluster in pcs.PC.Values)
            {
                Case newSeed      = new Case(cluster[0].GetCaseID(), cluster[0].GetCaseName(), cluster[0].GetCaseDescription());
                int  featureCount = cluster[0].GetFeatures().Count;

                for (int j = 0; j < featureCount; j++)
                {
                    // The accumulators must start fresh for every feature. They used to be
                    // reset once per cluster only, so each feature's result also contained
                    // the totals of all features before it.
                    int           sumf  = 0;
                    double        sumd  = 0;
                    bool          sumb  = false;
                    List <string> sumst = new List <string>();
                    Feature       f     = null;

                    foreach (Case p in cluster)
                    {
                        f = (Feature)p.GetFeatures()[j];
                        if (f.GetFeatureName() == "id")
                        {
                            // The id is not aggregated.
                            break;
                        }
                        if (f.GetFeatureType() == FeatureType.TYPE_FEATURE_INT)
                        {
                            sumf += Convert.ToInt32(f.GetFeatureValue());
                        }
                        else if (f.GetFeatureType() == FeatureType.TYPE_FEATURE_FLOAT)
                        {
                            sumd += Convert.ToDouble(f.GetFeatureValue());
                        }
                        else if (f.GetFeatureType() == FeatureType.TYPE_FEATURE_BOOL)
                        {
                            sumb = (sumb || Convert.ToBoolean(f.GetFeatureValue()));
                        }
                        else if (f.GetFeatureType() == FeatureType.TYPE_FEATURE_STRING)
                        {
                            sumst.Add(f.GetFeatureValue().ToString());
                        }
                    }

                    // 'f' is now the feature of the last member visited (the first member's
                    // feature when the loop stopped at "id").
                    if (f.GetFeatureType() == FeatureType.TYPE_FEATURE_INT)
                    {
                        newSeed.AddFeature(f.GetFeatureName(), f.GetFeatureType(), sumf / cluster.Count, 1.0, false, false, "");
                    }
                    else if (f.GetFeatureType() == FeatureType.TYPE_FEATURE_FLOAT)
                    {
                        newSeed.AddFeature(f.GetFeatureName(), f.GetFeatureType(), sumd / cluster.Count, 1.0, false, false, "");
                    }
                    else if (f.GetFeatureType() == FeatureType.TYPE_FEATURE_BOOL)
                    {
                        newSeed.AddFeature(f.GetFeatureName(), f.GetFeatureType(), sumb, 1.0, false, false, "");
                    }
                    else if (f.GetFeatureType() == FeatureType.TYPE_FEATURE_STRING)
                    {
                        // Nothing is collected for the skipped "id" feature; fall back to the
                        // member's own value instead of indexing an empty list.
                        string chosen = sumst.Count > 0
                            ? sumst[rd.Next(sumst.Count)]
                            : f.GetFeatureValue().ToString();
                        newSeed.AddFeature(f.GetFeatureName(), f.GetFeatureType(), chosen, 1.0, false, false, "");
                    }
                }

                newSeeds.Add(newSeed);
            }

            return(newSeeds);
        }