Ejemplo n.º 1
0
        /// <summary>
        /// Clusters <paramref name="vectors"/> around K centers by repeating an
        /// assign / re-center cycle until a full pass leaves every center unchanged.
        /// </summary>
        /// <param name="vectors">Samples to cluster. The first K entries seed the initial centers.</param>
        /// <returns>The clusters produced by the last assignment pass (the result of DistanceOperations.Sort).</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="vectors"/> is null.</exception>
        /// <exception cref="System.ArgumentException"><paramref name="vectors"/> holds fewer than K samples.</exception>
        public List <List <Vector> > Cluster(Vector[] vectors)
        {
            // Fail fast with a descriptive error instead of an IndexOutOfRangeException
            // (or NullReferenceException) from inside the seeding loop.
            if (vectors == null)
            {
                throw new System.ArgumentNullException("vectors");
            }
            if (vectors.Length < K)
            {
                throw new System.ArgumentException("At least K vectors are required to seed the initial cluster centers.", "vectors");
            }

            // Pick K initial cluster centers: simply the first K samples.
            List <Vector> centers = new List <Vector>();
            for (int i = 0; i < K; i++)
            {
                centers.Add(vectors[i]);
            }

            List <List <Vector> > clusters;
            bool centersChanged;

            do
            {
                // Assign all samples to the current centers.
                // NOTE(review): indexing clusters[i] by center index below assumes Sort returns
                // exactly one list per center, in center order - confirm against DistanceOperations.
                clusters = DistanceOperations.Sort(centers, vectors);

                centersChanged = false;

                // Compute the new center of every cluster; any change forces another pass.
                // NOTE(review): the behavior of CalculateCenter on an empty cluster is not visible here.
                for (int i = 0; i < centers.Count; i++)
                {
                    Vector newCenter = DistanceOperations.CalculateCenter(clusters[i]);

                    if (!newCenter.ValueEquals(centers[i]))
                    {
                        centers[i]     = newCenter;
                        centersChanged = true;
                    }
                }
            }
            while (centersChanged);

            return(clusters);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Clusters <paramref name="vectors"/> with an ISODATA-style procedure: samples are assigned
        /// to centers, undersized clusters are discarded, clusters with a large spread are split and
        /// centers that lie close together are merged, for at most I outer iterations.
        /// </summary>
        /// <remarks>
        /// Side effects visible in this method: the member Nc is overwritten with the current cluster
        /// count as the algorithm proceeds, and Theta_C is set to 0 once the iteration count reaches I.
        /// </remarks>
        /// <param name="vectors">Samples to cluster. The first Nc entries seed the initial centers.</param>
        /// <returns>The clusters after the final merge step.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="vectors"/> is null.</exception>
        /// <exception cref="System.ArgumentException"><paramref name="vectors"/> holds fewer than Nc samples.</exception>
        public List <List <Vector> > Cluster(Vector[] vectors)
        {
            // Fail fast instead of hitting an IndexOutOfRangeException while seeding the centers.
            if (vectors == null)
            {
                throw new System.ArgumentNullException("vectors");
            }
            if (vectors.Length < Nc)
            {
                throw new System.ArgumentException("At least Nc vectors are required to seed the initial cluster centers.", "vectors");
            }

            int N = vectors.Length;

            List <Vector> centers = new List <Vector>(Nc);

            List <List <Vector> > clusters = null;

            // Step 1: preselect Nc cluster centers (the first Nc samples).
            for (int i = 0; i < Nc; i++)
            {
                centers.Add(vectors[i]);
            }

            int  times    = 1;      // iteration counter, compared against the limit I
            bool finished = false;

            while (!finished)
            {
                // Steps 2-10 repeat for as long as step 10 splits at least one cluster.
                bool splitOccurred = true;

                while (splitOccurred)
                {
                    splitOccurred = false;

                    // Step 2: assign all samples to the current centers.
                    clusters = DistanceOperations.Sort(centers, vectors);

                    // Step 3: discard clusters that hold fewer than Theta_N samples.
                    clusters.RemoveAll((vs) => vs.Count < Theta_N);
                    Nc = clusters.Count;

                    // Step 4: recompute the center of every remaining cluster.
                    centers.Clear();
                    for (int i = 0; i < Nc; i++)
                    {
                        centers.Add(DistanceOperations.CalculateCenter(clusters[i]));
                    }

                    // Step 5: average distance of each cluster's samples to its center.
                    decimal[] averageDistances = new decimal[Nc];
                    for (int i = 0; i < Nc; i++)
                    {
                        averageDistances[i] = DistanceOperations.CalculateAverageDistance(centers[i], clusters[i]);
                    }

                    // Step 6: overall average distance, weighted by cluster size.
                    decimal averageDistanceOfAll = 0;
                    for (int i = 0; i < Nc; i++)
                    {
                        averageDistanceOfAll += averageDistances[i] * clusters[i].Count;
                    }
                    averageDistanceOfAll /= N;

                    // Step 7: decide whether to split (step 8) or go straight to merging (step 11).
                    // The rules are mutually exclusive and evaluated in priority order, so they
                    // must form one else-if chain; as independent ifs the last rule would always
                    // overwrite the decisions of the first two.
                    bool goToMerge;
                    if (times >= I)
                    {
                        // Iteration limit reached: set Theta_C = 0 and jump to step 11.
                        Theta_C   = 0;
                        goToMerge = true;
                    }
                    else if (Nc <= K / 2)
                    {
                        // Too few clusters: proceed to step 8.
                        goToMerge = false;
                    }
                    else if (times % 2 == 0 || Nc >= 2 * K)
                    {
                        // Even iteration, or too many clusters: jump to step 11.
                        goToMerge = true;
                    }
                    else
                    {
                        goToMerge = false;
                    }

                    if (goToMerge)
                    {
                        break; // leave the step 2-10 loop
                    }

                    // Step 8: standard-deviation vector of every cluster.
                    Vector[] ss = new Vector[Nc];
                    for (int i = 0; i < Nc; i++)
                    {
                        ss[i] = DistanceOperations.CalculateS(centers[i], clusters[i]);
                    }

                    // Step 9: dimension holding the largest component of each deviation vector.
                    int[] sMaxDimentions = new int[Nc];
                    for (int i = 0; i < Nc; i++)
                    {
                        sMaxDimentions[i] = DistanceOperations.GetMaxValueIndex(ss[i].Numbers);
                    }

                    // Step 10: decide, cluster by cluster, whether to split.
                    // Iterate over the cluster count as it was BEFORE any split: ss, sMaxDimentions,
                    // averageDistances and clusters are all sized to it, so letting the bound grow
                    // with centers.Count would index past their end.
                    int clusterCount = Nc;
                    for (int i = 0; i < clusterCount; i++)
                    {
                        if (ss[i][sMaxDimentions[i]] > Theta_S)
                        {
                            if ((averageDistances[i] > averageDistanceOfAll &&
                                 clusters[i].Count > 2 * (Theta_N + 1))
                                ||
                                (clusterCount <= K / 2))
                            {
                                // Split: replace the center by two centers shifted by +/- delta
                                // along the dimension with the largest deviation.
                                Vector oldCenter = centers[i];

                                decimal delta      = ss[i][sMaxDimentions[i]] * SplitK;
                                Vector  newCenter1 = oldCenter.CloneVector();
                                newCenter1.Numbers[sMaxDimentions[i]] += delta;
                                Vector newCenter2 = oldCenter.CloneVector();
                                newCenter2.Numbers[sMaxDimentions[i]] -= delta;

                                centers[i] = newCenter1;
                                centers.Add(newCenter2);

                                splitOccurred = true;
                            }
                        }
                    }

                    Nc = centers.Count;

                    if (splitOccurred)
                    {
                        // A splitting pass counts as ONE iteration regardless of how many
                        // clusters were split; control returns to step 2.
                        times++;
                    }
                } // end of the step 2-10 loop

                // Step 11: distances between all pairs of cluster centers.
                // Key encoding (per the original comment): pair (i, j) <-> key i * Nc + j.
                // At this point Nc == centers.Count == clusters.Count.
                List <KeyValuePair <int, decimal> > ds = DistanceOperations.CalculateAllDistances(centers);

                // Step 12: keep only distances below Theta_C, sorted ascending.
                ds.RemoveAll((kv) => kv.Value >= Theta_C);
                ds.Sort((kv1, kv2) => decimal.Compare(kv1.Value, kv2.Value));

                // Step 13: merge the closest pairs.
                List <Vector>         mergedCenters  = new List <Vector>();
                List <List <Vector> > mergedClusters = new List <List <Vector> >();
                foreach (var kv in ds)
                {
                    int i = kv.Key / Nc;
                    int j = kv.Key % Nc;

                    // A cluster may take part in at most one merge per pass. Skip pairs that
                    // reference an already-merged (nulled) slot, and never merge a cluster with itself.
                    if (i == j || clusters[i] == null || clusters[j] == null)
                    {
                        continue;
                    }

                    int    Ni        = clusters[i].Count;
                    int    Nj        = clusters[j].Count;
                    // Size-weighted mean of the two centers.
                    Vector newCenter = (centers[i] * Ni + centers[j] * Nj) / (Ni + Nj);

                    List <Vector> newCluster = new List <Vector>(Ni + Nj);
                    newCluster.AddRange(clusters[i]);
                    newCluster.AddRange(clusters[j]);

                    mergedCenters.Add(newCenter);
                    mergedClusters.Add(newCluster);

                    // Mark both source slots as consumed; they are removed after the loop so that
                    // the indices encoded in the remaining keys stay valid.
                    centers[i] = null;
                    centers[j] = null;

                    clusters[i] = null;
                    clusters[j] = null;
                }
                centers.RemoveAll((v) => v == null);
                clusters.RemoveAll((vl) => vl == null);

                // Put the merged results back; without this the merged clusters (and every
                // sample in them) would silently disappear from the result.
                centers.AddRange(mergedCenters);
                clusters.AddRange(mergedClusters);
                Nc = clusters.Count;

                // Step 14: stop once the iteration limit is reached, otherwise run another iteration.
                if (times >= I)
                {
                    finished = true;
                }
                else
                {
                    times++;
                }
            } // end of the step 2-14 loop

            return(clusters);
        }