Ejemplo n.º 1
0
        /// <summary>
        /// Clusters <paramref name="data"/> into <paramref name="k"/> groups with a randomized
        /// k-means style loop: in every iteration each point is assigned to a cluster drawn at
        /// random, the centroids are recomputed, and the loop stops after <c>MaxIterate</c>
        /// iterations or once the RSS changes by less than <c>Epsilon</c> between iterations.
        /// </summary>
        /// <param name="data">The points to cluster.</param>
        /// <param name="k">The number of clusters.</param>
        /// <returns>
        /// A <c>KMeansCluster</c> holding the per-point cluster index, the centroid of each
        /// cluster, and the points that ended up in each cluster.
        /// </returns>
        public static KMeansCluster Cluster(Vector2[] data, int k)
        {
            int n = data.Length;

            Vector2[] centroids = initCentroid(data, k);
            int[]     clusters  = new int[n];
            double    RSS       = double.MaxValue;
            int       it        = 0;

            // Scratch buffer for the per-centroid weights; it is fully overwritten for every
            // point, so one allocation serves the whole run.
            double[] distance = new double[k];

            while (it < MaxIterate)
            {
                for (int i = 0; i < n; ++i)
                {
                    // Weight of centroid j = Vector2.DistanceSqr(point, centroid j) + 0.0001.
                    // The small offset keeps the total strictly positive so the normalization
                    // below never divides by zero.
                    // NOTE(review): the selection probability grows with the distance, i.e.
                    // farther centroids are more likely to be chosen - confirm this is intended.
                    double sumDis = 0;
                    for (int j = 0; j < k; ++j)
                    {
                        distance[j] = Vector2.DistanceSqr(data[i], centroids[j]) + 0.0001;
                        sumDis     += distance[j];
                    }

                    double r = rand.NextDouble();

                    // Roulette-wheel selection over the normalized weights. The accumulated
                    // weights can add up to slightly less than 1.0 because of floating-point
                    // rounding, so a draw very close to 1.0 may not fall below any partial sum.
                    // Defaulting to the last cluster keeps the index inside [0, k-1] instead of
                    // letting it run to k and fail later with an out-of-range access.
                    int    pos        = k - 1;
                    double cumulative = 0;
                    for (int j = 0; j < k; ++j)
                    {
                        cumulative += distance[j] / sumDis;
                        if (r < cumulative)
                        {
                            pos = j;
                            break;
                        }
                    }
                    clusters[i] = pos;
                }

                // Recompute the centroid of every cluster.
                calcCentroids(data, centroids, clusters);

                // Recompute the total RSS for the new assignment.
                double newRSS = calcRSS(data, centroids, clusters);

                double diff = Math.Abs(newRSS - RSS);
                RSS = newRSS;
                ++it;

                if (diff < Epsilon)
                {
                    break; // RSS has converged
                }
            }

            KMeansCluster result = new KMeansCluster(n, k);

            Array.Copy(clusters, result.ClusterIndex, n); // copy the per-point cluster indices
            for (int i = 0; i < k; ++i)
            {
                result[i].Centroid = centroids[i]; // copy the centroid of each cluster
            }

            for (int i = 0; i < n; ++i)
            {
                int c = clusters[i];
                result[c].Add(data[i]);
            }

            return(result);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Runs the nearest-center clustering loop over the instance's data: every item is
        /// assigned to the center with the smallest <c>distanceOf</c> value, the centers are
        /// recomputed, and the loop ends after <c>MaxIterate</c> rounds or once the RSS changes
        /// by less than <c>Epsilon</c> between two rounds. The RSS of each round is written to
        /// the console.
        /// </summary>
        /// <returns>
        /// A <c>KMeansCluster&lt;T&gt;</c> carrying the per-item cluster index, the center of
        /// each cluster, and the items belonging to each cluster.
        /// </returns>
        public KMeansCluster <T> Cluster()
        {
            initCenter(); // set up the starting centers

            double previousRss = double.MaxValue;

            for (int round = 0; round < MaxIterate; ++round)
            {
                // Assignment step: pick the closest center for every item.
                for (int item = 0; item < n; ++item)
                {
                    int    nearest         = 0;
                    double nearestDistance = 0;
                    for (int j = 0; j < k; ++j)
                    {
                        double candidate = distanceOf(data[item], centers[j]) + Epsilon;

                        // The first center is always taken as the initial best; later centers
                        // only win on a strictly smaller distance, so ties keep the lower index.
                        if (j == 0 || candidate < nearestDistance)
                        {
                            nearest         = j;
                            nearestDistance = candidate;
                        }
                    }
                    clusters[item] = nearest;
                }

                // Update step: recompute the centers, then the total RSS.
                computeCenters();
                double currentRss = computeRSS();

                double delta = Math.Abs(currentRss - previousRss);
                previousRss = currentRss;
                Console.WriteLine("第{0}次迭代,方差{1:F4}.", round, previousRss);

                if (delta < Epsilon)
                {
                    break; // RSS has converged
                }
            }

            KMeansCluster <T> result = new KMeansCluster <T>(n, k);

            // Copy the per-item cluster indices, then the center of each cluster.
            Array.Copy(clusters, result.ClusterIndex, n);
            for (int j = 0; j < k; ++j)
            {
                result[j].Center = centers[j];
            }

            // Distribute the items into their clusters.
            for (int item = 0; item < n; ++item)
            {
                result[clusters[item]].Add(data[item]);
            }

            return(result);
        }
Ejemplo n.º 3
0
        /*  我自己的,先不写
         * private static double createRandom(Vector2[] data, Vector2[] centroids, int[] clusters) {
         *  int n = data.Length, k = centroids.Length;
         *  Dictionary<int, bool> used = new Dictionary<int, bool>();
         *  while (used.Count < k) {
         *      int r = rand.Next(data.Length);
         *      if (!used.ContainsKey(r)) {
         *          centroids[used.Count] = data[r];
         *          used[r] = true;
         *      }
         *  }
         *
         *  for (int i = 0; i < n; ++i) {
         *      // 对每个向量测量其到每个质心的距离
         *      double minDis = double.MaxValue;
         *      for (int j = 0; j < k; ++j) {
         *          double dis = Vector2.DistanceSqr(data[i], centroids[j]);
         *          if (dis < minDis) {
         *              minDis = dis;
         *              clusters[i] = j;
         *          }
         *      }
         *  }
         *
         *  // 计算每个类的质心
         *  calcCentroids(data, centroids, clusters);
         *
         *  // 计算总方差
         *  double RSS = calcRSS(data, centroids, clusters);
         *  return RSS;
         * }
         *
         * public static KMeansCluster AnnealCluster(Vector2[] data, int k) {
         *  throw new Exception();
         * }
         *
         * */

        /// <summary>
        /// Dynamic clustering based on simulated annealing (after Yang Zhongming, Huang Dao and
        /// Wang Xingyu).
        /// </summary>
        /// <param name="data">The samples to cluster.</param>
        /// <param name="c">The number of clusters; the first <c>c</c> samples seed the centers.</param>
        /// <param name="it">Receives the number of annealing iterations that were executed.</param>
        /// <returns>
        /// A <c>KMeansCluster</c> holding the per-sample cluster index, the centroid of each
        /// cluster, and the samples assigned to each cluster.
        /// </returns>
        public static KMeansCluster AnnealCluster(Vector2[] data, int c, out int it)
        {
            it = 0;
            int sampleCount = data.Length;

            // Find the largest and smallest distance between any two distinct samples.
            double farthest = double.MinValue;
            double nearest  = double.MaxValue;
            for (int a = 0; a < sampleCount; ++a)
            {
                for (int b = 0; b < sampleCount; ++b)
                {
                    if (a != b)
                    {
                        double gap = (data[a] - data[b]).Mod();
                        if (gap > farthest)
                        {
                            farthest = gap;
                        }
                        if (gap < nearest)
                        {
                            nearest = gap;
                        }
                    }
                }
            }

            // Starting temperature and the temperature at which annealing stops.
            double t    = 10 * farthest;
            double tmin = 1e-1 * nearest + 1e-6;

            // The first c samples each start their own cluster and act as its initial center.
            Vector2[] centers = new Vector2[c];
            int[]     labels  = new int[sampleCount];
            for (int seed = 0; seed < c; ++seed)
            {
                centers[seed] = data[seed];
                labels[seed]  = seed;
            }

            // Every remaining sample gets the cluster chosen by getPos from its distances to
            // the current centers and the current temperature.
            for (int s = c; s < sampleCount; ++s)
            {
                double[] toCenters = new double[c];
                for (int j = 0; j < c; ++j)
                {
                    toCenters[j] = (data[s] - centers[j]).Mod();
                }
                labels[s] = getPos(toCenters, data[s], centers, t);
            }

            // Recompute each center as the mean of its members.
            Vector2[] sums    = new Vector2[c];
            int[]     members = new int[c];
            for (int s = 0; s < sampleCount; ++s)
            {
                int label = labels[s];
                sums[label] += data[s];
                ++members[label];
            }
            for (int j = 0; j < c; ++j)
            {
                centers[j] = sums[j] / members[j];
            }

            // Accumulate the within-cluster sum of squared distances.
            double rss = 0;
            for (int s = 0; s < sampleCount; ++s)
            {
                rss += Vector2.DistanceSqr(data[s], centers[labels[s]]);
            }

            // Annealing loop: adjust the assignment at the current temperature, stop when the
            // RSS settles, the temperature drops to tmin, or MaxIterate rounds have run.
            bool converged = false;
            while (!converged && t > tmin && it < MaxIterate)
            {
                ++it;

                double updated = adjust(data, centers, labels, members, t, rss);
                converged = Math.Abs(updated - rss) < Epsilon;
                rss       = updated;

                if (!converged)
                {
                    t *= alpha; // cool down
                }
            }

            KMeansCluster result = new KMeansCluster(sampleCount, c);

            // Copy the per-sample cluster indices, then the centroid of each cluster.
            Array.Copy(labels, result.ClusterIndex, sampleCount);
            for (int j = 0; j < c; ++j)
            {
                result[j].Centroid = centers[j];
            }

            // Distribute the samples into their clusters.
            for (int s = 0; s < sampleCount; ++s)
            {
                result[labels[s]].Add(data[s]);
            }

            return(result);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Simulated-annealing clustering over the instance's data. The supplied delegates are
        /// stored on the instance before the run starts.
        /// </summary>
        /// <param name="add">Delegate stored in the <c>add</c> field.</param>
        /// <param name="sub">Delegate stored in the <c>sub</c> field.</param>
        /// <param name="div">Delegate stored in the <c>div</c> field.</param>
        /// <param name="it">Receives the number of annealing iterations that were executed.</param>
        /// <returns>
        /// A <c>KMeansCluster&lt;T&gt;</c> carrying the per-item cluster index, the center of
        /// each cluster, and the items belonging to each cluster.
        /// </returns>
        public KMeansCluster <T> AnnealCluster(Func <T, T, T> add, Func <T, T, T> sub, Func <T, double, T> div, out int it)
        {
            this.add = add;
            this.sub = sub;
            this.div = div;
            it       = 0;

            // Find the largest and smallest distance between any two distinct items.
            double farthest = double.MinValue;
            double nearest  = double.MaxValue;
            for (int a = 0; a < n; ++a)
            {
                for (int b = 0; b < n; ++b)
                {
                    if (a != b)
                    {
                        double gap = distanceOf(data[a], data[b]);
                        if (gap > farthest)
                        {
                            farthest = gap;
                        }
                        if (gap < nearest)
                        {
                            nearest = gap;
                        }
                    }
                }
            }

            // Starting temperature and the temperature at which annealing stops.
            double t    = 10 * farthest;
            double tmin = 1e-1 * nearest + Epsilon;

            // The first k items each start their own cluster and act as its initial center.
            for (int seed = 0; seed < k; ++seed)
            {
                centers[seed]  = data[seed];
                clusters[seed] = seed;
            }

            // Every remaining item gets the cluster chosen by getPos from its distances to the
            // current centers and the current temperature.
            for (int item = k; item < n; ++item)
            {
                double[] toCenters = new double[k];
                for (int j = 0; j < k; ++j)
                {
                    toCenters[j] = distanceOf(data[item], centers[j]);
                }
                clusters[item] = getPos(toCenters, t);
            }

            computeCenters();          // recompute the centers for the initial assignment
            double rss = computeRSS(); // RSS of the initial assignment

            // Annealing loop: adjust the assignment at the current temperature, stop when the
            // RSS settles, the temperature drops to tmin, or MaxIterate rounds have run.
            bool converged = false;
            while (!converged && t > tmin && it < MaxIterate)
            {
                ++it;

                double updated = adjust(t, rss);
                converged = Math.Abs(updated - rss) < Epsilon;
                rss       = updated;

                if (!converged)
                {
                    t *= alpha; // cool down
                }
            }

            KMeansCluster <T> result = new KMeansCluster <T>(n, k);

            // Copy the per-item cluster indices, then the center of each cluster.
            Array.Copy(clusters, result.ClusterIndex, n);
            for (int j = 0; j < k; ++j)
            {
                result[j].Center = centers[j];
            }

            // Distribute the items into their clusters.
            for (int item = 0; item < n; ++item)
            {
                result[clusters[item]].Add(data[item]);
            }

            return(result);
        }