/// <summary>
/// Clusters <paramref name="data"/> into <paramref name="k"/> groups. Each iteration
/// assigns every sample to a randomly drawn cluster (roulette-wheel selection over the
/// normalized squared distances to the centroids), recomputes the centroids and the
/// residual sum of squares (RSS), and stops once the RSS change drops below
/// <c>Epsilon</c> or <c>MaxIterate</c> iterations have run.
/// </summary>
/// <param name="data">Samples to cluster.</param>
/// <param name="k">Number of clusters.</param>
/// <returns>
/// A <c>KMeansCluster</c> holding a copy of the per-sample cluster indices, the centroid
/// of every cluster, and the samples added to their assigned cluster.
/// </returns>
public static KMeansCluster Cluster(Vector2[] data, int k) {
    int n = data.Length;
    Vector2[] centroids = initCentroid(data, k);
    int[] clusters = new int[n];
    double RSS = double.MaxValue;
    int it = 0;

    while (it < MaxIterate) {
        for (int i = 0; i < n; ++i) {
            // Squared distance from sample i to every centroid. The 0.0001 offset is
            // presumably there to keep the normalizing sum below away from zero.
            double[] distance = new double[k];
            for (int j = 0; j < k; ++j) {
                distance[j] = Vector2.DistanceSqr(data[i], centroids[j]) + 0.0001;
            }

            // Normalize the distances so that they sum to (approximately) one.
            double sumDis = 0;
            for (int j = 0; j < k; ++j) {
                sumDis += distance[j];
            }
            for (int j = 0; j < k; ++j) {
                distance[j] = distance[j] / sumDis;
            }

            // Cumulative weights for the roulette-wheel draw.
            double[] disSum = new double[k];
            disSum[0] = distance[0];
            for (int j = 1; j < k; ++j) {
                disSum[j] = disSum[j - 1] + distance[j];
            }

            // NOTE(review): the weight of a cluster grows with its distance, so farther
            // centroids are MORE likely to be drawn. Confirm this is intended.
            double r = rand.NextDouble();
            int pos = 0;
            while (pos < k) {
                if (r < disSum[pos]) {
                    break;
                }
                pos++;
            }

            // Rounding (or a NaN weight) can leave the last cumulative weight below r,
            // in which case the scan runs off the end. Fall back to the last cluster
            // instead of storing the out-of-range index k.
            if (pos >= k) {
                pos = k - 1;
            }
            clusters[i] = pos;
        }

        // Recompute the centroid of every cluster.
        calcCentroids(data, centroids, clusters);

        // Recompute the total RSS and measure how much it moved.
        double newRSS = calcRSS(data, centroids, clusters);
        double diff = Math.Abs(newRSS - RSS);
        RSS = newRSS;
        ++it;
        if (diff < Epsilon) {
            break; // RSS has converged.
        }
    }

    KMeansCluster result = new KMeansCluster(n, k);
    Array.Copy(clusters, result.ClusterIndex, n); // Copy the cluster indices.
    for (int i = 0; i < k; ++i) {
        result[i].Centroid = centroids[i]; // Copy the centroid of each cluster.
    }
    for (int i = 0; i < n; ++i) {
        int c = clusters[i];
        result[c].Add(data[i]);
    }
    return result;
}
/// <summary>
/// Clusters the instance's samples: after initializing the centers, each round assigns
/// every sample to the center with the smallest distance, recomputes the centers and the
/// residual sum of squares (RSS), logs the RSS, and stops once the RSS change is below
/// <c>Epsilon</c> or <c>MaxIterate</c> rounds have run.
/// </summary>
/// <returns>
/// A <c>KMeansCluster&lt;T&gt;</c> holding a copy of the per-sample cluster indices, the
/// center of every cluster, and the samples added to their assigned cluster.
/// </returns>
public KMeansCluster<T> Cluster() {
    initCenter(); // Random initialization of the centers.

    double previousRss = double.MaxValue;
    for (int round = 0; round < MaxIterate; ++round) {
        for (int sample = 0; sample < n; ++sample) {
            // Distance from this sample to every center (offset by Epsilon).
            double[] dist = new double[k];
            for (int center = 0; center < k; ++center) {
                dist[center] = distanceOf(data[sample], centers[center]) + Epsilon;
            }

            // Pick the first center with the strictly smallest distance.
            int nearest = 0;
            for (int center = 1; center < k; ++center) {
                if (dist[center] < dist[nearest]) {
                    nearest = center;
                }
            }
            clusters[sample] = nearest;

            // A randomized, distance-weighted assignment used to be sketched here;
            // it is disabled, as is an early exit when no assignment changes.
        }

        // Recompute the center of every cluster, then the total RSS.
        computeCenters();
        double currentRss = computeRSS();
        double delta = Math.Abs(currentRss - previousRss);
        previousRss = currentRss;

        Console.WriteLine("第{0}次迭代,方差{1:F4}.", round, previousRss);

        if (delta < Epsilon) {
            break; // RSS has converged.
        }
    }

    KMeansCluster<T> result = new KMeansCluster<T>(n, k);
    Array.Copy(clusters, result.ClusterIndex, n); // Copy the cluster indices.
    for (int cluster = 0; cluster < k; ++cluster) {
        result[cluster].Center = centers[cluster]; // Copy the center of each cluster.
    }
    for (int sample = 0; sample < n; ++sample) {
        result[clusters[sample]].Add(data[sample]);
    }
    return result;
}
/* My own version; not written for now.
 * private static double createRandom(Vector2[] data, Vector2[] centroids, int[] clusters) {
 *     int n = data.Length, k = centroids.Length;
 *     Dictionary<int, bool> used = new Dictionary<int, bool>();
 *     while (used.Count < k) {
 *         int r = rand.Next(data.Length);
 *         if (!used.ContainsKey(r)) {
 *             centroids[used.Count] = data[r];
 *             used[r] = true;
 *         }
 *     }
 *
 *     for (int i = 0; i < n; ++i) {
 *         // Measure the distance from each vector to every centroid.
 *         double minDis = double.MaxValue;
 *         for (int j = 0; j < k; ++j) {
 *             double dis = Vector2.DistanceSqr(data[i], centroids[j]);
 *             if (dis < minDis) {
 *                 minDis = dis;
 *                 clusters[i] = j;
 *             }
 *         }
 *     }
 *
 *     // Compute the centroid of each cluster.
 *     calcCentroids(data, centroids, clusters);
 *
 *     // Compute the total variance.
 *     double RSS = calcRSS(data, centroids, clusters);
 *     return RSS;
 * }
 *
 * public static KMeansCluster AnnealCluster(Vector2[] data, int k) {
 *     throw new Exception();
 * }
 *
 */

/// <summary>
/// Dynamic clustering based on simulated annealing, after the paper by Yang Zhongming,
/// Huang Dao and Wang Xingyu ("A Dynamic Clustering Algorithm Based on Simulated
/// Annealing", title translated).
/// </summary>
/// <param name="data">Samples to cluster.</param>
/// <param name="c">Number of clusters; the first <paramref name="c"/> samples seed the centers.</param>
/// <param name="it">Receives the number of annealing iterations that were run.</param>
/// <returns>
/// A <c>KMeansCluster</c> holding a copy of the per-sample cluster indices, the centroid
/// of every cluster, and the samples added to their assigned cluster.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="c"/> is negative, exceeds the number of samples, or is zero while
/// samples are present.
/// </exception>
public static KMeansCluster AnnealCluster(Vector2[] data, int c, out int it) {
    if (data == null) {
        throw new ArgumentNullException("data");
    }
    int m = data.Length;
    // Reject only the inputs that could never complete: seeding reads data[0..c-1],
    // and with samples present every sample needs at least one cluster to land in.
    bool emptyProblem = (m == 0 && c == 0);
    if (!emptyProblem && (c < 1 || c > m)) {
        throw new ArgumentOutOfRangeException(
            "c", c, "The cluster count must be between 1 and the number of samples.");
    }

    it = 0;

    // Largest and smallest distance between two distinct samples.
    // Each unordered pair is visited once.
    // NOTE(review): this relies on (a - b).Mod() == (b - a).Mod(), which holds for a
    // Euclidean norm; confirm Vector2.Mod is such a norm.
    // (Author's note: wouldn't the minimum usually be 0?)
    double dmax = double.MinValue, dmin = double.MaxValue;
    for (int i = 0; i < m; ++i) {
        for (int j = i + 1; j < m; ++j) {
            double dis = (data[i] - data[j]).Mod();
            if (dis > dmax) {
                dmax = dis;
            }
            if (dis < dmin) {
                dmin = dis;
            }
        }
    }

    // Start temperature and stop temperature derived from the distance range.
    double t = 10 * dmax, tmin = 1e-1 * dmin + 1e-6;

    // Use the first c samples as the initial cluster centers.
    Vector2[] center = new Vector2[c];
    int[] cls = new int[m];
    for (int i = 0; i < c; ++i) {
        center[i] = data[i];
        cls[i] = i;
    }

    // Classify the remaining m - c samples via getPos, given their distances to the
    // centers and the current temperature.
    for (int i = c; i < m; ++i) {
        double[] ds = new double[c];
        for (int j = 0; j < c; ++j) {
            ds[j] = (data[i] - center[j]).Mod();
        }
        int pos = getPos(ds, data[i], center, t);
        cls[i] = pos;
    }

    // New cluster centers: per-cluster sum divided by per-cluster count.
    Vector2[] cSum = new Vector2[c];
    int[] cCounts = new int[c];
    for (int i = 0; i < m; ++i) {
        int cs = cls[i];
        cSum[cs] += data[i];
        ++cCounts[cs];
    }
    for (int i = 0; i < c; ++i) {
        center[i] = cSum[i] / cCounts[i];
    }

    // Accumulated within-cluster sum of squared distances.
    double RSS = 0;
    for (int i = 0; i < m; ++i) {
        int cs = cls[i];
        RSS += Vector2.DistanceSqr(data[i], center[cs]);
    }

    // Anneal until the temperature reaches tmin, the iteration cap is hit, or the
    // RSS returned by adjust stops moving.
    while (t > tmin && it < MaxIterate) {
        it++;
        double nr = adjust(data, center, cls, cCounts, t, RSS);
        double diff = Math.Abs(nr - RSS);
        RSS = nr;
        if (diff < Epsilon) {
            break;
        }
        t *= alpha; // Cool down.
    }

    KMeansCluster result = new KMeansCluster(m, c);
    Array.Copy(cls, result.ClusterIndex, m); // Copy the cluster indices.
    for (int i = 0; i < c; ++i) {
        result[i].Centroid = center[i]; // Copy the centroid of each cluster.
    }
    for (int i = 0; i < m; ++i) {
        int cc = cls[i];
        result[cc].Add(data[i]);
    }
    return result;
}
/// <summary>
/// Simulated-annealing clustering over the instance's samples. Stores the supplied
/// arithmetic delegates on the instance, seeds the centers with the first <c>k</c>
/// samples, classifies the rest via <c>getPos</c>, and then repeatedly calls
/// <c>adjust</c> while cooling the temperature by <c>alpha</c>.
/// </summary>
/// <param name="add">Addition of two samples; stored in the <c>add</c> field.</param>
/// <param name="sub">Subtraction of two samples; stored in the <c>sub</c> field.</param>
/// <param name="div">Division of a sample by a scalar; stored in the <c>div</c> field.</param>
/// <param name="it">Receives the number of annealing iterations that were run.</param>
/// <returns>
/// A <c>KMeansCluster&lt;T&gt;</c> holding a copy of the per-sample cluster indices, the
/// center of every cluster, and the samples added to their assigned cluster.
/// </returns>
public KMeansCluster<T> AnnealCluster(Func<T, T, T> add, Func<T, T, T> sub, Func<T, double, T> div, out int it) {
    this.add = add;
    this.sub = sub;
    this.div = div;
    it = 0;

    // Largest and smallest distance over all ordered pairs of distinct samples.
    // (Author's note: wouldn't the minimum usually be 0?)
    double farthest = double.MinValue;
    double closest = double.MaxValue;
    for (int a = 0; a < n; ++a) {
        for (int b = 0; b < n; ++b) {
            if (a != b) {
                double gap = distanceOf(data[a], data[b]);
                if (gap > farthest) {
                    farthest = gap;
                }
                if (gap < closest) {
                    closest = gap;
                }
            }
        }
    }

    // Start temperature and stop temperature derived from the distance range.
    double temperature = 10 * farthest;
    double floorTemperature = 1e-1 * closest + Epsilon;

    // The first k samples act as the initial cluster centers.
    for (int seed = 0; seed < k; ++seed) {
        centers[seed] = data[seed];
        clusters[seed] = seed;
    }

    // Classify the remaining n - k samples from their distances to the centers.
    for (int sample = k; sample < n; ++sample) {
        double[] toCenters = new double[k];
        for (int center = 0; center < k; ++center) {
            toCenters[center] = distanceOf(data[sample], centers[center]);
        }
        clusters[sample] = getPos(toCenters, temperature);
    }

    computeCenters();               // Recompute the centers.
    double currentRss = computeRSS(); // Compute the RSS.

    // Anneal until the temperature reaches its floor, the iteration cap is hit, or
    // the RSS returned by adjust stops moving.
    while (temperature > floorTemperature && it < MaxIterate) {
        it++;
        double nextRss = adjust(temperature, currentRss);
        double delta = Math.Abs(nextRss - currentRss);
        currentRss = nextRss;
        if (delta < Epsilon) {
            break;
        }
        temperature *= alpha; // Cool down.
    }

    KMeansCluster<T> result = new KMeansCluster<T>(n, k);
    Array.Copy(clusters, result.ClusterIndex, n); // Copy the cluster indices.
    for (int cluster = 0; cluster < k; ++cluster) {
        result[cluster].Center = centers[cluster]; // Copy the center of each cluster.
    }
    for (int sample = 0; sample < n; ++sample) {
        int owner = clusters[sample];
        result[owner].Add(data[sample]);
    }
    return result;
}