/// <summary>
/// Runs k-means on the shared <c>dataset</c> and writes the resulting purity and
/// gini values to <c>result_kmeans.txt</c> inside the directory named by <c>fileName</c>.
/// </summary>
/// <param name="cnt">First argument forwarded to <c>KmeansGenerator.Generate</c> (presumably the number of clusters; confirm against KmeansGenerator).</param>
/// <param name="conv">Convergence value handed to the <c>KmeansGenerator</c> constructor.</param>
static void TestKmeans(int cnt, double conv)
{
    // The using statement guarantees the writer is flushed and the file handle
    // released even when clustering or logging throws part-way through.
    using (StreamWriter sw = new StreamWriter(@".\" + fileName + @"\result_kmeans.txt"))
    {
        PrintLog("开始执行kmeans...");

        KmeansGenerator kmeans = new KmeansGenerator(dataset, conv);
        ValidationPair result = kmeans.Generate(cnt, 10, PrintLog);

        sw.WriteLine("purity=" + result.purity.ToString());
        sw.WriteLine("gini=" + result.gini.ToString());

        PrintLog("kmeans结束...");
    }
}
/// <summary>
/// Runs spectral clustering into K clusters. Each point is linked to its
/// <paramref name="neighbourCnt"/> nearest neighbours to form a symmetric 0/1
/// adjacency matrix, MATLAB computes K eigenvectors of the normalized matrix,
/// and the rows of that eigenvector matrix are then clustered with k-means.
/// </summary>
/// <param name="K">Number of clusters.</param>
/// <param name="neighbourCnt">Number of nearest neighbours used per point.</param>
/// <param name="PrintLog">Callback that receives progress messages.</param>
/// <returns>The purity and gini measures returned by the final k-means run.</returns>
public ValidationPair Generate(int K, int neighbourCnt, PrintLogFunction PrintLog)
{
    GetNeighbours(neighbourCnt);

    int pointCount = arrData.Count();
    matlab.Execute("n = " + pointCount.ToString() + ";");

    // Build the adjacency matrix W: an edge in both directions between every
    // point and each of its selected neighbours.
    double[,] adjacency = new double[pointCount, pointCount];
    for (int point = 0; point < pointCount; point++)
    {
        for (int slot = 0; slot < neighbourCnt; slot++)
        {
            int neighbour = nearestNeighbours[point][slot];
            adjacency[point, neighbour] = 1;
            adjacency[neighbour, point] = 1;
        }
    }

    // All-zero imaginary part that accompanies W when it is sent to MATLAB.
    double[,] adjacencyImaginary = new double[pointCount, pointCount];
    matlab.PutFullMatrix("W", "base", adjacency, adjacencyImaginary);

    // MATLAB-side preparation, executed strictly in this order.
    string[] setupCommands =
    {
        // Sums of W (W is symmetric, so row and column sums coincide).
        "s = sum(W);",
        // Degree matrix D: s placed on the diagonal.
        "D = full(sparse(1:n, 1:n, s));",
        "L = D - W;",
        // Per the original author's note: the K largest eigenvalues of E correspond
        // to the K smallest eigenvalues of the normalized L.
        "E = D^(-1/2)*W*D^(-1/2);",
        "k=" + K.ToString() + ";"
    };
    foreach (string command in setupCommands)
    {
        matlab.Execute(command);
    }

    PrintLog("正在计算特征值特征向量...");
    matlab.Execute("[Q, V] = eigs(E, k);");
    PrintLog("特征值特征向量计算完毕");

    // Pull the eigenvector matrix Q back; only its real part is used below.
    Array eigenReal = new double[pointCount, K];
    Array eigenImaginary = new double[pointCount, K];
    matlab.GetFullMatrix("Q", "base", ref eigenReal, ref eigenImaginary);

    // Each row of Q becomes the feature vector of one point in a new dataset,
    // keeping the ground-truth label of the original point.
    Dataset embedded = new Dataset();
    for (int row = 0; row < pointCount; row++)
    {
        DataType sample = new DataType(K);
        for (int col = 0; col < K; col++)
        {
            sample.features[col] = (double)eigenReal.GetValue(row, col);
        }
        sample.label_grountTruth = arrData[row].label_grountTruth;
        embedded.AddData(sample);
    }

    // Cluster the embedded points with k-means.
    KmeansGenerator embeddedKmeans = new KmeansGenerator(embedded, convergence);
    return embeddedKmeans.Generate(K, 10, PrintLog);
}