예제 #1
0
 /// <summary>
 /// Runs k-means on <c>dataset</c> (declared elsewhere) and writes the resulting
 /// purity and gini values to <c>.\{fileName}\result_kmeans.txt</c>.
 /// </summary>
 /// <param name="cnt">First argument forwarded to <c>KmeansGenerator.Generate</c>
 /// (presumably the number of clusters; confirm against KmeansGenerator).</param>
 /// <param name="conv">Value passed to the <c>KmeansGenerator</c> constructor
 /// (presumably the convergence threshold; confirm against KmeansGenerator).</param>
 static void TestKmeans(int cnt, double conv)
 {
     // The using block guarantees the writer is flushed and the file handle is released
     // even when clustering or logging throws; the original only closed it on success.
     using (StreamWriter sw = new StreamWriter(@".\" + fileName + @"\result_kmeans.txt"))
     {
         PrintLog("开始执行kmeans...");
         KmeansGenerator kmeans = new KmeansGenerator(dataset, conv);
         // NOTE(review): the meaning of the literal 10 is defined by KmeansGenerator.Generate,
         // which is not visible here.
         ValidationPair result = kmeans.Generate(cnt, 10, PrintLog);
         sw.WriteLine("purity=" + result.purity.ToString());
         sw.WriteLine("gini=" + result.gini.ToString());
         PrintLog("kmeans结束...");
     }
 }
        /// <summary>
        /// Runs spectral clustering: links every point to its nearest neighbours in a symmetric
        /// 0/1 adjacency matrix, has MATLAB compute K eigenvectors of D^(-1/2) * W * D^(-1/2),
        /// and clusters the rows of that eigenvector matrix with k-means.
        /// </summary>
        /// <param name="K">Number of clusters; also the number of eigenvectors requested from MATLAB.</param>
        /// <param name="neighbourCnt">Number of nearest neighbours linked to each point.</param>
        /// <param name="PrintLog">Callback that receives progress messages.</param>
        /// <returns>The result of the final k-means run (the purity and gini measures of the clustering).</returns>
        public ValidationPair Generate(int K, int neighbourCnt,PrintLogFunction PrintLog)
        {
            GetNeighbours(neighbourCnt);

            int pointCount = arrData.Count();
            matlab.Execute("n = " + pointCount + ";");

            // Build the adjacency matrix W: an edge is recorded in both directions,
            // so W is symmetric by construction.
            double[,] adjacency = new double[pointCount, pointCount];
            for (int point = 0; point < pointCount; point++)
            {
                for (int rank = 0; rank < neighbourCnt; rank++)
                {
                    int neighbour = nearestNeighbours[point][rank];
                    adjacency[point, neighbour] = 1;
                    adjacency[neighbour, point] = 1;
                }
            }

            // PutFullMatrix takes a real part and an imaginary part; the imaginary part stays all zero.
            Array realPart = adjacency;
            Array imaginaryPart = new double[pointCount, pointCount];
            matlab.PutFullMatrix("W", "base", realPart, imaginaryPart);

            string[] setupCommands =
            {
                // Per-node sums of W (W is symmetric, so column sums equal row sums).
                "s = sum(W);",
                // Place s on the diagonal of D.
                "D = full(sparse(1:n, 1:n, s));",
                // L is assigned in the MATLAB workspace; this method does not read it again.
                "L = D - W;",
                // The K largest eigenvalues of E correspond to the K smallest eigenvalues
                // of the normalised L.
                "E = D^(-1/2)*W*D^(-1/2);",
                "k=" + K + ";"
            };
            foreach (string command in setupCommands)
            {
                matlab.Execute(command);
            }

            PrintLog("正在计算特征值特征向量...");
            matlab.Execute("[Q, V] = eigs(E, k);");
            PrintLog("特征值特征向量计算完毕");

            // Fetch the eigenvector matrix Q; only its real part is used below.
            Array eigenReal = new double[pointCount, K];
            Array eigenImag = new double[pointCount, K];
            matlab.GetFullMatrix("Q", "base", ref eigenReal, ref eigenImag);

            // Each row of Q becomes the K-dimensional feature vector of the matching point,
            // which keeps its original ground-truth label.
            Dataset embedded = new Dataset();
            for (int row = 0; row < pointCount; row++)
            {
                DataType sample = new DataType(K);
                for (int col = 0; col < K; col++)
                {
                    sample.features[col] = (double)eigenReal.GetValue(row, col);
                }
                sample.label_grountTruth = arrData[row].label_grountTruth;
                embedded.AddData(sample);
            }

            // Cluster the embedded points with k-means.
            // NOTE(review): the meaning of the literal 10 is defined by KmeansGenerator.Generate,
            // which is not visible here.
            KmeansGenerator clusterer = new KmeansGenerator(embedded, convergence);
            return clusterer.Generate(K, 10, PrintLog);
        }