/// <summary>
 /// Computes the evaluation metrics of a clustering result.
 /// </summary>
 /// <param name="data">Clustering result; its <c>Data</c> sequence is copied into the static <c>dataset</c> array.</param>
 /// <param name="K">Number of clusters; stored in the static <c>nClusters</c> member.</param>
 /// <returns>A <c>ValidationPair</c> whose <c>gini</c> and <c>purity</c> members hold the results of <c>GetGini()</c> and <c>GetPurity()</c>.</returns>
 public static ValidationPair GetValidation(Dataset data, int K)
 {
     // GetGini and GetPurity take no arguments; presumably they read the
     // static members assigned here (confirm against their definitions).
     dataset = data.Data.ToArray();
     nClusters = K;

     // Initializer members are evaluated in the order written: gini first, then purity.
     return new ValidationPair
     {
         gini = GetGini(),
         purity = GetPurity(),
     };
 }
 /// <summary>
 /// Writes the generated label and the ground-truth label of every record to "output.txt".
 /// </summary>
 /// <remarks>
 /// The file starts with a header line (Chinese for "generated class,original class"),
 /// followed by one "label,groundTruthLabel" line per record.
 /// </remarks>
 /// <param name="dataset">Dataset whose <c>Data</c> records are written out.</param>
 public static void OutputResult(Dataset dataset)
 {
     // The using statement closes (and flushes) the writer even when a write
     // or the enumeration of the records throws, so the file handle never leaks.
     using (StreamWriter sw = new StreamWriter("output.txt"))
     {
         sw.WriteLine("生成类,原始类");
         foreach (var data in dataset.Data)
         {
             sw.WriteLine(data.label.ToString() + "," + data.label_grountTruth.ToString());
         }
     }
 }
 /// <summary>
 /// Asks on the console for a file name and loads that comma-separated file into a new <c>Dataset</c>.
 /// </summary>
 /// <remarks>
 /// Every non-blank line is split on ','. All fields except the last are parsed as
 /// <c>double</c> features; the last field is parsed as the integer ground-truth label.
 /// Numbers are parsed with the invariant culture because ',' is already the field
 /// separator and therefore cannot act as a decimal separator.
 /// </remarks>
 static void Input()
 {
     // Prompt text is Chinese for "Enter the name of the file to read:".
     Console.WriteLine("输入读入文件的文件名:");
     fileName = Console.ReadLine();
     dataset = new Dataset();

     // The using statement releases the file handle even if a line fails to parse.
     using (StreamReader sr = new StreamReader(fileName))
     {
         string line;
         while ((line = sr.ReadLine()) != null)
         {
             // Blank lines carry no record; skipping them avoids a FormatException
             // from converting an empty label field.
             if (line.Trim().Length == 0)
             {
                 continue;
             }

             string[] tmp = line.Split(',');
             int featureCount = tmp.Length - 1;

             DataType newdata = new DataType(featureCount);
             newdata.features = tmp
                 .Take(featureCount)
                 .Select(x => Convert.ToDouble(x, System.Globalization.CultureInfo.InvariantCulture))
                 .ToArray();
             newdata.cntFeatures = featureCount;
             newdata.label_grountTruth = Convert.ToInt32(tmp[featureCount], System.Globalization.CultureInfo.InvariantCulture);
             dataset.AddData(newdata);
         }
     }
 }
 /// <summary>
 /// Creates a spectral-clustering generator: stores the dataset, creates the MATLAB
 /// automation object and prepares the neighbour information.
 /// </summary>
 /// <param name="data">Dataset to cluster; its <c>Data</c> sequence is also copied into <c>arrData</c>.</param>
 /// <param name="conv">Convergence value used by the k-means step.</param>
 /// <param name="PrintLog">Callback that receives the progress messages emitted here.</param>
 /// <param name="addr">
 /// Address of the adjacency list. When it is <c>null</c>, <c>FindNeighbours(9, PrintLog)</c>
 /// is run instead; otherwise the value is only stored (the file's existence is not checked).
 /// </param>
 public SpectralClusteringGenerator(Dataset data, double conv, PrintLogFunction PrintLog, string addr)
 {
     dataset = data;
     arrData = data.Data.ToArray();
     convergence = conv;

     // Create the MATLAB automation object; Visible = 0 keeps its window hidden.
     PrintLog("初始化MATLAB组件...");
     matlab = new MLApp.MLAppClass();
     matlab.Visible = 0;
     PrintLog("初始化结束...");

     // An adjacency-list address was supplied: remember it and skip the search.
     if (addr != null)
     {
         addrNeighbours = addr;
         return;
     }

     // No address given: search for neighbours now.
     // NOTE(review): 9 is presumably the number of nearest neighbours per point; confirm against FindNeighbours.
     PrintLog("初始化Spectral邻居点图, 正在寻找每个点相邻最近的数个点...");
     FindNeighbours(9, PrintLog);
     PrintLog("初始化结束...");
 }
        private DataType[] presentative; // Representatives of the clusters

        #endregion Fields

        #region Constructors

        /// <summary>
        /// Creates a k-means generator for the given dataset.
        /// </summary>
        /// <param name="data">Dataset to cluster; <c>NormalizeLabel()</c> is called on it during construction.</param>
        /// <param name="conv">Convergence condition.</param>
        public KmeansGenerator(Dataset data, double conv)
        {
            convergence = conv;

            // Keep the dataset, then normalize its labels through the stored reference.
            dataset = data;
            dataset.NormalizeLabel();
        }
        /// <summary>
        /// Runs spectral clustering into K clusters, linking every point to its
        /// <paramref name="neighbourCnt"/> nearest neighbours.
        /// </summary>
        /// <remarks>
        /// Steps: build a symmetric 0/1 adjacency matrix W, hand it to MATLAB, compute
        /// E = D^(-1/2) * W * D^(-1/2), take k eigenvectors of E with <c>eigs</c>, and run
        /// k-means on the rows of the eigenvector matrix.
        /// </remarks>
        /// <param name="K">Number of clusters.</param>
        /// <param name="neighbourCnt">Number of neighbours used per point.</param>
        /// <param name="PrintLog">Callback that receives progress messages; also forwarded to the k-means step.</param>
        /// <returns>The <c>ValidationPair</c> returned by the k-means run on the eigenvector rows.</returns>
        public ValidationPair Generate(int K, int neighbourCnt,PrintLogFunction PrintLog)
        {
            GetNeighbours(neighbourCnt);

            int pointCount = arrData.Count();
            matlab.Execute("n = " + pointCount + ";");

            // Build W: entry (a, b) and its mirror (b, a) are 1 when b is among the
            // first neighbourCnt entries of nearestNeighbours[a], so W is symmetric.
            double[,] adjacency = new double[pointCount, pointCount];
            for (int point = 0; point < pointCount; point++)
            {
                for (int slot = 0; slot < neighbourCnt; slot++)
                {
                    int neighbour = nearestNeighbours[point][slot];
                    adjacency[point, neighbour] = 1;
                    adjacency[neighbour, point] = 1;
                }
            }

            // PutFullMatrix takes a real and an imaginary part; the imaginary part is all zeros.
            double[,] adjacencyImaginary = new double[pointCount, pointCount];
            matlab.PutFullMatrix("W", "base", adjacency, adjacencyImaginary);

            // Degree vector: MATLAB's sum(W) adds up each column, which equals the
            // row sums here because W is symmetric.
            matlab.Execute("s = sum(W);");
            // D is the diagonal matrix carrying s on its diagonal.
            matlab.Execute("D = full(sparse(1:n, 1:n, s));");
            // L is computed but not referenced by any later command in this method.
            matlab.Execute("L = D - W;");
            // Original author's note: the K largest eigenvalues of E correspond to the
            // K smallest eigenvalues of the normalized L.
            matlab.Execute("E = D^(-1/2)*W*D^(-1/2);");
            matlab.Execute("k=" + K + ";");

            PrintLog("正在计算特征值特征向量...");
            matlab.Execute("[Q, V] = eigs(E, k);");
            PrintLog("特征值特征向量计算完毕");

            // Fetch the eigenvector matrix Q; only its real part is used below.
            Array eigenReal = new double[pointCount, K];
            Array eigenImaginary = new double[pointCount, K];
            matlab.GetFullMatrix("Q", "base", ref eigenReal, ref eigenImaginary);

            // Each row of Q becomes the feature vector of a new point that keeps the
            // ground-truth label of the original point at the same index.
            Dataset embedded = new Dataset();
            for (int row = 0; row < pointCount; row++)
            {
                DataType embeddedPoint = new DataType(K);
                for (int col = 0; col < K; col++)
                {
                    embeddedPoint.features[col] = (double)eigenReal.GetValue(row, col);
                }
                embeddedPoint.label_grountTruth = arrData[row].label_grountTruth;
                embedded.AddData(embeddedPoint);
            }

            // Cluster the embedded points with k-means.
            // NOTE(review): the meaning of the literal 10 depends on KmeansGenerator.Generate; confirm there.
            return new KmeansGenerator(embedded, convergence).Generate(K, 10, PrintLog);
        }