/// <summary>
/// Computes the evaluation metrics for a clustering result.
/// </summary>
/// <param name="data">The clustered dataset; its items are copied into the shared <c>dataset</c> array.</param>
/// <param name="K">Number of clusters; stored in the shared <c>nClusters</c> field.</param>
/// <returns>A <see cref="ValidationPair"/> carrying the gini and purity values.</returns>
public static ValidationPair GetValidation(Dataset data, int K)
{
    // GetGini() and GetPurity() take no arguments, so the inputs are
    // published through the shared fields before either is called.
    nClusters = K;
    dataset = data.Data.ToArray();

    // Initializer members are evaluated in textual order: gini first, then purity.
    return new ValidationPair
    {
        gini = GetGini(),
        purity = GetPurity()
    };
}
/// <summary>
/// Writes the generated label and the ground-truth label of every item to "output.txt".
/// </summary>
/// <remarks>
/// The file starts with a header line, followed by one "label,groundTruth" line per item.
/// </remarks>
/// <param name="dataset">Dataset whose items are written out.</param>
public static void OutputResult(Dataset dataset)
{
    // The using block guarantees the writer is flushed and closed even when
    // enumeration or a write throws part-way through.
    using (StreamWriter sw = new StreamWriter("output.txt"))
    {
        sw.WriteLine("生成类,原始类");
        foreach (var data in dataset.Data)
        {
            sw.WriteLine(data.label.ToString() + "," + data.label_grountTruth.ToString());
        }
    }
}
/// <summary>
/// Prompts on the console for an input file name and loads that file into a new dataset.
/// </summary>
/// <remarks>
/// Each non-blank line is split on ','. Every field except the last is parsed as a
/// feature value, and the last field is parsed as the integer ground-truth label.
/// Numbers are parsed with the invariant culture, so '.' is always the decimal separator
/// regardless of the machine's regional settings.
/// </remarks>
static void Input()
{
    Console.WriteLine("输入读入文件的文件名:");
    fileName = Console.ReadLine();
    dataset = new Dataset();

    // The fields are comma-separated, so a culture using ',' as its decimal mark
    // could never be represented in this format; parse culture-independently.
    var culture = System.Globalization.CultureInfo.InvariantCulture;

    // The using block closes the reader even if a malformed line makes parsing throw.
    using (StreamReader sr = new StreamReader(fileName))
    {
        string line;
        while ((line = sr.ReadLine()) != null)
        {
            // Skip blank lines (e.g. a trailing newline at the end of the file)
            // instead of failing on an empty label field.
            if (line.Trim().Length == 0)
            {
                continue;
            }

            var tmp = line.Split(',');
            int featureCount = tmp.Length - 1;

            DataType newdata = new DataType(featureCount);
            newdata.features = tmp.Take(featureCount)
                                  .Select(x => Convert.ToDouble(x, culture))
                                  .ToArray();
            newdata.cntFeatures = featureCount;
            newdata.label_grountTruth = Convert.ToInt32(tmp[featureCount], culture);
            dataset.AddData(newdata);
        }
    }
}
/// <summary>
/// Creates a spectral clustering generator and starts a hidden MATLAB automation instance.
/// </summary>
/// <param name="data">Dataset to cluster; its items are also cached as an array.</param>
/// <param name="conv">Convergence value used by the k-means step.</param>
/// <param name="PrintLog">Delegate invoked with progress messages.</param>
/// <param name="addr">
/// Location of the adjacency list. When null, the neighbours are computed here;
/// otherwise the value is only stored for later use.
/// </param>
public SpectralClusteringGenerator(Dataset data, double conv, PrintLogFunction PrintLog, string addr)
{
    dataset = data;
    arrData = data.Data.ToArray();
    convergence = conv;

    PrintLog("初始化MATLAB组件...");
    matlab = new MLApp.MLAppClass();
    matlab.Visible = 0; // keep the MATLAB window hidden
    PrintLog("初始化结束...");

    if (addr != null)
    {
        // NOTE(review): the existence of the file at addr is not verified here.
        addrNeighbours = addr;
    }
    else
    {
        PrintLog("初始化Spectral邻居点图, 正在寻找每个点相邻最近的数个点...");
        // presumably 9 is the number of nearest neighbours to look up per point — confirm in FindNeighbours
        FindNeighbours(9, PrintLog);
        PrintLog("初始化结束...");
    }
}
private DataType[] presentative; // representatives of the clusters

#endregion Fields

#region Constructors

/// <summary>
/// Creates a k-means generator for the given dataset.
/// </summary>
/// <param name="data">Dataset to cluster; <c>NormalizeLabel()</c> is called on it during construction.</param>
/// <param name="conv">Convergence condition, stored for later use.</param>
public KmeansGenerator(Dataset data, double conv)
{
    convergence = conv;
    dataset = data;

    // Same object as the stored dataset, so the normalization applies to it.
    data.NormalizeLabel();
}
/// <summary>
/// Runs spectral clustering into K clusters, linking every point to its
/// <paramref name="neighbourCnt"/> nearest neighbours.
/// </summary>
/// <remarks>
/// Steps: build a symmetric 0/1 adjacency matrix from the neighbour lists, let MATLAB
/// compute K eigenvectors of D^(-1/2) * W * D^(-1/2), then run k-means on the rows of
/// the eigenvector matrix.
/// </remarks>
/// <param name="K">Number of clusters, also the number of eigenvectors requested.</param>
/// <param name="neighbourCnt">Number of neighbours taken per point.</param>
/// <param name="PrintLog">Delegate invoked with progress messages.</param>
/// <returns>The result of the k-means run on the embedded points (purity and gini metrics).</returns>
public ValidationPair Generate(int K, int neighbourCnt, PrintLogFunction PrintLog)
{
    GetNeighbours(neighbourCnt);

    int pointCount = arrData.Count();
    matlab.Execute("n = " + pointCount.ToString() + ";");

    // Build the W matrix: an edge in both directions for every (point, neighbour) pair.
    double[,] adjacency = new double[pointCount, pointCount];
    for (int row = 0; row < pointCount; row++)
    {
        for (int slot = 0; slot < neighbourCnt; slot++)
        {
            int neighbour = nearestNeighbours[row][slot];
            adjacency[row, neighbour] = 1;
            adjacency[neighbour, row] = 1;
        }
    }

    // The imaginary part handed to MATLAB is an all-zero matrix of the same shape.
    double[,] adjacencyImaginary = new double[pointCount, pointCount];
    matlab.PutFullMatrix("W", "base", adjacency, adjacencyImaginary);

    // Build the D matrix: the sums of W placed on the diagonal.
    // sum(W) adds along columns; W is symmetric, so these equal the row sums.
    matlab.Execute("s = sum(W);");
    matlab.Execute("D = full(sparse(1:n, 1:n, s));");

    // Build the E matrix. Original author's note: the K largest eigenvalues of E
    // correspond to the K smallest eigenvalues of the normalized L.
    matlab.Execute("L = D - W;");
    matlab.Execute("E = D^(-1/2)*W*D^(-1/2);");
    matlab.Execute("k=" + K.ToString() + ";");

    PrintLog("正在计算特征值特征向量...");
    matlab.Execute("[Q, V] = eigs(E, k);");
    PrintLog("特征值特征向量计算完毕");

    // Fetch the eigenvectors; only the real part is used below.
    Array eigenReal = new double[pointCount, K];
    Array eigenImaginary = new double[pointCount, K];
    matlab.GetFullMatrix("Q", "base", ref eigenReal, ref eigenImaginary);

    // Each row of Q becomes one K-dimensional point that keeps its ground-truth label.
    Dataset embedded = new Dataset();
    for (int row = 0; row < pointCount; row++)
    {
        DataType point = new DataType(K);
        for (int col = 0; col < K; col++)
        {
            point.features[col] = (double)eigenReal.GetValue(row, col);
        }
        point.label_grountTruth = arrData[row].label_grountTruth;
        embedded.AddData(point);
    }

    // Cluster the embedded points with k-means.
    // NOTE(review): the meaning of the literal 10 is defined by KmeansGenerator.Generate — confirm.
    return new KmeansGenerator(embedded, convergence).Generate(K, 10, PrintLog);
}