/// <summary>
/// For each selected column, computes the square root of the mean squared
/// deviation of the column's values from a per-column reference value.
/// </summary>
/// <param name="data">Table read through its <c>[row, columnName]</c> indexer and <c>RowCount</c>.</param>
/// <param name="selectedcolumns">Column names to process; the result has one entry per name, in the same order.</param>
/// <param name="avg">
/// Reference value for each column, indexed like <paramref name="selectedcolumns"/>.
/// When null it is obtained from <c>data.Avg(selectedcolumns)</c>.
/// </param>
/// <returns>
/// An array with one entry per selected column. The squared deviations are
/// divided by the row count (not row count minus one), so with zero rows
/// every entry is NaN.
/// </returns>
public static double[] Stdev(this IDataTable<DataRow> data, string[] selectedcolumns, double[] avg = null)
{
    // Fall back to the table's own per-column averages when none are supplied.
    avg = avg ?? data.Avg(selectedcolumns);

    int rows = data.RowCount;
    int columns = selectedcolumns.Length;
    var deviations = new double[columns];

    for (int c = 0; c < columns; c++)
    {
        string column = selectedcolumns[c];

        // Accumulate the sum of squared deviations directly in the output slot.
        for (int r = 0; r < rows; r++)
        {
            deviations[c] += Math.Pow(data[r, column].ConvertToDouble() - avg[c], 2);
        }

        // Mean of the squares, then square root.
        deviations[c] = Math.Pow(deviations[c] / rows, 0.5);
    }

    return deviations;
}
/// <summary>
/// Runs <c>MPPO.DataMining.KMeans</c> on the given columns and copies its
/// per-run history into a <see cref="DataSet"/> containing the tables
/// "OverView", "Centers" and "Results".
/// </summary>
/// <param name="threadpool">Passed through unchanged to the clustering object.</param>
/// <param name="data">Source table; also supplies the row count used for the "Results" table.</param>
/// <param name="properties">Column names to cluster on; each becomes a double column in "Centers".</param>
/// <param name="maxCount">Passed to the clustering object; values below 2 are replaced by 20.</param>
/// <param name="minClusterCount">Passed to the clustering object; values below 2 are raised to 2.</param>
/// <param name="maxClusterCount">Passed to the clustering object; values below 2 are replaced by the truncated square root of the row count.</param>
/// <param name="m">When not NaN, used as the mean for every property; otherwise <c>data.Avg(properties)</c> is used.</param>
/// <param name="s">When not NaN, used as the standard deviation for every property; otherwise <c>data.Stdev(properties, mean)</c> is used.</param>
/// <param name="wt">Passed through unchanged to the clustering object.</param>
/// <param name="initialmode">Passed through unchanged to the clustering object.</param>
/// <param name="methodmode">Passed through unchanged to the clustering object.</param>
/// <param name="maxthread">Passed to the clustering object; values below 1 are raised to 1.</param>
/// <returns>
/// A data set with one "OverView" row per history entry, one "Centers" row per
/// cluster of each entry, and one "Results" row per data row of each entry.
/// "Centers" is related to "OverView" through the "OverView_Centers" relation
/// on their first columns.
/// </returns>
public static DataSet KMeans(object threadpool, IDataTable<DataRow> data, string[] properties, int maxCount, int minClusterCount, int maxClusterCount, double m, double s, Protocol.Structure.WaitObject wt, int initialmode, int methodmode, int maxthread)
{
    // Replace out-of-range settings with their fallback values.
    if (minClusterCount < 2)
    {
        minClusterCount = 2;
    }

    if (maxClusterCount < 2)
    {
        // NOTE(review): with fewer than 4 rows this yields a value below
        // minClusterCount; how the clustering object treats that is not
        // visible here - confirm.
        maxClusterCount = (int)Math.Pow(data.RowCount, 0.5);
    }

    if (maxCount < 2)
    {
        maxCount = 20;
    }

    if (maxthread < 1)
    {
        maxthread = 1;
    }

    int propertyCount = properties.Length;

    // Per-property mean: either the caller's constant or the column averages.
    double[] mean;
    if (double.IsNaN(m))
    {
        mean = data.Avg(properties);
    }
    else
    {
        mean = new double[propertyCount];
        for (int p = 0; p < propertyCount; p++)
        {
            mean[p] = m;
        }
    }

    // Per-property standard deviation, chosen the same way.
    double[] std;
    if (double.IsNaN(s))
    {
        std = data.Stdev(properties, mean);
    }
    else
    {
        std = new double[propertyCount];
        for (int p = 0; p < propertyCount; p++)
        {
            std[p] = s;
        }
    }

    var clustering = new MPPO.DataMining.KMeans(threadpool, data, properties, maxCount, minClusterCount, maxClusterCount, mean, std, wt, initialmode, methodmode, maxthread);
    var history = clustering.ParallelStart();

    var report = new DataSet();

    // One row per history entry.
    // Columns: run index, cluster count, AvgBWP, elapsed time, loop count.
    var overview = new DataTable("OverView");
    overview.Columns.Add("序号", typeof(int));
    overview.Columns.Add("分类数", typeof(int));
    overview.Columns.Add("AvgBWP", typeof(double));
    overview.Columns.Add("耗时", typeof(double));
    overview.Columns.Add("循环次数", typeof(int));

    // One row per cluster center.
    // Columns: run index, cluster label, then one column per property.
    var centers = new DataTable("Centers");
    centers.Columns.Add("序号", typeof(int));
    centers.Columns.Add("类标号", typeof(int));
    for (int p = 0; p < propertyCount; p++)
    {
        centers.Columns.Add(properties[p], typeof(double));
    }

    // One row per data row and run.
    // Columns: run index, row number, cluster label, BWP, B, W.
    var results = new DataTable("Results");
    results.Columns.Add("序号", typeof(int));
    results.Columns.Add("行号", typeof(int));
    results.Columns.Add("类标号", typeof(int));
    results.Columns.Add("BWP", typeof(double));
    results.Columns.Add("B", typeof(double));
    results.Columns.Add("W", typeof(double));

    report.Tables.Add(overview);
    report.Tables.Add(centers);
    report.Tables.Add(results);
    report.Relations.Add(new DataRelation("OverView_Centers", overview.Columns[0], centers.Columns[0]));

    int runCount = history.HisResult.Count;
    int rowCount = data.RowCount;

    for (int run = 0; run < runCount; run++)
    {
        var runResult = history.HisResult[run];
        var runReport = history.HisReport[run];

        // The overview (parent) row goes in before the center rows that refer to it.
        overview.Rows.Add(run, runResult.cCount, runReport.AvgBWP, runResult.CostTime, runResult.LoopCount);

        for (int cluster = 0; cluster < runResult.cCount; cluster++)
        {
            var centerRow = centers.NewRow();
            centerRow[0] = run;
            centerRow[1] = cluster;

            // Property values start after the two leading index columns.
            for (int p = 0; p < propertyCount; p++)
            {
                centerRow[p + 2] = runResult.Centers[cluster, p];
            }

            centers.Rows.Add(centerRow);
        }

        for (int row = 0; row < rowCount; row++)
        {
            results.Rows.Add(run, row, runResult.ClassNumbers[row], runReport.BWP[row], runReport.B[row], runReport.W[row]);
        }
    }

    return report;
}
/// <summary>
/// Clusters the selected columns with <c>MPPO.DataMining.KMeans</c> and
/// returns the recorded history as a <see cref="DataSet"/> made of the
/// "OverView", "Centers" and "Results" tables.
/// </summary>
/// <param name="threadpool">Handed to the clustering object as-is.</param>
/// <param name="data">Table to cluster; its row count also sizes the "Results" output.</param>
/// <param name="properties">Names of the columns to cluster on; "Centers" gets one double column per name.</param>
/// <param name="maxCount">Handed to the clustering object; anything below 2 becomes 20.</param>
/// <param name="minClusterCount">Handed to the clustering object; never less than 2.</param>
/// <param name="maxClusterCount">Handed to the clustering object; anything below 2 becomes the truncated square root of the row count.</param>
/// <param name="m">If not NaN, the mean used for every property; if NaN, <c>data.Avg(properties)</c> supplies the means.</param>
/// <param name="s">If not NaN, the standard deviation used for every property; if NaN, <c>data.Stdev(properties, mean)</c> supplies them.</param>
/// <param name="wt">Handed to the clustering object as-is.</param>
/// <param name="initialmode">Handed to the clustering object as-is.</param>
/// <param name="methodmode">Handed to the clustering object as-is.</param>
/// <param name="maxthread">Handed to the clustering object; never less than 1.</param>
/// <returns>
/// The report data set. "OverView" holds one row per history entry, "Centers"
/// one row per cluster of each entry, "Results" one row per data row of each
/// entry; the "OverView_Centers" relation joins the first columns of
/// "OverView" and "Centers".
/// </returns>
public static DataSet KMeans(object threadpool, IDataTable<DataRow> data, string[] properties, int maxCount, int minClusterCount, int maxClusterCount, double m, double s, Protocol.Structure.WaitObject wt, int initialmode, int methodmode, int maxthread)
{
    // Normalise the numeric settings before they reach the clustering object.
    minClusterCount = Math.Max(minClusterCount, 2);
    if (maxClusterCount < 2)
    {
        // NOTE(review): for tables with fewer than 4 rows this default is
        // smaller than minClusterCount; the clustering object's handling of
        // that case cannot be seen from here - confirm.
        maxClusterCount = (int)Math.Pow(data.RowCount, 0.5);
    }
    if (maxCount < 2)
    {
        maxCount = 20;
    }
    maxthread = Math.Max(maxthread, 1);

    int dimension = properties.Length;
    int index;

    // A non-NaN m overrides the computed column means with one constant.
    double[] mean;
    if (double.IsNaN(m))
    {
        mean = data.Avg(properties);
    }
    else
    {
        mean = new double[dimension];
        for (index = 0; index < dimension; index++)
        {
            mean[index] = m;
        }
    }

    // A non-NaN s does the same for the standard deviations.
    double[] std;
    if (double.IsNaN(s))
    {
        std = data.Stdev(properties, mean);
    }
    else
    {
        std = new double[dimension];
        for (index = 0; index < dimension; index++)
        {
            std[index] = s;
        }
    }

    var worker = new MPPO.DataMining.KMeans(threadpool, data, properties, maxCount, minClusterCount, maxClusterCount, mean, std, wt, initialmode, methodmode, maxthread);
    var outcome = worker.ParallelStart();

    var report = new DataSet();

    // "OverView": run index, cluster count, AvgBWP, elapsed time, loop count.
    var overviewTable = new DataTable("OverView");
    overviewTable.Columns.Add(new DataColumn("序号", typeof(int)));
    overviewTable.Columns.Add(new DataColumn("分类数", typeof(int)));
    overviewTable.Columns.Add(new DataColumn("AvgBWP", typeof(double)));
    overviewTable.Columns.Add(new DataColumn("耗时", typeof(double)));
    overviewTable.Columns.Add(new DataColumn("循环次数", typeof(int)));

    // "Centers": run index, cluster label, then the property columns in order.
    var centerTable = new DataTable("Centers");
    centerTable.Columns.Add(new DataColumn("序号", typeof(int)));
    centerTable.Columns.Add(new DataColumn("类标号", typeof(int)));
    foreach (string property in properties)
    {
        centerTable.Columns.Add(new DataColumn(property, typeof(double)));
    }

    // "Results": run index, row number, cluster label, BWP, B, W.
    var resultTable = new DataTable("Results");
    resultTable.Columns.Add(new DataColumn("序号", typeof(int)));
    resultTable.Columns.Add(new DataColumn("行号", typeof(int)));
    resultTable.Columns.Add(new DataColumn("类标号", typeof(int)));
    resultTable.Columns.Add(new DataColumn("BWP", typeof(double)));
    resultTable.Columns.Add(new DataColumn("B", typeof(double)));
    resultTable.Columns.Add(new DataColumn("W", typeof(double)));

    report.Tables.Add(overviewTable);
    report.Tables.Add(centerTable);
    report.Tables.Add(resultTable);
    report.Relations.Add(new DataRelation("OverView_Centers", overviewTable.Columns[0], centerTable.Columns[0]));

    int entries = outcome.HisResult.Count;
    int totalRows = data.RowCount;

    for (int entry = 0; entry < entries; entry++)
    {
        var clustered = outcome.HisResult[entry];
        var quality = outcome.HisReport[entry];

        // Parent row first, so the related center rows have something to point at.
        overviewTable.Rows.Add(entry, clustered.cCount, quality.AvgBWP, clustered.CostTime, clustered.LoopCount);

        for (int label = 0; label < clustered.cCount; label++)
        {
            var line = centerTable.NewRow();
            line[0] = entry;
            line[1] = label;
            for (int d = 0; d < dimension; d++)
            {
                // Offset by the two leading index columns.
                line[d + 2] = clustered.Centers[label, d];
            }
            centerTable.Rows.Add(line);
        }

        for (int rowNumber = 0; rowNumber < totalRows; rowNumber++)
        {
            resultTable.Rows.Add(entry, rowNumber, clustered.ClassNumbers[rowNumber], quality.BWP[rowNumber], quality.B[rowNumber], quality.W[rowNumber]);
        }
    }

    return report;
}