Exemplo n.º 1
0
        /// <summary>
        /// Computes, for each selected column, the population standard deviation
        /// (the sum of squared deviations is divided by the row count, not by row count - 1).
        /// </summary>
        /// <param name="data">Table whose cells are read as <c>data[rowIndex, columnName]</c>.</param>
        /// <param name="selectedcolumns">Names of the columns to process.</param>
        /// <param name="avg">
        /// Per-column means in the same order as <paramref name="selectedcolumns"/>.
        /// When <c>null</c>, the means are obtained from <c>data.Avg(selectedcolumns)</c>.
        /// </param>
        /// <returns>
        /// One value per entry of <paramref name="selectedcolumns"/>. When the table has no
        /// rows every entry is <see cref="double.NaN"/> (0 divided by 0).
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="data"/> or <paramref name="selectedcolumns"/> is <c>null</c>.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="avg"/> has fewer entries than <paramref name="selectedcolumns"/>.
        /// </exception>
        public static double[] Stdev(this IDataTable <DataRow> data, string[] selectedcolumns, double[] avg = null)
        {
            if (data == null)
            {
                throw new ArgumentNullException("data");
            }
            if (selectedcolumns == null)
            {
                throw new ArgumentNullException("selectedcolumns");
            }
            if (avg == null)
            {
                avg = data.Avg(selectedcolumns);
            }
            else if (avg.Length < selectedcolumns.Length)
            {
                // Fail up front instead of with an IndexOutOfRangeException half-way through.
                // A longer array is still accepted; surplus entries are simply ignored.
                throw new ArgumentException("avg must contain one mean per selected column.", "avg");
            }

            int rowcount    = data.RowCount;
            int columncount = selectedcolumns.Length;
            var result      = new double[columncount];

            for (int i = 0; i < columncount; i++)
            {
                // Both lookups are invariant across the row loop.
                string column = selectedcolumns[i];
                double mean   = avg[i];
                double sumOfSquares = 0.0;

                for (int j = 0; j < rowcount; j++)
                {
                    double deviation = data[j, column].ConvertToDouble() - mean;
                    sumOfSquares += deviation * deviation;
                }

                // Math.Sqrt is the direct form of the original Math.Pow(x, 0.5).
                result[i] = Math.Sqrt(sumOfSquares / rowcount);
            }
            return result;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Runs <c>MPPO.DataMining.KMeans</c> on the given properties and copies every entry
        /// of the returned history into a <see cref="DataSet"/> with three tables.
        /// </summary>
        /// <param name="threadpool">Passed unchanged to the <c>MPPO.DataMining.KMeans</c> constructor.</param>
        /// <param name="data">Source table; also used to compute means / deviations when needed.</param>
        /// <param name="properties">Column names to cluster on; each becomes a column of the "Centers" table.</param>
        /// <param name="maxCount">Passed to the constructor; values below 2 are replaced by 20.</param>
        /// <param name="minClusterCount">Passed to the constructor; values below 2 are replaced by 2.</param>
        /// <param name="maxClusterCount">
        /// Passed to the constructor; values below 2 are replaced by the truncated square root of
        /// <c>data.RowCount</c>.
        /// </param>
        /// <param name="m">
        /// When not NaN, used as the mean of every property; otherwise <c>data.Avg(properties)</c> is used.
        /// </param>
        /// <param name="s">
        /// When not NaN, used as the standard deviation of every property; otherwise
        /// <c>data.Stdev(properties, mean)</c> is used.
        /// </param>
        /// <param name="wt">Passed unchanged to the constructor.</param>
        /// <param name="initialmode">Passed unchanged to the constructor.</param>
        /// <param name="methodmode">Passed unchanged to the constructor.</param>
        /// <param name="maxthread">Passed to the constructor; values below 1 are replaced by 1.</param>
        /// <returns>
        /// A data set with the tables "OverView" (one row per history entry), "Centers" (one row
        /// per cluster centre of each entry) and "Results" (one row per data row of each entry),
        /// plus the relation "OverView_Centers" between the first columns of the first two tables.
        /// </returns>
        public static DataSet KMeans(object threadpool, IDataTable <DataRow> data, string[] properties, int maxCount, int minClusterCount, int maxClusterCount, double m, double s, Protocol.Structure.WaitObject wt, int initialmode, int methodmode, int maxthread)
        {
            // Replace out-of-range settings with their fallback values.
            if (minClusterCount < 2)
            {
                minClusterCount = 2;
            }
            if (maxClusterCount < 2)
            {
                maxClusterCount = (int)Math.Pow(data.RowCount, 0.5);
            }
            if (maxCount < 2)
            {
                maxCount = 20;
            }
            if (maxthread < 1)
            {
                maxthread = 1;
            }

            int propertyCount = properties.Length;

            // Per-property means: computed from the data unless a fixed value was supplied.
            double[] mean;
            if (double.IsNaN(m))
            {
                mean = data.Avg(properties);
            }
            else
            {
                mean = new double[propertyCount];
                for (int p = 0; p < propertyCount; p++)
                {
                    mean[p] = m;
                }
            }

            // Per-property standard deviations, same rule as for the means.
            double[] std;
            if (double.IsNaN(s))
            {
                std = data.Stdev(properties, mean);
            }
            else
            {
                std = new double[propertyCount];
                for (int p = 0; p < propertyCount; p++)
                {
                    std[p] = s;
                }
            }

            var engine  = new MPPO.DataMining.KMeans(threadpool, data, properties, maxCount, minClusterCount, maxClusterCount, mean, std, wt, initialmode, methodmode, maxthread);
            var history = engine.ParallelStart();

            var report = new DataSet();

            // "OverView": entry index, cluster count, AvgBWP, cost time, loop count.
            var        overview    = new DataTable("OverView");
            DataColumn overviewKey = overview.Columns.Add("序号", typeof(int));
            overview.Columns.Add("分类数", typeof(int));
            overview.Columns.Add("AvgBWP", typeof(double));
            overview.Columns.Add("耗时", typeof(double));
            overview.Columns.Add("循环次数", typeof(int));

            // "Centers": entry index, cluster label, then one column per property.
            var        centers    = new DataTable("Centers");
            DataColumn centersKey = centers.Columns.Add("序号", typeof(int));
            centers.Columns.Add("类标号", typeof(int));
            for (int p = 0; p < propertyCount; p++)
            {
                centers.Columns.Add(properties[p], typeof(double));
            }

            // "Results": entry index, row number, cluster label, BWP, B, W.
            var results = new DataTable("Results");
            results.Columns.Add("序号", typeof(int));
            results.Columns.Add("行号", typeof(int));
            results.Columns.Add("类标号", typeof(int));
            results.Columns.Add("BWP", typeof(double));
            results.Columns.Add("B", typeof(double));
            results.Columns.Add("W", typeof(double));

            report.Tables.Add(overview);
            report.Tables.Add(centers);
            report.Tables.Add(results);
            report.Relations.Add(new DataRelation("OverView_Centers", overviewKey, centersKey));

            int entryCount = history.HisResult.Count;
            int rowTotal   = data.RowCount;

            for (int entry = 0; entry < entryCount; entry++)
            {
                var outcome = history.HisResult[entry];
                var quality = history.HisReport[entry];

                overview.Rows.Add(entry, outcome.cCount, quality.AvgBWP, outcome.CostTime, outcome.LoopCount);

                for (int cluster = 0; cluster < outcome.cCount; cluster++)
                {
                    var centerRow = centers.NewRow();
                    centerRow[0] = entry;
                    centerRow[1] = cluster;
                    // Property values start after the two leading key columns.
                    for (int p = 0; p < propertyCount; p++)
                    {
                        centerRow[p + 2] = outcome.Centers[cluster, p];
                    }
                    centers.Rows.Add(centerRow);
                }

                for (int row = 0; row < rowTotal; row++)
                {
                    results.Rows.Add(entry, row, outcome.ClassNumbers[row], quality.BWP[row], quality.B[row], quality.W[row]);
                }
            }
            return report;
        }
 /// <summary>
 /// Clusters <paramref name="data"/> with <c>MPPO.DataMining.KMeans</c> and returns the
 /// history of that run as a <see cref="DataSet"/>.
 /// </summary>
 /// <param name="threadpool">Forwarded as-is to the <c>MPPO.DataMining.KMeans</c> constructor.</param>
 /// <param name="data">Table to cluster; its row count sizes the "Results" output.</param>
 /// <param name="properties">Names of the columns used for clustering.</param>
 /// <param name="maxCount">Forwarded to the constructor; replaced by 20 when below 2.</param>
 /// <param name="minClusterCount">Forwarded to the constructor; raised to 2 when below 2.</param>
 /// <param name="maxClusterCount">
 /// Forwarded to the constructor; when below 2 it becomes <c>(int)Math.Pow(data.RowCount, 0.5)</c>.
 /// </param>
 /// <param name="m">Fixed mean for every property, or NaN to use <c>data.Avg(properties)</c>.</param>
 /// <param name="s">Fixed deviation for every property, or NaN to use <c>data.Stdev(properties, mean)</c>.</param>
 /// <param name="wt">Forwarded as-is to the constructor.</param>
 /// <param name="initialmode">Forwarded as-is to the constructor.</param>
 /// <param name="methodmode">Forwarded as-is to the constructor.</param>
 /// <param name="maxthread">Forwarded to the constructor; raised to 1 when below 1.</param>
 /// <returns>
 /// Tables "OverView", "Centers" and "Results" (added in that order) and the relation
 /// "OverView_Centers" joining column 0 of "OverView" to column 0 of "Centers".
 /// </returns>
 public static DataSet KMeans(object threadpool,IDataTable<DataRow> data, string[] properties, int maxCount, int minClusterCount, int maxClusterCount, double m, double s, Protocol.Structure.WaitObject wt,int initialmode, int methodmode,int maxthread)
 {
     // Normalise the numeric settings before they reach the clustering engine.
     minClusterCount = Math.Max(minClusterCount, 2);
     if (maxClusterCount < 2)
     {
         maxClusterCount = (int)Math.Pow(data.RowCount, 0.5);
     }
     if (maxCount < 2)
     {
         maxCount = 20;
     }
     maxthread = Math.Max(maxthread, 1);

     int width = properties.Length;
     bool hasFixedMean = !double.IsNaN(m);
     bool hasFixedStd = !double.IsNaN(s);

     // The mean must be complete before the deviation is derived from it.
     double[] mean = hasFixedMean ? new double[width] : data.Avg(properties);
     if (hasFixedMean)
     {
         for (int c = 0; c < width; c++)
         {
             mean[c] = m;
         }
     }

     double[] std = hasFixedStd ? new double[width] : data.Stdev(properties, mean);
     if (hasFixedStd)
     {
         for (int c = 0; c < width; c++)
         {
             std[c] = s;
         }
     }

     var clusterer = new MPPO.DataMining.KMeans(threadpool, data, properties, maxCount, minClusterCount, maxClusterCount, mean, std, wt, initialmode, methodmode, maxthread);
     var run = clusterer.ParallelStart();

     var report = new DataSet();

     // One summary row per history entry.
     var overview = new DataTable("OverView");
     overview.Columns.AddRange(new[]
     {
         new DataColumn("序号", typeof(int)),
         new DataColumn("分类数", typeof(int)),
         new DataColumn("AvgBWP", typeof(double)),
         new DataColumn("耗时", typeof(double)),
         new DataColumn("循环次数", typeof(int))
     });

     // Two key columns followed by one double column per clustered property.
     var centers = new DataTable("Centers");
     centers.Columns.AddRange(new[]
     {
         new DataColumn("序号", typeof(int)),
         new DataColumn("类标号", typeof(int))
     });
     foreach (string property in properties)
     {
         centers.Columns.Add(new DataColumn(property, typeof(double)));
     }

     // One row per data row of each history entry.
     var results = new DataTable("Results");
     results.Columns.AddRange(new[]
     {
         new DataColumn("序号", typeof(int)),
         new DataColumn("行号", typeof(int)),
         new DataColumn("类标号", typeof(int)),
         new DataColumn("BWP", typeof(double)),
         new DataColumn("B", typeof(double)),
         new DataColumn("W", typeof(double))
     });

     report.Tables.Add(overview);
     report.Tables.Add(centers);
     report.Tables.Add(results);
     report.Relations.Add(new DataRelation("OverView_Centers", overview.Columns[0], centers.Columns[0]));

     int historyLength = run.HisResult.Count;
     int totalRows = data.RowCount;

     for (int h = 0; h < historyLength; h++)
     {
         var result = run.HisResult[h];
         var metrics = run.HisReport[h];

         overview.Rows.Add(h, result.cCount, metrics.AvgBWP, result.CostTime, result.LoopCount);

         for (int c = 0; c < result.cCount; c++)
         {
             var line = centers.NewRow();
             line[0] = h;
             line[1] = c;
             // Offset 2 skips the two key columns written above.
             for (int d = 0; d < width; d++)
             {
                 line[d + 2] = result.Centers[c, d];
             }
             centers.Rows.Add(line);
         }

         for (int r = 0; r < totalRows; r++)
         {
             results.Rows.Add(h, r, result.ClassNumbers[r], metrics.BWP[r], metrics.B[r], metrics.W[r]);
         }
     }
     return report;
 }