//INPUT : list of H(fi)
//PROCESS : select 1 candidate, return its column name
/// <summary>
/// Scores every candidate column in <paramref name="list_F"/> with the scoring
/// rule selected by <paramref name="method"/> and returns the name of the
/// candidate with the highest score.
/// </summary>
/// <remarks>
/// Side effects: clears and refills the shared <c>temp_listEntropy</c> with one
/// entry per candidate, and stores the winning name in
/// <c>temp_colWithHigestEntropy</c>.
/// If <paramref name="method"/> matches none of the known selection constants,
/// the score of each candidate is whatever a freshly created str_double holds.
/// </remarks>
/// <param name="method">One of the methodSelection_* constants.</param>
/// <param name="dt">Data table passed through to the scoring helpers.</param>
/// <param name="list_F">Names of the candidate columns still available.</param>
/// <param name="list_S">Names of the columns already selected.</param>
/// <param name="list_entropy">Per-column entropies, forwarded to the MIFS / JMIM / CMIM helpers.</param>
/// <param name="list_MI">Pairwise values forwarded to the MIFS / JMIM / CMIM helpers (presumably mutual information - confirm against the helpers).</param>
/// <returns>
/// The name of the best-scoring candidate (the first one wins ties, because the
/// comparison is a strict "greater than"), or an empty string when there is no
/// candidate, no score is comparable, or an exception was caught and reported.
/// </returns>
public static string select1Candidate_entropy(int method, DataTable dt, List<string> list_F, List<string> list_S, List<str_double> list_entropy, List<str2_double> list_MI)
{
    // Working copy of the selected set: each candidate is appended for its own
    // evaluation and removed again afterwards.
    List<string> list_predict = new List<string>();
    list_predict.AddRange(list_S);
    try
    {
        temp_listEntropy.Clear();
        temp_colWithHigestEntropy = "";

        // Negative infinity instead of a magic sentinel (-9999), so that any
        // finite score, however low, can still be picked as the best one.
        double highestQuality = double.NegativeInfinity;

        foreach (string f in list_F)
        {
            //----- test 1 candidate ---------
            list_predict.Add(f);

            str_double new_data = new str_double();
            new_data.str = f;

            if (method == methodSelection_Greedy)
            {
                // Greedy: score is computed on the selected columns plus this candidate.
                new_data.v = calJointEntropy(dt, list_predict);
            }
            else if (method == methodSelection_MIFS)
            {
                new_data.v = candidate_calMIFS(dt, f, list_S, list_entropy, list_MI);
            }
            else if (method == methodSelection_JMIM)
            {
                new_data.v = candidate_calMAXIMIN(miximinMethod_JMIM, dt, f, list_S, list_entropy, list_MI);
            }
            else if (method == methodSelection_CMIM)
            {
                new_data.v = candidate_calMAXIMIN(miximinMethod_CMIM, dt, f, list_S, list_entropy, list_MI);
            }

            temp_listEntropy.Add(new_data);

            if (new_data.v > highestQuality)
            {
                temp_colWithHigestEntropy = new_data.str;
                highestQuality = new_data.v;
            }

            // Undo the temporary append before testing the next candidate.
            list_predict.RemoveAt(list_predict.Count - 1);
        }
    }
    catch (Exception ex)
    {
        TheSys.showError(ex);
        temp_colWithHigestEntropy = "";
    }
    return temp_colWithHigestEntropy;
}
/// <summary>
/// Computes the entropy of a single column over all rows of the table.
/// Cell values are compared by their string representation.
/// </summary>
/// <param name="dt">Table whose rows are analysed.</param>
/// <param name="dc">Column of <paramref name="dt"/> to analyse.</param>
/// <returns>
/// A str_double whose <c>str</c> is the column name and whose <c>v</c> is the
/// negated sum of <c>calPossibleLog(count, totalRows)</c> over every distinct
/// value. If an exception occurs it is reported through TheSys.showError and
/// the str_double is returned as it was at that point.
/// </returns>
public static str_double calEntropyOfColumn(DataTable dt, DataColumn dc)
{
    str_double entropy = new str_double();
    try
    {
        //--- build unique list ------------------------------------------------
        // list_unique keeps the counters in first-seen order (so the summation
        // order below is unchanged); the dictionary replaces the per-row linear
        // scan of that list with a hashed lookup.
        List<str_int> list_unique = new List<str_int>();
        Dictionary<string, str_int> lookup = new Dictionary<string, str_int>();

        foreach (DataRow dr in dt.Rows)
        {
            string current_v = dr[dc].ToString();
            str_int counter;
            if (lookup.TryGetValue(current_v, out counter))
            {
                // The same str_int instance is held by both collections, so the
                // increment is visible when list_unique is walked below.
                counter.i++;
            }
            else
            {
                counter = new str_int() { str = current_v, i = 1 };
                lookup.Add(current_v, counter);
                list_unique.Add(counter);
            }
        }

        //--- Compute Entropy ------------------------------------------------
        double prob_log_cummulative = 0;
        int total_row = dt.Rows.Count;
        foreach (str_int uni in list_unique)
        {
            // calPossibleLog presumably returns p * log(p) for p = count / total - confirm.
            prob_log_cummulative += calPossibleLog(uni.i, total_row);
        }

        entropy.str = dc.ColumnName;
        entropy.v = -prob_log_cummulative;
    }
    catch (Exception ex)
    {
        TheSys.showError(ex);
    }
    return entropy;
}
// ---- Legacy Feature helpers (disabled, kept for reference) ----

//Threshold End - Start
//public static Feature getFeature(string selected_feature, DataTable dt)
//{
//    Feature f = new Feature();
//    f.name = selected_feature;
//    try
//    {
//        double start = TheTool.getDouble(dt.Rows[0][selected_feature].ToString());
//        double end = TheTool.getDouble(dt.Rows[dt.Rows.Count - 1][selected_feature].ToString());
//        if (end > start) { f.opt = ">="; }
//        else { f.opt = "<="; }
//        //-----
//        start = Math.Round(start, 2);
//        end = Math.Round(end, 2);
//        double v = (start + end) / 2;
//        f.v = Math.Round(v, 2);
//    }
//    catch (Exception ex) { TheSys.showError(ex); }
//    return f;
//}

////indices is row number
//public static Feature getFeature(string selected_feature, DataTable dt_concat
//    , List<int> start_indices, List<int> end_indices)
//{
//    Feature f = new Feature();
//    f.name = selected_feature;
//    try
//    {
//        double start = TheTool.dataTable_getAverage(dt_concat, start_indices, selected_feature);
//        double end = TheTool.dataTable_getAverage(dt_concat, end_indices, selected_feature);
//        if (end > start) { f.opt = ">="; }
//        else { f.opt = "<="; }
//        //-----
//        start = Math.Round(start, 2);
//        end = Math.Round(end, 2);
//        double v = (start + end) / 2;
//        f.v = Math.Round(v, 2);
//    }
//    catch (Exception ex) { TheSys.showError(ex); }
//    return f;
//}

////Feature Name with Threshold by DataTable (First - End)
//public static Feature getFeature(string s)
//{
//    Feature f = new Feature();
//    f.name = s;
//    return f;
//}

//public static List<string> Feature_getListName(List<Feature> list_f)
//{
//    List<string> list = new List<string>();
//    foreach (Feature f in list_f) { list.Add(f.name); }
//    return list;
//}

//public static List<string> Feature_getListFullString(List<Feature> list_f)
//{
//    List<string> list = new List<string>();
//    foreach (Feature f in list_f) { list.Add(f.name + " " + f.opt + " " + f.v); }
//    return list;
//}

//H(X)
//The table contains only the useful columns; values are already discretized.
//All rows are analysed.
/// <summary>
/// Computes the entropy of every column of <paramref name="dt"/> by calling
/// calEntropyOfColumn on each one.
/// </summary>
/// <remarks>
/// Side effect: <c>temp_colWithHigestEntropy</c> is set to the name of the
/// column with the highest entropy strictly greater than 0 (first one wins
/// ties). It is reset to an empty string on entry, so a name left over from an
/// earlier call cannot survive when no column qualifies. It is also an empty
/// string after a caught exception.
/// </remarks>
/// <param name="dt">Table whose columns are analysed.</param>
/// <returns>
/// One str_double per column, in column order. If an exception is caught it is
/// reported through TheSys.showError and the entries collected so far are
/// returned.
/// </returns>
public static List<str_double> calEntropy(DataTable dt)
{
    List<str_double> list_entropy = new List<str_double>();
    try
    {
        // Start from a clean state, as select1Candidate_entropy does.
        temp_colWithHigestEntropy = "";
        double highestEntropy = 0;

        foreach (DataColumn dc in dt.Columns)
        {
            str_double new_data = calEntropyOfColumn(dt, dc);
            list_entropy.Add(new_data);

            if (new_data.v > highestEntropy)
            {
                temp_colWithHigestEntropy = new_data.str;
                highestEntropy = new_data.v;
            }
        }
    }
    catch (Exception ex)
    {
        TheSys.showError(ex);
        temp_colWithHigestEntropy = "";
    }
    return list_entropy;
}