/// <summary>
/// Descends the decision tree starting at <paramref name="node"/> for row
/// <paramref name="i"/> of <paramref name="test_fv"/> until a leaf is reached.
/// </summary>
/// <param name="test_fv">Feature matrix; indexed as [row, node.column].</param>
/// <param name="i">Row of <paramref name="test_fv"/> to classify.</param>
/// <param name="node">Node at which the descent starts.</param>
/// <param name="col_string_dic">
/// Columns present as keys are tested by equality against the node value;
/// every other column is tested as a threshold.
/// </param>
/// <returns><c>Convert.ToInt32</c> of the reached leaf's <c>value</c>.</returns>
private int classification(double[,] test_fv, int i, m_tree node, Dictionary<int, int> col_string_dic)
{
    m_tree current = node;
    while (true)
    {
        if (current.leaf_node == true)
        {
            return Convert.ToInt32(current.value);
        }

        int splitColumn = Convert.ToInt32(current.column);
        bool equalityTest = col_string_dic.ContainsKey(splitColumn);
        double cell = test_fv[i, splitColumn];

        bool takeLeft;
        if (equalityTest)
        {
            // Equality-tested column: a mismatch goes left, a match goes right.
            takeLeft = cell != current.value;
        }
        else
        {
            // Threshold column: values up to and including the split value go left.
            takeLeft = cell <= current.value;
        }

        current = takeLeft ? current.lnode : current.rnode;
    }
}
/// <summary>
/// Recursively mirrors the decision tree rooted at <paramref name="head"/> as
/// <see cref="TreeNode"/> children of <paramref name="Root"/>.
/// </summary>
/// <param name="head">Tree node to render; a null node renders nothing.</param>
/// <param name="Root">UI node that receives the rendered children.</param>
/// <param name="col_string_dic">Columns present as keys are rendered as equality splits.</param>
/// <param name="val_string">
/// Maps "column_label" keys to the numeric code stored in the tree; used to show the
/// original label of an equality split.
/// </param>
private void Postorder(m_tree head, TreeNode Root, Dictionary<int, int> col_string_dic, Dictionary<string, int> val_string)
{
    if (head == null)
    {
        return;
    }

    if (head.leaf_node == true)
    {
        Root.Nodes.Add(new TreeNode(" Class: " + head.value));
        return;
    }

    if (col_string_dic.ContainsKey(head.column))
    {
        // Equality split: look up the label whose code and column match this node.
        foreach (KeyValuePair<string, int> pair in val_string)
        {
            if (pair.Value != head.value)
            {
                continue;
            }

            // Keys are built as column + "_" + label. Split only on the FIRST underscore so
            // labels that themselves contain '_' are shown in full instead of being truncated.
            string[] parts = pair.Key.Split(new char[] { '_' }, 2);
            if (parts.Length != 2 || Convert.ToDouble(parts[0]) != head.column)
            {
                continue;
            }

            AddSplitChildren(
                head,
                Root,
                " Split Value !=" + parts[1] + " ",
                " Split Value ==" + parts[1] + " ",
                col_string_dic,
                val_string);
        }
    }
    else
    {
        // Threshold split.
        AddSplitChildren(
            head,
            Root,
            " Split Value <=" + head.value + " ",
            " Split Value >" + head.value + " ",
            col_string_dic,
            val_string);
    }
}

/// <summary>
/// Adds the left/right description nodes for one split under <paramref name="parent"/>
/// and renders both subtrees beneath them.
/// </summary>
private void AddSplitChildren(m_tree head, TreeNode parent, string leftCondition, string rightCondition, Dictionary<int, int> col_string_dic, Dictionary<string, int> val_string)
{
    TreeNode leftNode = new TreeNode("Left Node: Best Split Column: " + head.column + leftCondition);
    TreeNode rightNode = new TreeNode("Right Node:Best Split Column: " + head.column + rightCondition);
    parent.Nodes.Add(leftNode);
    parent.Nodes.Add(rightNode);
    Postorder(head.lnode, leftNode, col_string_dic, val_string);
    Postorder(head.rnode, rightNode, col_string_dic, val_string);
}
/// <summary>
/// Click handler: lets the user pick an Excel workbook, evaluates the decision tree built by
/// <c>partition</c> with up to 10 contiguous folds over the first worksheet, adds every fold's
/// tree to <c>treeView1</c> and appends the averaged metrics to <c>label2</c>..<c>label5</c>.
/// </summary>
/// <remarks>
/// Columns 1..cols-1 are read as features and column <c>cols</c> as the integer class label.
/// String cells are replaced by a per-column integer code. Only labels 0 and 1 are counted
/// in the confusion matrix.
/// </remarks>
private void button2_Click(object sender, EventArgs e)
{
    // Do nothing when the dialog is cancelled instead of re-opening a stale or null path.
    if (openFileDialog1.ShowDialog() != DialogResult.OK)
    {
        return;
    }
    filepath = openFileDialog1.FileName;

    Microsoft.Office.Interop.Excel.Application IExcel = new Microsoft.Office.Interop.Excel.Application();
    Workbook workbook = null;
    try
    {
        workbook = IExcel.Workbooks.Open(filepath, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing);

        // First sheet, all used cells as a 1-based object matrix.
        Worksheet worksheet = (Worksheet)workbook.Worksheets[1];
        Microsoft.Office.Interop.Excel.Range excelRange = worksheet.UsedRange;
        object[,] valueArray = (object[,])excelRange.get_Value(XlRangeValueDataType.xlRangeValueDefault);
        rows = worksheet.UsedRange.Rows.Count;
        cols = worksheet.UsedRange.Columns.Count;

        double accuracy_f = 0, precision_f = 0, recall_f = 0, f_measure_f = 0;
        int k_fold = 10;
        int rows_counter = 0;   // folds evaluated so far
        int whole_counter = 1;  // first row of the next fold

        while (rows_counter < k_fold && whole_counter < rows)
        {
            // The test fold is the contiguous row range [foldStart, whole_counter).
            int t = rows / k_fold;
            int foldStart = whole_counter;
            whole_counter = Math.Min(foldStart + t + 1, rows + 1);
            rows_counter++;

            int[] cl = new int[rows + 1];                        // training labels, indexed by worksheet row
            int[] test_cl = new int[t + 2];                      // test labels, 1-based
            double[,] fv = new double[rows + 1, cols + 1];       // training features, indexed by worksheet row
            double[,] test_fv = new double[t + 2, cols + 1];     // test features, 1-based
            int[] tr_name = new int[rows + 1];                   // 1-based list of training row indices
            t_rows = 1;
            r_rows = 1;
            int[] ent = new int[cols];                           // last code handed out per column
            Dictionary<string, int> val_string = new Dictionary<string, int>();
            Dictionary<int, int> col_string_dic = new Dictionary<int, int>();

            // NOTE(review): the last worksheet row (row == rows) is never read here, although
            // it can be assigned to a fold — confirm whether that is intentional.
            for (int row = 1; row <= rows - 1; ++row)
            {
                bool isTestRow = row >= foldStart && row < whole_counter;
                if (!isTestRow)
                {
                    tr_name[r_rows] = row;
                    for (int col = 1; col <= cols - 1; ++col)
                    {
                        fv[row, col] = EncodeFeatureCell(valueArray[row, col], col, ent, val_string, col_string_dic);
                    }
                    cl[row] = Convert.ToInt32(valueArray[row, cols]);
                    r_rows++;
                }
                else
                {
                    for (int col = 1; col <= cols - 1; ++col)
                    {
                        test_fv[t_rows, col] = EncodeFeatureCell(valueArray[row, col], col, ent, val_string, col_string_dic);
                    }
                    test_cl[t_rows] = Convert.ToInt32(valueArray[row, cols]);
                    t_rows++;
                }
            }

            // partition() treats every slot from 1 to Length-1 as a training row, so the index
            // must be trimmed to the rows actually collected; a spare trailing slot would be
            // read as a phantom all-zero row 0 with class 0.
            Array.Resize(ref tr_name, r_rows);

            m_tree head = new m_tree(0, 0.0, new int[1]);
            head.index_name = tr_name;
            if (r_rows > 1)
            {
                partition(head, cl, fv, col_string_dic);
            }
            else
            {
                // No training rows in this fold (tiny worksheet): fall back to a constant
                // class-0 leaf rather than asking partition() to split nothing.
                head.leaf_node = true;
                head.value = 0;
            }

            int[] assign = new int[t_rows];
            for (int i = 1; i < t_rows; i++)
            {
                assign[i] = classification(test_fv, i, head, col_string_dic);
            }

            // Confusion matrix; labels/predictions other than 0 and 1 are not counted.
            double tp = 0, tn = 0, fp = 0, fn = 0;
            for (int i = 1; i < t_rows; i++)
            {
                if (test_cl[i] == 1)
                {
                    if (assign[i] == 1) { tp++; }
                    else if (assign[i] == 0) { fn++; }
                }
                else if (test_cl[i] == 0)
                {
                    if (assign[i] == 1) { fp++; }
                    else if (assign[i] == 0) { tn++; }
                }
            }

            double counted = tp + tn + fp + fn;
            // Guard like the other metrics so an empty matrix does not turn the average into NaN.
            double accuracy = counted == 0 ? 0 : (tp + tn) / counted;
            double precision = (tp == 0 && fp == 0) ? 0 : tp / (tp + fp);
            double recall = (tp == 0 && fn == 0) ? 0 : tp / (tp + fn);
            double f_measure = (recall == 0 && precision == 0) ? 0 : (2 * recall * precision) / (recall + precision);

            accuracy_f = accuracy_f + accuracy;
            precision_f = precision_f + precision;
            recall_f = recall_f + recall;
            f_measure_f = f_measure_f + f_measure;

            TreeNode Root = new TreeNode("Classifier : " + rows_counter);
            Postorder(head, Root, col_string_dic, val_string);
            treeView1.Nodes.Add(Root);
        }

        label2.Text = label2.Text + (accuracy_f / Convert.ToDouble(rows_counter));
        label3.Text = label3.Text + (f_measure_f / Convert.ToDouble(rows_counter));
        label4.Text = label4.Text + (recall_f / Convert.ToDouble(rows_counter));
        label5.Text = label5.Text + (precision_f / Convert.ToDouble(rows_counter));
    }
    finally
    {
        // Always release the workbook and the Excel process, even when loading or training throws.
        if (workbook != null)
        {
            workbook.Close(false, Type.Missing, Type.Missing);
        }
        IExcel.Quit();
    }
}

/// <summary>
/// Converts one worksheet cell to a number: non-string cells via <c>Convert.ToDouble</c>,
/// string cells via a per-column integer code that is created on first sight and recorded in
/// <paramref name="val_string"/> (key "column_label") and <paramref name="col_string_dic"/>.
/// </summary>
private static double EncodeFeatureCell(object cell, int col, int[] ent, Dictionary<string, int> val_string, Dictionary<int, int> col_string_dic)
{
    if (!(cell is string))
    {
        return Convert.ToDouble(cell);
    }

    string key = col + "_" + Convert.ToString(cell);
    int code;
    if (val_string.TryGetValue(key, out code))
    {
        return code;
    }

    ent[col]++;
    val_string.Add(key, ent[col]);
    if (!col_string_dic.ContainsKey(col))
    {
        col_string_dic.Add(col, 1);
    }
    return ent[col];
}
/// <summary>
/// Recursively grows the decision tree below <paramref name="S"/>: the node becomes a leaf
/// when all of its rows share one class, otherwise it is split on the column/value returned
/// by <c>best_split</c> and both children are partitioned in turn.
/// </summary>
/// <param name="S">Node to grow; <c>S.index_name</c> is a 1-based list of row indices (slot 0 unused).</param>
/// <param name="cl">Class label per row index.</param>
/// <param name="fv">Feature matrix indexed as [row index, column].</param>
/// <param name="col_string_dic">Columns present as keys are split by equality, all others by threshold.</param>
private void partition(m_tree S, int[] cl, double[,] fv, Dictionary<int, int> col_string_dic)
{
    itr++;

    int[] members = S.index_name;
    int slots = members.Length;

    // An empty node cannot be split; make it a leaf and keep whatever value it was built with.
    if (slots < 2)
    {
        S.leaf_node = true;
        return;
    }

    // Pure node (this includes the single-row case): every row has the same class.
    bool pure = true;
    for (int i = 2; i < slots; i++)
    {
        if (cl[members[i]] != cl[members[i - 1]])
        {
            pure = false;
            break;
        }
    }
    if (pure)
    {
        S.leaf_node = true;
        S.value = cl[members[1]];
        return;
    }

    double[] split = best_split(members, cl, fv, col_string_dic);
    int splitColumn = Convert.ToInt32(split[0]);
    double splitValue = split[1];
    bool equalitySplit = col_string_dic.ContainsKey(splitColumn);

    int[] S_left = new int[slots];
    int[] S_right = new int[slots];
    int count_left = 1;   // next free slot; slot 0 stays unused
    int count_right = 1;
    for (int i = 1; i < slots; i++)
    {
        double cell = fv[members[i], splitColumn];
        bool goesLeft = equalitySplit ? cell != splitValue : cell <= splitValue;
        if (goesLeft)
        {
            S_left[count_left] = members[i];
            count_left++;
        }
        else
        {
            S_right[count_right] = members[i];
            count_right++;
        }
    }

    // A split that sends every row to one side makes no progress and would recurse on the
    // same row set forever (e.g. identical feature rows with different classes). Stop here
    // with a majority-class leaf instead.
    if (count_left == 1 || count_right == 1)
    {
        S.leaf_node = true;
        S.value = MajorityClass(members, cl);
        return;
    }

    Array.Resize(ref S_left, count_left);
    Array.Resize(ref S_right, count_right);
    S.index_left = S_left;
    S.index_right = S_right;
    S.column = splitColumn;
    S.value = splitValue;
    S.lnode = new m_tree(0, 0, S.index_left);
    S.rnode = new m_tree(0, 0, S.index_right);

    if (S.lnode != null)
    {
        partition(S.lnode, cl, fv, col_string_dic);
    }
    // The original re-tested S.lnode here; the right child is the one being recursed into.
    if (S.rnode != null)
    {
        partition(S.rnode, cl, fv, col_string_dic);
    }
}

/// <summary>
/// Returns the most frequent class among the rows listed in slots 1..Length-1 of
/// <paramref name="members"/>; ties go to the class that reached the top count first.
/// </summary>
private static int MajorityClass(int[] members, int[] cl)
{
    Dictionary<int, int> counts = new Dictionary<int, int>();
    int bestClass = 0;
    int bestCount = 0;
    for (int i = 1; i < members.Length; i++)
    {
        int label = cl[members[i]];
        int seen;
        counts.TryGetValue(label, out seen);
        seen++;
        counts[label] = seen;
        if (seen > bestCount)
        {
            bestCount = seen;
            bestClass = label;
        }
    }
    return bestClass;
}
/// <summary>
/// Click handler: lets the user pick an Excel workbook and evaluates a boosted ensemble of
/// decision trees (up to <c>bag</c> rounds per fold, up to 10 contiguous folds) on its first
/// worksheet, then appends the averaged metrics to <c>label2</c>..<c>label5</c>.
/// </summary>
/// <remarks>
/// Columns 1..cols-1 are read as features and column <c>cols</c> as the integer class label.
/// In the local <c>data</c> matrix, column 0 holds the worksheet row number and column
/// <c>cols</c> holds the row weight. Only labels 0 and 1 are counted in the confusion matrix.
/// NOTE(review): the weighted error and the weight updates are computed on the held-out fold
/// rows, not on the training rows — confirm this is intended.
/// </remarks>
private void button1_Click(object sender, EventArgs e)
{
    // Do nothing when the dialog is cancelled instead of re-opening a stale or null path.
    if (openFileDialog1.ShowDialog() != DialogResult.OK)
    {
        return;
    }
    filepath = openFileDialog1.FileName;

    Microsoft.Office.Interop.Excel.Application IExcel = new Microsoft.Office.Interop.Excel.Application();
    Workbook workbook = null;
    try
    {
        workbook = IExcel.Workbooks.Open(filepath, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing);

        // First sheet, all used cells as a 1-based object matrix.
        Worksheet worksheet = (Worksheet)workbook.Worksheets[1];
        Microsoft.Office.Interop.Excel.Range excelRange = worksheet.UsedRange;
        object[,] valueArray = (object[,])excelRange.get_Value(XlRangeValueDataType.xlRangeValueDefault);
        rows = worksheet.UsedRange.Rows.Count;
        cols = worksheet.UsedRange.Columns.Count;

        double[,] data = new double[rows + 1, cols + 2];
        int[] data_cls = new int[rows + 1];
        int[] ent = new int[cols];   // last code handed out per column
        Dictionary<string, int> val_string = new Dictionary<string, int>();
        Dictionary<int, int> col_string_dic = new Dictionary<int, int>();
        double initialWeight = 1 / Convert.ToDouble(rows);

        // Load the sheet once: row id, uniform start weight, encoded features, class label.
        // NOTE(review): the last worksheet row (row == rows) is never read — confirm intent.
        for (int row = 1; row <= rows - 1; ++row)
        {
            data[row, 0] = row;
            data[row, cols] = initialWeight;
            for (int col = 1; col <= cols - 1; ++col)
            {
                object cell = valueArray[row, col];
                if (cell is string)
                {
                    // String cells get a per-column integer code, created on first sight.
                    string key = col + "_" + Convert.ToString(cell);
                    int code;
                    if (val_string.TryGetValue(key, out code))
                    {
                        data[row, col] = code;
                    }
                    else
                    {
                        ent[col]++;
                        data[row, col] = ent[col];
                        val_string.Add(key, ent[col]);
                        if (!col_string_dic.ContainsKey(col))
                        {
                            col_string_dic.Add(col, 1);
                        }
                    }
                }
                else
                {
                    data[row, col] = Convert.ToDouble(cell);
                }
            }
            data_cls[row] = Convert.ToInt32(valueArray[row, cols]);
        }

        double accuracy_f = 0, precision_f = 0, recall_f = 0, f_measure_f = 0;
        int k_fold = 10;
        int rows_counter = 0;   // folds evaluated so far
        int whole_counter = 1;  // first row of the next fold

        // One generator for the whole run: a fresh time-seeded Random per round can repeat
        // the same sequence when rounds start within the same clock tick.
        Random rng = new Random();

        while (rows_counter < k_fold && whole_counter < rows)
        {
            // The test fold is the contiguous row range [foldStart, whole_counter).
            int t = rows / k_fold;
            if (t < 3)
            {
                t = 3;
            }
            int foldStart = whole_counter;
            whole_counter = Math.Min(foldStart + t + 1, rows + 1);
            rows_counter++;

            int[] data_cl = new int[rows + 1];                      // training labels, 1-based
            int[] test_cl = new int[t + 2];                         // test labels, 1-based
            double[,] data_fv = new double[rows + 1, cols + 1];     // training features, column 0 = row id
            double[,] test_fv = new double[t + 2, cols + 1];        // test features, column 0 = row id
            t_rows = 1;
            data_r_rows = 1;
            r_rows = 1;

            for (int row = 1; row <= rows - 1; ++row)
            {
                bool isTestRow = row >= foldStart && row < whole_counter;
                if (!isTestRow)
                {
                    data_fv[data_r_rows, 0] = row;
                    for (int col = 1; col <= cols - 1; ++col)
                    {
                        data_fv[data_r_rows, col] = data[row, col];
                    }
                    data_cl[data_r_rows] = data_cls[row];
                    data_r_rows++;
                }
                else
                {
                    test_fv[t_rows, 0] = row;
                    for (int col = 1; col <= cols - 1; ++col)
                    {
                        test_fv[t_rows, col] = data[row, col];
                    }
                    test_cl[t_rows] = data_cls[row];
                    t_rows++;
                }
            }

            double[] alpha = new double[bag + 1];
            double[,] prediction = new double[t_rows, bag + 1];
            int roundsTrained = 0;   // rounds whose predictions were actually recorded
            int rows_counter_2 = 1;

            while (rows_counter_2 <= bag)
            {
                // Draw the sample: the first t_2 draws are forced to be distinct training
                // rows, the remaining draws are free (with replacement).
                int t_2 = Convert.ToInt32(.632 * data_r_rows);
                int[] ind_2 = new int[data_r_rows + 1];
                bool[] picked = new bool[data_r_rows + 1];
                int tt = 0;
                for (int i = 0; i <= data_r_rows - 1; i++)
                {
                    int candidate = rng.Next(1, data_r_rows);
                    if (tt < t_2)
                    {
                        // Exact duplicate check; the old string-based test missed the most
                        // recently drawn index.
                        while (picked[candidate])
                        {
                            candidate = rng.Next(1, data_r_rows);
                        }
                        picked[candidate] = true;
                        tt++;
                    }
                    ind_2[i] = candidate;
                }

                // Materialise the sample. partition() reads fv/cl through the entries of
                // index_name, so the index must name the slots filled here (1..trainCount)
                // and must not carry a spare trailing slot (it would be read as row 0).
                int trainCount = data_r_rows - 1;
                int[] tr_name = new int[trainCount + 1];
                int[] cl = new int[data_r_rows + 1];
                double[,] fv = new double[data_r_rows + 1, cols];
                r_rows = 1;
                for (int rw = 1; rw <= trainCount; ++rw)
                {
                    int sourceRow = ind_2[rw];
                    tr_name[r_rows] = r_rows;
                    for (int c = 1; c <= cols - 1; ++c)
                    {
                        fv[r_rows, c] = data_fv[sourceRow, c];
                    }
                    cl[r_rows] = data_cl[sourceRow];
                    r_rows++;
                }

                m_tree head = new m_tree(0, 0.0, new int[1]);
                head.index_name = tr_name;
                if (trainCount > 0)
                {
                    partition(head, cl, fv, col_string_dic);
                }
                else
                {
                    // No training rows in this fold (tiny worksheet): fall back to a constant
                    // class-0 leaf rather than asking partition() to split nothing.
                    head.leaf_node = true;
                    head.value = 0;
                }

                int[] assign = new int[t_rows];
                for (int i = 1; i < t_rows; i++)
                {
                    assign[i] = classification(test_fv, i, head, col_string_dic);
                    prediction[i, rows_counter_2] = assign[i];
                }
                roundsTrained = rows_counter_2;

                // Weighted error of this round over the fold rows (weights live in data[row, cols]).
                double same = 0;
                double not_same = 0;
                for (int i = 1; i < t_rows; i++)
                {
                    double weight = data[Convert.ToInt32(test_fv[i, 0]), cols];
                    if (assign[i] == test_cl[i])
                    {
                        same = same + weight;
                    }
                    else
                    {
                        not_same = not_same + weight;
                    }
                }
                not_same = not_same / (not_same + same);

                // Stop boosting when the round is no better than chance or already perfect;
                // in both cases this round keeps alpha == 0.
                if (not_same >= 0.5)
                {
                    break;
                }
                if (not_same == 0)
                {
                    break;
                }

                alpha[rows_counter_2] = (Math.Log((1 - not_same) / not_same)) / 2;
                double c_correct = Math.Exp((-1) * alpha[rows_counter_2]);
                double c_incorrect = Math.Exp((1) * alpha[rows_counter_2]);

                // Re-weight: shrink correctly classified rows, grow misclassified ones.
                for (int i = 1; i < t_rows; i++)
                {
                    int rowId = Convert.ToInt32(test_fv[i, 0]);
                    data[rowId, cols] = data[rowId, cols] * (assign[i] == test_cl[i] ? c_correct : c_incorrect);
                }

                // Normalise the weights of the fold rows (only the filled test slots).
                double norm_wt = 0;
                for (int i = 1; i < t_rows; i++)
                {
                    norm_wt += data[Convert.ToInt32(test_fv[i, 0]), cols];
                }
                for (int i = 1; i < t_rows; i++)
                {
                    int rowId = Convert.ToInt32(test_fv[i, 0]);
                    data[rowId, cols] = data[rowId, cols] / norm_wt;
                }

                rows_counter_2++;
            }

            // Final vote per test row: compare the mean alpha of the rounds voting 1 with the
            // mean alpha of the rounds voting 0. Only rounds that were actually trained vote;
            // counting untrained rounds would add zero-weight votes for class 0.
            int[] assign_final = new int[t_rows];
            for (int i = 1; i < t_rows; i++)
            {
                double s0 = 0, s1 = 0, ss0 = 0, ss1 = 0;
                for (int j = 1; j <= roundsTrained; j++)
                {
                    if (prediction[i, j] == 1)
                    {
                        s1 = s1 + alpha[j];
                        ss1++;
                    }
                    else
                    {
                        s0 = s0 + alpha[j];
                        ss0++;
                    }
                }

                if (ss0 == 0)
                {
                    assign_final[i] = 1;
                }
                else if (ss1 == 0)
                {
                    assign_final[i] = 0;
                }
                else
                {
                    assign_final[i] = (s0 / ss0 > s1 / ss1) ? 0 : 1;
                }
            }

            // Confusion matrix; labels/predictions other than 0 and 1 are not counted.
            double tp = 0, tn = 0, fp = 0, fn = 0;
            for (int i = 1; i < t_rows; i++)
            {
                if (test_cl[i] == 1)
                {
                    if (assign_final[i] == 1) { tp++; }
                    else if (assign_final[i] == 0) { fn++; }
                }
                else if (test_cl[i] == 0)
                {
                    if (assign_final[i] == 1) { fp++; }
                    else if (assign_final[i] == 0) { tn++; }
                }
            }

            double counted = tp + tn + fp + fn;
            // Guard like the other metrics so an empty matrix does not turn the average into NaN.
            double accuracy = counted == 0 ? 0 : (tp + tn) / counted;
            double precision = (tp == 0 && fp == 0) ? 0 : tp / (tp + fp);
            double recall = (tp == 0 && fn == 0) ? 0 : tp / (tp + fn);
            double f_measure = (recall == 0 && precision == 0) ? 0 : (2 * recall * precision) / (recall + precision);

            accuracy_f = accuracy_f + accuracy;
            precision_f = precision_f + precision;
            recall_f = recall_f + recall;
            f_measure_f = f_measure_f + f_measure;
        }

        label2.Text = label2.Text + (accuracy_f / Convert.ToDouble(rows_counter));
        label3.Text = label3.Text + (f_measure_f / Convert.ToDouble(rows_counter));
        label4.Text = label4.Text + (recall_f / Convert.ToDouble(rows_counter));
        label5.Text = label5.Text + (precision_f / Convert.ToDouble(rows_counter));
    }
    finally
    {
        // Always release the workbook and the Excel process, even when loading or training throws.
        if (workbook != null)
        {
            workbook.Close(false, Type.Missing, Type.Missing);
        }
        IExcel.Quit();
    }
}