/// <summary>
/// Classifies every row of <c>DT</c> and writes the predicted label, as text,
/// into the <c>PRML</c> column.
/// </summary>
private void ClassifyDataSetDT()
{
    string[] rowQueries = BuildDataSetFromData();

    // Recreate the prediction column so results from an earlier run are discarded.
    bool hasPredictionColumn = DT.Columns.Contains(PRML);
    if (hasPredictionColumn)
    {
        DT.Columns.Remove(PRML);
    }
    DT.Columns.Add(PRML, typeof(string));

    // The n-th query string is used to classify the n-th row of DT.
    for (int index = 0; index < DT.Rows.Count; index++)
    {
        DataRow current = DT.Rows[index];
        Datarow query = new Datarow(rowQueries[index]);
        current[PRML] = DecisionTree.Classify(query).ToString();
    }
}
/// <summary>
/// Paint handler for the tree picture box. While <c>root</c> is still null it
/// builds the decision tree from the training CSV, measures its accuracy on the
/// test CSV, shows that accuracy in <c>accu</c> and hands the tree to
/// <c>addPatientControl2</c>. On every call it then draws the tree.
/// </summary>
/// <param name="sender">The control raising the Paint event.</param>
/// <param name="e">Paint arguments; its <c>Graphics</c> is the drawing surface.</param>
private void pictureBox1_Paint(object sender, PaintEventArgs e)
{
    if (this.root == null)
    {
        // NOTE(review): showing a modal dialog from inside a Paint handler may let the
        // handler be re-entered while the dialog is open - confirm, and consider moving
        // the one-time training out of Paint.
        MessageBox.Show("Generating tree. This might take a while", "Please wait");

        // Train on every patient from the training CSV.
        dmC.LoadCSV();
        Dictionary<String, Patient> trainData = dmC.GetPatients();
        DecisionTree<Patient> destree = new DecisionTree<Patient>(trainData);
        List<Patient> rows = new List<Patient>(trainData.Values);
        Node<Patient> t = destree.BuildTree(rows);

        // Presumably this assigns this.root, which DrawTree below relies on - TODO confirm.
        this.InitializeTree(t);

        // Evaluate on the test CSV: the last attribute of each patient is the actual label.
        dmC.LoadCSVTest();
        Dictionary<String, Patient> test = dmC.GetClassifiedPatients();
        int correct = 0;
        int total = 0;
        foreach (Patient patient in test.Values)
        {
            var attributes = patient.getAttributes();
            String actual = attributes[attributes.Length - 1];
            String predicted = destree.PrintLeaf(destree.Classify(patient, t))[1];
            if (actual.Equals(predicted))
            {
                correct++;
            }
            total++;
        }

        // An empty test set would otherwise divide 0 by 0 and display "NaN".
        double accurracy = total > 0 ? (double)correct / total : 0;
        this.accu.Text = "Accurracy: " + accurracy;

        this.addPatientControl2.SetRoot(t);
        this.addPatientControl2.SetTree(destree);
    }

    e.Graphics.SmoothingMode = SmoothingMode.AntiAlias;
    e.Graphics.TextRenderingHint = TextRenderingHint.AntiAliasGridFit;
    root.DrawTree(e.Graphics);
}
/// <summary>
/// Classifies one observation supplied as an array of column values.
/// </summary>
/// <param name="info">Values for a new row of <c>table</c>, assigned by column index.</param>
/// <returns>The value returned by <c>tree.Classify</c> for the populated row.</returns>
/// <exception cref="ArgumentNullException"><paramref name="info"/> is null.</exception>
/// <exception cref="ArgumentException">
/// <paramref name="info"/> holds more values than <c>table</c> has columns.
/// </exception>
public string Clasify(String[] info)
{
    if (info == null)
    {
        throw new ArgumentNullException("info");
    }
    if (info.Length > table.Columns.Count)
    {
        // Fail before touching the row instead of part-way through filling it.
        throw new ArgumentException("More values supplied than the table has columns.", "info");
    }

    DataRow dr = table.NewRow();
    for (int i = 0; i < info.Length; i++)
    {
        // Load the value of each column into the new row.
        dr[i] = info[i];
    }

    return tree.Classify(dr);
}
/// <summary>
/// Click handler that trains a decision tree from the training CSV, classifies
/// every record of the test CSV, records a "yes"/"no" entry per record in
/// <c>resultadosArbol</c>, echoes actual vs. predicted values to the console and
/// finally calls <c>Recorrer</c>.
/// </summary>
/// <param name="sender">The control raising the Click event.</param>
/// <param name="e">Event arguments (unused).</param>
private void TrainData_Click(object sender, EventArgs e)
{
    // Train on every record from the training CSV.
    dmC.LoadCSV();
    Dictionary<String, Dato> trainData = dmC.GetBanks();
    DecisionTree<Dato> destree = new DecisionTree<Dato>(trainData);
    List<Dato> rows = new List<Dato>(trainData.Values);
    Node<Dato> t = destree.BuildTree(rows);

    dmC.LoadCSVTest();
    Dictionary<String, Dato> test = dmC.GetClassifiedBank();
    List<String> classification = new List<string>();
    foreach (Dato sample in test.Values)
    {
        // The last attribute is treated as the actual label.
        var attributes = sample.getAttributes();
        var actual = attributes[attributes.Length - 1];

        // Classify once and reuse the message for both the list and the console.
        // Assumes Classify/PrintLeaf have no side effects - TODO confirm.
        string summary = "Actual -> " + actual + "\n" + "Predicted -> " + destree.PrintLeaf(destree.Classify(sample, t));

        classification.Add(summary);
        resultadosArbol.Add(actual == "0" ? "no" : "yes");
        Console.WriteLine(summary);
    }

    // The results window is constructed but not displayed.
    ResultClasification c = new ResultClasification(classification);
    // c.Show();
    Recorrer();
}
/// <summary>
/// Console walkthrough of the library: loads data, shows unique values and class
/// counts, partitions by a question, computes Gini impurity and information gain,
/// finds the best question, then builds, prints, queries, saves and reloads a tree.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    // SOME BASIC EXAMPLES OF USING THIS LIBRARY

    // begin of data loading
    Datarow[] testdata = Datarow.GetDatarowsFromCSV(@"C:\Users\Marián Trpkoš\source\repos\DecisionTreeClassifier\DecisionTreeClassifierV2\testdata.csv");
    Datarow[] training = Datarow.GetDatarowsFromStringArray(new string[]
    {
        "Green;3;Apple",
        "Yellow;3;Apple",
        "Red;1;Grape",
        "Red;1;Grape",
        "Yellow;3;Lemon",
    });
    // end of data loading

    // unique values found in the test data
    foreach (object val in DecisionTree.FindUniqueValues(testdata))
    {
        Console.WriteLine(val.ToString());
    }
    Console.WriteLine("----------------");

    // counts of each unique value
    foreach (Tuple<object, int> tuple in DecisionTree.GetClassCounts(testdata))
    {
        Console.WriteLine($"{tuple.Item1.ToString()}: {tuple.Item2}");
    }
    Console.WriteLine("----------------");

    // partition the test data with a question built from column 1 of the second row
    Tuple<Datarow[], Datarow[]> truefalse_rows = DecisionTree.Partition(testdata, new Question(1, testdata[1].values[1]));
    Console.WriteLine("TRUE: ");
    foreach (Datarow row in truefalse_rows.Item1)
    {
        Console.WriteLine(row.ToString()); // rows in the first (true) partition
    }
    Console.WriteLine("FALSE: ");
    foreach (Datarow row in truefalse_rows.Item2)
    {
        Console.WriteLine(row.ToString()); // rows in the second (false) partition
    }
    Console.WriteLine("----------------");

    // basic test of Gini on a set where every row is a different kind of thing
    Datarow[] lots_of_mixing = Datarow.GetDatarowsFromStringArray(new string[] { "Apple", "Orange", "Grape", "Grapefruit", "Blueberry" });
    Console.WriteLine(DecisionTree.Gini(lots_of_mixing));
    Console.WriteLine("----------------");

    // uncertainty of the training set, then the gain of asking "is column 0 Red?"
    double uncertainty = DecisionTree.Gini(training);
    Console.WriteLine(uncertainty);
    Tuple<Datarow[], Datarow[]> truefalse_training = DecisionTree.Partition(training, new Question(0, "Red"));
    Console.WriteLine(DecisionTree.InfoGain(truefalse_training.Item1, truefalse_training.Item2, uncertainty)); // higher = better question
    Console.WriteLine("----------------");

    // best question to ask for the training data
    var bestdata = DecisionTree.FindBestQuestion(training);
    Console.WriteLine(bestdata.Item1);            // gain
    Console.WriteLine(bestdata.Item2.ToString()); // question
    Console.WriteLine("----------------");

    // build and visualise a tree for the training data
    DecisionTree trainingDataTree = new DecisionTree(training);
    trainingDataTree.BuildTree();
    trainingDataTree.PrintTree();
    Console.WriteLine();

    // classifying data
    Console.WriteLine((trainingDataTree.Classify(new Datarow("Yellow;5")).ToString())); // should output: can be both apple and lemon
    Console.WriteLine((trainingDataTree.Classify(new Datarow("Red;2")).ToString()));    // should output: can be only grape
    Console.WriteLine((trainingDataTree.Classify(new Datarow("Green;10")).ToString())); // should output: can be only apple

    // NOTE(review): `path` is not declared in this method; it is presumably a member
    // defined elsewhere in the class - confirm.
    // --> save tree to .dat file
    trainingDataTree.SaveToDat(path);

    // --> load tree from .dat file
    // A separate local is required: declaring `trainingDataTree` a second time in the
    // same scope does not compile.
    DecisionTree loadedTree = DecisionTree.ReadFromDat(path);

    Console.ReadKey();
}
/// <summary>
/// Randomly moves a share of the rows into training sets, trains both the
/// hand-written tree and the library tree, and evaluates each on the rows
/// that were left over.
/// </summary>
/// <param name="percent">
/// Fraction of <c>table</c>'s rows to draw for training. Sampling stops early
/// once no rows remain, so values above 1 train on every row.
/// </param>
/// <returns>
/// A tuple of (accuracy of the hand-written tree, value returned by
/// <c>treeLibrary.Test</c>, milliseconds spent classifying with the hand-written
/// tree, milliseconds spent in <c>treeLibrary.Test</c>). The first item is 0 when
/// no rows are left for evaluation.
/// </returns>
public Tuple<double, double, double, double> TrainTree(double percent)
{
    const int columnCount = 33;
    const int labelColumn = columnCount - 1;

    DataTable copy = new DataTable();
    DataTable copylibrary = new DataTable();
    DataTable training = new DataTable();
    DataTable traininglibrary = new DataTable();

    // Mirror the first columnCount columns of `table`. The library tables use
    // double for the listed column indexes and string for the rest.
    for (int i = 0; i < columnCount; i++)
    {
        string header = Convert.ToString(table.Columns[i].ColumnName);
        bool numeric = i == 2 || i == 6 || i == 7 || (i >= 12 && i <= 14) || i >= 23;
        Type libraryType = numeric ? typeof(double) : typeof(string);

        copylibrary.Columns.Add(header, libraryType);
        traininglibrary.Columns.Add(header, libraryType);
        copy.Columns.Add(header);
        training.Columns.Add(header);
    }

    foreach (DataRow dr in table.Rows)
    {
        copy.Rows.Add(dr.ItemArray);
    }
    foreach (DataRow dr in tableTwo.Rows)
    {
        copylibrary.Rows.Add(dr.ItemArray);
    }

    // Move randomly chosen rows (same index in both tables) into the training sets.
    // The extra Count check prevents indexing into an emptied table when percent > 1.
    Random a = new Random();
    double trainingTarget = table.Rows.Count * percent;
    for (int i = 0; i < trainingTarget && copy.Rows.Count > 0; i++)
    {
        int randmNum = a.Next(0, copy.Rows.Count);
        training.Rows.Add(copy.Rows[randmNum].ItemArray);
        traininglibrary.Rows.Add(copylibrary.Rows[randmNum].ItemArray);
        copy.Rows.RemoveAt(randmNum);
        copylibrary.Rows.RemoveAt(randmNum);
    }

    tree = new DecisionTree(training);
    tree.BuildTree();

    // Time and score the hand-written tree on the rows left in `copy`.
    double correct = 0;
    Stopwatch sw = Stopwatch.StartNew();
    foreach (DataRow dr in copy.Rows)
    {
        string classification = tree.Classify(dr);
        if (classification.Equals(dr[labelColumn]))
        {
            correct++;
        }
    }
    sw.Stop();
    double timeManualTree = sw.ElapsedMilliseconds;

    // With nothing left to evaluate, report 0 rather than 0/0 (NaN).
    double impPercent = copy.Rows.Count > 0 ? correct / Convert.ToDouble(copy.Rows.Count) : 0;

    // Train the second (library) tree and time its evaluation.
    treeLibrary.TrainTree(traininglibrary);
    sw = Stopwatch.StartNew();
    double libPercent = treeLibrary.Test(copylibrary);
    sw.Stop();
    double timeLibraryTree = sw.ElapsedMilliseconds;

    return new Tuple<double, double, double, double>(impPercent, libPercent, timeManualTree, timeLibraryTree);
}