public double DecisionTreeAccuracyPercentageLib() { DataTable data = GenerateTestingDataTableLib(); DataTable symbols = codebook.Apply(data); int[][] inputs = DataTableToMatrix(symbols, new string[] { "CAP SHAPE", "CAP SURFACE", "CAP COLOR", "BRUISES", "ODOR", "GILL ATTACHMENT", "GILL SPACING", "GILL SIZE", "GILL COLOR", "STALK SHAPE", "STALK ROOT", "STALK SURFACE ABOVE RING", "STALK SURFACE BELOW RING", "STALK COLOR ABOVE RING", "STALK COLOR BELOW RING", "VEIL TYPE", "VEIL COLOR", "RING NUMBER", "RING TYPE", "SPORE PRINT COLOR", "POPULATION", "HABITAT" }); int[][] mOutputs = DataTableToMatrix(symbols, new string[] { "TYPE" }); int[] outputs = new int[mOutputs.Length]; for (int i = 0; i < mOutputs.Length; i++) { outputs[i] = mOutputs[i][0]; } double error = new ZeroOneLoss(outputs).Loss(decisionTreeLib.Decide(inputs)); return(1 - error); }
static void Main(string[] args) { // Create a new reader, opening a given path ExcelReader excel = new ExcelReader("Intake Inf Cohort 2017 - Training Set.xlsx"); ExcelReader excelTest = new ExcelReader("Intake Inf Cohort 2017 - Test Set.xlsx"); // Afterwards, we can query the file for all // worksheets within the specified workbook: string[] sheets = excel.GetWorksheetList(); string[] sheetsTest = excelTest.GetWorksheetList(); // Finally, we can request an specific sheet: DataTable data = excel.GetWorksheet(sheets[0]); DataTable dataTest = excelTest.GetWorksheet(sheets[0]); // Loop through each column in data foreach (DataColumn column in data.Columns) { // Replace empty with underscore column.ColumnName = column.ColumnName.Replace(" ", "_"); } // Create a new codification codebook to // convert strings into integer symbols Codification codibook = new Codification(data); // Set codibook Codification = codibook; // Translate our training data into integer symbols using our codebook: DataTable symbols = codibook.Apply(data); int[][] inputs = symbols.ToJagged <int>( codibook.Columns[5].ColumnName, codibook.Columns[7].ColumnName, codibook.Columns[8].ColumnName, codibook.Columns[9].ColumnName, codibook.Columns[12].ColumnName, codibook.Columns[13].ColumnName, codibook.Columns[14].ColumnName, codibook.Columns[15].ColumnName, codibook.Columns[16].ColumnName, codibook.Columns[20].ColumnName, codibook.Columns[29].ColumnName, codibook.Columns[30].ColumnName, codibook.Columns[34].ColumnName ); int[] outputs = symbols.ToMatrix <int>(codibook.Columns[6].ColumnName).GetColumn(0); // Create a teacher ID3 algorithm var id3 = new ID3Learning() { new DecisionVariable(codibook.Columns[5].ColumnName, 2), new DecisionVariable(codibook.Columns[7].ColumnName, codibook.Columns[7].NumberOfSymbols), new DecisionVariable(codibook.Columns[8].ColumnName, codibook.Columns[8].NumberOfSymbols), new DecisionVariable(codibook.Columns[9].ColumnName, 3), new DecisionVariable(codibook.Columns[12].ColumnName, 10), new DecisionVariable(codibook.Columns[13].ColumnName, 10), new DecisionVariable(codibook.Columns[14].ColumnName, 10), new DecisionVariable(codibook.Columns[15].ColumnName, 10), new DecisionVariable(codibook.Columns[16].ColumnName, 2), new DecisionVariable(codibook.Columns[20].ColumnName, 2), new DecisionVariable(codibook.Columns[29].ColumnName, 2), new DecisionVariable(codibook.Columns[30].ColumnName, 2), new DecisionVariable(codibook.Columns[34].ColumnName, 2), }; // Learn the training instances! Accord.MachineLearning.DecisionTrees.DecisionTree tree = id3.Learn(inputs, outputs); // Create a console table for display ConsoleTable table = new ConsoleTable("Studentnumber", "Advice", "Conclusion"); // Loop through each row in data foreach (DataRow row in dataTest.Rows) { // The tree can now be queried for new examples through // its decide method. For example, we can create a query int[] query = null; try { query = codibook.Transform(new[, ] { { codibook.Columns[5].ColumnName, row.ItemArray[5].ToString() }, { codibook.Columns[7].ColumnName, row.ItemArray[7].ToString() }, { codibook.Columns[8].ColumnName, row.ItemArray[8].ToString() }, { codibook.Columns[9].ColumnName, row.ItemArray[9].ToString() }, { codibook.Columns[12].ColumnName, row.ItemArray[12].ToString() }, { codibook.Columns[13].ColumnName, row.ItemArray[13].ToString() }, { codibook.Columns[14].ColumnName, row.ItemArray[14].ToString() }, { codibook.Columns[15].ColumnName, row.ItemArray[15].ToString() }, { codibook.Columns[16].ColumnName, row.ItemArray[16].ToString() }, { codibook.Columns[20].ColumnName, row.ItemArray[20].ToString() }, { codibook.Columns[29].ColumnName, row.ItemArray[29].ToString() }, { codibook.Columns[30].ColumnName, row.ItemArray[30].ToString() }, { codibook.Columns[34].ColumnName, row.ItemArray[34].ToString() }, }); } catch (Exception) { // Show the result of skipped students var studentnumber = row.ItemArray[0].ToString(); var advice = row.ItemArray[6].ToString(); var conclusion = "(Twijfel)"; table.AddRow(studentnumber, advice, conclusion); continue; } // And then predict the label using int predicted = tree.Decide(query); // Any predictions off are ignored for consistency if (predicted != -1) { // We can translate it back to strings using string answer = codibook.Revert("advies", predicted); // Show the result in the output var studentnumber = row.ItemArray[0].ToString(); var advice = row.ItemArray[6].ToString(); var conclusion = answer; table.AddRow(studentnumber, advice, conclusion); } else { // Show the result of skipped students var studentnumber = row.ItemArray[0].ToString(); var advice = row.ItemArray[6].ToString(); var conclusion = "(Twijfel)"; table.AddRow(studentnumber, advice, conclusion); } } // Write the table in console table.Write(); // Read Key Console.ReadKey(); }
public string Decide(Accord.MachineLearning.DecisionTrees.DecisionTree tree, Codification codebook, params double[] query) { int predicted = tree.Decide(query); return(codebook.Revert(Constants.RESULT_COLUMN_NAME, predicted)); }
public void same_input_different_output() { double[][] inputs = new double[][] { new double[] { 1 }, new double[] { 0 }, new double[] { 2 }, new double[] { 3 }, new double[] { 0 }, }; int[] outputs = new int[] { 11, 00, 22, 33, 01 }; DecisionVariable[] variables = { new DecisionVariable("x", DecisionVariableKind.Continuous) }; DecisionTree decisionTree = new DecisionTree(variables, 34); C45Learning c45Learning = new C45Learning(decisionTree) { Join = 10, MaxHeight = 10 }; c45Learning.Run(inputs, outputs); // System.AggregateException thrown here int[] actual = decisionTree.Decide(inputs); Assert.AreEqual(11, actual[0]); Assert.AreEqual(00, actual[1]); Assert.AreEqual(22, actual[2]); Assert.AreEqual(33, actual[3]); Assert.AreEqual(00, actual[4]); }
public void same_input_different_output_minimal() { double[][] inputs = new double[][] { new double[] { 0 }, new double[] { 0 } }; int[] outputs = new int[] { 1, 0 }; DecisionVariable[] variables = { new DecisionVariable("x", DecisionVariableKind.Continuous) }; DecisionTree decisionTree = new DecisionTree(variables, 2); C45Learning c45Learning = new C45Learning(decisionTree); c45Learning.Run(inputs, outputs); // System.AggregateException thrown here Assert.AreEqual(decisionTree.Decide(new[] { 0 }), 0); }
public void IrisDatasetTest() { #region doc_iris // In this example, we will process the famous Fisher's Iris dataset in // which the task is to classify weather the features of an Iris flower // belongs to an Iris setosa, an Iris versicolor, or an Iris virginica: // // - https://en.wikipedia.org/wiki/Iris_flower_data_set // // First, let's load the dataset into an array of text that we can process string[][] text = Resources.iris_data.Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries).Apply(x => x.Split(',')); // The first four columns contain the flower features double[][] inputs = text.GetColumns(0, 1, 2, 3).To<double[][]>(); // The last column contains the expected flower type string[] labels = text.GetColumn(4); // Since the labels are represented as text, the first step is to convert // those text labels into integer class labels, so we can process them // more easily. For this, we will create a codebook to encode class labels: // var codebook = new Codification("Output", labels); // With the codebook, we can convert the labels: int[] outputs = codebook.Translate("Output", labels); // Let's declare the names of our input variables: DecisionVariable[] features = { new DecisionVariable("sepal length", DecisionVariableKind.Continuous), new DecisionVariable("sepal width", DecisionVariableKind.Continuous), new DecisionVariable("petal length", DecisionVariableKind.Continuous), new DecisionVariable("petal width", DecisionVariableKind.Continuous), }; // Now, we can finally create our tree for the 3 classes: var tree = new DecisionTree(inputs: features, classes: 3); // And we can use the C4.5 for learning: var teacher = new C45Learning(tree); // And finally induce the tree: teacher.Learn(inputs, outputs); // To get the estimated class labels, we can use int[] predicted = tree.Decide(inputs); // And the classification error can be computed as double error = new ZeroOneLoss(outputs) // 0.0266 { Mean = true }.Loss(tree.Decide(inputs)); // Moreover, we may decide to convert our tree to a set of rules: DecisionSet rules = tree.ToRules(); // And using the codebook, we can inspect the tree reasoning: string ruleText = rules.ToString(codebook, "Output", System.Globalization.CultureInfo.InvariantCulture); // The output is: string expected = @"Iris-setosa =: (petal length <= 2.45) Iris-versicolor =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length <= 7.05) && (sepal width <= 2.85) Iris-versicolor =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length <= 7.05) && (sepal width > 2.85) Iris-versicolor =: (petal length > 2.45) && (petal width > 1.75) && (sepal length <= 5.95) && (sepal width > 3.05) Iris-virginica =: (petal length > 2.45) && (petal width <= 1.75) && (sepal length > 7.05) Iris-virginica =: (petal length > 2.45) && (petal width > 1.75) && (sepal length > 5.95) Iris-virginica =: (petal length > 2.45) && (petal width > 1.75) && (sepal length <= 5.95) && (sepal width <= 3.05) "; #endregion Assert.AreEqual(0.026666666666666668, error, 1e-10); Assert.AreEqual(4, tree.NumberOfInputs); Assert.AreEqual(3, tree.NumberOfOutputs); double newError = ComputeError(rules, inputs, outputs); Assert.AreEqual(0.026666666666666668, newError, 1e-10); Assert.AreEqual(expected, ruleText); }