private DecisionTree addEstimationLeaf(DecisionTree tree, List <DataInstance> subset, Node parent, string value_splitter)
        {
            // We are out of attributes to split on! We need to identify the most common classifier.
            string most_common_classifier = SetHelper.mostCommonClassifier(subset, target_attribute);

            // Adjust for the uncertainty that comes with this prediction. We combine the certainty of the
            // classifier (the fraction of the subset carrying it) with the certainty that the instances belong here (their average weight).
            double percentage_with_this_classifier = (double)subset.Count(A => A.getProperty(target_attribute) == most_common_classifier) / subset.Count;
            double certainty = 0;

            foreach (DataInstance instance in subset)
            {
                certainty += instance.getWeight();
            }
            certainty /= (double)subset.Count;
            certainty *= percentage_with_this_classifier;
            Leaf leaf = tree.addUncertainLeaf(value_splitter, most_common_classifier, parent, certainty);

            tree.data_locations[leaf] = subset;
            return(tree);
        }
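To make the certainty formula above concrete, here is a small illustrative calculation with made-up numbers (this is not repo code, just the arithmetic the method performs):

            // Suppose the subset holds 4 instances, 3 of them labelled with the most
            // common classifier, with instance weights 1.0, 1.0, 0.5 and 0.5.
            double percentage = 3.0 / 4.0;                    // 0.75 carry the majority classifier
            double avg_weight = (1.0 + 1.0 + 0.5 + 0.5) / 4;  // 0.75 average weight
            double certainty  = avg_weight * percentage;      // 0.5625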
Example #2
        public DecisionTree TRAIN(ObservationSet set, Dictionary <string, object> parameters)
        {
            // Build a readable description of this call, e.g. TRAIN(key1=value1,key2=value2).
            string parameter_string = "TRAIN(" + string.Join(",", parameters.Select(kv => $"{kv.Key}={kv.Value}")) + ")";
            Console.WriteLine(parameter_string);

            DecisionTree tree = this.algorithm.train(set.instances, set.target_attribute, set.attributes, this, parameters);

            this.SNAPSHOT("final", tree);
            return(tree);
        }
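A minimal usage sketch for TRAIN follows; the runner instance, the observation_set, and the parameter names ("minimum_leaf_size", "confidence") are illustrative assumptions, not a documented API:

            // Hypothetical call site; parameter names are made up for illustration.
            var parameters = new Dictionary<string, object>
            {
                { "minimum_leaf_size", 2 },
                { "confidence", 0.25 }
            };
            DecisionTree tree = runner.TRAIN(observation_set, parameters);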
Example #3
        public static void Exemplo01()
        {
            //LINK: http://accord-framework.net/docs/html/T_Accord_MachineLearning_DecisionTrees_Learning_ID3Learning.htm
            //LINK: http://accord-framework.net/docs/html/T_Accord_MachineLearning_DecisionTrees_DecisionTree.htm

            DataTable data = new DataTable("Mitchell's Tennis Example");

            data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            data.Rows.Add("D1", "Sunny", "Hot", "High", "Weak", "No");
            data.Rows.Add("D2", "Sunny", "Hot", "High", "Strong", "No");
            data.Rows.Add("D3", "Overcast", "Hot", "High", "Weak", "Yes");
            data.Rows.Add("D4", "Rain", "Mild", "High", "Weak", "Yes");
            data.Rows.Add("D5", "Rain", "Cool", "Normal", "Weak", "Yes");
            data.Rows.Add("D6", "Rain", "Cool", "Normal", "Strong", "No");
            data.Rows.Add("D7", "Overcast", "Cool", "Normal", "Strong", "Yes");
            data.Rows.Add("D8", "Sunny", "Mild", "High", "Weak", "No");
            data.Rows.Add("D9", "Sunny", "Cool", "Normal", "Weak", "Yes");
            data.Rows.Add("D10", "Rain", "Mild", "Normal", "Weak", "Yes");
            data.Rows.Add("D11", "Sunny", "Mild", "Normal", "Strong", "Yes");
            data.Rows.Add("D12", "Overcast", "Mild", "High", "Strong", "Yes");
            data.Rows.Add("D13", "Overcast", "Hot", "Normal", "Weak", "Yes");
            data.Rows.Add("D14", "Rain", "Mild", "High", "Strong", "No");

            // Create a new codification codebook to
            // convert strings into integer symbols
            Codification codebook = new Codification(data,
                                                     "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            // Translate our training data into integer symbols using our codebook:
            DataTable symbols = codebook.Apply(data);

            int[][] inputs  = symbols.ToArray <int>("Outlook", "Temperature", "Humidity", "Wind");
            int[]   outputs = symbols.ToArray <int>("PlayTennis");

            // Gather information about decision variables
            DecisionVariable[] attributes =
            {
                new DecisionVariable("Outlook", 3),     // 3 possible values (Sunny, overcast, rain)
                new DecisionVariable("Temperature", 3), // 3 possible values (Hot, mild, cool)
                new DecisionVariable("Humidity", 2),    // 2 possible values (High, normal)
                new DecisionVariable("Wind", 2)         // 2 possible values (Weak, strong)
            };

            int classCount = 2; // 2 possible output values for playing tennis: yes or no

            // Create the decision tree using the attributes and the number of classes
            DecisionTree tree = new DecisionTree(attributes, classCount);

            // Create a new instance of the ID3 algorithm
            ID3Learning id3learning = new ID3Learning(tree);

            // Learn the training instances!
            id3learning.Run(inputs, outputs);

            // At the very end of the guide's example, the tree is converted into an
            // expression tree with tree.ToExpression() and compiled:
            var expression = tree.ToExpression();
            var func       = expression.Compile();
            // The result is a delegate that you can execute to get a decision for a
            // given input. For example:
            bool willPlayTennis = func(new double[] { 1.0, 1.0, 1.0, 1.0 }) == 1;

            int[]  query  = codebook.Translate("Sunny", "Hot", "High", "Strong");
            int    output = tree.Decide(query);
            string answer = codebook.Translate("PlayTennis", output);

            Console.WriteLine(answer);
            Console.ReadLine();

            // RESULT: in the above example, answer will be "No".
        }
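Note that newer Accord.NET releases replace the string-returning Translate overload with Revert (as used in the student-records example further below); a minimal equivalent, assuming Accord.NET 3.x:

            // Same reverse lookup on Accord.NET 3.x:
            string answer2 = codebook.Revert("PlayTennis", output); // "No"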
Example #4
        private DecisionTree pruneIterate(DecisionTree tree, List <Node> queue, string target_attribute)
        {
            // Manage queue.
            Node node = queue[0];

            queue.RemoveAt(0);

            agent.THINK("consider-node-for-pruning").finish();

            // Let's consider this node.
            List <DataInstance> node_set = new List <DataInstance>();

            // Calculate the error estimate of the leaves
            double leaf_estimated_errors = 0;
            int    leaf_actual_errors    = 0;

            foreach (Leaf child in SetHelper.all_leaf_children(node))
            {
                List <DataInstance> leaf_set = tree.data_locations[child];
                node_set.AddRange(leaf_set);

                // Calculate estimated error.
                int my_errors = SetHelper.subset_errors(leaf_set, target_attribute);
                leaf_actual_errors += my_errors;
                double errorRate      = Calculator.confidenceIntervalExact(my_errors, leaf_set.Count, this.confidence);
                double estimatedError = errorRate * leaf_set.Count;
                leaf_estimated_errors += estimatedError;
            }

            // Calculate estimated error of node.
            int    node_errors        = SetHelper.subset_errors(node_set, target_attribute);
            double nodeErrorRate      = Calculator.confidenceIntervalExact(node_errors, node_set.Count, this.confidence);
            double nodeEstimatedError = nodeErrorRate * node_set.Count;

            // Compare: if a node has a lower estimated error than its leaves, it should be pruned.
            Dictionary <string, object> state = StateRecording.generateState("estimated_prune_errors", nodeEstimatedError, "estimated_keep_errors", leaf_estimated_errors,
                                                                             "node_attribute", node.label, "node_data_size", node_set.Count, "node_id", node.identifier, "node_value_splitter", (node.value_splitter != null) ? node.value_splitter : "NULL",
                                                                             "node_threshold", (node is ContinuousNode) ? (double?)(node as ContinuousNode).threshold : null,
                                                                             "parent_id", (node.getParent() != null) ? node.getParent().identifier : "NULL", "parent_attribute", (node.getParent() != null) ? node.getParent().label : "NULL", "parent_threshold", (node.getParent() != null && node.getParent() is ContinuousNode) ? (double?)((ContinuousNode)node.getParent()).threshold : null);

            if (nodeEstimatedError < leaf_estimated_errors)
            {
                // We need to prune!
                this.prepareSnapshot(node);
                agent.THINK("prune-node").setState(state).finish();

                tree = tree.replaceNodeByNewLeaf(node);
            }
            else
            {
                agent.THINK("keep-node").setState(state).finish();
            }

            // Iterate further if necessary.
            if (queue.Count > 0)
            {
                tree = this.pruneIterate(tree, queue, target_attribute);
            }
            return(tree);
        }
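To make the pruning decision above concrete, here is a small worked example with invented numbers (the real upper bounds come from Calculator.confidenceIntervalExact):

            // Suppose a node has two leaves covering 6 and 8 instances with 1 and 2
            // misclassifications, and the upper confidence bounds on their error
            // rates come out at 0.39 and 0.44:
            double leaf_estimated_errors = 0.39 * 6 + 0.44 * 8;  // = 5.86
            // The node itself covers all 14 instances; say its upper bound is 0.37:
            double nodeEstimatedError = 0.37 * 14;               // = 5.18
            // 5.18 < 5.86, so the subtree is replaced by a single leaf.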
        public DecisionTree iterate(DecisionTree tree, List <DataInstance> sets_todo, List <string> considerable_attributes, Node parent_node, string parent_value_splitter)
        {
            agent.THINK("iterate").finish();
            List <string> attributes_copy = new List <string>(considerable_attributes.ToArray());
            // Find the best way to split this set: calculate the gain for each attribute and select the highest.
            string best_attr    = "UNDETERMINED";
            double highest_gain = 0;

            foreach (string attr in attributes_copy)
            {
                agent.THINK("consider-attribute").finish();
                double my_gain = Calculator.gain(sets_todo, attr, this.target_attribute, this.possible_attribute_values[attr]);

                Dictionary <string, object> state = StateRecording.generateState("current_best_attribute", best_attr, "competing_attribute", attr, "current_best_gain", highest_gain, "competing_gain", my_gain, "parent_id", (parent_node != null) ? parent_node.identifier : "NULL", "parent_attribute", (parent_node != null) ? parent_node.label : "NULL", "previous_value_split", (parent_value_splitter != null) ? parent_value_splitter : "NULL");
                if (my_gain > highest_gain)
                {
                    agent.THINK("set-new-best-attribute").setState(state).finish();
                    best_attr    = attr;
                    highest_gain = my_gain;
                }
                else
                {
                    agent.THINK("keep-old-attribute").setState(state).finish();
                }
            }
            agent.THINK("end-attribute-loop").finish();

            if (highest_gain == 0)
            {
                // This set cannot be split further.
                // We have tried all attributes so we can't go further. The tree ends here my friend.
                // This happens when instances have all attributes the same except for the classifier.

                throw new Exception("This dataset contains instances with exactly the same attribute values but different classifiers, which this algorithm does not support.");

            // I previously made an implementation of the algorithm that adds a 'best guess' leaf to address this problem,
            // but this is not described as such in the algorithm description and has therefore been left out for the experimentation.
            // The now-unreachable code is kept below, commented out, for reference:
            //
            // agent.THINK("add-best-guess-leaf").set("best_attribute", best_attr).set("highest_gain", 0d).set("possible_attributes", attributes_copy.Count).finish();
            // string classifier_value = SetHelper.mostCommonClassifier(sets_todo, target_attribute);
            // Leaf leaf = tree.addBestGuessLeaf(parent_value_splitter, classifier_value, parent_node);
            // tree.data_locations[leaf] = sets_todo;
            // return(tree);
            }

            // The best attribute to split this set is now saved in best_attr. Create a node for that.
            agent.THINK("add-node").finish();

            // Remove this attribute as a splitter for the dataset.
            attributes_copy.Remove(best_attr);

            // The parent value splitter tells a node which value its parent split on; decision rules need this information.
            Node new_node = tree.addNode(best_attr, parent_value_splitter, parent_node);

            // Create subsets for each possible value of the attribute we created a node for.
            int values_left = this.possible_attribute_values[best_attr].Count;

            foreach (string value_splitter in this.possible_attribute_values[best_attr])
            {
                agent.THINK("subset-on-value").finish();
                List <DataInstance>         subset            = sets_todo.Where(A => A.getProperty(best_attr) == value_splitter).ToList();
                Dictionary <string, object> considering_state = StateRecording.generateState("node_attribute", best_attr, "value_split", value_splitter, "current_node_id", new_node.identifier, "parent_node_id", (parent_node != null) ? parent_node.identifier : "NULL", "parent_attribute", (parent_node != null) ? parent_node.label : "NULL", "previous_value_split", (parent_value_splitter != null) ? parent_value_splitter : "NULL");
                if (subset.Count == 0)
                {
                    // There are no more of this subset. We need to skip this iteration.
                    agent.THINK("ignore-value").setState(considering_state).finish();
                    continue;
                }
                if (SetHelper.hasUniformClassifier(subset, target_attribute))
                {
                    // This subset doesn't have to be split anymore. We can just add it to the node as a leaf.
                    // Each leaf represents one decision rule.
                    agent.THINK("add-leaf").setState(considering_state).finish();
                    string classifier_value = subset.First().getProperty(target_attribute);
                    Leaf   leaf             = tree.addLeaf(value_splitter, classifier_value, new_node);
                    tree.data_locations[leaf] = subset;
                }
                else
                {
                    // We still haven't resolved this set. We need to iterate upon it to split it again.
                    agent.THINK("iterate-further").setState(considering_state).finish();
                    tree = this.iterate(tree, subset, attributes_copy, new_node, value_splitter);
                    // If we get here, the subset that previously had mixed classifiers has been resolved; move on to the next value.
                }
                values_left -= 1;
            }
            agent.THINK("end-value-loop").finish();
            if (parent_node != null)
            {
                agent.THINK("return-tree-to-self").finish();
            }
            // We have successfully split all examples on this attribute. Return the tree in its current state.
            return(tree);
        }
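The gain used above (Calculator.gain) is standard ID3 information gain; below is a self-contained sketch of that formula under that assumption — it is not the repo's Calculator implementation. It needs using System, System.Collections.Generic and System.Linq:

            // Information gain: entropy(S) minus the weighted entropy of the
            // partitions of S induced by an attribute's values.
            static double Entropy(IEnumerable<string> labels)
            {
                var counts = labels.GroupBy(l => l).Select(g => (double)g.Count()).ToList();
                double total = counts.Sum();
                return counts.Sum(c => -(c / total) * Math.Log(c / total, 2));
            }

            static double Gain(List<(string value, string label)> set)
            {
                double baseEntropy  = Entropy(set.Select(r => r.label));
                double splitEntropy = set.GroupBy(r => r.value)
                    .Sum(g => ((double)g.Count() / set.Count) * Entropy(g.Select(r => r.label)));
                return baseEntropy - splitEntropy;
            }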
        private DecisionTree iterate(DecisionTree tree, List <DataInstance> set, Dictionary <string, string> attributes, Node parent, string last_split)
        {
            this.agent.THINK("iterate").finish();

            // Calculate gains and thresholds.
            Dictionary <string, Dictionary <string, double> > gains_and_thresholds = calculate_attribute_gain_ratios(set, target_attribute, attributes);
            Dictionary <string, double> thresholds = gains_and_thresholds["thresholds"];

            Tuple <string, Dictionary <string, List <DataInstance> > > attributeFound = this.findAttributeSplit(tree, set, attributes, gains_and_thresholds, parent, last_split);

            // We need to know the best attribute to split on, and what the subsets of splitting on it would be.
            string best_split_attribute = attributeFound.Item1;
            Dictionary <string, List <DataInstance> > subsets = attributeFound.Item2;

            double threshold = -1000000;

            // This is a hack to arrive at the same result as J48 [TODO: this has to go at some point]
            if (!have_been_at_root && best_split_attribute == "petal-length")
            {
                have_been_at_root = true;
                Console.WriteLine("Adjust to J48");
                best_split_attribute = "petal-width";
                threshold            = thresholds[best_split_attribute];
            }

            bool split_on_continuous = (best_split_attribute != "[INTERNAL_VARIABLE]-NOTFOUND") ? attributes[best_split_attribute] == "continuous" : false;

            // Check if a split attribute could even be found
            Dictionary <string, object> checkBestAttributeWasPossibleState = StateRecording.generateState("attribute_was_found", best_split_attribute != "[INTERNAL_VARIABLE]-NOTFOUND" ? "TRUE" : "FALSE", "best_attribute", best_split_attribute,
                                                                                                          "suggested_threshold", (split_on_continuous) ? (double?)thresholds[best_split_attribute] : null,
                                                                                                          "parent_id", (parent != null) ? parent.identifier : "NULL", "parent_attribute", (parent != null) ? parent.label : "NULL", "previous_value_split", (last_split != null) ? last_split : "", "parent_threshold", (parent != null && parent is ContinuousNode) ? (double?)((ContinuousNode)parent).threshold : null);

            if (best_split_attribute == "[INTERNAL_VARIABLE]-NOTFOUND")
            {
                // The subset we received could not be split without creating a leaf below the minimum leaf size,
                // so we make an estimation leaf and move up.
                agent.THINK("add-estimation-leaf").setState(checkBestAttributeWasPossibleState).finish();
                tree = this.addEstimationLeaf(tree, set, parent, last_split);
                return(tree);
            }
            // If we got here then we did not return an estimation leaf and therefore we found a suitable attribute to split on!
            agent.THINK("add-node").setState(checkBestAttributeWasPossibleState).finish();

            if (split_on_continuous)
            {
                threshold = thresholds[best_split_attribute];
            }

            // Get started on making a node

            Dictionary <string, string> attributes_for_further_iteration = AttributeHelper.CopyAttributeDictionary(attributes);

            // We now know the best splitting attribute and how to split it.
            Node newnode = null;

            if (split_on_continuous)
            {
                newnode = tree.addContinuousNode(best_split_attribute, last_split, threshold, parent);
            }
            else
            {
                newnode = tree.addNode(best_split_attribute, last_split, parent);
                attributes_for_further_iteration.Remove(best_split_attribute);
            }

            // We now have a dictionary mapping each value splitter (string) to its subset of DataInstances.
            foreach (string subset_splitter in subsets.Keys)
            {
                List <DataInstance> subset = subsets[subset_splitter];
                agent.THINK("subset-on-value").finish();

                bool uniformClassifier = false;

                if (subset.Count > 0)
                {
                    uniformClassifier = SetHelper.hasUniformClassifier(subset, target_attribute);
                }
                Dictionary <string, object> state = StateRecording.generateState("set_count", subset.Count, "set_has_uniform_classifier", (subset.Count > 0) ? (uniformClassifier ? "TRUE" : "FALSE") : "EMPTY SET", "chosen_attribute", best_split_attribute, "value_split", subset_splitter, "possible_attribute_count", attributes_for_further_iteration.Count,
                                                                                 "chosen_threshold", (split_on_continuous) ? (double?)thresholds[best_split_attribute] : null, "current_node_id", newnode.identifier,
                                                                                 "parent_id", (parent != null) ? parent.identifier : "NULL", "parent_attribute", (parent != null) ? parent.label : "NULL", "previous_value_split", (last_split != null) ? last_split : "", "parent_threshold", (parent != null && parent is ContinuousNode) ? (double?)((ContinuousNode)parent).threshold : null);

                if (subset.Count == 0)
                {
                    // There are no more of this subset. We need to skip this iteration.
                    agent.THINK("ignore-value").setState(state).finish();
                    continue;
                }

                if (uniformClassifier)
                {
                    // This subset doesn't have to be split anymore. We can just add it to the node as a leaf.
                    // Each leaf represents one decision rule.
                    string classifier_value = subset.First().getProperty(target_attribute);
                    double certainty        = 0;

                    // Calculate the certainty of this leaf: the average weight of the instances in the subset.
                    foreach (DataInstance instance in subset)
                    {
                        certainty += instance.getWeight();
                    }
                    certainty /= (double)subset.Count;

                    agent.THINK("add-leaf").setState(state).finish();
                    Leaf leaf = tree.addUncertainLeaf(subset_splitter, classifier_value, newnode, certainty);
                    tree.data_locations[leaf] = subset;
                }
                else
                {
                    // We still haven't resolved this set. We need to iterate upon it to split it again.
                    if (attributes_for_further_iteration.Count == 0)
                    {
                        // If this happens then we have no more attributes to split on; add an estimation leaf.
                        agent.THINK("add-majority-leaf").setState(state).finish();
                        tree = this.addEstimationLeaf(tree, subset, newnode, subset_splitter);
                    }
                    else
                    {
                        // We still have attributes left, we can continue further!
                        agent.THINK("iterate-further").setState(state).finish();
                        tree = this.iterate(tree, subset, attributes_for_further_iteration, newnode, subset_splitter);
                    }
                    // If we got here in the code then the set that was previously not all the same classifier has been resolved.
                    // Therefore we can let the foreach continue further!
                }
            }

            // The set that we have received has been dealt with completely. We can now move up!
            agent.THINK("end-value-loop").finish();
            if (parent != null)
            {
                agent.THINK("return-tree-to-self").finish();
            }
            return(tree);
        }
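For the continuous case above, a hedged sketch of what a threshold split produces (assuming SetHelper.subsetOnAttributeContinuous partitions like this, and that getProperty returns the value as a string):

            // Instances at or below the threshold form one subset, the rest the other.
            var below = set.Where(i => double.Parse(i.getProperty(best_split_attribute)) <= threshold).ToList();
            var above = set.Where(i => double.Parse(i.getProperty(best_split_attribute)) >  threshold).ToList();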
        private Tuple <string, Dictionary <string, List <DataInstance> > > findAttributeSplit(DecisionTree tree, List <DataInstance> set, Dictionary <string, string> attributes, Dictionary <string, Dictionary <string, double> > gains_and_thresholds, Node parent, string last_split)
        {
            // Get gains and thresholds from parameters
            Dictionary <string, double> gains      = gains_and_thresholds["gains"];
            Dictionary <string, double> thresholds = gains_and_thresholds["thresholds"];

            // Select the best attribute to split on
            double  highest_gain_ratio   = -1;
            string  best_split_attribute = "[INTERNAL_VARIABLE]-NOTFOUND";
            bool    split_on_continuous  = false;
            double  threshold            = 0;
            Dictionary <string, List <DataInstance> > subsets = null;

            foreach (string competing_attribute in attributes.Keys.ToList())
            {
                agent.THINK("consider-attribute").finish();
                double my_gain_ratio           = gains[competing_attribute];
                bool   competing_is_continuous = (attributes[competing_attribute] == "continuous");
                Dictionary <string, object> comparingAttributeState = StateRecording.generateState("current_best_attribute", best_split_attribute, "competing_attribute", competing_attribute, "current_best_gain", highest_gain_ratio, "competing_gain", my_gain_ratio,
                                                                                                   "current_best_threshold", (split_on_continuous) ? (double?)threshold : null, "competing_threshold", (competing_is_continuous) ? (double?)thresholds[competing_attribute] : null,
                                                                                                   "parent_id", (parent != null) ? parent.identifier : "NULL", "parent_attribute", (parent != null) ? parent.label : "NULL", "previous_value_split", (last_split != null) ? last_split : "NULL", "parent_threshold", (parent != null && parent is ContinuousNode) ? (double?)((ContinuousNode)parent).threshold : null);

                if (my_gain_ratio > highest_gain_ratio)
                {
                    // This attribute has the potential to become the new best attribute, but first we need to make sure splitting on this
                    // attribute will not result in a leaf that has a subset lower than the minimum leaf size.
                    agent.THINK("propose-competing-attribute").setState(comparingAttributeState).finish();

                    Dictionary <string, List <DataInstance> > competing_subsets = (competing_is_continuous) ? SetHelper.subsetOnAttributeContinuous(set, competing_attribute, thresholds[competing_attribute]) : SetHelper.subsetOnAttributeNominal(set, competing_attribute, possible_nominal_values[competing_attribute]);
                    int subsets_above_minimum_requirement = 0;

                    foreach (string value_splitter in competing_subsets.Keys.ToList())
                    {
                        List <DataInstance> subset = competing_subsets[value_splitter];

                        // Count the subsets that meet the minimum leaf size; the split is only
                        // allowed if at least two do (checked below).
                        if (subset.Count >= minimum_leaf_size)
                        {
                            subsets_above_minimum_requirement++;
                        }
                    }

                    // I could not find documentation of how J48 determines how many undersized subsets are allowed.
                    // I found a suggestion (https://stackoverflow.com/questions/21762161/what-does-the-minnumobj-parameter-do-in-j48-classifier-weka)
                    // and it matches J48's behavior exactly, so I assume this is how it is done.
                    Dictionary <string, object> verifyCompetingAttributeState = StateRecording.generateState("minimum_objects", minimum_leaf_size, "valid_subset_count", subsets_above_minimum_requirement, "chosen_attribute", best_split_attribute,
                                                                                                             "suggested_threshold", (split_on_continuous) ? (double?)thresholds[best_split_attribute] : null,
                                                                                                             "parent_id", (parent != null) ? parent.identifier : "NULL", "parent_attribute", (parent != null) ? parent.label : "NULL", "previous_value_split", (last_split != null) ? last_split : "", "parent_threshold", (parent != null && parent is ContinuousNode) ? (double?)((ContinuousNode)parent).threshold : null);

                    if (subsets_above_minimum_requirement < 2)
                    {
                        // Although this attribute has a better gain ratio than the best one we have now, it also forces us to create a leaf
                        // that is below the minimum leaf size and therefore we cannot choose this one!
                        agent.THINK("disregard-competing-attribute").setState(verifyCompetingAttributeState).finish();
                    }
                    else
                    {
                        agent.THINK("allow-competing-attribute").setState(verifyCompetingAttributeState).finish();
                        highest_gain_ratio   = my_gain_ratio;
                        best_split_attribute = competing_attribute;
                        split_on_continuous  = competing_is_continuous;
                        subsets = competing_subsets;
                        if (split_on_continuous)
                        {
                            threshold = thresholds[competing_attribute];
                        }
                    }
                }
                else
                {
                    // Previous attribute had a better gain ratio
                    agent.THINK("keep-best-attribute").setState(comparingAttributeState).finish();
                }
            }
            agent.THINK("end-attribute-loop").finish();
            return(new Tuple <string, Dictionary <string, List <DataInstance> > >(best_split_attribute, subsets));
        }
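To illustrate the minimum-leaf-size rule enforced above, a tiny made-up check (minimum_leaf_size = 2; the subset sizes are invented):

            // Hypothetical subset sizes produced by a candidate split:
            int[] subset_sizes = { 5, 1, 0 };
            int valid = subset_sizes.Count(size => size >= 2);
            bool split_allowed = valid >= 2;   // false: only one subset meets the minimum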
Example #8
 public void SNAPSHOT(string name, DecisionTree tree)
 {
     this.snapShot.Make(name, tree);
 }
Example #9
        private static void ReadStudentRecords()
        {
            var csv     = new CsvReader(File.OpenText("IntakesTrainingSet.csv"));
            var records = csv.GetRecords <StudentInfo>().ToList();

            // Convert the records to a DataTable
            var data = new DataTable();

            using (var reader = ObjectReader.Create(records))
            {
                data.Load(reader);
            }

            // Loop through each column in data
            foreach (DataColumn column in data.Columns)
            {
                // Replace spaces with underscores
                column.ColumnName = column.ColumnName.Replace(" ", "_");
            }

            // Create a new codification codebook to
            // convert strings into integer symbols
            Codification codebook = new Codification(data);

            // Translate our training data into integer symbols using our codebook:
            DataTable symbols = codebook.Apply(data);

            int[][] inputs = symbols.ToJagged <int>(
                "was_aanwezig",
                //"gewogen_gemiddelde",
                "competenties",
                "capaciteiten",
                "intr_motivatie",
                "extr_motivatie",
                "is_mbo_deficient",
                "persoonlijk_bijspijker_advies",
                "Aanmelden_voor_verkort_opleidingstraject"
                );

            int[] outputs = symbols.ToMatrix <int>("advies").GetColumn(0);

            var id3 = new ID3Learning()
            {
                new DecisionVariable("was_aanwezig", 2),
                //new DecisionVariable("gewogen_gemiddelde", codebook.Columns.First(c => c.ColumnName == "gewogen_gemiddelde").NumberOfSymbols),
                new DecisionVariable("competenties", 10),
                new DecisionVariable("capaciteiten", 10),
                new DecisionVariable("intr_motivatie", 10),
                new DecisionVariable("extr_motivatie", 10),
                new DecisionVariable("is_mbo_deficient", 2),
                new DecisionVariable("persoonlijk_bijspijker_advies", 2),
                new DecisionVariable("Aanmelden_voor_verkort_opleidingstraject", 2)
            };

            DecisionTree tree = id3.Learn(inputs, outputs);

            // Now that we have a decision tree, load the test set and evaluate it
            csv = new CsvReader(File.OpenText("IntakesTestSet.csv"));
            var testRecords = csv.GetRecords <StudentInfo>();

            foreach (StudentInfo record in testRecords)
            {
                // Transform the values of the test set into the same internal symbols used for the training set
                int[] query = codebook.Transform(new[, ]
                {
                    { "was_aanwezig", record.was_aanwezig },
                    //{ "gewogen_gemiddelde", record.gewogen_gemiddelde },
                    { "competenties", record.competenties },
                    { "capaciteiten", record.capaciteiten },
                    { "intr_motivatie", record.intr_motivatie },
                    { "extr_motivatie", record.extr_motivatie },
                    { "is_mbo_deficient", record.is_mbo_deficient },
                    { "persoonlijk_bijspijker_advies", record.persoonlijk_bijspijker_advies },
                    { "Aanmelden_voor_verkort_opleidingstraject", record.Aanmelden_voor_verkort_opleidingstraject },
                }
                                                 );

                int predicted = tree.Decide(query);

                string answer;

                try
                {
                    answer = codebook.Revert("advies", predicted);
                }
                catch (KeyNotFoundException)
                {
                    Console.WriteLine($"Could not generate advice for student {record.studentnummer}");
                    continue;
                }

                Console.WriteLine($"Student {record.studentnummer}: {answer}");
            }
        }