public Node buildBranch_Thresh(Node root, Node pivot, Node[] nodes)
        {
            int       colID = pivot.attribute.Ordinal;
            DataTable lowerOrEqualTable2 = pivot.subTable.Clone();
            DataTable higherTable2       = pivot.subTable.Clone();

            foreach (DataRow r in pivot.subTable.Rows)
            {
                string value = r[colID].ToString();
                double d     = double.Parse(value);
                if (d <= pivot.threshold)
                {
                    lowerOrEqualTable2.ImportRow(r);
                }
                else
                {
                    higherTable2.ImportRow(r);
                }
            }
            //Build new branch using new threshold
            DecisionTree dt = new DecisionTree();

            pivot.children.Nodes[0]           = dt.RunC4_5(lowerOrEqualTable2, pivot.subTableAttributes); //Rebuild left sub-tree
            pivot.children.Nodes[0].parent    = pivot;
            pivot.children.Nodes[0].parentRef = 0;
            pivot.children.Nodes[1]           = dt.RunC4_5(higherTable2, pivot.subTableAttributes); //Rebuild right sub-tree
            pivot.children.Nodes[1].parent    = pivot;
            pivot.children.Nodes[1].parentRef = 1;

            nodeList.Clear();
            nodes = root.flattenTree(root);     //note: 'nodes' is passed by value, so this refreshed array is only visible inside this method
            nodeList.Add(root);

            //Replace old branch with new branch
            if (pivot.parent != null && pivot.children.Nodes.Count != 0)
            {  //if selected node is not root or a leaf
                pivot.parent.children.Nodes[pivot.parentRef] = pivot;   //pivot was rebuilt in place, so this simply re-asserts the parent's link to the rebuilt branch
            }
            else
            {
                Console.WriteLine("pivot.parent == null" + pivot.parent == null);
                return(root);
            }
            return(root);
        }
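        //A minimal, self-contained sketch (not part of the original class) of the threshold split performed
        //inside buildBranch_Thresh above, assuming the chosen column holds numeric text; InvariantCulture is
        //used so the parse does not depend on the machine's locale.
        public static void splitByThreshold(DataTable source, int colID, double threshold,
                                            out DataTable lowerOrEqual, out DataTable higher)
        {
            lowerOrEqual = source.Clone();      //same schema, no rows
            higher       = source.Clone();

            foreach (DataRow r in source.Rows)
            {
                double d = double.Parse(r[colID].ToString(), System.Globalization.CultureInfo.InvariantCulture);
                if (d <= threshold)
                {
                    lowerOrEqual.ImportRow(r);
                }
                else
                {
                    higher.ImportRow(r);
                }
            }
        }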
        public Node buildBranch_Attribute(Node root, Node pivot, Node[] nodes)
        {
            DecisionTree dt        = new DecisionTree();
            Node         oldParent = pivot.parent;
            int          p         = pivot.parentRef;

            pivot           = dt.RunC4_5(pivot.subTable, pivot.subTableAttributes, pivot.attribute); //rebuild the sub-tree for the pivot's attribute, then restore its links to the parent below
            pivot.parent    = oldParent;
            pivot.parentRef = p;
            if (oldParent != null)
            {
                oldParent.children.Nodes[p] = pivot;    //re-link the rebuilt sub-tree; without this the new branch is never attached to the tree
            }

            nodeList.Clear();
            nodes = root.flattenTree(root);             //note: 'nodes' is passed by value, so this refreshed array is only visible inside this method
            nodeList.Add(root);

            return(root);
        }
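        //A hedged sketch (hypothetical helper, not in the original source) that factors out the re-attachment
        //step both rebuild methods above rely on: the rebuilt sub-tree is pointed back at its parent and written
        //into the parent's child slot. 'Node', 'children', 'parent' and 'parentRef' are the fields used in this file.
        private static void reattachBranch(Node rebuilt, Node oldParent, int slot)
        {
            rebuilt.parent    = oldParent;
            rebuilt.parentRef = slot;
            if (oldParent != null)
            {
                oldParent.children.Nodes[slot] = rebuilt;   //overwrite the old branch with the rebuilt one
            }
        }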
        //Despite the name, this performs k-fold cross-validation: the dataset is split into k partitions and each partition is held out once as the test set
        public void run_KMeans(string file_path, int k, out double train, out double valid, out double test, out double size, out double rmse)
        {
            Accuracies_training.Clear();
            Accuracies_validation.Clear();
            Accuracies_testing.Clear();
            trees.Clear();
            tree_sizes.Clear();
            rmse_list.Clear();

            DataLoader   d    = new DataLoader();
            DecisionTree tree = new DecisionTree();
            Accuracy     a    = new Accuracy();

            d.get_K_Partitions(file_path, k);       //fills d.partitions with k even partitions of the dataset (each contains a header row)

            for (int i = 0; i < k; i++)             //for each partition configuration

            {
                Console.WriteLine("Partition  " + i + " / " + k + "   ---------------------------------------------------------------");

                List <string> training_data   = new List <string>();
                List <string> testing_data    = new List <string>();
                List <string> validation_data = new List <string>();

                training_data.Add(d.title_row);                        //Add title row to top of training set

                for (int j = 0; j < k; j++)
                {
                    if (j != i)                                        //every partition except the i-th goes into the training set
                    {
                        for (int z = 0; z < d.partitions[j].Length; z++)
                        {
                            training_data.Add(d.partitions[j][z]);
                        }
                    }
                    else                                               //the held-out i-th partition becomes the test set
                    {
                        for (int z = 0; z < d.partitions[j].Length; z++)
                        {
                            testing_data.Add(d.partitions[j][z]);
                        }
                    }
                }

                //Reserve 50% of the training data to be the validation set (move the rows to validation_data)
                int s = training_data.Count / 2;
                validation_data = training_data.GetRange(training_data.Count - s, s);
                training_data.RemoveRange(training_data.Count - s, s);

                DataTable         x = d.CreateTable(training_data.ToArray()); //input: string[] output: DataTable
                List <DataColumn> all_attributes = d.getAllAttributes(x);
                Node root = tree.root = tree.RunC4_5(x, all_attributes);
                root.isRoot = true;                                           //Set identifier of the root
                root.pruneTree(root);
                trees.Add(root);

                training_data.RemoveAt(0);                             //remove the title row added at the top before measuring training accuracy below

                List <string> validation_subset = getValidationSubset(validation_data);

                //Optimise with respect to the validation set
                for (int it = 0; it < 10000; it++)
                {
                    /////////////////////////////////////////////////SELECT OBJECTIVE FUNCTION///////////////////////////////////////////////////////////
                    //root = root.randomMutateAndRebuild_Accuracy(root);                   //Objective Function: Maximise Accuracy (regardless of size)
                    //root = root.randomMutateAndRebuild_RMSE(root);                       //Objective Function: Minimise RMSE (For regression trees, regardless of size)

                    //PARETO FRONT
                    //The objective function below is a Pareto front: it minimises the size of the tree while also improving accuracy (if either remains stable, the change is still accepted); a sketch of this acceptance rule appears after run_KMeans
                    if ((it % 100) == 0)
                    {
                        validation_subset = getValidationSubset(validation_data);   //randomise the validation subset every 100 iterations
                    }
                    root = root.randomMutateAndRebuild_Size(root, validation_subset.ToArray());                         //Objective Function: Minimise size of the tree (number of nodes)
                    //force a mutation here if the accuracy has not improved in the last 100 iterations, for instance...
                }

                //Save the accuracies of each partition
                Accuracies_training.Add(a.GetAccuracy(root, training_data.ToArray()));
                Accuracies_validation.Add(a.GetAccuracy(root, validation_data.ToArray()));
                Accuracies_testing.Add(a.GetAccuracy(root, testing_data.ToArray()));
                tree_sizes.Add(Convert.ToDouble(root.flattenTree(root).Length));
                rmse_list.Add(a.getRMSE(root, testing_data.ToArray()));

                x.Clear();      //Clear DataTable so that we can begin the next C4.5 run - on the next partition
            }

            Console.WriteLine("\n\n");
            Console.WriteLine("Final report: ");

            double training_total = 0;

            //Average the non-zero training accuracies; Reverse<double>() snapshots the list, so zero entries can be removed from it while enumerating (the same pattern is repeated below - see the averageNonZero sketch after run_KMeans)
            foreach (double q in Accuracies_training.Reverse <double>())
            {
                if (q != 0)
                {
                    training_total += q;
                }
                else
                {
                    Accuracies_training.Remove(q);
                }
            }
            double average_training_accuracy = training_total / Accuracies_training.Count;

            double validation_total = 0;

            foreach (double q in Accuracies_validation.Reverse <double>())
            {
                if (q != 0)
                {
                    validation_total += q;
                }
                else
                {
                    Accuracies_validation.Remove(q);
                }
            }
            double average_validation_accuracy = validation_total / Accuracies_validation.Count;

            double testing_total     = 0;
            double highest_acc       = double.NegativeInfinity;
            int    highest_acc_index = 0;

            //Find the best-performing partition first, so highest_acc_index still lines up with the unfiltered 'trees' list
            for (int t = 0; t < Accuracies_testing.Count; t++)
            {
                if (Accuracies_testing[t] > highest_acc)
                {
                    highest_acc = Accuracies_testing[t]; highest_acc_index = t;
                }
            }

            //Sum the non-zero testing accuracies; iterate backwards so RemoveAt does not skip the element that shifts into the removed slot
            for (int t = Accuracies_testing.Count - 1; t >= 0; t--)
            {
                if (Accuracies_testing[t] != 0)
                {
                    testing_total += Accuracies_testing[t];
                }
                else
                {
                    Accuracies_testing.RemoveAt(t);
                }
            }

            double average_testing_accuracy = testing_total / Accuracies_testing.Count;

            double tot = 0;

            foreach (double i in tree_sizes)
            {
                tot += i / 2;           //each recorded size is halved before averaging (presumably because flattenTree counts each node twice)
            }

            double average_size = tot / tree_sizes.Count;

            double tot_rmse = 0;

            foreach (double r in rmse_list.Reverse <double>())
            {
                if (r != 0)
                {
                    tot_rmse += r;
                }
                else
                {
                    rmse_list.Remove(r);
                }
            }

            double average_rmse = tot_rmse / rmse_list.Count;

            //Set 'out' variables for collection
            train = average_training_accuracy;
            valid = average_validation_accuracy;
            test  = average_testing_accuracy;
            size  = average_size;
            rmse  = average_rmse;


            Console.WriteLine("Training accuracies:");
            foreach (double p in Accuracies_training)
            {
                Console.WriteLine(p);
            }
            Console.WriteLine("Validation accuracies:");
            foreach (double p in Accuracies_validation)
            {
                Console.WriteLine(p);
            }
            Console.WriteLine("Testing accuracies:");
            foreach (double p in Accuracies_testing)
            {
                Console.WriteLine(p);
            }


            Console.WriteLine("Average training accuracy: " + average_training_accuracy);
            Console.WriteLine("Average validation accuracy: " + average_validation_accuracy);
            Console.WriteLine("Average testing accuracy: " + average_testing_accuracy);
            Console.WriteLine("Average tree size: " + average_size);

            Console.WriteLine("Printed tree (highest test accuracy) :  " + Accuracies_testing[highest_acc_index]);

            //Visualise the tree with the highest test accuracy
            DOT_file_generator df = new DOT_file_generator();

            df.createDOTfile(trees[highest_acc_index]);
        }
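        //A hedged sketch (hypothetical helper, not in the original source) of one plausible reading of the
        //Pareto-front acceptance rule described in the optimisation loop above: a mutated tree is kept only if
        //neither its validation accuracy nor its size gets worse, so a change that leaves both stable still passes.
        //The actual internals of randomMutateAndRebuild_Size are not shown here; 'mutated' is assumed to come from there.
        private bool acceptMutation(Node current, Node mutated, string[] validationRows)
        {
            Accuracy a = new Accuracy();

            bool accuracyOk = a.GetAccuracy(mutated, validationRows) >= a.GetAccuracy(current, validationRows);
            bool sizeOk     = mutated.flattenTree(mutated).Length <= current.flattenTree(current).Length;

            return accuracyOk && sizeOk;    //accept only non-worsening moves on both objectives
        }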
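        //A hedged sketch (hypothetical helper, not in the original source) of the "drop the zero entries, then
        //average" pattern repeated above for the training, validation and RMSE lists. With it, each of those
        //loops could collapse to a single call, e.g. train = averageNonZero(Accuracies_training);
        private static double averageNonZero(List <double> values)
        {
            values.RemoveAll(v => v == 0);                          //discard partitions recorded as 0
            return values.Count == 0 ? 0 : values.Average();        //Average() comes from System.Linq
        }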