/// <summary>
        /// Applies one random mutation (threshold change or attribute change) to a randomly
        /// selected node, rebuilds the affected branch, and keeps the result only if the
        /// accuracy measured on <paramref name="r"/> did not decrease. Otherwise the deep copy
        /// taken before the mutation is returned instead.
        /// </summary>
        /// <param name="root">Root of the tree to mutate. It is mutated in place when the change is accepted.</param>
        /// <param name="r">Rows handed to <c>Accuracy.GetAccuracy</c> to score the tree before and after the mutation.</param>
        /// <returns>
        /// <paramref name="root"/> (pruned) when the mutation is accepted, <paramref name="root"/> unchanged
        /// by this method when there is no node to select or the mutator reports code 1, or the
        /// pre-mutation deep copy when the mutation is rejected.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// The restored copy scores lower than the tree did before the mutation.
        /// </exception>
        public Node randomMutateAndRebuild_Accuracy(Node root, string[] r)
        {
            // Snapshot taken before anything is touched; this is what a rejected mutation rolls back to.
            Node copy = root.getDeepCopy(root);
            copy.isRoot = true;

            nodeList.Clear();
            Node[] nodes = root.flattenTree(root);
            // NOTE(review): randomMutateAndRebuild_Size adds root to nodeList BEFORE flattening;
            // here it is added afterwards. Confirm which order flattenTree expects.
            nodeList.Add(root);

            // Nothing to select from: behave like the "invalid node" path instead of indexing nodes[0].
            if (nodes.Length == 0)
            {
                return root;
            }

            Accuracy a = new Accuracy();
            initialAcc = a.GetAccuracy(root, r);     // Accuracy of the tree before mutation

            // Use the shared generator: a fresh Random per call can repeat its seed (and therefore
            // its first value) when this method is invoked many times in quick succession.
            int       selectedNodeIndex = rnd.Next(0, nodes.Length);
            DataTable initialTable      = nodes[selectedNodeIndex].subTable.Copy();

            bool thresholdMutated = rnd.Next(0, 2) == 0;     // 2 is the exclusive upper bound: 0 or 1

            double initialThresh = 0;      // Only meaningful (and only used) when the threshold is mutated
            int    code;
            Node   pivot;

            if (thresholdMutated)
            {
                zero_count += 1;
                pivot = changeThreshold(nodes, selectedNodeIndex, out initialThresh, out code, root);
            }
            else
            {
                one_count += 1;
                pivot = changeAttribute(nodes, selectedNodeIndex, out code);
            }

            // Code 1: the mutator picked an invalid node, so hand the tree back as it is.
            if (code == 1)
            {
                return root;
            }

            // Remove and rebuild the sub-tree below the pivot according to what was mutated.
            if (thresholdMutated)
            {
                buildBranch_Thresh(root, pivot, nodes);
            }
            else
            {
                buildBranch_Attribute(root, pivot, nodes);
            }

            newAcc = a.GetAccuracy(root, r);
            if (newAcc >= initialAcc)
            {
                root.pruneTree(root);          // Prune tree after successful mutation
                return root;
            }

            // Accuracy worsened (or is not comparable, e.g. NaN): roll back to the snapshot.
            if (thresholdMutated)
            {
                pivot.threshold = initialThresh;
            }
            root = copy;

            double restoredAcc = a.GetAccuracy(root, r);
            if (restoredAcc < initialAcc)
            {
                // The snapshot should score exactly like the original tree. Collect how the
                // pivot's original rows split around its threshold to help diagnose the mismatch.
                int higher = 0;
                int lower  = 0;
                int colID  = pivot.attribute.Ordinal;
                foreach (DataRow row in initialTable.Rows)
                {
                    double d = double.Parse(row[colID].ToString());
                    if (d <= pivot.threshold)
                    {
                        lower += 1;
                    }
                    else
                    {
                        higher += 1;
                    }
                }
                throw new InvalidOperationException(
                    $"Rollback failed: restored tree accuracy {restoredAcc} is below the pre-mutation accuracy {initialAcc} " +
                    $"(pivot rows at or below threshold: {lower}, above threshold: {higher}).");
            }
            return root;
        }
        /// <summary>
        /// Applies one random mutation (threshold change or attribute change) to a randomly
        /// selected node, rebuilds the affected branch, prunes the tree, and keeps the result
        /// only if accuracy on <paramref name="r"/> did not decrease AND the node count did not
        /// grow. Otherwise the deep copy taken before the mutation is returned instead.
        /// </summary>
        /// <param name="root">Root of the tree to mutate. It is mutated in place when the change is accepted.</param>
        /// <param name="r">Rows handed to <c>Accuracy.GetAccuracy</c> to score the tree before and after the mutation.</param>
        /// <returns>
        /// <paramref name="root"/> (pruned) when the mutation is accepted, <paramref name="root"/> unchanged
        /// by this method when there is no node to select or the mutator reports code 1, or the
        /// pre-mutation deep copy when the mutation is rejected.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// The restored copy scores lower than the tree did before the mutation.
        /// </exception>
        public Node randomMutateAndRebuild_Size(Node root, string[] r)
        {
            // Snapshot taken before anything is touched; this is what a rejected mutation rolls back to.
            Node copy = root.getDeepCopy(root);
            copy.isRoot = true;

            nodeList.Clear();
            nodeList.Add(root);
            Node[] nodes = root.flattenTree(root);

            // Nothing to select from: behave like the "invalid node" path instead of indexing nodes[0].
            if (nodes.Length == 0)
            {
                return root;
            }

            Accuracy a           = new Accuracy();
            int      initialSize = nodes.Length;

            initialAcc = a.GetAccuracy(root, r);     // Accuracy of the tree before mutation

            // Use the shared generator: a fresh Random per call can repeat its seed (and therefore
            // its first value) when this method is invoked many times in quick succession.
            int       selectedNodeIndex = rnd.Next(0, nodes.Length);
            DataTable initialTable      = nodes[selectedNodeIndex].subTable.Copy();

            bool thresholdMutated = rnd.Next(0, 2) == 0;     // 2 is the exclusive upper bound: 0 or 1

            double initialThresh = 0;      // Only meaningful (and only used) when the threshold is mutated
            int    code;
            Node   pivot;

            if (thresholdMutated)
            {
                pivot = changeThreshold(nodes, selectedNodeIndex, out initialThresh, out code, root);
            }
            else
            {
                pivot = changeAttribute(nodes, selectedNodeIndex, out code);
            }

            // Code 1: the mutator picked an invalid node, so hand the tree back as it is.
            if (code == 1)
            {
                return root;
            }

            // Remove and rebuild the sub-tree below the pivot according to what was mutated.
            if (thresholdMutated)
            {
                buildBranch_Thresh(root, pivot, nodes);
            }
            else
            {
                buildBranch_Attribute(root, pivot, nodes);
            }

            // Accuracy is measured before pruning; size is measured after pruning.
            newAcc = a.GetAccuracy(root, r);
            root.pruneTree(root);
            nodeList.Clear();
            nodeList.Add(root);
            int newSize = root.flattenTree(root).Length;

            if (newAcc >= initialAcc && newSize <= initialSize)
            {
                return root;
            }

            // Accuracy worsened (or is not comparable, e.g. NaN) or the tree grew: roll back.
            if (thresholdMutated)
            {
                pivot.threshold = initialThresh;
            }
            root = copy;

            double restoredAcc = a.GetAccuracy(root, r);
            if (restoredAcc < initialAcc)
            {
                // The snapshot should score exactly like the original tree. Collect how the
                // pivot's original rows split around its threshold to help diagnose the mismatch.
                int higher = 0;
                int lower  = 0;
                int colID  = pivot.attribute.Ordinal;
                foreach (DataRow row in initialTable.Rows)
                {
                    double d = double.Parse(row[colID].ToString());
                    if (d <= pivot.threshold)
                    {
                        lower += 1;
                    }
                    else
                    {
                        higher += 1;
                    }
                }
                throw new InvalidOperationException(
                    $"Rollback failed: restored tree accuracy {restoredAcc} is below the pre-mutation accuracy {initialAcc} " +
                    $"(pivot rows at or below threshold: {lower}, above threshold: {higher}).");
            }
            return root;
        }
        /// <summary>
        /// Runs a k-fold style evaluation: the data set is split into <paramref name="k"/> partitions,
        /// and for each partition in turn that partition is held out as the test set while the
        /// remaining rows are split in half into training and validation sets. A C4.5 tree is built
        /// on the training half, pruned, then optimised by repeated random mutation scored against
        /// random subsets of the validation half. Per-partition results are stored in the class-level
        /// lists and their averages are reported on the console and through the out parameters.
        /// Finally a DOT file is generated for the tree with the highest non-zero test accuracy.
        /// </summary>
        /// <param name="file_path">Path passed to <c>DataLoader.get_K_Partitions</c>.</param>
        /// <param name="k">Number of partitions (and therefore of train/test configurations).</param>
        /// <param name="train">Mean of the non-zero training accuracies (NaN if there are none).</param>
        /// <param name="valid">Mean of the non-zero validation accuracies (NaN if there are none).</param>
        /// <param name="test">Mean of the non-zero testing accuracies (NaN if there are none).</param>
        /// <param name="size">Mean over all trees of (node count / 2) (NaN if there are no trees).</param>
        /// <param name="rmse">Mean of the non-zero RMSE values on the test sets (NaN if there are none).</param>
        public void run_KMeans(string file_path, int k, out double train, out double valid, out double test, out double size, out double rmse)
        {
            const int optimisation_iterations = 10000;
            const int resample_interval       = 100;     // Re-draw the validation subset every this many iterations

            Accuracies_training.Clear();
            Accuracies_validation.Clear();
            Accuracies_testing.Clear();
            trees.Clear();
            tree_sizes.Clear();
            rmse_list.Clear();

            DataLoader   d    = new DataLoader();
            DecisionTree tree = new DecisionTree();
            Accuracy     a    = new Accuracy();

            d.get_K_Partitions(file_path, k);       //fills d.partitions with k even partitions of the dataset (each contains a header row)

            for (int i = 0; i < k; i++)             //for each partition configuration
            {
                Console.WriteLine("Partition  " + i + " / " + k + "   ---------------------------------------------------------------");

                List <string> training_data = new List <string>();
                List <string> testing_data  = new List <string>();

                training_data.Add(d.title_row);                        //Add title row to top of training set

                // Partition i is held out as the test set; every other partition feeds training.
                for (int j = 0; j < k; j++)
                {
                    List <string> target = (j != i) ? training_data : testing_data;
                    for (int z = 0; z < d.partitions[j].Length; z++)
                    {
                        target.Add(d.partitions[j][z]);
                    }
                }

                //Reserve 50% of the training data to be the validation set (move the rows to validation_data)
                int           s               = training_data.Count / 2;
                List <string> validation_data = training_data.GetRange(training_data.Count - s, s);
                training_data.RemoveRange(training_data.Count - s, s);

                DataTable         x = d.CreateTable(training_data.ToArray()); //input: string[] output: DataTable
                List <DataColumn> all_attributes = d.getAllAttributes(x);
                Node root = tree.root = tree.RunC4_5(x, all_attributes);
                root.isRoot = true;                                           //Set identifier of the root
                root.pruneTree(root);

                training_data.RemoveAt(0);      // Drop the title row now that the table has been built

                //Optimise with respect to the validation set.
                //PARETO FRONT: randomMutateAndRebuild_Size accepts a mutation only if accuracy does not
                //decrease and the tree does not grow. randomMutateAndRebuild_Accuracy can be swapped in
                //here to maximise accuracy regardless of size.
                string[] validation_rows = getValidationSubset(validation_data).ToArray();
                for (int it = 0; it < optimisation_iterations; it++)
                {
                    if (it > 0 && (it % resample_interval) == 0)
                    {
                        validation_rows = getValidationSubset(validation_data).ToArray();   //Randomise validation subset every x iterations
                    }
                    root = root.randomMutateAndRebuild_Size(root, validation_rows);
                    //force a mutation here if the accuracy has not improved in the last 100 iterations, for instance...
                }

                // Store the tree only now: a rejected mutation makes the mutator return a different
                // object (its rollback copy), so a reference saved before optimisation would go stale
                // and would no longer be the tree whose accuracies are recorded below.
                trees.Add(root);

                //Save the accuracies of each partition
                Accuracies_training.Add(a.GetAccuracy(root, training_data.ToArray()));
                Accuracies_validation.Add(a.GetAccuracy(root, validation_data.ToArray()));
                Accuracies_testing.Add(a.GetAccuracy(root, testing_data.ToArray()));
                tree_sizes.Add(Convert.ToDouble(root.flattenTree(root).Length));
                rmse_list.Add(a.getRMSE(root, testing_data.ToArray()));

                x.Clear();      //Clear DataTable so that we can begin the next C4.5 run - on the next partition
            }

            Console.WriteLine("\n\n");
            Console.WriteLine("Final report: ");

            // Drops every zero entry from the collection, then returns the mean of what is left.
            // An empty result yields NaN (0.0 / 0).
            double averageNonZero(ICollection <double> values)
            {
                while (values.Remove(0))
                {
                }
                double total = 0;
                foreach (double v in values)
                {
                    total += v;
                }
                return total / values.Count;
            }

            // Pick the best tree BEFORE zeros are removed, while positions in Accuracies_testing
            // still line up one-to-one with positions in trees.
            double[] testing_by_partition = Accuracies_testing.ToArray();
            double   highest_acc          = double.NegativeInfinity;
            int      highest_acc_index    = 0;

            for (int t = 0; t < testing_by_partition.Length; t++)
            {
                if (testing_by_partition[t] != 0 && testing_by_partition[t] > highest_acc)
                {
                    highest_acc       = testing_by_partition[t];
                    highest_acc_index = t;
                }
            }

            double average_training_accuracy   = averageNonZero(Accuracies_training);
            double average_validation_accuracy = averageNonZero(Accuracies_validation);
            double average_testing_accuracy    = averageNonZero(Accuracies_testing);

            // NOTE(review): each size is halved before averaging; presumably intentional
            // (e.g. counting only part of the nodes) - confirm.
            double tot = 0;

            foreach (double tree_size in tree_sizes)
            {
                tot += tree_size / 2;
            }

            double average_size = tot / tree_sizes.Count;
            double average_rmse = averageNonZero(rmse_list);

            //Set 'out' variables for collection
            train = average_training_accuracy;
            valid = average_validation_accuracy;
            test  = average_testing_accuracy;
            size  = average_size;
            rmse  = average_rmse;


            Console.WriteLine("Training accuracies:");
            foreach (double p in Accuracies_training)
            {
                Console.WriteLine(p);
            }
            Console.WriteLine("Validation accuracies:");
            foreach (double p in Accuracies_validation)
            {
                Console.WriteLine(p);
            }
            Console.WriteLine("Testing accuracies:");
            foreach (double p in Accuracies_testing)
            {
                Console.WriteLine(p);
            }


            Console.WriteLine("Average training accuracy: " + average_training_accuracy);
            Console.WriteLine("Average validation accuracy: " + average_validation_accuracy);
            Console.WriteLine("Average testing accuracy: " + average_testing_accuracy);
            Console.WriteLine("Average tree size: " + average_size);

            // Nothing to print or visualise when no partition was evaluated.
            if (highest_acc_index < testing_by_partition.Length && highest_acc_index < trees.Count)
            {
                Console.WriteLine("Printed tree (highest test accuracy) :  " + testing_by_partition[highest_acc_index]);

                //Visualise the tree with the highest test accuracy
                DOT_file_generator df = new DOT_file_generator();

                df.createDOTfile(trees[highest_acc_index]);
            }
        }