/// <summary>
/// Runs the k-fold cross-validation experiment a fixed number of times,
/// prints per-iteration statistics, writes them to an Excel workbook via
/// ClosedXML, and reports the total and average run times.
/// </summary>
public void runProgram()
{
    const int Iterations = 10;      // number of full cross-validation experiments to average over
    const int FoldsPerRun = 10;     // k passed to run_KMeans; one tree is trained per fold

    var watch = new System.Diagnostics.Stopwatch();
    watch.Start();

    List<double> trains = new List<double>();
    List<double> valids = new List<double>();
    List<double> tests = new List<double>();
    List<double> sizes = new List<double>();
    List<double> rmses = new List<double>();

    DecisionTree tree = new DecisionTree();
    for (int i = 0; i < Iterations; i++)
    {
        tree.run_KMeans("PATH_TO_TARGET_DATASET", FoldsPerRun,
            out double train, out double valid, out double test, out double size, out double rmse);
        trains.Add(train);
        valids.Add(valid);
        tests.Add(test);
        sizes.Add(size);
        rmses.Add(rmse);
        Console.WriteLine("---------------------------------- ITERATION: " + i + " -----------------------------------");
    }

    for (int i = 0; i < Iterations; i++)
    {
        Console.WriteLine("\n");
        Console.WriteLine("Iteration: " + i + "\n"
            + "Train: " + trains[i] + "\n"
            + "Valid: " + valids[i] + "\n"
            + "Test: " + tests[i] + "\n"
            + "Size: " + sizes[i] + "\n"
            + "RMSE: " + rmses[i]);
    }

    // Create new Excel file containing test results:
    // one row per iteration, one column per statistic.
    XLWorkbook workbook = new XLWorkbook();
    DataTable table = new DataTable("table");
    string[] columnNames = { "Training Accuracy", "Validation Accuracy", "Test Accuracy", "Average Size", "RMSE" };
    foreach (string columnName in columnNames)
    {
        table.Columns.Add(new DataColumn
        {
            DataType = typeof(double),
            ColumnName = columnName,
            Unique = false,
            AllowDBNull = false
        });
    }

    for (int i = 0; i < trains.Count; i++)
    {
        DataRow row = table.NewRow();
        row[0] = trains[i];
        row[1] = valids[i];
        row[2] = tests[i];
        row[3] = sizes[i];
        row[4] = rmses[i];
        table.Rows.Add(row);
    }

    workbook.Worksheets.Add(table);
    // ClosedXML infers the save format from the file extension and throws an
    // ArgumentException on an extension-less path, so spell it out explicitly.
    workbook.SaveAs("Example_statistics_output.xlsx");

    watch.Stop();
    TimeSpan ts = watch.Elapsed;
    string elapsedTime = String.Format("{0:00}:{1:00}:{2:00}", ts.Hours, ts.Minutes, ts.Seconds);
    Console.WriteLine("RunTime " + elapsedTime);

    // Average training time per tree: Iterations runs * FoldsPerRun trees
    // trained per run (replaces the former hard-coded divisor of 100).
    TimeSpan tt = watch.Elapsed / (Iterations * FoldsPerRun);
    string elapsedTime2 = String.Format("{0:00}:{1:00}:{2:00}.{3:00}", tt.Hours, tt.Minutes, tt.Seconds, tt.Milliseconds / 10);
    Console.WriteLine("Avg. training time (seconds) : " + elapsedTime2);
}
/// <summary>Program entry point: builds a <see cref="DecisionTree"/> and runs the experiment.</summary>
static void Main(string[] args)
{
    var program = new DecisionTree();
    program.runProgram();
}
/// <summary>
/// Runs k-fold cross-validation: partitions the dataset at
/// <paramref name="file_path"/> into <paramref name="k"/> folds, trains a
/// C4.5 tree per fold, mutates it against a validation subset, then averages
/// the statistics over all folds and emits a DOT file for the best tree.
/// </summary>
/// <param name="file_path">Path of the dataset to load.</param>
/// <param name="k">Number of cross-validation folds.</param>
/// <param name="train">Average training accuracy over the non-zero folds.</param>
/// <param name="valid">Average validation accuracy over the non-zero folds.</param>
/// <param name="test">Average testing accuracy over the non-zero folds.</param>
/// <param name="size">Average tree size (halved flattenTree length).</param>
/// <param name="rmse">Average RMSE over the non-zero folds.</param>
public void run_KMeans(string file_path, int k, out double train, out double valid, out double test, out double size, out double rmse)
{
    // Reset per-run state accumulated on this instance.
    Accuracies_training.Clear();
    Accuracies_validation.Clear();
    Accuracies_testing.Clear();
    trees.Clear();
    tree_sizes.Clear();
    rmse_list.Clear();

    DataLoader d = new DataLoader();
    DecisionTree tree = new DecisionTree();
    Accuracy a = new Accuracy();

    d.get_K_Partitions(file_path, k); // fills d.partitions with k even partitions of the dataset (each contains a header row)

    for (int i = 0; i < k; i++) // for each partition configuration
    {
        Console.WriteLine("Partition " + i + " / " + k + " ---------------------------------------------------------------");

        List<string> training_data = new List<string>();
        List<string> testing_data = new List<string>();
        List<string> validation_data = new List<string>();

        training_data.Add(d.title_row); // Add title row to top of training set

        // Fold i becomes the test set; every other fold goes into training.
        for (int j = 0; j < k; j++)
        {
            if (j != i)
            {
                for (int z = 0; z < d.partitions[j].Length; z++)
                {
                    training_data.Add(d.partitions[j][z]);
                }
            }
            else
            {
                for (int z = 0; z < d.partitions[j].Length; z++)
                {
                    testing_data.Add(d.partitions[j][z]);
                }
            }
        }

        // Reserve 50% of the training data to be the validation set
        // (move the trailing rows to validation_data).
        int s = training_data.Count / 2;
        validation_data = training_data.GetRange(training_data.Count - s, s);
        training_data.RemoveRange(training_data.Count - s, s);

        DataTable x = d.CreateTable(training_data.ToArray()); // input: string[]  output: DataTable
        List<DataColumn> all_attributes = d.getAllAttributes(x);

        Node root = tree.root = tree.RunC4_5(x, all_attributes);
        root.isRoot = true; // Set identifier of the root
        root.pruneTree(root);
        trees.Add(root);

        training_data.RemoveAt(0); // drop the title row again before scoring

        List<string> validation_subset = getValidationSubset(validation_data);

        // Optimise with respect to the validation set.
        for (int it = 0; it < 10000; it++)
        {
            // SELECT OBJECTIVE FUNCTION — alternatives kept for reference:
            //root = root.randomMutateAndRebuild_Accuracy(root); // Maximise accuracy (regardless of size)
            //root = root.randomMutateAndRebuild_RMSE(root);     // Minimise RMSE (regression trees, regardless of size)
            // The active objective is a Pareto front: minimise tree size while
            // accuracy does not degrade (stable changes are accepted).
            if ((it % 100) == 0) { validation_subset = getValidationSubset(validation_data); } // Randomise validation subset every 100 iterations
            root = root.randomMutateAndRebuild_Size(root, validation_subset.ToArray());
        }

        // Save the statistics of this partition.
        Accuracies_training.Add(a.GetAccuracy(root, training_data.ToArray()));
        Accuracies_validation.Add(a.GetAccuracy(root, validation_data.ToArray()));
        Accuracies_testing.Add(a.GetAccuracy(root, testing_data.ToArray()));
        tree_sizes.Add(Convert.ToDouble(root.flattenTree(root).Length));
        rmse_list.Add(a.getRMSE(root, testing_data.ToArray()));

        x.Clear(); // Clear DataTable so that we can begin the next C4.5 run - on the next partition
    }

    Console.WriteLine("\n\n");
    Console.WriteLine("Final report: ");

    // Zero accuracies are treated as failed folds: dropped from the list and
    // excluded from the averages.
    double training_total = SumNonZero(Accuracies_training);
    double average_training_accuracy = training_total / Accuracies_training.Count;

    double validation_total = SumNonZero(Accuracies_validation);
    double average_validation_accuracy = validation_total / Accuracies_validation.Count;

    // Testing accuracies additionally track the best-scoring fold so its
    // tree can be visualised below.
    double testing_total = 0;
    double highest_acc = double.NegativeInfinity;
    int highest_acc_index = 0;
    for (int t = 0; t < Accuracies_testing.Count; t++)
    {
        if (Accuracies_testing[t] != 0)
        {
            testing_total += Accuracies_testing[t];
            if (Accuracies_testing[t] > highest_acc)
            {
                highest_acc = Accuracies_testing[t];
                highest_acc_index = t;
            }
        }
        else
        {
            // BUG FIX: RemoveAt shifts subsequent elements down one slot, so
            // the index must step back or the next element is skipped.
            Accuracies_testing.RemoveAt(t);
            t--;
            // NOTE(review): removing entries desynchronises highest_acc_index
            // from the parallel 'trees' list (which is never pruned) — confirm
            // whether a zero test accuracy can actually occur here.
        }
    }
    double average_testing_accuracy = testing_total / Accuracies_testing.Count;

    double tot = 0;
    // Sizes are halved — presumably flattenTree counts each node twice; TODO confirm.
    foreach (double i in tree_sizes) { tot += i / 2; }
    double average_size = tot / tree_sizes.Count;

    double tot_rmse = SumNonZero(rmse_list);
    double average_rmse = tot_rmse / rmse_list.Count;

    // Set 'out' variables for collection by the caller.
    train = average_training_accuracy;
    valid = average_validation_accuracy;
    test = average_testing_accuracy;
    size = average_size;
    rmse = average_rmse;

    Console.WriteLine("Training accuracies:");
    foreach (double p in Accuracies_training) { Console.WriteLine(p); }
    Console.WriteLine("Validation accuracies:");
    foreach (double p in Accuracies_validation) { Console.WriteLine(p); }
    Console.WriteLine("Testing accuracies:");
    foreach (double p in Accuracies_testing) { Console.WriteLine(p); }
    Console.WriteLine("Average training accuracy: " + average_training_accuracy);
    Console.WriteLine("Average validation accuracy: " + average_validation_accuracy);
    Console.WriteLine("Average testing accuracy: " + average_testing_accuracy);
    Console.WriteLine("Average tree size: " + average_size);
    Console.WriteLine("Printed tree (highest test accuracy) : " + Accuracies_testing[highest_acc_index]);

    // Visualise the tree with the highest test accuracy.
    DOT_file_generator df = new DOT_file_generator();
    df.createDOTfile(trees[highest_acc_index]);
}

/// <summary>
/// Removes zero entries from <paramref name="values"/> in place and returns
/// the sum of the remaining values. Replaces three identical
/// foreach-over-Reverse sum/remove stanzas.
/// </summary>
private static double SumNonZero(List<double> values)
{
    values.RemoveAll(v => v == 0);
    double sum = 0;
    foreach (double v in values)
    {
        sum += v;
    }
    return sum;
}