Exemplo n.º 1
0
        /// <summary>
        /// Trains <paramref name="ForestSize"/> decision trees, one per loop iteration, and stores
        /// for each tree its accuracy and predictions on the training, test and evaluation data
        /// as a <c>BaggedForest</c> entry in <c>Forest</c>.
        /// </summary>
        /// <param name="r">First reader handed to <c>SetData(r, r2)</c> (presumably the training file; confirm against the caller).</param>
        /// <param name="r2">Second reader handed to <c>SetData(r, r2)</c> (presumably the test file; confirm against the caller).</param>
        /// <param name="eval">Reader handed to <c>SetData(eval)</c> before the evaluation pass of every tree.</param>
        /// <param name="train_ID">Reader handed to <c>SetPredictions</c> together with the training-pass labels.</param>
        /// <param name="test_ID">Reader handed to <c>SetPredictions</c> together with the test-pass labels.</param>
        /// <param name="eval_ID">Reader handed to <c>SetPredictions</c> together with the evaluation-pass labels.</param>
        /// <param name="depth">Depth argument forwarded to every <c>DecisionTree</c>.</param>
        /// <param name="rand">Random source forwarded to <c>ShuffleForestData</c> and to every <c>DecisionTree</c>.</param>
        /// <param name="ForestSize">Number of trees to train.</param>
        public Data(StreamReader r, StreamReader r2, StreamReader eval, StreamReader train_ID, StreamReader test_ID, StreamReader eval_ID, int depth, Random rand, int ForestSize)
        {
            // Upper bound on the number of pool rows each tree is trained on.
            const int maxRowsPerTree = 1000;

            Forest = new List <BaggedForest>();
            Training_Data_Forest = new List <TrainingData>();
            data_1        = new List <Entry>();
            Training_Data = new List <TrainingData>();
            data_2        = new List <Entry>();
            Test_Data     = new List <TrainingData>();
            SetData(r, r2);
            SetTrainingData();

            // Training_Data is replaced by a fresh list on every iteration below, so this
            // reference keeps pointing at the list produced by the first load.
            Training_Data_Forest = Training_Data;

            for (int i = 0; i < ForestSize; i++)
            {
                ShuffleForestData(rand);

                // Reload the working training/test sets for this tree.
                data_1        = new List <Entry>();
                Training_Data = new List <TrainingData>();
                data_2        = new List <Entry>();
                Test_Data     = new List <TrainingData>();
                SetData(r, r2);
                SetTrainingData();

                // Clamp the slice so a pool smaller than maxRowsPerTree no longer makes
                // GetRange throw; larger pools behave exactly as before.
                int rowsForTree = Math.Min(maxRowsPerTree, Training_Data_Forest.Count);
                List <TrainingData> trainingDataHelper = Training_Data_Forest.GetRange(0, rowsForTree);
                Tree = new DecisionTree(ref trainingDataHelper, depth, rand);
                Tree.CollapseTree();

                // Test pass.
                List <TrainingData> testDataHelper = Test_Data;
                Error         = (Convert.ToDouble(Tree.DetermineError(ref testDataHelper)) / Convert.ToDouble(Test_Data.Count)) * 100;
                Test_Accuracy = 100 - Error;
                Depth         = Tree.DetermineDepth(0);
                List <Prediction> Test_Predictions = SetPredictions(test_ID, Tree.Labels);

                // Training pass. DetermineError is invoked exactly once, matching the test and
                // evaluation passes; the earlier duplicate call threw its result away.
                Tree.Labels        = new List <int>();
                trainingDataHelper = Training_Data;
                double            train_accuracy    = 100 - ((Convert.ToDouble(Tree.DetermineError(ref trainingDataHelper)) / Convert.ToDouble(Training_Data.Count)) * 100);
                List <Prediction> Train_Predictions = SetPredictions(train_ID, Tree.Labels);

                // Evaluation pass: the evaluation rows are loaded into the training-data slots.
                data_1        = new List <Entry>();
                data_2        = new List <Entry>();
                Training_Data = new List <TrainingData>();
                SetData(eval);
                SetTrainingData();
                Tree.Labels        = new List <int>();
                trainingDataHelper = Training_Data;
                double            eval_accuracy    = 100 - ((Convert.ToDouble(Tree.DetermineError(ref trainingDataHelper)) / Convert.ToDouble(Training_Data.Count)) * 100);
                List <Prediction> Eval_Predictions = SetPredictions(eval_ID, Tree.Labels);

                Forest.Add(new BaggedForest(train_accuracy, Train_Predictions, Test_Accuracy, Test_Predictions, eval_accuracy, Eval_Predictions));

                // Progress indicator: print every fifth tree index.
                if (i % 5 == 0)
                {
                    Console.WriteLine(i);
                }
            }

            // Keep whatever data_1 currently holds in data_3, then reload the original inputs.
            data_3 = data_1;
            data_1 = new List <Entry>();
            data_2 = new List <Entry>();
            SetData(r, r2);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Runs five-fold cross-validation over <c>Cross_1</c>..<c>Cross_5</c>: for each fold a
        /// decision tree is trained on four partitions, scored on the fifth, and then used to
        /// label the evaluation data. Stores the per-fold trees and predictions, the averaged
        /// predictions, and the mean error/accuracy and standard deviation across the folds.
        /// </summary>
        /// <param name="train">Reader handed to <c>SetValidateData</c> (presumably the training file; confirm against the caller).</param>
        /// <param name="test">Reader handed to <c>SetValidateData</c> (presumably the test file; confirm against the caller).</param>
        /// <param name="eval">Reader handed to <c>SetData(eval)</c> before each fold's evaluation pass.</param>
        /// <param name="eval_ID">Reader handed to <c>SetPredictions</c> together with each fold's evaluation labels.</param>
        /// <param name="depth">Depth argument forwarded to every <c>DecisionTree</c>.</param>
        /// <param name="r">Random source forwarded to <c>SetValidateData</c> and to every <c>DecisionTree</c>.</param>
        public Data(StreamReader train, StreamReader test, StreamReader eval, StreamReader eval_ID, int depth, Random r)
        {
            Cross_Validate_Data = new List <Entry>();
            Predictions         = new List <Prediction>();
            Predictions2        = new List <Prediction>();
            Predictions3        = new List <Prediction>();
            Predictions4        = new List <Prediction>();
            Predictions5        = new List <Prediction>();
            Predictions_Average = new List <Prediction>();
            Cross_1             = new List <Entry>();
            Cross_2             = new List <Entry>();
            Cross_3             = new List <Entry>();
            Cross_4             = new List <Entry>();
            Cross_5             = new List <Entry>();
            Accuracies          = new List <double>();
            SetValidateData(train, test, r);

            // First fold: hold out Cross_5.
            List <TrainingData> foldTrainingData = PrepareFold(Cross_1.Concat(Cross_2.Concat(Cross_3.Concat(Cross_4))).ToList(), Cross_5);
            Tree = new DecisionTree(ref foldTrainingData, depth, r);
            double temp_error1 = ScoreFold(Tree);
            Predictions = PredictEvaluationSet(Tree, eval, eval_ID);

            // Second fold: hold out Cross_4.
            foldTrainingData = PrepareFold(Cross_1.Concat(Cross_2.Concat(Cross_3.Concat(Cross_5))).ToList(), Cross_4);
            Tree2 = new DecisionTree(ref foldTrainingData, depth, r);
            double temp_error2 = ScoreFold(Tree2);
            Predictions2 = PredictEvaluationSet(Tree2, eval, eval_ID);

            // Third fold: hold out Cross_3.
            foldTrainingData = PrepareFold(Cross_1.Concat(Cross_2.Concat(Cross_4.Concat(Cross_5))).ToList(), Cross_3);
            Tree3 = new DecisionTree(ref foldTrainingData, depth, r);
            double temp_error3 = ScoreFold(Tree3);
            Predictions3 = PredictEvaluationSet(Tree3, eval, eval_ID);

            // Fourth fold: hold out Cross_2.
            foldTrainingData = PrepareFold(Cross_1.Concat(Cross_3.Concat(Cross_4.Concat(Cross_5))).ToList(), Cross_2);
            Tree4 = new DecisionTree(ref foldTrainingData, depth, r);
            double temp_error4 = ScoreFold(Tree4);
            Predictions4 = PredictEvaluationSet(Tree4, eval, eval_ID);

            // Fifth fold: hold out Cross_1.
            foldTrainingData = PrepareFold(Cross_2.Concat(Cross_3.Concat(Cross_4.Concat(Cross_5))).ToList(), Cross_1);
            Tree5 = new DecisionTree(ref foldTrainingData, depth, r);
            double temp_error5 = ScoreFold(Tree5);
            Predictions5 = PredictEvaluationSet(Tree5, eval, eval_ID);

            SetAveragedPredictions();

            // NOTE(review): the fold errors are percentages, so "1 - error" is not an accuracy.
            // A constant offset cancels out in a standard deviation taken about the mean, so the
            // arguments are left untouched; confirm CalculateStandardDeviation works that way.
            StandardDeviation = CalculateStandardDeviation(1 - temp_error1, 1 - temp_error2, 1 - temp_error3, 1 - temp_error4, 1 - temp_error5);

            // Mean error over all five folds (the fifth fold used to be left out of this average).
            Error = (temp_error1 + temp_error2 + temp_error3 + temp_error4 + temp_error5) / 5;

            Accuracies.Add(Tree.Accuracy);
            Accuracies.Add(Tree2.Accuracy);
            Accuracies.Add(Tree3.Accuracy);
            Accuracies.Add(Tree4.Accuracy);
            Accuracies.Add(Tree5.Accuracy);
            Accuracy = Accuracies.Average();

            // Only the first fold's tree is measured for depth.
            Depth    = Tree.DetermineDepth(0);
        }

        /// <summary>
        /// Installs one fold's training and held-out entries, rebuilds the training/test sets
        /// from them, and returns the resulting <c>Training_Data</c>.
        /// </summary>
        private List <TrainingData> PrepareFold(List <Entry> trainingEntries, List <Entry> heldOutEntries)
        {
            Training_Data = new List <TrainingData>();
            Test_Data     = new List <TrainingData>();

            data_1 = trainingEntries;
            data_2 = heldOutEntries;
            SetTrainingData();

            return Training_Data;
        }

        /// <summary>
        /// Collapses the tree, measures its error on the current <c>Test_Data</c>, records the
        /// matching accuracy on the tree, and returns the error as a percentage.
        /// </summary>
        private double ScoreFold(DecisionTree tree)
        {
            tree.CollapseTree();

            List <TrainingData> testDataHelper = Test_Data;
            double foldError = (Convert.ToDouble(tree.DetermineError(ref testDataHelper)) / Convert.ToDouble(Test_Data.Count)) * 100;
            tree.Accuracy = 100 - foldError;

            return foldError;
        }

        /// <summary>
        /// Loads the evaluation data into the training-data slots, runs the tree over it with a
        /// fresh label list, and returns the predictions built from those labels.
        /// </summary>
        private List <Prediction> PredictEvaluationSet(DecisionTree tree, StreamReader eval, StreamReader eval_ID)
        {
            data_1        = new List <Entry>();
            data_2        = new List <Entry>();
            Training_Data = new List <TrainingData>();
            SetData(eval);
            SetTrainingData();

            // Start from an empty label list so only this pass's labels reach SetPredictions.
            tree.Labels = new List <int>();
            List <TrainingData> evaluationDataHelper = Training_Data;
            tree.DetermineError(ref evaluationDataHelper);

            return SetPredictions(eval_ID, tree.Labels);
        }