Exemplo n.º 1
0
        /// <summary>
        /// Bagged-forest constructor: trains <paramref name="ForestSize"/> decision trees, each on a
        /// bag drawn from the front of the freshly shuffled forest pool, and records every tree's
        /// train/test accuracy and predictions in <c>Forest</c>.
        /// </summary>
        /// <param name="r">Reader handed to <c>SetData(r)</c> and <c>SetData(r, r2)</c>; presumably the training file (verify against SetData).</param>
        /// <param name="r2">Second reader handed to <c>SetData(r, r2)</c>; presumably the test file (verify against SetData).</param>
        /// <param name="depth">Depth argument forwarded to every <c>DecisionTree</c>.</param>
        /// <param name="rand">Random source used for shuffling and forwarded to every tree.</param>
        /// <param name="ForestSize">Number of trees to train.</param>
        public Data(StreamReader r, StreamReader r2, int depth, Random rand, int ForestSize)
        {
            // Upper bound on the number of entries handed to each tree.
            const int BagSize = 100;

            Forest = new List<BaggedForest>();
            Training_Data_Forest = new List<Entry>();
            Training_Data        = new List<Entry>();
            Test_Data            = new List<Entry>();
            SetData(r);

            // The pool keeps pointing at this first list; Training_Data itself is replaced inside the loop.
            Training_Data_Forest = Training_Data;

            for (int i = 0; i < ForestSize; i++)
            {
                ShuffleForestData(rand);
                Training_Data = new List<Entry>();
                Test_Data     = new List<Entry>();
                SetData(r, r2);

                // Clamp the bag to the pool size: GetRange throws when asked for more
                // elements than the list holds, which used to crash on small data sets.
                int bagCount = Math.Min(BagSize, Training_Data_Forest.Count);
                List<Entry> trainingDataHelper = Training_Data_Forest.GetRange(0, bagCount);
                Tree = new DecisionTree(ref trainingDataHelper, null, depth, rand, false, 0);

                // Score the tree on the test set and keep the labels it produced.
                List<Entry> testDataHelper = Test_Data;
                Error         = (Convert.ToDouble(Tree.DetermineError(ref testDataHelper)) / Convert.ToDouble(Test_Data.Count)) * 100;
                Test_Accuracy = 100 - Error;
                Depth         = Tree.DetermineDepth(0);
                List<int> Test_Predictions = Tree.Labels;

                // Reload the full training set and score the same tree on it. Labels is
                // replaced (not cleared) so Test_Predictions keeps the test-set labels.
                Training_Data = new List<Entry>();
                SetData(r);
                Tree.Labels        = new List<int>();
                trainingDataHelper = Training_Data; // this is really the train data
                Train_Accuracy     = 100 - ((Tree.DetermineError(ref trainingDataHelper) / Convert.ToDouble(Training_Data.Count)) * 100);
                List<int> Train_Predictions = Tree.Labels;

                Forest.Add(new BaggedForest(Train_Accuracy, Test_Accuracy, Train_Predictions, Test_Predictions));
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Five-fold cross-validation constructor. Trains one decision tree per fold
        /// (<c>Tree</c> .. <c>Tree5</c>), scores each on its held-out fold, uses each tree to
        /// label the evaluation set (<c>Predictions</c> .. <c>Predictions5</c>), and then
        /// stores the averaged predictions, the mean error/accuracy and the standard deviation.
        /// </summary>
        /// <param name="train">Reader forwarded to <c>SetValidateData</c>.</param>
        /// <param name="test">Reader forwarded to <c>SetValidateData</c>.</param>
        /// <param name="eval">Reader forwarded to <c>SetData</c> once per fold to load the evaluation set.</param>
        /// <param name="eval_ID">Reader forwarded to <c>SetPredictions</c> once per fold.</param>
        /// <param name="depth">Depth argument forwarded to every <c>DecisionTree</c>.</param>
        /// <param name="r">Random source forwarded to <c>SetValidateData</c> and every tree.</param>
        public Data(StreamReader train, StreamReader test, StreamReader eval, StreamReader eval_ID, int depth, Random r)
        {
            Cross_Validate_Data = new List<Entry>();
            Predictions         = new List<Prediction>();
            Predictions2        = new List<Prediction>();
            Predictions3        = new List<Prediction>();
            Predictions4        = new List<Prediction>();
            Predictions5        = new List<Prediction>();
            Predictions_Average = new List<Prediction>();
            Cross_1             = new List<Entry>();
            Cross_2             = new List<Entry>();
            Cross_3             = new List<Entry>();
            Cross_4             = new List<Entry>();
            Cross_5             = new List<Entry>();
            Accuracies          = new List<double>();
            SetValidateData(train, test, r);

            // First fold: hold out Cross_5.
            Tree = BuildFoldTree(Cross_1.Concat(Cross_2.Concat(Cross_3.Concat(Cross_4))).ToList(), Cross_5, depth, r);
            double temp_error1 = ScoreFoldTree(Tree);
            Predictions = PredictEvaluationSet(Tree, eval, eval_ID);

            // Second fold: hold out Cross_4.
            Tree2 = BuildFoldTree(Cross_1.Concat(Cross_2.Concat(Cross_3.Concat(Cross_5))).ToList(), Cross_4, depth, r);
            double temp_error2 = ScoreFoldTree(Tree2);
            Predictions2 = PredictEvaluationSet(Tree2, eval, eval_ID);

            // Third fold: hold out Cross_3.
            Tree3 = BuildFoldTree(Cross_1.Concat(Cross_2.Concat(Cross_4.Concat(Cross_5))).ToList(), Cross_3, depth, r);
            double temp_error3 = ScoreFoldTree(Tree3);
            Predictions3 = PredictEvaluationSet(Tree3, eval, eval_ID);

            // Fourth fold: hold out Cross_2.
            Tree4 = BuildFoldTree(Cross_1.Concat(Cross_3.Concat(Cross_4.Concat(Cross_5))).ToList(), Cross_2, depth, r);
            double temp_error4 = ScoreFoldTree(Tree4);
            Predictions4 = PredictEvaluationSet(Tree4, eval, eval_ID);

            // Fifth fold: hold out Cross_1.
            Tree5 = BuildFoldTree(Cross_2.Concat(Cross_3.Concat(Cross_4.Concat(Cross_5))).ToList(), Cross_1, depth, r);
            double temp_error5 = ScoreFoldTree(Tree5);
            Predictions5 = PredictEvaluationSet(Tree5, eval, eval_ID);

            SetAveragedPredictions();

            // NOTE(review): the errors are percentages, so "1 - error" is not an accuracy.
            // If CalculateStandardDeviation is a plain standard deviation the constant
            // shift does not affect the result - confirm before changing the arguments.
            StandardDeviation = CalculateStandardDeviation(1 - temp_error1, 1 - temp_error2, 1 - temp_error3, 1 - temp_error4, 1 - temp_error5);

            // Mean error over ALL five folds (the fifth fold used to be left out and the
            // sum divided by 4, which disagreed with the five-fold Accuracy below).
            Error = (temp_error1 + temp_error2 + temp_error3 + temp_error4 + temp_error5) / 5;

            Accuracies.Add(Tree.Accuracy);
            Accuracies.Add(Tree2.Accuracy);
            Accuracies.Add(Tree3.Accuracy);
            Accuracies.Add(Tree4.Accuracy);
            Accuracies.Add(Tree5.Accuracy);
            Accuracy = Accuracies.Average();

            // Depth is reported for the first fold's tree only.
            Depth = Tree.DetermineDepth(0);
        }

        /// <summary>
        /// Installs one fold split (training entries in <c>data_1</c>, held-out entries in
        /// <c>data_2</c>), rebuilds <c>Training_Data</c>/<c>Test_Data</c> through
        /// <c>SetTrainingData</c> and trains a new tree on the resulting training data.
        /// </summary>
        private DecisionTree BuildFoldTree(List<Entry> trainingEntries, List<Entry> heldOutEntries, int depth, Random r)
        {
            Training_Data = new List<TrainingData>();
            Test_Data     = new List<TrainingData>();

            data_1 = trainingEntries;
            data_2 = heldOutEntries;
            SetTrainingData();

            List<TrainingData> trainingRows = Training_Data;
            return new DecisionTree(ref trainingRows, depth, r);
        }

        /// <summary>
        /// Collapses the tree, measures its error on the current <c>Test_Data</c>, stores the
        /// matching accuracy on the tree and returns the error as a percentage.
        /// </summary>
        private double ScoreFoldTree(DecisionTree tree)
        {
            tree.CollapseTree();

            List<TrainingData> heldOutRows = Test_Data;
            double errorPercent = (Convert.ToDouble(tree.DetermineError(ref heldOutRows)) / Convert.ToDouble(Test_Data.Count)) * 100;
            tree.Accuracy = 100 - errorPercent;
            return errorPercent;
        }

        /// <summary>
        /// Reloads the evaluation set into <c>Training_Data</c>, runs the tree over it to fill a
        /// fresh <c>Labels</c> list and converts those labels into predictions.
        /// </summary>
        private List<Prediction> PredictEvaluationSet(DecisionTree tree, StreamReader eval, StreamReader eval_ID)
        {
            data_1        = new List<Entry>();
            data_2        = new List<Entry>();
            Training_Data = new List<TrainingData>();
            SetData(eval);
            SetTrainingData();

            // DetermineError is called for the labels it records; its return value is not needed here.
            tree.Labels = new List<int>();
            List<TrainingData> evaluationRows = Training_Data;
            tree.DetermineError(ref evaluationRows);
            return SetPredictions(eval_ID, tree.Labels);
        }
 /// <summary>
 /// Bundles a set of predictions with the accuracy value and the decision tree supplied alongside them.
 /// </summary>
 /// <param name="predictions">Stored in <c>Predictions</c>.</param>
 /// <param name="accuracy">Stored in <c>Accuracy</c>.</param>
 /// <param name="tree">Stored in <c>Tree</c>.</param>
 public PredictionAccuracy(List<Prediction> predictions, double accuracy, DecisionTree tree)
 {
     // Plain member-wise capture of the arguments; nothing is copied or validated.
     this.Predictions = predictions;
     this.Accuracy = accuracy;
     this.Tree = tree;
 }
Exemplo n.º 4
0
        /// <summary>
        /// Naive Bayes constructor: runs five folds over the readers <paramref name="r1"/> ..
        /// <paramref name="r5"/> and stores the mean of the five per-fold test accuracies in
        /// <c>Test_Accuracy</c>. <c>Tree</c> ends up holding the model built for the last fold.
        /// </summary>
        /// <param name="smoothing_term">Stored in <c>Smoothing_Term</c> and forwarded to every model.</param>
        /// <param name="r">Random source forwarded to every model.</param>
        public Data(double smoothing_term, Random r, StreamReader r1, StreamReader r2, StreamReader r3, StreamReader r4, StreamReader r5)
        {
            Smoothing_Term = smoothing_term;

            // One row per fold. The first two readers go to the two-argument SetData call
            // (presumably training reader + held-out reader - verify against SetData); the
            // remaining three are each passed to the one-argument SetData, in this order.
            StreamReader[][] foldReaders =
            {
                new[] { r1, r5, r2, r3, r4 },
                new[] { r1, r4, r2, r3, r5 },
                new[] { r1, r3, r2, r4, r5 },
                new[] { r1, r2, r3, r4, r5 },
                new[] { r2, r1, r3, r4, r5 },
            };

            double[] foldAccuracy = new double[foldReaders.Length];

            for (int fold = 0; fold < foldReaders.Length; fold++)
            {
                StreamReader[] readers = foldReaders[fold];

                Training_Data = new List<Entry>();
                Test_Data     = new List<Entry>();

                SetData(readers[0], readers[1]);
                SetData(readers[2]);
                SetData(readers[3]);
                SetData(readers[4]);

                List<Entry> trainingRows = Training_Data;
                Tree = new DecisionTree(ref trainingRows, Test_Data, 0, r, true, Smoothing_Term);
                foldAccuracy[fold] = Tree.Test_Accuracy;
            }

            Test_Accuracy = (foldAccuracy[0] + foldAccuracy[1] + foldAccuracy[2] + foldAccuracy[3] + foldAccuracy[4]) / 5;
        }