/// <summary>
/// Builds <paramref name="ForestSize"/> decision trees, each trained on a freshly shuffled sample of the
/// training data, and stores one <c>BaggedForest</c> entry per tree in <c>Forest</c>. Each entry holds the
/// tree's accuracy and predictions on the training, test and evaluation sets.
/// </summary>
/// <param name="r">First reader handed to <c>SetData(r, r2)</c> (presumably the training file; confirm against caller).</param>
/// <param name="r2">Second reader handed to <c>SetData(r, r2)</c> (presumably the test file; confirm against caller).</param>
/// <param name="eval">Reader handed to <c>SetData(eval)</c> to load the evaluation set.</param>
/// <param name="train_ID">Reader passed to <c>SetPredictions</c> together with the training-set labels.</param>
/// <param name="test_ID">Reader passed to <c>SetPredictions</c> together with the test-set labels.</param>
/// <param name="eval_ID">Reader passed to <c>SetPredictions</c> together with the evaluation-set labels.</param>
/// <param name="depth">Depth argument forwarded to every <c>DecisionTree</c>.</param>
/// <param name="rand">Random source used for shuffling and forwarded to every <c>DecisionTree</c>.</param>
/// <param name="ForestSize">Number of trees to build.</param>
public Data(StreamReader r, StreamReader r2, StreamReader eval, StreamReader train_ID, StreamReader test_ID, StreamReader eval_ID, int depth, Random rand, int ForestSize)
{
    // Upper bound on the number of shuffled examples each tree is trained on.
    const int maxSampleSize = 1000;

    Forest = new List<BaggedForest>();
    Training_Data_Forest = new List<TrainingData>();
    data_1 = new List<Entry>();
    Training_Data = new List<TrainingData>();
    data_2 = new List<Entry>();
    Test_Data = new List<TrainingData>();
    SetData(r, r2);
    SetTrainingData();

    // Keep a handle on the initially loaded training set; Training_Data itself is
    // replaced several times per iteration below.
    Training_Data_Forest = Training_Data;

    for (int i = 0; i < ForestSize; i++)
    {
        // Presumably reorders Training_Data_Forest so each tree sees a different sample; confirm.
        ShuffleForestData(rand);

        // Reload train/test data: the evaluation pass at the end of the previous
        // iteration overwrote data_1 and Training_Data.
        data_1 = new List<Entry>();
        Training_Data = new List<TrainingData>();
        data_2 = new List<Entry>();
        Test_Data = new List<TrainingData>();
        SetData(r, r2);
        SetTrainingData();

        // Clamp the sample size so GetRange does not throw on data sets with fewer
        // than maxSampleSize examples.
        int sampleSize = Math.Min(maxSampleSize, Training_Data_Forest.Count);
        List<TrainingData> trainingDataHelper = Training_Data_Forest.GetRange(0, sampleSize);
        Tree = new DecisionTree(ref trainingDataHelper, depth, rand);
        Tree.CollapseTree();

        // Test set.
        List<TrainingData> testDataHelper = Test_Data;
        Error = (Convert.ToDouble(Tree.DetermineError(ref testDataHelper)) / Convert.ToDouble(Test_Data.Count)) * 100;
        Test_Accuracy = 100 - Error;
        Depth = Tree.DetermineDepth(0);
        List<Prediction> testPredictions = SetPredictions(test_ID, Tree.Labels);

        // Full training set. Labels are reset before each pass and read right after it;
        // DetermineError is called exactly once per pass, matching the test and
        // evaluation passes (presumably it appends to Tree.Labels; confirm).
        Tree.Labels = new List<int>();
        trainingDataHelper = Training_Data;
        double trainAccuracy = 100 - ((Convert.ToDouble(Tree.DetermineError(ref trainingDataHelper)) / Convert.ToDouble(Training_Data.Count)) * 100);
        List<Prediction> trainPredictions = SetPredictions(train_ID, Tree.Labels);

        // Evaluation set.
        data_1 = new List<Entry>();
        data_2 = new List<Entry>();
        Training_Data = new List<TrainingData>();
        SetData(eval);
        SetTrainingData();
        Tree.Labels = new List<int>();
        trainingDataHelper = Training_Data;
        double evalAccuracy = 100 - ((Convert.ToDouble(Tree.DetermineError(ref trainingDataHelper)) / Convert.ToDouble(Training_Data.Count)) * 100);
        List<Prediction> evalPredictions = SetPredictions(eval_ID, Tree.Labels);

        Forest.Add(new BaggedForest(trainAccuracy, trainPredictions, Test_Accuracy, testPredictions, evalAccuracy, evalPredictions));

        // Progress output every fifth tree.
        if (i % 5 == 0)
        {
            Console.WriteLine(i);
        }
    }

    // Keep the last loaded entries in data_3, then reload the original data.
    data_3 = data_1;
    data_1 = new List<Entry>();
    data_2 = new List<Entry>();
    SetData(r, r2);
}
/// <summary>
/// Runs five-fold cross-validation. Each of the five partitions <c>Cross_1</c>..<c>Cross_5</c> is held out
/// once while a tree is trained on the other four. Each tree is scored on its held-out partition and then
/// used to label the evaluation set. The per-fold results are combined into <c>Error</c>, <c>Accuracy</c>,
/// <c>StandardDeviation</c> and the averaged predictions.
/// </summary>
/// <param name="train">Reader forwarded to <c>SetValidateData</c>.</param>
/// <param name="test">Reader forwarded to <c>SetValidateData</c>.</param>
/// <param name="eval">Reader handed to <c>SetData(eval)</c> to load the evaluation set in every fold.</param>
/// <param name="eval_ID">Reader passed to <c>SetPredictions</c> together with the evaluation-set labels.</param>
/// <param name="depth">Depth argument forwarded to every <c>DecisionTree</c>.</param>
/// <param name="r">Random source forwarded to <c>SetValidateData</c> and every <c>DecisionTree</c>.</param>
public Data(StreamReader train, StreamReader test, StreamReader eval, StreamReader eval_ID, int depth, Random r)
{
    Cross_Validate_Data = new List<Entry>();
    Predictions = new List<Prediction>();
    Predictions2 = new List<Prediction>();
    Predictions3 = new List<Prediction>();
    Predictions4 = new List<Prediction>();
    Predictions5 = new List<Prediction>();
    Predictions_Average = new List<Prediction>();
    Cross_1 = new List<Entry>();
    Cross_2 = new List<Entry>();
    Cross_3 = new List<Entry>();
    Cross_4 = new List<Entry>();
    Cross_5 = new List<Entry>();
    Accuracies = new List<double>();

    // Presumably fills Cross_1..Cross_5 with the partitioned data; confirm.
    SetValidateData(train, test, r);

    double foldError1;
    double foldError2;
    double foldError3;
    double foldError4;
    double foldError5;
    List<Prediction> foldPredictions;

    // Fold 1: hold out Cross_5.
    Tree = RunCrossValidationFold(Cross_1.Concat(Cross_2).Concat(Cross_3).Concat(Cross_4).ToList(), Cross_5, eval, eval_ID, depth, r, out foldError1, out foldPredictions);
    Predictions = foldPredictions;

    // Fold 2: hold out Cross_4.
    Tree2 = RunCrossValidationFold(Cross_1.Concat(Cross_2).Concat(Cross_3).Concat(Cross_5).ToList(), Cross_4, eval, eval_ID, depth, r, out foldError2, out foldPredictions);
    Predictions2 = foldPredictions;

    // Fold 3: hold out Cross_3.
    Tree3 = RunCrossValidationFold(Cross_1.Concat(Cross_2).Concat(Cross_4).Concat(Cross_5).ToList(), Cross_3, eval, eval_ID, depth, r, out foldError3, out foldPredictions);
    Predictions3 = foldPredictions;

    // Fold 4: hold out Cross_2.
    Tree4 = RunCrossValidationFold(Cross_1.Concat(Cross_3).Concat(Cross_4).Concat(Cross_5).ToList(), Cross_2, eval, eval_ID, depth, r, out foldError4, out foldPredictions);
    Predictions4 = foldPredictions;

    // Fold 5: hold out Cross_1.
    Tree5 = RunCrossValidationFold(Cross_2.Concat(Cross_3).Concat(Cross_4).Concat(Cross_5).ToList(), Cross_1, eval, eval_ID, depth, r, out foldError5, out foldPredictions);
    Predictions5 = foldPredictions;

    SetAveragedPredictions();

    // NOTE(review): the fold errors are percentages (0-100), so "1 - error" is not an
    // accuracy. A conventional standard deviation is unaffected by a constant shift, so the
    // arguments are left unchanged here; confirm against CalculateStandardDeviation.
    StandardDeviation = CalculateStandardDeviation(1 - foldError1, 1 - foldError2, 1 - foldError3, 1 - foldError4, 1 - foldError5);

    // Mean error over all five folds, consistent with Accuracy below.
    Error = (foldError1 + foldError2 + foldError3 + foldError4 + foldError5) / 5;

    Accuracies.Add(Tree.Accuracy);
    Accuracies.Add(Tree2.Accuracy);
    Accuracies.Add(Tree3.Accuracy);
    Accuracies.Add(Tree4.Accuracy);
    Accuracies.Add(Tree5.Accuracy);
    Accuracy = Accuracies.Average();

    // Depth is reported for the first fold's tree only.
    Depth = Tree.DetermineDepth(0);
}

/// <summary>
/// Runs one cross-validation fold: trains a tree on <paramref name="trainingEntries"/>, scores it on
/// <paramref name="heldOutEntries"/>, then loads the evaluation set and produces predictions for it.
/// </summary>
/// <param name="trainingEntries">Entries assigned to <c>data_1</c> for training.</param>
/// <param name="heldOutEntries">Entries assigned to <c>data_2</c> for scoring the tree.</param>
/// <param name="eval">Reader handed to <c>SetData(eval)</c>.</param>
/// <param name="eval_ID">Reader passed to <c>SetPredictions</c>.</param>
/// <param name="depth">Depth argument forwarded to the <c>DecisionTree</c>.</param>
/// <param name="r">Random source forwarded to the <c>DecisionTree</c>.</param>
/// <param name="errorPercent">Error on the held-out entries, as a percentage.</param>
/// <param name="evalPredictions">Result of <c>SetPredictions</c> for the evaluation set.</param>
/// <returns>The trained tree, with <c>Accuracy</c> set to <c>100 - errorPercent</c>.</returns>
private DecisionTree RunCrossValidationFold(List<Entry> trainingEntries, List<Entry> heldOutEntries, StreamReader eval, StreamReader eval_ID, int depth, Random r, out double errorPercent, out List<Prediction> evalPredictions)
{
    // Train on the four combined partitions.
    Training_Data = new List<TrainingData>();
    Test_Data = new List<TrainingData>();
    data_1 = trainingEntries;
    data_2 = heldOutEntries;
    SetTrainingData();

    List<TrainingData> trainingDataHelper = Training_Data;
    DecisionTree tree = new DecisionTree(ref trainingDataHelper, depth, r);
    tree.CollapseTree();

    // Score on the held-out partition.
    List<TrainingData> testDataHelper = Test_Data;
    errorPercent = (Convert.ToDouble(tree.DetermineError(ref testDataHelper)) / Convert.ToDouble(Test_Data.Count)) * 100;
    tree.Accuracy = 100 - errorPercent;

    // Load the evaluation set in place of the fold data.
    data_1 = new List<Entry>();
    data_2 = new List<Entry>();
    Training_Data = new List<TrainingData>();
    SetData(eval);
    SetTrainingData();

    // Reset the labels so the list read below holds only the evaluation pass
    // (presumably DetermineError appends to Labels; confirm).
    tree.Labels = new List<int>();
    trainingDataHelper = Training_Data;
    tree.DetermineError(ref trainingDataHelper);
    evalPredictions = SetPredictions(eval_ID, tree.Labels);

    return tree;
}