/// <summary>
/// Bagged-forest constructor: trains <paramref name="ForestSize"/> decision trees, each on a
/// slice of the shuffled training pool, and records every tree's train/test accuracy and
/// predictions in <c>Forest</c>.
/// </summary>
/// <param name="r">Reader handed to <c>SetData</c> to load the training entries.</param>
/// <param name="r2">Reader handed to <c>SetData</c> together with <paramref name="r"/>; the resulting <c>Test_Data</c> is used for the test error.</param>
/// <param name="depth">Depth argument forwarded to each <c>DecisionTree</c>.</param>
/// <param name="rand">Random source used for shuffling and forwarded to each tree.</param>
/// <param name="ForestSize">Number of trees to build.</param>
public Data(StreamReader r, StreamReader r2, int depth, Random rand, int ForestSize)
{
    // Maximum number of entries each tree is trained on.
    const int BagSize = 100;

    Forest = new List<BaggedForest>();
    Training_Data_Forest = new List<Entry>();
    Training_Data = new List<Entry>();
    Test_Data = new List<Entry>();

    // Load the training entries once; this list becomes the pool that is reshuffled per tree.
    SetData(r);
    Training_Data_Forest = Training_Data;

    for (int i = 0; i < ForestSize; i++)
    {
        ShuffleForestData(rand);

        Training_Data = new List<Entry>();
        Test_Data = new List<Entry>();
        SetData(r, r2);

        // Clamp the bag to the pool size: GetRange throws ArgumentException when asked
        // for more elements than the list holds.
        int bagCount = Math.Min(BagSize, Training_Data_Forest.Count);
        List<Entry> trainingDataHelper = Training_Data_Forest.GetRange(0, bagCount);
        Tree = new DecisionTree(ref trainingDataHelper, null, depth, rand, false, 0);

        // Test error as a percentage of the test set.
        List<Entry> testDataHelper = Test_Data;
        Error = (Convert.ToDouble(Tree.DetermineError(ref testDataHelper)) / Convert.ToDouble(Test_Data.Count)) * 100;
        Test_Accuracy = 100 - Error;
        Depth = Tree.DetermineDepth(0);

        // NOTE(review): Tree.Labels presumably holds the labels produced by the preceding
        // DetermineError call - confirm against DecisionTree.
        List<int> Test_Predictions = Tree.Labels;

        // Reload the full training set and score the same tree against it.
        Training_Data = new List<Entry>();
        SetData(r);
        Tree.Labels = new List<int>();
        trainingDataHelper = Training_Data;
        Train_Accuracy = 100 - ((Tree.DetermineError(ref trainingDataHelper) / Convert.ToDouble(Training_Data.Count)) * 100);
        List<int> Train_Predictions = Tree.Labels;

        Forest.Add(new BaggedForest(Train_Accuracy, Test_Accuracy, Train_Predictions, Test_Predictions));
    }
}
/// <summary>
/// Five-fold cross-validation constructor. Trains one decision tree per fold (four
/// <c>Cross_*</c> partitions for training, the fifth held out), labels the evaluation set with
/// each tree, and then aggregates predictions, error, accuracy and standard deviation.
/// </summary>
/// <param name="train">Reader forwarded to <c>SetValidateData</c>.</param>
/// <param name="test">Reader forwarded to <c>SetValidateData</c>.</param>
/// <param name="eval">Reader for the evaluation set that every fold's tree labels.</param>
/// <param name="eval_ID">Reader forwarded to <c>SetPredictions</c> together with the tree's labels.</param>
/// <param name="depth">Depth argument forwarded to each <c>DecisionTree</c>.</param>
/// <param name="r">Random source forwarded to <c>SetValidateData</c> and each tree.</param>
public Data(StreamReader train, StreamReader test, StreamReader eval, StreamReader eval_ID, int depth, Random r)
{
    double foldError1;
    double foldError2;
    double foldError3;
    double foldError4;
    double foldError5;
    List<Prediction> foldPredictions;

    Cross_Validate_Data = new List<Entry>();
    Predictions = new List<Prediction>();
    Predictions2 = new List<Prediction>();
    Predictions3 = new List<Prediction>();
    Predictions4 = new List<Prediction>();
    Predictions5 = new List<Prediction>();
    Predictions_Average = new List<Prediction>();
    Cross_1 = new List<Entry>();
    Cross_2 = new List<Entry>();
    Cross_3 = new List<Entry>();
    Cross_4 = new List<Entry>();
    Cross_5 = new List<Entry>();
    Accuracies = new List<double>();

    // NOTE(review): presumably fills Cross_1..Cross_5 with the five partitions - confirm.
    SetValidateData(train, test, r);

    // First fold: hold out Cross_5.
    Tree = RunCrossValidationFold(
        Cross_1.Concat(Cross_2.Concat(Cross_3.Concat(Cross_4))).ToList(), Cross_5,
        eval, eval_ID, depth, r, out foldError1, out foldPredictions);
    Predictions = foldPredictions;

    // Second fold: hold out Cross_4.
    Tree2 = RunCrossValidationFold(
        Cross_1.Concat(Cross_2.Concat(Cross_3.Concat(Cross_5))).ToList(), Cross_4,
        eval, eval_ID, depth, r, out foldError2, out foldPredictions);
    Predictions2 = foldPredictions;

    // Third fold: hold out Cross_3.
    Tree3 = RunCrossValidationFold(
        Cross_1.Concat(Cross_2.Concat(Cross_4.Concat(Cross_5))).ToList(), Cross_3,
        eval, eval_ID, depth, r, out foldError3, out foldPredictions);
    Predictions3 = foldPredictions;

    // Fourth fold: hold out Cross_2.
    Tree4 = RunCrossValidationFold(
        Cross_1.Concat(Cross_3.Concat(Cross_4.Concat(Cross_5))).ToList(), Cross_2,
        eval, eval_ID, depth, r, out foldError4, out foldPredictions);
    Predictions4 = foldPredictions;

    // Fifth fold: hold out Cross_1.
    Tree5 = RunCrossValidationFold(
        Cross_2.Concat(Cross_3.Concat(Cross_4.Concat(Cross_5))).ToList(), Cross_1,
        eval, eval_ID, depth, r, out foldError5, out foldPredictions);
    Predictions5 = foldPredictions;

    SetAveragedPredictions();

    // NOTE(review): the fold errors are percentages, so "1 - error" is not an accuracy.
    // The arguments are kept as they were because CalculateStandardDeviation is not
    // visible here; a plain standard deviation would be unaffected by the offset - confirm.
    StandardDeviation = CalculateStandardDeviation(1 - foldError1, 1 - foldError2, 1 - foldError3, 1 - foldError4, 1 - foldError5);

    // Average over all five folds (the fifth fold used to be left out of this mean).
    Error = (foldError1 + foldError2 + foldError3 + foldError4 + foldError5) / 5;

    Accuracies.Add(Tree.Accuracy);
    Accuracies.Add(Tree2.Accuracy);
    Accuracies.Add(Tree3.Accuracy);
    Accuracies.Add(Tree4.Accuracy);
    Accuracies.Add(Tree5.Accuracy);
    Accuracy = Accuracies.Average();

    // Depth is reported for the first fold's tree only.
    Depth = Tree.DetermineDepth(0);
}

/// <summary>
/// Runs one cross-validation fold: trains and collapses a tree on the training partitions,
/// measures its error on the held-out partition, then labels the evaluation set with it.
/// </summary>
/// <param name="trainingFolds">Entries of the four partitions used for training.</param>
/// <param name="heldOutFold">Entries of the partition used to measure the fold error.</param>
/// <param name="eval">Reader for the evaluation set.</param>
/// <param name="eval_ID">Reader forwarded to <c>SetPredictions</c>.</param>
/// <param name="depth">Depth argument forwarded to the <c>DecisionTree</c>.</param>
/// <param name="r">Random source forwarded to the <c>DecisionTree</c>.</param>
/// <param name="foldError">Held-out error of this fold, as a percentage.</param>
/// <param name="foldPredictions">Result of <c>SetPredictions</c> for the evaluation set.</param>
/// <returns>The trained tree, with its <c>Accuracy</c> set to <c>100 - foldError</c>.</returns>
private DecisionTree RunCrossValidationFold(List<Entry> trainingFolds, List<Entry> heldOutFold, StreamReader eval, StreamReader eval_ID, int depth, Random r, out double foldError, out List<Prediction> foldPredictions)
{
    // Stage the fold's partitions and convert them into Training_Data / Test_Data.
    Training_Data = new List<TrainingData>();
    Test_Data = new List<TrainingData>();
    data_1 = trainingFolds;
    data_2 = heldOutFold;
    SetTrainingData();

    List<TrainingData> trainingDataHelper = Training_Data;
    DecisionTree tree = new DecisionTree(ref trainingDataHelper, depth, r);
    tree.CollapseTree();

    // Held-out error as a percentage of the held-out set.
    List<TrainingData> testDataHelper = Test_Data;
    foldError = (Convert.ToDouble(tree.DetermineError(ref testDataHelper)) / Convert.ToDouble(Test_Data.Count)) * 100;
    tree.Accuracy = 100 - foldError;

    // Swap in the evaluation set and run the tree over it; only the labels it leaves in
    // tree.Labels are used, the returned error count is discarded.
    data_1 = new List<Entry>();
    data_2 = new List<Entry>();
    Training_Data = new List<TrainingData>();
    SetData(eval);
    SetTrainingData();
    tree.Labels = new List<int>();
    trainingDataHelper = Training_Data;
    tree.DetermineError(ref trainingDataHelper);
    foldPredictions = SetPredictions(eval_ID, tree.Labels);

    return tree;
}
/// <summary>
/// Creates a record that stores a list of predictions together with an accuracy value
/// and a decision tree.
/// </summary>
/// <param name="predictions">Value stored in <c>Predictions</c>.</param>
/// <param name="accuracy">Value stored in <c>Accuracy</c>.</param>
/// <param name="tree">Value stored in <c>Tree</c>.</param>
public PredictionAccuracy(List<Prediction> predictions, double accuracy, DecisionTree tree)
{
    this.Predictions = predictions;
    this.Accuracy = accuracy;
    this.Tree = tree;
}
/// <summary>
/// Naive Bayes constructor. Runs five folds over the five readers: in each fold one reader
/// supplies the test data and the other four supply the training data, and the mean of the
/// five fold accuracies is stored in <c>Test_Accuracy</c>.
/// </summary>
/// <param name="smoothing_term">Stored in <c>Smoothing_Term</c> and forwarded to every <c>DecisionTree</c>.</param>
/// <param name="r">Random source forwarded to every <c>DecisionTree</c>.</param>
/// <param name="r1">Reader for the first partition.</param>
/// <param name="r2">Reader for the second partition.</param>
/// <param name="r3">Reader for the third partition.</param>
/// <param name="r4">Reader for the fourth partition.</param>
/// <param name="r5">Reader for the fifth partition.</param>
public Data(double smoothing_term, Random r, StreamReader r1, StreamReader r2, StreamReader r3, StreamReader r4, StreamReader r5)
{
    Smoothing_Term = smoothing_term;

    StreamReader[] partitions = { r1, r2, r3, r4, r5 };
    double[] foldAccuracy = new double[partitions.Length];

    // The held-out partition walks backwards: r5 in the first fold, r1 in the last.
    for (int fold = 0; fold < partitions.Length; fold++)
    {
        int heldOut = partitions.Length - 1 - fold;

        Training_Data = new List<Entry>();
        Test_Data = new List<Entry>();

        // The first training reader is loaded together with the held-out reader through the
        // two-argument SetData; the remaining training readers use the one-argument overload.
        bool heldOutLoaded = false;
        for (int p = 0; p < partitions.Length; p++)
        {
            if (p == heldOut)
            {
                continue;
            }

            if (heldOutLoaded)
            {
                SetData(partitions[p]);
            }
            else
            {
                SetData(partitions[p], partitions[heldOut]);
                heldOutLoaded = true;
            }
        }

        List<Entry> trainingSet = Training_Data;
        Tree = new DecisionTree(ref trainingSet, Test_Data, 0, r, true, Smoothing_Term);
        foldAccuracy[fold] = Tree.Test_Accuracy;
    }

    Test_Accuracy = (foldAccuracy[0] + foldAccuracy[1] + foldAccuracy[2] + foldAccuracy[3] + foldAccuracy[4]) / 5;
}