/// <summary>
/// Builds <c>TotalSamplesForBiasAndVariance</c> groups of depth-limited decision trees from
/// resampled training data, scores every group against both the training and the test data,
/// and prints the averaged scores together with the bias and variance reported by
/// <c>BiasAndVarianceCalculator</c>.
/// </summary>
/// <param name="trainingData">Parsed training set; its attributes are used for every tree.</param>
/// <param name="rnd">Random source handed to <c>Bagging.ProduceDifferentDataSets</c>.</param>
/// <param name="treeDepth">Passed as <c>maximumDepth</c> to every tree that is created.</param>
/// <param name="sizeOfBaggers">Number of data sets (and therefore trees) per group; 1 means no second sampling layer.</param>
/// <param name="testData">Parsed test set used for the test score.</param>
private static void RunWithTreeLevels(ParserResults trainingData, Random rnd, int treeDepth, int sizeOfBaggers, ParserResults testData)
{
    // One group of data sets per bias/variance sample.
    var sampledGroups = new List<List<List<DataSetValue>>>();
    for (int sample = 0; sample < TotalSamplesForBiasAndVariance; sample++)
    {
        // Layer 1: a single data set drawn from the full training values.
        List<DataSetValue> firstLayer = Bagging.ProduceDifferentDataSets(trainingData.Values, 1, rnd).Single();

        // Layer 2: only drawn when more than one tree per group is requested;
        // otherwise the first-layer data set is used on its own.
        List<List<DataSetValue>> group = sizeOfBaggers == 1
            ? new List<List<DataSetValue>>() { firstLayer }
            : Bagging.ProduceDifferentDataSets(firstLayer, sizeOfBaggers, rnd);

        sampledGroups.Add(group);
    }

    // One tree per data set, keeping the group structure intact.
    List<List<DecisionTreeLevel>> treeGroups = sampledGroups
        .Select(group => group
            .Select(values => new DecisionTreeLevel(0, trainingData.Attributes, values, maximumDepth: treeDepth))
            .ToList())
        .ToList();

    // Run D3 on every tree, then trim every tree; each pass runs in parallel across all groups.
    Parallel.ForEach(treeGroups.SelectMany(group => group), tree => tree.D3());
    Parallel.ForEach(treeGroups.SelectMany(group => group), tree => tree.TrimTree());

    // Accumulate the per-group scores so they can be averaged below.
    double trainingScoreSum = 0;
    double testScoreSum = 0;
    foreach (List<DecisionTreeLevel> treeGroup in treeGroups)
    {
        DecisionTreeScore trainingScore = DecisionTreeScorer.ScoreWithTreeWithTestSet(treeGroup, trainingData.Values);
        DecisionTreeScore testScore = DecisionTreeScorer.ScoreWithTreeWithTestSet(treeGroup, testData.Values);

        trainingScoreSum += trainingScore.GetTotalScore();
        testScoreSum += testScore.GetTotalScore();
    }

    double averageTrainingScore = trainingScoreSum / treeGroups.Count;
    double averageTestScore = testScoreSum / treeGroups.Count;

    double bias;
    double variance = BiasAndVarianceCalculator.CalculateBiasAndVariance(trainingData, treeGroups, out bias);

    Console.WriteLine(
        "Variance: {0:0.00000}. Bias: {1:0.00000}. ScoreTraining : {2:0.00000}, ScoreTest : {3:0.00000}",
        variance,
        bias,
        averageTrainingScore,
        averageTestScore);
}
/// <summary>
/// Program entry point. Verifies that the training and test files exist, builds bagged
/// groups of 1, 3, 5, 10 and 20 decision trees from the training data, runs D3 and
/// trimming on every tree in parallel, and prints each group's score against the test data.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Collect every missing-file problem first so they are all reported in one go.
    // Note: C# interpolation holes are written {expr}; a leading '$' inside the string
    // would be printed literally in front of the value.
    string errorMessage = "";
    if (!File.Exists(DataSetPath))
    {
        errorMessage += $"Failed to find file {DataSetPath} - please update variable {nameof(DataSetPath)} or create that file.\n";
    }
    if (!File.Exists(TestSetPath))
    {
        errorMessage += $"Failed to find file {TestSetPath} - please update variable {nameof(TestSetPath)} or create that file.\n";
    }

    if (errorMessage != "")
    {
        Console.ForegroundColor = ConsoleColor.Red;
        Console.WriteLine("Not all files available - not running!");
        Console.WriteLine(errorMessage);
        Console.ResetColor();
        Console.WriteLine("Press any key to continue...");
        Console.ReadKey();
        return;
    }

    Random rnd = new Random();

    Console.WriteLine("Reading training data...");
    ParserResults trainingData = ParserUtils.ParseData(DataSetPath);

    Console.WriteLine("Validating data set");
    DataSetCleaner.ValidateDataSet(trainingData.Attributes, trainingData.Values);

    // One entry per bagging size under test; each entry holds that many data sets.
    List<List<List<DataSetValue>>> dataSetValuesForBagging = new List<List<List<DataSetValue>>>()
    {
        Bagging.ProduceDifferentDataSets(trainingData.Values, 1, rnd),
        Bagging.ProduceDifferentDataSets(trainingData.Values, 3, rnd),
        Bagging.ProduceDifferentDataSets(trainingData.Values, 5, rnd),
        Bagging.ProduceDifferentDataSets(trainingData.Values, 10, rnd),
        Bagging.ProduceDifferentDataSets(trainingData.Values, 20, rnd),
    };

    // Initialize the required trees: one tree per data set, grouped by bagging size.
    List<List<DecisionTreeLevel>> listOfTreesToRunTestOn = new List<List<DecisionTreeLevel>>();
    foreach (var dataSetForBagging in dataSetValuesForBagging)
    {
        listOfTreesToRunTestOn.Add(
            dataSetForBagging
                .Select(x => new DecisionTreeLevel(ChiTestLimit, trainingData.Attributes, x))
                .ToList());
    }

    Console.WriteLine("Runnind D3 on all trees in parallel...");
    Parallel.ForEach(listOfTreesToRunTestOn.SelectMany(s => s), l => l.D3());

    Console.WriteLine("Deleting unecessary nodes...");
    Parallel.ForEach(listOfTreesToRunTestOn.SelectMany(s => s), l => l.TrimTree());

    Console.WriteLine("Getting test data set...");
    ParserResults testData = ParserUtils.ParseData(TestSetPath);

    Console.WriteLine("Evaluating trees against test data...");
    foreach (List<DecisionTreeLevel> baggingSetOfTrees in listOfTreesToRunTestOn)
    {
        // Each group of trees is scored together as one unit.
        DecisionTreeScore score = DecisionTreeScorer.ScoreWithTreeWithTestSet(baggingSetOfTrees, testData.Values);
        score.PrintTotalScore();
    }

    Console.WriteLine("Press any key to quit...");
    Console.ReadKey();
}
/// <summary>
/// Program entry point. Verifies that the training and test files exist, builds three
/// decision trees with chi-test limits 0.99, 0.95 and 0, runs D3 and trimming on them in
/// parallel, and prints each tree's score against the test data and the training data.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Collect every missing-file problem first so they are all reported in one go.
    // Note: C# interpolation holes are written {expr}; a leading '$' inside the string
    // would be printed literally in front of the value.
    string errorMessage = "";
    if (!File.Exists(DataSetPath))
    {
        errorMessage += $"Failed to find file {DataSetPath} - please update variable {nameof(DataSetPath)} or create that file.\n";
    }
    if (!File.Exists(TestSetPath))
    {
        errorMessage += $"Failed to find file {TestSetPath} - please update variable {nameof(TestSetPath)} or create that file.\n";
    }

    if (errorMessage != "")
    {
        Console.ForegroundColor = ConsoleColor.Red;
        Console.WriteLine("Not all files available - not running!");
        Console.WriteLine(errorMessage);
        Console.ResetColor();
        Console.WriteLine("Press any key to continue...");
        Console.ReadKey();
        return;
    }

    Console.WriteLine("Reading training data...");
    ParserResults trainingData = ParserUtils.ParseData(DataSetPath);

    // Optimizations are optional
    // DataSetOptimizerForExtraCredit.OptimizeDataSetForExtraCredit(trainingData.Attributes, trainingData.Values);

    Console.WriteLine("Validating data set");
    DataSetCleaner.ValidateDataSet(trainingData.Attributes, trainingData.Values);

    // Initialize the required trees with their respective chiTestLimits
    List<DecisionTreeLevel> listOfTreesToRunTestOn = new List<DecisionTreeLevel>()
    {
        new DecisionTreeLevel(chiTestLimit: 0.99),
        new DecisionTreeLevel(chiTestLimit: 0.95),
        new DecisionTreeLevel(chiTestLimit: 0),
    };

    Console.WriteLine("Runnind D3...");
    Parallel.ForEach(listOfTreesToRunTestOn, l => l.D3(trainingData.Attributes, trainingData.Values));

    Console.WriteLine("Deleting unecessary nodes...");
    Parallel.ForEach(listOfTreesToRunTestOn, l => l.TrimTree());

    Console.WriteLine("Getting test data set...");
    ParserResults testData = ParserUtils.ParseData(TestSetPath);

    // Optimizations are optional
    // DataSetOptimizerForExtraCredit.OptimizeDataSetForExtraCredit(testData.Attributes, testData.Values);

    Console.WriteLine("Evaluating trees against test data...");
    // AsOrdered keeps the results in the same order as the tree list; without it PLINQ may
    // return them in any order and the printed scores could not be matched to their trees.
    List<DecisionTreeScore> scores = listOfTreesToRunTestOn
        .AsParallel()
        .AsOrdered()
        .Select(t => DecisionTreeScorer.ScoreWithTreeWithTestSet(t, testData.Values))
        .ToList();

    // Dumping the trees to JSON files (for debugging/visualization) is disabled.
    // NOTE: This won't work for the Chi=0 case - the JSON file generated is too big
    // Parallel.ForEach(listOfTreesToRunTestOn, l => File.WriteAllText("Chi" + Convert.ToInt64(l.ChiTestLimit * 10000000000000) + ".json", l.SerializeDecisionTree()));

    List<DecisionTreeScore> trainingDataScores = listOfTreesToRunTestOn
        .AsParallel()
        .AsOrdered()
        .Select(t => DecisionTreeScorer.ScoreWithTreeWithTestSet(t, trainingData.Values))
        .ToList();

    // Print the results to console
    foreach (var score in scores)
    {
        score.PrintTotalScore();
    }

    Console.WriteLine("Evaluating trees against training data:");
    foreach (var score in trainingDataScores)
    {
        score.PrintTotalScore();
    }

    Console.WriteLine("Press any key to quit...");
    Console.ReadKey();
}