/// <summary>
/// Entry point: verifies the data/test files exist, parses and validates both
/// sets, trains one naive-Bayes predictor per bootstrap sample to estimate
/// bias and variance, then evaluates a predictor trained on the full
/// training set against the test set, reporting total run time.
/// </summary>
static void Main(string[] args)
{
    // Fail fast with a readable message if either input file is missing.
    // BUG FIX: the original used "${...}" (JavaScript template-literal
    // syntax); in a C# interpolated string that emits a literal '$' before
    // the value. The correct placeholder is "{...}".
    string errorMessage = "";
    if (!File.Exists(DataSetPath))
    {
        errorMessage += $"Failed to find file {DataSetPath} - please update variable {nameof(DataSetPath)} or create that file.\n";
    }
    if (!File.Exists(TestSetPath))
    {
        errorMessage += $"Failed to find file {TestSetPath} - please update variable {nameof(TestSetPath)} or create that file.\n";
    }
    if (errorMessage != "")
    {
        Console.ForegroundColor = ConsoleColor.Red;
        Console.WriteLine("Not all files available - not running!");
        Console.WriteLine(errorMessage);
        Console.ResetColor();
        Console.WriteLine("Press any key to continue...");
        Console.ReadKey();
        return;
    }

    var startTime = DateTime.Now;
    Console.WriteLine(startTime);

    Random rnd = new Random();

    Console.WriteLine("Reading training data...");
    ParserResults trainingData = ParserUtils.ParseData(DataSetPath, convertContinuousValues: true);
    Console.WriteLine("Validating training set");
    DataSetCleaner.ValidateDataSet(trainingData.Attributes, trainingData.Values);

    // The test set is parsed against the training set's attributes so both
    // sets share one attribute schema.
    Console.WriteLine("Getting test set...");
    ParserResults testData = ParserUtils.ParseData(TestSetPath, trainingData.Attributes, convertContinuousValues: true);
    Console.WriteLine("Validating test set");
    DataSetCleaner.ValidateDataSet(testData.Attributes, testData.Values);

    // Bootstrap-resample the training data and train one naive-Bayes
    // predictor per sample; the spread of their predictions yields the
    // bias/variance estimate.
    List<List<DataSetValue>> differentDataSets = Bagging.ProduceDifferentDataSets(trainingData.Values, TotalSamplesForBiasAndVariance, rnd);
    List<IPredictor> naiveBayesPredictors = new List<IPredictor>();
    foreach (var differentTrainingDataSet in differentDataSets)
    {
        var naiveBayesPredictor = TrainNaiveBayes(differentTrainingDataSet);
        naiveBayesPredictors.Add(naiveBayesPredictor);
    }

    double bias;
    double variance = BiasAndVarianceCalculator.CalculateBiasAndVariance(trainingData, naiveBayesPredictors, out bias);
    Console.WriteLine($"Bias:{bias} Variance:{variance}");

    // Also evaluate a predictor trained on the complete (un-sampled) set.
    var originalNaiveBayesPredictor = TrainNaiveBayes(trainingData.Values);
    EvaluateNaiveBayesPredictor(testData, originalNaiveBayesPredictor);

    var endTime = DateTime.Now;
    Console.WriteLine(endTime);
    var totalMinutes = (endTime - startTime).TotalMinutes;
    Console.WriteLine("Took {0} minutes.", totalMinutes);
    Console.WriteLine("Press any key to quit...");
    Console.ReadKey();
}
/// <summary>
/// Entry point: validates input files, parses the training/test sets,
/// converts the training data to libSVM format, trains and evaluates an SVM
/// model per (kernel, bootstrap iteration) pair via external executables,
/// then computes bias and variance per kernel from the prediction outputs.
/// Training/evaluation steps are skipped when their output files already
/// exist, so the run is resumable.
/// </summary>
static void Main(string[] args)
{
    // Fail fast with a readable message if either input file is missing.
    // BUG FIX: the original used "${...}" (JavaScript template-literal
    // syntax); in a C# interpolated string that emits a literal '$' before
    // the value. The correct placeholder is "{...}".
    string errorMessage = "";
    if (!File.Exists(DataSetPath))
    {
        errorMessage += $"Failed to find file {DataSetPath} - please update variable {nameof(DataSetPath)} or create that file.\n";
    }
    if (!File.Exists(TestSetPath))
    {
        errorMessage += $"Failed to find file {TestSetPath} - please update variable {nameof(TestSetPath)} or create that file.\n";
    }
    if (errorMessage != "")
    {
        Console.ForegroundColor = ConsoleColor.Red;
        Console.WriteLine("Not all files available - not running!");
        Console.WriteLine(errorMessage);
        Console.ResetColor();
        Console.WriteLine("Press any key to continue...");
        Console.ReadKey();
        return;
    }

    Random rnd = new Random();

    Console.WriteLine("Reading training data...");
    ParserResults trainingData = ParserUtils.ParseData(DataSetPath, convertContinuousValues: true);
    Console.WriteLine("Validating training set");
    DataSetCleaner.ValidateDataSet(trainingData.Attributes, trainingData.Values);

    // The test set is parsed against the training set's attributes so both
    // sets share one attribute schema.
    Console.WriteLine("Getting test set...");
    ParserResults testData = ParserUtils.ParseData(TestSetPath, trainingData.Attributes, convertContinuousValues: true);
    Console.WriteLine("Validating test set");
    DataSetCleaner.ValidateDataSet(testData.Attributes, testData.Values);

    const string svmTrainingPath = "svmtraining";
    const string svmOutputPath = "output";
    // libSVM kernel ids: 0=linear, 1=polynomial, 2=RBF, 3=sigmoid
    // — presumably; verify against the svm-train executable's docs.
    List<int> kernelsToRunIn = new List<int>() { 0, 1, 2, 3 };
    string originalTrainingFile = "originalTraining.txt";
    LibSvmConverter.ConvertToLibSvm(trainingData.Values, originalTrainingFile);

    // Train and evaluate every kernel for every bootstrap iteration.
    // NOTE: the original used Select(...).ToList() purely for side effects,
    // under a comment claiming the kernels ran "in parallel" — LINQ Select
    // executes sequentially, so that comment was wrong. Plain loops make the
    // sequential execution explicit. (Actual parallelism would also race on
    // the shared Random, which is not thread-safe.)
    foreach (var kernel in kernelsToRunIn)
    {
        for (int i = 0; i < TotalSamplesForBiasAndVariance; i++)
        {
            Console.WriteLine("Doing loop {0} for kernel {1}", i, kernel);
            var postFix = string.Format("K{0}-I{1}.txt", kernel, i);
            string trainingPath = svmTrainingPath + postFix;
            string trainingModelPath = trainingPath + ".model";
            string outputPath = svmOutputPath + postFix;

            // Skip training if a model from a previous run already exists.
            if (!File.Exists(trainingModelPath))
            {
                List<List<DataSetValue>> differentTrainingData = Bagging.ProduceDifferentDataSets(trainingData.Values, 1, rnd);
                LibSvmConverter.ConvertToLibSvm(differentTrainingData.Single(), trainingPath);
                RunTrainingExe(trainingPath, kernel);
            }
            // Skip evaluation if its output already exists.
            if (!File.Exists(outputPath))
            {
                RunEvaluateExe(originalTrainingFile, trainingModelPath, outputPath);
            }
        }
    }

    // Evaluate bias and variance per kernel from whatever outputs exist.
    foreach (var kernel in kernelsToRunIn)
    {
        List<List<bool>> allPredictions = new List<List<bool>>();
        for (int i = 0; i < TotalSamplesForBiasAndVariance; i++)
        {
            Console.WriteLine("Evaluating loop {0} for kernel {1}", i, kernel);
            var postFix = string.Format("K{0}-I{1}.txt", kernel, i);
            string trainingPath = svmTrainingPath + postFix;
            string outputPath = svmOutputPath + postFix;
            // Tolerate missing iterations (e.g. a partially completed run).
            if (!File.Exists(trainingPath) || !File.Exists(outputPath))
            {
                continue;
            }
            List<bool> predictions = GetPredictionsFromOutputPath(outputPath);
            allPredictions.Add(predictions);
        }
        if (allPredictions.Count == 0)
        {
            Console.WriteLine("Not enough information to evaluate kernel {0}", kernel);
            continue;
        }
        double bias;
        double variance = BiasAndVarianceCalculator.CalculateBiasAndVariance(trainingData, allPredictions, out bias);
        Console.WriteLine("Bias:{0:0.00000} Variance:{1:0.00000}", bias, variance);
    }

    Console.WriteLine("Press any key to quit...");
    Console.ReadKey();
}
/// <summary>
/// Trains bagged decision trees of a given maximum depth on two-layer
/// bootstrap samples of the training data, scores them against both the
/// training and test sets, and prints the bias/variance estimate alongside
/// the average scores.
/// </summary>
private static void RunWithTreeLevels(ParserResults trainingData, Random rnd, int treeDepth, int sizeOfBaggers, ParserResults testData)
{
    // Two-layer sampling: draw one outer bootstrap sample per iteration,
    // then (when bagging) re-sample it into 'sizeOfBaggers' inner bags.
    var dataSetValuesForBagging = new List<List<List<DataSetValue>>>();
    for (int sample = 0; sample < TotalSamplesForBiasAndVariance; sample++)
    {
        List<DataSetValue> outerSample = Bagging.ProduceDifferentDataSets(trainingData.Values, 1, rnd).Single();
        List<List<DataSetValue>> bags = sizeOfBaggers == 1
            ? new List<List<DataSetValue>>() { outerSample }
            : Bagging.ProduceDifferentDataSets(outerSample, sizeOfBaggers, rnd);
        dataSetValuesForBagging.Add(bags);
    }

    // For each bagging set, build one decision tree per inner data set.
    var listOfTreesToRunTestOn = dataSetValuesForBagging
        .Select(bags => bags
            .Select(dataSet => new DecisionTreeLevel(0, trainingData.Attributes, dataSet, maximumDepth: treeDepth))
            .ToList())
        .ToList();

    // Grow all trees in parallel, then trim them all in parallel.
    Parallel.ForEach(listOfTreesToRunTestOn.SelectMany(trees => trees), tree => tree.D3());
    Parallel.ForEach(listOfTreesToRunTestOn.SelectMany(trees => trees), tree => tree.TrimTree());

    // Accumulate each bagging set's score on the training and test data,
    // then average over the number of bagging sets.
    double totalScoreAgainstTrainingData = 0;
    double totalScoreAgainstTestData = 0;
    foreach (var baggingSetOfTrees in listOfTreesToRunTestOn)
    {
        DecisionTreeScore trainingScore = DecisionTreeScorer.ScoreWithTreeWithTestSet(baggingSetOfTrees, trainingData.Values);
        DecisionTreeScore testScore = DecisionTreeScorer.ScoreWithTreeWithTestSet(baggingSetOfTrees, testData.Values);
        totalScoreAgainstTrainingData += trainingScore.GetTotalScore();
        totalScoreAgainstTestData += testScore.GetTotalScore();
    }
    totalScoreAgainstTrainingData /= listOfTreesToRunTestOn.Count;
    totalScoreAgainstTestData /= listOfTreesToRunTestOn.Count;

    double bias;
    double variance = BiasAndVarianceCalculator.CalculateBiasAndVariance(trainingData, listOfTreesToRunTestOn, out bias);
    Console.WriteLine("Variance: {0:0.00000}. Bias: {1:0.00000}. ScoreTraining : {2:0.00000}, ScoreTest : {3:0.00000}", variance, bias, totalScoreAgainstTrainingData, totalScoreAgainstTestData);
}