/// <summary>
/// Entry point. Checks that the training and test files exist, parses and
/// validates both data sets, then calls <c>RunWithTreeLevels</c> once for every
/// combination of bagger size (1, 2, 5, 10) and tree depth (1 to 3).
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
static void Main(string[] args)
{
    // Collect every missing-file problem first so the user sees all of them at once.
    string errorMessage = "";
    if (!File.Exists(DataSetPath))
    {
        // Plain "{...}" placeholders: a "$" in front of the brace would be printed literally.
        errorMessage += $"Failed to find file {DataSetPath} - please update variable {nameof(DataSetPath)} or create that file.\n";
    }
    if (!File.Exists(TestSetPath))
    {
        errorMessage += $"Failed to find file {TestSetPath} - please update variable {nameof(TestSetPath)} or create that file.\n";
    }

    if (errorMessage.Length > 0)
    {
        Console.ForegroundColor = ConsoleColor.Red;
        Console.WriteLine("Not all files available - not running!");
        Console.WriteLine(errorMessage);
        Console.ResetColor();
        Console.WriteLine("Press any key to continue...");
        Console.ReadKey();
        return;
    }

    Random rnd = new Random();

    Console.WriteLine("Reading training data...");
    ParserResults trainingData = ParserUtils.ParseData(DataSetPath);

    Console.WriteLine("Validating training set");
    DataSetCleaner.ValidateDataSet(trainingData.Attributes, trainingData.Values);

    // The test set is parsed with the attributes obtained from the training set.
    Console.WriteLine("Getting test set...");
    ParserResults testData = ParserUtils.ParseData(TestSetPath, trainingData.Attributes);

    Console.WriteLine("Validating test set");
    DataSetCleaner.ValidateDataSet(testData.Attributes, testData.Values);

    Console.WriteLine("TotalSamplesForBiasAndVariance : {0}", TotalSamplesForBiasAndVariance);

    List<int> sizeOfBaggers = new List<int>() { 1, 2, 5, 10 };
    foreach (var sizeOfBagger in sizeOfBaggers)
    {
        Console.WriteLine("Running with SizeOfBaggers {0}", sizeOfBagger);

        // Run the algorithm with different tree depths
        for (int treeDepth = 1; treeDepth <= 3; treeDepth++)
        {
            Console.WriteLine("Running with tree depth {0}", treeDepth);
            RunWithTreeLevels(trainingData, rnd, treeDepth, sizeOfBagger, testData);
        }
    }

    Console.WriteLine("Press any key to quit...");
    Console.ReadKey();
}
/// <summary>
/// Entry point for the LibSVM experiment. Checks that the data files exist,
/// parses and validates the training and test sets, then for every kernel id
/// (0 to 3) and every sample index below <c>TotalSamplesForBiasAndVariance</c>
/// makes sure a model file and an output file exist, creating them through
/// <c>RunTrainingExe</c> / <c>RunEvaluateExe</c> when they are missing. Finally
/// it reads the available output files back and prints bias and variance per kernel.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
static void Main(string[] args)
{
    // Collect every missing-file problem first so the user sees all of them at once.
    string errorMessage = "";
    if (!File.Exists(DataSetPath))
    {
        // Plain "{...}" placeholders: a "$" in front of the brace would be printed literally.
        errorMessage += $"Failed to find file {DataSetPath} - please update variable {nameof(DataSetPath)} or create that file.\n";
    }
    if (!File.Exists(TestSetPath))
    {
        errorMessage += $"Failed to find file {TestSetPath} - please update variable {nameof(TestSetPath)} or create that file.\n";
    }

    if (errorMessage.Length > 0)
    {
        Console.ForegroundColor = ConsoleColor.Red;
        Console.WriteLine("Not all files available - not running!");
        Console.WriteLine(errorMessage);
        Console.ResetColor();
        Console.WriteLine("Press any key to continue...");
        Console.ReadKey();
        return;
    }

    Random rnd = new Random();

    Console.WriteLine("Reading training data...");
    ParserResults trainingData = ParserUtils.ParseData(DataSetPath, convertContinuousValues: true);

    Console.WriteLine("Validating training set");
    DataSetCleaner.ValidateDataSet(trainingData.Attributes, trainingData.Values);

    // The test set is parsed and validated here but is not referenced again below;
    // the evaluation step is run against the file written from the training values.
    Console.WriteLine("Getting test set...");
    ParserResults testData = ParserUtils.ParseData(TestSetPath, trainingData.Attributes, convertContinuousValues: true);

    Console.WriteLine("Validating test set");
    DataSetCleaner.ValidateDataSet(testData.Attributes, testData.Values);

    const string svmTrainingPath = "svmtraining";
    const string svmOutputPath = "output";
    const string originalTrainingFile = "originalTraining.txt";
    List<int> kernelsToRunIn = new List<int>() { 0, 1, 2, 3 };

    LibSvmConverter.ConvertToLibSvm(trainingData.Values, originalTrainingFile);

    // Train and evaluate every (kernel, iteration) pair. This runs sequentially,
    // kernel by kernel; the single Random instance is shared by all iterations,
    // and System.Random is not safe for concurrent use.
    foreach (int kernel in kernelsToRunIn)
    {
        for (int i = 0; i < TotalSamplesForBiasAndVariance; i++)
        {
            Console.WriteLine("Doing loop {0} for kernel {1}", i, kernel);

            var postFix = string.Format("K{0}-I{1}.txt", kernel, i);
            string trainingPath = svmTrainingPath + postFix;
            string trainingModelPath = trainingPath + ".model";
            string outputPath = svmOutputPath + postFix;

            // Existing files are reused, so an interrupted run can be resumed.
            if (!File.Exists(trainingModelPath))
            {
                List<List<DataSetValue>> differentTrainingData = Bagging.ProduceDifferentDataSets(trainingData.Values, 1, rnd);
                LibSvmConverter.ConvertToLibSvm(differentTrainingData.Single(), trainingPath);
                RunTrainingExe(trainingPath, kernel);
            }

            if (!File.Exists(outputPath))
            {
                RunEvaluateExe(originalTrainingFile, trainingModelPath, outputPath);
            }
        }
    }

    // Evaluate bias and variance
    foreach (var kernel in kernelsToRunIn)
    {
        List<List<bool>> allPredictions = new List<List<bool>>();
        for (int i = 0; i < TotalSamplesForBiasAndVariance; i++)
        {
            Console.WriteLine("Evaluating loop {0} for kernel {1}", i, kernel);

            var postFix = string.Format("K{0}-I{1}.txt", kernel, i);
            string trainingPath = svmTrainingPath + postFix;
            string outputPath = svmOutputPath + postFix;

            // Skip iterations whose training or output file is missing.
            if (!File.Exists(trainingPath) || !File.Exists(outputPath))
            {
                continue;
            }

            List<bool> predictions = GetPredictionsFromOutputPath(outputPath);
            allPredictions.Add(predictions);
        }

        if (allPredictions.Count == 0)
        {
            Console.WriteLine("Not enough information to evaluate kernel {0}", kernel);
            continue;
        }

        double bias;
        double variance = BiasAndVarianceCalculator.CalculateBiasAndVariance(trainingData, allPredictions, out bias);
        Console.WriteLine("Bias:{0:0.00000} Variance:{1:0.00000}", bias, variance);
    }

    Console.WriteLine("Press any key to quit...");
    Console.ReadKey();
}
/// <summary>
/// Entry point for the naive Bayes experiment. Checks that the data files exist,
/// parses and validates the training and test sets, trains one predictor per
/// data set returned by <c>Bagging.ProduceDifferentDataSets</c>, prints the bias
/// and variance computed from those predictors, then trains a predictor on the
/// full training values and passes it to <c>EvaluateNaiveBayesPredictor</c> with
/// the test set. Start time, end time and elapsed minutes are written to the console.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
static void Main(string[] args)
{
    // Collect every missing-file problem first so the user sees all of them at once.
    string errorMessage = "";
    if (!File.Exists(DataSetPath))
    {
        // Plain "{...}" placeholders: a "$" in front of the brace would be printed literally.
        errorMessage += $"Failed to find file {DataSetPath} - please update variable {nameof(DataSetPath)} or create that file.\n";
    }
    if (!File.Exists(TestSetPath))
    {
        errorMessage += $"Failed to find file {TestSetPath} - please update variable {nameof(TestSetPath)} or create that file.\n";
    }

    if (errorMessage.Length > 0)
    {
        Console.ForegroundColor = ConsoleColor.Red;
        Console.WriteLine("Not all files available - not running!");
        Console.WriteLine(errorMessage);
        Console.ResetColor();
        Console.WriteLine("Press any key to continue...");
        Console.ReadKey();
        return;
    }

    // Timing covers everything from parsing through evaluation.
    var startTime = DateTime.Now;
    Console.WriteLine(startTime);

    Random rnd = new Random();

    Console.WriteLine("Reading training data...");
    ParserResults trainingData = ParserUtils.ParseData(DataSetPath, convertContinuousValues: true);

    Console.WriteLine("Validating training set");
    DataSetCleaner.ValidateDataSet(trainingData.Attributes, trainingData.Values);

    // The test set is parsed with the attributes obtained from the training set.
    Console.WriteLine("Getting test set...");
    ParserResults testData = ParserUtils.ParseData(TestSetPath, trainingData.Attributes, convertContinuousValues: true);

    Console.WriteLine("Validating test set");
    DataSetCleaner.ValidateDataSet(testData.Attributes, testData.Values);

    // One predictor per generated data set feeds the bias/variance calculation.
    List<List<DataSetValue>> differentDataSets = Bagging.ProduceDifferentDataSets(trainingData.Values, TotalSamplesForBiasAndVariance, rnd);
    List<IPredictor> naiveBayesPredictors = new List<IPredictor>();
    foreach (var differentTrainingDataSet in differentDataSets)
    {
        var naiveBayesPredictor = TrainNaiveBayes(differentTrainingDataSet);
        naiveBayesPredictors.Add(naiveBayesPredictor);
    }

    double bias;
    double variance = BiasAndVarianceCalculator.CalculateBiasAndVariance(trainingData, naiveBayesPredictors, out bias);
    Console.WriteLine($"Bias:{bias} Variance:{variance}");

    // Separate predictor trained on the unmodified training values, evaluated on the test set.
    var originalNaiveBayesPredictor = TrainNaiveBayes(trainingData.Values);
    EvaluateNaiveBayesPredictor(testData, originalNaiveBayesPredictor);

    var endTime = DateTime.Now;
    Console.WriteLine(endTime);
    var totalMinutes = (endTime - startTime).TotalMinutes;
    Console.WriteLine("Took {0} minutes.", totalMinutes);

    Console.WriteLine("Press any key to quit...");
    Console.ReadKey();
}
/// <summary>
/// Entry point for the bagging experiment. Checks that the data files exist,
/// parses and validates the training set, requests groups of 1, 3, 5, 10 and 20
/// data sets from <c>Bagging.ProduceDifferentDataSets</c>, builds one
/// <c>DecisionTreeLevel</c> per data set, calls <c>D3()</c> and then
/// <c>TrimTree()</c> on every tree via <c>Parallel.ForEach</c>, and finally
/// scores each group of trees against the test set and prints the score.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
static void Main(string[] args)
{
    // Collect every missing-file problem first so the user sees all of them at once.
    string errorMessage = "";
    if (!File.Exists(DataSetPath))
    {
        // Plain "{...}" placeholders: a "$" in front of the brace would be printed literally.
        errorMessage += $"Failed to find file {DataSetPath} - please update variable {nameof(DataSetPath)} or create that file.\n";
    }
    if (!File.Exists(TestSetPath))
    {
        errorMessage += $"Failed to find file {TestSetPath} - please update variable {nameof(TestSetPath)} or create that file.\n";
    }

    if (errorMessage.Length > 0)
    {
        Console.ForegroundColor = ConsoleColor.Red;
        Console.WriteLine("Not all files available - not running!");
        Console.WriteLine(errorMessage);
        Console.ResetColor();
        Console.WriteLine("Press any key to continue...");
        Console.ReadKey();
        return;
    }

    Random rnd = new Random();

    Console.WriteLine("Reading training data...");
    ParserResults trainingData = ParserUtils.ParseData(DataSetPath);

    Console.WriteLine("Validating data set");
    DataSetCleaner.ValidateDataSet(trainingData.Attributes, trainingData.Values);

    // One entry per ensemble size; each entry is the list of data sets for that ensemble.
    List<List<List<DataSetValue>>> dataSetValuesForBagging = new List<List<List<DataSetValue>>>()
    {
        Bagging.ProduceDifferentDataSets(trainingData.Values, 1, rnd),
        Bagging.ProduceDifferentDataSets(trainingData.Values, 3, rnd),
        Bagging.ProduceDifferentDataSets(trainingData.Values, 5, rnd),
        Bagging.ProduceDifferentDataSets(trainingData.Values, 10, rnd),
        Bagging.ProduceDifferentDataSets(trainingData.Values, 20, rnd),
    };

    // Initialize the required trees
    List<List<DecisionTreeLevel>> listOfTreesToRunTestOn = new List<List<DecisionTreeLevel>>();
    foreach (var dataSetForBagging in dataSetValuesForBagging)
    {
        listOfTreesToRunTestOn.Add(
            dataSetForBagging
                .Select(x => new DecisionTreeLevel(ChiTestLimit, trainingData.Attributes, x))
                .ToList());
    }

    // The ensembles are flattened so every tree is processed by the same parallel loop.
    Console.WriteLine("Runnind D3 on all trees in parallel...");
    Parallel.ForEach(listOfTreesToRunTestOn.SelectMany(s => s), l => l.D3());

    Console.WriteLine("Deleting unecessary nodes...");
    Parallel.ForEach(listOfTreesToRunTestOn.SelectMany(s => s), l => l.TrimTree());

    Console.WriteLine("Getting test data set...");
    ParserResults testData = ParserUtils.ParseData(TestSetPath);

    // Each ensemble is scored as a unit against the test values.
    Console.WriteLine("Evaluating trees against test data...");
    foreach (List<DecisionTreeLevel> baggingSetOfTrees in listOfTreesToRunTestOn)
    {
        DecisionTreeScore score = DecisionTreeScorer.ScoreWithTreeWithTestSet(baggingSetOfTrees, testData.Values);
        score.PrintTotalScore();
    }

    Console.WriteLine("Press any key to quit...");
    Console.ReadKey();
}
/// <summary>
/// Entry point for the chi-test-limit comparison. Checks that the data files
/// exist, parses and validates the training set, builds three
/// <c>DecisionTreeLevel</c> instances (chiTestLimit 0.99, 0.95 and 0), calls
/// <c>D3</c> and <c>TrimTree</c> on each via <c>Parallel.ForEach</c>, then scores
/// every tree against both the test set and the training set and prints the scores.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
static void Main(string[] args)
{
    // Collect every missing-file problem first so the user sees all of them at once.
    string errorMessage = "";
    if (!File.Exists(DataSetPath))
    {
        // Plain "{...}" placeholders: a "$" in front of the brace would be printed literally.
        errorMessage += $"Failed to find file {DataSetPath} - please update variable {nameof(DataSetPath)} or create that file.\n";
    }
    if (!File.Exists(TestSetPath))
    {
        errorMessage += $"Failed to find file {TestSetPath} - please update variable {nameof(TestSetPath)} or create that file.\n";
    }

    if (errorMessage.Length > 0)
    {
        Console.ForegroundColor = ConsoleColor.Red;
        Console.WriteLine("Not all files available - not running!");
        Console.WriteLine(errorMessage);
        Console.ResetColor();
        Console.WriteLine("Press any key to continue...");
        Console.ReadKey();
        return;
    }

    Console.WriteLine("Reading training data...");
    ParserResults trainingData = ParserUtils.ParseData(DataSetPath);

    // Optimizations are optional
    // DataSetOptimizerForExtraCredit.OptimizeDataSetForExtraCredit(trainingData.Attributes, trainingData.Values);

    Console.WriteLine("Validating data set");
    DataSetCleaner.ValidateDataSet(trainingData.Attributes, trainingData.Values);

    // Initialize the required trees with their respective chiTestLimits
    List<DecisionTreeLevel> listOfTreesToRunTestOn = new List<DecisionTreeLevel>()
    {
        new DecisionTreeLevel(chiTestLimit: 0.99),
        new DecisionTreeLevel(chiTestLimit: 0.95),
        new DecisionTreeLevel(chiTestLimit: 0),
    };

    Console.WriteLine("Runnind D3...");
    Parallel.ForEach(listOfTreesToRunTestOn, l => l.D3(trainingData.Attributes, trainingData.Values));

    Console.WriteLine("Deleting unecessary nodes...");
    Parallel.ForEach(listOfTreesToRunTestOn, l => l.TrimTree());

    Console.WriteLine("Getting test data set...");
    ParserResults testData = ParserUtils.ParseData(TestSetPath);

    // Optimizations are optional
    // DataSetOptimizerForExtraCredit.OptimizeDataSetForExtraCredit(testData.Attributes, testData.Values);

    Console.WriteLine("Evaluating trees against test data...");
    List<DecisionTreeScore> scores = listOfTreesToRunTestOn
        .AsParallel()
        .Select(t => DecisionTreeScorer.ScoreWithTreeWithTestSet(t, testData.Values))
        .ToList();

    //Console.WriteLine("Writing trees to text files (for debugging/visualization)...");
    // Dump the trees to a txt file for debugging/visualization
    // NOTE: This won't work for the Chi=0 case - the JSON file generated is too big
    // Parallel.ForEach(listOfTreesToRunTestOn, l => File.WriteAllText("Chi" + Convert.ToInt64(l.ChiTestLimit * 10000000000000) + ".json", l.SerializeDecisionTree()));

    // Score the same trees against the data they were built from, for comparison with the test scores.
    List<DecisionTreeScore> trainingDataScores = listOfTreesToRunTestOn
        .AsParallel()
        .Select(t => DecisionTreeScorer.ScoreWithTreeWithTestSet(t, trainingData.Values))
        .ToList();

    // Print the results to console
    foreach (var score in scores)
    {
        score.PrintTotalScore();
    }

    Console.WriteLine("Evaluating trees against training data:");
    foreach (var score in trainingDataScores)
    {
        score.PrintTotalScore();
    }

    Console.WriteLine("Press any key to quit...");
    Console.ReadKey();
}