/// <summary>
/// Computes the classification accuracy of a trained bagger over a data set.
/// </summary>
/// <param name="instances">Instances to score; the element at <c>ClassIndex</c> holds the true class label.</param>
/// <param name="bagger">Trained ensemble whose <c>GetClass</c> predicts an instance's class.</param>
/// <returns>Fraction of instances classified correctly, or 0.0 for an empty data set.</returns>
private static double Evaluate(List<int[]> instances, Id3Bagger bagger)
{
    // Guard: the original returned 0/0 == NaN for an empty list, which would
    // silently poison any downstream aggregation. Report 0 accuracy instead.
    if (instances.Count == 0)
    {
        return 0.0;
    }

    int correctAnswers = instances.Count(instance => bagger.GetClass(instance) == instance[ClassIndex]);
    return correctAnswers / (double)instances.Count;
}
/// <summary>
/// Trains Id3 baggers with a range of sample counts and prints training and
/// test accuracy for each, so over-fitting can be spotted. The whole
/// experiment is repeated many times because bagging samples randomly.
/// </summary>
static void Main(string[] args)
{
    List<int[]> trainingData = LoadArffData(TrainingArffFile);
    List<int[]> testData = LoadArffData(TestArffFile);

    // Single source of truth for the sample counts to evaluate; both result
    // maps below are derived from it, so they can never drift out of sync.
    int[] sampleCounts = { 1, 3, 5, 10, 20, 25, 50, 75, 100 };

    Console.WriteLine("Number of Samples, training accuracy, test accuracy");

    // Do this exercise multiple times as sampling is random.
    Parallel.For(0, 100, k =>
    {
        // Per-iteration result maps; concurrent because the inner loop
        // writes distinct keys from multiple threads.
        ConcurrentDictionary<int, double> sampleTrainingAccuraciesMap =
            new ConcurrentDictionary<int, double>(sampleCounts.ToDictionary(n => n, _ => 0.0));
        ConcurrentDictionary<int, double> sampleTestAccuraciesMap =
            new ConcurrentDictionary<int, double>(sampleCounts.ToDictionary(n => n, _ => 0.0));

        // Calculate different sample accuracies in parallel.
        Parallel.ForEach(sampleCounts, numOfSamples =>
        {
            Id3Bagger bagger = new Id3Bagger(numOfSamples);
            bagger.Train(trainingData, ClassIndex, Confidence);

            // Evaluate training and test to look out for overfitting.
            sampleTrainingAccuraciesMap[numOfSamples] = Evaluate(trainingData, bagger);
            sampleTestAccuraciesMap[numOfSamples] = Evaluate(testData, bagger);
        });

        // Serialize output so rows from concurrent iterations do not interleave.
        lock (_lockConsole)
        {
            foreach (int numOfSamples in sampleCounts.OrderBy(n => n))
            {
                Console.WriteLine($"{numOfSamples},{sampleTrainingAccuraciesMap[numOfSamples]},{sampleTestAccuraciesMap[numOfSamples]}");
            }
        }
    });

    Console.WriteLine("Press ENTER to exit...");
    Console.ReadLine();
}

/// <summary>
/// Reads every instance from an ARFF file and converts it to an int array;
/// missing values (null) map to -1. Extracted because the original read the
/// training and test files with two identical copies of this code.
/// </summary>
/// <param name="arffFile">Path of the ARFF file to read.</param>
/// <returns>All instances as int arrays, in file order.</returns>
private static List<int[]> LoadArffData(string arffFile)
{
    List<object[]> instances = new List<object[]>();
    using (ArffReader arffReader = new ArffReader(arffFile))
    {
        // The header must be consumed before instances can be read; the
        // returned header object itself was never used by the original code.
        arffReader.ReadHeader();
        object[] instance;
        while ((instance = arffReader.ReadInstance()) != null)
        {
            instances.Add(instance);
        }
    }

    return instances
        .Select(objectArray => objectArray.Select(o => o == null ? -1 : (int)o).ToArray())
        .ToList();
}