// Train one ID3 tree per bootstrap sample and keep the trained trees as the ensemble.
public void Train(List<int[]> trainingData, int classIndex, double confidence, int maxDepth)
{
    Classifiers = new List<Id3Classifier>();
    List<List<int[]>> samples = Sampler.SampleData(trainingData, NumberOfSamples);
    for (int i = 0; i < NumberOfSamples; i++)
    {
        Id3Classifier classifier = new Id3Classifier(samples[i], classIndex, confidence, maxDepth);
        Classifiers.Add(classifier);
    }
}
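// The GetClass counterpart that Train pairs with (used as bagger.GetClass further down)
// is not part of this listing. A minimal sketch follows, assuming the bagger predicts by
// majority vote over its trained Classifiers; the names reuse those from Train above and
// the actual implementation may differ.
public int GetClass(int[] instance)
{
    // Collect each tree's vote and return the most common class.
    Dictionary<int, int> votes = new Dictionary<int, int>();
    foreach (Id3Classifier classifier in Classifiers)
    {
        int predictedClass = classifier.GetClass(instance);
        votes[predictedClass] = votes.TryGetValue(predictedClass, out int count) ? count + 1 : 1;
    }
    return votes.OrderByDescending(pair => pair.Value).First().Key;
}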
static void Main(string[] args)
{
    List<double[]> continuousTrainData = DataWrangler.LoadContinuousDataAsync(TrainingCsv, _indexToIgnore).Result;
    List<double[]> continuousTestData = DataWrangler.LoadContinuousDataAsync(TestingCsv, _indexToIgnore).Result;

    // Print continuous columns for calculating elbows in an external tool (https://bl.ocks.org/rpgove/0060ff3b656618e9136b)
    //foreach (int i in _continuesIndexes)
    //{
    //    using (StreamWriter sw = new StreamWriter($"{i}.txt"))
    //    {
    //        sw.WriteLine(string.Join(",", continuousTrainData.Select(array => array[i])));
    //    }
    //}

    // Convert continuous to discrete
    Dictionary<int, GaussianClusterCollection> indexClusterMapping = DataWrangler.GetIndexClustersMap(continuousTrainData, _indexElbowMap);
    List<int[]> discreteTrainData = DataWrangler.ConvertContinuesToDiscrete(continuousTrainData, indexClusterMapping);
    List<int[]> discreteTestData = DataWrangler.ConvertContinuesToDiscrete(continuousTestData, indexClusterMapping);

    // Verify clustering
    //foreach (int continuousIndex in _continuousIndexes)
    //{
    //    HashSet<int> clustersTrain = new HashSet<int>();
    //    HashSet<int> clustersTest = new HashSet<int>();
    //    foreach (int[] instance in discreteTrainData)
    //    {
    //        clustersTrain.Add(instance[continuousIndex]);
    //    }
    //    foreach (int[] instance in discreteTestData)
    //    {
    //        clustersTest.Add(instance[continuousIndex]);
    //    }
    //    Console.WriteLine($"Clustering in index {continuousIndex} generated {clustersTrain.Count} unique clusters in train and {clustersTest.Count} unique clusters in test");
    //    Console.WriteLine($"Train: {string.Join(",", clustersTrain.OrderBy(i => i))}");
    //    Console.WriteLine($"Test: {string.Join(",", clustersTest.OrderBy(i => i))}");
    //    Console.WriteLine();
    //}

    List<List<int[]>> samples = Sampler.SampleData(discreteTrainData, BiasVarianceNumOfSamples);

    ConcurrentDictionary<int, ConcurrentDictionary<int, ConcurrentDictionary<int, int>>> id3ClassifierDepthInstanceClassifierPredictionMappings =
        new ConcurrentDictionary<int, ConcurrentDictionary<int, ConcurrentDictionary<int, int>>>();
    ConcurrentDictionary<int, ConcurrentDictionary<int, ConcurrentDictionary<int, int>>> id3BaggerDepthInstanceClassifierPredictionMappings =
        new ConcurrentDictionary<int, ConcurrentDictionary<int, ConcurrentDictionary<int, int>>>();
    ConcurrentDictionary<int, Tuple<double, double, double>> id3ClassifierResult = new ConcurrentDictionary<int, Tuple<double, double, double>>();
    ConcurrentDictionary<int, Tuple<double, double, double>> id3BaggerClassifierResult = new ConcurrentDictionary<int, Tuple<double, double, double>>();

    // From 0 to 10 maxDepth, calculate the bias and variance of the classifiers in parallel.
    Parallel.For(0, 11, (maxDepth) =>
    //for (int maxDepth = 0; maxDepth < 2; maxDepth++)
    {
        ConcurrentDictionary<int, ConcurrentDictionary<int, int>> id3BaggerInstanceClassifierPredictionMappings =
            id3BaggerDepthInstanceClassifierPredictionMappings.GetOrAdd(maxDepth, new ConcurrentDictionary<int, ConcurrentDictionary<int, int>>());
        ConcurrentDictionary<int, ConcurrentDictionary<int, int>> id3ClassifierInstanceClassifierPredictionMappings =
            id3ClassifierDepthInstanceClassifierPredictionMappings.GetOrAdd(maxDepth, new ConcurrentDictionary<int, ConcurrentDictionary<int, int>>());

        for (int i = 0; i < BiasVarianceNumOfSamples; i++)
        //Parallel.For(0, BiasVarianceNumOfSamples, i =>
        {
            // Initialize and train the bagged ensemble on this sample
            Id3Bagger bagger = new Id3Bagger(BaggerSamples);
            bagger.Train(samples[i], ClassIndex, 0, maxDepth);

            // Store predictions on test data
            //Parallel.For(0, discreteTestData.Count, (j) =>
            for (int j = 0; j < discreteTestData.Count; j++)
            {
                ConcurrentDictionary<int, int> classifierPredictionMapping = id3BaggerInstanceClassifierPredictionMappings.GetOrAdd(j, new ConcurrentDictionary<int, int>());
                classifierPredictionMapping.GetOrAdd(i, bagger.GetClass(discreteTestData[j]));
            };

            // Free the memory
            bagger = null;

            // Initialize and train the single ID3 tree on this sample
            Id3Classifier tree = new Id3Classifier(samples[i], ClassIndex, 0, maxDepth);

            // Store predictions on test data
            //Parallel.For(0, discreteTestData.Count, (j) =>
            for (int j = 0; j < discreteTestData.Count; j++)
            {
                ConcurrentDictionary<int, int> classifierPredictionMapping = id3ClassifierInstanceClassifierPredictionMappings.GetOrAdd(j, new ConcurrentDictionary<int, int>());
                classifierPredictionMapping.GetOrAdd(i, tree.GetClass(discreteTestData[j]));
            };

            // Free the memory
            tree = null;
        };

        // Calculate bias, variance, and accuracy for each classifier and store them.
        id3BaggerClassifierResult.GetOrAdd(maxDepth, BiasVarianceHelper.GetBiasVarianceAccuracy(discreteTestData, ClassIndex, id3BaggerInstanceClassifierPredictionMappings));
        id3ClassifierResult.GetOrAdd(maxDepth, BiasVarianceHelper.GetBiasVarianceAccuracy(discreteTestData, ClassIndex, id3ClassifierInstanceClassifierPredictionMappings));
    });

    Console.WriteLine("Id3 Classifier");
    Console.WriteLine("Max Depth, Bias, Variance, Accuracy");
    foreach (int maxDepth in id3ClassifierResult.Keys.OrderBy(d => d))
    {
        Console.WriteLine($"{maxDepth}, {id3ClassifierResult[maxDepth].Item1}, {id3ClassifierResult[maxDepth].Item2}, {id3ClassifierResult[maxDepth].Item3}");
    }
    Console.WriteLine();

    Console.WriteLine("Bagger");
    Console.WriteLine("Max Depth, Bias, Variance, Accuracy");
    foreach (int maxDepth in id3BaggerClassifierResult.Keys.OrderBy(d => d))
    {
        Console.WriteLine($"{maxDepth}, {id3BaggerClassifierResult[maxDepth].Item1}, {id3BaggerClassifierResult[maxDepth].Item2}, {id3BaggerClassifierResult[maxDepth].Item3}");
    }
    Console.WriteLine();

    Console.WriteLine("Press ENTER to exit...");
    Console.ReadLine();
}
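// BiasVarianceHelper.GetBiasVarianceAccuracy is referenced above but not part of this
// listing. A minimal sketch follows, assuming a Domingos-style 0-1 loss decomposition:
// per test instance the "main" prediction is the mode across the classifiers trained on
// the different samples, bias counts 1 when that mode misses the true label, variance is
// the fraction of predictions disagreeing with the mode, and all three numbers are
// averaged over the test instances. The actual helper may compute these differently.
public static Tuple<double, double, double> GetBiasVarianceAccuracy(
    List<int[]> testData,
    int classIndex,
    ConcurrentDictionary<int, ConcurrentDictionary<int, int>> instanceClassifierPredictionMappings)
{
    double bias = 0, variance = 0, accuracy = 0;
    for (int instanceIndex = 0; instanceIndex < testData.Count; instanceIndex++)
    {
        int trueClass = testData[instanceIndex][classIndex];
        ICollection<int> predictions = instanceClassifierPredictionMappings[instanceIndex].Values;

        // Main (modal) prediction across the classifiers trained on different samples
        int mainPrediction = predictions.GroupBy(p => p).OrderByDescending(g => g.Count()).First().Key;

        bias += mainPrediction == trueClass ? 0 : 1;
        variance += predictions.Count(p => p != mainPrediction) / (double)predictions.Count;
        accuracy += predictions.Count(p => p == trueClass) / (double)predictions.Count;
    }
    return Tuple.Create(bias / testData.Count, variance / testData.Count, accuracy / testData.Count);
}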
static void Main(string[] args)
{
    // Training
    ArffHeader header = null;
    List<object[]> instances = new List<object[]>();
    using (ArffReader arffReader = new ArffReader(TrainingArffFile))
    {
        header = arffReader.ReadHeader();
        object[] instance;
        while ((instance = arffReader.ReadInstance()) != null)
        {
            instances.Add(instance);
        }
    }
    List<int[]> trainingData = new List<int[]>(instances.Select(objectArray => objectArray.Select(o => o == null ? -1 : (int)o).ToArray()));

    // Test
    instances = new List<object[]>();
    using (ArffReader arffReader = new ArffReader(TestArffFile))
    {
        header = arffReader.ReadHeader();
        object[] instance;
        while ((instance = arffReader.ReadInstance()) != null)
        {
            instances.Add(instance);
        }
    }
    List<int[]> testData = new List<int[]>(instances.Select(objectArray => objectArray.Select(o => o == null ? -1 : (int)o).ToArray()));

    Console.WriteLine("Number of Samples, training accuracy, test accuracy");

    // Repeat this exercise multiple times because sampling is random.
    Parallel.For(0, 100, (k) =>
    {
        // Dictionaries to store results for the different sample counts. KEEP IN SYNC :)
        ConcurrentDictionary<int, double> sampleTrainingAccuraciesMap = new ConcurrentDictionary<int, double>(new Dictionary<int, double>
        {
            { 1, 0 }, { 3, 0 }, { 5, 0 }, { 10, 0 }, { 20, 0 }, { 25, 0 }, { 50, 0 }, { 75, 0 }, { 100, 0 }
        });
        ConcurrentDictionary<int, double> sampleTestAccuraciesMap = new ConcurrentDictionary<int, double>(new Dictionary<int, double>
        {
            { 1, 0 }, { 3, 0 }, { 5, 0 }, { 10, 0 }, { 20, 0 }, { 25, 0 }, { 50, 0 }, { 75, 0 }, { 100, 0 }
        });

        // Calculate the accuracy for each sample count in parallel.
        Parallel.ForEach(sampleTrainingAccuraciesMap.Keys, numOfSamples =>
        {
            List<Id3Classifier> classifiers = new List<Id3Classifier>();
            Sampler sampler = new Sampler(trainingData, numOfSamples);
            for (int i = 0; i < numOfSamples; i++)
            {
                Id3Classifier classifier = new Id3Classifier(sampler.Samples[i], ClassIndex, Confidence);
                classifiers.Add(classifier);
            }

            // Evaluate on both training and test data to watch for overfitting.
            sampleTrainingAccuraciesMap[numOfSamples] = Evaluate(trainingData, classifiers);
            sampleTestAccuraciesMap[numOfSamples] = Evaluate(testData, classifiers);
        });

        lock (_lockConsole)
        {
            foreach (int numOfSamples in sampleTestAccuraciesMap.Keys.OrderBy(n => n))
            {
                Console.WriteLine($"{numOfSamples},{sampleTrainingAccuraciesMap[numOfSamples]},{sampleTestAccuraciesMap[numOfSamples]}");
            }
        }
    });

    Console.WriteLine("Press ENTER to exit...");
    Console.ReadLine();
}
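// The Evaluate helper used above is not part of this listing. A minimal sketch is shown
// below, assuming it returns the accuracy of the ensemble's majority vote over the given
// data set, with the true label read from the static ClassIndex used elsewhere in this
// program; the real helper may differ.
private static double Evaluate(List<int[]> data, List<Id3Classifier> classifiers)
{
    int correct = 0;
    foreach (int[] instance in data)
    {
        // Majority vote across the sampled classifiers
        int prediction = classifiers
            .Select(classifier => classifier.GetClass(instance))
            .GroupBy(p => p)
            .OrderByDescending(g => g.Count())
            .First().Key;
        if (prediction == instance[ClassIndex])
        {
            correct++;
        }
    }
    return correct / (double)data.Count;
}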