Example #1
0
        /// <summary>
        /// Entry point. Loads the continuous training/testing data, discretizes it with the
        /// cluster mapping built from the training data, then for every maximum tree depth
        /// trains a bagged ID3 ensemble and a single ID3 tree on each sample set, records
        /// their predictions on the test data, and prints the resulting bias / variance /
        /// accuracy figures per depth for both classifier kinds.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Blocking on .Result is acceptable here: Main is synchronous and nothing else is running yet.
            List<double[]> continuousTrainData = DataWrangler.LoadContinuousDataAsync(TrainingCsv, _indexToIgnore).Result;
            List<double[]> continuousTestData  = DataWrangler.LoadContinuousDataAsync(TestingCsv, _indexToIgnore).Result;

            // Print continuous columns for calculating elbows in external tool(https://bl.ocks.org/rpgove/0060ff3b656618e9136b)
            //foreach (int i in _continuesIndexes)
            //{
            //    using (StreamWriter sw = new StreamWriter($"{i}.txt"))
            //    {
            //        sw.WriteLine(string.Join(",", continuousTrainData.Select(array => array[i])));
            //    }
            //}

            // Convert continuous to discrete. The cluster mapping is built from the training
            // data only and then applied to both the training and the test data.
            Dictionary<int, GaussianClusterCollection> indexClusterMapping = DataWrangler.GetIndexClustersMap(continuousTrainData, _indexElbowMap);
            List<int[]> discreteTrainData = DataWrangler.ConvertContinuesToDiscrete(continuousTrainData, indexClusterMapping);
            List<int[]> discreteTestData  = DataWrangler.ConvertContinuesToDiscrete(continuousTestData, indexClusterMapping);

            // Verify clustering
            //foreach (int continuousIndex in _continuousIndexes)
            //{
            //    HashSet<int> clustersTrain = new HashSet<int>();
            //    HashSet<int> clustersTest = new HashSet<int>();

            //    foreach (int[] instance in discreteTrainData)
            //    {
            //        clustersTrain.Add(instance[continuousIndex]);
            //    }
            //    foreach (int[] instance in discreteTestData)
            //    {
            //        clustersTest.Add(instance[continuousIndex]);
            //    }

            //    Console.WriteLine($"Clustering in index {continuousIndex} generated {clustersTrain.Count} unique clusters in train and {clustersTest.Count} unique clusters in test");
            //    Console.WriteLine($"Train: {string.Join(",", clustersTrain.OrderBy(i => i))}");
            //    Console.WriteLine($"Test: {string.Join(",", clustersTest.OrderBy(i => i))}");
            //    Console.WriteLine();
            //}

            List<List<int[]>> samples = Sampler.SampleData(discreteTrainData, BiasVarianceNumOfSamples);

            // Layout of the prediction maps: maxDepth -> test instance index -> sample (classifier) index -> predicted class.
            ConcurrentDictionary<int, ConcurrentDictionary<int, ConcurrentDictionary<int, int>>> id3ClassifierDepthInstanceClassifierPredictionMappings = new ConcurrentDictionary<int, ConcurrentDictionary<int, ConcurrentDictionary<int, int>>>();
            ConcurrentDictionary<int, ConcurrentDictionary<int, ConcurrentDictionary<int, int>>> id3BaggerDepthInstanceClassifierPredictionMappings     = new ConcurrentDictionary<int, ConcurrentDictionary<int, ConcurrentDictionary<int, int>>>();

            // maxDepth -> result tuple, printed below as (bias, variance, accuracy).
            ConcurrentDictionary<int, Tuple<double, double, double>> id3ClassifierResult       = new ConcurrentDictionary<int, Tuple<double, double, double>>();
            ConcurrentDictionary<int, Tuple<double, double, double>> id3BaggerClassifierResult = new ConcurrentDictionary<int, Tuple<double, double, double>>();

            // Parallel.For's upper bound is exclusive, so this evaluates maxDepth 0 through 10.
            const int maxDepthExclusive = 11;

            // Each maxDepth is evaluated on its own worker; the work inside one depth is sequential.
            Parallel.For(0, maxDepthExclusive, (maxDepth) =>
            {
                ConcurrentDictionary<int, ConcurrentDictionary<int, int>> id3BaggerInstanceClassifierPredictionMappings     = id3BaggerDepthInstanceClassifierPredictionMappings.GetOrAdd(maxDepth, new ConcurrentDictionary<int, ConcurrentDictionary<int, int>>());
                ConcurrentDictionary<int, ConcurrentDictionary<int, int>> id3ClassifierInstanceClassifierPredictionMappings = id3ClassifierDepthInstanceClassifierPredictionMappings.GetOrAdd(maxDepth, new ConcurrentDictionary<int, ConcurrentDictionary<int, int>>());

                for (int i = 0; i < BiasVarianceNumOfSamples; i++)
                {
                    // Train a bagged ensemble on the i-th sample set, limited to the current depth.
                    Id3Bagger bagger = new Id3Bagger(BaggerSamples);
                    bagger.Train(samples[i], ClassIndex, 0, maxDepth);

                    // Store the ensemble's prediction for every test instance.
                    for (int j = 0; j < discreteTestData.Count; j++)
                    {
                        ConcurrentDictionary<int, int> classifierPredictionMapping = id3BaggerInstanceClassifierPredictionMappings.GetOrAdd(j, new ConcurrentDictionary<int, int>());
                        classifierPredictionMapping.GetOrAdd(i, bagger.GetClass(discreteTestData[j]));
                    }

                    // Drop the reference so the ensemble can be collected before the tree is built.
                    bagger = null;

                    // Train a single ID3 tree on the same sample set and depth.
                    Id3Classifier tree = new Id3Classifier(samples[i], ClassIndex, 0, maxDepth);

                    // Store the tree's prediction for every test instance.
                    for (int j = 0; j < discreteTestData.Count; j++)
                    {
                        ConcurrentDictionary<int, int> classifierPredictionMapping = id3ClassifierInstanceClassifierPredictionMappings.GetOrAdd(j, new ConcurrentDictionary<int, int>());
                        classifierPredictionMapping.GetOrAdd(i, tree.GetClass(discreteTestData[j]));
                    }

                    // Drop the reference so the tree can be collected.
                    tree = null;
                }

                // Calculate bias and variance for each classifier and store it.
                id3BaggerClassifierResult.GetOrAdd(maxDepth, BiasVarianceHelper.GetBiasVarianceAccuracy(discreteTestData, ClassIndex, id3BaggerInstanceClassifierPredictionMappings));
                id3ClassifierResult.GetOrAdd(maxDepth, BiasVarianceHelper.GetBiasVarianceAccuracy(discreteTestData, ClassIndex, id3ClassifierInstanceClassifierPredictionMappings));
            });

            // ConcurrentDictionary does not enumerate its keys in any guaranteed order,
            // so sort them to print the rows by ascending depth.
            List<int> id3ClassifierDepths = new List<int>(id3ClassifierResult.Keys);
            id3ClassifierDepths.Sort();

            Console.WriteLine("Id3 Classifier");
            Console.WriteLine("Max Depth, Bias, Variance, Accuracy");
            foreach (int maxDepth in id3ClassifierDepths)
            {
                Tuple<double, double, double> result = id3ClassifierResult[maxDepth];
                Console.WriteLine($"{maxDepth}, {result.Item1}, {result.Item2}, {result.Item3}");
            }

            Console.WriteLine();

            List<int> id3BaggerDepths = new List<int>(id3BaggerClassifierResult.Keys);
            id3BaggerDepths.Sort();

            Console.WriteLine("Bagger");
            Console.WriteLine("Max Depth, Bias, Variance, Accuracy");
            foreach (int maxDepth in id3BaggerDepths)
            {
                // All three columns come from the bagger's own result; the third column
                // was previously read from the single-tree result by mistake.
                Tuple<double, double, double> result = id3BaggerClassifierResult[maxDepth];
                Console.WriteLine($"{maxDepth}, {result.Item1}, {result.Item2}, {result.Item3}");
            }

            Console.WriteLine();

            Console.WriteLine("Press ENTER to exit...");
            Console.ReadLine();
        }