示例#1
0
        /// <summary>
        /// Rebuilds <c>Classifiers</c> by training one <see cref="Id3Classifier"/> on each
        /// of the <c>NumberOfSamples</c> sample sets returned by <c>Sampler.SampleData</c>.
        /// </summary>
        /// <param name="trainingData">Instances handed to the sampler.</param>
        /// <param name="classIndex">Forwarded unchanged to every <see cref="Id3Classifier"/>.</param>
        /// <param name="confidence">Forwarded unchanged to every <see cref="Id3Classifier"/>.</param>
        /// <param name="maxDepth">Forwarded unchanged to every <see cref="Id3Classifier"/>.</param>
        public void Train(List<int[]> trainingData, int classIndex, double confidence, int maxDepth)
        {
            // Discard any previously trained ensemble before drawing new samples.
            Classifiers = new List<Id3Classifier>();

            List<List<int[]>> bags = Sampler.SampleData(trainingData, NumberOfSamples);

            // One classifier per sample set, indexed so exactly NumberOfSamples are trained.
            int bagIndex = 0;
            while (bagIndex < NumberOfSamples)
            {
                Classifiers.Add(new Id3Classifier(bags[bagIndex], classIndex, confidence, maxDepth));
                bagIndex++;
            }
        }
示例#2
0
        /// <summary>
        /// Runs the bias/variance experiment: loads the continuous train and test data,
        /// discretizes them with clusters fitted on the training data, then for every
        /// maxDepth in [0, 10] trains an <c>Id3Bagger</c> and a single <c>Id3Classifier</c>
        /// on each training sample, records their test-set predictions, and prints one
        /// result table per classifier kind.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Number of maxDepth values evaluated: 0 through 10 inclusive.
            const int depthCount = 11;

            List<double[]> continuousTrainData = DataWrangler.LoadContinuousDataAsync(TrainingCsv, _indexToIgnore).Result;
            List<double[]> continuousTestData  = DataWrangler.LoadContinuousDataAsync(TestingCsv, _indexToIgnore).Result;

            // Print continuous columns for calculating elbows in external tool(https://bl.ocks.org/rpgove/0060ff3b656618e9136b)
            //foreach (int i in _continuesIndexes)
            //{
            //    using (StreamWriter sw = new StreamWriter($"{i}.txt"))
            //    {
            //        sw.WriteLine(string.Join(",", continuousTrainData.Select(array => array[i])));
            //    }
            //}

            // Convert continuous to discrete. The clusters come from the training data only
            // and the same mapping is applied to both data sets.
            Dictionary<int, GaussianClusterCollection> indexClusterMapping = DataWrangler.GetIndexClustersMap(continuousTrainData, _indexElbowMap);
            List<int[]> discreteTrainData = DataWrangler.ConvertContinuesToDiscrete(continuousTrainData, indexClusterMapping);
            List<int[]> discreteTestData  = DataWrangler.ConvertContinuesToDiscrete(continuousTestData, indexClusterMapping);

            // Verify clustering
            //foreach (int continuousIndex in _continuousIndexes)
            //{
            //    HashSet<int> clustersTrain = new HashSet<int>();
            //    HashSet<int> clustersTest = new HashSet<int>();

            //    foreach (int[] instance in discreteTrainData)
            //    {
            //        clustersTrain.Add(instance[continuousIndex]);
            //    }
            //    foreach (int[] instance in discreteTestData)
            //    {
            //        clustersTest.Add(instance[continuousIndex]);
            //    }

            //    Console.WriteLine($"Clustering in index {continuousIndex} generated {clustersTrain.Count} unique clusters in train and {clustersTest.Count} unique clusters in test");
            //    Console.WriteLine($"Train: {string.Join(",", clustersTrain.OrderBy(i => i))}");
            //    Console.WriteLine($"Test: {string.Join(",", clustersTest.OrderBy(i => i))}");
            //    Console.WriteLine();
            //}

            List<List<int[]>> samples = Sampler.SampleData(discreteTrainData, BiasVarianceNumOfSamples);

            // maxDepth -> result tuple, printed below as (Bias, Variance, Accuracy).
            ConcurrentDictionary<int, Tuple<double, double, double>> id3ClassifierResult       = new ConcurrentDictionary<int, Tuple<double, double, double>>();
            ConcurrentDictionary<int, Tuple<double, double, double>> id3BaggerClassifierResult = new ConcurrentDictionary<int, Tuple<double, double, double>>();

            // Each maxDepth is evaluated on its own parallel iteration; the work inside one
            // iteration is sequential.
            Parallel.For(0, depthCount, maxDepth =>
            {
                // Test instance index -> (training sample index -> predicted class), for this depth only.
                ConcurrentDictionary<int, ConcurrentDictionary<int, int>> baggerPredictionsByInstance = new ConcurrentDictionary<int, ConcurrentDictionary<int, int>>();
                ConcurrentDictionary<int, ConcurrentDictionary<int, int>> treePredictionsByInstance   = new ConcurrentDictionary<int, ConcurrentDictionary<int, int>>();

                for (int i = 0; i < BiasVarianceNumOfSamples; i++)
                {
                    // Train the bagged ensemble on sample i.
                    Id3Bagger bagger = new Id3Bagger(BaggerSamples);
                    bagger.Train(samples[i], ClassIndex, 0, maxDepth);

                    // Store its predictions on the test data.
                    for (int j = 0; j < discreteTestData.Count; j++)
                    {
                        // Factory overload: only allocates when instance j has no entry yet.
                        ConcurrentDictionary<int, int> predictionsBySample = baggerPredictionsByInstance.GetOrAdd(j, _ => new ConcurrentDictionary<int, int>());
                        predictionsBySample[i] = bagger.GetClass(discreteTestData[j]);
                    }

                    // Drop the reference so the ensemble can be collected before the tree is trained.
                    bagger = null;

                    // Train the single tree on the same sample.
                    Id3Classifier tree = new Id3Classifier(samples[i], ClassIndex, 0, maxDepth);

                    // Store its predictions on the test data.
                    for (int j = 0; j < discreteTestData.Count; j++)
                    {
                        ConcurrentDictionary<int, int> predictionsBySample = treePredictionsByInstance.GetOrAdd(j, _ => new ConcurrentDictionary<int, int>());
                        predictionsBySample[i] = tree.GetClass(discreteTestData[j]);
                    }

                    // Drop the reference so the tree can be collected.
                    tree = null;
                }

                // Calculate bias, variance and accuracy for each classifier kind at this depth.
                id3BaggerClassifierResult[maxDepth] = BiasVarianceHelper.GetBiasVarianceAccuracy(discreteTestData, ClassIndex, baggerPredictionsByInstance);
                id3ClassifierResult[maxDepth]       = BiasVarianceHelper.GetBiasVarianceAccuracy(discreteTestData, ClassIndex, treePredictionsByInstance);
            });

            PrintBiasVarianceTable("Id3 Classifier", id3ClassifierResult, depthCount);
            PrintBiasVarianceTable("Bagger", id3BaggerClassifierResult, depthCount);

            Console.WriteLine("Press ENTER to exit...");
            Console.ReadLine();
        }

        /// <summary>
        /// Writes one result table to the console: a title, a header row, one row per
        /// maxDepth in ascending order, and a trailing blank line.
        /// </summary>
        /// <param name="title">Heading printed above the table.</param>
        /// <param name="resultsByDepth">Result tuple for every depth in [0, <paramref name="depthCount"/>).</param>
        /// <param name="depthCount">Number of depths to print, starting at 0.</param>
        private static void PrintBiasVarianceTable(string title, ConcurrentDictionary<int, Tuple<double, double, double>> resultsByDepth, int depthCount)
        {
            Console.WriteLine(title);
            Console.WriteLine("Max Depth, Bias, Variance, Accuracy");

            // Iterate by depth rather than over Keys, whose enumeration order is not guaranteed.
            for (int maxDepth = 0; maxDepth < depthCount; maxDepth++)
            {
                Tuple<double, double, double> result = resultsByDepth[maxDepth];
                Console.WriteLine($"{maxDepth}, {result.Item1}, {result.Item2}, {result.Item3}");
            }

            Console.WriteLine();
        }
示例#3
0
        /// <summary>
        /// Runs the sample-count experiment: reads the training and test ARFF files,
        /// converts every instance to an <c>int[]</c> (null cells become -1), and then,
        /// 100 times over, trains ensembles of 1 to 100 <c>Id3Classifier</c>s and prints
        /// each ensemble's training and test accuracy as CSV rows.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Ensemble sizes evaluated in every repetition; both accuracy maps are built from this list.
            int[] sampleCounts = { 1, 3, 5, 10, 20, 25, 50, 75, 100 };

            // Training set. The header is read before the instances; its contents are not used here.
            List<object[]> trainingRows = new List<object[]>();
            using (ArffReader trainingReader = new ArffReader(TrainingArffFile))
            {
                trainingReader.ReadHeader();
                for (object[] row = trainingReader.ReadInstance(); row != null; row = trainingReader.ReadInstance())
                {
                    trainingRows.Add(row);
                }
            }

            List<int[]> trainingData = trainingRows
                .Select(row => row.Select(cell => cell == null ? -1 : (int)cell).ToArray())
                .ToList();

            // Test set, read the same way.
            List<object[]> testRows = new List<object[]>();
            using (ArffReader testReader = new ArffReader(TestArffFile))
            {
                testReader.ReadHeader();
                for (object[] row = testReader.ReadInstance(); row != null; row = testReader.ReadInstance())
                {
                    testRows.Add(row);
                }
            }

            List<int[]> testData = testRows
                .Select(row => row.Select(cell => cell == null ? -1 : (int)cell).ToArray())
                .ToList();

            Console.WriteLine("Number of Samples, training accuracy, test accuracy");

            // Repeat the exercise many times because sampling is random.
            Parallel.For(0, 100, (k) =>
            {
                // Per-repetition accuracy, keyed by ensemble size and pre-seeded with 0.
                ConcurrentDictionary<int, double> trainingAccuracyBySampleCount =
                    new ConcurrentDictionary<int, double>(sampleCounts.ToDictionary(count => count, count => 0.0));
                ConcurrentDictionary<int, double> testAccuracyBySampleCount =
                    new ConcurrentDictionary<int, double>(sampleCounts.ToDictionary(count => count, count => 0.0));

                // Evaluate the different ensemble sizes in parallel.
                Parallel.ForEach(trainingAccuracyBySampleCount.Keys, numOfSamples =>
                {
                    Sampler sampler = new Sampler(trainingData, numOfSamples);
                    List<Id3Classifier> ensemble = new List<Id3Classifier>();

                    int sampleIndex = 0;
                    while (sampleIndex < numOfSamples)
                    {
                        ensemble.Add(new Id3Classifier(sampler.Samples[sampleIndex], ClassIndex, Confidence));
                        sampleIndex++;
                    }

                    // Score on both sets so overfitting shows up as a gap between them.
                    trainingAccuracyBySampleCount[numOfSamples] = Evaluate(trainingData, ensemble);
                    testAccuracyBySampleCount[numOfSamples]     = Evaluate(testData, ensemble);
                });

                // Hold the console lock for the whole repetition so its rows stay together.
                lock (_lockConsole)
                {
                    foreach (KeyValuePair<int, double> testEntry in testAccuracyBySampleCount.OrderBy(entry => entry.Key))
                    {
                        int numOfSamples = testEntry.Key;
                        Console.WriteLine($"{numOfSamples},{trainingAccuracyBySampleCount[numOfSamples]},{testEntry.Value}");
                    }
                }
            });

            Console.WriteLine("Press ENTER to exit...");
            Console.ReadLine();
        }