예제 #1
0
        /// <summary>
        /// Entry point for the SVM experiment: checks that the input files exist, parses and validates
        /// the training and test sets, produces one model/output file pair per (kernel, iteration)
        /// through <c>RunTrainingExe</c> / <c>RunEvaluateExe</c>, and finally prints the bias and
        /// variance computed from the predictions of each kernel.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string errorMessage = "";

            // C# interpolation holes are written {expr}; a leading '$' inside the string is printed literally,
            // so the messages below must not use the "${expr}" form.
            if (!File.Exists(DataSetPath))
            {
                errorMessage += $"Failed to find file {DataSetPath} - please update variable {nameof(DataSetPath)} or create that file.\n";
            }
            if (!File.Exists(TestSetPath))
            {
                errorMessage += $"Failed to find file {TestSetPath} - please update variable {nameof(TestSetPath)} or create that file.\n";
            }

            if (errorMessage != "")
            {
                Console.ForegroundColor = ConsoleColor.Red;
                Console.WriteLine("Not all files available - not running!");
                Console.WriteLine(errorMessage);
                Console.ResetColor();
                Console.WriteLine("Press any key to continue...");
                Console.ReadKey();
                return;
            }

            Random rnd = new Random();

            Console.WriteLine("Reading training data...");
            ParserResults trainingData = ParserUtils.ParseData(DataSetPath, convertContinuousValues: true);

            Console.WriteLine("Validating training set");
            DataSetCleaner.ValidateDataSet(trainingData.Attributes, trainingData.Values);

            Console.WriteLine("Getting test set...");
            ParserResults testData = ParserUtils.ParseData(TestSetPath, trainingData.Attributes, convertContinuousValues: true);

            Console.WriteLine("Validating test set");
            DataSetCleaner.ValidateDataSet(testData.Attributes, testData.Values);

            const string svmTrainingPath      = "svmtraining";
            const string svmOutputPath        = "output";
            const string originalTrainingFile = "originalTraining.txt";

            // Kernel identifiers handed to RunTrainingExe.
            // NOTE(review): presumably the libsvm "-t" kernel types - confirm against RunTrainingExe.
            List<int> kernelsToRunIn = new List<int>()
            {
                0, 1, 2, 3
            };

            // The un-resampled training set is what every trained model is evaluated against below.
            LibSvmConverter.ConvertToLibSvm(trainingData.Values, originalTrainingFile);

            // Train and evaluate every (kernel, iteration) pair. This runs sequentially: the shared
            // Random instance passed to Bagging is not safe for concurrent use.
            foreach (int kernel in kernelsToRunIn)
            {
                for (int i = 0; i < TotalSamplesForBiasAndVariance; i++)
                {
                    Console.WriteLine("Doing loop {0} for kernel {1}", i, kernel);

                    var postFix              = string.Format("K{0}-I{1}.txt", kernel, i);
                    string trainingPath      = svmTrainingPath + postFix;
                    string trainingModelPath = trainingPath + ".model";
                    string outputPath        = svmOutputPath + postFix;

                    // Existing model/output files are reused, so an interrupted run can be resumed.
                    if (!File.Exists(trainingModelPath))
                    {
                        List<List<DataSetValue>> differentTrainingData = Bagging.ProduceDifferentDataSets(trainingData.Values, 1, rnd);
                        LibSvmConverter.ConvertToLibSvm(differentTrainingData.Single(), trainingPath);
                        RunTrainingExe(trainingPath, kernel);
                    }
                    if (!File.Exists(outputPath))
                    {
                        RunEvaluateExe(originalTrainingFile, trainingModelPath, outputPath);
                    }
                }
            }

            // Evaluate bias and variance
            foreach (var kernel in kernelsToRunIn)
            {
                List<List<bool>> allPredictions = new List<List<bool>>();

                for (int i = 0; i < TotalSamplesForBiasAndVariance; i++)
                {
                    Console.WriteLine("Evaluating loop {0} for kernel {1}", i, kernel);
                    var    postFix      = string.Format("K{0}-I{1}.txt", kernel, i);
                    string trainingPath = svmTrainingPath + postFix;
                    string outputPath   = svmOutputPath + postFix;

                    // Skip iterations whose training or output file was never produced.
                    if (!File.Exists(trainingPath) || !File.Exists(outputPath))
                    {
                        continue;
                    }

                    List<bool> predictions = GetPredictionsFromOutputPath(outputPath);
                    allPredictions.Add(predictions);
                }

                if (allPredictions.Count == 0)
                {
                    Console.WriteLine("Not enough information to evaluate kernel {0}", kernel);
                    continue;
                }

                double bias;
                double variance = BiasAndVarianceCalculator.CalculateBiasAndVariance(trainingData, allPredictions, out bias);
                Console.WriteLine("Bias:{0:0.00000} Variance:{1:0.00000}", bias, variance);
            }

            Console.WriteLine("Press any key to quit...");
            Console.ReadKey();
        }
예제 #2
0
        /// <summary>
        /// Entry point for the Naive Bayes experiment: checks that the input files exist, parses and
        /// validates the training and test sets, trains one predictor per data set returned by
        /// <c>Bagging.ProduceDifferentDataSets</c>, prints the resulting bias and variance, and then
        /// evaluates a predictor trained on the full training set against the test set.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string errorMessage = "";

            // C# interpolation holes are written {expr}; a leading '$' inside the string is printed literally,
            // so the messages below must not use the "${expr}" form.
            if (!File.Exists(DataSetPath))
            {
                errorMessage += $"Failed to find file {DataSetPath} - please update variable {nameof(DataSetPath)} or create that file.\n";
            }
            if (!File.Exists(TestSetPath))
            {
                errorMessage += $"Failed to find file {TestSetPath} - please update variable {nameof(TestSetPath)} or create that file.\n";
            }

            if (errorMessage != "")
            {
                Console.ForegroundColor = ConsoleColor.Red;
                Console.WriteLine("Not all files available - not running!");
                Console.WriteLine(errorMessage);
                Console.ResetColor();
                Console.WriteLine("Press any key to continue...");
                Console.ReadKey();
                return;
            }

            // Wall-clock timestamps are only printed; the duration is measured with a Stopwatch so that
            // clock adjustments or DST changes during the run cannot distort it.
            var stopwatch = System.Diagnostics.Stopwatch.StartNew();

            Console.WriteLine(DateTime.Now);

            Random rnd = new Random();

            Console.WriteLine("Reading training data...");
            ParserResults trainingData = ParserUtils.ParseData(DataSetPath, convertContinuousValues: true);

            Console.WriteLine("Validating training set");
            DataSetCleaner.ValidateDataSet(trainingData.Attributes, trainingData.Values);

            Console.WriteLine("Getting test set...");
            ParserResults testData = ParserUtils.ParseData(TestSetPath, trainingData.Attributes, convertContinuousValues: true);

            Console.WriteLine("Validating test set");
            DataSetCleaner.ValidateDataSet(testData.Attributes, testData.Values);

            // One predictor is trained per generated data set; together they feed the bias/variance estimate.
            List<List<DataSetValue>> differentDataSets    = Bagging.ProduceDifferentDataSets(trainingData.Values, TotalSamplesForBiasAndVariance, rnd);
            List<IPredictor>         naiveBayesPredictors = new List<IPredictor>();

            foreach (var differentTrainingDataSet in differentDataSets)
            {
                naiveBayesPredictors.Add(TrainNaiveBayes(differentTrainingDataSet));
            }

            double bias;
            double variance = BiasAndVarianceCalculator.CalculateBiasAndVariance(trainingData, naiveBayesPredictors, out bias);

            Console.WriteLine($"Bias:{bias} Variance:{variance}");

            // The predictor scored against the test set is trained on the complete training data.
            var originalNaiveBayesPredictor = TrainNaiveBayes(trainingData.Values);

            EvaluateNaiveBayesPredictor(testData, originalNaiveBayesPredictor);

            stopwatch.Stop();

            Console.WriteLine(DateTime.Now);
            var totalMinutes = stopwatch.Elapsed.TotalMinutes;

            Console.WriteLine("Took {0} minutes.", totalMinutes);
            Console.WriteLine("Press any key to quit...");
            Console.ReadKey();
        }