Exemplo n.º 1
0
        /// <summary>
        /// Entry point. Verifies that the training and test files exist, parses and validates
        /// both data sets, then calls <c>RunWithTreeLevels</c> once for every combination of
        /// bagger size (1, 2, 5, 10) and tree depth (1 through 3).
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string errorMessage = "";

            // Interpolation holes in C# are written {expr}; a '$' inside the string is emitted literally,
            // so the holes below must not be prefixed with one.
            if (!File.Exists(DataSetPath))
            {
                errorMessage += $"Failed to find file {DataSetPath} - please update variable {nameof(DataSetPath)} or create that file.\n";
            }
            if (!File.Exists(TestSetPath))
            {
                errorMessage += $"Failed to find file {TestSetPath} - please update variable {nameof(TestSetPath)} or create that file.\n";
            }

            // Report every missing file at once and bail out before any parsing is attempted.
            if (errorMessage != "")
            {
                Console.ForegroundColor = ConsoleColor.Red;
                Console.WriteLine("Not all files available - not running!");
                Console.WriteLine(errorMessage);
                Console.ResetColor();
                Console.WriteLine("Press any key to continue...");
                Console.ReadKey();
                return;
            }

            // A single Random instance is handed to every RunWithTreeLevels call.
            Random rnd = new Random();

            Console.WriteLine("Reading training data...");
            ParserResults trainingData = ParserUtils.ParseData(DataSetPath);

            Console.WriteLine("Validating training set");
            DataSetCleaner.ValidateDataSet(trainingData.Attributes, trainingData.Values);

            // The test set is parsed with the attributes obtained from the training set.
            Console.WriteLine("Getting test set...");
            ParserResults testData = ParserUtils.ParseData(TestSetPath, trainingData.Attributes);

            Console.WriteLine("Validating test set");
            DataSetCleaner.ValidateDataSet(testData.Attributes, testData.Values);

            Console.WriteLine("TotalSamplesForBiasAndVariance : {0}", TotalSamplesForBiasAndVariance);
            List<int> sizeOfBaggers = new List<int>()
            {
                1, 2, 5, 10
            };

            foreach (var sizeOfBagger in sizeOfBaggers)
            {
                Console.WriteLine("Running with SizeOfBaggers {0}", sizeOfBagger);

                // Run the algorithm with different tree depths
                for (int treeDepth = 1; treeDepth <= 3; treeDepth++)
                {
                    Console.WriteLine("Running with tree depth {0}", treeDepth);
                    RunWithTreeLevels(trainingData, rnd, treeDepth, sizeOfBagger, testData);
                }
            }

            Console.WriteLine("Press any key to quit...");
            Console.ReadKey();
        }
Exemplo n.º 2
0
        /// <summary>
        /// Entry point. Verifies that the training and test files exist, parses and validates
        /// both data sets, exports the training values through <c>LibSvmConverter</c>, and then,
        /// for each kernel id (0 to 3) and each of <c>TotalSamplesForBiasAndVariance</c>
        /// iterations, trains a model on a resampled data set and evaluates it against the
        /// exported original training file. Finally prints bias and variance per kernel.
        /// </summary>
        /// <remarks>
        /// Model and output files that already exist on disk are reused rather than regenerated.
        /// </remarks>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string errorMessage = "";

            // Interpolation holes in C# are written {expr}; a '$' inside the string is emitted literally,
            // so the holes below must not be prefixed with one.
            if (!File.Exists(DataSetPath))
            {
                errorMessage += $"Failed to find file {DataSetPath} - please update variable {nameof(DataSetPath)} or create that file.\n";
            }
            if (!File.Exists(TestSetPath))
            {
                errorMessage += $"Failed to find file {TestSetPath} - please update variable {nameof(TestSetPath)} or create that file.\n";
            }

            // Report every missing file at once and bail out before any parsing is attempted.
            if (errorMessage != "")
            {
                Console.ForegroundColor = ConsoleColor.Red;
                Console.WriteLine("Not all files available - not running!");
                Console.WriteLine(errorMessage);
                Console.ResetColor();
                Console.WriteLine("Press any key to continue...");
                Console.ReadKey();
                return;
            }

            Random rnd = new Random();

            Console.WriteLine("Reading training data...");
            ParserResults trainingData = ParserUtils.ParseData(DataSetPath, convertContinuousValues: true);

            Console.WriteLine("Validating training set");
            DataSetCleaner.ValidateDataSet(trainingData.Attributes, trainingData.Values);

            // The test set is parsed with the attributes obtained from the training set.
            Console.WriteLine("Getting test set...");
            ParserResults testData = ParserUtils.ParseData(TestSetPath, trainingData.Attributes, convertContinuousValues: true);

            Console.WriteLine("Validating test set");
            DataSetCleaner.ValidateDataSet(testData.Attributes, testData.Values);

            const string svmTrainingPath      = "svmtraining";
            const string svmOutputPath        = "output";
            const string originalTrainingFile = "originalTraining.txt";

            List<int> kernelsToRunIn = new List<int>()
            {
                0, 1, 2, 3
            };

            // Export the unmodified training values once; every model is evaluated against this file.
            LibSvmConverter.ConvertToLibSvm(trainingData.Values, originalTrainingFile);

            // Train and evaluate every (kernel, iteration) pair.
            // NOTE: this runs sequentially, kernel by kernel and iteration by iteration. The single
            // shared System.Random instance is not thread-safe, so parallelising this loop would
            // require a separate Random per worker.
            foreach (var kernel in kernelsToRunIn)
            {
                for (int i = 0; i < TotalSamplesForBiasAndVariance; i++)
                {
                    Console.WriteLine("Doing loop {0} for kernel {1}", i, kernel);

                    var postFix              = string.Format("K{0}-I{1}.txt", kernel, i);
                    string trainingPath      = svmTrainingPath + postFix;
                    string trainingModelPath = trainingPath + ".model";
                    string outputPath        = svmOutputPath + postFix;

                    // Only train when no model file from a previous run is present.
                    if (!File.Exists(trainingModelPath))
                    {
                        List<List<DataSetValue>> differentTrainingData = Bagging.ProduceDifferentDataSets(trainingData.Values, 1, rnd);
                        LibSvmConverter.ConvertToLibSvm(differentTrainingData.Single(), trainingPath);
                        RunTrainingExe(trainingPath, kernel);
                    }

                    // Only evaluate when no output file from a previous run is present.
                    if (!File.Exists(outputPath))
                    {
                        RunEvaluateExe(originalTrainingFile, trainingModelPath, outputPath);
                    }
                }
            }

            // Evaluate bias and variance
            foreach (var kernel in kernelsToRunIn)
            {
                List<List<bool>> allPredictions = new List<List<bool>>();

                for (int i = 0; i < TotalSamplesForBiasAndVariance; i++)
                {
                    Console.WriteLine("Evaluating loop {0} for kernel {1}", i, kernel);
                    var    postFix      = string.Format("K{0}-I{1}.txt", kernel, i);
                    string trainingPath = svmTrainingPath + postFix;
                    string outputPath   = svmOutputPath + postFix;

                    // Skip iterations whose training or output file is missing.
                    if (!File.Exists(trainingPath) || !File.Exists(outputPath))
                    {
                        continue;
                    }

                    List<bool> predictions = GetPredictionsFromOutputPath(outputPath);
                    allPredictions.Add(predictions);
                }

                if (allPredictions.Count == 0)
                {
                    Console.WriteLine("Not enough information to evaluate kernel {0}", kernel);
                    continue;
                }

                double bias;
                double variance = BiasAndVarianceCalculator.CalculateBiasAndVariance(trainingData, allPredictions, out bias);
                Console.WriteLine("Bias:{0:0.00000} Variance:{1:0.00000}", bias, variance);
            }

            Console.WriteLine("Press any key to quit...");
            Console.ReadKey();
        }
Exemplo n.º 3
0
        /// <summary>
        /// Entry point. Verifies that the training and test files exist, parses and validates
        /// both data sets, trains one naive Bayes predictor per resampled data set to compute
        /// bias and variance, then trains a predictor on the original training values and
        /// evaluates it against the test data. Prints the start time, end time and elapsed minutes.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string errorMessage = "";

            // Interpolation holes in C# are written {expr}; a '$' inside the string is emitted literally,
            // so the holes below must not be prefixed with one.
            if (!File.Exists(DataSetPath))
            {
                errorMessage += $"Failed to find file {DataSetPath} - please update variable {nameof(DataSetPath)} or create that file.\n";
            }
            if (!File.Exists(TestSetPath))
            {
                errorMessage += $"Failed to find file {TestSetPath} - please update variable {nameof(TestSetPath)} or create that file.\n";
            }

            // Report every missing file at once and bail out before any parsing is attempted.
            if (errorMessage != "")
            {
                Console.ForegroundColor = ConsoleColor.Red;
                Console.WriteLine("Not all files available - not running!");
                Console.WriteLine(errorMessage);
                Console.ResetColor();
                Console.WriteLine("Press any key to continue...");
                Console.ReadKey();
                return;
            }

            // Wall-clock timestamps bracket the whole run; both are printed.
            var startTime = DateTime.Now;

            Console.WriteLine(startTime);

            Random rnd = new Random();

            Console.WriteLine("Reading training data...");
            ParserResults trainingData = ParserUtils.ParseData(DataSetPath, convertContinuousValues: true);

            Console.WriteLine("Validating training set");
            DataSetCleaner.ValidateDataSet(trainingData.Attributes, trainingData.Values);

            // The test set is parsed with the attributes obtained from the training set.
            Console.WriteLine("Getting test set...");
            ParserResults testData = ParserUtils.ParseData(TestSetPath, trainingData.Attributes, convertContinuousValues: true);

            Console.WriteLine("Validating test set");
            DataSetCleaner.ValidateDataSet(testData.Attributes, testData.Values);

            // Train one predictor per data set returned by Bagging.ProduceDifferentDataSets.
            List<List<DataSetValue>> differentDataSets    = Bagging.ProduceDifferentDataSets(trainingData.Values, TotalSamplesForBiasAndVariance, rnd);
            List<IPredictor>         naiveBayesPredictors = new List<IPredictor>();

            foreach (var differentTrainingDataSet in differentDataSets)
            {
                var naiveBayesPredictor = TrainNaiveBayes(differentTrainingDataSet);
                naiveBayesPredictors.Add(naiveBayesPredictor);
            }

            double bias;
            double variance = BiasAndVarianceCalculator.CalculateBiasAndVariance(trainingData, naiveBayesPredictors, out bias);

            Console.WriteLine($"Bias:{bias} Variance:{variance}");

            // Separate predictor trained on the original (non-resampled) values, scored on the test set.
            var originalNaiveBayesPredictor = TrainNaiveBayes(trainingData.Values);

            EvaluateNaiveBayesPredictor(testData, originalNaiveBayesPredictor);

            var endTime = DateTime.Now;

            Console.WriteLine(endTime);
            var totalMinutes = (endTime - startTime).TotalMinutes;

            Console.WriteLine("Took {0} minutes.", totalMinutes);
            Console.WriteLine("Press any key to quit...");
            Console.ReadKey();
        }
Exemplo n.º 4
0
        /// <summary>
        /// Entry point. Verifies that the training and test files exist, parses and validates
        /// the training data, produces groups of 1, 3, 5, 10 and 20 data sets via
        /// <c>Bagging.ProduceDifferentDataSets</c>, creates one <c>DecisionTreeLevel</c> per data
        /// set, runs <c>D3</c> and <c>TrimTree</c> on all of them in parallel, and prints the
        /// score of each group of trees against the test data.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string errorMessage = "";

            // Interpolation holes in C# are written {expr}; a '$' inside the string is emitted literally,
            // so the holes below must not be prefixed with one.
            if (!File.Exists(DataSetPath))
            {
                errorMessage += $"Failed to find file {DataSetPath} - please update variable {nameof(DataSetPath)} or create that file.\n";
            }
            if (!File.Exists(TestSetPath))
            {
                errorMessage += $"Failed to find file {TestSetPath} - please update variable {nameof(TestSetPath)} or create that file.\n";
            }

            // Report every missing file at once and bail out before any parsing is attempted.
            if (errorMessage != "")
            {
                Console.ForegroundColor = ConsoleColor.Red;
                Console.WriteLine("Not all files available - not running!");
                Console.WriteLine(errorMessage);
                Console.ResetColor();
                Console.WriteLine("Press any key to continue...");
                Console.ReadKey();
                return;
            }

            Random rnd = new Random();

            Console.WriteLine("Reading training data...");
            ParserResults trainingData = ParserUtils.ParseData(DataSetPath);

            Console.WriteLine("Validating data set");
            DataSetCleaner.ValidateDataSet(trainingData.Attributes, trainingData.Values);

            // One entry per bagging size; each entry is the list of data sets produced for that size.
            List<List<List<DataSetValue>>> dataSetValuesForBagging = new List<List<List<DataSetValue>>>()
            {
                Bagging.ProduceDifferentDataSets(trainingData.Values, 1, rnd),
                Bagging.ProduceDifferentDataSets(trainingData.Values, 3, rnd),
                Bagging.ProduceDifferentDataSets(trainingData.Values, 5, rnd),
                Bagging.ProduceDifferentDataSets(trainingData.Values, 10, rnd),
                Bagging.ProduceDifferentDataSets(trainingData.Values, 20, rnd),
            };

            // Initialize the required trees: one tree per data set, grouped the same way as the data sets
            List<List<DecisionTreeLevel>> listOfTreesToRunTestOn = new List<List<DecisionTreeLevel>>();

            foreach (var dataSetForBagging in dataSetValuesForBagging)
            {
                listOfTreesToRunTestOn.Add(dataSetForBagging.Select(x => new DecisionTreeLevel(ChiTestLimit, trainingData.Attributes, x)).ToList());
            }

            // Both passes flatten the groups so every tree is processed as an independent work item.
            Console.WriteLine("Runnind D3 on all trees in parallel...");
            Parallel.ForEach(listOfTreesToRunTestOn.SelectMany(s => s), l => l.D3());

            Console.WriteLine("Deleting unecessary nodes...");
            Parallel.ForEach(listOfTreesToRunTestOn.SelectMany(s => s), l => l.TrimTree());

            Console.WriteLine("Getting test data set...");
            ParserResults testData = ParserUtils.ParseData(TestSetPath);

            // Each group of trees is scored together against the test values.
            Console.WriteLine("Evaluating trees against test data...");
            foreach (List<DecisionTreeLevel> baggingSetOfTrees in listOfTreesToRunTestOn)
            {
                DecisionTreeScore score = DecisionTreeScorer.ScoreWithTreeWithTestSet(baggingSetOfTrees, testData.Values);
                score.PrintTotalScore();
            }

            Console.WriteLine("Press any key to quit...");
            Console.ReadKey();
        }
Exemplo n.º 5
0
        /// <summary>
        /// Entry point. Verifies that the training and test files exist, parses and validates
        /// the training data, creates three <c>DecisionTreeLevel</c> instances with chi test
        /// limits 0.99, 0.95 and 0, runs <c>D3</c> and <c>TrimTree</c> on them in parallel, and
        /// prints each tree's score against the test data followed by its score against the
        /// training data.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string errorMessage = "";

            // Interpolation holes in C# are written {expr}; a '$' inside the string is emitted literally,
            // so the holes below must not be prefixed with one.
            if (!File.Exists(DataSetPath))
            {
                errorMessage += $"Failed to find file {DataSetPath} - please update variable {nameof(DataSetPath)} or create that file.\n";
            }
            if (!File.Exists(TestSetPath))
            {
                errorMessage += $"Failed to find file {TestSetPath} - please update variable {nameof(TestSetPath)} or create that file.\n";
            }

            // Report every missing file at once and bail out before any parsing is attempted.
            if (errorMessage != "")
            {
                Console.ForegroundColor = ConsoleColor.Red;
                Console.WriteLine("Not all files available - not running!");
                Console.WriteLine(errorMessage);
                Console.ResetColor();
                Console.WriteLine("Press any key to continue...");
                Console.ReadKey();
                return;
            }

            Console.WriteLine("Reading training data...");
            ParserResults trainingData = ParserUtils.ParseData(DataSetPath);

            // Optimizations are optional
            // DataSetOptimizerForExtraCredit.OptimizeDataSetForExtraCredit(trainingData.Attributes, trainingData.Values);

            Console.WriteLine("Validating data set");
            DataSetCleaner.ValidateDataSet(trainingData.Attributes, trainingData.Values);

            // Initialize the required trees with their respective chiTestLimits
            List<DecisionTreeLevel> listOfTreesToRunTestOn = new List<DecisionTreeLevel>()
            {
                new DecisionTreeLevel(chiTestLimit: 0.99),
                new DecisionTreeLevel(chiTestLimit: 0.95),
                new DecisionTreeLevel(chiTestLimit: 0),
            };

            // Every tree is handed the same training attributes and values.
            Console.WriteLine("Runnind D3...");
            Parallel.ForEach(listOfTreesToRunTestOn, l => l.D3(trainingData.Attributes, trainingData.Values));

            Console.WriteLine("Deleting unecessary nodes...");
            Parallel.ForEach(listOfTreesToRunTestOn, l => l.TrimTree());

            Console.WriteLine("Getting test data set...");
            ParserResults testData = ParserUtils.ParseData(TestSetPath);

            // Optimizations are optional
            // DataSetOptimizerForExtraCredit.OptimizeDataSetForExtraCredit(testData.Attributes, testData.Values);

            Console.WriteLine("Evaluating trees against test data...");
            List<DecisionTreeScore> scores = listOfTreesToRunTestOn.AsParallel().Select(t => DecisionTreeScorer.ScoreWithTreeWithTestSet(t, testData.Values)).ToList();

            //Console.WriteLine("Writing trees to text files (for debugging/visualization)...");
            // Dump the trees to a txt file for debugging/visualization
            // NOTE: This won't work for the Chi=0 case - the JSON file generated is too big
            // Parallel.ForEach(listOfTreesToRunTestOn, l => File.WriteAllText("Chi" + Convert.ToInt64(l.ChiTestLimit * 10000000000000) + ".json", l.SerializeDecisionTree()));

            // Score the same trees against the data they were trained on, for comparison with the test scores.
            List<DecisionTreeScore> trainingDataScores = listOfTreesToRunTestOn.AsParallel().Select(t => DecisionTreeScorer.ScoreWithTreeWithTestSet(t, trainingData.Values)).ToList();

            // Print the results to console
            foreach (var score in scores)
            {
                score.PrintTotalScore();
            }

            Console.WriteLine("Evaluating trees against training data:");
            foreach (var score in trainingDataScores)
            {
                score.PrintTotalScore();
            }

            Console.WriteLine("Press any key to quit...");
            Console.ReadKey();
        }