Example #1
0
        public void RegularizedLinearRegression_ArtificaialFunction()
        {
            // Given: a synthetic, noiseless linear target so the expected fit quality is known up front.
            var crossValidator = new CrossValidator<double>();
            Func<IList<double>, double> targetFunction =
                features => 0.3 + (0.5 * features[0]) + (-0.3 * features[1]) + (0.7 * features[2]);
            var dataFrame =
                TestDataBuilder.BuildRandomAbstractNumericDataFrame(
                    targetFunction,
                    featuresCount: 3,
                    min: 0,
                    max: 1,
                    rowCount: 1000);
            var modelBuilder = new RegularizedLinearRegressionModelBuilder(0.5);
            var builderParams = new LinearRegressionParams(0.05);

            // When: 20 folds of regularized linear regression over the generated frame.
            var reports = crossValidator.CrossValidate(
                modelBuilder: modelBuilder,
                modelBuilderParams: builderParams,
                predictor: new LinearRegressionPredictor(),
                qualityMeasure: new GoodnessOfFitQualityMeasure(),
                dataFrame: dataFrame,
                dependentFeatureName: "result",
                percetnagOfTrainData: 0.8,
                folds: 20);

            // Then: average goodness-of-fit should be high for a linear target.
            Assert.IsTrue(reports.Average(report => report.Accuracy) >= 0.9);
        }
Example #2
0
        public void DiscreteClassification_DiscreteFeatures_MultiValuesSplits_CongressVoting()
        {
            // Given: a random forest assembled from multi-value decision trees.
            var forestBuilder = new RandomForestModelBuilder<object>(
                multiValueTreeBuilderWithBetterNumercValsHandler,
                new DecisionTreePredictor<object>(),
                new ConfusionMatrixBuilder<object>(),
                i => (int)Math.Round(Math.Sqrt(i), MidpointRounding.AwayFromZero), // features per tree
                () => new DecisionTreeModelBuilderParams(false));
            var forestPredictor = new RandomForestPredictor<object>(new DecisionTreePredictor<object>(), true);
            var congressData = TestDataBuilder.ReadCongressData();
            var validator = new CrossValidator<object>();

            // When: a single cross-validation pass predicting the "party" column.
            var report = validator.CrossValidate(
                forestBuilder,
                new RandomForestParams(100, 10),
                forestPredictor,
                new ConfusionMatrixBuilder<object>(),
                congressData,
                "party",
                0.7,
                1).First();

            // Then
            Assert.IsTrue(report.Accuracy >= 0.9);
        }
Example #3
0
        static void Main(string[] args)
        {
            var cases = FileProvider.Load<BreastCancerData>("");
            var folds = CrossValidator.CreateFolds(cases, 5, true);

            var stopwatch = Stopwatch.StartNew();

            for (var k = 1; k < 31; k++)
            {
                // Copy the loop variable so the local functions capture a per-iteration value.
                var currentK = k;
                var result = CrossValidator.ValidateInParallel<int>(folds, cases, ReasonerBuilder, PipelineBuilder);
                Console.WriteLine($"K: {currentK}, AverageTime: {result.ValidationTime}, Accuracy: {result.Accuracy():0.####}");

                // Preprocessing pipeline used for every fold.
                TransformerPipeline PipelineBuilder() => new TransformerPipeline().Add(new MinMaxNormalizer());

                // Fresh reasoner configured for the current neighborhood size.
                Reasoner ReasonerBuilder()
                {
                    var cycle = new ReasoningCycle()
                        .AddRetriever(new LshRetriever(15, 3))
                        .AddRetriever(new SimilarityRetriever(new MinkowskiDistance(2), 35))
                        .SetReuser(new KnnReuser(currentK));

                    return new Reasoner(cycle);
                }
            }

            stopwatch.Stop();
            Console.WriteLine($"TotalExecutionTime: {stopwatch.ElapsedMilliseconds}, AverageFoldTime: {stopwatch.ElapsedMilliseconds / 30d}");
            Console.ReadLine();
        }
Example #4
0
        static void Main(string[] args)
        {
            var dataFilePath = "Data/test_generated.data";

            // Pipeline: load the data file, featurize every textual column,
            // concatenate all columns into one feature vector, then classify.
            var pipeline = new LearningPipeline()
            {
                new TextLoader(dataFilePath).CreateFrom <ReopenedIssueData>(),
                new TextFeaturizer(Columns.Environment, Columns.Environment),
                new TextFeaturizer(Columns.Type, Columns.Type),
                new TextFeaturizer(Columns.ProjectName, Columns.ProjectName),
                new TextFeaturizer(Columns.AsigneeEmail, Columns.AsigneeEmail),
                new TextFeaturizer(Columns.ReporterEmail, Columns.ReporterEmail),
                new ColumnConcatenator(
                    Columns.Features,
                    Columns.Environment,
                    Columns.Type,
                    Columns.CommentsCount,
                    Columns.CommentsLenght,
                    Columns.ReporterCommentsCount,
                    Columns.ProjectName,
                    Columns.AsigneeEmail,
                    Columns.ReporterEmail
                    ),
                new FastTreeBinaryClassifier()
            };

            // Cross-validate the whole pipeline (default fold count) with the
            // binary-classifier trainer signature. Removed dead commented-out
            // code and the unused result local from the previous version.
            var crossValidator = new CrossValidator()
            {
                Kind = MacroUtilsTrainerKinds.SignatureBinaryClassifierTrainer
            };
            crossValidator.CrossValidate <ReopenedIssueData, ReopenedIssuePrediction>(pipeline);
        }
Example #5
0
        static void Main(string[] args)
        {
            var trainingDataPath = MLNetUtilities.GetDataPathByDatasetName("SalaryData.csv");
            var testDataPath = MLNetUtilities.GetDataPathByDatasetName("SalaryData-test.csv");

            // Pipeline: load the CSV, wrap the single numeric column as "Features",
            // then fit a generalized additive model regressor.
            var pipeline = new LearningPipeline
            {
                new TextLoader(trainingDataPath).CreateFrom<SalaryData>(useHeader: true, separator: ','),
                new ColumnConcatenator("Features", "YearsExperience"),
                new GeneralizedAdditiveModelRegressor()
            };

            // 5-fold cross-validation with the regression trainer signature.
            var validator = new CrossValidator()
            {
                Kind = MacroUtilsTrainerKinds.SignatureRegressorTrainer,
                NumFolds = 5
            };
            var validationOutput = validator.CrossValidate<SalaryData, SalaryPrediction>(pipeline);

            Console.Write(Environment.NewLine);
            Console.WriteLine("Root Mean Squared for each fold:");
            validationOutput.RegressionMetrics.ForEach(metric => Console.WriteLine(metric.Rms));

            // Averages are computed as sum / count, matching the fold list size.
            var rSquaredSum = validationOutput.RegressionMetrics.Sum(metric => metric.RSquared);
            var rmsSum = validationOutput.RegressionMetrics.Sum(metric => metric.Rms);

            Console.Write(Environment.NewLine);
            Console.WriteLine($"Average R^2: {rSquaredSum / validationOutput.RegressionMetrics.Count}");
            Console.WriteLine($"Average RMS: {rmsSum / validationOutput.RegressionMetrics.Count}");

            Console.ReadLine();
        }
Example #6
0
        public static void CrossValidate()
        {
            // Assemble the classification pipeline step by step.
            var pipeline = new LearningPipeline();

            // Load the CSV (with header), then featurize the free-text name column.
            pipeline.Add(new TextLoader("1_BinaryClassification/problem1.csv").CreateFrom<BeerOrWineData>(useHeader: true, separator: ','));
            pipeline.Add(new TextFeaturizer("Features", "FullName"));

            // Map the textual "Type" column onto the "Label" column.
            pipeline.Add(new Dictionarizer(("Type", "Label")));

            pipeline.Add(new StochasticDualCoordinateAscentBinaryClassifier());

            pipeline.Add(new PredictedLabelColumnOriginalValueConverter()
            {
                PredictedLabelColumn = "PredictedLabel"
            });

            // Cross validation
            var cv = new CrossValidator().CrossValidate<BeerOrWineData, BeerOrWinePrediction>(pipeline);

            // show matrix
        }
Example #7
0
        public void Mushroom_BinarySplit()
        {
            // Given: a seeded randomizer so the fold splits are reproducible.
            var random = new Random(3);
            var crossValidator = new CrossValidator<string>(random);
            var mushroomData = TestDataBuilder.ReadMushroomDataWithCategoricalAttributes();

            var treePredictor = new DecisionTreePredictor<string>();

            // When: two folds of the binary-split tree predicting the "type" column.
            var reports = crossValidator.CrossValidate(
                modelBuilder: binaryTreeBuilder,
                modelBuilderParams: modelBuilderParams,
                predictor: treePredictor,
                qualityMeasure: new ConfusionMatrixBuilder<string>(),
                dataFrame: mushroomData,
                dependentFeatureName: "type",
                percetnagOfTrainData: 0.7,
                folds: 2);

            // Then: near-perfect accuracy is expected on this dataset.
            var meanAccuracy = reports.Average(report => report.Accuracy);

            Assert.IsTrue(meanAccuracy >= 0.99);
        }
Example #8
0
        public void Regression_NumericAttrsAndOutcomesOnly_RegularizedRegression()
        {
            // Given: seeded splits over the normalized housing data.
            var random = new Random(3);
            var crossValidator = new CrossValidator<double>(random);
            var housingData = TestDataBuilder.ReadHousingDataNormalizedAttrs();

            var treePredictor = new DecisionTreePredictor<double>();

            // A regression tree whose leaves fit regularized linear models.
            var regressionTreeBuilder = new BinaryDecisionTreeModelBuilder(
                new VarianceBasedSplitQualityChecker(),
                new BestSplitSelectorForNumericValues(new BinaryNumericDataSplitter()),
                new RegressionAndModelDecisionTreeLeafBuilder(new RegularizedLinearRegressionModelBuilder(0.005)));

            // When: 15 folds predicting the "MEDV" column.
            var reports = crossValidator.CrossValidate(
                modelBuilder: regressionTreeBuilder,
                modelBuilderParams: modelBuilderParams,
                predictor: treePredictor,
                qualityMeasure: new GoodnessOfFitQualityMeasure(),
                dataFrame: housingData,
                dependentFeatureName: "MEDV",
                percetnagOfTrainData: 0.7,
                folds: 15);

            // Then
            var meanRSquared = reports.Average(report => report.Accuracy);

            Assert.IsTrue(meanRSquared >= 0.6);
        }
Example #9
0
        public void DiscreteClassification_CategoricalFeatures_BinarySplits_ConvressVotingData_StatisticalSignificanceTest_CrossValidation()
        {
            // Given: seeded splits over the congress-voting data.
            var random = new Random(3);
            var crossValidator = new CrossValidator<string>(random);
            var congressData = TestDataBuilder.ReadCongressData() as DataFrame;

            var treePredictor = new DecisionTreePredictor<string>();

            // When: validate a tree builder configured with a chi-square statistical-significance checker.
            var reports = crossValidator.CrossValidate(
                modelBuilder: this.BuildCustomModelBuilder(true, statisticalSignificanceChecker: new ChiSquareStatisticalSignificanceChecker()),
                modelBuilderParams: new DecisionTreeModelBuilderParams(false, true),
                predictor: treePredictor,
                qualityMeasure: new ConfusionMatrixBuilder<string>(),
                dataFrame: congressData,
                dependentFeatureName: "party",
                percetnagOfTrainData: 0.7,
                folds: 10);

            // Then
            var meanAccuracy = reports.Average(report => report.Accuracy);

            Assert.IsTrue(meanAccuracy >= 0.9);
        }
Example #10
0
        public void Mushroom_MultiSplit_StatisticalSignificanceHeuristic()
        {
            // Given: seeded splits over the mushroom data.
            var random = new Random(3);
            var crossValidator = new CrossValidator<string>(random);
            var mushroomData = TestDataBuilder.ReadMushroomDataWithCategoricalAttributes();

            var treePredictor = new DecisionTreePredictor<string>();

            // When: two folds of a multi-split tree with a chi-square significance checker.
            var reports = crossValidator.CrossValidate(
                modelBuilder: this.BuildCustomModelBuilder(statisticalSignificanceChecker: new ChiSquareStatisticalSignificanceChecker()),
                modelBuilderParams: new DecisionTreeModelBuilderParams(false, true),
                predictor: treePredictor,
                qualityMeasure: new ConfusionMatrixBuilder<string>(),
                dataFrame: mushroomData,
                dependentFeatureName: "type",
                percetnagOfTrainData: 0.7,
                folds: 2);

            // Then: near-perfect accuracy is expected on this dataset.
            var meanAccuracy = reports.Average(report => report.Accuracy);

            Assert.IsTrue(meanAccuracy >= 0.99);
        }
        public void RegularizedGradientDescent_ArtificialFunction()
        {
            // Given: a synthetic linear target over three features.
            var crossValidator = new CrossValidator<double>();
            Func<IList<double>, double> targetFunction =
                features => 0.3 + (0.5 * features[0]) + (-0.3 * features[1]) + (0.7 * features[2]);
            var dataFrame =
                TestDataBuilder.BuildRandomAbstractNumericDataFrame(
                    targetFunction,
                    featuresCount: 3,
                    min: 0,
                    max: 1,
                    rowCount: 1000);
            var modelBuilder = new RegularizedGradientDescentModelBuilder(0, 1);
            var builderParams = new LinearRegressionParams(0.05);

            // When: 20 folds of gradient-descent regression on the generated frame.
            var reports = crossValidator.CrossValidate(
                modelBuilder: modelBuilder,
                modelBuilderParams: builderParams,
                predictor: new LinearRegressionPredictor(),
                qualityMeasure: new GoodnessOfFitQualityMeasure(),
                dataFrame: dataFrame,
                dependentFeatureName: "result",
                percetnagOfTrainData: 0.8,
                folds: 20);

            // Then
            Assert.IsTrue(reports.Average(report => report.Accuracy) >= 0.9);
        }
        public void DiscreteClassification_DiscreteFeatures_MultiValuesSplits_CongressVoting()
        {
            // Given: a random forest of multi-value trees over the congress-voting data.
            var forestBuilder = new RandomForestModelBuilder<object>(
                multiValueTreeBuilderWithBetterNumercValsHandler,
                new DecisionTreePredictor<object>(),
                new ConfusionMatrixBuilder<object>(),
                i => (int)Math.Round(Math.Sqrt(i), MidpointRounding.AwayFromZero), // features per tree
                () => new DecisionTreeModelBuilderParams(false));
            var forestPredictor = new RandomForestPredictor<object>(new DecisionTreePredictor<object>(), true);
            var congressData = TestDataBuilder.ReadCongressData();
            var validator = new CrossValidator<object>();

            // When: a single validation pass predicting the "party" column.
            var report = validator.CrossValidate(
                forestBuilder,
                new RandomForestParams(100, 10),
                forestPredictor,
                new ConfusionMatrixBuilder<object>(),
                congressData,
                "party",
                0.7,
                1).First();

            // Then
            Assert.IsTrue(report.Accuracy >= 0.9);
        }
Example #13
0
        void CrossValidation()
        {
            var dataPath = GetDataPath(SentimentDataPath);

            // Build the sentiment pipeline: loader with header, text transform, fast-tree classifier.
            var pipeline = new Legacy.LearningPipeline();
            var loader = new TextLoader(dataPath).CreateFrom<SentimentData>();
            loader.Arguments.HasHeader = true;

            pipeline.Add(loader);
            pipeline.Add(MakeSentimentTextTransform());
            pipeline.Add(new FastTreeBinaryClassifier()
            {
                NumLeaves = 5,
                NumTrees = 5,
                MinDocumentsInLeafs = 2
            });
            pipeline.Add(new PredictedLabelColumnOriginalValueConverter()
            {
                PredictedLabelColumn = "PredictedLabel"
            });

            // Cross-validate, then score one hand-written example with the first fold's model.
            var crossValidation = new CrossValidator().CrossValidate<SentimentData, SentimentPrediction>(pipeline);
            var foldMetrics = crossValidation.BinaryClassificationMetrics[0];
            var singlePrediction = crossValidation.PredictorModels[0].Predict(new SentimentData()
            {
                SentimentText = "Not big fan of this."
            });

            Assert.True(singlePrediction.Sentiment);
        }
        private static void TestLinearRegressionUsingCrossValidation(FeatureVector training, FeatureVector test)
        {
            // 10-fold cross-validation wrapped around a linear-regression model.
            var validator = new CrossValidator(new LinearRegression(), new BinaryClassificationEvaluator(), 10);
            var fittedModel = (CrossValidatorModel)validator.Fit(training);
            var predictions = fittedModel.transform(test);

            PrintPredictionsAndEvaluate(predictions);
        }
        private static void TestLogisticRegressionUsingCrossValidation(FeatureVector training, FeatureVector test)
        {
            // 10-fold cross-validation wrapped around a logistic-regression model.
            var validator = new CrossValidator(new LogisticRegression(), new BinaryClassificationEvaluator(), 10);
            var fittedModel = (CrossValidatorModel)validator.Fit(training);

            Console.WriteLine("10-fold cross validator accuracy: " + validator.Accuracy);
            var predictions = fittedModel.transform(test);

            PrintPredictionsAndEvaluate(predictions);
        }
        private Model TrainModel(LabeledDataset <SentimentLabel, SparseVector <double> > dataset,
                                 SentimentLabel label, SentimentLabel otherLabel1, SentimentLabel otherLabel2)
        {
            // Trains a model for `label` against the two "other" labels: the dataset is
            // re-labeled so everything that is not `label` becomes otherLabel1 (binary
            // problem), cross-validation collects score distributions for correct
            // predictions, and the model's weight is the summed macro-F1 over the folds.
            IModel <SentimentLabel, SparseVector <double> > model = CreateModel();

            // Relative frequency of each "other" label among the non-`label` examples.
            var otherLabelWeight1 = (double)dataset.Count(le => le.Label == otherLabel1) / dataset.Count(le => le.Label != label);
            var otherLabelWeight2 = (double)dataset.Count(le => le.Label == otherLabel2) / dataset.Count(le => le.Label != label);

            // Collapse the multi-class labels onto {label, otherLabel1}.
            dataset = new LabeledDataset <SentimentLabel, SparseVector <double> >(dataset.Select(le =>
                                                                                                 new LabeledExample <SentimentLabel, SparseVector <double> >(le.Label == label ? label : otherLabel1, le.Example)));

            // Best-class scores of correct predictions, split by predicted class.
            var scores       = new List <double>();
            var scoresOthers = new List <double>();
            var validation   = new CrossValidator <SentimentLabel, SparseVector <double> >
            {
                NumFolds = NumTrainFolds,
                Dataset  = dataset,

                OnAfterPrediction = (sender, foldN, m, ex, le, prediction) =>
                {
                    // Only record scores when the prediction matches the true label.
                    if (le.Label == prediction.BestClassLabel)
                    {
                        if (prediction.BestClassLabel == label)
                        {
                            scores.Add(prediction.BestScore);
                        }
                        else
                        {
                            scoresOthers.Add(prediction.BestScore);
                        }
                    }
                    return(true); // true = keep validating
                }
            };

            validation.Models.Add(model);
            validation.Run();

            // train model
            model.Train(dataset);

            return(new Model
            {
                InnerModel = model,
                Weight = validation.PerfData.GetSumPerfMatrix(validation.ExpName, validation.GetModelName(model)).GetMacroF1(),
                Label = label,
                OtherLabel1 = otherLabel1,
                OtherLabelWeight1 = otherLabelWeight1,
                OtherLabel2 = otherLabel2,
                OtherLabelWeight2 = otherLabelWeight2,
                Scores = scores.OrderBy(s => s).ToArray(),
                ScoresOthers = scoresOthers.OrderBy(s => s).ToArray()
            });
        }
Example #17
0
        public override void Run(object[] args)
        {
            // Demonstrates the CrossValidator convenience class: runs a 10-fold
            // stratified validation of a naive Bayes classifier over binary-vector
            // data and prints per-fold and aggregate performance figures.

            // get labeled data
            BinarySvm classifierInst = BinarySvm.RunInstanceNull(args);
            var       labeledData    = (LabeledDataset <string, SparseVector <double> >)classifierInst.Result["labeled_data"];

            // convert dataset to binary vector
            var ds = (LabeledDataset <string, BinaryVector>)labeledData.ConvertDataset(typeof(BinaryVector), false);

            // cross validation ...with the convenience class
            var validation = new CrossValidator <string, BinaryVector>
            {
                NumFolds     = 10,   // default
                IsStratified = true, // default
                ExpName      = "",   // default

                Dataset      = ds,
                OnAfterTrain = (sender, foldN, model, trainSet) =>
                {
                    var m = (NaiveBayesClassifier <string>)model;
                    // do stuff after model is trained for a fold...
                },
                OnAfterPrediction = (sender, foldN, model, ex, le, prediction) =>
                {
                    // Log every prediction; returning true continues the validation run.
                    Output.WriteLine("actual: {0} \tpredicted: {1}\t score: {2:0.0000}", le.Label, prediction.BestClassLabel, prediction.BestScore);
                    return(true);
                },
                OnAfterFold = (sender, foldN, trainSet, foldPredictions) =>
                {
                    // Per-fold accuracy from the validator's performance data.
                    PerfMatrix <string> foldMatrix = sender.PerfData.GetPerfMatrix(sender.ExpName, sender.GetModelName(0), foldN);
                    Output.WriteLine("Accuracy for {0}-fold: {1:0.00}", foldN, foldMatrix.GetAccuracy());
                }
            };

            validation.Models.Add(new NaiveBayesClassifier <string>());
            validation.Run();

            // Aggregate results: summed confusion matrix, average accuracy,
            // and per-label precision with standard deviation across folds.
            Output.WriteLine("Sum confusion matrix:");
            PerfMatrix <string> sumPerfMatrix = validation.PerfData.GetSumPerfMatrix("", validation.GetModelName(0));

            Output.WriteLine(sumPerfMatrix.ToString());
            Output.WriteLine("Average accuracy: {0:0.00}", sumPerfMatrix.GetAccuracy());
            foreach (string label in validation.PerfData.GetLabels("", validation.GetModelName(0)))
            {
                double stdDev;
                Output.WriteLine("Precision for '{0}': {1:0.00} std. dev: {2:0.00}", label,
                                 validation.PerfData.GetAvg("", validation.GetModelName(0), ClassPerfMetric.Precision, label, out stdDev), stdDev);
            }
        }
Example #18
0
        public void DiscreteClassification_CategoricalFeatures_MultiValuesSplits_CongressVotingData_CrossValidation()
        {
            // Given
            var random = new Random();
            var crossValidator = new CrossValidator<string>(random);
            var congressData = TestDataBuilder.ReadCongressData() as DataFrame;

            var treePredictor = new DecisionTreePredictor<string>();

            // When: 10-fold validation of the multi-value tree builder on the voting data.
            var reports = crossValidator.CrossValidate(
                modelBuilder: this.multiValueTreeBuilder,
                modelBuilderParams: modelBuilderParams,
                predictor: treePredictor,
                qualityMeasure: new ConfusionMatrixBuilder<string>(),
                dataFrame: congressData,
                dependentFeatureName: "party",
                percetnagOfTrainData: 0.7,
                folds: 10);

            // Then
            var meanAccuracy = reports.Select(report => report.Accuracy).Average();

            Assert.IsTrue(meanAccuracy >= 0.9);
        }
        private Model TrainModel(LabeledDataset <SentimentLabel, SparseVector <double> > dataset,
                                 SentimentLabel label1, SentimentLabel label2)
        {
            // Trains a binary model distinguishing label1 from label2: cross-validation
            // first collects best-class score distributions for correct predictions of
            // each label, the model is then trained on the full dataset, and its weight
            // is the summed macro-F1 over the validation folds.
            IModel <SentimentLabel, SparseVector <double> > model = CreateModel();
            var scores1 = new List <double>();
            var scores2 = new List <double>();

            var validation = new CrossValidator <SentimentLabel, SparseVector <double> >
            {
                NumFolds = NumTrainFolds,
                Dataset  = dataset,

                OnAfterPrediction = (sender, foldN, m, ex, le, prediction) =>
                {
                    // Only record scores when the prediction matches the true label.
                    if (le.Label == prediction.BestClassLabel)
                    {
                        if (prediction.BestClassLabel == label1)
                        {
                            scores1.Add(prediction.BestScore);
                        }
                        else if (prediction.BestClassLabel == label2)
                        {
                            scores2.Add(prediction.BestScore);
                        }
                    }
                    return(true); // true = keep validating
                }
            };

            validation.Models.Add(model);
            validation.Run();

            // train model
            model.Train(dataset);
            return(new Model
            {
                InnerModel = model,
                Label1 = label1,
                Label2 = label2,
                Scores1 = scores1.OrderBy(s => s).ToArray(),
                Scores2 = scores2.OrderBy(s => s).ToArray(),
                Weight = validation.PerfData.GetSumPerfMatrix(validation.ExpName, validation.GetModelName(model)).GetMacroF1()
            });
        }
        public void DiscreteClassification_CategoricalFeatures_BinarySplits_ConvressVotingData_CrossValidation()
        {
            // Given: seeded splits over the congress-voting data.
            var random = new Random(3);
            var crossValidator = new CrossValidator<string>(random);
            var congressData = TestDataBuilder.ReadCongressData() as DataFrame;

            var treePredictor = new DecisionTreePredictor<string>();

            // When: 10-fold validation of the binary-split tree builder.
            var reports = crossValidator.CrossValidate(
                modelBuilder: binaryTreeBuilder,
                modelBuilderParams: modelBuilderParams,
                predictor: treePredictor,
                qualityMeasure: new ConfusionMatrixBuilder<string>(),
                dataFrame: congressData,
                dependentFeatureName: "party",
                percetnagOfTrainData: 0.7,
                folds: 10);

            // Then
            var meanAccuracy = reports.Select(report => report.Accuracy).Average();
            Assert.IsTrue(meanAccuracy >= 0.9);
        }
Example #21
0
        public void DiscreteClassification_NumericFeatures_MultiValuesSplits_AdultCensusData_CrossValidation()
        {
            // Given
            var crossValidator = new CrossValidator<object>();
            var censusData = TestDataBuilder.ReadAdultCensusDataFrame();

            var treePredictor = new DecisionTreePredictor<object>();

            // When: five folds predicting the "income" column.
            var reports = crossValidator.CrossValidate(
                multiValueTreeBuilderWithBetterNumercValsHandler,
                modelBuilderParams,
                treePredictor,
                new ConfusionMatrixBuilder<object>(),
                censusData,
                "income",
                0.7,
                5);

            // Then
            var meanAccuracy = reports.Average(report => report.Accuracy);

            Assert.IsTrue(meanAccuracy >= 0.8);
        }
Example #22
0
        public void DiscreteClassification_NumericFeatures_BinarySplits_IrisData_CrossValidation()
        {
            // Given
            // NOTE: removed an unused `new Random()` local that was never passed to
            // the CrossValidator, so splits here were (and remain) internally seeded.
            var splitter  = new CrossValidator <object>();
            var testData  = TestDataBuilder.ReadIrisData();
            var predictor = new DecisionTreePredictor <object>();

            // When: 10-fold validation of the binary-split tree on the iris data.
            var accuracies = splitter.CrossValidate(
                binaryTreeBuilder,
                modelBuilderParams,
                predictor,
                new ConfusionMatrixBuilder <object>(),
                testData,
                "iris_class",
                0.7,
                10);

            // Then
            var averageAccuracy = accuracies.Select(report => report.Accuracy).Average();

            Assert.IsTrue(averageAccuracy >= 0.9);
        }
Example #23
0
        public void CrossValidateSentimentModelTest()
        {
            string dataPath = GetDataPath(SentimentDataPath);
            var    pipeline = new LearningPipeline();

            pipeline.Add(new Data.TextLoader(dataPath)
            {
                Arguments = new TextLoaderArguments
                {
                    Separator = new[] { '\t' },
                    HasHeader = true,
                    Column    = new[]
                    {
                        new TextLoaderColumn()
                        {
                            Name   = "Label",
                            Source = new [] { new TextLoaderRange(0) },
                            Type   = Runtime.Data.DataKind.Num
                        },

                        new TextLoaderColumn()
                        {
                            Name   = "SentimentText",
                            Source = new [] { new TextLoaderRange(1) },
                            Type   = Runtime.Data.DataKind.Text
                        }
                    }
                }
            });

            pipeline.Add(new TextFeaturizer("Features", "SentimentText")
            {
                KeepDiacritics       = false,
                KeepPunctuations     = false,
                TextCase             = TextNormalizerTransformCaseNormalizationMode.Lower,
                OutputTokens         = true,
                StopWordsRemover     = new PredefinedStopWordsRemover(),
                VectorNormalizer     = TextTransformTextNormKind.L2,
                CharFeatureExtractor = new NGramNgramExtractor()
                {
                    NgramLength = 3, AllLengths = false
                },
                WordFeatureExtractor = new NGramNgramExtractor()
                {
                    NgramLength = 2, AllLengths = true
                }
            });

            pipeline.Add(new FastTreeBinaryClassifier()
            {
                NumLeaves = 5, NumTrees = 5, MinDocumentsInLeafs = 2
            });
            pipeline.Add(new PredictedLabelColumnOriginalValueConverter()
            {
                PredictedLabelColumn = "PredictedLabel"
            });

            IEnumerable <SentimentData> sentiments = new[]
            {
                new SentimentData
                {
                    SentimentText = "Please refrain from adding nonsense to Wikipedia."
                },
                new SentimentData
                {
                    SentimentText = "He is a CHEATER, and the article should say that."
                }
            };

            var cv = new CrossValidator().CrossValidate <SentimentData, SentimentPrediction>(pipeline);

            //First two items are average and std. deviation of metrics from the folds.
            Assert.Equal(2, cv.PredictorModels.Count());
            Assert.Null(cv.ClassificationMetrics);
            Assert.Null(cv.RegressionMetrics);
            Assert.NotNull(cv.BinaryClassificationMetrics);
            Assert.Equal(4, cv.BinaryClassificationMetrics.Count());

            //Avergae of all folds.
            BinaryClassificationMetrics metrics = cv.BinaryClassificationMetrics[0];

            Assert.Equal(0.57023626091422708, metrics.Accuracy, 4);
            Assert.Equal(0.54960689910161487, metrics.Auc, 1);
            Assert.Equal(0.67048277219704255, metrics.Auprc, 2);
            Assert.Equal(0, metrics.Entropy, 3);
            Assert.Equal(0.68942642723130532, metrics.F1Score, 4);
            Assert.Equal(0.97695909611968434, metrics.LogLoss, 3);
            Assert.Equal(-3.050726259114541, metrics.LogLossReduction, 3);
            Assert.Equal(0.37553879310344829, metrics.NegativePrecision, 3);
            Assert.Equal(0.25683962264150945, metrics.NegativeRecall, 3);
            Assert.Equal(0.63428539173628362, metrics.PositivePrecision, 3);
            Assert.Equal(0.75795196364816619, metrics.PositiveRecall);
            Assert.Null(metrics.ConfusionMatrix);

            //Std. Deviation.
            metrics = cv.BinaryClassificationMetrics[1];
            Assert.Equal(0.039933230611196011, metrics.Accuracy, 4);
            Assert.Equal(0.021066177821462407, metrics.Auc, 1);
            Assert.Equal(0.045842033921572725, metrics.Auprc, 2);
            Assert.Equal(0, metrics.Entropy, 3);
            Assert.Equal(0.030085767890644915, metrics.F1Score, 4);
            Assert.Equal(0.032906777175141941, metrics.LogLoss, 3);
            Assert.Equal(0.86311349745170118, metrics.LogLossReduction, 3);
            Assert.Equal(0.030711206896551647, metrics.NegativePrecision, 3);
            Assert.Equal(0.068160377358490579, metrics.NegativeRecall, 3);
            Assert.Equal(0.051761119891622735, metrics.PositivePrecision, 3);
            Assert.Equal(0.0015417072379052127, metrics.PositiveRecall);
            Assert.Null(metrics.ConfusionMatrix);

            //Fold 1.
            metrics = cv.BinaryClassificationMetrics[2];
            Assert.Equal(0.53030303030303028, metrics.Accuracy, 4);
            Assert.Equal(0.52854072128015284, metrics.Auc, 1);
            Assert.Equal(0.62464073827546951, metrics.Auprc, 2);
            Assert.Equal(0, metrics.Entropy, 3);
            Assert.Equal(0.65934065934065933, metrics.F1Score, 4);
            Assert.Equal(1.0098658732948276, metrics.LogLoss, 3);
            Assert.Equal(-3.9138397565662424, metrics.LogLossReduction, 3);
            Assert.Equal(0.34482758620689657, metrics.NegativePrecision, 3);
            Assert.Equal(0.18867924528301888, metrics.NegativeRecall, 3);
            Assert.Equal(0.58252427184466016, metrics.PositivePrecision, 3);
            Assert.Equal(0.759493670886076, metrics.PositiveRecall);

            ConfusionMatrix matrix = metrics.ConfusionMatrix;

            Assert.Equal(2, matrix.Order);
            Assert.Equal(2, matrix.ClassNames.Count);
            Assert.Equal("positive", matrix.ClassNames[0]);
            Assert.Equal("negative", matrix.ClassNames[1]);

            Assert.Equal(60, matrix[0, 0]);
            Assert.Equal(60, matrix["positive", "positive"]);
            Assert.Equal(19, matrix[0, 1]);
            Assert.Equal(19, matrix["positive", "negative"]);

            Assert.Equal(43, matrix[1, 0]);
            Assert.Equal(43, matrix["negative", "positive"]);
            Assert.Equal(10, matrix[1, 1]);
            Assert.Equal(10, matrix["negative", "negative"]);

            //Fold 2.
            metrics = cv.BinaryClassificationMetrics[3];
            Assert.Equal(0.61016949152542377, metrics.Accuracy, 4);
            Assert.Equal(0.57067307692307689, metrics.Auc, 1);
            Assert.Equal(0.71632480611861549, metrics.Auprc, 2);
            Assert.Equal(0, metrics.Entropy, 3);
            Assert.Equal(0.71951219512195119, metrics.F1Score, 4);
            Assert.Equal(0.94405231894454111, metrics.LogLoss, 3);
            Assert.Equal(-2.1876127616628396, metrics.LogLossReduction, 3);
            Assert.Equal(0.40625, metrics.NegativePrecision, 3);
            Assert.Equal(0.325, metrics.NegativeRecall, 3);
            Assert.Equal(0.686046511627907, metrics.PositivePrecision, 3);
            Assert.Equal(0.75641025641025639, metrics.PositiveRecall);

            matrix = metrics.ConfusionMatrix;
            Assert.Equal(2, matrix.Order);
            Assert.Equal(2, matrix.ClassNames.Count);
            Assert.Equal("positive", matrix.ClassNames[0]);
            Assert.Equal("negative", matrix.ClassNames[1]);

            Assert.Equal(59, matrix[0, 0]);
            Assert.Equal(59, matrix["positive", "positive"]);
            Assert.Equal(19, matrix[0, 1]);
            Assert.Equal(19, matrix["positive", "negative"]);

            Assert.Equal(27, matrix[1, 0]);
            Assert.Equal(27, matrix["negative", "positive"]);
            Assert.Equal(13, matrix[1, 1]);
            Assert.Equal(13, matrix["negative", "negative"]);

            IEnumerable <SentimentPrediction> predictions = cv.PredictorModels[0].Predict(sentiments);

            Assert.Equal(2, predictions.Count());
            Assert.True(predictions.ElementAt(0).Sentiment.IsTrue);
            Assert.True(predictions.ElementAt(1).Sentiment.IsTrue);

            predictions = cv.PredictorModels[1].Predict(sentiments);
            Assert.Equal(2, predictions.Count());
            Assert.True(predictions.ElementAt(0).Sentiment.IsTrue);
            Assert.True(predictions.ElementAt(1).Sentiment.IsTrue);
        }
Example #24
0
        public void DiscreteClassification_MixedFeatures_MultiValueSplits_CleanedTitanicData()
        {
            // Trains a random forest (multi-value splits, improved numeric-value handling)
            // on the cleaned Titanic data set and checks that a single cross-validation
            // fold reaches at least 75% accuracy when predicting "Survived".
            // NOTE: the large commented-out manual train/predict/export experiment that
            // used to live at the bottom of this test (including a hardcoded local CSV
            // path) has been removed as dead code.

            // Given
            var randomForestBuilder = new RandomForestModelBuilder <object>(
                multiValueTreeBuilderWithBetterNumercValsHandler,
                new DecisionTreePredictor <object>(),
                new ConfusionMatrixBuilder <object>(),
                i => (int)Math.Round(Math.Sqrt(i), MidpointRounding.AwayFromZero), // features per tree = round(sqrt(total features))
                () => new DecisionTreeModelBuilderParams(false, true));
            var randomForestPredictor = new RandomForestPredictor <object>(new DecisionTreePredictor <object>());
            var baseData = TestDataBuilder.ReadTitanicData();

            // Drop identity/engineered columns that should not act as predictors.
            baseData = baseData.GetSubsetByColumns(baseData.ColumnNames.Except(new[] { "FarePerPerson", "PassengerId", "FamilySize" }).ToList());
            var crossValidator = new CrossValidator <object>();

            // When: 200 trees, depth limit 10, 75% of rows for training, one fold.
            var accuracy = crossValidator.CrossValidate(
                randomForestBuilder,
                new RandomForestParams(200, 10),
                randomForestPredictor,
                new ConfusionMatrixBuilder <object>(),
                baseData,
                "Survived",
                0.75,
                1);

            // Then
            Assert.IsTrue(accuracy.Select(acc => acc.Accuracy).Average() >= 0.75);
        }
        /// <summary>
        /// Regression test: cross-validates the sentiment pipeline (two folds) and pins
        /// the exact metric values produced by each fold. Every numeric literal below is
        /// a recorded baseline; a change in any of them indicates the pipeline's output
        /// has drifted.
        /// </summary>
        public void CrossValidateSentimentModelTest()
        {
            var pipeline = PreparePipeline();

            var cv = new CrossValidator().CrossValidate <SentimentData, SentimentPrediction>(pipeline);

            // Two folds yield two trained models. The metrics list has four entries:
            // [0] average over folds, [1] std. deviation over folds, [2] fold 1, [3] fold 2.
            Assert.Equal(2, cv.PredictorModels.Count());
            Assert.Null(cv.ClassificationMetrics);
            Assert.Null(cv.RegressionMetrics);
            Assert.NotNull(cv.BinaryClassificationMetrics);
            Assert.Equal(4, cv.BinaryClassificationMetrics.Count());

            // Average of all folds.
            var metrics = cv.BinaryClassificationMetrics[0];

            Assert.Equal(0.603235747303544, metrics.Accuracy, 4);
            Assert.Equal(0.58811318075483943, metrics.Auc, 4);
            Assert.Equal(0.70302385499183984, metrics.Auprc, 4);
            Assert.Equal(0, metrics.Entropy, 3);
            Assert.Equal(0.71751777634130576, metrics.F1Score, 4);
            Assert.Equal(0.95263103280238037, metrics.LogLoss, 4);
            Assert.Equal(-0.39971801589876232, metrics.LogLossReduction, 4);
            Assert.Equal(0.43965517241379309, metrics.NegativePrecision, 4);
            Assert.Equal(0.26627358490566039, metrics.NegativeRecall, 4);
            Assert.Equal(0.64937737441958632, metrics.PositivePrecision, 4);
            Assert.Equal(0.8027426160337553, metrics.PositiveRecall);
            // Aggregate entries carry no confusion matrix — only per-fold entries do.
            Assert.Null(metrics.ConfusionMatrix);

            // Std. deviation over the folds.
            metrics = cv.BinaryClassificationMetrics[1];
            Assert.Equal(0.057781201848998764, metrics.Accuracy, 4);
            Assert.Equal(0.04249579360413544, metrics.Auc, 4);
            Assert.Equal(0.086083866074815427, metrics.Auprc, 4);
            Assert.Equal(0, metrics.Entropy, 3);
            Assert.Equal(0.04718810601163604, metrics.F1Score, 4);
            Assert.Equal(0.063839715206238851, metrics.LogLoss, 4);
            Assert.Equal(4.1937544629633878, metrics.LogLossReduction, 4);
            Assert.Equal(0.060344827586206781, metrics.NegativePrecision, 4);
            Assert.Equal(0.058726415094339748, metrics.NegativeRecall, 4);
            Assert.Equal(0.057144364710848418, metrics.PositivePrecision, 4);
            Assert.Equal(0.030590717299577637, metrics.PositiveRecall);
            Assert.Null(metrics.ConfusionMatrix);

            // Fold 1.
            metrics = cv.BinaryClassificationMetrics[2];
            Assert.Equal(0.54545454545454541, metrics.Accuracy, 4);
            Assert.Equal(0.54561738715070451, metrics.Auc, 4);
            Assert.Equal(0.61693998891702417, metrics.Auprc, 4);
            Assert.Equal(0, metrics.Entropy, 3);
            Assert.Equal(0.67032967032967028, metrics.F1Score, 4);
            Assert.Equal(1.0164707480086188, metrics.LogLoss, 4);
            Assert.Equal(-4.59347247886215, metrics.LogLossReduction, 4);
            Assert.Equal(0.37931034482758619, metrics.NegativePrecision, 4);
            Assert.Equal(0.20754716981132076, metrics.NegativeRecall, 4);
            Assert.Equal(0.59223300970873782, metrics.PositivePrecision, 4);
            Assert.Equal(0.77215189873417722, metrics.PositiveRecall);

            // Fold 1 confusion matrix: rows are actual class, columns predicted class,
            // addressable by index or by class name.
            var matrix = metrics.ConfusionMatrix;

            Assert.Equal(2, matrix.Order);
            Assert.Equal(2, matrix.ClassNames.Count);
            Assert.Equal("positive", matrix.ClassNames[0]);
            Assert.Equal("negative", matrix.ClassNames[1]);

            Assert.Equal(61, matrix[0, 0]);
            Assert.Equal(61, matrix["positive", "positive"]);
            Assert.Equal(18, matrix[0, 1]);
            Assert.Equal(18, matrix["positive", "negative"]);

            Assert.Equal(42, matrix[1, 0]);
            Assert.Equal(42, matrix["negative", "positive"]);
            Assert.Equal(11, matrix[1, 1]);
            Assert.Equal(11, matrix["negative", "negative"]);

            // Fold 2.
            metrics = cv.BinaryClassificationMetrics[3];
            Assert.Equal(0.66101694915254239, metrics.Accuracy, 4);
            Assert.Equal(0.63060897435897434, metrics.Auc, 4);
            Assert.Equal(0.7891077210666555, metrics.Auprc, 4);
            Assert.Equal(0, metrics.Entropy, 3);
            Assert.Equal(0.76470588235294124, metrics.F1Score, 4);
            Assert.Equal(0.88879131759614194, metrics.LogLoss, 4);
            Assert.Equal(3.7940364470646255, metrics.LogLossReduction, 4);
            Assert.Equal(0.5, metrics.NegativePrecision, 3);
            Assert.Equal(0.325, metrics.NegativeRecall, 3);
            Assert.Equal(0.70652173913043481, metrics.PositivePrecision, 4);
            Assert.Equal(0.83333333333333337, metrics.PositiveRecall);

            matrix = metrics.ConfusionMatrix;
            Assert.Equal(2, matrix.Order);
            Assert.Equal(2, matrix.ClassNames.Count);
            Assert.Equal("positive", matrix.ClassNames[0]);
            Assert.Equal("negative", matrix.ClassNames[1]);

            Assert.Equal(65, matrix[0, 0]);
            Assert.Equal(65, matrix["positive", "positive"]);
            Assert.Equal(13, matrix[0, 1]);
            Assert.Equal(13, matrix["positive", "negative"]);

            Assert.Equal(27, matrix[1, 0]);
            Assert.Equal(27, matrix["negative", "positive"]);
            Assert.Equal(13, matrix[1, 1]);
            Assert.Equal(13, matrix["negative", "negative"]);

            // Both fold models must be usable for prediction on fresh data.
            var sentiments  = GetTestData();
            var predictions = cv.PredictorModels[0].Predict(sentiments);

            Assert.Equal(2, predictions.Count());
            Assert.True(predictions.ElementAt(0).Sentiment.IsTrue);
            Assert.True(predictions.ElementAt(1).Sentiment.IsTrue);

            predictions = cv.PredictorModels[1].Predict(sentiments);
            Assert.Equal(2, predictions.Count());
            Assert.True(predictions.ElementAt(0).Sentiment.IsTrue);
            Assert.True(predictions.ElementAt(1).Sentiment.IsTrue);
        }
        /// <summary>
        /// Builds a feature vector from the selected technical indicators, splits it into
        /// training/test sets by <paramref name="trainingSetPercentage"/>, trains the chosen
        /// ML algorithm (optionally with 10-fold cross-validation) and returns the binary
        /// classification accuracy on the test set.
        /// </summary>
        private static double CalculateAccuracy(List <int> indicators, int mlAlgorithm, bool isCrossValidationEnabled, int minRowCount, double trainingSetPercentage, double[] smaOut, double[] wmaOut, double[] emaOut, double[] macdOut, double[] rsiOut, double[] williamsROut, double[] stochasticsOut, double[] closesOut)
        {
            // Converts an indicator series into a feature column: invariant-culture
            // strings, truncated to the shared row count so all columns align.
            object[] ToColumn(double[] source) =>
                source.Select(p => (object)p.ToString(CultureInfo.InvariantCulture)).Take(minRowCount).ToArray();

            FeatureVector vector = new FeatureVector();

            if (indicators.Contains(IndicatorService.SMA))
            {
                vector.AddColumn("SMA", ToColumn(smaOut));
            }
            if (indicators.Contains(IndicatorService.WMA))
            {
                vector.AddColumn("WMA", ToColumn(wmaOut));
            }
            if (indicators.Contains(IndicatorService.EMA))
            {
                vector.AddColumn("EMA", ToColumn(emaOut));
            }
            if (indicators.Contains(IndicatorService.MACD))
            {
                vector.AddColumn("MACD", ToColumn(macdOut));
            }
            if (indicators.Contains(IndicatorService.RSI))
            {
                vector.AddColumn("RSI", ToColumn(rsiOut));
            }
            if (indicators.Contains(IndicatorService.WilliamsR))
            {
                vector.AddColumn("WilliamsR", ToColumn(williamsROut));
            }
            if (indicators.Contains(IndicatorService.Stochastics))
            {
                vector.AddColumn("Stochastics", ToColumn(stochasticsOut));
            }
            // BUG FIX: the label was formatted with the *current* culture while every
            // feature column used the invariant culture, so on e.g. a Turkish locale the
            // labels came out as "0,5" next to "0.5" features. Format invariantly too.
            vector.AddColumn("label", closesOut.Select(p => (object)string.Format(CultureInfo.InvariantCulture, "{0:0.0}", p)).Take(minRowCount).ToArray());

            // NOTE(review): hardcoded, user-specific export path; this side effect should
            // be made configurable or moved out of the accuracy calculation.
            new CSVExporter(vector).Export("c:\\users\\yasin\\indicatorOutput.csv");

            int count            = vector.Values[0].Length;
            int trainingRowCount = (int)(count * trainingSetPercentage);

            // Split every column at the same row boundary: first part trains, rest tests.
            FeatureVector training = new FeatureVector();
            FeatureVector test     = new FeatureVector();

            for (int i = 0; i < vector.ColumnName.Count; i++)
            {
                training.AddColumn(vector.ColumnName[i], vector.Values[i].Take(trainingRowCount).ToArray());
                test.AddColumn(vector.ColumnName[i], vector.Values[i].Skip(trainingRowCount).ToArray());
            }

            double accuracy = 0;

            // The three branches differ only in the estimator/model types; the concrete
            // model casts are kept because the fitted types expose transform() directly.
            if (mlAlgorithm == MLAService.LIN_REG)
            {
                var linReg = new LinearRegression();
                var bce    = new BinaryClassificationEvaluator();
                if (isCrossValidationEnabled)
                {
                    var cv          = new CrossValidator(linReg, bce, 10);
                    var cvModel     = (CrossValidatorModel)cv.Fit(training);
                    var predictions = cvModel.transform(test);
                    bce.evaluate(predictions);
                    accuracy = bce.Accuracy;
                }
                else
                {
                    var linRegModel = (LinearRegressionModel)linReg.Fit(training);
                    var predictions = linRegModel.transform(test);
                    bce.evaluate(predictions);
                    accuracy = bce.Accuracy;
                }
            }
            else if (mlAlgorithm == MLAService.LOG_REG)
            {
                var logReg = new LogisticRegression();
                var bce    = new BinaryClassificationEvaluator();
                if (isCrossValidationEnabled)
                {
                    var cv          = new CrossValidator(logReg, bce, 10);
                    var cvModel     = (CrossValidatorModel)cv.Fit(training);
                    var predictions = cvModel.transform(test);
                    bce.evaluate(predictions);
                    accuracy = bce.Accuracy;
                }
                else
                {
                    var logRegModel = (LogisticRegressionModel)logReg.Fit(training);
                    var predictions = logRegModel.transform(test);
                    bce.evaluate(predictions);
                    accuracy = bce.Accuracy;
                }
            }
            else if (mlAlgorithm == MLAService.NAI_BAY)
            {
                var naiBay = new NaiveBayes();
                var bce    = new BinaryClassificationEvaluator();
                if (isCrossValidationEnabled)
                {
                    var cv          = new CrossValidator(naiBay, bce, 10);
                    var cvModel     = (CrossValidatorModel)cv.Fit(training);
                    var predictions = cvModel.transform(test);
                    bce.evaluate(predictions);
                    accuracy = bce.Accuracy;
                }
                else
                {
                    var naiBayModel = (NaiveBayesModel)naiBay.Fit(training);
                    var predictions = naiBayModel.transform(test);
                    bce.evaluate(predictions);
                    accuracy = bce.Accuracy;
                }
            }
            // Unknown algorithm ids fall through and return 0 (original behavior kept).
            return(accuracy);
        }
        public void Mushroom_MultiSplit()
        {
            // Given: a fixed seed so the fold assignment is reproducible across runs.
            var rng = new Random(3);
            var validator = new CrossValidator<string>(rng);
            var mushrooms = TestDataBuilder.ReadMushroomDataWithCategoricalAttributes();
            var treePredictor = new DecisionTreePredictor<string>();

            // When: 2-fold cross-validation, 70% of the rows used for training,
            // predicting the mushroom "type" column.
            var reports = validator.CrossValidate(
                multiValueTreeBuilder,
                modelBuilderParams,
                treePredictor,
                new ConfusionMatrixBuilder<string>(),
                mushrooms,
                "type",
                0.7,
                2);

            // Then: the mushroom data is nearly perfectly separable.
            Assert.IsTrue(reports.Select(r => r.Accuracy).Average() >= 0.99);
        }
        public void DiscreteClassification_NumericFeatures_MultiValuesSplits_AdultCensusData_CrossValidation()
        {
            // Given
            var validator = new CrossValidator<object>();
            var censusData = TestDataBuilder.ReadAdultCensusDataFrame();
            var treePredictor = new DecisionTreePredictor<object>();

            // When: 5-fold cross-validation predicting "income", 70% training rows per fold.
            var reports = validator.CrossValidate(
                modelBuilder: multiValueTreeBuilderWithBetterNumercValsHandler,
                modelBuilderParams: modelBuilderParams,
                predictor: treePredictor,
                qualityMeasure: new ConfusionMatrixBuilder<object>(),
                dataFrame: censusData,
                dependentFeatureName: "income",
                percetnagOfTrainData: 0.7,
                folds: 5);

            // Then
            var meanAccuracy = reports.Select(r => r.Accuracy).Average();
            Assert.IsTrue(meanAccuracy >= 0.8);
        }
        public void DiscreteClassification_NumericFeatures_BinarySplits_IrisData_CrossValidation()
        {
            // Cross-validates a binary-split decision tree on the iris data set and
            // expects at least 90% average accuracy over 10 folds.
            // FIX: removed the unused local `randomizer` (a `new Random()` that was
            // never passed anywhere).

            // Given
            var splitter = new CrossValidator<object>();
            var testData = TestDataBuilder.ReadIrisData();
            var predictor = new DecisionTreePredictor<object>();

            // When: 10 folds, 70% of the rows used for training in each fold.
            var accuracies = splitter.CrossValidate(
                binaryTreeBuilder,
                modelBuilderParams,
                predictor,
                new ConfusionMatrixBuilder<object>(),
                testData,
                "iris_class",
                0.7,
                10);

            // Then
            var averageAccuracy = accuracies.Select(report => report.Accuracy).Average();
            Assert.IsTrue(averageAccuracy >= 0.9);
        }
        public void DiscreteClassification_CategoricalFeatures_MultiValuesSplits_CongressVotingData_StatisticalSignificanceHeuristic_CrossValidation()
        {
            // Given: a tree builder that prunes splits failing a chi-square
            // significance test at p = 0.05.
            var rng = new Random();
            var validator = new CrossValidator<string>(rng);
            var votingData = TestDataBuilder.ReadCongressData() as DataFrame;
            var treePredictor = new DecisionTreePredictor<string>();

            // When: 10-fold cross-validation predicting party affiliation,
            // 70% of rows used for training in each fold.
            var reports = validator.CrossValidate(
                modelBuilder: this.BuildCustomModelBuilder(true, statisticalSignificanceChecker: new ChiSquareStatisticalSignificanceChecker(0.05)),
                modelBuilderParams: new DecisionTreeModelBuilderParams(false, true),
                predictor: treePredictor,
                qualityMeasure: new ConfusionMatrixBuilder<string>(),
                dataFrame: votingData,
                dependentFeatureName: "party",
                percetnagOfTrainData: 0.7,
                folds: 10);

            // Then
            Assert.IsTrue(reports.Select(r => r.Accuracy).Average() >= 0.9);
        }
Example #31
0
        /// <summary>
        /// Trains the underlying positive/negative classifier and derives the score
        /// bounds (NegBound/PosBound) that delimit a neutral zone between the two polar
        /// classes. Neutral examples are excluded from classifier training; they are
        /// only scored (when stats or bounds are requested) to estimate where neutral
        /// scores fall relative to correctly classified polar scores.
        /// Score convention throughout: positive-class scores are kept positive,
        /// negative-class scores are negated so they lie below zero.
        /// </summary>
        public override void Train(ILabeledExampleCollection <SentimentLabel, SparseVector <double> > dataset)
        {
            Preconditions.CheckNotNull(dataset);
            // Centile parameters only need to be valid when bounds are NOT computed
            // automatically (IsCalcBounds short-circuits the range check).
            Preconditions.CheckArgumentRange(IsCalcBounds || NegCentile >= 0 && NegCentile <= 1);
            Preconditions.CheckArgumentRange(IsCalcBounds || PosCentile >= 0 && PosCentile <= 1);

            var labeledDataset = (LabeledDataset <SentimentLabel, SparseVector <double> >)dataset;

            // NOTE(review): an empty dataset is only logged, not rejected — training
            // proceeds regardless; confirm this best-effort behavior is intended.
            if (labeledDataset.Count == 0)
            {
                Console.WriteLine("empty dataset");
            }

            TrainStats = null;

            // Scores of correctly predicted polar examples, collected across CV folds.
            var posScores      = new List <double>();
            var negScores      = new List <double>();
            var neutralScores  = new List <double>();
            // Only non-neutral examples are used to train the binary classifier.
            var trainDataset   = new LabeledDataset <SentimentLabel, SparseVector <double> >(labeledDataset.Where(le => le.Label != SentimentLabel.Neutral));
            var neutralDataset = IsCalcStats || IsCalcBounds
                ? new LabeledDataset <SentimentLabel, SparseVector <double> >(dataset.Where(le => le.Label == SentimentLabel.Neutral))
                : null;

            var validation = new CrossValidator <SentimentLabel, SparseVector <double> >
            {
                NumFolds = NumTrainFolds,
                Dataset  = trainDataset,

                // After each prediction: record the score of every CORRECT prediction,
                // signed by class (positive stays positive, negative is negated).
                OnAfterPrediction = (sender, foldN, model, example, le, prediction) =>
                {
                    if (le.Label == prediction.BestClassLabel)
                    {
                        if (le.Label == SentimentLabel.Positive)
                        {
                            posScores.Add(prediction.BestScore);
                        }
                        else
                        {
                            negScores.Add(-prediction.BestScore);
                        }
                    }
                    return(true);
                },

                // After each fold: score ALL neutral examples with that fold's model,
                // mapping them onto the same signed-score axis as the polar scores.
                OnAfterFold = (sender, foldN, trainSet, testSet) =>
                {
                    if (IsCalcStats || IsCalcBounds)
                    {
                        neutralScores.AddRange(neutralDataset
                                               .Select(le => sender.Models[0].Predict(le.Example))
                                               .Select(p => p.BestClassLabel == SentimentLabel.Positive ? p.BestScore : -p.BestScore));
                    }
                }
            };

            validation.Models.Add(CreateModel());
            validation.Run();

            if (IsCalcBounds)
            {
                // Automatic bounds: on each side of zero, find the score that best
                // separates neutral from polar examples (scores compared as magnitudes,
                // hence the negations on the negative side). Falls back to 0 when no
                // separating score is found.
                double negMaxProb, negScore;
                NegBound = FindMaxExclusiveProbability(neutralScores.Where(s => s < 0).Select(s => - s),
                                                       negScores.Select(s => - s), out negMaxProb, out negScore) ? -negScore : 0;

                double posMaxProb, posScore;
                PosBound = FindMaxExclusiveProbability(neutralScores.Where(s => s > 0),
                                                       posScores, out posMaxProb, out posScore) ? posScore : 0;
            }
            else
            {
                // Manual bounds: take the score at the requested centile of the
                // correctly-classified polar scores (from the most extreme inward).
                if (NegCentile != null)
                {
                    NegBound = negScores.OrderByDescending(bs => bs).Skip((int)Math.Truncate(negScores.Count * NegCentile.Value)).FirstOrDefault();
                }
                if (PosCentile != null)
                {
                    PosBound = posScores.OrderBy(bs => bs).Skip((int)Math.Truncate(posScores.Count * PosCentile.Value)).FirstOrDefault();
                }
            }

            if (IsCalcStats)
            {
                TrainStats = CalcStats(negScores, neutralScores, posScores);
            }

            // Finally, retrain the (single) model on ALL polar examples; the CV runs
            // above were only used to collect score distributions.
            mBinaryClassifier = validation.Models[0];
            mBinaryClassifier.Train(trainDataset);

            IsTrained = true;
        }
        public void Mushroom_MultiSplit_StatisticalSignificanceHeuristic()
        {
            // Given: a fixed seed for reproducible folds and a tree builder that
            // applies a chi-square significance check when deciding whether to split.
            var rng = new Random(3);
            var validator = new CrossValidator<string>(rng);
            var mushrooms = TestDataBuilder.ReadMushroomDataWithCategoricalAttributes();
            var treePredictor = new DecisionTreePredictor<string>();

            // When: 2-fold cross-validation, 70% of the rows used for training.
            var reports = validator.CrossValidate(
                this.BuildCustomModelBuilder(statisticalSignificanceChecker: new ChiSquareStatisticalSignificanceChecker()),
                new DecisionTreeModelBuilderParams(false, true),
                treePredictor,
                new ConfusionMatrixBuilder<string>(),
                mushrooms,
                "type",
                0.7,
                2);

            // Then: accuracy must stay near-perfect even with significance pruning.
            Assert.IsTrue(reports.Select(r => r.Accuracy).Average() >= 0.99);
        }
        public void Regression_NumericAttrsAndOutcomesOnly_RegularizedRegression()
        {
            // Given: a seeded RNG and a binary regression tree whose leaves fit
            // regularized linear models (regularization strength 0.005).
            var rng = new Random(3);
            var validator = new CrossValidator<double>(rng);
            var housingData = TestDataBuilder.ReadHousingDataNormalizedAttrs();
            var treePredictor = new DecisionTreePredictor<double>();

            var modelTreeBuilder = new BinaryDecisionTreeModelBuilder(
                new VarianceBasedSplitQualityChecker(),
                new BestSplitSelectorForNumericValues(new BinaryNumericDataSplitter()),
                new RegressionAndModelDecisionTreeLeafBuilder(new RegularizedLinearRegressionModelBuilder(0.005)));

            // When: 15 folds, 70% training rows, goodness of fit (R squared) as the
            // quality measure, predicting the median home value column "MEDV".
            var reports = validator.CrossValidate(
                modelBuilder: modelTreeBuilder,
                modelBuilderParams: modelBuilderParams,
                predictor: treePredictor,
                qualityMeasure: new GoodnessOfFitQualityMeasure(),
                dataFrame: housingData,
                dependentFeatureName: "MEDV",
                percetnagOfTrainData: 0.7,
                folds: 15);

            // Then
            var averageRSquared = reports.Select(r => r.Accuracy).Average();
            Assert.IsTrue(averageRSquared >= 0.6);
        }
Example #34
0
        private void buttonForDataSplitNext_Click(object sender, EventArgs e)
        {
            trainingSetPercentage = (double)numericUpDownForTrainingSetPercent.Value / 100.0;
            numFolds = (int)numericUpDownForNumFolds.Value;

            double[] smaOut         = null;
            double[] wmaOut         = null;
            double[] emaOut         = null;
            double[] macdOut        = null;
            double[] stochasticsOut = null;
            double[] williamsROut   = null;
            double[] rsiOut         = null;
            double[] closesOut      = null;

            var data = IndicatorService.GetData(code, targetDate, new string[] { "Tarih", "Kapanis" }, numberOfData + 1);

            if (isSMAChecked)
            {
                smaOut = IndicatorDataPreprocessor.GetSMAOut(MovingAverage.Simple(code, targetDate, smaPeriod, numberOfData));
            }
            if (isWMAChecked)
            {
                wmaOut = IndicatorDataPreprocessor.GetWMAOut(MovingAverage.Weighted(code, targetDate, wmaPeriod, numberOfData));
            }
            if (isEMAChecked)
            {
                emaOut = IndicatorDataPreprocessor.GetEMAOut(MovingAverage.Exponential(code, targetDate, emaPeriod, numberOfData));
            }
            if (isMACDChecked)
            {
                macdOut = IndicatorDataPreprocessor.GetMACDOut(new MovingAverageConvergenceDivergence(code, targetDate, firstPeriod, secondPeriod, triggerPeriod, numberOfData));
            }
            if (isStochasticsChecked)
            {
                stochasticsOut = IndicatorDataPreprocessor.GetStochasticsOut(new Stochastics(code, targetDate, fastKPeriod, fastDPeriod, slowDPeriod, numberOfData));
            }
            if (isWilliamsRChecked)
            {
                williamsROut = IndicatorDataPreprocessor.GetWilliamsROut(WilliamsR.Wsr(code, targetDate, williamsRPeriod, numberOfData));
            }
            if (isRSIChecked)
            {
                rsiOut = IndicatorDataPreprocessor.GetRSIOut(RelativeStrengthIndex.Rsi(code, targetDate, rsiPeriod, numberOfData));
            }
            closesOut = IndicatorDataPreprocessor.GetClosesOut(numberOfData, data);

            int minRowCount = 1000000;

            if (smaOut != null)
            {
                minRowCount = smaOut.Length;
            }
            if (wmaOut != null)
            {
                minRowCount = minRowCount < wmaOut.Length ? minRowCount : wmaOut.Length;
            }
            if (emaOut != null)
            {
                minRowCount = minRowCount < emaOut.Length ? minRowCount : emaOut.Length;
            }
            if (macdOut != null)
            {
                minRowCount = minRowCount < macdOut.Length ? minRowCount : macdOut.Length;
            }
            if (rsiOut != null)
            {
                minRowCount = minRowCount < rsiOut.Length ? minRowCount : rsiOut.Length;
            }
            if (williamsROut != null)
            {
                minRowCount = minRowCount < williamsROut.Length ? minRowCount : williamsROut.Length;
            }
            if (stochasticsOut != null)
            {
                minRowCount = minRowCount < stochasticsOut.Length ? minRowCount : stochasticsOut.Length;
            }
            if (closesOut != null)
            {
                minRowCount = minRowCount < closesOut.Length ? minRowCount : closesOut.Length;
            }

            var fv = new FeatureVector();

            if (isSMAChecked)
            {
                fv.AddColumn("SMA", smaOut.Select(p => (object)p.ToString(CultureInfo.InvariantCulture)).Take(minRowCount).ToArray());
            }
            if (isWMAChecked)
            {
                fv.AddColumn("WMA", wmaOut.Select(p => (object)p.ToString(CultureInfo.InvariantCulture)).Take(minRowCount).ToArray());
            }
            if (isEMAChecked)
            {
                fv.AddColumn("EMA", emaOut.Select(p => (object)p.ToString(CultureInfo.InvariantCulture)).Take(minRowCount).ToArray());
            }
            if (isMACDChecked)
            {
                fv.AddColumn("MACD", macdOut.Select(p => (object)p.ToString(CultureInfo.InvariantCulture)).Take(minRowCount).ToArray());
            }
            if (isRSIChecked)
            {
                fv.AddColumn("RSI", rsiOut.Select(p => (object)p.ToString(CultureInfo.InvariantCulture)).Take(minRowCount).ToArray());
            }
            if (isWilliamsRChecked)
            {
                fv.AddColumn("WilliamsR", williamsROut.Select(p => (object)p.ToString(CultureInfo.InvariantCulture)).Take(minRowCount).ToArray());
            }
            if (isStochasticsChecked)
            {
                fv.AddColumn("Stochastics", stochasticsOut.Select(p => (object)p.ToString(CultureInfo.InvariantCulture)).Take(minRowCount).ToArray());
            }
            fv.AddColumn("label", closesOut.Select(p => (object)string.Format("{0:0.0}", p).ToString(CultureInfo.InvariantCulture)).Take(minRowCount).ToArray());

            var training = new FeatureVector();
            var test     = new FeatureVector();
            int count    = fv.Values[0].Length;

            for (int i = 0; i < fv.ColumnName.Count; i++)
            {
                training.AddColumn(fv.ColumnName[i], fv.Values[i].Take((int)(count * trainingSetPercentage)).ToArray());
            }

            for (int i = 0; i < fv.ColumnName.Count; i++)
            {
                test.AddColumn(fv.ColumnName[i], fv.Values[i].Skip((int)(count * trainingSetPercentage)).Take(count).ToArray()); // Take(count) means take the rest of all elements, number of the rest of the elements is smaller than count.
            }

            if (numFolds > 0)
            {
                BinaryClassificationEvaluator bce1    = new BinaryClassificationEvaluator();
                LinearRegression    linearRegression  = new LinearRegression();
                CrossValidator      cvLinReg          = new CrossValidator(linearRegression, bce1, numFolds);
                CrossValidatorModel cvLinRegModel     = (CrossValidatorModel)cvLinReg.Fit(training);
                FeatureVector       linRegPredictions = cvLinRegModel.transform(test);
                bce1.evaluate(linRegPredictions);
                linRegAcc = bce1.Accuracy;

                BinaryClassificationEvaluator bce2 = new BinaryClassificationEvaluator();
                LogisticRegression            logisticRegression = new LogisticRegression();
                CrossValidator      cvLogReg          = new CrossValidator(logisticRegression, bce2, numFolds);
                CrossValidatorModel cvLogRegModel     = (CrossValidatorModel)cvLogReg.Fit(training);
                FeatureVector       logRegPredictions = cvLogRegModel.transform(test);
                bce2.evaluate(logRegPredictions);
                logRegAcc = bce2.Accuracy;

                BinaryClassificationEvaluator bce3    = new BinaryClassificationEvaluator();
                NaiveBayes          naiveBayes        = new NaiveBayes();
                CrossValidator      cvNaiBay          = new CrossValidator(naiveBayes, bce3, numFolds);
                CrossValidatorModel cvNaiBayModel     = (CrossValidatorModel)cvNaiBay.Fit(training);
                FeatureVector       naiBayPredictions = cvNaiBayModel.transform(test);
                bce3.evaluate(naiBayPredictions);
                naiBayAcc = bce3.Accuracy;
            }
            else
            {
                BinaryClassificationEvaluator bce1          = new BinaryClassificationEvaluator();
                LinearRegression      linearRegression      = new LinearRegression();
                LinearRegressionModel linearRegressionModel = (LinearRegressionModel)linearRegression.Fit(training);
                FeatureVector         linRegPredictions     = linearRegressionModel.transform(test);
                bce1.evaluate(linRegPredictions);
                linRegAcc = bce1.Accuracy;

                BinaryClassificationEvaluator bce2 = new BinaryClassificationEvaluator();
                LogisticRegression            logicticRegression      = new LogisticRegression();
                LogisticRegressionModel       logisticRegressionModel = (LogisticRegressionModel)logicticRegression.Fit(training);
                FeatureVector logRegPredictions = logisticRegressionModel.transform(test);
                bce2.evaluate(logRegPredictions);
                logRegAcc = bce2.Accuracy;

                BinaryClassificationEvaluator bce3 = new BinaryClassificationEvaluator();
                NaiveBayes      naiveBayes         = new NaiveBayes();
                NaiveBayesModel naiveBayesModel    = (NaiveBayesModel)naiveBayes.Fit(training);
                FeatureVector   naiBayPredictions  = naiveBayesModel.transform(test);
                bce3.evaluate(naiBayPredictions);
                naiBayAcc = bce3.Accuracy;
            }

            labelForLinRegAcc.Text = linRegAcc.ToString();
            labelForLogRegAcc.Text = logRegAcc.ToString();
            labelForNaiBayAcc.Text = naiBayAcc.ToString();

            panelForResults.BringToFront();
        }
        /// <summary>
        /// Cross-validates a multi-value-split random forest on the cleaned Titanic
        /// data set (mixed discrete/numeric features) and asserts that the average
        /// classification accuracy for the "Survived" label is at least 0.75.
        /// </summary>
        public void DiscreteClassification_MixedFeatures_MultiValueSplits_CleanedTitanicData()
        {
            // Given
            var randomForestBuilder = new RandomForestModelBuilder<object>(
                multiValueTreeBuilderWithBetterNumercValsHandler,
                new DecisionTreePredictor<object>(),
                new ConfusionMatrixBuilder<object>(),
                // sqrt(featureCount) rounded away from zero — presumably the per-tree
                // feature sample size (same lambda shape as the congress-voting test).
                i => (int)Math.Round(Math.Sqrt(i), MidpointRounding.AwayFromZero),
                () => new DecisionTreeModelBuilderParams(false, true));
            var randomForestPredictor = new RandomForestPredictor<object>(new DecisionTreePredictor<object>());

            // Remove identifier/derived columns so they cannot influence the model.
            var baseData = TestDataBuilder.ReadTitanicData();
            baseData = baseData.GetSubsetByColumns(baseData.ColumnNames.Except(new[] { "FarePerPerson", "PassengerId", "FamilySize" }).ToList());
            var crossValidator = new CrossValidator<object>();

            // When: a single validation fold with a 75% training split.
            var accuracy = crossValidator.CrossValidate(
                randomForestBuilder,
                new RandomForestParams(200, 10),
                randomForestPredictor,
                new ConfusionMatrixBuilder<object>(),
                baseData,
                "Survived",
                0.75,
                1);

            // Then
            Assert.IsTrue(accuracy.Select(acc => acc.Accuracy).Average() >= 0.75);
        }
Example #36
0
        /// <summary>
        /// Regression test for k-fold cross-validation of the sentiment pipeline:
        /// pins the exact aggregate metrics (average and std. deviation over folds),
        /// the per-fold binary-classification metrics and confusion matrices, and the
        /// predictions produced by each fold's model on a small test set.
        /// </summary>
        public void CrossValidateSentimentModelTest()
        {
            var pipeline = PreparePipeline();

            var cv = new CrossValidator().CrossValidate <SentimentData, SentimentPrediction>(pipeline);

            // BinaryClassificationMetrics layout: [0] = average over folds,
            // [1] = std. deviation over folds, [2..] = one entry per fold (2 folds here).
            Assert.Equal(2, cv.PredictorModels.Count());
            Assert.Null(cv.ClassificationMetrics);
            Assert.Null(cv.RegressionMetrics);
            Assert.NotNull(cv.BinaryClassificationMetrics);
            Assert.Equal(4, cv.BinaryClassificationMetrics.Count());

            // Average of all folds. (The trailing integer argument to Assert.Equal is
            // the decimal precision used for the floating-point comparison.)
            var metrics = cv.BinaryClassificationMetrics[0];

            Assert.Equal(0.57023626091422708, metrics.Accuracy, 4);
            Assert.Equal(0.54960689910161487, metrics.Auc, 1);
            Assert.Equal(0.67048277219704255, metrics.Auprc, 2);
            Assert.Equal(0, metrics.Entropy, 3);
            Assert.Equal(0.68942642723130532, metrics.F1Score, 4);
            Assert.Equal(0.97695909611968434, metrics.LogLoss, 3);
            Assert.Equal(-3.050726259114541, metrics.LogLossReduction, 3);
            Assert.Equal(0.37553879310344829, metrics.NegativePrecision, 3);
            Assert.Equal(0.25683962264150945, metrics.NegativeRecall, 3);
            Assert.Equal(0.63428539173628362, metrics.PositivePrecision, 3);
            Assert.Equal(0.75795196364816619, metrics.PositiveRecall);
            // Aggregate entries carry no confusion matrix — only per-fold entries do.
            Assert.Null(metrics.ConfusionMatrix);

            // Std. deviation over the folds.
            metrics = cv.BinaryClassificationMetrics[1];
            Assert.Equal(0.039933230611196011, metrics.Accuracy, 4);
            Assert.Equal(0.021066177821462407, metrics.Auc, 1);
            Assert.Equal(0.045842033921572725, metrics.Auprc, 2);
            Assert.Equal(0, metrics.Entropy, 3);
            Assert.Equal(0.030085767890644915, metrics.F1Score, 4);
            Assert.Equal(0.032906777175141941, metrics.LogLoss, 3);
            Assert.Equal(0.86311349745170118, metrics.LogLossReduction, 3);
            Assert.Equal(0.030711206896551647, metrics.NegativePrecision, 3);
            Assert.Equal(0.068160377358490579, metrics.NegativeRecall, 3);
            Assert.Equal(0.051761119891622735, metrics.PositivePrecision, 3);
            Assert.Equal(0.0015417072379052127, metrics.PositiveRecall);
            Assert.Null(metrics.ConfusionMatrix);

            // Fold 1: metrics plus the full confusion matrix.
            metrics = cv.BinaryClassificationMetrics[2];
            Assert.Equal(0.53030303030303028, metrics.Accuracy, 4);
            Assert.Equal(0.52854072128015284, metrics.Auc, 1);
            Assert.Equal(0.62464073827546951, metrics.Auprc, 2);
            Assert.Equal(0, metrics.Entropy, 3);
            Assert.Equal(0.65934065934065933, metrics.F1Score, 4);
            Assert.Equal(1.0098658732948276, metrics.LogLoss, 3);
            Assert.Equal(-3.9138397565662424, metrics.LogLossReduction, 3);
            Assert.Equal(0.34482758620689657, metrics.NegativePrecision, 3);
            Assert.Equal(0.18867924528301888, metrics.NegativeRecall, 3);
            Assert.Equal(0.58252427184466016, metrics.PositivePrecision, 3);
            Assert.Equal(0.759493670886076, metrics.PositiveRecall);

            var matrix = metrics.ConfusionMatrix;

            Assert.Equal(2, matrix.Order);
            Assert.Equal(2, matrix.ClassNames.Count);
            Assert.Equal("positive", matrix.ClassNames[0]);
            Assert.Equal("negative", matrix.ClassNames[1]);

            // The matrix is addressable both by numeric index and by class name;
            // both views must agree on every cell.
            Assert.Equal(60, matrix[0, 0]);
            Assert.Equal(60, matrix["positive", "positive"]);
            Assert.Equal(19, matrix[0, 1]);
            Assert.Equal(19, matrix["positive", "negative"]);

            Assert.Equal(43, matrix[1, 0]);
            Assert.Equal(43, matrix["negative", "positive"]);
            Assert.Equal(10, matrix[1, 1]);
            Assert.Equal(10, matrix["negative", "negative"]);

            // Fold 2.
            metrics = cv.BinaryClassificationMetrics[3];
            Assert.Equal(0.61016949152542377, metrics.Accuracy, 4);
            Assert.Equal(0.57067307692307689, metrics.Auc, 1);
            Assert.Equal(0.71632480611861549, metrics.Auprc, 2);
            Assert.Equal(0, metrics.Entropy, 3);
            Assert.Equal(0.71951219512195119, metrics.F1Score, 4);
            Assert.Equal(0.94405231894454111, metrics.LogLoss, 3);
            Assert.Equal(-2.1876127616628396, metrics.LogLossReduction, 3);
            Assert.Equal(0.40625, metrics.NegativePrecision, 3);
            Assert.Equal(0.325, metrics.NegativeRecall, 3);
            Assert.Equal(0.686046511627907, metrics.PositivePrecision, 3);
            Assert.Equal(0.75641025641025639, metrics.PositiveRecall);

            matrix = metrics.ConfusionMatrix;
            Assert.Equal(2, matrix.Order);
            Assert.Equal(2, matrix.ClassNames.Count);
            Assert.Equal("positive", matrix.ClassNames[0]);
            Assert.Equal("negative", matrix.ClassNames[1]);

            Assert.Equal(59, matrix[0, 0]);
            Assert.Equal(59, matrix["positive", "positive"]);
            Assert.Equal(19, matrix[0, 1]);
            Assert.Equal(19, matrix["positive", "negative"]);

            Assert.Equal(27, matrix[1, 0]);
            Assert.Equal(27, matrix["negative", "positive"]);
            Assert.Equal(13, matrix[1, 1]);
            Assert.Equal(13, matrix["negative", "negative"]);

            // Both fold models must classify the two sample sentiments as positive.
            var sentiments  = GetTestData();
            var predictions = cv.PredictorModels[0].Predict(sentiments);

            Assert.Equal(2, predictions.Count());
            Assert.True(predictions.ElementAt(0).Sentiment.IsTrue);
            Assert.True(predictions.ElementAt(1).Sentiment.IsTrue);

            predictions = cv.PredictorModels[1].Predict(sentiments);
            Assert.Equal(2, predictions.Count());
            Assert.True(predictions.ElementAt(0).Sentiment.IsTrue);
            Assert.True(predictions.ElementAt(1).Sentiment.IsTrue);
        }
Example #37
0
        /// <summary>
        /// Demo entry point: loads a data set, restores a previously trained neural
        /// network from XML, splits the data into train/test partitions and evaluates
        /// the model on the test set. Commented lines show the alternative
        /// train-from-scratch and persistence paths.
        /// </summary>
        static void Main(string[] args)
        {
            // Path to the CSV data set (second ctor argument presumably flags a
            // header row — confirm against DataSet's constructor).
            string datasetPath = @"C:\Users\Bruker\Desktop\NutrioxDataset.csv";
            //string datasetPath = @"C:\Users\kevin\Desktop\squaredtest.csv";
            //string datasetPath = @"C:\Users\Bruker\Desktop\-5to5-200Rows.csv";

            DataSet.DataSet dataset = new DataSet.DataSet(datasetPath, true);

            // Min-max normalization, no categorical encoding.
            dataset.PreProcessDataSet(NormalizationType.MinMax, 2, EncodingType.None, null);

            // Network topology: 2 inputs, two hidden layers of 5 nodes each, 1 output.
            var layers = new LayerStructure()
            {
                numberOfInputNodes = 2,
                HiddenLayerList = new List<int> { 5, 5 },
                numberOfOutputNodes = 1
            };

            // Optimization strategy (used only by the train-from-scratch path below).
            var regularizationFactory = new RegularizationStrategyFactory();
            StochasticGradientDescent optimizer = new StochasticGradientDescent(
                new SigmoidFunction(),
                new IdentityFunction(),
                new MeanSquaredError(),
                RegularizationType.None,
                regularizationFactory);

            var trainingParameters = new TrainingParameters()
            {
                epochs = 500,
                learningRate = 0.01,
                momentum = 0.01,
                RegularizationLambda = 0.00
            };

            // Restore a fitted network from XML instead of training a fresh one.
            XML serializer = new XML();
            ArtificialNeuralNetwork network = serializer.LoadNetwork(@"C:\Users\Bruker\Desktop\BestNet.xml", dataset) as ArtificialNeuralNetwork;
            //ArtificialNeuralNetwork network = new ArtificialNeuralNetwork(layers, trainingParameters, dataset, optimizer, new GaussianDistribution());

            // 70% of the rows go to training, the rest to the test set.
            network.SplitDataSetIntoTrainAndTestSets(0.7);
            //network.TrainNetwork();

            var cvStrategyFactory = new CrossValidationStrategyFactory();
            NetworkEvaluator networkEvaluator = new NetworkEvaluator(network);
            CrossValidator validator = new CrossValidator(network, networkEvaluator, cvStrategyFactory);

            // Optional cross-validation of the fitted model:
            //validator.KFold(10, 0.007);

            // Evaluate on the held-out test set (0.007 — presumably an error
            // tolerance; confirm against NetworkEvaluator).
            networkEvaluator.EvaluateNetwork(0.007);

            //--Optional--//
            // Persist the fitted model:
            //serializer.SaveNetwork(datasetPath, network);
            // Extract model information:
            //network.SaveListOfErrors();
            //network.GetApproximatedFunction(network.SavePath + "/Function.txt");

            Console.ReadLine();
        }