コード例 #1
0
        /// <summary>
        /// Starts an AutoML regression experiment, schedules cancellation 100 ms after a
        /// completed trial is reported through the context log, and asserts that the result
        /// returned by <c>RunAsync</c> has a metric greater than zero.
        /// </summary>
        public async Task AutoMLExperiment_return_current_best_trial_when_ct_is_canceled_with_trial_completed_Async()
        {
            var mlContext = new MLContext(1);
            var regressionPipeline = mlContext.Transforms
                                              .Concatenate("Features", "Features")
                                              .Append(mlContext.Auto().Regression());

            // NOTE(review): the meaning of the second constructor argument is defined by
            // DummyTrialRunner, which is not visible here - confirm before changing it.
            var trialRunner = new DummyTrialRunner(mlContext, 1);
            var autoMLExperiment = mlContext.Auto().CreateExperiment();

            // Training budget is 100 seconds; the token below is what this test uses to stop the run.
            autoMLExperiment.SetPipeline(regressionPipeline)
                            .SetDataset(GetDummyData(), 10)
                            .SetEvaluateMetric(RegressionMetric.RootMeanSquaredError, "Label")
                            .SetTrainingTimeInSeconds(100)
                            .SetTrialRunner(trialRunner);

            var tokenSource = new CancellationTokenSource();

            mlContext.Log += (sender, args) =>
            {
                if (!args.RawMessage.Contains("Update Completed Trial"))
                {
                    return;
                }

                // A trial has completed: request cancellation 100 ms from now.
                tokenSource.CancelAfter(100);
            };

            var trialResult = await autoMLExperiment.RunAsync(tokenSource.Token);

            trialResult.Metric.Should().BeGreaterThan(0);
        }
コード例 #2
0
        /// <summary>
        /// Starts an AutoML regression experiment, cancels the token as soon as a running trial
        /// is reported through the context log, and asserts that <c>RunAsync</c> throws exactly
        /// a <see cref="TimeoutException"/>.
        /// </summary>
        public async Task AutoMLExperiment_throw_timeout_exception_when_ct_is_canceled_and_no_trial_completed_Async()
        {
            var mlContext = new MLContext(1);
            var regressionPipeline = mlContext.Transforms
                                              .Concatenate("Features", "Features")
                                              .Append(mlContext.Auto().Regression());

            // NOTE(review): the meaning of the second constructor argument is defined by
            // DummyTrialRunner, which is not visible here - confirm before changing it.
            var trialRunner = new DummyTrialRunner(mlContext, 5);
            var autoMLExperiment = mlContext.Auto().CreateExperiment();

            autoMLExperiment.SetPipeline(regressionPipeline)
                            .SetDataset(GetDummyData(), 10)
                            .SetEvaluateMetric(RegressionMetric.RootMeanSquaredError, "Label")
                            .SetTrainingTimeInSeconds(1)
                            .SetTrialRunner(trialRunner);

            var tokenSource = new CancellationTokenSource();

            mlContext.Log += (sender, args) =>
            {
                if (!args.RawMessage.Contains("Update Running Trial"))
                {
                    return;
                }

                // A trial has started running: cancel immediately.
                tokenSource.Cancel();
            };

            var runExperiment = async () => await autoMLExperiment.RunAsync(tokenSource.Token);

            await runExperiment.Should().ThrowExactlyAsync<TimeoutException>();
        }
コード例 #3
0
        /// <summary>
        /// Runs a multiclass classification experiment on the flowers dataset with an 80/20
        /// train/test split, then checks the best run's micro accuracy and the type of the
        /// predicted label column.
        /// </summary>
        public void AutoFitImageClassificationTrainTest()
        {
            var mlContext = new MLContext();
            var flowersPath = DatasetUtil.GetFlowersDataset();
            var inference = mlContext.Auto().InferColumns(flowersPath, "Label");
            var loader = mlContext.Data.CreateTextLoader(inference.TextLoaderOptions);
            var shuffled = mlContext.Data.ShuffleRows(loader.Load(flowersPath), seed: 1);

            // Column names captured before the split; both halves are trimmed back to this set.
            var columnsToKeep = shuffled.Schema.Select(column => column.Name);
            var split = mlContext.Data.TrainTestSplit(shuffled, testFraction: 0.2, seed: 1);
            var trainPart = SplitUtil.DropAllColumnsExcept(mlContext, split.TrainSet, columnsToKeep);
            var testPart = SplitUtil.DropAllColumnsExcept(mlContext, split.TestSet, columnsToKeep);

            var experimentResult = mlContext.Auto()
                                            .CreateMulticlassClassificationExperiment(0)
                                            .Execute(trainPart, testPart, inference.ColumnInformation);

            // Known issue: accuracy is degraded on platforms other than Windows and macOS (e.g. Ubuntu).
            var isWindowsOrMac = RuntimeInformation.IsOSPlatform(OSPlatform.Windows)
                                 || RuntimeInformation.IsOSPlatform(OSPlatform.OSX);
            var expectedMicroAccuracy = isWindowsOrMac ? 1 : 0.778;

            Assert.Equal(expectedMicroAccuracy, experimentResult.BestRun.ValidationMetrics.MicroAccuracy, 3);

            var scored = experimentResult.BestRun.Model.Transform(shuffled);
            var predictedLabelType = scored.Schema[DefaultColumnNames.PredictedLabel].Type;

            Assert.Equal(TextDataViewType.Instance, predictedLabelType);
        }
コード例 #4
0
ファイル: AutoFitTests.cs プロジェクト: obrm2/machinelearning
        /// <summary>
        /// Runs a binary classification experiment capped at 15 seconds. When more than one run
        /// finished without an exception and the last run failed, asserts that the failure is the
        /// expected cancellation and that the best model can still transform the training data.
        /// </summary>
        public void AutoFitMaxExperimentTimeTest()
        {
            // A single binary classification experiment takes less than 5 seconds.
            // System.OperationCanceledException is thrown when ongoing experiment
            // is canceled and at least one model has been generated.
            // BinaryClassificationExperiment includes LightGBM, which is not 32-bit
            // compatible.
            var context         = new MLContext(1);
            var dataPath        = DatasetUtil.GetUciAdultDataset();
            var columnInference = context.Auto().InferColumns(dataPath, DatasetUtil.UciAdultLabel);
            var textLoader      = context.Data.CreateTextLoader(columnInference.TextLoaderOptions);
            var trainData       = textLoader.Load(dataPath);
            var experiment      = context.Auto()
                                  .CreateBinaryClassificationExperiment(15)
                                  .Execute(trainData, new ColumnInformation()
            {
                LabelColumnName = DatasetUtil.UciAdultLabel
            });

            // Ensure the (last) model that was training when maximum experiment time was reached has been stopped,
            // and that its MLContext has been canceled. Sometimes during CI unit testing, the host machines can run slower than normal, which
            // can increase the run time of unit tests, and may not produce multiple runs.
            //
            // The previous check used Select(...).Count(), which only projects each run to a bool and
            // therefore counted every run. Count(predicate) counts the runs that actually succeeded.
            var successfulRunCount = experiment.RunDetails.Count(r => r.Exception == null);
            var lastException      = experiment.RunDetails.LastOrDefault()?.Exception;

            if (successfulRunCount > 1 && lastException != null)
            {
                Assert.True(lastException.Message.Contains("Operation was canceled"),
                            "Training process was not successfully canceled after maximum experiment time was reached.");

                // Ensure that the best found model can still run after maximum experiment time was reached.
                experiment.BestRun.Model.Transform(trainData);
            }
        }
コード例 #5
0
        /// <summary>
        /// Infers the columns of the training file, adjusts the inferred column roles, runs a
        /// 10-second regression experiment, prints predictions from the best model, and finally
        /// refits the best estimator on the training data and saves the resulting model.
        /// </summary>
        public void Start()
        {
            // Infer the column layout (ungrouped) and load the training data with it.
            var inference = mlContext.Auto().InferColumns(
                path: TRAIN_DATA_FILEPATH,
                labelColumnName: "next",
                groupColumns: false);

            var loader = mlContext.Data.CreateTextLoader(inference.TextLoaderOptions);
            trainData = loader.Load(TRAIN_DATA_FILEPATH);

            // Adjust the inferred column roles:
            // "productId" and "year" become categorical, "units" is ignored.
            columnInformation = inference.ColumnInformation;

            columnInformation.CategoricalColumnNames.Add("productId");
            columnInformation.NumericColumnNames.Remove("productId");

            columnInformation.CategoricalColumnNames.Add("year");
            columnInformation.NumericColumnNames.Remove("year");

            columnInformation.NumericColumnNames.Remove("units");
            columnInformation.IgnoredColumnNames.Add("units");

            var settings = new RegressionExperimentSettings();
            settings.MaxExperimentTimeInSeconds = 10;
            settings.OptimizingMetric = RegressionMetric.RootMeanSquaredError;
            settings.CacheDirectory = new DirectoryInfo(CACHE_DIRECTORY);
            settings.CancellationToken = cancelationTokenSource.Token;

            // Exclude the Ols trainer from the experiment.
            settings.Trainers.Remove(RegressionTrainer.Ols);

            var regressionExperiment = mlContext.Auto().CreateRegressionExperiment(settings);
            var outcome = regressionExperiment.Execute(
                trainData: trainData,
                columnInformation: columnInformation,
                progressHandler: new RegressionProgressHandler(),
                preFeaturizer: null);

            var bestModel = outcome.BestRun.Model;
            var bestEstimator = outcome.BestRun.Estimator;

            // Batch predictions over the training data using the best model.
            var scored = bestModel.Transform(trainData);

            PrintPredictions(scored);
            PrintPredictionsEnumerable(scored);

            // Refit the best estimator on the training data and persist that model.
            var refitModel = bestEstimator.Fit(trainData);
            mlContext.Model.Save(refitModel, trainData.Schema, MODEL_FILEPATH);
            Console.WriteLine("Done");
        }
コード例 #6
0
        /// <summary>
        /// Runs a binary classification experiment capped at 15 seconds. When more than one run
        /// finished without an exception and the last run failed, asserts that the failure (or every
        /// inner failure of an <see cref="AggregateException"/>) is the expected cancellation, and
        /// that the best model can still transform the training data.
        /// </summary>
        public void AutoFitMaxExperimentTimeTest()
        {
            // A single binary classification experiment takes less than 5 seconds.
            // System.OperationCanceledException is thrown when ongoing experiment
            // is canceled and at least one model has been generated.
            // BinaryClassificationExperiment includes LightGBM, which is not 32-bit
            // compatible.
            var context         = new MLContext(1);
            var dataPath        = DatasetUtil.GetUciAdultDataset();
            var columnInference = context.Auto().InferColumns(dataPath, DatasetUtil.UciAdultLabel);
            var textLoader      = context.Data.CreateTextLoader(columnInference.TextLoaderOptions);
            var trainData       = textLoader.Load(dataPath);
            var experiment      = context.Auto()
                                  .CreateBinaryClassificationExperiment(15)
                                  .Execute(trainData, new ColumnInformation()
            {
                LabelColumnName = DatasetUtil.UciAdultLabel
            });

            // Ensure the (last) model that was training when maximum experiment time was reached has been stopped,
            // and that its MLContext has been canceled. Sometimes during CI unit testing, the host machines can run slower than normal, which
            // can increase the run time of unit tests, and may not produce multiple runs.
            //
            // The previous check used Select(...).Count(), which only projects each run to a bool and
            // therefore counted every run. Count(predicate) counts the runs that actually succeeded.
            var successfulRunCount = experiment.RunDetails.Count(r => r.Exception == null);
            var lastException      = experiment.RunDetails.LastOrDefault()?.Exception;

            if (successfulRunCount > 1 && lastException != null)
            {
                const string expectedExceptionMessage = "Operation was canceled";

                // Sometimes multiple threads might throw the same "Operation was cancelled"
                // exception and all of them are grouped inside an AggregateException.
                // In that case every inner exception must be the expected one.
                bool containsMessage = lastException is AggregateException lastAggregateException
                    ? lastAggregateException.Flatten().InnerExceptions.All(ex => ex.Message.Contains(expectedExceptionMessage))
                    : lastException.Message.Contains(expectedExceptionMessage);

                Assert.True(containsMessage,
                            $"Did not obtain '{expectedExceptionMessage}' error. " +
                            $"Obtained unexpected error of type {lastException.GetType()} with message: {lastException.Message}");

                // Ensure that the best found model can still run after maximum experiment time was reached.
                experiment.BestRun.Model.Transform(trainData);
            }
        }
コード例 #7
0
        /// <summary>
        /// Runs a regression experiment under the given culture and asserts that the best
        /// R-squared exceeds 0.9; for cultures other than "en-US" it also asserts that at least
        /// 75 runs were produced. The thread's original culture is restored afterwards.
        /// </summary>
        /// <param name="culture">Culture name to install on the current thread for the test.</param>
        public void AutoFitRegressionTest(string culture)
        {
            var originalCulture = Thread.CurrentThread.CurrentCulture;

            try
            {
                Thread.CurrentThread.CurrentCulture = new CultureInfo(culture);

                // If users run AutoML with a different locale, sometimes
                // the sweeper encounters problems when parsing some strings.
                // So testing in another culture is necessary.
                // Furthermore, these issues might only occur after ~70
                // iterations, so more experiment time is needed for this to
                // occur.
                uint experimentTime = (uint)(culture == "en-US" ? 0 : 180);

                var experimentSettings = new RegressionExperimentSettings {
                    MaxExperimentTimeInSeconds = experimentTime
                };
                if (!Environment.Is64BitProcess)
                {
                    // LightGBM isn't available on x86 machines
                    experimentSettings.Trainers.Remove(RegressionTrainer.LightGbm);
                }

                var context         = new MLContext(1);
                var dataPath        = DatasetUtil.GetMlNetGeneratedRegressionDataset();
                var columnInference = context.Auto().InferColumns(dataPath, DatasetUtil.MlNetGeneratedRegressionLabel);
                var textLoader      = context.Data.CreateTextLoader(columnInference.TextLoaderOptions);
                var trainData       = textLoader.Load(dataPath);
                var validationData  = context.Data.TakeRows(trainData, 20);
                trainData = context.Data.SkipRows(trainData, 20);
                var result = context.Auto()
                             .CreateRegressionExperiment(experimentSettings)
                             .Execute(trainData, validationData,
                                      new ColumnInformation()
                {
                    LabelColumnName = DatasetUtil.MlNetGeneratedRegressionLabel
                });

                // Compare the best R-squared against the threshold. The comparison used to sit inside
                // the selector (taking the Max of booleans) and dereferenced ValidationMetrics directly,
                // which throws for a run that has no metrics. The nullable Max skips such runs.
                Assert.True(result.RunDetails.Max(i => i?.ValidationMetrics?.RSquared) > 0.9);

                // Ensure experimentTime allows enough iterations to fully test the internationalization code
                // If the below assertion fails, increase the experiment time so the number of iterations is met
                var runCount = result.RunDetails.Count();
                Assert.True(culture == "en-US" || runCount >= 75, $"RunDetails.Count() = {runCount}, below 75");
            }
            finally
            {
                Thread.CurrentThread.CurrentCulture = originalCulture;
            }
        }
コード例 #8
0
        /// <summary>
        /// Runs a regression experiment under the given culture, limited by the MaxModels setting
        /// (1 for "en-US", 75 otherwise), and asserts that the best R-squared exceeds 0.99 and, for
        /// other cultures, that exactly 75 runs were produced. The original culture is restored afterwards.
        /// </summary>
        /// <param name="culture">Culture name to install on the current thread for the test.</param>
        public void AutoFitRegressionTest(string culture)
        {
            var savedCulture = Thread.CurrentThread.CurrentCulture;

            try
            {
                Thread.CurrentThread.CurrentCulture = new CultureInfo(culture);

                // Under a non-default locale the sweeper can fail to parse some strings,
                // and those failures may only appear after roughly 70 iterations, so
                // non "en-US" cultures are given 75 models through the MaxModels setting.
                var isEnUs = culture == "en-US";
                var settings = new RegressionExperimentSettings();
                settings.MaxModels = isEnUs ? 1 : 75;

                // LightGBM isn't available on x86 machines.
                if (!Environment.Is64BitProcess)
                {
                    settings.Trainers.Remove(RegressionTrainer.LightGbm);
                }

                var mlContext = new MLContext(1);
                var datasetPath = DatasetUtil.GetMlNetGeneratedRegressionDataset();
                var inference = mlContext.Auto().InferColumns(datasetPath, DatasetUtil.MlNetGeneratedRegressionLabel);
                var loader = mlContext.Data.CreateTextLoader(inference.TextLoaderOptions);
                var allRows = loader.Load(datasetPath);

                // First 20 rows validate; the remaining rows train.
                var validationRows = mlContext.Data.TakeRows(allRows, 20);
                var trainingRows = mlContext.Data.SkipRows(allRows, 20);

                var columnInfo = new ColumnInformation()
                {
                    LabelColumnName = DatasetUtil.MlNetGeneratedRegressionLabel
                };
                var experimentResult = mlContext.Auto()
                                                .CreateRegressionExperiment(settings)
                                                .Execute(trainingRows, validationRows, columnInfo);

                var bestRSquared = experimentResult.RunDetails.Max(run => run?.ValidationMetrics?.RSquared);
                Assert.True(bestRSquared > 0.99);

                // Verify that the MaxModels setting was honoured.
                Assert.True(isEnUs || experimentResult.RunDetails.Count() == 75,
                            $"RunDetails.Count() = {experimentResult.RunDetails.Count()}, is not 75");
            }
            finally
            {
                Thread.CurrentThread.CurrentCulture = savedCulture;
            }
        }
コード例 #9
0
        /// <summary>
        /// Runs a 1-second binary classification experiment on the UCI Adult dataset with 10
        /// cross-validation folds and asserts that the lowest fold accuracy of the best run is
        /// above 0.70 and that the best run exposes an estimator and a trainer name.
        /// </summary>
        public void AutoFit_UCI_Adult_CrossValidation_10_Test()
        {
            var mlContext = new MLContext(1);
            var adultPath = DatasetUtil.GetUciAdultDataset();
            var inference = mlContext.Auto().InferColumns(adultPath, DatasetUtil.UciAdultLabel);
            var loader = mlContext.Data.CreateTextLoader(inference.TextLoaderOptions);
            var rows = loader.Load(adultPath);

            var binaryExperiment = mlContext.Auto().CreateBinaryClassificationExperiment(1);
            var cvResult = binaryExperiment.Execute(rows, 10, DatasetUtil.UciAdultLabel);
            var bestRun = cvResult.BestRun;

            var worstFoldAccuracy = bestRun.Results.Min(fold => fold.ValidationMetrics.Accuracy);

            Assert.True(worstFoldAccuracy > 0.70);
            Assert.NotNull(bestRun.Estimator);
            Assert.NotNull(bestRun.TrainerName);
        }
コード例 #10
0
        /// <summary>
        /// Runs a multiclass classification experiment with 5 cross-validation folds on the trivial
        /// multiclass dataset and checks the first fold of the best run: micro accuracy of at
        /// least 0.7 and a predicted label column of type <c>Single</c>.
        /// </summary>
        public void AutoFitMultiTest()
        {
            var mlContext = new MLContext();
            var inference = mlContext.Auto().InferColumns(
                DatasetUtil.TrivialMulticlassDatasetPath,
                DatasetUtil.TrivialMulticlassDatasetLabel);
            var loader = mlContext.Data.CreateTextLoader(inference.TextLoaderOptions);
            var rows = loader.Load(DatasetUtil.TrivialMulticlassDatasetPath);

            var multiclassExperiment = mlContext.Auto().CreateMulticlassClassificationExperiment(0);
            var cvResult = multiclassExperiment.Execute(rows, 5, DatasetUtil.TrivialMulticlassDatasetLabel);

            var firstFoldAccuracy = cvResult.BestRun.Results.First().ValidationMetrics.MicroAccuracy;
            Assert.True(firstFoldAccuracy >= 0.7);

            var scored = cvResult.BestRun.Results.First().Model.Transform(rows);
            var predictedLabelType = scored.Schema[DefaultColumnNames.PredictedLabel].Type;

            Assert.Equal(NumberDataViewType.Single, predictedLabelType);
        }
コード例 #11
0
        /// <summary>
        /// Infers columns of the UCI Adult dataset with and without grouping. Asserts that every
        /// ungrouped column maps to a single source range whose Min equals its Max, and that
        /// grouping yields fewer columns than not grouping.
        /// </summary>
        public void UnGroupReturnsMoreColumnsThanGroup()
        {
            var adultPath = DatasetUtil.DownloadUciAdultDataset();
            var mlContext = new MLContext();

            var ungrouped = mlContext.Auto().InferColumns(adultPath, DatasetUtil.UciAdultLabel, groupColumns: false);

            foreach (var column in ungrouped.TextLoaderOptions.Columns)
            {
                var sources = column.Source;

                // An ungrouped column must not span several ranges, nor a range wider than one index.
                Assert.False(sources.Length > 1 || sources[0].Min != sources[0].Max);
            }

            var grouped = mlContext.Auto().InferColumns(adultPath, DatasetUtil.UciAdultLabel, groupColumns: true);

            var groupedColumnCount = grouped.TextLoaderOptions.Columns.Count();
            var ungroupedColumnCount = ungrouped.TextLoaderOptions.Columns.Count();

            Assert.True(groupedColumnCount < ungroupedColumnCount);
        }
コード例 #12
0
        /// <summary>
        /// Writes a console header and asks AutoML to infer the dataset's columns without grouping them.
        /// </summary>
        /// <param name="mlContext">Context whose AutoML catalog performs the inference.</param>
        /// <param name="dataPath">Path of the dataset file to inspect.</param>
        /// <param name="labelColumnName">Name of the label column.</param>
        /// <returns>The column inference results produced by AutoML.</returns>
        private static ColumnInferenceResults InferColumns(MLContext mlContext, string dataPath, string labelColumnName)
        {
            ConsoleHelper.ConsoleWriteHeader("=============== Inferring columns in dataset ===============");

            return mlContext.Auto().InferColumns(dataPath, labelColumnName, groupColumns: false);
        }
コード例 #13
0
        /// <summary>
        /// Runs an AutoML multiclass classification experiment and reports the number of models
        /// created plus a table of the top three runs ranked by micro accuracy.
        /// </summary>
        /// <param name="mlContext">Context whose AutoML catalog creates the experiment.</param>
        /// <param name="labelColumnName">Name of the label column in <paramref name="dataView"/>.</param>
        /// <param name="experimentSettings">Settings passed to the experiment.</param>
        /// <param name="progressHandler">Handler passed to the experiment as its progress handler.</param>
        /// <param name="dataView">Training data.</param>
        /// <returns>The result of the executed experiment.</returns>
        public static ExperimentResult <MulticlassClassificationMetrics> RunAutoMLExperiment(
            MLContext mlContext, string labelColumnName, MulticlassExperimentSettings experimentSettings,
            MulticlassExperimentProgressHandler progressHandler, IDataView dataView)
        {
            ConsoleHelper.ConsoleWriteHeader("=============== Running AutoML experiment ===============");
            Trace.WriteLine($"Running AutoML multiclass classification experiment for {experimentSettings.MaxExperimentTimeInSeconds} seconds...");
            var experimentResult = mlContext.Auto()
                                   .CreateMulticlassClassificationExperiment(experimentSettings)
                                   .Execute(dataView, labelColumnName, progressHandler: progressHandler);

            Trace.WriteLine(Environment.NewLine);
            Trace.WriteLine($"num models created: {experimentResult.RunDetails.Count()}");

            // Get top few runs ranked by accuracy. The query is materialized once: left deferred,
            // every Count()/ElementAt() call in the loop below would re-run the filter and the sort.
            var topRuns = experimentResult.RunDetails
                          .Where(r => r.ValidationMetrics != null && !double.IsNaN(r.ValidationMetrics.MicroAccuracy))
                          .OrderByDescending(r => r.ValidationMetrics.MicroAccuracy)
                          .Take(3)
                          .ToList();

            Trace.WriteLine("Top models ranked by accuracy --");
            CreateRow($"{"",-4} {"Trainer",-35} {"MicroAccuracy",14} {"MacroAccuracy",14} {"Duration",9}", Width);
            for (var i = 0; i < topRuns.Count; i++)
            {
                var run = topRuns[i];

                // ValidationMetrics is non-null here: the filter above already excluded runs without metrics.
                var metrics = run.ValidationMetrics;
                CreateRow($"{i,-4} {run.TrainerName,-35} {metrics.MicroAccuracy,14:F4} {metrics.MacroAccuracy,14:F4} {run.RuntimeInSeconds,9:F1}", Width);
            }
            return experimentResult;
        }
コード例 #14
0
ファイル: MyAutoML.cs プロジェクト: wenfeifei/SharesML
        /* static readonly string TrainDataPath = Path.Combine(Environment.CurrentDirectory, "Data", "winequality-data-train.csv");
         * static readonly string TestDataPath = Path.Combine(Environment.CurrentDirectory, "Data", "winequality-data-test.csv");*/
        /// <summary>
        /// Loads comma-separated training data, runs an AutoML regression experiment for the given
        /// time, evaluates the best model on a 20% slice of the data, and saves the model to
        /// <c>ModelFilePath</c>.
        /// </summary>
        /// <param name="label">Name of the label column.</param>
        /// <param name="trainDataPath">Path of the training file (with header row).</param>
        /// <param name="experimentTime">Value passed to <c>CreateRegressionExperiment</c> as the experiment time.</param>
        public static void TrainAndSave(string label, string trainDataPath, uint experimentTime)
        {
            var context = new MLContext(seed: 0);

            // Prepare the data.
            var trainingView = context.Data.LoadFromTextFile<ModelInput>(path: trainDataPath, separatorChar: ',', hasHeader: true);
            //var testData = mlContext.Data.LoadFromTextFile<ModelInput>(path: TestDataPath, separatorChar: ',', hasHeader: true);

            // NOTE(review): this test slice is taken from the same data the experiment below is
            // executed on, so the evaluation is not performed on held-out rows.
            var split = context.Data.TrainTestSplit(trainingView, testFraction: 0.2);
            var evaluationView = split.TestSet;

            var handler = new RegressionExperimentProgressHandler();
            //uint ExperimentTime = 200;

            var regressionExperiment = context.Auto().CreateRegressionExperiment(experimentTime);
            var outcome = regressionExperiment.Execute(trainingView, label, progressHandler: handler);

            //Debugger.PrintTopModels(experimentResult);

            var bestRun = outcome.BestRun;
            var bestModel = bestRun.Model;

            // Evaluate the best run. The metrics are currently only used by the
            // commented-out debug output below.
            var scored = context.Regression.Evaluate(
                bestModel.Transform(evaluationView),
                labelColumnName: label,
                scoreColumnName: "Score");

            //Debugger.PrintRegressionMetrics(best.TrainerName, metrics);

            // Save the model.
            using (var output = System.IO.File.Create(ModelFilePath))
            {
                context.Model.Save(bestModel, trainingView.Schema, output);
            }
        }
コード例 #15
0
        /// <summary>
        /// Loads the traffic-volume CSV, runs a 10-second cancellable AutoML regression experiment
        /// optimizing mean squared error, and prints the best run's trainer name and metrics.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            var context = new MLContext();
            var trafficView = context.Data.LoadFromTextFile<TrafficData>(
                GetAbsolutePath("../../../Data/Metro_Interstate_Traffic_Volume.csv"),
                hasHeader: true,
                separatorChar: ',');

            // Configure the experiment settings.
            var tokenSource = new CancellationTokenSource();
            var settings = new RegressionExperimentSettings
            {
                MaxExperimentTimeInSeconds = 10,
                CancellationToken          = tokenSource.Token,
                OptimizingMetric           = RegressionMetric.MeanSquaredError,
                CacheDirectory             = null
            };

            // Cancel experiment after the user presses any key.
            CancelExperimentAfterAnyKeyPress(tokenSource);

            // Create and execute the experiment.
            var regressionExperiment = context.Auto().CreateRegressionExperiment(settings);
            var progress = new RegressionExperimentProgressHandler();
            var outcome = regressionExperiment.Execute(trafficView, labelColumnName: "Label", progressHandler: progress);

            // Report the validation metrics of the best run.
            var bestRun = outcome.BestRun;
            var bestMetrics = bestRun.ValidationMetrics;

            Console.WriteLine($"Best Algorthm: {outcome.BestRun.TrainerName}");
            Console.WriteLine($"R-Squared: {bestMetrics.RSquared:0.##}");
            Console.WriteLine($"Root Mean Squared Error: {bestMetrics.RootMeanSquaredError:0.##}");

            Console.ReadKey();
        }
コード例 #16
0
        /// <summary>
        /// Runs a 30-minute binary classification experiment restricted to the AveragedPerceptron
        /// and LightGbm trainers, optimizing F1 score, then prints and saves the best model.
        /// </summary>
        public void Experiment()
        {
            var trainingView = GetData(_dataPath);
            var validationView = GetData(_validatePath);

            var settings = new BinaryExperimentSettings();
            settings.MaxExperimentTimeInSeconds = 30 * 60;
            settings.OptimizingMetric = BinaryClassificationMetric.F1Score;

            // Replace the default trainer list with the two trainers under test.
            settings.Trainers.Clear();
            settings.Trainers.Add(BinaryClassificationTrainer.AveragedPerceptron);
            settings.Trainers.Add(BinaryClassificationTrainer.LightGbm);

            var binaryExperiment = _context.Auto().CreateBinaryClassificationExperiment(settings);

            var outcome = binaryExperiment.Execute(
                trainData: trainingView,
                validationData: validationView,
                //columnInformation: new ColumnInformation
                //{
                //    ExampleWeightColumnName = nameof(Appointment.Weight)
                //},
                progressHandler: new ProgressHandler());

            Console.WriteLine("Experiment completed");
            Console.WriteLine();

            var bestRun = outcome.BestRun;
            ConsoleHelper.Print(bestRun.TrainerName, bestRun.ValidationMetrics);

            SaveModel(trainingView.Schema, outcome.BestRun.Model);
            Console.WriteLine("Best model saved");
        }
コード例 #17
0
 /// <summary>
 /// Builds a data view from the CSV file, creates a 600-second regression experiment, switches
 /// the default thread culture to "it-IT" and then executes the experiment. Any exception is
 /// written to the console, and the method always waits for a line of input before returning.
 /// </summary>
 private void Run()
 {
     try
     {
         var context = new MLContext();
         var rows    = ReadCsv(@"data\data.csv");
         var view    = BuildDataView(context, rows);

         var settings = new RegressionExperimentSettings();
         settings.MaxExperimentTimeInSeconds = 600;
         settings.CacheDirectory = new DirectoryInfo(@".\cache");

         var regressionExperiment = context.Auto().CreateRegressionExperiment(settings);

         // Data has already been parsed using invariant culture, so the culture
         // is only switched after loading and before the experiment executes.
         CultureInfo.DefaultThreadCurrentCulture = CultureInfo.CreateSpecificCulture("it-IT");

         var outcome = regressionExperiment.Execute(view);
         var bestRun = outcome.BestRun;
         Console.WriteLine("Done.");
     }
     catch (Exception error)
     {
         Console.WriteLine(error.Message);
         Console.WriteLine(error.StackTrace);
     }
     finally
     {
         Console.ReadLine();
     }
 }
コード例 #18
0
        /// <summary>
        /// Loads tab-separated ranking data, runs a 300-second AutoML ranking experiment optimizing
        /// nDCG with a 20% validation split, and prints the best run's trainer and average nDCG.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            var mlContext = new MLContext();

            var rankingView = mlContext.Data.LoadFromTextFile<RankingData>("./ranking.tsv", separatorChar: '\t');
            var split = mlContext.Data.TrainTestSplit(rankingView, testFraction: 0.2);

            var experimentSettings = new RankingExperimentSettings();
            experimentSettings.MaxExperimentTimeInSeconds = 300;
            experimentSettings.OptimizingMetric = RankingMetric.Ndcg;

            var rankingExperiment = mlContext.Auto().CreateRankingExperiment(experimentSettings);

            // Print the average nDCG of every reported run that carries validation metrics.
            var reporter = new Progress<RunDetail<RankingMetrics>>(run =>
            {
                if (run.ValidationMetrics == null)
                {
                    return;
                }

                Console.WriteLine($"Current trainer - {run.TrainerName} with nDCG {run.ValidationMetrics.NormalizedDiscountedCumulativeGains.Average()}");
            });

            var outcome = rankingExperiment.Execute(
                split.TrainSet,
                validationData: split.TestSet,
                progressHandler: reporter);

            var winner = outcome.BestRun;
            var gains = winner.ValidationMetrics.NormalizedDiscountedCumulativeGains;

            Console.WriteLine(Environment.NewLine);
            Console.WriteLine($"Best model {winner.TrainerName} - with nDCG {gains.Average()}");
        }
コード例 #19
0
        /// <summary>
        /// Fetches one month of hourly AAPL bars through the Yahoo finance service, runs a 20-second
        /// AutoML regression experiment optimizing mean absolute error on them, logs each reported
        /// run, and prints the best run's trainer and metrics.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static async Task Main(string[] args)
        {
            // Set up dependency injection.
            var services = new ServiceCollection()
                           .AddLogging()
                           .AddSingleton<IYahooFinanceService, YahooFinanceService>()
                           .AddHttpClient()
                           .BuildServiceProvider();

            var loggerFactory = LoggerFactory.Create(builder =>
            {
                builder.AddFilter("Microsoft", LogLevel.Warning)
                       .AddFilter("System", LogLevel.Warning)
                       .AddFilter("CandleStickMachineLearning.Program", LogLevel.Debug)
                       .AddConsole();
            });

            var logger = loggerFactory.CreateLogger<Program>();

            logger.LogInformation("Hello World!");

            // Fetch the bars the experiment is trained on.
            var financeService = services.GetService<IYahooFinanceService>();
            var bars = await financeService.GetBars("AAPL", DateTime.UtcNow.AddMonths(-1), DateTime.UtcNow, "1h");

            var mlContext = new MLContext();
            var barsView = mlContext.Data.LoadFromEnumerable<Models.Bar>(bars);

            var experimentSettings = new RegressionExperimentSettings();
            experimentSettings.MaxExperimentTimeInSeconds = 20;
            experimentSettings.OptimizingMetric = RegressionMetric.MeanAbsoluteError;

            var columnInfo = new ColumnInformation();
            columnInfo.LabelColumnName = "Label";

            // Log every reported run that carries validation metrics.
            var reporter = new Progress<RunDetail<RegressionMetrics>>(run =>
            {
                if (run.ValidationMetrics == null)
                {
                    return;
                }

                logger.LogInformation($"Current Result - {run.TrainerName}, {run.ValidationMetrics.RSquared}, {run.ValidationMetrics.MeanAbsoluteError}");
            });

            var regressionExperiment = mlContext.Auto().CreateRegressionExperiment(experimentSettings);
            var outcome = regressionExperiment.Execute(barsView, columnInfo, progressHandler: reporter);

            Console.WriteLine(Environment.NewLine);
            Console.WriteLine("Best run:");
            Console.WriteLine($"Trainer name - {outcome.BestRun.TrainerName}");
            Console.WriteLine($"RSquared - {outcome.BestRun.ValidationMetrics.RSquared}");
            Console.WriteLine($"MAE - {outcome.BestRun.ValidationMetrics.MeanAbsoluteError}");

            Console.ReadLine();
        }
コード例 #20
0
        /// <summary>
        /// Runs an AutoML regression experiment on the training file, reports the results,
        /// evaluates the best run on the test file and saves the best model to disk.
        /// </summary>
        public static void TrainAndSave()
        {
            var ml = new MLContext(seed: 1);

            // Prepare the data.
            var trainingSet = ml.Data.LoadFromTextFile<WineData>(path: TrainDataPath, separatorChar: ',', hasHeader: true);
            var holdoutSet = ml.Data.LoadFromTextFile<WineData>(path: TestDataPath, separatorChar: ',', hasHeader: true);

            var handler = new RegressionExperimentProgressHandler();
            const uint experimentSeconds = 200;

            var regressionExperiment = ml.Auto().CreateRegressionExperiment(experimentSeconds);
            ExperimentResult<RegressionMetrics> outcome = regressionExperiment.Execute(trainingSet, "Label", progressHandler: handler);

            Debugger.PrintTopModels(outcome);

            RunDetail<RegressionMetrics> winner = outcome.BestRun;
            ITransformer winningModel = winner.Model;

            // Evaluate the best run against the test data.
            var scored = winningModel.Transform(holdoutSet);
            var testMetrics = ml.Regression.Evaluate(scored, labelColumnName: "Label", scoreColumnName: "Score");

            Debugger.PrintRegressionMetrics(winner.TrainerName, testMetrics);

            // Save the model.
            Console.WriteLine("====== Save model to local file =========");
            ml.Model.Save(winningModel, trainingSet.Schema, ModelFilePath);
        }
コード例 #21
0
        /// <summary>
        /// Infers the column layout of a comma-separated file with AutoML and loads the
        /// file into <c>_trainDataView</c> using the inferred loader options.
        /// </summary>
        /// <param name="filePath">Path of the text file to load.</param>
        /// <param name="labelColumnName">Name of the label column passed to column inference.</param>
        public void LoadFromTextFile(string filePath, string labelColumnName)
        {
            var inferred = _mlContext.Auto().InferColumns(filePath, labelColumnName, separatorChar: ',', groupColumns: false);

            _trainDataView = _mlContext.Data
                .CreateTextLoader(inferred.TextLoaderOptions)
                .Load(filePath);
        }
コード例 #22
0
ファイル: Program.cs プロジェクト: eerhardt/nni-mlnet
        /// <summary>
        /// Trains a LightGBM multiclass classifier on "digits.csv" using the parameters
        /// handed out by NNI, prints the evaluation metrics and reports the micro
        /// accuracy back to NNI as the final result.
        /// </summary>
        public static void Main(string[] args)
        {
            const string datasetPath = "digits.csv";

            var ml = new MLContext();
            var inferred = ml.Auto().InferColumns(datasetPath, labelColumnIndex: 64, separatorChar: ',');

            IDataView allRows = ml.Data.LoadFromTextFile(datasetPath, inferred.TextLoaderOptions);
            var split = ml.Data.TrainTestSplit(allRows, testFraction: .25);

            // Fit the preprocessing on the training portion only, then apply it to both portions.
            var normalizeAndKey = ml.Transforms.NormalizeMeanVariance("Features")
                .Append(ml.Transforms.Conversion.MapValueToKey("Label"));
            var fittedPreprocess = normalizeAndKey.Fit(split.TrainSet);

            IDataView trainingRows = fittedPreprocess.Transform(split.TrainSet);
            IDataView testRows = fittedPreprocess.Transform(split.TestSet);

            var tuner = new Nni();
            Dictionary<string, string> trialParameters = tuner.GetNextParameter();

            var lightGbm = ml.MulticlassClassification.Trainers.LightGbm(CreateOptions(trialParameters));

            var fittedModel = lightGbm.Fit(trainingRows);
            var scoredTestRows = fittedModel.Transform(testRows);
            var metrics = ml.MulticlassClassification.Evaluate(scoredTestRows);

            Console.WriteLine($"MicroAccuracy: {metrics.MicroAccuracy}");
            Console.WriteLine($"MacroAccuracy: {metrics.MacroAccuracy}");
            Console.WriteLine($"LogLoss: {metrics.LogLoss}");
            Console.WriteLine($"LogLossReduction: {metrics.LogLossReduction}");
            Console.WriteLine($"TopKAccuracy: {metrics.TopKAccuracy}");

            tuner.ReportFinalResult(metrics.MicroAccuracy);
        }
コード例 #23
0
ファイル: ModelBuilder.cs プロジェクト: mifmasterz/Hikaton
        /// <summary>
        /// Runs a 10-second AutoML multiclass experiment that predicts the "Saving" column,
        /// prints the best run's macro/micro accuracy and saves the best model.
        /// </summary>
        public static void DoAutoML()
        {
            // Load the training data.
            IDataView trainingRows = mlContext.Data.LoadFromTextFile<ModelInput>(
                path: TRAIN_DATA_FILEPATH,
                hasHeader: true,
                separatorChar: ',',
                allowQuoting: true,
                allowSparse: false);

            var settings = new MulticlassExperimentSettings
            {
                MaxExperimentTimeInSeconds = 10
            };

            MulticlassClassificationExperiment autoExperiment = mlContext.Auto().CreateMulticlassClassificationExperiment(settings);

            // One-hot encode the categorical columns, then assemble the feature vector.
            var categoricalColumns = new[]
            {
                new InputOutputColumnPair("Vehicle Type", "Vehicle Type"),
                new InputOutputColumnPair("Day", "Day")
            };
            var featureColumns = new[] { "Vehicle Type", "Day", "Ride Distance (km)", "Hour" };

            var preFeaturizer = mlContext.Transforms.Categorical.OneHotEncoding(categoricalColumns)
                .Append(mlContext.Transforms.Concatenate("Features", featureColumns));

            var outcome = autoExperiment.Execute(trainingRows, labelColumnName: "Saving", preFeaturizer: preFeaturizer);
            var bestRun = outcome.BestRun;
            var metrics = bestRun.ValidationMetrics;

            Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:0.##}");
            Console.WriteLine($"Micro Accuracy: {metrics.MicroAccuracy:0.##}");

            // Save the model.
            SaveModel(mlContext, outcome.BestRun.Model, MODEL_FILEPATH, trainingRows.Schema);
        }
コード例 #24
0
        /// <summary>
        /// Trains a multiclass model on "tasks.csv" from the user's documents folder with
        /// AutoML (optimizing log-loss, label column "Tags"), prints the prediction for a
        /// sample task and saves the best model to "./clickup-model.zip".
        /// </summary>
        static void Main(string[] args)
        {
            var documentsFolder = Environment.GetFolderPath(Environment.SpecialFolder.MyDocuments);
            var tasksCsv = Path.Combine(documentsFolder, "tasks.csv");

            var ml = new MLContext();

            var taskRows = ml.Data.LoadFromTextFile<TaskInput>(tasksCsv, hasHeader: true, separatorChar: ',');

            var experimentSettings = new MulticlassExperimentSettings();
            experimentSettings.MaxExperimentTimeInSeconds = 600;
            experimentSettings.OptimizingMetric = MulticlassClassificationMetric.LogLoss;

            var autoExperiment = ml.Auto().CreateMulticlassClassificationExperiment(experimentSettings);

            var columns = new ColumnInformation { LabelColumnName = "Tags" };
            var outcome = autoExperiment.Execute(taskRows, columns);

            var winningModel = outcome.BestRun.Model;

            var engine = ml.Model.CreatePredictionEngine<TaskInput, TaskOutput>(winningModel);

            var sampleTask = new TaskInput { TaskName = "Introduction to ML.NET" };
            var predicted = engine.Predict(sampleTask);

            Console.WriteLine($"Predicted label - {predicted.PredictedLabel}");

            ml.Model.Save(winningModel, taskRows.Schema, "./clickup-model.zip");
        }
コード例 #25
0
        /// <summary>
        /// Column inference must throw <see cref="ArgumentException"/> when asked for a
        /// label column name ("Junk") that is passed for the UCI Adult dataset.
        /// </summary>
        public void IncorrectLabelColumnThrows()
        {
            var adultDatasetPath = DatasetUtil.DownloadUciAdultDataset();
            var ml = new MLContext();

            // Kept as an explicit System.Action so the Action-based Assert.Throws overload is used.
            var inferWithJunkLabel = new System.Action(() => ml.Auto().InferColumns(adultDatasetPath, "Junk", groupColumns: false));

            Assert.Throws<ArgumentException>(inferWithJunkLabel);
        }
コード例 #26
0
        /// <summary>
        /// Loads the graded restaurant-inspection CSV files produced by the ETL step,
        /// runs an AutoML multiclass experiment (optimizing log-loss, 600 seconds) on the
        /// training split, evaluates the best model on the held-out test split and saves
        /// the model to "model.zip" under the RestaurantInspectionsML project directory.
        /// </summary>
        static void Main(string[] args)
        {
            // Define source data directory paths
            string solutionDirectory = "/home/lqdev/Development/RestaurantInspectionsSparkMLNET";
            string dataLocation      = Path.Combine(solutionDirectory, "RestaurantInspectionsETL", "Output");

            // Initialize MLContext
            MLContext mlContext = new MLContext();

            // Pick the "Graded" folder of the ETL output directory whose name sorts first.
            // NOTE(review): the original comment said "most recent ETL output"; if the directory
            // names are ascending timestamps this selects the OLDEST run — confirm the naming
            // scheme and switch to OrderByDescending if the newest run is intended.
            var latestOutput =
                Directory
                .GetDirectories(dataLocation)
                .Select(directory => new DirectoryInfo(directory))
                .OrderBy(directoryInfo => directoryInfo.Name)
                .Select(directory => Path.Join(directory.FullName, "Graded"))
                .First();

            // File-name suffix check is ordinal: file paths are not linguistic text.
            var dataFilePaths =
                Directory
                .GetFiles(latestOutput)
                .Where(file => file.EndsWith("csv", StringComparison.Ordinal))
                .ToArray();

            // Load the data
            var       dataLoader = mlContext.Data.CreateTextLoader <ModelInput>(separatorChar: ',', hasHeader: false, allowQuoting: true, trimWhitespace: true);
            IDataView data       = dataLoader.Load(dataFilePaths);

            // Split the data
            TrainTestData dataSplit = mlContext.Data.TrainTestSplit(data, testFraction: 0.2);
            IDataView     trainData = dataSplit.TrainSet;
            IDataView     testData  = dataSplit.TestSet;

            // Define experiment settings
            var experimentSettings = new MulticlassExperimentSettings();

            experimentSettings.MaxExperimentTimeInSeconds = 600;
            experimentSettings.OptimizingMetric           = MulticlassClassificationMetric.LogLoss;

            // Create experiment
            var experiment = mlContext.Auto().CreateMulticlassClassificationExperiment(experimentSettings);

            // Run the experiment on the training split only, so the test split evaluated
            // below stays unseen (previously the full dataset was used, leaking test rows).
            var experimentResults = experiment.Execute(trainData, progressHandler: new ProgressHandler());

            // Best Run Results
            var bestModel = experimentResults.BestRun.Model;

            // Evaluate Model
            IDataView scoredTestData = bestModel.Transform(testData);
            var       metrics        = mlContext.MulticlassClassification.Evaluate(scoredTestData);

            Console.WriteLine($"MicroAccuracy: {metrics.MicroAccuracy}");

            // Save Model
            string modelSavePath = Path.Join(solutionDirectory, "RestaurantInspectionsML", "model.zip");

            mlContext.Model.Save(bestModel, data.Schema, modelSavePath);
        }
コード例 #27
0
        /// <summary>
        /// Approval test: the auto-featurizer pipeline built for the iris data view
        /// (with the "Label" column excluded) is serialized to JSON and verified.
        /// </summary>
        public void AutoFeaturizer_iris_test()
        {
            var ml = new MLContext(1);
            var irisData = DatasetUtil.GetIrisDataView();
            var excluded = new[] { "Label" };

            var featurizer = ml.Auto().Featurizer(irisData, excludeColumns: excluded);
            var serialized = JsonSerializer.Serialize(featurizer, _jsonSerializerOptions);

            Approvals.Verify(serialized);
        }
コード例 #28
0
        /// <summary>
        /// Uses AutoML to infer the columns of the training dataset, prints the
        /// inference results and returns them.
        /// </summary>
        /// <param name="mlContext">ML.NET context used to run column inference.</param>
        /// <returns>The column inference results for the training data file.</returns>
        private static ColumnInferenceResults InferColumns(MLContext mlContext)
        {
            ConsoleHelper.ConsoleWriteHeader("=============== Inferring columns in dataset ===============");

            var inferred = mlContext.Auto().InferColumns(TrainDataPath, LabelColumnName, groupColumns: false);
            ConsoleHelper.Print(inferred);

            return inferred;
        }
コード例 #29
0
        /// <summary>
        /// Runs a one-second binary classification experiment on a train/test split of
        /// the UCI Adult dataset and checks that the best run has accuracy above 0.70
        /// and exposes an estimator, a model and a trainer name.
        /// </summary>
        public void AutoFit_UCI_Adult_Train_Test_Split_Test()
        {
            var ml = new MLContext(1);
            var adultPath = DatasetUtil.GetUciAdultDataset();

            var inferred = ml.Auto().InferColumns(adultPath, DatasetUtil.UciAdultLabel);
            var loader = ml.Data.CreateTextLoader(inferred.TextLoaderOptions);
            var allRows = loader.Load(adultPath);
            var split = ml.Data.TrainTestSplit(allRows);

            var experiment = ml.Auto().CreateBinaryClassificationExperiment(1);
            var outcome = experiment.Execute(split.TrainSet, split.TestSet, DatasetUtil.UciAdultLabel);

            var best = outcome.BestRun;

            Assert.True(best.ValidationMetrics.Accuracy > 0.70);
            Assert.NotNull(best.Estimator);
            Assert.NotNull(best.Model);
            Assert.NotNull(best.TrainerName);
        }
コード例 #30
0
ファイル: Program.cs プロジェクト: rpuliadi/Titanic-ML.NET-Ex
        /// <summary>
        /// Uses AutoML to infer the columns of the given training data file, writes the
        /// result object to the console and returns it.
        /// </summary>
        /// <param name="mlContext">ML.NET context used to run column inference.</param>
        /// <param name="TrainDataPath">Path of the training data file to inspect.</param>
        /// <returns>The column inference results for the file.</returns>
        private static ColumnInferenceResults InferColumns(MLContext mlContext, string TrainDataPath)
        {
            Console.WriteLine("=============== Inferring columns in dataset ===============");

            var inferred = mlContext.Auto().InferColumns(TrainDataPath, LabelColumnName, groupColumns: false);
            Console.WriteLine(inferred);

            return inferred;
        }