public async Task AutoMLExperiment_return_current_best_trial_when_ct_is_canceled_with_trial_completed_Async()
{
    // Arrange: a dummy runner whose trials finish quickly, so at least one
    // trial completes before cancellation fires.
    var context = new MLContext(1);
    var pipeline = context.Transforms.Concatenate("Features", "Features")
        .Append(context.Auto().Regression());
    var dummyTrainer = new DummyTrialRunner(context, 1);
    var experiment = context.Auto().CreateExperiment();
    experiment.SetPipeline(pipeline)
        .SetDataset(GetDummyData(), 10)
        .SetEvaluateMetric(RegressionMetric.RootMeanSquaredError, "Label")
        .SetTrainingTimeInSeconds(100)
        .SetTrialRunner(dummyTrainer);

    // Cancel shortly after the first completed trial is observed in the log.
    var cancellation = new CancellationTokenSource();
    context.Log += (o, e) =>
    {
        if (e.RawMessage.Contains("Update Completed Trial"))
        {
            cancellation.CancelAfter(100);
        }
    };

    var result = await experiment.RunAsync(cancellation.Token);

    // The best trial found so far must be returned instead of an exception.
    result.Metric.Should().BeGreaterThan(0);
}
public async Task AutoMLExperiment_throw_timeout_exception_when_ct_is_canceled_and_no_trial_completed_Async()
{
    // Arrange: trials take 5 seconds against a 1-second budget, so no trial
    // can ever complete before cancellation.
    var context = new MLContext(1);
    var pipeline = context.Transforms.Concatenate("Features", "Features")
        .Append(context.Auto().Regression());
    var dummyTrainer = new DummyTrialRunner(context, 5);
    var experiment = context.Auto().CreateExperiment();
    experiment.SetPipeline(pipeline)
        .SetDataset(GetDummyData(), 10)
        .SetEvaluateMetric(RegressionMetric.RootMeanSquaredError, "Label")
        .SetTrainingTimeInSeconds(1)
        .SetTrialRunner(dummyTrainer);

    // Cancel as soon as the first trial is reported as running.
    var cancellation = new CancellationTokenSource();
    context.Log += (o, e) =>
    {
        if (e.RawMessage.Contains("Update Running Trial"))
        {
            cancellation.Cancel();
        }
    };

    // With no completed trial to fall back on, RunAsync must surface a timeout.
    Func<Task> runExperiment = async () => await experiment.RunAsync(cancellation.Token);
    await runExperiment.Should().ThrowExactlyAsync<TimeoutException>();
}
public void AutoFitImageClassificationTrainTest()
{
    // Load the flowers dataset and infer its column layout.
    var context = new MLContext();
    var datasetPath = DatasetUtil.GetFlowersDataset();
    var columnInference = context.Auto().InferColumns(datasetPath, "Label");
    var textLoader = context.Data.CreateTextLoader(columnInference.TextLoaderOptions);
    var trainData = context.Data.ShuffleRows(textLoader.Load(datasetPath), seed: 1);
    var originalColumnNames = trainData.Schema.Select(c => c.Name);

    // 80/20 split, keeping only the original columns on each side.
    TrainTestData trainTestData = context.Data.TrainTestSplit(trainData, testFraction: 0.2, seed: 1);
    IDataView trainDataset = SplitUtil.DropAllColumnsExcept(context, trainTestData.TrainSet, originalColumnNames);
    IDataView testDataset = SplitUtil.DropAllColumnsExcept(context, trainTestData.TestSet, originalColumnNames);

    var result = context.Auto()
        .CreateMulticlassClassificationExperiment(0)
        .Execute(trainDataset, testDataset, columnInference.ColumnInformation);

    // Known issue: accuracy degrades on platforms that are neither Windows
    // nor macOS (i.e. Ubuntu), so a lower expected value is asserted there.
    var isWindowsOrMac = RuntimeInformation.IsOSPlatform(OSPlatform.Windows)
        || RuntimeInformation.IsOSPlatform(OSPlatform.OSX);
    if (!isWindowsOrMac)
    {
        Assert.Equal(0.778, result.BestRun.ValidationMetrics.MicroAccuracy, 3);
    }
    else
    {
        Assert.Equal(1, result.BestRun.ValidationMetrics.MicroAccuracy, 3);
    }

    // The predicted label column of scored data should be text.
    var scoredData = result.BestRun.Model.Transform(trainData);
    Assert.Equal(TextDataViewType.Instance, scoredData.Schema[DefaultColumnNames.PredictedLabel].Type);
}
public void AutoFitMaxExperimentTimeTest()
{
    // A single binary classification experiment takes less than 5 seconds.
    // System.OperationCanceledException is thrown when an ongoing experiment
    // is canceled and at least one model has been generated.
    // BinaryClassificationExperiment includes LightGBM, which is not 32-bit
    // compatible.
    var context = new MLContext(1);
    var dataPath = DatasetUtil.GetUciAdultDataset();
    var columnInference = context.Auto().InferColumns(dataPath, DatasetUtil.UciAdultLabel);
    var textLoader = context.Data.CreateTextLoader(columnInference.TextLoaderOptions);
    var trainData = textLoader.Load(dataPath);
    var experiment = context.Auto()
        .CreateBinaryClassificationExperiment(15)
        .Execute(trainData, new ColumnInformation() { LabelColumnName = DatasetUtil.UciAdultLabel });

    // Ensure the (last) model that was training when maximum experiment time was reached has been stopped,
    // and that its MLContext has been canceled. Sometimes during CI unit testing, the host machines can run slower than normal, which
    // can increase the run time of unit tests, and may not produce multiple runs.
    if (experiment.RunDetails.Select(r => r.Exception == null).Count() > 1 && experiment.RunDetails.Last().Exception != null)
    {
        var expectedExceptionMessage = "Operation was canceled";
        var lastException = experiment.RunDetails.Last().Exception;
        var containsMessage = lastException.Message.Contains(expectedExceptionMessage);

        // FIX: the cancellation can surface as an AggregateException when
        // multiple threads each throw "Operation was canceled" — previously
        // this made the test flaky because AggregateException.Message does not
        // contain the inner messages. Every inner exception must carry the
        // expected message.
        if (lastException is AggregateException lastAggregateException)
        {
            containsMessage = true;
            foreach (var ex in lastAggregateException.Flatten().InnerExceptions)
            {
                if (!ex.Message.Contains(expectedExceptionMessage))
                {
                    containsMessage = false;
                }
            }
        }

        Assert.True(containsMessage,
            $"Did not obtain '{expectedExceptionMessage}' error."
            + $"Obtained unexpected error of type {lastException.GetType()} with message: {lastException.Message}");

        // Ensure that the best found model can still run after maximum experiment time was reached.
        IDataView predictions = experiment.BestRun.Model.Transform(trainData);
    }
}
public void Start()
{
    // Infer the column layout from the training file, without column grouping.
    var inference = mlContext.Auto().InferColumns(
        path: TRAIN_DATA_FILEPATH,
        labelColumnName: "next",
        groupColumns: false);
    TextLoader textLoader = mlContext.Data.CreateTextLoader(inference.TextLoaderOptions);
    trainData = textLoader.Load(TRAIN_DATA_FILEPATH);

    // Adjust the inferred column roles: treat "productId" and "year" as
    // categorical rather than numeric, and ignore "units" entirely.
    columnInformation = inference.ColumnInformation;
    columnInformation.CategoricalColumnNames.Add("productId");
    columnInformation.NumericColumnNames.Remove("productId");
    columnInformation.CategoricalColumnNames.Add("year");
    columnInformation.NumericColumnNames.Remove("year");
    columnInformation.NumericColumnNames.Remove("units");
    columnInformation.IgnoredColumnNames.Add("units");

    var settings = new RegressionExperimentSettings()
    {
        MaxExperimentTimeInSeconds = 10,
        OptimizingMetric = RegressionMetric.RootMeanSquaredError,
        CacheDirectory = new DirectoryInfo(CACHE_DIRECTORY),
        CancellationToken = cancelationTokenSource.Token
    };

    // OLS is excluded from the sweep.
    settings.Trainers.Remove(RegressionTrainer.Ols);

    RegressionExperiment experiment = mlContext.Auto().CreateRegressionExperiment(settings);
    ExperimentResult<RegressionMetrics> experimentResult = experiment.Execute(
        trainData: trainData,
        columnInformation: columnInformation,
        progressHandler: new RegressionProgressHandler(),
        preFeaturizer: null);

    ITransformer model = experimentResult.BestRun.Model;
    IEstimator<ITransformer> estimator = experimentResult.BestRun.Estimator;

    // Score the training data with the best model and print the results.
    IDataView predictionsDataView = model.Transform(trainData);
    PrintPredictions(predictionsDataView);
    PrintPredictionsEnumerable(predictionsDataView);

    // Re-fit the winning estimator and persist the model to disk.
    model = estimator.Fit(trainData);
    mlContext.Model.Save(model, trainData.Schema, MODEL_FILEPATH);
    Console.WriteLine("Done");
}
public void AutoFitMaxExperimentTimeTest()
{
    // A single binary classification experiment takes less than 5 seconds.
    // System.OperationCanceledException is thrown when an ongoing experiment
    // is canceled and at least one model has been generated.
    // BinaryClassificationExperiment includes LightGBM, which is not 32-bit
    // compatible.
    var context = new MLContext(1);
    var dataPath = DatasetUtil.GetUciAdultDataset();
    var inference = context.Auto().InferColumns(dataPath, DatasetUtil.UciAdultLabel);
    var loader = context.Data.CreateTextLoader(inference.TextLoaderOptions);
    var trainData = loader.Load(dataPath);
    var experiment = context.Auto()
        .CreateBinaryClassificationExperiment(15)
        .Execute(trainData, new ColumnInformation() { LabelColumnName = DatasetUtil.UciAdultLabel });

    // Ensure the (last) model that was training when the maximum experiment
    // time was reached has been stopped and its MLContext canceled. On slow CI
    // machines the experiment may not produce multiple runs, in which case
    // this check is skipped.
    if (experiment.RunDetails.Select(r => r.Exception == null).Count() > 1 && experiment.RunDetails.Last().Exception != null)
    {
        var expectedExceptionMessage = "Operation was canceled";
        var lastException = experiment.RunDetails.Last().Exception;

        // Sometimes multiple threads each throw the same "Operation was
        // cancelled" exception, which is then grouped into an
        // AggregateException: every inner exception must carry the expected
        // message. Otherwise check the single exception's message directly.
        bool containsMessage;
        if (lastException is AggregateException lastAggregateException)
        {
            containsMessage = lastAggregateException.Flatten().InnerExceptions
                .All(ex => ex.Message.Contains(expectedExceptionMessage));
        }
        else
        {
            containsMessage = lastException.Message.Contains(expectedExceptionMessage);
        }

        Assert.True(containsMessage, $"Did not obtain '{expectedExceptionMessage}' error."
            + $"Obtained unexpected error of type {lastException.GetType()} with message: {lastException.Message}");

        // Ensure that the best found model can still run after maximum experiment time was reached.
        IDataView predictions = experiment.BestRun.Model.Transform(trainData);
    }
}
public void AutoFitRegressionTest(string culture)
{
    var originalCulture = Thread.CurrentThread.CurrentCulture;
    try
    {
        Thread.CurrentThread.CurrentCulture = new CultureInfo(culture);

        // If users run AutoML with a different locale, sometimes
        // the sweeper encounters problems when parsing some strings.
        // So testing in another culture is necessary.
        // Furthermore, these issues might only occur after ~70
        // iterations, so more experiment time is needed for this to
        // occur.
        uint experimentTime = (uint)(culture == "en-US" ? 0 : 180);
        var experimentSettings = new RegressionExperimentSettings { MaxExperimentTimeInSeconds = experimentTime };
        if (!Environment.Is64BitProcess)
        {
            // LightGBM isn't available on x86 machines
            experimentSettings.Trainers.Remove(RegressionTrainer.LightGbm);
        }

        var context = new MLContext(1);
        var dataPath = DatasetUtil.GetMlNetGeneratedRegressionDataset();
        var columnInference = context.Auto().InferColumns(dataPath, DatasetUtil.MlNetGeneratedRegressionLabel);
        var textLoader = context.Data.CreateTextLoader(columnInference.TextLoaderOptions);
        var trainData = textLoader.Load(dataPath);
        var validationData = context.Data.TakeRows(trainData, 20);
        trainData = context.Data.SkipRows(trainData, 20);
        var result = context.Auto()
            .CreateRegressionExperiment(experimentSettings)
            .Execute(trainData, validationData,
                new ColumnInformation() { LabelColumnName = DatasetUtil.MlNetGeneratedRegressionLabel });

        // FIX: failed runs can have null ValidationMetrics, which previously
        // caused a NullReferenceException inside the lambda; use
        // null-conditional access. Any() also states the intent directly
        // instead of Max() over booleans (and no longer throws if
        // RunDetails is empty).
        Assert.True(result.RunDetails.Any(i => i?.ValidationMetrics?.RSquared > 0.9));

        // Ensure experimentTime allows enough iterations to fully test the internationalization code
        // If the below assertion fails, increase the experiment time so the number of iterations is met
        Assert.True(culture == "en-US" || result.RunDetails.Count() >= 75,
            $"RunDetails.Count() = {result.RunDetails.Count()}, below 75");
    }
    finally
    {
        Thread.CurrentThread.CurrentCulture = originalCulture;
    }
}
public void AutoFitRegressionTest(string culture)
{
    var originalCulture = Thread.CurrentThread.CurrentCulture;
    try
    {
        Thread.CurrentThread.CurrentCulture = new CultureInfo(culture);

        // If users run AutoML with a different locale, the sweeper sometimes
        // encounters problems when parsing some strings, so testing in
        // another culture is necessary. These issues might only occur after
        // ~70 iterations, hence the internal maxModels parameter below.
        int maxModels = culture == "en-US" ? 1 : 75;

        var settings = new RegressionExperimentSettings { MaxModels = maxModels };
        if (!Environment.Is64BitProcess)
        {
            // LightGBM isn't available on x86 machines.
            settings.Trainers.Remove(RegressionTrainer.LightGbm);
        }

        var context = new MLContext(1);
        var dataPath = DatasetUtil.GetMlNetGeneratedRegressionDataset();
        var inference = context.Auto().InferColumns(dataPath, DatasetUtil.MlNetGeneratedRegressionLabel);
        var loader = context.Data.CreateTextLoader(inference.TextLoaderOptions);
        var allData = loader.Load(dataPath);

        // First 20 rows become the validation set; the remainder is training.
        var validationData = context.Data.TakeRows(allData, 20);
        var trainData = context.Data.SkipRows(allData, 20);

        var result = context.Auto()
            .CreateRegressionExperiment(settings)
            .Execute(trainData, validationData,
                new ColumnInformation() { LabelColumnName = DatasetUtil.MlNetGeneratedRegressionLabel });

        // Failed runs can have null metrics, hence the null-conditional access.
        double? bestRSquared = result.RunDetails.Max(i => i?.ValidationMetrics?.RSquared);
        Assert.True(bestRSquared > 0.99);

        // Verify the internal maxModels parameter capped the run count exactly.
        Assert.True(culture == "en-US" || result.RunDetails.Count() == 75,
            $"RunDetails.Count() = {result.RunDetails.Count()}, is not 75");
    }
    finally
    {
        Thread.CurrentThread.CurrentCulture = originalCulture;
    }
}
public void AutoFit_UCI_Adult_CrossValidation_10_Test()
{
    // 10-fold cross-validation over the UCI Adult dataset.
    var context = new MLContext(1);
    var dataPath = DatasetUtil.GetUciAdultDataset();
    var inference = context.Auto().InferColumns(dataPath, DatasetUtil.UciAdultLabel);
    var loader = context.Data.CreateTextLoader(inference.TextLoaderOptions);
    var trainData = loader.Load(dataPath);

    var result = context.Auto()
        .CreateBinaryClassificationExperiment(1)
        .Execute(trainData, 10, DatasetUtil.UciAdultLabel);

    // Every fold of the best run should exceed 70% accuracy.
    var worstFoldAccuracy = result.BestRun.Results.Min(x => x.ValidationMetrics.Accuracy);
    Assert.True(worstFoldAccuracy > 0.70);
    Assert.NotNull(result.BestRun.Estimator);
    Assert.NotNull(result.BestRun.TrainerName);
}
public void AutoFitMultiTest()
{
    // 5-fold cross-validation over the trivial multiclass dataset.
    var context = new MLContext();
    var inference = context.Auto().InferColumns(DatasetUtil.TrivialMulticlassDatasetPath, DatasetUtil.TrivialMulticlassDatasetLabel);
    var loader = context.Data.CreateTextLoader(inference.TextLoaderOptions);
    var trainData = loader.Load(DatasetUtil.TrivialMulticlassDatasetPath);

    var result = context.Auto()
        .CreateMulticlassClassificationExperiment(0)
        .Execute(trainData, 5, DatasetUtil.TrivialMulticlassDatasetLabel);

    // The first fold of the best run should reach at least 70% micro-accuracy.
    var firstFold = result.BestRun.Results.First();
    Assert.True(firstFold.ValidationMetrics.MicroAccuracy >= 0.7);

    // The predicted label column of scored data should be a single.
    var scoredData = firstFold.Model.Transform(trainData);
    Assert.Equal(NumberDataViewType.Single, scoredData.Schema[DefaultColumnNames.PredictedLabel].Type);
}
public void UnGroupReturnsMoreColumnsThanGroup()
{
    var dataPath = DatasetUtil.DownloadUciAdultDataset();
    var context = new MLContext();

    // Without grouping, no loader column may span more than one source slot.
    var ungrouped = context.Auto().InferColumns(dataPath, DatasetUtil.UciAdultLabel, groupColumns: false);
    foreach (var col in ungrouped.TextLoaderOptions.Columns)
    {
        var spansMultipleSlots = col.Source.Length > 1 || col.Source[0].Min != col.Source[0].Max;
        Assert.False(spansMultipleSlots);
    }

    // Grouping collapses related columns, yielding strictly fewer of them.
    var grouped = context.Auto().InferColumns(dataPath, DatasetUtil.UciAdultLabel, groupColumns: true);
    Assert.True(grouped.TextLoaderOptions.Columns.Count() < ungrouped.TextLoaderOptions.Columns.Count());
}
/// <summary>
/// Infer columns in the dataset with AutoML.
/// </summary>
private static ColumnInferenceResults InferColumns(MLContext mlContext, string dataPath, string labelColumnName)
{
    ConsoleHelper.ConsoleWriteHeader("=============== Inferring columns in dataset ===============");
    return mlContext.Auto().InferColumns(dataPath, labelColumnName, groupColumns: false);
}
public static ExperimentResult<MulticlassClassificationMetrics> RunAutoMLExperiment(
    MLContext mlContext, string labelColumnName,
    MulticlassExperimentSettings experimentSettings,
    MulticlassExperimentProgressHandler progressHandler, IDataView dataView)
{
    ConsoleHelper.ConsoleWriteHeader("=============== Running AutoML experiment ===============");
    Trace.WriteLine($"Running AutoML multiclass classification experiment for {experimentSettings.MaxExperimentTimeInSeconds} seconds...");

    var experimentResult = mlContext.Auto()
        .CreateMulticlassClassificationExperiment(experimentSettings)
        .Execute(dataView, labelColumnName, progressHandler: progressHandler);

    Trace.WriteLine(Environment.NewLine);
    Trace.WriteLine($"num models created: {experimentResult.RunDetails.Count()}");

    // Get top few runs ranked by accuracy.
    // FIX: materialize the query once — it was a deferred IEnumerable, and the
    // loop below called Count()/ElementAt(i) on it, re-filtering and
    // re-sorting all runs on every iteration.
    var topRuns = experimentResult.RunDetails
        .Where(r => r.ValidationMetrics != null && !double.IsNaN(r.ValidationMetrics.MicroAccuracy))
        .OrderByDescending(r => r.ValidationMetrics.MicroAccuracy)
        .Take(3)
        .ToList();

    Trace.WriteLine("Top models ranked by accuracy --");
    CreateRow($"{"",-4} {"Trainer",-35} {"MicroAccuracy",14} {"MacroAccuracy",14} {"Duration",9}", Width);
    for (var i = 0; i < topRuns.Count; i++)
    {
        var run = topRuns[i];
        CreateRow($"{i,-4} {run.TrainerName,-35} {run.ValidationMetrics?.MicroAccuracy ?? double.NaN,14:F4} {run.ValidationMetrics?.MacroAccuracy ?? double.NaN,14:F4} {run.RuntimeInSeconds,9:F1}", Width);
    }

    return experimentResult;
}
/* static readonly string TrainDataPath = Path.Combine(Environment.CurrentDirectory, "Data", "winequality-data-train.csv");
 * static readonly string TestDataPath = Path.Combine(Environment.CurrentDirectory, "Data", "winequality-data-test.csv");*/
public static void TrainAndSave(string label, string trainDataPath, uint experimentTime)
{
    MLContext mlContext = new MLContext(seed: 0);

    // Prepare the data: load the training file and hold out 20% as a test set.
    var trainData = mlContext.Data.LoadFromTextFile<ModelInput>(path: trainDataPath, separatorChar: ',', hasHeader: true);
    //var testData = mlContext.Data.LoadFromTextFile<ModelInput>(path: TestDataPath, separatorChar: ',', hasHeader: true);
    var testData = mlContext.Data.TrainTestSplit(trainData, testFraction: 0.2).TestSet;

    var progressHandler = new RegressionExperimentProgressHandler();

    // Run the regression sweep for the requested amount of time.
    //uint ExperimentTime = 200;
    ExperimentResult<RegressionMetrics> experimentResult = mlContext.Auto()
        .CreateRegressionExperiment(experimentTime)
        .Execute(trainData, label, progressHandler: progressHandler);
    //Debugger.PrintTopModels(experimentResult);

    RunDetail<RegressionMetrics> best = experimentResult.BestRun;
    ITransformer trainedModel = best.Model;

    // Evaluate the best run on the held-out test set.
    var predictions = trainedModel.Transform(testData);
    var metrics = mlContext.Regression.Evaluate(predictions, labelColumnName: label, scoreColumnName: "Score");
    //Debugger.PrintRegressionMetrics(best.TrainerName, metrics);

    // Persist the trained model.
    using (var stream = System.IO.File.Create(ModelFilePath))
    {
        mlContext.Model.Save(trainedModel, trainData.Schema, stream);
    }
}
static void Main(string[] args)
{
    MLContext mlContext = new MLContext();
    IDataView trainDataView = mlContext.Data.LoadFromTextFile<TrafficData>(GetAbsolutePath("../../../Data/Metro_Interstate_Traffic_Volume.csv"), hasHeader: true, separatorChar: ',');

    // Configure experiment settings: 10-second sweep, minimizing MSE,
    // with no on-disk cache.
    var experimentSettings = new RegressionExperimentSettings();
    experimentSettings.MaxExperimentTimeInSeconds = 10;
    var cts = new CancellationTokenSource();
    experimentSettings.CancellationToken = cts.Token;
    experimentSettings.OptimizingMetric = RegressionMetric.MeanSquaredError;
    experimentSettings.CacheDirectory = null;

    // Cancel experiment after the user presses any key
    CancelExperimentAfterAnyKeyPress(cts);

    // Create and run the experiment.
    RegressionExperiment experiment = mlContext.Auto().CreateRegressionExperiment(experimentSettings);
    var handler = new RegressionExperimentProgressHandler();
    ExperimentResult<RegressionMetrics> experimentResult = experiment.Execute(trainDataView, labelColumnName: "Label", progressHandler: handler);

    // Report the best run's validation metrics.
    RegressionMetrics metrics = experimentResult.BestRun.ValidationMetrics;
    // FIX: corrected misspelled console output ("Algorthm" -> "Algorithm").
    Console.WriteLine($"Best Algorithm: {experimentResult.BestRun.TrainerName}");
    Console.WriteLine($"R-Squared: {metrics.RSquared:0.##}");
    Console.WriteLine($"Root Mean Squared Error: {metrics.RootMeanSquaredError:0.##}");
    Console.ReadKey();
}
public void Experiment()
{
    var trainingData = GetData(_dataPath);
    var validationData = GetData(_validatePath);

    // Sweep for up to 30 minutes, optimizing F1, restricted to two trainers.
    var settings = new BinaryExperimentSettings
    {
        MaxExperimentTimeInSeconds = 30 * 60,
        OptimizingMetric = BinaryClassificationMetric.F1Score,
    };
    settings.Trainers.Clear();
    settings.Trainers.Add(BinaryClassificationTrainer.AveragedPerceptron);
    settings.Trainers.Add(BinaryClassificationTrainer.LightGbm);

    var experiment = _context.Auto().CreateBinaryClassificationExperiment(settings);
    var experimentResult = experiment.Execute(
        trainData: trainingData,
        validationData: validationData,
        //columnInformation: new ColumnInformation
        //{
        //    ExampleWeightColumnName = nameof(Appointment.Weight)
        //},
        progressHandler: new ProgressHandler());

    Console.WriteLine("Experiment completed");
    Console.WriteLine();

    // Report and persist the winning model.
    ConsoleHelper.Print(experimentResult.BestRun.TrainerName, experimentResult.BestRun.ValidationMetrics);
    SaveModel(trainingData.Schema, experimentResult.BestRun.Model);
    Console.WriteLine("Best model saved");
}
private void Run()
{
    try
    {
        var mlContext = new MLContext();
        var records = ReadCsv(@"data\data.csv");
        var dataView = BuildDataView(mlContext, records);

        // 10-minute regression sweep with an on-disk cache.
        var settings = new RegressionExperimentSettings
        {
            MaxExperimentTimeInSeconds = 600,
            CacheDirectory = new DirectoryInfo(@".\cache"),
        };
        var experiment = mlContext.Auto().CreateRegressionExperiment(settings);

        // Data has already been parsed using invariant culture; switch the
        // default culture only now, before running the experiment.
        CultureInfo.DefaultThreadCurrentCulture = CultureInfo.CreateSpecificCulture("it-IT");

        var bestRun = experiment.Execute(dataView).BestRun;
        Console.WriteLine("Done.");
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
        Console.WriteLine(ex.StackTrace);
    }
    finally
    {
        Console.ReadLine();
    }
}
static void Main(string[] args)
{
    var context = new MLContext();
    var data = context.Data.LoadFromTextFile<RankingData>("./ranking.tsv", separatorChar: '\t');
    var split = context.Data.TrainTestSplit(data, testFraction: 0.2);

    // 5-minute ranking sweep, optimizing nDCG.
    var settings = new RankingExperimentSettings
    {
        MaxExperimentTimeInSeconds = 300,
        OptimizingMetric = RankingMetric.Ndcg,
    };
    var experiment = context.Auto().CreateRankingExperiment(settings);

    // Report each trial's average nDCG as it completes.
    var progressHandler = new Progress<RunDetail<RankingMetrics>>(ph =>
    {
        if (ph.ValidationMetrics != null)
        {
            Console.WriteLine($"Current trainer - {ph.TrainerName} with nDCG {ph.ValidationMetrics.NormalizedDiscountedCumulativeGains.Average()}");
        }
    });

    var results = experiment.Execute(split.TrainSet, validationData: split.TestSet, progressHandler: progressHandler);

    var bestRun = results.BestRun;
    var metrics = bestRun.ValidationMetrics.NormalizedDiscountedCumulativeGains;
    Console.WriteLine(Environment.NewLine);
    Console.WriteLine($"Best model {bestRun.TrainerName} - with nDCG {metrics.Average()}");
}
static async Task Main(string[] args)
{
    // Set up dependency injection for the Yahoo Finance client.
    var serviceProvider = new ServiceCollection()
        .AddLogging()
        .AddSingleton<IYahooFinanceService, YahooFinanceService>()
        .AddHttpClient()
        .BuildServiceProvider();

    var loggerFactory = LoggerFactory.Create(builder =>
    {
        builder.AddFilter("Microsoft", LogLevel.Warning)
            .AddFilter("System", LogLevel.Warning)
            .AddFilter("CandleStickMachineLearning.Program", LogLevel.Debug)
            .AddConsole();
    });
    var logger = loggerFactory.CreateLogger<Program>();
    logger.LogInformation("Hello World!");

    // Fetch one month of hourly AAPL bars as the training set.
    var yahooFinanceService = serviceProvider.GetService<IYahooFinanceService>();
    var barsList = await yahooFinanceService.GetBars("AAPL", DateTime.UtcNow.AddMonths(-1), DateTime.UtcNow, "1h");

    var context = new MLContext();
    var trainData = context.Data.LoadFromEnumerable<Models.Bar>(barsList);

    // 20-second regression sweep, minimizing mean absolute error.
    var settings = new RegressionExperimentSettings
    {
        MaxExperimentTimeInSeconds = 20,
        OptimizingMetric = RegressionMetric.MeanAbsoluteError
    };
    var labelColumnInfo = new ColumnInformation() { LabelColumnName = "Label" };
    var progress = new Progress<RunDetail<RegressionMetrics>>(p =>
    {
        if (p.ValidationMetrics != null)
        {
            logger.LogInformation($"Current Result - {p.TrainerName}, {p.ValidationMetrics.RSquared}, {p.ValidationMetrics.MeanAbsoluteError}");
        }
    });

    var experiment = context.Auto().CreateRegressionExperiment(settings);
    var result = experiment.Execute(trainData, labelColumnInfo, progressHandler: progress);

    // Report the best run.
    Console.WriteLine(Environment.NewLine);
    Console.WriteLine("Best run:");
    Console.WriteLine($"Trainer name - {result.BestRun.TrainerName}");
    Console.WriteLine($"RSquared - {result.BestRun.ValidationMetrics.RSquared}");
    Console.WriteLine($"MAE - {result.BestRun.ValidationMetrics.MeanAbsoluteError}");
    Console.ReadLine();
}
public static void TrainAndSave()
{
    MLContext mlContext = new MLContext(seed: 1);

    // Prepare the data from the train/test CSV files.
    var trainData = mlContext.Data.LoadFromTextFile<WineData>(path: TrainDataPath, separatorChar: ',', hasHeader: true);
    var testData = mlContext.Data.LoadFromTextFile<WineData>(path: TestDataPath, separatorChar: ',', hasHeader: true);

    var progressHandler = new RegressionExperimentProgressHandler();
    uint ExperimentTime = 200;

    // Run the regression sweep and report the top models.
    ExperimentResult<RegressionMetrics> experimentResult = mlContext.Auto()
        .CreateRegressionExperiment(ExperimentTime)
        .Execute(trainData, "Label", progressHandler: progressHandler);
    Debugger.PrintTopModels(experimentResult);

    RunDetail<RegressionMetrics> best = experimentResult.BestRun;
    ITransformer trainedModel = best.Model;

    // Evaluate the best run on the test set.
    var predictions = trainedModel.Transform(testData);
    var metrics = mlContext.Regression.Evaluate(predictions, labelColumnName: "Label", scoreColumnName: "Score");
    Debugger.PrintRegressionMetrics(best.TrainerName, metrics);

    // Save the model.
    Console.WriteLine("====== Save model to local file =========");
    mlContext.Model.Save(trainedModel, trainData.Schema, ModelFilePath);
}
public void LoadFromTextFile(string filePath, string labelColumnName)
{
    // Infer the file's schema, then load it into the training data view.
    var inference = _mlContext.Auto().InferColumns(filePath, labelColumnName, separatorChar: ',', groupColumns: false);
    var loader = _mlContext.Data.CreateTextLoader(inference.TextLoaderOptions);
    _trainDataView = loader.Load(filePath);
}
public static void Main(string[] args)
{
    MLContext ctx = new MLContext();

    // Infer the digits schema (label in column 64) and load the data.
    var columnInference = ctx.Auto().InferColumns("digits.csv", labelColumnIndex: 64, separatorChar: ',');
    IDataView data = ctx.Data.LoadFromTextFile("digits.csv", columnInference.TextLoaderOptions);
    var trainTestSplit = ctx.Data.TrainTestSplit(data, testFraction: .25);

    // Normalize features and key the label, fitting on the training split only.
    var preprocessPipeline = ctx.Transforms.NormalizeMeanVariance("Features")
        .Append(ctx.Transforms.Conversion.MapValueToKey("Label"))
        .Fit(trainTestSplit.TrainSet);
    IDataView trainSet = preprocessPipeline.Transform(trainTestSplit.TrainSet);
    IDataView testSet = preprocessPipeline.Transform(trainTestSplit.TestSet);

    // Pull the next hyperparameter set from NNI and train LightGBM with it.
    Nni nni = new Nni();
    Dictionary<string, string> parameters = nni.GetNextParameter();
    var trainer = ctx.MulticlassClassification.Trainers.LightGbm(CreateOptions(parameters));
    var model = trainer.Fit(trainSet);

    // Evaluate on the held-out split and print the metrics.
    var metrics = ctx.MulticlassClassification.Evaluate(model.Transform(testSet));
    Console.WriteLine($"MicroAccuracy: {metrics.MicroAccuracy}");
    Console.WriteLine($"MacroAccuracy: {metrics.MacroAccuracy}");
    Console.WriteLine($"LogLoss: {metrics.LogLoss}");
    Console.WriteLine($"LogLossReduction: {metrics.LogLossReduction}");
    Console.WriteLine($"TopKAccuracy: {metrics.TopKAccuracy}");

    // Report the trial result back to NNI.
    nni.ReportFinalResult(metrics.MicroAccuracy);
}
public static void DoAutoML()
{
    // Load the training data.
    IDataView trainingDataView = mlContext.Data.LoadFromTextFile<ModelInput>(
        path: TRAIN_DATA_FILEPATH,
        hasHeader: true,
        separatorChar: ',',
        allowQuoting: true,
        allowSparse: false);

    // 10-second multiclass sweep.
    var settings = new MulticlassExperimentSettings();
    settings.MaxExperimentTimeInSeconds = 10;
    MulticlassClassificationExperiment experiment = mlContext.Auto().CreateMulticlassClassificationExperiment(settings);

    // One-hot encode the categorical columns and assemble the feature vector.
    var preFeaturizer = mlContext.Transforms.Categorical.OneHotEncoding(new[]
        {
            new InputOutputColumnPair("Vehicle Type", "Vehicle Type"),
            new InputOutputColumnPair("Day", "Day")
        })
        .Append(mlContext.Transforms.Concatenate("Features", new[] { "Vehicle Type", "Day", "Ride Distance (km)", "Hour" }));

    ExperimentResult<Microsoft.ML.Data.MulticlassClassificationMetrics> experimentResult =
        experiment.Execute(trainingDataView, labelColumnName: "Saving", preFeaturizer: preFeaturizer);

    // Report the best run's validation metrics.
    var metrics = experimentResult.BestRun.ValidationMetrics;
    Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:0.##}");
    Console.WriteLine($"Micro Accuracy: {metrics.MicroAccuracy:0.##}");

    // Save model
    SaveModel(mlContext, experimentResult.BestRun.Model, MODEL_FILEPATH, trainingDataView.Schema);
}
static void Main(string[] args)
{
    var csvPath = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.MyDocuments), "tasks.csv");
    var context = new MLContext();
    var data = context.Data.LoadFromTextFile<TaskInput>(csvPath, hasHeader: true, separatorChar: ',');

    // 10-minute multiclass sweep, minimizing log-loss; "Tags" is the label.
    var settings = new MulticlassExperimentSettings
    {
        MaxExperimentTimeInSeconds = 600,
        OptimizingMetric = MulticlassClassificationMetric.LogLoss
    };
    var experiment = context.Auto().CreateMulticlassClassificationExperiment(settings);
    var result = experiment.Execute(data, new ColumnInformation { LabelColumnName = "Tags" });
    var bestModel = result.BestRun.Model;

    // Smoke-test the winning model with a single prediction.
    var engine = context.Model.CreatePredictionEngine<TaskInput, TaskOutput>(bestModel);
    var prediction = engine.Predict(new TaskInput { TaskName = "Introduction to ML.NET" });
    Console.WriteLine($"Predicted label - {prediction.PredictedLabel}");

    context.Model.Save(bestModel, data.Schema, "./clickup-model.zip");
}
public void IncorrectLabelColumnThrows()
{
    // Inferring columns against a nonexistent label column must fail fast.
    var dataPath = DatasetUtil.DownloadUciAdultDataset();
    var context = new MLContext();

    // Bind to an Action explicitly so the non-void InferColumns return value
    // does not select the obsolete Func<object> Assert.Throws overload.
    System.Action infer = () => context.Auto().InferColumns(dataPath, "Junk", groupColumns: false);
    Assert.Throws<ArgumentException>(infer);
}
static void Main(string[] args)
{
    // Define source data directory paths
    string solutionDirectory = "/home/lqdev/Development/RestaurantInspectionsSparkMLNET";
    string dataLocation = Path.Combine(solutionDirectory, "RestaurantInspectionsETL", "Output");

    // Initialize MLContext
    MLContext mlContext = new MLContext();

    // Get directory name of most recent ETL output.
    // FIX: previously OrderBy(...).First() selected the OLDEST directory;
    // descending order is needed to pick the most recent one.
    var latestOutput = Directory
        .GetDirectories(dataLocation)
        .Select(directory => new DirectoryInfo(directory))
        .OrderByDescending(directoryInfo => directoryInfo.Name)
        .Select(directory => Path.Join(directory.FullName, "Graded"))
        .First();

    var dataFilePaths = Directory
        .GetFiles(latestOutput)
        .Where(file => file.EndsWith("csv"))
        .ToArray();

    // Load the data
    var dataLoader = mlContext.Data.CreateTextLoader<ModelInput>(separatorChar: ',', hasHeader: false, allowQuoting: true, trimWhitespace: true);
    IDataView data = dataLoader.Load(dataFilePaths);

    // Split the data
    TrainTestData dataSplit = mlContext.Data.TrainTestSplit(data, testFraction: 0.2);
    IDataView trainData = dataSplit.TrainSet;
    IDataView testData = dataSplit.TestSet;

    // Define experiment settings
    var experimentSettings = new MulticlassExperimentSettings();
    experimentSettings.MaxExperimentTimeInSeconds = 600;
    experimentSettings.OptimizingMetric = MulticlassClassificationMetric.LogLoss;

    // Create experiment
    var experiment = mlContext.Auto().CreateMulticlassClassificationExperiment(experimentSettings);

    // Run experiment on the training split only.
    // FIX: the full dataset was previously passed here, which leaked the
    // held-out test rows into training and inflated the evaluation below.
    var experimentResults = experiment.Execute(trainData, progressHandler: new ProgressHandler());

    // Best Run Results
    var bestModel = experimentResults.BestRun.Model;

    // Evaluate Model
    IDataView scoredTestData = bestModel.Transform(testData);
    var metrics = mlContext.MulticlassClassification.Evaluate(scoredTestData);
    Console.WriteLine($"MicroAccuracy: {metrics.MicroAccuracy}");

    // Save Model
    string modelSavePath = Path.Join(solutionDirectory, "RestaurantInspectionsML", "model.zip");
    mlContext.Model.Save(bestModel, data.Schema, modelSavePath);
}
public void AutoFeaturizer_iris_test()
{
    // The auto-featurizer pipeline built for iris (label excluded) should be
    // stable; verify its serialized form against the approved snapshot.
    var context = new MLContext(1);
    var irisData = DatasetUtil.GetIrisDataView();
    var pipeline = context.Auto().Featurizer(irisData, excludeColumns: new[] { "Label" });
    Approvals.Verify(JsonSerializer.Serialize(pipeline, _jsonSerializerOptions));
}
/// <summary>
/// Infer columns in the dataset with AutoML.
/// </summary>
private static ColumnInferenceResults InferColumns(MLContext mlContext)
{
    ConsoleHelper.ConsoleWriteHeader("=============== Inferring columns in dataset ===============");
    var inference = mlContext.Auto().InferColumns(TrainDataPath, LabelColumnName, groupColumns: false);
    ConsoleHelper.Print(inference);
    return inference;
}
public void AutoFit_UCI_Adult_Train_Test_Split_Test()
{
    // Train/test split over the UCI Adult dataset with a 1-second budget.
    var context = new MLContext(1);
    var dataPath = DatasetUtil.GetUciAdultDataset();
    var inference = context.Auto().InferColumns(dataPath, DatasetUtil.UciAdultLabel);
    var loader = context.Data.CreateTextLoader(inference.TextLoaderOptions);
    var trainData = loader.Load(dataPath);
    var split = context.Data.TrainTestSplit(trainData);

    var result = context.Auto()
        .CreateBinaryClassificationExperiment(1)
        .Execute(split.TrainSet, split.TestSet, DatasetUtil.UciAdultLabel);

    // The best run should exceed 70% accuracy and expose all its artifacts.
    Assert.True(result.BestRun.ValidationMetrics.Accuracy > 0.70);
    Assert.NotNull(result.BestRun.Estimator);
    Assert.NotNull(result.BestRun.Model);
    Assert.NotNull(result.BestRun.TrainerName);
}
/// <summary>
/// Infer columns in the dataset with AutoML.
/// </summary>
private static ColumnInferenceResults InferColumns(MLContext mlContext, string TrainDataPath)
{
    Console.WriteLine("=============== Inferring columns in dataset ===============");
    // NOTE(review): printing the results object relies on its ToString();
    // verify it produces readable output rather than just the type name.
    ColumnInferenceResults inference = mlContext.Auto().InferColumns(TrainDataPath, LabelColumnName, groupColumns: false);
    Console.WriteLine(inference);
    return inference;
}