/// <summary>
/// Builds the settings object used to run an AutoML regression experiment.
/// </summary>
/// <param name="mlContext">ML.NET context. Not read by this method; kept for the existing signature.</param>
/// <param name="cts">Source whose token is attached to the experiment so it can be cancelled.</param>
/// <returns>Settings with a 240 second budget and two trainers removed.</returns>
private static RegressionExperimentSettings CreateExperimentSettings(MLContext mlContext, CancellationTokenSource cts)
{
    var settings = new RegressionExperimentSettings
    {
        MaxExperimentTimeInSeconds = 240,
        CancellationToken = cts.Token,

        // The metric AutoML tries to optimize during the experiment.
        OptimizingMetric = (RegressionMetric)iMetrics,

        // A null cache directory keeps every model produced by AutoML in memory
        // instead of writing it to disk after each run. On a large dataset this
        // can exhaust system memory.
        CacheDirectory = null,
    };

    // Exclude trainers that sometimes underperform on this dataset.
    settings.Trainers.Remove(RegressionTrainer.LbfgsPoissonRegression);
    settings.Trainers.Remove(RegressionTrainer.OnlineGradientDescent);

    return settings;
}
/// <summary>
/// Loads the traffic volume CSV, runs a 10 second AutoML regression experiment
/// that can be cancelled by a key press, and prints the metrics of the best run.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    var mlContext = new MLContext();

    var dataPath = GetAbsolutePath("../../../Data/Metro_Interstate_Traffic_Volume.csv");
    IDataView trainDataView = mlContext.Data.LoadFromTextFile<TrafficData>(dataPath, hasHeader: true, separatorChar: ',');

    // Experiment configuration; the token lets the run be stopped early.
    var cts = new CancellationTokenSource();
    var experimentSettings = new RegressionExperimentSettings
    {
        MaxExperimentTimeInSeconds = 10,
        CancellationToken = cts.Token,
        OptimizingMetric = RegressionMetric.MeanSquaredError,
        CacheDirectory = null,
    };

    // Cancel the experiment after the user presses any key.
    CancelExperimentAfterAnyKeyPress(cts);

    // Create and execute the experiment.
    RegressionExperiment experiment = mlContext.Auto().CreateRegressionExperiment(experimentSettings);
    var progressHandler = new RegressionExperimentProgressHandler();
    ExperimentResult<RegressionMetrics> outcome = experiment.Execute(
        trainDataView,
        labelColumnName: "Label",
        progressHandler: progressHandler);

    // Report the validation metrics of the best run.
    RegressionMetrics bestMetrics = outcome.BestRun.ValidationMetrics;
    Console.WriteLine($"Best Algorthm: {outcome.BestRun.TrainerName}");
    Console.WriteLine($"R-Squared: {bestMetrics.RSquared:0.##}");
    Console.WriteLine($"Root Mean Squared Error: {bestMetrics.RootMeanSquaredError:0.##}");

    Console.ReadKey();
}
/// <summary>
/// Reads the CSV data, builds a data view and runs a 600 second AutoML regression
/// experiment after switching the default thread culture to it-IT.
/// Any exception is printed (message and stack trace); the method always waits
/// for a line of console input before returning.
/// </summary>
private void Run()
{
    try
    {
        var context = new MLContext();
        var rows = ReadCsv(@"data\data.csv");
        var trainingView = BuildDataView(context, rows);

        var settings = new RegressionExperimentSettings();
        settings.MaxExperimentTimeInSeconds = 600;
        settings.CacheDirectory = new DirectoryInfo(@".\cache");

        var regressionExperiment = context.Auto().CreateRegressionExperiment(settings);

        // The data has already been parsed using the invariant culture, so the
        // culture switch below only affects threads created from here on.
        var italian = CultureInfo.CreateSpecificCulture("it-IT");
        CultureInfo.DefaultThreadCurrentCulture = italian;

        var experimentResult = regressionExperiment.Execute(trainingView);
        var bestRun = experimentResult.BestRun;

        Console.WriteLine("Done.");
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
        Console.WriteLine(ex.StackTrace);
    }
    finally
    {
        Console.ReadLine();
    }
}
/// <summary>
/// Downloads one month of hourly AAPL bars, runs a 20 second AutoML regression
/// experiment on them and prints the best run.
/// </summary>
/// <remarks>
/// The service provider and the logger factory are disposed when the method ends
/// so that DI-owned services and logging providers are released. The finance
/// service is resolved with <c>GetRequiredService</c>, which throws
/// <see cref="InvalidOperationException"/> if the registration is missing instead
/// of returning null and failing later with a NullReferenceException.
/// </remarks>
/// <param name="args">Command-line arguments (unused).</param>
static async Task Main(string[] args)
{
    // Set up DI and logging; both own disposable resources.
    using (var serviceProvider = new ServiceCollection()
        .AddLogging()
        .AddSingleton<IYahooFinanceService, YahooFinanceService>()
        .AddHttpClient()
        .BuildServiceProvider())
    using (var loggerFactory = LoggerFactory.Create(builder =>
    {
        builder.AddFilter("Microsoft", LogLevel.Warning)
            .AddFilter("System", LogLevel.Warning)
            .AddFilter("CandleStickMachineLearning.Program", LogLevel.Debug)
            .AddConsole();
    }))
    {
        var logger = loggerFactory.CreateLogger<Program>();
        logger.LogInformation("Hello World!");

        // Fetch the training data.
        var yahooFinanceService = serviceProvider.GetRequiredService<IYahooFinanceService>();
        var barsList = await yahooFinanceService.GetBars("AAPL", DateTime.UtcNow.AddMonths(-1), DateTime.UtcNow, "1h");

        var context = new MLContext();
        var trainData = context.Data.LoadFromEnumerable<Models.Bar>(barsList);

        var settings = new RegressionExperimentSettings
        {
            MaxExperimentTimeInSeconds = 20,
            OptimizingMetric = RegressionMetric.MeanAbsoluteError
        };

        var labelColumnInfo = new ColumnInformation()
        {
            LabelColumnName = "Label"
        };

        // Log each run that produced validation metrics.
        var progress = new Progress<RunDetail<RegressionMetrics>>(p =>
        {
            if (p.ValidationMetrics != null)
            {
                logger.LogInformation($"Current Result - {p.TrainerName}, {p.ValidationMetrics.RSquared}, {p.ValidationMetrics.MeanAbsoluteError}");
            }
        });

        var experiment = context.Auto().CreateRegressionExperiment(settings);
        var result = experiment.Execute(trainData, labelColumnInfo, progressHandler: progress);

        Console.WriteLine(Environment.NewLine);
        Console.WriteLine("Best run:");
        Console.WriteLine($"Trainer name - {result.BestRun.TrainerName}");
        Console.WriteLine($"RSquared - {result.BestRun.ValidationMetrics.RSquared}");
        Console.WriteLine($"MAE - {result.BestRun.ValidationMetrics.MeanAbsoluteError}");

        // Keep the console open (and the providers alive) until the user presses Enter.
        Console.ReadLine();
    }
}
/// <summary>
/// Infers the columns of the training file, adjusts the inferred column roles,
/// runs a 10 second AutoML regression experiment, prints predictions made by the
/// best model on the training data, then refits the best estimator and saves the
/// resulting model to <c>MODEL_FILEPATH</c>.
/// </summary>
public void Start()
{
    // Infer the columns, with "next" as the label, and load the training data.
    var inferred = mlContext.Auto().InferColumns(
        path: TRAIN_DATA_FILEPATH,
        labelColumnName: "next",
        groupColumns: false);

    TextLoader loader = mlContext.Data.CreateTextLoader(inferred.TextLoaderOptions);
    trainData = loader.Load(TRAIN_DATA_FILEPATH);

    // Adjust the inferred column information:
    // "productId" and "year" become categorical instead of numeric.
    columnInformation = inferred.ColumnInformation;
    foreach (var categoricalName in new[] { "productId", "year" })
    {
        columnInformation.CategoricalColumnNames.Add(categoricalName);
        columnInformation.NumericColumnNames.Remove(categoricalName);
    }

    // "units" is ignored altogether.
    columnInformation.NumericColumnNames.Remove("units");
    columnInformation.IgnoredColumnNames.Add("units");

    var settings = new RegressionExperimentSettings();
    settings.MaxExperimentTimeInSeconds = 10;
    settings.OptimizingMetric = RegressionMetric.RootMeanSquaredError;
    settings.CacheDirectory = new DirectoryInfo(CACHE_DIRECTORY);
    settings.CancellationToken = cancelationTokenSource.Token;

    // Exclude the Ols trainer from the experiment.
    settings.Trainers.Remove(RegressionTrainer.Ols);

    RegressionExperiment experiment = mlContext.Auto().CreateRegressionExperiment(settings);
    ExperimentResult<RegressionMetrics> experimentResult = experiment.Execute(
        trainData: trainData,
        columnInformation: columnInformation,
        progressHandler: new RegressionProgressHandler(),
        preFeaturizer: null);

    var bestRun = experimentResult.BestRun;
    ITransformer bestModel = bestRun.Model;
    IEstimator<ITransformer> bestEstimator = bestRun.Estimator;

    // Make batch predictions with the best model.
    IDataView predictions = bestModel.Transform(trainData);
    PrintPredictions(predictions);
    PrintPredictionsEnumerable(predictions);

    // Refit the winning estimator on the training data and persist that model.
    ITransformer refitted = bestEstimator.Fit(trainData);
    mlContext.Model.Save(refitted, trainData.Schema, MODEL_FILEPATH);

    Console.WriteLine("Done");
}
/// <summary>
/// Runs an AutoML regression experiment under the given culture and checks that
/// at least one run reaches an R-squared above 0.9.
/// </summary>
/// <remarks>
/// If users run AutoML with a different locale, the sweeper sometimes has trouble
/// parsing some strings, so testing in another culture is necessary. These issues
/// might only occur after ~70 iterations, so non en-US cultures get more
/// experiment time.
/// </remarks>
/// <param name="culture">Culture name to install on the current thread for the test.</param>
public void AutoFitRegressionTest(string culture)
{
    var originalCulture = Thread.CurrentThread.CurrentCulture;
    try
    {
        Thread.CurrentThread.CurrentCulture = new CultureInfo(culture);

        uint experimentTime = (uint)(culture == "en-US" ? 0 : 180);

        var experimentSettings = new RegressionExperimentSettings { MaxExperimentTimeInSeconds = experimentTime };
        if (!Environment.Is64BitProcess)
        {
            // LightGBM isn't available on x86 machines
            experimentSettings.Trainers.Remove(RegressionTrainer.LightGbm);
        }

        var context = new MLContext(1);
        var dataPath = DatasetUtil.GetMlNetGeneratedRegressionDataset();
        var columnInference = context.Auto().InferColumns(dataPath, DatasetUtil.MlNetGeneratedRegressionLabel);
        var textLoader = context.Data.CreateTextLoader(columnInference.TextLoaderOptions);
        var trainData = textLoader.Load(dataPath);

        // The first 20 rows validate, the remainder trains.
        var validationData = context.Data.TakeRows(trainData, 20);
        trainData = context.Data.SkipRows(trainData, 20);

        var result = context.Auto()
            .CreateRegressionExperiment(experimentSettings)
            .Execute(trainData, validationData,
                new ColumnInformation() { LabelColumnName = DatasetUtil.MlNetGeneratedRegressionLabel });

        // Compare the best R-squared against the threshold. The comparison must sit
        // outside Max(); the null-conditional access skips runs that carry no
        // validation metrics instead of throwing a NullReferenceException.
        Assert.True(result.RunDetails.Max(i => i?.ValidationMetrics?.RSquared) > 0.9);

        // Ensure experimentTime allows enough iterations to fully test the internationalization code.
        // If the assertion below fails, increase the experiment time so the number of iterations is met.
        int runCount = result.RunDetails.Count();
        Assert.True(culture == "en-US" || runCount >= 75, $"RunDetails.Count() = {runCount}, below 75");
    }
    finally
    {
        Thread.CurrentThread.CurrentCulture = originalCulture;
    }
}
/// <summary>
/// Runs an AutoML regression experiment under the given culture, bounded by a
/// model count instead of a time budget, and checks the best R-squared and the
/// number of runs produced.
/// </summary>
/// <remarks>
/// Running AutoML under a non en-US locale can make the sweeper fail when parsing
/// some strings, and such failures might only show up after ~70 iterations, so
/// other cultures train 75 models while en-US trains a single one.
/// </remarks>
/// <param name="culture">Culture name to install on the current thread for the test.</param>
public void AutoFitRegressionTest(string culture)
{
    var savedCulture = Thread.CurrentThread.CurrentCulture;
    try
    {
        Thread.CurrentThread.CurrentCulture = new CultureInfo(culture);

        int maxModels;
        if (culture == "en-US")
        {
            maxModels = 1;
        }
        else
        {
            maxModels = 75;
        }

        var settings = new RegressionExperimentSettings { MaxModels = maxModels };
        if (!Environment.Is64BitProcess)
        {
            // LightGBM isn't available on x86 machines.
            settings.Trainers.Remove(RegressionTrainer.LightGbm);
        }

        var mlContext = new MLContext(1);
        var datasetPath = DatasetUtil.GetMlNetGeneratedRegressionDataset();
        var inference = mlContext.Auto().InferColumns(datasetPath, DatasetUtil.MlNetGeneratedRegressionLabel);
        var loader = mlContext.Data.CreateTextLoader(inference.TextLoaderOptions);
        var allRows = loader.Load(datasetPath);

        // The first 20 rows validate, the remainder trains.
        var validationRows = mlContext.Data.TakeRows(allRows, 20);
        var trainingRows = mlContext.Data.SkipRows(allRows, 20);

        var labelInfo = new ColumnInformation() { LabelColumnName = DatasetUtil.MlNetGeneratedRegressionLabel };
        var experiment = mlContext.Auto().CreateRegressionExperiment(settings);
        var result = experiment.Execute(trainingRows, validationRows, labelInfo);

        var bestRSquared = result.RunDetails.Max(i => i?.ValidationMetrics?.RSquared);
        Assert.True(bestRSquared > 0.99);

        // Test the internal maxModels parameter.
        var runCount = result.RunDetails.Count();
        Assert.True(culture == "en-US" || runCount == 75,
            $"RunDetails.Count() = {runCount}, is not 75");
    }
    finally
    {
        Thread.CurrentThread.CurrentCulture = savedCulture;
    }
}
// Original author notes, kept as written: "make train already existing model";
// "not having the supported data types".
/// <summary>
/// Runs a ten minute AutoML regression experiment over the given records and
/// prints the result.
/// </summary>
/// <param name="imageData">Records loaded into the training data view.</param>
static void RunAutoML(List<Data> imageData)
{
    var context = new MLContext();
    var trainingView = context.Data.LoadFromEnumerable<Data>(imageData);

    // AutoML experiment limited to 10 * 60 seconds.
    var experimentSettings = new RegressionExperimentSettings
    {
        MaxExperimentTimeInSeconds = 10 * 60
    };
    var regressionExperiment = context.Auto().CreateRegressionExperiment(experimentSettings);

    Debug.WriteLine("Running expiriment");
    var outcome = regressionExperiment.Execute(trainingView);
    outcome.Print();
}
/// <summary>
/// Trains a regression model with AutoML (10 minute budget, optimizing R-squared),
/// prints the best run's R-squared, then predicts the price of one sample home.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    var context = new MLContext(seed: 0);

    // Load the data.
    var trainingData = context.Data.LoadFromTextFile<Input>(_path, hasHeader: true, separatorChar: ',');

    // Configure and create the experiment.
    var settings = new RegressionExperimentSettings();
    settings.MaxExperimentTimeInSeconds = 600; // 10 minutes max
    settings.OptimizingMetric = RegressionMetric.RSquared;
    settings.CacheDirectory = null;

    var experiment = context.Auto().CreateRegressionExperiment(settings);

    // Run the experiment.
    Console.WriteLine("Running the experiment...");
    var result = experiment.Execute(trainingData);
    var bestRun = result.BestRun;

    RegressionMetrics metrics = bestRun.ValidationMetrics;
    Console.WriteLine($"R2 score: {metrics.RSquared:0.##}");
    Console.WriteLine();

    // Use the best model to make a prediction.
    var engine = context.Model.CreatePredictionEngine<Input, Output>(bestRun.Model);
    var sample = new Input
    {
        Bathrooms = 1.0f,
        Bedrooms = 1.0f,
        TotalRooms = 3.0f,
        FinishedSquareFeet = 653.0f,
        UseCode = "Condominium",
        LastSoldPrice = 0.0f
    };

    var prediction = engine.Predict(sample);
    Console.WriteLine($"Predicted price: ${prediction.Price:n0}; Actual price: $665,000");
    Console.WriteLine();
}
/// <summary>
/// Infers the file's columns from the label column name, trains with AutoML for
/// 60 seconds on an 80/20 train/test split, and prints R-squared on the test set.
/// </summary>
/// <remarks>
/// Column inference can alternatively be driven by a <c>ColumnInformation</c>
/// instance or by a label column index; this sample uses the label column name.
/// </remarks>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    var context = new MLContext();

    // Infer the schema using the label column name.
    var inference = context.Auto().InferColumns(FILE_PATH, labelColumnName: LABEL_NAME, separatorChar: ',');

    // Load everything and hold 20% back for testing.
    var loader = context.Data.CreateTextLoader(inference.TextLoaderOptions);
    var allData = loader.Load(FILE_PATH);
    var split = context.Data.TrainTestSplit(allData, testFraction: 0.2);

    var settings = new RegressionExperimentSettings();
    settings.MaxExperimentTimeInSeconds = 60;
    settings.OptimizingMetric = RegressionMetric.RSquared;

    var experiment = context.Auto().CreateRegressionExperiment(settings);
    var experimentResult = experiment.Execute(split.TrainSet, labelColumnName: LABEL_NAME);

    // Score the held-out rows with the best model and evaluate.
    var bestModel = experimentResult.BestRun.Model;
    var predictions = bestModel.Transform(split.TestSet);
    var metrics = context.Regression.Evaluate(predictions, LABEL_NAME);

    Console.WriteLine($"R^2: {metrics.RSquared}");
    Console.Write(Environment.NewLine);
}
/// <summary>
/// Runs a five minute AutoML regression experiment on the training set,
/// optimizing mean absolute error, and prints every completed run plus the best one.
/// </summary>
/// <param name="context">ML.NET context used to create the experiment and the data view.</param>
/// <param name="train">Training data, converted through <c>ModelTrainer.ToDataView</c>.</param>
/// <param name="validation">Not used by this method at present.</param>
public static void AutoMlOnDataset(MLContext context, TrainingData train, TrainingData validation)
{
    var settings = new RegressionExperimentSettings();
    settings.MaxExperimentTimeInSeconds = 60 * 5;
    settings.OptimizingMetric = RegressionMetric.MeanAbsoluteError;
    settings.CacheDirectory = null;

    var experiment = context.Auto().CreateRegressionExperiment(settings);
    IProgress<RunDetail<RegressionMetrics>> progressHandler = new Handler();

    var trainingView = ModelTrainer.ToDataView(context, train);
    var experimentResults = experiment.Execute(trainingView, progressHandler: progressHandler);

    foreach (var run in experimentResults.RunDetails)
    {
        // Skip runs that carry no trainer name or no validation metrics.
        if (run.TrainerName == null || run.ValidationMetrics == null)
        {
            continue;
        }

        Console.WriteLine($"Trainer={run.TrainerName}; MAE={run.ValidationMetrics.MeanAbsoluteError}");
    }

    var winner = experimentResults.BestRun;
    Console.WriteLine($"Lé best trainer is {winner.TrainerName} with MAE={winner.ValidationMetrics.MeanAbsoluteError}");
}
/// <summary>
/// Sets up AutoML: runs a 10 second regression experiment on the given data,
/// printing the metrics of each run as it completes and then those of the best run.
/// </summary>
/// <param name="mlContext">ML.NET context used to create the experiment.</param>
/// <param name="training_data_view">Training data; the label column is named "Label".</param>
private static void Best_Machine_Learning_Algorithm(MLContext mlContext, IDataView training_data_view)
{
    Console.WriteLine("=============== Performing Regression Analysis for the best algorithm for the dataset model ===============");

    var experimentSettings = new RegressionExperimentSettings();
    experimentSettings.MaxExperimentTimeInSeconds = 10;

    var regressionExperiment = mlContext.Auto().CreateRegressionExperiment(experimentSettings);

    var progress = new Progress<RunDetail<RegressionMetrics>>(run =>
    {
        // Runs without validation metrics are not reported.
        if (run.ValidationMetrics == null)
        {
            return;
        }

        // The shared stopwatch runs only while this report is being written.
        stopwatch.Start();
        var current = run.ValidationMetrics;
        Console.WriteLine(Environment.NewLine);
        Console.WriteLine("Current result:");
        Console.WriteLine($"Metrics for Trainer name - {run.TrainerName}");
        Console.WriteLine($"RSquared - {current.RSquared:0.##}");
        Console.WriteLine($"RootMeanSquaredError - {current.RootMeanSquaredError:0.##}");
        Console.WriteLine($"Absolute Loss - {current.MeanAbsoluteError:0.##}");
        Console.WriteLine($"Squared Loss - {current.MeanSquaredError:0.##}");
        stopwatch.Stop();
        Console.WriteLine($"Time taken - {stopwatch.ElapsedMilliseconds} ms");
    });

    var result = regressionExperiment.Execute(training_data_view, labelColumnName: "Label", progressHandler: progress);

    Console.WriteLine(Environment.NewLine);
    Console.WriteLine("Best run:");
    Console.WriteLine($"Trainer name - {result.BestRun.TrainerName}");
    Console.WriteLine($"RSquared - {result.BestRun.ValidationMetrics.RSquared:0.##}");
    Console.WriteLine($"RootMeanSquaredError - {result.BestRun.ValidationMetrics.RootMeanSquaredError:0.##}");
    Console.WriteLine($"Absolute Loss - {result.BestRun.ValidationMetrics.MeanAbsoluteError:0.##}");
    Console.WriteLine($"Squared Loss - {result.BestRun.ValidationMetrics.MeanSquaredError:0.##}");
}
/// <summary>
/// Loads housing.csv, runs a 20 second AutoML regression experiment optimizing
/// mean absolute error, reports each run as it completes and then the best run.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    var mlContext = new MLContext();
    var housingData = mlContext.Data.LoadFromTextFile<HousingData>("./housing.csv", hasHeader: true, separatorChar: ',');

    var experimentSettings = new RegressionExperimentSettings();
    experimentSettings.MaxExperimentTimeInSeconds = 20;
    experimentSettings.OptimizingMetric = RegressionMetric.MeanAbsoluteError;

    var labelInfo = new ColumnInformation();
    labelInfo.LabelColumnName = "Label";

    // Report every run that produced validation metrics.
    var progress = new Progress<RunDetail<RegressionMetrics>>(run =>
    {
        if (run.ValidationMetrics == null)
        {
            return;
        }

        Console.WriteLine($"Current Result - {run.TrainerName}, {run.ValidationMetrics.RSquared}, {run.ValidationMetrics.MeanAbsoluteError}");
    });

    var regressionExperiment = mlContext.Auto().CreateRegressionExperiment(experimentSettings);
    var outcome = regressionExperiment.Execute(housingData, labelInfo, progressHandler: progress);

    Console.WriteLine(Environment.NewLine);
    Console.WriteLine("Best run:");
    Console.WriteLine($"Trainer name - {outcome.BestRun.TrainerName}");
    Console.WriteLine($"RSquared - {outcome.BestRun.ValidationMetrics.RSquared}");
    Console.WriteLine($"MAE - {outcome.BestRun.ValidationMetrics.MeanAbsoluteError}");

    Console.ReadLine();
}
/// <summary>
/// HTTP-triggered entry point that trains a model with AutoML from the posted
/// <c>TrainInput</c> JSON, stores the best model in the database and returns its
/// metadata.
/// </summary>
/// <remarks>
/// The posted data is written to a temporary file so it can be loaded with the
/// text loader. That file is deleted on every exit path; cleanup is best-effort
/// and a failure to delete is only logged, so it never replaces the real result.
/// </remarks>
/// <param name="req">Request whose body holds the serialized <c>TrainInput</c>.</param>
/// <param name="log">Logger that receives error and cleanup messages.</param>
/// <returns>
/// A result with <c>Success = true</c> and the stored model's metadata, or
/// <c>Success = false</c> and the caught exception when anything fails.
/// </returns>
public static ReturnResult<Model> Run([HttpTrigger(AuthorizationLevel.Anonymous, "post", Route = null)] HttpRequest req, ILogger log)
{
    var dataFilePath = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString());

    try
    {
        db.BeginTransaction();
        MLContext context = new MLContext();

        TrainInput input = null;
        using (StreamReader reader = new StreamReader(req.Body))
        {
            input = JsonConvert.DeserializeObject<TrainInput>(reader.ReadToEnd());
        }

        File.WriteAllText(dataFilePath, input.Data);

        IDataView LoadedData = null;
        var columnData = new List<TextLoader.Column>();
        foreach (var c in input.Columns)
        {
            // Data type 1 means "ignore this column".
            if (c.Type != 1)
            {
                var newColData = new TextLoader.Column()
                {
                    DataKind = (DataKind)c.Type,
                    Name = c.ColumnName,
                    Source = new TextLoader.Range[] { new TextLoader.Range(c.ColumnIndex) }
                };

                columnData.Add(newColData);
            }
        }

        LoadedData = context.Data.LoadFromTextFile(
            dataFilePath,
            columnData.ToArray(),
            separatorChar: input.Separator,
            hasHeader: input.HasHeaders,
            allowQuoting: true
        );

        LoadedData = context.Data.ShuffleRows(LoadedData);

        /*
         * Multiclass will be used in the case of binary experiments and multiclass experiments.
         * This is because multiclass can accept all types as an output column. This will
         * allow less interaction with the user and a better user experience.
         */

        double bestRunMetric = 0;
        ITransformer bestModel = null;

        if (input.ModelType == TrainInput.ModelTypes.Multiclass)
        {
            ExperimentResult<MulticlassClassificationMetrics> Results = null;
            var settings = new MulticlassExperimentSettings() { MaxExperimentTimeInSeconds = 20 };
            var training = context.Auto().CreateMulticlassClassificationExperiment(settings);
            Results = training.Execute(LoadedData, labelColumnName: input.LabelColumn);
            bestRunMetric = Results.BestRun.ValidationMetrics.MacroAccuracy;
            bestModel = Results.BestRun.Model;
        }
        else if (input.ModelType == TrainInput.ModelTypes.Binary)
        {
            ExperimentResult<BinaryClassificationMetrics> Results = null;
            var settings = new BinaryExperimentSettings() { MaxExperimentTimeInSeconds = 20 };
            var training = context.Auto().CreateBinaryClassificationExperiment(settings);
            Results = training.Execute(LoadedData, labelColumnName: input.LabelColumn);
            bestRunMetric = Results.BestRun.ValidationMetrics.Accuracy;
            bestModel = Results.BestRun.Model;
        }
        else if (input.ModelType == TrainInput.ModelTypes.Regression)
        {
            ExperimentResult<RegressionMetrics> Results = null;
            var settings = new RegressionExperimentSettings() { MaxExperimentTimeInSeconds = 20 };
            var training = context.Auto().CreateRegressionExperiment(settings);
            Results = training.Execute(LoadedData, labelColumnName: input.LabelColumn);
            bestRunMetric = Results.BestRun.ValidationMetrics.RSquared;
            bestModel = Results.BestRun.Model;

            // A negative R-squared is reported as 0.
            if (bestRunMetric < 0)
            {
                bestRunMetric = 0;
            }
        }
        else
        {
            throw new Exception("Invalid model type");
        }

        var modelFileId = 0;
        using (MemoryStream ms = new MemoryStream())
        {
            context.Model.Save(bestModel, LoadedData.Schema, ms);

            // Save the model to the database.
            FileStore modelSave = new FileStore()
            {
                Data = ms.ToArray()
            };

            modelFileId = FileStore.InsertUpdate(db, modelSave).Item.FileStoreId;
        }

        var resultModel = new Model()
        {
            FileStoreId = modelFileId,
            Accuracy = bestRunMetric,
            Rows = input.Data.Trim().Split('\n').Length
        };

        db.CompleteTransaction();

        return new ReturnResult<Model>()
        {
            Success = true,
            Item = resultModel
        };
    }
    catch (Exception e)
    {
        db.AbortTransaction();
        log.LogError(e.Message);
        return new ReturnResult<Model>()
        {
            Success = false,
            Exception = e
        };
    }
    finally
    {
        // Remove the temporary data file so requests do not accumulate files in
        // the temp directory. File.Delete does not throw when the file is absent.
        try
        {
            File.Delete(dataFilePath);
        }
        catch (Exception cleanupError) when (cleanupError is IOException || cleanupError is UnauthorizedAccessException)
        {
            log.LogWarning($"Could not delete temporary data file '{dataFilePath}': {cleanupError.Message}");
        }
    }
}
/// <summary>
/// Runs an AutoML regression experiment under the given culture and checks that
/// at least one run reaches an R-squared above 0.9.
/// </summary>
/// <remarks>
/// If users run AutoML with a different locale, the sweeper sometimes has trouble
/// parsing some strings, so testing in another culture is necessary. These issues
/// might only occur after ~70 iterations, so non en-US cultures get more
/// experiment time.
/// Cancellations surfaced through an <see cref="AggregateException"/> are
/// tolerated; every other inner exception is rethrown together in a new
/// <see cref="AggregateException"/>.
/// </remarks>
/// <param name="culture">Culture name to install on the current thread for the test.</param>
public void AutoFitRegressionTest(string culture)
{
    var originalCulture = Thread.CurrentThread.CurrentCulture;
    try
    {
        Thread.CurrentThread.CurrentCulture = new CultureInfo(culture);

        uint experimentTime = (uint)(culture == "en-US" ? 0 : 180);

        var experimentSettings = new RegressionExperimentSettings { MaxExperimentTimeInSeconds = experimentTime };
        if (!Environment.Is64BitProcess)
        {
            // LightGBM isn't available on x86 machines
            experimentSettings.Trainers.Remove(RegressionTrainer.LightGbm);
        }

        var context = new MLContext(1);
        var dataPath = DatasetUtil.GetMlNetGeneratedRegressionDataset();
        var columnInference = context.Auto().InferColumns(dataPath, DatasetUtil.MlNetGeneratedRegressionLabel);
        var textLoader = context.Data.CreateTextLoader(columnInference.TextLoaderOptions);
        var trainData = textLoader.Load(dataPath);

        // The first 20 rows validate, the remainder trains.
        var validationData = context.Data.TakeRows(trainData, 20);
        trainData = context.Data.SkipRows(trainData, 20);

        var result = context.Auto()
            .CreateRegressionExperiment(experimentSettings)
            .Execute(trainData, validationData,
                new ColumnInformation() { LabelColumnName = DatasetUtil.MlNetGeneratedRegressionLabel });

        Assert.True(result.RunDetails.Max(i => i?.ValidationMetrics?.RSquared) > 0.9);

        // Ensure experimentTime allows enough iterations to fully test the internationalization code.
        // If the assertion below fails, increase the experiment time so the number of iterations is met.
        int runCount = result.RunDetails.Count();
        Assert.True(culture == "en-US" || runCount >= 75, $"RunDetails.Count() = {runCount}, below 75");
    }
    catch (AggregateException ae)
    {
        // During CI unit testing, the host machines can run slower than normal, which
        // can increase the run time of unit tests and throw OperationCanceledExceptions
        // from multiple threads in the form of a single AggregateException.
        // Collect everything that is NOT a cancellation first, then rethrow once, so
        // no genuine failure is dropped.
        var unexpectedExceptions = ae.Flatten().InnerExceptions
            .Where(ex => !(ex is OperationCanceledException))
            .ToList();

        if (unexpectedExceptions.Count > 0)
        {
            throw new AggregateException(unexpectedExceptions);
        }
    }
    finally
    {
        Thread.CurrentThread.CurrentCulture = originalCulture;
    }
}