/// <summary>
/// Runs an AutoML regression experiment under the given culture and checks that
/// at least one run reaches an R-squared above 0.9 on the validation data.
/// For cultures other than "en-US" it also checks that at least 75 runs happened.
/// </summary>
/// <param name="culture">
/// Name of the culture installed on the current thread while the test runs.
/// The thread's original culture is restored afterwards.
/// </param>
public void AutoFitRegressionTest(string culture)
{
    var originalCulture = Thread.CurrentThread.CurrentCulture;
    try
    {
        Thread.CurrentThread.CurrentCulture = new CultureInfo(culture);

        // If users run AutoML with a different locale, sometimes
        // the sweeper encounters problems when parsing some strings.
        // So testing in another culture is necessary.
        // Furthermore, these issues might only occur after ~70
        // iterations, so more experiment time is needed for this to
        // occur.
        uint experimentTime = (uint)(culture == "en-US" ? 0 : 180);

        var experimentSettings = new RegressionExperimentSettings { MaxExperimentTimeInSeconds = experimentTime };
        if (!Environment.Is64BitProcess)
        {
            // LightGBM isn't available on x86 machines
            experimentSettings.Trainers.Remove(RegressionTrainer.LightGbm);
        }

        var context = new MLContext(1);
        var dataPath = DatasetUtil.GetMlNetGeneratedRegressionDataset();
        var columnInference = context.Auto().InferColumns(dataPath, DatasetUtil.MlNetGeneratedRegressionLabel);
        var textLoader = context.Data.CreateTextLoader(columnInference.TextLoaderOptions);
        var trainData = textLoader.Load(dataPath);

        // The first 20 rows become the validation set; the remainder is used for training.
        var validationData = context.Data.TakeRows(trainData, 20);
        trainData = context.Data.SkipRows(trainData, 20);

        var result = context.Auto()
            .CreateRegressionExperiment(experimentSettings)
            .Execute(trainData, validationData,
                new ColumnInformation() { LabelColumnName = DatasetUtil.MlNetGeneratedRegressionLabel });

        // Compare the best R-squared against the threshold (instead of taking the
        // maximum of per-run booleans). The null-conditional access skips any run
        // that has no ValidationMetrics rather than raising NullReferenceException.
        Assert.True(result.RunDetails.Max(i => i?.ValidationMetrics?.RSquared) > 0.9);

        // Ensure experimentTime allows enough iterations to fully test the internationalization code
        // If the below assertion fails, increase the experiment time so the number of iterations is met
        var runCount = result.RunDetails.Count();
        Assert.True(culture == "en-US" || runCount >= 75, $"RunDetails.Count() = {runCount}, below 75");
    }
    finally
    {
        Thread.CurrentThread.CurrentCulture = originalCulture;
    }
}
/// <summary>
/// Runs an AutoML regression experiment under the given culture with a cap on the
/// number of models, then checks the best validation R-squared exceeds 0.99.
/// For cultures other than "en-US" it also checks that exactly 75 runs were produced.
/// </summary>
/// <param name="culture">
/// Name of the culture installed on the current thread while the test runs.
/// The thread's original culture is restored afterwards.
/// </param>
public void AutoFitRegressionTest(string culture)
{
    var savedCulture = Thread.CurrentThread.CurrentCulture;
    try
    {
        Thread.CurrentThread.CurrentCulture = new CultureInfo(culture);

        // Under a non-default locale the sweeper can hit string-parsing problems,
        // and those may only show up after roughly 70 iterations. The model cap is
        // therefore raised to 75 for every culture except "en-US".
        int maxModels;
        if (culture == "en-US")
        {
            maxModels = 1;
        }
        else
        {
            maxModels = 75;
        }

        var settings = new RegressionExperimentSettings { MaxModels = maxModels };
        if (!Environment.Is64BitProcess)
        {
            // LightGBM isn't available on x86 machines
            settings.Trainers.Remove(RegressionTrainer.LightGbm);
        }

        var mlContext = new MLContext(1);
        var datasetPath = DatasetUtil.GetMlNetGeneratedRegressionDataset();
        var inferred = mlContext.Auto().InferColumns(datasetPath, DatasetUtil.MlNetGeneratedRegressionLabel);
        var loader = mlContext.Data.CreateTextLoader(inferred.TextLoaderOptions);
        var allRows = loader.Load(datasetPath);

        // First 20 rows are held out for validation; the rest is the training set.
        var validationRows = mlContext.Data.TakeRows(allRows, 20);
        var trainingRows = mlContext.Data.SkipRows(allRows, 20);

        var labelInfo = new ColumnInformation() { LabelColumnName = DatasetUtil.MlNetGeneratedRegressionLabel };
        var experiment = mlContext.Auto().CreateRegressionExperiment(settings);
        var result = experiment.Execute(trainingRows, validationRows, labelInfo);

        var bestRSquared = result.RunDetails.Max(i => i?.ValidationMetrics?.RSquared);
        Assert.True(bestRSquared > 0.99);

        // Test the internal maxModels parameter
        var runCount = result.RunDetails.Count();
        Assert.True(culture == "en-US" || runCount == 75, $"RunDetails.Count() = {runCount}, is not 75");
    }
    finally
    {
        Thread.CurrentThread.CurrentCulture = savedCulture;
    }
}
/// <summary>
/// Checks column inference when the label is supplied through a
/// <c>ColumnInformation</c> instance: the label column must be loaded as
/// <c>DataKind.Single</c>, the label name must be echoed back, there must be
/// exactly one numeric column named <c>DefaultColumnNames.Features</c>, and no
/// example-weight column may be reported.
/// </summary>
public void InferColumnsColumnInfoParam()
{
    var columnInfo = new ColumnInformation()
    {
        LabelColumnName = DatasetUtil.MlNetGeneratedRegressionLabel
    };

    var context = new MLContext(1);
    var datasetPath = DatasetUtil.GetMlNetGeneratedRegressionDataset();
    var result = context.Auto().InferColumns(datasetPath, columnInfo);

    // The label column in the loader options must be typed as a single-precision float.
    var labelCol = result.TextLoaderOptions.Columns
        .First(c => c.Name == DatasetUtil.MlNetGeneratedRegressionLabel);
    Assert.Equal(DataKind.Single, labelCol.DataKind);

    var inferredInfo = result.ColumnInformation;
    Assert.Equal(DatasetUtil.MlNetGeneratedRegressionLabel, inferredInfo.LabelColumnName);

    // Exactly one numeric column is expected, and it is the features column.
    var numericColumns = inferredInfo.NumericColumnNames;
    Assert.Single(numericColumns);
    Assert.Equal(DefaultColumnNames.Features, numericColumns.First());

    Assert.Null(inferredInfo.ExampleWeightColumnName);
}
/// <summary>
/// Runs an AutoML regression experiment created with a time budget argument of 0
/// and checks that at least one run reaches an R-squared above 0.9 on the
/// validation data.
/// </summary>
public void AutoFitRegressionTest()
{
    var context = new MLContext(1);
    var dataPath = DatasetUtil.GetMlNetGeneratedRegressionDataset();
    var columnInference = context.Auto().InferColumns(dataPath, DatasetUtil.MlNetGeneratedRegressionLabel);
    var textLoader = context.Data.CreateTextLoader(columnInference.TextLoaderOptions);
    var trainData = textLoader.Load(dataPath);

    // The first 20 rows become the validation set; the remainder is used for training.
    var validationData = context.Data.TakeRows(trainData, 20);
    trainData = context.Data.SkipRows(trainData, 20);

    var result = context.Auto()
        .CreateRegressionExperiment(0)
        .Execute(trainData, validationData,
            new ColumnInformation() { LabelColumnName = DatasetUtil.MlNetGeneratedRegressionLabel });

    // Compare the best R-squared against the threshold (instead of taking the
    // maximum of per-run booleans). The null-conditional access skips any run
    // that has no ValidationMetrics rather than raising NullReferenceException.
    Assert.True(result.RunDetails.Max(i => i?.ValidationMetrics?.RSquared) > 0.9);
}
/// <summary>
/// Runs an AutoML regression experiment under the given culture and checks that
/// at least one run reaches an R-squared above 0.9 on the validation data.
/// For cultures other than "en-US" it also checks that at least 75 runs happened.
/// An <see cref="AggregateException"/> made up only of
/// <see cref="OperationCanceledException"/>s is tolerated; any other inner
/// exception is rethrown.
/// </summary>
/// <param name="culture">
/// Name of the culture installed on the current thread while the test runs.
/// The thread's original culture is restored afterwards.
/// </param>
/// <exception cref="AggregateException">
/// Thrown when the experiment raised an <see cref="AggregateException"/> holding at
/// least one inner exception that is not an <see cref="OperationCanceledException"/>.
/// It carries all such inner exceptions.
/// </exception>
public void AutoFitRegressionTest(string culture)
{
    var originalCulture = Thread.CurrentThread.CurrentCulture;
    try
    {
        Thread.CurrentThread.CurrentCulture = new CultureInfo(culture);

        // If users run AutoML with a different locale, sometimes
        // the sweeper encounters problems when parsing some strings.
        // So testing in another culture is necessary.
        // Furthermore, these issues might only occur after ~70
        // iterations, so more experiment time is needed for this to
        // occur.
        uint experimentTime = (uint)(culture == "en-US" ? 0 : 180);

        var experimentSettings = new RegressionExperimentSettings { MaxExperimentTimeInSeconds = experimentTime };
        if (!Environment.Is64BitProcess)
        {
            // LightGBM isn't available on x86 machines
            experimentSettings.Trainers.Remove(RegressionTrainer.LightGbm);
        }

        var context = new MLContext(1);
        var dataPath = DatasetUtil.GetMlNetGeneratedRegressionDataset();
        var columnInference = context.Auto().InferColumns(dataPath, DatasetUtil.MlNetGeneratedRegressionLabel);
        var textLoader = context.Data.CreateTextLoader(columnInference.TextLoaderOptions);
        var trainData = textLoader.Load(dataPath);

        // The first 20 rows become the validation set; the remainder is used for training.
        var validationData = context.Data.TakeRows(trainData, 20);
        trainData = context.Data.SkipRows(trainData, 20);

        var result = context.Auto()
            .CreateRegressionExperiment(experimentSettings)
            .Execute(trainData, validationData,
                new ColumnInformation() { LabelColumnName = DatasetUtil.MlNetGeneratedRegressionLabel });

        Assert.True(result.RunDetails.Max(i => i?.ValidationMetrics?.RSquared) > 0.9);

        // Ensure experimentTime allows enough iterations to fully test the internationalization code
        // If the below assertion fails, increase the experiment time so the number of iterations is met
        var runCount = result.RunDetails.Count();
        Assert.True(culture == "en-US" || runCount >= 75, $"RunDetails.Count() = {runCount}, below 75");
    }
    catch (AggregateException ae)
    {
        // During CI unit testing, the host machines can run slower than normal, which
        // can increase the run time of unit tests and throw OperationCanceledExceptions
        // from multiple threads in the form of a single AggregateException.
        // Cancellations are ignored. Every other inner exception is collected over the
        // whole flattened list and rethrown together, so no real failure is dropped.
        var unexpectedExceptions = ae.Flatten().InnerExceptions
            .Where(ex => !(ex is OperationCanceledException))
            .ToList();

        if (unexpectedExceptions.Count > 0)
        {
            throw new AggregateException(unexpectedExceptions);
        }
    }
    finally
    {
        Thread.CurrentThread.CurrentCulture = originalCulture;
    }
}