public ITuner CreateTuner(TrialSettings settings)
{
    var experimentSetting = _provider.GetService<AutoMLExperiment.AutoMLExperimentSettings>();
    var searchSpace = settings.Pipeline.SearchSpace;
    var initParameter = settings.Pipeline.Parameter;
    var isMaximize = experimentSetting.IsMaximizeMetric;

    return new CostFrugalTuner(searchSpace, initParameter, !isMaximize);
}
public void Update(TrialSettings settings, TrialResult result)
{
    var schema = settings.Schema;
    if (_tuners.TryGetValue(schema, out var tuner))
    {
        tuner.Update(result);
    }
}
public void ReportFailTrial(TrialSettings settings, Exception exp)
{
    var result = new BinaryClassificationTrialResult
    {
        TrialSettings = settings,
        Exception = exp,
    };

    RunDetails.Add(result);
}
public Parameter Propose(TrialSettings settings)
{
    if (!_enumerator.MoveNext())
    {
        // The current batch of proposals is exhausted; ask the tuner for a new batch and retry.
        _enumerator = _tuner.Propose().GetEnumerator();
        return Propose(settings);
    }
    else
    {
        var res = _enumerator.Current;
        return res;
    }
}
public TrialSettings Propose(TrialSettings settings)
{
    var tunerFactory = _provider.GetService<ITunerFactory>();
    if (!_tuners.ContainsKey(settings.Schema))
    {
        var t = tunerFactory.CreateTuner(settings);
        _tuners.Add(settings.Schema, t);
    }

    var tuner = _tuners[settings.Schema];
    var parameter = tuner.Propose(settings);
    settings.Parameter = parameter;

    return settings;
}
public TrialResult Run(TrialSettings settings, IServiceProvider provider)
{
    var rnd = new Random(settings.ExperimentSettings.Seed ?? 0);
    if (_datasetManager is CrossValidateDatasetManager datasetSettings
        && _metricManager is BinaryMetricManager metricSettings)
    {
        var stopWatch = new Stopwatch();
        stopWatch.Start();
        var fold = datasetSettings.Fold ?? 5;
        var pipeline = settings.Pipeline.BuildTrainingPipeline(_context, settings.Parameter);
        var metrics = _context.BinaryClassification.CrossValidateNonCalibrated(datasetSettings.Dataset, pipeline, fold, metricSettings.LabelColumn);

        // For now we just randomly pick a model, but a better way is to provide an option to pick
        // the model whose score is closest to the average, or the best one (see the sketch after this method).
        var res = metrics[rnd.Next(fold)];
        var model = res.Model;
        var metric = metricSettings.Metric switch
        {
            BinaryClassificationMetric.PositivePrecision => res.Metrics.PositivePrecision,
            BinaryClassificationMetric.Accuracy => res.Metrics.Accuracy,
            BinaryClassificationMetric.AreaUnderRocCurve => res.Metrics.AreaUnderRocCurve,
            BinaryClassificationMetric.AreaUnderPrecisionRecallCurve => res.Metrics.AreaUnderPrecisionRecallCurve,
            _ => throw new NotImplementedException($"{metricSettings.Metric} is not supported!"),
        };
        stopWatch.Stop();

        return new BinaryClassificationTrialResult()
        {
            Metric = metric,
            Model = model,
            TrialSettings = settings,
            DurationInMilliseconds = stopWatch.ElapsedMilliseconds,
            BinaryClassificationMetrics = res.Metrics,
            CrossValidationMetrics = metrics,
        };
    }

    throw new ArgumentException();
}
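// The comment in the runner above suggests picking the fold whose score is closest to the
// cross-validation average instead of a random fold. The helper below is a minimal sketch of
// that idea and is not part of the original source; `getMetric` is a hypothetical selector
// that extracts the chosen metric from a single fold's result.
public static TFoldResult PickFoldClosestToAverage<TFoldResult>(IReadOnlyList<TFoldResult> foldResults, Func<TFoldResult, double> getMetric)
{
    var average = foldResults.Average(getMetric);

    // Pick the fold whose metric deviates least from the cross-validation average.
    return foldResults.OrderBy(fold => Math.Abs(getMetric(fold) - average)).First();
}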
public Parameter Propose(TrialSettings settings)
{
    var trialId = settings.TrialId;
    if (_initUsed)
    {
        var searchThread = _searchThreadPool[_currentThreadId];
        _configs[trialId] = _searchSpace.MappingToFeatureSpace(searchThread.Suggest(trialId));
        _trialProposedBy[trialId] = _currentThreadId;
    }
    else
    {
        _configs[trialId] = CreateInitConfigFromAdmissibleRegion();
        _trialProposedBy[trialId] = _currentThreadId;
    }

    var param = _configs[trialId];
    return _searchSpace.SampleFromFeatureSpace(param);
}
public TrialSettings Propose(TrialSettings settings)
{
    _multiModelPipeline = settings.ExperimentSettings.Pipeline;
    _learnerInitialCost = _multiModelPipeline.PipelineIds.ToDictionary(kv => kv, kv => GetEstimatedCostForPipeline(kv, _multiModelPipeline));
    var pipelineIds = _multiModelPipeline.PipelineIds;
    if (_eci == null)
    {
        // Initialize eci with the estimated cost and always start from the pipeline with the lowest cost.
        _eci = pipelineIds.ToDictionary(kv => kv, kv => GetEstimatedCostForPipeline(kv, _multiModelPipeline));
        settings.Schema = _eci.OrderBy(kv => kv.Value).First().Key;
    }
    else
    {
        // Cheaper pipelines get a larger probability: invert the eci values, normalize them,
        // then draw a pipeline with roulette-wheel sampling (see the sketch after this method).
        var probabilities = pipelineIds.Select(id => _eci[id]).ToArray();
        probabilities = ArrayMath.Inverse(probabilities);
        probabilities = ArrayMath.Normalize(probabilities);

        var randdouble = _rand.NextDouble();
        var sum = 0.0;

        // index of the selected pipeline id
        int i;
        for (i = 0; i != pipelineIds.Length; ++i)
        {
            sum += probabilities[i];
            if (sum > randdouble)
            {
                break;
            }
        }

        settings.Schema = pipelineIds[i];
    }

    settings.Pipeline = _multiModelPipeline.BuildSweepableEstimatorPipeline(settings.Schema);
    return settings;
}
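// A minimal, self-contained sketch (not from the original source) of the roulette-wheel
// sampling used in Propose above: eci values are inverted so that cheaper pipelines receive
// larger weights, normalized into probabilities, and then one index is drawn proportionally.
// ArrayMath.Inverse and ArrayMath.Normalize are assumed to behave like the LINQ equivalents below.
public static int SamplePipelineIndex(double[] eci, Random rand)
{
    // Invert: lower estimated cost => higher weight.
    var weights = eci.Select(x => 1.0 / x).ToArray();

    // Normalize so the weights sum to 1.
    var total = weights.Sum();
    var probabilities = weights.Select(x => x / total).ToArray();

    // Roulette-wheel selection, mirroring the loop in Propose.
    var randdouble = rand.NextDouble();
    var sum = 0.0;
    for (var i = 0; i != probabilities.Length; ++i)
    {
        sum += probabilities[i];
        if (sum > randdouble)
        {
            return i;
        }
    }

    // Guard against floating-point round-off pushing the cumulative sum just below 1.
    return probabilities.Length - 1;
}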
public TrialResult Run(TrialSettings settings, IServiceProvider provider)
{
    var rnd = new Random(settings.ExperimentSettings.Seed ?? 0);
    if (_datasetManager is CrossValidateDatasetManager datasetSettings
        && _metricManager is MultiClassMetricManager metricSettings)
    {
        var stopWatch = new Stopwatch();
        stopWatch.Start();
        var fold = datasetSettings.Fold ?? 5;
        var pipeline = settings.Pipeline.BuildTrainingPipeline(_context, settings.Parameter);
        var metrics = _context.MulticlassClassification.CrossValidate(datasetSettings.Dataset, pipeline, fold, metricSettings.LabelColumn, seed: settings.ExperimentSettings?.Seed);

        // For now we just randomly pick a model, but a better way is to provide an option to pick
        // the model whose score is closest to the average, or the best one.
        var res = metrics[rnd.Next(fold)];
        var model = res.Model;
        var metric = metricSettings.Metric switch
        {
            MulticlassClassificationMetric.MicroAccuracy => res.Metrics.MicroAccuracy,
            MulticlassClassificationMetric.MacroAccuracy => res.Metrics.MacroAccuracy,
            MulticlassClassificationMetric.TopKAccuracy => res.Metrics.TopKAccuracy,
            MulticlassClassificationMetric.LogLoss => res.Metrics.LogLoss,
            MulticlassClassificationMetric.LogLossReduction => res.Metrics.LogLossReduction,
            _ => throw new NotImplementedException($"{metricSettings.Metric} is not supported!"),
        };
        stopWatch.Stop();

        return new TrialResult()
        {
            Metric = metric,
            Model = model,
            TrialSettings = settings,
            DurationInMilliseconds = stopWatch.ElapsedMilliseconds,
        };
    }

    throw new ArgumentException();
}
public TrialResult Run(TrialSettings settings, IServiceProvider provider)
{
    if (_datasetManager is TrainTestDatasetManager datasetSettings
        && _metricManager is BinaryMetricManager metricSettings)
    {
        var stopWatch = new Stopwatch();
        stopWatch.Start();
        var pipeline = settings.Pipeline.BuildTrainingPipeline(_context, settings.Parameter);
        var model = pipeline.Fit(datasetSettings.TrainDataset);
        var eval = model.Transform(datasetSettings.TestDataset);
        var metrics = _context.BinaryClassification.EvaluateNonCalibrated(eval, metricSettings.LabelColumn, predictedLabelColumnName: metricSettings.PredictedColumn);
        var metric = metricSettings.Metric switch
        {
            BinaryClassificationMetric.PositivePrecision => metrics.PositivePrecision,
            BinaryClassificationMetric.Accuracy => metrics.Accuracy,
            BinaryClassificationMetric.AreaUnderRocCurve => metrics.AreaUnderRocCurve,
            BinaryClassificationMetric.AreaUnderPrecisionRecallCurve => metrics.AreaUnderPrecisionRecallCurve,
            _ => throw new NotImplementedException($"{metricSettings.Metric} is not supported!"),
        };
        stopWatch.Stop();

        return new BinaryClassificationTrialResult()
        {
            Metric = metric,
            Model = model,
            TrialSettings = settings,
            DurationInMilliseconds = stopWatch.ElapsedMilliseconds,
            BinaryClassificationMetrics = metrics,
        };
    }

    throw new ArgumentException();
}
public TrialResult Run(TrialSettings settings, IServiceProvider provider)
{
    if (_datasetManager is TrainTestDatasetManager datasetSettings
        && _metricManager is MultiClassMetricManager metricSettings)
    {
        var stopWatch = new Stopwatch();
        stopWatch.Start();
        var pipeline = settings.Pipeline.BuildTrainingPipeline(_context, settings.Parameter);
        var model = pipeline.Fit(datasetSettings.TrainDataset);
        var eval = model.Transform(datasetSettings.TestDataset);
        var metrics = _context.MulticlassClassification.Evaluate(eval, metricSettings.LabelColumn, predictedLabelColumnName: metricSettings.PredictedColumn);
        var metric = metricSettings.Metric switch
        {
            MulticlassClassificationMetric.MicroAccuracy => metrics.MicroAccuracy,
            MulticlassClassificationMetric.MacroAccuracy => metrics.MacroAccuracy,
            MulticlassClassificationMetric.TopKAccuracy => metrics.TopKAccuracy,
            MulticlassClassificationMetric.LogLoss => metrics.LogLoss,
            MulticlassClassificationMetric.LogLossReduction => metrics.LogLossReduction,
            _ => throw new NotImplementedException($"{metricSettings.Metric} is not supported!"),
        };
        stopWatch.Stop();

        return new TrialResult()
        {
            Metric = metric,
            Model = model,
            TrialSettings = settings,
            DurationInMilliseconds = stopWatch.ElapsedMilliseconds,
        };
    }

    throw new ArgumentException();
}
public TrialResult Run(TrialSettings settings, IServiceProvider provider)
{
    if (_datasetManager is TrainTestDatasetManager datasetSettings
        && _metricManager is RegressionMetricManager metricSettings)
    {
        var stopWatch = new Stopwatch();
        stopWatch.Start();
        var pipeline = settings.Pipeline.BuildTrainingPipeline(_context, settings.Parameter);
        var model = pipeline.Fit(datasetSettings.TrainDataset);
        var eval = model.Transform(datasetSettings.TestDataset);
        var metrics = _context.Regression.Evaluate(eval, metricSettings.LabelColumn, scoreColumnName: metricSettings.ScoreColumn);
        var metric = metricSettings.Metric switch
        {
            RegressionMetric.RootMeanSquaredError => metrics.RootMeanSquaredError,
            RegressionMetric.RSquared => metrics.RSquared,
            RegressionMetric.MeanSquaredError => metrics.MeanSquaredError,
            RegressionMetric.MeanAbsoluteError => metrics.MeanAbsoluteError,
            _ => throw new NotImplementedException($"{metricSettings.Metric} is not supported!"),
        };
        stopWatch.Stop();

        return new TrialResult()
        {
            Metric = metric,
            Model = model,
            TrialSettings = settings,
            DurationInMilliseconds = stopWatch.ElapsedMilliseconds,
        };
    }

    throw new ArgumentException();
}
public void ReportRunningTrial(TrialSettings setting) { }
public ITuner CreateTuner(TrialSettings settings)
{
    var searchSpace = settings.Pipeline.SearchSpace;
    return new GridSearchTuner(searchSpace);
}
public void ReportRunningTrial(TrialSettings setting)
{
    _logger.Info($"Update Running Trial - Id: {setting.TrialId} - Pipeline: {setting.Pipeline}");
}
public void ReportFailTrial(TrialSettings settings, Exception exception = null)
{
    _logger.Info($"Update Failed Trial - Id: {settings.TrialId} - Pipeline: {settings.Pipeline}");
}
public void Update(TrialSettings parameter, TrialResult result)
{
    var schema = parameter.Schema;
    var error = CaculateError(result.Metric, parameter.ExperimentSettings.IsMaximizeMetric);
    var duration = result.DurationInMilliseconds / 1000;
    var pipelineIds = _multiModelPipeline.PipelineIds;
    var isSuccess = duration != 0;

    // If k1 is null, this is the first completed trial.
    // In that case, initialize k1, k2, e1 and e2 in the following way:
    // k1: for every learner l, k1[l] = c * duration, where c is a ratio derived from _learnerInitialCost.
    // k2: k2 = k1, which encodes the hypothesis that every learner needs the same amount of time to reach its next breakthrough.
    // e1: the current error.
    // e2: 1.05 * e1.
    if (isSuccess)
    {
        if (_k1 == null)
        {
            _k1 = pipelineIds.ToDictionary(id => id, id => duration * _learnerInitialCost[id] / _learnerInitialCost[schema]);
            _k2 = _k1.ToDictionary(kv => kv.Key, kv => kv.Value);
            _e1 = pipelineIds.ToDictionary(id => id, id => error);
            _e2 = pipelineIds.ToDictionary(id => id, id => 1.05 * error);
            _globalBestError = error;
        }
        else if (error >= _e1[schema])
        {
            // The error is no better than the current best error for this learner, which means the last
            // trial brought no improvement. In that case, simply increase the total time spent since the
            // last best error for that learner.
            _k1[schema] += duration;
        }
        else
        {
            // There's an improvement: shift k1 into k2 and e1 into e2, then record the latest duration and error.
            _k2[schema] = _k1[schema];
            _k1[schema] = duration;
            _e2[schema] = _e1[schema];
            _e1[schema] = error;

            // Update the global best error as well.
            if (error < _globalBestError)
            {
                _globalBestError = error;
            }
        }

        // Update eci (see the numeric sketch after this method).
        var eci1 = Math.Max(_k1[schema], _k2[schema]);
        var estimatorCostForBreakThrough = 2 * (error - _globalBestError) / ((_e2[schema] - _e1[schema]) / (_k2[schema] + _k1[schema]));
        _eci[schema] = Math.Max(eci1, estimatorCostForBreakThrough);
    }
    else
    {
        // The trial failed: set the eci of the current pipeline to twice the maximum eci.
        _eci[schema] = _eci.Select(kv => kv.Value).Max() * 2;
    }

    // Normalize eci.
    var sum = _eci.Select(x => x.Value).Sum();
    _eci = _eci.Select(x => (x.Key, x.Value / sum)).ToDictionary(x => x.Key, x => x.Item2);

    // TODO
    // Save k1, k2, e1, e2, eci and bestError to the training configuration.
    return;
}
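// A small numeric sketch (not part of the original source) of the eci update above, with
// made-up values, showing how the estimated cost of the next breakthrough is extrapolated
// from the most recent improvement for a single pipeline.
public static double EstimatedCostOfImprovementSketch()
{
    // Suppose the previous best error for a pipeline was e2 = 0.30 and the latest trial improved
    // it to e1 = 0.25 after k1 = 20 seconds, the previous improvement took k2 = 35 seconds, and the
    // global best error across all pipelines is 0.20.
    double k1 = 20, k2 = 35, e1 = 0.25, e2 = 0.30, globalBestError = 0.20;
    var error = e1;

    // Same formula as in Update: twice the distance to the global best error divided by the recent
    // rate of improvement (error reduced per second), floored by the recent time spent per improvement.
    var eci1 = Math.Max(k1, k2);
    var estimatedCostForBreakThrough = 2 * (error - globalBestError) / ((e2 - e1) / (k2 + k1));

    // 2 * 0.05 / (0.05 / 55) = 110, so this pipeline's eci becomes 110 before normalization.
    return Math.Max(eci1, estimatedCostForBreakThrough);
}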
public void ReportRunningTrial(TrialSettings setting)
{
    ActiveTrial = setting;
    ThrottledUpdate();
}
public Parameter Propose(TrialSettings settings)
{
    return _tuner.Propose(_searchSpace);
}
/// <summary>
/// Run the experiment and return the best trial result asynchronously. If any trial has completed when
/// <paramref name="ct"/> gets cancelled, the experiment returns the current best trial result; otherwise it
/// throws a <see cref="TimeoutException"/> with the message "Training time finished without completing a trial run".
/// Note that this method doesn't return immediately after <paramref name="ct"/> gets cancelled. Instead, it calls
/// <see cref="MLContext.CancelExecution"/> to cancel all training processes and waits for all running trials to be
/// cancelled or completed.
/// </summary>
/// <returns></returns>
public async Task<TrialResult> RunAsync(CancellationToken ct = default)
{
    ValidateSettings();
    var cts = new CancellationTokenSource();
    _settings.CancellationToken = ct;
    cts.CancelAfter((int)_settings.MaxExperimentTimeInSeconds * 1000);
    _settings.CancellationToken.Register(() => cts.Cancel());
    cts.Token.Register(() =>
    {
        // Only force-cancel running trials when there are completed trials;
        // otherwise, wait for the current running trial to complete.
        if (_bestTrialResult != null)
        {
            _context.CancelExecution();
        }
    });

    InitializeServiceCollection();
    var serviceProvider = _serviceCollection.BuildServiceProvider();
    var monitor = serviceProvider.GetService<IMonitor>();
    var trialNum = 0;
    var pipelineProposer = serviceProvider.GetService<PipelineProposer>();
    var hyperParameterProposer = serviceProvider.GetService<HyperParameterProposer>();
    var runnerFactory = serviceProvider.GetService<ITrialRunnerFactory>();

    while (true)
    {
        if (cts.Token.IsCancellationRequested)
        {
            break;
        }

        var setting = new TrialSettings()
        {
            ExperimentSettings = _settings,
            TrialId = trialNum++,
        };

        setting = pipelineProposer.Propose(setting);
        setting = hyperParameterProposer.Propose(setting);
        monitor.ReportRunningTrial(setting);
        var runner = runnerFactory.CreateTrialRunner();

        try
        {
            var trialResult = runner.Run(setting, serviceProvider);
            monitor.ReportCompletedTrial(trialResult);
            hyperParameterProposer.Update(setting, trialResult);
            pipelineProposer.Update(setting, trialResult);

            var error = _settings.IsMaximizeMetric ? 1 - trialResult.Metric : trialResult.Metric;
            if (error < _bestError)
            {
                _bestTrialResult = trialResult;
                _bestError = error;
                monitor.ReportBestTrial(trialResult);
            }
        }
        catch (Exception ex)
        {
            if (cts.Token.IsCancellationRequested)
            {
                break;
            }
            else
            {
                // TODO
                // It's questionable whether the entire training process should be aborted
                // for a single failed trial. We should make this an option and only exit
                // when the error is fatal (like a schema mismatch).
                monitor.ReportFailTrial(setting, ex);
                throw;
            }
        }
    }

    if (_bestTrialResult == null)
    {
        throw new TimeoutException("Training time finished without completing a trial run");
    }
    else
    {
        return await Task.FromResult(_bestTrialResult);
    }
}
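// An illustrative usage sketch for RunAsync (not part of the original source). It assumes the
// caller has already built and configured an AutoMLExperiment instance named `experiment`;
// that parameter and the ten-minute timeout are placeholders.
public static async Task RunWithTimeoutAsync(AutoMLExperiment experiment)
{
    using var cts = new CancellationTokenSource(TimeSpan.FromMinutes(10));
    try
    {
        // Returns the best completed trial, or the best trial so far if the token is cancelled
        // after at least one trial has completed.
        var best = await experiment.RunAsync(cts.Token);
        Console.WriteLine($"Best metric: {best.Metric}");
    }
    catch (TimeoutException)
    {
        // Thrown when training time finished without completing a trial run.
        Console.WriteLine("No trial completed before the experiment was cancelled.");
    }
}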
public void ReportFailTrial(TrialSettings setting, Exception exp = null)
{
    // TODO: figure out what to do with failed trials.
    ThrottledUpdate();
}