Example #1
        public ITuner CreateTuner(TrialSettings settings)
        {
            var experimentSetting = _provider.GetService<AutoMLExperiment.AutoMLExperimentSettings>();
            var searchSpace       = settings.Pipeline.SearchSpace;
            var initParameter     = settings.Pipeline.Parameter;
            var isMaximize        = experimentSetting.IsMaximizeMetric;

            return new CostFrugalTuner(searchSpace, initParameter, !isMaximize);
        }
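A minimal wiring sketch for a factory like the one above, assuming Microsoft.Extensions.DependencyInjection. The concrete type name CostFrugalTunerFactory is hypothetical and only stands in for the class containing the CreateTuner method shown; the settings constructor is likewise an assumption.

using Microsoft.Extensions.DependencyInjection;

// Hypothetical registration; CostFrugalTunerFactory stands in for the class that
// contains the CreateTuner method above and takes IServiceProvider in its constructor.
var services = new ServiceCollection();
services.AddSingleton(new AutoMLExperiment.AutoMLExperimentSettings()); // assumes a public parameterless constructor
services.AddSingleton<ITunerFactory, CostFrugalTunerFactory>();

var provider = services.BuildServiceProvider();
var factory  = provider.GetService<ITunerFactory>();
// factory.CreateTuner(settings) would then be called once per trial, with settings.Pipeline
// already populated (e.g. by PipelineProposer.Propose, shown further down).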
Example #2
        public void Update(TrialSettings settings, TrialResult result)
        {
            var schema = settings.Schema;

            if (_tuners.TryGetValue(schema, out var tuner))
            {
                tuner.Update(result);
            }
        }
        public void ReportFailTrial(TrialSettings settings, Exception exp)
        {
            var result = new BinaryClassificationTrialResult
            {
                TrialSettings = settings,
                Exception     = exp,
            };

            RunDetails.Add(result);
        }
 public Parameter Propose(TrialSettings settings)
 {
     if (!_enumerator.MoveNext())
     {
         _enumerator = _tuner.Propose().GetEnumerator();
         return Propose(settings);
     }
     else
     {
         var res = _enumerator.Current;
         return res;
     }
 }
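The Propose wrapper above drains an enumerator of batched proposals and refills it from the underlying tuner when it runs dry. Below is a small generic sketch of the same refill-on-exhaustion pattern with hypothetical names and no dependency on the AutoML types; like the original, it assumes the batch source always yields at least one item.

using System;
using System.Collections.Generic;
using System.Linq;

// Hypothetical helper illustrating the refill-on-exhaustion pattern used above.
public sealed class BatchedProposer<T>
{
    private readonly Func<IEnumerable<T>> _batchSource; // stands in for _tuner.Propose()
    private IEnumerator<T> _enumerator = Enumerable.Empty<T>().GetEnumerator();

    public BatchedProposer(Func<IEnumerable<T>> batchSource) => _batchSource = batchSource;

    public T Next()
    {
        // When the current batch is exhausted, fetch a new one and try again.
        if (!_enumerator.MoveNext())
        {
            _enumerator = _batchSource().GetEnumerator();
            return Next();
        }

        return _enumerator.Current;
    }
}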
Example #5
        public TrialSettings Propose(TrialSettings settings)
        {
            var tunerFactory = _provider.GetService<ITunerFactory>();

            if (!_tuners.ContainsKey(settings.Schema))
            {
                var t = tunerFactory.CreateTuner(settings);
                _tuners.Add(settings.Schema, t);
            }

            var tuner     = _tuners[settings.Schema];
            var parameter = tuner.Propose(settings);

            settings.Parameter = parameter;

            return settings;
        }
        public TrialResult Run(TrialSettings settings, IServiceProvider provider)
        {
            var rnd = new Random(settings.ExperimentSettings.Seed ?? 0);

            if (_datasetManager is CrossValidateDatasetManager datasetSettings &&
                _metricManager is BinaryMetricManager metricSettings)
            {
                var stopWatch = new Stopwatch();
                stopWatch.Start();
                var fold = datasetSettings.Fold ?? 5;

                var pipeline = settings.Pipeline.BuildTrainingPipeline(_context, settings.Parameter);
                var metrics  = _context.BinaryClassification.CrossValidateNonCalibrated(datasetSettings.Dataset, pipeline, fold, metricSettings.LabelColumn);

                // For now we just randomly pick a model; a better option would be to pick the model whose score is closest to the average, or the best one.
                var res    = metrics[rnd.Next(fold)];
                var model  = res.Model;
                var metric = metricSettings.Metric switch
                {
                    BinaryClassificationMetric.PositivePrecision => res.Metrics.PositivePrecision,
                    BinaryClassificationMetric.Accuracy => res.Metrics.Accuracy,
                    BinaryClassificationMetric.AreaUnderRocCurve => res.Metrics.AreaUnderRocCurve,
                    BinaryClassificationMetric.AreaUnderPrecisionRecallCurve => res.Metrics.AreaUnderPrecisionRecallCurve,
                    _ => throw new NotImplementedException($"{metricSettings.Metric} is not supported!"),
                };

                stopWatch.Stop();


                return new BinaryClassificationTrialResult()
                {
                    Metric = metric,
                    Model = model,
                    TrialSettings = settings,
                    DurationInMilliseconds = stopWatch.ElapsedMilliseconds,
                    BinaryClassificationMetrics = res.Metrics,
                    CrossValidationMetrics = metrics,
                };
            }

            throw new ArgumentException();
        }
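The comment in the runner above notes that randomly picking one of the cross-validation models is a placeholder. A hypothetical sketch of the better policy it suggests, selecting the fold whose metric is closest to the average or the best fold, given one metric value per fold:

using System;
using System.Linq;

// Hypothetical fold-selection policy, not part of the code above.
public static class FoldSelector
{
    // Index of the fold whose metric is closest to the average metric.
    public static int ClosestToAverage(double[] foldMetrics)
    {
        var avg = foldMetrics.Average();
        return Array.IndexOf(foldMetrics, foldMetrics.OrderBy(m => Math.Abs(m - avg)).First());
    }

    // Index of the best fold, where "best" depends on whether the metric is maximized.
    public static int Best(double[] foldMetrics, bool isMaximize)
    {
        var best = isMaximize ? foldMetrics.Max() : foldMetrics.Min();
        return Array.IndexOf(foldMetrics, best);
    }
}

Here foldMetrics would be built from metrics[i].Metrics for whichever metric metricSettings selects, and the returned index would replace rnd.Next(fold) above.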
        public Parameter Propose(TrialSettings settings)
        {
            var trialId = settings.TrialId;

            if (_initUsed)
            {
                var searchThread = _searchThreadPool[_currentThreadId];
                _configs[trialId]         = _searchSpace.MappingToFeatureSpace(searchThread.Suggest(trialId));
                _trialProposedBy[trialId] = _currentThreadId;
            }
            else
            {
                _configs[trialId]         = CreateInitConfigFromAdmissibleRegion();
                _trialProposedBy[trialId] = _currentThreadId;
            }

            var param = _configs[trialId];

            return _searchSpace.SampleFromFeatureSpace(param);
        }
        public TrialSettings Propose(TrialSettings settings)
        {
            _multiModelPipeline = settings.ExperimentSettings.Pipeline;
            _learnerInitialCost = _multiModelPipeline.PipelineIds.ToDictionary(kv => kv, kv => GetEstimatedCostForPipeline(kv, _multiModelPipeline));
            var pipelineIds = _multiModelPipeline.PipelineIds;

            if (_eci == null)
            {
                // initialize eci with the estimated cost and always start from the pipeline with the lowest cost.
                _eci            = pipelineIds.ToDictionary(kv => kv, kv => GetEstimatedCostForPipeline(kv, _multiModelPipeline));
                settings.Schema = _eci.OrderBy(kv => kv.Value).First().Key;
            }
            else
            {
                var probabilities = pipelineIds.Select(id => _eci[id]).ToArray();
                probabilities = ArrayMath.Inverse(probabilities);
                probabilities = ArrayMath.Normalize(probabilities);

                // sample
                var randdouble = _rand.NextDouble();
                var sum        = 0.0;
                // selected pipeline id index
                int i;

                for (i = 0; i != pipelineIds.Length; ++i)
                {
                    sum += probabilities[i];
                    if (sum > randdouble)
                    {
                        break;
                    }
                }

                settings.Schema = pipelineIds[i];
            }

            settings.Pipeline = _multiModelPipeline.BuildSweepableEstimatorPipeline(settings.Schema);
            return settings;
        }
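The sampling step above weights pipelines by the inverse of their estimated cost, normalizes those weights into probabilities, and then does a cumulative-sum (roulette-wheel) draw. A self-contained sketch of that draw with made-up costs; ArrayMath.Inverse and ArrayMath.Normalize are assumed to be element-wise reciprocal and sum-normalization, and are replaced here by plain LINQ.

using System;
using System.Linq;

// Illustrative only: three pipelines with made-up estimated costs.
var costs = new[] { 2.0, 4.0, 8.0 };

// Inverse + normalize, equivalent in spirit to ArrayMath.Inverse / ArrayMath.Normalize.
var inverse       = costs.Select(c => 1.0 / c).ToArray();
var probabilities = inverse.Select(p => p / inverse.Sum()).ToArray(); // ~0.57, 0.29, 0.14

// Cumulative-sum draw, mirroring the loop in Propose above.
var rand       = new Random(0);
var randdouble = rand.NextDouble();
var sum        = 0.0;
int i;
for (i = 0; i != probabilities.Length; ++i)
{
    sum += probabilities[i];
    if (sum > randdouble)
    {
        break;
    }
}

Console.WriteLine($"selected pipeline index: {i}"); // cheaper pipelines are selected more often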
        public TrialResult Run(TrialSettings settings, IServiceProvider provider)
        {
            var rnd = new Random(settings.ExperimentSettings.Seed ?? 0);

            if (_datasetManager is CrossValidateDatasetManager datasetSettings &&
                _metricManager is MultiClassMetricManager metricSettings)
            {
                var stopWatch = new Stopwatch();
                stopWatch.Start();
                var fold = datasetSettings.Fold ?? 5;

                var pipeline = settings.Pipeline.BuildTrainingPipeline(_context, settings.Parameter);
                var metrics  = _context.MulticlassClassification.CrossValidate(datasetSettings.Dataset, pipeline, fold, metricSettings.LabelColumn, seed: settings.ExperimentSettings?.Seed);
                // For now we just randomly pick a model; a better option would be to pick the model whose score is closest to the average, or the best one.
                var res    = metrics[rnd.Next(fold)];
                var model  = res.Model;
                var metric = metricSettings.Metric switch
                {
                    MulticlassClassificationMetric.MicroAccuracy => res.Metrics.MicroAccuracy,
                    MulticlassClassificationMetric.MacroAccuracy => res.Metrics.MacroAccuracy,
                    MulticlassClassificationMetric.TopKAccuracy => res.Metrics.TopKAccuracy,
                    MulticlassClassificationMetric.LogLoss => res.Metrics.LogLoss,
                    MulticlassClassificationMetric.LogLossReduction => res.Metrics.LogLossReduction,
                    _ => throw new NotImplementedException($"{metricSettings.Metric} is not supported!"),
                };

                stopWatch.Stop();

                return new TrialResult()
                {
                    Metric = metric,
                    Model = model,
                    TrialSettings = settings,
                    DurationInMilliseconds = stopWatch.ElapsedMilliseconds,
                };
            }

            throw new ArgumentException();
        }
Example #10
        public TrialResult Run(TrialSettings settings, IServiceProvider provider)
        {
            if (_datasetManager is TrainTestDatasetManager datasetSettings &&
                _metricManager is BinaryMetricManager metricSettings)
            {
                var stopWatch = new Stopwatch();
                stopWatch.Start();

                var pipeline = settings.Pipeline.BuildTrainingPipeline(_context, settings.Parameter);
                var model    = pipeline.Fit(datasetSettings.TrainDataset);
                var eval     = model.Transform(datasetSettings.TestDataset);
                var metrics  = _context.BinaryClassification.EvaluateNonCalibrated(eval, metricSettings.LabelColumn, predictedLabelColumnName: metricSettings.PredictedColumn);

                var metric = metricSettings.Metric switch
                {
                    BinaryClassificationMetric.PositivePrecision => metrics.PositivePrecision,
                    BinaryClassificationMetric.Accuracy => metrics.Accuracy,
                    BinaryClassificationMetric.AreaUnderRocCurve => metrics.AreaUnderRocCurve,
                    BinaryClassificationMetric.AreaUnderPrecisionRecallCurve => metrics.AreaUnderPrecisionRecallCurve,
                    _ => throw new NotImplementedException($"{metricSettings.Metric} is not supported!"),
                };

                stopWatch.Stop();


                return new BinaryClassificationTrialResult()
                {
                    Metric = metric,
                    Model = model,
                    TrialSettings = settings,
                    DurationInMilliseconds = stopWatch.ElapsedMilliseconds,
                    BinaryClassificationMetrics = metrics,
                };
            }

            throw new ArgumentException();
        }
Example #11
        public TrialResult Run(TrialSettings settings, IServiceProvider provider)
        {
            if (_datasetManager is TrainTestDatasetManager datasetSettings &&
                _metricManager is MultiClassMetricManager metricSettings)
            {
                var stopWatch = new Stopwatch();
                stopWatch.Start();

                var pipeline = settings.Pipeline.BuildTrainingPipeline(_context, settings.Parameter);
                var model    = pipeline.Fit(datasetSettings.TrainDataset);
                var eval     = model.Transform(datasetSettings.TestDataset);
                var metrics  = _context.MulticlassClassification.Evaluate(eval, metricSettings.LabelColumn, predictedLabelColumnName: metricSettings.PredictedColumn);

                var metric = metricSettings.Metric switch
                {
                    MulticlassClassificationMetric.MicroAccuracy => metrics.MicroAccuracy,
                    MulticlassClassificationMetric.MacroAccuracy => metrics.MacroAccuracy,
                    MulticlassClassificationMetric.TopKAccuracy => metrics.TopKAccuracy,
                    MulticlassClassificationMetric.LogLoss => metrics.LogLoss,
                    MulticlassClassificationMetric.LogLossReduction => metrics.LogLossReduction,
                    _ => throw new NotImplementedException($"{metricSettings.Metric} is not supported!"),
                };

                stopWatch.Stop();


                return new TrialResult()
                {
                    Metric = metric,
                    Model = model,
                    TrialSettings = settings,
                    DurationInMilliseconds = stopWatch.ElapsedMilliseconds,
                };
            }

            throw new ArgumentException();
        }
Example #12
        public TrialResult Run(TrialSettings settings, IServiceProvider provider)
        {
            if (_datasetManager is TrainTestDatasetManager datasetSettings &&
                _metricManager is RegressionMetricManager metricSettings)
            {
                var stopWatch = new Stopwatch();
                stopWatch.Start();

                var pipeline = settings.Pipeline.BuildTrainingPipeline(_context, settings.Parameter);
                var model    = pipeline.Fit(datasetSettings.TrainDataset);
                var eval     = model.Transform(datasetSettings.TestDataset);
                var metrics  = _context.Regression.Evaluate(eval, metricSettings.LabelColumn, scoreColumnName: metricSettings.ScoreColumn);

                var metric = metricSettings.Metric switch
                {
                    RegressionMetric.RootMeanSquaredError => metrics.RootMeanSquaredError,
                    RegressionMetric.RSquared => metrics.RSquared,
                    RegressionMetric.MeanSquaredError => metrics.MeanSquaredError,
                    RegressionMetric.MeanAbsoluteError => metrics.MeanAbsoluteError,
                    _ => throw new NotImplementedException($"{metricSettings.Metric} is not supported!"),
                };

                stopWatch.Stop();


                return new TrialResult()
                {
                    Metric = metric,
                    Model = model,
                    TrialSettings = settings,
                    DurationInMilliseconds = stopWatch.ElapsedMilliseconds,
                };
            }

            throw new ArgumentException();
        }
 public void ReportRunningTrial(TrialSettings setting)
 {
 }
Example #14
        public ITuner CreateTuner(TrialSettings settings)
        {
            var searchSpace = settings.Pipeline.SearchSpace;

            return new GridSearchTuner(searchSpace);
        }
Example #15
 public void ReportRunningTrial(TrialSettings setting)
 {
     _logger.Info($"Update Running Trial - Id: {setting.TrialId} - Pipeline: {setting.Pipeline}");
 }
Example #16
 public void ReportFailTrial(TrialSettings settings, Exception exception = null)
 {
     _logger.Info($"Update Failed Trial - Id: {settings.TrialId} - Pipeline: {settings.Pipeline}");
 }
        public void Update(TrialSettings parameter, TrialResult result)
        {
            var schema      = parameter.Schema;
            var error       = CaculateError(result.Metric, parameter.ExperimentSettings.IsMaximizeMetric);
            var duration    = result.DurationInMilliseconds / 1000;
            var pipelineIds = _multiModelPipeline.PipelineIds;
            var isSuccess   = duration != 0;

            // If k1 is null, this is the first completed trial.
            // In that case, initialize k1, k2, e1 and e2 as follows:
            // k1: for every learner, k1[l] = c * duration, where c is a ratio defined in learnerInitialCost
            // k2: k2 = k1, which encodes the hypothesis that it costs each learner the same time to reach its next breakthrough
            // e1: the current error
            // e2: 1.05 * e1

            if (isSuccess)
            {
                if (_k1 == null)
                {
                    _k1 = pipelineIds.ToDictionary(id => id, id => duration * _learnerInitialCost[id] / _learnerInitialCost[schema]);
                    _k2 = _k1.ToDictionary(kv => kv.Key, kv => kv.Value);
                    _e1 = pipelineIds.ToDictionary(id => id, id => error);
                    _e2 = pipelineIds.ToDictionary(id => id, id => 1.05 * error);
                    _globalBestError = error;
                }
                else if (error >= _e1[schema])
                {
                    // The error is no better than the current best error for this learner,
                    // i.e. the last trial brought no improvement.
                    // In that case, simply add the trial's duration to the time spent since this learner's last best error.
                    _k1[schema] += duration;
                }
                else
                {
                    // There's an improvement: shift the previous values back (k2 <- k1, e2 <- e1)
                    // and record the new duration and error in k1 and e1.
                    _k2[schema] = _k1[schema];
                    _k1[schema] = duration;
                    _e2[schema] = _e1[schema];
                    _e1[schema] = error;

                    // update global best error as well
                    if (error < _globalBestError)
                    {
                        _globalBestError = error;
                    }
                }

                // update eci
                var eci1 = Math.Max(_k1[schema], _k2[schema]);
                var estimatorCostForBreakThrough = 2 * (error - _globalBestError) / ((_e2[schema] - _e1[schema]) / (_k2[schema] + _k1[schema]));
                _eci[schema] = Math.Max(eci1, estimatorCostForBreakThrough);
            }
            else
            {
                // The trial failed: penalize this schema by setting its eci to twice the maximum eci.
                _eci[schema] = _eci.Select(kv => kv.Value).Max() * 2;
            }

            // normalize eci
            var sum = _eci.Select(x => x.Value).Sum();

            _eci = _eci.Select(x => (x.Key, x.Value / sum)).ToDictionary(x => x.Key, x => x.Item2);

            // TODO
            // save k1,k2,e1,e2,eci,bestError to training configuration
            return;
        }
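A worked numeric sketch of the eci update above, to make the "expected cost for improvement" formula concrete. The numbers are made up and the local names simply mirror the fields of the class; this is illustration only, not part of the original code.

using System;

// Made-up state for one learner after a successful trial.
double k1 = 30;                // seconds spent since this learner's last breakthrough
double k2 = 20;                // seconds the previous breakthrough took
double e1 = 0.10, e2 = 0.12;   // best and second-best errors seen for this learner
double error = 0.11;           // error of the trial just completed
double globalBestError = 0.08; // best error across all learners

// Same formula as in Update: estimate the cost for this learner to beat the global best,
// extrapolating from its recent improvement rate of (e2 - e1) per (k2 + k1) seconds.
var eci1 = Math.Max(k1, k2);
var estimatedCostForBreakthrough = 2 * (error - globalBestError) / ((e2 - e1) / (k2 + k1));
var eci = Math.Max(eci1, estimatedCostForBreakthrough);

Console.WriteLine($"eci1 = {eci1}, breakthrough cost = {estimatedCostForBreakthrough}, eci = {eci}");
// eci1 = 30, breakthrough cost = 2 * 0.03 / (0.02 / 50) ≈ 150, so eci ≈ 150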
Example #18
 public void ReportRunningTrial(TrialSettings setting)
 {
     ActiveTrial = setting;
     ThrottledUpdate();
 }
 public Parameter Propose(TrialSettings settings)
 {
     return _tuner.Propose(_searchSpace);
 }
        /// <summary>
        /// Run the experiment and return the best trial result asynchronously. The experiment returns the current best trial result if any trial has completed when <paramref name="ct"/> gets cancelled,
        /// and throws a <see cref="TimeoutException"/> with the message "Training time finished without completing a trial run" when no trial has completed.
        /// Note that this method does not return immediately after <paramref name="ct"/> gets cancelled. Instead, it calls <see cref="MLContext.CancelExecution"/> to cancel all training processes
        /// and waits for all running trials to be cancelled or completed.
        /// </summary>
        /// <returns></returns>
        public async Task<TrialResult> RunAsync(CancellationToken ct = default)
        {
            ValidateSettings();
            var cts = new CancellationTokenSource();

            _settings.CancellationToken = ct;
            cts.CancelAfter((int)_settings.MaxExperimentTimeInSeconds * 1000);
            _settings.CancellationToken.Register(() => cts.Cancel());
            cts.Token.Register(() =>
            {
                // Only force-cancel running trials when at least one trial has completed;
                // otherwise, wait for the currently running trial to complete.
                if (_bestTrialResult != null)
                {
                    _context.CancelExecution();
                }
            });

            InitializeServiceCollection();
            var serviceProvider        = _serviceCollection.BuildServiceProvider();
            var monitor                = serviceProvider.GetService<IMonitor>();
            var trialNum               = 0;
            var pipelineProposer       = serviceProvider.GetService<PipelineProposer>();
            var hyperParameterProposer = serviceProvider.GetService<HyperParameterProposer>();
            var runnerFactory          = serviceProvider.GetService<ITrialRunnerFactory>();

            while (true)
            {
                if (cts.Token.IsCancellationRequested)
                {
                    break;
                }
                var setting = new TrialSettings()
                {
                    ExperimentSettings = _settings,
                    TrialId            = trialNum++,
                };

                setting = pipelineProposer.Propose(setting);
                setting = hyperParameterProposer.Propose(setting);
                monitor.ReportRunningTrial(setting);
                var runner = runnerFactory.CreateTrialRunner();

                try
                {
                    var trialResult = runner.Run(setting, serviceProvider);
                    monitor.ReportCompletedTrial(trialResult);
                    hyperParameterProposer.Update(setting, trialResult);
                    pipelineProposer.Update(setting, trialResult);

                    var error = _settings.IsMaximizeMetric ? 1 - trialResult.Metric : trialResult.Metric;
                    if (error < _bestError)
                    {
                        _bestTrialResult = trialResult;
                        _bestError       = error;
                        monitor.ReportBestTrial(trialResult);
                    }
                }
                catch (Exception ex)
                {
                    if (cts.Token.IsCancellationRequested)
                    {
                        break;
                    }
                    else
                    {
                        // TODO
                        // It's questionable whether to abort the entire training process
                        // because of a single failed trial. We should make this an option and only exit
                        // when the error is fatal (like a schema mismatch).
                        monitor.ReportFailTrial(setting, ex);
                        throw;
                    }
                }
            }

            if (_bestTrialResult == null)
            {
                throw new TimeoutException("Training time finished without completing a trial run");
            }
            else
            {
                return await Task.FromResult(_bestTrialResult);
            }
        }
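A minimal sketch of driving RunAsync with an external cancellation token. It assumes an experiment object of the class above has already been constructed and configured elsewhere; only the call pattern is shown.

using System;
using System.Threading;
using System.Threading.Tasks;

// experiment is assumed to be a fully configured instance of the class containing RunAsync.
// Cancel from the caller's side after 10 minutes; RunAsync also honors
// MaxExperimentTimeInSeconds internally, as shown above.
using var cts = new CancellationTokenSource(TimeSpan.FromMinutes(10));

try
{
    TrialResult best = await experiment.RunAsync(cts.Token);
    Console.WriteLine($"best metric: {best.Metric}");
}
catch (TimeoutException)
{
    Console.WriteLine("No trial completed within the time budget.");
}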
Example #21
 public void ReportFailTrial(TrialSettings setting, Exception exp = null)
 {
     // TODO figure out what to do with failed trials.
     ThrottledUpdate();
 }