public override ExperimentResult<BinaryClassificationMetrics> Execute(IDataView trainData, ColumnInformation columnInformation, IEstimator<ITransformer> preFeaturizer = null, IProgress<RunDetail<BinaryClassificationMetrics>> progressHandler = null)
{
    var label = columnInformation.LabelColumnName;
    _experiment.SetEvaluateMetric(Settings.OptimizingMetric, label);
    _experiment.SetTrainingTimeInSeconds(Settings.MaxExperimentTimeInSeconds);

    // Cross val threshold for # of dataset rows --
    // If dataset has < threshold # of rows, use cross val.
    // Else, run experiment using train-validate split.
    const int crossValRowCountThreshold = 15000;
    var rowCount = DatasetDimensionsUtil.CountRows(trainData, crossValRowCountThreshold);

    // TODO: split cross validation result according to sample key as well.
    if (rowCount < crossValRowCountThreshold)
    {
        const int numCrossValFolds = 10;
        _experiment.SetDataset(trainData, numCrossValFolds);
    }
    else
    {
        var splitData = Context.Data.TrainTestSplit(trainData);
        _experiment.SetDataset(splitData.TrainSet, splitData.TestSet);
    }

    // Build the sweepable pipeline: optional user pre-featurizer, then the
    // auto-generated featurizer and the binary classification trainers.
    MultiModelPipeline pipeline = new MultiModelPipeline();
    if (preFeaturizer != null)
    {
        pipeline = pipeline.Append(preFeaturizer);
    }

    pipeline = pipeline.Append(Context.Auto().Featurizer(trainData, columnInformation, Features))
        .Append(Context.Auto().BinaryClassification(label, Features));

    _experiment.SetPipeline(pipeline);

    // Forward each completed trial to the caller's progress handler.
    var monitor = new BinaryClassificationTrialResultMonitor();
    monitor.OnTrialCompleted += (o, e) =>
    {
        var detail = ToRunDetail(e);
        progressHandler?.Report(detail);
    };

    _experiment.SetMonitor(monitor);
    _experiment.Run();

    var runDetails = monitor.RunDetails.Select(e => ToRunDetail(e));
    var bestRun = ToRunDetail(monitor.BestRun);
    var result = new ExperimentResult<BinaryClassificationMetrics>(runDetails, bestRun);
    return result;
}
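// Usage sketch (illustrative, not part of the class above): drives this overload through
// the public AutoML catalog, passing training data only so the row-count heuristic above
// picks between cross-validation and a train/validate split. The label column name and
// time budget are hypothetical placeholders. Assumes Microsoft.ML, Microsoft.ML.AutoML,
// and System namespaces.
private static void RunExperimentSketch(MLContext context, IDataView trainData)
{
    var experiment = context.Auto().CreateBinaryClassificationExperiment(maxExperimentTimeInSeconds: 60);
    var progress = new Progress<RunDetail<BinaryClassificationMetrics>>(
        r => Console.WriteLine($"{r.TrainerName}: {r.ValidationMetrics?.Accuracy}"));
    var result = experiment.Execute(
        trainData,
        new ColumnInformation { LabelColumnName = "Label" },
        progressHandler: progress);
    Console.WriteLine($"Best trainer: {result.BestRun.TrainerName}");
}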
private ExperimentResult<TMetrics> Execute(ColumnInformation columnInfo, DatasetColumnInfo[] columns, IEstimator<ITransformer> preFeaturizer, IProgress<RunDetail<TMetrics>> progressHandler, IRunner<RunDetail<TMetrics>> runner)
{
    // Execute experiment & get all pipelines run
    var experiment = new Experiment<RunDetail<TMetrics>, TMetrics>(Context, _task, OptimizingMetricInfo, progressHandler,
        Settings, MetricsAgent, _trainerAllowList, columns, runner, _logger);

    var runDetails = experiment.Execute();
    var bestRun = GetBestRun(runDetails);
    var experimentResult = new ExperimentResult<TMetrics>(runDetails, bestRun);
    return experimentResult;
}
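// Sketch of the best-run selection this helper relies on (an assumption about how
// GetBestRun behaves, not the library's actual implementation): score each completed
// run with the metrics agent and keep the one that is best in the direction of the
// optimizing metric. Assumes System.Linq and that MetricsAgent.GetScore and
// OptimizingMetricInfo.IsMaximizing are available as referenced in the method above.
private RunDetail<TMetrics> GetBestRunSketch(IEnumerable<RunDetail<TMetrics>> runDetails)
{
    var scored = runDetails
        .Where(r => r.ValidationMetrics != null)
        .Select(r => (Run: r, Score: MetricsAgent.GetScore(r.ValidationMetrics)));
    return OptimizingMetricInfo.IsMaximizing
        ? scored.OrderByDescending(t => t.Score).First().Run
        : scored.OrderBy(t => t.Score).First().Run;
}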
public override ExperimentResult<BinaryClassificationMetrics> Execute(IDataView trainData, IDataView validationData, ColumnInformation columnInformation, IEstimator<ITransformer> preFeaturizer = null, IProgress<RunDetail<BinaryClassificationMetrics>> progressHandler = null)
{
    var label = columnInformation.LabelColumnName;
    _experiment.SetEvaluateMetric(Settings.OptimizingMetric, label);
    _experiment.SetTrainingTimeInSeconds(Settings.MaxExperimentTimeInSeconds);
    _experiment.SetDataset(trainData, validationData);

    // Build the sweepable pipeline: optional user pre-featurizer, then the
    // auto-generated featurizer and the binary classification trainers. Use the
    // Features constant for both the featurizer output and the trainer input so
    // the two column names cannot drift apart.
    MultiModelPipeline pipeline = new MultiModelPipeline();
    if (preFeaturizer != null)
    {
        pipeline = pipeline.Append(preFeaturizer);
    }

    pipeline = pipeline.Append(Context.Auto().Featurizer(trainData, columnInformation, Features))
        .Append(Context.Auto().BinaryClassification(label, featureColumnName: Features));

    _experiment.SetPipeline(pipeline);

    // Forward each completed trial to the caller's progress handler.
    var monitor = new BinaryClassificationTrialResultMonitor();
    monitor.OnTrialCompleted += (o, e) =>
    {
        var detail = ToRunDetail(e);
        progressHandler?.Report(detail);
    };

    _experiment.SetMonitor(monitor);
    _experiment.Run();

    var runDetails = monitor.RunDetails.Select(e => ToRunDetail(e));
    var bestRun = ToRunDetail(monitor.BestRun);
    var result = new ExperimentResult<BinaryClassificationMetrics>(runDetails, bestRun);
    return result;
}
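// Usage sketch (illustrative): when the caller supplies its own validation set, the
// overload above skips the row-count heuristic and evaluates trials against that split
// directly. The 0.2 test fraction and label column name are hypothetical choices.
private static void RunWithValidationSetSketch(MLContext context, IDataView data)
{
    var split = context.Data.TrainTestSplit(data, testFraction: 0.2);
    var experiment = context.Auto().CreateBinaryClassificationExperiment(maxExperimentTimeInSeconds: 60);
    var result = experiment.Execute(
        split.TrainSet,
        split.TestSet,
        new ColumnInformation { LabelColumnName = "Label" });
    Console.WriteLine($"Best AUC: {result.BestRun.ValidationMetrics?.AreaUnderRocCurve}");
}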