/// <summary>
/// Runs an AutoML experiment using cross validation with a caller-specified number of folds.
/// </summary>
/// <param name="trainData">Training data for the experiment; it is divided into cross validation folds.</param>
/// <param name="numberOfCVFolds">How many cross validation folds <paramref name="trainData"/> is divided
/// into when fitting a model. The value is validated before any splitting takes place.</param>
/// <param name="columnInformation">Column information for the dataset. Optional; when supplied, its
/// sampling key column name is forwarded to the cross validation splitter.</param>
/// <param name="preFeaturizer">Pre-featurizer that AutoML applies to the data during the experiment.
/// (It is fit only on the training data split to produce a trained transform, and that trained
/// transform is then applied to both the training data split and the corresponding validation
/// data split.)</param>
/// <param name="progressHandler">A user-defined object implementing the <see cref="IProgress{T}"/>
/// interface. AutoML invokes <see cref="IProgress{T}.Report(T)"/> after each model it produces
/// during the course of the experiment.</param>
/// <returns>The cross validation experiment result.</returns>
/// <remarks>
/// Depending on the size of your data, the AutoML experiment could take a long time to execute.
/// </remarks>
public CrossValidationExperimentResult<TMetrics> Execute(
    IDataView trainData,
    uint numberOfCVFolds,
    ColumnInformation columnInformation = null,
    IEstimator<ITransformer> preFeaturizer = null,
    IProgress<CrossValidationRunDetail<TMetrics>> progressHandler = null)
{
    // Check the requested fold count up front, before touching the data.
    UserInputValidationUtil.ValidateNumberOfCVFoldsArg(numberOfCVFolds);

    // columnInformation is optional, so the sampling key may legitimately be null here.
    var samplingKeyColumn = columnInformation?.SamplingKeyColumnName;
    var folds = SplitUtil.CrossValSplit(Context, trainData, numberOfCVFolds, samplingKeyColumn);

    return ExecuteCrossVal(
        folds.trainDatasets,
        columnInformation,
        folds.validationDatasets,
        preFeaturizer,
        progressHandler);
}
/// <summary>
/// Runs an AutoML experiment, choosing the validation strategy from the size of the training data.
/// </summary>
/// <param name="trainData">Training data for the experiment.</param>
/// <param name="columnInformation">Column information for the dataset. Its sampling key column name,
/// if available, is forwarded to whichever splitter is used.</param>
/// <param name="preFeaturizer">Pre-featurizer that AutoML applies to the data during the experiment.
/// (It is fit only on the training data split to produce a trained transform, and that trained
/// transform is then applied to both the training data split and the corresponding validation
/// data split.)</param>
/// <param name="progressHandler">A user-defined object implementing the <see cref="IProgress{T}"/>
/// interface. AutoML invokes <see cref="IProgress{T}.Report(T)"/> after each model it produces
/// during the course of the experiment.</param>
/// <returns>The experiment result.</returns>
/// <remarks>
/// When the row count reported for <paramref name="trainData"/> is below 15000, the experiment is
/// run with 10-fold cross validation; otherwise a single train-validate split is used.
/// Depending on the size of your data, the AutoML experiment could take a long time to execute.
/// </remarks>
public ExperimentResult<TMetrics> Execute(
    IDataView trainData,
    ColumnInformation columnInformation,
    IEstimator<ITransformer> preFeaturizer = null,
    IProgress<RunDetail<TMetrics>> progressHandler = null)
{
    // Datasets reporting fewer rows than this limit are evaluated with cross validation;
    // all others are evaluated with one train-validate split.
    const int crossValRowLimit = 15000;
    const int smallDatasetFoldCount = 10;

    // The limit is also handed to the row counter.
    // NOTE(review): presumably so counting can stop early once the limit is reached — confirm.
    var observedRows = DatasetDimensionsUtil.CountRows(trainData, crossValRowLimit);
    bool useCrossValidation = observedRows < crossValRowLimit;

    var samplingKeyColumn = columnInformation?.SamplingKeyColumnName;

    if (!useCrossValidation)
    {
        var holdout = SplitUtil.TrainValidateSplit(Context, trainData, samplingKeyColumn);
        return ExecuteTrainValidate(
            holdout.trainData,
            columnInformation,
            holdout.validationData,
            preFeaturizer,
            progressHandler);
    }

    var folds = SplitUtil.CrossValSplit(Context, trainData, smallDatasetFoldCount, samplingKeyColumn);
    return ExecuteCrossValSummary(
        folds.trainDatasets,
        columnInformation,
        folds.validationDatasets,
        preFeaturizer,
        progressHandler);
}