/// <summary>
/// Expand a single <see cref="SweepableEstimator"/> into a <see cref="MultiModelPipeline"/>
/// and append the candidate <paramref name="estimators"/> to it.
/// </summary>
public static MultiModelPipeline Append(this SweepableEstimator estimator, params SweepableEstimator[] estimators)
{
    var multiModelPipeline = new MultiModelPipeline();
    multiModelPipeline = multiModelPipeline.Append(estimator);

    return multiModelPipeline.Append(estimators);
}
public override ExperimentResult<BinaryClassificationMetrics> Execute(IDataView trainData, ColumnInformation columnInformation, IEstimator<ITransformer> preFeaturizer = null, IProgress<RunDetail<BinaryClassificationMetrics>> progressHandler = null)
{
    var label = columnInformation.LabelColumnName;
    _experiment.SetEvaluateMetric(Settings.OptimizingMetric, label);
    _experiment.SetTrainingTimeInSeconds(Settings.MaxExperimentTimeInSeconds);

    // Row-count threshold for choosing the validation strategy:
    // if the dataset has fewer rows than the threshold, use cross-validation;
    // otherwise, run the experiment with a train/validate split.
    const int crossValRowCountThreshold = 15000;
    var rowCount = DatasetDimensionsUtil.CountRows(trainData, crossValRowCountThreshold);

    // TODO: split the cross-validation result according to the sample key as well.
    if (rowCount < crossValRowCountThreshold)
    {
        const int numCrossValFolds = 10;
        _experiment.SetDataset(trainData, numCrossValFolds);
    }
    else
    {
        var splitData = Context.Data.TrainTestSplit(trainData);
        _experiment.SetDataset(splitData.TrainSet, splitData.TestSet);
    }

    var pipeline = new MultiModelPipeline();
    if (preFeaturizer != null)
    {
        pipeline = pipeline.Append(preFeaturizer);
    }

    pipeline = pipeline.Append(Context.Auto().Featurizer(trainData, columnInformation, Features))
                       .Append(Context.Auto().BinaryClassification(label, Features));
    _experiment.SetPipeline(pipeline);

    // Surface each completed trial to the caller's progress handler.
    var monitor = new BinaryClassificationTrialResultMonitor();
    monitor.OnTrialCompleted += (o, e) =>
    {
        var detail = ToRunDetail(e);
        progressHandler?.Report(detail);
    };

    _experiment.SetMonitor(monitor);
    _experiment.Run();

    var runDetails = monitor.RunDetails.Select(e => ToRunDetail(e));
    var bestRun = ToRunDetail(monitor.BestRun);
    return new ExperimentResult<BinaryClassificationMetrics>(runDetails, bestRun);
}
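// Usage sketch for the Execute overload above (hypothetical driver code, not part of this
// class; the file path, input schema, and column names are illustrative assumptions).
// Requires Microsoft.ML, Microsoft.ML.AutoML, and Microsoft.ML.Data.
public static void RunBinaryExperimentSample()
{
    var context = new MLContext();
    var trainData = context.Data.LoadFromTextFile<SampleInput>("train.csv", hasHeader: true, separatorChar: ',');
    var experiment = context.Auto().CreateBinaryClassificationExperiment(maxExperimentTimeInSeconds: 300);
    var columnInformation = new ColumnInformation { LabelColumnName = "Label" };

    var result = experiment.Execute(
        trainData,
        columnInformation,
        progressHandler: new Progress<RunDetail<BinaryClassificationMetrics>>(
            r => Console.WriteLine($"{r.TrainerName}: {r.ValidationMetrics?.Accuracy}")));

    Console.WriteLine($"Best trainer: {result.BestRun.TrainerName}");
}

// Hypothetical input schema for the sketch above.
public class SampleInput
{
    [LoadColumn(0)]
    public bool Label { get; set; }

    [LoadColumn(1)]
    public float Amount { get; set; }

    [LoadColumn(2)]
    public string Description { get; set; }
}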
/// <summary>
/// Flatten a <see cref="SweepableEstimatorPipeline"/> into a <see cref="MultiModelPipeline"/>
/// and append the candidate <paramref name="estimators"/> to it.
/// </summary>
public static MultiModelPipeline Append(this SweepableEstimatorPipeline pipeline, params SweepableEstimator[] estimators)
{
    var multiModelPipeline = new MultiModelPipeline();
    foreach (var estimator in pipeline.Estimators)
    {
        multiModelPipeline = multiModelPipeline.Append(estimator);
    }

    return multiModelPipeline.Append(estimators);
}
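// Usage sketch for the Append extensions above (a minimal illustration, not part of the
// library): any two SweepableEstimator instances can be expanded into a MultiModelPipeline
// whose search space covers the appended candidates. `first` and `second` are hypothetical
// placeholders for estimators obtained elsewhere.
public static MultiModelPipeline CombineCandidates(SweepableEstimator first, SweepableEstimator second)
{
    // Equivalent to new MultiModelPipeline().Append(first).Append(second).
    return first.Append(second);
}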
/// <summary>
/// Create a single featurization pipeline according to <paramref name="columnInformation"/>. This function collects all columns in <paramref name="columnInformation"/>,
/// featurizes them using <see cref="CatalogFeaturizer(string[], string[])"/>, <see cref="NumericFeaturizer(string[], string[])"/> or <see cref="TextFeaturizer(string, string)"/>,
/// and combines them into a single feature column as output.
/// </summary>
/// <param name="data">Input data.</param>
/// <param name="columnInformation">Column information.</param>
/// <param name="outputColumnName">Output feature column name.</param>
/// <returns>A <see cref="MultiModelPipeline"/> for featurization.</returns>
public MultiModelPipeline Featurizer(IDataView data, ColumnInformation columnInformation, string outputColumnName = "Features")
{
    Contracts.CheckValue(data, nameof(data));
    Contracts.CheckValue(columnInformation, nameof(columnInformation));

    // Infer each column's purpose, then group the feature columns by type.
    var columnPurposes = PurposeInference.InferPurposes(this._context, data, columnInformation);
    var textFeatures = columnPurposes.Where(c => c.Purpose == ColumnPurpose.TextFeature);
    var numericFeatures = columnPurposes.Where(c => c.Purpose == ColumnPurpose.NumericFeature);
    var catalogFeatures = columnPurposes.Where(c => c.Purpose == ColumnPurpose.CategoricalFeature);
    var textFeatureColumnNames = textFeatures.Select(c => data.Schema[c.ColumnIndex].Name).ToArray();
    var numericFeatureColumnNames = numericFeatures.Select(c => data.Schema[c.ColumnIndex].Name).ToArray();
    var catalogFeatureColumnNames = catalogFeatures.Select(c => data.Schema[c.ColumnIndex].Name).ToArray();

    // Featurize each group in place (input and output column names are identical).
    var pipeline = new MultiModelPipeline();
    if (numericFeatureColumnNames.Length > 0)
    {
        pipeline = pipeline.Append(this.NumericFeaturizer(numericFeatureColumnNames, numericFeatureColumnNames));
    }

    if (catalogFeatureColumnNames.Length > 0)
    {
        pipeline = pipeline.Append(this.CatalogFeaturizer(catalogFeatureColumnNames, catalogFeatureColumnNames));
    }

    foreach (var textColumn in textFeatureColumnNames)
    {
        pipeline = pipeline.Append(this.TextFeaturizer(textColumn, textColumn));
    }

    // Concatenate all featurized columns into the single output feature column.
    var option = new ConcatOption
    {
        InputColumnNames = textFeatureColumnNames.Concat(numericFeatureColumnNames).Concat(catalogFeatureColumnNames).ToArray(),
        OutputColumnName = outputColumnName,
    };

    if (option.InputColumnNames.Length > 0)
    {
        pipeline = pipeline.Append(SweepableEstimatorFactory.CreateConcatenate(option));
    }

    return pipeline;
}
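// Usage sketch for Featurizer (illustrative; `context` and `trainData` stand in for an
// existing MLContext and IDataView, and the label column name is a placeholder): the
// inferred text/numeric/categorical columns are featurized and concatenated into a single
// "Features" column, which the appended candidate trainers then consume.
public static MultiModelPipeline BuildPipelineSample(MLContext context, IDataView trainData)
{
    var columnInformation = new ColumnInformation { LabelColumnName = "Label" };
    return context.Auto().Featurizer(trainData, columnInformation, outputColumnName: "Features")
        .Append(context.Auto().BinaryClassification("Label", featureColumnName: "Features"));
}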
public AutoMLExperiment SetPipeline(SweepableEstimatorPipeline pipeline)
{
    var res = new MultiModelPipeline();
    foreach (var e in pipeline.Estimators)
    {
        res = res.Append(e);
    }

    SetPipeline(res);

    return this;
}
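// Usage sketch for the SetPipeline overload above (illustrative): a linear
// SweepableEstimatorPipeline produced elsewhere can be handed to the experiment directly;
// it is flattened into a MultiModelPipeline before being forwarded to the other overload.
// `sweepablePipeline` is a hypothetical placeholder.
public static AutoMLExperiment ConfigurePipelineSample(AutoMLExperiment experiment, SweepableEstimatorPipeline sweepablePipeline)
{
    return experiment.SetPipeline(sweepablePipeline);
}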
public override CrossValidationExperimentResult<BinaryClassificationMetrics> Execute(IDataView trainData, uint numberOfCVFolds, ColumnInformation columnInformation = null, IEstimator<ITransformer> preFeaturizer = null, IProgress<CrossValidationRunDetail<BinaryClassificationMetrics>> progressHandler = null)
{
    // columnInformation is optional here; fall back to the defaults so the label lookup
    // and the featurizer's null check below don't throw.
    columnInformation = columnInformation ?? new ColumnInformation();
    var label = columnInformation.LabelColumnName;
    _experiment.SetEvaluateMetric(Settings.OptimizingMetric, label);
    _experiment.SetTrainingTimeInSeconds(Settings.MaxExperimentTimeInSeconds);
    _experiment.SetDataset(trainData, (int)numberOfCVFolds);

    var pipeline = new MultiModelPipeline();
    if (preFeaturizer != null)
    {
        pipeline = pipeline.Append(preFeaturizer);
    }

    // Use the same feature column name for the featurizer output and the trainer input.
    pipeline = pipeline.Append(Context.Auto().Featurizer(trainData, columnInformation, Features))
                       .Append(Context.Auto().BinaryClassification(label, featureColumnName: Features));
    _experiment.SetPipeline(pipeline);

    // Surface each completed trial to the caller's progress handler.
    var monitor = new BinaryClassificationTrialResultMonitor();
    monitor.OnTrialCompleted += (o, e) =>
    {
        var runDetail = ToCrossValidationRunDetail(e);
        progressHandler?.Report(runDetail);
    };

    _experiment.SetMonitor(monitor);
    _experiment.Run();

    var runDetails = monitor.RunDetails.Select(e => ToCrossValidationRunDetail(e));
    var bestResult = ToCrossValidationRunDetail(monitor.BestRun);
    return new CrossValidationExperimentResult<BinaryClassificationMetrics>(runDetails, bestResult);
}
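// Usage sketch for the cross-validation overload above (hypothetical driver; the fold
// count and column names are illustrative). Unlike the train/validate overload, this one
// always cross-validates with the requested number of folds instead of applying the
// row-count heuristic.
public static void RunCrossValidationSample(IDataView trainData)
{
    var context = new MLContext();
    var experiment = context.Auto().CreateBinaryClassificationExperiment(maxExperimentTimeInSeconds: 300);
    var columnInformation = new ColumnInformation { LabelColumnName = "Label" };

    var cvResult = experiment.Execute(trainData, numberOfCVFolds: 5, columnInformation);
    Console.WriteLine($"Best trainer: {cvResult.BestRun.TrainerName}");
}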