/// <summary>
/// Appends one or more sweepable estimators to a fixed (non-sweepable) estimator,
/// producing a <see cref="MultiModelPipeline"/> containing all candidates.
/// </summary>
/// <param name="estimator">the fixed estimator to start the pipeline with.</param>
/// <param name="estimators">sweepable candidates appended after <paramref name="estimator"/>.</param>
/// <returns>a <see cref="MultiModelPipeline"/> combining the estimator and the candidates.</returns>
public static MultiModelPipeline Append(this IEstimator<ITransformer> estimator, params SweepableEstimator[] estimators)
{
    // Wrap the fixed estimator in a sweepable shell with an empty search space so it can
    // participate in the multi-model pipeline like any other candidate.
    var wrapped = new SweepableEstimator((context, parameter) => estimator, new SearchSpace.SearchSpace());

    return new MultiModelPipeline().Append(wrapped).Append(estimators);
}
/// <summary>
/// Creates a proposer seeded from <paramref name="settings"/>.
/// </summary>
/// <param name="settings">experiment settings; <c>Seed</c> (or 0 when unset) seeds the sampler.</param>
public PipelineProposer(AutoMLExperimentSettings settings)
{
    // This cost is used to initialize eci when started. The smaller the number, the less
    // cost this trainer will use at start, and the more likely it will be picked.
    _estimatorCost = new Dictionary<EstimatorType, double>
    {
        [EstimatorType.LightGbmRegression] = 0.788,
        [EstimatorType.FastTreeRegression] = 0.382,
        [EstimatorType.FastForestRegression] = 0.374,
        [EstimatorType.SdcaRegression] = 0.566,
        [EstimatorType.FastTreeTweedieRegression] = 0.401,
        [EstimatorType.LbfgsPoissonRegressionRegression] = 4.73,
        [EstimatorType.FastForestOva] = 4.283,
        [EstimatorType.FastTreeOva] = 3.701,
        [EstimatorType.LightGbmMulti] = 4.765,
        [EstimatorType.SdcaMaximumEntropyMulti] = 10.129,
        [EstimatorType.SdcaLogisticRegressionOva] = 13.16,
        [EstimatorType.LbfgsMaximumEntropyMulti] = 7.980,
        [EstimatorType.LbfgsLogisticRegressionOva] = 11.513,
        [EstimatorType.LightGbmBinary] = 4.765,
        [EstimatorType.FastTreeBinary] = 3.701,
        [EstimatorType.FastForestBinary] = 4.283,
        [EstimatorType.SdcaLogisticRegressionBinary] = 13.16,
        [EstimatorType.LbfgsLogisticRegressionBinary] = 11.513,
        [EstimatorType.ForecastBySsa] = 1,
        [EstimatorType.ImageClassificationMulti] = 1,
        [EstimatorType.MatrixFactorization] = 1,
    };

    _rand = new Random(settings.Seed ?? 0);
    _multiModelPipeline = null;
}
/// <summary>
/// Starts a <see cref="MultiModelPipeline"/> from a single sweepable estimator and
/// appends additional candidates to it.
/// </summary>
/// <param name="estimator">first estimator in the pipeline.</param>
/// <param name="estimators">further sweepable candidates.</param>
/// <returns>the combined <see cref="MultiModelPipeline"/>.</returns>
public static MultiModelPipeline Append(this SweepableEstimator estimator, params SweepableEstimator[] estimators)
{
    return new MultiModelPipeline()
        .Append(estimator)
        .Append(estimators);
}
/// <summary>
/// Runs the binary-classification experiment on <paramref name="trainData"/>, choosing
/// between cross validation (small datasets) and a train/validate split (large datasets).
/// </summary>
/// <param name="trainData">training data.</param>
/// <param name="columnInformation">column roles; supplies the label column name.</param>
/// <param name="preFeaturizer">optional featurization to run before the AutoML featurizer.</param>
/// <param name="progressHandler">optional callback invoked per completed trial.</param>
/// <returns>all run details plus the best run.</returns>
public override ExperimentResult<BinaryClassificationMetrics> Execute(IDataView trainData, ColumnInformation columnInformation, IEstimator<ITransformer> preFeaturizer = null, IProgress<RunDetail<BinaryClassificationMetrics>> progressHandler = null)
{
    var labelColumn = columnInformation.LabelColumnName;
    _experiment.SetEvaluateMetric(Settings.OptimizingMetric, labelColumn);
    _experiment.SetTrainingTimeInSeconds(Settings.MaxExperimentTimeInSeconds);

    // Cross val threshold for # of dataset rows --
    // If dataset has < threshold # of rows, use cross val.
    // Else, run experiment using train-validate split.
    const int crossValRowCountThreshold = 15000;
    var rowCount = DatasetDimensionsUtil.CountRows(trainData, crossValRowCountThreshold);

    // TODO
    // split cross validation result according to sample key as well.
    if (rowCount >= crossValRowCountThreshold)
    {
        var split = Context.Data.TrainTestSplit(trainData);
        _experiment.SetDataset(split.TrainSet, split.TestSet);
    }
    else
    {
        const int numCrossValFolds = 10;
        _experiment.SetDataset(trainData, numCrossValFolds);
    }

    var pipeline = new MultiModelPipeline();
    if (preFeaturizer != null)
    {
        pipeline = pipeline.Append(preFeaturizer);
    }

    pipeline = pipeline
        .Append(Context.Auto().Featurizer(trainData, columnInformation, Features))
        .Append(Context.Auto().BinaryClassification(labelColumn, Features));
    _experiment.SetPipeline(pipeline);

    var monitor = new BinaryClassificationTrialResultMonitor();
    monitor.OnTrialCompleted += (o, e) =>
    {
        // Surface each finished trial to the caller as it completes.
        progressHandler?.Report(ToRunDetail(e));
    };

    _experiment.SetMonitor(monitor);
    _experiment.Run();

    var allRuns = monitor.RunDetails.Select(e => ToRunDetail(e));
    var bestRun = ToRunDetail(monitor.BestRun);
    return new ExperimentResult<BinaryClassificationMetrics>(allRuns, bestRun);
}
/// <summary>
/// Converts a <see cref="SweepableEstimatorPipeline"/> into a <see cref="MultiModelPipeline"/>
/// and appends additional sweepable candidates.
/// </summary>
/// <param name="pipeline">pipeline whose estimators seed the result, in order.</param>
/// <param name="estimators">candidates appended after the pipeline's own estimators.</param>
/// <returns>the combined <see cref="MultiModelPipeline"/>.</returns>
public static MultiModelPipeline Append(this SweepableEstimatorPipeline pipeline, params SweepableEstimator[] estimators)
{
    // Fold the pipeline's estimators into a fresh multi-model pipeline, preserving order.
    var combined = pipeline.Estimators.Aggregate(new MultiModelPipeline(), (acc, e) => acc.Append(e));
    return combined.Append(estimators);
}
/// <summary>
/// Sets the experiment's pipeline from a <see cref="SweepableEstimatorPipeline"/> by
/// converting it into a <see cref="MultiModelPipeline"/>.
/// </summary>
/// <param name="pipeline">pipeline whose estimators are copied, in order.</param>
/// <returns>this experiment, for chaining.</returns>
public AutoMLExperiment SetPipeline(SweepableEstimatorPipeline pipeline)
{
    var multiModelPipeline = pipeline.Estimators
        .Aggregate(new MultiModelPipeline(), (acc, estimator) => acc.Append(estimator));
    SetPipeline(multiModelPipeline);
    return this;
}
/// <summary>
/// Create a single featurize pipeline according to <paramref name="columnInformation"/>. This function will collect all columns in <paramref name="columnInformation"/>,
/// featurizing them using <see cref="CatalogFeaturizer(string[], string[])"/>, <see cref="NumericFeaturizer(string[], string[])"/> or <see cref="TextFeaturizer(string, string)"/>. And combine
/// them into a single feature column as output.
/// </summary>
/// <param name="data">input data.</param>
/// <param name="columnInformation">column information.</param>
/// <param name="outputColumnName">output feature column.</param>
/// <returns>A <see cref="MultiModelPipeline"/> for featurization.</returns>
public MultiModelPipeline Featurizer(IDataView data, ColumnInformation columnInformation, string outputColumnName = "Features")
{
    Contracts.CheckValue(data, nameof(data));
    Contracts.CheckValue(columnInformation, nameof(columnInformation));

    var columnPurposes = PurposeInference.InferPurposes(this._context, data, columnInformation);

    // Resolve the schema names of every column inferred to have the given purpose.
    string[] NamesWithPurpose(ColumnPurpose purpose) =>
        columnPurposes
            .Where(c => c.Purpose == purpose)
            .Select(c => data.Schema[c.ColumnIndex].Name)
            .ToArray();

    var textColumns = NamesWithPurpose(ColumnPurpose.TextFeature);
    var numericColumns = NamesWithPurpose(ColumnPurpose.NumericFeature);
    var catalogColumns = NamesWithPurpose(ColumnPurpose.CategoricalFeature);

    var pipeline = new MultiModelPipeline();

    // Each featurizer transforms its columns in place (input name == output name).
    if (numericColumns.Length > 0)
    {
        pipeline = pipeline.Append(this.NumericFeaturizer(numericColumns, numericColumns));
    }

    if (catalogColumns.Length > 0)
    {
        pipeline = pipeline.Append(this.CatalogFeaturizer(catalogColumns, catalogColumns));
    }

    foreach (var textColumn in textColumns)
    {
        pipeline = pipeline.Append(this.TextFeaturizer(textColumn, textColumn));
    }

    // Concatenate every featurized column into the single output feature column.
    var option = new ConcatOption
    {
        InputColumnNames = textColumns.Concat(numericColumns).Concat(catalogColumns).ToArray(),
        OutputColumnName = outputColumnName,
    };

    if (option.InputColumnNames.Length > 0)
    {
        pipeline = pipeline.Append(SweepableEstimatorFactory.CreateConcatenate(option));
    }

    return pipeline;
}
/// <summary>
/// Runs the binary-classification experiment with <paramref name="numberOfCVFolds"/>-fold
/// cross validation over <paramref name="trainData"/>.
/// </summary>
/// <param name="trainData">training data.</param>
/// <param name="numberOfCVFolds">number of cross-validation folds.</param>
/// <param name="columnInformation">column roles; when null, defaults are used.</param>
/// <param name="preFeaturizer">optional featurization to run before the AutoML featurizer.</param>
/// <param name="progressHandler">optional callback invoked per completed trial.</param>
/// <returns>all cross-validation run details plus the best run.</returns>
public override CrossValidationExperimentResult<BinaryClassificationMetrics> Execute(IDataView trainData, uint numberOfCVFolds, ColumnInformation columnInformation = null, IEstimator<ITransformer> preFeaturizer = null, IProgress<CrossValidationRunDetail<BinaryClassificationMetrics>> progressHandler = null)
{
    // BUG FIX: columnInformation defaults to null but was dereferenced immediately below,
    // throwing NullReferenceException whenever callers relied on the default. Fall back to
    // a default ColumnInformation (label column "Label") instead.
    columnInformation = columnInformation ?? new ColumnInformation();

    var label = columnInformation.LabelColumnName;
    _experiment.SetEvaluateMetric(Settings.OptimizingMetric, label);
    _experiment.SetTrainingTimeInSeconds(Settings.MaxExperimentTimeInSeconds);
    _experiment.SetDataset(trainData, (int)numberOfCVFolds);

    MultiModelPipeline pipeline = new MultiModelPipeline();
    if (preFeaturizer != null)
    {
        pipeline = pipeline.Append(preFeaturizer);
    }

    // NOTE(review): the featurizer writes its output to "__Features__" while the trainer
    // reads from Features — confirm the Features constant equals "__Features__" (the
    // train/validate overload passes Features to both).
    pipeline = pipeline.Append(Context.Auto().Featurizer(trainData, columnInformation, "__Features__"))
        .Append(Context.Auto().BinaryClassification(label, featureColumnName: Features));

    _experiment.SetPipeline(pipeline);

    var monitor = new BinaryClassificationTrialResultMonitor();
    monitor.OnTrialCompleted += (o, e) =>
    {
        // Surface each finished trial to the caller as it completes.
        var runDetails = ToCrossValidationRunDetail(e);
        progressHandler?.Report(runDetails);
    };

    _experiment.SetMonitor(monitor);
    _experiment.Run();

    var allRuns = monitor.RunDetails.Select(e => ToCrossValidationRunDetail(e));
    var bestResult = ToCrossValidationRunDetail(monitor.BestRun);
    var result = new CrossValidationExperimentResult<BinaryClassificationMetrics>(allRuns, bestResult);
    return result;
}
/// <summary>
/// Picks the pipeline schema for the next trial: the cheapest pipeline on the first call,
/// then samples proportionally to inverse estimated cost (cheaper pipelines are more likely).
/// </summary>
/// <param name="settings">trial settings to fill in; Schema and Pipeline are assigned.</param>
/// <returns>the same <paramref name="settings"/> instance, updated.</returns>
public TrialSettings Propose(TrialSettings settings)
{
    _multiModelPipeline = settings.ExperimentSettings.Pipeline;
    _learnerInitialCost = _multiModelPipeline.PipelineIds.ToDictionary(kv => kv, kv => GetEstimatedCostForPipeline(kv, _multiModelPipeline));
    var pipelineIds = _multiModelPipeline.PipelineIds;

    if (_eci == null)
    {
        // initialize eci with the estimated cost and always start from pipeline which has lowest cost.
        _eci = pipelineIds.ToDictionary(kv => kv, kv => GetEstimatedCostForPipeline(kv, _multiModelPipeline));
        settings.Schema = _eci.OrderBy(kv => kv.Value).First().Key;
    }
    else
    {
        // Roulette-wheel selection: probability of a pipeline is proportional to 1/eci.
        var probabilities = pipelineIds.Select(id => _eci[id]).ToArray();
        probabilities = ArrayMath.Inverse(probabilities);
        probabilities = ArrayMath.Normalize(probabilities);

        var randdouble = _rand.NextDouble();
        var sum = 0.0;

        // selected pipeline id index
        int i;
        for (i = 0; i != pipelineIds.Length; ++i)
        {
            sum += probabilities[i]; // removed redundant (double[]) cast — already a double[]
            if (sum > randdouble)
            {
                break;
            }
        }

        // BUG FIX: floating-point rounding can leave the normalized probabilities summing to
        // slightly less than 1.0, so the loop may finish without breaking and i would equal
        // pipelineIds.Length — indexing pipelineIds[i] then throws IndexOutOfRangeException.
        // Clamp to the last pipeline in that case.
        if (i == pipelineIds.Length)
        {
            i = pipelineIds.Length - 1;
        }

        settings.Schema = pipelineIds[i];
    }

    settings.Pipeline = _multiModelPipeline.BuildSweepableEstimatorPipeline(settings.Schema);
    return settings;
}
/// <summary>
/// Estimates the cost of the pipeline identified by expression <paramref name="kv"/>:
/// the cost of the first of its estimators found in the cost table (in practice the
/// trainer), or 1 when none of its estimators has a known cost.
/// </summary>
/// <param name="kv">pipeline id expression (parsed via <see cref="Entity.FromExpression"/>).</param>
/// <param name="multiModelPipeline">pipeline used to resolve estimator names to estimators.</param>
/// <returns>the estimated cost; smaller means cheaper to try.</returns>
private double GetEstimatedCostForPipeline(string kv, MultiModelPipeline multiModelPipeline)
{
    var entity = Entity.FromExpression(kv);
    // "Nil" entries are placeholders for skipped stages — ignore them.
    var estimatorTypes = entity.ValueEntities().Where(v => v is StringEntity s && s.Value != "Nil")
        .Select(v =>
        {
            var s = v as StringEntity;
            var estimator = multiModelPipeline.Estimators[s.Value];
            return estimator.EstimatorType;
        });

    foreach (var estimatorType in estimatorTypes)
    {
        // Single dictionary lookup via TryGetValue instead of ContainsKey + indexer.
        if (_estimatorCost.TryGetValue(estimatorType, out var cost))
        {
            return cost;
        }
    }

    return 1;
}
/// <summary>
/// Sets the <see cref="MultiModelPipeline"/> the experiment will sweep over.
/// </summary>
/// <param name="pipeline">the pipeline to use.</param>
/// <returns>this experiment, for chaining.</returns>
public AutoMLExperiment SetPipeline(MultiModelPipeline pipeline)
{
    _settings.Pipeline = pipeline;
    return this;
}