// Appending a sweepable estimator to itself must yield a two-stage pipeline whose
// string form names both stages in order.
public void CreateSweepableEstimatorPipelineFromSweepableEstimatorTest()
{
    var fastForest = SweepableEstimatorFactory.CreateFastForestBinary(new FastForestOption());

    var twoStagePipeline = fastForest.Append(fastForest);
    var rendered = twoStagePipeline.ToString();

    rendered.Should().Be("FastForestBinary=>FastForestBinary");
}
// Exercises the Append overloads of SweepablePipeline (single estimator, several
// estimators, estimators mixed with pipelines, pipelines only) and compares the
// serialized result against the approved snapshot.
public void SweepablePipeline_Append_SweepableEstimator_Test()
{
    var concatOption = new ConcatOption()
    {
        InputColumnNames = new[] { "a", "b", "c" },
        OutputColumnName = "a",
    };
    var lgbmOption = new LgbmOption()
    {
        FeatureColumnName = "Feature",
        LabelColumnName = "Label",
    };
    var pipeline = new SweepablePipeline();

    // A single sweepable estimator can be appended.
    pipeline = pipeline.Append(SweepableEstimatorFactory.CreateConcatenate(concatOption));

    // Multiple sweepable estimators can be appended in one call.
    pipeline = pipeline.Append(
        SweepableEstimatorFactory.CreateLightGbmBinary(lgbmOption),
        SweepableEstimatorFactory.CreateConcatenate(concatOption));

    // Sweepable estimators and sweepable pipelines can be mixed in one call.
    pipeline = pipeline.Append(SweepableEstimatorFactory.CreateConcatenate(concatOption), pipeline);

    // Sweepable pipelines alone can be appended as well.
    pipeline = pipeline.Append(pipeline, pipeline);

    var serialized = JsonSerializer.Serialize(pipeline, _jsonSerializerOptions);
    Approvals.Verify(serialized);
}
/// <summary>
/// Generates the sweepable estimator factory source file and adds it to the compilation.
/// </summary>
/// <remarks>
/// If an additional file whose path contains <c>code_gen_flag.json</c> is present and maps this
/// generator's name to <c>false</c>, nothing is generated. Otherwise the estimators listed in the
/// additional files whose paths contain <c>trainer-estimators.json</c> and
/// <c>transformer-estimators.json</c> are collected and passed to the
/// <see cref="SweepableEstimatorFactory"/> template.
/// </remarks>
/// <param name="context">generator execution context supplying the additional files and receiving the generated source.</param>
public void Execute(GeneratorExecutionContext context)
{
    // FirstOrDefault instead of First: a project without the flag file simply has no opt-out,
    // which must not crash the generator with an InvalidOperationException.
    var flagFile = context.AdditionalFiles.Where(f => f.Path.Contains("code_gen_flag.json")).FirstOrDefault();

    // GetText() can yield null when the file content is unavailable; treat that like a missing flag file.
    var flagJson = flagFile?.GetText()?.ToString();
    if (flagJson != null)
    {
        var flags = JsonSerializer.Deserialize<Dictionary<string, bool>>(flagJson);

        // A JSON literal "null" deserializes to a null dictionary, hence the extra guard.
        if (flags != null && flags.TryGetValue(nameof(SweepableEstimatorFactoryGenerator), out var enabled) && !enabled)
        {
            return;
        }
    }

    // Each estimator entry is expanded to one (estimator name, search option type name) pair per estimator type.
    var trainers = context.AdditionalFiles.Where(f => f.Path.Contains("trainer-estimators.json"))
        .SelectMany(
            file => Utils.GetEstimatorsFromJson(file.GetText().ToString()).Estimators,
            (additionalFile, estimator) => (estimator.FunctionName, estimator.EstimatorTypes, estimator.SearchOption))
        .SelectMany(union => union.EstimatorTypes.Select(t => (Utils.CreateEstimatorName(union.FunctionName, t), Utils.ToTitleCase(union.SearchOption))))
        .ToArray();

    var transformers = context.AdditionalFiles.Where(f => f.Path.Contains("transformer-estimators.json"))
        .SelectMany(
            file => Utils.GetEstimatorsFromJson(file.GetText().ToString()).Estimators,
            (additionalFile, estimator) => (estimator.FunctionName, estimator.EstimatorTypes, estimator.SearchOption))
        .SelectMany(union => union.EstimatorTypes.Select(t => (Utils.CreateEstimatorName(union.FunctionName, t), Utils.ToTitleCase(union.SearchOption))))
        .ToArray();

    var code = new SweepableEstimatorFactory()
    {
        NameSpace = Constant.CodeGeneratorNameSpace,
        EstimatorNames = trainers.Concat(transformers),
    };

    context.AddSource(className + ".cs", SourceText.From(code.TransformText(), Encoding.UTF8));
}
// Appending a plain ML.NET estimator to a sweepable estimator must yield a pipeline
// whose second stage is rendered as "Unknown".
public void CreateSweepableEstimatorPipelineFromSweepableEstimatorAndIEstimatorTest()
{
    var mlContext = new MLContext();
    var fastForest = SweepableEstimatorFactory.CreateFastForestBinary(new FastForestOption());

    var rendered = fastForest
        .Append(mlContext.Transforms.Concatenate("output", "input"))
        .ToString();

    rendered.Should().Be("FastForestBinary=>Unknown");
}
// A plain estimator followed by a sweepable estimator and another plain estimator must
// render as "Unknown=>FastForestBinary=>Unknown".
public void AppendIEstimatorToSweepabeEstimatorPipelineTest()
{
    var mlContext = new MLContext();
    var concatenate = mlContext.Transforms.Concatenate("output", "input");

    var withTrainer = concatenate.Append(SweepableEstimatorFactory.CreateFastForestBinary(new FastForestOption()));
    withTrainer = withTrainer.Append(mlContext.Transforms.CopyColumns("output", "input"));

    var rendered = withTrainer.ToString();
    rendered.Should().Be("Unknown=>FastForestBinary=>Unknown");
}
// Appending the multiclass trainer candidates to a single sweepable estimator must serialize
// to the approved snapshot.
public void CreateMultiModelPipelineFromSweepableEstimatorAndMultiClassifiers()
{
    var mlContext = new MLContext();
    var fastForest = SweepableEstimatorFactory.CreateFastForestBinary(new FastForestOption());

    var multiModelPipeline = fastForest.Append(mlContext.Auto().MultiClassification());

    Approvals.Verify(JsonSerializer.Serialize(multiModelPipeline, _jsonSerializerOptions));
}
/// <summary>
/// Build the <see cref="SweepableEstimator"/> candidates used to featurize a text column.
/// Currently this is a single featurize-text estimator.
/// </summary>
/// <param name="outputColumnName">name of the column the featurized text is written to.</param>
/// <param name="inputColumnName">name of the text column to featurize.</param>
/// <returns>A one-element array holding the featurize-text estimator.</returns>
internal SweepableEstimator[] TextFeaturizer(string outputColumnName, string inputColumnName)
{
    var featurizeText = SweepableEstimatorFactory.CreateFeaturizeText(
        new FeaturizeTextOption
        {
            InputColumnName = inputColumnName,
            OutputColumnName = outputColumnName,
        });

    return new[] { featurizeText };
}
/// <summary>
/// Build the <see cref="SweepableEstimator"/> candidates for multiclass classification.
/// </summary>
/// <remarks>
/// Any option object passed in is modified: its label, feature and example weight column names
/// are overwritten with the values given to this method.
/// </remarks>
/// <param name="labelColumnName">label column name.</param>
/// <param name="featureColumnName">feature column name.</param>
/// <param name="exampleWeightColumnName">example weight column name.</param>
/// <param name="useFastForest">true to include fast forest among the candidate trainers.</param>
/// <param name="useLgbm">true to include lgbm among the candidate trainers.</param>
/// <param name="useFastTree">true to include fast tree among the candidate trainers.</param>
/// <param name="useLbfgs">true to include the lbfgs trainers among the candidate trainers.</param>
/// <param name="useSdca">true to include the sdca trainers among the candidate trainers.</param>
/// <param name="fastTreeOption">initial option for fast tree; a default option is created when null.</param>
/// <param name="lgbmOption">initial option for lgbm; a default option is created when null.</param>
/// <param name="fastForestOption">initial option for fast forest; a default option is created when null.</param>
/// <param name="lbfgsOption">initial option for lbfgs; a default option is created when null.</param>
/// <param name="sdcaOption">initial option for sdca; a default option is created when null.</param>
/// <param name="fastTreeSearchSpace">search space for fast tree; when null, one is created from the fast tree option.</param>
/// <param name="lgbmSearchSpace">search space for lgbm; when null, one is created from the lgbm option.</param>
/// <param name="fastForestSearchSpace">search space for fast forest; when null, one is created from the fast forest option.</param>
/// <param name="lbfgsSearchSpace">search space for lbfgs; when null, one is created from the lbfgs option.</param>
/// <param name="sdcaSearchSpace">search space for sdca; when null, one is created from the sdca option.</param>
/// <returns>The enabled estimators, in the order fast tree, fast forest, lgbm, lbfgs, sdca.</returns>
public SweepableEstimator[] MultiClassification(string labelColumnName = DefaultColumnNames.Label, string featureColumnName = DefaultColumnNames.Features, string exampleWeightColumnName = null, bool useFastForest = true, bool useLgbm = true, bool useFastTree = true, bool useLbfgs = true, bool useSdca = true, FastTreeOption fastTreeOption = null, LgbmOption lgbmOption = null, FastForestOption fastForestOption = null, LbfgsOption lbfgsOption = null, SdcaOption sdcaOption = null, SearchSpace<FastTreeOption> fastTreeSearchSpace = null, SearchSpace<LgbmOption> lgbmSearchSpace = null, SearchSpace<FastForestOption> fastForestSearchSpace = null, SearchSpace<LbfgsOption> lbfgsSearchSpace = null, SearchSpace<SdcaOption> sdcaSearchSpace = null)
{
    var estimators = new List<SweepableEstimator>();

    if (useFastTree)
    {
        if (fastTreeOption is null)
        {
            fastTreeOption = new FastTreeOption();
        }

        fastTreeOption.LabelColumnName = labelColumnName;
        fastTreeOption.FeatureColumnName = featureColumnName;
        fastTreeOption.ExampleWeightColumnName = exampleWeightColumnName;

        var ovaSpace = fastTreeSearchSpace ?? new SearchSpace<FastTreeOption>(fastTreeOption);
        estimators.Add(SweepableEstimatorFactory.CreateFastTreeOva(fastTreeOption, ovaSpace));
    }

    if (useFastForest)
    {
        if (fastForestOption is null)
        {
            fastForestOption = new FastForestOption();
        }

        fastForestOption.LabelColumnName = labelColumnName;
        fastForestOption.FeatureColumnName = featureColumnName;
        fastForestOption.ExampleWeightColumnName = exampleWeightColumnName;

        var ovaSpace = fastForestSearchSpace ?? new SearchSpace<FastForestOption>(fastForestOption);
        estimators.Add(SweepableEstimatorFactory.CreateFastForestOva(fastForestOption, ovaSpace));
    }

    if (useLgbm)
    {
        if (lgbmOption is null)
        {
            lgbmOption = new LgbmOption();
        }

        lgbmOption.LabelColumnName = labelColumnName;
        lgbmOption.FeatureColumnName = featureColumnName;
        lgbmOption.ExampleWeightColumnName = exampleWeightColumnName;

        var multiSpace = lgbmSearchSpace ?? new SearchSpace<LgbmOption>(lgbmOption);
        estimators.Add(SweepableEstimatorFactory.CreateLightGbmMulti(lgbmOption, multiSpace));
    }

    if (useLbfgs)
    {
        if (lbfgsOption is null)
        {
            lbfgsOption = new LbfgsOption();
        }

        lbfgsOption.LabelColumnName = labelColumnName;
        lbfgsOption.FeatureColumnName = featureColumnName;
        lbfgsOption.ExampleWeightColumnName = exampleWeightColumnName;

        // Each estimator gets its own fallback search space instance when none was supplied.
        var ovaSpace = lbfgsSearchSpace ?? new SearchSpace<LbfgsOption>(lbfgsOption);
        estimators.Add(SweepableEstimatorFactory.CreateLbfgsLogisticRegressionOva(lbfgsOption, ovaSpace));

        var maximumEntropySpace = lbfgsSearchSpace ?? new SearchSpace<LbfgsOption>(lbfgsOption);
        estimators.Add(SweepableEstimatorFactory.CreateLbfgsMaximumEntropyMulti(lbfgsOption, maximumEntropySpace));
    }

    if (useSdca)
    {
        if (sdcaOption is null)
        {
            sdcaOption = new SdcaOption();
        }

        sdcaOption.LabelColumnName = labelColumnName;
        sdcaOption.FeatureColumnName = featureColumnName;
        sdcaOption.ExampleWeightColumnName = exampleWeightColumnName;

        // Each estimator gets its own fallback search space instance when none was supplied.
        var maximumEntropySpace = sdcaSearchSpace ?? new SearchSpace<SdcaOption>(sdcaOption);
        estimators.Add(SweepableEstimatorFactory.CreateSdcaMaximumEntropyMulti(sdcaOption, maximumEntropySpace));

        var ovaSpace = sdcaSearchSpace ?? new SearchSpace<SdcaOption>(sdcaOption);
        estimators.Add(SweepableEstimatorFactory.CreateSdcaLogisticRegressionOva(sdcaOption, ovaSpace));
    }

    return estimators.ToArray();
}
// A concatenate transform followed by a featurize-text estimator and the multiclass trainer
// candidates must serialize to the approved snapshot.
public void CreateMultiModelPipelineFromSweepableEstimatorPipelineAndMultiClassifiers()
{
    var mlContext = new MLContext();

    var multiModelPipeline = mlContext.Transforms.Concatenate("output", "input")
        .Append(SweepableEstimatorFactory.CreateFeaturizeText(new FeaturizeTextOption()))
        .Append(mlContext.Auto().MultiClassification());

    Approvals.Verify(JsonSerializer.Serialize(multiModelPipeline, _jsonSerializerOptions));
}
/// <summary>
/// Create a list of <see cref="SweepableEstimator"/> for regression.
/// </summary>
/// <remarks>
/// Any option object passed in is modified: its label, feature and example weight column names
/// are overwritten with the values given to this method. When no search space is supplied for a
/// trainer, one is created from that trainer's resolved option, matching
/// <c>MultiClassification</c>.
/// </remarks>
/// <param name="labelColumnName">label column name.</param>
/// <param name="featureColumnName">feature column name.</param>
/// <param name="exampleWeightColumnName">example weight column name.</param>
/// <param name="useFastForest">true if use fast forest as available trainer.</param>
/// <param name="useLgbm">true if use lgbm as available trainer.</param>
/// <param name="useFastTree">true if use fast tree (regression and tweedie regression) as available trainers.</param>
/// <param name="useLbfgs">true if use lbfgs poisson regression as available trainer.</param>
/// <param name="useSdca">true if use sdca as available trainer.</param>
/// <param name="fastTreeOption">if provided, use it as initial option for fast tree, otherwise a default option is created.</param>
/// <param name="lgbmOption">if provided, use it as initial option for lgbm, otherwise a default option is created.</param>
/// <param name="fastForestOption">if provided, use it as initial option for fast forest, otherwise a default option is created.</param>
/// <param name="lbfgsOption">if provided, use it as initial option for lbfgs, otherwise a default option is created.</param>
/// <param name="sdcaOption">if provided, use it as initial option for sdca, otherwise a default option is created.</param>
/// <param name="fastTreeSearchSpace">if provided, use it as search space for fast tree, otherwise one is created from the fast tree option.</param>
/// <param name="lgbmSearchSpace">if provided, use it as search space for lgbm, otherwise one is created from the lgbm option.</param>
/// <param name="fastForestSearchSpace">if provided, use it as search space for fast forest, otherwise one is created from the fast forest option.</param>
/// <param name="lbfgsSearchSpace">if provided, use it as search space for lbfgs, otherwise one is created from the lbfgs option.</param>
/// <param name="sdcaSearchSpace">if provided, use it as search space for sdca, otherwise one is created from the sdca option.</param>
/// <returns>The enabled estimators, in the order fast tree, fast forest, lgbm, lbfgs, sdca.</returns>
internal SweepableEstimator[] Regression(string labelColumnName = DefaultColumnNames.Label, string featureColumnName = DefaultColumnNames.Features, string exampleWeightColumnName = null, bool useFastForest = true, bool useLgbm = true, bool useFastTree = true, bool useLbfgs = true, bool useSdca = true, FastTreeOption fastTreeOption = null, LgbmOption lgbmOption = null, FastForestOption fastForestOption = null, LbfgsOption lbfgsOption = null, SdcaOption sdcaOption = null, SearchSpace<FastTreeOption> fastTreeSearchSpace = null, SearchSpace<LgbmOption> lgbmSearchSpace = null, SearchSpace<FastForestOption> fastForestSearchSpace = null, SearchSpace<LbfgsOption> lbfgsSearchSpace = null, SearchSpace<SdcaOption> sdcaSearchSpace = null)
{
    var res = new List<SweepableEstimator>();

    if (useFastTree)
    {
        fastTreeOption = fastTreeOption ?? new FastTreeOption();
        fastTreeOption.LabelColumnName = labelColumnName;
        fastTreeOption.FeatureColumnName = featureColumnName;
        fastTreeOption.ExampleWeightColumnName = exampleWeightColumnName;

        // The fallback search space is built from the option (not parameterless) so that a
        // caller-supplied initial option is not ignored, consistent with MultiClassification.
        res.Add(SweepableEstimatorFactory.CreateFastTreeRegression(fastTreeOption, fastTreeSearchSpace ?? new SearchSpace<FastTreeOption>(fastTreeOption)));
        res.Add(SweepableEstimatorFactory.CreateFastTreeTweedieRegression(fastTreeOption, fastTreeSearchSpace ?? new SearchSpace<FastTreeOption>(fastTreeOption)));
    }

    if (useFastForest)
    {
        fastForestOption = fastForestOption ?? new FastForestOption();
        fastForestOption.LabelColumnName = labelColumnName;
        fastForestOption.FeatureColumnName = featureColumnName;
        fastForestOption.ExampleWeightColumnName = exampleWeightColumnName;
        res.Add(SweepableEstimatorFactory.CreateFastForestRegression(fastForestOption, fastForestSearchSpace ?? new SearchSpace<FastForestOption>(fastForestOption)));
    }

    if (useLgbm)
    {
        lgbmOption = lgbmOption ?? new LgbmOption();
        lgbmOption.LabelColumnName = labelColumnName;
        lgbmOption.FeatureColumnName = featureColumnName;
        lgbmOption.ExampleWeightColumnName = exampleWeightColumnName;
        res.Add(SweepableEstimatorFactory.CreateLightGbmRegression(lgbmOption, lgbmSearchSpace ?? new SearchSpace<LgbmOption>(lgbmOption)));
    }

    if (useLbfgs)
    {
        lbfgsOption = lbfgsOption ?? new LbfgsOption();
        lbfgsOption.LabelColumnName = labelColumnName;
        lbfgsOption.FeatureColumnName = featureColumnName;
        lbfgsOption.ExampleWeightColumnName = exampleWeightColumnName;
        res.Add(SweepableEstimatorFactory.CreateLbfgsPoissonRegressionRegression(lbfgsOption, lbfgsSearchSpace ?? new SearchSpace<LbfgsOption>(lbfgsOption)));
    }

    if (useSdca)
    {
        sdcaOption = sdcaOption ?? new SdcaOption();
        sdcaOption.LabelColumnName = labelColumnName;
        sdcaOption.FeatureColumnName = featureColumnName;
        sdcaOption.ExampleWeightColumnName = exampleWeightColumnName;
        res.Add(SweepableEstimatorFactory.CreateSdcaRegression(sdcaOption, sdcaSearchSpace ?? new SearchSpace<SdcaOption>(sdcaOption)));
    }

    return res.ToArray();
}
/// <summary>
/// Create a list of <see cref="SweepableEstimator"/> for featurizing catalog columns.
/// The list holds a one-hot encoding estimator and a one-hot hash encoding estimator that
/// share the same option.
/// </summary>
/// <param name="outputColumnNames">output column names. Must be non-null, non-empty and the same length as <paramref name="inputColumnNames"/>.</param>
/// <param name="inputColumnNames">input column names. Must be non-null, non-empty and the same length as <paramref name="outputColumnNames"/>.</param>
/// <returns>The one-hot encoding estimator followed by the one-hot hash encoding estimator.</returns>
internal SweepableEstimator[] CatalogFeaturizer(string[] outputColumnNames, string[] inputColumnNames)
{
    // Explicit null guards so a null argument is reported by its own parameter name rather
    // than as a LINQ "source" error from Count(); matches the checks in NumericFeaturizer.
    Contracts.CheckValue(inputColumnNames, nameof(inputColumnNames));
    Contracts.CheckValue(outputColumnNames, nameof(outputColumnNames));
    Contracts.Check(outputColumnNames.Length == inputColumnNames.Length && outputColumnNames.Length > 0, "outputColumnNames and inputColumnNames must have the same length and greater than 0");

    var option = new OneHotOption
    {
        InputColumnNames = inputColumnNames,
        OutputColumnNames = outputColumnNames,
    };

    return new SweepableEstimator[]
    {
        SweepableEstimatorFactory.CreateOneHotEncoding(option),
        SweepableEstimatorFactory.CreateOneHotHashEncoding(option),
    };
}
// Builds a fixed five-stage pipeline (concatenate, replace missing values, one-hot encoding,
// LightGbm binary, fast tree binary), each stage created from a default option.
private SweepableEstimatorPipeline CreateSweepbaleEstimatorPipeline()
{
    var stages = new SweepableEstimator[]
    {
        SweepableEstimatorFactory.CreateConcatenate(new ConcatOption()),
        SweepableEstimatorFactory.CreateReplaceMissingValues(new ReplaceMissingValueOption()),
        SweepableEstimatorFactory.CreateOneHotEncoding(new OneHotOption()),
        SweepableEstimatorFactory.CreateLightGbmBinary(new LgbmOption()),
        SweepableEstimatorFactory.CreateFastTreeBinary(new FastTreeOption()),
    };

    return new SweepableEstimatorPipeline(stages);
}
/// <summary>
/// Build the <see cref="SweepableEstimator"/> candidates used to featurize numeric columns.
/// Currently this is a single replace-missing-values estimator.
/// </summary>
/// <param name="outputColumnNames">output column names. Must be non-null, non-empty and the same length as <paramref name="inputColumnNames"/>.</param>
/// <param name="inputColumnNames">input column names. Must be non-null, non-empty and the same length as <paramref name="outputColumnNames"/>.</param>
/// <returns>A one-element array holding the replace-missing-values estimator.</returns>
internal SweepableEstimator[] NumericFeaturizer(string[] outputColumnNames, string[] inputColumnNames)
{
    Contracts.CheckValue(inputColumnNames, nameof(inputColumnNames));
    Contracts.CheckValue(outputColumnNames, nameof(outputColumnNames));

    var outputCount = outputColumnNames.Length;
    var lengthsAreValid = outputCount == inputColumnNames.Length && outputCount > 0;
    Contracts.Check(lengthsAreValid, "outputColumnNames and inputColumnNames must have the same length and greater than 0");

    var replaceMissingValues = SweepableEstimatorFactory.CreateReplaceMissingValues(
        new ReplaceMissingValueOption
        {
            InputColumnNames = inputColumnNames,
            OutputColumnNames = outputColumnNames,
        });

    return new[] { replaceMissingValues };
}
// Builds a fixed multi-model pipeline: an optional stage combining replace-missing-values and
// one-hot encoding, an optional concatenate stage, and a required trainer stage offering
// LightGbm binary and fast tree binary.
private MultiModelPipeline CreateMultiModelPipeline()
{
    var concatenate = SweepableEstimatorFactory.CreateConcatenate(new ConcatOption());
    var imputer = SweepableEstimatorFactory.CreateReplaceMissingValues(new ReplaceMissingValueOption());
    var oneHotEncoder = SweepableEstimatorFactory.CreateOneHotEncoding(new OneHotOption());
    var lightGbmTrainer = SweepableEstimatorFactory.CreateLightGbmBinary(new LgbmOption());
    var fastTreeTrainer = SweepableEstimatorFactory.CreateFastTreeBinary(new FastTreeOption());

    return new MultiModelPipeline()
        .AppendOrSkip(imputer + imputer * oneHotEncoder)
        .AppendOrSkip(concatenate)
        .Append(lightGbmTrainer + fastTreeTrainer);
}
/// <summary>
/// Build one featurization pipeline driven by <paramref name="columnInformation"/>. Column purposes are
/// inferred for <paramref name="data"/>; numeric columns go through <see cref="NumericFeaturizer(string[], string[])"/>,
/// categorical columns through <see cref="CatalogFeaturizer(string[], string[])"/> and each text column through
/// <see cref="TextFeaturizer(string, string)"/>. Every featurizer writes back to its input column, and the
/// featurized columns are finally concatenated into <paramref name="outputColumnName"/>.
/// </summary>
/// <param name="data">input data.</param>
/// <param name="columnInformation">column information.</param>
/// <param name="outputColumnName">output feature column.</param>
/// <returns>A <see cref="MultiModelPipeline"/> for featurization; it has no stages when no text, numeric or categorical feature column is found.</returns>
public MultiModelPipeline Featurizer(IDataView data, ColumnInformation columnInformation, string outputColumnName = "Features")
{
    Contracts.CheckValue(data, nameof(data));
    Contracts.CheckValue(columnInformation, nameof(columnInformation));

    var inferredPurposes = PurposeInference.InferPurposes(_context, data, columnInformation);

    // Resolve the schema names of all columns inferred to have the given purpose.
    string[] ColumnNamesFor(ColumnPurpose purpose)
    {
        return inferredPurposes
            .Where(c => c.Purpose == purpose)
            .Select(c => data.Schema[c.ColumnIndex].Name)
            .ToArray();
    }

    var textColumns = ColumnNamesFor(ColumnPurpose.TextFeature);
    var numericColumns = ColumnNamesFor(ColumnPurpose.NumericFeature);
    var catalogColumns = ColumnNamesFor(ColumnPurpose.CategoricalFeature);

    var featurization = new MultiModelPipeline();

    if (numericColumns.Length > 0)
    {
        featurization = featurization.Append(NumericFeaturizer(numericColumns, numericColumns));
    }

    if (catalogColumns.Length > 0)
    {
        featurization = featurization.Append(CatalogFeaturizer(catalogColumns, catalogColumns));
    }

    foreach (var textColumn in textColumns)
    {
        featurization = featurization.Append(TextFeaturizer(textColumn, textColumn));
    }

    // Concatenation order is text, numeric, categorical.
    var concatOption = new ConcatOption
    {
        InputColumnNames = textColumns.Concat(numericColumns).Concat(catalogColumns).ToArray(),
        OutputColumnName = outputColumnName,
    };

    if (concatOption.InputColumnNames.Length == 0)
    {
        return featurization;
    }

    return featurization.Append(SweepableEstimatorFactory.CreateConcatenate(concatOption));
}