Beispiel #1
0
        /// <summary>
        /// Appending a sweepable estimator to itself must produce a pipeline whose
        /// string form lists both estimators joined by "=>".
        /// </summary>
        public void CreateSweepableEstimatorPipelineFromSweepableEstimatorTest()
        {
            var fastForest = SweepableEstimatorFactory.CreateFastForestBinary(new FastForestOption());

            var description = fastForest.Append(fastForest).ToString();

            description.Should().Be("FastForestBinary=>FastForestBinary");
        }
Beispiel #2
0
        /// <summary>
        /// Exercises each supported argument shape of <c>Append</c> on a <see cref="SweepablePipeline"/>
        /// and verifies the JSON-serialized result against the approved snapshot.
        /// </summary>
        public void SweepablePipeline_Append_SweepableEstimator_Test()
        {
            var concatOption = new ConcatOption
            {
                InputColumnNames = new[] { "a", "b", "c" },
                OutputColumnName = "a",
            };
            var lgbmOption = new LgbmOption
            {
                FeatureColumnName = "Feature",
                LabelColumnName = "Label",
            };
            var pipeline = new SweepablePipeline();

            // A single sweepable estimator.
            var firstConcat = SweepableEstimatorFactory.CreateConcatenate(concatOption);
            pipeline = pipeline.Append(firstConcat);

            // Multiple sweepable estimators in one call.
            var lightGbm = SweepableEstimatorFactory.CreateLightGbmBinary(lgbmOption);
            var secondConcat = SweepableEstimatorFactory.CreateConcatenate(concatOption);
            pipeline = pipeline.Append(lightGbm, secondConcat);

            // A sweepable estimator mixed with a sweepable pipeline.
            pipeline = pipeline.Append(SweepableEstimatorFactory.CreateConcatenate(concatOption), pipeline);

            // Sweepable pipelines only.
            pipeline = pipeline.Append(pipeline, pipeline);

            var json = JsonSerializer.Serialize(pipeline, _jsonSerializerOptions);
            Approvals.Verify(json);
        }
        /// <summary>
        /// Generates the sweepable estimator factory source from the estimator definitions found in
        /// the additional files "trainer-estimators.json" and "transformer-estimators.json".
        /// Generation is skipped when the optional "code_gen_flag.json" additional file maps this
        /// generator's name to <c>false</c>.
        /// </summary>
        /// <param name="context">generator execution context that supplies the additional files and receives the generated source.</param>
        public void Execute(GeneratorExecutionContext context)
        {
            // FirstOrDefault instead of First: the flag file is optional, and its absence
            // must mean "generate as usual" rather than an InvalidOperationException.
            var flagFile = context.AdditionalFiles.FirstOrDefault(f => f.Path.Contains("code_gen_flag.json"));
            var flagJson = flagFile?.GetText()?.ToString();
            if (flagJson != null)
            {
                var flags = JsonSerializer.Deserialize<Dictionary<string, bool>>(flagJson);
                if (flags != null && flags.TryGetValue(nameof(SweepableEstimatorFactoryGenerator), out var enabled) && !enabled)
                {
                    return;
                }
            }

            // Trainers first, then transformers: the same order the generated factory has always used.
            // Each entry pairs the generated estimator name with the title-cased search option name.
            var estimatorNames = new[] { "trainer-estimators.json", "transformer-estimators.json" }
                                 .SelectMany(fileName => context.AdditionalFiles.Where(f => f.Path.Contains(fileName)))
                                 .SelectMany(file => Utils.GetEstimatorsFromJson(file.GetText().ToString()).Estimators)
                                 .SelectMany(estimator => estimator.EstimatorTypes.Select(t => (Utils.CreateEstimatorName(estimator.FunctionName, t), Utils.ToTitleCase(estimator.SearchOption))))
                                 .ToArray();

            var code = new SweepableEstimatorFactory()
            {
                NameSpace      = Constant.CodeGeneratorNameSpace,
                EstimatorNames = estimatorNames,
            };

            context.AddSource(className + ".cs", SourceText.From(code.TransformText(), Encoding.UTF8));
        }
Beispiel #4
0
        /// <summary>
        /// Appending a plain ML.NET estimator to a sweepable estimator must produce a pipeline
        /// whose string form reports the plain estimator as "Unknown".
        /// </summary>
        public void CreateSweepableEstimatorPipelineFromSweepableEstimatorAndIEstimatorTest()
        {
            var mlContext = new MLContext();
            var fastForest = SweepableEstimatorFactory.CreateFastForestBinary(new FastForestOption());

            var description = fastForest.Append(mlContext.Transforms.Concatenate("output", "input")).ToString();

            description.Should().Be("FastForestBinary=>Unknown");
        }
Beispiel #5
0
        /// <summary>
        /// A pipeline that starts from a plain estimator, appends a sweepable estimator and then
        /// another plain estimator must report the plain estimators as "Unknown" in its string form.
        /// </summary>
        public void AppendIEstimatorToSweepabeEstimatorPipelineTest()
        {
            var mlContext = new MLContext();
            var concatenate = mlContext.Transforms.Concatenate("output", "input");

            var description = concatenate
                              .Append(SweepableEstimatorFactory.CreateFastForestBinary(new FastForestOption()))
                              .Append(mlContext.Transforms.CopyColumns("output", "input"))
                              .ToString();

            description.Should().Be("Unknown=>FastForestBinary=>Unknown");
        }
Beispiel #6
0
        /// <summary>
        /// Appends the multiclass trainer candidates to a single sweepable estimator and verifies
        /// the JSON-serialized pipeline against the approved snapshot.
        /// </summary>
        public void CreateMultiModelPipelineFromSweepableEstimatorAndMultiClassifiers()
        {
            var mlContext = new MLContext();
            var fastForest = SweepableEstimatorFactory.CreateFastForestBinary(new FastForestOption());
            var multiModelPipeline = fastForest.Append(mlContext.Auto().MultiClassification());

            Approvals.Verify(JsonSerializer.Serialize(multiModelPipeline, this._jsonSerializerOptions));
        }
Beispiel #7
0
        /// <summary>
        /// Build the <see cref="SweepableEstimator"/> candidates used to featurize a text column.
        /// </summary>
        /// <param name="outputColumnName">name of the column that receives the featurized output.</param>
        /// <param name="inputColumnName">name of the text column to featurize.</param>
        /// <returns>an array holding a single featurize-text estimator configured with the given column names.</returns>
        internal SweepableEstimator[] TextFeaturizer(string outputColumnName, string inputColumnName)
        {
            var featurizeText = SweepableEstimatorFactory.CreateFeaturizeText(new FeaturizeTextOption
            {
                InputColumnName = inputColumnName,
                OutputColumnName = outputColumnName,
            });

            return new[] { featurizeText };
        }
Beispiel #8
0
        /// <summary>
        /// Build the <see cref="SweepableEstimator"/> candidates for multiclass classification.
        /// Every option object that is used (whether supplied or created here) has its label, feature
        /// and example-weight column names overwritten with the values passed to this method.
        /// </summary>
        /// <param name="labelColumnName">label column name.</param>
        /// <param name="featureColumnName">feature column name.</param>
        /// <param name="exampleWeightColumnName">example weight column name.</param>
        /// <param name="useFastForest">true to include the fast forest trainer.</param>
        /// <param name="useLgbm">true to include the lgbm trainer.</param>
        /// <param name="useFastTree">true to include the fast tree trainer.</param>
        /// <param name="useLbfgs">true to include the lbfgs trainers.</param>
        /// <param name="useSdca">true to include the sdca trainers.</param>
        /// <param name="fastTreeOption">initial option for fast tree; a default option is created when null.</param>
        /// <param name="lgbmOption">initial option for lgbm; a default option is created when null.</param>
        /// <param name="fastForestOption">initial option for fast forest; a default option is created when null.</param>
        /// <param name="lbfgsOption">initial option for lbfgs; a default option is created when null.</param>
        /// <param name="sdcaOption">initial option for sdca; a default option is created when null.</param>
        /// <param name="fastTreeSearchSpace">search space for fast tree; a default one built from the option is used when null.</param>
        /// <param name="lgbmSearchSpace">search space for lgbm; a default one built from the option is used when null.</param>
        /// <param name="fastForestSearchSpace">search space for fast forest; a default one built from the option is used when null.</param>
        /// <param name="lbfgsSearchSpace">search space for lbfgs; a default one built from the option is used when null.</param>
        /// <param name="sdcaSearchSpace">search space for sdca; a default one built from the option is used when null.</param>
        /// <returns>the estimators of every enabled trainer, in the order fast tree, fast forest, lgbm, lbfgs, sdca.</returns>
        public SweepableEstimator[] MultiClassification(string labelColumnName = DefaultColumnNames.Label, string featureColumnName = DefaultColumnNames.Features, string exampleWeightColumnName = null, bool useFastForest = true, bool useLgbm = true, bool useFastTree = true, bool useLbfgs = true, bool useSdca = true,
                                                        FastTreeOption fastTreeOption = null, LgbmOption lgbmOption = null, FastForestOption fastForestOption = null, LbfgsOption lbfgsOption = null, SdcaOption sdcaOption = null,
                                                        SearchSpace <FastTreeOption> fastTreeSearchSpace = null, SearchSpace <LgbmOption> lgbmSearchSpace = null, SearchSpace <FastForestOption> fastForestSearchSpace = null, SearchSpace <LbfgsOption> lbfgsSearchSpace = null, SearchSpace <SdcaOption> sdcaSearchSpace = null)
        {
            var estimators = new List<SweepableEstimator>();

            if (useFastTree)
            {
                if (fastTreeOption is null)
                {
                    fastTreeOption = new FastTreeOption();
                }

                fastTreeOption.LabelColumnName = labelColumnName;
                fastTreeOption.FeatureColumnName = featureColumnName;
                fastTreeOption.ExampleWeightColumnName = exampleWeightColumnName;

                var searchSpace = fastTreeSearchSpace ?? new SearchSpace<FastTreeOption>(fastTreeOption);
                estimators.Add(SweepableEstimatorFactory.CreateFastTreeOva(fastTreeOption, searchSpace));
            }

            if (useFastForest)
            {
                if (fastForestOption is null)
                {
                    fastForestOption = new FastForestOption();
                }

                fastForestOption.LabelColumnName = labelColumnName;
                fastForestOption.FeatureColumnName = featureColumnName;
                fastForestOption.ExampleWeightColumnName = exampleWeightColumnName;

                var searchSpace = fastForestSearchSpace ?? new SearchSpace<FastForestOption>(fastForestOption);
                estimators.Add(SweepableEstimatorFactory.CreateFastForestOva(fastForestOption, searchSpace));
            }

            if (useLgbm)
            {
                if (lgbmOption is null)
                {
                    lgbmOption = new LgbmOption();
                }

                lgbmOption.LabelColumnName = labelColumnName;
                lgbmOption.FeatureColumnName = featureColumnName;
                lgbmOption.ExampleWeightColumnName = exampleWeightColumnName;

                var searchSpace = lgbmSearchSpace ?? new SearchSpace<LgbmOption>(lgbmOption);
                estimators.Add(SweepableEstimatorFactory.CreateLightGbmMulti(lgbmOption, searchSpace));
            }

            if (useLbfgs)
            {
                if (lbfgsOption is null)
                {
                    lbfgsOption = new LbfgsOption();
                }

                lbfgsOption.LabelColumnName = labelColumnName;
                lbfgsOption.FeatureColumnName = featureColumnName;
                lbfgsOption.ExampleWeightColumnName = exampleWeightColumnName;

                // Each estimator gets its own default search space instance when none is supplied.
                var ovaSearchSpace = lbfgsSearchSpace ?? new SearchSpace<LbfgsOption>(lbfgsOption);
                estimators.Add(SweepableEstimatorFactory.CreateLbfgsLogisticRegressionOva(lbfgsOption, ovaSearchSpace));

                var maximumEntropySearchSpace = lbfgsSearchSpace ?? new SearchSpace<LbfgsOption>(lbfgsOption);
                estimators.Add(SweepableEstimatorFactory.CreateLbfgsMaximumEntropyMulti(lbfgsOption, maximumEntropySearchSpace));
            }

            if (useSdca)
            {
                if (sdcaOption is null)
                {
                    sdcaOption = new SdcaOption();
                }

                sdcaOption.LabelColumnName = labelColumnName;
                sdcaOption.FeatureColumnName = featureColumnName;
                sdcaOption.ExampleWeightColumnName = exampleWeightColumnName;

                // Each estimator gets its own default search space instance when none is supplied.
                var maximumEntropySearchSpace = sdcaSearchSpace ?? new SearchSpace<SdcaOption>(sdcaOption);
                estimators.Add(SweepableEstimatorFactory.CreateSdcaMaximumEntropyMulti(sdcaOption, maximumEntropySearchSpace));

                var ovaSearchSpace = sdcaSearchSpace ?? new SearchSpace<SdcaOption>(sdcaOption);
                estimators.Add(SweepableEstimatorFactory.CreateSdcaLogisticRegressionOva(sdcaOption, ovaSearchSpace));
            }

            return estimators.ToArray();
        }
Beispiel #9
0
        /// <summary>
        /// Builds a pipeline from a plain estimator, a sweepable estimator and the multiclass trainer
        /// candidates, then verifies its JSON serialization against the approved snapshot.
        /// </summary>
        public void CreateMultiModelPipelineFromSweepableEstimatorPipelineAndMultiClassifiers()
        {
            var mlContext = new MLContext();
            var featurizeText = SweepableEstimatorFactory.CreateFeaturizeText(new FeaturizeTextOption());

            var multiModelPipeline = mlContext.Transforms.Concatenate("output", "input")
                                     .Append(featurizeText)
                                     .Append(mlContext.Auto().MultiClassification());

            Approvals.Verify(JsonSerializer.Serialize(multiModelPipeline, this._jsonSerializerOptions));
        }
Beispiel #10
0
        /// <summary>
        /// Create a list of <see cref="SweepableEstimator"/> for regression.
        /// Every option object that is used (whether supplied or created here) has its label, feature
        /// and example-weight column names overwritten with the values passed to this method.
        /// </summary>
        /// <param name="labelColumnName">label column name.</param>
        /// <param name="featureColumnName">feature column name.</param>
        /// <param name="exampleWeightColumnName">example weight column name.</param>
        /// <param name="useFastForest">true if use fast forest as available trainer.</param>
        /// <param name="useLgbm">true if use lgbm as available trainer.</param>
        /// <param name="useFastTree">true if use fast tree (regular and tweedie) as available trainers.</param>
        /// <param name="useLbfgs">true if use lbfgs poisson regression as available trainer.</param>
        /// <param name="useSdca">true if use sdca as available trainer.</param>
        /// <param name="fastTreeOption">if provided, use it as initial option for fast tree, otherwise the default option will be used.</param>
        /// <param name="lgbmOption">if provided, use it as initial option for lgbm, otherwise the default option will be used.</param>
        /// <param name="fastForestOption">if provided, use it as initial option for fast forest, otherwise the default option will be used.</param>
        /// <param name="lbfgsOption">if provided, use it as initial option for lbfgs, otherwise the default option will be used.</param>
        /// <param name="sdcaOption">if provided, use it as initial option for sdca, otherwise the default option will be used.</param>
        /// <param name="fastTreeSearchSpace">if provided, use it as search space for fast tree, otherwise a default search space built from the option will be used.</param>
        /// <param name="lgbmSearchSpace">if provided, use it as search space for lgbm, otherwise a default search space built from the option will be used.</param>
        /// <param name="fastForestSearchSpace">if provided, use it as search space for fast forest, otherwise a default search space built from the option will be used.</param>
        /// <param name="lbfgsSearchSpace">if provided, use it as search space for lbfgs, otherwise a default search space built from the option will be used.</param>
        /// <param name="sdcaSearchSpace">if provided, use it as search space for sdca, otherwise a default search space built from the option will be used.</param>
        /// <returns>the estimators of every enabled trainer, in the order fast tree, fast forest, lgbm, lbfgs, sdca.</returns>
        internal SweepableEstimator[] Regression(string labelColumnName = DefaultColumnNames.Label, string featureColumnName = DefaultColumnNames.Features, string exampleWeightColumnName = null, bool useFastForest = true, bool useLgbm = true, bool useFastTree = true, bool useLbfgs = true, bool useSdca = true,
                                                 FastTreeOption fastTreeOption = null, LgbmOption lgbmOption = null, FastForestOption fastForestOption = null, LbfgsOption lbfgsOption = null, SdcaOption sdcaOption = null,
                                                 SearchSpace <FastTreeOption> fastTreeSearchSpace = null, SearchSpace <LgbmOption> lgbmSearchSpace = null, SearchSpace <FastForestOption> fastForestSearchSpace = null, SearchSpace <LbfgsOption> lbfgsSearchSpace = null, SearchSpace <SdcaOption> sdcaSearchSpace = null)
        {
            var res = new List<SweepableEstimator>();

            // NOTE(review): the default search spaces below are now constructed from the configured
            // option, as the multiclass counterpart does. Presumably this lets the default search
            // space start from the caller's option (including the column names set here) rather than
            // a blank one; confirm against SearchSpace<T>.
            if (useFastTree)
            {
                fastTreeOption = fastTreeOption ?? new FastTreeOption();
                fastTreeOption.LabelColumnName         = labelColumnName;
                fastTreeOption.FeatureColumnName       = featureColumnName;
                fastTreeOption.ExampleWeightColumnName = exampleWeightColumnName;
                res.Add(SweepableEstimatorFactory.CreateFastTreeRegression(fastTreeOption, fastTreeSearchSpace ?? new SearchSpace<FastTreeOption>(fastTreeOption)));
                res.Add(SweepableEstimatorFactory.CreateFastTreeTweedieRegression(fastTreeOption, fastTreeSearchSpace ?? new SearchSpace<FastTreeOption>(fastTreeOption)));
            }

            if (useFastForest)
            {
                fastForestOption = fastForestOption ?? new FastForestOption();
                fastForestOption.LabelColumnName         = labelColumnName;
                fastForestOption.FeatureColumnName       = featureColumnName;
                fastForestOption.ExampleWeightColumnName = exampleWeightColumnName;
                res.Add(SweepableEstimatorFactory.CreateFastForestRegression(fastForestOption, fastForestSearchSpace ?? new SearchSpace<FastForestOption>(fastForestOption)));
            }

            if (useLgbm)
            {
                lgbmOption = lgbmOption ?? new LgbmOption();
                lgbmOption.LabelColumnName         = labelColumnName;
                lgbmOption.FeatureColumnName       = featureColumnName;
                lgbmOption.ExampleWeightColumnName = exampleWeightColumnName;
                res.Add(SweepableEstimatorFactory.CreateLightGbmRegression(lgbmOption, lgbmSearchSpace ?? new SearchSpace<LgbmOption>(lgbmOption)));
            }

            if (useLbfgs)
            {
                lbfgsOption = lbfgsOption ?? new LbfgsOption();
                lbfgsOption.LabelColumnName         = labelColumnName;
                lbfgsOption.FeatureColumnName       = featureColumnName;
                lbfgsOption.ExampleWeightColumnName = exampleWeightColumnName;
                res.Add(SweepableEstimatorFactory.CreateLbfgsPoissonRegressionRegression(lbfgsOption, lbfgsSearchSpace ?? new SearchSpace<LbfgsOption>(lbfgsOption)));
            }

            if (useSdca)
            {
                sdcaOption = sdcaOption ?? new SdcaOption();
                sdcaOption.LabelColumnName         = labelColumnName;
                sdcaOption.FeatureColumnName       = featureColumnName;
                sdcaOption.ExampleWeightColumnName = exampleWeightColumnName;
                res.Add(SweepableEstimatorFactory.CreateSdcaRegression(sdcaOption, sdcaSearchSpace ?? new SearchSpace<SdcaOption>(sdcaOption)));
            }

            return res.ToArray();
        }
Beispiel #11
0
        /// <summary>
        /// Create a list of <see cref="SweepableEstimator"/> for featurizing catalog columns.
        /// </summary>
        /// <param name="outputColumnNames">output column names; must be non-null, non-empty and the same length as <paramref name="inputColumnNames"/>.</param>
        /// <param name="inputColumnNames">input column names; must be non-null, non-empty and the same length as <paramref name="outputColumnNames"/>.</param>
        /// <returns>a one-hot encoding estimator and a one-hot hash encoding estimator that share the same option.</returns>
        internal SweepableEstimator[] CatalogFeaturizer(string[] outputColumnNames, string[] inputColumnNames)
        {
            // Validate nulls up front, as the numeric featurizer does, so a null argument is
            // reported against the right parameter name instead of failing inside the length check.
            Contracts.CheckValue(inputColumnNames, nameof(inputColumnNames));
            Contracts.CheckValue(outputColumnNames, nameof(outputColumnNames));
            Contracts.Check(outputColumnNames.Length == inputColumnNames.Length && outputColumnNames.Length > 0, "outputColumnNames and inputColumnNames must have the same length and greater than 0");

            var option = new OneHotOption
            {
                InputColumnNames  = inputColumnNames,
                OutputColumnNames = outputColumnNames,
            };

            return new SweepableEstimator[] { SweepableEstimatorFactory.CreateOneHotEncoding(option), SweepableEstimatorFactory.CreateOneHotHashEncoding(option) };
        }
Beispiel #12
0
        /// <summary>
        /// Builds a fixture pipeline of five default-configured estimators, in this order:
        /// concatenate, replace-missing-values, one-hot encoding, LightGBM binary, fast tree binary.
        /// </summary>
        private SweepableEstimatorPipeline CreateSweepbaleEstimatorPipeline()
        {
            var estimators = new SweepableEstimator[]
            {
                SweepableEstimatorFactory.CreateConcatenate(new ConcatOption()),
                SweepableEstimatorFactory.CreateReplaceMissingValues(new ReplaceMissingValueOption()),
                SweepableEstimatorFactory.CreateOneHotEncoding(new OneHotOption()),
                SweepableEstimatorFactory.CreateLightGbmBinary(new LgbmOption()),
                SweepableEstimatorFactory.CreateFastTreeBinary(new FastTreeOption()),
            };

            return new SweepableEstimatorPipeline(estimators);
        }
Beispiel #13
0
        /// <summary>
        /// Build the <see cref="SweepableEstimator"/> candidates used to featurize numeric columns.
        /// </summary>
        /// <param name="outputColumnNames">output column names; must be non-null, non-empty and the same length as <paramref name="inputColumnNames"/>.</param>
        /// <param name="inputColumnNames">input column names; must be non-null, non-empty and the same length as <paramref name="outputColumnNames"/>.</param>
        /// <returns>an array holding a single replace-missing-values estimator configured with the given column names.</returns>
        internal SweepableEstimator[] NumericFeaturizer(string[] outputColumnNames, string[] inputColumnNames)
        {
            Contracts.CheckValue(inputColumnNames, nameof(inputColumnNames));
            Contracts.CheckValue(outputColumnNames, nameof(outputColumnNames));

            var lengthsMatchAndNonEmpty = outputColumnNames.Length == inputColumnNames.Length && outputColumnNames.Length > 0;
            Contracts.Check(lengthsMatchAndNonEmpty, "outputColumnNames and inputColumnNames must have the same length and greater than 0");

            var replaceMissingValues = SweepableEstimatorFactory.CreateReplaceMissingValues(new ReplaceMissingValueOption
            {
                InputColumnNames = inputColumnNames,
                OutputColumnNames = outputColumnNames,
            });

            return new[] { replaceMissingValues };
        }
Beispiel #14
0
        /// <summary>
        /// Builds a fixture <see cref="MultiModelPipeline"/> with three stages: an optional
        /// missing-value / one-hot stage, an optional concatenate stage, and a trainer stage
        /// combining LightGBM binary and fast tree binary.
        /// </summary>
        private MultiModelPipeline CreateMultiModelPipeline()
        {
            var concatenate = SweepableEstimatorFactory.CreateConcatenate(new ConcatOption());
            var replaceMissing = SweepableEstimatorFactory.CreateReplaceMissingValues(new ReplaceMissingValueOption());
            var oneHotEncoding = SweepableEstimatorFactory.CreateOneHotEncoding(new OneHotOption());
            var lightGbmBinary = SweepableEstimatorFactory.CreateLightGbmBinary(new LgbmOption());
            var fastTreeBinary = SweepableEstimatorFactory.CreateFastTreeBinary(new FastTreeOption());

            return new MultiModelPipeline()
                   .AppendOrSkip(replaceMissing + replaceMissing * oneHotEncoding)
                   .AppendOrSkip(concatenate)
                   .Append(lightGbmBinary + fastTreeBinary);
        }
Beispiel #15
0
        /// <summary>
        /// Build one featurization pipeline driven by <paramref name="columnInformation"/>. Column purposes are inferred
        /// from <paramref name="data"/>; numeric columns go through <see cref="NumericFeaturizer(string[], string[])"/>,
        /// categorical columns through <see cref="CatalogFeaturizer(string[], string[])"/> and each text column through
        /// <see cref="TextFeaturizer(string, string)"/>, every one featurized in place (output name equals input name).
        /// When at least one such column exists, a final concatenate step merges them into <paramref name="outputColumnName"/>.
        /// </summary>
        /// <param name="data">input data.</param>
        /// <param name="columnInformation">column information.</param>
        /// <param name="outputColumnName">output feature column.</param>
        /// <returns>A <see cref="MultiModelPipeline"/> for featurization.</returns>
        public MultiModelPipeline Featurizer(IDataView data, ColumnInformation columnInformation, string outputColumnName = "Features")
        {
            Contracts.CheckValue(data, nameof(data));
            Contracts.CheckValue(columnInformation, nameof(columnInformation));

            var purposes = PurposeInference.InferPurposes(this._context, data, columnInformation);

            var textColumns = purposes.Where(c => c.Purpose == ColumnPurpose.TextFeature)
                              .Select(c => data.Schema[c.ColumnIndex].Name)
                              .ToArray();
            var numericColumns = purposes.Where(c => c.Purpose == ColumnPurpose.NumericFeature)
                                 .Select(c => data.Schema[c.ColumnIndex].Name)
                                 .ToArray();
            var catalogColumns = purposes.Where(c => c.Purpose == ColumnPurpose.CategoricalFeature)
                                 .Select(c => data.Schema[c.ColumnIndex].Name)
                                 .ToArray();

            var featurizer = new MultiModelPipeline();

            if (numericColumns.Length > 0)
            {
                featurizer = featurizer.Append(this.NumericFeaturizer(numericColumns, numericColumns));
            }

            if (catalogColumns.Length > 0)
            {
                featurizer = featurizer.Append(this.CatalogFeaturizer(catalogColumns, catalogColumns));
            }

            foreach (var column in textColumns)
            {
                featurizer = featurizer.Append(this.TextFeaturizer(column, column));
            }

            // The concatenation lists text columns first, then numeric, then categorical.
            var concatOption = new ConcatOption
            {
                InputColumnNames = textColumns.Concat(numericColumns).Concat(catalogColumns).ToArray(),
                OutputColumnName = outputColumnName,
            };

            // Nothing to merge: return the pipeline without a concatenate step.
            if (concatOption.InputColumnNames.Length <= 0)
            {
                return featurizer;
            }

            return featurizer.Append(SweepableEstimatorFactory.CreateConcatenate(concatOption));
        }