Exemple #1
0
        /// <summary>
        /// Sample entry point: sweeps an LbfgsMaximumEntropy multiclass trainer over the iris CSV,
        /// then prints the best iteration's validation score and the micro-accuracy on the test split.
        /// </summary>
        /// <param name="args">Command-line arguments; not read.</param>
        static async Task Main(string[] args)
        {
            var context = new MLContext();

            // Load the data and split it (0.3 is passed as the split fraction).
            var dataset = context.Data.LoadFromTextFile <Iris>(@".\iris.csv", separatorChar: ',', hasHeader: true);
            var split   = context.Data.TrainTestSplit(dataset, 0.3);

            // Pipeline: label -> key, build the "features" column, sweepable trainer.
            var labelToKey     = context.Transforms.Conversion.MapValueToKey("species", "species");
            var buildFeatures  = context.Transforms.Concatenate("features", new string[] { "sepal_length" });
            var sweepTrainer   = context.AutoML().MultiClassification.LbfgsMaximumEntropy("species", "features");
            var estimatorChain = labelToKey.Append(buildFeatures).Append(sweepTrainer);

            // Each iteration is scored by micro-accuracy against the "species" label.
            var experimentOption = new Experiment.Option()
            {
                EvaluateFunction = (MLContext mlContext, IDataView scored) =>
                    mlContext.MulticlassClassification.Evaluate(scored, "species").MicroAccuracy
            };

            var experiment = context.AutoML().CreateExperiment(estimatorChain, experimentOption);
            var reporter   = new Reporter();
            var result     = await experiment.TrainAsync(split.TrainSet, validateFraction : 0.1f, reporter : reporter);

            // evaluate on test
            var testPredictions = result.BestModel.Transform(split.TestSet);
            var metric          = context.MulticlassClassification.Evaluate(testPredictions, "species");

            Console.WriteLine($"best model validate score: {result.BestIteration.EvaluateScore}");
            Console.WriteLine($"best model test score: {metric.MicroAccuracy}");
        }
 /// <summary>
 /// Lists the sweepable transformers suggested for a text column: FeaturizeText and
 /// FeaturizeTextWithWordEmbedding, each using the column's name as both of its column arguments.
 /// </summary>
 /// <param name="context">ML context whose AutoML catalog creates the transformers.</param>
 /// <param name="column">The text column to transform.</param>
 /// <returns>An array holding the two candidate transformers, in that order.</returns>
 public IEnumerable <SweepableEstimatorBase> GetSuggestedTextColumnTransformers(MLContext context, Column column)
 {
     SweepableEstimatorBase featurizeText =
         context.AutoML().Serializable().Transformer.Text.FeaturizeText(column.Name, column.Name);
     SweepableEstimatorBase featurizeWithEmbedding =
         context.AutoML().Serializable().Transformer.Text.FeaturizeTextWithWordEmbedding(column.Name, column.Name);

     return new SweepableEstimatorBase[] { featurizeText, featurizeWithEmbedding };
 }
        /// <summary>
        /// Verifies that calling <c>AutoML()</c> twice on the same <c>MLContext</c> yields the same catalog instance.
        /// </summary>
        public void AutoMLCatalog_should_only_create_once()
        {
            var context = new MLContext();

            context.Log += this.Context_Log;
            var autoMLCatalog  = context.AutoML();
            var autoMLCatalog2 = context.AutoML();

            // The previous `Should().Equals(...)` call was object.Equals on the assertion wrapper,
            // not an assertion: it never compared the two catalogs in a way that could fail the test
            // for the right reason. BeSameAs asserts reference identity, which is what
            // "only create once" means.
            // NOTE(review): assumes the Should() extension comes from FluentAssertions — confirm.
            autoMLCatalog.Should().BeSameAs(autoMLCatalog2);
        }
        /// <summary>
        /// Sample entry point: sweeps a MatrixFactorization recommender with a Gaussian-process sweeper
        /// over the ratings training CSV, then prints the best iteration's validation score and the
        /// RMSE measured on the separate test CSV.
        /// </summary>
        /// <param name="args">Command-line arguments; not read.</param>
        static void Main(string[] args)
        {
            var context    = new MLContext();
            var train_data = context.Data.LoadFromTextFile <ModelInput>(@".\recommendation-ratings-train.csv", separatorChar: ',', hasHeader: true);
            var test_data  = context.Data.LoadFromTextFile <ModelInput>(@".\recommendation-ratings-test.csv", separatorChar: ',', hasHeader: true);

            var gpSweeper = new GaussProcessSweeper(new GaussProcessSweeper.Option()
            {
                InitialPopulation = 50
            });

            // Pipeline: map both id columns to keys, run the sweepable trainer, copy "Score" to "output".
            var pipeline = context.Transforms.Conversion.MapValueToKey("userId", "userId")
                           .Append(context.Transforms.Conversion.MapValueToKey("movieId", "movieId"))
                           .Append(context.AutoML().CreateSweepableEstimator(
                                       (mlContext, option) => mlContext.Recommendation().Trainers.MatrixFactorization(option),
                                       MFOption.Default,
                                       new string[] { "userId", "movieId" },
                                       new string[] { "Score" },
                                       nameof(MatrixFactorizationTrainer)))
                           .Append(context.Transforms.CopyColumns("output", "Score"));

            Console.WriteLine(pipeline.Summary());

            // RMSE is an error metric, hence IsMaximizing = false.
            var experimentOption = new Experiment.Option()
            {
                ParameterSweeper          = gpSweeper,
                ParameterSweeperIteration = 100,
                EvaluateFunction          = (MLContext mlContext, IDataView scored) =>
                    mlContext.Recommendation().Evaluate(scored, "rating").RootMeanSquaredError,
                IsMaximizing = false
            };

            var experiment = context.AutoML().CreateExperiment(pipeline, experimentOption);
            var result     = experiment.TrainAsync(train_data, validateFraction: 0.1f, new Reporter()).Result;
            var bestModel  = result.BestModel;

            // evaluate on test
            var eval = bestModel.Transform(test_data);
            var rmse = context.Recommendation().Evaluate(eval, "rating").RootMeanSquaredError;

            Console.WriteLine($"best model validate score: {result.BestIteration.EvaluateScore}");
            Console.WriteLine($"best model test score: {rmse}");
        }
 /// <summary>
 /// Lists the sweepable transformers suggested for a categorical column: a single OneHotEncoding
 /// that uses the column's name as both of its column arguments.
 /// </summary>
 /// <param name="context">ML context whose AutoML catalog creates the transformer.</param>
 /// <param name="column">The categorical column to transform.</param>
 /// <returns>A one-element array containing the one-hot encoder.</returns>
 public IEnumerable <SweepableEstimatorBase> GetSuggestedCatagoricalColumnTransformers(MLContext context, Column column)
 {
     SweepableEstimatorBase oneHotEncoder =
         context.AutoML().Serializable().Transformer.Categorical.OneHotEncoding(column.Name, column.Name);

     return new SweepableEstimatorBase[] { oneHotEncoder };
 }
        /// <summary>
        /// Approval test for the code-gen node contract of the AutoML NaiveBayes multiclass trainer
        /// created with label column "label" and feature column "feature".
        /// </summary>
        public void AutoPipeline_should_create_naive_bayes_classifier()
        {
            var mlContext  = new MLContext();
            var naiveBayes = mlContext.AutoML().MultiClassification.NaiveBayes("label", "feature");
            var contract   = naiveBayes.ToCodeGenNodeContract();

            Approvals.Verify(contract);
        }
        /// <summary>
        /// Runs a binary-classification AutoML experiment on the "wiki.tsv" test data and prints the
        /// accuracy of the best model on the held-out split.
        /// </summary>
        public override void Run()
        {
            var context = new MLContext(1);
            context.Log += this.Context_Log;

            // Column roles handed to the experiment: the label and one text feature.
            var columns = new List <Column>
            {
                new Column("Sentiment", ColumnType.Catagorical, ColumnPurpose.Label),
                new Column("SentimentText", ColumnType.String, ColumnPurpose.TextFeature),
            };

            var wikiPath       = this.GetFileFromTestData("wiki.tsv");
            var wikiData       = context.Data.LoadFromTextFile <Wiki>(wikiPath, hasHeader: true);
            var trainTestSplit = context.Data.TrainTestSplit(wikiData);

            // Iterations are scored by non-calibrated accuracy against "Sentiment".
            var experimentOption = new Experiment.Option()
            {
                EvaluateFunction = (MLContext mlContext, IDataView scored) =>
                    mlContext.BinaryClassification.EvaluateNonCalibrated(scored, "Sentiment").Accuracy,
                ParameterSweeperIteration = 5,
            };

            var experiment = context.AutoML().CreateBinaryClassificationExperiment(columns, experimentOption);
            var result     = experiment.TrainAsync(trainTestSplit.TrainSet, 0.1f, Reporter.Instance).Result;

            // Score the held-out split with the same function used during the sweep.
            var testPredictions = result.BestModel.Transform(trainTestSplit.TestSet);
            var testAccuracy    = experimentOption.EvaluateFunction(context, testPredictions);

            Console.WriteLine($"eval accuracy: {testAccuracy}");
        }
 /// <summary>
 /// Lists the sweepable transformers suggested for a numeric column: a single ReplaceMissingValues
 /// that uses the column's name as both of its column arguments.
 /// </summary>
 /// <param name="context">ML context whose AutoML catalog creates the transformer.</param>
 /// <param name="column">The numeric column to transform.</param>
 /// <returns>A one-element array containing the missing-value replacer.</returns>
 public IEnumerable <SweepableEstimatorBase> GetSuggestedNumericColumnTransformers(MLContext context, Column column)
 {
     SweepableEstimatorBase replaceMissing =
         context.AutoML().Serializable().Transformer.ReplaceMissingValues(column.Name, column.Name);

     return new SweepableEstimatorBase[] { replaceMissing };
 }
        /// <summary>
        /// Checks the string form of a sweepable pipeline built three ways with <c>Append</c>:
        /// unsweepable estimator followed by a trainer, trainer followed by an unsweepable estimator,
        /// and trainer followed by a plain ML.NET estimator.
        /// </summary>
        public void AutoML_should_create_sweepable_pipeline_from_INode_using_extension()
        {
            const string transformerThenTrainer = "SweepablePipeline([KeyToValueMappingEstimator]=>[LightGbmMulticlassTrainer])";
            const string trainerThenTransformer = "SweepablePipeline([LightGbmMulticlassTrainer]=>[KeyToValueMappingEstimator])";

            var context = new MLContext();

            // 1. wrapped estimator first, sweepable trainer appended
            var pipeline = context.AutoML().CreateUnsweepableEstimator(context.Transforms.Conversion.MapKeyToValue("species", "species"))
                           .Append(context.AutoML().MultiClassification.LightGbm("species", "features"));
            pipeline.ToString().Should().Be(transformerThenTrainer);

            // 2. sweepable trainer first, wrapped estimator appended
            pipeline = context.AutoML().MultiClassification.LightGbm("species", "features")
                       .Append(context.AutoML().CreateUnsweepableEstimator(context.Transforms.Conversion.MapKeyToValue("species", "species")));
            pipeline.ToString().Should().Be(trainerThenTransformer);

            // 3. sweepable trainer first, raw (unwrapped) estimator appended
            pipeline = context.AutoML().MultiClassification.LightGbm("species", "features")
                       .Append(context.Transforms.Conversion.MapKeyToValue("species", "species"));
            pipeline.ToString().Should().Be(trainerThenTransformer);
        }
Exemple #10
0
        /// <summary>
        /// Approval test for the code-gen node contract of the AutoML LightGbm multiclass trainer, using
        /// parameters created from the default sweepable options at normalized value 0.5.
        /// </summary>
        public void AutoPipeline_should_create_lightGbm_classifier_with_default_option()
        {
            var mlContext = new MLContext();
            var lightGbm  = mlContext.AutoML().MultiClassification.LightGbm("label", "feature");

            // One value per sweepable generator, each created from normalized input 0.5.
            var generators   = LightGbmMulticlassTrainerSweepableOptions.Default.SweepableValueGenerators;
            var parameterset = new Parameters(generators.Select(generator => generator.CreateFromNormalized(0.5)));

            Approvals.Verify(lightGbm.ToCodeGenNodeContract(parameterset));
        }
Exemple #11
0
        /// <summary>
        /// Approval test for the code-gen node contract of the AutoML AveragedPerceptron binary trainer
        /// built with the default sweepable options, using parameters created at normalized value 0.5.
        /// </summary>
        public void AutoML_should_create_averaged_perceptron_binary_classifier_with_option()
        {
            var mlContext       = new MLContext();
            var sweepOptions    = AveragedPerceptronBinaryTrainerSweepableOptions.Default;
            var perceptron      = mlContext.AutoML().BinaryClassification.AveragedPerceptron("label", "feature", sweepOptions);

            // One value per sweepable generator, each created from normalized input 0.5.
            var generatedValues = sweepOptions.SweepableValueGenerators.Select(generator => generator.CreateFromNormalized(0.5));
            var parameterset    = new Parameters(generatedValues);

            var contract = perceptron.ToCodeGenNodeContract(parameterset);
            Approvals.Verify(contract);
        }
Exemple #12
0
        /// <summary>
        /// Approval test for the code-gen node contract of the AutoML SdcaMaximumEntropy multiclass trainer
        /// built with a <c>CustomSdcaMaximumEntropyOptionBuilder</c>, using parameters created at
        /// normalized value 0.5.
        /// </summary>
        public void AutoPipeline_should_create_sdca_maximum_entropy_classifier_with_custom_option()
        {
            var mlContext     = new MLContext();
            var customOption  = new CustomSdcaMaximumEntropyOptionBuilder();
            var sdcaTrainer   = mlContext.AutoML().MultiClassification.SdcaMaximumEntropy("label", "feature", customOption);

            // One value per sweepable generator, each created from normalized input 0.5.
            var generatedValues = customOption.SweepableValueGenerators.Select(generator => generator.CreateFromNormalized(0.5));
            var parameterset    = new Parameters(generatedValues);

            var contract = sdcaTrainer.ToCodeGenNodeContract(parameterset);
            Approvals.Verify(contract);
        }
Exemple #13
0
        /// <summary>
        /// Verifies that appending a sweepable LightGbm trainer to a plain MapValueToKey estimator yields a
        /// pipeline with two estimator generators, and that the trainer exposes the expected sweepable
        /// parameter names in order.
        /// </summary>
        public void AutoPipeline_should_create_sweepable_pipeline_from_estimator()
        {
            var expectedNames = new string[] { "LearningRate", "NumberOfLeaves", "NumberOfIterations", "MinimumExampleCountPerLeaf" };

            var mlContext = new MLContext();
            var pipeline  = mlContext.Transforms.Conversion.MapValueToKey("species", "species")
                            .Append(mlContext.AutoML().MultiClassification.LightGbm("species", "features"));

            // Second generator (index 1) is the trainer; its first candidate carries the sweepable parameters.
            var trainerCandidate = pipeline.EstimatorGenerators[1].Values[0];
            var parameterNames   = trainerCandidate.SweepableValueGenerators.Select(generator => generator.Name);

            pipeline.EstimatorGenerators.Count.Should().Be(2);
            parameterNames.Should().Equal(expectedNames);
        }
 /// <summary>
 /// Initializes the manager: stores its inputs, creates empty per-pipeline bookkeeping dictionaries,
 /// creates the pipeline sweeper from the option's parameter sweeper, and builds the sweepable
 /// pipeline for the given columns.
 /// </summary>
 /// <param name="context">ML context used to create the sweeper and build the pipeline.</param>
 /// <param name="columns">Column descriptions the pipeline is built from.</param>
 /// <param name="option">Training options (parameter sweeper, task type, Azure-attach flag).</param>
 public TrainingManager(MLContext context, IEnumerable <Column> columns, Option option)
 {
     // Inputs.
     this.context = context;
     this.columns = columns;
     this.option  = option;

     // Bookkeeping, both keyed by a string-to-string dictionary; they start empty.
     this.singlePipelineTrainingServiceMap = new Dictionary <IDictionary <string, string>, ITrainingService>();
     this.bestIterations = new Dictionary <IDictionary <string, string>, IterationInfo>();

     // Sweeper and pipeline derived from the options.
     this.pipelineSweeper = context.AutoML().Serializable().Factory.CreateSweeper(this.option.ParameterSweeper);
     this.pipelineBuilder = new PipelineBuilder(this.option.TaskType, this.option.IsAzureAttach, true);
     this.Pipeline        = this.pipelineBuilder.BuildPipeline(context, columns);
 }
Exemple #15
0
        /// <summary>
        /// Rebuilds a <c>SingleEstimatorSweepablePipeline</c> from its data contract by creating one
        /// sweepable estimator per contract entry, in contract order.
        /// </summary>
        /// <param name="pipelineContract">Data contract whose <c>Estimators</c> are converted.</param>
        /// <param name="context">ML context whose AutoML factory creates the estimators.</param>
        /// <returns>A pipeline containing the created estimators.</returns>
        public static SingleEstimatorSweepablePipeline ToPipeline(this SingleEstimatorSweepablePipelineDataContract pipelineContract, MLContext context)
        {
            var estimators = new List <SweepableEstimatorBase>();

            foreach (var estimator in pipelineContract.Estimators)
            {
                // List<T> has no Append method: the previous `estimators.Append(...)` resolved to LINQ's
                // Enumerable.Append, which returns a new sequence and leaves the list untouched, so the
                // pipeline was always built from an empty list. Add mutates the list as intended.
                estimators.Add(context.AutoML().Serializable().Factory.CreateSweepableEstimator(estimator));
            }

            return(new SingleEstimatorSweepablePipeline(estimators));
        }
Exemple #16
0
        /// <summary>
        /// Rebuilds a <c>SweepablePipeline</c> from its data contract: each entry of <c>Estimators</c> is a
        /// group of estimator contracts that is converted to an array of sweepable estimators and appended
        /// to the pipeline as one step.
        /// </summary>
        /// <param name="pipelineContract">Data contract whose estimator groups are converted.</param>
        /// <param name="context">ML context whose AutoML factory creates the estimators.</param>
        /// <returns>The pipeline instance the groups were appended to.</returns>
        public static SweepablePipeline ToPipeline(this SweepablePipelineDataContract pipelineContract, MLContext context)
        {
            var pipeline = new SweepablePipeline();

            foreach (var contractGroup in pipelineContract.Estimators)
            {
                var candidates = contractGroup
                                 .Select(contract => context.AutoML().Serializable().Factory.CreateSweepableEstimator(contract))
                                 .ToArray();

                // Return value deliberately ignored, as in the original.
                // NOTE(review): presumably Append mutates the pipeline in place — confirm.
                pipeline.Append(candidates);
            }

            return pipeline;
        }
Exemple #17
0
        /// <summary>
        /// Approval test for the code-gen node contract of the AutoML Gam regression trainer created with
        /// no arguments, using parameters created from the default sweepable options at normalized value 0.5.
        /// </summary>
        public void AutoML_should_create_gam_regressor_with_default_option()
        {
            var mlContext = new MLContext();
            mlContext.Log += this.Context_Log;

            var defaultOptions = GamRegressionTrainerSweepableOptions.Default;
            var gamTrainer     = mlContext.AutoML().Regression.Gam();

            // One value per sweepable generator, each created from normalized input 0.5.
            var generatedValues = defaultOptions.SweepableValueGenerators.Select(generator => generator.CreateFromNormalized(0.5));
            var parameterSet    = new Parameters(generatedValues);

            var contract = gamTrainer.ToCodeGenNodeContract(parameterSet);
            Approvals.Verify(contract);
        }
        /// <summary>
        /// Lists the sweepable transformers suggested for the label column. Only multi-classification
        /// gets one (MapValueToKey on the column's own name); every other task type gets none.
        /// </summary>
        /// <param name="context">ML context whose AutoML catalog creates the transformer.</param>
        /// <param name="column">The label column.</param>
        /// <returns>A one-element array for multi-classification, otherwise a new empty array.</returns>
        public IEnumerable <SweepableEstimatorBase> GetSuggestedLabelColumnTransformers(MLContext context, Column column)
        {
            if (this.PipelineBuilderOption.TaskType != TaskType.MultiClassification)
            {
                return new SweepableEstimatorBase[0];
            }

            SweepableEstimatorBase labelToKey =
                context.AutoML().Serializable().Transformer.Conversion.MapValueToKey(column.Name, column.Name);

            return new SweepableEstimatorBase[] { labelToKey };
        }
Exemple #19
0
        /// <summary>
        /// Approval test for the code-gen node contract of a OneVersusAll multiclass trainer that wraps a
        /// custom sweepable SdcaLogisticRegression binary trainer, using parameters created at normalized
        /// value 0.5.
        /// </summary>
        public void AutoPipeline_should_create_ova_classifier_from_binary_classifier()
        {
            var mlContext     = new MLContext();
            var optionBuilder = new CustomSdcaMaximumEntropyOptionBuilder();

            // Sweepable binary trainer: the factory overwrites the label/feature column names on the
            // option before creating the trainer from them.
            var binaryTrainer = mlContext.AutoML().CreateSweepableEstimator(
                (trainerContext, sdcaOption) =>
                {
                    sdcaOption.LabelColumnName   = "Label";
                    sdcaOption.FeatureColumnName = "Features";
                    return trainerContext.BinaryClassification.Trainers.SdcaLogisticRegression(sdcaOption.LabelColumnName, sdcaOption.FeatureColumnName);
                },
                optionBuilder,
                new string[] { "Features" },
                new string[] { "Score" },
                "CustomSdca");
            var ovaTrainer = mlContext.AutoML().MultiClassification.OneVersusAll(binaryTrainer);

            // One value per sweepable generator, each created from normalized input 0.5.
            var generatedValues = optionBuilder.SweepableValueGenerators.Select(generator => generator.CreateFromNormalized(0.5));
            var parameterset    = new Parameters(generatedValues);

            Approvals.Verify(ovaTrainer.ToCodeGenNodeContract(parameterset));
        }
        /// <summary>
        /// Lists the sweepable trainers suggested for a task that trains on one feature column.
        /// Only binary classification is handled.
        /// </summary>
        /// <param name="context">ML context whose AutoML catalog creates the trainers.</param>
        /// <param name="column">Column whose name is passed as each trainer's first (label) argument.</param>
        /// <param name="featureColumnName">Name passed as each trainer's second (feature) argument.</param>
        /// <returns>A list of nine binary-classification trainers.</returns>
        /// <exception cref="NotImplementedException">The task type is not binary classification.</exception>
        public IEnumerable <SweepableEstimatorBase> GetSuggestedSingleFeatureTrainers(MLContext context, Column column, string featureColumnName)
        {
            if (this.PipelineBuilderOption.TaskType != TaskType.BinaryClassification)
            {
                throw new NotImplementedException();
            }

            return new List <SweepableEstimatorBase>
            {
                context.AutoML().Serializable().BinaryClassification.LinearSvm(column.Name, featureColumnName),
                context.AutoML().Serializable().BinaryClassification.LdSvm(column.Name, featureColumnName),
                context.AutoML().Serializable().BinaryClassification.FastForest(column.Name, featureColumnName),
                context.AutoML().Serializable().BinaryClassification.FastTree(column.Name, featureColumnName),
                context.AutoML().Serializable().BinaryClassification.LightGbm(column.Name, featureColumnName),
                context.AutoML().Serializable().BinaryClassification.Gam(column.Name, featureColumnName),
                context.AutoML().Serializable().BinaryClassification.SgdNonCalibrated(column.Name, featureColumnName),
                context.AutoML().Serializable().BinaryClassification.SgdCalibrated(column.Name, featureColumnName),
                context.AutoML().Serializable().BinaryClassification.AveragedPerceptron(column.Name, featureColumnName),
            };
        }
        /// <summary>
        /// Builds a sweepable pipeline from column descriptions: appends the suggested transformers for
        /// each numeric, categorical, text and label column in input order, and — when the single-feature
        /// trainer option is on — a concatenation of all feature columns into "_FEATURE" followed by the
        /// suggested trainers for the first label column.
        /// </summary>
        /// <param name="context">ML context used to create the estimators.</param>
        /// <param name="columns">Column descriptions; enumerated more than once.</param>
        /// <returns>The assembled pipeline.</returns>
        public SweepablePipeline BuildPipeline(MLContext context, IEnumerable <Column> columns)
        {
            const string concatenatedFeatureColumn = "_FEATURE";
            var pipeline = new SweepablePipeline();

            // Step 1: per-column transformers; columns with any other purpose contribute nothing.
            foreach (var column in columns)
            {
                var purpose = column.ColumnPurpose;

                if (purpose == ColumnPurpose.NumericFeature)
                {
                    pipeline.Append(this.GetSuggestedNumericColumnTransformers(context, column).ToArray());
                }
                else if (purpose == ColumnPurpose.CategoricalFeature)
                {
                    pipeline.Append(this.GetSuggestedCatagoricalColumnTransformers(context, column).ToArray());
                }
                else if (purpose == ColumnPurpose.TextFeature)
                {
                    pipeline.Append(this.GetSuggestedTextColumnTransformers(context, column).ToArray());
                }
                else if (purpose == ColumnPurpose.Label)
                {
                    pipeline.Append(this.GetSuggestedLabelColumnTransformers(context, column).ToArray());
                }
            }

            // Step 2: names of every feature column, computed regardless of the option below.
            var featureColumnNames = columns
                                     .Where(c => c.ColumnPurpose == ColumnPurpose.CategoricalFeature ||
                                                 c.ColumnPurpose == ColumnPurpose.NumericFeature ||
                                                 c.ColumnPurpose == ColumnPurpose.TextFeature)
                                     .Select(c => c.Name)
                                     .ToArray();

            if (!this.PipelineBuilderOption.IsUsingSingleFeatureTrainer)
            {
                return pipeline;
            }

            // Step 3: concatenate the features and append the trainers; First() throws if there is no label column.
            pipeline.Append(context.AutoML().Serializable().Transformer.Concatnate(featureColumnNames, concatenatedFeatureColumn));
            var labelColumn = columns.Where(c => c.ColumnPurpose == ColumnPurpose.Label).First();
            pipeline.Append(this.GetSuggestedSingleFeatureTrainers(context, labelColumn, concatenatedFeatureColumn).ToArray());

            return pipeline;
        }
Exemple #22
0
        /// <summary>
        /// Sample entry point: builds a text pipeline (sweepable NormalizeText, tokenization, stop-word
        /// removal, sweepable ApplyWordEmbedding, then SdcaLogisticRegression and FastForest as candidate
        /// trainers), runs an AutoML experiment on the training TSV and prints the best validation score
        /// and the accuracy on the test TSV.
        /// </summary>
        /// <param name="args">Command-line arguments; not read.</param>
        static void Main(string[] args)
        {
            var context = new MLContext();
            context.Log += Context_Log;

            // Load Data
            var trainDataset = context.Data.LoadFromTextFile <ModelInput>(@".\datasets\wikipedia-detox-250-line-data-train.tsv", hasHeader: true);
            var testDataset  = context.Data.LoadFromTextFile <ModelInput>(@".\datasets\wikipedia-detox-250-line-test.tsv", hasHeader: true);

            var normalizeTextOption      = new NormalizeTextOption();
            var applyWordEmbeddingOption = new ApplyWordEmbeddingOption();

            // Create NormalizeText transformer and sweep over it ("SentimentText" -> "txt").
            var sweepableNormalizeText = context.AutoML().CreateSweepableEstimator(
                (mlContext, option) => mlContext.Transforms.Text.NormalizeText(
                    option.OutputColumnName,
                    option.InputColumnName,
                    option.CaseMode,
                    option.KeepDiacritics,
                    option.KeepPunctuations,
                    option.KeepNumbers),
                normalizeTextOption,
                new string[] { "SentimentText" },
                new string[] { "txt" },
                nameof(TextNormalizingEstimator));

            // Create ApplyWordEmbedding transformer and sweep over it ("txt" -> "txt").
            var sweepableWordEmbedding = context.AutoML().CreateSweepableEstimator(
                (mlContext, option) => mlContext.Transforms.Text.ApplyWordEmbedding(
                    option.outputColumnName,
                    option.inputColumnName,
                    option.ModelKind),
                applyWordEmbeddingOption,
                new string[] { "txt" },
                new string[] { "txt" },
                nameof(WordEmbeddingEstimator));

            // Create pipeline; the last step uses SdcaLogisticRegression and FastForest as trainers.
            var pipeline = sweepableNormalizeText
                           .Append(context.Transforms.Text.TokenizeIntoWords("txt", "txt"))
                           .Append(context.Transforms.Text.RemoveDefaultStopWords("txt", "txt"))
                           .Append(sweepableWordEmbedding)
                           .Append(
                               context.AutoML().BinaryClassification.SdcaLogisticRegression("Sentiment", "txt"),
                               context.AutoML().BinaryClassification.FastForest("Sentiment", "txt"));

            // Iterations are scored by non-calibrated accuracy against "Sentiment".
            var experimentOption = new Experiment.Option()
            {
                EvaluateFunction = (MLContext mlContext, IDataView scored) =>
                    mlContext.BinaryClassification.EvaluateNonCalibrated(scored, "Sentiment").Accuracy,
                MaximumTrainingTime       = 60 * 60,
                ParameterSweeperIteration = 100,
            };

            var experiment = context.AutoML().CreateExperiment(pipeline, experimentOption);
            var result     = experiment.TrainAsync(trainDataset, 0.1f, new Reporter()).Result;

            // evaluate on test
            var testPredictions = result.BestModel.Transform(testDataset);
            var metric          = context.BinaryClassification.EvaluateNonCalibrated(testPredictions, "Sentiment");

            Console.WriteLine($"best model validate score: {result.BestIteration.EvaluateScore}");
            Console.WriteLine($"best model test score: {metric.Accuracy}");
        }