static async Task Main(string[] args)
{
    var context = new MLContext();
    var dataset = context.Data.LoadFromTextFile<Iris>(@".\iris.csv", separatorChar: ',', hasHeader: true);
    var split = context.Data.TrainTestSplit(dataset, testFraction: 0.3);

    var estimatorChain = context.Transforms.Conversion.MapValueToKey("species", "species")
                    .Append(context.Transforms.Concatenate("features", new string[] { "sepal_length" }))
                    .Append(context.AutoML().MultiClassification.LbfgsMaximumEntropy("species", "features"));

    var experimentOption = new Experiment.Option()
    {
        EvaluateFunction = (MLContext ctx, IDataView data) =>
        {
            return ctx.MulticlassClassification.Evaluate(data, "species").MicroAccuracy;
        },
    };

    var experiment = context.AutoML().CreateExperiment(estimatorChain, experimentOption);
    var reporter = new Reporter();
    var result = await experiment.TrainAsync(split.TrainSet, validateFraction: 0.1f, reporter: reporter);
    var bestModel = result.BestModel;

    // evaluate on test
    var eval = bestModel.Transform(split.TestSet);
    var metric = context.MulticlassClassification.Evaluate(eval, "species");
    Console.WriteLine($"best model validate score: {result.BestIteration.EvaluateScore}");
    Console.WriteLine($"best model test score: {metric.MicroAccuracy}");
}
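// A minimal sketch of the Iris input class assumed by LoadFromTextFile<Iris> above.
// Only "sepal_length" and "species" are referenced by the pipeline; the remaining
// fields and all column indices are assumptions based on the standard iris.csv layout.
public class Iris
{
    [LoadColumn(0)]
    public float sepal_length;

    [LoadColumn(1)]
    public float sepal_width;

    [LoadColumn(2)]
    public float petal_length;

    [LoadColumn(3)]
    public float petal_width;

    [LoadColumn(4)]
    public string species;
}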
public IEnumerable<SweepableEstimatorBase> GetSuggestedTextColumnTransformers(MLContext context, Column column)
{
    return new SweepableEstimatorBase[]
    {
        context.AutoML().Serializable().Transformer.Text.FeaturizeText(column.Name, column.Name),
        context.AutoML().Serializable().Transformer.Text.FeaturizeTextWithWordEmbedding(column.Name, column.Name),
    };
}
public void AutoMLCatalog_should_only_create_once()
{
    var context = new MLContext();
    context.Log += this.Context_Log;
    var autoMLCatalog = context.AutoML();
    var autoMLCatalog2 = context.AutoML();

    // Should().Equals() is object.Equals on the assertion object and never fails;
    // Be() actually asserts that both calls return the same catalog instance.
    autoMLCatalog.Should().Be(autoMLCatalog2);
}
static void Main(string[] args)
{
    var context = new MLContext();
    var train_data = context.Data.LoadFromTextFile<ModelInput>(@".\recommendation-ratings-train.csv", separatorChar: ',', hasHeader: true);
    var test_data = context.Data.LoadFromTextFile<ModelInput>(@".\recommendation-ratings-test.csv", separatorChar: ',', hasHeader: true);
    var gpSweeper = new GaussProcessSweeper(new GaussProcessSweeper.Option() { InitialPopulation = 50 });

    var pipeline = context.Transforms.Conversion.MapValueToKey("userId", "userId")
                    .Append(context.Transforms.Conversion.MapValueToKey("movieId", "movieId"))
                    .Append(context.AutoML().CreateSweepableEstimator(
                        (ctx, option) => ctx.Recommendation().Trainers.MatrixFactorization(option),
                        MFOption.Default,
                        new string[] { "userId", "movieId" },
                        new string[] { "Score" },
                        nameof(MatrixFactorizationTrainer)))
                    .Append(context.Transforms.CopyColumns("output", "Score"));

    Console.WriteLine(pipeline.Summary());

    var experimentOption = new Experiment.Option()
    {
        ParameterSweeper = gpSweeper,
        ParameterSweeperIteration = 100,
        EvaluateFunction = (MLContext ctx, IDataView data) =>
        {
            return ctx.Recommendation().Evaluate(data, "rating").RootMeanSquaredError;
        },
        // RMSE: lower is better.
        IsMaximizing = false,
    };

    var experiment = context.AutoML().CreateExperiment(pipeline, experimentOption);
    var result = experiment.TrainAsync(train_data, validateFraction: 0.1f, new Reporter()).Result;
    var bestModel = result.BestModel;

    // evaluate on test
    var eval = bestModel.Transform(test_data);
    var rmse = context.Recommendation().Evaluate(eval, "rating").RootMeanSquaredError;
    Console.WriteLine($"best model validate score: {result.BestIteration.EvaluateScore}");
    Console.WriteLine($"best model test score: {rmse}");
}
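// A minimal sketch of the ModelInput class assumed by the recommendation sample above.
// The column names userId, movieId, and rating come from the pipeline and the
// EvaluateFunction; the types and column indices follow the usual MovieLens ratings
// CSV layout and are assumptions. MapValueToKey converts the raw id columns into
// the key type that MatrixFactorization requires.
public class ModelInput
{
    [LoadColumn(0)]
    public string userId;

    [LoadColumn(1)]
    public string movieId;

    [LoadColumn(2)]
    public float rating;
}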
public IEnumerable<SweepableEstimatorBase> GetSuggestedCatagoricalColumnTransformers(MLContext context, Column column)
{
    return new SweepableEstimatorBase[]
    {
        context.AutoML().Serializable().Transformer.Categorical.OneHotEncoding(column.Name, column.Name),
    };
}
public void AutoPipeline_should_create_naive_bayes_classifier()
{
    var context = new MLContext();
    var trainer = context.AutoML().MultiClassification.NaiveBayes("label", "feature");
    Approvals.Verify(trainer.ToCodeGenNodeContract());
}
public override void Run()
{
    var context = new MLContext(1);
    context.Log += this.Context_Log;

    var columns = new List<Column>();
    columns.Add(new Column("Sentiment", ColumnType.Catagorical, ColumnPurpose.Label));
    columns.Add(new Column("SentimentText", ColumnType.String, ColumnPurpose.TextFeature));

    var wiki = this.GetFileFromTestData("wiki.tsv");
    var data = context.Data.LoadFromTextFile<Wiki>(wiki, hasHeader: true);
    var trainTestSplit = context.Data.TrainTestSplit(data);

    var experimentOption = new Experiment.Option()
    {
        EvaluateFunction = (MLContext ctx, IDataView dataView) =>
        {
            return ctx.BinaryClassification.EvaluateNonCalibrated(dataView, "Sentiment").Accuracy;
        },
        ParameterSweeperIteration = 5,
    };

    var experiment = context.AutoML().CreateBinaryClassificationExperiment(columns, experimentOption);
    var result = experiment.TrainAsync(trainTestSplit.TrainSet, 0.1f, Reporter.Instance).Result;

    var eval = result.BestModel.Transform(trainTestSplit.TestSet);
    var eval_score = experimentOption.EvaluateFunction(context, eval);
    Console.WriteLine($"eval accuracy: {eval_score}");
}
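// A minimal sketch of the Wiki input class assumed by LoadFromTextFile<Wiki> above.
// The two fields mirror the Column list in Run(); the bool label type (required by
// EvaluateNonCalibrated) and the column indices are assumptions.
public class Wiki
{
    [LoadColumn(0)]
    public bool Sentiment;

    [LoadColumn(1)]
    public string SentimentText;
}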
public IEnumerable<SweepableEstimatorBase> GetSuggestedNumericColumnTransformers(MLContext context, Column column)
{
    return new SweepableEstimatorBase[]
    {
        context.AutoML().Serializable().Transformer.ReplaceMissingValues(column.Name, column.Name),
    };
}
public void AutoML_should_create_sweepable_pipeline_from_INode_using_extension()
{
    var context = new MLContext();

    var pipeline = context.AutoML().CreateUnsweepableEstimator(context.Transforms.Conversion.MapKeyToValue("species", "species"))
                    .Append(context.AutoML().MultiClassification.LightGbm("species", "features"));
    pipeline.ToString().Should().Be("SweepablePipeline([KeyToValueMappingEstimator]=>[LightGbmMulticlassTrainer])");

    pipeline = context.AutoML().MultiClassification.LightGbm("species", "features")
                    .Append(context.AutoML().CreateUnsweepableEstimator(context.Transforms.Conversion.MapKeyToValue("species", "species")));
    pipeline.ToString().Should().Be("SweepablePipeline([LightGbmMulticlassTrainer]=>[KeyToValueMappingEstimator])");

    pipeline = context.AutoML().MultiClassification.LightGbm("species", "features")
                    .Append(context.Transforms.Conversion.MapKeyToValue("species", "species"));
    pipeline.ToString().Should().Be("SweepablePipeline([LightGbmMulticlassTrainer]=>[KeyToValueMappingEstimator])");
}
public void AutoPipeline_should_create_lightGbm_classifier_with_default_option()
{
    var context = new MLContext();
    var trainer = context.AutoML().MultiClassification.LightGbm("label", "feature");

    // Sample every sweepable parameter at the midpoint of its normalized range.
    var parameterValues = LightGbmMulticlassTrainerSweepableOptions.Default.SweepableValueGenerators.Select(x => x.CreateFromNormalized(0.5));
    var parameterset = new Parameters(parameterValues);
    Approvals.Verify(trainer.ToCodeGenNodeContract(parameterset));
}
public void AutoML_should_create_averaged_perceptron_binary_classifier_with_option()
{
    var context = new MLContext();
    var optionBuilder = AveragedPerceptronBinaryTrainerSweepableOptions.Default;
    var trainer = context.AutoML().BinaryClassification.AveragedPerceptron("label", "feature", optionBuilder);

    var parameterValue = optionBuilder.SweepableValueGenerators.Select(x => x.CreateFromNormalized(0.5));
    var parameterset = new Parameters(parameterValue);
    Approvals.Verify(trainer.ToCodeGenNodeContract(parameterset));
}
public void AutoPipeline_should_create_sdca_maximum_entropy_classifier_with_custom_option()
{
    var context = new MLContext();
    var option = new CustomSdcaMaximumEntropyOptionBuilder();
    var trainer = context.AutoML().MultiClassification.SdcaMaximumEntropy("label", "feature", option);

    var parameterValues = option.SweepableValueGenerators.Select(x => x.CreateFromNormalized(0.5));
    var parameterset = new Parameters(parameterValues);
    Approvals.Verify(trainer.ToCodeGenNodeContract(parameterset));
}
public void AutoPipeline_should_create_sweepable_pipeline_from_estimator()
{
    var context = new MLContext();
    var pipeline = context.Transforms.Conversion.MapValueToKey("species", "species")
                    .Append(context.AutoML().MultiClassification.LightGbm("species", "features"));

    var parameterValues = pipeline.EstimatorGenerators[1].Values[0].SweepableValueGenerators.Select(x => x.Name);
    pipeline.EstimatorGenerators.Count.Should().Be(2);
    parameterValues.Should().Equal(new string[] { "LearningRate", "NumberOfLeaves", "NumberOfIterations", "MinimumExampleCountPerLeaf" });
}
public TrainingManager(MLContext context, IEnumerable<Column> columns, Option option)
{
    this.context = context;
    this.columns = columns;
    this.singlePipelineTrainingServiceMap = new Dictionary<IDictionary<string, string>, ITrainingService>();
    this.bestIterations = new Dictionary<IDictionary<string, string>, IterationInfo>();
    this.option = option;
    this.pipelineSweeper = context.AutoML().Serializable().Factory.CreateSweeper(this.option.ParameterSweeper);
    this.pipelineBuilder = new PipelineBuilder(this.option.TaskType, this.option.IsAzureAttach, true);
    this.Pipeline = this.pipelineBuilder.BuildPipeline(context, columns);
}
public static SingleEstimatorSweepablePipeline ToPipeline(this SingleEstimatorSweepablePipelineDataContract pipelineContract, MLContext context)
{
    var estimators = new List<SweepableEstimatorBase>();
    foreach (var estimator in pipelineContract.Estimators)
    {
        // List<T>.Add, not LINQ's Append: Append returns a new sequence
        // and would leave the list empty.
        estimators.Add(context.AutoML().Serializable().Factory.CreateSweepableEstimator(estimator));
    }

    return new SingleEstimatorSweepablePipeline(estimators);
}
public static SweepablePipeline ToPipeline(this SweepablePipelineDataContract pipelineContract, MLContext context)
{
    var sweepablePipeline = new SweepablePipeline();
    foreach (var node in pipelineContract.Estimators)
    {
        sweepablePipeline.Append(node.Select(n => context.AutoML().Serializable().Factory.CreateSweepableEstimator(n)).ToArray());
    }

    return sweepablePipeline;
}
public void AutoML_should_create_gam_regressor_with_default_option()
{
    var context = new MLContext();
    context.Log += this.Context_Log;
    var optionSweeper = GamRegressionTrainerSweepableOptions.Default;
    var trainer = context.AutoML().Regression.Gam();

    var parameterValue = optionSweeper.SweepableValueGenerators.Select(x => x.CreateFromNormalized(0.5));
    var parameterSet = new Parameters(parameterValue);
    Approvals.Verify(trainer.ToCodeGenNodeContract(parameterSet));
}
public IEnumerable<SweepableEstimatorBase> GetSuggestedLabelColumnTransformers(MLContext context, Column column)
{
    if (this.PipelineBuilderOption.TaskType == TaskType.MultiClassification)
    {
        return new SweepableEstimatorBase[]
        {
            context.AutoML().Serializable().Transformer.Conversion.MapValueToKey(column.Name, column.Name),
        };
    }

    return new SweepableEstimatorBase[0];
}
public void AutoPipeline_should_create_ova_classifier_from_binary_classifier()
{
    var context = new MLContext();
    var optionBuilder = new CustomSdcaMaximumEntropyOptionBuilder();
    var binaryTrainer = context.AutoML().CreateSweepableEstimator(
        (ctx, option) =>
        {
            option.LabelColumnName = "Label";
            option.FeatureColumnName = "Features";
            return ctx.BinaryClassification.Trainers.SdcaLogisticRegression(option.LabelColumnName, option.FeatureColumnName);
        },
        optionBuilder,
        new string[] { "Features" },
        new string[] { "Score" },
        "CustomSdca");

    var ovaTrainer = context.AutoML().MultiClassification.OneVersusAll(binaryTrainer);
    var parameterValues = optionBuilder.SweepableValueGenerators.Select(x => x.CreateFromNormalized(0.5));
    var parameterset = new Parameters(parameterValues);
    Approvals.Verify(ovaTrainer.ToCodeGenNodeContract(parameterset));
}
public IEnumerable<SweepableEstimatorBase> GetSuggestedSingleFeatureTrainers(MLContext context, Column column, string featureColumnName)
{
    switch (this.PipelineBuilderOption.TaskType)
    {
        case TaskType.BinaryClassification:
            var res = new List<SweepableEstimatorBase>();
            res.Add(context.AutoML().Serializable().BinaryClassification.LinearSvm(column.Name, featureColumnName));
            res.Add(context.AutoML().Serializable().BinaryClassification.LdSvm(column.Name, featureColumnName));
            res.Add(context.AutoML().Serializable().BinaryClassification.FastForest(column.Name, featureColumnName));
            res.Add(context.AutoML().Serializable().BinaryClassification.FastTree(column.Name, featureColumnName));
            res.Add(context.AutoML().Serializable().BinaryClassification.LightGbm(column.Name, featureColumnName));
            res.Add(context.AutoML().Serializable().BinaryClassification.Gam(column.Name, featureColumnName));
            res.Add(context.AutoML().Serializable().BinaryClassification.SgdNonCalibrated(column.Name, featureColumnName));
            res.Add(context.AutoML().Serializable().BinaryClassification.SgdCalibrated(column.Name, featureColumnName));
            res.Add(context.AutoML().Serializable().BinaryClassification.AveragedPerceptron(column.Name, featureColumnName));
            return res;
        default:
            throw new NotImplementedException();
    }
}
public SweepablePipeline BuildPipeline(MLContext context, IEnumerable<Column> columns)
{
    var sweepablePipeline = new SweepablePipeline();
    foreach (var column in columns)
    {
        switch (column.ColumnPurpose)
        {
            case ColumnPurpose.NumericFeature:
                sweepablePipeline.Append(this.GetSuggestedNumericColumnTransformers(context, column).ToArray());
                break;
            case ColumnPurpose.CategoricalFeature:
                sweepablePipeline.Append(this.GetSuggestedCatagoricalColumnTransformers(context, column).ToArray());
                break;
            case ColumnPurpose.TextFeature:
                sweepablePipeline.Append(this.GetSuggestedTextColumnTransformers(context, column).ToArray());
                break;
            case ColumnPurpose.Label:
                sweepablePipeline.Append(this.GetSuggestedLabelColumnTransformers(context, column).ToArray());
                break;
            default:
                break;
        }
    }

    var featureColumns = columns.Where(c => c.ColumnPurpose == ColumnPurpose.CategoricalFeature
                                         || c.ColumnPurpose == ColumnPurpose.NumericFeature
                                         || c.ColumnPurpose == ColumnPurpose.TextFeature)
                                .Select(c => c.Name)
                                .ToArray();

    if (this.PipelineBuilderOption.IsUsingSingleFeatureTrainer)
    {
        sweepablePipeline.Append(context.AutoML().Serializable().Transformer.Concatnate(featureColumns, "_FEATURE"));
        var labelColumn = columns.Where(c => c.ColumnPurpose == ColumnPurpose.Label).First();
        sweepablePipeline.Append(this.GetSuggestedSingleFeatureTrainers(context, labelColumn, "_FEATURE").ToArray());
    }

    return sweepablePipeline;
}
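// A hedged usage sketch for BuildPipeline. Each Column describes one dataset column
// (name, type, purpose), mirroring the Run() snippet earlier; the PipelineBuilder
// constructor arguments follow the TrainingManager snippet, and the meaning of the
// positional flags is an assumption noted inline.
var context = new MLContext();
var builder = new PipelineBuilder(TaskType.BinaryClassification, false /* isAzureAttach */, true /* single-feature trainers, assumed */);
var columns = new[]
{
    new Column("Sentiment", ColumnType.Catagorical, ColumnPurpose.Label),
    new Column("SentimentText", ColumnType.String, ColumnPurpose.TextFeature),
};

// Suggests per-column transformers, concatenates the feature columns into
// "_FEATURE", and appends the candidate binary trainers from
// GetSuggestedSingleFeatureTrainers.
var sweepablePipeline = builder.BuildPipeline(context, columns);
Console.WriteLine(sweepablePipeline.ToString());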
static void Main(string[] args)
{
    var context = new MLContext();
    context.Log += Context_Log;

    // Load data
    var trainDataset = context.Data.LoadFromTextFile<ModelInput>(@".\datasets\wikipedia-detox-250-line-data-train.tsv", hasHeader: true);
    var testDataset = context.Data.LoadFromTextFile<ModelInput>(@".\datasets\wikipedia-detox-250-line-test.tsv", hasHeader: true);

    var normalizeTextOption = new NormalizeTextOption();
    var applyWordEmbeddingOption = new ApplyWordEmbeddingOption();

    // Create pipeline
    var pipeline = context.AutoML().CreateSweepableEstimator(
        // Create the NormalizeText transformer and sweep over its options.
        (ctx, option) =>
        {
            return ctx.Transforms.Text.NormalizeText(
                option.OutputColumnName,
                option.InputColumnName,
                option.CaseMode,
                option.KeepDiacritics,
                option.KeepPunctuations,
                option.KeepNumbers);
        },
        normalizeTextOption,
        new string[] { "SentimentText" },
        new string[] { "txt" },
        nameof(TextNormalizingEstimator))
        .Append(context.Transforms.Text.TokenizeIntoWords("txt", "txt"))
        .Append(context.Transforms.Text.RemoveDefaultStopWords("txt", "txt"))
        .Append(context.AutoML().CreateSweepableEstimator(
            // Create the ApplyWordEmbedding transformer and sweep over its options.
            (ctx, option) =>
            {
                return ctx.Transforms.Text.ApplyWordEmbedding(
                    option.outputColumnName,
                    option.inputColumnName,
                    option.ModelKind);
            },
            applyWordEmbeddingOption,
            new string[] { "txt" },
            new string[] { "txt" },
            nameof(WordEmbeddingEstimator)))
        .Append(
            // Use SdcaLogisticRegression and FastForest as candidate trainers.
            context.AutoML().BinaryClassification.SdcaLogisticRegression("Sentiment", "txt"),
            context.AutoML().BinaryClassification.FastForest("Sentiment", "txt"));

    var experimentOption = new Experiment.Option()
    {
        EvaluateFunction = (MLContext ctx, IDataView data) =>
        {
            return ctx.BinaryClassification.EvaluateNonCalibrated(data, "Sentiment").Accuracy;
        },
        MaximumTrainingTime = 60 * 60, // one hour, assuming this option is in seconds
        ParameterSweeperIteration = 100,
    };

    var experiment = context.AutoML().CreateExperiment(pipeline, experimentOption);
    var result = experiment.TrainAsync(trainDataset, 0.1f, new Reporter()).Result;

    // evaluate on test
    var eval = result.BestModel.Transform(testDataset);
    var metric = context.BinaryClassification.EvaluateNonCalibrated(eval, "Sentiment");
    Console.WriteLine($"best model validate score: {result.BestIteration.EvaluateScore}");
    Console.WriteLine($"best model test score: {metric.Accuracy}");
}
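// A minimal sketch of the Reporter passed to TrainAsync throughout these samples.
// The exact progress interface expected by TrainAsync is an assumption; this sketch
// writes each reported iteration's evaluate score to the console, reusing the
// IterationInfo type and its EvaluateScore property seen in the snippets above.
public class Reporter : IProgress<IterationInfo>
{
    // Shared instance, matching the Reporter.Instance usage in the Run() snippet.
    public static Reporter Instance { get; } = new Reporter();

    public void Report(IterationInfo value)
    {
        Console.WriteLine($"iteration score: {value.EvaluateScore}");
    }
}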