/// <summary>
/// Trains a multiclass iris classifier with a sweepable LbfgsMaximumEntropy trainer
/// and prints the best model's validate and test micro-accuracy.
/// </summary>
static async Task Main(string[] args)
{
    var context = new MLContext();

    // Load the iris dataset and hold out 30% of the rows for the final test evaluation.
    var data = context.Data.LoadFromTextFile<Iris>(@".\iris.csv", separatorChar: ',', hasHeader: true);
    var trainTestSplit = context.Data.TrainTestSplit(data, 0.3);

    // Pipeline: key-encode the label, assemble the feature vector, then the sweepable trainer.
    var pipeline = context.Transforms.Conversion.MapValueToKey("species", "species")
        .Append(context.Transforms.Concatenate("features", new string[] { "sepal_length" }))
        .Append(context.AutoML().MultiClassification.LbfgsMaximumEntropy("species", "features"));

    // Each sweep iteration is scored by micro-accuracy on the validation split.
    var option = new Experiment.Option()
    {
        EvaluateFunction = (MLContext ctx, IDataView validateData) =>
            ctx.MulticlassClassification.Evaluate(validateData, "species").MicroAccuracy,
    };

    var experiment = context.AutoML().CreateExperiment(pipeline, option);
    var reporter = new Reporter();
    var result = await experiment.TrainAsync(trainTestSplit.TrainSet, validateFraction: 0.1f, reporter: reporter);

    // evaluate on test
    var predictions = result.BestModel.Transform(trainTestSplit.TestSet);
    var metric = context.MulticlassClassification.Evaluate(predictions, "species");
    Console.WriteLine($"best model validate score: {result.BestIteration.EvaluateScore}");
    Console.WriteLine($"best model test score: {metric.MicroAccuracy}");
}
/// <summary>
/// Trains a binary sentiment classifier over the wiki.tsv test dataset using an
/// AutoML-built pipeline (5 sweep iterations) and prints the hold-out accuracy.
/// </summary>
public override void Run()
{
    var context = new MLContext(1);
    context.Log += this.Context_Log;

    // Describe the dataset columns so the AutoML pipeline builder can featurize them.
    var columns = new List<Column>();
    columns.Add(new Column("Sentiment", ColumnType.Catagorical, ColumnPurpose.Label));
    columns.Add(new Column("SentimentText", ColumnType.String, ColumnPurpose.TextFeature));

    var wiki = this.GetFileFromTestData("wiki.tsv");
    var data = context.Data.LoadFromTextFile<Wiki>(wiki, hasHeader: true);
    var trainTestSplit = context.Data.TrainTestSplit(data);

    var experimentOption = new Experiment.Option()
    {
        // Each sweep iteration is scored by non-calibrated accuracy on the validation data.
        EvaluateFunction = (MLContext context, IDataView data) =>
        {
            return context.BinaryClassification.EvaluateNonCalibrated(data, "Sentiment").Accuracy;
        },
        ParameterSweeperIteration = 5,
    };

    var experiment = context.AutoML().CreateBinaryClassificationExperiment(columns, experimentOption);

    // FIX: was `.Result`, which wraps any training failure in AggregateException.
    // Run() is a void override so it cannot be made async; GetAwaiter().GetResult()
    // still blocks but surfaces the original exception unwrapped.
    var result = experiment.TrainAsync(trainTestSplit.TrainSet, 0.1f, Reporter.Instance).GetAwaiter().GetResult();

    var eval = result.BestModel.Transform(trainTestSplit.TestSet);
    var eval_score = experimentOption.EvaluateFunction(context, eval);
    Console.WriteLine($"eval accuracy: {eval_score}");
}
/// <summary>
/// Initializes a result holding the statistics of an ABsoluteMaybe experiment comparison.
/// </summary>
/// <param name="insufficientSampleSize">Whether the observed sample size was insufficient.</param>
/// <param name="confidenceLevel">The confidence level associated with this result.</param>
/// <param name="bestOption">The option recorded as best.</param>
/// <param name="worstOption">The option recorded as worst.</param>
public ABsoluteMaybeStatisticsResult(bool insufficientSampleSize, double confidenceLevel, Experiment.Option bestOption, Experiment.Option worstOption)
{
    this.InsufficientSampleSize = insufficientSampleSize;
    this.ConfidenceLevel = confidenceLevel;
    this.BestOption = bestOption;
    this.WorstOption = worstOption;
}
/// <summary>
/// Sweeps MatrixFactorization hyper-parameters with a Gaussian-process sweeper
/// (100 iterations) and reports RMSE of the best model on the held-out ratings file.
/// </summary>
static async Task Main(string[] args)
{
    var context = new MLContext();
    // NOTE: removed the unused (and misspelled) local `paramaters` — the pipeline
    // below uses MFOption.Default instead.

    var train_data = context.Data.LoadFromTextFile<ModelInput>(@".\recommendation-ratings-train.csv", separatorChar: ',', hasHeader: true);
    var test_data = context.Data.LoadFromTextFile<ModelInput>(@".\recommendation-ratings-test.csv", separatorChar: ',', hasHeader: true);

    // Gaussian-process sweeper seeded with 50 initial trials.
    var gpSweeper = new GaussProcessSweeper(new GaussProcessSweeper.Option() { InitialPopulation = 50 });

    // Key-encode both ID columns, then the sweepable MatrixFactorization trainer.
    var pipeline = context.Transforms.Conversion.MapValueToKey("userId", "userId")
        .Append(context.Transforms.Conversion.MapValueToKey("movieId", "movieId"))
        .Append(context.AutoML().CreateSweepableEstimator(
            (context, option) => { return context.Recommendation().Trainers.MatrixFactorization(option); },
            MFOption.Default,
            new string[] { "userId", "movieId" },
            new string[] { "Score" },
            nameof(MatrixFactorizationTrainer)))
        .Append(context.Transforms.CopyColumns("output", "Score"));

    Console.WriteLine(pipeline.Summary());

    var experimentOption = new Experiment.Option()
    {
        ParameterSweeper = gpSweeper,
        ParameterSweeperIteration = 100,
        // RMSE: lower is better, hence IsMaximizing = false.
        EvaluateFunction = (MLContext context, IDataView data) =>
        {
            return context.Recommendation().Evaluate(data, "rating").RootMeanSquaredError;
        },
        IsMaximizing = false,
    };

    var experiment = context.AutoML().CreateExperiment(pipeline, experimentOption);

    // FIX: await instead of blocking on `.Result` — consistent with the other async
    // Main in these samples; `.Result` wraps exceptions in AggregateException and
    // risks thread-pool starvation.
    var result = await experiment.TrainAsync(train_data, validateFraction: 0.1f, new Reporter());
    var bestModel = result.BestModel;

    // evaluate on test
    var eval = bestModel.Transform(test_data);
    var rmse = context.Recommendation().Evaluate(eval, "rating").RootMeanSquaredError;
    Console.WriteLine($"best model validate score: {result.BestIteration.EvaluateScore}");
    Console.WriteLine($"best model test score: {rmse}");
}
/// <summary>
/// Sweeps text-featurization options (NormalizeText, ApplyWordEmbedding) together
/// with two candidate trainers (SdcaLogisticRegression, FastForest) and reports the
/// best model's test accuracy.
/// </summary>
static async Task Main(string[] args)
{
    var context = new MLContext();
    context.Log += Context_Log;

    // Load Data
    var trainDataset = context.Data.LoadFromTextFile<ModelInput>(@".\datasets\wikipedia-detox-250-line-data-train.tsv", hasHeader: true);
    var testDataset = context.Data.LoadFromTextFile<ModelInput>(@".\datasets\wikipedia-detox-250-line-test.tsv", hasHeader: true);

    var normalizeTextOption = new NormalizeTextOption();
    var applyWordEmbeddingOption = new ApplyWordEmbeddingOption();

    // Create pipeline
    var pipeline = context.AutoML().CreateSweepableEstimator(
        // Create NormalizeText transformer and sweep over it.
        (context, option) =>
        {
            return context.Transforms.Text.NormalizeText(
                option.OutputColumnName,
                option.InputColumnName,
                option.CaseMode,
                option.KeepDiacritics,
                option.KeepPunctuations,
                option.KeepNumbers);
        },
        normalizeTextOption,
        new string[] { "SentimentText" },
        new string[] { "txt" },
        nameof(TextNormalizingEstimator))
        .Append(context.Transforms.Text.TokenizeIntoWords("txt", "txt"))
        .Append(context.Transforms.Text.RemoveDefaultStopWords("txt", "txt"))
        .Append(context.AutoML().CreateSweepableEstimator(
            // Create ApplyWordEmbedding transformer and sweep over it
            (context, option) =>
            {
                return context.Transforms.Text.ApplyWordEmbedding(
                    option.outputColumnName,
                    option.inputColumnName,
                    option.ModelKind);
            },
            applyWordEmbeddingOption,
            new string[] { "txt" },
            new string[] { "txt" },
            nameof(WordEmbeddingEstimator)))
        .Append(
            // use SdcaLogisticRegression and FastForest as trainer
            context.AutoML().BinaryClassification.SdcaLogisticRegression("Sentiment", "txt"),
            context.AutoML().BinaryClassification.FastForest("Sentiment", "txt"));

    var experimentOption = new Experiment.Option()
    {
        // Each sweep iteration is scored by non-calibrated accuracy on validation data.
        EvaluateFunction = (MLContext context, IDataView data) =>
        {
            return context.BinaryClassification.EvaluateNonCalibrated(data, "Sentiment").Accuracy;
        },
        // One-hour training budget, up to 100 sweep iterations.
        MaximumTrainingTime = 60 * 60,
        ParameterSweeperIteration = 100,
    };

    var experiment = context.AutoML().CreateExperiment(pipeline, experimentOption);

    // FIX: await instead of blocking on `.Result` — consistent with the other async
    // Main in these samples; `.Result` wraps exceptions in AggregateException and
    // risks thread-pool starvation.
    var result = await experiment.TrainAsync(trainDataset, 0.1f, new Reporter());

    // evaluate on test
    var eval = result.BestModel.Transform(testDataset);
    var metric = context.BinaryClassification.EvaluateNonCalibrated(eval, "Sentiment");
    Console.WriteLine($"best model validate score: {result.BestIteration.EvaluateScore}");
    Console.WriteLine($"best model test score: {metric.Accuracy}");
}
/// <summary>
/// Builds a sweepable binary-classification pipeline from the given column
/// descriptions and wraps it in an <see cref="Experiment"/> with the supplied option.
/// </summary>
/// <param name="autoPipelineCatalog">The AutoML catalog providing the MLContext.</param>
/// <param name="columns">Column descriptions used to build the pipeline.</param>
/// <param name="option">Experiment configuration.</param>
/// <returns>The created experiment.</returns>
public static Experiment CreateBinaryClassificationExperiment(this AutoPipelineCatalog autoPipelineCatalog, IEnumerable<Column> columns, Experiment.Option option)
{
    var context = autoPipelineCatalog.Context;
    var builder = new PipelineBuilder(TaskType.BinaryClassification, false, true);
    var pipeline = builder.BuildPipeline(context, columns);
    return context.AutoML().CreateExperiment(pipeline, option);
}