Exemplo n.º 1
0
        // Trains a sweepable multiclass pipeline on the iris dataset via
        // AutoML, then reports the best model's validation and test
        // micro-accuracy.
        static async Task Main(string[] args)
        {
            var mlContext = new MLContext();

            // Load the CSV and hold out 30% of the rows as a test set.
            var irisData  = mlContext.Data.LoadFromTextFile <Iris>(@".\iris.csv", separatorChar: ',', hasHeader: true);
            var trainTest = mlContext.Data.TrainTestSplit(irisData, 0.3);

            // Key-encode the label, assemble the feature vector and attach a
            // sweepable LBFGS maximum-entropy trainer.
            var pipeline = mlContext.Transforms.Conversion.MapValueToKey("species", "species")
                           .Append(mlContext.Transforms.Concatenate("features", new string[] { "sepal_length" }))
                           .Append(mlContext.AutoML().MultiClassification.LbfgsMaximumEntropy("species", "features"));

            // Candidates are scored by micro-accuracy on the validation fold.
            var experimentOption = new Experiment.Option()
            {
                EvaluateFunction = (MLContext ctx, IDataView data) =>
                    ctx.MulticlassClassification.Evaluate(data, "species").MicroAccuracy,
            };

            var experiment = mlContext.AutoML().CreateExperiment(pipeline, experimentOption);
            var result     = await experiment.TrainAsync(trainTest.TrainSet, validateFraction: 0.1f, reporter: new Reporter());

            // Evaluate the winning model on the held-out test split.
            var testScored  = result.BestModel.Transform(trainTest.TestSet);
            var testMetrics = mlContext.MulticlassClassification.Evaluate(testScored, "species");

            Console.WriteLine($"best model validate score: {result.BestIteration.EvaluateScore}");
            Console.WriteLine($"best model test score: {testMetrics.MicroAccuracy}");
        }
        /// <summary>
        /// Runs an AutoML binary-classification experiment over the wiki
        /// sentiment test data and prints the best model's test accuracy.
        /// </summary>
        public override void Run()
        {
            var context = new MLContext(1);

            context.Log += this.Context_Log;

            // Describe the dataset schema for the pipeline builder.
            // ("Catagorical" is the enum member's actual spelling upstream.)
            var columns = new List <Column>()
            {
                new Column("Sentiment", ColumnType.Catagorical, ColumnPurpose.Label),
                new Column("SentimentText", ColumnType.String, ColumnPurpose.TextFeature),
            };

            var wiki           = this.GetFileFromTestData("wiki.tsv");
            var data           = context.Data.LoadFromTextFile <Wiki>(wiki, hasHeader: true);
            var trainTestSplit = context.Data.TrainTestSplit(data);

            var experimentOption = new Experiment.Option()
            {
                // Candidates are ranked by non-calibrated accuracy. The lambda
                // parameter is named "ctx" so it does not shadow the outer
                // MLContext local.
                EvaluateFunction = (MLContext ctx, IDataView predictions) =>
                {
                    return(ctx.BinaryClassification.EvaluateNonCalibrated(predictions, "Sentiment").Accuracy);
                },
                ParameterSweeperIteration = 5,
            };

            var experiment = context.AutoML().CreateBinaryClassificationExperiment(columns, experimentOption);

            // Run() overrides a synchronous base method, so we cannot await.
            // GetAwaiter().GetResult() blocks like .Result but rethrows the
            // original exception instead of wrapping it in AggregateException.
            var result = experiment.TrainAsync(trainTestSplit.TrainSet, 0.1f, Reporter.Instance).GetAwaiter().GetResult();

            // Score the held-out split with the same evaluate function used
            // during the sweep.
            var eval       = result.BestModel.Transform(trainTestSplit.TestSet);
            var eval_score = experimentOption.EvaluateFunction(context, eval);

            Console.WriteLine($"eval accuracy: {eval_score}");
        }
 /// <summary>
 /// Captures the outcome of an A/B statistics computation: whether the sample
 /// was too small, the confidence level, and the best/worst options observed.
 /// </summary>
 /// <param name="insufficientSampleSize">True when too few samples were seen.</param>
 /// <param name="confidenceLevel">Confidence level of the comparison.</param>
 /// <param name="bestOption">Best-performing experiment option.</param>
 /// <param name="worstOption">Worst-performing experiment option.</param>
 public ABsoluteMaybeStatisticsResult(bool insufficientSampleSize,
                                      double confidenceLevel,
                                      Experiment.Option bestOption,
                                      Experiment.Option worstOption)
 {
     // Plain property capture — no validation is performed here.
     this.InsufficientSampleSize = insufficientSampleSize;
     this.ConfidenceLevel        = confidenceLevel;
     this.BestOption             = bestOption;
     this.WorstOption            = worstOption;
 }
Exemplo n.º 4
0
        // Sweeps a matrix-factorization recommender over the movie-ratings
        // dataset with a Gaussian-process sweeper, minimizing RMSE on a
        // validation fraction and reporting the score on the test file.
        static async Task Main(string[] args)
        {
            var context = new MLContext();

            // Trainer option defaults come from MFOption.Default below; the
            // original unused "paramaters" local has been removed.
            var train_data = context.Data.LoadFromTextFile <ModelInput>(@".\recommendation-ratings-train.csv", separatorChar: ',', hasHeader: true);
            var test_data  = context.Data.LoadFromTextFile <ModelInput>(@".\recommendation-ratings-test.csv", separatorChar: ',', hasHeader: true);

            // Gaussian-process sweeper seeded with 50 initial trials.
            var gpSweeper = new GaussProcessSweeper(new GaussProcessSweeper.Option()
            {
                InitialPopulation = 50
            });

            // Key-encode user/movie ids, then attach a sweepable
            // MatrixFactorization trainer built from each sampled option set.
            // Lambda parameter renamed to "ctx" so it does not shadow the
            // outer MLContext local.
            var pipeline = context.Transforms.Conversion.MapValueToKey("userId", "userId")
                           .Append(context.Transforms.Conversion.MapValueToKey("movieId", "movieId"))
                           .Append(context.AutoML().CreateSweepableEstimator(
                                       (ctx, option) => ctx.Recommendation().Trainers.MatrixFactorization(option),
                                       MFOption.Default,
                                       new string[] { "userId", "movieId" },
                                       new string[] { "Score" },
                                       nameof(MatrixFactorizationTrainer)))
                           .Append(context.Transforms.CopyColumns("output", "Score"));

            Console.WriteLine(pipeline.Summary());

            var experimentOption = new Experiment.Option()
            {
                ParameterSweeper          = gpSweeper,
                ParameterSweeperIteration = 100,
                // RMSE: lower is better, hence IsMaximizing = false.
                EvaluateFunction          = (MLContext ctx, IDataView data) =>
                {
                    return(ctx.Recommendation().Evaluate(data, "rating").RootMeanSquaredError);
                },
                IsMaximizing = false
            };

            var experiment = context.AutoML().CreateExperiment(pipeline, experimentOption);

            // await instead of .Result: no blocking, and failures surface as
            // the original exception rather than an AggregateException.
            var result    = await experiment.TrainAsync(train_data, validateFraction: 0.1f, new Reporter());
            var bestModel = result.BestModel;

            // evaluate on test
            var eval = bestModel.Transform(test_data);
            var rmse = context.Recommendation().Evaluate(eval, "rating").RootMeanSquaredError;

            Console.WriteLine($"best model validate score: {result.BestIteration.EvaluateScore}");
            Console.WriteLine($"best model test score: {rmse}");
        }
Exemplo n.º 5
0
        // Sweeps text-normalization and word-embedding options plus two
        // candidate trainers (SDCA logistic regression and FastForest) for
        // sentiment classification, then reports test-set accuracy.
        static async Task Main(string[] args)
        {
            var context = new MLContext();

            context.Log += Context_Log;

            // Load Data
            var trainDataset = context.Data.LoadFromTextFile <ModelInput>(@".\datasets\wikipedia-detox-250-line-data-train.tsv", hasHeader: true);
            var testDataset  = context.Data.LoadFromTextFile <ModelInput>(@".\datasets\wikipedia-detox-250-line-test.tsv", hasHeader: true);

            var normalizeTextOption      = new NormalizeTextOption();
            var applyWordEmbeddingOption = new ApplyWordEmbeddingOption();

            // Create pipeline. Lambda parameters are named "ctx" so they do
            // not shadow the outer MLContext local.
            var pipeline = context.AutoML().CreateSweepableEstimator(
                // Create NormalizeText transformer and sweep over it.
                (ctx, option) =>
                {
                    return(ctx.Transforms.Text.NormalizeText(
                               option.OutputColumnName,
                               option.InputColumnName,
                               option.CaseMode,
                               option.KeepDiacritics,
                               option.KeepPunctuations,
                               option.KeepNumbers));
                },
                normalizeTextOption,
                new string[] { "SentimentText" },
                new string[] { "txt" },
                nameof(TextNormalizingEstimator))
                           .Append(context.Transforms.Text.TokenizeIntoWords("txt", "txt"))
                           .Append(context.Transforms.Text.RemoveDefaultStopWords("txt", "txt"))
                           .Append(context.AutoML().CreateSweepableEstimator(
                                       // Create ApplyWordEmbedding transformer and sweep over it
                                       (ctx, option) =>
                                       {
                                           return(ctx.Transforms.Text.ApplyWordEmbedding(
                                                      option.outputColumnName,
                                                      option.inputColumnName,
                                                      option.ModelKind));
                                       },
                                       applyWordEmbeddingOption,
                                       new string[] { "txt" },
                                       new string[] { "txt" },
                                       nameof(WordEmbeddingEstimator)))
                           .Append(
                // use SdcaLogisticRegression and FastForest as trainer
                context.AutoML().BinaryClassification.SdcaLogisticRegression("Sentiment", "txt"),
                context.AutoML().BinaryClassification.FastForest("Sentiment", "txt"));

            var experimentOption = new Experiment.Option()
            {
                // Candidates are ranked by non-calibrated accuracy.
                EvaluateFunction = (MLContext ctx, IDataView data) =>
                {
                    return(ctx.BinaryClassification.EvaluateNonCalibrated(data, "Sentiment").Accuracy);
                },
                MaximumTrainingTime       = 60 * 60, // presumably seconds (1h budget) — confirm against Experiment.Option
                ParameterSweeperIteration = 100,
            };

            var experiment = context.AutoML().CreateExperiment(pipeline, experimentOption);

            // await instead of .Result: no blocking, and failures surface as
            // the original exception rather than an AggregateException.
            var result = await experiment.TrainAsync(trainDataset, 0.1f, new Reporter());

            // evaluate on test
            var eval   = result.BestModel.Transform(testDataset);
            var metric = context.BinaryClassification.EvaluateNonCalibrated(eval, "Sentiment");

            Console.WriteLine($"best model validate score: {result.BestIteration.EvaluateScore}");
            Console.WriteLine($"best model test score: {metric.Accuracy}");
        }
Exemplo n.º 6
0
        /// <summary>
        /// Builds a default binary-classification sweepable pipeline from the
        /// supplied column descriptions and wraps it in an experiment with the
        /// given option.
        /// </summary>
        /// <param name="autoPipelineCatalog">Catalog supplying the MLContext.</param>
        /// <param name="columns">Schema/purpose description of the dataset columns.</param>
        /// <param name="option">Experiment configuration.</param>
        /// <returns>An experiment ready to train.</returns>
        public static Experiment CreateBinaryClassificationExperiment(this AutoPipelineCatalog autoPipelineCatalog, IEnumerable <Column> columns, Experiment.Option option)
        {
            // NOTE(review): the two boolean flags mirror the original call;
            // their meaning depends on PipelineBuilder's constructor — confirm.
            var builder  = new PipelineBuilder(TaskType.BinaryClassification, false, true);
            var pipeline = builder.BuildPipeline(autoPipelineCatalog.Context, columns);

            return(autoPipelineCatalog.Context.AutoML().CreateExperiment(pipeline, option));
        }