/// <summary>
/// Loads the sentiment train/test data, runs an AutoML binary classification
/// experiment on the training data, evaluates the best model on the test data,
/// and saves that model to <c>ModelPath</c>.
/// </summary>
/// <param name="mlContext">ML.NET context used for loading, training, evaluating and saving.</param>
/// <returns>The model of the best run found by the experiment.</returns>
private static ITransformer BuildTrainEvaluateAndSaveModel(MLContext mlContext)
        {
            // Load both datasets up front; each file has a header row.
            var trainSet = mlContext.Data.LoadFromTextFile<SentimentIssue>(TrainDataPath, hasHeader: true);
            var testSet = mlContext.Data.LoadFromTextFile<SentimentIssue>(TestDataPath, hasHeader: true);

            // Preview the training data in the console.
            ConsoleHelper.ShowDataViewInConsole(mlContext, trainSet);

            // Handler that AutoML calls back after every model it produces and evaluates.
            var handler = new BinaryExperimentProgressHandler();

            // Configure and run the AutoML binary classification experiment.
            ConsoleHelper.ConsoleWriteHeader("=============== Running AutoML experiment ===============");
            Console.WriteLine($"Running AutoML binary classification experiment for {ExperimentTime} seconds...");
            var experiment = mlContext.Auto().CreateBinaryClassificationExperiment(ExperimentTime);
            var result = experiment.Execute(trainSet, progressHandler: handler);

            // Report the top models found by AutoML.
            Console.WriteLine();
            PrintTopModels(result);

            // Score the test set with the best model and print the resulting metrics.
            ConsoleHelper.ConsoleWriteHeader("=============== Evaluating model's accuracy with test data ===============");
            var winner = result.BestRun;
            var bestModel = winner.Model;
            var scored = bestModel.Transform(testSet);
            var testMetrics = mlContext.BinaryClassification.EvaluateNonCalibrated(data: scored, scoreColumnName: "Score");
            ConsoleHelper.PrintBinaryClassificationMetrics(winner.TrainerName, testMetrics);

            // Persist the model together with the training data schema.
            mlContext.Model.Save(bestModel, trainSet.Schema, ModelPath);
            Console.WriteLine("The model is saved to {0}", ModelPath);

            return bestModel;
        }
Пример #2
0
        /// <summary>
        /// Runs an AutoML binary classification experiment on <c>TrainDataView</c>
        /// and prints the top models it found.
        /// </summary>
        /// <param name="mlContext">ML.NET context used to create the experiment.</param>
        /// <param name="columnInference">
        /// Column inference results; their <c>ColumnInformation</c> is passed through
        /// <c>CorrectColumnTypes</c> before being handed to the experiment.
        /// </param>
        /// <returns>The full experiment result, including every run AutoML returned.</returns>
        private static ExperimentResult <BinaryClassificationMetrics> RunAutoMLExperiment(MLContext mlContext,
                                                                                          ColumnInferenceResults columnInference)
        {
            // STEP 1: Customize column information returned by InferColumns API.
            // (No pre-featurizer is supplied to the experiment; AutoML featurizes the
            // columns itself based on this column information.)
            ColumnInformation columnInformation = CorrectColumnTypes(columnInference.ColumnInformation);

            // STEP 2: Initialize a cancellation token source to stop the experiment.
            // Not disposed here: CancelExperimentAfterAnyKeyPress receives it and may
            // still use it after this method returns.
            var cts = new CancellationTokenSource();

            // STEP 3: Initialize our user-defined progress handler that AutoML will
            // invoke after each model it produces and evaluates.
            var progressHandler = new BinaryExperimentProgressHandler();

            // STEP 4: Create experiment settings.
            var experimentSettings = CreateExperimentSettings(mlContext, cts);

            // STEP 5: Run AutoML binary classification experiment.
            var experiment = mlContext.Auto().CreateBinaryClassificationExperiment(experimentSettings);

            ConsoleHelper.ConsoleWriteHeader("=============== Running AutoML experiment ===============");
            // The experiment is a binary classification one, so the message says so
            // (it previously claimed to be a regression experiment).
            Console.WriteLine("Running AutoML binary classification experiment...");
            var stopwatch = Stopwatch.StartNew();

            // Cancel experiment after the user presses any key.
            CancelExperimentAfterAnyKeyPress(cts);
            ExperimentResult <BinaryClassificationMetrics> experimentResult = experiment.Execute(trainData: TrainDataView, columnInformation: columnInformation, progressHandler: progressHandler);

            // Freeze the elapsed time so the reported duration covers only the experiment.
            stopwatch.Stop();
            Console.WriteLine($"{experimentResult.RunDetails.Count()} models were returned after {stopwatch.Elapsed.TotalSeconds:0.00} seconds{Environment.NewLine}");

            // Print top models found by AutoML.
            PrintTopModels(experimentResult);

            return experimentResult;
        }
Пример #3
0
        /// <summary>
        /// Loads order details from the Northwind LocalDB database, derives a
        /// <c>PayFullPrice</c> column from <c>Discount</c>, runs an AutoML binary
        /// classification experiment on the transformed data, and exports the best
        /// model to ONNX at <c>MODEL_NAME</c>.
        /// </summary>
        /// <remarks>
        /// Side effects: writes the transformed data to <c>transformedData.tsv</c> in the
        /// current directory and writes the ONNX model file.
        /// </remarks>
        /// <param name="mlContext">ML.NET context used for loading, transforming, training and exporting.</param>
        /// <returns>The model of the best run found by the experiment.</returns>
        private static ITransformer BuildTrainEvaluateAndSaveModel(MLContext mlContext)
        {
            // STEP 1: Common data loading configuration

            string connectionString = @"Data Source=(LocalDB)\MSSQLLocalDB;Database=Northwind;Integrated Security=True;Connect Timeout=30";

            string sqlCommand = @"SELECT OrderID, 
                                    CAST([ProductID] as varchar) as ProductID,
                                    CAST([UnitPrice] as REAL) as UnitPrice,
                                    CAST([Quantity] as REAL) as Quantity,
                                    CAST([Discount] as varchar) as Discount 
                                    FROM [dbo].[Order Details]";

            DatabaseSource dbSource = new DatabaseSource(SqlClientFactory.Instance, connectionString, sqlCommand);

            DatabaseLoader loader = mlContext.Data.CreateDatabaseLoader <OrderDetails>();

            IDataView dataview = loader.Load(dbSource);

            // Map Discount "0" to PayFullPrice = true, then drop the raw Discount column
            // so it is not available to the experiment.
            var pipeline = mlContext.Transforms.Conversion.MapValue("PayFullPrice",
                                                                    new[] { new KeyValuePair <string, bool>("0", true) }, "Discount")
                           .Append(mlContext.Transforms.DropColumns("Discount"));

            var transformedDataView = pipeline.Fit(dataview).Transform(dataview);

            // Dump the transformed data to disk for inspection.
            using (var stream = File.Create("transformedData.tsv"))
            {
                mlContext.Data.SaveAsText(transformedDataView, stream);
            }

            ConsoleHelper.ShowDataViewInConsole(mlContext, transformedDataView);

            // STEP 2: Initialize our user-defined progress handler that AutoML will
            // invoke after each model it produces and evaluates.
            var progressHandler = new BinaryExperimentProgressHandler();

            // STEP 3: Run AutoML binary classification experiment
            ConsoleHelper.ConsoleWriteHeader("=============== Training the model ===============");
            // The experiment is a binary classification one, so the message says so
            // (it previously claimed to be a regression experiment).
            Console.WriteLine($"Running AutoML binary classification experiment for {ExperimentTime} seconds...");

            ExperimentResult <BinaryClassificationMetrics> experimentResult = mlContext.Auto()
                                                                              .CreateBinaryClassificationExperiment(ExperimentTime)
                                                                              .Execute(transformedDataView, LabelColumnName, progressHandler: progressHandler);

            // Print top models found by AutoML
            Console.WriteLine();
            PrintTopModels(experimentResult);

            // STEP 4: Pick the best run.
            // NOTE(review): the header below announces a test-data evaluation, but no
            // test set is loaded or evaluated in this method - confirm whether an
            // evaluation step is missing or the header should be reworded.
            ConsoleHelper.ConsoleWriteHeader("===== Evaluating model's accuracy with test data =====");
            RunDetail <BinaryClassificationMetrics> best = experimentResult.BestRun;
            ITransformer trainedModel = best.Model;

            // STEP 5: Save/persist the trained model as ONNX.
            // NOTE(review): the model was trained on transformedDataView, but the raw
            // dataview is passed as the ONNX input data - confirm this is intended.
            using (var stream = File.Create(MODEL_NAME))
            {
                mlContext.Model.ConvertToOnnx(trainedModel, dataview, stream);
            }
            Console.WriteLine("The model is saved to {0}", MODEL_NAME);

            return trainedModel;
        }