private static ITransformer BuildTrainEvaluateAndSaveModel(MLContext mlContext)
        {
            // STEP 1: Common data loading configuration
            var textLoader       = SentimentAnalysysTextLoaderFactory.CreateTextLoader(mlContext);
            var trainingDataView = textLoader.Read(TrainDataPath);
            var testDataView     = textLoader.Read(TestDataPath);

            // STEP 2: Common data process configuration with pipeline data transformations
            var dataProcessPipeline = mlContext.Transforms.Text.FeaturizeText("Text", "Features");

            // STEP 3: Set the training algorithm, then create and config the modelBuilder
            var modelBuilder = new Common.ModelBuilder <SentimentIssue, SentimentPrediction>(mlContext, dataProcessPipeline);
            var trainer      = mlContext.BinaryClassification.Trainers.FastTree(label: "Label", features: "Features");

            modelBuilder.AddTrainer(trainer);

            // STEP 4: Train the model fitting to the DataSet
            Console.WriteLine("=============== Training the model ===============");
            modelBuilder.Train(trainingDataView);

            // STEP 5: Evaluate the model and show accuracy stats
            Console.WriteLine("===== Evaluating Model's accuracy with Test data =====");
            var metrics = modelBuilder.EvaluateBinaryClassificationModel(testDataView, "Label", "Score");

            Common.ConsoleHelper.PrintBinaryClassificationMetrics(trainer.ToString(), metrics);

            // STEP 6: Save/persist the trained model to a .ZIP file
            Console.WriteLine("=============== Saving the model to a file ===============");
            modelBuilder.SaveModelAsFile(ModelPath);

            return(modelBuilder.TrainedModel);
        }
示例#2
0
        static void Main(string[] args)
        {
            //Create MLContext to be shared across the model creation workflow objects
            //Set a random seed for repeatable/deterministic results across multiple trainings.
            var mlContext = new MLContext();

            // STEP 1: Common data loading configuration
            DataLoader dataLoader       = new DataLoader(mlContext);
            var        trainingDataView = dataLoader.GetDataView(TrainDataPath);
            var        testDataView     = dataLoader.GetDataView(TestDataPath);

            // STEP 2: Common data process configuration with pipeline data transformations
            var dataProcessor       = new DataProcessor(mlContext);
            var dataProcessPipeline = dataProcessor.DataProcessPipeline;

            // (OPTIONAL) Peek data (such as 2 records) in training DataView after applying the ProcessPipeline's transformations into "Features"
            Common.ConsoleHelper.PeekDataViewInConsole <SentimentIssue>(mlContext, trainingDataView, dataProcessPipeline, 2);
            //Common.ConsoleHelper.PeekVectorColumnDataInConsole(mlContext, "Features", trainingDataView, dataProcessPipeline, 2);

            // STEP 3: Set the training algorithm, then create and config the modelBuilder
            var modelBuilder = new Common.ModelBuilder <SentimentIssue, SentimentPrediction>(mlContext, dataProcessPipeline);
            var trainer      = mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscent(label: "Label", features: "Features");

            //Other way: var trainer = new LinearClassificationTrainer(mlContext, "Features", "Label");
            modelBuilder.AddTrainer(trainer);

            // STEP 4: Train the model fitting to the DataSet
            Console.WriteLine("=============== Training the model ===============");
            modelBuilder.Train(trainingDataView);

            // STEP 5: Evaluate the model and show accuracy stats
            Console.WriteLine("===== Evaluating Model's accuracy with Test data =====");
            var metrics = modelBuilder.EvaluateBinaryClassificationModel(testDataView, "Label", "Score");

            Common.ConsoleHelper.PrintBinaryClassificationMetrics("StochasticDualCoordinateAscent", metrics);

            // STEP 6: Save/persist the trained model to a .ZIP file
            Console.WriteLine("=============== Saving the model to a file ===============");
            modelBuilder.SaveModelAsFile(ModelPath);

            // (OPTIONAL) Try/test a single prediction by loding the model from the file, first.
            SentimentIssue sampleStatement = new SentimentIssue {
                Text = "This is a very rude movie"
            };
            var modelScorer = new Common.ModelScorer <SentimentIssue, SentimentPrediction>(mlContext);

            modelScorer.LoadModelFromZipFile(ModelPath);
            var resultprediction = modelScorer.PredictSingle(sampleStatement);

            Console.WriteLine($"=============== Single Prediction  ===============");
            Console.WriteLine($"Text: {sampleStatement.Text} | Prediction: {(Convert.ToBoolean(resultprediction.Prediction) ? "Toxic" : "Nice")} sentiment | Probability: {resultprediction.Probability} ");
            Console.WriteLine($"==================================================");
            //

            Common.ConsoleHelper.ConsoleWriteHeader("=============== End of training process, hit any key to finish ===============");
            Console.ReadKey();
        }
示例#3
0
        public static void CreateTrainAndEvaluateModel(MLContext mlContext)
        {
            // STEP 1: Common data loading configuration
            DataLoader dataLoader       = new DataLoader(mlContext);
            var        trainingDataView = dataLoader.GetDataView(TrainDataPath);
            var        testDataView     = dataLoader.GetDataView(TestDataPath);

            // STEP 2: Common data process configuration with pipeline data transformations
            var dataProcessor       = new DataProcessor(mlContext);
            var dataProcessPipeline = dataProcessor.DataProcessPipeline;

            // (OPTIONAL) Peek data (such as 2 records) in training DataView after applying the ProcessPipeline's transformations into "Features"
            Common.ConsoleHelper.PeekDataViewInConsole <TitanicData>(mlContext, trainingDataView, dataProcessPipeline, 2);
            Common.ConsoleHelper.PeekVectorColumnDataInConsole(mlContext, "Features", trainingDataView, dataProcessPipeline, 2);

            // STEP 3: Set the training algorithm, then create and config the modelBuilder
            // FastTreeBinaryClassifier is an algorithm that will be used to train the model.
            // It has three hyperparameters for tuning decision tree performance.
            //pipeline.Add(new FastTreeBinaryClassifier());// {NumLeaves = 5, NumTrees = 5, MinDocumentsInLeafs = 2});
            var modelBuilder = new Common.ModelBuilder <TitanicData, TitanicPrediction>(mlContext, dataProcessPipeline);
            var trainer      = mlContext.BinaryClassification.Trainers.FastTree(label: "Label", features: "Features", numLeaves: 10, numTrees: 5, minDatapointsInLeafs: 10);

            modelBuilder.AddTrainer(trainer);

            // STEP 4: Train the model fitting to the DataSet
            Console.WriteLine("=============== Training the model ===============");
            modelBuilder.Train(trainingDataView);

            // STEP 5: Evaluate the model and show accuracy stats
            Console.WriteLine("===== Evaluating Model's accuracy with Test data =====");
            var metrics = modelBuilder.EvaluateBinaryClassificationModel(testDataView, "Label", "Score");

            Common.ConsoleHelper.PrintBinaryClassificationMetrics(trainer.ToString(), metrics);

            // STEP 6: Save/persist the trained model to a .ZIP file
            Console.WriteLine("=============== Saving the model to a file ===============");
            modelBuilder.SaveModelAsFile(ModelPath);
        }
示例#4
0
        private static ITransformer BuildTrainEvaluateAndSaveModel(MLContext mlContext)
        {
            // STEP 1: Common data loading configuration
            DataLoader dataLoader       = new DataLoader(mlContext);
            var        trainingDataView = dataLoader.GetDataView(TrainDataPath);
            var        testDataView     = dataLoader.GetDataView(TestDataPath);

            // STEP 2: Common data process configuration with pipeline data transformations
            var dataProcessor       = new DataProcessor(mlContext);
            var dataProcessPipeline = dataProcessor.DataProcessPipeline;

            // (OPTIONAL) Peek data (such as 2 records) in training DataView after applying the ProcessPipeline's transformations into "Features"
            Common.ConsoleHelper.PeekDataViewInConsole <SentimentIssue>(mlContext, trainingDataView, dataProcessPipeline, 2);
            //Common.ConsoleHelper.PeekVectorColumnDataInConsole(mlContext, "Features", trainingDataView, dataProcessPipeline, 2);

            // STEP 3: Set the training algorithm, then create and config the modelBuilder
            var modelBuilder = new Common.ModelBuilder <SentimentIssue, SentimentPrediction>(mlContext, dataProcessPipeline);
            var trainer      = mlContext.BinaryClassification.Trainers.FastTree(label: "Label", features: "Features");

            modelBuilder.AddTrainer(trainer);

            // STEP 4: Train the model fitting to the DataSet
            Console.WriteLine("=============== Training the model ===============");
            modelBuilder.Train(trainingDataView);

            // STEP 5: Evaluate the model and show accuracy stats
            Console.WriteLine("===== Evaluating Model's accuracy with Test data =====");
            var metrics = modelBuilder.EvaluateBinaryClassificationModel(testDataView, "Label", "Score");

            Common.ConsoleHelper.PrintBinaryClassificationMetrics(trainer.ToString(), metrics);

            // STEP 6: Save/persist the trained model to a .ZIP file
            Console.WriteLine("=============== Saving the model to a file ===============");
            modelBuilder.SaveModelAsFile(ModelPath);

            return(modelBuilder.TrainedModel);
        }