Ejemplo n.º 1
0
        /// <summary>
        /// Initialize (or reinitialize) the stopwords remover.  Also called by the constructor.
        /// </summary>
        /// <remarks>
        /// Every call recreates the ML context, the empty data view used for fitting, the fitted
        /// text pipeline and the prediction engine, so the remover can be switched between the
        /// default and a custom stop-word list at any time.
        /// </remarks>
        /// <param name="CustomStopWords">
        /// Stop words to remove. When <c>null</c> (the default), the built-in English stop-word
        /// list is used instead.
        /// </param>
        public void InitializeStopWordsRemover(string[] CustomStopWords = null)
        {
            _mlContext        = new MLContext();
            _emptySamplesList = new List<TextData>();
            _emptyDataView    = _mlContext.Data.LoadFromEnumerable(_emptySamplesList);

            // Default pipeline: tokenize "Text" into "Words", then drop the built-in English stop words.
            _stopWordsTextPipeline = _mlContext.Transforms.Text.TokenizeIntoWords("Words", "Text")
                                     .Append(_mlContext.Transforms.Text.RemoveDefaultStopWords("WordsWithoutStopWords", "Words", language: StopWordsRemovingEstimator.Language.English));

            if (CustomStopWords == null)
            {
                // The empty data view is used only to obtain a fitted transformer from the estimator chain.
                _textPipeline = _stopWordsTextPipeline.Fit(_emptyDataView);
                _mode         = removeStopWordswMode.Default;
            }
            else
            {
                // Custom pipeline: same tokenization, but remove the caller-supplied words.
                var customPipeline = _mlContext.Transforms.Text.TokenizeIntoWords("Words", "Text")
                                     .Append(_mlContext.Transforms.Text.RemoveStopWords(
                                                 "WordsWithoutStopWords", "Words", stopwords: CustomStopWords));

                // The custom pipeline must be fitted and stored; otherwise the prediction engine below
                // is built from a stale (or null) _textPipeline and the custom words are never applied.
                // NOTE(review): assumes _textPipeline is declared as ITransformer (or another type
                // assignable from both fitted chains) - confirm against the field declaration.
                _textPipeline = customPipeline.Fit(_emptyDataView);
                _mode         = removeStopWordswMode.Custom;
            }

            _predictionEngine = _mlContext.Model.CreatePredictionEngine<TextData, TransformedTextData>(_textPipeline);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Trains a binary classification model on the given file, saves it to <c>ModelPath</c>,
        /// evaluates it on a held-out split and prints the resulting metrics to the console.
        /// </summary>
        /// <param name="trainingFileName">
        /// Path of the training data file. If the file does not exist, a message is written to the
        /// console and the method returns without training.
        /// </param>
        public void Train(string trainingFileName)
        {
            // 1. Make sure the training data file exists; report and bail out otherwise.
            if (!File.Exists(trainingFileName))
            {
                Console.WriteLine($"Failed to find training data file ({trainingFileName})");

                return;
            }

            // 2. Load the text file into an IDataView using ML.NET's LoadFromTextFile helper.
            IDataView trainingDataView = MlContext.Data.LoadFromTextFile<RestaurantFeedback>(trainingFileName);

            // 3. Hold back 20% of the rows as a test set.
            DataOperationsCatalog.TrainTestData dataSplit = MlContext.Data.TrainTestSplit(trainingDataView, testFraction: 0.2);

            // 4. Create the data-processing pipeline: featurize the Text column into "Features".
            Microsoft.ML.Transforms.Text.TextFeaturizingEstimator dataProcessPipeline = MlContext.Transforms.Text.FeaturizeText(
                outputColumnName: "Features",
                inputColumnName: nameof(RestaurantFeedback.Text));

            // 5. Instantiate the SDCA logistic regression trainer.
            Microsoft.ML.Trainers.SdcaLogisticRegressionBinaryTrainer sdcaRegressionTrainer = MlContext.BinaryClassification.Trainers.SdcaLogisticRegression(
                labelColumnName: nameof(RestaurantFeedback.Label),
                featureColumnName: "Features");

            // 6. Complete the pipeline by appending the trainer.
            Microsoft.ML.Data.EstimatorChain<Microsoft.ML.Data.BinaryPredictionTransformer<Microsoft.ML.Calibrators.CalibratedModelParametersBase<Microsoft.ML.Trainers.LinearBinaryModelParameters, Microsoft.ML.Calibrators.PlattCalibrator>>> trainingPipeline = dataProcessPipeline.Append(sdcaRegressionTrainer);

            // 7. Train the model on the training portion of the split.
            ITransformer trainedModel = trainingPipeline.Fit(dataSplit.TrainSet);

            // 8. Save the trained model to ModelPath together with the training set's schema.
            MlContext.Model.Save(trainedModel, dataSplit.TrainSet.Schema, ModelPath);

            // 9. Run the trained model over the test set.
            IDataView testSetTransform = trainedModel.Transform(dataSplit.TestSet);

            // 10. Evaluate the transformed test set to obtain the binary classification metrics.
            Microsoft.ML.Data.CalibratedBinaryClassificationMetrics modelMetrics = MlContext.BinaryClassification.Evaluate(
                data: testSetTransform,
                labelColumnName: nameof(RestaurantFeedback.Label),
                scoreColumnName: nameof(RestaurantPrediction.Score));

            // 11. Print the main metrics. Recall uses "0.##" rather than "#.##" so that a value of 0
            // prints as "0" instead of an empty string and fractions keep their leading zero.
            Console.WriteLine($"Area Under Curve: {modelMetrics.AreaUnderRocCurve:P2}{Environment.NewLine}" +
                              $"Area Under Precision Recall Curve: {modelMetrics.AreaUnderPrecisionRecallCurve:P2}{Environment.NewLine}" +
                              $"Accuracy: {modelMetrics.Accuracy:P2}{Environment.NewLine}" +
                              $"F1Score: {modelMetrics.F1Score:P2}{Environment.NewLine}" +
                              $"Positive Recall: {modelMetrics.PositiveRecall:0.##}{Environment.NewLine}" +
                              $"Negative Recall: {modelMetrics.NegativeRecall:0.##}{Environment.NewLine}");
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Fits the supplied pipeline on the training data and reports how long the fit took.
        /// </summary>
        /// <param name="trainDataView">Data the pipeline is fitted on.</param>
        /// <param name="pipeline">Estimator chain to fit.</param>
        /// <returns>The transformer produced by fitting <paramref name="pipeline"/>.</returns>
        private static ITransformer GetTrainedModel(IDataView trainDataView, Microsoft.ML.Data.EstimatorChain<KeyToValueMappingTransformer> pipeline)
        {
            Console.WriteLine("*** Training the image classification model with DNN Transfer Learning on top of the selected pre-trained model/architecture ***");

            // Time only the Fit call itself.
            Stopwatch timer = Stopwatch.StartNew();
            ITransformer fittedModel = pipeline.Fit(trainDataView);
            timer.Stop();

            // Integer division: the duration is reported in whole seconds, fraction dropped.
            long wholeSeconds = timer.ElapsedMilliseconds / 1000;
            Console.WriteLine($"Training with transfer learning took: {wholeSeconds} seconds");

            return fittedModel;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Trains an SDCA regression model on the given comma-separated file, saves it to
        /// <c>ModelPath</c>, evaluates it on a held-out split and prints the regression metrics.
        /// </summary>
        /// <param name="trainingFileName">
        /// Path of the training data file. If the file does not exist, a message is written to the
        /// console and the method returns without training.
        /// </param>
        public void Train(string trainingFileName)
        {
            if (!File.Exists(trainingFileName))
            {
                Console.WriteLine($"Failed to find training data file ({trainingFileName})");

                return;
            }

            // The training data is comma-separated.
            IDataView trainingDataView = MlContext.Data.LoadFromTextFile<EmploymentHistory>(trainingFileName, ',');

            // Hold back 40% of the rows as a test set.
            DataOperationsCatalog.TrainTestData dataSplit = MlContext.Data.TrainTestSplit(trainingDataView, testFraction: 0.4);

            // Copy DurationInMonths into "Label", normalize each input column, then concatenate the
            // columns returned by ToPropertyList into "Features".
            // NOTE(review): ToPropertyList is presumably given DurationInMonths so the label is left
            // out of the feature list - confirm against the helper.
            // The steps from YearsExperience onward form a nested chain that is appended as a single
            // estimator; the declared type of dataProcessPipeline depends on that nesting, so it is
            // kept as is.
            Microsoft.ML.Data.EstimatorChain<Microsoft.ML.Data.TransformerChain<Microsoft.ML.Data.ColumnConcatenatingTransformer>> dataProcessPipeline =
                MlContext.Transforms.CopyColumns("Label", nameof(EmploymentHistory.DurationInMonths))
                .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.IsMarried)))
                .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.BSDegree)))
                .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.MSDegree)))
                .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.YearsExperience))
                        .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.AgeAtHire)))
                        .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.HasKids)))
                        .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.WithinMonthOfVesting)))
                        .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.DeskDecorations)))
                        .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.LongCommute)))
                        .Append(MlContext.Transforms.Concatenate("Features",
                                                                 typeof(EmploymentHistory).ToPropertyList<EmploymentHistory>(nameof(EmploymentHistory.DurationInMonths)))));

            // Create the SDCA regression trainer; only the label and feature column names are specified.
            Microsoft.ML.Trainers.SdcaRegressionTrainer trainer = MlContext.Regression.Trainers.Sdca(labelColumnName: "Label", featureColumnName: "Features");

            Microsoft.ML.Data.EstimatorChain<Microsoft.ML.Data.RegressionPredictionTransformer<Microsoft.ML.Trainers.LinearRegressionModelParameters>> trainingPipeline = dataProcessPipeline.Append(trainer);

            // Train on the training portion, then save the model with the training set's schema.
            ITransformer trainedModel = trainingPipeline.Fit(dataSplit.TrainSet);

            MlContext.Model.Save(trainedModel, dataSplit.TrainSet.Schema, ModelPath);

            // Run the trained model over the test set and compute regression-specific metrics.
            IDataView testSetTransform = trainedModel.Transform(dataSplit.TestSet);

            Microsoft.ML.Data.RegressionMetrics modelMetrics = MlContext.Regression.Evaluate(testSetTransform);

            // All metrics use "0.##" so a zero value prints as "0" (not an empty string) and
            // fractions keep their leading zero.
            Console.WriteLine($"Loss Function: {modelMetrics.LossFunction:0.##}{Environment.NewLine}" +
                              $"Mean Absolute Error: {modelMetrics.MeanAbsoluteError:0.##}{Environment.NewLine}" +
                              $"Mean Squared Error: {modelMetrics.MeanSquaredError:0.##}{Environment.NewLine}" +
                              $"RSquared: {modelMetrics.RSquared:0.##}{Environment.NewLine}" +
                              $"Root Mean Squared Error: {modelMetrics.RootMeanSquaredError:0.##}");
        }