/// <summary>
/// Predict a target using a linear binary classification model trained with the SDCA trainer, and log-loss.
/// </summary>
/// <param name="catalog">The binary classification catalog trainer object.</param>
/// <param name="label">The label, or dependent variable.</param>
/// <param name="features">The features, or independent variables.</param>
/// <param name="weights">The optional example weights.</param>
/// <param name="options">Advanced arguments to the algorithm. Must not be null. The instance is mutated:
/// its <c>LabelColumnName</c> and <c>FeatureColumnName</c> are overwritten with the reconciled column names
/// when the trainer factory is invoked.</param>
/// <param name="onFit">A delegate that is called every time the
/// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
/// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive
/// the linear model that was trained, as well as the calibrator on top of that model. Note that this action cannot change the
/// result in any way; it is only a way for the caller to be informed about what was learnt.</param>
/// <returns>The set of output columns including in order the predicted binary classification score (which will range
/// from negative to positive infinity), the calibrated prediction (from 0 to 1), and the predicted label.</returns>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[SDCA](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Static/SDCABinaryClassification.cs)]
/// ]]></format>
/// </example>
public static (Scalar<float> score, Scalar<float> probability, Scalar<bool> predictedLabel) Sdca(
    this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
    Scalar<bool> label,
    Vector<float> features,
    Scalar<float> weights,
    SdcaLogisticRegressionBinaryTrainer.Options options,
    Action<CalibratedModelParametersBase<LinearBinaryModelParameters, PlattCalibrator>> onFit = null)
{
    Contracts.CheckValue(label, nameof(label));
    Contracts.CheckValue(features, nameof(features));
    Contracts.CheckValueOrNull(weights);
    // The trainer factory below dereferences options unconditionally, so a null value must be
    // rejected here rather than surfacing later as a NullReferenceException inside the factory.
    Contracts.CheckValue(options, nameof(options));
    Contracts.CheckValueOrNull(onFit);

    var rec = new TrainerEstimatorReconciler.BinaryClassifier(
        (env, labelName, featuresName, weightsName) =>
        {
            // NOTE(review): weightsName is received but never copied onto options, so the
            // weights column does not appear to reach the trainer - confirm whether intended.
            options.LabelColumnName = labelName;
            options.FeatureColumnName = featuresName;

            var trainer = new SdcaLogisticRegressionBinaryTrainer(env, options);
            if (onFit != null)
            {
                return trainer.WithOnFitDelegate(trans => { onFit(trans.Model); });
            }

            return trainer;
        },
        label,
        features,
        weights);

    return rec.Output;
}
/// <summary>
/// Trains a linear binary classifier with the SDCA trainer (log-loss) and predicts a target with it.
/// </summary>
/// <param name="catalog">The binary classification catalog trainer object.</param>
/// <param name="label">The label (dependent variable) column.</param>
/// <param name="features">The features (independent variables) column.</param>
/// <param name="weights">Optional per-example weights.</param>
/// <param name="l2Regularization">The L2 regularization hyperparameter. Must not be negative, if specified.</param>
/// <param name="l1Threshold">The L1 regularization hyperparameter. Higher values will tend to lead to more sparse model.
/// Must not be negative, if specified.</param>
/// <param name="numberOfIterations">The maximum number of passes to perform over the data. Must be positive, if specified.</param>
/// <param name="onFit">A delegate invoked every time the
/// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
/// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. It receives the trained
/// linear model together with the calibrator on top of it. The delegate cannot change the result in any way; it only
/// informs the caller about what was learnt.</param>
/// <returns>The output columns, in order: the predicted binary classification score (ranging from negative to
/// positive infinity), the calibrated prediction (from 0 to 1), and the predicted label.</returns>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[SDCA](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Static/SDCABinaryClassification.cs)]
/// ]]></format>
/// </example>
public static (Scalar<float> score, Scalar<float> probability, Scalar<bool> predictedLabel) Sdca(
    this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
    Scalar<bool> label,
    Vector<float> features,
    Scalar<float> weights = null,
    float? l2Regularization = null,
    float? l1Threshold = null,
    int? numberOfIterations = null,
    Action<CalibratedModelParametersBase<LinearBinaryModelParameters, PlattCalibrator>> onFit = null)
{
    // Required columns.
    Contracts.CheckValue(label, nameof(label));
    Contracts.CheckValue(features, nameof(features));
    Contracts.CheckValueOrNull(weights);

    // Optional hyperparameters: an unspecified (null) value always passes the range check.
    Contracts.CheckParam(
        !(l2Regularization.HasValue && l2Regularization.Value < 0),
        nameof(l2Regularization),
        "Must not be negative, if specified.");
    Contracts.CheckParam(
        !(l1Threshold.HasValue && l1Threshold.Value < 0),
        nameof(l1Threshold),
        "Must not be negative, if specified.");
    Contracts.CheckParam(
        !(numberOfIterations.HasValue && numberOfIterations.Value < 1),
        nameof(numberOfIterations),
        "Must be positive if specified");
    Contracts.CheckValueOrNull(onFit);

    var reconciler = new TrainerEstimatorReconciler.BinaryClassifier(
        (env, labelName, featuresName, weightsName) =>
        {
            var sdca = new SdcaLogisticRegressionBinaryTrainer(
                env, labelName, featuresName, weightsName, l2Regularization, l1Threshold, numberOfIterations);

            // Without a callback the bare trainer is the estimator.
            if (onFit == null)
            {
                return sdca;
            }

            return sdca.WithOnFitDelegate(fitted => onFit(fitted.Model));
        },
        label,
        features,
        weights);

    return reconciler.Output;
}
/// <summary>
/// Loads the "wikipedia-detox-250-line-data.tsv" test file, featurizes its text column, trains an
/// SDCA logistic-regression binary classifier on a cached view of the result and returns a scorer
/// built from the trained model.
/// </summary>
/// <returns>The scorer obtained from <c>ScoreUtils.GetScorer</c> for the trained model.</returns>
private static IDataScorerTransform _TrainSentiment()
{
    bool normalize = true;

    // Tab-separated file with a header row: column 0 is the label, column 1 the raw text.
    var loaderOptions = new TextLoader.Options()
    {
        Separators = new[] { '\t' },
        HasHeader = true,
        Columns = new[]
        {
            new TextLoader.Column("Label", DataKind.Boolean, 0),
            new TextLoader.Column("SentimentText", DataKind.String, 1)
        }
    };

    TextFeaturizingEstimator.NormFunction norm;
    if (normalize)
    {
        norm = TextFeaturizingEstimator.NormFunction.L2;
    }
    else
    {
        norm = TextFeaturizingEstimator.NormFunction.None;
    }

    var featurizerOptions = new TextFeaturizingEstimator.Options()
    {
        KeepDiacritics = false,
        KeepPunctuations = false,
        CaseMode = TextNormalizingEstimator.CaseMode.Lower,
        OutputTokensColumnName = "tokens",
        Norm = norm,
        CharFeatureExtractor = new WordBagEstimator.Options() { NgramLength = 3, UseAllLengths = false },
        WordFeatureExtractor = new WordBagEstimator.Options() { NgramLength = 2, UseAllLengths = true },
    };

    var trainingFile = FileHelper.GetTestFile("wikipedia-detox-250-line-data.tsv");

    // The environment is deliberately not wrapped in a using statement (the original using was
    // commented out) - presumably because the returned scorer still depends on it; confirm.
    var env = EnvHelper.NewTestEnvironment(seed: 1, conc: 1);

    // Pipeline
    var rawData = new TextLoader(env, loaderOptions).Load(new MultiFileSource(trainingFile));
    var featurized = TextFeaturizingEstimator.Create(env, featurizerOptions, rawData);

    // Train
    var trainerOptions = new SdcaLogisticRegressionBinaryTrainer.Options
    {
        LabelColumnName = "Label",
        FeatureColumnName = "Features"
    };
    var sdca = new SdcaLogisticRegressionBinaryTrainer(env, trainerOptions);
    var cachedView = new Microsoft.ML.Data.CacheDataView(env, featurized, prefetch: null);
    var fitted = sdca.Fit(cachedView);

    // Training roles come from the cached view; scoring roles from the uncached featurized data.
    var trainingRoles = new RoleMappedData(cachedView, label: "Label", feature: "Features");
    var scoringRoles = new RoleMappedData(featurized, label: "Label", feature: "Features");
    return ScoreUtils.GetScorer(fitted.Model, scoringRoles, env, trainingRoles.Schema);
}
/// <summary>
/// Loads the sentiment data set from <c>DataPath</c>, trains an SDCA logistic-regression pipeline on
/// 80% of it, prints evaluation metrics for the remaining 20%, and saves the model to <c>ModelPath</c>.
/// </summary>
/// <param name="mlContext">The ML.NET context used for every operation.</param>
/// <returns>The trained model.</returns>
private static ITransformer BuildTrainEvaluateAndSaveModel(MLContext mlContext)
{
    // STEP 1: Load the data and split it into training and test sets.
    var fullData = mlContext.Data.LoadFromTextFile<SentimentIssue>(DataPath, hasHeader: true);
    var split = mlContext.Data.TrainTestSplit(fullData, testFraction: 0.2);
    var trainSet = split.TrainSet;
    var testSet = split.TestSet;

    // STEP 2: Featurize the text column into "Features".
    var featurizer = mlContext.Transforms.Text.FeaturizeText(
        outputColumnName: "Features",
        inputColumnName: nameof(SentimentIssue.Text));

    // (OPTIONAL) Peek at 2 records of the full data view after the featurizer has been applied.
    ConsoleHelper.PeekDataViewInConsole(mlContext, fullData, featurizer, 2);

    // STEP 3: Choose the training algorithm and append it to the featurizer.
    var trainer = mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(
        labelColumnName: "Label",
        featureColumnName: "Features");
    var pipeline = featurizer.Append(trainer);

    // STEP 4: Train the model; the stopwatch covers the banner output and the fit.
    var timer = Stopwatch.StartNew();
    Console.WriteLine("=============== Training the model ===============");
    ITransformer trainedModel = pipeline.Fit(trainSet);
    timer.Stop();

    // Whole seconds only: the millisecond count is divided with integer arithmetic.
    Console.WriteLine($"***** Training time: {timer.ElapsedMilliseconds / 1000} seconds *****");

    // STEP 5: Evaluate the model on the held-out test set and print the metrics.
    Console.WriteLine("===== Evaluating Model's accuracy with Test data =====");
    var scored = trainedModel.Transform(testSet);
    var metrics = mlContext.BinaryClassification.Evaluate(
        data: scored,
        labelColumnName: "Label",
        scoreColumnName: "Score");
    ConsoleHelper.PrintBinaryClassificationMetrics(trainer.ToString(), metrics);

    // STEP 6: Persist the trained model to a .ZIP file.
    mlContext.Model.Save(trainedModel, trainSet.Schema, ModelPath);
    Console.WriteLine("The model is saved to {0}", ModelPath);

    return trainedModel;
}
/// <summary>
/// Trains the text-sentiment binary classifier: loads the training file, builds a
/// featurize-then-SDCA pipeline, fits it, and stores the trainer and the fitted model in
/// <c>_trainer</c> and <c>_model</c>. Progress and the elapsed fit time are written to the console.
/// </summary>
void IAiTest.Train()
{
    Console.WriteLine("=============== Binary Classification - TextSentiment Prediction ===============");

    string trainingPath = $"{RootFolder}/{TrainDataFile}";
    IDataView trainingData = _context.Data.LoadFromTextFile<SentimentData>(trainingPath, hasHeader: true);

    var featurizer = _context.Transforms.Text.FeaturizeText(
        outputColumnName: "Features",
        inputColumnName: nameof(SentimentData.Text));
    _trainer = _context.BinaryClassification.Trainers.SdcaLogisticRegression(
        labelColumnName: "Label",
        featureColumnName: "Features");
    var pipeline = featurizer.Append(_trainer);

    Console.WriteLine("=============== Create and Train the Model ===============");

    // Only the Fit call is timed.
    var timer = Stopwatch.StartNew();
    _model = pipeline.Fit(trainingData);
    timer.Stop();

    Console.WriteLine($" Total {timer.ElapsedMilliseconds} ms");
    Console.WriteLine("=============== End of training ===============");
    Console.WriteLine();
}
/// <summary>
/// Trains, saves and (for the text branch) evaluates a model on <c>this.data</c>, according to <c>this.type</c>.
/// </summary>
/// <remarks>
/// The data is split 80/20 into training and test sets. For <c>MLType.TextFeaturizingEstimator</c> a
/// featurize-then-SDCA pipeline is fitted, saved under <c>this.modelName</c> and evaluated into
/// <c>this.modelMetrics</c>. For <c>MLType.LightGbm</c> a LightGBM regression trainer (optionally
/// preceded by a column concatenation) is fitted and saved, and a 5-fold cross-validation is run
/// whose results are discarded. Any other type trains nothing. <c>this.isTaught</c> is set to true
/// in every case.
/// </remarks>
/// <returns>This instance, to allow call chaining.</returns>
public ML<T, T2> Run()
{
    IDataView allData = mlContext.Data.LoadFromEnumerable(this.data);
    DataOperationsCatalog.TrainTestData split = mlContext.Data.TrainTestSplit(allData, testFraction: 0.2);

    if (this.type == MLType.TextFeaturizingEstimator)
    {
        var textFeaturizer = mlContext.Transforms.Text.FeaturizeText(
            outputColumnName: "Features",
            inputColumnName: this.inputName);
        var sdcaTrainer = mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(
            labelColumnName: this.labelName,
            featureColumnName: "Features");
        var textPipeline = textFeaturizer.Append(sdcaTrainer);

        trainedModel = textPipeline.Fit(split.TrainSet);
        mlContext.Model.Save(trainedModel, split.TrainSet.Schema, this.modelName);

        IDataView scoredTestSet = trainedModel.Transform(split.TestSet);
        this.modelMetrics = mlContext.BinaryClassification.Evaluate(
            data: scoredTestSet,
            labelColumnName: this.labelName,
            scoreColumnName: this.scoreName);
    }
    else if (this.type == MLType.LightGbm)
    {
        // Names of the public instance properties; enumerated lazily, only if concatenation is needed.
        var propertyNames = this.thisType
            .GetType()
            .GetProperties(BindingFlags.Public | BindingFlags.Instance)
            .Select(property => property.Name);

        // No concatenation step when the data already carries a "Features" column.
        var concatenator = this.isFeaturesIncluded
            ? null
            : mlContext.Transforms.Concatenate("Features", propertyNames.ToArray());

        var boosterOptions = new GradientBooster.Options()
        {
            L2Regularization = 0,
            L1Regularization = 0.5
        };
        var lightGbmOptions = new LightGbmRegressionTrainer.Options()
        {
            NumberOfIterations = 100,
            LearningRate = 0.3227682f,
            NumberOfLeaves = 55,
            MinimumExampleCountPerLeaf = 10,
            UseCategoricalSplit = false,
            HandleMissingValue = true,
            UseZeroAsMissingValue = false,
            MinimumExampleCountPerGroup = 50,
            MaximumCategoricalSplitPointCount = 32,
            CategoricalSmoothing = 20,
            L2CategoricalRegularization = 5,
            Booster = boosterOptions,
            LabelColumnName = this.labelName,
            FeatureColumnName = "Features"
        };
        var lightGbm = mlContext.Regression.Trainers.LightGbm(lightGbmOptions);

        var regressionPipeline = concatenator?.Append(lightGbm);
        if (regressionPipeline != null)
        {
            trainedModel = regressionPipeline.Fit(split.TrainSet);
            mlContext.Model.Save(trainedModel, split.TrainSet.Schema, this.modelName);

            // The transformed test set and the cross-validation results are currently unused.
            IDataView scoredTestSet = trainedModel.Transform(split.TestSet);
            var foldResults = mlContext.Regression.CrossValidate(
                allData, regressionPipeline, numberOfFolds: 5, labelColumnName: this.labelName);
        }
        else
        {
            trainedModel = lightGbm.Fit(split.TrainSet);
            mlContext.Model.Save(trainedModel, split.TrainSet.Schema, this.modelName);

            // The transformed test set and the cross-validation results are currently unused.
            IDataView scoredTestSet = trainedModel.Transform(split.TestSet);
            var foldResults = mlContext.Regression.CrossValidate(
                allData, lightGbm, numberOfFolds: 5, labelColumnName: this.labelName);
        }

        // NOTE: this.modelMetrics is not assigned in this branch; the regression Evaluate call
        // was disabled in the original code.
    }

    // NOTE: a previously disabled summary message (AUC, AUPRC, accuracy, F1, positive/negative
    // recall built from modelMetrics) used to sit here.
    this.isTaught = true;
    return this;
}