/// <summary>
/// Builds a regression pipeline from the configured feature flags
/// (_includeDay, _includeMonth, _includeWeek) and fits a FastTreeTweedie
/// model on _trainData, predicting the column named by <paramref name="column"/>.
/// </summary>
/// <param name="column">Enum member whose name is copied into the "Label" column.</param>
/// <returns>The fitted model.</returns>
private ITransformer BuildAndTrainUsingParams(ColumnEnum column)
{
    var features = new List<string>();

    // Optional text-featurization steps, chained in declaration order.
    // IEstimator<ITransformer> is the common covariant type for all chain shapes,
    // which removes the need for three separate typed chain variables and the
    // triplicated trainer tail the previous version carried.
    IEstimator<ITransformer> featurizer = null;
    if (_includeDay)
    {
        featurizer = _mlContext.Transforms.Text.FeaturizeText("DayString", "Day");
        features.Add("DayString");
    }
    if (_includeMonth)
    {
        var monthFeaturizer = _mlContext.Transforms.Text.FeaturizeText("MonthString", "Month");
        featurizer = featurizer == null
            ? (IEstimator<ITransformer>)monthFeaturizer
            : featurizer.Append(monthFeaturizer);
        features.Add("MonthString");
    }
    if (_includeWeek)
    {
        // "Week" is used as-is — presumably already numeric; no featurization step.
        features.Add("Week");
    }

    // NOTE(review): if no flag is set, Concatenate receives an empty feature
    // list and fails at fit time — same failure mode as the original code.
    IEstimator<ITransformer> tail = _mlContext.Transforms.Concatenate("Features", features.ToArray())
        .Append(_mlContext.Transforms.CopyColumns("Label", System.Enum.GetName(typeof(ColumnEnum), column)))
        .Append(_mlContext.Regression.Trainers.FastTreeTweedie());

    var pipeline = featurizer == null ? tail : featurizer.Append(tail);
    return pipeline.Fit(_trainData);
}
/// <summary>
/// Trains a model over <c>this.data</c> according to <c>this.type</c>, saves it
/// to <c>this.modelName</c>, and marks the instance as taught.
/// </summary>
/// <returns>This instance, for call chaining.</returns>
public ML<T, T2> Run()
{
    IDataView trainingDataView = mlContext.Data.LoadFromEnumerable(this.data);
    DataOperationsCatalog.TrainTestData dataSplit = mlContext.Data
        .TrainTestSplit(trainingDataView, testFraction: 0.2);

    switch (this.type)
    {
        case MLType.TextFeaturizingEstimator:
        {
            // Text column -> "Features" vector -> SDCA binary classifier.
            TextFeaturizingEstimator dataProcessPipeline = mlContext.Transforms.Text
                .FeaturizeText(outputColumnName: "Features", inputColumnName: this.inputName);
            SdcaLogisticRegressionBinaryTrainer trainer = mlContext.BinaryClassification.Trainers
                .SdcaLogisticRegression(labelColumnName: this.labelName, featureColumnName: "Features");

            trainedModel = dataProcessPipeline.Append(trainer).Fit(dataSplit.TrainSet);
            mlContext.Model.Save(trainedModel, dataSplit.TrainSet.Schema, this.modelName);

            IDataView testSetTransform = trainedModel.Transform(dataSplit.TestSet);
            this.modelMetrics = mlContext.BinaryClassification
                .Evaluate(data: testSetTransform,
                          labelColumnName: this.labelName,
                          scoreColumnName: this.scoreName);
            break;
        }
        case MLType.LightGbm:
        {
            // Feature columns: every public instance property name.
            // NOTE(review): this reflects over this.thisType.GetType() — if
            // thisType is already a System.Type this enumerates Type's own
            // properties, not the row type's; confirm against the caller.
            // NOTE(review): the label property would also be concatenated into
            // "Features" (label leakage) unless filtered upstream — verify.
            var fields = this.thisType
                .GetType()
                .GetProperties(BindingFlags.Public | BindingFlags.Instance)
                .Select(p => p.Name);

            var trainer = mlContext.Regression.Trainers
                .LightGbm(new LightGbmRegressionTrainer.Options()
                {
                    NumberOfIterations = 100,
                    LearningRate = 0.3227682f,
                    NumberOfLeaves = 55,
                    MinimumExampleCountPerLeaf = 10,
                    UseCategoricalSplit = false,
                    HandleMissingValue = true,
                    UseZeroAsMissingValue = false,
                    MinimumExampleCountPerGroup = 50,
                    MaximumCategoricalSplitPointCount = 32,
                    CategoricalSmoothing = 20,
                    L2CategoricalRegularization = 5,
                    Booster = new GradientBooster.Options()
                    {
                        L2Regularization = 0,
                        L1Regularization = 0.5
                    },
                    LabelColumnName = this.labelName,
                    FeatureColumnName = "Features"
                });

            // When the input already carries a "Features" column train on it
            // directly; otherwise prepend a Concatenate step. Using the common
            // IEstimator<ITransformer> type removes the duplicated
            // fit/save/cross-validate branches the previous version carried.
            IEstimator<ITransformer> estimator = this.isFeaturesIncluded
                ? (IEstimator<ITransformer>)trainer
                : mlContext.Transforms.Concatenate("Features", fields.ToArray()).Append(trainer);

            trainedModel = estimator.Fit(dataSplit.TrainSet);
            mlContext.Model.Save(trainedModel, dataSplit.TrainSet.Schema, this.modelName);

            // 5-fold cross validation; result currently discarded — regression
            // evaluation over the test split is still to be wired up.
            _ = mlContext.Regression
                .CrossValidate(trainingDataView, estimator,
                    numberOfFolds: 5, labelColumnName: this.labelName);
            break;
        }
    }

    this.isTaught = true;
    return this;
}
/// <summary>
/// Loads the sentiment data set, trains an SDCA binary classifier over text
/// features, reports test-set metrics, and persists the model to ModelPath.
/// </summary>
/// <param name="mlContext">Shared ML.NET context.</param>
/// <returns>The trained model.</returns>
private static ITransformer BuildTrainEvaluateAndSaveModel(MLContext mlContext)
{
    // STEP 1: load the raw data and split it 80/20 into train/test sets.
    var dataView = mlContext.Data.LoadFromTextFile<SentimentIssue>(DataPath, hasHeader: true);
    var trainTestSplit = mlContext.Data.TrainTestSplit(dataView, testFraction: 0.2);
    var trainingData = trainTestSplit.TrainSet;
    var testData = trainTestSplit.TestSet;

    // STEP 2: turn the raw text column into the "Features" vector.
    var dataProcessPipeline = mlContext.Transforms.Text.FeaturizeText(
        outputColumnName: "Features", inputColumnName: nameof(SentimentIssue.Text));

    // (Optional) peek at a couple of transformed rows in the console.
    ConsoleHelper.PeekDataViewInConsole(mlContext, dataView, dataProcessPipeline, 2);

    // STEP 3: configure the trainer and assemble the full pipeline.
    var trainer = mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(
        labelColumnName: "Label", featureColumnName: "Features");
    var trainingPipeline = dataProcessPipeline.Append(trainer);

    // STEP 4: fit the pipeline, timing how long training takes.
    var watch = Stopwatch.StartNew();
    Console.WriteLine("=============== Training the model ===============");
    ITransformer trainedModel = trainingPipeline.Fit(trainingData);
    watch.Stop();
    long elapsedMs = watch.ElapsedMilliseconds;
    Console.WriteLine($"***** Training time: {elapsedMs / 1000} seconds *****");

    // STEP 5: score the held-out test set and print accuracy metrics.
    Console.WriteLine("===== Evaluating Model's accuracy with Test data =====");
    var predictions = trainedModel.Transform(testData);
    var metrics = mlContext.BinaryClassification.Evaluate(
        data: predictions, labelColumnName: "Label", scoreColumnName: "Score");
    ConsoleHelper.PrintBinaryClassificationMetrics(trainer.ToString(), metrics);

    // STEP 6: persist the trained model to a .ZIP file.
    mlContext.Model.Save(trainedModel, trainingData.Schema, ModelPath);
    Console.WriteLine("The model is saved to {0}", ModelPath);

    return trainedModel;
}