/// <summary>
/// Fits the stored training pipeline on <paramref name="trainingData"/>, keeps the fitted model in
/// <c>TrainedModel</c> and creates the matching prediction function in <c>PredictionFunction</c>.
/// </summary>
/// <param name="trainingData">Data view the pipeline is fitted on.</param>
/// <returns>The value of <c>TrainedModel</c> after fitting.</returns>
public ITransformer Train(IDataView trainingData)
{
    // Fit first: the prediction function below is created from the freshly fitted model.
    TrainedModel = _trainingPipeline.Fit(trainingData);

    PredictionFunction =
        TrainedModel.MakePredictionFunction<DemandObservation, DemandPrediction>(_mlcontext);

    return TrainedModel;
}
/// <summary>
/// Sample for the IndicateMissingValues transform: builds three in-memory rows, adds a
/// "MissingIndicator" column derived from "Features", and prints every row to the console.
/// </summary>
public static void Example()
{
    // The MLContext is the entry point for ML.NET operations.
    var context = new MLContext();

    // NaN marks a missing value, both inside Features and in the last row's Label.
    var rows = new List<DataPoint>
    {
        new DataPoint { Label = 3, Features = new float[3] { 1, 1, 0 } },
        new DataPoint { Label = 32, Features = new float[3] { 0, float.NaN, 1 } },
        new DataPoint { Label = float.NaN, Features = new float[3] { -1, float.NaN, -3 } },
    };

    // IDataView is the general data type used by ML.NET.
    var inputView = context.Data.LoadFromEnumerable(rows);

    // Produces a boolean vector that is 'true' wherever the matching "Features" slot is NaN.
    IEstimator<ITransformer> estimator =
        context.Transforms.IndicateMissingValues("MissingIndicator", "Features");

    // Transform is lazy: nothing is evaluated until the rows are enumerated below.
    var fitted = estimator.Fit(inputView);
    var indicatedView = fitted.Transform(inputView);

    // Read the result back as SampleDataTransformed objects.
    var outputRows = context.Data.CreateEnumerable<SampleDataTransformed>(indicatedView, reuseRowObject: false);

    // Renders a vector as "[a b c ]"; every slot is followed by a single space.
    string FormatVector(object[] vector) =>
        "[" + string.Concat(vector.Select(slot => $"{slot} ")) + "]";

    foreach (var row in outputRows)
    {
        var features = FormatVector(row.Features.Cast<object>().ToArray());
        var indicator = FormatVector(row.MissingIndicator.Cast<object>().ToArray());
        Console.WriteLine($"Label: {row.Label} Features: {features} MissingIndicator: {indicator}");
    }

    // Expected output (note the space the printer leaves before each closing bracket):
    //
    // Label: 3 Features: [1 1 0 ] MissingIndicator: [False False False ]
    // Label: 32 Features: [0 NaN 1 ] MissingIndicator: [False True False ]
    // Label: NaN Features: [-1 NaN -3 ] MissingIndicator: [False True False ]
}
/// <summary>
/// Loads the iris training table through a <c>DatabaseLoader</c>, trains a LightGBM multiclass
/// pipeline on it and asserts the predicted label for two hand-written samples.
/// </summary>
public void IrisLightGbm()
{
    // Only executed on Windows; see https://github.com/dotnet/machinelearning/issues/4156
    bool isWindows = RuntimeInformation.IsOSPlatform(OSPlatform.Windows);
    if (!isWindows)
    {
        return;
    }

    var context = new MLContext(seed: 1);

    // Source: every row of the iris training table.
    var connection = GetConnectionString(TestDatasets.irisDb.name);
    var query = $@"SELECT * FROM ""{TestDatasets.irisDb.trainFilename}""";
    var source = new DatabaseSource(SqlClientFactory.Instance, connection, query);

    // The four measurement columns, in the order they are concatenated into "Features".
    string[] featureNames = { "SepalLength", "SepalWidth", "PetalLength", "PetalWidth" };

    var columns = new DatabaseLoader.Column[]
    {
        new DatabaseLoader.Column { Name = "Label", Type = DbType.Int32 },
        new DatabaseLoader.Column { Name = featureNames[0], Type = DbType.Single },
        new DatabaseLoader.Column { Name = featureNames[1], Type = DbType.Single },
        new DatabaseLoader.Column { Name = featureNames[2], Type = DbType.Single },
        new DatabaseLoader.Column { Name = featureNames[3], Type = DbType.Single }
    };

    var irisData = context.Data.CreateDatabaseLoader(columns).Load(source);

    // Label -> key, features -> vector, LightGBM trainer, predicted key -> original label value.
    IEstimator<ITransformer> estimator = context.Transforms.Conversion.MapValueToKey("Label")
        .Append(context.Transforms.Concatenate("Features", featureNames))
        .Append(context.MulticlassClassification.Trainers.LightGbm())
        .Append(context.Transforms.Conversion.MapKeyToValue("PredictedLabel"));

    var fittedModel = estimator.Fit(irisData);
    var predictor = context.Model.CreatePredictionEngine<IrisData, IrisPrediction>(fittedModel);

    var firstSample = new IrisData
    {
        SepalLength = 4.5f,
        SepalWidth = 5.6f,
        PetalLength = 0.5f,
        PetalWidth = 0.5f,
    };
    Assert.Equal(0, predictor.Predict(firstSample).PredictedLabel);

    var secondSample = new IrisData
    {
        SepalLength = 4.9f,
        SepalWidth = 2.4f,
        PetalLength = 3.3f,
        PetalWidth = 1.0f,
    };
    Assert.Equal(1, predictor.Predict(secondSample).PredictedLabel);
}
/// <summary>
/// Runs the whole image-classification workflow: loads and splits the image data, fits the
/// pre-processing transformers, trains the classifier, then evaluates and saves two model
/// variants ("fromFile" and "inMemory"). Optionally makes one prediction at the end.
/// </summary>
/// <param name="testImagePath">
/// Optional path handed to <c>MakeSinglePrediction</c>; skipped when null, empty or whitespace.
/// </param>
public void TrainModel(string testImagePath = null)
{
    #region Notes: Fundamental components
    /* Main components:
     *   IDataView,
     *   ITransformer,
     *   IEstimator
     */
    #endregion Notes: Fundamental components

    #region Notes: Conventional column names
    /* Conventional column names:
     *   Input:
     *     Label
     *     Features
     *   Output:
     *     PredictedLabel
     *     Score
     */
    #endregion Notes: Conventional column names

    #region Notes: Usual training process
    /* Usual training process:
     *   1. Load training/test datasets (IDataView)
     *   2. Build training pipeline (IEstimator)
     *     2.1 Construct pre-processing pipeline (IEstimator) (optional)
     *     2.2 Configure trainer (IEstimator)
     *     2.3 Construct post-processing pipeline (optional)
     *     2.4 Construct training pipeline (pre-processing pipeline + trainer + post-processing pipeline)
     *   3. Train model using training dataset (ITransformer)
     *   4. Evaluate model performance
     *     4.1 Make predictions on test data using trained model (IDataView)
     *     4.2 Compute evaluation metrics (metrics statistics)
     *   (optional) Retrain on full dataset (ITransformer)
     *   5. Save model to filesystem
     *   6. Make single prediction
     */
    #endregion Notes: Usual training process

    // Load data: shuffle, then hold out 20% as the test set.
    IDataView imagesInfo = LoadData(_dataFolder);
    imagesInfo = mlContext.Data.ShuffleRows(imagesInfo);
    DataOperationsCatalog.TrainTestData dataSplit = mlContext.Data.TrainTestSplit(imagesInfo, testFraction: 0.2);

    // Pre-processing: read the raw image bytes and map the label to a key column.
    IEstimator<ITransformer> readImageBytesEstimator = mlContext.Transforms.LoadRawImageBytes(
        inputColumnName: nameof(ImageFileInputModel.ImagePath),
        outputColumnName: nameof(ImageInputModel.Image),
        imageFolder: _dataFolder);

    IEstimator<ITransformer> labelToKeyEstimator = mlContext.Transforms.Conversion.MapValueToKey(
        inputColumnName: nameof(BaseInputModel.Label),
        outputColumnName: "LabelAsKey",
        keyOrdinality: Microsoft.ML.Transforms.ValueToKeyMappingEstimator.KeyOrdinality.ByValue);

    // Both pre-processing steps are fitted on the full (shuffled) data set, not only on the train split.
    ITransformer labelToKeyTransformer = labelToKeyEstimator.Fit(imagesInfo);
    ITransformer readImageBytesTransformer = readImageBytesEstimator.Fit(imagesInfo);
    ITransformer preProcessingTransformer = labelToKeyTransformer.Append(readImageBytesTransformer);

    // Core model training pipeline. The pre-processed test split doubles as the trainer's validation set.
    IDataView validationSet = preProcessingTransformer.Transform(dataSplit.TestSet);

    ImageClassificationTrainer.Options trainerOptions = new ImageClassificationTrainer.Options
    {
        FeatureColumnName = nameof(ImageInputModel.Image),
        LabelColumnName = "LabelAsKey",
        Arch = ImageClassificationTrainer.Architecture.ResnetV2101,
        Epoch = 100,
        BatchSize = 200,
        LearningRate = 0.05f,
        MetricsCallback = (m) => Console.WriteLine(m),
        ValidationSet = validationSet,
        WorkspacePath = _workspaceFolder
    };

    IEstimator<ITransformer> trainerEstimator =
        mlContext.MulticlassClassification.Trainers.ImageClassification(trainerOptions);

    // Post-processing: map the predicted key back to the original label value.
    IEstimator<ITransformer> keyToLabelEstimator = mlContext.Transforms.Conversion.MapKeyToValue(
        inputColumnName: "PredictedLabel",
        outputColumnName: nameof(PredictionModel.PredictedLabel));

    IEstimator<ITransformer> trainingPipeline = trainerEstimator.Append(keyToLabelEstimator);

    // Train
    #region Notes: On metadata
    /*
     * Metadata source: https://aka.ms/mlnet-resources/resnet_v2_101_299.meta
     * System.IO.Path.GetTempPath() - C:\Users\User\AppData\Local\Temp\
     */
    #endregion Notes: On metadata
    ITransformer trainedModel = Train(trainingPipeline, preProcessingTransformer.Transform(dataSplit.TrainSet));

    #region Notes: Model composition
    //var extractPixelsEst = mlContext.Transforms.ExtractPixels();
    //var resizeEst = mlContext.Transforms.ResizeImages();
    //IEstimator<ITransformer> est = mlContext.Model.LoadTensorFlowModel("MODEL_PATH")
    //.ScoreTensorFlowModel(
    //outputColumnNames: new[] { "some-name" },
    //inputColumnNames: new[] { "Features" }, addBatchDimensionInput: true);
    #endregion Notes: Model composition

    // Evaluate/Save the file-system model: takes an image path and reads the bytes itself.
    ITransformer fileSystemModel = preProcessingTransformer.Append(trainedModel);
    Evaluate(fileSystemModel, dataSplit.TestSet);
    SaveModel(
        fileSystemModel,
        new DataViewSchema.Column[]
        {
            imagesInfo.Schema.First(x => x.Name == nameof(ImageFileInputModel.ImagePath)),
            imagesInfo.Schema.First(x => x.Name == nameof(BaseInputModel.Label))
        },
        ResolveModelFileName("fromFile"));

    // Evaluate/Save the in-memory model: the image bytes are supplied up front.
    // Evaluation must use the held-out test split (the original transformed the TRAIN split here,
    // which reported training-set metrics under a "test" name).
    IDataView testSetImageExtracted = readImageBytesTransformer.Transform(dataSplit.TestSet);
    ITransformer inMemoryModel = labelToKeyTransformer.Append(trainedModel);
    Evaluate(inMemoryModel, testSetImageExtracted);
    SaveModel(
        inMemoryModel,
        new DataViewSchema.Column[]
        {
            testSetImageExtracted.Schema.First(x => x.Name == nameof(ImageFileInputModel.ImagePath)),
            testSetImageExtracted.Schema.First(x => x.Name == nameof(BaseInputModel.Label))
        },
        ResolveModelFileName("inMemory"));

    // Try a single prediction when an image path was supplied.
    if (!string.IsNullOrWhiteSpace(testImagePath))
    {
        MakeSinglePrediction(testImagePath);
    }
}
/// <summary>
/// Fits <paramref name="trainingPipeline"/> on <paramref name="trainingDataView"/> and returns the result.
/// </summary>
/// <param name="mlContext">Not used by this method.</param>
/// <param name="trainingDataView">Data the pipeline is fitted on.</param>
/// <param name="trainingPipeline">Estimator chain to fit.</param>
/// <returns>The fitted transformer.</returns>
private ITransformer TrainModel(MLContext mlContext, IDataView trainingDataView, IEstimator<ITransformer> trainingPipeline)
    => trainingPipeline.Fit(trainingDataView);
/// <summary>
/// Fits <paramref name="pipeline"/> on <paramref name="trainingData"/>. Virtual, so derived classes
/// can replace the training step.
/// </summary>
/// <param name="trainingData">Data the pipeline is fitted on.</param>
/// <param name="pipeline">Estimator to fit.</param>
/// <returns>The fitted transformer.</returns>
protected virtual ITransformer BuildAndTrainModel(IDataView trainingData, IEstimator<ITransformer> pipeline)
{
    ITransformer fittedModel = pipeline.Fit(trainingData);
    return fittedModel;
}
/// <summary>
/// Entry point of the movie-recommendation sample: loads the saved model when one exists at
/// <c>ModelPath</c>, otherwise trains a matrix-factorization model and saves it; then evaluates
/// the model on the test file and prints a recommendation for one user/movie pair.
/// </summary>
static void Main(string[] args)
{
    Helper.PrintLine("创建 MLContext...");
    MLContext mlContext = new MLContext(seed: 0);

    IDataView testDataView = mlContext.Data.LoadFromTextFile<MovieRating>(
        TestingDataPath, hasHeader: true, separatorChar: ',');

    ITransformer model;
    if (!File.Exists(ModelPath))
    {
        // Training data set.
        IDataView trainingDataView = mlContext.Data.LoadFromTextFile<MovieRating>(
            TrainingDataPath, hasHeader: true, separatorChar: ',');

        // Build the pipeline: encode both ids as keys, then matrix factorization.
        Helper.PrintLine("创建神经网络管道...");
        var trainerOptions = new MatrixFactorizationTrainer.Options
        {
            MatrixColumnIndexColumnName = "userIdEncoded",
            MatrixRowIndexColumnName = "movieIdEncoded",
            LabelColumnName = "Label",
            NumberOfIterations = 20,
            ApproximationRank = 100
        };

        IEstimator<ITransformer> estimator = mlContext.Transforms.Conversion
            .MapValueToKey(outputColumnName: "userIdEncoded", inputColumnName: "userId")
            .Append(mlContext.Transforms.Conversion.MapValueToKey(outputColumnName: "movieIdEncoded", inputColumnName: "movieId"))
            .Append(mlContext.Recommendation().Trainers.MatrixFactorization(trainerOptions));

        // Train.
        Helper.PrintSplit();
        Helper.PrintLine("开始训练神经网络...");
        model = estimator.Fit(trainingDataView);
        Helper.PrintLine("训练神经网络完成");
        Helper.PrintSplit();

        // Persist the trained model so the next run can load it.
        Helper.PrintLine($"导出神经网络模型...");
        mlContext.Model.Save(model, trainingDataView.Schema, ModelPath);
    }
    else
    {
        // A model file already exists: load it (the input schema it reports is not needed here).
        Helper.PrintLine("加载神经网络模型...");
        model = mlContext.Model.Load(ModelPath, out DataViewSchema _);
    }

    // Evaluate on the test set.
    Helper.PrintLine("预测:");
    var scoredTestData = model.Transform(testDataView);
    var metrics = mlContext.Regression.Evaluate(scoredTestData, labelColumnName: "Label", scoreColumnName: "Score");
    Helper.PrintLine($"R^2: {metrics.RSquared:0.##}");
    Helper.PrintLine($"RMS error: {metrics.RootMeanSquaredError:0.##}");

    // Single prediction: recommend when the score, rounded to one decimal, exceeds 3.5.
    var predictionEngine = mlContext.Model.CreatePredictionEngine<MovieRating, MovieRatingPrediction>(model);
    var testInput = new MovieRating { userId = 6, movieId = 10 };
    var movieRatingPrediction = predictionEngine.Predict(testInput);

    bool isRecommended = Math.Round(movieRatingPrediction.Score, 1) > 3.5;
    string verdict = isRecommended ? "is recommended" : "is not recommended";
    Helper.PrintLine($"Movie {testInput.movieId} {verdict} for user {testInput.userId}");

    Helper.Exit(0);
}
/// <summary>
/// Trains a FastTree binary classifier on the CSV at <paramref name="trainingFileName"/>, saves it to
/// <c>ModelPath</c>, then evaluates it (with feature contributions appended) on the CSV at
/// <paramref name="testFileName"/> and prints the metrics to the console.
/// </summary>
/// <param name="trainingFileName">Comma-separated training file without a header row.</param>
/// <param name="testFileName">Comma-separated test file without a header row.</param>
/// <remarks>
/// If either file does not exist, a message is written to the console and the method returns
/// without training.
/// </remarks>
public void Train(string trainingFileName, string testFileName)
{
    if (!File.Exists(trainingFileName))
    {
        Console.WriteLine($"Failed to find training data file ({trainingFileName})");
        return;
    }

    if (!File.Exists(testFileName))
    {
        Console.WriteLine($"Failed to find test data file ({testFileName})");
        return;
    }

    var trainingDataView = MlContext.Data.LoadFromTextFile<CarInventory>(trainingFileName, ',', hasHeader: false);

    // Concatenate the CarInventory properties selected by ToPropertyList into "Features", then normalize.
    IEstimator<ITransformer> dataProcessPipeline = MlContext.Transforms
        .Concatenate("Features", typeof(CarInventory).ToPropertyList<CarInventory>(nameof(CarInventory.Label)))
        .Append(MlContext.Transforms.NormalizeMeanVariance(
            inputColumnName: "Features",
            outputColumnName: "FeaturesNormalizedByMeanVar"));

    var trainer = MlContext.BinaryClassification.Trainers.FastTree(
        labelColumnName: nameof(CarInventory.Label),
        featureColumnName: "FeaturesNormalizedByMeanVar",
        numberOfLeaves: 2,
        numberOfTrees: 1000,
        minimumExampleCountPerLeaf: 1,
        learningRate: 0.2);

    // The model is trained on the whole training file; evaluation uses the separate test file,
    // so no train/test split of the training data is needed.
    var trainingPipeline = dataProcessPipeline.Append(trainer);
    var trainedModel = trainingPipeline.Fit(trainingDataView);

    MlContext.Model.Save(trainedModel, trainingDataView.Schema, ModelPath);

    // Append a feature-contribution step, fitted on the pre-processed training data.
    var preparedTrainingData = dataProcessPipeline.Fit(trainingDataView).Transform(trainingDataView);
    var evaluationPipeline = trainedModel.Append(
        MlContext.Transforms
            .CalculateFeatureContribution(trainedModel.LastTransformer)
            .Fit(preparedTrainingData));

    var testDataView = MlContext.Data.LoadFromTextFile<CarInventory>(testFileName, ',', hasHeader: false);
    var testSetTransform = evaluationPipeline.Transform(testDataView);

    var modelMetrics = MlContext.BinaryClassification.Evaluate(
        data: testSetTransform,
        labelColumnName: nameof(CarInventory.Label),
        scoreColumnName: "Score");

    Console.WriteLine($"Accuracy: {modelMetrics.Accuracy:P2}");
    Console.WriteLine($"Area Under Curve: {modelMetrics.AreaUnderRocCurve:P2}");
    Console.WriteLine($"Area under Precision recall Curve: {modelMetrics.AreaUnderPrecisionRecallCurve:P2}");
    Console.WriteLine($"F1Score: {modelMetrics.F1Score:P2}");
    Console.WriteLine($"LogLoss: {modelMetrics.LogLoss:#.##}");
    Console.WriteLine($"LogLossReduction: {modelMetrics.LogLossReduction:#.##}");
    Console.WriteLine($"PositivePrecision: {modelMetrics.PositivePrecision:#.##}");
    Console.WriteLine($"PositiveRecall: {modelMetrics.PositiveRecall:#.##}");
    Console.WriteLine($"NegativePrecision: {modelMetrics.NegativePrecision:#.##}");
    Console.WriteLine($"NegativeRecall: {modelMetrics.NegativeRecall:P2}");
}
/// <summary>
/// Entry point of the transaction-classification sample. Loads the data, splits it into
/// train / test / consumption sets, trains a linear SVM and a FastTree (gradient boosting) model,
/// prints their metrics, cross-validates the SVM trainer, saves the SVM model and finally
/// predicts every row of the consumption set.
/// </summary>
static void Main(string[] args)
{
    // Number of cross-validation folds; the result table below adapts to this value.
    const int numberOfFolds = 10;

    // Fraction held out by each of the two consecutive splits.
    const double holdOutFraction = 0.3;

    //###############################################################
    // PROCESS INITIALISATION
    //###############################################################

    // A fixed seed keeps runs reproducible.
    MLContext mlContext = new MLContext(seed: 1);

    // Writes a data view as ';'-separated text with header row and schema.
    void SaveDataView(IDataView view, string path)
    {
        using (var fileStream = File.Create(path))
        {
            mlContext.Data.SaveAsText(view, fileStream, separatorChar: ';', headerRow: true, schema: true);
        }
    }

    // Input rows are described by the TransactionObservation class.
    IDataView originalFullData = mlContext.Data.LoadFromTextFile<TransactionObservation>(
        _DataPath,
        separatorChar: ';',
        hasHeader: true);

    //###############################################################
    // BUILD THE DATA SETS
    //###############################################################

    // First split: train = 0.7, (test + consumption) = 0.3.
    var trainVsRest = mlContext.Data.TrainTestSplit(originalFullData, testFraction: holdOutFraction, seed: 1);
    IDataView trainingDataView = trainVsRest.TrainSet;

    // Second split of the remaining 0.3: test = 0.7 * 0.3 = 0.21, consumption = 0.3 * 0.3 = 0.09.
    var testVsConsumo = mlContext.Data.TrainTestSplit(trainVsRest.TestSet, testFraction: holdOutFraction, seed: 1);
    IDataView testDataView = testVsConsumo.TrainSet;
    IDataView ConsumoDataView = testVsConsumo.TestSet;

    SaveDataView(trainingDataView, _salida_trainDataPath);
    SaveDataView(testDataView, _salida_testDataPath);
    SaveDataView(ConsumoDataView, _salida_ConsumoDataPath);

    //###############################################################
    // FEATURE SELECTION
    //###############################################################

    // Every column except the output ("Label") and "Time" (carries no information) is a feature.
    string[] featureColumnNames = trainingDataView.Schema
        .Select(column => column.Name)
        .Where(name => name != "Label" && name != "Time")
        .ToArray();

    //###############################################################
    // DATA TRANSFORMATION --> pipeline
    //###############################################################

    // Concatenate the features, drop "Time", then normalize the feature vector.
    IEstimator<ITransformer> pipeline = mlContext.Transforms.Concatenate("Features", featureColumnNames)
        .Append(mlContext.Transforms.DropColumns(new string[] { "Time" }))
        .Append(mlContext.Transforms.NormalizeMeanVariance(
            inputColumnName: "Features",
            outputColumnName: "FeaturesNormalized"));

    // The transformed training data is saved and later reused for cross-validation.
    IDataView transformedData = pipeline.Fit(trainingDataView).Transform(trainingDataView);
    SaveDataView(transformedData, _salida_transformationData);

    //###############################################################
    // TRAINING ALGORITHMS --> trainingPipeline
    //###############################################################

    // 1. SVM (Support Vector Machine)
    var trainer_svm = mlContext.BinaryClassification.Trainers.LinearSvm(
        labelColumnName: "Label",
        featureColumnName: "FeaturesNormalized",
        numberOfIterations: 10);
    IEstimator<ITransformer> trainingPipeline_svm = pipeline.Append(trainer_svm);

    // 2. GBA (Gradient Boosting Algorithm)
    var trainer_boost = mlContext.BinaryClassification.Trainers.FastTree(
        labelColumnName: "Label",
        featureColumnName: "FeaturesNormalized",
        numberOfLeaves: 20,
        numberOfTrees: 100,
        minimumExampleCountPerLeaf: 10,
        learningRate: 0.2);
    IEstimator<ITransformer> trainingPipeline_boost = pipeline.Append(trainer_boost);

    //###############################################################
    // MODEL TRAINING
    //###############################################################
    Console.WriteLine($"\n************************************************************");
    Console.WriteLine($"* Entrenamiento del Modelo calculado con el Algoritmo SVM ");
    Console.WriteLine($"*-----------------------------------------------------------");
    var watch_svm = System.Diagnostics.Stopwatch.StartNew();
    var model_svm = trainingPipeline_svm.Fit(trainingDataView);
    watch_svm.Stop();
    var elapseds_svm = watch_svm.ElapsedMilliseconds * 0.001;
    Console.WriteLine($"El entrenamiento SVM ha tardado: {elapseds_svm:#.##} s\n");

    Console.WriteLine($"\n************************************************************");
    Console.WriteLine($"* Entrenamiento del Modelo calculado con el Algoritmo GBA ");
    Console.WriteLine($"*-----------------------------------------------------------");
    var watch_boost = System.Diagnostics.Stopwatch.StartNew();
    var model_boost = trainingPipeline_boost.Fit(trainingDataView);
    watch_boost.Stop();
    var elapseds_boost = watch_boost.ElapsedMilliseconds * 0.001;
    Console.WriteLine($"El entrenamiento GBA ha tardado: {elapseds_boost:#.##} s\n");

    //###############################################################
    // MODEL EVALUATION
    //###############################################################

    // Score the test set with both models.
    var predictions_svm = model_svm.Transform(testDataView);
    var predictions_boost = model_boost.Transform(testDataView);

    // The SVM model is not probability based, hence the non-calibrated evaluation.
    var metrics_svm = mlContext.BinaryClassification.EvaluateNonCalibrated(
        data: predictions_svm,
        labelColumnName: "Label",
        scoreColumnName: "Score");
    var metrics_boost = mlContext.BinaryClassification.Evaluate(
        data: predictions_boost,
        labelColumnName: "Label",
        scoreColumnName: "Score");

    // SVM metrics
    Console.WriteLine($"\n************************************************************");
    Console.WriteLine($"* Métricas para el Modelo calculado con el Algoritmo SVM ");
    Console.WriteLine($"*-----------------------------------------------------------");
    Console.WriteLine($"* SVM Positive Precision: {metrics_svm.PositivePrecision:0.##}");
    Console.WriteLine($"* SVM Positive Recall: {metrics_svm.PositiveRecall:0.##}");
    Console.WriteLine($"* SVM Negative Precision: {metrics_svm.NegativePrecision:0.##}");
    Console.WriteLine($"* SVM Negative Recall: {metrics_svm.NegativeRecall:0.##}");
    Console.WriteLine($"* SVM Accuracy: {metrics_svm.Accuracy:P2}");
    Console.WriteLine($"* SVM F1Score: {metrics_svm.F1Score:P2}\n");

    // GBA metrics
    Console.WriteLine($"\n************************************************************");
    Console.WriteLine($"* Métricas para el Modelo calculado con el Algoritmo GBA ");
    Console.WriteLine($"*-----------------------------------------------------------");
    Console.WriteLine($"* GBA Positive Precision: {metrics_boost.PositivePrecision:0.##}");
    Console.WriteLine($"* GBA Positive Recall: {metrics_boost.PositiveRecall:0.##}");
    Console.WriteLine($"* GBA Negative Precision: {metrics_boost.NegativePrecision:0.##}");
    Console.WriteLine($"* GBA Negative Recall: {metrics_boost.NegativeRecall:0.##}");
    Console.WriteLine($"* GBA Accuracy: {metrics_boost.Accuracy:P2}");
    Console.WriteLine($"* GBA F1Score: {metrics_boost.F1Score:P2}\n");

    //###############################################################
    // CROSS-VALIDATION
    //###############################################################
    Console.WriteLine($"\n*****************************************");
    Console.WriteLine($"* Validación Cruzada del Algoritmo SVM ");
    Console.WriteLine($"*----------------------------------------");
    var watch_CV_SVM = System.Diagnostics.Stopwatch.StartNew();

    // Only the trainer is cross-validated, on the already transformed training data.
    var cvResults_svm = mlContext.BinaryClassification.CrossValidateNonCalibrated(
        transformedData,
        trainer_svm,
        numberOfFolds: numberOfFolds,
        labelColumnName: "Label");
    watch_CV_SVM.Stop();
    var elapseds_CV_SVM = watch_CV_SVM.ElapsedMilliseconds * 0.001;
    Console.WriteLine($"La Validación Cruzada del Algoritmo SVM ha tardado: {elapseds_CV_SVM:#.##} s\n");

    // Sort the folds once, best F1 score first; both metric columns share this order.
    var foldsByF1 = cvResults_svm
        .OrderByDescending(fold => fold.Metrics.F1Score)
        .ToArray();
    double[] F1_models = foldsByF1.Select(fold => fold.Metrics.F1Score).ToArray();
    double[] Accu_models = foldsByF1.Select(fold => fold.Metrics.Accuracy).ToArray();

    double media_F1 = F1_models.Average();
    double media_Accu = Accu_models.Average();

    // Print one table row per fold, followed by the averages.
    Console.WriteLine($"\n**********************************************************");
    Console.WriteLine($"* Resultado de la Validación Cruzada del Algoritmo SVM ");
    Console.WriteLine($"*---------------------------------------------------------");
    Console.WriteLine($"| MODEL_N | MEDIDA F1 SCORE | MEDIDA ACCURACY |");
    for (int i = 0; i < foldsByF1.Length; i++)
    {
        Console.WriteLine($"| Model_{i + 1} | {F1_models[i]:P2} | {Accu_models[i]:P2} |");
    }
    Console.WriteLine($"La F1 Score media es igual a: {media_F1:P2}");
    Console.WriteLine($"La Accuracy media es igual a: {media_Accu:P2}\n");

    //###############################################################
    // MODEL SELECTION
    //###############################################################

    // The saved model is the SVM pipeline fitted on the training split (model_svm). The fold
    // models come from cross-validating the trainer alone, so they are not saved here.
    mlContext.Model.Save(model_svm, trainingDataView.Schema, _salida_modelPath);

    //######################################
    // MODEL CONSUMPTION
    //######################################

    // Predictions are described by the TransactionPrediction class.
    var predictionEngine = mlContext.Model
        .CreatePredictionEngine<TransactionObservation, TransactionPrediction>(model_svm);

    Console.WriteLine($"\n**********************************");
    Console.WriteLine($"--- Predicción ConsumoDataView ---");
    Console.WriteLine($"----------------------------------");

    var consumoRows = mlContext.Data
        .CreateEnumerable<TransactionObservation>(ConsumoDataView, reuseRowObject: false)
        .ToList();

    foreach (var consumoRow in consumoRows)
    {
        // Predict() scores a single instance.
        var prediction = predictionEngine.Predict(consumoRow);
        Console.WriteLine($"Label: {prediction.Label:.##}");
        Console.WriteLine($"Predicted Label: {prediction.PredictedLabel:#.##}");
        // SVM is not probability based, so no Probability is printed.
        Console.WriteLine($"Score: {prediction.Score:.##}");
        Console.WriteLine($"-------------------");
    }
}
// Calibrated version below.
// Non-calibrated alternative signature:
//public static IReadOnlyList<TrainCatalogBase.CrossValidationResult<BinaryClassificationMetrics>> BuildAndTrain(MLContext mlContext, IDataView splitTrainSet)

/// <summary>
/// Featurizes the "UserAbility" and "QuestionDifficulty" columns into "Features", stores the transformed
/// data in <c>transformedDataStage2</c>, and runs a 10-fold cross-validation of the Prior binary
/// trainer on it. The elapsed time is printed to the console.
/// </summary>
/// <param name="mlContext">Context used to build the transforms and the trainer.</param>
/// <param name="splitTrainSet">Training split that is transformed and cross-validated.</param>
/// <returns>One cross-validation result per fold.</returns>
public static IReadOnlyList<TrainCatalogBase.CrossValidationResult<CalibratedBinaryClassificationMetrics>> BuildAndTrain(MLContext mlContext, IDataView splitTrainSet)
{
    // Earlier variant featurized a single column:
    //var estimator = mlContext.Transforms.Text.FeaturizeText(outputColumnName: "Features", inputColumnName: nameof(QuestionData.QuestionText))

    // Data preparation only; the algorithm used to be part of this pipeline as well.
    var difficultyAndConcat = mlContext.Transforms.Text
        .FeaturizeText(inputColumnName: "QuestionDifficulty", outputColumnName: "QuestionDifficultyFeaturized")
        .Append(mlContext.Transforms.Concatenate("Features", "UserAbilityFeaturized", "QuestionDifficultyFeaturized"));

    IEstimator<ITransformer> transformedDataStage1 = mlContext.Transforms.Text
        .FeaturizeText(inputColumnName: "UserAbility", outputColumnName: "UserAbilityFeaturized")
        .Append(difficultyAndConcat);

    // Stopwatch usage follows
    // https://docs.microsoft.com/en-us/dotnet/api/system.diagnostics.stopwatch?redirectedfrom=MSDN&view=netframework-4.8
    Console.WriteLine("===================== Starting Stopwatch ====================");
    Stopwatch timer = Stopwatch.StartNew();

    // Fit + Transform turn the raw split into the data the algorithm is run on.
    var preparationModel = transformedDataStage1.Fit(splitTrainSet);
    transformedDataStage2 = preparationModel.Transform(splitTrainSet);
    Console.WriteLine(splitTrainSet.Schema.ToString());

    // The Prior trainer is used with the calibrated CrossValidate.
    IEstimator<ITransformer> priorTrainer = mlContext.BinaryClassification.Trainers.Prior();
    var foldResults = mlContext.BinaryClassification.CrossValidate(transformedDataStage2, priorTrainer, numberOfFolds: 10);

    // Non-calibrated alternative:
    //IEstimator<ITransformer> svmLinAlg = mlContext.BinaryClassification.Trainers.FastForest();
    //var cvResults = mlContext.BinaryClassification.CrossValidateNonCalibrated(transformedDataStage2, svmLinAlg, numberOfFolds: 10);

    // Each fold result contains:
    //   1. the TrainTestData object for that fold
    //   2. a model for that fold
    //   3. the metrics for that fold
    Console.WriteLine("===================== Stopwatch Stopped=========================");
    timer.Stop();

    TimeSpan elapsed = timer.Elapsed;
    string elapsedTime = String.Format(
        "{0:00}:{1:00}:{2:00}.{3:00}",
        elapsed.Hours,
        elapsed.Minutes,
        elapsed.Seconds,
        elapsed.Milliseconds / 10);
    Console.WriteLine($"Time taken (hh:mm:ss:msms): {elapsedTime}");

    return foldResults;
}
/// <summary>
/// Entry point of the house-price sample: trains an SDCA regression on four in-memory rows,
/// evaluates it on four test rows, saves the model to "model.zip" and calls <c>MakePrediction</c>.
/// </summary>
static void Main()
{
    var context = new MLContext();

    // 1. Import or create the training data.
    var trainingRows = new[]
    {
        new HouseData { Size = 1.1F, Price = 1.2F },
        new HouseData { Size = 1.9F, Price = 2.3F },
        new HouseData { Size = 2.8F, Price = 3.0F },
        new HouseData { Size = 3.4F, Price = 3.7F }
    };
    IDataView trainingData = context.Data.LoadFromEnumerable(trainingRows);

    // 2. Specify the data-preparation and training pipeline.
    var featurizer = context.Transforms.Concatenate(
        outputColumnName: "Features",
        inputColumnNames: new[] { "Size" });
    var trainer = context.Regression.Trainers.Sdca(
        labelColumnName: "Price",
        featureColumnName: "Features",
        maximumNumberOfIterations: 100);
    IEstimator<ITransformer> pipeline = featurizer.Append(trainer);

    // 3. Train the model.
    ITransformer model = pipeline.Fit(trainingData);

    // 4. Test the model.
    var testRows = new[]
    {
        new HouseData { Size = 1.1F, Price = 0.98F },
        new HouseData { Size = 1.9F, Price = 2.1F },
        new HouseData { Size = 2.8F, Price = 2.9F },
        new HouseData { Size = 3.4F, Price = 3.6F }
    };
    IDataView testView = context.Data.LoadFromEnumerable(testRows);
    IDataView scoredTestView = model.Transform(testView);

    // Materialized snapshot of the scored data, handy to inspect in a debugger.
    var debug = scoredTestView.Preview();

    var metrics = context.Regression.Evaluate(scoredTestView, labelColumnName: "Price");
    Console.WriteLine($"R^2: {metrics.RSquared:0.##}");
    Console.WriteLine($"RMS error: {metrics.RootMeanSquaredError:0.##}");

    // 5. Save the model.
    context.Model.Save(model, trainingData.Schema, "model.zip");

    // 6. Make a prediction.
    MakePrediction();
}
/// <summary>
/// Trains an SDCA regression on the ';'-separated file at <paramref name="trainingDataPath"/> and
/// computes permutation feature importance (50 permutations) for its features.
/// </summary>
/// <param name="trainingDataPath">Path of the training file; it must have a header row.</param>
/// <returns>
/// One <c>FeatureImportance</c> per feature, in the order the features are concatenated, with
/// <c>R2Decrease</c> set to the mean R-squared value reported for that feature.
/// </returns>
public List<FeatureImportance> ComputePermutationMetrics(string trainingDataPath)
{
    // Single source of truth for the feature order: used for the "Features" vector and the result list.
    string[] featureNames =
    {
        "FixedAcidity", "VolatileAcidity", "CitricAcid", "ResidualSugar", "Chlorides",
        "FreeSulfurDioxide", "TotalSulfurDioxide", "Density", "Ph", "Sulphates", "Alcohol"
    };

    // Replace missing "FixedAcidity" values by the mean, build the feature vector, then normalize it.
    var replaceMissing = MLContext.Transforms.ReplaceMissingValues(
        outputColumnName: "FixedAcidity",
        replacementMode: MissingValueReplacingEstimator.ReplacementMode.Mean);
    IEstimator<ITransformer> pipeline = replaceMissing
        .Append(MLContext.Transforms.Concatenate("Features", featureNames))
        .Append(MLContext.Transforms.NormalizeMeanVariance("Features"));

    var trainData = MLContext.Data.LoadFromTextFile<FeatureImportanceData>(
        path: trainingDataPath,
        separatorChar: ';',
        hasHeader: true);

    // Cache the data view in memory; for an iterative algorithm such as SDCA this makes a huge difference.
    trainData = MLContext.Data.Cache(trainData);

    // Prepare the data for the algorithm.
    var transformationModel = pipeline.Fit(trainData);
    var transformedData = transformationModel.Transform(trainData);

    // Train the regression model on the transformed data.
    var algorithm = MLContext.Regression.Trainers.Sdca();
    var regressionModel = algorithm.Fit(transformedData);

    // Calculate the PFI metrics.
    // List of evaluation metrics: https://docs.microsoft.com/en-us/dotnet/machine-learning/resources/metrics
    var permutationMetrics = MLContext.Regression.PermutationFeatureImportance(
        regressionModel,
        transformedData,
        permutationCount: 50);

    var result = new List<FeatureImportance>();
    foreach (string featureName in featureNames)
    {
        result.Add(new FeatureImportance(featureName));
    }

    // Metric i belongs to feature i.
    int featureIndex = 0;
    foreach (var featureMetrics in permutationMetrics)
    {
        result[featureIndex].R2Decrease = featureMetrics.RSquared.Mean;
        featureIndex++;
    }

    return result;
}
/// <summary>
/// Returns the input series followed by three forecasted entries. Cost of goods,
/// inventory and turnover are each forecasted independently with an SSA model
/// trained on the whole input series.
/// </summary>
/// <param name="data">Historical observations, in chronological order.</param>
/// <returns>
/// A new list containing every input element plus one <see cref="RatioAnalysis"/> per
/// forecasted step. Forecast dates continue monthly from the last input element, and each
/// <c>*Delta</c> is the upper confidence bound minus the forecasted value.
/// </returns>
public IEnumerable<RatioAnalysis> Predict(IEnumerable<RatioAnalysis> data)
{
    // Materialize once so that a deferred query is not re-evaluated by every
    // count, model fit and copy below.
    List<RatioAnalysis> history = data.ToList();
    int seriesLength = history.Count;

    var mlContext = new MLContext(seed: 1);
    IDataView series = mlContext.Data.LoadFromEnumerable(history);

    var costPrediction = ForecastColumn(mlContext, series, nameof(RatioAnalysis.CostOfGoods), seriesLength);
    var inventoryPrediction = ForecastColumn(mlContext, series, nameof(RatioAnalysis.Inventory), seriesLength);
    var turnoverPrediction = ForecastColumn(mlContext, series, nameof(RatioAnalysis.Turnover), seriesLength);

    var last = history.Last();
    var retVal = new List<RatioAnalysis>(history);

    // All three models share the same horizon, so the turnover forecast length
    // drives the loop.
    for (int i = 0; i < turnoverPrediction.Forecasted.Length; i++)
    {
        retVal.Add(new RatioAnalysis
        {
            Date = last.Date.AddMonths(i + 1),
            CostOfGoods = costPrediction.Forecasted[i],
            CostOfGoodsDelta = costPrediction.ConfidenceUpperBound[i] - costPrediction.Forecasted[i],
            Inventory = inventoryPrediction.Forecasted[i],
            InventoryDelta = inventoryPrediction.ConfidenceUpperBound[i] - inventoryPrediction.Forecasted[i],
            Turnover = turnoverPrediction.Forecasted[i],
            TurnoverDelta = turnoverPrediction.ConfidenceUpperBound[i] - turnoverPrediction.Forecasted[i]
        });
    }

    return retVal;
}

// Fits an SSA forecaster on one column of the series and returns its forecast for the next 3 steps.
private static RatioAnalysisPrediction ForecastColumn(
    MLContext mlContext,
    IDataView series,
    string inputColumnName,
    int seriesLength)
{
    var forecaster = mlContext.Forecasting.ForecastBySsa(
        outputColumnName: nameof(RatioAnalysisPrediction.Forecasted),
        inputColumnName: inputColumnName,   // The column being forecasted.
        windowSize: 12,                     // The data cycle is 12 months.
        seriesLength: seriesLength,         // Number of data points used when performing a forecast.
        trainSize: seriesLength,            // Number of data points, from the beginning, used for training.
        horizon: 3,                         // Number of values to forecast (the next 3 months).
        confidenceLevel: 0.75f,             // Likelihood that the real value falls within the interval bounds.
        confidenceLowerBoundColumn: nameof(RatioAnalysisPrediction.ConfidenceLowerBound),
        confidenceUpperBoundColumn: nameof(RatioAnalysisPrediction.ConfidenceUpperBound));

    // Fit the forecasting model to the data series.
    ITransformer transformer = forecaster.Fit(series);

    // Create the forecast engine and read the forecast for the configured horizon.
    TimeSeriesPredictionEngine<RatioAnalysis, RatioAnalysisPrediction> engine =
        transformer.CreateTimeSeriesEngine<RatioAnalysis, RatioAnalysisPrediction>(mlContext);

    return engine.Predict();
}
/// <summary>
/// Trains a model by fitting <c>TrainingPipeline</c> to <c>TrainingDataView</c>.
/// </summary>
/// <returns>The transformer produced by the fit.</returns>
private static ITransformer TrainModel()
{
    var fittedModel = TrainingPipeline.Fit(TrainingDataView);
    return fittedModel;
}
/// <summary>
/// Builds a chain of text featurizers, one for each of the columns
/// "Column0" .. "Column19", fits the chain to <c>_dataset</c> and returns the model.
/// Each featurizer uses word bigrams only (character n-grams are disabled).
/// </summary>
/// <returns>The fitted transformer chain.</returns>
public ITransformer TrainFeaturizeText()
{
    // Only the first 20 columns are featurized.
    const int textColumnCount = 20;

    // Creates the featurizer for a single text column, with its own options instance.
    TextFeaturizingEstimator CreateFeaturizer(int columnIndex)
    {
        var options = new TextFeaturizingEstimator.Options
        {
            CharFeatureExtractor = null,
            WordFeatureExtractor = new WordBagEstimator.Options
            {
                NgramLength = 2,
                MaximumNgramsCount = new int[] { 200000 }
            }
        };

        return _mlContext.Transforms.Text.FeaturizeText($"Column{columnIndex}", options);
    }

    // Start from the first column's featurizer and append the remaining ones in order.
    IEstimator<ITransformer> pipeline = CreateFeaturizer(0);
    for (var columnIndex = 1; columnIndex < textColumnCount; columnIndex++)
    {
        pipeline = pipeline.Append(CreateFeaturizer(columnIndex));
    }

    // BENCHMARK OUTPUT
    // * Summary *
    // BenchmarkDotNet = v0.11.3, OS = Windows 10.0.18363
    // Intel Xeon W - 2133 CPU 3.60GHz, 1 CPU, 12 logical and 6 physical cores
    // .NET Core SDK = 3.0.100
    // [Host]       : .NET Core 2.1.13(CoreCLR 4.6.28008.01, CoreFX 4.6.28008.01), 64bit RyuJIT
    // Job - KDKCUJ : .NET Core 2.1.13(CoreCLR 4.6.28008.01, CoreFX 4.6.28008.01), 64bit RyuJIT
    // Arguments =/ p:Configuration = Release Toolchain = netcoreapp2.1 IterationCount = 1
    // LaunchCount = 3 MaxIterationCount = 20 RunStrategy = ColdStart
    // UnrollFactor = 1 WarmupCount = 1
    //
    //              Method |    Mean |   Error |   StdDev | Extra Metric | Gen 0 / 1k Op | Gen 1 / 1k Op | Gen 2 / 1k Op | Allocated Memory / Op |
    // ------------------- | -------:| -------:| --------:| ------------:| -------------:| ------------: | ------------: | --------------------: |
    //  TrainFeaturizeText | 17.00 s | 6.337 s | 0.3474 s |            - |  1949000.0000 |   721000.0000 |    36000.0000 |             315.48 MB |
    //
    // * Legends *
    //   Mean                  : Arithmetic mean of all measurements
    //   Error                 : Half of 99.9 % confidence interval
    //   StdDev                : Standard deviation of all measurements
    //   Extra Metric          : Value of the provided extra metric
    //   Gen 0 / 1k Op         : GC Generation 0 collects per 1k Operations
    //   Gen 1 / 1k Op         : GC Generation 1 collects per 1k Operations
    //   Gen 2 / 1k Op         : GC Generation 2 collects per 1k Operations
    //   Allocated Memory / Op : Allocated memory per single operation(managed only, inclusive, 1KB = 1024B)
    //   1 s                   : 1 Second(1 sec)
    //
    // * Diagnostic Output - MemoryDiagnoser *
    // ***** BenchmarkRunner: End *****
    // Run time: 00:01:52(112.92 sec), executed benchmarks: 1
    // * Artifacts cleanup *
    // Global total time: 00:01:59(119.89 sec), executed benchmarks: 1
    return pipeline.Fit(_dataset);
}
/// <summary>
/// Trains a model by fitting <c>TrainingPipeline</c> to <c>TrainingDataView</c>.
/// </summary>
/// <returns>The fitted model.</returns>
public static ITransformer TrainModel() => TrainingPipeline.Fit(TrainingDataView);
/// <summary>
/// Builds the image-classification pipeline (image loading and preprocessing,
/// TensorFlow scoring, label key mapping, L-BFGS maximum entropy trainer), trains it
/// on the tagged training file, prints predictions and metrics for the test file and
/// returns the trained model.
/// </summary>
/// <param name="mlContext">The ML.NET context used for every operation.</param>
/// <returns>The trained model.</returns>
public static ITransformer GenerateModel(MLContext mlContext)
{
    // <SnippetImageTransforms>
    IEstimator<ITransformer> pipeline = mlContext.Transforms.LoadImages(
            outputColumnName: "input",
            imageFolder: _imagesFolder,
            inputColumnName: nameof(ImageData.ImagePath))
        // Resize and extract pixels so the images match the format the model expects.
        .Append(mlContext.Transforms.ResizeImages(
            outputColumnName: "input",
            imageWidth: InceptionSettings.ImageWidth,
            imageHeight: InceptionSettings.ImageHeight,
            inputColumnName: "input"))
        .Append(mlContext.Transforms.ExtractPixels(
            outputColumnName: "input",
            interleavePixelColors: InceptionSettings.ChannelsLast,
            offsetImage: InceptionSettings.Mean))
        // </SnippetImageTransforms>
        // Score the images with the TensorFlow model; its "softmax2_pre_activation"
        // output is the feature vector handed to the trainer below.
        // <SnippetScoreTensorFlowModel>
        .Append(mlContext.Model.LoadTensorFlowModel(_inceptionTensorFlowModel)
            .ScoreTensorFlowModel(
                outputColumnNames: new[] { "softmax2_pre_activation" },
                inputColumnNames: new[] { "input" },
                addBatchDimensionInput: true))
        // </SnippetScoreTensorFlowModel>
        // <SnippetMapValueToKey>
        .Append(mlContext.Transforms.Conversion.MapValueToKey(
            outputColumnName: "LabelKey",
            inputColumnName: "Label"))
        // </SnippetMapValueToKey>
        // <SnippetAddTrainer>
        .Append(mlContext.MulticlassClassification.Trainers.LbfgsMaximumEntropy(
            labelColumnName: "LabelKey",
            featureColumnName: "softmax2_pre_activation"))
        // </SnippetAddTrainer>
        // <SnippetMapKeyToValue>
        .Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabelValue", "PredictedLabel"))
        .AppendCacheCheckpoint(mlContext);
    // </SnippetMapKeyToValue>

    // <SnippetLoadData>
    IDataView trainingSet = mlContext.Data.LoadFromTextFile<ImageData>(path: _trainTagsTsv, hasHeader: false);
    // </SnippetLoadData>

    // Train the model.
    Console.WriteLine("=============== Training classification model ===============");

    // <SnippetTrainModel>
    ITransformer trainedModel = pipeline.Fit(trainingSet);
    // </SnippetTrainModel>

    // Score the test data so the results can be displayed and evaluated.
    // <SnippetLoadAndTransformTestData>
    IDataView testSet = mlContext.Data.LoadFromTextFile<ImageData>(path: _testTagsTsv, hasHeader: false);
    IDataView scoredTestSet = trainedModel.Transform(testSet);

    // Expose the scored rows as objects for display (the row object is reused between rows).
    IEnumerable<ImagePrediction> scoredImages =
        mlContext.Data.CreateEnumerable<ImagePrediction>(scoredTestSet, true);
    DisplayResults(scoredImages);
    // </SnippetLoadAndTransformTestData>

    // Compute performance metrics from the test-set predictions.
    Console.WriteLine("=============== Classification metrics ===============");

    // <SnippetEvaluate>
    MulticlassClassificationMetrics classificationMetrics =
        mlContext.MulticlassClassification.Evaluate(
            scoredTestSet,
            labelColumnName: "LabelKey",
            predictedLabelColumnName: "PredictedLabel");
    // </SnippetEvaluate>

    //<SnippetDisplayMetrics>
    Console.WriteLine($"LogLoss is: {classificationMetrics.LogLoss}");
    Console.WriteLine($"PerClassLogLoss is: {String.Join(" , ", classificationMetrics.PerClassLogLoss.Select(c => c.ToString()))}");
    //</SnippetDisplayMetrics>

    // <SnippetReturnModel>
    return trainedModel;
    // </SnippetReturnModel>
}
/// <summary>
/// End-to-end border-crossing regression experiment: loads the data, splits it 80/20,
/// exports the intermediate data sets as CSV, builds the feature pipeline, trains a GAM
/// model and a FastTree (gradient boosting) model, prints the metrics of both on the test
/// set and saves the boosted model.
/// </summary>
static void Main(string[] args)
{
    const string bannerLine = "\n************************************************************";
    const string separatorLine = "*-----------------------------------------------------------";

    //###############################################################
    // PROCESS INITIALIZATION
    //###############################################################

    // A fixed seed is used for reproducibility.
    var mlContext = new MLContext(seed: 1);

    // Writes a data view to a ';'-separated text file with header row and schema.
    void ExportCsv(IDataView view, string path)
    {
        using (var stream = File.Create(path))
        {
            mlContext.Data.SaveAsText(view, stream, separatorChar: ';', headerRow: true, schema: true);
        }
    }

    // Input rows are described by the BorderCrossObservation class.
    IDataView fullData = mlContext.Data.LoadFromTextFile<BorderCrossObservation>(
        _DataPath,
        separatorChar: ';',
        hasHeader: true);

    //###############################################################
    // BUILD THE DATA SETS
    //###############################################################

    // Split the full data: 80% training, 20% test.
    var split = mlContext.Data.TrainTestSplit(fullData, testFraction: 0.2, seed: 1);
    IDataView trainingDataView = split.TrainSet;
    IDataView testDataView = split.TestSet;

    // Export both partitions so they can be inspected later.
    ExportCsv(trainingDataView, _salida_trainDataPath);
    ExportCsv(testDataView, _salida_testDataPath);

    //###############################################################
    // VARIABLE SELECTION
    //###############################################################

    // Columns that must not be used directly as features.
    string[] excludedColumns =
    {
        "Label",      // output attribute
        "Port_Name",  // single value
        "State",      // single value
        "Port_Code",  // single value
        "Border",     // single value
        "Longitud",   // single value
        "Latitud",    // single value
        "Mes",        // replaced by its encoded column
        "Year",       // replaced by its encoded column
        "Measure"     // replaced by its encoded column
    };

    var featureColumnList = trainingDataView.Schema
        .Select(column => column.Name)
        .Where(name => !excludedColumns.Contains(name))
        .ToList();

    // Add the encoded versions of the columns excluded above.
    featureColumnList.Add("MesInd");
    featureColumnList.Add("YearInd");
    featureColumnList.Add("MeasureInd");

    string[] featureColumnNames = featureColumnList.ToArray();

    //###############################################################
    // DATA TRANSFORMATION --> pipeline
    //###############################################################

    var categorical = mlContext.Transforms.Categorical;
    IEstimator<ITransformer> pipeline = categorical
        // Indicator (one-hot) columns.
        .OneHotEncoding(outputColumnName: "MesInd", inputColumnName: "Mes")
        .Append(categorical.OneHotEncoding(outputColumnName: "YearInd", inputColumnName: "Year"))
        .Append(categorical.OneHotEncoding(outputColumnName: "MeasureInd", inputColumnName: "Measure"))
        // Assemble the feature vector.
        .Append(mlContext.Transforms.Concatenate("Features", featureColumnNames))
        // Remove the raw categorical columns from the data view.
        .Append(mlContext.Transforms.DropColumns("Mes", "Year", "Measure"))
        // Normalize the output attribute.
        .Append(mlContext.Transforms.NormalizeMeanVariance(
            inputColumnName: "Label",
            outputColumnName: "LabelNormalized"));

    // Export the transformed training data.
    IDataView transformedData = pipeline.Fit(trainingDataView).Transform(trainingDataView);
    ExportCsv(transformedData, _salida_transformationData);

    //###############################################################
    // TRAINING ALGORITHM SELECTION --> trainingPipeline
    //###############################################################

    // 1. GAM (Generalized Additive Models), appended to the transformation pipeline.
    var gamTrainer = mlContext.Regression.Trainers.Gam(
        labelColumnName: "LabelNormalized",
        featureColumnName: "Features",
        learningRate: 0.02,
        numberOfIterations: 2100);
    IEstimator<ITransformer> gamPipeline = pipeline.Append(gamTrainer);

    // 2. GBA (Gradient Boosting Algorithm), appended to the transformation pipeline.
    var boostTrainer = mlContext.Regression.Trainers.FastTree(
        labelColumnName: "LabelNormalized",
        featureColumnName: "Features",
        numberOfLeaves: 20,
        numberOfTrees: 100,
        minimumExampleCountPerLeaf: 10,
        learningRate: 0.2);
    IEstimator<ITransformer> boostPipeline = pipeline.Append(boostTrainer);

    //###############################################################
    // MODEL TRAINING
    //###############################################################

    Console.WriteLine(bannerLine);
    Console.WriteLine($"* Entrenamiento del Modelo calculado con el Algoritmo GAM ");
    Console.WriteLine(separatorLine);
    var gamTimer = System.Diagnostics.Stopwatch.StartNew();
    var gamModel = gamPipeline.Fit(trainingDataView);
    gamTimer.Stop();
    var gamSeconds = gamTimer.ElapsedMilliseconds * 0.001;
    Console.WriteLine($"El entrenamiento GAM ha tardado: {gamSeconds:#.##} s\n");

    Console.WriteLine(bannerLine);
    Console.WriteLine($"* Entrenamiento del Modelo calculado con el Algoritmo GBA ");
    Console.WriteLine(separatorLine);
    var boostTimer = System.Diagnostics.Stopwatch.StartNew();
    var boostModel = boostPipeline.Fit(trainingDataView);
    boostTimer.Stop();
    var boostSeconds = boostTimer.ElapsedMilliseconds * 0.001;
    Console.WriteLine($"El entrenamiento GBA ha tardado: {boostSeconds:#.##} s\n");

    //###############################################################
    // MODEL EVALUATION
    //###############################################################

    // Score the test data with both models.
    var gamPredictions = gamModel.Transform(testDataView);
    var boostPredictions = boostModel.Transform(testDataView);

    // Compute the metrics of each model.
    var gamMetrics = mlContext.Regression.Evaluate(
        data: gamPredictions,
        labelColumnName: "LabelNormalized",
        scoreColumnName: "Score");
    var boostMetrics = mlContext.Regression.Evaluate(
        data: boostPredictions,
        labelColumnName: "LabelNormalized",
        scoreColumnName: "Score");

    // Print the GAM metrics.
    Console.WriteLine(bannerLine);
    Console.WriteLine($"* Métricas para el Modelo calculado con el Algoritmo GAM ");
    Console.WriteLine(separatorLine);
    Console.WriteLine($"* GAM RSquared Score: {gamMetrics.RSquared:0.##}");
    Console.WriteLine($"* GAM Root Mean Squared Error Score: {gamMetrics.RootMeanSquaredError:#.##}");
    Console.WriteLine($"* GAM MAE Score: {gamMetrics.MeanAbsoluteError:#.##}");
    Console.WriteLine($"* GAM MSE Score: {gamMetrics.MeanSquaredError:#.##}\n");

    // Print the GBA metrics.
    Console.WriteLine(bannerLine);
    Console.WriteLine($"* Métricas para el Modelo calculado con el Algoritmo GBA ");
    Console.WriteLine(separatorLine);
    Console.WriteLine($"* GBA RSquared Score: {boostMetrics.RSquared:0.##}");
    Console.WriteLine($"* GBA Root Mean Squared Error Score: {boostMetrics.RootMeanSquaredError:#.##}");
    Console.WriteLine($"* GBA MAE Score: {boostMetrics.MeanAbsoluteError:#.##}");
    Console.WriteLine($"* GBA MSE Score: {boostMetrics.MeanSquaredError:#.##}\n");

    //###############################################################
    // BEST MODEL SELECTION
    //###############################################################

    // The boosted model is the one saved for later consumption.
    mlContext.Model.Save(boostModel, trainingDataView.Schema, _salida_modelPath);
}
//------------------------------------------------------------------------------------------------------------------
/// <summary>
/// Builds one Poisson regression model for each of the four training data files,
/// saves each model, and stores the scores each model produces for
/// <c>inputModelData</c> in the corresponding output array.
/// </summary>
/// <param name="arrPredict1">Receives the forecast values of the first model.</param>
/// <param name="arrPredict2">Receives the forecast values of the second model.</param>
/// <param name="arrPredict3">Receives the forecast values of the third model.</param>
/// <param name="arrPredict4">Receives the forecast values of the fourth model.</param>
public static void BuildPredictModel(ref float[] arrPredict1, ref float[] arrPredict2, ref float[] arrPredict3, ref float[] arrPredict4)
{
    // The models are processed strictly one after another; each output array is
    // assigned as soon as its own model has been trained, saved and scored.
    arrPredict1 = TrainSaveAndPredict(mlContext1, TRAIN_DATA_FILEPATH_1, MODEL_FILEPATH_1);
    arrPredict2 = TrainSaveAndPredict(mlContext2, TRAIN_DATA_FILEPATH_2, MODEL_FILEPATH_2);
    arrPredict3 = TrainSaveAndPredict(mlContext3, TRAIN_DATA_FILEPATH_3, MODEL_FILEPATH_3);
    arrPredict4 = TrainSaveAndPredict(mlContext4, TRAIN_DATA_FILEPATH_4, MODEL_FILEPATH_4);
}

// Trains a Poisson regression model on one data file, saves it, and returns its scores for inputModelData.
private static float[] TrainSaveAndPredict(MLContext mlContext, string trainDataPath, string modelPath)
{
    // Prepare the training data.
    IDataView trainingDataView = mlContext.Data.LoadFromTextFile<ModelInput>(
        path: trainDataPath,
        hasHeader: true,
        separatorChar: ',',
        allowQuoting: true,
        allowSparse: false);

    // The "id" column is the only feature; it is min-max normalized and cached for training.
    IEstimator<ITransformer> dataProcessPipeline = mlContext.Transforms.Concatenate("Features", new[] { "id" })
        .Append(mlContext.Transforms.NormalizeMinMax("Features", "Features"))
        .AppendCacheCheckpoint(mlContext);

    // Choose the training algorithm; the "name" column is the label.
    var trainer = mlContext.Regression.Trainers.LbfgsPoissonRegression(new LbfgsPoissonRegressionTrainer.Options()
    {
        DenseOptimizer = true,
        LabelColumnName = "name",
        FeatureColumnName = "Features"
    });
    IEstimator<ITransformer> trainingPipeline = dataProcessPipeline.Append(trainer);

    // Train the model.
    ITransformer model = trainingPipeline.Fit(trainingDataView);

    // Model evaluation (console use only) is intentionally left disabled:
    //var crossValidationResults = mlContext.Regression.CrossValidate(trainingDataView, trainingPipeline, numberOfFolds: 5, labelColumnName: "name");
    //PrintStatistics(crossValidationResults);

    // Save the model.
    mlContext.Model.Save(model, trainingDataView.Schema, GetAbsolutePath(modelPath));

    // Score every row of the shared prediction input (multiple forecast values at once).
    IDataView inputPredict = mlContext.Data.LoadFromEnumerable<ModelInput>(inputModelData);
    IDataView predictions = model.Transform(inputPredict);
    return predictions.GetColumn<float>("Score").ToArray();
}
/// <summary>
/// Trains a model by fitting the given pipeline to the given data set.
/// </summary>
/// <param name="trainingDataView">The data to train on.</param>
/// <param name="pipeLine">The estimator pipeline to fit.</param>
/// <returns>The fitted transformer.</returns>
private static ITransformer Train(IDataView trainingDataView, IEstimator<ITransformer> pipeLine)
    => pipeLine.Fit(trainingDataView);
/// <summary>
/// Fits the training pipeline to <paramref name="trainingData"/> and stores the
/// result in <c>TrainedModel</c>.
/// </summary>
/// <param name="trainingData">The data to train on.</param>
/// <returns>The value of <c>TrainedModel</c> after training.</returns>
public ITransformer Train(IDataView trainingData)
{
    var fittedModel = _trainingPipeline.Fit(trainingData);
    TrainedModel = fittedModel;

    return TrainedModel;
}
/// <summary>
/// Fits a training pipeline to a data view.
/// </summary>
/// <param name="trainingPipeline">The estimator pipeline to train.</param>
/// <param name="trainData">The data view to train on.</param>
/// <returns>The model produced by the fit.</returns>
public static ITransformer TrainModel(IEstimator<ITransformer> trainingPipeline, IDataView trainData)
{
    ITransformer trainedModel = trainingPipeline.Fit(trainData);
    return trainedModel;
}
/// <summary>
/// Fits the pipeline to the training data and saves the resulting model, together
/// with the training data schema, to <c>_modelPath</c>. Nothing is returned.
/// </summary>
/// <param name="trainingDataView">The data to train on.</param>
/// <param name="pipeline">The estimator pipeline to fit.</param>
public static void TrainAndSaveModel(IDataView trainingDataView, IEstimator<ITransformer> pipeline)
{
    ITransformer fittedModel = pipeline.Fit(trainingDataView);
    var inputSchema = trainingDataView.Schema;

    _mlContext.Model.Save(fittedModel, inputSchema, _modelPath);
}
/// <summary>
/// Employee attrition experiment: loads the CSV, splits it 80/20, one-hot encodes the
/// categorical columns, trains an SDCA logistic regression classifier, prints its test
/// metrics, then runs permutation feature importance on the featurized training data and
/// prints the features ordered by the absolute mean AUC statistic.
/// </summary>
static void Main(string[] args)
{
    var mlContext = new MLContext();

    IDataView attritionData = mlContext.Data.LoadFromTextFile<Employee>(
        path: "./data/attrition.csv",
        hasHeader: true,
        separatorChar: ',');

    var partitions = mlContext.Data.TrainTestSplit(attritionData, testFraction: 0.2);
    var trainData = partitions.TrainSet;
    var testData = partitions.TestSet;

    // Numeric features: every "Single" column except the label.
    var numericColumnNames = attritionData.Schema
        .Where(column => column.Name != nameof(Employee.Attrition)
                         && column.Type.ToString() == "Single")
        .Select(column => column.Name)
        .ToList();

    // Categorical columns; each one is one-hot encoded into an "OHE-" prefixed column.
    var categoricalColumnNames = new[]
    {
        nameof(Employee.BusinessTravel),
        nameof(Employee.Department),
        nameof(Employee.EducationField),
        nameof(Employee.MaritalStatus),
        nameof(Employee.JobLevel),
        nameof(Employee.JobRole),
        nameof(Employee.OverTime)
    };
    var encodedColumnNames = categoricalColumnNames.Select(name => "OHE-" + name).ToList();
    var encodingPairs = categoricalColumnNames
        .Select(name => new InputOutputColumnPair("OHE-" + name, name))
        .ToArray();

    // Encoded columns first, numeric columns after them.
    string[] allFeatureNames = encodedColumnNames.Concat(numericColumnNames).ToArray();

    IEstimator<ITransformer> featurizePipeline = mlContext.Transforms.Categorical
        .OneHotEncoding(encodingPairs, OneHotEncodingEstimator.OutputKind.Indicator)
        .Append(mlContext.Transforms.Concatenate("Features", allFeatureNames))
        .Append(mlContext.Transforms.NormalizeMinMax("Features", "Features"));

    ConsoleHelper.ConsoleWriteHeader("=============== Begin to train the model ===============");

    // Other binary trainers that were tried and compared with the same label/feature
    // columns: LightGbm, FastTree, LbfgsLogisticRegression, SgdCalibrated.
    var trainer = mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(
        labelColumnName: nameof(Employee.Attrition),
        featureColumnName: "Features");

    var trainPipeline = featurizePipeline.Append(trainer);
    var trainedModel = trainPipeline.Fit(trainData);

    ConsoleHelper.ConsoleWriteHeader("=============== Trained model successfully ===============");

    Console.WriteLine("===== Evaluating Model's accuracy with Test data =====");
    var testPredictions = trainedModel.Transform(testData);
    var testMetrics = mlContext.BinaryClassification.Evaluate(
        data: testPredictions,
        labelColumnName: nameof(Employee.Attrition),
        scoreColumnName: "Score");
    ConsoleHelper.PrintBinaryClassificationMetrics(trainedModel.ToString(), testMetrics);

    Console.WriteLine("===== Permutation Test =====");

    // The permutation run needs featurized data; the featurizer is fitted again on,
    // and applied to, the training partition.
    var permuteTestData = featurizePipeline.Fit(trainData).Transform(trainData);
    var permutationMetrics = mlContext.BinaryClassification.PermutationFeatureImportance(
        predictionTransformer: trainedModel.LastTransformer,
        data: permuteTestData,
        labelColumnName: nameof(Employee.Attrition),
        permutationCount: 50);

    // Build a label per feature slot: a column with slot names contributes its name
    // once per slot, any other column contributes its name once.
    var slotLabels = new List<string>();
    foreach (var featureName in allFeatureNames)
    {
        var featureColumn = permuteTestData.Schema[featureName];
        var repetitions = 1;

        if (featureColumn.HasSlotNames())
        {
            var slotNames = new VBuffer<ReadOnlyMemory<char>>();
            featureColumn.GetSlotNames(ref slotNames);
            repetitions = slotNames.Length;
        }

        slotLabels.AddRange(Enumerable.Repeat(featureName, repetitions));
    }

    // Rank the feature slots by the absolute mean of their AUC statistic, largest first.
    var rankedSlots = permutationMetrics
        .Select((metrics, index) => new { index, metrics.AreaUnderRocCurve })
        .OrderByDescending(slot => Math.Abs(slot.AreaUnderRocCurve.Mean));

    foreach (var slot in rankedSlots)
    {
        Console.WriteLine($"{slotLabels[slot.index],-20}|\t{Math.Abs(slot.AreaUnderRocCurve.Mean):F6}");
    }
}
// </SnippetDeclareGlobalVariables>

/// <summary>
/// Builds an ML.NET pipeline around a pre-trained TensorFlow model: tokenizes the
/// review text, maps words to integer ids, resizes the id vector to a fixed length,
/// scores it with TensorFlow, and then hands the fitted model to PredictSentiment.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // One MLContext is shared by every step of the model-creation workflow.
    // <SnippetCreateMLContext>
    var mlContext = new MLContext();
    // </SnippetCreateMLContext>

    // Word -> integer id dictionary, loaded from a CSV next to the model.
    // <SnippetCreateLookupMap>
    var wordIndexFile = Path.Combine(_modelPath, "imdb_word_index.csv");
    var wordIndexColumns = new[]
    {
        new TextLoader.Column("Words", DataKind.String, 0),
        new TextLoader.Column("Ids", DataKind.Int32, 1),
    };
    var lookupMap = mlContext.Data.LoadFromTextFile(
        wordIndexFile,
        columns: wordIndexColumns,
        separatorChar: ',');
    // </SnippetCreateLookupMap>

    // The TensorFlow model needs a fixed-length feature vector, while the lookup
    // step yields a variable-length one. Array.Resize to FeatureLength pads short
    // inputs with default values and truncates long ones.
    // <SnippetResizeFeatures>
    Action<VariableLength, FixedLength> resizeFeatures = (source, target) =>
    {
        var resized = source.VariableLengthFeatures;
        Array.Resize(ref resized, FeatureLength);
        target.Features = resized;
    };
    // </SnippetResizeFeatures>

    // Load the TensorFlow model.
    // <SnippetLoadTensorFlowModel>
    var tensorFlowModel = mlContext.Model.LoadTensorFlowModel(_modelPath);
    // </SnippetLoadTensorFlowModel>

    // Print the element type and first dimension of the model's input and output nodes.
    // <SnippetGetModelSchema>
    var schema = tensorFlowModel.GetModelSchema();
    Console.WriteLine(" =============== TensorFlow Model Schema =============== ");

    var featuresType = (VectorDataViewType)schema["Features"].Type;
    Console.WriteLine($"Name: Features, Type: {featuresType.ItemType.RawType}, Size: ({featuresType.Dimensions[0]})");

    var predictionType = (VectorDataViewType)schema["Prediction/Softmax"].Type;
    Console.WriteLine($"Name: Prediction/Softmax, Type: {predictionType.ItemType.RawType}, Size: ({predictionType.Dimensions[0]})");
    // </SnippetGetModelSchema>

    // <SnippetTokenizeIntoWords>
    IEstimator<ITransformer> pipeline =
        // Step 1: break the review text into individual words.
        mlContext.Transforms.Text.TokenizeIntoWords("TokenizedWords", "ReviewText")
        // </SnippetTokenizeIntoWords>

        // <SnippetMapValue>
        // Step 2: translate every word into its integer id; the ids are the input features.
        .Append(mlContext.Transforms.Conversion.MapValue(
            "VariableLengthFeatures",
            lookupMap,
            lookupMap.Schema["Words"],
            lookupMap.Schema["Ids"],
            "TokenizedWords"))
        // </SnippetMapValue>

        // <SnippetCustomMapping>
        // Step 3: turn the variable-length id vector into a fixed-length one.
        .Append(mlContext.Transforms.CustomMapping(resizeFeatures, "Resize"))
        // </SnippetCustomMapping>

        // <SnippetScoreTensorFlowModel>
        // Step 4: score the fixed-length features with the TensorFlow model.
        .Append(tensorFlowModel.ScoreTensorFlowModel("Prediction/Softmax", "Features"))
        // </SnippetScoreTensorFlowModel>

        // <SnippetCopyColumns>
        // Step 5: expose the TensorFlow output under the friendlier name 'Prediction'.
        .Append(mlContext.Transforms.CopyColumns("Prediction", "Prediction/Softmax"));
    // </SnippetCopyColumns>

    // <SnippetCreateModel>
    // Fit the estimator chain on an empty MovieReview list to obtain an executable model.
    var dataView = mlContext.Data.LoadFromEnumerable(new List<MovieReview>());
    var model = pipeline.Fit(dataView);
    // </SnippetCreateModel>

    // <SnippetCallPredictSentiment>
    PredictSentiment(mlContext, model);
    // </SnippetCallPredictSentiment>
}
// ===========================================================================================================

/// <summary>
/// Builds the training pipeline for the requested <paramref name="classificationMode"/>,
/// fits it on <c>_trainingDataView</c> and stores the result in <c>_mlModel</c>.
/// Does nothing when <c>ErrorHasOccured</c> is already set. Any exception raised while
/// building or fitting (including the one thrown for an unsupported mode) is caught
/// here and recorded in <c>ErrorHasOccured</c> / <c>FailureInformation</c>.
/// </summary>
/// <param name="classificationMode">Which multiclass trainer to use.</param>
public void BuildTrainingPipelineAndModel(ClassificationMode classificationMode)
{
    if (ErrorHasOccured)
    {
        return;
    }

    try
    {
        if (classificationMode == ClassificationMode.OneVersusAll)
        {
            // Featurization: key-encode the label, concatenate the feature columns,
            // min-max normalize them, then cache the result.
            EstimatorChain<NormalizingTransformer> featurizer =
                _mlContext.Transforms.Conversion.MapValueToKey("Label", "Label")
                    .Append(_mlContext.Transforms.Concatenate(
                        outputColumnName: "Features",
                        inputColumnNames: FeatureNames.ToArray()))
                    .Append(_mlContext.Transforms.NormalizeMinMax("Features", "Features"))
                    .AppendCacheCheckpoint(_mlContext);

            // Learner: one-versus-all over an averaged perceptron, followed by mapping
            // the predicted key back to the original label value.
            EstimatorChain<KeyToValueMappingTransformer> learner =
                _mlContext.MulticlassClassification.Trainers.OneVersusAll(
                        _mlContext.BinaryClassification.Trainers.AveragedPerceptron(
                            labelColumnName: "Label",
                            numberOfIterations: 10,
                            featureColumnName: "Features"),
                        labelColumnName: "Label")
                    .Append(_mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel", "PredictedLabel"));

            IEstimator<ITransformer> fullPipeline = featurizer.Append(learner);

            // Train the model.
            _mlModel = fullPipeline.Fit(_trainingDataView);
        }
        else if (classificationMode == ClassificationMode.LightGbm)
        {
            // Featurization: key-encode the label and concatenate the feature columns
            // (this mode applies no normalization or caching).
            EstimatorChain<ColumnConcatenatingTransformer> featurizer =
                _mlContext.Transforms.Conversion.MapValueToKey("Label", "Label")
                    .Append(_mlContext.Transforms.Concatenate(
                        outputColumnName: "Features",
                        inputColumnNames: FeatureNames.ToArray()));

            // Learner: LightGBM multiclass, followed by mapping the predicted key back
            // to the original label value.
            EstimatorChain<KeyToValueMappingTransformer> learner =
                _mlContext.MulticlassClassification.Trainers.LightGbm(
                        labelColumnName: "Label",
                        featureColumnName: "Features")
                    .Append(_mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel", "PredictedLabel"));

            EstimatorChain<TransformerChain<KeyToValueMappingTransformer>> fullPipeline =
                featurizer.Append(learner);

            // Train the model.
            _mlModel = fullPipeline.Fit(_trainingDataView);
        }
        else
        {
            throw new ArgumentOutOfRangeException(nameof(classificationMode), classificationMode, null);
        }

        // TODO: cross-validate on the single training dataset (there is no separate
        // evaluation set) to obtain the model's accuracy metrics.
    }
    catch (Exception ex)
    {
        // Record the failure rather than propagating it to the caller.
        Debug.WriteLine(ex.Message);
        ErrorHasOccured = true;
        FailureInformation = ex.Message;
    }
}
/// <summary>
/// Fits the estimator on <paramref name="data"/> and immediately applies the
/// resulting transformer to that same data.
/// </summary>
/// <param name="est">Estimator to fit.</param>
/// <param name="data">Data used both for fitting and for the transform.</param>
/// <returns>The output of the fitted transformer's <c>Transform</c> call on <paramref name="data"/>.</returns>
public static IDataView FitAndTransform(this IEstimator<ITransformer> est, IDataView data)
{
    ITransformer fitted = est.Fit(data);
    return fitted.Transform(data);
}
/// <summary>
/// Fits <paramref name="trainingPipeline"/> on <paramref name="trainingData"/>.
/// </summary>
/// <param name="trainingData">Data to fit the pipeline on.</param>
/// <param name="trainingPipeline">Estimator pipeline to fit.</param>
/// <returns>The transformer produced by the fit.</returns>
static ITransformer TrainModel(IDataView trainingData, IEstimator<ITransformer> trainingPipeline)
    => trainingPipeline.Fit(trainingData);
/// <summary>
/// Demonstrates moving between the statically-typed and dynamically-typed pipeline
/// APIs: loads the Iris data with a static loader, drops to the dynamic API to append
/// a one-versus-all trainer, then returns to the static API via <c>AssertStatic</c>.
/// Both the static and the dynamic variant of the pipeline are fitted.
/// </summary>
/// <param name="dataPath">Path of the comma-separated Iris data file.</param>
private void MixMatch(string dataPath)
{
    // The ML.NET context: catalog of operations, source of randomness,
    // and hook for exception tracking and logging.
    var mlContext = new MLContext();

    // Statically-typed loader describing where each column lives in the text file.
    var irisLoader = mlContext.Data.CreateTextLoader(col => (
            // The four numeric features of the Iris dataset.
            SepalLength: col.LoadFloat(0),
            SepalWidth: col.LoadFloat(1),
            PetalLength: col.LoadFloat(2),
            PetalWidth: col.LoadFloat(3),
            // The label: kind of iris.
            Label: col.LoadText(4)
        ),
        // The file is comma-separated (the loader's default is tab).
        separator: ',');

    // Read the data as a (statically-typed) data view.
    var irisData = irisLoader.Load(dataPath);

    // Pre-processing, still in the static world.
    var preprocessing = irisLoader.MakeNewEstimator()
        .Append(row => (
            // String label -> key.
            Label: row.Label.ToKey(),
            // All four features gathered into a single 'Features' column.
            Features: row.SepalLength.ConcatWith(row.SepalWidth, row.PetalLength, row.PetalWidth)));

    // There is no static pipeline for OVA (one-versus-all) at the time of writing,
    // so the OVA learner is appended to the dynamic view of the pipeline instead.
    var perceptron = mlContext.BinaryClassification.Trainers.AveragedPerceptron("Label", "Features");
    IEstimator<ITransformer> dynamicPipe = preprocessing.AsDynamic
        .Append(mlContext.MulticlassClassification.Trainers.OneVersusAll(perceptron));

    // From here we could stay dynamic and call dynamicPipe.Fit(data.AsDynamic),
    // or go back to the static world, which is what AssertStatic does.
    var typedPipeline = dynamicPipe.AssertStatic(mlContext,
            // Input shape: identical to the loader's shape,
            // i.e. four float features and a string label.
            shape => (
                SepalLength: shape.R4.Scalar,
                SepalWidth: shape.R4.Scalar,
                PetalLength: shape.R4.Scalar,
                PetalWidth: shape.R4.Scalar,
                Label: shape.Text.Scalar),
            // Output shape (only the relevant subset): the scores (vector of floats)
            // and the predicted label column (a key type).
            shape => (
                Score: shape.R4.Vector,
                // A key backed by uint with text values, since the original labels are text.
                PredictedLabel: shape.KeyU4.TextValues.Scalar))
        // Turn the predicted key back into the original string value.
        .Append(row => row.PredictedLabel.ToValue());

    // Statically-typed training.
    var typedModel = typedPipeline.Fit(irisData);

    // The dynamic alternative: append the key-to-value mapping and fit on the dynamic view.
    dynamicPipe = dynamicPipe.Append(new KeyToValueMappingEstimator(mlContext, "PredictedLabel"));
    var untypedModel = dynamicPipe.Fit(irisData.AsDynamic);

    // 'untypedModel' and 'typedModel.AsDynamic' are now equivalent.
}
/// <summary>
/// Trains a FastTree binary classifier on the CSV at <paramref name="trainingFileName"/>,
/// saves the fitted model to <c>ModelPath</c>, then evaluates it on the CSV at
/// <paramref name="testFileName"/> and writes the metrics to the debug output.
/// Returns early (after writing a debug message) when either file does not exist.
/// </summary>
/// <param name="trainingFileName">Path of the comma-separated training file (no header row).</param>
/// <param name="testFileName">Path of the comma-separated test file (no header row).</param>
public void Train(string trainingFileName, string testFileName)
{
    System.Diagnostics.Debug.WriteLine("Reached Train Method");

    // Check if training file exists
    if (!File.Exists(trainingFileName))
    {
        System.Diagnostics.Debug.WriteLine($"Failed to find training data file ({trainingFileName})");
        return;
    }

    // Check if test file exists
    if (!File.Exists(testFileName))
    {
        System.Diagnostics.Debug.WriteLine($"Failed to find test data file ({testFileName})");
        return;
    }

    // Convert training file into IDataView object (ready for processing)
    var trainingDataView = MlContext.Data.LoadFromTextFile<CarInventory>(trainingFileName, ',', hasHeader: false);

    // Concatenate every CarInventory property except the label into "Features",
    // then mean-variance normalise it into "FeaturesNormalizedByMeanVar".
    IEstimator<ITransformer> dataProcessPipeline = MlContext.Transforms
        .Concatenate("Features", typeof(CarInventory).ToPropertyList<CarInventory>(nameof(CarInventory.Label)))
        .Append(MlContext.Transforms.NormalizeMeanVariance(
            inputColumnName: "Features",
            outputColumnName: "FeaturesNormalizedByMeanVar"));

    // Create a trainer object with the label from the car inventory class + normalised features
    var trainer = MlContext.BinaryClassification.Trainers.FastTree(
        labelColumnName: nameof(CarInventory.Label),
        featureColumnName: "FeaturesNormalizedByMeanVar",
        numberOfLeaves: 2,
        numberOfTrees: 800,
        minimumExampleCountPerLeaf: 1,
        learningRate: 0.2);

    // Append the trainer to the pipeline
    var trainingPipeline = dataProcessPipeline.Append(trainer);

    // Fit the model, then save it together with the input schema
    var trainedModel = trainingPipeline.Fit(trainingDataView);
    MlContext.Model.Save(trainedModel, trainingDataView.Schema, ModelPath);

    // Evaluate the model like we trained it: the feature-contribution step is fitted
    // on the featurised training data and appended after the trained model.
    var evaluationPipeline = trainedModel.Append(MlContext.Transforms
        .CalculateFeatureContribution(trainedModel.LastTransformer)
        .Fit(dataProcessPipeline.Fit(trainingDataView).Transform(trainingDataView)));

    var testDataView = MlContext.Data.LoadFromTextFile<CarInventory>(testFileName, ',', hasHeader: false);
    var testSetTransform = evaluationPipeline.Transform(testDataView);

    var modelMetrics = MlContext.BinaryClassification.Evaluate(
        data: testSetTransform,
        labelColumnName: nameof(CarInventory.Label),
        scoreColumnName: "Score");

    System.Diagnostics.Debug.WriteLine($"Accuracy: {modelMetrics.Accuracy:P2}");
    System.Diagnostics.Debug.WriteLine($"Area Under Curve: {modelMetrics.AreaUnderRocCurve:P2}");
    System.Diagnostics.Debug.WriteLine($"Area under Precision recall Curve: {modelMetrics.AreaUnderPrecisionRecallCurve:P2}");
    System.Diagnostics.Debug.WriteLine($"F1Score: {modelMetrics.F1Score:P2}");
    System.Diagnostics.Debug.WriteLine($"LogLoss: {modelMetrics.LogLoss:#.##}");
    System.Diagnostics.Debug.WriteLine($"LogLossReduction: {modelMetrics.LogLossReduction:#.##}");
    System.Diagnostics.Debug.WriteLine($"PositivePrecision: {modelMetrics.PositivePrecision:#.##}");
    System.Diagnostics.Debug.WriteLine($"PositiveRecall: {modelMetrics.PositiveRecall:#.##}");
    System.Diagnostics.Debug.WriteLine($"NegativePrecision: {modelMetrics.NegativePrecision:#.##}");
    System.Diagnostics.Debug.WriteLine($"NegativeRecall: {modelMetrics.NegativeRecall:P2}");
}