/// <summary>
/// Runs an AutoML regression experiment on the wine training data, evaluates the
/// best run against the test data, and saves the winning model to <c>ModelFilePath</c>.
/// </summary>
public static void TrainAndSave()
{
    // Time value handed to CreateRegressionExperiment.
    const uint maxExperimentTime = 200;

    var context = new MLContext(seed: 1);

    // Prepare the data
    var trainingSet = context.Data.LoadFromTextFile<WineData>(path: TrainDataPath, separatorChar: ',', hasHeader: true);
    var testSet = context.Data.LoadFromTextFile<WineData>(path: TestDataPath, separatorChar: ',', hasHeader: true);

    var reporter = new RegressionExperimentProgressHandler();

    // Create the experiment first, then run it against the training set
    // using the "Label" column as the regression target.
    var experiment = context.Auto().CreateRegressionExperiment(maxExperimentTime);
    ExperimentResult<RegressionMetrics> outcome = experiment.Execute(trainingSet, "Label", progressHandler: reporter);
    Debugger.PrintTopModels(outcome);

    RunDetail<RegressionMetrics> winner = outcome.BestRun;
    ITransformer winningModel = winner.Model;

    // Evaluate the best run on the test data
    var scored = winningModel.Transform(testSet);
    var testMetrics = context.Regression.Evaluate(scored, labelColumnName: "Label", scoreColumnName: "Score");
    Debugger.PrintRegressionMetrics(winner.TrainerName, testMetrics);

    // Save the model
    Console.WriteLine("====== Save model to local file =========");
    context.Model.Save(winningModel, trainingSet.Schema, ModelFilePath);
}
/// <summary>
/// Loads the taxi-trip train/test data, runs an AutoML regression experiment,
/// evaluates the best run on the test data, and exports that model in ONNX
/// format to <c>MODEL_NAME</c>.
/// </summary>
/// <param name="mlContext">ML.NET context used for data loading, training, evaluation and export.</param>
/// <returns>The model of the best AutoML run.</returns>
private static ITransformer BuildTrainEvaluateAndSaveModel(MLContext mlContext)
{
    // STEP 1: Common data loading configuration
    /* Sample contents of the CSV file:
     * vendor_id,rate_code,passenger_count,trip_time_in_secs,trip_distance,payment_type,fare_amount
     * VTS,1,1,1140,3.75,CRD,15.5
     * VTS,1,1,480,2.72,CRD,10.0
     * VTS,1,1,1680,7.8,CSH,26.5
     * VTS,1,1,600,4.73,CSH,14.5
     */
    IDataView trainSet = mlContext.Data.LoadFromTextFile<TaxiTrip>(TrainDataPath, hasHeader: true, separatorChar: ',');
    IDataView testSet = mlContext.Data.LoadFromTextFile<TaxiTrip>(TestDataPath, hasHeader: true, separatorChar: ',');

    // Display the first few rows of the training data
    ConsoleHelper.ShowDataViewInConsole(mlContext, trainSet);

    // STEP 2: Create the user-defined progress handler that AutoML invokes
    // after each model it produces and evaluates.
    var reporter = new RegressionExperimentProgressHandler();

    // STEP 3: Run the AutoML regression experiment
    ConsoleHelper.ConsoleWriteHeader("=============== Training the model ===============");
    Console.WriteLine($"Running AutoML regression experiment for {ExperimentTime} seconds...");
    var experiment = mlContext.Auto().CreateRegressionExperiment(ExperimentTime);
    ExperimentResult<RegressionMetrics> outcome = experiment.Execute(trainSet, LabelColumnName, progressHandler: reporter);

    // Print the top models found by AutoML
    Console.WriteLine();
    PrintTopModels(outcome);

    // STEP 4: Evaluate the best model on the test data and print its metrics
    ConsoleHelper.ConsoleWriteHeader("===== Evaluating model's accuracy with test data =====");
    RunDetail<RegressionMetrics> bestRun = outcome.BestRun;
    ITransformer bestModel = bestRun.Model;
    IDataView scored = bestModel.Transform(testSet);
    var testMetrics = mlContext.Regression.Evaluate(scored, labelColumnName: LabelColumnName, scoreColumnName: "Score");

    // Print metrics from the top model
    ConsoleHelper.PrintRegressionMetrics(bestRun.TrainerName, testMetrics);

    // STEP 5: Persist the trained model by converting it to ONNX
    using (var onnxStream = File.Create(MODEL_NAME))
    {
        mlContext.Model.ConvertToOnnx(bestModel, trainSet, onnxStream);
    }

    Console.WriteLine("The model is saved to {0}", MODEL_NAME);

    return bestModel;
}
/// <summary>
/// Configures and runs an AutoML regression experiment over <c>TrainDataView</c>,
/// using a pre-featurizer that maps <c>payment_type</c> to an <c>is_cash</c> column,
/// and prints the top models once the experiment returns.
/// </summary>
/// <param name="mlContext">ML.NET context used to build the pre-featurizer and the experiment.</param>
/// <param name="columnInference">
/// Column inference results; their <c>ColumnInformation</c> is modified in place
/// (<c>payment_type</c> is moved from the categorical columns to the ignored columns).
/// </param>
/// <returns>The result of the AutoML experiment.</returns>
private static ExperimentResult<RegressionMetrics> RunAutoMLExperiment(MLContext mlContext, ColumnInferenceResults columnInference)
{
    // STEP 1: Display the first few rows of the training data
    ConsoleHelper.ShowDataViewInConsole(mlContext, TrainDataView);

    // STEP 2: Build a pre-featurizer for use in the AutoML experiment.
    // (Internally, AutoML uses one or more train/validation data splits to
    // evaluate the models it produces. The pre-featurizer is fit only on the
    // training data split to produce a trained transform. Then, the trained
    // transform is applied to both the train and validation data splits.)
    var cashLookup = new[] { new KeyValuePair<string, bool>("CSH", true) };
    IEstimator<ITransformer> preFeaturizer = mlContext.Transforms.Conversion.MapValue("is_cash", cashLookup, "payment_type");

    // STEP 3: Customize the column information returned by the InferColumns API
    ColumnInformation columnInfo = columnInference.ColumnInformation;
    columnInfo.CategoricalColumnNames.Remove("payment_type");
    columnInfo.IgnoredColumnNames.Add("payment_type");

    // STEP 4: Create the cancellation token source used to stop the experiment
    CancellationTokenSource cancellation = new CancellationTokenSource();

    // STEP 5: Create the user-defined progress handler that AutoML invokes
    // after each model it produces and evaluates.
    RegressionExperimentProgressHandler reporter = new RegressionExperimentProgressHandler();

    // STEP 6: Create the experiment settings
    var settings = CreateExperimentSettings(mlContext, cancellation);

    // STEP 7: Run the AutoML regression experiment
    var regressionExperiment = mlContext.Auto().CreateRegressionExperiment(settings);
    ConsoleHelper.ConsoleWriteHeader("=============== Running AutoML experiment ===============");
    Console.WriteLine($"Running AutoML regression experiment...");
    var stopwatch = Stopwatch.StartNew();

    // Cancel the experiment after the user presses any key
    CancelExperimentAfterAnyKeyPress(cancellation);

    ExperimentResult<RegressionMetrics> experimentResult =
        regressionExperiment.Execute(TrainDataView, columnInfo, preFeaturizer, reporter);
    Console.WriteLine($"{experimentResult.RunDetails.Count()} models were returned after {stopwatch.Elapsed.TotalSeconds:0.00} seconds{Environment.NewLine}");

    // Print the top models found by AutoML
    PrintTopModels(experimentResult);

    return experimentResult;
}
/// <summary>
/// Loads the taxi-trip train/test data, runs an AutoML regression experiment,
/// evaluates the best run on the test data, and saves that model to <c>ModelPath</c>.
/// </summary>
/// <param name="mlContext">ML.NET context used for data loading, training, evaluation and saving.</param>
/// <returns>The model of the best AutoML run.</returns>
private static ITransformer BuildTrainEvaluateAndSaveModel(MLContext mlContext)
{
    // STEP 1: Common data loading configuration
    var dataCatalog = mlContext.Data;
    IDataView trainingRows = dataCatalog.LoadFromTextFile<TaxiTrip>(TrainDataPath, hasHeader: true, separatorChar: ',');
    IDataView testRows = dataCatalog.LoadFromTextFile<TaxiTrip>(TestDataPath, hasHeader: true, separatorChar: ',');

    // STEP 2: Display the first few rows of the training data
    ConsoleHelper.ShowDataViewInConsole(mlContext, trainingRows);

    // STEP 3: Create the user-defined progress handler that AutoML invokes
    // after each model it produces and evaluates.
    RegressionExperimentProgressHandler onProgress = new RegressionExperimentProgressHandler();

    // STEP 4: Run the AutoML regression experiment
    ConsoleHelper.ConsoleWriteHeader("=============== Training the model ===============");
    Console.WriteLine($"Running AutoML regression experiment for {ExperimentTime} seconds...");
    var regressionExperiment = mlContext.Auto().CreateRegressionExperiment(ExperimentTime);
    ExperimentResult<RegressionMetrics> runs =
        regressionExperiment.Execute(trainingRows, LabelColumnName, progressHandler: onProgress);

    // Print the top models found by AutoML
    Console.WriteLine();
    PrintTopModels(runs);

    // STEP 5: Evaluate the best model on the test data and print its metrics
    ConsoleHelper.ConsoleWriteHeader("===== Evaluating model's accuracy with test data =====");
    RunDetail<RegressionMetrics> topRun = runs.BestRun;
    ITransformer topModel = topRun.Model;
    IDataView testPredictions = topModel.Transform(testRows);
    var regressionMetrics = mlContext.Regression.Evaluate(
        testPredictions,
        labelColumnName: LabelColumnName,
        scoreColumnName: "Score");

    // Print metrics from the top model
    ConsoleHelper.PrintRegressionMetrics(topRun.TrainerName, regressionMetrics);

    // STEP 6: Save/persist the trained model to a .ZIP file
    mlContext.Model.Save(topModel, trainingRows.Schema, ModelPath);

    Console.WriteLine("The model is saved to {0}", ModelPath);

    return topModel;
}
/// <summary>
/// Runs an AutoML regression experiment on the NOP training samples loaded from
/// JSON, prints the runs ranked by validation R-Squared together with the best
/// trainer, and prints the prediction for the first test sample.
/// </summary>
/// <exception cref="InvalidOperationException">The test data file contains no samples.</exception>
public void AutoMLTest()
{
    // Time value handed to CreateRegressionExperiment.
    const uint experimentTime = 200;

    string content = File.ReadAllText(@".\Datas\NOPTrainData.json");
    List<NOPInput> trainData = SerializerHelper.Deserialize<List<NOPInput>>(content);
    content = File.ReadAllText(@".\Datas\NOPTestData.json");
    List<NOPInput> testData = SerializerHelper.Deserialize<List<NOPInput>>(content);
    Console.WriteLine($"训练数据:{trainData.Count} 个,测试数据:{testData.Count} 个");

    // Fail fast: the first test sample is needed for the final prediction, so do
    // not run the whole experiment only to discover the test set is empty.
    if (testData.Count == 0)
    {
        throw new InvalidOperationException("Test data contains no samples to predict.");
    }

    MLContext mlContext = new MLContext();
    var progressHandler = new RegressionExperimentProgressHandler();

    IDataView trainDataView = mlContext.Data.LoadFromEnumerable(trainData);
    // Kept from the original flow; the view itself is not consumed below.
    IDataView testDataView = mlContext.Data.LoadFromEnumerable(testData);

    ExperimentResult<RegressionMetrics> experimentResult = mlContext.Auto()
        .CreateRegressionExperiment(experimentTime)
        .Execute(trainDataView, nameof(NOPInput.NextOpenningPrice), progressHandler: progressHandler);

    // Keep only runs that produced usable metrics, best R-Squared first.
    var topRuns = experimentResult.RunDetails
        .Where(r => r.ValidationMetrics != null && !double.IsNaN(r.ValidationMetrics.RSquared))
        .OrderByDescending(r => r.ValidationMetrics.RSquared)
        .ToList();

    Console.WriteLine("训练模型按照 R-Squared 排序:");
    foreach (var run in topRuns)
    {
        Console.WriteLine($"{run.TrainerName}\t{run.RuntimeInSeconds}s\t{run.ValidationMetrics.RSquared}");
    }

    Console.WriteLine("最佳模型:");
    Console.WriteLine(experimentResult.BestRun.TrainerName);

    // The prediction engine is disposable; release it once the sample prediction is done.
    using (var predictionEngine = mlContext.Model.CreatePredictionEngine<NOPInput, NOPOutput>(experimentResult.BestRun.Model))
    {
        var output = predictionEngine.Predict(testData[0]);
        Console.WriteLine($"预测结果:{output.NextOpenPrice}");
    }
}