//public static void RunRandomExclusion(String usedResourcesFile)
//{
//    PreExperimentSetUp();
//    int experimentNumber = 0;
//    List<ExperimentResult> resultList = ParseExperimentResult(usedResourcesFile);
//    foreach (ExperimentResult expResult in resultList)
//    {
//        //Do 5 random experiments per normal experiment
//        for (int i = 0; i < 10; i++)
//        {
//            experimentNumber++;
//            startTime = getCurrentTime();
//            String originalExperimentName = expResult.experimentName;
//            int numExcluded = expResult.numSpectraExcluded;
//            int numAnalyzed = expResult.numSpectraAnalyzed;
//            ExclusionProfile exclusionProfile = new RandomExclusion_Fast(database, ms2SpectraList, numExcluded, numAnalyzed, 12);
//            if (experimentNumber == 1)
//            {
//                WriterClass.writeln(exclusionProfile.GetPerformanceEvaluator().getHeader());
//            }
//            String experimentName = "EXP_" + experimentNumber + String.Format("Random:originalExperiment_{0}", originalExperimentName);
//            if (GlobalVar.IsSimulation)
//            {
//                new DataReceiverSimulation().DoJob(exclusionProfile, ms2SpectraList);
//            }
//            else
//            {
//                new DataReceiver().DoJob(exclusionProfile);
//            }
//            analysisTime = getCurrentTime() - startTime;
//            PostExperimentProcessing(exclusionProfile, experimentName, experimentNumber);
//            exclusionProfile.reset();
//            reset();
//        }
//    }
//}

//Parse Spectra usage information in past experiments for random exclusion
public static List<ExperimentResult> ParseExperimentResult(params String[] resultFiles)
{
    List<ExperimentResult> resultList = new List<ExperimentResult>();
    foreach (String resultFile in resultFiles)
    {
        // FIX: wrap the reader in `using` so the file handle is released even
        // if parsing throws (the original leaked the reader on any exception).
        using (StreamReader reader = new StreamReader(resultFile))
        {
            // Skip any preamble until the tab-separated header row.
            // FIX: guard against files with no "ExperimentName" header — the
            // original called StartsWith on a null line and threw NRE at EOF.
            String header = reader.ReadLine();
            while (header != null && !header.StartsWith("ExperimentName"))
            {
                header = reader.ReadLine();
            }
            if (header == null)
            {
                // No header row found; nothing to parse in this file.
                continue;
            }
            // Each remaining tab-separated line is one experiment record.
            String line = reader.ReadLine();
            while (line != null)
            {
                // Skip blank or non-tabular lines (e.g. trailing notes).
                if (line.Equals("") || !line.Contains("\t"))
                {
                    line = reader.ReadLine();
                    continue;
                }
                resultList.Add(new ExperimentResult(line, header));
                line = reader.ReadLine();
            }
        }
    }
    return resultList;
}
public static void AutoTrain()
{
    // STEP 1: load the train/test splits from CSV.
    IDataView trainView = LoadDataFromCsv(trainingCsv);
    IDataView testView = LoadDataFromCsv(testingCsv);

    // STEP 2: run the AutoML multiclass classification sweep for ExperimentTime seconds.
    WriteLineColor($"{Environment.NewLine}AutoML multiclass classification experiment for {ExperimentTime} seconds...", ConsoleColor.Yellow);
    var handler = new MulticlassExperimentProgressHandler();
    ExperimentResult<MulticlassClassificationMetrics> sweepResult =
        Context.Auto()
        .CreateMulticlassClassificationExperiment(ExperimentTime)
        .Execute(trainView, Label, progressHandler: handler);

    // STEP 3: report the best run and score the held-out test data with it.
    RunDetail<MulticlassClassificationMetrics> topRun = sweepResult.BestRun;
    WriteLineColor($"{Environment.NewLine}Top Trainer (by accuracy)", ConsoleColor.Yellow);
    PrintTopModels(sweepResult);
    WriteLineColor($"{Environment.NewLine}TRAINING USING: {topRun.TrainerName}", ConsoleColor.Cyan);
    Model = topRun.Model;
    var scored = Model.Transform(testView);
    var evalMetrics = Context.MulticlassClassification.Evaluate(data: scored, labelColumnName: Label, scoreColumnName: Score, predictedLabelColumnName: PredictedLabel);
    PrintMultiClassClassificationMetrics(topRun.TrainerName, evalMetrics);

    // STEP 4: persist the trained model for later inference.
    Context.Model.Save(Model, trainView.Schema, modelPath);
}
/// <summary>
/// Records the best run of a completed AutoML experiment to MLFlow: creates
/// the run, then logs the winning trainer's name and its validation metrics.
/// </summary>
/// <param name="experimentId">MLFlow experiment id the run is attached to.</param>
/// <param name="experimentResults">Completed AutoML experiment whose BestRun is logged.</param>
// FIX: was `async void` — exceptions thrown from an async void method cannot
// be observed by callers and can crash the process. Returning Task keeps
// existing call sites compiling while letting callers await/observe failures.
static async Task LogRun(int experimentId, ExperimentResult<MulticlassClassificationMetrics> experimentResults)
{
    // Define run
    var runObject = new CreateRunRequest();
    runObject.ExperimentId = experimentId;
    runObject.StartTime = ((DateTimeOffset)DateTime.UtcNow).ToUnixTimeMilliseconds();
    runObject.UserId = Environment.UserName;
    runObject.SourceType = SourceType.LOCAL;

    // Create new run in MLFlow
    var runRequest = await _mlFlowService.CreateRun(runObject);

    // Get information for best run
    var runDetails = experimentResults.BestRun;

    // Log trainer name
    await _mlFlowService.LogParameter(runRequest.Run.Info.RunUuid, nameof(runDetails.TrainerName), runDetails.TrainerName);

    // Log metrics (MLFlow metric API takes floats, hence the casts)
    await _mlFlowService.LogMetric(runRequest.Run.Info.RunUuid, nameof(runDetails.RuntimeInSeconds), (float)runDetails.RuntimeInSeconds);
    await _mlFlowService.LogMetric(runRequest.Run.Info.RunUuid, nameof(runDetails.ValidationMetrics.LogLoss), (float)runDetails.ValidationMetrics.LogLoss);
    await _mlFlowService.LogMetric(runRequest.Run.Info.RunUuid, nameof(runDetails.ValidationMetrics.MacroAccuracy), (float)runDetails.ValidationMetrics.MacroAccuracy);
    await _mlFlowService.LogMetric(runRequest.Run.Info.RunUuid, nameof(runDetails.ValidationMetrics.MicroAccuracy), (float)runDetails.ValidationMetrics.MicroAccuracy);
}
private void ApplyPotion(Potion potion, bool addScoreOnSuccess)
{
    // Guard: the previous potion must be fully resolved (via OnPatientFinished)
    // before another one may be applied.
    if (resolvingPotion)
    {
        Debug.Log("OnPatientFinished must be called before another potion can be applied!");
        return;
    }

    resolvingPotion = true;
    OnCanBrewStateChange?.Invoke(CanBrew);

    // Snapshot the patient's symptoms before the potion takes effect.
    HashSet<eSymptom> preSymptoms = new HashSet<eSymptom>(patientSymptomManager.symptoms);

    // Apply the potion; only count a cure toward the score when requested.
    bool cured = patientSymptomManager.ApplyPotionToPatient(potion.GetSymptomChange());
    PatientsCured += (addScoreOnSuccess && cured) ? 1 : 0;

    // Snapshot the symptoms afterwards and record this experiment.
    HashSet<eSymptom> postSymptoms = new HashSet<eSymptom>(patientSymptomManager.symptoms);
    experimentResults.Add(new ExperimentResult(preSymptoms, potion.PotionComposition, postSymptoms));
    ExperimentResultsChanged?.Invoke(experimentResults);
}
public static void Train(MLContext mlContext)
{
    try
    {
        // STEP 1: load the training data (age, gender, label-encoded age range).
        var columns = new[]
        {
            new TextLoader.Column("Age", DataKind.Single, 0),
            new TextLoader.Column("Gender", DataKind.Single, 1),
            new TextLoader.Column("Label", DataKind.Single, 2)
        };
        var trainData = mlContext.Data.LoadFromTextFile(
            path: "AgeRangeData03_AgeGenderLabelEncodedMoreData.csv",
            columns: columns,
            hasHeader: true,
            separatorChar: ',');

        // STEP 2: run the AutoML multiclass sweep, reporting progress per model.
        var handler = new MulticlassExperimentProgressHandler();
        ConsoleHelper.ConsoleWriteHeader("=============== Running AutoML experiment ===============");
        Console.WriteLine($"Running AutoML multiclass classification experiment for {ExperimentTime} seconds...");
        ExperimentResult<MulticlassClassificationMetrics> sweepResult =
            mlContext.Auto()
            .CreateMulticlassClassificationExperiment(ExperimentTime)
            .Execute(trainData, "Label", progressHandler: handler);

        // Show the best models found by the sweep.
        Console.WriteLine();
        PrintTopModels(sweepResult);
        Console.WriteLine();
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex);
    }
}
public static void DoAutoML()
{
    // Load the training data.
    IDataView trainView = mlContext.Data.LoadFromTextFile<ModelInput>(
        path: TRAIN_DATA_FILEPATH,
        hasHeader: true,
        separatorChar: ',',
        allowQuoting: true,
        allowSparse: false);

    // Configure a 10-second multiclass AutoML sweep.
    var settings = new MulticlassExperimentSettings
    {
        MaxExperimentTimeInSeconds = 10
    };
    MulticlassClassificationExperiment sweep = mlContext.Auto().CreateMulticlassClassificationExperiment(settings);

    // Pre-featurizer: one-hot the categorical columns, then assemble Features.
    var preFeaturizer = mlContext.Transforms.Categorical.OneHotEncoding(new[]
    {
        new InputOutputColumnPair("Vehicle Type", "Vehicle Type"),
        new InputOutputColumnPair("Day", "Day")
    })
    .Append(mlContext.Transforms.Concatenate("Features", new[] { "Vehicle Type", "Day", "Ride Distance (km)", "Hour" }));

    ExperimentResult<Microsoft.ML.Data.MulticlassClassificationMetrics> sweepResult =
        sweep.Execute(trainView, labelColumnName: "Saving", preFeaturizer: preFeaturizer);

    // Report validation accuracy of the best run.
    var metrics = sweepResult.BestRun.ValidationMetrics;
    Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:0.##}");
    Console.WriteLine($"Micro Accuracy: {metrics.MicroAccuracy:0.##}");

    // Persist the best model.
    SaveModel(mlContext, sweepResult.BestRun.Model, MODEL_FILEPATH, trainView.Schema);
}
/// <summary>
/// Loads an experiment (with its test results) and scores candidate bacteria
/// by how many test reactions match the experiment's observed results.
/// Returns an empty result when the experiment does not exist.
/// </summary>
/// <param name="experimentId">Primary key of the experiment to score.</param>
public async Task<ExperimentResult> GetExperimentResult(int experimentId)
{
    ExperimentResult result = new ExperimentResult();
    var experiment = await _dbContext.Experiments.Include(x => x.Results).SingleOrDefaultAsync(x => x.Id == experimentId);
    if (experiment == null)
    {
        return result;
    }

    // Tally, per bacteria id, how many of this experiment's test results match
    // that bacteria's expected reaction.
    // NOTE(review): the original code here did not compile
    // (`bacterias[bac.Id] =` with an empty else branch, indexing an IQueryable
    // by entity id). Counting matches per bacteria appears to be the intent —
    // confirm against BacteriaScore's definition.
    Dictionary<int, int> matchCounts = new Dictionary<int, int>();
    foreach (var testResult in experiment.Results)
    {
        // Materialize once to avoid re-running the query per enumeration.
        var matchingBacteria = _dbContext.TestReactions
                               .Where(x => x.Test.Id == testResult.Id && x.Result == testResult.Result)
                               .Select(x => x.Bacteria)
                               .ToList();
        foreach (var bac in matchingBacteria)
        {
            matchCounts.TryGetValue(bac.Id, out int count);
            matchCounts[bac.Id] = count + 1;
        }
    }

    // TODO(review): project matchCounts into `result` (ExperimentResult's
    // shape is not visible here), e.g. candidate bacteria ranked by match count.
    return result;
}
/* static readonly string TrainDataPath = Path.Combine(Environment.CurrentDirectory, "Data", "winequality-data-train.csv");
 * static readonly string TestDataPath = Path.Combine(Environment.CurrentDirectory, "Data", "winequality-data-test.csv");*/
public static void TrainAndSave(string label, string trainDataPath, uint experimentTime)
{
    MLContext mlContext = new MLContext(seed: 0);

    // Prepare the data: load the full set and hold out 20% as a test split.
    var trainData = mlContext.Data.LoadFromTextFile<ModelInput>(path: trainDataPath, separatorChar: ',', hasHeader: true);
    //var testData = mlContext.Data.LoadFromTextFile<ModelInput>(path: TestDataPath, separatorChar: ',', hasHeader: true);
    var testData = mlContext.Data.TrainTestSplit(trainData, testFraction: 0.2).TestSet;

    // Run the AutoML regression sweep for the requested number of seconds.
    var handler = new RegressionExperimentProgressHandler();
    ExperimentResult<RegressionMetrics> sweepResult =
        mlContext.Auto()
        .CreateRegressionExperiment(experimentTime)
        .Execute(trainData, label, progressHandler: handler);
    //Debugger.PrintTopModels(sweepResult);

    // Evaluate the best run on the held-out split.
    RunDetail<RegressionMetrics> best = sweepResult.BestRun;
    ITransformer trainedModel = best.Model;
    var predictions = trainedModel.Transform(testData);
    var metrics = mlContext.Regression.Evaluate(predictions, labelColumnName: label, scoreColumnName: "Score");
    //Debugger.PrintRegressionMetrics(best.TrainerName, metrics);

    // Save the trained model.
    using (var stream = System.IO.File.Create(ModelFilePath))
    {
        mlContext.Model.Save(trainedModel, trainData.Schema, stream);
    }
}
public static void TrainAndSave()
{
    MLContext mlContext = new MLContext(seed: 1);

    // Prepare the data: separate train and test CSV files.
    var trainData = mlContext.Data.LoadFromTextFile<WineData>(path: TrainDataPath, separatorChar: ',', hasHeader: true);
    var testData = mlContext.Data.LoadFromTextFile<WineData>(path: TestDataPath, separatorChar: ',', hasHeader: true);

    // Run a 200-second AutoML regression sweep.
    var handler = new RegressionExperimentProgressHandler();
    uint ExperimentTime = 200;
    ExperimentResult<RegressionMetrics> sweepResult =
        mlContext.Auto()
        .CreateRegressionExperiment(ExperimentTime)
        .Execute(trainData, "Label", progressHandler: handler);
    Debugger.PrintTopModels(sweepResult);

    // Evaluate the best run on the test set.
    RunDetail<RegressionMetrics> best = sweepResult.BestRun;
    ITransformer trainedModel = best.Model;
    var predictions = trainedModel.Transform(testData);
    var metrics = mlContext.Regression.Evaluate(predictions, labelColumnName: "Label", scoreColumnName: "Score");
    Debugger.PrintRegressionMetrics(best.TrainerName, metrics);

    // Save the trained model.
    Console.WriteLine("====== Save model to local file =========");
    mlContext.Model.Save(trainedModel, trainData.Schema, ModelFilePath);
}
/// <summary>
/// Re-fits the best AutoML pipeline on the combined train/validate/test data,
/// evaluates the refit model on the test set, saves it, and returns it.
/// </summary>
public static ITransformer Retrain(MLContext mlContext, ExperimentResult<MulticlassClassificationMetrics> experimentResult, ColumnInferenceResults columnInference, DataFilePaths paths, bool fixedBug = false)
{
    ConsoleHelper.ConsoleWriteHeader("=============== Re-fitting best pipeline ===============");
    var textLoader = mlContext.Data.CreateTextLoader(columnInference.TextLoaderOptions);
    var combinedDataView = textLoader.Load(new MultiFileSource(paths.TrainPath, paths.ValidatePath, paths.TestPath));
    var bestRun = experimentResult.BestRun;
    if (fixedBug)
    {
        // TODO: retry: below gave error but I thought it would work:
        //refitModel = MulticlassExperiment.Retrain(experimentResult,
        //    "final model",
        //    new MultiFileSource(paths.TrainPath, paths.ValidatePath, paths.FittedPath),
        //    paths.TestPath,
        //    paths.FinalPath, textLoader, mlContext);
        // but if failed before fixing this maybe the problem was in *EvaluateTrainedModelAndPrintMetrics*
    }
    var refitModel = bestRun.Estimator.Fit(combinedDataView);
    EvaluateTrainedModelAndPrintMetrics(mlContext, refitModel, "production model", textLoader.Load(paths.TestPath));
    // Save the re-fit model to a .ZIP file
    SaveModel(mlContext, refitModel, paths.FinalModelPath, textLoader.Load(paths.TestPath));
    // FIX: Trace.WriteLine(string, string) treats the second argument as a
    // *category*, not a format argument — the original printed the literal
    // "The model is saved to {0}". Interpolate the path instead.
    Trace.WriteLine($"The model is saved to {paths.FinalModelPath}");
    return refitModel;
}
static void Main(string[] args)
{
    MLContext mlContext = new MLContext();
    IDataView trainView = mlContext.Data.LoadFromTextFile<TrafficData>(GetAbsolutePath("../../../Data/Metro_Interstate_Traffic_Volume.csv"), hasHeader: true, separatorChar: ',');

    // Experiment settings: 10-second budget, optimize MSE, no model cache,
    // cancellable from the keyboard.
    var cts = new CancellationTokenSource();
    var settings = new RegressionExperimentSettings
    {
        MaxExperimentTimeInSeconds = 10,
        CancellationToken = cts.Token,
        OptimizingMetric = RegressionMetric.MeanSquaredError,
        CacheDirectory = null
    };

    // Cancel experiment after the user presses any key
    CancelExperimentAfterAnyKeyPress(cts);

    // Build and run the regression experiment.
    RegressionExperiment sweep = mlContext.Auto().CreateRegressionExperiment(settings);
    var handler = new RegressionExperimentProgressHandler();
    ExperimentResult<RegressionMetrics> sweepResult = sweep.Execute(trainView, labelColumnName: "Label", progressHandler: handler);

    // Report validation metrics of the best run.
    RegressionMetrics metrics = sweepResult.BestRun.ValidationMetrics;
    Console.WriteLine($"Best Algorthm: {sweepResult.BestRun.TrainerName}");
    Console.WriteLine($"R-Squared: {metrics.RSquared:0.##}");
    Console.WriteLine($"Root Mean Squared Error: {metrics.RootMeanSquaredError:0.##}");
    Console.ReadKey();
}
private static MulticlassClassificationMetrics GetAndPrintValidationMetricsForData(
    ExperimentResult<MulticlassClassificationMetrics> experimentResult, string fileName)
{
    Console.WriteLine($@"Running experiment for dataset {fileName}");
    var validationMetrics = AutoMlModelCreation.EvaluateModel(experimentResult, fileName);

    Console.WriteLine(@"Experiment ran with the following results");
    Console.WriteLine($@"LogLoss={validationMetrics.LogLoss} the closer to 0 the better");
    Console.WriteLine($@"Confusion Matrix Actuals\Predicted");

    // Print the confusion matrix row by row (actual class per row,
    // predicted class per column), tab separated.
    var confusionMatrix = validationMetrics.ConfusionMatrix;
    for (var row = 0; row < confusionMatrix.NumberOfClasses; row++)
    {
        for (var col = 0; col < confusionMatrix.NumberOfClasses; col++)
        {
            Console.Write(confusionMatrix.Counts[row][col] + "\t");
        }
        Console.WriteLine();
    }
    return validationMetrics;
}
public static void Train(MLContext mlContext)
{
    try
    {
        // Both CSVs share the same layout: 64 pixel values then the digit label.
        TextLoader.Column[] Columns() => new[]
        {
            new TextLoader.Column(nameof(InputData.PixelValues), DataKind.Single, 0, 63),
            new TextLoader.Column("Number", DataKind.Single, 64)
        };

        // STEP 1: load the train and test data.
        var trainData = mlContext.Data.LoadFromTextFile(path: TrainDataPath, columns: Columns(), hasHeader: false, separatorChar: ',');
        var testData = mlContext.Data.LoadFromTextFile(path: TestDataPath, columns: Columns(), hasHeader: false, separatorChar: ',');

        // STEP 2: progress handler AutoML invokes after each candidate model.
        var handler = new MulticlassExperimentProgressHandler();

        // STEP 3: run the AutoML multiclass sweep.
        ConsoleHelper.ConsoleWriteHeader("=============== Running AutoML experiment ===============");
        Console.WriteLine($"Running AutoML multiclass classification experiment for {ExperimentTime} seconds...");
        ExperimentResult<MulticlassClassificationMetrics> sweepResult =
            mlContext.Auto()
            .CreateMulticlassClassificationExperiment(ExperimentTime)
            .Execute(trainData, "Number", progressHandler: handler);

        // Print top models found by AutoML.
        Console.WriteLine();
        PrintTopModels(sweepResult);

        // STEP 4: evaluate the best model on the held-out test data.
        ConsoleHelper.ConsoleWriteHeader("===== Evaluating model's accuracy with test data =====");
        RunDetail<MulticlassClassificationMetrics> bestRun = sweepResult.BestRun;
        ITransformer trainedModel = bestRun.Model;
        var predictions = trainedModel.Transform(testData);
        var metrics = mlContext.MulticlassClassification.Evaluate(data: predictions, labelColumnName: "Number", scoreColumnName: "Score");
        ConsoleHelper.PrintMulticlassClassificationMetrics(bestRun.TrainerName, metrics);

        // STEP 5: save/persist the trained model to a .ZIP file.
        mlContext.Model.Save(trainedModel, trainData.Schema, ModelPath);
        Console.WriteLine("The model is saved to {0}", ModelPath);
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex);
    }
}
public void Start()
{
    // Infer the column layout from the CSV and load the training data.
    var columnInferenceResult = mlContext.Auto().InferColumns(
        path: TRAIN_DATA_FILEPATH,
        labelColumnName: "next",
        groupColumns: false);
    TextLoader textLoader = mlContext.Data.CreateTextLoader(columnInferenceResult.TextLoaderOptions);
    trainData = textLoader.Load(TRAIN_DATA_FILEPATH);

    // Correct the inferred column roles: productId and year are categorical,
    // and units should be ignored entirely.
    columnInformation = columnInferenceResult.ColumnInformation;
    columnInformation.CategoricalColumnNames.Add("productId");
    columnInformation.NumericColumnNames.Remove("productId");
    columnInformation.CategoricalColumnNames.Add("year");
    columnInformation.NumericColumnNames.Remove("year");
    columnInformation.NumericColumnNames.Remove("units");
    columnInformation.IgnoredColumnNames.Add("units");

    var settings = new RegressionExperimentSettings()
    {
        MaxExperimentTimeInSeconds = 10,
        OptimizingMetric = RegressionMetric.RootMeanSquaredError,
        CacheDirectory = new DirectoryInfo(CACHE_DIRECTORY),
        CancellationToken = cancelationTokenSource.Token
    };

    // Exclude the OLS trainer from the sweep.
    settings.Trainers.Remove(RegressionTrainer.Ols);

    RegressionExperiment sweep = mlContext.Auto().CreateRegressionExperiment(settings);
    ExperimentResult<RegressionMetrics> sweepResult = sweep.Execute(
        trainData: trainData,
        columnInformation: columnInformation,
        progressHandler: new RegressionProgressHandler(),
        preFeaturizer: null);

    ITransformer model = sweepResult.BestRun.Model;
    IEstimator<ITransformer> estimator = sweepResult.BestRun.Estimator;

    // Batch-predict over the training data and print the results two ways.
    IDataView predictionsDataView = model.Transform(trainData);
    PrintPredictions(predictionsDataView);
    PrintPredictionsEnumerable(predictionsDataView);

    // Re-fit the best pipeline and persist it.
    model = estimator.Fit(trainData);
    mlContext.Model.Save(model, trainData.Schema, MODEL_FILEPATH);
    Console.WriteLine("Done");
}
private static ITransformer BuildTrainEvaluateAndSaveModel(MLContext mlContext)
{
    // STEP 1: load the train/test CSVs. Expected layout:
    //   vendor_id,rate_code,passenger_count,trip_time_in_secs,trip_distance,payment_type,fare_amount
    //   VTS,1,1,1140,3.75,CRD,15.5
    //   VTS,1,1,480,2.72,CRD,10.0
    IDataView trainView = mlContext.Data.LoadFromTextFile<TaxiTrip>(TrainDataPath, hasHeader: true, separatorChar: ',');
    IDataView testView = mlContext.Data.LoadFromTextFile<TaxiTrip>(TestDataPath, hasHeader: true, separatorChar: ',');

    // Peek at the first rows of the training data.
    ConsoleHelper.ShowDataViewInConsole(mlContext, trainView);

    // STEP 2: progress handler AutoML invokes after each candidate model.
    var handler = new RegressionExperimentProgressHandler();

    // STEP 3: run the AutoML regression sweep.
    ConsoleHelper.ConsoleWriteHeader("=============== Training the model ===============");
    Console.WriteLine($"Running AutoML regression experiment for {ExperimentTime} seconds...");
    ExperimentResult<RegressionMetrics> sweepResult =
        mlContext.Auto()
        .CreateRegressionExperiment(ExperimentTime)
        .Execute(trainView, LabelColumnName, progressHandler: handler);

    // Print top models found by AutoML.
    Console.WriteLine();
    PrintTopModels(sweepResult);

    // STEP 4: evaluate the best model on the test data.
    ConsoleHelper.ConsoleWriteHeader("===== Evaluating model's accuracy with test data =====");
    RunDetail<RegressionMetrics> best = sweepResult.BestRun;
    ITransformer trainedModel = best.Model;
    IDataView predictions = trainedModel.Transform(testView);
    var metrics = mlContext.Regression.Evaluate(predictions, labelColumnName: LabelColumnName, scoreColumnName: "Score");
    ConsoleHelper.PrintRegressionMetrics(best.TrainerName, metrics);

    // STEP 5: export the trained model to ONNX format.
    using (var stream = File.Create(MODEL_NAME))
    {
        mlContext.Model.ConvertToOnnx(trainedModel, trainView, stream);
    }
    Console.WriteLine("The model is saved to {0}", MODEL_NAME);
    return trainedModel;
}
/// <summary>
/// Re-fit best pipeline on all available data (train + test combined).
/// </summary>
private static ITransformer RefitBestPipeline(MLContext mlContext, ExperimentResult<RegressionMetrics> experimentResult, ColumnInferenceResults columnInference)
{
    ConsoleHelper.ConsoleWriteHeader("=============== Re-fitting best pipeline ===============");
    var loader = mlContext.Data.CreateTextLoader(columnInference.TextLoaderOptions);
    var allData = loader.Load(new MultiFileSource(TrainDataPath, TestDataPath));
    return experimentResult.BestRun.Estimator.Fit(allData);
}
public static void Run()
{
    MLContext mlContext = new MLContext();

    // STEP 1: load the train/test data.
    IDataView trainView = mlContext.Data.LoadFromTextFile<TaxiTrip>(TrainDataPath, hasHeader: true, separatorChar: ',');
    IDataView testView = mlContext.Data.LoadFromTextFile<TaxiTrip>(TestDataPath, hasHeader: true, separatorChar: ',');

    // STEP 2: run the AutoML regression sweep.
    Console.WriteLine($"Running AutoML regression experiment for {ExperimentTime} seconds...");
    ExperimentResult<RegressionMetrics> sweepResult =
        mlContext.Auto()
        .CreateRegressionExperiment(ExperimentTime)
        .Execute(trainView, LabelColumnName);

    // STEP 3: report validation metrics of the best run.
    RunDetail<RegressionMetrics> bestRun = sweepResult.BestRun;
    Console.WriteLine($"Total models produced: {sweepResult.RunDetails.Count()}");
    Console.WriteLine($"Best model's trainer: {bestRun.TrainerName}");
    Console.WriteLine($"Metrics of best model from validation data --");
    PrintMetrics(bestRun.ValidationMetrics);

    // STEP 4: evaluate the best model on the held-out test data.
    IDataView scoredTest = bestRun.Model.Transform(testView);
    RegressionMetrics testMetrics = mlContext.Regression.Evaluate(scoredTest, labelColumnName: LabelColumnName);
    Console.WriteLine($"Metrics of best model on test data --");
    PrintMetrics(testMetrics);

    // STEP 5: save the best model for later deployment and inferencing.
    using (FileStream fs = File.Create(ModelPath))
    {
        mlContext.Model.Save(bestRun.Model, trainView.Schema, fs);
    }

    // STEP 6: create a prediction engine from the best trained model.
    var predictionEngine = mlContext.Model.CreatePredictionEngine<TaxiTrip, TaxiTripFarePrediction>(bestRun.Model);

    // STEP 7: predict the fare for a sample taxi trip.
    var testTaxiTrip = new TaxiTrip
    {
        VendorId = "VTS",
        RateCode = 1,
        PassengerCount = 1,
        TripTimeInSeconds = 1140,
        TripDistance = 3.75f,
        PaymentType = "CRD"
    };
    var prediction = predictionEngine.Predict(testTaxiTrip);
    Console.WriteLine($"Predicted fare for test taxi trip: {prediction.FareAmount}");
    Console.WriteLine("Press any key to continue...");
    Console.ReadKey();
}
public void AutoFitRecommendationTest()
{
    // Column names specific to the trivial matrix factorization data set.
    string labelCol = "Label";
    string userCol = "User";
    string itemCol = "Item";
    string scoreCol = "Score";
    MLContext mlContext = new MLContext();

    // STEP 1: load the train/test data.
    var reader = new TextLoader(mlContext, GetLoaderArgs(labelCol, userCol, itemCol));
    var trainDataView = reader.Load(new MultiFileSource(GetDataPath(TestDatasets.trivialMatrixFactorization.trainFilename)));
    var testDataView = reader.Load(new MultiFileSource(GetDataPath(TestDatasets.trivialMatrixFactorization.testFilename)));

    // STEP 2: run a 5-second AutoML recommendation experiment.
    ExperimentResult<RegressionMetrics> sweepResult = mlContext.Auto()
        .CreateRecommendationExperiment(5)
        .Execute(trainDataView, testDataView,
                 new ColumnInformation()
    {
        LabelColumnName = labelCol,
        UserIdColumnName = userCol,
        ItemIdColumnName = itemCol
    });

    // The sweep should have produced multiple runs with usable metrics.
    RunDetail<RegressionMetrics> bestRun = sweepResult.BestRun;
    Assert.True(sweepResult.RunDetails.Count() > 1);
    Assert.NotNull(bestRun.ValidationMetrics);
    Assert.True(sweepResult.RunDetails.Max(i => i.ValidationMetrics.RSquared != 0));

    // The output schema should expose label, the (mapped) user/item pairs, and the score.
    var outputSchema = bestRun.Model.GetOutputSchema(trainDataView.Schema);
    var expectedOutputNames = new string[] { labelCol, userCol, userCol, itemCol, itemCol, scoreCol };
    foreach (var col in outputSchema)
    {
        Assert.True(col.Name == expectedOutputNames[col.Index]);
    }

    // Score the test data with the best model.
    IDataView scoredTest = bestRun.Model.Transform(testDataView);

    // Retrieve label column's index from the test IDataView.
    testDataView.Schema.TryGetColumnIndex(labelCol, out int labelColumnId);

    // Retrieve score column's index from the IDataView produced by the trained model.
    scoredTest.Schema.TryGetColumnIndex(scoreCol, out int scoreColumnId);

    var evalMetrics = mlContext.Recommendation().Evaluate(scoredTest, labelColumnName: labelCol, scoreColumnName: scoreCol);
    Assert.NotEqual(0, evalMetrics.MeanSquaredError);
}
/// <summary>
/// Print the top AutoML runs, ranked by micro-accuracy (best first).
/// </summary>
public static void PrintTopModels(ExperimentResult<MulticlassClassificationMetrics> experimentResult)
{
    // Keep only runs with usable metrics, best micro-accuracy first, top 3.
    var ranked = experimentResult.RunDetails
                 .Where(run => run.ValidationMetrics != null && !double.IsNaN(run.ValidationMetrics.MicroAccuracy))
                 .OrderByDescending(run => run.ValidationMetrics.MicroAccuracy)
                 .Take(3)
                 .ToList();
    PrintMulticlassClassificationMetricsHeader();
    var rank = 1;
    foreach (var run in ranked)
    {
        PrintIterationMetrics(rank++, run.TrainerName, run.ValidationMetrics, run.RuntimeInSeconds);
    }
}
/// <summary>
/// Queues every algorithm of every configured problem on the parallel runner,
/// executes them, then post-processes quality indicators, optional LaTeX
/// tables, box plots, and Friedman tables.
/// </summary>
/// <returns>The aggregated experiment result (timing, indicators, generated artifacts).</returns>
public ExperimentResult RunExperiment()
{
    MultithreadedAlgorithmRunner runner = new MultithreadedAlgorithmRunner();
    ExperimentResult result = new ExperimentResult();
    runner.StartParallelRunner(this);

    // Queue every configured algorithm for parallel execution.
    foreach (var experimentProblem in ExperimentProblems)
    {
        foreach (var algorithmDictionary in experimentProblem.AlgorithmDictionary)
        {
            var algorithmList = algorithmDictionary.Value;
            foreach (var algorithm in algorithmList)
            {
                runner.AddTaskForExecution(new object[] { algorithm });
            }
        }
    }

    // Run algorithms and time them.
    // FIX: Environment.TickCount is a 32-bit counter that wraps after ~24.9
    // days of uptime and has coarse (~10-16 ms) resolution; Stopwatch is
    // monotonic and precise, and ElapsedMilliseconds is already a long.
    var timer = System.Diagnostics.Stopwatch.StartNew();
    runner.ParallelExecution();
    timer.Stop();
    result.ElapsedTime = timer.ElapsedMilliseconds;

    // Process results: quality indicators plus optional tables and plots.
    result.Indicators = new QualityIndicatorTables(this).GetIndicators();
    if (this.GenerateQualityTables)
    {
        result.LatexPath = Path.Combine(this.ExperimentBaseDirectory, this.Name + ".tex");
        result.QualityTables = new LatexTables(this, result.LatexPath, result.Indicators).Generate();
    }
    if (this.GenerateBoxPlots)
    {
        result.BoxPlots = new BoxPlots(this, result.Indicators).Generate();
    }
    if (this.GenerateFriedmanTables)
    {
        new FriedmanTables(this, this.ExperimentBaseDirectory, result.Indicators).generate();
    }
    return result;
}
public static void CreateExperiment()
{
    // STEP 1: load the training data; bootstrap-sample it to build a test view.
    var tmpPath = GetAbsolutePath(TRAIN_DATA_FILEPATH);
    IDataView trainView = mlContext.Data.LoadFromTextFile<ModelInput>(
        path: tmpPath,
        hasHeader: true,
        separatorChar: '\t',
        allowQuoting: true,
        allowSparse: false);
    IDataView testView = mlContext.Data.BootstrapSample(trainView);

    // STEP 2: run the AutoML multiclass sweep.
    Console.WriteLine($"Running AutoML Multiclass classification experiment for {ExperimentTime} seconds...");
    MulticlassClassificationExperiment sweep = mlContext.Auto()
        .CreateMulticlassClassificationExperiment(ExperimentTime);
    ExperimentResult<MulticlassClassificationMetrics> sweepResult = sweep.Execute(trainView, labelColumnName: "reservation_status");

    // STEP 3: report validation metrics of the best run.
    RunDetail<MulticlassClassificationMetrics> bestRun = sweepResult.BestRun;
    Console.WriteLine($"Total models produced: {sweepResult.RunDetails.Count()}");
    Console.WriteLine($"Best model's trainer: {bestRun.TrainerName}");
    Console.WriteLine($"Metrics of best model from validation data --");
    PrintMulticlassClassificationMetrics(bestRun.ValidationMetrics);

    // STEP 4: cross-validate the best pipeline over the bootstrapped test data.
    IDataView scoredTest = bestRun.Model.Transform(testView);
    try
    {
        var testMetrics = mlContext.MulticlassClassification.CrossValidate(scoredTest, bestRun.Estimator, numberOfFolds: 5, labelColumnName: "reservation_status");
        Console.WriteLine($"Metrics of best model on test data --");
        PrintMulticlassClassificationFoldsAverageMetrics(testMetrics);
    }
    catch
    {
        // Some package versions do not support this evaluation path.
        Console.WriteLine($"Metrics not supported in this version");
    }

    // Save model.
    tmpPath = GetAbsolutePath(MODEL_FILEPATH2);
    SaveModel(mlContext, bestRun.Model, tmpPath, trainView.Schema);
}
/// <summary>
/// Print the top AutoML runs, ranked by root mean squared error (lower is better).
/// </summary>
private static void PrintTopModels(ExperimentResult<RegressionMetrics> experimentResult)
{
    // Keep only runs with usable metrics, lowest RMSE first, top 3.
    var ranked = experimentResult.RunDetails
                 .Where(run => run.ValidationMetrics != null && !double.IsNaN(run.ValidationMetrics.RootMeanSquaredError))
                 .OrderBy(run => run.ValidationMetrics.RootMeanSquaredError)
                 .Take(3)
                 .ToList();
    Console.WriteLine("Top models ranked by root mean squared error --");
    ConsoleHelper.PrintRegressionMetricsHeader();
    var rank = 1;
    foreach (var run in ranked)
    {
        ConsoleHelper.PrintIterationMetrics(rank++, run.TrainerName, run.ValidationMetrics, run.RuntimeInSeconds);
    }
}
/// <summary>
/// Prints the top AutoML runs, ranked by accuracy (best first).
/// </summary>
private static void PrintTopModels(ExperimentResult<BinaryClassificationMetrics> experimentResult)
{
    // Keep only runs with usable metrics, highest accuracy first, top 3.
    var ranked = experimentResult.RunDetails
                 .Where(run => run.ValidationMetrics != null && !double.IsNaN(run.ValidationMetrics.Accuracy))
                 .OrderByDescending(run => run.ValidationMetrics.Accuracy)
                 .Take(3)
                 .ToList();
    Console.WriteLine("Top models ranked by accuracy --");
    ConsoleHelper.PrintBinaryClassificationMetricsHeader();
    var rank = 1;
    foreach (var run in ranked)
    {
        ConsoleHelper.PrintIterationMetrics(rank++, run.TrainerName, run.ValidationMetrics, run.RuntimeInSeconds);
    }
}
/// <summary>
/// Re-fits the best AutoML pipeline on the combined data files, evaluates the
/// refit model against the data at dataPath, saves it, and returns it.
/// </summary>
public static ITransformer Retrain(ExperimentResult<MulticlassClassificationMetrics> experimentResult, string trainerName, MultiFileSource multiFileSource, string dataPath, string modelPath, TextLoader textLoader, MLContext mlContext)
{
    var evalData = textLoader.Load(dataPath);
    ConsoleHelper.ConsoleWriteHeader("=============== Re-fitting best pipeline ===============");
    var allData = textLoader.Load(multiFileSource);
    var refitModel = experimentResult.BestRun.Estimator.Fit(allData);
    EvaluateTrainedModelAndPrintMetrics(mlContext, refitModel, trainerName, evalData);
    SaveModel(mlContext, refitModel, modelPath, evalData);
    return refitModel;
}
/// <summary>
/// Prints the top AutoML runs, ordered by NDCG at the given truncation level.
/// </summary>
private static void PrintTopModels(ExperimentResult<RankingMetrics> experimentResult, uint optimizationMetricTruncationLevel)
{
    // NDCG@k lives at index k-1 of the per-run NDCG array.
    int ndcgIndex = (int)optimizationMetricTruncationLevel - 1;
    // Keep only runs with usable metrics, highest NDCG first, top 5.
    var ranked = experimentResult.RunDetails
                 .Where(run => run.ValidationMetrics != null && !double.IsNaN(run.ValidationMetrics.NormalizedDiscountedCumulativeGains[ndcgIndex]))
                 .OrderByDescending(run => run.ValidationMetrics.NormalizedDiscountedCumulativeGains[ndcgIndex])
                 .Take(5)
                 .ToList();
    Console.WriteLine($"Top models ordered by NDCG@{optimizationMetricTruncationLevel}");
    ConsoleHelper.PrintRankingMetricsHeader();
    var rank = 1;
    foreach (var run in ranked)
    {
        ConsoleHelper.PrintIterationMetrics(rank++, run.TrainerName, run.ValidationMetrics, run.RuntimeInSeconds);
    }
}
/// <summary>
/// Fits the preprocessing pipeline, runs an AutoML binary classification
/// experiment over the transformed data, and prints the best run's trainer
/// name and validation metrics.
/// </summary>
/// <param name="mlContext">Shared ML.NET context.</param>
/// <param name="pipeline">Preprocessing pipeline fitted and applied before training.</param>
/// <param name="file">Raw input data.</param>
/// <param name="progress">Progress handler invoked per produced model.</param>
/// <param name="settings">Experiment settings (time budget, metric, ...).</param>
public static void AutoML(MLContext mlContext, IEstimator<ITransformer> pipeline, IDataView file, Progress<RunDetail<BinaryClassificationMetrics>> progress, BinaryExperimentSettings settings)
{
    // Pre-transform the data once, then hand it to AutoML.
    IDataView transdata = pipeline.Fit(file).Transform(file);

    ExperimentResult<BinaryClassificationMetrics> experimentResult =
        mlContext.Auto()
            .CreateBinaryClassificationExperiment(settings)
            .Execute(trainData: transdata, labelColumnName: nameof(transformOutput.Label), progressHandler: progress);

    var bestRun = experimentResult.BestRun;
    Console.WriteLine();
    Console.WriteLine($"Trainername- {bestRun.TrainerName}");
    Console.WriteLine($"Accuracy- {bestRun.ValidationMetrics.Accuracy}");
    Console.WriteLine($"AreaUnderRocCurve- {bestRun.ValidationMetrics.AreaUnderRocCurve}");
    Console.WriteLine();

    // Kept for the commented-out save call below; the cast yields null when the
    // best model is not a TransformerChain.
    var model = bestRun.Model as TransformerChain<ITransformer>;
    //mlContext.Model.Save(model, transdata.Schema, @"C:\Users\ludwi\source\repos\JugendForscht");
}
/// <summary>
/// Extension method that dumps every run detail of an AutoML regression
/// experiment, followed by the best run and its trainer/validation metrics.
/// </summary>
/// <param name="result">Completed AutoML regression experiment result.</param>
public static void Print(this ExperimentResult<RegressionMetrics> result)
{
    Console.WriteLine("Regession metric...");

    // List every run produced during the experiment.
    var allRuns = result.RunDetails.ToList();
    Console.WriteLine("details");
    foreach (var runDetail in allRuns)
    {
        Console.WriteLine(runDetail);
    }

    // Then summarize the winning run.
    var best = result.BestRun;
    Console.WriteLine("best run");
    Console.WriteLine(best);
    Console.WriteLine("trainerName " + best.TrainerName);
    Console.WriteLine("validationMetrics " + best.ValidationMetrics);
}
/// <summary>
/// Runs an AutoML regression experiment over the shared training data view,
/// using a custom pre-featurizer and adjusted column information, and returns
/// the experiment result after printing the top models.
/// </summary>
/// <param name="mlContext">Shared ML.NET context.</param>
/// <param name="columnInference">Column information inferred from the data file.</param>
/// <returns>The completed experiment result.</returns>
private static ExperimentResult<RegressionMetrics> RunAutoMLExperiment(MLContext mlContext, ColumnInferenceResults columnInference)
{
    // Show a sample of the training data in the console.
    ConsoleHelper.ShowDataViewInConsole(mlContext, TrainDataView);

    // Pre-featurizer: maps payment_type "CSH" to a boolean is_cash column.
    // AutoML fits it on the training split only and applies the trained
    // transform to both the train and validation splits internally.
    IEstimator<ITransformer> preFeaturizer = mlContext.Transforms.Conversion.MapValue(
        "is_cash",
        new[] { new KeyValuePair<string, bool>("CSH", true) },
        "payment_type");

    // payment_type is consumed by the pre-featurizer above, so the experiment
    // itself must ignore it rather than treat it as a categorical feature.
    ColumnInformation columnInformation = columnInference.ColumnInformation;
    columnInformation.CategoricalColumnNames.Remove("payment_type");
    columnInformation.IgnoredColumnNames.Add("payment_type");

    // Cancellation source plus a progress handler AutoML invokes after each
    // model it produces and evaluates.
    var cts = new CancellationTokenSource();
    var progressHandler = new RegressionExperimentProgressHandler();

    var experimentSettings = CreateExperimentSettings(mlContext, cts);
    var experiment = mlContext.Auto().CreateRegressionExperiment(experimentSettings);

    ConsoleHelper.ConsoleWriteHeader("=============== Running AutoML experiment ===============");
    Console.WriteLine("Running AutoML regression experiment...");
    var stopwatch = Stopwatch.StartNew();

    // Any key press cancels the experiment via the token source.
    CancelExperimentAfterAnyKeyPress(cts);

    ExperimentResult<RegressionMetrics> experimentResult =
        experiment.Execute(TrainDataView, columnInformation, preFeaturizer, progressHandler);
    Console.WriteLine($"{experimentResult.RunDetails.Count()} models were returned after {stopwatch.Elapsed.TotalSeconds:0.00} seconds{Environment.NewLine}");

    // Print the best models found by AutoML.
    PrintTopModels(experimentResult);

    return experimentResult;
}
/// <summary>
/// End-to-end AutoML binary classification sample for sentiment data:
/// trains for a fixed time budget, prints validation and test metrics,
/// saves the best model, and scores one sample issue.
/// </summary>
public static void Run()
{
    var mlContext = new MLContext();

    // Load the train and test sets.
    IDataView trainDataView = mlContext.Data.LoadFromTextFile<SentimentIssue>(TrainDataPath, hasHeader: true);
    IDataView testDataView = mlContext.Data.LoadFromTextFile<SentimentIssue>(TestDataPath, hasHeader: true);

    // Run the AutoML experiment for the configured time budget.
    Console.WriteLine($"Running AutoML binary classification experiment for {ExperimentTime} seconds...");
    ExperimentResult<BinaryClassificationMetrics> experimentResult =
        mlContext.Auto()
            .CreateBinaryClassificationExperiment(ExperimentTime)
            .Execute(trainDataView);

    // Report the winning run's validation metrics.
    RunDetail<BinaryClassificationMetrics> bestRun = experimentResult.BestRun;
    Console.WriteLine($"Total models produced: {experimentResult.RunDetails.Count()}");
    Console.WriteLine($"Best model's trainer: {bestRun.TrainerName}");
    Console.WriteLine("Metrics of best model from validation data --");
    PrintMetrics(bestRun.ValidationMetrics);

    // Evaluate the best model on the held-out test data.
    IDataView testDataViewWithBestScore = bestRun.Model.Transform(testDataView);
    BinaryClassificationMetrics testMetrics = mlContext.BinaryClassification.EvaluateNonCalibrated(testDataViewWithBestScore);
    Console.WriteLine("Metrics of best model on test data --");
    PrintMetrics(testMetrics);

    // Persist the best model for later deployment and inferencing.
    using (FileStream fs = File.Create(ModelPath))
    {
        mlContext.Model.Save(bestRun.Model, trainDataView.Schema, fs);
    }

    // Score a single hand-written sentiment issue with the best model.
    var predictionEngine = mlContext.Model.CreatePredictionEngine<SentimentIssue, SentimentPrediction>(bestRun.Model);
    var testSentimentIssue = new SentimentIssue
    {
        Text = "I hope this helps."
    };
    var prediction = predictionEngine.Predict(testSentimentIssue);
    Console.WriteLine($"Predicted sentiment for test issue: {prediction.Prediction}");

    Console.WriteLine("Press any key to continue...");
    Console.ReadKey();
}
/// <summary>
/// AutoML multiclass classification sample over the taxi-trip data set:
/// trains for a fixed time budget, prints the best run's validation metrics,
/// then scores one hand-written trip and prints its per-class scores.
/// </summary>
public static void Run()
{
    var mlContext = new MLContext();

    // Load the comma-separated train and test sets.
    IDataView trainDataView = mlContext.Data.LoadFromTextFile<TaxiTrip>(TrainDataPath, separatorChar: ',', hasHeader: true);
    // NOTE(review): testDataView is loaded but never used in this method —
    // kept to preserve the original behavior; confirm whether it can be removed.
    IDataView testDataView = mlContext.Data.LoadFromTextFile<TaxiTrip>(TestDataPath, separatorChar: ',', hasHeader: true);

    // Run the AutoML experiment for the configured time budget.
    Console.WriteLine($"Running AutoML multiclass classification experiment for {ExperimentTime} seconds...");
    ExperimentResult<MulticlassClassificationMetrics> experimentResult =
        mlContext.Auto()
            .CreateMulticlassClassificationExperiment(ExperimentTime)
            .Execute(trainDataView, LabelColumnName);

    // Report the winning run's validation metrics.
    RunDetail<MulticlassClassificationMetrics> bestRun = experimentResult.BestRun;
    Console.WriteLine($"Total models produced: {experimentResult.RunDetails.Count()}");
    Console.WriteLine($"Best model's trainer: {bestRun.TrainerName}");
    Console.WriteLine("Metrics of best model from validation data --");
    PrintMetrics(bestRun.ValidationMetrics);

    // Score a single hand-written taxi trip with the best model.
    var predictionEngine = mlContext.Model.CreatePredictionEngine<TaxiTrip, TaxiTripFarePrediction>(bestRun.Model);
    var testTaxiTripData = new TaxiTrip
    {
        VendorId = "CMT",
        RateCode = 1,
        PassengerCount = 1,
        TripTimeInSeconds = 1271,
        TripDistance = 3.8F,
        PaymentType = "CRD",
        FareAmount = 17.5F,
    };
    var prediction = predictionEngine.Predict(testTaxiTripData);

    // Print the prediction's per-class score array.
    Console.WriteLine($"Predicted number for test data:");
    foreach (var score in prediction.Payment)
    {
        Console.WriteLine(score.ToString());
    }

    Console.WriteLine("Press any key to continue...");
    Console.ReadKey();
}