/* static readonly string TrainDataPath = Path.Combine(Environment.CurrentDirectory, "Data", "winequality-data-train.csv"); * static readonly string TestDataPath = Path.Combine(Environment.CurrentDirectory, "Data", "winequality-data-test.csv");*/
/// <summary>
/// Runs an AutoML regression experiment on the given training CSV, evaluates the best
/// run on a 20% hold-out split of the same data, and saves the model to ModelFilePath.
/// </summary>
/// <param name="label">Name of the label column to regress on.</param>
/// <param name="trainDataPath">Path to a comma-separated training file with a header row.</param>
/// <param name="experimentTime">AutoML experiment budget (seconds, per the sibling methods' usage).</param>
public static void TrainAndSave(string label, string trainDataPath, uint experimentTime)
{
    MLContext mlContext = new MLContext(seed: 0);

    // Prepare data
    var trainData = mlContext.Data.LoadFromTextFile<ModelInput>(path: trainDataPath, separatorChar: ',', hasHeader: true);
    //var testData = mlContext.Data.LoadFromTextFile<ModelInput>(path: TestDataPath, separatorChar: ',', hasHeader: true);
    // No separate test file here: carve a 20% test set out of the training data.
    var testData = mlContext.Data.TrainTestSplit(trainData, testFraction: 0.2).TestSet;

    var progressHandler = new RegressionExperimentProgressHandler();
    //uint ExperimentTime = 200;
    ExperimentResult<RegressionMetrics> experimentResult = mlContext.Auto()
        .CreateRegressionExperiment(experimentTime)
        .Execute(trainData, label, progressHandler: progressHandler);
    //Debugger.PrintTopModels(experimentResult);
    RunDetail<RegressionMetrics> best = experimentResult.BestRun;
    ITransformer trainedModel = best.Model;

    // Evaluate the best run on the hold-out split
    var predictions = trainedModel.Transform(testData);
    var metrics = mlContext.Regression.Evaluate(predictions, labelColumnName: label, scoreColumnName: "Score");
    //Debugger.PrintRegressionMetrics(best.TrainerName, metrics);

    // Save the model
    using (var stream = System.IO.File.Create(ModelFilePath))
    {
        mlContext.Model.Save(trainedModel, trainData.Schema, stream);
    }
}
/// <summary>
/// Runs an AutoML regression experiment on the wine-quality training set, evaluates the
/// best run on the separate test set, and saves the trained model to ModelFilePath.
/// </summary>
public static void TrainAndSave()
{
    MLContext mlContext = new MLContext(seed: 1);

    // Prepare data
    var trainData = mlContext.Data.LoadFromTextFile<WineData>(path: TrainDataPath, separatorChar: ',', hasHeader: true);
    var testData = mlContext.Data.LoadFromTextFile<WineData>(path: TestDataPath, separatorChar: ',', hasHeader: true);

    var progressHandler = new RegressionExperimentProgressHandler();
    // FIX: local renamed from PascalCase "ExperimentTime" to camelCase per C# conventions.
    uint experimentTime = 200;
    ExperimentResult<RegressionMetrics> experimentResult = mlContext.Auto()
        .CreateRegressionExperiment(experimentTime)
        .Execute(trainData, "Label", progressHandler: progressHandler);
    Debugger.PrintTopModels(experimentResult);

    RunDetail<RegressionMetrics> best = experimentResult.BestRun;
    ITransformer trainedModel = best.Model;

    // Evaluate the best run on the held-out test set
    var predictions = trainedModel.Transform(testData);
    var metrics = mlContext.Regression.Evaluate(predictions, labelColumnName: "Label", scoreColumnName: "Score");
    Debugger.PrintRegressionMetrics(best.TrainerName, metrics);

    // Save the model
    Console.WriteLine("====== Save model to local file =========");
    mlContext.Model.Save(trainedModel, trainData.Schema, ModelFilePath);
}
/// <summary>
/// Consumes a completed (or faulted) HTTP request task, wraps its outcome together with
/// current crawl progress into a ResponseModel, and forwards it to ProcessResponse.
/// On failure a synthetic status-0 response is forwarded instead.
/// </summary>
/// <param name="requestTask">The HTTP request task; reading .Result blocks and may throw AggregateException.</param>
/// <param name="rm">The request descriptor (used for its Url in error logging).</param>
private void ReadResponse(Task<HttpResponseMessage> requestTask, RequestModel rm)
{
    // Snapshot progress counters under the lock so both numbers are consistent.
    int pendingUrls = 0;
    int totalUrls = 0;
    lock (UrlList)
    {
        pendingUrls = UrlList.Count(l => l.CheckingFinished == false || l.StatusCode == REQUEST_IN_PROGRESS_STATUS_CODE);
        totalUrls = UrlList.Count;
    }
    string topXWords = ReadTopXWords();
    try
    {
        // NOTE(review): blocking on .Result of an async task — consider awaiting upstream.
        HttpResponseMessage response = requestTask.Result;
        if (response == null)
        {
            return;
        }
        RunDetail runDetails = new RunDetail("", totalUrls - pendingUrls, totalUrls, (int)this.timer.Elapsed.TotalSeconds, topXWords);
        var responseModel = new ResponseModel(response, rm, _settings, runDetails);
        ProcessResponse(responseModel);
    }
    catch (Exception ex)
    {
        // FIX: ex.InnerException can be null (the original dereferenced it unconditionally
        // and would throw NullReferenceException); fall back to the outer message.
        string errorMessage = ex.InnerException?.Message ?? ex.Message;
        Console.WriteLine(errorMessage + " for " + rm.Url);
        HttpResponseMessage response = new HttpResponseMessage(0);
        // NOTE(review): the success path passes totalUrls as the third argument, but this
        // path passes pendingUrls — kept as-is; confirm whether that asymmetry is intended.
        RunDetail runDetails = new RunDetail(errorMessage, totalUrls - pendingUrls, pendingUrls, (int)this.timer.Elapsed.TotalSeconds, topXWords);
        var responseModel = new ResponseModel(response, rm, _settings, runDetails);
        ProcessResponse(responseModel);
    }
}
/// <summary>
/// Runs an AutoML multiclass classification experiment over the training CSV, prints the
/// model leaderboard and test-set metrics for the best run, and saves the model to modelPath.
/// Reads Context, ExperimentTime, Label, Score, PredictedLabel and paths declared elsewhere
/// in this class.
/// </summary>
public static void AutoTrain()
{
    // STEP 1: data loading
    IDataView trainingDataView = LoadDataFromCsv(trainingCsv);
    IDataView testingDataView = LoadDataFromCsv(testingCsv);

    // STEP 2: run an AutoML multiclass classification experiment
    WriteLineColor($"{Environment.NewLine}AutoML multiclass classification experiment for {ExperimentTime} seconds...", ConsoleColor.Yellow);
    var progressHandler = new MulticlassExperimentProgressHandler();
    ExperimentResult<MulticlassClassificationMetrics> experimentResult = Context.Auto()
        .CreateMulticlassClassificationExperiment(ExperimentTime)
        .Execute(trainingDataView, Label, progressHandler: progressHandler);

    // STEP 3: evaluate the model and print metrics
    RunDetail<MulticlassClassificationMetrics> bestRun = experimentResult.BestRun;
    WriteLineColor($"{Environment.NewLine}Top Trainer (by accuracy)", ConsoleColor.Yellow);
    PrintTopModels(experimentResult);
    WriteLineColor($"{Environment.NewLine}TRAINING USING: {bestRun.TrainerName}", ConsoleColor.Cyan);
    Model = bestRun.Model;
    var predictions = Model.Transform(testingDataView);
    var metrics = Context.MulticlassClassification.Evaluate(data: predictions, labelColumnName: Label, scoreColumnName: Score, predictedLabelColumnName: PredictedLabel);
    PrintMultiClassClassificationMetrics(bestRun.TrainerName, metrics);

    // STEP 4: save the model
    Context.Model.Save(Model, trainingDataView.Schema, modelPath);
}
/// <summary>
/// Trains a multiclass digit classifier with AutoML on optdigits-style data (64 pixel
/// columns, label "Number" in column 64), evaluates on the test file, and saves the
/// trained model to ModelPath. Any failure is logged to the console and swallowed.
/// </summary>
/// <param name="mlContext">Shared ML.NET context.</param>
public static void Train(MLContext mlContext)
{
    try
    {
        // STEP 1: Load the data
        var trainData = mlContext.Data.LoadFromTextFile(path: TrainDataPath,
            columns: new[]
            {
                new TextLoader.Column(nameof(InputData.PixelValues), DataKind.Single, 0, 63),
                new TextLoader.Column("Number", DataKind.Single, 64)
            },
            hasHeader: false,
            separatorChar: ','
        );
        var testData = mlContext.Data.LoadFromTextFile(path: TestDataPath,
            columns: new[]
            {
                new TextLoader.Column(nameof(InputData.PixelValues), DataKind.Single, 0, 63),
                new TextLoader.Column("Number", DataKind.Single, 64)
            },
            hasHeader: false,
            separatorChar: ','
        );

        // STEP 2: Initialize our user-defined progress handler that AutoML will
        // invoke after each model it produces and evaluates.
        var progressHandler = new MulticlassExperimentProgressHandler();

        // STEP 3: Run an AutoML multiclass classification experiment
        ConsoleHelper.ConsoleWriteHeader("=============== Running AutoML experiment ===============");
        Console.WriteLine($"Running AutoML multiclass classification experiment for {ExperimentTime} seconds...");
        ExperimentResult<MulticlassClassificationMetrics> experimentResult = mlContext.Auto()
            .CreateMulticlassClassificationExperiment(ExperimentTime)
            .Execute(trainData, "Number", progressHandler: progressHandler);

        // Print top models found by AutoML
        Console.WriteLine();
        PrintTopModels(experimentResult);

        // STEP 4: Evaluate the model and print metrics
        ConsoleHelper.ConsoleWriteHeader("===== Evaluating model's accuracy with test data =====");
        RunDetail<MulticlassClassificationMetrics> bestRun = experimentResult.BestRun;
        ITransformer trainedModel = bestRun.Model;
        var predictions = trainedModel.Transform(testData);
        var metrics = mlContext.MulticlassClassification.Evaluate(data: predictions, labelColumnName: "Number", scoreColumnName: "Score");
        ConsoleHelper.PrintMulticlassClassificationMetrics(bestRun.TrainerName, metrics);

        // STEP 5: Save/persist the trained model to a .ZIP file
        mlContext.Model.Save(trainedModel, trainData.Schema, ModelPath);
        Console.WriteLine("The model is saved to {0}", ModelPath);
    }
    catch (Exception ex)
    {
        // Best-effort sample: training failures are logged rather than propagated.
        Console.WriteLine(ex);
    }
}
/// <summary>
/// Loads the taxi-fare train/test data, runs an AutoML regression experiment, evaluates
/// the best run on the test set, and exports the trained model to ONNX (MODEL_NAME).
/// </summary>
/// <param name="mlContext">Shared ML.NET context.</param>
/// <returns>The trained model of the best AutoML run.</returns>
private static ITransformer BuildTrainEvaluateAndSaveModel(MLContext mlContext)
{
    // STEP 1: Common data loading configuration
    /* contents of csv file
     * vendor_id,rate_code,passenger_count,trip_time_in_secs,trip_distance,payment_type,fare_amount
     * VTS,1,1,1140,3.75,CRD,15.5
     * VTS,1,1,480,2.72,CRD,10.0
     * VTS,1,1,1680,7.8,CSH,26.5
     * VTS,1,1,600,4.73,CSH,14.5 */
    IDataView trainingDataView = mlContext.Data.LoadFromTextFile<TaxiTrip>(TrainDataPath, hasHeader: true, separatorChar: ',');
    IDataView testDataView = mlContext.Data.LoadFromTextFile<TaxiTrip>(TestDataPath, hasHeader: true, separatorChar: ',');

    // Display first few rows of the training data
    ConsoleHelper.ShowDataViewInConsole(mlContext, trainingDataView);

    // STEP 2: Initialize our user-defined progress handler that AutoML will
    // invoke after each model it produces and evaluates.
    var progressHandler = new RegressionExperimentProgressHandler();

    // STEP 3: Run AutoML regression experiment
    ConsoleHelper.ConsoleWriteHeader("=============== Training the model ===============");
    Console.WriteLine($"Running AutoML regression experiment for {ExperimentTime} seconds...");
    ExperimentResult<RegressionMetrics> experimentResult = mlContext.Auto()
        .CreateRegressionExperiment(ExperimentTime)
        .Execute(trainingDataView, LabelColumnName, progressHandler: progressHandler);

    // Print top models found by AutoML
    Console.WriteLine();
    PrintTopModels(experimentResult);

    // STEP 4: Evaluate the model and print metrics
    ConsoleHelper.ConsoleWriteHeader("===== Evaluating model's accuracy with test data =====");
    RunDetail<RegressionMetrics> best = experimentResult.BestRun;
    ITransformer trainedModel = best.Model;
    IDataView predictions = trainedModel.Transform(testDataView);
    var metrics = mlContext.Regression.Evaluate(predictions, labelColumnName: LabelColumnName, scoreColumnName: "Score");

    // Print metrics from top model
    ConsoleHelper.PrintRegressionMetrics(best.TrainerName, metrics);

    // STEP 5: Save/persist the trained model by converting it to ONNX format
    using (var stream = File.Create(MODEL_NAME))
    {
        mlContext.Model.ConvertToOnnx(trainedModel, trainingDataView, stream);
    }
    Console.WriteLine("The model is saved to {0}", MODEL_NAME);
    return(trainedModel);
}
/// <summary>
/// Wraps a regression AutoML run; the trained model is exposed only when the run
/// completed without an exception. The exception (if any) is always recorded.
/// </summary>
public TaskAgnosticIterationResult(RunDetail<RegressionMetrics> runDetail, string primaryMetricName = "RSquared")
    : this(runDetail, runDetail.ValidationMetrics, primaryMetricName)
{
    Exception = runDetail.Exception;
    if (Exception == null)
    {
        Model = runDetail.Model;
    }
}
/// <summary>
/// Wraps a multiclass-classification AutoML run; the trained model is exposed only when
/// the run completed without an exception. The exception (if any) is always recorded.
/// </summary>
public TaskAgnosticIterationResult(RunDetail<MulticlassClassificationMetrics> runDetail, string primaryMetricName = "MicroAccuracy")
    : this(runDetail, runDetail.ValidationMetrics, primaryMetricName)
{
    Exception = runDetail.Exception;
    if (Exception == null)
    {
        Model = runDetail.Model;
    }
}
/// <summary>
/// Re-fit best pipeline on all available data (train + test combined), so the final
/// model is trained on every row.
/// </summary>
private static ITransformer RefitBestPipeline(MLContext mlContext, ExperimentResult<RegressionMetrics> experimentResult, ColumnInferenceResults columnInference)
{
    ConsoleHelper.ConsoleWriteHeader("=============== Re-fitting best pipeline ===============");
    var loader = mlContext.Data.CreateTextLoader(columnInference.TextLoaderOptions);
    var allData = loader.Load(new MultiFileSource(TrainDataPath, TestDataPath));
    return experimentResult.BestRun.Estimator.Fit(allData);
}
/// <summary>
/// AutoML progress callback: writes one summary line per completed iteration and, when
/// the iteration failed, also prints the exception.
/// </summary>
public void Report(RunDetail<RegressionMetrics> iterationResult)
{
    _iterationIndex++;
    var summary = $"Report index:{_iterationIndex},TrainerName:{iterationResult.TrainerName},RuntimeInSeconds:{iterationResult.RuntimeInSeconds}";
    Console.WriteLine(summary);
    if (iterationResult.Exception is not null)
    {
        Console.WriteLine($"Exception during AutoML iteration: {iterationResult.Exception}");
    }
}
/// <summary>
/// End-to-end AutoML ranking test: runs a 5-second ranking experiment through four
/// Execute overloads and validates metrics and the output schema of each best run.
/// </summary>
public void AutoFitRankingTest()
{
    string labelColumnName = "Label";
    string scoreColumnName = "Score";
    string groupIdColumnName = "GroupId";
    string featuresColumnVectorNameA = "FeatureVectorA";
    string featuresColumnVectorNameB = "FeatureVectorB";
    var mlContext = new MLContext(1); // fixed seed for reproducibility

    // STEP 1: Load data; first 500 rows become the validation set, the rest the train set.
    var reader = new TextLoader(mlContext, GetLoaderArgsRank(labelColumnName, groupIdColumnName, featuresColumnVectorNameA, featuresColumnVectorNameB));
    var trainDataView = reader.Load(new MultiFileSource(DatasetUtil.GetMLSRDataset()));
    var testDataView = mlContext.Data.TakeRows(trainDataView, 500);
    trainDataView = mlContext.Data.SkipRows(trainDataView, 500);

    // STEP 2: Run the same AutoML ranking experiment through each Execute overload.
    var experiment = mlContext.Auto()
        .CreateRankingExperiment(5);
    ExperimentResult<RankingMetrics>[] experimentResults =
    {
        // label + group id passed by name
        experiment.Execute(trainDataView, labelColumnName, groupIdColumnName),
        // explicit validation set, default column names
        experiment.Execute(trainDataView, testDataView),
        // explicit validation set + ColumnInformation
        experiment.Execute(trainDataView, testDataView,
            new ColumnInformation()
            {
                LabelColumnName = labelColumnName,
                GroupIdColumnName = groupIdColumnName,
            }),
        // as above, also using the group id as the sampling key
        experiment.Execute(trainDataView, testDataView,
            new ColumnInformation()
            {
                LabelColumnName = labelColumnName,
                GroupIdColumnName = groupIdColumnName,
                SamplingKeyColumnName = groupIdColumnName
            })
    };

    for (int i = 0; i < experimentResults.Length; i++)
    {
        RunDetail<RankingMetrics> bestRun = experimentResults[i].BestRun;
        Assert.True(experimentResults[i].RunDetails.Count() > 0);
        Assert.NotNull(bestRun.ValidationMetrics);
        Assert.True(bestRun.ValidationMetrics.NormalizedDiscountedCumulativeGains.Last() > 0.4);
        Assert.True(bestRun.ValidationMetrics.DiscountedCumulativeGains.Last() > 20);
        var outputSchema = bestRun.Model.GetOutputSchema(trainDataView.Schema);
        // NOTE(review): groupIdColumnName is expected twice — presumably the raw column
        // plus a key-mapped copy; confirm against the ranking pipeline's output schema.
        var expectedOutputNames = new string[] { labelColumnName, groupIdColumnName, groupIdColumnName, featuresColumnVectorNameA, featuresColumnVectorNameB, "Features", scoreColumnName };
        foreach (var col in outputSchema)
        {
            Assert.True(col.Name == expectedOutputNames[col.Index]);
        }
    }
}
/// <summary>
/// Builds a LearningResult from a typed AutoML run: the task-agnostic fields come from
/// the non-generic overload, then the generic-only members are copied on top.
/// </summary>
public static LearningResult CreateFromRunDetail<T>(RunDetail<T> runDetail, double result, DataViewSchema schema = null, string name = null)
{
    var learningResult = CreateFromRunDetail(runDetail as RunDetail, result, schema, name);
    learningResult.Model = runDetail.Model;
    learningResult.Exception = runDetail.Exception;
    learningResult.ValidationMetrics = runDetail.ValidationMetrics;
    return learningResult;
}
/// <summary>
/// Runs an AutoML regression experiment on the taxi-fare data, evaluates the best model
/// on the test set, saves it to ModelPath, and prints a sample fare prediction.
/// </summary>
public static void Run()
{
    MLContext mlContext = new MLContext();

    // STEP 1: Load data
    IDataView trainDataView = mlContext.Data.LoadFromTextFile<TaxiTrip>(TrainDataPath, hasHeader: true, separatorChar: ',');
    IDataView testDataView = mlContext.Data.LoadFromTextFile<TaxiTrip>(TestDataPath, hasHeader: true, separatorChar: ',');

    // STEP 2: Run AutoML experiment
    Console.WriteLine($"Running AutoML regression experiment for {ExperimentTime} seconds...");
    ExperimentResult<RegressionMetrics> experimentResult = mlContext.Auto()
        .CreateRegressionExperiment(ExperimentTime)
        .Execute(trainDataView, LabelColumnName);

    // STEP 3: Print metric from best model
    RunDetail<RegressionMetrics> bestRun = experimentResult.BestRun;
    Console.WriteLine($"Total models produced: {experimentResult.RunDetails.Count()}");
    Console.WriteLine($"Best model's trainer: {bestRun.TrainerName}");
    Console.WriteLine($"Metrics of best model from validation data --");
    PrintMetrics(bestRun.ValidationMetrics);

    // STEP 4: Evaluate test data
    IDataView testDataViewWithBestScore = bestRun.Model.Transform(testDataView);
    RegressionMetrics testMetrics = mlContext.Regression.Evaluate(testDataViewWithBestScore, labelColumnName: LabelColumnName);
    Console.WriteLine($"Metrics of best model on test data --");
    PrintMetrics(testMetrics);

    // STEP 5: Save the best model for later deployment and inferencing
    using (FileStream fs = File.Create(ModelPath))
        mlContext.Model.Save(bestRun.Model, trainDataView.Schema, fs);

    // STEP 6: Create prediction engine from the best trained model
    var predictionEngine = mlContext.Model.CreatePredictionEngine<TaxiTrip, TaxiTripFarePrediction>(bestRun.Model);

    // STEP 7: Initialize a new test taxi trip, and get the predicted fare
    var testTaxiTrip = new TaxiTrip { VendorId = "VTS", RateCode = 1, PassengerCount = 1, TripTimeInSeconds = 1140, TripDistance = 3.75f, PaymentType = "CRD" };
    var prediction = predictionEngine.Predict(testTaxiTrip);
    Console.WriteLine($"Predicted fare for test taxi trip: {prediction.FareAmount}");
    Console.WriteLine("Press any key to continue...");
    Console.ReadKey();
}
/// <summary>
/// End-to-end AutoML recommendation test: trains on the trivial matrix-factorization
/// dataset, checks the best run's output schema and metrics, and evaluates on the test set.
/// </summary>
public void AutoFitRecommendationTest()
{
    // Specific column names of the considered data set
    string labelColumnName = "Label";
    string userColumnName = "User";
    string itemColumnName = "Item";
    string scoreColumnName = "Score";
    MLContext mlContext = new MLContext();

    // STEP 1: Load data
    var reader = new TextLoader(mlContext, GetLoaderArgs(labelColumnName, userColumnName, itemColumnName));
    var trainDataView = reader.Load(new MultiFileSource(GetDataPath(TestDatasets.trivialMatrixFactorization.trainFilename)));
    var testDataView = reader.Load(new MultiFileSource(GetDataPath(TestDatasets.trivialMatrixFactorization.testFilename)));

    // STEP 2: Run AutoML experiment (5-second budget)
    ExperimentResult<RegressionMetrics> experimentResult = mlContext.Auto()
        .CreateRecommendationExperiment(5)
        .Execute(trainDataView, testDataView,
            new ColumnInformation()
            {
                LabelColumnName = labelColumnName,
                UserIdColumnName = userColumnName,
                ItemIdColumnName = itemColumnName
            });

    RunDetail<RegressionMetrics> bestRun = experimentResult.BestRun;
    Assert.True(experimentResult.RunDetails.Count() > 1);
    Assert.NotNull(bestRun.ValidationMetrics);
    // FIX: Max(predicate) over booleans is an obscure spelling of Any(predicate);
    // Any is equivalent (true iff some element matches) and states the intent.
    Assert.True(experimentResult.RunDetails.Any(i => i.ValidationMetrics.RSquared != 0));

    var outputSchema = bestRun.Model.GetOutputSchema(trainDataView.Schema);
    // NOTE(review): user/item names are expected twice — presumably raw column plus a
    // key-mapped copy; confirm against the recommendation pipeline's output schema.
    var expectedOutputNames = new string[] { labelColumnName, userColumnName, userColumnName, itemColumnName, itemColumnName, scoreColumnName };
    foreach (var col in outputSchema)
    {
        Assert.True(col.Name == expectedOutputNames[col.Index]);
    }

    IDataView testDataViewWithBestScore = bestRun.Model.Transform(testDataView);
    // FIX: removed two TryGetColumnIndex lookups whose out-values were never used,
    // and renamed the misspelled local "metrices" to "metrics".
    var metrics = mlContext.Recommendation().Evaluate(testDataViewWithBestScore, labelColumnName: labelColumnName, scoreColumnName: scoreColumnName);
    Assert.NotEqual(0, metrics.MeanSquaredError);
}
/// <summary>
/// AutoML progress callback: projects the run's trainer name and validation metrics into
/// an AutomationExperiment view model and raises the Progressed event, if anyone listens.
/// </summary>
public void Report(RunDetail<MulticlassClassificationMetrics> value)
{
    var metrics = value.ValidationMetrics;
    var model = new AutomationExperiment
    {
        Trainer = value.TrainerName,
        LogLoss = metrics?.LogLoss,
        LogLossReduction = metrics?.LogLossReduction,
        MicroAccuracy = metrics?.MicroAccuracy,
        MacroAccuracy = metrics?.MacroAccuracy
    };
    Progressed?.Invoke(this, new ProgressEventArgs { Model = model });
}
/// <summary>
/// Runs an AutoML multiclass classification experiment on the tab-separated training file
/// (label column "reservation_status"), prints validation metrics for the best run,
/// attempts a 5-fold cross-validation, and saves the best model to MODEL_FILEPATH2.
/// Uses the mlContext declared elsewhere in this class.
/// </summary>
public static void CreateExperiment()
{
    // Load Data; a bootstrap resample of the training data stands in for a test set.
    var tmpPath = GetAbsolutePath(TRAIN_DATA_FILEPATH);
    IDataView trainingDataView = mlContext.Data.LoadFromTextFile<ModelInput>(
        path: tmpPath,
        hasHeader: true,
        separatorChar: '\t',
        allowQuoting: true,
        allowSparse: false);
    IDataView testDataView = mlContext.Data.BootstrapSample(trainingDataView);

    // STEP 2: Run AutoML experiment
    Console.WriteLine($"Running AutoML Multiclass classification experiment for {ExperimentTime} seconds...");
    //ExperimentResult<MulticlassClassificationMetrics> experimentResult = mlContext.Auto()
    //    .CreateMulticlassClassificationExperiment(ExperimentTime)
    //    .Execute(trainingDataView, labelColumnName: "reservation_status");
    MulticlassClassificationExperiment experiment = mlContext.Auto()
        .CreateMulticlassClassificationExperiment(ExperimentTime);
    ExperimentResult<MulticlassClassificationMetrics> experimentResult = experiment.Execute(trainingDataView, labelColumnName: "reservation_status");

    // STEP 3: Print metric from the best model
    RunDetail<MulticlassClassificationMetrics> bestRun = experimentResult.BestRun;
    Console.WriteLine($"Total models produced: {experimentResult.RunDetails.Count()}");
    Console.WriteLine($"Best model's trainer: {bestRun.TrainerName}");
    Console.WriteLine($"Metrics of best model from validation data --");
    PrintMulticlassClassificationMetrics(bestRun.ValidationMetrics);

    // STEP 4: Evaluate test data
    // NOTE(review): CrossValidate receives the already-transformed test data together with
    // the best run's estimator, i.e. it re-fits on transformed rows — confirm this is intended.
    IDataView testDataViewWithBestScore = bestRun.Model.Transform(testDataView);
    try
    {
        var testMetrics = mlContext.MulticlassClassification.CrossValidate(testDataViewWithBestScore, bestRun.Estimator, numberOfFolds: 5, labelColumnName: "reservation_status");
        Console.WriteLine($"Metrics of best model on test data --");
        PrintMulticlassClassificationFoldsAverageMetrics(testMetrics);
    }
    catch
    {
        // Cross-validation may not be supported by every ML.NET version; degrade gracefully.
        Console.WriteLine($"Metrics not supported in this version");
    }

    // Save model
    tmpPath = GetAbsolutePath(MODEL_FILEPATH2);
    SaveModel(mlContext, bestRun.Model, tmpPath, trainingDataView.Schema);
}
/// <summary>
/// Runs an AutoML multiclass classification experiment on the taxi-trip data and prints
/// the per-class prediction scores (prediction.Payment) for one sample trip.
/// </summary>
public static void Run()
{
    MLContext mlContext = new MLContext();

    // STEP 1: Load data
    // NOTE(review): testDataView is loaded but never used in this method — dead load or a
    // missing evaluation step; confirm intent.
    IDataView trainDataView = mlContext.Data.LoadFromTextFile<TaxiTrip>(TrainDataPath, separatorChar: ',', hasHeader: true);
    IDataView testDataView = mlContext.Data.LoadFromTextFile<TaxiTrip>(TestDataPath, separatorChar: ',', hasHeader: true);

    // STEP 2: Run AutoML experiment
    Console.WriteLine($"Running AutoML multiclass classification experiment for {ExperimentTime} seconds...");
    ExperimentResult<MulticlassClassificationMetrics> experimentResult = mlContext.Auto()
        .CreateMulticlassClassificationExperiment(ExperimentTime)
        .Execute(trainDataView, LabelColumnName);

    // STEP 3: Print metric from the best model
    RunDetail<MulticlassClassificationMetrics> bestRun = experimentResult.BestRun;
    Console.WriteLine($"Total models produced: {experimentResult.RunDetails.Count()}");
    Console.WriteLine($"Best model's trainer: {bestRun.TrainerName}");
    Console.WriteLine($"Metrics of best model from validation data --");
    PrintMetrics(bestRun.ValidationMetrics);

    // STEP 4: Create prediction engine from the best trained model
    var predictionEngine = mlContext.Model.CreatePredictionEngine<TaxiTrip, TaxiTripFarePrediction>(bestRun.Model);

    // STEP 5: Initialize a sample taxi trip and print the prediction scores
    // (comment previously said "pixel data" — copy-paste artifact corrected).
    var testTaxiTripData = new TaxiTrip
    {
        VendorId = "CMT",
        RateCode = 1,
        PassengerCount = 1,
        TripTimeInSeconds = 1271,
        TripDistance = 3.8F,
        PaymentType = "CRD",
        FareAmount = 17.5F,
    };
    var prediction = predictionEngine.Predict(testTaxiTripData);
    Console.WriteLine($"Predicted number for test data:");
    foreach (var x in prediction.Payment)
    {
        Console.WriteLine(x.ToString());
    }
    Console.WriteLine("Press any key to continue...");
    Console.ReadKey();
}
/// <summary>
/// Runs an AutoML multiclass classification experiment on the digit pixel data, evaluates
/// the best model on the test set, saves it to ModelPath, and prints one sample prediction.
/// </summary>
public static void Run()
{
    MLContext mlContext = new MLContext();

    // STEP 1: Load data
    IDataView trainDataView = mlContext.Data.LoadFromTextFile<PixelData>(TrainDataPath, separatorChar: ',');
    IDataView testDataView = mlContext.Data.LoadFromTextFile<PixelData>(TestDataPath, separatorChar: ',');

    // STEP 2: Run AutoML experiment
    Console.WriteLine($"Running AutoML multiclass classification experiment for {ExperimentTime} seconds...");
    ExperimentResult<MulticlassClassificationMetrics> experimentResult = mlContext.Auto()
        .CreateMulticlassClassificationExperiment(ExperimentTime)
        .Execute(trainDataView, LabelColumnName);

    // STEP 3: Print metric from the best model
    RunDetail<MulticlassClassificationMetrics> bestRun = experimentResult.BestRun;
    Console.WriteLine($"Total models produced: {experimentResult.RunDetails.Count()}");
    Console.WriteLine($"Best model's trainer: {bestRun.TrainerName}");
    Console.WriteLine($"Metrics of best model from validation data --");
    PrintMetrics(bestRun.ValidationMetrics);

    // STEP 4: Evaluate test data
    IDataView testDataViewWithBestScore = bestRun.Model.Transform(testDataView);
    MulticlassClassificationMetrics testMetrics = mlContext.MulticlassClassification.Evaluate(testDataViewWithBestScore, labelColumnName: LabelColumnName);
    Console.WriteLine($"Metrics of best model on test data --");
    PrintMetrics(testMetrics);

    // STEP 5: Save the best model for later deployment and inferencing
    using (FileStream fs = File.Create(ModelPath))
        mlContext.Model.Save(bestRun.Model, trainDataView.Schema, fs);

    // STEP 6: Create prediction engine from the best trained model
    var predictionEngine = mlContext.Model.CreatePredictionEngine<PixelData, PixelPrediction>(bestRun.Model);

    // STEP 7: Initialize new pixel data (one 8x8 digit, row-major), and get the predicted number
    var testPixelData = new PixelData
    {
        PixelValues = new float[]
        {
            0, 0, 1, 8, 15, 10, 0, 0,
            0, 3, 13, 15, 14, 14, 0, 0,
            0, 5, 10, 0, 10, 12, 0, 0,
            0, 0, 3, 5, 15, 10, 2, 0,
            0, 0, 16, 16, 16, 16, 12, 0,
            0, 1, 8, 12, 14, 8, 3, 0,
            0, 0, 0, 10, 13, 0, 0, 0,
            0, 0, 0, 11, 9, 0, 0, 0
        }
    };
    var prediction = predictionEngine.Predict(testPixelData);
    Console.WriteLine($"Predicted number for test pixels: {prediction.Prediction}");
    Console.WriteLine("Press any key to continue...");
    Console.ReadKey();
}
/// <summary>
/// Runs an AutoML binary classification experiment on the sentiment data, evaluates the
/// best model on the test set, saves it to ModelPath, and prints one sample prediction.
/// </summary>
public static void Run()
{
    MLContext mlContext = new MLContext();

    // STEP 1: Load data
    IDataView trainDataView = mlContext.Data.LoadFromTextFile<SentimentIssue>(TrainDataPath, hasHeader: true);
    IDataView testDataView = mlContext.Data.LoadFromTextFile<SentimentIssue>(TestDataPath, hasHeader: true);

    // STEP 2: Run AutoML experiment (default label column is used — none is passed)
    Console.WriteLine($"Running AutoML binary classification experiment for {ExperimentTime} seconds...");
    ExperimentResult<BinaryClassificationMetrics> experimentResult = mlContext.Auto()
        .CreateBinaryClassificationExperiment(ExperimentTime)
        .Execute(trainDataView);

    // STEP 3: Print metric from the best model
    RunDetail<BinaryClassificationMetrics> bestRun = experimentResult.BestRun;
    Console.WriteLine($"Total models produced: {experimentResult.RunDetails.Count()}");
    Console.WriteLine($"Best model's trainer: {bestRun.TrainerName}");
    Console.WriteLine($"Metrics of best model from validation data --");
    PrintMetrics(bestRun.ValidationMetrics);

    // STEP 4: Evaluate test data (non-calibrated: best run's score need not be a probability)
    IDataView testDataViewWithBestScore = bestRun.Model.Transform(testDataView);
    BinaryClassificationMetrics testMetrics = mlContext.BinaryClassification.EvaluateNonCalibrated(testDataViewWithBestScore);
    Console.WriteLine($"Metrics of best model on test data --");
    PrintMetrics(testMetrics);

    // STEP 5: Save the best model for later deployment and inferencing
    using (FileStream fs = File.Create(ModelPath))
        mlContext.Model.Save(bestRun.Model, trainDataView.Schema, fs);

    // STEP 6: Create prediction engine from the best trained model
    var predictionEngine = mlContext.Model.CreatePredictionEngine<SentimentIssue, SentimentPrediction>(bestRun.Model);

    // STEP 7: Initialize a new sentiment issue, and get the predicted sentiment
    var testSentimentIssue = new SentimentIssue { Text = "I hope this helps." };
    var prediction = predictionEngine.Predict(testSentimentIssue);
    Console.WriteLine($"Predicted sentiment for test issue: {prediction.Prediction}");
    Console.WriteLine("Press any key to continue...");
    Console.ReadKey();
}
/// <summary>
/// Insert Run number detail Into RunDetails table
/// </summary>
/// <param name="appId">Application Id</param>
/// <param name="runNumber">Run number to record.</param>
/// <returns>The repository's save result.</returns>
public int InsertRunDetails(int appId, string runNumber)
{
    try
    {
        var runNumberInfo = new RunDetail();
        runNumberInfo.ApplicationId = appId;
        runNumberInfo.RunNumber = runNumber;
        runNumberInfo.RunNumberStatusId = 0; // initial status
        runNumberInfo.Status = true;
        return objRunDetailsRepository.Save(runNumberInfo);
    }
    catch (Exception ex)
    {
        // FIX: pass the original exception as InnerException instead of flattening it
        // into the message string — preserves its type and stack trace for callers.
        throw new Exception("Error in insert details in Run Details in job init component", ex);
    }
}
/// <summary>
/// Pre-processes a run: looks up the run detail by run number and records application
/// components for it via the PreProcessor.
/// </summary>
/// <param name="runNo">Run number to process.</param>
/// <param name="fileDownloadLocation">Target location handed to the pre-processor.</param>
/// <returns>Always 0 on success.</returns>
public int Run(string runNo, string fileDownloadLocation)
{
    try
    {
        IRunDetailsRepository repository = new RunDetailsRepository();
        RunDetail detail = repository.GetRunDetailByRunNumber(runNo);
        PreProcessor objPreProcessor = new PreProcessor();
        //Make entry for application components for each run number
        objPreProcessor.ProcessRunNumber(detail, fileDownloadLocation);
    }
    catch (Exception ex)
    {
        // FIX: pass the original exception as InnerException instead of concatenating it
        // into the message — preserves its type and stack trace for callers.
        throw new Exception("Error in preprocessing", ex);
    }
    return 0;
}
/// <summary>
/// Copies the task-agnostic fields of an AutoML run into a LearningResult envelope,
/// together with the caller-supplied result value, schema and name.
/// </summary>
public static LearningResult CreateFromRunDetail(RunDetail runDetail, double result, DataViewSchema schema = null, string name = null)
{
    return new LearningResult
    {
        Name = name,
        Schema = schema,
        Result = result,
        Estimator = runDetail.Estimator,
        TrainerName = runDetail.TrainerName,
        RuntimeInSeconds = runDetail.RuntimeInSeconds
    };
}
/// <summary>
/// AutoML progress callback: prints the metrics-table header before the first row, then
/// one metrics row per iteration (or the exception for a failed iteration).
/// </summary>
public void Report(RunDetail<RegressionMetrics> iterationResult)
{
    // Header goes out exactly once, before the first iteration row.
    if (_iterationIndex++ == 0)
    {
        ConsoleHelper.PrintRegressionMetricsHeader();
    }

    if (iterationResult.Exception == null)
    {
        ConsoleHelper.PrintIterationMetrics(_iterationIndex, iterationResult.TrainerName, iterationResult.ValidationMetrics, iterationResult.RuntimeInSeconds);
    }
    else
    {
        ConsoleHelper.PrintIterationException(iterationResult.Exception);
    }
}
/// <summary>
/// End-to-end AutoML ranking test using a custom group-id column name: runs a 5-second
/// experiment with explicit RankingExperimentSettings and validates metrics and schema.
/// </summary>
public void AutoFitRankingTest()
{
    string labelColumnName = "Label";
    string scoreColumnName = "Score";
    string groupIdColumnName = "CustomGroupId";
    string featuresColumnVectorNameA = "FeatureVectorA";
    string featuresColumnVectorNameB = "FeatureVectorB";
    var mlContext = new MLContext(1); // fixed seed for reproducibility

    // STEP 1: Load data; first 500 rows become the validation set, the rest the train set.
    var reader = new TextLoader(mlContext, GetLoaderArgsRank(labelColumnName, groupIdColumnName, featuresColumnVectorNameA, featuresColumnVectorNameB));
    var trainDataView = reader.Load(new MultiFileSource(DatasetUtil.GetMLSRDataset()));
    var testDataView = mlContext.Data.TakeRows(trainDataView, 500);
    trainDataView = mlContext.Data.SkipRows(trainDataView, 500);

    // STEP 2: Run AutoML experiment
    ExperimentResult<RankingMetrics> experimentResult = mlContext.Auto()
        .CreateRankingExperiment(new RankingExperimentSettings()
        {
            GroupIdColumnName = "CustomGroupId",
            MaxExperimentTimeInSeconds = 5
        })
        .Execute(trainDataView, testDataView,
            new ColumnInformation()
            {
                LabelColumnName = labelColumnName,
                GroupIdColumnName = groupIdColumnName
            });

    RunDetail<RankingMetrics> bestRun = experimentResult.BestRun;
    Assert.True(experimentResult.RunDetails.Count() > 0);
    Assert.NotNull(bestRun.ValidationMetrics);
    // FIX: Max(predicate) over booleans is an obscure spelling of Any(predicate);
    // Any is equivalent (true iff some element matches) and states the intent.
    Assert.True(experimentResult.RunDetails.Any(i => i.ValidationMetrics.NormalizedDiscountedCumulativeGains.Max() > .5));
    Assert.True(experimentResult.RunDetails.Any(i => i.ValidationMetrics.DiscountedCumulativeGains.Max() > 34));

    var outputSchema = bestRun.Model.GetOutputSchema(trainDataView.Schema);
    // NOTE(review): groupIdColumnName is expected twice — presumably the raw column plus
    // a key-mapped copy; confirm against the ranking pipeline's output schema.
    var expectedOutputNames = new string[] { labelColumnName, groupIdColumnName, groupIdColumnName, featuresColumnVectorNameA, featuresColumnVectorNameB, "Features", scoreColumnName };
    foreach (var col in outputSchema)
    {
        Assert.True(col.Name == expectedOutputNames[col.Index]);
    }
}
/// <summary>
/// Loads the taxi-fare train/test data, runs an AutoML regression experiment, evaluates
/// the best run on the test set, and saves the trained model to ModelPath (.ZIP).
/// </summary>
/// <param name="mlContext">Shared ML.NET context.</param>
/// <returns>The trained model of the best AutoML run.</returns>
private static ITransformer BuildTrainEvaluateAndSaveModel(MLContext mlContext)
{
    // STEP 1: Common data loading configuration
    IDataView trainingDataView = mlContext.Data.LoadFromTextFile<TaxiTrip>(TrainDataPath, hasHeader: true, separatorChar: ',');
    IDataView testDataView = mlContext.Data.LoadFromTextFile<TaxiTrip>(TestDataPath, hasHeader: true, separatorChar: ',');

    // STEP 2: Display first few rows of the training data
    ConsoleHelper.ShowDataViewInConsole(mlContext, trainingDataView);

    // STEP 3: Initialize our user-defined progress handler that AutoML will
    // invoke after each model it produces and evaluates.
    var progressHandler = new RegressionExperimentProgressHandler();

    // STEP 4: Run AutoML regression experiment
    ConsoleHelper.ConsoleWriteHeader("=============== Training the model ===============");
    Console.WriteLine($"Running AutoML regression experiment for {ExperimentTime} seconds...");
    ExperimentResult<RegressionMetrics> experimentResult = mlContext.Auto()
        .CreateRegressionExperiment(ExperimentTime)
        .Execute(trainingDataView, LabelColumnName, progressHandler: progressHandler);

    // Print top models found by AutoML
    Console.WriteLine();
    PrintTopModels(experimentResult);

    // STEP 5: Evaluate the model and print metrics
    ConsoleHelper.ConsoleWriteHeader("===== Evaluating model's accuracy with test data =====");
    RunDetail<RegressionMetrics> best = experimentResult.BestRun;
    ITransformer trainedModel = best.Model;
    IDataView predictions = trainedModel.Transform(testDataView);
    var metrics = mlContext.Regression.Evaluate(predictions, labelColumnName: LabelColumnName, scoreColumnName: "Score");

    // Print metrics from top model
    ConsoleHelper.PrintRegressionMetrics(best.TrainerName, metrics);

    // STEP 6: Save/persist the trained model to a .ZIP file
    mlContext.Model.Save(trainedModel, trainingDataView.Schema, ModelPath);
    Console.WriteLine("The model is saved to {0}", ModelPath);
    return(trainedModel);
}
/// <summary>
/// Copies the task-agnostic fields of an AutoML run and extracts the primary metric value
/// from the task-specific validation-metrics object (passed as an opaque object).
/// </summary>
/// <param name="baseRunDetail">Non-generic run detail providing trainer/pipeline/timing info.</param>
/// <param name="validationMetrics">Task-specific metrics object; null for failed runs.</param>
/// <param name="primaryMetricName">Key looked up in the flattened metric dictionary.</param>
private TaskAgnosticIterationResult(RunDetail baseRunDetail, object validationMetrics, string primaryMetricName)
{
    TrainerName = baseRunDetail.TrainerName;
    Estimator = baseRunDetail.Estimator;
    Pipeline = baseRunDetail.Pipeline;
    PipelineInferenceTimeInSeconds = (int)baseRunDetail.PipelineInferenceTimeInSeconds;
    RuntimeInSeconds = (int)baseRunDetail.RuntimeInSeconds;
    _primaryMetricName = primaryMetricName;
    PrimaryMetricValue = -1; // default value in case of exception. TODO: won't work for minimizing metrics, use nullable?
    if (validationMetrics == null)
    {
        // Failed run: keep the sentinel metric value and leave MetricValues unset.
        return;
    }
    MetricValues = MetricValuesToDictionary(validationMetrics);
    PrimaryMetricValue = MetricValues[_primaryMetricName];
}
/// <summary>
/// Loads the sentiment train/test files, runs an AutoML binary-classification
/// experiment, evaluates the best run on the held-out test set, and persists
/// the model.
/// </summary>
/// <param name="mlContext">Shared ML.NET context used for all operations.</param>
/// <returns>The best trained model produced by the experiment.</returns>
private static ITransformer BuildTrainEvaluateAndSaveModel(MLContext mlContext)
{
    // STEP 1: Load the train and test sets.
    IDataView trainSet = mlContext.Data.LoadFromTextFile<SentimentIssue>(TrainDataPath, hasHeader: true);
    IDataView testSet = mlContext.Data.LoadFromTextFile<SentimentIssue>(TestDataPath, hasHeader: true);

    // STEP 2: Peek at the first rows of the training data.
    ConsoleHelper.ShowDataViewInConsole(mlContext, trainSet);

    // STEP 3: Progress handler that AutoML invokes after each candidate
    // model it produces and evaluates.
    var progressHandler = new BinaryExperimentProgressHandler();

    // STEP 4: Run the AutoML binary classification experiment.
    ConsoleHelper.ConsoleWriteHeader("=============== Running AutoML experiment ===============");
    Console.WriteLine($"Running AutoML binary classification experiment for {ExperimentTime} seconds...");
    ExperimentResult<BinaryClassificationMetrics> experimentResult =
        mlContext.Auto()
                 .CreateBinaryClassificationExperiment(ExperimentTime)
                 .Execute(trainSet, progressHandler: progressHandler);

    // Summarize the best pipelines AutoML discovered.
    Console.WriteLine();
    PrintTopModels(experimentResult);

    // STEP 5: Score the held-out test set with the best run's model and print metrics.
    ConsoleHelper.ConsoleWriteHeader("=============== Evaluating model's accuracy with test data ===============");
    RunDetail<BinaryClassificationMetrics> bestRun = experimentResult.BestRun;
    ITransformer trainedModel = bestRun.Model;
    var predictions = trainedModel.Transform(testSet);
    var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(data: predictions, scoreColumnName: "Score");
    ConsoleHelper.PrintBinaryClassificationMetrics(bestRun.TrainerName, metrics);

    // STEP 6: Persist the trained model (with its input schema) to a .ZIP file.
    mlContext.Model.Save(trainedModel, trainSet.Schema, ModelPath);
    Console.WriteLine("The model is saved to {0}", ModelPath);

    return trainedModel;
}
/// <summary>
/// AutoML progress callback: prints a metrics header before the first
/// iteration, then one metrics row (or the exception) per evaluated model.
/// </summary>
/// <param name="iterationResult">Details of the run AutoML just completed.</param>
public void Report(RunDetail<BinaryClassificationMetrics> iterationResult)
{
    // Emit the column header exactly once, before the first row.
    if (iterationIndex == 0)
    {
        PrintBinaryClassificationMetricsHeader();
    }

    if (iterationResult.Exception is not null)
    {
        PrintIterationException(iterationResult.Exception);
    }
    else
    {
        PrintIterationMetrics(
            iterationIndex,
            iterationResult.TrainerName,
            iterationResult.ValidationMetrics,
            iterationResult.RuntimeInSeconds);
    }

    iterationIndex++;
}
/// <summary>
/// Runs an AutoML binary-classification experiment on the sentiment data,
/// prints validation and test metrics for the best run, and saves the model.
/// </summary>
public static void CreateExperiment()
{
    var tmpPath = GetAbsolutePath(TRAIN_DATA_FILEPATH);

    // Load the full data set from disk.
    IDataView dataView = mlContext.Data.LoadFromTextFile<ModelInput>(
        path: tmpPath,
        hasHeader: true,
        separatorChar: '\t',
        allowQuoting: true,
        allowSparse: false);

    // FIX: the original used mlContext.Data.BootstrapSample(trainingDataView) as the
    // "test" set — a resample of the very rows the model trains on, so the test
    // metrics were computed on seen data (leakage). Hold out a disjoint 20% split
    // instead and train only on the remaining 80%.
    var split = mlContext.Data.TrainTestSplit(dataView, testFraction: 0.2);
    IDataView trainingDataView = split.TrainSet;
    IDataView testDataView = split.TestSet;

    // STEP 2: Run AutoML experiment on the training split only.
    Console.WriteLine($"Running AutoML binary classification experiment for {ExperimentTime} seconds...");
    ExperimentResult<BinaryClassificationMetrics> experimentResult =
        mlContext.Auto()
                 .CreateBinaryClassificationExperiment(ExperimentTime)
                 .Execute(trainingDataView, labelColumnName: "Sentiment");

    // STEP 3: Print metrics from the best model found during the experiment.
    RunDetail<BinaryClassificationMetrics> bestRun = experimentResult.BestRun;
    Console.WriteLine($"Total models produced: {experimentResult.RunDetails.Count()}");
    Console.WriteLine($"Best model's trainer: {bestRun.TrainerName}");
    Console.WriteLine($"Metrics of best model from validation data --");
    PrintMetrics(bestRun.ValidationMetrics);

    // STEP 4: Evaluate the best model on the held-out test split.
    IDataView testDataViewWithBestScore = bestRun.Model.Transform(testDataView);
    BinaryClassificationMetrics testMetrics =
        mlContext.BinaryClassification.EvaluateNonCalibrated(testDataViewWithBestScore, labelColumnName: "Sentiment");
    Console.WriteLine($"Metrics of best model on test data --");
    PrintMetrics(testMetrics);

    // Save the best model together with the input schema.
    tmpPath = GetAbsolutePath(MODEL_FILEPATH2);
    SaveModel(mlContext, bestRun.Model, tmpPath, trainingDataView.Schema);
}
/// <summary>
/// AutoML progress callback: prints the trainer name and validation metrics
/// of each evaluated model.
/// </summary>
/// <param name="value">Details of the run AutoML just completed.</param>
public void Report(RunDetail<BinaryClassificationMetrics> value)
{
    // Fixed: the original read value.Model into a local that was never used;
    // the property access was pure overhead here, so it is removed.
    ConsoleHelper.Print(value.TrainerName, value.ValidationMetrics);
}