示例#1
0
        //public static void RunRandomExclusion(String usedResourcesFile)
        //{
        //	PreExperimentSetUp();
        //	int experimentNumber = 0;
        //	List<ExperimentResult> resultList = ParseExperimentResult(usedResourcesFile);
        //	foreach (ExperimentResult expResult in resultList)
        //	{

        //		//Do 5 random experiments per normal experiment
        //		for (int i = 0; i < 10; i++)
        //		{
        //			experimentNumber++;
        //			startTime = getCurrentTime();


        //			String originalExperimentName = expResult.experimentName;
        //			int numExcluded = expResult.numSpectraExcluded;
        //			int numAnalyzed = expResult.numSpectraAnalyzed;

        //			ExclusionProfile exclusionProfile = new RandomExclusion_Fast(database, ms2SpectraList, numExcluded, numAnalyzed, 12);
        //			if (experimentNumber == 1)
        //			{
        //				WriterClass.writeln(exclusionProfile.GetPerformanceEvaluator().getHeader());
        //			}
        //			String experimentName = "EXP_" + experimentNumber + String.Format("Random:originalExperiment_{0}", originalExperimentName);

        //			if (GlobalVar.IsSimulation)
        //			{
        //				new DataReceiverSimulation().DoJob(exclusionProfile, ms2SpectraList);
        //			}
        //			else
        //			{
        //				new DataReceiver().DoJob(exclusionProfile);
        //			}
        //			analysisTime = getCurrentTime() - startTime;
        //			PostExperimentProcessing(exclusionProfile, experimentName, experimentNumber);
        //			exclusionProfile.reset();
        //			reset();
        //		}

        //	}

        //}

        /// <summary>
        /// Parses spectra-usage information recorded by past experiments (for random exclusion).
        /// Each file is scanned for a header row starting with "ExperimentName"; every
        /// tab-separated row after it becomes one ExperimentResult.
        /// </summary>
        /// <param name="resultFiles">Paths of past experiment result files.</param>
        /// <returns>All parsed results, in file order; empty if nothing parseable.</returns>
        public static List <ExperimentResult> ParseExperimentResult(params String[] resultFiles)
        {
            List <ExperimentResult> resultList = new List <ExperimentResult>();

            foreach (String resultFile in resultFiles)
            {
                // using guarantees the reader is closed even if parsing throws
                // (the original leaked the stream on exception).
                using (StreamReader reader = new StreamReader(resultFile))
                {
                    // Skip any preamble until the column-header row.
                    // BUG FIX: guard against files with no header row — the original
                    // called StartsWith on a null line and threw NullReferenceException.
                    String header = reader.ReadLine();
                    while (header != null && !header.StartsWith("ExperimentName"))
                    {
                        header = reader.ReadLine();
                    }
                    if (header == null)
                    {
                        continue;
                    }

                    String line;
                    while ((line = reader.ReadLine()) != null)
                    {
                        // Skip blank lines and rows without tab-separated fields
                        if (line.Equals("") || !line.Contains("\t"))
                        {
                            continue;
                        }
                        resultList.Add(new ExperimentResult(line, header));
                    }
                }
            }
            return(resultList);
        }
        /// <summary>
        /// Trains a multiclass classifier with AutoML, evaluates the best run on the
        /// test CSV, prints its metrics, and saves the resulting model to disk.
        /// </summary>
        public static void AutoTrain()
        {
            // Load train/test data from CSV
            IDataView trainView = LoadDataFromCsv(trainingCsv);
            IDataView testView  = LoadDataFromCsv(testingCsv);

            // Run the AutoML multiclass classification sweep
            WriteLineColor($"{Environment.NewLine}AutoML multiclass classification experiment for {ExperimentTime} seconds...", ConsoleColor.Yellow);
            var handler = new MulticlassExperimentProgressHandler();
            ExperimentResult <MulticlassClassificationMetrics> result = Context.Auto()
                                                                        .CreateMulticlassClassificationExperiment(ExperimentTime)
                                                                        .Execute(trainView, Label, progressHandler: handler);

            // Evaluate the winning model and report metrics
            RunDetail <MulticlassClassificationMetrics> best = result.BestRun;

            WriteLineColor($"{Environment.NewLine}Top Trainer (by accuracy)", ConsoleColor.Yellow);
            PrintTopModels(result);
            WriteLineColor($"{Environment.NewLine}TRAINING USING: {best.TrainerName}", ConsoleColor.Cyan);
            Model = best.Model;
            IDataView scored = Model.Transform(testView);
            var testMetrics  = Context.MulticlassClassification.Evaluate(data: scored, labelColumnName: Label, scoreColumnName: Score, predictedLabelColumnName: PredictedLabel);

            PrintMultiClassClassificationMetrics(best.TrainerName, testMetrics);

            // Persist the trained model
            Context.Model.Save(Model, trainView.Schema, modelPath);
        }
示例#3
0
        /// <summary>
        /// Logs an AutoML experiment's best run to MLFlow: creates a new run record,
        /// then logs the trainer name and the best run's validation metrics against it.
        /// </summary>
        /// <param name="experimentId">Id of the MLFlow experiment the run belongs to.</param>
        /// <param name="experimentResults">AutoML results; only <c>BestRun</c> is logged.</param>
        /// <remarks>
        /// BUG FIX: was <c>async void</c>, which makes exceptions unobservable and the
        /// operation un-awaitable. Returning <c>Task</c> is call-site compatible.
        /// </remarks>
        static async Task LogRun(int experimentId, ExperimentResult <MulticlassClassificationMetrics> experimentResults)
        {
            // Define run
            var runObject = new CreateRunRequest();

            runObject.ExperimentId = experimentId;
            runObject.StartTime    = ((DateTimeOffset)DateTime.UtcNow).ToUnixTimeMilliseconds();
            runObject.UserId       = Environment.UserName;
            runObject.SourceType   = SourceType.LOCAL;

            // Create new run in MLFlow
            var runRequest = await _mlFlowService.CreateRun(runObject);

            // Get information for best run
            var runDetails = experimentResults.BestRun;

            // Log trainer name
            await _mlFlowService.LogParameter(runRequest.Run.Info.RunUuid, nameof(runDetails.TrainerName), runDetails.TrainerName);

            // Log metrics (MLFlow only accepts float metrics, hence the casts)
            await _mlFlowService.LogMetric(runRequest.Run.Info.RunUuid, nameof(runDetails.RuntimeInSeconds), (float)runDetails.RuntimeInSeconds);

            await _mlFlowService.LogMetric(runRequest.Run.Info.RunUuid, nameof(runDetails.ValidationMetrics.LogLoss), (float)runDetails.ValidationMetrics.LogLoss);

            await _mlFlowService.LogMetric(runRequest.Run.Info.RunUuid, nameof(runDetails.ValidationMetrics.MacroAccuracy), (float)runDetails.ValidationMetrics.MacroAccuracy);

            await _mlFlowService.LogMetric(runRequest.Run.Info.RunUuid, nameof(runDetails.ValidationMetrics.MicroAccuracy), (float)runDetails.ValidationMetrics.MicroAccuracy);
        }
示例#4
0
    /// <summary>
    /// Applies a potion to the current patient, optionally scoring a cure, and
    /// records the before/after symptoms as an experiment result.
    /// </summary>
    /// <param name="potion">Potion to apply.</param>
    /// <param name="addScoreOnSuccess">When true, a cured patient increments PatientsCured.</param>
    private void ApplyPotion(Potion potion, bool addScoreOnSuccess)
    {
        // Guard: the previous potion must be resolved (OnPatientFinished) first.
        if (resolvingPotion)
        {
            Debug.Log("OnPatientFinished must be called before another potion can be applied!");
            return;
        }

        resolvingPotion = true;
        OnCanBrewStateChange?.Invoke(CanBrew);

        // Snapshot the patient's symptoms before the potion takes effect
        var symptomsBefore = new HashSet <eSymptom>(patientSymptomManager.symptoms);

        // Apply the potion; award a point only when scoring is enabled and the patient was cured
        bool cured = patientSymptomManager.ApplyPotionToPatient(potion.GetSymptomChange());
        PatientsCured += (addScoreOnSuccess && cured) ? 1 : 0;

        // Record this experiment (before-state, composition, after-state) and notify listeners
        var symptomsAfter = new HashSet <eSymptom>(patientSymptomManager.symptoms);
        experimentResults.Add(new ExperimentResult(symptomsBefore, potion.PotionComposition, symptomsAfter));
        ExperimentResultsChanged?.Invoke(experimentResults);
    }
示例#5
0
        /// <summary>
        /// Loads the age/gender data set and runs an AutoML multiclass classification
        /// sweep over it, printing the top models found. Exceptions are logged to the console.
        /// </summary>
        public static void Train(MLContext mlContext)
        {
            try
            {
                // Column layout: Age, Gender, then the encoded label
                var columns = new[]
                {
                    new TextLoader.Column("Age", DataKind.Single, 0),
                    new TextLoader.Column("Gender", DataKind.Single, 1),
                    new TextLoader.Column("Label", DataKind.Single, 2)
                };

                // STEP 1: Load the data
                var data = mlContext.Data.LoadFromTextFile(path: "AgeRangeData03_AgeGenderLabelEncodedMoreData.csv",
                                                           columns: columns,
                                                           hasHeader: true,
                                                           separatorChar: ',');

                var handler = new MulticlassExperimentProgressHandler();

                // STEP 2: Run the AutoML experiment
                ConsoleHelper.ConsoleWriteHeader("=============== Running AutoML experiment ===============");
                Console.WriteLine($"Running AutoML multiclass classification experiment for {ExperimentTime} seconds...");
                ExperimentResult <MulticlassClassificationMetrics> experimentResult = mlContext.Auto()
                                                                                      .CreateMulticlassClassificationExperiment(ExperimentTime)
                                                                                      .Execute(data, "Label", progressHandler: handler);

                // STEP 3: Report the top models found
                Console.WriteLine();
                PrintTopModels(experimentResult);
                Console.WriteLine();
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex);
            }
        }
示例#6
0
        /// <summary>
        /// Runs a 10-second AutoML multiclass experiment over the training CSV with a
        /// one-hot-encoding pre-featurizer, prints the best run's accuracy, and saves the model.
        /// </summary>
        public static void DoAutoML()
        {
            // Load the training data
            IDataView trainingDataView = mlContext.Data.LoadFromTextFile <ModelInput>(
                path: TRAIN_DATA_FILEPATH,
                hasHeader: true,
                separatorChar: ',',
                allowQuoting: true,
                allowSparse: false);

            // Cap the AutoML sweep at 10 seconds
            var settings = new MulticlassExperimentSettings
            {
                MaxExperimentTimeInSeconds = 10
            };

            MulticlassClassificationExperiment experiment = mlContext.Auto().CreateMulticlassClassificationExperiment(settings);

            // One-hot encode the categorical columns, then assemble the feature vector
            var preFeaturizer = mlContext.Transforms.Categorical.OneHotEncoding(new[] { new InputOutputColumnPair("Vehicle Type", "Vehicle Type"), new InputOutputColumnPair("Day", "Day") })
                                .Append(mlContext.Transforms.Concatenate("Features", new[] { "Vehicle Type", "Day", "Ride Distance (km)", "Hour" }));

            ExperimentResult <Microsoft.ML.Data.MulticlassClassificationMetrics> experimentResult = experiment.Execute(trainingDataView, labelColumnName: "Saving", preFeaturizer: preFeaturizer);
            var bestMetrics = experimentResult.BestRun.ValidationMetrics;

            Console.WriteLine($"Macro Accuracy: {bestMetrics.MacroAccuracy:0.##}");
            Console.WriteLine($"Micro Accuracy: {bestMetrics.MicroAccuracy:0.##}");

            // Persist the best model
            SaveModel(mlContext, experimentResult.BestRun.Model, MODEL_FILEPATH, trainingDataView.Schema);
        }
示例#7
0
        /// <summary>
        /// Loads an experiment and its test results and matches each result against the
        /// recorded bacteria reactions. NOTE(review): the method body is incomplete — see
        /// inline comments; it currently always returns an empty result object.
        /// </summary>
        /// <param name="experimentId">Primary key of the experiment to look up.</param>
        /// <returns>An (always empty, as written) <see cref="ExperimentResult"/>.</returns>
        public async Task <ExperimentResult> GetExperimentResult(int experimentId)
        {
            ExperimentResult result = new ExperimentResult();
            // Load the experiment together with its test results in one query
            var experiment          = await _dbContext.Experiments.Include(x => x.Results).SingleOrDefaultAsync(x => x.Id == experimentId);

            if (experiment == null)
            {
                // Unknown experiment id — return the empty result
                return(result);
            }

            // Intended to accumulate a score per bacteria id, but never populated or read below
            Dictionary <int, BacteriaScore> scores = new Dictionary <int, BacteriaScore>();

            foreach (var testResult in experiment.Results)
            {
                // Bacteria whose recorded reaction to this test matches the observed result
                var bacterias = _dbContext.TestReactions
                                .Where(x => x.Test.Id == testResult.Id && x.Result == testResult.Result)
                                .Select(x => x.Bacteria);
                foreach (var bac in bacterias)
                {
                    if (scores.ContainsKey(bac.Id))
                    {
                        // BUG(review): incomplete statement — does not compile, and it indexes the
                        // 'bacterias' query instead of the 'scores' dictionary. Presumably this
                        // should update scores[bac.Id]; confirm the intended scoring rule.
                        bacterias[bac.Id] =
                    }
                    else
                    {
                        // TODO(review): missing branch — presumably should insert a new
                        // BacteriaScore for bac.Id into 'scores'.
                    }
                }
            }

            // NOTE(review): 'scores' is never copied into 'result', so callers receive an
            // empty ExperimentResult even when the experiment exists.
            return(result);
        }
示例#8
0
        /* static readonly string TrainDataPath = Path.Combine(Environment.CurrentDirectory, "Data", "winequality-data-train.csv");
         * static readonly string TestDataPath = Path.Combine(Environment.CurrentDirectory, "Data", "winequality-data-test.csv");*/
        /// <summary>
        /// Runs an AutoML regression experiment on the given CSV, evaluates the best run on a
        /// 20% held-out split, and saves the trained model to <c>ModelFilePath</c>.
        /// </summary>
        /// <param name="label">Name of the label column.</param>
        /// <param name="trainDataPath">Path to the comma-separated data file (with header).</param>
        /// <param name="experimentTime">AutoML time budget in seconds.</param>
        public static void TrainAndSave(string label, string trainDataPath, uint experimentTime)
        {
            MLContext mlContext = new MLContext(seed: 0);

            // Load the full data set
            var fullData = mlContext.Data.LoadFromTextFile <ModelInput>(path: trainDataPath, separatorChar: ',', hasHeader: true);

            // BUG FIX: the experiment previously trained on the FULL data set while evaluating
            // on a 20% subset of that same data, so the reported metrics were measured on data
            // the model had already seen. Train on the split's TrainSet instead.
            var split     = mlContext.Data.TrainTestSplit(fullData, testFraction: 0.2);
            var trainData = split.TrainSet;
            var testData  = split.TestSet;

            var progressHandler = new RegressionExperimentProgressHandler();

            ExperimentResult <RegressionMetrics> experimentResult = mlContext.Auto()
                                                                    .CreateRegressionExperiment(experimentTime)
                                                                    .Execute(trainData, label, progressHandler: progressHandler);

            RunDetail <RegressionMetrics> best = experimentResult.BestRun;
            ITransformer trainedModel          = best.Model;

            // Evaluate the best run on the held-out data
            var predictions = trainedModel.Transform(testData);
            var metrics     = mlContext.Regression.Evaluate(predictions, labelColumnName: label, scoreColumnName: "Score");

            // Save the trained model
            using (var stream = System.IO.File.Create(ModelFilePath))
            {
                mlContext.Model.Save(trainedModel, trainData.Schema, stream);
            }
        }
示例#9
0
        /// <summary>
        /// Runs a 200-second AutoML regression sweep over the wine-quality training set,
        /// evaluates the best model on the test set, and saves it to ModelFilePath.
        /// </summary>
        public static void TrainAndSave()
        {
            MLContext mlContext = new MLContext(seed: 1);

            // Load the train and test data sets
            var train = mlContext.Data.LoadFromTextFile <WineData>(path: TrainDataPath, separatorChar: ',', hasHeader: true);
            var test  = mlContext.Data.LoadFromTextFile <WineData>(path: TestDataPath, separatorChar: ',', hasHeader: true);

            var        handler           = new RegressionExperimentProgressHandler();
            const uint experimentSeconds = 200;

            // Run the AutoML regression experiment
            ExperimentResult <RegressionMetrics> result = mlContext.Auto()
                                                          .CreateRegressionExperiment(experimentSeconds)
                                                          .Execute(train, "Label", progressHandler: handler);

            Debugger.PrintTopModels(result);

            RunDetail <RegressionMetrics> best = result.BestRun;
            ITransformer model                 = best.Model;

            // Evaluate the winning model on the held-out test data
            var scored  = model.Transform(test);
            var metrics = mlContext.Regression.Evaluate(scored, labelColumnName: "Label", scoreColumnName: "Score");

            Debugger.PrintRegressionMetrics(best.TrainerName, metrics);

            // Persist the trained model
            Console.WriteLine("====== Save model to local file =========");
            mlContext.Model.Save(model, train.Schema, ModelFilePath);
        }
示例#10
0
        /// <summary>
        /// Re-fits the best AutoML pipeline on the combined train/validate/test data,
        /// evaluates it on the test set, and saves it to <c>paths.FinalModelPath</c>.
        /// </summary>
        /// <param name="fixedBug">Reserved for an alternative retraining path; currently unused.</param>
        /// <returns>The re-fitted model.</returns>
        public static ITransformer Retrain(MLContext mlContext, ExperimentResult <MulticlassClassificationMetrics> experimentResult,
                                           ColumnInferenceResults columnInference, DataFilePaths paths, bool fixedBug = false)
        {
            ConsoleHelper.ConsoleWriteHeader("=============== Re-fitting best pipeline ===============");
            var textLoader       = mlContext.Data.CreateTextLoader(columnInference.TextLoaderOptions);
            var combinedDataView = textLoader.Load(new MultiFileSource(paths.TrainPath, paths.ValidatePath, paths.TestPath));
            var bestRun          = experimentResult.BestRun;

            if (fixedBug)
            {
                // TODO: retry: below gave error but I thought it would work:
                //refitModel = MulticlassExperiment.Retrain(experimentResult,
                //    "final model",
                //    new MultiFileSource(paths.TrainPath, paths.ValidatePath, paths.FittedPath),
                //    paths.TestPath,
                //    paths.FinalPath, textLoader, mlContext);
                // but if failed before fixing this maybe the problem was in *EvaluateTrainedModelAndPrintMetrics*
            }
            var refitModel = bestRun.Estimator.Fit(combinedDataView);

            EvaluateTrainedModelAndPrintMetrics(mlContext, refitModel, "production model", textLoader.Load(paths.TestPath));
            // Save the re-fit model to a .ZIP file
            SaveModel(mlContext, refitModel, paths.FinalModelPath, textLoader.Load(paths.TestPath));

            // BUG FIX: Trace.WriteLine(string, string) treats the second argument as a trace
            // CATEGORY, not a format argument, so the saved-model path never appeared in the
            // message. Use interpolation instead.
            Trace.WriteLine($"The model is saved to {paths.FinalModelPath}");
            return(refitModel);
        }
示例#11
0
        /// <summary>
        /// Runs a cancellable 10-second AutoML regression experiment over the traffic-volume
        /// data set (optimizing mean squared error) and prints the best run's metrics.
        /// </summary>
        static void Main(string[] args)
        {
            MLContext mlContext     = new MLContext();
            IDataView trainDataView = mlContext.Data.LoadFromTextFile <TrafficData>(GetAbsolutePath("../../../Data/Metro_Interstate_Traffic_Volume.csv"), hasHeader: true, separatorChar: ',');

            // Configure the experiment: 10-second budget, MSE as the optimizing metric, no cache
            var experimentSettings = new RegressionExperimentSettings();

            experimentSettings.MaxExperimentTimeInSeconds = 10;
            // NOTE: the token source is intentionally NOT disposed here — the key-press
            // listener may still call Cancel() on it after the experiment finishes.
            var cts = new CancellationTokenSource();

            experimentSettings.CancellationToken = cts.Token;
            experimentSettings.OptimizingMetric  = RegressionMetric.MeanSquaredError;
            experimentSettings.CacheDirectory    = null;

            // Cancel experiment after the user presses any key
            CancelExperimentAfterAnyKeyPress(cts);

            // Create and execute the experiment
            RegressionExperiment experiment = mlContext.Auto().CreateRegressionExperiment(experimentSettings);
            var handler = new RegressionExperimentProgressHandler();

            ExperimentResult <RegressionMetrics> experimentResult = experiment.Execute(trainDataView, labelColumnName: "Label", progressHandler: handler);

            // Report the best run's validation metrics
            RegressionMetrics metrics = experimentResult.BestRun.ValidationMetrics;

            // BUG FIX: corrected the "Algorthm" typo in the console output.
            Console.WriteLine($"Best Algorithm: {experimentResult.BestRun.TrainerName}");
            Console.WriteLine($"R-Squared: {metrics.RSquared:0.##}");
            Console.WriteLine($"Root Mean Squared Error: {metrics.RootMeanSquaredError:0.##}");

            Console.ReadKey();
        }
示例#12
0
        /// <summary>
        /// Evaluates the experiment's model against the named data set, prints the log-loss
        /// and the confusion matrix, and returns the computed metrics.
        /// </summary>
        private static MulticlassClassificationMetrics GetAndPrintValidationMetricsForData(
            ExperimentResult <MulticlassClassificationMetrics> experimentResult, string fileName)
        {
            Console.WriteLine($@"Running experiment for dataset {fileName}");

            // Score the named data set with the experiment's model
            var validationMetrics = AutoMlModelCreation.EvaluateModel(experimentResult, fileName);

            Console.WriteLine(@"Experiment ran with the following results");
            Console.WriteLine($@"LogLoss={validationMetrics.LogLoss} the closer to 0 the better");
            Console.WriteLine($@"Confusion Matrix Actuals\Predicted");

            // Dump the confusion matrix as a tab-separated grid: rows = actual classes,
            // columns = predicted classes
            var matrix = validationMetrics.ConfusionMatrix;
            for (var row = 0; row < matrix.NumberOfClasses; row++)
            {
                for (var col = 0; col < matrix.NumberOfClasses; col++)
                {
                    Console.Write(matrix.Counts[row][col] + "\t");
                }
                Console.WriteLine();
            }

            return validationMetrics;
        }
示例#13
0
        /// <summary>
        /// Loads the digit data sets (64 pixel values + "Number" label), runs an AutoML
        /// multiclass experiment, evaluates the best model on the test set, and saves it.
        /// Exceptions are logged to the console.
        /// </summary>
        public static void Train(MLContext mlContext)
        {
            try
            {
                // Both files share the same layout: 64 pixel columns then the digit label.
                IDataView Load(string path) => mlContext.Data.LoadFromTextFile(path: path,
                                                                               columns: new[]
                {
                    new TextLoader.Column(nameof(InputData.PixelValues), DataKind.Single, 0, 63),
                    new TextLoader.Column("Number", DataKind.Single, 64)
                },
                                                                               hasHeader: false,
                                                                               separatorChar: ',');

                // STEP 1: Load the data
                var trainData = Load(TrainDataPath);
                var testData  = Load(TestDataPath);

                // STEP 2: Progress handler invoked after each candidate model
                var handler = new MulticlassExperimentProgressHandler();

                // STEP 3: Run the AutoML multiclass classification experiment
                ConsoleHelper.ConsoleWriteHeader("=============== Running AutoML experiment ===============");
                Console.WriteLine($"Running AutoML multiclass classification experiment for {ExperimentTime} seconds...");
                ExperimentResult <MulticlassClassificationMetrics> result = mlContext.Auto()
                                                                            .CreateMulticlassClassificationExperiment(ExperimentTime)
                                                                            .Execute(trainData, "Number", progressHandler: handler);

                // Report the top models found
                Console.WriteLine();
                PrintTopModels(result);

                // STEP 4: Evaluate the winning model on the test data
                ConsoleHelper.ConsoleWriteHeader("===== Evaluating model's accuracy with test data =====");
                RunDetail <MulticlassClassificationMetrics> bestRun = result.BestRun;
                ITransformer model   = bestRun.Model;
                var          scored  = model.Transform(testData);
                var          metrics = mlContext.MulticlassClassification.Evaluate(data: scored, labelColumnName: "Number", scoreColumnName: "Score");
                ConsoleHelper.PrintMulticlassClassificationMetrics(bestRun.TrainerName, metrics);

                // STEP 5: Persist the trained model
                mlContext.Model.Save(model, trainData.Schema, ModelPath);

                Console.WriteLine("The model is saved to {0}", ModelPath);
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex);
            }
        }
示例#14
0
        /// <summary>
        /// Infers the column schema from the training file, corrects the inferred roles,
        /// runs an AutoML regression experiment (excluding OLS), prints batch predictions
        /// from the best model, then re-fits the winning estimator and saves it.
        /// </summary>
        public void Start()
        {
            // Infer column types, treating "next" as the label
            var inference = mlContext.Auto().InferColumns(
                path: TRAIN_DATA_FILEPATH,
                labelColumnName: "next",
                groupColumns: false);

            TextLoader loader = mlContext.Data.CreateTextLoader(inference.TextLoaderOptions);

            trainData = loader.Load(TRAIN_DATA_FILEPATH);

            // Correct the inferred roles: productId and year are categorical, units is ignored
            columnInformation = inference.ColumnInformation;

            columnInformation.CategoricalColumnNames.Add("productId");
            columnInformation.NumericColumnNames.Remove("productId");

            columnInformation.CategoricalColumnNames.Add("year");
            columnInformation.NumericColumnNames.Remove("year");

            columnInformation.NumericColumnNames.Remove("units");
            columnInformation.IgnoredColumnNames.Add("units");

            // 10-second budget, optimizing RMSE, cancellable, with an on-disk cache
            var settings = new RegressionExperimentSettings()
            {
                MaxExperimentTimeInSeconds = 10,
                OptimizingMetric           = RegressionMetric.RootMeanSquaredError,
                CacheDirectory             = new DirectoryInfo(CACHE_DIRECTORY),
                CancellationToken          = cancelationTokenSource.Token
            };

            // OLS is excluded from the trainer sweep
            settings.Trainers.Remove(RegressionTrainer.Ols);

            RegressionExperiment experiment = mlContext.Auto().CreateRegressionExperiment(settings);
            ExperimentResult <RegressionMetrics> result = experiment.Execute(
                trainData: trainData,
                columnInformation: columnInformation,
                progressHandler: new RegressionProgressHandler(),
                preFeaturizer: null);

            ITransformer bestModel = result.BestRun.Model;
            IEstimator <ITransformer> bestEstimator = result.BestRun.Estimator;

            // Batch-score the training data with the best model and print the predictions
            IDataView scored = bestModel.Transform(trainData);

            PrintPredictions(scored);
            PrintPredictionsEnumerable(scored);

            // Re-fit the winning estimator on the training data and persist the model
            bestModel = bestEstimator.Fit(trainData);
            mlContext.Model.Save(bestModel, trainData.Schema, MODEL_FILEPATH);
            Console.WriteLine("Done");
        }
示例#15
0
        /// <summary>
        /// Trains a taxi-fare regression model with AutoML, evaluates the best run on the
        /// test set, exports the trained model to ONNX, and returns it.
        /// CSV layout: vendor_id,rate_code,passenger_count,trip_time_in_secs,trip_distance,payment_type,fare_amount
        /// </summary>
        private static ITransformer BuildTrainEvaluateAndSaveModel(MLContext mlContext)
        {
            // STEP 1: Load the train and test data
            IDataView trainView = mlContext.Data.LoadFromTextFile <TaxiTrip>(TrainDataPath, hasHeader: true, separatorChar: ',');
            IDataView testView  = mlContext.Data.LoadFromTextFile <TaxiTrip>(TestDataPath, hasHeader: true, separatorChar: ',');

            // Preview a few training rows
            ConsoleHelper.ShowDataViewInConsole(mlContext, trainView);

            // STEP 2: Progress handler invoked after each candidate model
            var handler = new RegressionExperimentProgressHandler();

            // STEP 3: Run the AutoML regression experiment
            ConsoleHelper.ConsoleWriteHeader("=============== Training the model ===============");
            Console.WriteLine($"Running AutoML regression experiment for {ExperimentTime} seconds...");
            ExperimentResult <RegressionMetrics> result = mlContext.Auto()
                                                          .CreateRegressionExperiment(ExperimentTime)
                                                          .Execute(trainView, LabelColumnName, progressHandler: handler);

            // Report the top models found
            Console.WriteLine();
            PrintTopModels(result);

            // STEP 4: Evaluate the winning model on the test data
            ConsoleHelper.ConsoleWriteHeader("===== Evaluating model's accuracy with test data =====");
            RunDetail <RegressionMetrics> bestRun = result.BestRun;
            ITransformer trainedModel             = bestRun.Model;
            IDataView    scored                   = trainedModel.Transform(testView);
            var          metrics                  = mlContext.Regression.Evaluate(scored, labelColumnName: LabelColumnName, scoreColumnName: "Score");

            ConsoleHelper.PrintRegressionMetrics(bestRun.TrainerName, metrics);

            // STEP 5: Export the trained model to ONNX
            using (var stream = File.Create(MODEL_NAME))
            {
                mlContext.Model.ConvertToOnnx(trainedModel, trainView, stream);
            }
            Console.WriteLine("The model is saved to {0}", MODEL_NAME);

            return trainedModel;
        }
示例#16
0
        /// <summary>
        /// Re-fit best pipeline on all available data (train + test files combined).
        /// </summary>
        private static ITransformer RefitBestPipeline(MLContext mlContext, ExperimentResult <RegressionMetrics> experimentResult,
                                                      ColumnInferenceResults columnInference)
        {
            ConsoleHelper.ConsoleWriteHeader("=============== Re-fitting best pipeline ===============");
            // Load both files as a single data view and fit the winning estimator on it
            var loader  = mlContext.Data.CreateTextLoader(columnInference.TextLoaderOptions);
            var allData = loader.Load(new MultiFileSource(TrainDataPath, TestDataPath));
            return experimentResult.BestRun.Estimator.Fit(allData);
        }
        /// <summary>
        /// End-to-end taxi-fare demo: runs an AutoML regression experiment, prints validation
        /// and test metrics for the best model, saves it, and predicts a sample trip's fare.
        /// </summary>
        public static void Run()
        {
            MLContext mlContext = new MLContext();

            // STEP 1: Load train and test data
            IDataView trainView = mlContext.Data.LoadFromTextFile <TaxiTrip>(TrainDataPath, hasHeader: true, separatorChar: ',');
            IDataView testView  = mlContext.Data.LoadFromTextFile <TaxiTrip>(TestDataPath, hasHeader: true, separatorChar: ',');

            // STEP 2: Run the AutoML experiment (no progress handler)
            Console.WriteLine($"Running AutoML regression experiment for {ExperimentTime} seconds...");
            ExperimentResult <RegressionMetrics> result = mlContext.Auto()
                                                          .CreateRegressionExperiment(ExperimentTime)
                                                          .Execute(trainView, LabelColumnName);

            // STEP 3: Report the best model's validation metrics
            RunDetail <RegressionMetrics> bestRun = result.BestRun;

            Console.WriteLine($"Total models produced: {result.RunDetails.Count()}");
            Console.WriteLine($"Best model's trainer: {bestRun.TrainerName}");
            Console.WriteLine($"Metrics of best model from validation data --");
            PrintMetrics(bestRun.ValidationMetrics);

            // STEP 5: Evaluate the best model on the held-out test data
            IDataView scoredTest = bestRun.Model.Transform(testView);
            RegressionMetrics testMetrics = mlContext.Regression.Evaluate(scoredTest, labelColumnName: LabelColumnName);

            Console.WriteLine($"Metrics of best model on test data --");
            PrintMetrics(testMetrics);

            // STEP 6: Persist the best model for later deployment and inferencing
            using (FileStream fs = File.Create(ModelPath))
            {
                mlContext.Model.Save(bestRun.Model, trainView.Schema, fs);
            }

            // STEP 7: Build a prediction engine from the trained model
            var engine = mlContext.Model.CreatePredictionEngine <TaxiTrip, TaxiTripFarePrediction>(bestRun.Model);

            // STEP 8: Predict the fare for a sample trip
            var sampleTrip = new TaxiTrip
            {
                VendorId          = "VTS",
                RateCode          = 1,
                PassengerCount    = 1,
                TripTimeInSeconds = 1140,
                TripDistance      = 3.75f,
                PaymentType       = "CRD"
            };
            var prediction = engine.Predict(sampleTrip);

            Console.WriteLine($"Predicted fare for test taxi trip: {prediction.FareAmount}");

            Console.WriteLine("Press any key to continue...");
            Console.ReadKey();
        }
示例#18
0
        /// <summary>
        /// End-to-end test: runs a 5-second AutoML recommendation experiment on the
        /// trivial matrix-factorization dataset, checks the best run's output schema,
        /// and evaluates the best model on the test split.
        /// </summary>
        public void AutoFitRecommendationTest()
        {
            // Specific column names of the considered data set
            string    labelColumnName = "Label";
            string    userColumnName  = "User";
            string    itemColumnName  = "Item";
            string    scoreColumnName = "Score";
            MLContext mlContext       = new MLContext();

            // STEP 1: Load train/test data
            var reader        = new TextLoader(mlContext, GetLoaderArgs(labelColumnName, userColumnName, itemColumnName));
            var trainDataView = reader.Load(new MultiFileSource(GetDataPath(TestDatasets.trivialMatrixFactorization.trainFilename)));
            var testDataView  = reader.Load(new MultiFileSource(GetDataPath(TestDatasets.trivialMatrixFactorization.testFilename)));

            // STEP 2: Run AutoML experiment with a 5-second budget
            ExperimentResult <RegressionMetrics> experimentResult = mlContext.Auto()
                                                                    .CreateRecommendationExperiment(5)
                                                                    .Execute(trainDataView, testDataView,
                                                                             new ColumnInformation()
            {
                LabelColumnName  = labelColumnName,
                UserIdColumnName = userColumnName,
                ItemIdColumnName = itemColumnName
            });

            RunDetail <RegressionMetrics> bestRun = experimentResult.BestRun;

            Assert.True(experimentResult.RunDetails.Count() > 1);
            Assert.NotNull(bestRun.ValidationMetrics);
            // FIX: the original `Max(i => i.ValidationMetrics.RSquared != 0)` took the
            // Max of booleans and dereferenced ValidationMetrics unconditionally, which
            // throws NullReferenceException when any run failed. Guard for null and use
            // Any(), which expresses the intent ("at least one run has RSquared != 0").
            Assert.True(experimentResult.RunDetails.Any(i => i.ValidationMetrics != null && i.ValidationMetrics.RSquared != 0));

            var outputSchema = bestRun.Model.GetOutputSchema(trainDataView.Schema);
            // User/item names appear twice in the expected schema — presumably the raw
            // column plus a key-mapped copy produced by the trainer; confirm if the
            // pipeline's featurization changes.
            var expectedOutputNames = new string[] { labelColumnName, userColumnName, userColumnName, itemColumnName, itemColumnName, scoreColumnName };

            foreach (var col in outputSchema)
            {
                Assert.True(col.Name == expectedOutputNames[col.Index]);
            }

            IDataView testDataViewWithBestScore = bestRun.Model.Transform(testDataView);

            // Retrieve label column's index from the test IDataView
            testDataView.Schema.TryGetColumnIndex(labelColumnName, out int labelColumnId);
            // Retrieve score column's index from the IDataView produced by the trained model
            testDataViewWithBestScore.Schema.TryGetColumnIndex(scoreColumnName, out int scoreColumnId);

            var metrices = mlContext.Recommendation().Evaluate(testDataViewWithBestScore, labelColumnName: labelColumnName, scoreColumnName: scoreColumnName);

            Assert.NotEqual(0, metrices.MeanSquaredError);
        }
        /// <summary>
        /// Print top models from AutoML experiment, ranked by micro-accuracy
        /// (higher is better).
        /// </summary>
        /// <param name="experimentResult">The completed multiclass classification experiment result.</param>
        public static void PrintTopModels(ExperimentResult <MulticlassClassificationMetrics> experimentResult)
        {
            // Get top few runs ranked by accuracy. Materialize the query once:
            // the original used Count()/ElementAt(i) on the deferred query, which
            // re-ran the whole filter/sort pipeline on every loop iteration.
            var topRuns = experimentResult.RunDetails
                          .Where(r => r.ValidationMetrics != null && !double.IsNaN(r.ValidationMetrics.MicroAccuracy))
                          .OrderByDescending(r => r.ValidationMetrics.MicroAccuracy)
                          .Take(3)
                          .ToList();

            PrintMulticlassClassificationMetricsHeader();
            for (var i = 0; i < topRuns.Count; i++)
            {
                var run = topRuns[i];
                PrintIterationMetrics(i + 1, run.TrainerName, run.ValidationMetrics, run.RuntimeInSeconds);
            }
        }
示例#20
0
        /// <summary>
        /// Queues every algorithm of every experiment problem on a multithreaded
        /// runner, executes them in parallel, and collects timing, quality
        /// indicators, and (optionally) LaTeX tables, box plots, and Friedman tables.
        /// </summary>
        /// <returns>The populated <c>ExperimentResult</c> for this experiment.</returns>
        public ExperimentResult RunExperiment()
        {
            MultithreadedAlgorithmRunner parallelRunner    = new MultithreadedAlgorithmRunner();
            ExperimentResult             experimentOutcome = new ExperimentResult();

            parallelRunner.StartParallelRunner(this);

            // Queue every algorithm of every problem for execution.
            foreach (var problem in ExperimentProblems)
            {
                foreach (var entry in problem.AlgorithmDictionary)
                {
                    foreach (var algorithm in entry.Value)
                    {
                        parallelRunner.AddTaskForExecution(new object[] { algorithm });
                    }
                }
            }

            // Execute all queued algorithms and record the wall-clock time.
            long startTicks = Environment.TickCount;

            parallelRunner.ParallelExecution();
            experimentOutcome.ElapsedTime = Environment.TickCount - startTicks;

            // Derive quality indicators from the raw algorithm results.
            experimentOutcome.Indicators = new QualityIndicatorTables(this).GetIndicators();

            if (this.GenerateQualityTables)
            {
                experimentOutcome.LatexPath     = Path.Combine(this.ExperimentBaseDirectory, this.Name + ".tex");
                experimentOutcome.QualityTables = new LatexTables(this, experimentOutcome.LatexPath, experimentOutcome.Indicators).Generate();
            }

            if (this.GenerateBoxPlots)
            {
                experimentOutcome.BoxPlots = new BoxPlots(this, experimentOutcome.Indicators).Generate();
            }

            if (this.GenerateFriedmanTables)
            {
                // Friedman tables are written to disk only; nothing is stored on the result.
                new FriedmanTables(this, this.ExperimentBaseDirectory, experimentOutcome.Indicators).generate();
            }

            return experimentOutcome;
        }
示例#21
0
        /// <summary>
        /// Loads the training data, runs an AutoML multiclass classification
        /// experiment for <c>ExperimentTime</c> seconds, prints the best run's
        /// validation metrics, cross-validates on a bootstrap sample, and saves
        /// the best model. Reads the class-level <c>mlContext</c>,
        /// <c>TRAIN_DATA_FILEPATH</c>, <c>ExperimentTime</c> and <c>MODEL_FILEPATH2</c>.
        /// </summary>
        public static void CreateExperiment()
        {
            // STEP 1: Load the tab-separated training data.
            var       tmpPath          = GetAbsolutePath(TRAIN_DATA_FILEPATH);
            IDataView trainingDataView = mlContext.Data.LoadFromTextFile <ModelInput>(
                path: tmpPath,
                hasHeader: true,
                separatorChar: '\t',
                allowQuoting: true,
                allowSparse: false);

            // NOTE(review): the "test" set is a bootstrap resample OF the training
            // data, so test metrics will be optimistic — confirm this is intended.
            IDataView testDataView = mlContext.Data.BootstrapSample(trainingDataView);

            // STEP 2: Run AutoML experiment
            Console.WriteLine($"Running AutoML Multiclass classification experiment for {ExperimentTime} seconds...");
            // Equivalent single-expression form, kept for reference:
            //ExperimentResult<MulticlassClassificationMetrics> experimentResult = mlContext.Auto()
            //    .CreateMulticlassClassificationExperiment(ExperimentTime)
            //    .Execute(trainingDataView, labelColumnName: "reservation_status");
            MulticlassClassificationExperiment experiment = mlContext.Auto()
                                                            .CreateMulticlassClassificationExperiment(ExperimentTime);
            ExperimentResult <MulticlassClassificationMetrics> experimentResult = experiment.Execute(trainingDataView, labelColumnName: "reservation_status");

            // STEP 3: Print metrics from the best model's validation data.
            RunDetail <MulticlassClassificationMetrics> bestRun = experimentResult.BestRun;

            Console.WriteLine($"Total models produced: {experimentResult.RunDetails.Count()}");
            Console.WriteLine($"Best model's trainer: {bestRun.TrainerName}");
            Console.WriteLine($"Metrics of best model from validation data --");
            PrintMulticlassClassificationMetrics(bestRun.ValidationMetrics);

            // STEP 4: Evaluate on the (bootstrap) test data.
            IDataView testDataViewWithBestScore = bestRun.Model.Transform(testDataView);

            try
            {
                var testMetrics = mlContext.MulticlassClassification.CrossValidate(testDataViewWithBestScore, bestRun.Estimator, numberOfFolds: 5, labelColumnName: "reservation_status");
                Console.WriteLine($"Metrics of best model on test data --");
                PrintMulticlassClassificationFoldsAverageMetrics(testMetrics);
            }
            catch
            {
                // Cross-validation on already-scored data is not supported by every
                // ML.NET version; degrade gracefully instead of failing the run.
                Console.WriteLine($"Metrics not supported in this version");
            }


            // STEP 5: Save the best model along with the input schema.
            tmpPath = GetAbsolutePath(MODEL_FILEPATH2);
            SaveModel(mlContext, bestRun.Model, tmpPath, trainingDataView.Schema);
        }
示例#22
0
        /// <summary>
        /// Print top models from AutoML experiment, ranked by root mean squared
        /// error (lower is better).
        /// </summary>
        /// <param name="experimentResult">The completed regression experiment result.</param>
        private static void PrintTopModels(ExperimentResult <RegressionMetrics> experimentResult)
        {
            // Get top few runs ranked by root mean squared error. Materialize the
            // query once: the original used Count()/ElementAt(i) on the deferred
            // query, re-running the whole filter/sort pipeline every iteration.
            var topRuns = experimentResult.RunDetails
                          .Where(r => r.ValidationMetrics != null && !double.IsNaN(r.ValidationMetrics.RootMeanSquaredError))
                          .OrderBy(r => r.ValidationMetrics.RootMeanSquaredError)
                          .Take(3)
                          .ToList();

            Console.WriteLine("Top models ranked by root mean squared error --");
            ConsoleHelper.PrintRegressionMetricsHeader();
            for (var i = 0; i < topRuns.Count; i++)
            {
                var run = topRuns[i];
                ConsoleHelper.PrintIterationMetrics(i + 1, run.TrainerName, run.ValidationMetrics, run.RuntimeInSeconds);
            }
        }
        /// <summary>
        /// Prints top models from AutoML experiment, ranked by accuracy
        /// (higher is better).
        /// </summary>
        /// <param name="experimentResult">The completed binary classification experiment result.</param>
        private static void PrintTopModels(ExperimentResult <BinaryClassificationMetrics> experimentResult)
        {
            // Get top few runs ranked by accuracy. Materialize the query once:
            // the original used Count()/ElementAt(i) on the deferred query,
            // re-running the whole filter/sort pipeline every iteration.
            var topRuns = experimentResult.RunDetails
                          .Where(r => r.ValidationMetrics != null && !double.IsNaN(r.ValidationMetrics.Accuracy))
                          .OrderByDescending(r => r.ValidationMetrics.Accuracy)
                          .Take(3)
                          .ToList();

            Console.WriteLine("Top models ranked by accuracy --");
            ConsoleHelper.PrintBinaryClassificationMetricsHeader();
            for (var i = 0; i < topRuns.Count; i++)
            {
                var run = topRuns[i];
                ConsoleHelper.PrintIterationMetrics(i + 1, run.TrainerName, run.ValidationMetrics, run.RuntimeInSeconds);
            }
        }
示例#24
0
        /// <summary>
        /// Re-fits the best AutoML pipeline on a combined data set, evaluates and
        /// saves the refit model, and returns it.
        /// </summary>
        /// <param name="experimentResult">Completed experiment whose best run supplies the estimator.</param>
        /// <param name="trainerName">Trainer name used when printing metrics.</param>
        /// <param name="multiFileSource">Source of the combined data to re-fit on.</param>
        /// <param name="dataPath">Path of the data used for evaluation and the saved schema.</param>
        /// <param name="modelPath">Destination path for the saved model.</param>
        /// <param name="textLoader">Loader shared by both data sources.</param>
        /// <param name="mlContext">Shared ML.NET context.</param>
        /// <returns>The re-fit transformer.</returns>
        public static ITransformer Retrain(ExperimentResult <MulticlassClassificationMetrics> experimentResult,
                                           string trainerName, MultiFileSource multiFileSource, string dataPath, string modelPath, TextLoader textLoader, MLContext mlContext)
        {
            // Data used for the final evaluation and for the saved model.
            var evaluationData = textLoader.Load(dataPath);

            ConsoleHelper.ConsoleWriteHeader("=============== Re-fitting best pipeline ===============");
            // Combined data set (e.g. train + validation) the pipeline is re-fit on.
            var combinedData = textLoader.Load(multiFileSource);

            // Re-train the winning estimator chain from scratch on the combined data.
            var refitModel = experimentResult.BestRun.Estimator.Fit(combinedData);

            EvaluateTrainedModelAndPrintMetrics(mlContext, refitModel, trainerName, evaluationData);
            SaveModel(mlContext, refitModel, modelPath, evaluationData);
            return refitModel;
        }
        /// <summary>
        /// Prints the top runs of a ranking AutoML experiment, ordered by NDCG at
        /// the given truncation level (higher is better).
        /// </summary>
        /// <param name="experimentResult">The completed ranking experiment result.</param>
        /// <param name="optimizationMetricTruncationLevel">1-based NDCG truncation level used for ordering.</param>
        private static void PrintTopModels(ExperimentResult <RankingMetrics> experimentResult, uint optimizationMetricTruncationLevel)
        {
            // The NDCG array is 0-based while the truncation level is 1-based.
            var ndcgIndex = (int)optimizationMetricTruncationLevel - 1;

            // Get top few runs ordered by NDCG. Materialize the query once: the
            // original used Count()/ElementAt(i) on the deferred query, re-running
            // the whole filter/sort pipeline on every loop iteration.
            var topRuns = experimentResult.RunDetails
                          .Where(r => r.ValidationMetrics != null && !double.IsNaN(r.ValidationMetrics.NormalizedDiscountedCumulativeGains[ndcgIndex]))
                          .OrderByDescending(r => r.ValidationMetrics.NormalizedDiscountedCumulativeGains[ndcgIndex])
                          .Take(5)
                          .ToList();

            Console.WriteLine($"Top models ordered by NDCG@{optimizationMetricTruncationLevel}");
            ConsoleHelper.PrintRankingMetricsHeader();
            for (var i = 0; i < topRuns.Count; i++)
            {
                var run = topRuns[i];
                ConsoleHelper.PrintIterationMetrics(i + 1, run.TrainerName, run.ValidationMetrics, run.RuntimeInSeconds);
            }
        }
示例#26
0
        /// <summary>
        /// Featurizes the input data with the supplied pipeline, runs an AutoML
        /// binary classification experiment with the given settings, and prints the
        /// best run's trainer name and validation metrics.
        /// </summary>
        /// <param name="mlContext">Shared ML.NET context.</param>
        /// <param name="pipeline">Featurization pipeline applied before the experiment.</param>
        /// <param name="file">Raw input data.</param>
        /// <param name="progress">Handler invoked after each evaluated model.</param>
        /// <param name="settings">Experiment settings (time budget, metric, ...).</param>
        public static void AutoML(MLContext mlContext, IEstimator <ITransformer> pipeline, IDataView file, Progress <RunDetail <BinaryClassificationMetrics> > progress, BinaryExperimentSettings settings)
        {
            // Apply the caller-supplied featurization before handing data to AutoML.
            var featurizedData = pipeline.Fit(file).Transform(file);

            ExperimentResult <BinaryClassificationMetrics> experimentResult = mlContext.Auto()
                                                                              .CreateBinaryClassificationExperiment(settings)
                                                                              .Execute(trainData: featurizedData, labelColumnName: nameof(transformOutput.Label), progressHandler: progress);

            // Read BestRun once and report the winning run.
            var bestRun = experimentResult.BestRun;

            Console.WriteLine();
            Console.WriteLine($"Trainername- {bestRun.TrainerName}");
            Console.WriteLine($"Accuracy- {bestRun.ValidationMetrics.Accuracy}");
            Console.WriteLine($"AreaUnderRocCurve- {bestRun.ValidationMetrics.AreaUnderRocCurve}");
            Console.WriteLine();

            // Kept for the commented-out save below; unused otherwise.
            var model = bestRun.Model as TransformerChain <ITransformer>;
            //mlContext.Model.Save(model, featurizedData.Schema, @"C:\Users\ludwi\source\repos\JugendForscht");
        }
示例#27
0
        /// <summary>
        /// Extension that dumps a regression AutoML experiment's run details and
        /// its best run's trainer name and validation metrics to the console.
        /// </summary>
        /// <param name="result">The completed regression experiment result.</param>
        public static void Print(this ExperimentResult <RegressionMetrics> result)
        {
            // Fixed typo in the user-visible message ("Regession" -> "Regression").
            Console.WriteLine("Regression metric...");
            var details = result.RunDetails.ToList();

            Console.WriteLine("details");
            foreach (var detail in details)
            {
                Console.WriteLine(detail);
            }

            // Read BestRun once instead of four times.
            var bestRun = result.BestRun;

            Console.WriteLine("best run");
            Console.WriteLine(bestRun);
            Console.WriteLine("trainerName " + bestRun.TrainerName);
            Console.WriteLine("validationMetrics " + bestRun.ValidationMetrics);
        }
示例#28
0
        /// <summary>
        /// Runs an AutoML regression experiment over the class-level
        /// <c>TrainDataView</c> using a pre-featurizer and customized column
        /// information, prints the top models, and returns the experiment result.
        /// The experiment is cancelled when the user presses any key.
        /// </summary>
        /// <param name="mlContext">Shared ML.NET context.</param>
        /// <param name="columnInference">Inferred column information, customized below before the run.</param>
        /// <returns>The completed AutoML regression experiment result.</returns>
        private static ExperimentResult <RegressionMetrics> RunAutoMLExperiment(MLContext mlContext,
                                                                                ColumnInferenceResults columnInference)
        {
            // STEP 1: Display first few rows of the training data
            ConsoleHelper.ShowDataViewInConsole(mlContext, TrainDataView);

            // STEP 2: Build a pre-featurizer for use in the AutoML experiment.
            // (Internally, AutoML uses one or more train/validation data splits to
            // evaluate the models it produces. The pre-featurizer is fit only on the
            // training data split to produce a trained transform. Then, the trained transform
            // is applied to both the train and validation data splits.)
            // This one maps payment_type == "CSH" to a new boolean "is_cash" column.
            IEstimator <ITransformer> preFeaturizer = mlContext.Transforms.Conversion.MapValue("is_cash",
                                                                                               new[] { new KeyValuePair <string, bool>("CSH", true) }, "payment_type");

            // STEP 3: Customize column information returned by InferColumns API.
            // payment_type is consumed by the pre-featurizer above, so hide the raw
            // column from AutoML's own featurization.
            ColumnInformation columnInformation = columnInference.ColumnInformation;

            columnInformation.CategoricalColumnNames.Remove("payment_type");
            columnInformation.IgnoredColumnNames.Add("payment_type");

            // STEP 4: Initialize a cancellation token source to stop the experiment.
            var cts = new CancellationTokenSource();

            // STEP 5: Initialize our user-defined progress handler that AutoML will
            // invoke after each model it produces and evaluates.
            var progressHandler = new RegressionExperimentProgressHandler();

            // STEP 6: Create experiment settings (wired to the cancellation source).
            var experimentSettings = CreateExperimentSettings(mlContext, cts);

            // STEP 7: Run AutoML regression experiment
            var experiment = mlContext.Auto().CreateRegressionExperiment(experimentSettings);

            ConsoleHelper.ConsoleWriteHeader("=============== Running AutoML experiment ===============");
            Console.WriteLine($"Running AutoML regression experiment...");
            var stopwatch = Stopwatch.StartNew();

            // Arm the key-press cancellation BEFORE the blocking Execute call so the
            // user can stop a running experiment.
            CancelExperimentAfterAnyKeyPress(cts);
            ExperimentResult <RegressionMetrics> experimentResult = experiment.Execute(TrainDataView, columnInformation, preFeaturizer, progressHandler);

            Console.WriteLine($"{experimentResult.RunDetails.Count()} models were returned after {stopwatch.Elapsed.TotalSeconds:0.00} seconds{Environment.NewLine}");

            // Print top models found by AutoML
            PrintTopModels(experimentResult);

            return(experimentResult);
        }
示例#29
0
        /// <summary>
        /// Runs an AutoML binary classification experiment on sentiment data,
        /// prints validation and test metrics for the best model, saves the model,
        /// and performs one sample prediction. Reads the class-level
        /// <c>TrainDataPath</c>, <c>TestDataPath</c>, <c>ExperimentTime</c> and
        /// <c>ModelPath</c>.
        /// </summary>
        public static void Run()
        {
            var mlContext = new MLContext();

            // STEP 1: Load the train and test splits.
            IDataView trainingData = mlContext.Data.LoadFromTextFile <SentimentIssue>(TrainDataPath, hasHeader: true);
            IDataView testData     = mlContext.Data.LoadFromTextFile <SentimentIssue>(TestDataPath, hasHeader: true);

            // STEP 2: Let AutoML search for the best classifier within the budget.
            Console.WriteLine($"Running AutoML binary classification experiment for {ExperimentTime} seconds...");
            var experimentResult = mlContext.Auto()
                                   .CreateBinaryClassificationExperiment(ExperimentTime)
                                   .Execute(trainingData);

            // STEP 3: Summarize the winning run via its validation metrics.
            RunDetail <BinaryClassificationMetrics> winner = experimentResult.BestRun;

            Console.WriteLine($"Total models produced: {experimentResult.RunDetails.Count()}");
            Console.WriteLine($"Best model's trainer: {winner.TrainerName}");
            Console.WriteLine($"Metrics of best model from validation data --");
            PrintMetrics(winner.ValidationMetrics);

            // STEP 4: Score the held-out test data and evaluate.
            IDataView scoredTestData = winner.Model.Transform(testData);
            BinaryClassificationMetrics testMetrics = mlContext.BinaryClassification.EvaluateNonCalibrated(scoredTestData);

            Console.WriteLine($"Metrics of best model on test data --");
            PrintMetrics(testMetrics);

            // STEP 5: Persist the winning model for later deployment and inferencing.
            using (FileStream fs = File.Create(ModelPath))
            {
                mlContext.Model.Save(winner.Model, trainingData.Schema, fs);
            }

            // STEP 6: Create prediction engine from the best trained model.
            var predictionEngine = mlContext.Model.CreatePredictionEngine <SentimentIssue, SentimentPrediction>(winner.Model);

            // STEP 7: Predict the sentiment of one sample issue.
            var testSentimentIssue = new SentimentIssue { Text = "I hope this helps." };
            var prediction         = predictionEngine.Predict(testSentimentIssue);

            Console.WriteLine($"Predicted sentiment for test issue: {prediction.Prediction}");

            Console.WriteLine("Press any key to continue...");
            Console.ReadKey();
        }
示例#30
0
            /// <summary>
            /// Runs an AutoML multiclass classification experiment on taxi-trip
            /// data, prints the best run's validation metrics, and predicts one
            /// sample trip. Reads the class-level <c>TrainDataPath</c>,
            /// <c>TestDataPath</c>, <c>ExperimentTime</c> and <c>LabelColumnName</c>.
            /// </summary>
            public static void Run()
            {
                MLContext mlContext = new MLContext();

                // STEP 1: Load the comma-separated train and test data.
                IDataView trainDataView = mlContext.Data.LoadFromTextFile <TaxiTrip>(TrainDataPath, separatorChar: ',', hasHeader: true);
                IDataView testDataView  = mlContext.Data.LoadFromTextFile <TaxiTrip>(TestDataPath, separatorChar: ',', hasHeader: true);
                // NOTE(review): testDataView is loaded but never used below — verify
                // whether a test-set evaluation step was dropped.

                // STEP 2: Run AutoML experiment
                Console.WriteLine($"Running AutoML multiclass classification experiment for {ExperimentTime} seconds...");
                ExperimentResult <MulticlassClassificationMetrics> experimentResult = mlContext.Auto()
                                                                                      .CreateMulticlassClassificationExperiment(ExperimentTime)
                                                                                      .Execute(trainDataView, LabelColumnName);

                // STEP 3: Print metrics from the best model's validation data.
                RunDetail <MulticlassClassificationMetrics> bestRun = experimentResult.BestRun;

                Console.WriteLine($"Total models produced: {experimentResult.RunDetails.Count()}");
                Console.WriteLine($"Best model's trainer: {bestRun.TrainerName}");
                Console.WriteLine($"Metrics of best model from validation data --");
                PrintMetrics(bestRun.ValidationMetrics);

                // STEP 4: Create prediction engine from the best trained model.
                var predictionEngine = mlContext.Model.CreatePredictionEngine <TaxiTrip, TaxiTripFarePrediction>(bestRun.Model);

                // STEP 5: Initialize a sample taxi trip and predict it. (The original
                // comments said "pixel data" / "predicted number" — copy-paste
                // leftovers from a digit-recognition sample, as is the console
                // message below.)
                var testTaxiTripData = new TaxiTrip
                {
                    VendorId          = "CMT",
                    RateCode          = 1,
                    PassengerCount    = 1,
                    TripTimeInSeconds = 1271,
                    TripDistance      = 3.8F,
                    PaymentType       = "CRD",
                    FareAmount        = 17.5F,
                };
                var prediction = predictionEngine.Predict(testTaxiTripData);

                Console.WriteLine($"Predicted number for test data:");

                // prediction.Payment presumably holds per-class scores for the
                // predicted label — confirm against TaxiTripFarePrediction.
                foreach (var x in prediction.Payment)
                {
                    Console.WriteLine(x.ToString());
                }

                Console.WriteLine("Press any key to continue...");
                Console.ReadKey();
            }