예제 #1
0
        /// <summary>
        /// Trains a multiclass classifier with AutoML on <c>tasks.csv</c> (read from the user's Documents folder),
        /// prints one sample prediction and saves the best model to <c>./clickup-model.zip</c>.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            var csvPath = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.MyDocuments), "tasks.csv");

            var context = new MLContext();

            var data = context.Data.LoadFromTextFile<TaskInput>(csvPath, hasHeader: true, separatorChar: ',');

            // Let AutoML search for up to ten minutes, ranking candidate models by log-loss.
            var settings = new MulticlassExperimentSettings
            {
                MaxExperimentTimeInSeconds = 600,
                OptimizingMetric           = MulticlassClassificationMetric.LogLoss
            };

            var experiment = context.Auto().CreateMulticlassClassificationExperiment(settings);

            // "Tags" is the column the model learns to predict.
            var result = experiment.Execute(data, new ColumnInformation {
                LabelColumnName = "Tags"
            });

            var bestModel = result.BestRun.Model;

            // PredictionEngine is IDisposable, so scope it to the single sample prediction
            // instead of leaving it alive until process exit.
            using (var predictionEngine = context.Model.CreatePredictionEngine<TaskInput, TaskOutput>(bestModel))
            {
                var prediction = predictionEngine.Predict(new TaskInput {
                    TaskName = "Introduction to ML.NET"
                });

                Console.WriteLine($"Predicted label - {prediction.PredictedLabel}");
            }

            context.Model.Save(bestModel, data.Schema, "./clickup-model.zip");
        }
예제 #2
0
        /// <summary>
        /// Runs an AutoML multiclass classification experiment and writes a summary of the top three runs.
        /// </summary>
        /// <param name="mlContext">ML.NET context used to create the experiment.</param>
        /// <param name="labelColumnName">Name of the label column in <paramref name="dataView"/>.</param>
        /// <param name="experimentSettings">Settings for the experiment (its time budget is traced).</param>
        /// <param name="progressHandler">Handler passed to AutoML as the experiment's progress handler.</param>
        /// <param name="dataView">Training data.</param>
        /// <returns>The full experiment result, including every run's details.</returns>
        public static ExperimentResult<MulticlassClassificationMetrics> RunAutoMLExperiment(
            MLContext mlContext, string labelColumnName, MulticlassExperimentSettings experimentSettings,
            MulticlassExperimentProgressHandler progressHandler, IDataView dataView)
        {
            ConsoleHelper.ConsoleWriteHeader("=============== Running AutoML experiment ===============");
            Trace.WriteLine($"Running AutoML multiclass classification experiment for {experimentSettings.MaxExperimentTimeInSeconds} seconds...");
            var experimentResult = mlContext.Auto()
                                   .CreateMulticlassClassificationExperiment(experimentSettings)
                                   .Execute(dataView, labelColumnName, progressHandler: progressHandler);

            Trace.WriteLine(Environment.NewLine);
            Trace.WriteLine($"num models created: {experimentResult.RunDetails.Count()}");

            // Get the top few runs ranked by micro-accuracy. The query is materialized once:
            // left deferred, every Count()/ElementAt() call in the loop would re-filter and re-sort all runs.
            var topRuns = experimentResult.RunDetails
                          .Where(r => r.ValidationMetrics != null && !double.IsNaN(r.ValidationMetrics.MicroAccuracy))
                          .OrderByDescending(r => r.ValidationMetrics.MicroAccuracy)
                          .Take(3)
                          .ToList();

            Trace.WriteLine("Top models ranked by accuracy --");
            CreateRow($"{"",-4} {"Trainer",-35} {"MicroAccuracy",14} {"MacroAccuracy",14} {"Duration",9}", Width);
            for (var i = 0; i < topRuns.Count; i++)
            {
                var run     = topRuns[i];
                var metrics = run.ValidationMetrics; // non-null: guaranteed by the filter above
                CreateRow($"{i,-4} {run.TrainerName,-35} {metrics.MicroAccuracy,14:F4} {metrics.MacroAccuracy,14:F4} {run.RuntimeInSeconds,9:F1}", Width);
            }
            return experimentResult;
        }
예제 #3
0
        /// <summary>
        /// Infers the column layout of the training file and builds the settings for a multiclass AutoML experiment.
        /// </summary>
        /// <param name="mlContext">ML.NET context passed to <c>InferColumns</c>.</param>
        /// <param name="st">Supplies the label column name, the column and trainer adjustments, and the experiment time.</param>
        /// <param name="paths">Provides <c>TrainPath</c>, the file used for column inference.</param>
        /// <param name="forPrs">Forwarded unchanged to <c>st.ColumnSetup</c>.</param>
        /// <returns>The column inference results together with the configured experiment settings.</returns>
        public static (ColumnInferenceResults columnInference, MulticlassExperimentSettings experimentSettings) SetupExperiment(
            MLContext mlContext, ExperimentModifier st, DataFilePaths paths, bool forPrs)
        {
            var columnInference   = InferColumns(mlContext, paths.TrainPath, st.LabelColumnName);
            var columnInformation = columnInference.ColumnInformation;

            // Let the modifier adjust the inferred column roles in place.
            st.ColumnSetup(columnInformation, forPrs);

            var experimentSettings = new MulticlassExperimentSettings();

            st.TrainerSetup(experimentSettings.Trainers);
            experimentSettings.MaxExperimentTimeInSeconds = st.ExperimentTime;

            // NOTE(review): this token source is neither cancelled nor exposed by this method,
            // so the token below can never be signalled from here — confirm whether callers need it.
            var cts = new System.Threading.CancellationTokenSource();

            experimentSettings.CancellationToken = cts.Token;

            // Point the cache directory at the system temp folder, so the models AutoML
            // produces are written to disk after each run instead of all being kept in memory.
            // (Setting CacheDirectory to null would keep every model in memory, which on a
            // large dataset could cause the system to run out of memory.)
            experimentSettings.CacheDirectory   = new DirectoryInfo(Path.GetTempPath());
            experimentSettings.OptimizingMetric = MulticlassClassificationMetric.MicroAccuracy;
            return (columnInference, experimentSettings);
        }
예제 #4
0
        /// <summary>
        /// Runs a 10-second AutoML multiclass experiment that predicts the "Saving" column,
        /// prints the best run's macro and micro accuracy, and saves the best model.
        /// </summary>
        public static void DoAutoML()
        {
            // Read the comma-separated training file (with header row, quoted fields allowed).
            IDataView trainingDataView = mlContext.Data.LoadFromTextFile<ModelInput>(
                path: TRAIN_DATA_FILEPATH,
                hasHeader: true,
                separatorChar: ',',
                allowQuoting: true,
                allowSparse: false);

            var experimentSettings = new MulticlassExperimentSettings
            {
                MaxExperimentTimeInSeconds = 10
            };

            MulticlassClassificationExperiment experiment =
                mlContext.Auto().CreateMulticlassClassificationExperiment(experimentSettings);

            // Pre-featurizer: one-hot encode the two categorical columns in place,
            // then gather all inputs into a single "Features" column.
            var categoricalColumns = new[]
            {
                new InputOutputColumnPair("Vehicle Type", "Vehicle Type"),
                new InputOutputColumnPair("Day", "Day")
            };
            var featureColumns = new[] { "Vehicle Type", "Day", "Ride Distance (km)", "Hour" };

            var oneHot              = mlContext.Transforms.Categorical.OneHotEncoding(categoricalColumns);
            var dataProcessPipeline = oneHot.Append(mlContext.Transforms.Concatenate("Features", featureColumns));

            ExperimentResult<Microsoft.ML.Data.MulticlassClassificationMetrics> experimentResult =
                experiment.Execute(trainingDataView, labelColumnName: "Saving", preFeaturizer: dataProcessPipeline);

            var bestRunMetrics = experimentResult.BestRun.ValidationMetrics;

            Console.WriteLine($"Macro Accuracy: {bestRunMetrics.MacroAccuracy:0.##}");
            Console.WriteLine($"Micro Accuracy: {bestRunMetrics.MicroAccuracy:0.##}");

            // Save the winning model together with the input schema.
            SaveModel(mlContext, experimentResult.BestRun.Model, MODEL_FILEPATH, trainingDataView.Schema);
        }
        /// <summary>
        /// Trains a multiclass model with AutoML on the graded ETL output, evaluates it on a held-out
        /// 20% test split and saves the best model to <c>RestaurantInspectionsML/model.zip</c>.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Define source data directory paths
            string solutionDirectory = "/home/lqdev/Development/RestaurantInspectionsSparkMLNET";
            string dataLocation      = Path.Combine(solutionDirectory, "RestaurantInspectionsETL", "Output");

            // Initialize MLContext
            MLContext mlContext = new MLContext();

            // Get directory name of most recent ETL output.
            // Sorted descending so that First() yields the highest-sorting name; this assumes
            // output directory names sort chronologically (e.g. timestamps) — TODO confirm.
            var latestOutput =
                Directory
                .GetDirectories(dataLocation)
                .Select(directory => new DirectoryInfo(directory))
                .OrderByDescending(directoryInfo => directoryInfo.Name)
                .Select(directory => Path.Join(directory.FullName, "Graded"))
                .First();

            var dataFilePaths =
                Directory
                .GetFiles(latestOutput)
                .Where(file => file.EndsWith("csv", StringComparison.Ordinal))
                .ToArray();

            // Load the data
            var       dataLoader = mlContext.Data.CreateTextLoader<ModelInput>(separatorChar: ',', hasHeader: false, allowQuoting: true, trimWhitespace: true);
            IDataView data       = dataLoader.Load(dataFilePaths);

            // Split the data
            TrainTestData dataSplit = mlContext.Data.TrainTestSplit(data, testFraction: 0.2);
            IDataView     trainData = dataSplit.TrainSet;
            IDataView     testData  = dataSplit.TestSet;

            // Define experiment settings
            var experimentSettings = new MulticlassExperimentSettings
            {
                MaxExperimentTimeInSeconds = 600,
                OptimizingMetric           = MulticlassClassificationMetric.LogLoss
            };

            // Create experiment
            var experiment = mlContext.Auto().CreateMulticlassClassificationExperiment(experimentSettings);

            // Run experiment on the training split only; training on the full data set would
            // leak the test rows into the model and inflate the evaluation below.
            var experimentResults = experiment.Execute(trainData, progressHandler: new ProgressHandler());

            // Best Run Results
            var bestModel = experimentResults.BestRun.Model;

            // Evaluate Model on the held-out split
            IDataView scoredTestData = bestModel.Transform(testData);
            var       metrics        = mlContext.MulticlassClassification.Evaluate(scoredTestData);

            Console.WriteLine($"MicroAccuracy: {metrics.MicroAccuracy}");

            // Save Model
            string modelSavePath = Path.Join(solutionDirectory, "RestaurantInspectionsML", "model.zip");

            mlContext.Model.Save(bestModel, data.Schema, modelSavePath);
        }
        /// <summary>
        /// Blob-triggered function: parses the uploaded CSV into <c>TaskInput</c> rows, trains a multiclass
        /// model with AutoML to predict <c>Tags</c>, and uploads the best model to the "models" container.
        /// </summary>
        /// <param name="myBlob">Content of the uploaded blob; read as text. The first line is skipped as a header.</param>
        /// <param name="name">Blob name bound from the trigger path; not used.</param>
        /// <param name="log">Function logger; not used.</param>
        /// <param name="context">Execution context; supplies the directory used to locate <c>local.settings.json</c>.</param>
        public static void Run([BlobTrigger("clickup/{name}", Connection = "AzureWebJobsStorage")] Stream myBlob, string name, ILogger log, ExecutionContext context)
        {
            var config = new ConfigurationBuilder()
                         .SetBasePath(context.FunctionAppDirectory)
                         .AddJsonFile("local.settings.json", optional: true, reloadOnChange: true)
                         .AddEnvironmentVariables()
                         .Build();

            var blobConnection = config.GetSection("AzureWebJobsStorage");

            var mlContext = new MLContext();

            string blobData;
            using (var reader = new StreamReader(myBlob))
            {
                blobData = reader.ReadToEnd();
            }

            // Accept both CRLF and bare LF line endings; splitting on "\r\n" alone would turn
            // an LF-only file into a single line, which is then skipped as the header.
            // Reading stops at the first row whose first field is blank.
            // The result is materialized so the blob text is parsed once rather than on every
            // pass the trainer makes over the data.
            var parsedData = blobData
                             .Split(new[] { "\r\n", "\n" }, System.StringSplitOptions.None)
                             .Skip(1)
                             .Select(line => line.Split(','))
                             .TakeWhile(row => !string.IsNullOrWhiteSpace(row[0]))
                             .Select(row => new TaskInput
                             {
                                 TaskName = row[0],
                                 Tags     = row[1]
                             })
                             .ToList();

            var data = mlContext.Data.LoadFromEnumerable(parsedData);

            var settings = new MulticlassExperimentSettings
            {
                MaxExperimentTimeInSeconds = 600,
                OptimizingMetric           = MulticlassClassificationMetric.LogLoss
            };

            var experiment = mlContext.Auto().CreateMulticlassClassificationExperiment(settings);

            var result = experiment.Execute(data, new ColumnInformation {
                LabelColumnName = "Tags"
            });

            var bestModel = result.BestRun.Model;

            // Write the model to a local file first, then upload that file.
            mlContext.Model.Save(bestModel, data.Schema, "./clickup-model.zip");

            var storage = CloudStorageAccount.Parse(blobConnection.Value);

            var storageClient = storage.CreateCloudBlobClient();

            var container = storageClient.GetContainerReference("models");

            var modelRef = container.GetBlockBlobReference("clickup-model.zip");

            modelRef.UploadFromFile("clickup-model.zip");
        }
예제 #7
0
        /// <summary>
        /// Loads the training and validation files, runs the AutoML experiment on the training data,
        /// evaluates the best run's model against the validation data and saves that model.
        /// </summary>
        /// <param name="mlContext">ML.NET context shared by every step.</param>
        /// <param name="labelColumnName">Name of the label column.</param>
        /// <param name="experimentSettings">Settings handed to the AutoML experiment.</param>
        /// <param name="progressHandler">Handler passed through to the AutoML experiment.</param>
        /// <param name="paths">Supplies the train, validate and model paths.</param>
        /// <param name="textLoader">Loader used for both data files.</param>
        /// <returns>The result of the AutoML experiment.</returns>
        public static ExperimentResult<MulticlassClassificationMetrics> Train(
            MLContext mlContext, string labelColumnName, MulticlassExperimentSettings experimentSettings,
            MulticlassExperimentProgressHandler progressHandler, DataFilePaths paths, TextLoader textLoader)
        {
            var trainingSet   = textLoader.Load(paths.TrainPath);
            var validationSet = textLoader.Load(paths.ValidatePath);

            var outcome = RunAutoMLExperiment(mlContext, labelColumnName, experimentSettings, progressHandler, trainingSet);
            var winner  = outcome.BestRun;

            EvaluateTrainedModelAndPrintMetrics(mlContext, winner.Model, winner.TrainerName, validationSet);
            SaveModel(mlContext, winner.Model, paths.ModelPath, trainingSet);

            return outcome;
        }
        /// <summary>
        /// Creates the multiclass AutoML experiment (180 s budget, optimizing log-loss, no cache directory)
        /// and stores it in <c>_experiment</c>.
        /// </summary>
        public void SetUpExperiment()
        {
            var settings = new MulticlassExperimentSettings();
            settings.MaxExperimentTimeInSeconds = 180;
            settings.OptimizingMetric           = MulticlassClassificationMetric.LogLoss;
            settings.CacheDirectory             = null;

            // These two trainers yield no metrics in UWP, so take them out of the search.
            var trainersWithoutMetrics = new[]
            {
                MulticlassClassificationTrainer.FastTreeOva,
                MulticlassClassificationTrainer.FastForestOva
            };
            foreach (var trainer in trainersWithoutMetrics)
            {
                settings.Trainers.Remove(trainer);
            }

            _experiment = MLContext.Auto().CreateMulticlassClassificationExperiment(settings);
        }
        /// <summary>
        /// Runs a 300-second AutoML multiclass experiment on the news aggregator data set,
        /// predicting "Category", and prints the best run's micro and macro accuracy.
        /// </summary>
        private static void FindTheBestModel()
        {
            Console.WriteLine("Finding the best model using AutoML");

            var mlContext = new MLContext(seed: 0);

            IDataView rawData = mlContext.Data.LoadFromTextFile<ModelInput>(
                "Data\\uci-news-aggregator.csv",
                hasHeader: true,
                separatorChar: ',',
                allowQuoting: true);

            // Map the "Category" label to key values in place before handing the data to AutoML.
            var labelToKey = mlContext.Transforms.Conversion
                             .MapValueToKey(inputColumnName: "Category", outputColumnName: "Category");
            var keyedData  = labelToKey.Fit(rawData).Transform(rawData);

            var settings = new MulticlassExperimentSettings();
            settings.MaxExperimentTimeInSeconds = 300;
            settings.CacheBeforeTrainer         = CacheBeforeTrainer.On;
            settings.OptimizingMetric           = MulticlassClassificationMetric.MicroAccuracy;
            settings.CacheDirectory             = null;

            var experiment = mlContext.Auto().CreateMulticlassClassificationExperiment(settings);

            Console.WriteLine("Starting experiments");

            var progress = new MulticlassExperimentProgressHandler();
            var outcome  = experiment.Execute(
                trainData: keyedData,
                labelColumnName: "Category",
                progressHandler: progress);

            Console.WriteLine("Metrics from best run:");

            var bestMetrics = outcome.BestRun.ValidationMetrics;

            Console.WriteLine($"MicroAccuracy: {bestMetrics.MicroAccuracy:0.##}");
            Console.WriteLine($"MacroAccuracy: {bestMetrics.MacroAccuracy:0.##}");
        }
        /// <summary>
        /// Runs a 180-second AutoML experiment restricted to the L-BFGS maximum entropy trainer,
        /// saves the best model as <c>Automation.zip</c> in the app's local folder, and then unwraps the
        /// model down to its trained parameters so they can be inspected in a debugger.
        /// </summary>
        /// <exception cref="System.InvalidCastException">
        /// Thrown when the best model does not have the transformer-chain shape this method expects.
        /// </exception>
        public void HyperParameterize()
        {
            var settings = new MulticlassExperimentSettings
            {
                MaxExperimentTimeInSeconds = 180,
                OptimizingMetric           = MulticlassClassificationMetric.LogLoss,
                CacheDirectory             = null
            };

            // There can be only one.
            settings.Trainers.Clear();

            // It's hard to discover its parameters.
            // And there's a bug in 1.3.1 ...
            // settings.Trainers.Add(MulticlassClassificationTrainer.LightGbm);

            // This one's easier:
            settings.Trainers.Add(MulticlassClassificationTrainer.LbfgsMaximumEntropy);

            var experiment = MLContext.Auto().CreateMulticlassClassificationExperiment(settings);

            var result = experiment.Execute(
                trainData: _trainingDataView,
                labelColumnName: "Label",
                progressHandler: this);

            // Direct casts instead of `as`: an unexpected model shape now fails at the cast with a
            // descriptive InvalidCastException rather than a NullReferenceException on a later line.
            var model = (TransformerChain<ITransformer>)result.BestRun.Model;

            var    storageFolder = ApplicationData.Current.LocalFolder;
            string modelPath     = Path.Combine(storageFolder.Path, "Automation.zip");

            MLContext.Model.Save(
                model: model,
                inputSchema: null,
                filePath: modelPath);

            var singleFeaturePredictor = (TransformerChain<IPredictionTransformer<object>>)model.First();
            // With a one-versus-all trainer the last transformer would instead be a
            // MulticlassPredictionTransformer<OneVersusAllModelParameters>.
            // When using MulticlassClassificationTrainer.LbfgsMaximumEntropy:
            var multiclassPredictor = (MulticlassPredictionTransformer<MaximumEntropyModelParameters>)singleFeaturePredictor.LastTransformer;

            // Kept deliberately although unused: the rest is not publicly exposed,
            // so this local exists to be inspected at a breakpoint.
            var algorithm = multiclassPredictor.Model;
        }
예제 #11
0
        /// <summary>
        /// Trains a multiclass model with AutoML that predicts a transaction's category,
        /// treating the description as a text column and optimizing macro accuracy.
        /// </summary>
        /// <param name="trainingData">Transactions to train on; also stored in <c>_trainingDataView</c>.</param>
        /// <param name="maxTimeInSec">Time budget of the experiment, in seconds.</param>
        /// <returns>The model of the experiment's best run.</returns>
        public ITransformer AutoTrain(IEnumerable<Transaction> trainingData, uint maxTimeInSec)
        {
            _trainingDataView = _mlContext.Data.LoadFromEnumerable(trainingData);

            var settings = new MulticlassExperimentSettings
            {
                MaxExperimentTimeInSeconds = maxTimeInSec,
                OptimizingMetric           = MulticlassClassificationMetric.MacroAccuracy
            };

            // Label column plus one free-text column.
            var columns = new ColumnInformation
            {
                LabelColumnName = nameof(Transaction.Category),
                TextColumnNames = { nameof(Transaction.Description) }
            };

            var bestRun = _mlContext.Auto()
                                    .CreateMulticlassClassificationExperiment(settings)
                                    .Execute(_trainingDataView, columns)
                                    .BestRun;

            return bestRun.Model;
        }
예제 #12
0
        /// <summary>
        /// Runs a 30-second AutoML multiclass experiment on the iris data, logs the run to MLFlow under a
        /// freshly generated experiment name and saves the best model to <c>MLModels/&lt;name&gt;/model.zip</c>.
        /// </summary>
        /// <returns>A task that completes once the model has been saved.</returns>
        public static async Task RunExperiment()
        {
            // 1. Create MLContext
            var ctx = new MLContext();

            // 2. Load data
            IDataView data = ctx.Data.LoadFromTextFile<IrisData>("Data/iris.data", separatorChar: ',');

            // 3. Define Automated ML.NET experiment settings
            var experimentSettings = new MulticlassExperimentSettings
            {
                MaxExperimentTimeInSeconds = 30,
                OptimizingMetric           = MulticlassClassificationMetric.LogLoss
            };

            // 4. Create Automated ML.NET
            var experiment = ctx.Auto().CreateMulticlassClassificationExperiment(experimentSettings);

            // 5. Create experiment in MLFlow
            string experimentName    = Guid.NewGuid().ToString();
            var    experimentRequest = await _mlFlowService.GetOrCreateExperiment(experimentName);

            // 6. Run Automated ML.NET experiment
            var experimentResults = experiment.Execute(data, progressHandler: new ProgressHandler());

            // 7. Log Best Run
            LogRun(experimentRequest.ExperimentId, experimentResults);

            // Each experiment gets its own folder, named after the experiment.
            string savePath  = Path.Join("MLModels", experimentName);
            string modelPath = Path.Join(savePath, "model.zip");

            bool folderMissing = !Directory.Exists(savePath);
            if (folderMissing)
            {
                Directory.CreateDirectory(savePath);
            }

            // 8. Save Best Trained Model
            ctx.Model.Save(experimentResults.BestRun.Model, data.Schema, modelPath);
        }
예제 #13
0
        /// <summary>
        /// Runs a 300-second AutoML multiclass experiment on the news aggregator data set,
        /// predicting "Category", and reports the best run's micro accuracy.
        /// Models produced during the search are cached in the <c>Cache\</c> directory.
        /// </summary>
        private static void FindTheBestModel()
        {
            BCCConsole.Write(BCCConsoleColor.DarkGreen, false, "\nFinding the Best Model Using AutoML");
            var    mlContext      = new MLContext(seed: 0);
            string trainDataPath  = @"Data\uci-news-aggregator.csv";
            string trainCachePath = @"Cache\";
            // The unused `bestModelPath` local was removed: this method never saves the model.
            var trainDataView = mlContext.Data.LoadFromTextFile<ModelInput>(
                trainDataPath,
                hasHeader: true,
                separatorChar: ',',
                allowQuoting: true
                );

            // Map the "Category" label to key values in place before handing the data to AutoML.
            var preProcessingPipeline = mlContext.Transforms
                                        .Conversion.MapValueToKey("Category", "Category");
            var mappedInputData = preProcessingPipeline
                                  .Fit(trainDataView).Transform(trainDataView);
            var experimentSetting = new MulticlassExperimentSettings()
            {
                MaxExperimentTimeInSeconds = 300,
                CacheBeforeTrainer         = CacheBeforeTrainer.On,
                OptimizingMetric           = MulticlassClassificationMetric.MicroAccuracy,
                CacheDirectory             = new DirectoryInfo(trainCachePath)
            };
            var experiment       = mlContext.Auto().CreateMulticlassClassificationExperiment(experimentSetting);
            var experimentResult = experiment.Execute(
                trainData: mappedInputData,
                labelColumnName: "Category",
                progressHandler: new MulticlassExperimentProgressHandler()
                );

            BCCConsole.Write(BCCConsoleColor.Yellow, false, "Metrics From Best Run ... ");
            var metrics = experimentResult.BestRun.ValidationMetrics;

            BCCConsole.Write(BCCConsoleColor.DarkGreen, false, $"Metric Micro Accuracy : {metrics.MicroAccuracy:0.##}");
            BCCConsole.Write(BCCConsoleColor.Green, false, "Success !");
        }
예제 #14
0
        /// <summary>
        /// Runs a 20-second AutoML multiclass experiment on the SMS spam collection,
        /// prints the winning trainer and outputs the best model's metrics.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            var mlContext     = new MLContext();
            var trainDataPath = Path.Combine(Environment.CurrentDirectory, "..", "..", "..", "RawData", "SMSSpamCollection");

            // Load data from text file
            var data = mlContext.Data.LoadFromTextFile<SpamInput>(path: trainDataPath);

            #region ExperimentSettings

            //Set AutoML experiment settings
            Console.WriteLine("Creating experiment settings");
            var settings = new MulticlassExperimentSettings()
            {
                OptimizingMetric           = MulticlassClassificationMetric.MicroAccuracy,
                MaxExperimentTimeInSeconds = 20
            };
            settings.Trainers.Remove(MulticlassClassificationTrainer.FastForestOva);

            #endregion

            #region Experiment!

            // Start Experiment
            Console.WriteLine("Starting the experiment");

            // Pass the settings built above. The previous call used the time-only overload (20),
            // which silently discarded the optimizing metric and the trainer exclusion.
            var experiment = mlContext
                             .Auto()
                             .CreateMulticlassClassificationExperiment(settings)
                             .Execute(data, progressHandler: Progress);

            Console.WriteLine($"Winner: {experiment.BestRun.TrainerName}");

            #endregion

            Helpers.OutputMultiClassMetrics(experiment.BestRun.Model, data, mlContext);
        }
예제 #15
0
        /// <summary>
        /// HTTP-triggered function that trains an AutoML model from the posted data set and stores it in the database.
        /// </summary>
        /// <remarks>
        /// The request body is deserialized into a <c>TrainInput</c>. Its raw data is written to a temporary file,
        /// loaded with the requested column layout, shuffled and handed to a 20-second AutoML experiment of the
        /// requested model type. The best model is serialized into a <c>FileStore</c> row inside a transaction.
        /// The temporary file is deleted before the function returns.
        /// </remarks>
        /// <param name="req">Incoming POST request whose body is the JSON-encoded <c>TrainInput</c>.</param>
        /// <param name="log">Logger used to report failures.</param>
        /// <returns>
        /// A successful result carrying the stored model's file id, best-run metric and row count, or a failed
        /// result carrying the exception; exceptions raised inside the training block are not rethrown.
        /// </returns>
        public static ReturnResult<Model> Run([HttpTrigger(AuthorizationLevel.Anonymous, "post", Route = null)] HttpRequest req, ILogger log)
        {
            var dataFilePath = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString());

            try
            {
                db.BeginTransaction();

                MLContext context = new MLContext();

                TrainInput input = null;

                using (StreamReader reader = new StreamReader(req.Body))
                {
                    input = JsonConvert.DeserializeObject<TrainInput>(reader.ReadToEnd());
                }

                // The text loader reads from a file, so spill the posted data to a temp file first.
                File.WriteAllText(dataFilePath, input.Data);

                IDataView LoadedData = null;

                var columnData = new List<TextLoader.Column>();
                foreach (var c in input.Columns)
                {
                    //data type 1 is for ignore
                    if (c.Type != 1)
                    {
                        var newColData = new TextLoader.Column()
                        {
                            DataKind = (DataKind)c.Type,
                            Name     = c.ColumnName,
                            Source   = new TextLoader.Range[] { new TextLoader.Range(c.ColumnIndex) }
                        };

                        columnData.Add(newColData);
                    }
                }

                LoadedData = context.Data.LoadFromTextFile(
                    dataFilePath,
                    columnData.ToArray(),
                    separatorChar: input.Separator,
                    hasHeader: input.HasHeaders,
                    allowQuoting: true
                    );

                LoadedData = context.Data.ShuffleRows(LoadedData);

                /*
                 * Multiclass will be used in the case of binary experiments and multiclass experiments.
                 * This is because multiclass can accept all types as an output column. This will
                 * allow less interaction with the user and a better user experience.
                 */

                double       bestRunMetric = 0;
                ITransformer bestModel     = null;

                if (input.ModelType == TrainInput.ModelTypes.Multiclass)
                {
                    var settings = new MulticlassExperimentSettings()
                    {
                        MaxExperimentTimeInSeconds = 20
                    };
                    var training = context.Auto().CreateMulticlassClassificationExperiment(settings);
                    var results  = training.Execute(LoadedData, labelColumnName: input.LabelColumn);
                    bestRunMetric = results.BestRun.ValidationMetrics.MacroAccuracy;
                    bestModel     = results.BestRun.Model;
                }
                else if (input.ModelType == TrainInput.ModelTypes.Binary)
                {
                    var settings = new BinaryExperimentSettings()
                    {
                        MaxExperimentTimeInSeconds = 20
                    };
                    var training = context.Auto().CreateBinaryClassificationExperiment(settings);
                    var results  = training.Execute(LoadedData, labelColumnName: input.LabelColumn);
                    bestRunMetric = results.BestRun.ValidationMetrics.Accuracy;
                    bestModel     = results.BestRun.Model;
                }
                else if (input.ModelType == TrainInput.ModelTypes.Regression)
                {
                    var settings = new RegressionExperimentSettings()
                    {
                        MaxExperimentTimeInSeconds = 20
                    };
                    var training = context.Auto().CreateRegressionExperiment(settings);
                    var results  = training.Execute(LoadedData, labelColumnName: input.LabelColumn);
                    bestRunMetric = results.BestRun.ValidationMetrics.RSquared;
                    bestModel     = results.BestRun.Model;

                    // A negative R-squared is reported as 0.
                    if (bestRunMetric < 0)
                    {
                        bestRunMetric = 0;
                    }
                }
                else
                {
                    throw new Exception("Invalid model type");
                }


                var modelFileId = 0;

                using (MemoryStream ms = new MemoryStream())
                {
                    context.Model.Save(bestModel, LoadedData.Schema, ms);
                    //Save model to the database
                    FileStore modelSave = new FileStore()
                    {
                        Data = ms.ToArray()
                    };

                    modelFileId = FileStore.InsertUpdate(db, modelSave).Item.FileStoreId;
                }

                var resultModel = new Model()
                {
                    FileStoreId = modelFileId,
                    Accuracy    = bestRunMetric,
                    Rows        = input.Data.Trim().Split('\n').Length
                };

                db.CompleteTransaction();

                return new ReturnResult<Model>()
                {
                    Success = true,
                    Item = resultModel
                };
            }
            catch (Exception e)
            {
                db.AbortTransaction();

                // Attach the exception so the stack trace is logged, and pass the message as an
                // argument so that braces inside it are not interpreted as a format template.
                log.LogError(e, "{Message}", e.Message);
                return new ReturnResult<Model>()
                {
                    Success = false,
                    Exception = e
                };
            }
            finally
            {
                // The temp copy is only read inside the try block above (while training and saving);
                // remove it so repeated calls do not fill the temp directory.
                // Cleanup is best effort and must not replace the result being returned.
                try
                {
                    File.Delete(dataFilePath);
                }
                catch (IOException cleanupError)
                {
                    log.LogWarning(cleanupError, "Could not delete temporary data file {Path}", dataFilePath);
                }
                catch (UnauthorizedAccessException cleanupError)
                {
                    log.LogWarning(cleanupError, "Could not delete temporary data file {Path}", dataFilePath);
                }
            }
        }