/// <summary>
/// Trains a multiclass classifier with AutoML on <c>tasks.csv</c> from the user's Documents folder,
/// prints one sample prediction and saves the best model to <c>./clickup-model.zip</c>.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    var csvPath = Path.Combine(
        Environment.GetFolderPath(Environment.SpecialFolder.MyDocuments),
        "tasks.csv");

    var context = new MLContext();
    var data = context.Data.LoadFromTextFile<TaskInput>(csvPath, hasHeader: true, separatorChar: ',');

    var settings = new MulticlassExperimentSettings
    {
        MaxExperimentTimeInSeconds = 600,
        OptimizingMetric = MulticlassClassificationMetric.LogLoss
    };

    var experiment = context.Auto().CreateMulticlassClassificationExperiment(settings);
    var result = experiment.Execute(data, new ColumnInformation { LabelColumnName = "Tags" });
    var bestModel = result.BestRun.Model;

    // The prediction engine is IDisposable; scope it so it is released as soon as
    // the sample prediction has been printed.
    using (var predictionEngine = context.Model.CreatePredictionEngine<TaskInput, TaskOutput>(bestModel))
    {
        var prediction = predictionEngine.Predict(new TaskInput { TaskName = "Introduction to ML.NET" });
        Console.WriteLine($"Predicted label - {prediction.PredictedLabel}");
    }

    context.Model.Save(bestModel, data.Schema, "./clickup-model.zip");
}
/// <summary>
/// Runs an AutoML multiclass classification experiment on <paramref name="dataView"/> and traces
/// the number of models created plus a table of the top three runs ranked by micro-accuracy.
/// </summary>
/// <param name="mlContext">ML.NET context used to create the experiment.</param>
/// <param name="labelColumnName">Name of the label column in <paramref name="dataView"/>.</param>
/// <param name="experimentSettings">AutoML settings for the experiment.</param>
/// <param name="progressHandler">Handler passed to the experiment as its progress handler.</param>
/// <param name="dataView">Training data.</param>
/// <returns>The full experiment result, including all run details.</returns>
public static ExperimentResult<MulticlassClassificationMetrics> RunAutoMLExperiment(
    MLContext mlContext,
    string labelColumnName,
    MulticlassExperimentSettings experimentSettings,
    MulticlassExperimentProgressHandler progressHandler,
    IDataView dataView)
{
    ConsoleHelper.ConsoleWriteHeader("=============== Running AutoML experiment ===============");
    Trace.WriteLine($"Running AutoML multiclass classification experiment for {experimentSettings.MaxExperimentTimeInSeconds} seconds...");

    var experimentResult = mlContext.Auto()
        .CreateMulticlassClassificationExperiment(experimentSettings)
        .Execute(dataView, labelColumnName, progressHandler: progressHandler);

    Trace.WriteLine(Environment.NewLine);
    Trace.WriteLine($"num models created: {experimentResult.RunDetails.Count()}");

    // Get top few runs ranked by accuracy. Materialize once: the query is deferred, so
    // calling Count()/ElementAt() on it inside the loop would re-filter and re-sort every time.
    var topRuns = experimentResult.RunDetails
        .Where(r => r.ValidationMetrics != null && !double.IsNaN(r.ValidationMetrics.MicroAccuracy))
        .OrderByDescending(r => r.ValidationMetrics.MicroAccuracy)
        .Take(3)
        .ToList();

    Trace.WriteLine("Top models ranked by accuracy --");
    CreateRow($"{"",-4} {"Trainer",-35} {"MicroAccuracy",14} {"MacroAccuracy",14} {"Duration",9}", Width);

    for (var i = 0; i < topRuns.Count; i++)
    {
        var run = topRuns[i];

        // ValidationMetrics is guaranteed non-null here by the filter above.
        CreateRow($"{i,-4} {run.TrainerName,-35} {run.ValidationMetrics.MicroAccuracy,14:F4} {run.ValidationMetrics.MacroAccuracy,14:F4} {run.RuntimeInSeconds,9:F1}", Width);
    }

    return experimentResult;
}
/// <summary>
/// Infers the columns of the training file and builds the AutoML settings for a multiclass
/// experiment, letting <paramref name="st"/> adjust the column information and trainer list.
/// </summary>
/// <param name="mlContext">ML.NET context used for column inference.</param>
/// <param name="st">Supplies the label column name, experiment time, and column/trainer tweaks.</param>
/// <param name="paths">Provides the training file path.</param>
/// <param name="forPrs">Flag forwarded unchanged to <c>st.ColumnSetup</c>.</param>
/// <returns>The column inference results together with the configured experiment settings.</returns>
public static (ColumnInferenceResults columnInference, MulticlassExperimentSettings experimentSettings) SetupExperiment(
    MLContext mlContext,
    ExperimentModifier st,
    DataFilePaths paths,
    bool forPrs)
{
    var columnInference = InferColumns(mlContext, paths.TrainPath, st.LabelColumnName);
    st.ColumnSetup(columnInference.ColumnInformation, forPrs);

    var experimentSettings = new MulticlassExperimentSettings();
    st.TrainerSetup(experimentSettings.Trainers);
    experimentSettings.MaxExperimentTimeInSeconds = st.ExperimentTime;

    // NOTE(review): nothing in this method cancels or disposes this source - confirm whether
    // a caller is expected to get hold of it some other way.
    var tokenSource = new System.Threading.CancellationTokenSource();
    experimentSettings.CancellationToken = tokenSource.Token;

    // The cache directory is the system temp folder, so models produced by AutoML are
    // cached on disk there. (Setting it to null instead would keep every model in memory,
    // which can exhaust memory for an experiment on a large dataset.)
    experimentSettings.CacheDirectory = new DirectoryInfo(Path.GetTempPath());

    experimentSettings.OptimizingMetric = MulticlassClassificationMetric.MicroAccuracy;

    return (columnInference, experimentSettings);
}
/// <summary>
/// Loads the training CSV, runs a 10-second AutoML multiclass experiment that predicts the
/// "Saving" column, prints the best run's macro/micro accuracy and saves the best model.
/// </summary>
public static void DoAutoML()
{
    // Read the training file (comma separated, with header, quoted fields allowed).
    var trainingDataView = mlContext.Data.LoadFromTextFile<ModelInput>(
        path: TRAIN_DATA_FILEPATH,
        hasHeader: true,
        separatorChar: ',',
        allowQuoting: true,
        allowSparse: false);

    var experimentSettings = new MulticlassExperimentSettings { MaxExperimentTimeInSeconds = 10 };
    var experiment = mlContext.Auto().CreateMulticlassClassificationExperiment(experimentSettings);

    // Pre-featurizer: one-hot encode the two categorical columns in place, then
    // concatenate all four input columns into "Features".
    var categoricalColumns = new[]
    {
        new InputOutputColumnPair("Vehicle Type", "Vehicle Type"),
        new InputOutputColumnPair("Day", "Day")
    };
    var featureColumns = new[] { "Vehicle Type", "Day", "Ride Distance (km)", "Hour" };
    var dataProcessPipeline = mlContext.Transforms.Categorical
        .OneHotEncoding(categoricalColumns)
        .Append(mlContext.Transforms.Concatenate("Features", featureColumns));

    var experimentResult = experiment.Execute(
        trainingDataView,
        labelColumnName: "Saving",
        preFeaturizer: dataProcessPipeline);

    var bestRun = experimentResult.BestRun;
    Console.WriteLine($"Macro Accuracy: {bestRun.ValidationMetrics.MacroAccuracy:0.##}");
    Console.WriteLine($"Micro Accuracy: {bestRun.ValidationMetrics.MicroAccuracy:0.##}");

    // Persist the winning model together with the input schema.
    SaveModel(mlContext, bestRun.Model, MODEL_FILEPATH, trainingDataView.Schema);
}
/// <summary>
/// Loads the graded CSV output of the ETL job, trains a multiclass model with AutoML on an
/// 80% training split, evaluates it on the remaining 20% and saves the best model.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Define source data directory paths
    string solutionDirectory = "/home/lqdev/Development/RestaurantInspectionsSparkMLNET";
    string dataLocation = Path.Combine(solutionDirectory, "RestaurantInspectionsETL", "Output");

    // Initialize MLContext
    MLContext mlContext = new MLContext();

    // Pick the "Graded" folder of one ETL output directory.
    // NOTE(review): an ascending sort followed by First() selects the lexicographically
    // smallest directory name. If the names are timestamps this is the OLDEST output, not
    // the most recent one - confirm the naming scheme before relying on it.
    var latestOutput = Directory
        .GetDirectories(dataLocation)
        .Select(directory => new DirectoryInfo(directory))
        .OrderBy(directoryInfo => directoryInfo.Name)
        .Select(directory => Path.Join(directory.FullName, "Graded"))
        .First();

    var dataFilePaths = Directory
        .GetFiles(latestOutput)
        .Where(file => file.EndsWith("csv", StringComparison.Ordinal))
        .ToArray();

    // Load the data
    var dataLoader = mlContext.Data.CreateTextLoader<ModelInput>(
        separatorChar: ',',
        hasHeader: false,
        allowQuoting: true,
        trimWhitespace: true);
    IDataView data = dataLoader.Load(dataFilePaths);

    // Split the data
    TrainTestData dataSplit = mlContext.Data.TrainTestSplit(data, testFraction: 0.2);
    IDataView trainData = dataSplit.TrainSet;
    IDataView testData = dataSplit.TestSet;

    // Define experiment settings
    var experimentSettings = new MulticlassExperimentSettings
    {
        MaxExperimentTimeInSeconds = 600,
        OptimizingMetric = MulticlassClassificationMetric.LogLoss
    };

    // Create experiment
    var experiment = mlContext.Auto().CreateMulticlassClassificationExperiment(experimentSettings);

    // Run the experiment on the training split only. Training on the full data set would
    // let the model see the test rows and inflate the evaluation below.
    var experimentResults = experiment.Execute(trainData, progressHandler: new ProgressHandler());

    // Best Run Results
    var bestModel = experimentResults.BestRun.Model;

    // Evaluate the model on the held-out test split
    IDataView scoredTestData = bestModel.Transform(testData);
    var metrics = mlContext.MulticlassClassification.Evaluate(scoredTestData);
    Console.WriteLine($"MicroAccuracy: {metrics.MicroAccuracy}");

    // Save Model
    string modelSavePath = Path.Join(solutionDirectory, "RestaurantInspectionsML", "model.zip");
    mlContext.Model.Save(bestModel, data.Schema, modelSavePath);
}
/// <summary>
/// Blob-triggered function: parses the uploaded CSV into <c>TaskInput</c> rows, trains a
/// multiclass model with AutoML using "Tags" as the label, saves it to <c>clickup-model.zip</c>
/// and uploads that file to the "models" blob container.
/// </summary>
/// <param name="myBlob">Content of the blob that triggered the function (CSV text, first line is a header).</param>
/// <param name="name">Blob name bound from the trigger path (not used in the body).</param>
/// <param name="log">Function logger (not used in the body).</param>
/// <param name="context">Execution context; supplies the function app directory for configuration.</param>
public static void Run(
    [BlobTrigger("clickup/{name}", Connection = "AzureWebJobsStorage")] Stream myBlob,
    string name,
    ILogger log,
    ExecutionContext context)
{
    // The configuration is read once, right below, so there is no point in watching the
    // settings file for changes; a watcher per invocation would never be released.
    var config = new ConfigurationBuilder()
        .SetBasePath(context.FunctionAppDirectory)
        .AddJsonFile("local.settings.json", optional: true, reloadOnChange: false)
        .AddEnvironmentVariables()
        .Build();
    var blobConnection = config.GetSection("AzureWebJobsStorage");

    var mlContext = new MLContext();

    string blobData;
    using (var reader = new StreamReader(myBlob))
    {
        blobData = reader.ReadToEnd();
    }

    var parsedData = blobData
        // Accept both CRLF and LF line endings.
        .Split('\n')
        .Select(line => line.TrimEnd('\r'))
        // Skip the header line.
        .Skip(1)
        .Select(line => line.Split(','))
        // Stop at the first row whose first column is blank.
        .TakeWhile(row => !string.IsNullOrWhiteSpace(row[0]))
        // A row without a second column has no tag to learn from; skip it instead of
        // failing with an IndexOutOfRangeException.
        .Where(row => row.Length > 1)
        .Select(row => new TaskInput { TaskName = row[0], Tags = row[1] })
        // Materialize once so the data view does not re-parse the blob text on every pass.
        .ToList();

    var data = mlContext.Data.LoadFromEnumerable(parsedData);

    var settings = new MulticlassExperimentSettings
    {
        MaxExperimentTimeInSeconds = 600,
        OptimizingMetric = MulticlassClassificationMetric.LogLoss
    };
    var experiment = mlContext.Auto().CreateMulticlassClassificationExperiment(settings);
    var result = experiment.Execute(data, new ColumnInformation { LabelColumnName = "Tags" });
    var bestModel = result.BestRun.Model;

    mlContext.Model.Save(bestModel, data.Schema, "./clickup-model.zip");

    // Upload the saved model file to the "models" container.
    var storage = CloudStorageAccount.Parse(blobConnection.Value);
    var storageClient = storage.CreateCloudBlobClient();
    var container = storageClient.GetContainerReference("models");
    var modelRef = container.GetBlockBlobReference("clickup-model.zip");
    modelRef.UploadFromFile("clickup-model.zip");
}
/// <summary>
/// Loads the training and validation files, runs the AutoML experiment on the training data,
/// evaluates the best run's model on the validation data and saves that model.
/// </summary>
/// <param name="mlContext">ML.NET context shared by all steps.</param>
/// <param name="labelColumnName">Name of the label column.</param>
/// <param name="experimentSettings">AutoML settings for the experiment.</param>
/// <param name="progressHandler">Progress handler forwarded to the experiment.</param>
/// <param name="paths">Provides the train, validate and model file paths.</param>
/// <param name="textLoader">Loader used to read both data files.</param>
/// <returns>The result of the AutoML experiment.</returns>
public static ExperimentResult<MulticlassClassificationMetrics> Train(
    MLContext mlContext,
    string labelColumnName,
    MulticlassExperimentSettings experimentSettings,
    MulticlassExperimentProgressHandler progressHandler,
    DataFilePaths paths,
    TextLoader textLoader)
{
    var trainingSet = textLoader.Load(paths.TrainPath);
    var validationSet = textLoader.Load(paths.ValidatePath);

    var outcome = RunAutoMLExperiment(
        mlContext,
        labelColumnName,
        experimentSettings,
        progressHandler,
        trainingSet);

    // Evaluate and persist the winning run.
    var winner = outcome.BestRun;
    EvaluateTrainedModelAndPrintMetrics(mlContext, winner.Model, winner.TrainerName, validationSet);
    SaveModel(mlContext, winner.Model, paths.ModelPath, trainingSet);

    return outcome;
}
/// <summary>
/// Creates the multiclass AutoML experiment (180 seconds, optimizing log-loss, no cache
/// directory) with the two Ova tree trainers removed, and stores it in <c>_experiment</c>.
/// </summary>
public void SetUpExperiment()
{
    var settings = new MulticlassExperimentSettings();
    settings.MaxExperimentTimeInSeconds = 180;
    settings.OptimizingMetric = MulticlassClassificationMetric.LogLoss;
    settings.CacheDirectory = null;

    // These two trainers yield no metrics in UWP:
    var excludedTrainers = new[]
    {
        MulticlassClassificationTrainer.FastTreeOva,
        MulticlassClassificationTrainer.FastForestOva
    };
    foreach (var trainer in excludedTrainers)
    {
        settings.Trainers.Remove(trainer);
    }

    _experiment = MLContext.Auto().CreateMulticlassClassificationExperiment(settings);
}
/// <summary>
/// Runs a 300-second AutoML multiclass experiment on the news aggregator CSV, with the
/// "Category" column mapped to a key type as the label, and prints the best run's
/// micro and macro accuracy.
/// </summary>
private static void FindTheBestModel()
{
    Console.WriteLine("Finding the best model using AutoML");

    const string trainingDataPath = "Data\\uci-news-aggregator.csv";
    var mlContext = new MLContext(seed: 0);

    var trainingDataView = mlContext.Data.LoadFromTextFile<ModelInput>(
        trainingDataPath,
        hasHeader: true,
        separatorChar: ',',
        allowQuoting: true);

    // Convert the label column to a key type in place before handing the data to AutoML.
    var labelToKey = mlContext.Transforms.Conversion.MapValueToKey(
        inputColumnName: "Category",
        outputColumnName: "Category");
    var fittedLabelToKey = labelToKey.Fit(trainingDataView);
    var mappedInputData = fittedLabelToKey.Transform(trainingDataView);

    var experimentSettings = new MulticlassExperimentSettings();
    experimentSettings.MaxExperimentTimeInSeconds = 300;
    experimentSettings.CacheBeforeTrainer = CacheBeforeTrainer.On;
    experimentSettings.OptimizingMetric = MulticlassClassificationMetric.MicroAccuracy;
    experimentSettings.CacheDirectory = null;

    var experiment = mlContext.Auto().CreateMulticlassClassificationExperiment(experimentSettings);

    Console.WriteLine("Starting experiments");
    var progress = new MulticlassExperimentProgressHandler();
    var experimentResult = experiment.Execute(
        trainData: mappedInputData,
        labelColumnName: "Category",
        progressHandler: progress);

    Console.WriteLine("Metrics from best run:");
    var bestMetrics = experimentResult.BestRun.ValidationMetrics;
    Console.WriteLine($"MicroAccuracy: {bestMetrics.MicroAccuracy:0.##}");
    Console.WriteLine($"MacroAccuracy: {bestMetrics.MacroAccuracy:0.##}");
}
/// <summary>
/// Runs a 180-second AutoML experiment restricted to the LbfgsMaximumEntropy trainer, saves the
/// best model as <c>Automation.zip</c> in the app's local folder, then digs the trained model
/// parameters out of the transformer chain so they can be inspected in the debugger.
/// </summary>
/// <exception cref="System.InvalidOperationException">
/// The best run's model does not have the transformer chain shape this method expects.
/// </exception>
public void HyperParameterize()
{
    var settings = new MulticlassExperimentSettings
    {
        MaxExperimentTimeInSeconds = 180,
        OptimizingMetric = MulticlassClassificationMetric.LogLoss,
        CacheDirectory = null
    };

    // There can be only one.
    settings.Trainers.Clear();

    // It's hard to discover its parameters.
    // And there's a bug in 1.3.1 ...
    // settings.Trainers.Add(MulticlassClassificationTrainer.LightGbm);

    // This one's easier:
    settings.Trainers.Add(MulticlassClassificationTrainer.LbfgsMaximumEntropy);

    var experiment = MLContext.Auto().CreateMulticlassClassificationExperiment(settings);
    var result = experiment.Execute(
        trainData: _trainingDataView,
        labelColumnName: "Label",
        progressHandler: this);

    // Fail with a clear message instead of a NullReferenceException further down
    // when the model does not have the expected shape.
    var model = result.BestRun.Model as TransformerChain<ITransformer>;
    if (model == null)
    {
        throw new System.InvalidOperationException(
            "The best run's model is not a TransformerChain<ITransformer>.");
    }

    var storageFolder = ApplicationData.Current.LocalFolder;
    string modelPath = Path.Combine(storageFolder.Path, "Automation.zip");
    MLContext.Model.Save(
        model: model,
        inputSchema: null,
        filePath: modelPath);

    var singleFeaturePredictor = model.First() as TransformerChain<IPredictionTransformer<object>>;
    if (singleFeaturePredictor == null)
    {
        throw new System.InvalidOperationException(
            "The first transformer of the best model is not a TransformerChain<IPredictionTransformer<object>>.");
    }

    // With an Ova trainer the last transformer would instead be a
    // MulticlassPredictionTransformer<OneVersusAllModelParameters>.
    // When using MulticlassClassificationTrainer.LbfgsMaximumEntropy:
    var multiclassPredictor = singleFeaturePredictor.LastTransformer as MulticlassPredictionTransformer<MaximumEntropyModelParameters>;
    if (multiclassPredictor == null)
    {
        throw new System.InvalidOperationException(
            "The last transformer is not a MulticlassPredictionTransformer<MaximumEntropyModelParameters>.");
    }

    // Kept only as a debugger inspection target:
    // ... the rest is not publicly exposed.
    // So it's breakpoint time.
    var algorithm = multiclassPredictor.Model;
}
/// <summary>
/// Trains a multiclass model with AutoML on the given transactions, using
/// <c>Category</c> as the label and <c>Description</c> as a text column.
/// </summary>
/// <param name="trainingData">Transactions to train on; also stored in <c>_trainingDataView</c> as a data view.</param>
/// <param name="maxTimeInSec">Maximum experiment time in seconds.</param>
/// <returns>The model of the best run (optimizing macro accuracy).</returns>
public ITransformer AutoTrain(IEnumerable<Transaction> trainingData, uint maxTimeInSec)
{
    _trainingDataView = _mlContext.Data.LoadFromEnumerable(trainingData);

    var experimentSettings = new MulticlassExperimentSettings
    {
        MaxExperimentTimeInSeconds = maxTimeInSec,
        OptimizingMetric = MulticlassClassificationMetric.MacroAccuracy
    };

    var columnInfo = new ColumnInformation
    {
        LabelColumnName = nameof(Transaction.Category),
        TextColumnNames = { nameof(Transaction.Description) }
    };

    return _mlContext.Auto()
        .CreateMulticlassClassificationExperiment(experimentSettings)
        .Execute(_trainingDataView, columnInfo)
        .BestRun
        .Model;
}
/// <summary>
/// Runs a 30-second AutoML multiclass experiment on the iris data set, registers a freshly
/// named experiment with the MLFlow service, logs the run and saves the best model under
/// <c>MLModels/&lt;experiment name&gt;/model.zip</c>.
/// </summary>
/// <returns>A task that completes when the model has been saved.</returns>
public static async Task RunExperiment()
{
    // 1. Create MLContext
    var ctx = new MLContext();

    // 2. Load data
    var data = ctx.Data.LoadFromTextFile<IrisData>("Data/iris.data", separatorChar: ',');

    // 3. Define Automated ML.NET experiment settings
    var experimentSettings = new MulticlassExperimentSettings
    {
        MaxExperimentTimeInSeconds = 30,
        OptimizingMetric = MulticlassClassificationMetric.LogLoss
    };

    // 4. Create Automated ML.NET
    var experiment = ctx.Auto().CreateMulticlassClassificationExperiment(experimentSettings);

    // 5. Create experiment in MLFlow
    string experimentName = Guid.NewGuid().ToString();
    var experimentRequest = await _mlFlowService.GetOrCreateExperiment(experimentName);

    // 6. Run Automated ML.NET experiment
    var experimentResults = experiment.Execute(data, progressHandler: new ProgressHandler());

    // 7. Log Best Run
    LogRun(experimentRequest.ExperimentId, experimentResults);

    // CreateDirectory is a no-op when the directory already exists.
    var savePath = Path.Join("MLModels", experimentName);
    var modelPath = Path.Join(savePath, "model.zip");
    Directory.CreateDirectory(savePath);

    // 8. Save Best Trained Model
    ctx.Model.Save(experimentResults.BestRun.Model, data.Schema, modelPath);
}
/// <summary>
/// Runs a 300-second AutoML multiclass experiment on the news aggregator CSV, with the
/// "Category" column mapped to a key type as the label and models cached under
/// <c>Cache\</c>, then writes the best run's micro accuracy to the console.
/// </summary>
private static void FindTheBestModel()
{
    BCCConsole.Write(BCCConsoleColor.DarkGreen, false, "\nFinding the Best Model Using AutoML");

    var mlContext = new MLContext(0);
    string trainDataPath = @"Data\uci-news-aggregator.csv";
    string trainCachePath = @"Cache\";

    // NOTE(review): the original declared an unused path (Model\BestModelRun.zip) here.
    // The best model is never saved by this method - confirm whether persisting it was intended.

    var trainDataView = mlContext.Data.LoadFromTextFile<ModelInput>(
        trainDataPath,
        hasHeader: true,
        separatorChar: ',',
        allowQuoting: true);

    // Convert the label column to a key type in place before handing the data to AutoML.
    var preProcessingPipeline = mlContext.Transforms
        .Conversion.MapValueToKey("Category", "Category");
    var mappedInputData = preProcessingPipeline
        .Fit(trainDataView)
        .Transform(trainDataView);

    var experimentSetting = new MulticlassExperimentSettings()
    {
        MaxExperimentTimeInSeconds = 300,
        CacheBeforeTrainer = CacheBeforeTrainer.On,
        OptimizingMetric = MulticlassClassificationMetric.MicroAccuracy,
        CacheDirectory = new DirectoryInfo(trainCachePath)
    };

    var experiment = mlContext.Auto().CreateMulticlassClassificationExperiment(experimentSetting);
    var experimentResult = experiment.Execute(
        trainData: mappedInputData,
        labelColumnName: "Category",
        progressHandler: new MulticlassExperimentProgressHandler());

    BCCConsole.Write(BCCConsoleColor.Yellow, false, "Metrics From Best Run ... ");
    var metrics = experimentResult.BestRun.ValidationMetrics;
    BCCConsole.Write(BCCConsoleColor.DarkGreen, false, $"Metric Micro Accuracy : {metrics.MicroAccuracy:0.##}");
    BCCConsole.Write(BCCConsoleColor.Green, false, "Success !");
}
/// <summary>
/// Loads the SMS spam collection, runs a 20-second AutoML multiclass experiment with the
/// FastForestOva trainer excluded, prints the winning trainer and outputs its metrics.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    var mlContext = new MLContext();
    var trainDataPath = Path.Combine(Environment.CurrentDirectory, "..", "..", "..", "RawData", "SMSSpamCollection");

    // Load data from text file
    var data = mlContext.Data.LoadFromTextFile<SpamInput>(path: trainDataPath);

    #region ExperimentSettings
    // Set AutoML experiment settings
    Console.WriteLine("Creating experiment settings");
    var settings = new MulticlassExperimentSettings()
    {
        OptimizingMetric = MulticlassClassificationMetric.MicroAccuracy,
        MaxExperimentTimeInSeconds = 20
    };
    settings.Trainers.Remove(MulticlassClassificationTrainer.FastForestOva);
    #endregion

    #region Experiment!
    // Start Experiment
    Console.WriteLine("Starting the experiment");

    // Pass the settings object built above. Creating the experiment from a bare time limit
    // would silently ignore the optimizing metric and the trainer removal.
    var experiment = mlContext
        .Auto()
        .CreateMulticlassClassificationExperiment(settings)
        .Execute(data, progressHandler: Progress);

    Console.WriteLine($"Winner: {experiment.BestRun.TrainerName}");
    #endregion

    Helpers.OutputMultiClassMetrics(experiment.BestRun.Model, data, mlContext);
}
/// <summary>
/// HTTP-triggered training endpoint: reads a <c>TrainInput</c> JSON payload from the request
/// body, writes its data to a temporary file, runs a 20-second AutoML experiment of the
/// requested model type, stores the best model in the database and returns its summary.
/// </summary>
/// <param name="req">HTTP POST request whose body is the JSON-serialized <c>TrainInput</c>.</param>
/// <param name="log">Function logger; receives the error when training fails.</param>
/// <returns>
/// A result with <c>Success = true</c> and the stored model's details, or
/// <c>Success = false</c> and the caught exception when anything fails.
/// </returns>
public static ReturnResult<Model> Run(
    [HttpTrigger(AuthorizationLevel.Anonymous, "post", Route = null)] HttpRequest req,
    ILogger log)
{
    var dataFilePath = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString());
    try
    {
        db.BeginTransaction();
        MLContext context = new MLContext();

        TrainInput input = null;
        using (StreamReader reader = new StreamReader(req.Body))
        {
            input = JsonConvert.DeserializeObject<TrainInput>(reader.ReadToEnd());
        }

        File.WriteAllText(dataFilePath, input.Data);

        // Build the loader columns from the request, leaving out ignored columns.
        var columnData = new List<TextLoader.Column>();
        foreach (var c in input.Columns)
        {
            //data type 1 is for ignore
            if (c.Type != 1)
            {
                var newColData = new TextLoader.Column()
                {
                    DataKind = (DataKind)c.Type,
                    Name = c.ColumnName,
                    Source = new TextLoader.Range[] { new TextLoader.Range(c.ColumnIndex) }
                };
                columnData.Add(newColData);
            }
        }

        IDataView loadedData = context.Data.LoadFromTextFile(
            dataFilePath,
            columnData.ToArray(),
            separatorChar: input.Separator,
            hasHeader: input.HasHeaders,
            allowQuoting: true);
        loadedData = context.Data.ShuffleRows(loadedData);

        /*
         * Multiclass will be used in the case of binary experiments and multiclass experiments.
         * This is because multiclass can accept all types as an output column. This will
         * allow less interaction with the user and a better user experience.
         */
        double bestRunMetric = 0;
        ITransformer bestModel = null;

        if (input.ModelType == TrainInput.ModelTypes.Multiclass)
        {
            var settings = new MulticlassExperimentSettings() { MaxExperimentTimeInSeconds = 20 };
            var training = context.Auto().CreateMulticlassClassificationExperiment(settings);
            ExperimentResult<MulticlassClassificationMetrics> results =
                training.Execute(loadedData, labelColumnName: input.LabelColumn);
            bestRunMetric = results.BestRun.ValidationMetrics.MacroAccuracy;
            bestModel = results.BestRun.Model;
        }
        else if (input.ModelType == TrainInput.ModelTypes.Binary)
        {
            var settings = new BinaryExperimentSettings() { MaxExperimentTimeInSeconds = 20 };
            var training = context.Auto().CreateBinaryClassificationExperiment(settings);
            ExperimentResult<BinaryClassificationMetrics> results =
                training.Execute(loadedData, labelColumnName: input.LabelColumn);
            bestRunMetric = results.BestRun.ValidationMetrics.Accuracy;
            bestModel = results.BestRun.Model;
        }
        else if (input.ModelType == TrainInput.ModelTypes.Regression)
        {
            var settings = new RegressionExperimentSettings() { MaxExperimentTimeInSeconds = 20 };
            var training = context.Auto().CreateRegressionExperiment(settings);
            ExperimentResult<RegressionMetrics> results =
                training.Execute(loadedData, labelColumnName: input.LabelColumn);
            bestRunMetric = results.BestRun.ValidationMetrics.RSquared;
            bestModel = results.BestRun.Model;

            // R-squared can be negative; report it as 0 in that case.
            if (bestRunMetric < 0)
            {
                bestRunMetric = 0;
            }
        }
        else
        {
            throw new Exception("Invalid model type");
        }

        var modelFileId = 0;
        using (MemoryStream ms = new MemoryStream())
        {
            context.Model.Save(bestModel, loadedData.Schema, ms);

            //Save model to the database
            FileStore modelSave = new FileStore() { Data = ms.ToArray() };
            modelFileId = FileStore.InsertUpdate(db, modelSave).Item.FileStoreId;
        }

        var resultModel = new Model()
        {
            FileStoreId = modelFileId,
            Accuracy = bestRunMetric,
            // Number of '\n'-separated lines in the trimmed input text.
            Rows = input.Data.Trim().Split('\n').Length
        };

        db.CompleteTransaction();
        return new ReturnResult<Model>() { Success = true, Item = resultModel };
    }
    catch (Exception e)
    {
        db.AbortTransaction();

        // Attach the exception itself so the stack trace is logged, not just the message.
        log.LogError(e, "{Message}", e.Message);
        return new ReturnResult<Model>() { Success = false, Exception = e };
    }
    finally
    {
        // Best-effort cleanup of the temporary data file, which may hold user-supplied data.
        // A failed delete must not replace the result being returned.
        try
        {
            File.Delete(dataFilePath);
        }
        catch (Exception cleanupError) when (cleanupError is IOException || cleanupError is UnauthorizedAccessException)
        {
            log.LogWarning(cleanupError, "Could not delete temporary training file {Path}", dataFilePath);
        }
    }
}