/// <summary>
/// Create AutoML Binary Classification experiment settings: a one-hour
/// budget, cancellable via the supplied token source, optimizing Accuracy.
/// </summary>
private static BinaryExperimentSettings CreateExperimentSettings(MLContext mlContext, CancellationTokenSource cts)
{
    // CacheDirectory = null keeps every model AutoML produces in memory
    // instead of writing it to disk after each run. Note: on a large
    // dataset, keeping all trained models in memory can exhaust RAM.
    return new BinaryExperimentSettings
    {
        MaxExperimentTimeInSeconds = 3600,
        CancellationToken = cts.Token,
        // Metric AutoML tries to optimize over the course of the experiment.
        OptimizingMetric = BinaryClassificationMetric.Accuracy,
        CacheDirectory = null,
    };
}
/// <summary>
/// Runs an AutoML binary classification experiment (45-minute budget,
/// optimizing F1), training on the data file and validating on a separate
/// validation file, then prints the best run's metrics and saves its model.
/// </summary>
public void Experiment()
{
    var trainingData = GetData(_dataPath);
    var validationData = GetData(_validatePath);

    var settings = new BinaryExperimentSettings
    {
        MaxExperimentTimeInSeconds = 45 * 60,
        OptimizingMetric = BinaryClassificationMetric.F1Score,
    };

    // Restrict the trainer search space to LightGBM only.
    settings.Trainers.Clear();
    settings.Trainers.Add(BinaryClassificationTrainer.LightGbm);

    // Per-example weights come from the Appointment.Weight column.
    var columns = new ColumnInformation
    {
        ExampleWeightColumnName = nameof(Appointment.Weight)
    };

    var result = _context.Auto()
        .CreateBinaryClassificationExperiment(settings)
        .Execute(
            trainData: trainingData,
            validationData: validationData,
            columnInformation: columns,
            progressHandler: new ProgressHandler());

    Console.WriteLine("Experiment completed");
    Console.WriteLine();

    ConsoleHelper.Print(result.BestRun.TrainerName, result.BestRun.ValidationMetrics);

    SaveModel(trainingData.Schema, result.BestRun.Model);
    Console.WriteLine("Best model saved");
}
/// <summary>
/// Runs an AutoML binary classification experiment (45-minute budget,
/// optimizing F1) on a single dataset, holding out 20% for validation,
/// then prints the best run's metrics and saves its model.
/// </summary>
public void Experiment()
{
    var dataset = GetData();

    // Deterministic 80/20 hold-out split (fixed seed).
    var split = _context.Data.TrainTestSplit(dataset, testFraction: 0.2, seed: 0);

    var settings = new BinaryExperimentSettings
    {
        MaxExperimentTimeInSeconds = 45 * 60,
        OptimizingMetric = BinaryClassificationMetric.F1Score,
    };

    // Restrict the trainer search space to LightGBM only.
    settings.Trainers.Clear();
    settings.Trainers.Add(BinaryClassificationTrainer.LightGbm);

    var result = _context.Auto()
        .CreateBinaryClassificationExperiment(settings)
        .Execute(
            trainData: split.TrainSet,
            validationData: split.TestSet,
            labelColumnName: nameof(Appointment.NoShow),
            progressHandler: new ProgressHandler());

    Console.WriteLine("Experiment completed");
    Console.WriteLine();

    ConsoleHelper.Print(result.BestRun.TrainerName, result.BestRun.ValidationMetrics);

    SaveModel(dataset.Schema, result.BestRun.Model);
    Console.WriteLine("Best model saved");
}
/// <summary>
/// Trains via AutoML (30-minute budget): pools the train and validation
/// sets, re-splits them 50/50, runs the experiment, and keeps the best
/// run's model and validation metrics.
/// </summary>
protected override void Train()
{
    var settings = new BinaryExperimentSettings
    {
        MaxExperimentTimeInSeconds = 30 * 60,
    };

    // Merge both data views into one, then let TrainTestSplit produce a
    // fresh 50/50 partition for the experiment.
    var trainRows = context.Data.CreateEnumerable <SpectrogramData>(trainSet, false);
    var validationRows = context.Data.CreateEnumerable <SpectrogramData>(validationSet, false);
    var pooled = context.Data.LoadFromEnumerable(trainRows.Concat(validationRows));
    var split = context.Data.TrainTestSplit(pooled, 0.5);

    // Report each completed run's trainer and accuracy as training progresses.
    var progressHandler = new Progress <RunDetail <Microsoft.ML.Data.BinaryClassificationMetrics> >(run =>
    {
        if (run.ValidationMetrics != null)
        {
            Console.WriteLine($"Current trainer - {run.TrainerName} with accuracy {run.ValidationMetrics.Accuracy}");
        }
    });

    var experiment = context.Auto().CreateBinaryClassificationExperiment(settings);

    // Run the experiment
    Console.WriteLine("Running the experiment...");
    var result = experiment.Execute(
        trainData: split.TrainSet,
        validationData: split.TestSet,
        progressHandler: progressHandler);

    Console.WriteLine($"Best run ({result.BestRun.TrainerName}):");
    trainedModel = result.BestRun.Model;
    metrics = result.BestRun.ValidationMetrics;
}
/// <summary>
/// Azure Function: retrains the clickbait classifier from rows in the
/// [trainingData] table (60-second AutoML budget) and replaces the
/// serialized model stored in the [models] table.
/// </summary>
/// <param name="req">Trigger request (unused beyond invoking the function).</param>
/// <param name="log">Function logger.</param>
public static void Run([HttpTrigger()] HttpRequest req, ILogger log)
{
    // Settings.txt layout (by line): 0 = catalog/user, 1 = password, 2 = server.
    // NOTE(review): the same value is used for Initial Catalog and User ID —
    // confirm that is intentional for this database.
    var connection = File.ReadAllLines("Settings.txt");
    var db = new Database(
        $"Data Source={connection[2]};Initial Catalog={connection[0]};User ID={connection[0]};Password={connection[1]};MultipleActiveResultSets=True;",
        DatabaseType.SqlServer2012,
        SqlClientFactory.Instance
        );

    var trainingDataList = db.Fetch <TrainData>("SELECT title as Title, isClickbait as Label FROM [trainingData]");

    MLContext context = new MLContext();
    var trainingData = context.Data.LoadFromEnumerable <TrainData>(trainingDataList);

    var settings = new BinaryExperimentSettings();
    settings.MaxExperimentTimeInSeconds = 60;

    var mlExperiment = context.Auto().CreateBinaryClassificationExperiment(settings);
    var results = mlExperiment.Execute(trainingData);

    // Accuracy is a 0-1 fraction; format it as a percentage rather than
    // appending a bare "%" to the raw value (which read as e.g. "0.87%").
    log.LogInformation($"Train complete: {results.BestRun.ValidationMetrics.Accuracy:P2}");
    log.LogInformation($"Train complete: {results.BestRun.TrainerName}");

    try
    {
        db.BeginTransaction();
        // Keep only the newest model.
        db.Execute("DELETE FROM models");
        using (MemoryStream ms = new MemoryStream())
        {
            context.Model.Save(results.BestRun.Model, trainingData.Schema, ms);
            var model = new models() { model = ms.ToArray() };
            db.Save(model);
        }
        db.CompleteTransaction();
    }
    catch (Exception ex)
    {
        // Roll back and surface the failure instead of silently swallowing it.
        db.AbortTransaction();
        log.LogError(ex, "Failed to persist the trained model");
        throw;
    }
}
/// <summary>
/// Entry point: runs a five-minute AutoML binary classification experiment
/// over the tab-separated training file (optimizing Accuracy), prints the
/// best run's metrics, and persists the winning model to ModelPath.
/// </summary>
public static void Main()
{
    var mlContext = new MLContext(seed: 1024);

    var trainData = mlContext.Data.LoadFromTextFile <ModelInput>(
        TraningDataPath,
        hasHeader: false,
        separatorChar: '\t',
        allowQuoting: true,
        trimWhitespace: true);

    var experimentSettings = new BinaryExperimentSettings
    {
        MaxExperimentTimeInSeconds = (uint)TimeSpan.FromMinutes(5).TotalSeconds,
        OptimizingMetric = BinaryClassificationMetric.Accuracy,
    };

    var experiment = mlContext.Auto().CreateBinaryClassificationExperiment(experimentSettings);

    // Text pre-processing applied before AutoML's own featurization:
    // tokenize, strip default stop words, featurize, then min-max
    // normalize into the "Features" column.
    var preFeaturizer = mlContext.Transforms.Text.TokenizeIntoWords("words", "text")
        .Append(mlContext.Transforms.Text.RemoveDefaultStopWords("CleanText", "words"))
        .Append(mlContext.Transforms.Text.FeaturizeText("FeaturizeText", "CleanText"))
        .Append(mlContext.Transforms.NormalizeMinMax("Features", "FeaturizeText"));

    var experimentResult = experiment.Execute(
        trainData,
        "sentiment",
        preFeaturizer: preFeaturizer,
        progressHandler: new BinaryExperimentProgressHandler());

    var bestRun = experimentResult.BestRun;
    PrintMetrics(bestRun.TrainerName, bestRun.ValidationMetrics);

    // Overwrite any previously saved model file.
    if (File.Exists(ModelPath))
    {
        File.Delete(ModelPath);
    }

    mlContext.Model.Save(bestRun.Model, trainData.Schema, ModelPath);
}
// Benchmarks several ML.NET binary-classification approaches on the Quora
// question-pairs dataset: builds a shared featurization pipeline, a
// dictionary of candidate trainers, AutoML settings, then runs the Sweeper.
static void Main(string[] args)
{
    #region creating all objects needed
    MLContext mlContext = new MLContext();
    Stopwatch stw = new Stopwatch();
    BinaryExperimentSettings settings = new BinaryExperimentSettings();

    // Progress callback: print each finished AutoML run's trainer name,
    // accuracy, and AUC (runs without metrics are skipped).
    Progress <RunDetail <BinaryClassificationMetrics> > progress = new Progress <RunDetail <BinaryClassificationMetrics> >(p =>
    {
        if (p.ValidationMetrics != null)
        {
            Console.WriteLine($"Current result - {p.TrainerName}, {p.ValidationMetrics.Accuracy}, {p.ValidationMetrics.AreaUnderRocCurve}");
        }
    });

    #region Transformer pipeline
    // Map the raw "is_duplicate" string column ("1"/"0") to a boolean Label.
    Action <QuestionPairs, transformOutput> mapping = (input, output) =>
    {
        output.Label = input.is_duplicate.Equals("1") ? true : false;
    };

    // Featurize both question columns, concatenate them into "Features",
    // drop the raw text columns, and end with an SDCA trainer plus a cache
    // checkpoint so downstream fits reuse the transformed data.
    IEstimator <ITransformer> pipeline = mlContext.Transforms.CustomMapping(mapping, contractName: null)
        .Append(mlContext.Transforms.Text.FeaturizeText(inputColumnName: "question1", outputColumnName: "question1Featurized"))
        .Append(mlContext.Transforms.Text.FeaturizeText(inputColumnName: "question2", outputColumnName: "question2Featurized"))
        .Append(mlContext.Transforms.Concatenate("Features", "question1Featurized", "question2Featurized"))
        .Append(mlContext.Transforms.DropColumns("question1", "question2", "is_duplicate"))
        .Append(mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(labelColumnName: nameof(transformOutput.Label), featureColumnName: "Features"))
        .AppendCacheCheckpoint(mlContext);
    #endregion

    #region Dictionary of models
    // Candidate trainers keyed by name, each appended to the shared pipeline.
    IDictionary <string, IEstimator <ITransformer> > estimator = new Dictionary <string, IEstimator <ITransformer> >();
    estimator.Add("AveragedPerceptronTrainer", pipeline.Append(mlContext.BinaryClassification.Trainers.AveragedPerceptron(labelColumnName: nameof(transformOutput.Label), featureColumnName: "Features")));
    estimator.Add("SdcaLogisticRegressionBinaryTrainer", pipeline.Append(mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(labelColumnName: nameof(transformOutput.Label), featureColumnName: "Features")));
    estimator.Add("SdcaNonCalibratedBinaryTrainer", pipeline.Append(mlContext.BinaryClassification.Trainers.SdcaNonCalibrated(labelColumnName: nameof(transformOutput.Label), featureColumnName: "Features")));
    estimator.Add("SymbolicSgdLogisticRegressionBinaryTrainer", pipeline.Append(mlContext.BinaryClassification.Trainers.SymbolicSgdLogisticRegression(labelColumnName: nameof(transformOutput.Label), featureColumnName: "Features")));
    estimator.Add("LbfgsLogisticRegressionBinaryTrainer", pipeline.Append(mlContext.BinaryClassification.Trainers.LbfgsLogisticRegression(labelColumnName: nameof(transformOutput.Label), featureColumnName: "Features")));
    estimator.Add("LightGbmBinaryTrainer", pipeline.Append(mlContext.BinaryClassification.Trainers.LightGbm(labelColumnName: nameof(transformOutput.Label), featureColumnName: "Features")));
    estimator.Add("FastTreeBinaryTrainer", pipeline.Append(mlContext.BinaryClassification.Trainers.FastTree(labelColumnName: nameof(transformOutput.Label), featureColumnName: "Features")));
    estimator.Add("FastForestBinaryTrainer", pipeline.Append(mlContext.BinaryClassification.Trainers.FastForest(labelColumnName: nameof(transformOutput.Label), featureColumnName: "Features")));
    //estimator.Add("GamBinaryTrainer", pipeline.Append(mlContext.BinaryClassification.Trainers.Gam(labelColumnName: nameof(transformOutput.Label), featureColumnName: "Features")));
    estimator.Add("FieldAwareFactorizationMachineTrainer", pipeline.Append(mlContext.BinaryClassification.Trainers.FieldAwareFactorizationMachine(labelColumnName: nameof(transformOutput.Label), featureColumnName: "Features")));
    estimator.Add("LinearSvmTrainer", pipeline.Append(mlContext.BinaryClassification.Trainers.LinearSvm(labelColumnName: nameof(transformOutput.Label), featureColumnName: "Features")));
    #endregion

    #region AutoML settings
    // One-minute budget; restrict AutoML's search space to LightGBM only.
    settings.MaxExperimentTimeInSeconds = 60;
    settings.Trainers.Clear();
    settings.Trainers.Add(BinaryClassificationTrainer.LightGbm);
    #endregion
    #endregion

    // Time the dataset load and split.
    stw.Start();
    IDataView file = mlContext.Data.LoadFromTextFile <QuestionPairs>(@".\questions.csv", separatorChar: ',', hasHeader: true, allowQuoting: true);
    var data = mlContext.Data.TrainTestSplit(file, testFraction: 0.2, seed: 42);
    stw.Stop();
    Console.WriteLine($"Finished loading dataset {stw.ElapsedMilliseconds / 1000f}s");

    #region Examples
    /*
     * var model = BuildAndTrainModel(mlContext: mlContext, traindata: data.TrainSet, pipeline: pipeline, modelname: "FastTreeBinaryTrainer", estimator: estimator, stw: stw);
     * Evaluate(mlContext: mlContext, model: model, splitTestSet: data.TestSet);
     * //=====================================================================================================================================================================================
     * AutoML(mlContext: mlContext, pipeline: pipeline, file: file, progress: progress, settings: settings);
     * //=====================================================================================================================================================================================
     * TrainMultiModel(mlContext: mlContext, file: file, estimator: estimator, stw: stw);
     * //=====================================================================================================================================================================================
     */
    // Active mode: hyper-parameter sweep of the LightGBM configuration.
    Sweeper(mlContext: mlContext, file: file, pipeline: pipeline, modelname: "LightGbmBinaryTrainer", estimator: estimator, stw: stw);
    #endregion

    Console.WriteLine("Press any key to end the program");
    Console.ReadKey();
}
/// <summary>
/// Fits the featurization pipeline over the whole data view, runs an AutoML
/// binary classification experiment on the transformed data, and prints the
/// best run's trainer name, accuracy, and AUC.
/// </summary>
/// <param name="mlContext">Shared ML.NET context.</param>
/// <param name="pipeline">Feature-engineering pipeline applied before AutoML runs.</param>
/// <param name="file">Raw input data view.</param>
/// <param name="progress">Callback invoked as runs complete.</param>
/// <param name="settings">Experiment budget / trainer configuration.</param>
public static void AutoML(MLContext mlContext, IEstimator <ITransformer> pipeline, IDataView file, Progress <RunDetail <BinaryClassificationMetrics> > progress, BinaryExperimentSettings settings)
{
    // Fit and apply the pipeline up front so AutoML searches trainers over
    // the already-transformed columns.
    var transdata = pipeline.Fit(file).Transform(file);

    ExperimentResult <BinaryClassificationMetrics> experimentResult = mlContext.Auto()
        .CreateBinaryClassificationExperiment(settings)
        .Execute(trainData: transdata, labelColumnName: nameof(transformOutput.Label), progressHandler: progress);

    Console.WriteLine();
    Console.WriteLine($"Trainername- {experimentResult.BestRun.TrainerName}");
    Console.WriteLine($"Accuracy- {experimentResult.BestRun.ValidationMetrics.Accuracy}");
    Console.WriteLine($"AreaUnderRocCurve- {experimentResult.BestRun.ValidationMetrics.AreaUnderRocCurve}");
    Console.WriteLine();
    // Removed the unused 'model' local and the commented-out, machine-specific
    // Model.Save call; persist experimentResult.BestRun.Model where needed.
}
/// <summary>
/// Azure Function: trains a model from posted CSV-style data. Deserializes
/// a <c>TrainInput</c> from the request body, writes its data to a temp
/// file, loads and shuffles it, runs a 20-second AutoML experiment of the
/// requested kind (multiclass / binary / regression), stores the serialized
/// best model in the FileStore table, and returns a summary Model row.
/// </summary>
/// <param name="req">POST request whose body is a JSON TrainInput.</param>
/// <param name="log">Function logger.</param>
/// <returns>Success + Model on success; Success=false + Exception on failure.</returns>
public static ReturnResult <Model> Run([HttpTrigger(AuthorizationLevel.Anonymous, "post", Route = null)] HttpRequest req, ILogger log)
{
    // Unique scratch file for this request's training data.
    var dataFilePath = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString());
    try
    {
        db.BeginTransaction();
        MLContext context = new MLContext();

        TrainInput input = null;
        using (StreamReader reader = new StreamReader(req.Body))
        {
            input = JsonConvert.DeserializeObject <TrainInput>(reader.ReadToEnd());
        }
        File.WriteAllText(dataFilePath, input.Data);

        // Build the loader column list from the user's column descriptions.
        IDataView LoadedData = null;
        var columnData = new List <TextLoader.Column>();
        foreach (var c in input.Columns)
        {
            // Data type 1 marks a column the user chose to ignore.
            if (c.Type != 1)
            {
                var newColData = new TextLoader.Column()
                {
                    DataKind = (DataKind)c.Type,
                    Name = c.ColumnName,
                    Source = new TextLoader.Range[] { new TextLoader.Range(c.ColumnIndex) }
                };
                columnData.Add(newColData);
            }
        }

        LoadedData = context.Data.LoadFromTextFile(
            dataFilePath,
            columnData.ToArray(),
            separatorChar: input.Separator,
            hasHeader: input.HasHeaders,
            allowQuoting: true
            );
        LoadedData = context.Data.ShuffleRows(LoadedData);

        /*
         * Multiclass will be used in the case of binary experiments and multiclass experiments.
         * This is because multiclass can accept all types as an output column. This will
         * allow less interaction with the user and a better user experience.
         */
        double bestRunMetric = 0;
        ITransformer bestModel = null;

        if (input.ModelType == TrainInput.ModelTypes.Multiclass)
        {
            ExperimentResult <MulticlassClassificationMetrics> Results = null;
            var settings = new MulticlassExperimentSettings() { MaxExperimentTimeInSeconds = 20 };
            var training = context.Auto().CreateMulticlassClassificationExperiment(settings);
            Results = training.Execute(LoadedData, labelColumnName: input.LabelColumn);
            bestRunMetric = Results.BestRun.ValidationMetrics.MacroAccuracy;
            bestModel = Results.BestRun.Model;
        }
        else if (input.ModelType == TrainInput.ModelTypes.Binary)
        {
            ExperimentResult <BinaryClassificationMetrics> Results = null;
            var settings = new BinaryExperimentSettings() { MaxExperimentTimeInSeconds = 20 };
            var training = context.Auto().CreateBinaryClassificationExperiment(settings);
            Results = training.Execute(LoadedData, labelColumnName: input.LabelColumn);
            bestRunMetric = Results.BestRun.ValidationMetrics.Accuracy;
            bestModel = Results.BestRun.Model;
        }
        else if (input.ModelType == TrainInput.ModelTypes.Regression)
        {
            ExperimentResult <RegressionMetrics> Results = null;
            var settings = new RegressionExperimentSettings() { MaxExperimentTimeInSeconds = 20 };
            var training = context.Auto().CreateRegressionExperiment(settings);
            Results = training.Execute(LoadedData, labelColumnName: input.LabelColumn);
            bestRunMetric = Results.BestRun.ValidationMetrics.RSquared;
            bestModel = Results.BestRun.Model;
            // R-squared can be negative for a bad fit; clamp to 0 for display.
            if (bestRunMetric < 0)
            {
                bestRunMetric = 0;
            }
        }
        else
        {
            throw new Exception("Invalid model type");
        }

        // Serialize the best model and store it in the database.
        var modelFileId = 0;
        using (MemoryStream ms = new MemoryStream())
        {
            context.Model.Save(bestModel, LoadedData.Schema, ms);
            //Save model to the database
            FileStore modelSave = new FileStore() { Data = ms.ToArray() };
            modelFileId = FileStore.InsertUpdate(db, modelSave).Item.FileStoreId;
        }

        var resultModel = new Model()
        {
            FileStoreId = modelFileId,
            Accuracy = bestRunMetric,
            Rows = input.Data.Trim().Split('\n').Length
        };

        db.CompleteTransaction();
        return(new ReturnResult <Model>() { Success = true, Item = resultModel });
    }
    catch (Exception e)
    {
        db.AbortTransaction();
        // Include the exception object so the stack trace is logged too.
        log.LogError(e, e.Message);
        return(new ReturnResult <Model>() { Success = false, Exception = e });
    }
    finally
    {
        // Fix: the original never deleted the temp data file, leaking one
        // file per request. Clean it up whether training succeeded or not.
        if (File.Exists(dataFilePath))
        {
            File.Delete(dataFilePath);
        }
    }
}