Example #1
0
        /// <summary>
        /// Builds the settings used to run an AutoML binary classification experiment.
        /// </summary>
        /// <param name="mlContext">ML.NET context; not read by this method.</param>
        /// <param name="cts">Source of the cancellation token handed to the experiment.</param>
        /// <returns>
        /// Settings with a 3600-second time budget, accuracy as the optimizing metric,
        /// and no cache directory.
        /// </returns>
        private static BinaryExperimentSettings CreateExperimentSettings(MLContext mlContext,
                                                                         CancellationTokenSource cts)
        {
            // Individual trainers can be excluded from the experiment by removing them
            // from the Trainers collection of the settings, for example:
            //   settings.Trainers.Remove(BinaryClassificationTrainer.LbfgsLogisticRegression);
            //   settings.Trainers.Remove(BinaryClassificationTrainer.SymbolicSgdLogisticRegression);
            // No trainer is currently excluded.
            return new BinaryExperimentSettings
            {
                MaxExperimentTimeInSeconds = 3600,
                CancellationToken          = cts.Token,

                // Metric that AutoML tries to optimize over the course of the experiment.
                OptimizingMetric = BinaryClassificationMetric.Accuracy,

                // A null cache directory keeps every model produced by AutoML in memory
                // instead of writing it to disk after each run. On a large dataset this
                // can exhaust system memory.
                CacheDirectory = null,
            };
        }
Example #2
0
        /// <summary>
        /// Runs a LightGBM-only AutoML binary classification experiment on the data loaded
        /// from <c>_dataPath</c>, validated against the data loaded from <c>_validatePath</c>,
        /// then prints the best run's metrics and saves its model.
        /// </summary>
        public void Experiment()
        {
            var trainingData   = GetData(_dataPath);
            var validationData = GetData(_validatePath);

            // 45-minute budget, optimizing for F1.
            var settings = new BinaryExperimentSettings
            {
                OptimizingMetric           = BinaryClassificationMetric.F1Score,
                MaxExperimentTimeInSeconds = 45 * 60,
            };

            // Replace the default trainer list with LightGBM only.
            settings.Trainers.Clear();
            settings.Trainers.Add(BinaryClassificationTrainer.LightGbm);

            // Only the example-weight column is overridden here.
            var columns = new ColumnInformation
            {
                ExampleWeightColumnName = nameof(Appointment.Weight)
            };

            var result = _context.Auto()
                                 .CreateBinaryClassificationExperiment(settings)
                                 .Execute(
                                     trainData: trainingData,
                                     validationData: validationData,
                                     columnInformation: columns,
                                     progressHandler: new ProgressHandler());

            Console.WriteLine("Experiment completed");
            Console.WriteLine();

            var best = result.BestRun;
            ConsoleHelper.Print(best.TrainerName, best.ValidationMetrics);

            SaveModel(trainingData.Schema, best.Model);
            Console.WriteLine("Best model saved");
        }
Example #3
0
        /// <summary>
        /// Splits the loaded data 80/20 (fixed seed 0), runs a LightGBM-only AutoML binary
        /// classification experiment with the held-out part as validation data, then prints
        /// the best run's metrics and saves its model.
        /// </summary>
        public void Experiment()
        {
            var allData  = GetData();
            var holdOut  = _context.Data.TrainTestSplit(allData, testFraction: 0.2, seed: 0);

            // 45-minute budget, optimizing for F1.
            var settings = new BinaryExperimentSettings
            {
                OptimizingMetric           = BinaryClassificationMetric.F1Score,
                MaxExperimentTimeInSeconds = 45 * 60,
            };

            // Replace the default trainer list with LightGBM only.
            settings.Trainers.Clear();
            settings.Trainers.Add(BinaryClassificationTrainer.LightGbm);

            var result = _context.Auto()
                                 .CreateBinaryClassificationExperiment(settings)
                                 .Execute(
                                     trainData: holdOut.TrainSet,
                                     validationData: holdOut.TestSet,
                                     labelColumnName: nameof(Appointment.NoShow),
                                     progressHandler: new ProgressHandler());

            Console.WriteLine("Experiment completed");
            Console.WriteLine();

            var best = result.BestRun;
            ConsoleHelper.Print(best.TrainerName, best.ValidationMetrics);

            // The schema of the full (unsplit) data set is stored with the model.
            SaveModel(allData.Schema, best.Model);
            Console.WriteLine("Best model saved");
        }
Example #4
0
        /// <summary>
        /// Merges the training and validation sets, re-splits them 50/50, runs a 30-minute
        /// AutoML binary classification experiment on that split and stores the best run's
        /// model and validation metrics in <c>trainedModel</c> and <c>metrics</c>.
        /// </summary>
        protected override void Train()
        {
            var settings = new BinaryExperimentSettings
            {
                MaxExperimentTimeInSeconds = 30 * 60,
            };

            // Pool both sets into one data view before splitting it again.
            var trainRows      = context.Data.CreateEnumerable<SpectrogramData>(trainSet, false);
            var validationRows = context.Data.CreateEnumerable<SpectrogramData>(validationSet, false);
            var pooled         = context.Data.LoadFromEnumerable(trainRows.Concat(validationRows));
            var split          = context.Data.TrainTestSplit(pooled, 0.5);

            // Report each run that produced validation metrics.
            var progressHandler = new Progress<RunDetail<Microsoft.ML.Data.BinaryClassificationMetrics>>(run =>
            {
                if (run.ValidationMetrics == null)
                {
                    return;
                }

                Console.WriteLine($"Current trainer - {run.TrainerName} with accuracy {run.ValidationMetrics.Accuracy}");
            });

            var experiment = context.Auto().CreateBinaryClassificationExperiment(settings);

            Console.WriteLine("Running the experiment...");
            var result = experiment.Execute(
                trainData: split.TrainSet,
                validationData: split.TestSet,
                progressHandler: progressHandler);

            var best = result.BestRun;
            Console.WriteLine($"Best run ({best.TrainerName}):");

            trainedModel = best.Model;
            metrics      = best.ValidationMetrics;
        }
Example #5
0
        /// <summary>
        /// HTTP-triggered function that loads the rows of <c>[trainingData]</c>, runs a
        /// 60-second AutoML binary classification experiment on them and replaces the
        /// contents of the <c>models</c> table with the serialized best model.
        /// </summary>
        /// <param name="req">Incoming HTTP request; not read by this method.</param>
        /// <param name="log">Logger that receives the training summary and any persistence failure.</param>
        public static void Run([HttpTrigger()] HttpRequest req, ILogger log)
        {
            // Settings.txt is indexed by line: [0] catalog / user id, [1] password, [2] data source.
            var connection = File.ReadAllLines("Settings.txt");

            // NOTE(review): connection[0] is used for both Initial Catalog and User ID — confirm this is intended.
            // The database object is disposed when the function finishes so its connection is not leaked.
            using (var db = new Database(
                $"Data Source={connection[2]};Initial Catalog={connection[0]};User ID={connection[0]};Password={connection[1]};MultipleActiveResultSets=True;",
                DatabaseType.SqlServer2012,
                SqlClientFactory.Instance
                ))
            {
                var trainingDataList = db.Fetch<TrainData>("SELECT title as Title, isClickbait as Label FROM [trainingData]");

                var context      = new MLContext();
                var trainingData = context.Data.LoadFromEnumerable<TrainData>(trainingDataList);
                var settings     = new BinaryExperimentSettings
                {
                    MaxExperimentTimeInSeconds = 60
                };

                var mlExperiment = context.Auto().CreateBinaryClassificationExperiment(settings);
                var results      = mlExperiment.Execute(trainingData);
                var bestRun      = results.BestRun;

                // Accuracy is a fraction in [0, 1]; format it as a percentage instead of
                // appending "%" to the raw fraction.
                log.LogInformation("Train complete: {Accuracy:P2}", bestRun.ValidationMetrics.Accuracy);
                log.LogInformation("Train complete: {TrainerName}", bestRun.TrainerName);

                try
                {
                    // Replace the stored model atomically: delete the old rows and insert the new one.
                    db.BeginTransaction();
                    db.Execute("DELETE FROM models");
                    using (MemoryStream ms = new MemoryStream())
                    {
                        context.Model.Save(bestRun.Model, trainingData.Schema, ms);
                        var model = new models()
                        {
                            model = ms.ToArray()
                        };
                        db.Save(model);
                    }
                    db.CompleteTransaction();
                }
                catch (System.Exception ex)
                {
                    // Persistence stays best-effort (no rethrow), but the failure is no longer silent.
                    db.AbortTransaction();
                    log.LogError(ex, "Failed to store the trained model; the transaction was aborted");
                }
            }
        }
Example #6
0
        /// <summary>
        /// Trains a binary classifier with AutoML on the tab-separated file at
        /// <c>TraningDataPath</c>, prints the best run's metrics and writes the best
        /// model to <c>ModelPath</c>, replacing any existing file.
        /// </summary>
        public static void Main()
        {
            var mlContext = new MLContext(seed: 1024);

            var trainData = mlContext.Data.LoadFromTextFile<ModelInput>(
                TraningDataPath,
                hasHeader: false,
                separatorChar: '\t',
                allowQuoting: true,
                trimWhitespace: true);

            // Five-minute budget, optimizing for accuracy.
            var settings = new BinaryExperimentSettings
            {
                OptimizingMetric           = BinaryClassificationMetric.Accuracy,
                MaxExperimentTimeInSeconds = (uint)TimeSpan.FromMinutes(5).TotalSeconds,
            };

            var experiment = mlContext.Auto().CreateBinaryClassificationExperiment(settings);

            // Text preprocessing applied before AutoML's own featurization:
            // tokenize -> drop stop words -> featurize -> min-max normalize into "Features".
            var text          = mlContext.Transforms.Text;
            var preFeaturizer = text.TokenizeIntoWords("words", "text")
                                    .Append(text.RemoveDefaultStopWords("CleanText", "words"))
                                    .Append(text.FeaturizeText("FeaturizeText", "CleanText"))
                                    .Append(mlContext.Transforms.NormalizeMinMax("Features", "FeaturizeText"));

            var bestRun = experiment
                .Execute(
                    trainData,
                    "sentiment",
                    preFeaturizer: preFeaturizer,
                    progressHandler: new BinaryExperimentProgressHandler())
                .BestRun;

            PrintMetrics(bestRun.TrainerName, bestRun.ValidationMetrics);

            // Remove a model left over from a previous run before saving the new one.
            if (File.Exists(ModelPath))
            {
                File.Delete(ModelPath);
            }

            mlContext.Model.Save(bestRun.Model, trainData.Schema, ModelPath);
        }
Example #7
0
        /// <summary>
        /// Entry point: builds the shared preprocessing pipeline and a dictionary of
        /// named trainer pipelines, loads and splits <c>questions.csv</c>, then runs
        /// <c>Sweeper</c> for the LightGBM entry and waits for a key press.
        /// </summary>
        static void Main(string[] args)
        {
            #region creating all objects needed
            const string featureColumn = "Features";
            const string labelColumn   = nameof(transformOutput.Label);

            var mlContext = new MLContext();
            var stw       = new Stopwatch();
            var settings  = new BinaryExperimentSettings();
            var trainers  = mlContext.BinaryClassification.Trainers;

            // Progress callback for AutoML runs; only runs that produced validation metrics are reported.
            var progress = new Progress<RunDetail<BinaryClassificationMetrics>>(p =>
            {
                if (p.ValidationMetrics == null)
                {
                    return;
                }

                Console.WriteLine($"Current result - {p.TrainerName}, {p.ValidationMetrics.Accuracy}, {p.ValidationMetrics.AreaUnderRocCurve}");
            });

            #region Transformer pipeline
            // Derive the boolean label from the textual is_duplicate column ("1" => true).
            Action<QuestionPairs, transformOutput> mapping = (input, output) => output.Label = input.is_duplicate.Equals("1");

            // NOTE(review): this shared pipeline already ends in an SdcaLogisticRegression trainer
            // (followed by a cache checkpoint), and every dictionary entry below appends a second
            // trainer on top of it — confirm this is intended.
            IEstimator<ITransformer> pipeline = mlContext.Transforms.CustomMapping(mapping, contractName: null)
                .Append(mlContext.Transforms.Text.FeaturizeText(inputColumnName: "question1", outputColumnName: "question1Featurized"))
                .Append(mlContext.Transforms.Text.FeaturizeText(inputColumnName: "question2", outputColumnName: "question2Featurized"))
                .Append(mlContext.Transforms.Concatenate(featureColumn, "question1Featurized", "question2Featurized"))
                .Append(mlContext.Transforms.DropColumns("question1", "question2", "is_duplicate"))
                .Append(trainers.SdcaLogisticRegression(labelColumnName: labelColumn, featureColumnName: featureColumn))
                .AppendCacheCheckpoint(mlContext);
            #endregion

            #region Dictionary of models
            // One entry per trainer: the shared pipeline with that trainer appended, keyed by trainer name.
            IDictionary<string, IEstimator<ITransformer>> estimator = new Dictionary<string, IEstimator<ITransformer>>
            {
                { "AveragedPerceptronTrainer", pipeline.Append(trainers.AveragedPerceptron(labelColumnName: labelColumn, featureColumnName: featureColumn)) },
                { "SdcaLogisticRegressionBinaryTrainer", pipeline.Append(trainers.SdcaLogisticRegression(labelColumnName: labelColumn, featureColumnName: featureColumn)) },
                { "SdcaNonCalibratedBinaryTrainer", pipeline.Append(trainers.SdcaNonCalibrated(labelColumnName: labelColumn, featureColumnName: featureColumn)) },
                { "SymbolicSgdLogisticRegressionBinaryTrainer", pipeline.Append(trainers.SymbolicSgdLogisticRegression(labelColumnName: labelColumn, featureColumnName: featureColumn)) },
                { "LbfgsLogisticRegressionBinaryTrainer", pipeline.Append(trainers.LbfgsLogisticRegression(labelColumnName: labelColumn, featureColumnName: featureColumn)) },
                { "LightGbmBinaryTrainer", pipeline.Append(trainers.LightGbm(labelColumnName: labelColumn, featureColumnName: featureColumn)) },
                { "FastTreeBinaryTrainer", pipeline.Append(trainers.FastTree(labelColumnName: labelColumn, featureColumnName: featureColumn)) },
                { "FastForestBinaryTrainer", pipeline.Append(trainers.FastForest(labelColumnName: labelColumn, featureColumnName: featureColumn)) },
                // "GamBinaryTrainer" (trainers.Gam) is currently disabled.
                { "FieldAwareFactorizationMachineTrainer", pipeline.Append(trainers.FieldAwareFactorizationMachine(labelColumnName: labelColumn, featureColumnName: featureColumn)) },
                { "LinearSvmTrainer", pipeline.Append(trainers.LinearSvm(labelColumnName: labelColumn, featureColumnName: featureColumn)) },
            };
            #endregion

            #region AutoML settings
            // 60-second budget, LightGBM only.
            settings.MaxExperimentTimeInSeconds = 60;
            settings.Trainers.Clear();
            settings.Trainers.Add(BinaryClassificationTrainer.LightGbm);
            #endregion

            #endregion

            // Time the data load and the 80/20 split.
            stw.Start();
            IDataView file = mlContext.Data.LoadFromTextFile<QuestionPairs>(@".\questions.csv", separatorChar: ',', hasHeader: true, allowQuoting: true);
            var data = mlContext.Data.TrainTestSplit(file, testFraction: 0.2, seed: 42);
            stw.Stop();
            Console.WriteLine($"Finished loading dataset                        {stw.ElapsedMilliseconds / 1000f}s");

            #region Examples

            /*
             * Alternative entry points (currently disabled):
             *
             * var model = BuildAndTrainModel(mlContext: mlContext, traindata: data.TrainSet, pipeline: pipeline, modelname: "FastTreeBinaryTrainer", estimator: estimator, stw: stw);
             * Evaluate(mlContext: mlContext, model: model, splitTestSet: data.TestSet);
             *
             * AutoML(mlContext: mlContext, pipeline: pipeline, file: file, progress: progress, settings: settings);
             *
             * TrainMultiModel(mlContext: mlContext, file: file, estimator: estimator, stw: stw);
             */

            Sweeper(mlContext: mlContext, file: file, pipeline: pipeline, modelname: "LightGbmBinaryTrainer", estimator: estimator, stw: stw);
            #endregion

            Console.WriteLine("Press any key to end the program");
            Console.ReadKey();
        }
Example #8
0
        /// <summary>
        /// Fits <paramref name="pipeline"/> on <paramref name="file"/>, runs an AutoML binary
        /// classification experiment on the transformed data and prints the best run's
        /// trainer name, accuracy and area under the ROC curve.
        /// </summary>
        /// <param name="mlContext">ML.NET context used to create the experiment.</param>
        /// <param name="pipeline">Estimator fitted on and applied to <paramref name="file"/> before the experiment.</param>
        /// <param name="file">Input data.</param>
        /// <param name="progress">Progress handler passed to the experiment.</param>
        /// <param name="settings">Experiment settings.</param>
        public static void AutoML(MLContext mlContext, IEstimator<ITransformer> pipeline, IDataView file, Progress<RunDetail<BinaryClassificationMetrics>> progress, BinaryExperimentSettings settings)
        {
            // The pipeline is fitted on, and applied to, the same data set.
            var transdata = pipeline.Fit(file).Transform(file);

            ExperimentResult<BinaryClassificationMetrics> experimentResult = mlContext.Auto()
                .CreateBinaryClassificationExperiment(settings)
                .Execute(trainData: transdata, labelColumnName: nameof(transformOutput.Label), progressHandler: progress);

            var bestRun = experimentResult.BestRun;

            Console.WriteLine();
            Console.WriteLine($"Trainername- {bestRun.TrainerName}");
            Console.WriteLine($"Accuracy- {bestRun.ValidationMetrics.Accuracy}");
            Console.WriteLine($"AreaUnderRocCurve- {bestRun.ValidationMetrics.AreaUnderRocCurve}");
            Console.WriteLine();

            // The best model is not persisted here. To keep it, save bestRun.Model together
            // with transdata.Schema via mlContext.Model.Save and a caller-supplied path.
        }
Example #9
0
        /// <summary>
        /// HTTP-triggered function that trains a model from the posted <c>TrainInput</c>
        /// JSON body: the data is written to a temporary file, loaded with the requested
        /// columns, shuffled and passed to a 20-second AutoML experiment of the requested
        /// model type. The best model is stored through <c>FileStore</c> inside a database
        /// transaction.
        /// </summary>
        /// <param name="req">Request whose body contains the serialized <c>TrainInput</c>.</param>
        /// <param name="log">Logger that receives failures.</param>
        /// <returns>
        /// A successful result carrying the stored model's file id, its best-run metric and
        /// the row count; or a failed result carrying the exception (the transaction is aborted).
        /// </returns>
        public static ReturnResult<Model> Run([HttpTrigger(AuthorizationLevel.Anonymous, "post", Route = null)] HttpRequest req, ILogger log)
        {
            var dataFilePath = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString());

            try
            {
                db.BeginTransaction();

                MLContext context = new MLContext();

                TrainInput input;
                using (StreamReader reader = new StreamReader(req.Body))
                {
                    input = JsonConvert.DeserializeObject<TrainInput>(reader.ReadToEnd());
                }

                // A null result would otherwise surface as a NullReferenceException further down.
                if (input == null)
                {
                    throw new InvalidDataException("Request body is empty or is not valid JSON");
                }

                // The text loader reads from a file, so the posted data is spooled to a temp file.
                File.WriteAllText(dataFilePath, input.Data);

                var columnData = new List<TextLoader.Column>();
                foreach (var c in input.Columns)
                {
                    //data type 1 is for ignore
                    if (c.Type != 1)
                    {
                        columnData.Add(new TextLoader.Column()
                        {
                            DataKind = (DataKind)c.Type,
                            Name     = c.ColumnName,
                            Source   = new TextLoader.Range[] { new TextLoader.Range(c.ColumnIndex) }
                        });
                    }
                }

                IDataView LoadedData = context.Data.LoadFromTextFile(
                    dataFilePath,
                    columnData.ToArray(),
                    separatorChar: input.Separator,
                    hasHeader: input.HasHeaders,
                    allowQuoting: true
                    );

                LoadedData = context.Data.ShuffleRows(LoadedData);

                // Each model type runs its own AutoML experiment and reports one headline
                // metric: macro accuracy (multiclass), accuracy (binary) or R-squared (regression).
                double       bestRunMetric = 0;
                ITransformer bestModel     = null;

                if (input.ModelType == TrainInput.ModelTypes.Multiclass)
                {
                    var settings = new MulticlassExperimentSettings()
                    {
                        MaxExperimentTimeInSeconds = 20
                    };
                    var training = context.Auto().CreateMulticlassClassificationExperiment(settings);
                    var results  = training.Execute(LoadedData, labelColumnName: input.LabelColumn);
                    bestRunMetric = results.BestRun.ValidationMetrics.MacroAccuracy;
                    bestModel     = results.BestRun.Model;
                }
                else if (input.ModelType == TrainInput.ModelTypes.Binary)
                {
                    var settings = new BinaryExperimentSettings()
                    {
                        MaxExperimentTimeInSeconds = 20
                    };
                    var training = context.Auto().CreateBinaryClassificationExperiment(settings);
                    var results  = training.Execute(LoadedData, labelColumnName: input.LabelColumn);
                    bestRunMetric = results.BestRun.ValidationMetrics.Accuracy;
                    bestModel     = results.BestRun.Model;
                }
                else if (input.ModelType == TrainInput.ModelTypes.Regression)
                {
                    var settings = new RegressionExperimentSettings()
                    {
                        MaxExperimentTimeInSeconds = 20
                    };
                    var training = context.Auto().CreateRegressionExperiment(settings);
                    var results  = training.Execute(LoadedData, labelColumnName: input.LabelColumn);
                    bestRunMetric = results.BestRun.ValidationMetrics.RSquared;
                    bestModel     = results.BestRun.Model;

                    // A negative R-squared is reported as 0.
                    if (bestRunMetric < 0)
                    {
                        bestRunMetric = 0;
                    }
                }
                else
                {
                    throw new Exception("Invalid model type");
                }

                var modelFileId = 0;

                using (MemoryStream ms = new MemoryStream())
                {
                    context.Model.Save(bestModel, LoadedData.Schema, ms);
                    //Save model to the database
                    FileStore modelSave = new FileStore()
                    {
                        Data = ms.ToArray()
                    };

                    modelFileId = FileStore.InsertUpdate(db, modelSave).Item.FileStoreId;
                }

                var resultModel = new Model()
                {
                    FileStoreId = modelFileId,
                    Accuracy    = bestRunMetric,
                    // NOTE(review): this counts every line of the posted data, including a header
                    // line when input.HasHeaders is true — confirm that is the intended row count.
                    Rows        = input.Data.Trim().Split('\n').Length
                };

                db.CompleteTransaction();

                return(new ReturnResult<Model>()
                {
                    Success = true,
                    Item = resultModel
                });
            }
            catch (Exception e)
            {
                db.AbortTransaction();
                // Pass the exception itself so the stack trace is logged, not just the message.
                log.LogError(e, "{Message}", e.Message);
                return(new ReturnResult<Model>()
                {
                    Success = false,
                    Exception = e
                });
            }
            finally
            {
                // The spooled data is only needed during training; delete it so repeated calls
                // do not accumulate files in the temp directory. A cleanup failure must not
                // replace the result being returned, so it is only logged.
                try
                {
                    File.Delete(dataFilePath);
                }
                catch (IOException cleanupError)
                {
                    log.LogWarning(cleanupError, "Could not delete temporary training file {Path}", dataFilePath);
                }
                catch (UnauthorizedAccessException cleanupError)
                {
                    log.LogWarning(cleanupError, "Could not delete temporary training file {Path}", dataFilePath);
                }
            }
        }