LbfgsLogisticRegression(
            this SweepableBinaryClassificationTrainers trainer,
            string labelColumnName   = "Label",
            string featureColumnName = "Features",
            SweepableOption <LbfgsLogisticRegressionBinaryTrainer.Options> optionBuilder = null,
            LbfgsLogisticRegressionBinaryTrainer.Options defaultOption = null)
        {
            var context = trainer.Context;

            if (optionBuilder == null)
            {
                optionBuilder = LbfgsLogisticRegressionBinaryTrainerSweepableOptions.Default;
            }

            optionBuilder.SetDefaultOption(defaultOption);
            return(context.AutoML().CreateSweepableEstimator(
                       (context, option) =>
            {
                option.LabelColumnName = labelColumnName;
                option.FeatureColumnName = featureColumnName;

                return context.BinaryClassification.Trainers.LbfgsLogisticRegression(option);
            },
                       optionBuilder,
                       new string[] { labelColumnName, featureColumnName },
                       new string[] { PredictedLabel },
                       nameof(LbfgsLogisticRegressionBinaryTrainer)));
        }
Пример #2
0
        /// <summary>
        /// Trains a GAM binary classifier on <paramref name="data"/>, evaluates it on a held-out
        /// split, saves the fitted model to "model.zip" and stores it in <c>_model</c>.
        /// </summary>
        /// <param name="data">Input data; every column other than "Label" is used as a feature.</param>
        /// <param name="ctx">ML.NET context used for splitting, training, evaluating and saving.</param>
        /// <returns>The accuracy measured on the held-out test split.</returns>
        private double Training(IDataView data, MLContext ctx)
        {
            // Hold out 18% of the rows for evaluation.
            var split = ctx.Data.TrainTestSplit(data, testFraction: 0.18);

            // Every column except the label is concatenated into the "Features" vector.
            var features = split.TrainSet.Schema
                           .Select(col => col.Name)
                           .Where(name => name != "Label")
                           .ToArray();

            var pipeline = ctx.Transforms.Concatenate("Features", features)
                           .Append(ctx.BinaryClassification.Trainers.Gam(learningRate: 0.052, numberOfIterations: 25000));

            var model = pipeline.Fit(split.TrainSet);

            // Score the held-out rows and evaluate with the evaluator's default column names.
            var predictions = model.Transform(split.TestSet);
            var metrics = ctx.BinaryClassification.Evaluate(predictions);

            // Persist the fitted model together with the input schema, and keep it for later use.
            ctx.Model.Save(model, data.Schema, "model.zip");
            _model = model;

            return metrics.Accuracy;
        }
Пример #3
0
        /// <summary>
        /// Creates an L-BFGS logistic regression binary trainer whose options are copied from
        /// <paramref name="param"/>.
        /// </summary>
        /// <param name="context">ML.NET context whose binary classification catalog creates the trainer.</param>
        /// <param name="param">Source of the regularization values and the label, feature and example-weight column names.</param>
        /// <returns>The configured trainer estimator.</returns>
        public override IEstimator <ITransformer> BuildFromOption(MLContext context, LbfgsOption param)
        {
            var trainerOptions = new LbfgsLogisticRegressionBinaryTrainer.Options();

            // Regularization strengths come straight from the supplied parameters.
            trainerOptions.L1Regularization = param.L1Regularization;
            trainerOptions.L2Regularization = param.L2Regularization;

            // Column bindings.
            trainerOptions.LabelColumnName         = param.LabelColumnName;
            trainerOptions.FeatureColumnName       = param.FeatureColumnName;
            trainerOptions.ExampleWeightColumnName = param.ExampleWeightColumnName;

            // The thread count is not part of the parameters; it is supplied by AutoMlUtils.
            trainerOptions.NumberOfThreads = AutoMlUtils.GetNumberOfThreadFromEnvrionment();

            var trainers = context.BinaryClassification.Trainers;
            return trainers.LbfgsLogisticRegression(trainerOptions);
        }
        /// <summary>
        /// Sample walkthrough: trains an L-BFGS logistic regression binary classifier on generated
        /// data points, prints the first five predictions on a separately generated test set,
        /// and prints the evaluation metrics.
        /// </summary>
        public static void Example()
        {
            // The MLContext is the entry point for all ML.NET operations (catalogs, logging,
            // randomness). A fixed seed keeps the sample's output deterministic.
            var context = new MLContext(seed: 0);

            // Generate 1000 training points and wrap them in an IDataView for ML.NET.
            var trainingPoints = GenerateRandomDataPoints(1000);
            var trainingView   = context.Data.LoadFromEnumerable(trainingPoints);

            // Trainer configuration.
            var trainerOptions = new LbfgsLogisticRegressionBinaryTrainer.Options();
            trainerOptions.MaximumNumberOfIterations = 100;
            trainerOptions.OptimizationTolerance     = 1e-8f;
            trainerOptions.L2Regularization          = 0.01f;

            // Build the trainer and fit it on the training data.
            var estimator    = context.BinaryClassification.Trainers.LbfgsLogisticRegression(trainerOptions);
            var trainedModel = estimator.Fit(trainingView);

            // The test set uses a different seed so that it differs from the training set.
            var testPoints = GenerateRandomDataPoints(500, seed: 123);
            var testView   = context.Data.LoadFromEnumerable(testPoints);

            // Score the test set.
            var scoredView = trainedModel.Transform(testView);

            // Materialize the scored rows as a list of Prediction objects.
            var predictions = context.Data.CreateEnumerable <Prediction>(scoredView, reuseRowObject: false).ToList();

            // Print the first 5 predictions (or fewer if the list is shorter).
            int shown = Math.Min(5, predictions.Count);
            for (int i = 0; i < shown; i++)
            {
                var p = predictions[i];
                Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}");
            }

            // Expected output:
            //   Label: True, Prediction: True
            //   Label: False, Prediction: True
            //   Label: True, Prediction: True
            //   Label: True, Prediction: True
            //   Label: False, Prediction: False

            // Compute and print the overall metrics on the scored test set.
            var evaluation = context.BinaryClassification.Evaluate(scoredView);
            PrintMetrics(evaluation);

            // Expected output:
            //   Accuracy: 0.87
            //   AUC: 0.96
            //   F1 Score: 0.87
            //   Negative Precision: 0.89
            //   Negative Recall: 0.87
            //   Positive Precision: 0.86
            //   Positive Recall: 0.88
            //   Log Loss: 0.37
            //   Log Loss Reduction: 0.63
            //   Entropy: 1.00
            //
            //   TEST POSITIVE RATIO:    0.4760 (238.0/(238.0+262.0))
            //   Confusion table
            //             ||======================
            //   PREDICTED || positive | negative | Recall
            //   TRUTH     ||======================
            //    positive ||      210 |       28 | 0.8824
            //    negative ||       35 |      227 | 0.8664
            //             ||======================
            //   Precision ||   0.8571 |   0.8902 |
        }
        /// <summary>
        /// Loads "learningdata.csv" and, for each technology column in turn, trains an L-BFGS
        /// logistic regression classifier that predicts that column from the fixed set of
        /// feature columns. Prints the F1 score of every model and finally the average of the
        /// F1 scores that were greater than zero.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            MLContext ml = new MLContext(seed: 1);

            var trainData = ml.Data.LoadFromTextFile <MlInput>("learningdata.csv", ',', hasHeader: true);

            double f1sum = 0;
            int    count = 0;

            // Each of these columns is used once as the label to predict.
            var techlist = new List <string> {
                "360 Photo/Video", "3D Environments", "3D Modelling", "AR", "AR Authoring", "AR Cloud", "AR Headsets", "AR Kit", "AR: Hololens", "AR: Magic Leap", "ARCore", "Avatar Creation", "Distribution & Device Management", "Eye Tracking", "Haptics & Peripherals", "Motion Capture",
                "Motion Simulators", "Networking", "Photogrammetry", "Spatial Audio", "Unity", "Unreal", "VR", "VR Authoring", "VR Headsets", "VR: Mobile", "VR: PC", "Volumetric Capture", "Web XR"
            };

            // The feature columns are identical for every label, so the concatenation step is
            // built once here instead of being recreated on every loop iteration.
            string[] featureColumns = new[] { "Airlines", "Architecture", "Art & Museums", "Automotive", "Banking & Financial", "Cannabis", "Casinos & Lottery", "Charities", "Education & Training", "Education (K-12)", "Emergency Response", "Health & Medical", "Industrial",
                                              "Legal & Insurance", "Media & News & Entertainment", "Military", "Music", "Real Estate", "Restaurant & Food", "Retail", "Sales & Marketing", "Sports", "Telecommunications", "Travel & Tourism", "Collaboration & Social", "Communications", "Data Analytics", "Design", "Gaming", "General Training", "Health & Safety",
                                              "LBE", "Leadership", "Marketing", "Rehabilitation", "Simulation", "Tools", "Training: Hands On", "Training: Hard Skills", "Training: Soft Skills" };
            IEstimator <ITransformer> dataPipe = ml.Transforms.Concatenate("Features", featureColumns);

            foreach (var technology in techlist)
            {
                // Drop every other technology column so that only the current label remains.
                string[] removedarray = techlist.Where(other => other != technology).ToArray();

                var prepedData          = ml.Transforms.DropColumns(removedarray);
                var prepedDataTransform = prepedData.Fit(trainData).Transform(trainData);

                // Hold out 20% of the rows for evaluation.
                TrainTestData trainTestData = ml.Data.TrainTestSplit(prepedDataTransform, testFraction: 0.2);

                var options = new LbfgsLogisticRegressionBinaryTrainer.Options()
                {
                    LabelColumnName           = technology,
                    FeatureColumnName         = "Features",
                    MaximumNumberOfIterations = 100,
                    OptimizationTolerance     = 1e-8f
                };

                var lbfsgLogistic = ml.BinaryClassification.Trainers.LbfgsLogisticRegression(options);
                var trainPipe     = dataPipe.Append(lbfsgLogistic);

                Display(trainTestData.TrainSet);
                var model = trainPipe.Fit(trainTestData.TrainSet);

                IDataView predictionView = model.Transform(trainTestData.TestSet);
                CalibratedBinaryClassificationMetrics metrics = ml.BinaryClassification.Evaluate(predictionView, labelColumnName: technology);

                Console.WriteLine($"F1 {metrics.F1Score} ");

                // Only scores strictly greater than zero contribute to the average
                // (a NaN score also fails this comparison and is skipped).
                if (metrics.F1Score > 0)
                {
                    f1sum += metrics.F1Score;
                    count++;
                }
            }

            // With no contributing score there is nothing to average; report NaN explicitly
            // instead of relying on the result of 0.0 / 0.
            double average = count > 0 ? f1sum / count : double.NaN;
            Console.WriteLine($"Average: {average}");
        }