LbfgsLogisticRegression( this SweepableBinaryClassificationTrainers trainer, string labelColumnName = "Label", string featureColumnName = "Features", SweepableOption <LbfgsLogisticRegressionBinaryTrainer.Options> optionBuilder = null, LbfgsLogisticRegressionBinaryTrainer.Options defaultOption = null) { var context = trainer.Context; if (optionBuilder == null) { optionBuilder = LbfgsLogisticRegressionBinaryTrainerSweepableOptions.Default; } optionBuilder.SetDefaultOption(defaultOption); return(context.AutoML().CreateSweepableEstimator( (context, option) => { option.LabelColumnName = labelColumnName; option.FeatureColumnName = featureColumnName; return context.BinaryClassification.Trainers.LbfgsLogisticRegression(option); }, optionBuilder, new string[] { labelColumnName, featureColumnName }, new string[] { PredictedLabel }, nameof(LbfgsLogisticRegressionBinaryTrainer))); }
/// <summary>
/// Trains a GAM binary classifier on a train/test split of <paramref name="data"/>,
/// saves the fitted model to "model.zip", stores it in <c>_model</c>, and reports
/// the accuracy measured on the held-out test set.
/// </summary>
/// <param name="data">Input data; every column except "Label" is used as a feature.</param>
/// <param name="ctx">ML.NET context used for splitting, training, evaluation and saving.</param>
/// <returns>The accuracy of the trained model on the test split.</returns>
private double Training(IDataView data, MLContext ctx)
{
    // Hold out 18% of the rows for evaluation.
    var split = ctx.Data.TrainTestSplit(data, testFraction: 0.18);

    // Every schema column other than the label is treated as a feature.
    var features = split.TrainSet.Schema
        .Select(col => col.Name)
        .Where(col => col != "Label")
        .ToArray();

    // NOTE: an unused LbfgsLogisticRegressionBinaryTrainer.Options local used to be
    // built here; it never reached the pipeline (which trains a GAM), so it was removed.
    var pipeline = ctx.Transforms.Concatenate("Features", features)
        .Append(ctx.BinaryClassification.Trainers.Gam(learningRate: 0.052, numberOfIterations: 25000));

    var model = pipeline.Fit(split.TrainSet);
    var predictions = model.Transform(split.TestSet);
    var metrics = ctx.BinaryClassification.Evaluate(predictions);

    // Persist the model alongside the schema of the full input data, then cache it.
    ctx.Model.Save(model, data.Schema, "model.zip");
    _model = model;

    return metrics.Accuracy;
}
/// <summary>
/// Creates an L-BFGS logistic regression binary trainer whose options are copied
/// from <paramref name="param"/>.
/// </summary>
/// <param name="context">ML.NET context that supplies the trainer catalog.</param>
/// <param name="param">Source of the regularization values and column names.</param>
/// <returns>The configured L-BFGS logistic regression estimator.</returns>
public override IEstimator<ITransformer> BuildFromOption(MLContext context, LbfgsOption param)
{
    var trainerOptions = new LbfgsLogisticRegressionBinaryTrainer.Options();

    // Regularization strengths.
    trainerOptions.L1Regularization = param.L1Regularization;
    trainerOptions.L2Regularization = param.L2Regularization;

    // Column bindings.
    trainerOptions.LabelColumnName = param.LabelColumnName;
    trainerOptions.FeatureColumnName = param.FeatureColumnName;
    trainerOptions.ExampleWeightColumnName = param.ExampleWeightColumnName;

    // Thread count is obtained from the AutoML utility helper rather than from the options.
    trainerOptions.NumberOfThreads = AutoMlUtils.GetNumberOfThreadFromEnvrionment();

    var trainers = context.BinaryClassification.Trainers;
    return trainers.LbfgsLogisticRegression(trainerOptions);
}
/// <summary>
/// Sample: trains an L-BFGS logistic regression binary classifier on generated data,
/// prints the first five predictions on a separately generated test set, and prints
/// the evaluation metrics for that test set.
/// </summary>
public static void Example()
{
    // The MLContext is the catalog of available operations and the source of
    // randomness; it can also be used for exception tracking and logging.
    // A fixed seed keeps the outputs of this example deterministic.
    MLContext mlContext = new MLContext(seed: 0);

    // Generate 1000 training points and wrap them in an IDataView,
    // the data type consumed by the ML.NET API.
    var trainingData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(1000));

    // Trainer options.
    var trainerOptions = new LbfgsLogisticRegressionBinaryTrainer.Options();
    trainerOptions.MaximumNumberOfIterations = 100;
    trainerOptions.OptimizationTolerance = 1e-8f;
    trainerOptions.L2Regularization = 0.01f;

    // Define the trainer and fit it to the training data.
    var estimator = mlContext.BinaryClassification.Trainers.LbfgsLogisticRegression(trainerOptions);
    var model = estimator.Fit(trainingData);

    // Test data comes from a different random seed so it differs from the training data.
    var testPoints = GenerateRandomDataPoints(500, seed: 123);
    var testData = mlContext.Data.LoadFromEnumerable(testPoints);

    // Score the test set.
    var scoredTestData = model.Transform(testData);

    // Materialize the scored rows as a list of Prediction objects.
    var predictions = mlContext.Data
        .CreateEnumerable<Prediction>(scoredTestData, reuseRowObject: false)
        .ToList();

    // Show the first 5 predictions.
    var firstFive = predictions.Take(5);
    foreach (var row in firstFive)
    {
        Console.WriteLine($"Label: {row.Label}, Prediction: {row.PredictedLabel}");
    }

    // Expected output:
    //   Label: True, Prediction: True
    //   Label: False, Prediction: True
    //   Label: True, Prediction: True
    //   Label: True, Prediction: True
    //   Label: False, Prediction: False

    // Evaluate the overall metrics.
    var metrics = mlContext.BinaryClassification.Evaluate(scoredTestData);
    PrintMetrics(metrics);

    // Expected output:
    //   Accuracy: 0.87
    //   AUC: 0.96
    //   F1 Score: 0.87
    //   Negative Precision: 0.89
    //   Negative Recall: 0.87
    //   Positive Precision: 0.86
    //   Positive Recall: 0.88
    //   Log Loss: 0.37
    //   Log Loss Reduction: 0.63
    //   Entropy: 1.00
    //
    //  TEST POSITIVE RATIO:    0.4760 (238.0/(238.0+262.0))
    //  Confusion table
    //            ||======================
    //  PREDICTED || positive | negative | Recall
    //  TRUTH     ||======================
    //   positive ||      210 |       28 | 0.8824
    //   negative ||       35 |      227 | 0.8664
    //            ||======================
    //  Precision ||   0.8571 |   0.8902 |
}
/// <summary>
/// Loads "learningdata.csv" and, for each technology column, trains an L-BFGS
/// logistic regression binary classifier that predicts that technology from a fixed
/// set of feature columns. Prints the F1 score of every model and finally the
/// average of the F1 scores that were greater than zero.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    MLContext ml = new MLContext(seed: 1);
    var trainData = ml.Data.LoadFromTextFile<MlInput>("learningdata.csv", ',', hasHeader: true);

    // Candidate label columns: one model is trained per entry.
    string[] techlist =
    {
        "360 Photo/Video", "3D Environments", "3D Modelling", "AR", "AR Authoring", "AR Cloud",
        "AR Headsets", "AR Kit", "AR: Hololens", "AR: Magic Leap", "ARCore", "Avatar Creation",
        "Distribution & Device Management", "Eye Tracking", "Haptics & Peripherals", "Motion Capture",
        "Motion Simulators", "Networking", "Photogrammetry", "Spatial Audio", "Unity", "Unreal", "VR",
        "VR Authoring", "VR Headsets", "VR: Mobile", "VR: PC", "Volumetric Capture", "Web XR"
    };

    // Feature columns are identical for every model, so the array is built once
    // instead of being re-allocated on each loop iteration.
    string[] featureColumns =
    {
        "Airlines", "Architecture", "Art & Museums", "Automotive", "Banking & Financial", "Cannabis",
        "Casinos & Lottery", "Charities", "Education & Training", "Education (K-12)", "Emergency Response",
        "Health & Medical", "Industrial", "Legal & Insurance", "Media & News & Entertainment", "Military",
        "Music", "Real Estate", "Restaurant & Food", "Retail", "Sales & Marketing", "Sports",
        "Telecommunications", "Travel & Tourism", "Collaboration & Social", "Communications",
        "Data Analytics", "Design", "Gaming", "General Training", "Health & Safety", "LBE", "Leadership",
        "Marketing", "Rehabilitation", "Simulation", "Tools", "Training: Hands On", "Training: Hard Skills",
        "Training: Soft Skills"
    };

    double f1sum = 0;
    int count = 0;

    foreach (var technology in techlist)
    {
        // Drop every technology column except the one currently used as the label.
        string[] otherTechnologies = techlist.Where(name => name != technology).ToArray();
        var dropOthers = ml.Transforms.DropColumns(otherTechnologies);
        var preparedData = dropOthers.Fit(trainData).Transform(trainData);

        // 80/20 train/test split.
        var trainTestData = ml.Data.TrainTestSplit(preparedData, testFraction: 0.2);

        IEstimator<ITransformer> dataPipe = ml.Transforms.Concatenate("Features", featureColumns);

        var options = new LbfgsLogisticRegressionBinaryTrainer.Options()
        {
            LabelColumnName = technology,
            FeatureColumnName = "Features",
            MaximumNumberOfIterations = 100,
            OptimizationTolerance = 1e-8f
        };
        var lbfgsLogistic = ml.BinaryClassification.Trainers.LbfgsLogisticRegression(options);
        var trainPipe = dataPipe.Append(lbfgsLogistic);

        Display(trainTestData.TrainSet);

        var model = trainPipe.Fit(trainTestData.TrainSet);
        var predictionView = model.Transform(trainTestData.TestSet);
        var metrics = ml.BinaryClassification.Evaluate(predictionView, labelColumnName: technology);

        Console.WriteLine($"F1 {metrics.F1Score} ");

        // Only strictly positive scores contribute to the average; the comparison
        // is also false for NaN, so undefined F1 scores are skipped as well.
        if (metrics.F1Score > 0)
        {
            f1sum += metrics.F1Score;
            count++;
        }
    }

    // Guard against 0/0 (which would print NaN) when no model had a positive F1 score.
    double average = count > 0 ? f1sum / count : 0;
    Console.WriteLine($"Average: {average}");
}