SdcaLogisticRegression( this SweepableBinaryClassificationTrainers trainer, string labelColumnName = "Label", string featureColumnName = "Features", SweepableOption <SdcaLogisticRegressionBinaryTrainer.Options> optionBuilder = null, SdcaLogisticRegressionBinaryTrainer.Options defaultOption = null) { var context = trainer.Context; if (optionBuilder == null) { optionBuilder = SdcaLogisticRegressionBinaryTrainerSweepableOptions.Default; } optionBuilder.SetDefaultOption(defaultOption); return(context.AutoML().CreateSweepableEstimator( (context, option) => { option.LabelColumnName = labelColumnName; option.FeatureColumnName = featureColumnName; return context.BinaryClassification.Trainers.SdcaLogisticRegression(option); }, optionBuilder, new string[] { labelColumnName, featureColumnName }, new string[] { PredictedLabel }, nameof(SdcaLogisticRegressionBinaryTrainer))); }
/// <summary>
/// Predict a target using a linear binary classification model trained with the SDCA trainer, and log-loss.
/// </summary>
/// <param name="catalog">The binary classification catalog trainer object.</param>
/// <param name="label">The label, or dependent variable.</param>
/// <param name="features">The features, or independent variables.</param>
/// <param name="weights">The optional example weights. When supplied, the resolved weight column name is
/// written to <c>ExampleWeightColumnName</c> on the options used for training.</param>
/// <param name="options">Advanced arguments to the algorithm. May be <see langword="null"/>, in which case a
/// default-constructed options object is used. Note that the label and feature column names (and the weight
/// column name, when weights are supplied) of this object are overwritten when the trainer is created.</param>
/// <param name="onFit">A delegate that is called every time the
/// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
/// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive
/// the linear model that was trained, as well as the calibrator on top of that model. Note that this action cannot change the
/// result in any way; it is only a way for the caller to be informed about what was learnt.</param>
/// <returns>The set of output columns including in order the predicted binary classification score (which will range
/// from negative to positive infinity), the calibrated prediction (from 0 to 1), and the predicted label.</returns>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
///  [!code-csharp[SDCA](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Static/SDCABinaryClassification.cs)]
/// ]]></format>
/// </example>
public static (Scalar<float> score, Scalar<float> probability, Scalar<bool> predictedLabel) Sdca(
    this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
    Scalar<bool> label,
    Vector<float> features,
    Scalar<float> weights,
    SdcaLogisticRegressionBinaryTrainer.Options options,
    Action<CalibratedModelParametersBase<LinearBinaryModelParameters, PlattCalibrator>> onFit = null)
{
    Contracts.CheckValue(label, nameof(label));
    Contracts.CheckValue(features, nameof(features));
    Contracts.CheckValueOrNull(weights);
    Contracts.CheckValueOrNull(options);
    Contracts.CheckValueOrNull(onFit);

    // The contract above explicitly allows a null options object, but the factory below
    // dereferences it. Fall back to defaults instead of failing with a NullReferenceException
    // when the estimator is eventually created.
    if (options == null)
    {
        options = new SdcaLogisticRegressionBinaryTrainer.Options();
    }

    var rec = new TrainerEstimatorReconciler.BinaryClassifier(
        (env, labelName, featuresName, weightsName) =>
        {
            options.LabelColumnName = labelName;
            options.FeatureColumnName = featuresName;

            // Only override the weight column when one was actually supplied, so a value the
            // caller put on the options object directly is not wiped out.
            if (weightsName != null)
            {
                options.ExampleWeightColumnName = weightsName;
            }

            var trainer = new SdcaLogisticRegressionBinaryTrainer(env, options);
            if (onFit != null)
            {
                return trainer.WithOnFitDelegate(trans => onFit(trans.Model));
            }

            return trainer;
        },
        label,
        features,
        weights);

    return rec.Output;
}
/// <summary>
/// Trains an SDCA logistic-regression binary classifier on the text file at <c>TrainDataPath</c>
/// and prints the percentage of samples returned by <c>InitDataSet()</c> that the model predicts correctly.
/// </summary>
/// <param name="mlc">The ML.NET context used for loading, transforming, training and prediction.</param>
private void MLNETTrain(MLContext mlc)
{
    IDataView trainData = mlc.Data.LoadFromTextFile<model1>(TrainDataPath);

    // Produce a Boolean "Lable" column from the raw "Class" column, then round-trip the rows
    // through ModelResult objects so the converted column becomes part of the training view.
    var b = mlc.Transforms.Conversion.ConvertType(
        new[] { new InputOutputColumnPair("Lable", "Class") },
        DataKind.Boolean);
    var transformer = b.Fit(trainData);
    var transformedData = transformer.Transform(trainData);
    var convertedData = mlc.Data.CreateEnumerable<ModelResult>(transformedData, true);
    trainData = mlc.Data.LoadFromEnumerable<ModelResult>(convertedData);

    var a = mlc.Transforms.Concatenate("Features", new[] { "X1", "X2" });

    var options = new SdcaLogisticRegressionBinaryTrainer.Options()
    {
        LabelColumnName = "Lable",
        FeatureColumnName = "Features",
        MaximumNumberOfIterations = 10,
        L2Regularization = 1.0f,
        L1Regularization = 1.0f,
        BiasLearningRate = 0.01f,
    };

    var trainer = mlc.BinaryClassification.Trainers.SdcaLogisticRegression(options);
    var trainPipe = a.Append(trainer);

    ITransformer model = trainPipe.Fit(trainData);
    IDataView predictions = model.Transform(trainData);

    // Evaluate against the ground-truth label column. The second argument is the label column
    // name; passing "PredictedLabel" here would score the model against its own output.
    // The metrics are currently not printed; they are kept for diagnostics.
    var metrics = mlc.BinaryClassification.EvaluateNonCalibrated(predictions, "Lable");

    var models = InitDataSet();
    var pe = mlc.Model.CreatePredictionEngine<model1, Predicate>(model);

    int tcnt = 0;
    for (int i = 0; i < models.Length; i++)
    {
        var Y = pe.Predict(models[i]);

        // Compare the expected Boolean label with the prediction. The explicit local avoids the
        // precedence trap of `a == 1 ? true : false == b`, which parses as
        // `(a == 1) ? true : (false == b)` and counts every positive sample as correct.
        bool expected = models[i].Class == 1;
        if (expected == Y.PreLable)
        {
            tcnt++;
        }
    }

    Console.WriteLine("The ML.NET Predicate Correct Rate is {0}%", ((double)tcnt / models.Length) * 100);
}
/// <summary>
/// Creates an SDCA logistic-regression binary classification trainer whose column names and
/// L1/L2 regularization come from <paramref name="param"/>; the thread count is taken from
/// <c>AutoMlUtils.GetNumberOfThreadFromEnvrionment()</c>.
/// </summary>
/// <param name="context">The ML.NET context whose binary classification catalog builds the trainer.</param>
/// <param name="param">The option values to copy onto the trainer options.</param>
/// <returns>The configured trainer as an estimator.</returns>
public override IEstimator<ITransformer> BuildFromOption(MLContext context, SdcaOption param)
{
    var trainerOptions = new SdcaLogisticRegressionBinaryTrainer.Options();

    // Column bindings.
    trainerOptions.LabelColumnName = param.LabelColumnName;
    trainerOptions.FeatureColumnName = param.FeatureColumnName;
    trainerOptions.ExampleWeightColumnName = param.ExampleWeightColumnName;

    // Hyper-parameters.
    trainerOptions.L1Regularization = param.L1Regularization;
    trainerOptions.L2Regularization = param.L2Regularization;

    // Execution settings.
    trainerOptions.NumberOfThreads = AutoMlUtils.GetNumberOfThreadFromEnvrionment();

    var trainers = context.BinaryClassification.Trainers;
    return trainers.SdcaLogisticRegression(trainerOptions);
}
// This example uses the adult income dataset. The goal is to predict whether a person's
// income is above $50K, based on demographic information about that person.
// For more details about this dataset, see https://archive.ics.uci.edu/ml/datasets/adult.
public static void Example()
{
    // The context is the entry point to ML.NET operations. A fixed seed keeps the output deterministic.
    var mlContext = new MLContext(seed: 0);

    // Download and featurize the dataset, then hold back 10% of it for testing.
    var featurizedData = Microsoft.ML.SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);
    var split = mlContext.Data.TrainTestSplit(featurizedData, testFraction: 0.1);

    // Trainer options: convergence tolerance, maximum number of passes over the training data,
    // and a slightly higher weight for instances of the positive class.
    var trainerOptions = new SdcaLogisticRegressionBinaryTrainer.Options
    {
        ConvergenceTolerance = 0.05f,
        MaximumNumberOfIterations = 30,
        PositiveInstanceWeight = 1.2f,
    };

    // Build the trainer and fit it to the training portion of the split.
    var trainer = mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(trainerOptions);
    var trainedModel = trainer.Fit(split.TrainSet);

    // Score the held-out test set and report the evaluation metrics.
    var scoredTestSet = trainedModel.Transform(split.TestSet);
    var testMetrics = mlContext.BinaryClassification.Evaluate(scoredTestSet);
    Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(testMetrics);

    // Expected output:
    //   Accuracy: 0.85
    //   AUC: 0.90
    //   F1 Score: 0.66
    //   Negative Precision: 0.89
    //   Negative Recall: 0.92
    //   Positive Precision: 0.70
    //   Positive Recall: 0.63
    //   LogLoss: 0.47
    //   LogLossReduction: 39.77
    //   Entropy: 0.78
}
/// <summary>
/// Trains an SDCA logistic-regression text classifier on the CSV file at <c>_path</c>, derives
/// confusion-matrix statistics from a 20% test split, hands them to <c>CreateConfusionMatrix</c>
/// and <c>CreateResults</c>, and stores a prediction engine in <c>predictor</c>.
/// </summary>
public void EgitimYap()
{
    label22.Text = "Model eğitiliyor...";

    var context = new MLContext(seed: 0);

    // Load the comma-separated data set (with a header row) and split it 80/20 into train/test.
    var data = context.Data.LoadFromTextFile<Input>(_path, hasHeader: true, separatorChar: ',');
    var split = context.Data.TrainTestSplit(data, testFraction: 0.2, seed: 0);
    var trainData = split.TrainSet;
    var testData = split.TestSet;

    // Trainer settings: convergence tolerance and the maximum number of passes over the
    // training data. The positive-class instance weight is left at its default.
    var options = new SdcaLogisticRegressionBinaryTrainer.Options
    {
        ConvergenceTolerance = 0.05f,
        MaximumNumberOfIterations = 1000,
    };

    // Featurize the "Text" column into "Features", then train the classifier.
    var featurizer = context.Transforms.Text.FeaturizeText(outputColumnName: "Features", inputColumnName: "Text");
    var trainer = context.BinaryClassification.Trainers.SdcaLogisticRegression(options);
    var model = featurizer.Append(trainer).Fit(trainData);

    // Evaluate on the held-out test set.
    var metrics = context.BinaryClassification.Evaluate(model.Transform(testData), "Label");

    // assumes Counts is indexed [actual][predicted] with the positive class first — TODO confirm
    var counts = metrics.ConfusionMatrix.Counts;
    var truePositives = counts[0][0];
    var falsePositives = counts[1][0];
    var falseNegatives = counts[0][1];
    var trueNegatives = counts[1][1];

    var total = truePositives + falsePositives + falseNegatives + trueNegatives;
    var predictedPositives = truePositives + falsePositives;
    var predictedNegatives = falseNegatives + trueNegatives;
    var actualPositives = truePositives + falseNegatives;
    var actualNegatives = falsePositives + trueNegatives;

    var prevalence = actualPositives / total;
    var accuracy = (truePositives + trueNegatives) / total;

    // NOTE(review): the value reported as "Auc" is the area under the precision-recall curve,
    // not the ROC curve — confirm this is what CreateResults is meant to display.
    var auc = metrics.AreaUnderPrecisionRecallCurve;

    var ppv = truePositives / predictedPositives;   // Positive predictive value (PPV), precision
    var fdr = falsePositives / predictedPositives;  // False discovery rate (FDR)
    var falseOmissionRate = falseNegatives / predictedNegatives; // False omission rate (FOR)
    var npv = trueNegatives / predictedNegatives;   // Negative predictive value (NPV)
    var tpr = truePositives / actualPositives;      // True positive rate (TPR), recall, sensitivity
    var fpr = falsePositives / actualNegatives;     // False positive rate (FPR), fall-out (1 - specificity)
    var fnr = falseNegatives / actualPositives;     // False negative rate (FNR), miss rate
    var tnr = trueNegatives / actualNegatives;      // True negative rate (TNR), specificity
    var positiveLikelihoodRatio = tpr / fpr;        // Positive likelihood ratio (LR+)
    var negativeLikelihoodRatio = fnr / tnr;        // Negative likelihood ratio (LR-)
    var diagnosticOddsRatio = positiveLikelihoodRatio / negativeLikelihoodRatio; // Diagnostic odds ratio (DOR)
    var f1 = 2 * ((ppv * tpr) / (ppv + tpr));       // F1 score

    CreateConfusionMatrix(trueNegatives, falsePositives, falseNegatives, truePositives);
    CreateResults(
        prevalence, accuracy, auc, ppv, fdr, falseOmissionRate, npv, tpr, fpr, fnr, tnr,
        positiveLikelihoodRatio, negativeLikelihoodRatio, diagnosticOddsRatio, f1);

    // Keep a prediction engine around for later single-example predictions.
    predictor = context.Model.CreatePredictionEngine<Input, Output>(model);

    label22.Text = "Model eğitimi tamamlandı.";
}
/// <summary>
/// Trains an SDCA logistic-regression binary classifier on generated data points, prints the
/// first five predictions on a separately generated test set, and prints the evaluation metrics.
/// </summary>
public static void Example()
{
    // The context is the entry point to ML.NET operations. A fixed seed keeps the output deterministic.
    var mlContext = new MLContext(seed: 0);

    // Generate the training points and expose them as an IDataView, which is what ML.NET consumes.
    var trainingPoints = GenerateRandomDataPoints(1000);
    var trainingData = mlContext.Data.LoadFromEnumerable(trainingPoints);

    // ML.NET does not cache data by default. Iterative trainers make many passes over the data,
    // so when the data fits into memory, caching it avoids repeating the loading work on every pass.
    trainingData = mlContext.Data.Cache(trainingData);

    // Trainer options: convergence tolerance, maximum number of passes over the training data,
    // and a slightly higher weight for instances of the positive class.
    var trainerOptions = new SdcaLogisticRegressionBinaryTrainer.Options
    {
        ConvergenceTolerance = 0.05f,
        MaximumNumberOfIterations = 30,
        PositiveInstanceWeight = 1.2f,
    };

    // Define and train the model.
    var trainer = mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(trainerOptions);
    var trainedModel = trainer.Fit(trainingData);

    // Create test data with a different random seed so that it differs from the training data.
    var testPoints = GenerateRandomDataPoints(500, seed: 123);
    var testData = mlContext.Data.LoadFromEnumerable(testPoints);

    // Run the model on the test set and materialize the scored rows as a list.
    var scoredTestData = trainedModel.Transform(testData);
    var scoredRows = mlContext.Data
        .CreateEnumerable<Prediction>(scoredTestData, reuseRowObject: false)
        .ToList();

    // Print 5 predictions.
    foreach (var row in scoredRows.Take(5))
    {
        Console.WriteLine($"Label: {row.Label}, Prediction: {row.PredictedLabel}");
    }

    // Expected output:
    //   Label: True, Prediction: True
    //   Label: False, Prediction: False
    //   Label: True, Prediction: True
    //   Label: True, Prediction: True
    //   Label: False, Prediction: True

    // Evaluate the overall metrics.
    var testMetrics = mlContext.BinaryClassification.Evaluate(scoredTestData);
    PrintMetrics(testMetrics);

    // Expected output:
    //   Accuracy: 0.60
    //   AUC: 0.67
    //   F1 Score: 0.65
    //   Negative Precision: 0.69
    //   Negative Recall: 0.45
    //   Positive Precision: 0.56
    //   Positive Recall: 0.77
}
// Trains the model with the data set that was created.
/// <summary>
/// Trains an SDCA logistic-regression text classifier on the CSV file at <c>_path</c>, derives
/// confusion-matrix statistics from a 20% test split, hands them to <c>CreateConfusionMatrix</c>
/// and <c>CreateResults</c>, and stores a prediction engine in <c>predictor</c>.
/// </summary>
public void EgitimYap()
{
    label22.Text = "Model eğitiliyor...";

    // Create a new ML.NET context.
    var context = new MLContext(seed: 0);

    // Load the data set from the given path (comma-separated, with a header row),
    // then split it into 20% test and 80% training data.
    var data = context.Data.LoadFromTextFile<Input>(_path, hasHeader: true, separatorChar: ',');
    var split = context.Data.TrainTestSplit(data, testFraction: 0.2, seed: 0);
    var trainData = split.TrainSet;
    var testData = split.TestSet;

    // Trainer settings: convergence tolerance and the maximum number of passes over the
    // training data. The positive-class instance weight is left at its default.
    var options = new SdcaLogisticRegressionBinaryTrainer.Options
    {
        ConvergenceTolerance = 0.05f,
        MaximumNumberOfIterations = 100000,
    };

    // Featurize the "Text" column into "Features", then train with logistic regression.
    var featurizer = context.Transforms.Text.FeaturizeText(outputColumnName: "Features", inputColumnName: "Text");
    var trainer = context.BinaryClassification.Trainers.SdcaLogisticRegression(options);
    var model = featurizer.Append(trainer).Fit(trainData);

    // Analyze the model's results on the test data.
    var metrics = context.BinaryClassification.Evaluate(model.Transform(testData), "Label");

    // assumes Counts is indexed [actual][predicted] with the positive class first — TODO confirm
    var counts = metrics.ConfusionMatrix.Counts;
    var truePositives = counts[0][0];
    var falsePositives = counts[1][0];
    var falseNegatives = counts[0][1];
    var trueNegatives = counts[1][1];

    var total = truePositives + falsePositives + falseNegatives + trueNegatives;
    var predictedPositives = truePositives + falsePositives;
    var predictedNegatives = falseNegatives + trueNegatives;
    var actualPositives = truePositives + falseNegatives;
    var actualNegatives = falsePositives + trueNegatives;

    var prevalence = actualPositives / total;
    var accuracy = (truePositives + trueNegatives) / total;

    // NOTE(review): the value reported as "Auc" is the area under the precision-recall curve,
    // not the ROC curve — confirm this is what CreateResults is meant to display.
    var auc = metrics.AreaUnderPrecisionRecallCurve;

    var ppv = truePositives / predictedPositives;   // Positive predictive value (PPV), precision
    var fdr = falsePositives / predictedPositives;  // False discovery rate (FDR)
    var falseOmissionRate = falseNegatives / predictedNegatives; // False omission rate (FOR)
    var npv = trueNegatives / predictedNegatives;   // Negative predictive value (NPV)
    var tpr = truePositives / actualPositives;      // True positive rate (TPR), recall, sensitivity
    var fpr = falsePositives / actualNegatives;     // False positive rate (FPR), fall-out (1 - specificity)
    var fnr = falseNegatives / actualPositives;     // False negative rate (FNR), miss rate
    var tnr = trueNegatives / actualNegatives;      // True negative rate (TNR), specificity
    var positiveLikelihoodRatio = tpr / fpr;        // Positive likelihood ratio (LR+)
    var negativeLikelihoodRatio = fnr / tnr;        // Negative likelihood ratio (LR-)
    var diagnosticOddsRatio = positiveLikelihoodRatio / negativeLikelihoodRatio; // Diagnostic odds ratio (DOR)
    var f1 = 2 * ((ppv * tpr) / (ppv + tpr));       // F1 score

    // Build the confusion matrix display.
    CreateConfusionMatrix(trueNegatives, falsePositives, falseNegatives, truePositives);

    // Show the computed results.
    CreateResults(
        prevalence, accuracy, auc, ppv, fdr, falseOmissionRate, npv, tpr, fpr, fnr, tnr,
        positiveLikelihoodRatio, negativeLikelihoodRatio, diagnosticOddsRatio, f1);

    // Store a prediction engine built from the model for later predictions.
    predictor = context.Model.CreatePredictionEngine<Input, Output>(model);

    // Show on screen that training has completed.
    label22.Text = "Model eğitimi tamamlandı.";
}