SdcaLogisticRegression(
            this SweepableBinaryClassificationTrainers trainer,
            string labelColumnName   = "Label",
            string featureColumnName = "Features",
            SweepableOption <SdcaLogisticRegressionBinaryTrainer.Options> optionBuilder = null,
            SdcaLogisticRegressionBinaryTrainer.Options defaultOption = null)
        {
            var context = trainer.Context;

            if (optionBuilder == null)
            {
                optionBuilder = SdcaLogisticRegressionBinaryTrainerSweepableOptions.Default;
            }

            optionBuilder.SetDefaultOption(defaultOption);
            return(context.AutoML().CreateSweepableEstimator(
                       (context, option) =>
            {
                option.LabelColumnName = labelColumnName;
                option.FeatureColumnName = featureColumnName;

                return context.BinaryClassification.Trainers.SdcaLogisticRegression(option);
            },
                       optionBuilder,
                       new string[] { labelColumnName, featureColumnName },
                       new string[] { PredictedLabel },
                       nameof(SdcaLogisticRegressionBinaryTrainer)));
        }
        /// <summary>
        /// Predict a target using a linear binary classification model trained with the SDCA trainer, and log-loss.
        /// </summary>
        /// <param name="catalog">The binary classification catalog trainer object.</param>
        /// <param name="label">The label, or dependent variable.</param>
        /// <param name="features">The features, or independent variables.</param>
        /// <param name="weights">The optional example weights.</param>
        /// <param name="options">Advanced arguments to the algorithm. May be <see langword="null"/>, in which case a
        /// default-constructed options object is used. A supplied instance is mutated: its label and feature column
        /// names (and, when a weights column name is provided, its example-weight column name) are overwritten.</param>
        /// <param name="onFit">A delegate that is called every time the
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive
        /// the linear model that was trained, as well as the calibrator on top of that model. Note that this action cannot change the
        /// result in any way; it is only a way for the caller to be informed about what was learnt.</param>
        /// <returns>The set of output columns including in order the predicted binary classification score (which will range
        /// from negative to positive infinity), the calibrated prediction (from 0 to 1), and the predicted label.</returns>
        /// <example>
        /// <format type="text/markdown">
        /// <![CDATA[
        ///  [!code-csharp[SDCA](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Static/SDCABinaryClassification.cs)]
        /// ]]></format>
        /// </example>
        public static (Scalar<float> score, Scalar<float> probability, Scalar<bool> predictedLabel) Sdca(
            this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
            Scalar<bool> label, Vector<float> features, Scalar<float> weights,
            SdcaLogisticRegressionBinaryTrainer.Options options,
            Action<CalibratedModelParametersBase<LinearBinaryModelParameters, PlattCalibrator>> onFit = null)
        {
            Contracts.CheckValue(label, nameof(label));
            Contracts.CheckValue(features, nameof(features));
            Contracts.CheckValueOrNull(weights);
            Contracts.CheckValueOrNull(options);
            Contracts.CheckValueOrNull(onFit);

            // The checks above accept a null 'options', so fall back to defaults instead of
            // dereferencing null inside the reconciler callback.
            var trainerOptions = options ?? new SdcaLogisticRegressionBinaryTrainer.Options();

            var rec = new TrainerEstimatorReconciler.BinaryClassifier(
                (env, labelName, featuresName, weightsName) =>
                {
                    trainerOptions.LabelColumnName = labelName;
                    trainerOptions.FeatureColumnName = featuresName;

                    // Forward the weights column so the 'weights' argument is actually used.
                    // Presumably weightsName is null when no weights column was supplied (TODO confirm);
                    // in that case any weight column already set on the options is left untouched.
                    if (weightsName != null)
                    {
                        trainerOptions.ExampleWeightColumnName = weightsName;
                    }

                    var trainer = new SdcaLogisticRegressionBinaryTrainer(env, trainerOptions);
                    if (onFit != null)
                    {
                        // Surface the trained model to the caller without altering the result.
                        return trainer.WithOnFitDelegate(trans => onFit(trans.Model));
                    }

                    return trainer;
                }, label, features, weights);

            return rec.Output;
        }
Exemple #3
0
        /// <summary>
        /// Trains an SDCA logistic-regression binary classifier on the file at <c>TrainDataPath</c>, then
        /// predicts every sample returned by <c>InitDataSet()</c> and prints the percentage predicted correctly.
        /// </summary>
        /// <param name="mlc">The ML.NET context used for loading, training and prediction.</param>
        private void MLNETTrain(MLContext mlc)
        {
            // Load the raw rows and convert the "Class" column into a Boolean "Lable" column,
            // which is the column the trainer below is configured to use as its label.
            IDataView trainData = mlc.Data.LoadFromTextFile<model1>(TrainDataPath);
            var labelConversion = mlc.Transforms.Conversion.ConvertType(new[]
            {
                new InputOutputColumnPair("Lable", "Class")
            }, DataKind.Boolean);
            var transformedData = labelConversion.Fit(trainData).Transform(trainData);
            var convertedData = mlc.Data.CreateEnumerable<ModelResult>(transformedData, true);

            trainData = mlc.Data.LoadFromEnumerable<ModelResult>(convertedData);

            var featurizer = mlc.Transforms.Concatenate("Features", new[] { "X1", "X2" });
            var options = new SdcaLogisticRegressionBinaryTrainer.Options()
            {
                LabelColumnName           = "Lable",
                FeatureColumnName         = "Features",
                MaximumNumberOfIterations = 10,
                L2Regularization          = 1.0f,
                L1Regularization          = 1.0f,
                BiasLearningRate          = 0.01f,
            };
            var trainer = mlc.BinaryClassification.Trainers.SdcaLogisticRegression(options);
            ITransformer model = featurizer.Append(trainer).Fit(trainData);

            var models = InitDataSet();
            var pe = mlc.Model.CreatePredictionEngine<model1, Predicate>(model);
            int tcnt = 0;

            for (int i = 0; i < models.Length; i++)
            {
                var prediction = pe.Predict(models[i]);

                // Compare the actual class with the predicted one. The comparison is split into a
                // named boolean on purpose: written inline as `Class == 1 ? true : false == PreLable`,
                // `==` binds tighter than `?:`, which would count every Class == 1 sample as correct.
                bool actual = models[i].Class == 1;
                if (actual == prediction.PreLable)
                {
                    tcnt++;
                }
            }

            Console.WriteLine("The ML.NET Predicate Correct Rate is {0}%", ((double)tcnt / models.Length) * 100);
        }
Exemple #4
0
        /// <summary>
        /// Builds an SDCA logistic-regression binary trainer whose options are copied from <paramref name="param"/>.
        /// </summary>
        /// <param name="context">The ML.NET context that supplies the trainer catalog.</param>
        /// <param name="param">Source of the column names and L1/L2 regularization values.</param>
        /// <returns>The configured trainer estimator.</returns>
        public override IEstimator<ITransformer> BuildFromOption(MLContext context, SdcaOption param)
        {
            var trainerOptions = new SdcaLogisticRegressionBinaryTrainer.Options();

            // Column bindings.
            trainerOptions.LabelColumnName = param.LabelColumnName;
            trainerOptions.FeatureColumnName = param.FeatureColumnName;
            trainerOptions.ExampleWeightColumnName = param.ExampleWeightColumnName;

            // Regularization values.
            trainerOptions.L1Regularization = param.L1Regularization;
            trainerOptions.L2Regularization = param.L2Regularization;

            // Thread count comes from the AutoML utility helper rather than from 'param'.
            trainerOptions.NumberOfThreads = AutoMlUtils.GetNumberOfThreadFromEnvrionment();

            return context.BinaryClassification.Trainers.SdcaLogisticRegression(trainerOptions);
        }
Exemple #5
0
        // This example uses the adult income dataset. The task is to predict, from demographic
        // information about a person, whether that person's income is above $50K.
        // Dataset details: https://archive.ics.uci.edu/ml/datasets/adult.
        public static void Example()
        {
            // The context is created with a fixed seed so that the outputs are deterministic.
            var ml = new MLContext(seed: 0);

            // Download and featurize the dataset, holding out 10% of it for testing.
            var featurizedData = Microsoft.ML.SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(ml);
            var split = ml.Data.TrainTestSplit(featurizedData, testFraction: 0.1);

            // Trainer options: convergence tolerance, maximum number of passes over the
            // training data, and a slightly larger weight for positive-class instances.
            var trainerOptions = new SdcaLogisticRegressionBinaryTrainer.Options();
            trainerOptions.ConvergenceTolerance = 0.05f;
            trainerOptions.MaximumNumberOfIterations = 30;
            trainerOptions.PositiveInstanceWeight = 1.2f;

            // Build the trainer and fit it to the training portion.
            var trainer = ml.BinaryClassification.Trainers.SdcaLogisticRegression(trainerOptions);
            var trainedModel = trainer.Fit(split.TrainSet);

            // Score the held-out portion and print the evaluation metrics.
            var scoredTestSet = trainedModel.Transform(split.TestSet);
            var evaluation = ml.BinaryClassification.Evaluate(scoredTestSet);
            Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(evaluation);

            // Expected output:
            //   Accuracy: 0.85
            //   AUC: 0.90
            //   F1 Score: 0.66
            //   Negative Precision: 0.89
            //   Negative Recall: 0.92
            //   Positive Precision: 0.70
            //   Positive Recall: 0.63
            //   LogLoss: 0.47
            //   LogLossReduction: 39.77
            //   Entropy: 0.78
        }
Exemple #6
0
        /// <summary>
        /// Trains an SDCA logistic-regression text classifier on the CSV file at <c>_path</c>, evaluates it on a
        /// 20% hold-out split, pushes the confusion matrix and derived statistics to the UI helpers, and stores
        /// a prediction engine in <c>predictor</c>.
        /// </summary>
        public void EgitimYap()
        {
            label22.Text = "Model eğitiliyor...";
            var ml = new MLContext(seed: 0);

            // Load the comma-separated data (with header) and split it 80/20 into train/test.
            var allRows = ml.Data.LoadFromTextFile<Input>(_path, hasHeader: true, separatorChar: ',');
            var split = ml.Data.TrainTestSplit(allRows, testFraction: 0.2, seed: 0);

            // Trainer settings: convergence tolerance and the cap on passes over the training data.
            // PositiveInstanceWeight is deliberately not set here.
            var trainerOptions = new SdcaLogisticRegressionBinaryTrainer.Options();
            trainerOptions.ConvergenceTolerance = 0.05f;
            trainerOptions.MaximumNumberOfIterations = 1000;

            // Featurize the "Text" column into "Features", then train.
            var featurizer = ml.Transforms.Text.FeaturizeText(outputColumnName: "Features", inputColumnName: "Text");
            var trainer = ml.BinaryClassification.Trainers.SdcaLogisticRegression(trainerOptions);
            var model = featurizer.Append(trainer).Fit(split.TrainSet);

            // Evaluate on the hold-out set.
            var scored = model.Transform(split.TestSet);
            var metrics = ml.BinaryClassification.Evaluate(scored, "Label");

            // Confusion-matrix cells, read as [0][0], [1][0], [0][1], [1][1].
            var counts = metrics.ConfusionMatrix.Counts;
            var truePos = counts[0][0];
            var falsePos = counts[1][0];
            var falseNeg = counts[0][1];
            var trueNeg = counts[1][1];

            // Shared denominators.
            var total = truePos + falsePos + falseNeg + trueNeg;
            var predictedPos = truePos + falsePos;
            var predictedNeg = falseNeg + trueNeg;
            var actualPos = truePos + falseNeg;
            var actualNeg = falsePos + trueNeg;

            var prevalence = actualPos / total;
            var accuracy = (truePos + trueNeg) / total;
            // NOTE(review): this value is passed on as "Auc" but is the area under the
            // precision-recall curve, not the ROC curve — confirm that this is intended.
            var auc = metrics.AreaUnderPrecisionRecallCurve;

            var ppv = truePos / predictedPos;       // Positive predictive value (precision)
            var fdr = falsePos / predictedPos;      // False discovery rate
            var falseOmission = falseNeg / predictedNeg; // False omission rate
            var npv = trueNeg / predictedNeg;       // Negative predictive value

            var tpr = truePos / actualPos;          // True positive rate (recall, sensitivity)
            var fpr = falsePos / actualNeg;         // False positive rate (fall-out)
            var fnr = falseNeg / actualPos;         // False negative rate (miss rate)
            var tnr = trueNeg / actualNeg;          // True negative rate (specificity)

            var lrPlus = tpr / fpr;                 // Positive likelihood ratio (LR+)
            var lrMinus = fnr / tnr;                // Negative likelihood ratio (LR-)
            var dor = lrPlus / lrMinus;             // Diagnostic odds ratio
            var f1 = 2 * ((ppv * tpr) / (ppv + tpr)); // F1 score

            CreateConfusionMatrix(trueNeg, falsePos, falseNeg, truePos);
            CreateResults(prevalence, accuracy, auc, ppv, fdr, falseOmission, npv, tpr, fpr, fnr, tnr, lrPlus, lrMinus, dor, f1);

            // Keep a prediction engine around for later single-item predictions.
            predictor = ml.Model.CreatePredictionEngine<Input, Output>(model);
            label22.Text = "Model eğitimi tamamlandı.";
        }
        /// <summary>
        /// Trains an SDCA logistic-regression binary classifier on 1000 generated data points, prints the first
        /// five predictions made on 500 separately generated test points, and prints the evaluation metrics.
        /// </summary>
        public static void Example()
        {
            // A fixed seed keeps the outputs deterministic.
            var ml = new MLContext(seed: 0);

            // Generate the training points, wrap them in an IDataView, and cache the view in memory:
            // ML.NET does not cache by default, and the trainer makes many passes over the data.
            var trainingView = ml.Data.Cache(ml.Data.LoadFromEnumerable(GenerateRandomDataPoints(1000)));

            // Trainer options: convergence tolerance, maximum number of passes over the
            // training data, and a slightly larger weight for positive-class instances.
            var trainerOptions = new SdcaLogisticRegressionBinaryTrainer.Options();
            trainerOptions.ConvergenceTolerance = 0.05f;
            trainerOptions.MaximumNumberOfIterations = 30;
            trainerOptions.PositiveInstanceWeight = 1.2f;

            // Define the trainer and train the model.
            var trainedModel = ml.BinaryClassification.Trainers
                .SdcaLogisticRegression(trainerOptions)
                .Fit(trainingView);

            // The test data is generated with a different seed so it differs from the training data.
            var testView = ml.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123));
            var scoredTestView = trainedModel.Transform(testView);

            // Materialize the scored rows and show the first five.
            var predictions = ml.Data.CreateEnumerable<Prediction>(scoredTestView, reuseRowObject: false).ToList();
            foreach (var p in predictions.Take(5))
            {
                Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}");
            }

            // Expected output:
            //   Label: True, Prediction: True
            //   Label: False, Prediction: False
            //   Label: True, Prediction: True
            //   Label: True, Prediction: True
            //   Label: False, Prediction: True

            // Overall metrics on the test set.
            PrintMetrics(ml.BinaryClassification.Evaluate(scoredTestView));

            // Expected output:
            //   Accuracy: 0.60
            //   AUC: 0.67
            //   F1 Score: 0.65
            //   Negative Precision: 0.69
            //   Negative Recall: 0.45
            //   Positive Precision: 0.56
            //   Positive Recall: 0.77
        }
Exemple #8
0
        // Trains the model on the prepared data set, evaluates it on a hold-out split, and
        // publishes the results to the UI.
        public void EgitimYap()
        {
            label22.Text = "Model eğitiliyor...";

            // Create a new ML.NET context with a fixed seed.
            var ml = new MLContext(seed: 0);

            // Load the rows from the file at _path; the file has a header and uses a comma separator.
            var allRows = ml.Data.LoadFromTextFile<Input>(_path, hasHeader: true, separatorChar: ',');

            // Split into 80% training and 20% test data.
            var split = ml.Data.TrainTestSplit(allRows, testFraction: 0.2, seed: 0);

            // Trainer settings: convergence tolerance and the cap on iterations over the training data.
            // PositiveInstanceWeight is deliberately not set here.
            var trainerOptions = new SdcaLogisticRegressionBinaryTrainer.Options();
            trainerOptions.ConvergenceTolerance = 0.05f;
            trainerOptions.MaximumNumberOfIterations = 100000;

            // Featurize the "Text" column into "Features" and train with logistic regression.
            var featurizer = ml.Transforms.Text.FeaturizeText(outputColumnName: "Features", inputColumnName: "Text");
            var trainer = ml.BinaryClassification.Trainers.SdcaLogisticRegression(trainerOptions);
            var model = featurizer.Append(trainer).Fit(split.TrainSet);

            // Score the test data and compute the evaluation metrics.
            var scored = model.Transform(split.TestSet);
            var metrics = ml.BinaryClassification.Evaluate(scored, "Label");

            // Confusion-matrix cells, read as [0][0], [1][0], [0][1], [1][1].
            var counts = metrics.ConfusionMatrix.Counts;
            var truePos = counts[0][0];
            var falsePos = counts[1][0];
            var falseNeg = counts[0][1];
            var trueNeg = counts[1][1];

            // Shared denominators.
            var total = truePos + falsePos + falseNeg + trueNeg;
            var predictedPos = truePos + falsePos;
            var predictedNeg = falseNeg + trueNeg;
            var actualPos = truePos + falseNeg;
            var actualNeg = falsePos + trueNeg;

            var prevalence = actualPos / total;
            var accuracy = (truePos + trueNeg) / total;
            // NOTE(review): this value is passed on as "Auc" but is the area under the
            // precision-recall curve, not the ROC curve — confirm that this is intended.
            var auc = metrics.AreaUnderPrecisionRecallCurve;

            var ppv = truePos / predictedPos;       // Positive predictive value (precision)
            var fdr = falsePos / predictedPos;      // False discovery rate
            var falseOmission = falseNeg / predictedNeg; // False omission rate
            var npv = trueNeg / predictedNeg;       // Negative predictive value

            var tpr = truePos / actualPos;          // True positive rate (recall, sensitivity)
            var fpr = falsePos / actualNeg;         // False positive rate (fall-out)
            var fnr = falseNeg / actualPos;         // False negative rate (miss rate)
            var tnr = trueNeg / actualNeg;          // True negative rate (specificity)

            var lrPlus = tpr / fpr;                 // Positive likelihood ratio (LR+)
            var lrMinus = fnr / tnr;                // Negative likelihood ratio (LR-)
            var dor = lrPlus / lrMinus;             // Diagnostic odds ratio
            var f1 = 2 * ((ppv * tpr) / (ppv + tpr)); // F1 score

            // Draw the confusion matrix, then show the derived statistics.
            CreateConfusionMatrix(trueNeg, falsePos, falseNeg, truePos);
            CreateResults(prevalence, accuracy, auc, ppv, fdr, falseOmission, npv, tpr, fpr, fnr, tnr, lrPlus, lrMinus, dor, f1);

            // Store a prediction engine in 'predictor' so the model can be used for predictions later.
            predictor = ml.Model.CreatePredictionEngine<Input, Output>(model);

            // Tell the user that training has finished.
            label22.Text = "Model eğitimi tamamlandı.";
        }