Ejemplo n.º 1
0
        /// <summary>
        /// Trains a field-aware factorization machine on the ratings file using cross-validation
        /// and returns the model of the fold that reached the highest accuracy.
        /// </summary>
        /// <param name="FilePath">Path of the comma-separated training file; it is loaded with the header flag set.</param>
        /// <returns>
        /// A task producing the transformer of the most accurate fold, or <c>null</c> when
        /// cross-validation produced no results.
        /// </returns>
        private static Task<ITransformer> TrainAndGetBestModel(string FilePath)
        {
            // The scheduler is pinned explicitly: without it StartNew binds to TaskScheduler.Current,
            // which may be a non-default (e.g. UI-bound) scheduler that is free to ignore the
            // LongRunning hint and run the training inline on its own thread.
            return Task.Factory.StartNew(() =>
            {
                MLContext MLC = MLCProvider.Current;

                // Announce the load before it is requested, so the log reflects the real order of work.
                Console.WriteLine("=============== 正在读取训练数据文件 ===============");

                IDataView TrainingDataView = MLC.Data.LoadFromTextFile<BookRating>(FilePath, ',', true);
                TrainingDataView = MLC.Data.Cache(TrainingDataView);

                // Featurize each of the three input columns as text, then merge them into one "Features" vector.
                EstimatorChain<ColumnConcatenatingTransformer> DataPipeLine = MLC.Transforms.Text.FeaturizeText("UserIdFeaturized", nameof(BookRating.UserId))
                                                                              .Append(MLC.Transforms.Text.FeaturizeText("ISBNFeaturized", nameof(BookRating.ISBN)))
                                                                              .Append(MLC.Transforms.Text.FeaturizeText("AgeFeaturized", nameof(BookRating.Age)))
                                                                              .Append(MLC.Transforms.Concatenate("Features", "UserIdFeaturized", "ISBNFeaturized", "AgeFeaturized"));

                Console.WriteLine("=============== 正在使用交叉验证训练预测模型 ===============");

                FieldAwareFactorizationMachineTrainer.Options Options = new FieldAwareFactorizationMachineTrainer.Options
                {
                    Verbose = true,
                    NumberOfIterations = 10,
                    FeatureColumnName = "Features",
                    Shuffle = true
                };

                EstimatorChain<FieldAwareFactorizationMachinePredictionTransformer> TrainingPipeLine = DataPipeLine.Append(MLC.BinaryClassification.Trainers.FieldAwareFactorizationMachine(Options));

                var CVResult = MLC.BinaryClassification.CrossValidate(TrainingDataView, TrainingPipeLine);

                // OrderByDescending is a stable sort, so on an accuracy tie the earliest fold wins.
                return CVResult.OrderByDescending(t => t.Metrics.Accuracy).Select(r => r.Model).FirstOrDefault();
            }, System.Threading.CancellationToken.None, TaskCreationOptions.LongRunning, TaskScheduler.Default);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Exercises the field-aware factorization machine estimator on the breast-cancer training file:
        /// runs the shared estimator checks, fits a model, and then fits a second time while passing
        /// the first model's parameters back into <c>Fit</c>.
        /// </summary>
        public void FieldAwareFactorizationMachine_Estimator()
        {
            var loader = new TextLoader(Env, GetFafmBCLoaderArgs());
            var data = loader.Read(GetDataPath(TestDatasets.breastCancer.trainFilename));

            var trainerOptions = new FieldAwareFactorizationMachineTrainer.Options();
            // One feature column per field: "Feature1" is the first field, the extra columns are fields 2-4.
            trainerOptions.FeatureColumn = "Feature1";
            trainerOptions.ExtraFeatureColumns = new[] { "Feature2", "Feature3", "Feature4" };
            trainerOptions.Shuffle = false;
            trainerOptions.Iters = 3;
            trainerOptions.LatentDim = 7;

            var est = ML.BinaryClassification.Trainers.FieldAwareFactorizationMachine(trainerOptions);
            TestEstimatorCore(est, data);

            var firstModel = est.Fit(data);

            // Second fit reuses the same view as the second argument and hands over the first model;
            // the result is not inspected, the call only has to complete.
            est.Fit(data, data, firstModel.Model);

            Done();
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Trains a field-aware factorization machine with custom field column names on 500 generated
        /// samples and checks that accuracy, AUC and AUPRC on the training data all fall in [0.9, 1].
        /// </summary>
        public void FfmBinaryClassificationWithAdvancedArguments()
        {
            var mlContext = new MLContext(seed: 0);
            var samples = DatasetUtils.GenerateFfmSamples(500);
            var trainingView = mlContext.Data.ReadFromEnumerable(samples);

            // Field0 is the primary feature column; Field1 and Field2 are supplied as the extra fields.
            var trainerOptions = new FieldAwareFactorizationMachineTrainer.Options
            {
                FeatureColumn = nameof(DatasetUtils.FfmExample.Field0),
                ExtraFeatureColumns = new[]
                {
                    nameof(DatasetUtils.FfmExample.Field1),
                    nameof(DatasetUtils.FfmExample.Field2)
                }
            };

            var trainer = mlContext.BinaryClassification.Trainers.FieldAwareFactorizationMachine(trainerOptions);
            var fittedModel = trainer.Fit(trainingView);
            var scored = fittedModel.Transform(trainingView);

            var metrics = mlContext.BinaryClassification.Evaluate(scored);

            // Sanity-check a few headline metrics.
            Assert.InRange(metrics.Accuracy, 0.9, 1);
            Assert.InRange(metrics.Auc, 0.9, 1);
            Assert.InRange(metrics.Auprc, 0.9, 1);
        }
        /// <summary>
        /// Creates a field-aware factorization machine trainer for binary classification
        /// from a fully specified options object.
        /// </summary>
        /// <param name="catalog">The binary classification trainer catalog being extended; must not be null.</param>
        /// <param name="options">Advanced settings forwarded unchanged to the trainer's constructor.</param>
        /// <returns>A new <see cref="FieldAwareFactorizationMachineTrainer"/> bound to the catalog's environment.</returns>
        public static FieldAwareFactorizationMachineTrainer FieldAwareFactorizationMachine(
            this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
            FieldAwareFactorizationMachineTrainer.Options options)
        {
            // Validate the catalog first; the environment is resolved from it.
            Contracts.CheckValue(catalog, nameof(catalog));

            var environment = CatalogUtils.GetEnvironment(catalog);
            var trainer = new FieldAwareFactorizationMachineTrainer(environment, options);
            return trainer;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Statically-typed pipeline entry point that predicts a binary target with a
        /// field-aware factorization machine.
        /// </summary>
        /// <param name="catalog">The binary classifier catalog trainer object (extension receiver; not read in the body).</param>
        /// <param name="label">The label, or dependent variable. Must not be null.</param>
        /// <param name="features">The feature columns, or independent variables. Must be non-empty.</param>
        /// <param name="options">Advanced arguments passed to the trainer; may be null.</param>
        /// <param name="onFit">Optional callback invoked each time the
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this.
        /// It receives the trained model, of type <see cref="FieldAwareFactorizationMachineModelParameters"/>.
        /// The callback is purely informational and cannot alter the training result.</param>
        /// <returns>The score column and the predicted-label column.</returns>
        public static (Scalar<float> score, Scalar<bool> predictedLabel) FieldAwareFactorizationMachine(
            this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
            Scalar<bool> label,
            Vector<float>[] features,
            FieldAwareFactorizationMachineTrainer.Options options,
            Action<FieldAwareFactorizationMachineModelParameters> onFit = null)
        {
            // Required inputs.
            Contracts.CheckValue(label, nameof(label));
            Contracts.CheckNonEmpty(features, nameof(features));

            // Optional inputs.
            Contracts.CheckValueOrNull(options);
            Contracts.CheckValueOrNull(onFit);

            var reconciler = new CustomReconciler((host, labelName, featureNames) =>
            {
                var ffmTrainer = new FieldAwareFactorizationMachineTrainer(host, options);

                // Without a callback the bare trainer is returned as-is.
                if (onFit == null)
                {
                    return ffmTrainer;
                }

                // Otherwise wrap it so the caller is handed the model parameters after every fit.
                return ffmTrainer.WithOnFitDelegate(fitted => onFit(fitted.Model));
            }, label, features);

            return reconciler.Output;
        }
Ejemplo n.º 6
0
        // This example first trains a field-aware factorization machine for
        // binary classification, then measures the trained model's quality, and
        // finally uses the trained model to make predictions.
        public static void Example()
        {
            // Create a new context for ML.NET operations. It can be used for
            // exception tracking and logging, as a catalog of available operations
            // and as the source of randomness. The seed is fixed to a constant
            // in this example to make the outputs deterministic.
            var mlContext = new MLContext(seed: 0);

            // Create a list of training data points.
            // NOTE(review): `data` is enumerated again below by Take(5); this
            // presumably relies on GenerateRandomDataPoints yielding the same
            // sequence on every enumeration - confirm against its definition.
            IEnumerable<DataPoint> data = GenerateRandomDataPoints(500);

            // Convert the list of data points to an IDataView object, which is
            // consumable by the ML.NET API.
            var trainingData = mlContext.Data.LoadFromEnumerable(data);

            // Define trainer options. Field0 is the primary feature column and
            // Field1/Field2 are supplied as the additional fields.
            var options = new FieldAwareFactorizationMachineTrainer.Options
            {
                FeatureColumnName   = nameof(DataPoint.Field0),
                ExtraFeatureColumns =
                    new[] { nameof(DataPoint.Field1), nameof(DataPoint.Field2) },

                LabelColumnName    = nameof(DataPoint.Label),
                LambdaLatent       = 0.01f,
                LambdaLinear       = 0.001f,
                LatentDimension    = 16,
                NumberOfIterations = 50,
                LearningRate       = 0.5f
            };

            // Define the trainer.
            // This trainer trains a field-aware factorization machine (FFM)
            // for binary classification.
            // See https://www.csie.ntu.edu.tw/~cjlin/papers/ffm.pdf for the theory
            // behind it and
            // https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf for the
            // training algorithm implemented in ML.NET.
            var pipeline = mlContext.BinaryClassification.Trainers
                           .FieldAwareFactorizationMachine(options);

            // Train the model.
            var model = pipeline.Fit(trainingData);

            // Run the model on the training data set.
            var transformedTrainingData = model.Transform(trainingData);

            // Measure the quality of the trained model.
            var metrics = mlContext.BinaryClassification
                          .Evaluate(transformedTrainingData);

            // Show the quality metrics.
            PrintMetrics(metrics);

            // Expected output:
            //   Accuracy: 0.99
            //   AUC: 1.00
            //   F1 Score: 0.99
            //   Negative Precision: 1.00
            //   Negative Recall: 0.98
            //   Positive Precision: 0.98
            //   Positive Recall: 1.00
            //   Log Loss: 0.17
            //   Log Loss Reduction: 0.83
            //   Entropy: 1.00
            //
            // NOTE(review): the confusion table below does not agree with the
            // metrics listed above (e.g. positive recall 0.8361 vs. 1.00); it
            // looks stale and should be regenerated from an actual run.
            //
            //  TEST POSITIVE RATIO:    0.4760 (238.0/(238.0+262.0))
            //  Confusion table
            //            ||======================
            //  PREDICTED || positive | negative | Recall
            //  TRUTH     ||======================
            //   positive ||      199 |       39 | 0.8361
            //   negative ||       69 |      193 | 0.7366
            //            ||======================
            //  Precision ||   0.7425 |   0.8319 |

            // Create a prediction function from the trained model.
            var engine = mlContext.Model
                         .CreatePredictionEngine<DataPoint, Result>(model);

            // Make some predictions on the first five data points.
            foreach (var dataPoint in data.Take(5))
            {
                var result = engine.Predict(dataPoint);
                Console.WriteLine($"Actual label: {dataPoint.Label}, "
                                  + $"predicted label: {result.PredictedLabel}, "
                                  + $"score of being positive class: {result.Score}, "
                                  + $"and probability of being positive class: "
                                  + $"{result.Probability}.");
            }

            // Expected output:
            //   Actual label: True, predicted label: True, score of being positive class: 1.115094, and probability of being positive class: 0.7530775.
            //   Actual label: False, predicted label: False, score of being positive class: -3.478797, and probability of being positive class: 0.02992158.
            //   Actual label: True, predicted label: True, score of being positive class: 3.191896, and probability of being positive class: 0.9605282.
            //   Actual label: False, predicted label: False, score of being positive class: -3.400863, and probability of being positive class: 0.03226851.
            //   Actual label: True, predicted label: True, score of being positive class: 4.06056, and probability of being positive class: 0.9830528.
        }