[ConditionalFact(typeof(Environment), nameof(Environment.Is64BitProcess))] // This test is being fixed as part of issue #1441.
        public void MatrixFactorization_Estimator()
        {
            // Column names used to load the valid data set and to configure the trainer.
            const string labelName = "Label";
            const string columnIndexName = "Col";
            const string rowIndexName = "Row";

            // Valid input: the training file loaded with columns Label, Col and Row, i.e. the same names
            // that are handed to the trainer below.
            var validLoader = new TextLoader(Env, GetLoaderArgs(labelName, columnIndexName, rowIndexName));
            var validData = validLoader.Read(
                new MultiFileSource(GetDataPath(TestDatasets.trivialMatrixFactorization.trainFilename)));

            // Invalid input: the test file loaded with columns Label, ColRenamed and RowRenamed, so neither
            // Col nor Row exists in its schema.
            var renamedLoader = new TextLoader(Env, GetLoaderArgs(labelName, columnIndexName + "Renamed", rowIndexName + "Renamed"));
            var invalidData = renamedLoader.Read(
                new MultiFileSource(GetDataPath(TestDatasets.trivialMatrixFactorization.testFilename)));

            // A deliberately small configuration: 3 iterations, a single thread, K = 4.
            var estimator = new MatrixFactorizationTrainer(
                Env, columnIndexName, rowIndexName, labelName,
                advancedSettings: settings =>
                {
                    settings.NumIterations = 3;
                    settings.NumThreads = 1;
                    settings.K = 4;
                });

            // Run the shared estimator checks against the valid data and the schema-incompatible data.
            TestEstimatorCore(estimator, validData, invalidInput: invalidData);

            Done();
        }
Exemple #2
0
        [ConditionalFact(typeof(Environment), nameof(Environment.Is64BitProcess))] // This test is being fixed as part of issue #1441.
        public void MatrixFactorizationInMemoryData()
        {
            // Build the synthetic matrix in memory: one MatrixElement per (column, row) cell whose value is
            // (column + row) % 5.
            var cells = new List<MatrixElement>();

            for (uint col = _synthesizedMatrixFirstColumnIndex; col < _synthesizedMatrixFirstColumnIndex + _synthesizedMatrixColumnCount; ++col)
            {
                for (uint row = _synthesizedMatrixFirstRowIndex; row < _synthesizedMatrixFirstRowIndex + _synthesizedMatrixRowCount; ++row)
                {
                    cells.Add(new MatrixElement
                    {
                        MatrixColumnIndex = col,
                        MatrixRowIndex = row,
                        Value = (col + row) % 5
                    });
                }
            }

            // Wrap the list in an IDataView so that ML.NET components can consume it.
            var trainingView = ComponentCreation.CreateDataView(Env, cells);

            // Configure the trainer with the three column names "Value", "MatrixColumnIndex" and "MatrixRowIndex",
            // passed in that order.
            var mlContext = new MLContext(seed: 1, conc: 1);
            var trainer = new MatrixFactorizationTrainer(
                mlContext, "Value", "MatrixColumnIndex", "MatrixRowIndex",
                advancedSettings: settings =>
                {
                    settings.NumIterations = 10;
                    settings.NumThreads = 1; // To eliminate randomness, # of threads must be 1.
                    settings.K = 32;
                });

            // Train a matrix factorization model.
            var model = trainer.Fit(trainingView);

            // The trained model must report the expected index column names, and both index columns must be key-typed.
            Assert.True(model.MatrixColumnIndexColumnName == "MatrixColumnIndex");
            Assert.True(model.MatrixRowIndexColumnName == "MatrixRowIndex");
            Assert.True(model.MatrixColumnIndexColumnType.IsKey);
            Assert.True(model.MatrixRowIndexColumnType.IsKey);

            // The column key type must start at the first synthesized column index and span the synthesized column count.
            var columnKeyType = model.MatrixColumnIndexColumnType.AsKey;
            Assert.True(columnKeyType.Min == _synthesizedMatrixFirstColumnIndex);
            Assert.True(columnKeyType.Count == _synthesizedMatrixColumnCount);

            // Same check for the row key type.
            var rowKeyType = model.MatrixRowIndexColumnType.AsKey;
            Assert.True(rowKeyType.Min == _synthesizedMatrixFirstRowIndex);
            Assert.True(rowKeyType.Count == _synthesizedMatrixRowCount);

            // Score the training set with the trained model.
            var scored = model.Transform(trainingView);

            // Compute regression metrics for the scored data.
            var metrics = mlContext.Regression.Evaluate(scored, label: "Value", score: "Score");

            // Naive sanity check: the pipeline ran and the L2 metric on the training set is below 0.1.
            Assert.True(metrics.L2 < 0.1);
        }
        // Trains one matrix factorization model per entry in `countries`, saves each trained model as
        // "<country>-model.zip", and returns the "Index" view carrying the total elapsed time in milliseconds.
        public IActionResult TrainModels()
        {
            var timer = Stopwatch.StartNew();

            foreach (string country in countries)
            {
                // A fresh context is created for every country.
                var mlContext = new MLContext();

                var dataSetPath = GetDataPath($"{country}-{datasetName}");

                // Schema of the tab-separated input: Label is read from column 0, ProductID (key, count 77)
                // also from column 0, and CoboughtProductID (key, count 77) from column 1.
                var columns = new[]
                {
                    new TextLoader.Column(name: "Label", dataKind: DataKind.Double, index: 0),
                    new TextLoader.Column(
                        name: nameof(ProductCobought.ProductID),
                        dataKind: DataKind.UInt32,
                        source: new[] { new TextLoader.Range(0) },
                        keyCount: new KeyCount(77)),
                    new TextLoader.Column(
                        name: nameof(ProductCobought.CoboughtProductID),
                        dataKind: DataKind.UInt32,
                        source: new[] { new TextLoader.Range(1) },
                        keyCount: new KeyCount(77)),
                };

                IDataView dataView = mlContext.Data.LoadFromTextFile(
                    path: dataSetPath,
                    columns: columns,
                    hasHeader: true,
                    separatorChar: '\t');

                // One-class square loss, with ProductID as the matrix column index and CoboughtProductID
                // as the matrix row index.
                var options = new MatrixFactorizationTrainer.Options
                {
                    MatrixColumnIndexColumnName = nameof(ProductCobought.ProductID),
                    MatrixRowIndexColumnName = nameof(ProductCobought.CoboughtProductID),
                    LabelColumnName = "Label",
                    LossFunction = MatrixFactorizationTrainer.LossFunctionType.SquareLossOneClass,
                    Alpha = 0.01,
                    Lambda = 0.025,
                    C = 0.00001
                };

                MatrixFactorizationTrainer trainer = mlContext.Recommendation().Trainers.MatrixFactorization(options);
                ITransformer trainedModel = trainer.Fit(dataView);

                // Persist the model together with the schema of the data it was trained on.
                mlContext.Model.Save(
                    trainedModel,
                    inputSchema: dataView.Schema,
                    filePath: GetDataPath($"{country}-model.zip"));
            }

            timer.Stop();

            var viewModel = CreateHomeIndexViewModel();
            viewModel.Milliseconds = timer.ElapsedMilliseconds;

            return View("Index", viewModel);
        }
Exemple #4
0
        // Builds the training pipeline: two value-to-key conversions ("RaterId" and "MovieId", each call
        // passing the same name for both arguments) followed by the trainer returned by GetTrainer.
        public static IEstimator<ITransformer> BuildTrainingPipeline(MLContext mlContext)
        {
            // Data preparation stage: key-encode the rater and movie identifier columns.
            var raterToKey = mlContext.Transforms.Conversion.MapValueToKey("RaterId", "RaterId");
            var movieToKey = mlContext.Transforms.Conversion.MapValueToKey("MovieId", "MovieId");
            var preparation = raterToKey.Append(movieToKey);

            // Training stage: append the matrix factorization trainer to the preparation steps.
            MatrixFactorizationTrainer trainer = GetTrainer(mlContext);

            return preparation.Append(trainer);
        }
Exemple #5
0
        /// <summary>
        /// Predicts a matrix entry using matrix factorization.
        /// </summary>
        /// <typeparam name="T">The type of physical value of matrix's row and column index. It must be an integer type such as uint.</typeparam>
        /// <param name="ctx">The regression context trainer object.</param>
        /// <param name="label">The label variable. Must not be null.</param>
        /// <param name="matrixColumnIndex">The column index of the considered matrix. Must not be null.</param>
        /// <param name="matrixRowIndex">The row index of the considered matrix. Must not be null.</param>
        /// <param name="regularizationCoefficient">Regularization coefficient, assigned to the trainer's <c>Lambda</c> setting. Must be non-negative.</param>
        /// <param name="approximationRank">Rank of the two factor matrices whose product is used to approximate the considered matrix;
        /// assigned to the trainer's <c>K</c> setting. Must be positive.</param>
        /// <param name="learningRate">Initial learning rate, assigned to the trainer's <c>Eta</c> setting. Must be positive.</param>
        /// <param name="numIterations">Number of training iterations. Must be positive.</param>
        /// <param name="advancedSettings">An optional delegate to set more settings. It runs after the four settings above
        /// have been assigned, so it may overwrite them.</param>
        /// <param name="onFit">A delegate that is called every time the
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive
        /// the model that was trained.  Note that this action cannot change the result in any way; it is only a way for the caller to
        /// be informed about what was learnt.</param>
        /// <returns>The predicted output.</returns>
        public static Scalar<float> MatrixFactorization<T>(this RegressionContext.RegressionTrainers ctx,
                                                           Scalar<float> label, Key<T> matrixColumnIndex, Key<T> matrixRowIndex,
                                                           float regularizationCoefficient = 0.1f,
                                                           int approximationRank = 8,
                                                           float learningRate = 0.1f,
                                                           int numIterations = 20,
                                                           Action<MatrixFactorizationTrainer.Arguments> advancedSettings = null,
                                                           Action<MatrixFactorizationPredictor> onFit = null)
        {
            // The three column handles are mandatory.
            Contracts.CheckValue(label, nameof(label));
            Contracts.CheckValue(matrixColumnIndex, nameof(matrixColumnIndex));
            Contracts.CheckValue(matrixRowIndex, nameof(matrixRowIndex));

            // Range checks on the numeric hyper-parameters.
            Contracts.CheckParam(regularizationCoefficient >= 0, nameof(regularizationCoefficient), "Must be non-negative");
            Contracts.CheckParam(approximationRank > 0, nameof(approximationRank), "Must be positive");
            Contracts.CheckParam(learningRate > 0, nameof(learningRate), "Must be positive");
            Contracts.CheckParam(numIterations > 0, nameof(numIterations), "Must be positive");

            // Both delegates are optional.
            Contracts.CheckValueOrNull(advancedSettings);
            Contracts.CheckValueOrNull(onFit);

            var reconciler = new MatrixFactorizationReconciler<T>(
                (env, labelColName, matrixColumnIndexColName, matrixRowIndexColName) =>
                {
                    var trainer = new MatrixFactorizationTrainer(
                        env, labelColName, matrixColumnIndexColName, matrixRowIndexColName,
                        advancedSettings: settings =>
                        {
                            settings.Lambda = regularizationCoefficient;
                            settings.K = approximationRank;
                            settings.Eta = learningRate;
                            settings.NumIterations = numIterations;

                            // The caller's delegate runs last, so it may overwrite any of the values set above.
                            advancedSettings?.Invoke(settings);
                        });

                    // Without an onFit callback the bare trainer is returned.
                    if (onFit == null)
                    {
                        return trainer;
                    }

                    // Otherwise wrap the trainer so the callback receives the trained model after each fit.
                    return trainer.WithOnFitDelegate(transformer => onFit(transformer.Model));
                },
                label, matrixColumnIndex, matrixRowIndex);

            return reconciler.Output;
        }
        /// <summary>
        /// Loads the given product entries, trains a one-class matrix factorization model on 80% of them,
        /// evaluates it on the remaining 20%, and logs the resulting regression metrics.
        /// </summary>
        /// <param name="products">The product entries to train and evaluate on.</param>
        /// <returns>The trained model as an instance of <see cref="ITransformer"/>.</returns>
        private ITransformer LoadDataAndTrain(IEnumerable<ProductEntry> products)
        {
            // Wrap the in-memory product entries in an IDataView.
            var productData = this.mlContext.Data.LoadFromEnumerable(data: products);

            // Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
            var split = this.mlContext.Data.TrainTestSplit(productData, testFraction: 0.2, seed: 1);
            var trainDataView = split.TrainSet;
            var testDataView = split.TestSet;

            // Train on a cached copy of the training split.
            var cachedData = this.mlContext.Data.Cache(trainDataView);

            // Trainer configuration: ProductId is the matrix column index, CoPurchaseProductId the matrix row
            // index, and Label the label column. The loss is one-class square loss with the hyper-parameters
            // Alpha, Lambda, ApproximationRank and C set below.
            var options = new MatrixFactorizationTrainer.Options
            {
                MatrixColumnIndexColumnName = nameof(ProductEntry.ProductId),
                MatrixRowIndexColumnName = nameof(ProductEntry.CoPurchaseProductId),
                LabelColumnName = nameof(ProductEntry.Label),
                LossFunction = MatrixFactorizationTrainer.LossFunctionType.SquareLossOneClass,
                Alpha = 0.01,
                Lambda = 0.025,
                ApproximationRank = 128,
                C = 0.00001
            };

            // Create the matrix factorization trainer from the options.
            MatrixFactorizationTrainer est = this.mlContext.Recommendation().Trainers.MatrixFactorization(options: options);

            // Fit the model to the cached training data.
            ITransformer trainedModel = est.Fit(input: cachedData);

            // Score the held-out split and compute regression metrics using the evaluator's default column names.
            var predictions = trainedModel.Transform(testDataView);
            var metrics = this.mlContext.Regression.Evaluate(predictions);

            this.log.Information($"The model evaluation metrics RootMeanSquaredError:{metrics.RootMeanSquaredError}, LossFunction:{metrics.LossFunction}, MeanAbsoluteError:{metrics.MeanAbsoluteError}, MeanSquaredError:{metrics.MeanSquaredError}");

            return trainedModel;
        }
Exemple #7
0
        // Trains a matrix factorization model on the trivial training file, scores the test file, and verifies
        // (a) the names of the output schema columns and (b) that the L2 metric lies within tolerance of one of
        // the recorded Windows, Mac or Linux baselines.
        public void MatrixFactorizationSimpleTrainAndPredict()
        {
            using (var env = new LocalEnvironment(seed: 1, conc: 1))
            {
                // Specific column names of the considered data set
                string labelColumnName = "Label";
                string userColumnName  = "User";
                string itemColumnName  = "Item";
                string scoreColumnName = "Score";

                // Create reader for both of training and test data sets
                var reader = new TextLoader(env, GetLoaderArgs(labelColumnName, userColumnName, itemColumnName));

                // Read training data as an IDataView object
                var data = reader.Read(new MultiFileSource(GetDataPath(TestDatasets.trivialMatrixFactorization.trainFilename)));

                // Create a pipeline with a single operator.
                var pipeline = new MatrixFactorizationTrainer(env, labelColumnName, userColumnName, itemColumnName,
                                                              advancedSettings: s =>
                {
                    s.NumIterations = 3;
                    s.NumThreads    = 1;  // To eliminate randomness, # of threads must be 1.
                    s.K             = 7;
                });

                // Train a matrix factorization model.
                var model = pipeline.Fit(data);

                // Read the test data set as an IDataView
                var testData = reader.Read(new MultiFileSource(GetDataPath(TestDatasets.trivialMatrixFactorization.testFilename)));

                // Apply the trained model to the test set
                var prediction = model.Transform(testData);

                // Get output schema and check its column names. Assert.Equal reports both the expected and the
                // actual name when a column does not match.
                var outputSchema        = model.GetOutputSchema(data.Schema);
                var expectedOutputNames = new string[] { labelColumnName, userColumnName, itemColumnName, scoreColumnName };
                foreach (var (i, col) in outputSchema.GetColumns())
                {
                    Assert.Equal(expectedOutputNames[i], col.Name);
                }

                // The label column must exist in the test data and the score column in the scored data.
                // The lookup results used to be ignored; assert them so a missing column fails here with a
                // clear message. The indices themselves are not needed, hence the discards.
                Assert.True(testData.Schema.TryGetColumnIndex(labelColumnName, out _));
                Assert.True(prediction.Schema.TryGetColumnIndex(scoreColumnName, out _));

                // Compute prediction errors
                var mlContext = new MLContext();
                var metrices  = mlContext.Regression.Evaluate(prediction, label: labelColumnName, score: scoreColumnName);

                // Determine if the selected metric is reasonable for different platforms: the observed L2 must lie
                // strictly within `tolerance` of at least one of the recorded baselines.
                var    expectedWindowsL2Error = 0.61528733643754685; // Windows baseline
                var    expectedMacL2Error     = 0.61192207960271;    // Mac baseline
                var    expectedLinuxL2Error   = 0.616821448679879;   // Linux baseline
                double tolerance      = System.Math.Pow(10, -DigitsOfPrecision);
                bool   inWindowsRange = expectedWindowsL2Error - tolerance < metrices.L2 && metrices.L2 < expectedWindowsL2Error + tolerance;
                bool   inMacRange     = expectedMacL2Error - tolerance < metrices.L2 && metrices.L2 < expectedMacL2Error + tolerance;
                bool   inLinuxRange   = expectedLinuxL2Error - tolerance < metrices.L2 && metrices.L2 < expectedLinuxL2Error + tolerance;
                Assert.True(inWindowsRange || inMacRange || inLinuxRange);
            }
        }
        // For every country in `countries`: loads that country's tab-separated data set, trains a one-class
        // matrix factorization model, and saves it as "<country>-model.zip". Returns the "Index" view with the
        // overall elapsed time in milliseconds.
        public IActionResult TrainModels()
        {
            var elapsed = Stopwatch.StartNew();

            foreach (string country in countries)
            {
                var mlContext = new MLContext();

                var inputPath = GetDataPath($"{country}-{datasetName}");

                // Label is read from column 0. The two product id columns are key-typed with KeyCount(77); per the
                // original author's note, the key count is the cardinality (the maximum valid value), and these
                // columns are used internally when training the model. When results are shown, the columns are
                // mapped to instances of the application's model, which could have a different cardinality but
                // happens to have the same one.
                var productIdColumn = new TextLoader.Column(
                    name: nameof(ProductCobought.ProductID),
                    dataKind: DataKind.UInt32,
                    source: new[] { new TextLoader.Range(0) },
                    keyCount: new KeyCount(77));
                var coboughtIdColumn = new TextLoader.Column(
                    name: nameof(ProductCobought.CoboughtProductID),
                    dataKind: DataKind.UInt32,
                    source: new[] { new TextLoader.Range(1) },
                    keyCount: new KeyCount(77));
                var schema = new[]
                {
                    new TextLoader.Column(name: "Label", dataKind: DataKind.Double, index: 0),
                    productIdColumn,
                    coboughtIdColumn
                };

                IDataView dataView = mlContext.Data.LoadFromTextFile(
                    path: inputPath,
                    columns: schema,
                    hasHeader: true,
                    separatorChar: '\t');

                // ProductID indexes the matrix columns and CoboughtProductID the matrix rows.
                var options = new MatrixFactorizationTrainer.Options
                {
                    MatrixColumnIndexColumnName = nameof(ProductCobought.ProductID),
                    MatrixRowIndexColumnName = nameof(ProductCobought.CoboughtProductID),
                    LabelColumnName = "Label",
                    LossFunction = MatrixFactorizationTrainer.LossFunctionType.SquareLossOneClass,
                    Alpha = 0.01,
                    Lambda = 0.025,
                    C = 0.00001
                };

                MatrixFactorizationTrainer trainer = mlContext.Recommendation().Trainers.MatrixFactorization(options);
                ITransformer trainedModel = trainer.Fit(dataView);

                // Store the model together with the schema of its training data.
                mlContext.Model.Save(
                    trainedModel,
                    inputSchema: dataView.Schema,
                    filePath: GetDataPath($"{country}-model.zip"));
            }

            elapsed.Stop();

            var viewModel = CreateHomeIndexViewModel();
            viewModel.Milliseconds = elapsed.ElapsedMilliseconds;

            return View("Index", viewModel);
        }
Exemple #9
0
        // This example builds a matrix in memory, trains a matrix factorization model on it, prints quality
        // metrics for the training set, and finally prints predictions for two individual matrix cells.
        public static void MatrixFactorizationInMemoryData()
        {
            // Build the matrix as a list of (column index, row index, value) elements, where the value of each
            // cell is (column + row) % 5.
            var cells = new List<MatrixElement>();

            for (uint col = _synthesizedMatrixFirstColumnIndex; col < _synthesizedMatrixFirstColumnIndex + _synthesizedMatrixColumnCount; ++col)
            {
                for (uint row = _synthesizedMatrixFirstRowIndex; row < _synthesizedMatrixFirstRowIndex + _synthesizedMatrixRowCount; ++row)
                {
                    cells.Add(new MatrixElement
                    {
                        MatrixColumnIndex = col,
                        MatrixRowIndex = row,
                        Value = (col + row) % 5
                    });
                }
            }

            // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
            // as a catalog of available operations and as the source of randomness.
            var mlContext = new MLContext(seed: 0, conc: 1);

            // Wrap the in-memory matrix in an IDataView so that ML.NET components can consume it.
            var dataView = ComponentCreation.CreateDataView(mlContext, cells);

            // Configure the trainer with the MatrixElement field names; nameof(...) keeps the column names in
            // sync with the class definition.
            var trainer = new MatrixFactorizationTrainer(
                mlContext,
                nameof(MatrixElement.MatrixColumnIndex),
                nameof(MatrixElement.MatrixRowIndex),
                nameof(MatrixElement.Value),
                advancedSettings: settings =>
                {
                    settings.NumIterations = 10;
                    settings.NumThreads = 1; // To eliminate randomness, # of threads must be 1.
                    settings.K = 32;
                });

            // Train a matrix factorization model.
            var model = trainer.Fit(dataView);

            // Score the training set with the trained model.
            var scoredTrainingData = model.Transform(dataView);

            // Compute regression metrics for the scored training set.
            var metrics = mlContext.Regression.Evaluate(
                scoredTrainingData,
                label: nameof(MatrixElement.Value),
                score: nameof(MatrixElementForScore.Score));

            // Print out some metrics for checking the model's quality.
            Console.WriteLine($"L1 - {metrics.L1}");
            Console.WriteLine($"L2 - {metrics.L2}");
            Console.WriteLine($"LossFunction - {metrics.LossFn}");
            Console.WriteLine($"RMS - {metrics.Rms}");
            Console.WriteLine($"RSquared - {metrics.RSquared}");

            // Create two entries to predict. The Score supplied here is a placeholder (0) that will be overwritten
            // by the true prediction. Per the original author's note, if a row or column index is out of range
            // (e.g., MatrixColumnIndex=99999), the predicted value will be NaN.
            var testMatrix = new List<MatrixElementForScore>
            {
                new MatrixElementForScore { MatrixColumnIndex = 1, MatrixRowIndex = 7, Score = 0 },
                new MatrixElementForScore { MatrixColumnIndex = 3, MatrixRowIndex = 6, Score = 0 }
            };

            // Again, convert the test data to a format supported by ML.NET.
            var testDataView = ComponentCreation.CreateDataView(mlContext, testMatrix);

            // Feed the test data into the model and then print every prediction.
            var scoredTestData = model.Transform(testDataView);
            foreach (var pred in scoredTestData.AsEnumerable<MatrixElementForScore>(mlContext, false))
            {
                Console.WriteLine($"Predicted value at row {pred.MatrixRowIndex} and column {pred.MatrixColumnIndex} is {pred.Score}");
            }
        }
        // Trains a matrix factorization model on the trivial training file, scores the test file, and verifies
        // (a) the names of the output schema columns and (b) that the L2 metric matches the recorded baseline
        // for the current operating system (Linux and Windows only; the Mac check is currently disabled).
        [ConditionalFact(typeof(Environment), nameof(Environment.Is64BitProcess))] // This test is being fixed as part of issue #1441.
        public void MatrixFactorizationSimpleTrainAndPredict()
        {
            var mlContext = new MLContext(seed: 1, conc: 1);

            // Specific column names of the considered data set
            string labelColumnName = "Label";
            string userColumnName  = "User";
            string itemColumnName  = "Item";
            string scoreColumnName = "Score";

            // Create reader for both of training and test data sets
            var reader = new TextLoader(mlContext, GetLoaderArgs(labelColumnName, userColumnName, itemColumnName));

            // Read training data as an IDataView object
            var data = reader.Read(new MultiFileSource(GetDataPath(TestDatasets.trivialMatrixFactorization.trainFilename)));

            // Create a pipeline with a single operator.
            var pipeline = new MatrixFactorizationTrainer(mlContext, userColumnName, itemColumnName, labelColumnName,
                                                          advancedSettings: s =>
            {
                s.NumIterations = 3;
                s.NumThreads    = 1;  // To eliminate randomness, # of threads must be 1.
                s.K             = 7;
            });

            // Train a matrix factorization model.
            var model = pipeline.Fit(data);

            // Read the test data set as an IDataView
            var testData = reader.Read(new MultiFileSource(GetDataPath(TestDatasets.trivialMatrixFactorization.testFilename)));

            // Apply the trained model to the test set
            var prediction = model.Transform(testData);

            // Get output schema and check its column names. Assert.Equal reports both the expected and the
            // actual name when a column does not match.
            var outputSchema        = model.GetOutputSchema(data.Schema);
            var expectedOutputNames = new string[] { labelColumnName, userColumnName, itemColumnName, scoreColumnName };

            foreach (var (i, col) in outputSchema.GetColumns())
            {
                Assert.Equal(expectedOutputNames[i], col.Name);
            }

            // The label column must exist in the test data and the score column in the scored data.
            // The lookup results used to be ignored; assert them so a missing column fails here with a
            // clear message. The indices themselves are not needed, hence the discards.
            Assert.True(testData.Schema.TryGetColumnIndex(labelColumnName, out _));
            Assert.True(prediction.Schema.TryGetColumnIndex(scoreColumnName, out _));

            // Compute prediction errors
            var metrices = mlContext.Regression.Evaluate(prediction, label: labelColumnName, score: scoreColumnName);

            // Determine if the selected metric is reasonable for different platforms
            double tolerance = Math.Pow(10, -7);

            if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
            {
                // Linux case
                var expectedUnixL2Error = 0.616821448679879; // Linux baseline
                Assert.InRange(metrices.L2, expectedUnixL2Error - tolerance, expectedUnixL2Error + tolerance);
            }
            else if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
            {
                // The Mac case is just broken. Should be fixed later. Re-enable when done.
                // Until then no metric assertion runs on Mac; the disabled check is kept below for reference.
                //var expectedMacL2Error = 0.61192207960271; // Mac baseline
                //Assert.InRange(metrices.L2, expectedMacL2Error - 5e-3, expectedMacL2Error + 5e-3); // 1e-7 is too small for Mac, so a looser 5e-3 window is used
            }
            else if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
            {
                // Windows case
                var expectedWindowsL2Error = 0.61528733643754685; // Windows baseline
                Assert.InRange(metrices.L2, expectedWindowsL2Error - tolerance, expectedWindowsL2Error + tolerance);
            }
        }
Exemple #11
0
        // Trains a one-class matrix factorization model on the data at TrainingDataLocation and returns, for
        // every id in request.AllItemsIds, the model's score for the pair (request.CustomerNumber, item id),
        // rounded to three decimals.
        //
        // NOTE(review): the method is declared async but contains no await, so it runs synchronously and
        // returns an already-completed task. The modifier is kept so the signature and exception behaviour
        // seen by callers stay unchanged.
        public static async Task<Dictionary<int, double>> Run(Request request)
        {
            // Called with the request's item/customer list before the training data is loaded —
            // presumably it writes the file at TrainingDataLocation; verify against CreateFile.
            CreateFile(request.ItemCustomersList);

            //STEP 1: Create MLContext to be shared across the model creation workflow objects
            var mlContext = new MLContext();

            //STEP 2: Load the training data with TextLoader. Label is read from column 0, CustomerNumber
            //        (key, count 10000) also from column 0, and RelatedItemId (key, count 10000) from column 1.
            IDataView dataView = mlContext.Data.LoadFromTextFile(path: TrainingDataLocation,
                                                                 new[]
            {
                new TextLoader.Column("Label", DataKind.Single, 0),
                new TextLoader.Column(nameof(ProductEntry.CustomerNumber), DataKind.UInt32, new [] { new TextLoader.Range(0) }, new KeyCount(10000)),
                new TextLoader.Column(nameof(ProductEntry.RelatedItemId), DataKind.UInt32, new [] { new TextLoader.Range(1) }, new KeyCount(10000))
            },
                                                                 hasHeader: true);

            //STEP 3: The data is already encoded, so all that is needed is to specify options for
            //        MatrixFactorizationTrainer: the index and label columns, the one-class loss function,
            //        and the Alpha and Lambda hyperparameters.
            var options = new MatrixFactorizationTrainer.Options
            {
                MatrixColumnIndexColumnName = nameof(ProductEntry.CustomerNumber),
                MatrixRowIndexColumnName    = nameof(ProductEntry.RelatedItemId),
                LabelColumnName             = "Label",
                LossFunction = MatrixFactorizationTrainer.LossFunctionType.SquareLossOneClass,
                Alpha        = 0.01,
                Lambda       = 0.025
            };

            // For better results use the following parameters
            //options.K = 100;
            //options.C = 0.00001;

            //STEP 4: Call the MatrixFactorization trainer by passing options.
            MatrixFactorizationTrainer est = mlContext.Recommendation().Trainers.MatrixFactorization(options);

            //STEP 5: Train the model fitting to the DataSet
            ITransformer model = est.Fit(dataView);

            //STEP 6: Create a prediction engine and score every requested item for the requesting customer.
            //        Per the original author's note, the higher the score, the higher the probability of the pairing.
            PredictionEngine<ProductEntry, PredictionScore> predictionEngine = mlContext.Model.CreatePredictionEngine<ProductEntry, PredictionScore>(model);
            var scores = new Dictionary<int, double>();

            foreach (int itemId in request.AllItemsIds)
            {
                var entry = new ProductEntry
                {
                    CustomerNumber = (uint)request.CustomerNumber,
                    RelatedItemId  = (uint)itemId
                };

                PredictionScore predictionScore = predictionEngine.Predict(entry);

                // Indexer assignment instead of Add: if AllItemsIds repeats an id, the entry is overwritten
                // with the score computed from the same (customer, item) input instead of throwing
                // ArgumentException.
                scores[itemId] = Math.Round(predictionScore.Score, 3);
            }

            // The dictionary is returned as built; the former key/value identity copy added nothing.
            return scores;
        }
Exemple #12
0
        /*
         *  Action method used to train the models: one model is trained and saved
         *  for every entry in `countries`.
         *  The Matrix Factorization algorithm (also known as collaborative filtering) is used.
         *  Returns the "Index" view built from CreateHomeIndexViewModel().
         */
        public IActionResult TrainModels()
        {
            foreach (string country in countries)
            {
                MLContext mlContext = new MLContext();

                // Prepare the IDataView instance that carries the input data for the algorithm.

                // Dataset file to load for this country.
                string dataSetPath = GetDataSetPath($"{country}-dataset.txt");

                // Column definitions for the text loader.
                // NOTE(review): "Label" and "ProductID" both read source column 0 - confirm this is intended.
                TextLoader.Column labelColumn = new TextLoader.Column("Label", DataKind.Double, 0);

                TextLoader.Column productColumn = new TextLoader.Column(
                    "ProductID",
                    DataKind.UInt32,
                    new[] { new TextLoader.Range(0) },
                    new KeyCount(200));

                TextLoader.Column relatedProductColumn = new TextLoader.Column(
                    "RelatedProductID",
                    DataKind.UInt32,
                    new[] { new TextLoader.Range(1) },
                    new KeyCount(200));

                TextLoader.Column[] columnLayout = { labelColumn, productColumn, relatedProductColumn };

                // The dataset file has a header row and its columns are separated with tabs.
                IDataView dataView = mlContext.Data.LoadFromTextFile(
                    dataSetPath,
                    columnLayout,
                    separatorChar: '\t',
                    hasHeader: true);

                /*
                 *  Algorithm-specific settings.
                 *  TODO: study the algorithm to understand what Alpha, Lambda and C mean
                 *  and why these particular values were chosen.
                 */
                var trainerOptions = new MatrixFactorizationTrainer.Options();
                trainerOptions.MatrixColumnIndexColumnName = "ProductID";
                trainerOptions.MatrixRowIndexColumnName    = "RelatedProductID";
                trainerOptions.LabelColumnName             = "Label";
                trainerOptions.LossFunction = MatrixFactorizationTrainer.LossFunctionType.SquareLossOneClass;
                trainerOptions.Alpha        = 0.01;
                trainerOptions.Lambda       = 0.025;
                trainerOptions.C            = 0.00001;

                // Create the trainer from the options and train the model on the loaded data.
                ITransformer trainedModel = mlContext.Recommendation()
                                                     .Trainers.MatrixFactorization(trainerOptions)
                                                     .Fit(dataView);

                /*
                 *  The produced model is saved to a file with a .zip extension.
                 *  That zip can be picked up and used from another application as well.
                 *  If the dataset changes, the model has to be trained again.
                 */
                mlContext.Model.Save(
                    trainedModel,
                    dataView.Schema,
                    GetDataSetPath($"{country}-model.zip"));
            }

            // A Stopwatch could be added here to measure how long training takes.
            return View("Index", CreateHomeIndexViewModel());
        }