/// <summary>
/// Runs the shared estimator checks against a matrix factorization trainer, supplying one data set whose
/// index columns match the trainer's configuration and one whose index columns are renamed.
/// </summary>
[ConditionalFact(typeof(Environment), nameof(Environment.Is64BitProcess))] // This test is being fixed as part of issue #1441.
public void MatrixFactorization_Estimator()
{
    const string labelColumnName = "Label";
    const string matrixColumnIndexColumnName = "Col";
    const string matrixRowIndexColumnName = "Row";

    // Valid input: columns Label, Col and Row, where Col and Row are the input names the trainer is configured with.
    var validLoader = new TextLoader(Env, GetLoaderArgs(labelColumnName, matrixColumnIndexColumnName, matrixRowIndexColumnName));
    var trainSource = new MultiFileSource(GetDataPath(TestDatasets.trivialMatrixFactorization.trainFilename));
    var data = validLoader.Read(trainSource);

    // Invalid input: columns Label, ColRenamed and RowRenamed, so neither Col nor Row is present.
    var renamedLoader = new TextLoader(
        Env,
        GetLoaderArgs(labelColumnName, matrixColumnIndexColumnName + "Renamed", matrixRowIndexColumnName + "Renamed"));
    var testSource = new MultiFileSource(GetDataPath(TestDatasets.trivialMatrixFactorization.testFilename));
    var invalidData = renamedLoader.Read(testSource);

    var est = new MatrixFactorizationTrainer(
        Env,
        matrixColumnIndexColumnName,
        matrixRowIndexColumnName,
        labelColumnName,
        advancedSettings: settings =>
        {
            settings.NumIterations = 3;
            settings.NumThreads = 1;
            settings.K = 4;
        });

    TestEstimatorCore(est, data, invalidInput: invalidData);
    Done();
}
/// <summary>
/// Trains a matrix factorization model on a synthesized in-memory matrix, verifies the index-column
/// metadata exposed by the trained model, and checks the L2 metric computed on the training data.
/// </summary>
[ConditionalFact(typeof(Environment), nameof(Environment.Is64BitProcess))] // This test is being fixed as part of issue #1441.
public void MatrixFactorizationInMemoryData()
{
    // Exclusive upper bounds of the synthesized column and row index ranges.
    var columnEnd = _synthesizedMatrixFirstColumnIndex + _synthesizedMatrixColumnCount;
    var rowEnd = _synthesizedMatrixFirstRowIndex + _synthesizedMatrixRowCount;

    // Build the in-memory matrix as a list of (column index, row index, value) entries.
    var dataMatrix = new List<MatrixElement>();
    for (uint column = _synthesizedMatrixFirstColumnIndex; column < columnEnd; ++column)
    {
        for (uint row = _synthesizedMatrixFirstRowIndex; row < rowEnd; ++row)
        {
            var element = new MatrixElement()
            {
                MatrixColumnIndex = column,
                MatrixRowIndex = row,
                Value = (column + row) % 5
            };
            dataMatrix.Add(element);
        }
    }

    // Wrap the list in an IDataView so that ML.NET components can consume it.
    var dataView = ComponentCreation.CreateDataView(Env, dataMatrix);

    // Configure the trainer with "Value", "MatrixColumnIndex" and "MatrixRowIndex" as its column names.
    var mlContext = new MLContext(seed: 1, conc: 1);
    var pipeline = new MatrixFactorizationTrainer(
        mlContext,
        "Value",
        "MatrixColumnIndex",
        "MatrixRowIndex",
        advancedSettings: settings =>
        {
            settings.NumIterations = 10;
            settings.NumThreads = 1; // To eliminate randomness, # of threads must be 1.
            settings.K = 32;
        });

    // Train a matrix factorization model.
    var model = pipeline.Fit(dataView);

    // The trained model must report the configured index column names...
    Assert.True(model.MatrixColumnIndexColumnName == "MatrixColumnIndex");
    Assert.True(model.MatrixRowIndexColumnName == "MatrixRowIndex");

    // ...and both index columns must be key-typed.
    Assert.True(model.MatrixColumnIndexColumnType.IsKey);
    Assert.True(model.MatrixRowIndexColumnType.IsKey);

    // The key ranges must line up with the synthesized matrix.
    var matColKeyType = model.MatrixColumnIndexColumnType.AsKey;
    Assert.True(matColKeyType.Min == _synthesizedMatrixFirstColumnIndex);
    Assert.True(matColKeyType.Count == _synthesizedMatrixColumnCount);

    var matRowKeyType = model.MatrixRowIndexColumnType.AsKey;
    Assert.True(matRowKeyType.Min == _synthesizedMatrixFirstRowIndex);
    Assert.True(matRowKeyType.Count == _synthesizedMatrixRowCount);

    // Score the training set with the trained model.
    var prediction = model.Transform(dataView);

    // Compute regression metrics for the prediction result.
    var metrics = mlContext.Regression.Evaluate(prediction, label: "Value", score: "Score");

    // Naive check: the pipeline ran and the L2 error stays below the threshold.
    Assert.True(metrics.L2 < 0.1);
}
/// <summary>
/// Trains one matrix factorization model per entry in <c>countries</c>, saves each model next to its
/// data set, and returns the "Index" view with the elapsed training time in milliseconds.
/// </summary>
/// <returns>The "Index" view populated with the home index view model.</returns>
public IActionResult TrainModels()
{
    var timer = Stopwatch.StartNew();

    foreach (string country in countries)
    {
        var mlContext = new MLContext();

        string dataSetPath = GetDataPath($"{country}-{datasetName}");

        // Label is read from file column 0; the two product id columns are read from file
        // columns 0 and 1 as keys with a key count of 77.
        var schema = new[]
        {
            new TextLoader.Column(name: "Label", dataKind: DataKind.Double, index: 0),
            new TextLoader.Column(
                name: nameof(ProductCobought.ProductID),
                dataKind: DataKind.UInt32,
                source: new[] { new TextLoader.Range(0) },
                keyCount: new KeyCount(77)),
            new TextLoader.Column(
                name: nameof(ProductCobought.CoboughtProductID),
                dataKind: DataKind.UInt32,
                source: new[] { new TextLoader.Range(1) },
                keyCount: new KeyCount(77)),
        };

        IDataView dataView = mlContext.Data.LoadFromTextFile(
            path: dataSetPath,
            columns: schema,
            hasHeader: true,
            separatorChar: '\t');

        var options = new MatrixFactorizationTrainer.Options();
        options.MatrixColumnIndexColumnName = nameof(ProductCobought.ProductID);
        options.MatrixRowIndexColumnName = nameof(ProductCobought.CoboughtProductID);
        options.LabelColumnName = "Label";
        options.LossFunction = MatrixFactorizationTrainer.LossFunctionType.SquareLossOneClass;
        options.Alpha = 0.01;
        options.Lambda = 0.025;
        options.C = 0.00001;

        MatrixFactorizationTrainer trainer = mlContext.Recommendation().Trainers.MatrixFactorization(options);
        ITransformer trainedModel = trainer.Fit(dataView);

        // Persist the model together with the schema of the data it was trained on.
        string modelPath = GetDataPath($"{country}-model.zip");
        mlContext.Model.Save(trainedModel, inputSchema: dataView.Schema, filePath: modelPath);
    }

    timer.Stop();

    var viewModel = CreateHomeIndexViewModel();
    viewModel.Milliseconds = timer.ElapsedMilliseconds;
    return View("Index", viewModel);
}
/// <summary>
/// Builds the training pipeline: maps the "RaterId" and "MovieId" columns to keys in place, then
/// appends the trainer returned by <c>GetTrainer</c>.
/// </summary>
/// <param name="mlContext">The context used to create the transforms and the trainer.</param>
/// <returns>The estimator chain consisting of both key mappings followed by the trainer.</returns>
public static IEstimator<ITransformer> BuildTrainingPipeline(MLContext mlContext)
{
    // Data preparation: each id column is converted to a key column of the same name.
    var raterIdToKey = mlContext.Transforms.Conversion.MapValueToKey("RaterId", "RaterId");
    var movieIdToKey = mlContext.Transforms.Conversion.MapValueToKey("MovieId", "MovieId");
    var dataProcessPipeline = raterIdToKey.Append(movieIdToKey);

    MatrixFactorizationTrainer trainer = GetTrainer(mlContext);

    return dataProcessPipeline.Append(trainer);
}
/// <summary>
/// Predicts a matrix entry using matrix factorization.
/// </summary>
/// <typeparam name="T">The type of physical value of the matrix's row and column index. It must be an integer type such as uint.</typeparam>
/// <param name="ctx">The regression context trainer object.</param>
/// <param name="label">The label variable.</param>
/// <param name="matrixColumnIndex">The column index of the considered matrix.</param>
/// <param name="matrixRowIndex">The row index of the considered matrix.</param>
/// <param name="regularizationCoefficient">Regularization coefficient, assigned to the trainer's <c>Lambda</c> setting. Must be non-negative.</param>
/// <param name="approximationRank">Rank of the two factor matrices whose product approximates the considered matrix, assigned to the trainer's <c>K</c> setting. Must be positive.</param>
/// <param name="learningRate">Initial learning rate, assigned to the trainer's <c>Eta</c> setting. Must be positive.</param>
/// <param name="numIterations">Number of training iterations. Must be positive.</param>
/// <param name="advancedSettings">A delegate to set more settings. It runs after the four settings above have been
/// assigned, so it may overwrite them.</param>
/// <param name="onFit">A delegate that is called every time the
/// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
/// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive
/// the model that was trained. Note that this action cannot change the result in any way; it is only a way for the
/// caller to be informed about what was learnt.</param>
/// <returns>The predicted output.</returns>
public static Scalar<float> MatrixFactorization<T>(this RegressionContext.RegressionTrainers ctx,
    Scalar<float> label, Key<T> matrixColumnIndex, Key<T> matrixRowIndex,
    float regularizationCoefficient = 0.1f,
    int approximationRank = 8,
    float learningRate = 0.1f,
    int numIterations = 20,
    Action<MatrixFactorizationTrainer.Arguments> advancedSettings = null,
    Action<MatrixFactorizationPredictor> onFit = null)
{
    // Required inputs.
    Contracts.CheckValue(label, nameof(label));
    Contracts.CheckValue(matrixColumnIndex, nameof(matrixColumnIndex));
    Contracts.CheckValue(matrixRowIndex, nameof(matrixRowIndex));

    // Hyper-parameter ranges.
    Contracts.CheckParam(regularizationCoefficient >= 0, nameof(regularizationCoefficient), "Must be non-negative");
    Contracts.CheckParam(approximationRank > 0, nameof(approximationRank), "Must be positive");
    Contracts.CheckParam(learningRate > 0, nameof(learningRate), "Must be positive");
    Contracts.CheckParam(numIterations > 0, nameof(numIterations), "Must be positive");

    // Optional delegates.
    Contracts.CheckValueOrNull(advancedSettings);
    Contracts.CheckValueOrNull(onFit);

    var reconciler = new MatrixFactorizationReconciler<T>(
        (env, labelColName, matrixColumnIndexColName, matrixRowIndexColName) =>
        {
            var trainer = new MatrixFactorizationTrainer(env, labelColName, matrixColumnIndexColName, matrixRowIndexColName,
                advancedSettings: args =>
                {
                    args.Lambda = regularizationCoefficient;
                    args.K = approximationRank;
                    args.Eta = learningRate;
                    args.NumIterations = numIterations;

                    // The caller's delegate runs last and may overwrite the settings above.
                    advancedSettings?.Invoke(args);
                });

            // Without a callback the bare trainer is enough.
            if (onFit == null)
            {
                return trainer;
            }

            return trainer.WithOnFitDelegate(trans => onFit(trans.Model));
        },
        label, matrixColumnIndex, matrixRowIndex);

    return reconciler.Output;
}
/// <summary>
/// Loads the product entries into a data view, trains a one-class matrix factorization model on the
/// training split, logs regression metrics computed on the test split, and returns the trained model.
/// </summary>
/// <param name="products">The product co-purchase entries to train on.</param>
/// <returns>The trained model as an instance of <see cref="ITransformer"/>.</returns>
private ITransformer LoadDataAndTrain(IEnumerable<ProductEntry> products)
{
    // Wrap the in-memory entries in an IDataView and split it 80/20 with a fixed seed.
    IDataView allProducts = this.mlContext.Data.LoadFromEnumerable(products);
    DataOperationsCatalog.TrainTestData split =
        this.mlContext.Data.TrainTestSplit(allProducts, testFraction: 0.2, seed: 1);

    IDataView trainSet = split.TrainSet;
    IDataView testSet = split.TestSet;
    IDataView cachedTrainSet = this.mlContext.Data.Cache(trainSet);

    // The data is already encoded, so only the trainer options need to be specified: the column
    // names plus the hyper-parameters LossFunction, Alpha, Lambda, ApproximationRank and C.
    var trainerOptions = new MatrixFactorizationTrainer.Options();
    trainerOptions.MatrixColumnIndexColumnName = nameof(ProductEntry.ProductId);
    trainerOptions.MatrixRowIndexColumnName = nameof(ProductEntry.CoPurchaseProductId);
    trainerOptions.LabelColumnName = nameof(ProductEntry.Label);
    trainerOptions.LossFunction = MatrixFactorizationTrainer.LossFunctionType.SquareLossOneClass;
    trainerOptions.Alpha = 0.01;
    trainerOptions.Lambda = 0.025;
    trainerOptions.ApproximationRank = 128;
    trainerOptions.C = 0.00001;

    // Create the matrix factorization trainer from the options.
    MatrixFactorizationTrainer trainer =
        this.mlContext.Recommendation().Trainers.MatrixFactorization(trainerOptions);

    // Fit the model on the cached training split.
    ITransformer trainedModel = trainer.Fit(cachedTrainSet);

    // Score the held-out split and log the resulting regression metrics.
    IDataView predictions = trainedModel.Transform(testSet);
    RegressionMetrics metrics = this.mlContext.Regression.Evaluate(predictions);
    this.log.Information($"The model evaluation metrics RootMeanSquaredError:{metrics.RootMeanSquaredError}, LossFunction:{metrics.LossFunction}, MeanAbsoluteError:{metrics.MeanAbsoluteError}, MeanSquaredError:{metrics.MeanSquaredError}");

    return trainedModel;
}
/// <summary>
/// Trains a matrix factorization model on the trivial training set, scores the test set, checks the
/// output schema's column names, and compares the L2 error against the per-platform baselines.
/// </summary>
public void MatrixFactorizationSimpleTrainAndPredict()
{
    using (var env = new LocalEnvironment(seed: 1, conc: 1))
    {
        // Specific column names of the considered data set
        string labelColumnName = "Label";
        string userColumnName = "User";
        string itemColumnName = "Item";
        string scoreColumnName = "Score";

        // Create reader for both of training and test data sets
        var reader = new TextLoader(env, GetLoaderArgs(labelColumnName, userColumnName, itemColumnName));

        // Read training data as an IDataView object
        var data = reader.Read(new MultiFileSource(GetDataPath(TestDatasets.trivialMatrixFactorization.trainFilename)));

        // Create a pipeline with a single operator.
        var pipeline = new MatrixFactorizationTrainer(env, labelColumnName, userColumnName, itemColumnName,
            advancedSettings: s =>
            {
                s.NumIterations = 3;
                s.NumThreads = 1; // To eliminate randomness, # of threads must be 1.
                s.K = 7;
            });

        // Train a matrix factorization model.
        var model = pipeline.Fit(data);

        // Read the test data set as an IDataView
        var testData = reader.Read(new MultiFileSource(GetDataPath(TestDatasets.trivialMatrixFactorization.testFilename)));

        // Apply the trained model to the test set
        var prediction = model.Transform(testData);

        // Get output schema and check its column names. Assert.Equal reports both names on failure.
        var outputSchema = model.GetOutputSchema(data.Schema);
        var expectedOutputNames = new string[] { labelColumnName, userColumnName, itemColumnName, scoreColumnName };
        foreach (var (i, col) in outputSchema.GetColumns())
        {
            Assert.Equal(expectedOutputNames[i], col.Name);
        }

        // The label column must exist in the test data and the score column in the scored data.
        // The lookup results were previously ignored; the indices themselves are not needed.
        Assert.True(testData.Schema.TryGetColumnIndex(labelColumnName, out int _));
        Assert.True(prediction.Schema.TryGetColumnIndex(scoreColumnName, out int _));

        // Compute prediction errors
        var mlContext = new MLContext();
        var metrics = mlContext.Regression.Evaluate(prediction, label: labelColumnName, score: scoreColumnName);

        // Determine if the selected metric is reasonable for different platforms
        var expectedWindowsL2Error = 0.61528733643754685; // Windows baseline
        var expectedMacL2Error = 0.61192207960271; // Mac baseline
        var expectedLinuxL2Error = 0.616821448679879; // Linux baseline
        double tolerance = System.Math.Pow(10, -DigitsOfPrecision);

        // True when the measured L2 error lies strictly within +/- tolerance of the given baseline.
        bool IsNearBaseline(double baseline) =>
            baseline - tolerance < metrics.L2 && metrics.L2 < baseline + tolerance;

        // The test does not detect the platform, so matching any one baseline is accepted.
        Assert.True(
            IsNearBaseline(expectedWindowsL2Error)
            || IsNearBaseline(expectedMacL2Error)
            || IsNearBaseline(expectedLinuxL2Error));
    }
}
/// <summary>
/// Trains and saves one matrix factorization model per entry in <c>countries</c>, then returns the
/// "Index" view with the total elapsed time in milliseconds.
/// </summary>
/// <returns>The "Index" view populated with the home index view model.</returns>
public IActionResult TrainModels()
{
    var elapsed = Stopwatch.StartNew();

    foreach (string country in countries)
    {
        var mlContext = new MLContext();

        string inputPath = GetDataPath($"{country}-{datasetName}");

        var labelColumn = new TextLoader.Column(name: "Label", dataKind: DataKind.Double, index: 0);

        // The key count is the cardinality, i.e. the maximum valid value. These columns are used
        // internally when training the model. When results are shown, the columns are mapped to
        // instances of our model, which could have a different cardinality but happen to have the same.
        var productIdColumn = new TextLoader.Column(
            name: nameof(ProductCobought.ProductID),
            dataKind: DataKind.UInt32,
            source: new[] { new TextLoader.Range(0) },
            keyCount: new KeyCount(77));
        var coboughtProductIdColumn = new TextLoader.Column(
            name: nameof(ProductCobought.CoboughtProductID),
            dataKind: DataKind.UInt32,
            source: new[] { new TextLoader.Range(1) },
            keyCount: new KeyCount(77));

        IDataView dataView = mlContext.Data.LoadFromTextFile(
            path: inputPath,
            columns: new[] { labelColumn, productIdColumn, coboughtProductIdColumn },
            hasHeader: true,
            separatorChar: '\t');

        var options = new MatrixFactorizationTrainer.Options();
        options.MatrixColumnIndexColumnName = nameof(ProductCobought.ProductID);
        options.MatrixRowIndexColumnName = nameof(ProductCobought.CoboughtProductID);
        options.LabelColumnName = "Label";
        options.LossFunction = MatrixFactorizationTrainer.LossFunctionType.SquareLossOneClass;
        options.Alpha = 0.01;
        options.Lambda = 0.025;
        options.C = 0.00001;

        var recommendationTrainers = mlContext.Recommendation().Trainers;
        MatrixFactorizationTrainer mft = recommendationTrainers.MatrixFactorization(options);

        ITransformer trainedModel = mft.Fit(dataView);

        // Save the model together with the schema of the data it was trained on.
        string outputPath = GetDataPath($"{country}-model.zip");
        mlContext.Model.Save(trainedModel, inputSchema: dataView.Schema, filePath: outputPath);
    }

    elapsed.Stop();

    var homeModel = CreateHomeIndexViewModel();
    homeModel.Milliseconds = elapsed.ElapsedMilliseconds;
    return View("Index", homeModel);
}
// This example first creates in-memory data and then uses it to train a matrix factorization model.
// Afterward, quality metrics are reported and two individual matrix entries are predicted.
public static void MatrixFactorizationInMemoryData()
{
    // Exclusive upper bounds of the synthesized column and row index ranges.
    var columnEnd = _synthesizedMatrixFirstColumnIndex + _synthesizedMatrixColumnCount;
    var rowEnd = _synthesizedMatrixFirstRowIndex + _synthesizedMatrixRowCount;

    // Create an in-memory matrix as a list of (column index, row index, value) entries.
    var dataMatrix = new List<MatrixElement>();
    for (uint column = _synthesizedMatrixFirstColumnIndex; column < columnEnd; ++column)
    {
        for (uint row = _synthesizedMatrixFirstRowIndex; row < rowEnd; ++row)
        {
            var element = new MatrixElement()
            {
                MatrixColumnIndex = column,
                MatrixRowIndex = row,
                Value = (column + row) % 5
            };
            dataMatrix.Add(element);
        }
    }

    // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
    // as a catalog of available operations and as the source of randomness.
    var mlContext = new MLContext(seed: 0, conc: 1);

    // Convert the in-memory matrix into an IDataView so that ML.NET components can consume it.
    var dataView = ComponentCreation.CreateDataView(mlContext, dataMatrix);

    // Create a matrix factorization trainer. nameof(...) is used to extract the field names of the
    // MatrixElement class that serve as the trainer's column names.
    var pipeline = new MatrixFactorizationTrainer(
        mlContext,
        nameof(MatrixElement.MatrixColumnIndex),
        nameof(MatrixElement.MatrixRowIndex),
        nameof(MatrixElement.Value),
        advancedSettings: settings =>
        {
            settings.NumIterations = 10;
            settings.NumThreads = 1; // To eliminate randomness, # of threads must be 1.
            settings.K = 32;
        });

    // Train a matrix factorization model.
    var model = pipeline.Fit(dataView);

    // Apply the trained model to the training set.
    var prediction = model.Transform(dataView);

    // Calculate regression metrics for the prediction result.
    var metrics = mlContext.Regression.Evaluate(
        prediction,
        label: nameof(MatrixElement.Value),
        score: nameof(MatrixElementForScore.Score));

    // Print out some metrics for checking the model's quality.
    Console.WriteLine($"L1 - {metrics.L1}");
    Console.WriteLine($"L2 - {metrics.L2}");
    Console.WriteLine($"LossFunction - {metrics.LossFn}");
    Console.WriteLine($"RMS - {metrics.Rms}");
    Console.WriteLine($"RSquared - {metrics.RSquared}");

    // Create two entries for making predictions. The prediction value, Score, is unknown, so it can be
    // anything (here Score=0 is used and it will be overwritten by the true prediction). If any of the
    // row and column indexes is out of range (e.g., MatrixColumnIndex=99999), the prediction value will be NaN.
    var testMatrix = new List<MatrixElementForScore>();
    testMatrix.Add(new MatrixElementForScore() { MatrixColumnIndex = 1, MatrixRowIndex = 7, Score = 0 });
    testMatrix.Add(new MatrixElementForScore() { MatrixColumnIndex = 3, MatrixRowIndex = 6, Score = 0 });

    // Again, convert the test data to a format supported by ML.NET.
    var testDataView = ComponentCreation.CreateDataView(mlContext, testMatrix);

    // Feed the test data into the model and then iterate through all predictions.
    var scoredEntries = model.Transform(testDataView).AsEnumerable<MatrixElementForScore>(mlContext, false);
    foreach (var pred in scoredEntries)
    {
        Console.WriteLine($"Predicted value at row {pred.MatrixRowIndex} and column {pred.MatrixColumnIndex} is {pred.Score}");
    }
}
/// <summary>
/// Trains a matrix factorization model on the trivial training set, scores the test set, checks the
/// output schema's column names, and compares the L2 error against the baseline of the current platform.
/// </summary>
[ConditionalFact(typeof(Environment), nameof(Environment.Is64BitProcess))] // This test is being fixed as part of issue #1441.
public void MatrixFactorizationSimpleTrainAndPredict()
{
    var mlContext = new MLContext(seed: 1, conc: 1);

    // Specific column names of the considered data set
    string labelColumnName = "Label";
    string userColumnName = "User";
    string itemColumnName = "Item";
    string scoreColumnName = "Score";

    // Create reader for both of training and test data sets
    var reader = new TextLoader(mlContext, GetLoaderArgs(labelColumnName, userColumnName, itemColumnName));

    // Read training data as an IDataView object
    var data = reader.Read(new MultiFileSource(GetDataPath(TestDatasets.trivialMatrixFactorization.trainFilename)));

    // Create a pipeline with a single operator.
    var pipeline = new MatrixFactorizationTrainer(mlContext, userColumnName, itemColumnName, labelColumnName,
        advancedSettings: s =>
        {
            s.NumIterations = 3;
            s.NumThreads = 1; // To eliminate randomness, # of threads must be 1.
            s.K = 7;
        });

    // Train a matrix factorization model.
    var model = pipeline.Fit(data);

    // Read the test data set as an IDataView
    var testData = reader.Read(new MultiFileSource(GetDataPath(TestDatasets.trivialMatrixFactorization.testFilename)));

    // Apply the trained model to the test set
    var prediction = model.Transform(testData);

    // Get output schema and check its column names. Assert.Equal reports both names on failure.
    var outputSchema = model.GetOutputSchema(data.Schema);
    var expectedOutputNames = new string[] { labelColumnName, userColumnName, itemColumnName, scoreColumnName };
    foreach (var (i, col) in outputSchema.GetColumns())
    {
        Assert.Equal(expectedOutputNames[i], col.Name);
    }

    // The label column must exist in the test data and the score column in the scored data.
    // The lookup results were previously ignored; the indices themselves are not needed.
    Assert.True(testData.Schema.TryGetColumnIndex(labelColumnName, out int _));
    Assert.True(prediction.Schema.TryGetColumnIndex(scoreColumnName, out int _));

    // Compute prediction errors
    var metrics = mlContext.Regression.Evaluate(prediction, label: labelColumnName, score: scoreColumnName);

    // Determine if the selected metric is reasonable for different platforms
    double tolerance = Math.Pow(10, -7);

    // Asserts that the measured L2 error lies within +/- tolerance of the given baseline.
    void AssertNearBaseline(double baseline) =>
        Assert.InRange(metrics.L2, baseline - tolerance, baseline + tolerance);

    if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
    {
        // Linux case
        AssertNearBaseline(0.616821448679879); // Linux baseline
    }
    else if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
    {
        // The Mac case is just broken. Should be fixed later. Re-enable when done.
        // Mac case
        //var expectedMacL2Error = 0.61192207960271; // Mac baseline
        //Assert.InRange(metrices.L2, expectedMacL2Error - 5e-3, expectedMacL2Error + 5e-3); // 1e-7 is too small for Mac so we try 1e-5
    }
    else if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
    {
        // Windows case
        AssertNearBaseline(0.61528733643754685); // Windows baseline
    }
}
/// <summary>
/// Trains a one-class matrix factorization model on the data at <c>TrainingDataLocation</c> and scores
/// every id in <c>request.AllItemsIds</c> for <c>request.CustomerNumber</c>.
/// </summary>
/// <param name="request">Supplies <c>ItemCustomersList</c> (handed to <c>CreateFile</c>),
/// <c>CustomerNumber</c> and <c>AllItemsIds</c>.</param>
/// <returns>A dictionary mapping each requested item id to its predicted score rounded to three decimals.</returns>
/// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
public static async Task<Dictionary<int, double>> Run(Request request)
{
    // NOTE(review): the body contains no await; `async` is kept so that exceptions keep surfacing
    // through the returned Task exactly as callers already observe them.
    if (request == null)
    {
        throw new ArgumentNullException(nameof(request));
    }

    // Presumably writes the training file that is loaded below - confirm against CreateFile.
    CreateFile(request.ItemCustomersList);

    // STEP 1: Create MLContext to be shared across the model creation workflow objects
    var mlContext = new MLContext();

    // STEP 2: Load the training data with a text loader schema. Label is read from file column 0;
    // CustomerNumber and RelatedItemId are read from file columns 0 and 1 as keys with a key count of 10000.
    IDataView dataView = mlContext.Data.LoadFromTextFile(
        path: TrainingDataLocation,
        new[]
        {
            new TextLoader.Column("Label", DataKind.Single, 0),
            new TextLoader.Column(nameof(ProductEntry.CustomerNumber), DataKind.UInt32, new[] { new TextLoader.Range(0) }, new KeyCount(10000)),
            new TextLoader.Column(nameof(ProductEntry.RelatedItemId), DataKind.UInt32, new[] { new TextLoader.Range(1) }, new KeyCount(10000))
        },
        hasHeader: true);

    // STEP 3: The data is already encoded, so all that is needed is to specify options for
    // MatrixFactorizationTrainer with a few extra hyperparameters (LossFunction, Alpha, Lambda).
    var options = new MatrixFactorizationTrainer.Options
    {
        MatrixColumnIndexColumnName = nameof(ProductEntry.CustomerNumber),
        MatrixRowIndexColumnName = nameof(ProductEntry.RelatedItemId),
        LabelColumnName = "Label",
        LossFunction = MatrixFactorizationTrainer.LossFunctionType.SquareLossOneClass,
        Alpha = 0.01,
        Lambda = 0.025
    };

    // For better results use the following parameters
    //options.K = 100;
    //options.C = 0.00001;

    // STEP 4: Call the MatrixFactorization trainer by passing options.
    MatrixFactorizationTrainer est = mlContext.Recommendation().Trainers.MatrixFactorization(options);

    // STEP 5: Train the model by fitting it to the data set.
    ITransformer model = est.Fit(dataView);

    // STEP 6: Create a prediction engine and score each requested item for the requesting customer.
    // The higher the score, the higher the probability of the pair occurring together.
    var scores = new Dictionary<int, double>();

    // The engine is disposable, so release it as soon as scoring is finished.
    using (PredictionEngine<ProductEntry, PredictionScore> predictionEngine =
        mlContext.Model.CreatePredictionEngine<ProductEntry, PredictionScore>(model))
    {
        foreach (int itemId in request.AllItemsIds)
        {
            var entry = new ProductEntry
            {
                CustomerNumber = (uint)request.CustomerNumber,
                RelatedItemId = (uint)itemId
            };

            PredictionScore predictionScore = predictionEngine.Predict(entry);

            // Indexer assignment tolerates a repeated item id; Add would throw ArgumentException.
            scores[itemId] = Math.Round(predictionScore.Score, 3);
        }
    }

    return scores;
}
/*
 * Action method used to train the model.
 * The Matrix Factorization algorithm (also known as Collaborative Filtering) is used.
 */
public IActionResult TrainModels()
{
    foreach (string country in countries)
    {
        var mlContext = new MLContext();

        // Specify the dataset file to load from.
        string dataSetPath = GetDataSetPath($"{country}-dataset.txt");

        // Define the column and row information.
        var columns = new[]
        {
            new TextLoader.Column(name: "Label", dataKind: DataKind.Double, index: 0),
            new TextLoader.Column(
                name: "ProductID",
                dataKind: DataKind.UInt32,
                source: new[] { new TextLoader.Range(0) },
                keyCount: new KeyCount(200)),
            new TextLoader.Column(
                name: "RelatedProductID",
                dataKind: DataKind.UInt32,
                source: new[] { new TextLoader.Range(1) },
                keyCount: new KeyCount(200))
        };

        // Prepare the IDataView instance that carries the input data for the algorithm.
        var dataView = mlContext.Data.LoadFromTextFile(
            path: dataSetPath,
            columns: columns,
            hasHeader: true,
            separatorChar: '\t'); // As you may recall, the columns were separated with tabs.

        /*
         * Algorithm-specific settings. Understanding these requires learning the details of the algorithm:
         * what the Alpha, Lambda and C values mean, and why these particular values were chosen, still
         * needs to be investigated.
         */
        var options = new MatrixFactorizationTrainer.Options();
        options.MatrixColumnIndexColumnName = "ProductID";
        options.MatrixRowIndexColumnName = "RelatedProductID";
        options.LabelColumnName = "Label";
        options.LossFunction = MatrixFactorizationTrainer.LossFunctionType.SquareLossOneClass;
        options.Alpha = 0.01;
        options.Lambda = 0.025;
        options.C = 0.00001;

        MatrixFactorizationTrainer trainer = mlContext.Recommendation().Trainers.MatrixFactorization(options);

        // The model is trained.
        ITransformer trainedModel = trainer.Fit(dataView);

        /*
         * The produced model is saved with a .zip extension.
         * This zip can be taken and used in another application as well.
         * Of course, if the dataset changes the model will need to be retrained.
         */
        string modelPath = GetDataSetPath($"{country}-model.zip");
        mlContext.Model.Save(trainedModel, inputSchema: dataView.Schema, filePath: modelPath);
    }

    // A Stopwatch could be introduced here to find out how long the model takes to train ;)
    var viewModel = CreateHomeIndexViewModel();
    return View("Index", viewModel);
}