/// <summary>
/// Trains a model from a single generated usage file using
/// <c>ModelTrainingParameters.Default</c>, passing neither a catalog file nor an
/// evaluation folder, and checks which reports the training result exposes.
/// </summary>
public void TrainSmallModelUsingDefaultParametersTest()
{
    // Arrange: a working folder named after the test, with a "usage" sub-folder
    // that receives one generated usage file.
    const string baseFolder = nameof(TrainSmallModelUsingDefaultParametersTest);
    Directory.CreateDirectory(baseFolder);

    var filesGenerator = new ModelTrainingFilesGenerator();
    string usageFolder = Path.Combine(baseFolder, "usage");
    Directory.CreateDirectory(usageFolder);

    string usageFilePath = Path.Combine(usageFolder, "usage.csv");
    filesGenerator.CreateUsageFile(usageFilePath, 100);

    // Act: the two null arguments are the catalog file path and the evaluation folder.
    var modelTrainer = new ModelTrainer();
    ModelTrainResult trainResult = modelTrainer.TrainModel(
        ModelTrainingParameters.Default,
        usageFolder,
        null,
        null,
        CancellationToken.None);

    // Assert: training succeeded and no catalog report was produced.
    Assert.IsNotNull(trainResult);
    Assert.IsTrue(trainResult.IsCompletedSuccessfuly);
    Assert.IsNull(trainResult.CatalogFilesParsingReport);

    // The usage files must have been parsed without errors.
    var usageReport = trainResult.UsageFilesParsingReport;
    Assert.IsNotNull(usageReport);
    Assert.IsTrue(usageReport.IsCompletedSuccessfuly);
    Assert.IsFalse(usageReport.HasErrors);

    // No evaluation input was given, so neither metrics nor an evaluation report are expected.
    Assert.IsNull(trainResult.ModelMetrics);
    Assert.IsNull(trainResult.EvaluationFilesParsingReport);
}
/// <summary>
/// Trains a model with <c>ModelTrainingParameters.Default</c> while also supplying an
/// evaluation folder, and verifies that the training result carries an error-free
/// evaluation parsing report together with diversity and precision metrics.
/// </summary>
public void ModelEvaluationTest()
{
    // Arrange: a working folder named after the test, with separate "usage" and "evaluation" sub-folders.
    const string baseFolder = nameof(ModelEvaluationTest);
    Directory.CreateDirectory(baseFolder);

    // NOTE(review): 20 and 50 are presumably the users count and items count - confirm against the generator.
    var generator = new ModelTrainingFilesGenerator(20, 50);

    string usageFileFolderPath = Path.Combine(baseFolder, "usage");
    Directory.CreateDirectory(usageFileFolderPath);

    string evaluationFileFolderPath = Path.Combine(baseFolder, "evaluation");
    Directory.CreateDirectory(evaluationFileFolderPath);

    // NOTE(review): 500 and 30 presumably size the usage and evaluation files - confirm against CreateEvaluationFiles.
    generator.CreateEvaluationFiles(
        Path.Combine(usageFileFolderPath, "usage.csv"),
        Path.Combine(evaluationFileFolderPath, "evaluationUsage.csv"),
        500,
        30);

    // Act: no catalog file is passed (third argument), the evaluation folder is (fourth argument).
    var trainer = new ModelTrainer();
    var modelTrainingParameters = ModelTrainingParameters.Default;
    ModelTrainResult result = trainer.TrainModel(
        modelTrainingParameters,
        usageFileFolderPath,
        null,
        evaluationFileFolderPath,
        CancellationToken.None);

    // Assert: training succeeded and no catalog report was produced.
    Assert.IsNotNull(result);
    Assert.IsTrue(result.IsCompletedSuccessfuly);
    Assert.IsNull(result.CatalogFilesParsingReport);

    Assert.IsNotNull(result.UsageFilesParsingReport);
    Assert.IsTrue(result.UsageFilesParsingReport.IsCompletedSuccessfuly);
    Assert.IsFalse(result.UsageFilesParsingReport.HasErrors);

    // Check the evaluation report exists before dereferencing it, so a missing report shows up
    // as an assertion failure rather than a NullReferenceException.
    Assert.IsNotNull(result.EvaluationFilesParsingReport);
    Assert.IsFalse(result.EvaluationFilesParsingReport.HasErrors);

    Assert.IsNotNull(result.ModelMetrics);
    Assert.IsNotNull(result.ModelMetrics.ModelDiversityMetrics);
    Assert.IsNotNull(result.ModelMetrics.ModelPrecisionMetrics);
}
/// <summary>
/// Generates a catalog file with a known number of items, parses it with
/// <c>CatalogFileParser</c> and verifies that every line is reported as parsed successfully.
/// </summary>
public void ParseAValidCatalogFileTest()
{
    const int catalogItemsCount = 20;
    const string baseFolder = nameof(ParseAValidCatalogFileTest);
    Directory.CreateDirectory(baseFolder);

    // Arrange: write a catalog file for exactly catalogItemsCount items.
    var filesGenerator = new ModelTrainingFilesGenerator(itemsCount: catalogItemsCount);
    string catalogPath = Path.Combine(baseFolder, "catalog.csv");
    filesGenerator.CreateCatalogFile(catalogPath);

    // Act: the out values are required by the parser signature but are not inspected here.
    IList<SarCatalogItem> parsedItems;
    string[] parsedFeatureNames;
    var catalogParser = new CatalogFileParser();
    FileParsingReport parsingReport = catalogParser.ParseCatalogFile(
        catalogPath,
        CancellationToken.None,
        out parsedItems,
        out parsedFeatureNames);

    // Assert: every generated line was read and counted as successful.
    Assert.IsNotNull(parsingReport);
    Assert.IsTrue(parsingReport.IsCompletedSuccessfuly);
    Assert.AreEqual(catalogItemsCount, parsingReport.SuccessfulLinesCount);
    Assert.AreEqual(catalogItemsCount, parsingReport.TotalLinesCount);

    // A null error collection and an empty one both mean "no errors".
    bool noErrorsReported = parsingReport.Errors == null || !parsingReport.Errors.Any();
    Assert.IsTrue(noErrorsReported);
}
/// <summary>
/// Trains a model with user-to-item recommendations enabled against a substituted
/// <c>IDocumentStore</c>, then requests recommendations for a user id and verifies that
/// the store was queried for that user and that valid recommendations came back.
/// </summary>
public void GetRecommendationsUsingUserId()
{
    const string baseFolder = nameof(GetRecommendationsUsingUserId);
    Directory.CreateDirectory(baseFolder);

    // Arrange: one generated usage file; the generator returns the ids kept in warmItems.
    var generator = new ModelTrainingFilesGenerator(8);
    string usageFolder = Path.Combine(baseFolder, "usage");
    Directory.CreateDirectory(usageFolder);
    string usageFilePath = Path.Combine(usageFolder, "usage.csv");
    IList<string> warmItems = generator.CreateUsageFile(usageFilePath, 1000);

    var trainingParameters = ModelTrainingParameters.Default;
    trainingParameters.EnableBackfilling = false;
    trainingParameters.EnableUserToItemRecommendations = true;
    trainingParameters.AllowSeedItemsInRecommendations = true;

    // The substitute keeps whatever documents training stores (keyed by document id)
    // and serves them back from GetDocument.
    Dictionary<string, Document> userHistory = null;
    IDocumentStore documentStore = Substitute.For<IDocumentStore>();
    documentStore
        .AddDocumentsAsync(Arg.Any<string>(), Arg.Any<IEnumerable<Document>>(), Arg.Any<CancellationToken>())
        .Returns(call =>
        {
            userHistory = call.Arg<IEnumerable<Document>>().ToDictionary(doc => doc.Id);
            return Task.FromResult(userHistory.Count);
        });
    documentStore
        .GetDocument(Arg.Any<string>(), Arg.Any<string>())
        .Returns(call => userHistory?[call.ArgAt<string>(1)]);

    // Act: train, then ask for recommendations seeded with a single click event.
    var modelTrainer = new ModelTrainer(documentStore: documentStore);
    ModelTrainResult trainResult = modelTrainer.TrainModel(
        trainingParameters,
        usageFolder,
        null,
        null,
        CancellationToken.None);
    Assert.IsNotNull(trainResult);
    Assert.IsTrue(trainResult.IsCompletedSuccessfuly);

    var recommender = new Recommender(trainResult.Model, documentStore);
    var seedEvent = new UsageEvent
    {
        ItemId = warmItems.First(),
        EventType = UsageEventType.Click,
        Timestamp = DateTime.UtcNow
    };
    var items = new List<UsageEvent> { seedEvent };

    string userId = generator.Users.FirstOrDefault();
    IList<Recommendation> recommendations = recommender.GetRecommendations(items, userId, 3);

    // expect the document store to be called once with the provided user id
    documentStore.Received(1).GetDocument(Arg.Any<string>(), userId);

    // Assert: a non-empty list of non-null recommendations, each with a positive score and an item id.
    Assert.IsNotNull(recommendations);
    Assert.IsTrue(recommendations.Any());
    Assert.IsTrue(recommendations.All(r => r != null));

    bool allRecommendationsValid = recommendations.All(
        r => r.Score > 0 && !string.IsNullOrWhiteSpace(r.RecommendedItemId));
    Assert.IsTrue(allRecommendationsValid);
}
/// <summary>
/// Parses a generated catalog file and usage file once, then trains a model with
/// <c>SarTrainer</c> for every settings instance returned by
/// <c>GetAllModelTrainingParameters()</c>, expecting a non-null model each time.
/// </summary>
public void TrainModelWithRangeOfPossibleParametersTest()
{
    const string baseFolder = nameof(TrainModelWithRangeOfPossibleParametersTest);
    Directory.CreateDirectory(baseFolder);
    var filesGenerator = new ModelTrainingFilesGenerator();

    // create catalog items
    IList<SarCatalogItem> catalogItems;
    string[] featureNames;
    string catalogPath = Path.Combine(baseFolder, "catalog.csv");
    filesGenerator.CreateCatalogFile(catalogPath);

    // The item-id index is shared with the usage parser below.
    var itemIdsIndex = new ConcurrentDictionary<string, uint>();
    var catalogParser = new CatalogFileParser(0, itemIdsIndex);
    FileParsingReport catalogReport = catalogParser.ParseCatalogFile(
        catalogPath,
        CancellationToken.None,
        out catalogItems,
        out featureNames);
    Assert.IsTrue(catalogReport.IsCompletedSuccessfuly);

    // create usage items
    IList<SarUsageEvent> usageEvents;
    string usageFolder = Path.Combine(baseFolder, "usage");
    Directory.CreateDirectory(usageFolder);
    filesGenerator.CreateUsageFile(Path.Combine(usageFolder, "usage.csv"), 10000);

    var userIdsIndex = new ConcurrentDictionary<string, uint>();
    var usageParser = new UsageEventsFilesParser(itemIdsIndex, userIdsIndex);
    FileParsingReport usageReport = usageParser.ParseUsageEventFiles(
        usageFolder,
        CancellationToken.None,
        out usageEvents);
    Assert.IsTrue(usageReport.IsCompletedSuccessfuly);

    // Train once per settings instance; the running index only serves the failure message.
    int settingsIndex = 0;
    var sarTrainer = new SarTrainer();
    IDictionary<string, double> catalogFeatureWeights;
    foreach (IModelTrainerSettings trainerSettings in GetAllModelTrainingParameters())
    {
        IPredictorModel trainedModel = sarTrainer.Train(
            trainerSettings,
            usageEvents,
            catalogItems,
            featureNames,
            userIdsIndex.Count,
            itemIdsIndex.Count,
            out catalogFeatureWeights);

        Assert.IsNotNull(
            trainedModel,
            $"Expected training to complete successfully when using settings#{settingsIndex}: {trainerSettings}");
        settingsIndex++;
    }
}
/// <summary>
/// Generates two usage files in the same folder, parses the folder with
/// <c>UsageEventsFilesParser</c> and verifies that the report counts the lines of
/// both files with no errors.
/// </summary>
public void ParseTwoValidUsageFilesTest()
{
    const string baseFolder = nameof(ParseTwoValidUsageFilesTest);
    Directory.CreateDirectory(baseFolder);

    // Arrange: two files generated with 100 and 50 as their size argument.
    var filesGenerator = new ModelTrainingFilesGenerator();
    string firstUsageFile = Path.Combine(baseFolder, "usage1.csv");
    filesGenerator.CreateUsageFile(firstUsageFile, 100);
    string secondUsageFile = Path.Combine(baseFolder, "usage2.csv");
    filesGenerator.CreateUsageFile(secondUsageFile, 50);

    // Act: parse the whole folder; the out value is required by the signature but not inspected.
    IList<SarUsageEvent> parsedEvents;
    var usageParser = new UsageEventsFilesParser();
    FileParsingReport parsingReport = usageParser.ParseUsageEventFiles(
        baseFolder,
        CancellationToken.None,
        out parsedEvents);

    // Assert: the report must account for the lines of both files (100 + 50).
    Assert.IsNotNull(parsingReport);
    Assert.IsTrue(parsingReport.IsCompletedSuccessfuly);
    Assert.AreEqual(150, parsingReport.SuccessfulLinesCount);
    Assert.AreEqual(150, parsingReport.TotalLinesCount);

    // A null error collection and an empty one both mean "no errors".
    bool noErrorsReported = parsingReport.Errors == null || !parsingReport.Errors.Any();
    Assert.IsTrue(noErrorsReported);
}
/// <summary>
/// Trains a model with default parameters (backfilling disabled) and requests
/// recommendations for a single click event without a user id, verifying that
/// only valid recommendations are returned.
/// </summary>
public void GetRecommendationsUsingSmallModelWithDefaultParameters()
{
    const string baseFolder = nameof(GetRecommendationsUsingSmallModelWithDefaultParameters);
    Directory.CreateDirectory(baseFolder);

    // Arrange: one generated usage file; the generator returns the ids kept in warmItems.
    var filesGenerator = new ModelTrainingFilesGenerator(8);
    string usageFolder = Path.Combine(baseFolder, "usage");
    Directory.CreateDirectory(usageFolder);
    string usageFilePath = Path.Combine(usageFolder, "usage.csv");
    IList<string> warmItems = filesGenerator.CreateUsageFile(usageFilePath, 1000);

    var trainingParameters = ModelTrainingParameters.Default;
    trainingParameters.EnableBackfilling = false;

    // Act: train without catalog or evaluation input.
    var modelTrainer = new ModelTrainer();
    ModelTrainResult trainResult = modelTrainer.TrainModel(
        trainingParameters,
        usageFolder,
        null,
        null,
        CancellationToken.None);
    Assert.IsNotNull(trainResult);
    Assert.IsTrue(trainResult.IsCompletedSuccessfuly);

    // Ask for up to 3 recommendations seeded by one click on the first warm item; no user id is passed.
    var recommender = new Recommender(trainResult.Model);
    var seedEvent = new UsageEvent
    {
        ItemId = warmItems.First(),
        EventType = UsageEventType.Click,
        Timestamp = DateTime.UtcNow
    };
    var items = new List<UsageEvent> { seedEvent };
    IList<Recommendation> recommendations = recommender.GetRecommendations(items, null, 3);

    // Assert: a non-empty list of non-null recommendations, each with a positive score and an item id.
    Assert.IsNotNull(recommendations);
    Assert.IsTrue(recommendations.Any());
    Assert.IsTrue(recommendations.All(r => r != null));

    bool allRecommendationsValid = recommendations.All(
        r => r.Score > 0 && !string.IsNullOrWhiteSpace(r.RecommendedItemId));
    Assert.IsTrue(allRecommendationsValid);
}
/// <summary>
/// Trains a model from a generated catalog file and usage file with cold item placement
/// and cold-to-cold recommendations enabled, and verifies the catalog and usage parsing
/// reports on the training result.
/// </summary>
public void TrainSmallModelEnablingColdItemPlacementTest()
{
    const string baseFolder = nameof(TrainSmallModelEnablingColdItemPlacementTest);
    Directory.CreateDirectory(baseFolder);

    // Arrange: a catalog file in the base folder and one usage file in the "usage" sub-folder.
    var filesGenerator = new ModelTrainingFilesGenerator();
    string catalogPath = Path.Combine(baseFolder, "catalog.csv");
    filesGenerator.CreateCatalogFile(catalogPath);

    string usageFolder = Path.Combine(baseFolder, "usage");
    Directory.CreateDirectory(usageFolder);
    string usageFilePath = Path.Combine(usageFolder, "usage.csv");
    filesGenerator.CreateUsageFile(usageFilePath, 30000);

    var trainingParameters = ModelTrainingParameters.Default;
    trainingParameters.EnableColdItemPlacement = true;
    trainingParameters.EnableColdToColdRecommendations = true;

    // Act: the catalog path is supplied; the evaluation folder is not.
    var modelTrainer = new ModelTrainer();
    ModelTrainResult trainResult = modelTrainer.TrainModel(
        trainingParameters,
        usageFolder,
        catalogPath,
        null,
        CancellationToken.None);

    Assert.IsNotNull(trainResult);
    Assert.IsTrue(trainResult.IsCompletedSuccessfuly);

    // Both the catalog and the usage input must have been parsed without errors.
    var catalogReport = trainResult.CatalogFilesParsingReport;
    Assert.IsNotNull(catalogReport);
    Assert.IsTrue(catalogReport.IsCompletedSuccessfuly);
    Assert.IsFalse(catalogReport.HasErrors);

    var usageReport = trainResult.UsageFilesParsingReport;
    Assert.IsNotNull(usageReport);
    Assert.IsTrue(usageReport.IsCompletedSuccessfuly);
    Assert.IsFalse(usageReport.HasErrors);

    // No evaluation input was given, so neither metrics nor an evaluation report are expected.
    Assert.IsNull(trainResult.ModelMetrics);
    Assert.IsNull(trainResult.EvaluationFilesParsingReport);
}
/// <summary>
/// Trains a model with user-to-item recommendations enabled against a substituted
/// <c>IDocumentStore</c> and verifies that the user history is stored in a single
/// <c>AddDocumentsAsync</c> call, covers every generated user, and stays within
/// 100 items per user on aggregate.
/// </summary>
public void TrainSmallModelEnablingUserToItemRecommendationsTest()
{
    const string baseFolder = nameof(TrainSmallModelEnablingUserToItemRecommendationsTest);
    Directory.CreateDirectory(baseFolder);

    // Arrange: a catalog file and one usage file generated for usersCount users.
    int usersCount = 50;
    int usageEventsCount = 30000;
    var generator = new ModelTrainingFilesGenerator(usersCount);

    string catalogFilePath = Path.Combine(baseFolder, "catalog.csv");
    generator.CreateCatalogFile(catalogFilePath);

    string usageFileFolderPath = Path.Combine(baseFolder, "usage");
    Directory.CreateDirectory(usageFileFolderPath);
    generator.CreateUsageFile(Path.Combine(usageFileFolderPath, "usage.csv"), usageEventsCount);

    var parameters = ModelTrainingParameters.Default;
    parameters.EnableUserToItemRecommendations = true;

    // The substitute records the id of every stored document and sums the number of
    // comma-separated entries in each document's content.
    int itemsCount = 0;
    var users = new HashSet<string>();
    IDocumentStore documentStore = Substitute.For<IDocumentStore>();
    documentStore
        .AddDocumentsAsync(Arg.Any<string>(), Arg.Any<IEnumerable<Document>>(), Arg.Any<CancellationToken>())
        .Returns(info =>
        {
            var docs = info.Arg<IEnumerable<Document>>().ToList();
            foreach (Document document in docs)
            {
                users.Add(document.Id);
                itemsCount += document.Content?.Split(',').Length ?? 0;
            }

            return Task.FromResult(docs.Count);
        });

    // Act
    var trainer = new ModelTrainer(documentStore: documentStore);
    ModelTrainResult result = trainer.TrainModel(
        parameters,
        usageFileFolderPath,
        catalogFilePath,
        null,
        CancellationToken.None);

    // expect only one call to document store
    // (Received(...) verifies nothing by itself; the expected member must be invoked on its result.)
    documentStore.Received(1).AddDocumentsAsync(
        Arg.Any<string>(),
        Arg.Any<IEnumerable<Document>>(),
        Arg.Any<CancellationToken>());

    // make sure that all the users got their history stored
    Assert.AreEqual(usersCount, users.Count);

    // make sure the total amount of stored history does not exceed 100 items per user
    Assert.IsTrue(itemsCount <= usersCount * 100);

    Assert.IsNotNull(result);
    Assert.IsTrue(result.IsCompletedSuccessfuly);

    Assert.IsNotNull(result.CatalogFilesParsingReport);
    Assert.IsTrue(result.CatalogFilesParsingReport.IsCompletedSuccessfuly);
    Assert.IsFalse(result.CatalogFilesParsingReport.HasErrors);

    Assert.IsNotNull(result.UsageFilesParsingReport);
    Assert.IsTrue(result.UsageFilesParsingReport.IsCompletedSuccessfuly);
    Assert.IsFalse(result.UsageFilesParsingReport.HasErrors);

    // No evaluation input was given, so neither metrics nor an evaluation report are expected.
    Assert.IsNull(result.ModelMetrics);
    Assert.IsNull(result.EvaluationFilesParsingReport);
}