/// <summary>
/// Writes a generated catalog file (the generator is asked for a fixed number of items),
/// parses it with a default-constructed <see cref="CatalogFileParser"/>, and checks that the
/// parsing report counts every requested item as a successfully parsed line with no errors.
/// </summary>
public void ParseAValidCatalogFileTest()
{
    const int expectedItemsCount = 20;
    const string workingFolder = nameof(ParseAValidCatalogFileTest);

    // Arrange: the catalog is written into a folder named after this test.
    Directory.CreateDirectory(workingFolder);
    var filesGenerator = new ModelTrainingFilesGenerator(itemsCount: expectedItemsCount);
    string catalogPath = Path.Combine(workingFolder, "catalog.csv");
    filesGenerator.CreateCatalogFile(catalogPath);

    // Act: the parsed items and feature names are returned through out parameters,
    // but only the report is inspected below.
    IList<SarCatalogItem> parsedItems;
    string[] parsedFeatureNames;
    var catalogParser = new CatalogFileParser();
    FileParsingReport parsingReport = catalogParser.ParseCatalogFile(
        catalogPath,
        CancellationToken.None,
        out parsedItems,
        out parsedFeatureNames);

    // Assert: the report exists, is successful, and accounts for every line.
    Assert.IsNotNull(parsingReport);
    Assert.IsTrue(parsingReport.IsCompletedSuccessfuly);
    Assert.AreEqual(expectedItemsCount, parsingReport.SuccessfulLinesCount);
    Assert.AreEqual(expectedItemsCount, parsingReport.TotalLinesCount);

    // A missing error collection and an empty one are both acceptable.
    bool reportedNoErrors = parsingReport.Errors == null || !parsingReport.Errors.Any();
    Assert.IsTrue(reportedNoErrors);
}
/// <summary>
/// Parses a generated catalog file and a generated usage file, then trains a model once for
/// every settings object returned by <c>GetAllModelTrainingParameters()</c>, asserting that
/// each training run returns a non-null model.
/// </summary>
public void TrainModelWithRangeOfPossibleParametersTest()
{
    const string workingFolder = nameof(TrainModelWithRangeOfPossibleParametersTest);
    const int usageEventsToGenerate = 10000;

    Directory.CreateDirectory(workingFolder);
    var filesGenerator = new ModelTrainingFilesGenerator();

    // Catalog: generate the file, then parse it while filling the shared item-id index.
    string catalogPath = Path.Combine(workingFolder, "catalog.csv");
    filesGenerator.CreateCatalogFile(catalogPath);

    var itemIds = new ConcurrentDictionary<string, uint>();
    var catalogParser = new CatalogFileParser(0, itemIds);
    IList<SarCatalogItem> catalogItems;
    string[] featureNames;
    var catalogReport = catalogParser.ParseCatalogFile(
        catalogPath,
        CancellationToken.None,
        out catalogItems,
        out featureNames);
    Assert.IsTrue(catalogReport.IsCompletedSuccessfuly);

    // Usage: generate a single usage file in its own folder and parse the whole folder.
    // The usage parser is handed the same item-id index the catalog parser was given.
    string usageFolder = Path.Combine(workingFolder, "usage");
    Directory.CreateDirectory(usageFolder);
    filesGenerator.CreateUsageFile(Path.Combine(usageFolder, "usage.csv"), usageEventsToGenerate);

    var userIds = new ConcurrentDictionary<string, uint>();
    var usageParser = new UsageEventsFilesParser(itemIds, userIds);
    IList<SarUsageEvent> usageEvents;
    var usageReport = usageParser.ParseUsageEventFiles(
        usageFolder,
        CancellationToken.None,
        out usageEvents);
    Assert.IsTrue(usageReport.IsCompletedSuccessfuly);

    // Train once per settings object; 'count' only serves to identify the failing settings
    // in the assertion message.
    var sarTrainer = new SarTrainer();
    int count = 0;
    foreach (IModelTrainerSettings settings in GetAllModelTrainingParameters())
    {
        IDictionary<string, double> featureWeights;
        IPredictorModel trainedModel = sarTrainer.Train(
            settings,
            usageEvents,
            catalogItems,
            featureNames,
            userIds.Count,
            itemIds.Count,
            out featureWeights);

        string failureMessage = $"Expected training to complete successfully when using settings#{count}: {settings}";
        Assert.IsNotNull(trainedModel, failureMessage);
        count += 1;
    }
}
/// <summary>
/// Trains a model from generated catalog and usage files with both cold-item placement and
/// cold-to-cold recommendations switched on, and checks that training and both file-parsing
/// reports succeed without errors. No evaluation input is supplied (the fourth argument to
/// <c>TrainModel</c> is <c>null</c>), and the test expects no model metrics and no
/// evaluation parsing report in the result.
/// </summary>
public void TrainSmallModelEnablingColdItemPlacementTest()
{
    const string workingFolder = nameof(TrainSmallModelEnablingColdItemPlacementTest);
    const int usageEventsToGenerate = 30000;

    // Arrange: generated input files live in a folder named after this test.
    Directory.CreateDirectory(workingFolder);
    var filesGenerator = new ModelTrainingFilesGenerator();

    string catalogPath = Path.Combine(workingFolder, "catalog.csv");
    filesGenerator.CreateCatalogFile(catalogPath);

    string usageFolder = Path.Combine(workingFolder, "usage");
    Directory.CreateDirectory(usageFolder);
    filesGenerator.CreateUsageFile(Path.Combine(usageFolder, "usage.csv"), usageEventsToGenerate);

    // Start from the default parameters and enable only the cold-item features under test.
    var trainingParameters = ModelTrainingParameters.Default;
    trainingParameters.EnableColdItemPlacement = true;
    trainingParameters.EnableColdToColdRecommendations = true;

    // Act
    var modelTrainer = new ModelTrainer();
    ModelTrainResult trainResult = modelTrainer.TrainModel(
        trainingParameters,
        usageFolder,
        catalogPath,
        null,
        CancellationToken.None);

    // Assert: overall outcome.
    Assert.IsNotNull(trainResult);
    Assert.IsTrue(trainResult.IsCompletedSuccessfuly);

    // Assert: catalog parsing.
    Assert.IsNotNull(trainResult.CatalogFilesParsingReport);
    Assert.IsTrue(trainResult.CatalogFilesParsingReport.IsCompletedSuccessfuly);
    Assert.IsFalse(trainResult.CatalogFilesParsingReport.HasErrors);

    // Assert: usage parsing.
    Assert.IsNotNull(trainResult.UsageFilesParsingReport);
    Assert.IsTrue(trainResult.UsageFilesParsingReport.IsCompletedSuccessfuly);
    Assert.IsFalse(trainResult.UsageFilesParsingReport.HasErrors);

    // Assert: nothing evaluation-related is produced.
    Assert.IsNull(trainResult.ModelMetrics);
    Assert.IsNull(trainResult.EvaluationFilesParsingReport);
}
/// <summary>
/// Trains a model from generated catalog and usage files with user-to-item recommendations
/// enabled, using a substituted <see cref="IDocumentStore"/>. Checks that the store receives
/// exactly one <c>AddDocumentsAsync</c> call, that one document per generated user is stored,
/// that the stored content stays within 100 comma-separated entries per user on aggregate,
/// and that training and both file-parsing reports succeed without errors.
/// </summary>
public void TrainSmallModelEnablingUserToItemRecommendationsTest()
{
    const string baseFolder = nameof(TrainSmallModelEnablingUserToItemRecommendationsTest);
    Directory.CreateDirectory(baseFolder);

    int usersCount = 50;
    int usageEventsCount = 30000;
    var generator = new ModelTrainingFilesGenerator(usersCount);

    string catalogFilePath = Path.Combine(baseFolder, "catalog.csv");
    generator.CreateCatalogFile(catalogFilePath);

    string usageFileFolderPath = Path.Combine(baseFolder, "usage");
    Directory.CreateDirectory(usageFileFolderPath);
    generator.CreateUsageFile(Path.Combine(usageFileFolderPath, "usage.csv"), usageEventsCount);

    var parameters = ModelTrainingParameters.Default;
    parameters.EnableUserToItemRecommendations = true;

    // Substitute the document store and record what the trainer hands to it: the distinct
    // document ids, and the total number of comma-separated entries across all document
    // contents (a document with null content contributes zero).
    int itemsCount = 0;
    var users = new HashSet<string>();
    IDocumentStore documentStore = Substitute.For<IDocumentStore>();
    documentStore
        .AddDocumentsAsync(Arg.Any<string>(), Arg.Any<IEnumerable<Document>>(), Arg.Any<CancellationToken>())
        .Returns(info =>
        {
            var docs = info.Arg<IEnumerable<Document>>().ToList();
            foreach (Document document in docs)
            {
                users.Add(document.Id);
                itemsCount += document.Content?.Split(',').Length ?? 0;
            }

            // Report every document as added.
            return Task.FromResult(docs.Count);
        });

    var trainer = new ModelTrainer(documentStore: documentStore);
    ModelTrainResult result = trainer.TrainModel(parameters, usageFileFolderPath, catalogFilePath, null, CancellationToken.None);

    // expect only one call to document store
    // ReceivedWithAnyArgs(1) on its own verifies nothing: NSubstitute performs the check only
    // when the member to verify is invoked on the value it returns. The argument values are
    // ignored by ReceivedWithAnyArgs; typed defaults are passed just to select the overload.
    documentStore.ReceivedWithAnyArgs(1).AddDocumentsAsync(
        default(string),
        default(IEnumerable<Document>),
        default(CancellationToken));

    // make sure that all the users got their history stored
    Assert.AreEqual(usersCount, users.Count);

    // make sure the amount of stored history doesn't exceed 100 items per user
    Assert.IsTrue(itemsCount <= usersCount * 100);

    Assert.IsNotNull(result);
    Assert.IsTrue(result.IsCompletedSuccessfuly);

    Assert.IsNotNull(result.CatalogFilesParsingReport);
    Assert.IsTrue(result.CatalogFilesParsingReport.IsCompletedSuccessfuly);
    Assert.IsFalse(result.CatalogFilesParsingReport.HasErrors);

    Assert.IsNotNull(result.UsageFilesParsingReport);
    Assert.IsTrue(result.UsageFilesParsingReport.IsCompletedSuccessfuly);
    Assert.IsFalse(result.UsageFilesParsingReport.HasErrors);

    // No evaluation input was supplied, so no evaluation output is expected.
    Assert.IsNull(result.ModelMetrics);
    Assert.IsNull(result.EvaluationFilesParsingReport);
}