Exemplo n.º 1
0
        public void TrainSmallModelUsingDefaultParametersTest()
        {
            // Trains a model from one generated usage file using the default training
            // parameters, passing neither a catalog file nor an evaluation folder, and
            // checks which parts of the training result are populated.

            // Arrange: working folders and a single generated usage file.
            const string baseFolder = nameof(TrainSmallModelUsingDefaultParametersTest);
            Directory.CreateDirectory(baseFolder);

            ModelTrainingFilesGenerator filesGenerator = new ModelTrainingFilesGenerator();
            string usageFolder = Path.Combine(baseFolder, "usage");
            Directory.CreateDirectory(usageFolder);

            string usageFile = Path.Combine(usageFolder, "usage.csv");
            filesGenerator.CreateUsageFile(usageFile, 100);

            // Act: train with default parameters; catalog and evaluation inputs are null.
            ModelTrainer modelTrainer = new ModelTrainer();
            ModelTrainResult trainResult = modelTrainer.TrainModel(
                ModelTrainingParameters.Default,
                usageFolder,
                null,
                null,
                CancellationToken.None);

            // Assert: training completed and no catalog report was produced.
            Assert.IsNotNull(trainResult);
            Assert.IsTrue(trainResult.IsCompletedSuccessfuly);
            Assert.IsNull(trainResult.CatalogFilesParsingReport);

            // The usage files must have been parsed without errors.
            Assert.IsNotNull(trainResult.UsageFilesParsingReport);
            Assert.IsTrue(trainResult.UsageFilesParsingReport.IsCompletedSuccessfuly);
            Assert.IsFalse(trainResult.UsageFilesParsingReport.HasErrors);

            // The test expects neither metrics nor an evaluation report in this setup.
            Assert.IsNull(trainResult.ModelMetrics);
            Assert.IsNull(trainResult.EvaluationFilesParsingReport);
        }
Exemplo n.º 2
0
        public void ModelEvaluationTest()
        {
            // Trains a model with an evaluation folder supplied (and no catalog file) and
            // verifies that the result carries clean usage and evaluation parsing reports
            // together with both diversity and precision metrics.
            const string baseFolder = nameof(ModelEvaluationTest);

            Directory.CreateDirectory(baseFolder);

            var    generator           = new ModelTrainingFilesGenerator(20, 50);
            string usageFileFolderPath = Path.Combine(baseFolder, "usage");

            Directory.CreateDirectory(usageFileFolderPath);
            string evaluationFileFolderPath = Path.Combine(baseFolder, "evaluation");

            Directory.CreateDirectory(evaluationFileFolderPath);

            // Generates the training usage file and the matching evaluation usage file.
            generator.CreateEvaluationFiles(Path.Combine(usageFileFolderPath, "usage.csv"), Path.Combine(evaluationFileFolderPath, "evaluationUsage.csv"), 500, 30);

            var trainer = new ModelTrainer();

            var modelTrainingParameters = ModelTrainingParameters.Default;
            ModelTrainResult result     = trainer.TrainModel(modelTrainingParameters, usageFileFolderPath, null, evaluationFileFolderPath, CancellationToken.None);

            Assert.IsNotNull(result);
            Assert.IsTrue(result.IsCompletedSuccessfuly);
            Assert.IsNull(result.CatalogFilesParsingReport);

            Assert.IsNotNull(result.UsageFilesParsingReport);
            Assert.IsTrue(result.UsageFilesParsingReport.IsCompletedSuccessfuly);
            Assert.IsFalse(result.UsageFilesParsingReport.HasErrors);

            // Guard before dereferencing so that a missing evaluation report fails as a
            // clear assertion rather than as a NullReferenceException.
            Assert.IsNotNull(result.EvaluationFilesParsingReport);
            Assert.IsFalse(result.EvaluationFilesParsingReport.HasErrors);

            Assert.IsNotNull(result.ModelMetrics);
            Assert.IsNotNull(result.ModelMetrics.ModelDiversityMetrics);
            Assert.IsNotNull(result.ModelMetrics.ModelPrecisionMetrics);
        }
        public void ParseAValidCatalogFileTest()
        {
            // Generates a catalog file with a known number of items, parses it, and
            // verifies that the report counts every line as successful with no errors.

            // Arrange
            const string baseFolder         = nameof(ParseAValidCatalogFileTest);
            const int    expectedItemsCount = 20;

            Directory.CreateDirectory(baseFolder);

            ModelTrainingFilesGenerator filesGenerator = new ModelTrainingFilesGenerator(itemsCount: expectedItemsCount);
            string catalogFilePath = Path.Combine(baseFolder, "catalog.csv");
            filesGenerator.CreateCatalogFile(catalogFilePath);

            // Act: the parsed items and feature names are returned through out parameters
            // but are not inspected by this test.
            IList<SarCatalogItem> parsedItems;
            string[] parsedFeatureNames;
            CatalogFileParser catalogParser = new CatalogFileParser();
            FileParsingReport parsingReport = catalogParser.ParseCatalogFile(
                catalogFilePath,
                CancellationToken.None,
                out parsedItems,
                out parsedFeatureNames);

            // Assert
            Assert.IsNotNull(parsingReport);
            Assert.IsTrue(parsingReport.IsCompletedSuccessfuly);
            Assert.AreEqual(expectedItemsCount, parsingReport.SuccessfulLinesCount);
            Assert.AreEqual(expectedItemsCount, parsingReport.TotalLinesCount);

            // An absent error collection and an empty one are both acceptable.
            Assert.IsFalse(parsingReport.Errors != null && parsingReport.Errors.Any());
        }
        public void GetRecommendationsUsingUserId()
        {
            // Trains a model with user-to-item recommendations enabled against a
            // substituted document store that keeps the stored documents in memory, then
            // requests recommendations for a user id and checks that the store was
            // queried once for that user and that every recommendation is usable.

            // Arrange: folders and a generated usage file.
            const string baseFolder = nameof(GetRecommendationsUsingUserId);
            Directory.CreateDirectory(baseFolder);

            ModelTrainingFilesGenerator filesGenerator = new ModelTrainingFilesGenerator(8);
            string usageFolder = Path.Combine(baseFolder, "usage");
            Directory.CreateDirectory(usageFolder);

            string usageFile = Path.Combine(usageFolder, "usage.csv");
            IList<string> warmItemIds = filesGenerator.CreateUsageFile(usageFile, 1000);

            var parameters = ModelTrainingParameters.Default;
            parameters.EnableBackfilling = false;
            parameters.EnableUserToItemRecommendations = true;
            parameters.AllowSeedItemsInRecommendations = true;

            // Documents handed to the store are remembered here, keyed by document id.
            Dictionary<string, Document> storedDocuments = null;
            IDocumentStore store = Substitute.For<IDocumentStore>();

            store.AddDocumentsAsync(
                     Arg.Any<string>(),
                     Arg.Any<IEnumerable<Document>>(),
                     Arg.Any<CancellationToken>())
                 .Returns(call =>
                 {
                     IEnumerable<Document> incoming = call.Arg<IEnumerable<Document>>();
                     storedDocuments = incoming.ToDictionary(document => document.Id);
                     return Task.FromResult(storedDocuments.Count);
                 });

            // Lookups are answered from the captured documents using the second argument
            // as the key; nothing is returned while no documents have been stored yet.
            store.GetDocument(Arg.Any<string>(), Arg.Any<string>())
                 .Returns(call => storedDocuments?[call.ArgAt<string>(1)]);

            // Act: train, then ask for recommendations for a known user.
            ModelTrainer modelTrainer = new ModelTrainer(documentStore: store);
            ModelTrainResult trainResult = modelTrainer.TrainModel(parameters, usageFolder, null, null, CancellationToken.None);

            Assert.IsNotNull(trainResult);
            Assert.IsTrue(trainResult.IsCompletedSuccessfuly);

            Recommender modelRecommender = new Recommender(trainResult.Model, store);
            var seedEvent = new UsageEvent
            {
                ItemId    = warmItemIds.First(),
                EventType = UsageEventType.Click,
                Timestamp = DateTime.UtcNow
            };
            var seedEvents = new List<UsageEvent> { seedEvent };

            string userId = filesGenerator.Users.FirstOrDefault();
            IList<Recommendation> results = modelRecommender.GetRecommendations(seedEvents, userId, 3);

            // Assert: the document store was called once with the provided user id.
            store.Received(1).GetDocument(Arg.Any<string>(), userId);

            Assert.IsNotNull(results);
            Assert.IsTrue(results.Any());
            Assert.IsTrue(results.All(item => item != null));
            Assert.IsTrue(results.All(item => item.Score > 0 && !string.IsNullOrWhiteSpace(item.RecommendedItemId)));
        }
Exemplo n.º 5
0
        public void TrainModelWithRangeOfPossibleParametersTest()
        {
            // Parses one generated catalog file and one generated usage file, then trains
            // a model for every settings object yielded by GetAllModelTrainingParameters()
            // and expects each training run to produce a non-null model.
            const string baseFolder = nameof(TrainModelWithRangeOfPossibleParametersTest);
            Directory.CreateDirectory(baseFolder);

            ModelTrainingFilesGenerator filesGenerator = new ModelTrainingFilesGenerator();

            // Catalog: generate the file and parse it, sharing the item id index with
            // the usage parser created below.
            string catalogFile = Path.Combine(baseFolder, "catalog.csv");
            IList<SarCatalogItem> parsedCatalogItems;
            string[] parsedFeatureNames;

            filesGenerator.CreateCatalogFile(catalogFile);
            var itemIndex = new ConcurrentDictionary<string, uint>();
            var catalogFileParser = new CatalogFileParser(0, itemIndex);
            var catalogReport = catalogFileParser.ParseCatalogFile(
                catalogFile,
                CancellationToken.None,
                out parsedCatalogItems,
                out parsedFeatureNames);

            Assert.IsTrue(catalogReport.IsCompletedSuccessfuly);

            // Usage: generate the file into its own folder and parse that folder.
            IList<SarUsageEvent> parsedUsageEvents;
            string usageFolder = Path.Combine(baseFolder, "usage");
            Directory.CreateDirectory(usageFolder);
            filesGenerator.CreateUsageFile(Path.Combine(usageFolder, "usage.csv"), 10000);

            var userIndex = new ConcurrentDictionary<string, uint>();
            var usageParser = new UsageEventsFilesParser(itemIndex, userIndex);
            var usageReport = usageParser.ParseUsageEventFiles(usageFolder, CancellationToken.None, out parsedUsageEvents);

            Assert.IsTrue(usageReport.IsCompletedSuccessfuly);

            // Train once per settings object; the running index only labels failures.
            SarTrainer modelTrainer = new SarTrainer();
            IDictionary<string, double> featureWeights;
            int settingsIndex = 0;

            foreach (IModelTrainerSettings settings in GetAllModelTrainingParameters())
            {
                IPredictorModel trainedModel = modelTrainer.Train(
                    settings,
                    parsedUsageEvents,
                    parsedCatalogItems,
                    parsedFeatureNames,
                    userIndex.Count,
                    itemIndex.Count,
                    out featureWeights);

                Assert.IsNotNull(trainedModel, $"Expected training to complete successfully when using settings#{settingsIndex}: {settings}");
                settingsIndex++;
            }
        }
Exemplo n.º 6
0
        public void ParseTwoValidUsageFilesTest()
        {
            // Writes two generated usage files (100 and 50 events) into the same folder,
            // parses the folder, and expects the report to count all 150 lines as
            // successful with no errors.

            // Arrange
            const string baseFolder = nameof(ParseTwoValidUsageFilesTest);
            Directory.CreateDirectory(baseFolder);

            ModelTrainingFilesGenerator filesGenerator = new ModelTrainingFilesGenerator();
            string firstUsageFile  = Path.Combine(baseFolder, "usage1.csv");
            string secondUsageFile = Path.Combine(baseFolder, "usage2.csv");

            filesGenerator.CreateUsageFile(firstUsageFile, 100);
            filesGenerator.CreateUsageFile(secondUsageFile, 50);

            // Act: the parsed events come back through an out parameter but are not
            // inspected by this test.
            IList<SarUsageEvent> parsedEvents;
            UsageEventsFilesParser usageParser = new UsageEventsFilesParser();
            FileParsingReport parsingReport = usageParser.ParseUsageEventFiles(baseFolder, CancellationToken.None, out parsedEvents);

            // Assert
            Assert.IsNotNull(parsingReport);
            Assert.IsTrue(parsingReport.IsCompletedSuccessfuly);
            Assert.AreEqual(150, parsingReport.SuccessfulLinesCount);
            Assert.AreEqual(150, parsingReport.TotalLinesCount);

            // An absent error collection and an empty one are both acceptable.
            Assert.IsFalse(parsingReport.Errors != null && parsingReport.Errors.Any());
        }
Exemplo n.º 7
0
        public void GetRecommendationsUsingSmallModelWithDefaultParameters()
        {
            // Trains a model on a generated usage file with default parameters except
            // that backfilling is disabled, then requests recommendations for a single
            // click event without a user id and checks that every result is usable.

            // Arrange: folders and a generated usage file.
            const string baseFolder = nameof(GetRecommendationsUsingSmallModelWithDefaultParameters);
            Directory.CreateDirectory(baseFolder);

            ModelTrainingFilesGenerator filesGenerator = new ModelTrainingFilesGenerator(8);
            string usageFolder = Path.Combine(baseFolder, "usage");
            Directory.CreateDirectory(usageFolder);

            string usageFile = Path.Combine(usageFolder, "usage.csv");
            IList<string> warmItemIds = filesGenerator.CreateUsageFile(usageFile, 1000);

            var parameters = ModelTrainingParameters.Default;
            parameters.EnableBackfilling = false;

            // Act: train, then recommend from one seed event.
            ModelTrainer modelTrainer = new ModelTrainer();
            ModelTrainResult trainResult = modelTrainer.TrainModel(parameters, usageFolder, null, null, CancellationToken.None);

            Assert.IsNotNull(trainResult);
            Assert.IsTrue(trainResult.IsCompletedSuccessfuly);

            Recommender modelRecommender = new Recommender(trainResult.Model);
            var seedEvent = new UsageEvent
            {
                ItemId    = warmItemIds.First(),
                EventType = UsageEventType.Click,
                Timestamp = DateTime.UtcNow
            };
            var seedEvents = new List<UsageEvent> { seedEvent };

            IList<Recommendation> results = modelRecommender.GetRecommendations(seedEvents, null, 3);

            // Assert: at least one recommendation, none null, each with a positive
            // score and a non-blank recommended item id.
            Assert.IsNotNull(results);
            Assert.IsTrue(results.Any());
            Assert.IsTrue(results.All(item => item != null));
            Assert.IsTrue(results.All(item => item.Score > 0 && !string.IsNullOrWhiteSpace(item.RecommendedItemId)));
        }
Exemplo n.º 8
0
        public void TrainSmallModelEnablingColdItemPlacementTest()
        {
            // Trains a model from a generated catalog file and usage file with cold item
            // placement and cold-to-cold recommendations switched on, then checks that
            // both the catalog and the usage parsing reports are clean.

            // Arrange: folders, catalog file and usage file.
            const string baseFolder = nameof(TrainSmallModelEnablingColdItemPlacementTest);
            Directory.CreateDirectory(baseFolder);

            ModelTrainingFilesGenerator filesGenerator = new ModelTrainingFilesGenerator();
            string catalogFile = Path.Combine(baseFolder, "catalog.csv");
            filesGenerator.CreateCatalogFile(catalogFile);

            string usageFolder = Path.Combine(baseFolder, "usage");
            Directory.CreateDirectory(usageFolder);
            filesGenerator.CreateUsageFile(Path.Combine(usageFolder, "usage.csv"), 30000);

            var trainingParameters = ModelTrainingParameters.Default;
            trainingParameters.EnableColdItemPlacement         = true;
            trainingParameters.EnableColdToColdRecommendations = true;

            // Act: train with the catalog supplied and no evaluation folder.
            ModelTrainer modelTrainer = new ModelTrainer();
            ModelTrainResult trainResult = modelTrainer.TrainModel(
                trainingParameters,
                usageFolder,
                catalogFile,
                null,
                CancellationToken.None);

            // Assert: overall success.
            Assert.IsNotNull(trainResult);
            Assert.IsTrue(trainResult.IsCompletedSuccessfuly);

            // The catalog file must have been parsed without errors.
            Assert.IsNotNull(trainResult.CatalogFilesParsingReport);
            Assert.IsTrue(trainResult.CatalogFilesParsingReport.IsCompletedSuccessfuly);
            Assert.IsFalse(trainResult.CatalogFilesParsingReport.HasErrors);

            // The usage files must have been parsed without errors.
            Assert.IsNotNull(trainResult.UsageFilesParsingReport);
            Assert.IsTrue(trainResult.UsageFilesParsingReport.IsCompletedSuccessfuly);
            Assert.IsFalse(trainResult.UsageFilesParsingReport.HasErrors);

            // The test expects neither metrics nor an evaluation report in this setup.
            Assert.IsNull(trainResult.ModelMetrics);
            Assert.IsNull(trainResult.EvaluationFilesParsingReport);
        }
Exemplo n.º 9
0
        public void TrainSmallModelEnablingUserToItemRecommendationsTest()
        {
            // Trains a model with user-to-item recommendations enabled against a
            // substituted document store, and verifies that the store received exactly
            // one AddDocumentsAsync call, that a document was stored for every user, and
            // that the total stored history stays within 100 items per user.
            const string baseFolder = nameof(TrainSmallModelEnablingUserToItemRecommendationsTest);

            Directory.CreateDirectory(baseFolder);

            int    usersCount       = 50;
            int    usageEventsCount = 30000;
            var    generator        = new ModelTrainingFilesGenerator(usersCount);
            string catalogFilePath  = Path.Combine(baseFolder, "catalog.csv");

            generator.CreateCatalogFile(catalogFilePath);
            string usageFileFolderPath = Path.Combine(baseFolder, "usage");

            Directory.CreateDirectory(usageFileFolderPath);
            generator.CreateUsageFile(Path.Combine(usageFileFolderPath, "usage.csv"), usageEventsCount);

            var parameters = ModelTrainingParameters.Default;

            parameters.EnableUserToItemRecommendations = true;

            int            itemsCount    = 0;
            var            users         = new HashSet <string>();
            IDocumentStore documentStore = Substitute.For <IDocumentStore>();

            // Record every stored document: its id is collected as a user, and the
            // number of comma-separated entries in its content is added to itemsCount.
            documentStore.AddDocumentsAsync(Arg.Any <string>(), Arg.Any <IEnumerable <Document> >(),
                                            Arg.Any <CancellationToken>())
            .Returns(info =>
            {
                var docs = info.Arg <IEnumerable <Document> >().ToList();
                foreach (Document document in docs)
                {
                    users.Add(document.Id);
                    itemsCount += document.Content?.Split(',').Length ?? 0;
                }

                return(Task.FromResult(docs.Count));
            });

            var trainer             = new ModelTrainer(documentStore: documentStore);
            ModelTrainResult result = trainer.TrainModel(parameters, usageFileFolderPath, catalogFilePath, null,
                                                         CancellationToken.None);

            // Expect exactly one AddDocumentsAsync call on the document store. The
            // Received(...) call must be followed by the member being verified; on its
            // own it checks nothing.
            documentStore.Received(1).AddDocumentsAsync(Arg.Any <string>(), Arg.Any <IEnumerable <Document> >(),
                                                        Arg.Any <CancellationToken>());

            // make sure that all the users got their history stored
            Assert.AreEqual(usersCount, users.Count);

            // make sure the amount of stored history doesn't exceed 100 items per user
            Assert.IsTrue(itemsCount <= usersCount * 100);

            Assert.IsNotNull(result);
            Assert.IsTrue(result.IsCompletedSuccessfuly);

            Assert.IsNotNull(result.CatalogFilesParsingReport);
            Assert.IsTrue(result.CatalogFilesParsingReport.IsCompletedSuccessfuly);
            Assert.IsFalse(result.CatalogFilesParsingReport.HasErrors);

            Assert.IsNotNull(result.UsageFilesParsingReport);
            Assert.IsTrue(result.UsageFilesParsingReport.IsCompletedSuccessfuly);
            Assert.IsFalse(result.UsageFilesParsingReport.HasErrors);
            Assert.IsNull(result.ModelMetrics);
            Assert.IsNull(result.EvaluationFilesParsingReport);
        }