Example #1
0
        public void TrainSmallModelUsingDefaultParametersTest()
        {
            // arrange: a working folder named after the test, holding a 'usage' sub-folder
            string workingFolder = nameof(TrainSmallModelUsingDefaultParametersTest);
            Directory.CreateDirectory(workingFolder);

            var filesGenerator = new ModelTrainingFilesGenerator();
            string usageFolder = Path.Combine(workingFolder, "usage");
            Directory.CreateDirectory(usageFolder);

            // generate 'usage.csv' inside the usage folder
            string usageFile = Path.Combine(usageFolder, "usage.csv");
            filesGenerator.CreateUsageFile(usageFile, 100);

            // act: train with the default parameters, passing neither a catalog nor an evaluation path
            ModelTrainResult trainResult = new ModelTrainer().TrainModel(
                ModelTrainingParameters.Default, usageFolder, null, null, CancellationToken.None);

            // assert: the training completed and no catalog report was produced
            Assert.IsNotNull(trainResult);
            Assert.IsTrue(trainResult.IsCompletedSuccessfuly);
            Assert.IsNull(trainResult.CatalogFilesParsingReport);

            // assert: the usage files were parsed without errors
            var usageReport = trainResult.UsageFilesParsingReport;
            Assert.IsNotNull(usageReport);
            Assert.IsTrue(usageReport.IsCompletedSuccessfuly);
            Assert.IsFalse(usageReport.HasErrors);

            // assert: no evaluation artifacts are expected
            Assert.IsNull(trainResult.ModelMetrics);
            Assert.IsNull(trainResult.EvaluationFilesParsingReport);
        }
Example #2
0
        /// <summary>
        /// Trains a model through the models provider and writes the final status,
        /// completion message and statistics to the models registry
        /// </summary>
        /// <param name="modelId">The id of the model to train</param>
        /// <param name="parameters">The training parameters passed to the models provider</param>
        /// <param name="cancellationToken">The cancellation token forwarded to the provider, the progress handler and the registry</param>
        /// <returns><value>true</value> if the training result reports a successful completion</returns>
        private async Task<bool> TrainModelAsync(Guid modelId, ModelTrainingParameters parameters,
                                                 CancellationToken cancellationToken)
        {
            // an initially-signaled auto reset event, used to ensure that model status updates happen one at a time
            var statusUpdateGate = new AutoResetEvent(true);

            // forward every progress message to the handler that updates the model status message in the registry
            Action<string> onProgressMessage = message =>
                ModelTrainingProgressMessagesEventHandler(message, modelId, statusUpdateGate, cancellationToken);

            Trace.TraceInformation($"Starting model '{modelId}' training");
            ModelTrainResult trainResult =
                await _modelsProvider.TrainAsync(modelId, parameters, onProgressMessage, cancellationToken);

            // translate the training outcome to a model status
            bool succeeded = trainResult.IsCompletedSuccessfuly;
            ModelStatus finalStatus;
            if (succeeded)
            {
                finalStatus = ModelStatus.Completed;
            }
            else
            {
                finalStatus = ModelStatus.Failed;
            }

            Trace.TraceInformation($"Model training completed with status '{finalStatus}'");

            Trace.TraceInformation("Extracting model statistics from the model training result");
            ModelStatistics trainingStatistics = CreateModelStatistics(trainResult, parameters);

            // block until the event is signaled before writing the final status
            Trace.TraceInformation("Wait for any ongoing model status message updates before updating the final status");
            statusUpdateGate.WaitOne();

            Trace.TraceInformation("Update the model status and statistics to the registry");
            await _modelsRegistry.UpdateModelAsync(
                modelId, cancellationToken, finalStatus, trainResult.CompletionMessage, trainingStatistics);

            return succeeded;
        }
Example #3
0
        public void ModelEvaluationTest()
        {
            // arrange: a working folder named after the test with 'usage' and 'evaluation' sub-folders
            const string baseFolder = nameof(ModelEvaluationTest);

            Directory.CreateDirectory(baseFolder);

            var    generator           = new ModelTrainingFilesGenerator(20, 50);
            string usageFileFolderPath = Path.Combine(baseFolder, "usage");

            Directory.CreateDirectory(usageFileFolderPath);
            string evaluationFileFolderPath = Path.Combine(baseFolder, "evaluation");

            Directory.CreateDirectory(evaluationFileFolderPath);

            // generate both the training usage file and the evaluation usage file
            generator.CreateEvaluationFiles(Path.Combine(usageFileFolderPath, "usage.csv"), Path.Combine(evaluationFileFolderPath, "evaluationUsage.csv"), 500, 30);

            var trainer = new ModelTrainer();

            // act: train with the default parameters, providing the evaluation folder (no catalog)
            var modelTrainingParameters = ModelTrainingParameters.Default;
            ModelTrainResult result     = trainer.TrainModel(modelTrainingParameters, usageFileFolderPath, null, evaluationFileFolderPath, CancellationToken.None);

            // assert: the training completed and no catalog report was produced
            Assert.IsNotNull(result);
            Assert.IsTrue(result.IsCompletedSuccessfuly);
            Assert.IsNull(result.CatalogFilesParsingReport);

            // assert: the usage files were parsed without errors
            Assert.IsNotNull(result.UsageFilesParsingReport);
            Assert.IsTrue(result.UsageFilesParsingReport.IsCompletedSuccessfuly);
            Assert.IsFalse(result.UsageFilesParsingReport.HasErrors);

            // assert: the evaluation report exists before inspecting it, so that a missing report
            // fails as an assertion instead of surfacing as a NullReferenceException
            Assert.IsNotNull(result.EvaluationFilesParsingReport);
            Assert.IsFalse(result.EvaluationFilesParsingReport.HasErrors);

            // assert: the evaluation metrics were computed
            Assert.IsNotNull(result.ModelMetrics);
            Assert.IsNotNull(result.ModelMetrics.ModelDiversityMetrics);
            Assert.IsNotNull(result.ModelMetrics.ModelPrecisionMetrics);
        }
        public void GetRecommendationsUsingUserId()
        {
            // arrange: a working folder named after the test, holding a generated usage file
            string workingFolder = nameof(GetRecommendationsUsingUserId);
            Directory.CreateDirectory(workingFolder);

            var filesGenerator = new ModelTrainingFilesGenerator(8);
            string usageFolder = Path.Combine(workingFolder, "usage");
            Directory.CreateDirectory(usageFolder);

            string usageFile = Path.Combine(usageFolder, "usage.csv");
            IList<string> warmItemIds = filesGenerator.CreateUsageFile(usageFile, 1000);

            // start from the default parameters: no backfilling, user-to-item enabled, seed items allowed
            var trainingParameters = ModelTrainingParameters.Default;
            trainingParameters.EnableBackfilling = false;
            trainingParameters.EnableUserToItemRecommendations = true;
            trainingParameters.AllowSeedItemsInRecommendations = true;

            // a substitute document store that keeps the added documents in memory, keyed by document id
            Dictionary<string, Document> storedHistory = null;
            var historyStore = Substitute.For<IDocumentStore>();

            historyStore
                .AddDocumentsAsync(Arg.Any<string>(), Arg.Any<IEnumerable<Document>>(), Arg.Any<CancellationToken>())
                .Returns(callInfo =>
                {
                    IEnumerable<Document> addedDocuments = callInfo.Arg<IEnumerable<Document>>();
                    storedHistory = addedDocuments.ToDictionary(document => document.Id);
                    return Task.FromResult(storedHistory.Count);
                });

            // lookups are served from the in-memory documents, using the second argument as the key
            historyStore
                .GetDocument(Arg.Any<string>(), Arg.Any<string>())
                .Returns(callInfo => storedHistory?[callInfo.ArgAt<string>(1)]);

            // act: train the model using the substitute document store
            var modelTrainer = new ModelTrainer(documentStore: historyStore);
            ModelTrainResult trainResult = modelTrainer.TrainModel(
                trainingParameters, usageFolder, null, null, CancellationToken.None);

            Assert.IsNotNull(trainResult);
            Assert.IsTrue(trainResult.IsCompletedSuccessfuly);

            // act: ask for 3 recommendations given a single click event on the first warm item
            var recommender = new Recommender(trainResult.Model, historyStore);
            var clickEvent = new UsageEvent
            {
                ItemId    = warmItemIds.First(),
                EventType = UsageEventType.Click,
                Timestamp = DateTime.UtcNow
            };
            var usageEvents = new List<UsageEvent> { clickEvent };

            string userId = filesGenerator.Users.FirstOrDefault();
            IList<Recommendation> recommendations = recommender.GetRecommendations(usageEvents, userId, 3);

            // expect the document store to be called once with the provided user id
            historyStore.Received(1).GetDocument(Arg.Any<string>(), userId);

            // assert: at least one recommendation, each non-null, positively scored and with an item id
            Assert.IsNotNull(recommendations);
            Assert.IsTrue(recommendations.Any());
            Assert.IsTrue(recommendations.All(recommendation => recommendation != null));
            Assert.IsTrue(recommendations.All(
                recommendation => recommendation.Score > 0 &&
                                  !string.IsNullOrWhiteSpace(recommendation.RecommendedItemId)));
        }
Example #5
0
        /// <summary>
        /// Trains a model through the models provider and writes the final status,
        /// completion message and statistics to the models registry
        /// </summary>
        /// <param name="modelId">The id of the model to train</param>
        /// <param name="parameters">The training parameters passed to the models provider</param>
        /// <param name="cancellationToken">The cancellation token forwarded to the provider, the progress handler and the registry</param>
        /// <returns><value>true</value> if the training result reports a successful completion</returns>
        private async Task<bool> TrainModelAsync(Guid modelId, ModelTrainingParameters parameters,
                                                 CancellationToken cancellationToken)
        {
            // an initially-signaled auto reset event, used to ensure that model status updates happen one at a time
            var statusUpdateGate = new AutoResetEvent(true);

            // forward every progress message to the handler that updates the model status message in the registry
            Action<string> onProgressMessage = message =>
                ModelTrainingProgressMessagesEventHandler(message, modelId, statusUpdateGate, cancellationToken);

            Trace.TraceInformation($"Iniciando entrenamiento del modelo '{modelId}'.");
            ModelTrainResult trainResult =
                await _modelsProvider.TrainAsync(modelId, parameters, onProgressMessage, cancellationToken);

            // translate the training outcome to a model status
            bool succeeded = trainResult.IsCompletedSuccessfuly;
            ModelStatus finalStatus;
            if (succeeded)
            {
                finalStatus = ModelStatus.Completed;
            }
            else
            {
                finalStatus = ModelStatus.Failed;
            }

            Trace.TraceInformation($"Entrenamiento del Modelo completado con estado '{finalStatus}'");

            Trace.TraceInformation("Extrayendo estadísticas de los resultados de entrenamiento del modelo");
            ModelStatistics trainingStatistics = CreateModelStatistics(trainResult, parameters);

            // block until the event is signaled before writing the final status
            Trace.TraceInformation("Espere a que se actualice el mensaje de estado del modelo en curso antes de actualizar el estado final");
            statusUpdateGate.WaitOne();

            Trace.TraceInformation("Actualice el estado del modelo y estadisticas al registro");
            await _modelsRegistry.UpdateModelAsync(
                modelId, cancellationToken, finalStatus, trainResult.CompletionMessage, trainingStatistics);

            return succeeded;
        }
Example #6
0
        public void GetRecommendationsUsingSmallModelWithDefaultParameters()
        {
            // arrange: a working folder named after the test, holding a generated usage file
            string workingFolder = nameof(GetRecommendationsUsingSmallModelWithDefaultParameters);
            Directory.CreateDirectory(workingFolder);

            var filesGenerator = new ModelTrainingFilesGenerator(8);
            string usageFolder = Path.Combine(workingFolder, "usage");
            Directory.CreateDirectory(usageFolder);

            string usageFile = Path.Combine(usageFolder, "usage.csv");
            IList<string> warmItemIds = filesGenerator.CreateUsageFile(usageFile, 1000);

            // default parameters, with backfilling turned off
            var trainingParameters = ModelTrainingParameters.Default;
            trainingParameters.EnableBackfilling = false;

            // act: train the model from the usage folder only
            var modelTrainer = new ModelTrainer();
            ModelTrainResult trainResult = modelTrainer.TrainModel(
                trainingParameters, usageFolder, null, null, CancellationToken.None);

            Assert.IsNotNull(trainResult);
            Assert.IsTrue(trainResult.IsCompletedSuccessfuly);

            // act: ask for 3 recommendations given a single click event on the first warm item, with no user id
            var recommender = new Recommender(trainResult.Model);
            var clickEvent = new UsageEvent
            {
                ItemId    = warmItemIds.First(),
                EventType = UsageEventType.Click,
                Timestamp = DateTime.UtcNow
            };
            var usageEvents = new List<UsageEvent> { clickEvent };

            IList<Recommendation> recommendations = recommender.GetRecommendations(usageEvents, null, 3);

            // assert: at least one recommendation, each non-null, positively scored and with an item id
            Assert.IsNotNull(recommendations);
            Assert.IsTrue(recommendations.Any());
            Assert.IsTrue(recommendations.All(recommendation => recommendation != null));
            Assert.IsTrue(recommendations.All(
                recommendation => recommendation.Score > 0 &&
                                  !string.IsNullOrWhiteSpace(recommendation.RecommendedItemId)));
        }
Example #7
0
        public void TrainSmallModelEnablingColdItemPlacementTest()
        {
            // arrange: a working folder named after the test, holding a catalog file and a usage folder
            string workingFolder = nameof(TrainSmallModelEnablingColdItemPlacementTest);
            Directory.CreateDirectory(workingFolder);

            var filesGenerator = new ModelTrainingFilesGenerator();
            string catalogFile = Path.Combine(workingFolder, "catalog.csv");
            filesGenerator.CreateCatalogFile(catalogFile);

            string usageFolder = Path.Combine(workingFolder, "usage");
            Directory.CreateDirectory(usageFolder);

            string usageFile = Path.Combine(usageFolder, "usage.csv");
            filesGenerator.CreateUsageFile(usageFile, 30000);

            // default parameters, with cold item placement and cold-to-cold recommendations switched on
            var trainingParameters = ModelTrainingParameters.Default;
            trainingParameters.EnableColdItemPlacement         = true;
            trainingParameters.EnableColdToColdRecommendations = true;

            // act: train using both the usage folder and the catalog file, without an evaluation path
            var modelTrainer = new ModelTrainer();
            ModelTrainResult trainResult = modelTrainer.TrainModel(
                trainingParameters, usageFolder, catalogFile, null, CancellationToken.None);

            // assert: the training completed
            Assert.IsNotNull(trainResult);
            Assert.IsTrue(trainResult.IsCompletedSuccessfuly);

            // assert: the catalog file was parsed without errors
            var catalogReport = trainResult.CatalogFilesParsingReport;
            Assert.IsNotNull(catalogReport);
            Assert.IsTrue(catalogReport.IsCompletedSuccessfuly);
            Assert.IsFalse(catalogReport.HasErrors);

            // assert: the usage files were parsed without errors
            var usageReport = trainResult.UsageFilesParsingReport;
            Assert.IsNotNull(usageReport);
            Assert.IsTrue(usageReport.IsCompletedSuccessfuly);
            Assert.IsFalse(usageReport.HasErrors);

            // assert: no evaluation artifacts are expected
            Assert.IsNull(trainResult.ModelMetrics);
            Assert.IsNull(trainResult.EvaluationFilesParsingReport);
        }
Example #8
0
        /// <summary>
        /// Builds the model statistics from a model training result
        /// </summary>
        /// <param name="result">The training result to read durations, parsing reports, counts and metrics from</param>
        /// <param name="parameters">The training parameters whose relative paths are passed to the parsing reports</param>
        /// <returns>
        /// The populated statistics. The evaluation result is set only when
        /// the evaluation usage relative path is not null/empty/whitespace
        /// </returns>
        private static ModelStatistics CreateModelStatistics(ModelTrainResult result, ModelTrainingParameters parameters)
        {
            var statistics = new ModelStatistics();
            var durations  = result.Duration;

            // durations: total, core training and user history storing
            statistics.TotalDuration = durations.TotalDuration;
            statistics.TrainingDuration = durations.TrainingDuration;
            statistics.StoringUserHistoryDuration = durations.StoringUserHistoryDuration;

            // the catalog parsing report gets the directory part of the catalog file relative path (if any)
            statistics.CatalogParsingReport = CreateParsingReport(
                result.CatalogFilesParsingReport,
                durations.CatalogParsingDuration,
                !string.IsNullOrWhiteSpace(parameters.CatalogFileRelativePath)
                    ? Path.GetDirectoryName(parameters.CatalogFileRelativePath)
                    : null);

            // the usage files parsing report
            statistics.UsageEventsParsingReport = CreateParsingReport(
                result.UsageFilesParsingReport,
                durations.UsageFilesParsingDuration,
                parameters.UsageRelativePath);

            // catalog items, usage items and unique users counts
            statistics.NumberOfCatalogItems = result.CatalogItemsCount;
            statistics.NumberOfUsageItems = result.UniqueItemsCount;
            statistics.NumberOfUsers = result.UniqueUsersCount;

            // the catalog coverage is the ratio of unique usage items to catalog items,
            // and is only available when the catalog items count is known and non-zero
            if (result.CatalogItemsCount != null && result.CatalogItemsCount != 0)
            {
                statistics.CatalogCoverage = (double)result.UniqueItemsCount / result.CatalogItemsCount;
            }
            else
            {
                statistics.CatalogCoverage = null;
            }

            // keep the catalog feature weights only if there are any
            statistics.CatalogFeatureWeights =
                result.CatalogFeatureWeights?.Count > 0 ? result.CatalogFeatureWeights : null;

            // without an evaluation usage path there are no evaluation statistics to add
            if (string.IsNullOrWhiteSpace(parameters.EvaluationUsageRelativePath))
            {
                return statistics;
            }

            // evaluation duration, metrics and the evaluation usage files parsing report
            statistics.EvaluationResult = new ModelEvaluationResult
            {
                Duration = durations.EvaluationDuration,
                Metrics = result.ModelMetrics,
                EvaluationUsageEventsParsingReport = CreateParsingReport(
                    result.EvaluationFilesParsingReport,
                    durations.EvaluationUsageFilesParsingDuration,
                    parameters.EvaluationUsageRelativePath)
            };

            return statistics;
        }
Example #9
0
        /// <summary>
        /// Trains new model
        /// </summary>
        /// <remarks>
        /// The training files are downloaded to a temporary local folder that is deleted when the method exits.
        /// A failure to delete that folder is traced as a warning and does not affect the outcome.
        /// </remarks>
        /// <param name="modelId">Model ID of the model to create</param>
        /// <param name="trainingParameters">Parameters of the new model to train</param>
        /// <param name="progressMessageReportDelegate">A delegate for handling progress messages; may be null</param>
        /// <param name="cancellationToken">A cancellation token used to abort the operation</param>
        /// <returns>The model training result</returns>
        public async Task <ModelTrainResult> TrainAsync(Guid modelId, ModelTrainingParameters trainingParameters,
                                                        Action <string> progressMessageReportDelegate, CancellationToken cancellationToken)
        {
            Trace.TraceVerbose($"Model training started for model with id '{modelId}'.");

            // fall back to a no-op delegate so progress can be reported unconditionally
            progressMessageReportDelegate = progressMessageReportDelegate ?? (_ => { });

            // create a temporary local folder for the model training files
            string trainingTempPath = Path.Combine(_trainedModelsLocalRootPath,
                                                   Path.GetFileNameWithoutExtension(Path.GetRandomFileName()));

            Directory.CreateDirectory(trainingTempPath);
            IDocumentStore modelDocumentStore = null;

            try
            {
                // report progress
                progressMessageReportDelegate("Downloading Training blobs");

                // download the training files
                TrainingLocalFilePaths localFilePaths =
                    await DownloadTrainingBlobsAsync(modelId, trainingTempPath, trainingParameters, cancellationToken);

                // check if the operation was cancelled
                cancellationToken.ThrowIfCancellationRequested();

                // create user history store if user-to-item is enabled
                if (trainingParameters.EnableUserToItemRecommendations)
                {
                    Trace.TraceInformation($"Creating user history document store for model '{modelId}'");
                    modelDocumentStore = _documentStoreProvider.GetDocumentStore(modelId);
                    modelDocumentStore.CreateIfNotExists();
                }

                // create a model trainer (the document store stays null when user-to-item is disabled)
                var modelTrainer = new ModelTrainer(new Tracer(nameof(ModelTrainer)), modelDocumentStore,
                                                    progressMessageReportDelegate);

                // train the model
                ModelTrainResult result = TrainModel(modelTrainer, modelId, trainingParameters,
                                                     localFilePaths, cancellationToken);
                if (!result.IsCompletedSuccessfuly)
                {
                    // a failed training is returned as is - nothing is serialized or uploaded
                    Trace.TraceWarning($"Model training failed for model with id '{modelId}'.");
                    return(result);
                }

                // serialize and upload the trained model
                using (Stream modelStream = new MemoryStream())
                {
                    Trace.TraceInformation("Serializing the trained model to a stream");
                    SerializeTrainedModel(result.Model, modelStream, modelId);

                    // rewind the stream before reading
                    modelStream.Seek(0, SeekOrigin.Begin);

                    // upload the serialized model to blob storage
                    await UploadTrainedModelAsync(modelStream, modelId, cancellationToken);
                }

                // return the result
                Trace.TraceInformation($"Model training completed for model with id '{modelId}'. Result: {result}");
                return(result);
            }
            finally
            {
                Trace.TraceInformation($"Deleting the training temporary local folder '{trainingTempPath}'.");
                try
                {
                    Directory.Delete(trainingTempPath, true);
                }
                catch (Exception ex) when (ex is IOException || ex is UnauthorizedAccessException)
                {
                    // the cleanup is best-effort: an exception escaping this finally block would replace
                    // any in-flight training exception, or fail a training that already completed
                    Trace.TraceWarning(
                        $"Failed deleting the training temporary local folder '{trainingTempPath}'. Error: {ex.Message}");
                }
            }
        }
Example #10
0
        public void TrainSmallModelEnablingUserToItemRecommendationsTest()
        {
            // arrange: a working folder named after the test, holding a catalog file and a usage folder
            const string baseFolder = nameof(TrainSmallModelEnablingUserToItemRecommendationsTest);

            Directory.CreateDirectory(baseFolder);

            int    usersCount       = 50;
            int    usageEventsCount = 30000;
            var    generator        = new ModelTrainingFilesGenerator(usersCount);
            string catalogFilePath  = Path.Combine(baseFolder, "catalog.csv");

            generator.CreateCatalogFile(catalogFilePath);
            string usageFileFolderPath = Path.Combine(baseFolder, "usage");

            Directory.CreateDirectory(usageFileFolderPath);
            generator.CreateUsageFile(Path.Combine(usageFileFolderPath, "usage.csv"), usageEventsCount);

            var parameters = ModelTrainingParameters.Default;

            parameters.EnableUserToItemRecommendations = true;

            // a substitute document store that records the ids of the added documents
            // and counts the comma separated entries found in their content
            int            itemsCount    = 0;
            var            users         = new HashSet <string>();
            IDocumentStore documentStore = Substitute.For <IDocumentStore>();

            documentStore.AddDocumentsAsync(Arg.Any <string>(), Arg.Any <IEnumerable <Document> >(),
                                            Arg.Any <CancellationToken>())
            .Returns(info =>
            {
                var docs = info.Arg <IEnumerable <Document> >().ToList();
                foreach (Document document in docs)
                {
                    users.Add(document.Id);
                    itemsCount += document.Content?.Split(',').Length ?? 0;
                }

                return(Task.FromResult(docs.Count));
            });

            // act: train using the usage folder and the catalog file, without an evaluation path
            var trainer             = new ModelTrainer(documentStore: documentStore);
            ModelTrainResult result = trainer.TrainModel(parameters, usageFileFolderPath, catalogFilePath, null,
                                                         CancellationToken.None);

            // expect only one call for adding documents to the document store.
            // Received(...) must be followed by the expected call - on its own it verifies nothing
            documentStore.Received(1).AddDocumentsAsync(Arg.Any <string>(), Arg.Any <IEnumerable <Document> >(),
                                                        Arg.Any <CancellationToken>());

            // make sure that all the users got their history stored
            Assert.AreEqual(usersCount, users.Count);

            // make sure the amount of stored history doesn't exceed 100 items per user
            Assert.IsTrue(itemsCount <= usersCount * 100);

            // assert: the training completed
            Assert.IsNotNull(result);
            Assert.IsTrue(result.IsCompletedSuccessfuly);

            // assert: the catalog file was parsed without errors
            Assert.IsNotNull(result.CatalogFilesParsingReport);
            Assert.IsTrue(result.CatalogFilesParsingReport.IsCompletedSuccessfuly);
            Assert.IsFalse(result.CatalogFilesParsingReport.HasErrors);

            // assert: the usage files were parsed without errors and no evaluation artifacts exist
            Assert.IsNotNull(result.UsageFilesParsingReport);
            Assert.IsTrue(result.UsageFilesParsingReport.IsCompletedSuccessfuly);
            Assert.IsFalse(result.UsageFilesParsingReport.HasErrors);
            Assert.IsNull(result.ModelMetrics);
            Assert.IsNull(result.EvaluationFilesParsingReport);
        }