/// <summary>
/// Trains a model from a small generated usage file using the default training parameters
/// and verifies that training succeeds and that only the usage parsing report is populated.
/// </summary>
public void TrainSmallModelUsingDefaultParametersTest()
{
    // lay out the working folders: <test name>/usage
    const string baseFolder = nameof(TrainSmallModelUsingDefaultParametersTest);
    Directory.CreateDirectory(baseFolder);

    var filesGenerator = new ModelTrainingFilesGenerator();
    string usageFolder = Path.Combine(baseFolder, "usage");
    Directory.CreateDirectory(usageFolder);

    // generate the single usage file the trainer will read
    string usageFile = Path.Combine(usageFolder, "usage.csv");
    filesGenerator.CreateUsageFile(usageFile, 100);

    // train with no catalog file and no evaluation folder
    var modelTrainer = new ModelTrainer();
    ModelTrainResult result = modelTrainer.TrainModel(
        ModelTrainingParameters.Default,
        usageFolder,
        null,
        null,
        CancellationToken.None);

    // the training itself must have succeeded
    Assert.IsNotNull(result);
    Assert.IsTrue(result.IsCompletedSuccessfuly);

    // no catalog was supplied, so no catalog report is expected
    Assert.IsNull(result.CatalogFilesParsingReport);

    // the usage files must have been parsed cleanly
    Assert.IsNotNull(result.UsageFilesParsingReport);
    Assert.IsTrue(result.UsageFilesParsingReport.IsCompletedSuccessfuly);
    Assert.IsFalse(result.UsageFilesParsingReport.HasErrors);

    // no evaluation was requested, so neither metrics nor an evaluation report are expected
    Assert.IsNull(result.ModelMetrics);
    Assert.IsNull(result.EvaluationFilesParsingReport);
}
/// <summary>
/// Trains the model through the models provider and then writes the final status,
/// completion message and statistics to the models registry
/// </summary>
/// <param name="modelId">Id of the model to train</param>
/// <param name="parameters">The model training parameters</param>
/// <param name="cancellationToken">Token passed on to the training and registry calls</param>
/// <returns><c>true</c> if the training result reports a successful completion, <c>false</c> otherwise</returns>
private async Task<bool> TrainModelAsync(
    Guid modelId,
    ModelTrainingParameters parameters,
    CancellationToken cancellationToken)
{
    // initially-signaled event, shared with the progress handler so that
    // model status updates happen one at a time
    // NOTE(review): AutoResetEvent is IDisposable and is never disposed here - confirm the
    // progress handler cannot touch it after this method returns before adding a 'using'
    var statusUpdateGate = new AutoResetEvent(true);

    // forward every progress message to the handler that updates the status message in the registry
    Action<string> onProgressMessage = message =>
    {
        ModelTrainingProgressMessagesEventHandler(message, modelId, statusUpdateGate, cancellationToken);
    };

    Trace.TraceInformation($"Starting model '{modelId}' training");
    ModelTrainResult result =
        await _modelsProvider.TrainAsync(modelId, parameters, onProgressMessage, cancellationToken);

    // translate the training outcome into the final model status
    bool succeeded = result.IsCompletedSuccessfuly;
    ModelStatus finalStatus;
    if (succeeded)
    {
        finalStatus = ModelStatus.Completed;
    }
    else
    {
        finalStatus = ModelStatus.Failed;
    }

    Trace.TraceInformation($"Model training completed with status '{finalStatus}'");

    Trace.TraceInformation("Extracting model statistics from the model training result");
    ModelStatistics statistics = CreateModelStatistics(result, parameters);

    // block until the event is signaled so the final update does not overlap a pending one
    Trace.TraceInformation("Wait for any ongoing model status message updates before updating the final status");
    statusUpdateGate.WaitOne();

    Trace.TraceInformation("Update the model status and statistics to the registry");
    await _modelsRegistry.UpdateModelAsync(
        modelId,
        cancellationToken,
        finalStatus,
        result.CompletionMessage,
        statistics);

    return succeeded;
}
/// <summary>
/// Trains a model with both usage and evaluation files and verifies that training succeeds,
/// that both sets of files are parsed without errors and that model metrics are produced.
/// </summary>
public void ModelEvaluationTest()
{
    // lay out the working folders: <test name>/usage and <test name>/evaluation
    const string baseFolder = nameof(ModelEvaluationTest);
    Directory.CreateDirectory(baseFolder);

    var generator = new ModelTrainingFilesGenerator(20, 50);

    string usageFileFolderPath = Path.Combine(baseFolder, "usage");
    Directory.CreateDirectory(usageFileFolderPath);

    string evaluationFileFolderPath = Path.Combine(baseFolder, "evaluation");
    Directory.CreateDirectory(evaluationFileFolderPath);

    // generate the usage file and its matching evaluation usage file
    generator.CreateEvaluationFiles(
        Path.Combine(usageFileFolderPath, "usage.csv"),
        Path.Combine(evaluationFileFolderPath, "evaluationUsage.csv"),
        500,
        30);

    // train with the default parameters, no catalog and the evaluation folder
    var trainer = new ModelTrainer();
    var modelTrainingParameters = ModelTrainingParameters.Default;
    ModelTrainResult result = trainer.TrainModel(
        modelTrainingParameters,
        usageFileFolderPath,
        null,
        evaluationFileFolderPath,
        CancellationToken.None);

    // the training itself must have succeeded
    Assert.IsNotNull(result);
    Assert.IsTrue(result.IsCompletedSuccessfuly);

    // no catalog was supplied, so no catalog report is expected
    Assert.IsNull(result.CatalogFilesParsingReport);

    // the usage files must have been parsed cleanly
    Assert.IsNotNull(result.UsageFilesParsingReport);
    Assert.IsTrue(result.UsageFilesParsingReport.IsCompletedSuccessfuly);
    Assert.IsFalse(result.UsageFilesParsingReport.HasErrors);

    // assert the evaluation report exists before dereferencing it, so that a missing report
    // fails as an assertion rather than as a NullReferenceException
    Assert.IsNotNull(result.EvaluationFilesParsingReport);
    Assert.IsFalse(result.EvaluationFilesParsingReport.HasErrors);

    // evaluation was requested, so both kinds of metrics must be present
    Assert.IsNotNull(result.ModelMetrics);
    Assert.IsNotNull(result.ModelMetrics.ModelDiversityMetrics);
    Assert.IsNotNull(result.ModelMetrics.ModelPrecisionMetrics);
}
/// <summary>
/// Trains a model with user-to-item recommendations enabled against a substituted document store,
/// then requests recommendations for a user id and verifies that the store is queried once for
/// that user and that valid recommendations are returned.
/// </summary>
public void GetRecommendationsUsingUserId()
{
    // lay out the working folders: <test name>/usage
    const string baseFolder = nameof(GetRecommendationsUsingUserId);
    Directory.CreateDirectory(baseFolder);

    var filesGenerator = new ModelTrainingFilesGenerator(8);
    string usageFolder = Path.Combine(baseFolder, "usage");
    Directory.CreateDirectory(usageFolder);
    string usageFile = Path.Combine(usageFolder, "usage.csv");
    IList<string> warmItems = filesGenerator.CreateUsageFile(usageFile, 1000);

    // default parameters, without backfilling, with user-to-item recommendations and seed items allowed
    var trainingParameters = ModelTrainingParameters.Default;
    trainingParameters.EnableBackfilling = false;
    trainingParameters.EnableUserToItemRecommendations = true;
    trainingParameters.AllowSeedItemsInRecommendations = true;

    // substitute document store: remembers the documents handed to it, indexed by document id,
    // and serves them back on lookup
    Dictionary<string, Document> storedHistory = null;
    IDocumentStore documentStore = Substitute.For<IDocumentStore>();
    documentStore
        .AddDocumentsAsync(Arg.Any<string>(), Arg.Any<IEnumerable<Document>>(), Arg.Any<CancellationToken>())
        .Returns(call =>
        {
            storedHistory = call.Arg<IEnumerable<Document>>().ToDictionary(document => document.Id);
            return Task.FromResult(storedHistory.Count);
        });
    documentStore
        .GetDocument(Arg.Any<string>(), Arg.Any<string>())
        .Returns(call => storedHistory == null ? null : storedHistory[call.ArgAt<string>(1)]);

    // train the model with the substituted store
    var modelTrainer = new ModelTrainer(documentStore: documentStore);
    ModelTrainResult result = modelTrainer.TrainModel(
        trainingParameters,
        usageFolder,
        null,
        null,
        CancellationToken.None);
    Assert.IsNotNull(result);
    Assert.IsTrue(result.IsCompletedSuccessfuly);

    // ask for 3 recommendations, seeded with a single click on the first warm item
    var recommender = new Recommender(result.Model, documentStore);
    var seedEvent = new UsageEvent
    {
        ItemId = warmItems.First(),
        EventType = UsageEventType.Click,
        Timestamp = DateTime.UtcNow
    };
    var usageEvents = new List<UsageEvent> { seedEvent };
    string userId = filesGenerator.Users.FirstOrDefault();
    IList<Recommendation> recommendations = recommender.GetRecommendations(usageEvents, userId, 3);

    // expect the document store to be called once with the provided user id
    documentStore.Received(1).GetDocument(Arg.Any<string>(), userId);

    // every recommendation must be non-null, positively scored and name an item
    Assert.IsNotNull(recommendations);
    Assert.IsTrue(recommendations.Any());
    Assert.IsTrue(recommendations.All(recommendation => recommendation != null));
    Assert.IsTrue(recommendations.All(recommendation =>
        recommendation.Score > 0 && !string.IsNullOrWhiteSpace(recommendation.RecommendedItemId)));
}
/// <summary>
/// Trains the model through the models provider and then writes the final status,
/// completion message and statistics to the models registry
/// </summary>
/// <param name="modelId">Id of the model to train</param>
/// <param name="parameters">The model training parameters</param>
/// <param name="cancellationToken">Token passed on to the training and registry calls</param>
/// <returns><c>true</c> if the training result reports a successful completion, <c>false</c> otherwise</returns>
private async Task<bool> TrainModelAsync(
    Guid modelId,
    ModelTrainingParameters parameters,
    CancellationToken cancellationToken)
{
    // initially-signaled event, shared with the progress handler so that
    // model status updates happen one at a time
    // NOTE(review): AutoResetEvent is IDisposable and is never disposed here - confirm the
    // progress handler cannot touch it after this method returns before adding a 'using'
    var statusUpdateGate = new AutoResetEvent(true);

    // forward every progress message to the handler that updates the status message in the registry
    Action<string> onProgressMessage = message =>
    {
        ModelTrainingProgressMessagesEventHandler(message, modelId, statusUpdateGate, cancellationToken);
    };

    Trace.TraceInformation($"Iniciando entrenamiento del modelo '{modelId}'.");
    ModelTrainResult result =
        await _modelsProvider.TrainAsync(modelId, parameters, onProgressMessage, cancellationToken);

    // translate the training outcome into the final model status
    bool succeeded = result.IsCompletedSuccessfuly;
    ModelStatus finalStatus;
    if (succeeded)
    {
        finalStatus = ModelStatus.Completed;
    }
    else
    {
        finalStatus = ModelStatus.Failed;
    }

    Trace.TraceInformation($"Entrenamiento del Modelo completado con estado '{finalStatus}'");

    Trace.TraceInformation("Extrayendo estadísticas de los resultados de entrenamiento del modelo");
    ModelStatistics statistics = CreateModelStatistics(result, parameters);

    // block until the event is signaled so the final update does not overlap a pending one
    Trace.TraceInformation("Espere a que se actualice el mensaje de estado del modelo en curso antes de actualizar el estado final");
    statusUpdateGate.WaitOne();

    Trace.TraceInformation("Actualice el estado del modelo y estadisticas al registro");
    await _modelsRegistry.UpdateModelAsync(
        modelId,
        cancellationToken,
        finalStatus,
        result.CompletionMessage,
        statistics);

    return succeeded;
}
/// <summary>
/// Trains a small model with the default parameters (backfilling disabled) and verifies that
/// valid recommendations are returned for a single seed item when no user id is given.
/// </summary>
public void GetRecommendationsUsingSmallModelWithDefaultParameters()
{
    // lay out the working folders: <test name>/usage
    const string baseFolder = nameof(GetRecommendationsUsingSmallModelWithDefaultParameters);
    Directory.CreateDirectory(baseFolder);

    var filesGenerator = new ModelTrainingFilesGenerator(8);
    string usageFolder = Path.Combine(baseFolder, "usage");
    Directory.CreateDirectory(usageFolder);
    string usageFile = Path.Combine(usageFolder, "usage.csv");
    IList<string> warmItems = filesGenerator.CreateUsageFile(usageFile, 1000);

    // default parameters, with backfilling turned off
    var trainingParameters = ModelTrainingParameters.Default;
    trainingParameters.EnableBackfilling = false;

    // train with no catalog file and no evaluation folder
    var modelTrainer = new ModelTrainer();
    ModelTrainResult result = modelTrainer.TrainModel(
        trainingParameters,
        usageFolder,
        null,
        null,
        CancellationToken.None);
    Assert.IsNotNull(result);
    Assert.IsTrue(result.IsCompletedSuccessfuly);

    // ask for 3 recommendations, seeded with a single click on the first warm item
    var recommender = new Recommender(result.Model);
    var seedEvent = new UsageEvent
    {
        ItemId = warmItems.First(),
        EventType = UsageEventType.Click,
        Timestamp = DateTime.UtcNow
    };
    var usageEvents = new List<UsageEvent> { seedEvent };
    IList<Recommendation> recommendations = recommender.GetRecommendations(usageEvents, null, 3);

    // every recommendation must be non-null, positively scored and name an item
    Assert.IsNotNull(recommendations);
    Assert.IsTrue(recommendations.Any());
    Assert.IsTrue(recommendations.All(recommendation => recommendation != null));
    Assert.IsTrue(recommendations.All(recommendation =>
        recommendation.Score > 0 && !string.IsNullOrWhiteSpace(recommendation.RecommendedItemId)));
}
/// <summary>
/// Trains a model from generated catalog and usage files with cold item placement and
/// cold-to-cold recommendations enabled, and verifies that training succeeds and that both
/// the catalog and the usage files are parsed without errors.
/// </summary>
public void TrainSmallModelEnablingColdItemPlacementTest()
{
    // lay out the working folder and generate the catalog file directly under it
    const string baseFolder = nameof(TrainSmallModelEnablingColdItemPlacementTest);
    Directory.CreateDirectory(baseFolder);

    var filesGenerator = new ModelTrainingFilesGenerator();
    string catalogFile = Path.Combine(baseFolder, "catalog.csv");
    filesGenerator.CreateCatalogFile(catalogFile);

    // generate the usage file under <test name>/usage
    string usageFolder = Path.Combine(baseFolder, "usage");
    Directory.CreateDirectory(usageFolder);
    string usageFile = Path.Combine(usageFolder, "usage.csv");
    filesGenerator.CreateUsageFile(usageFile, 30000);

    // default parameters plus the two cold item options under test
    var trainingParameters = ModelTrainingParameters.Default;
    trainingParameters.EnableColdItemPlacement = true;
    trainingParameters.EnableColdToColdRecommendations = true;

    // train with the catalog file and no evaluation folder
    var modelTrainer = new ModelTrainer();
    ModelTrainResult result = modelTrainer.TrainModel(
        trainingParameters,
        usageFolder,
        catalogFile,
        null,
        CancellationToken.None);

    // the training itself must have succeeded
    Assert.IsNotNull(result);
    Assert.IsTrue(result.IsCompletedSuccessfuly);

    // the catalog must have been parsed cleanly
    Assert.IsNotNull(result.CatalogFilesParsingReport);
    Assert.IsTrue(result.CatalogFilesParsingReport.IsCompletedSuccessfuly);
    Assert.IsFalse(result.CatalogFilesParsingReport.HasErrors);

    // the usage files must have been parsed cleanly
    Assert.IsNotNull(result.UsageFilesParsingReport);
    Assert.IsTrue(result.UsageFilesParsingReport.IsCompletedSuccessfuly);
    Assert.IsFalse(result.UsageFilesParsingReport.HasErrors);

    // no evaluation was requested, so neither metrics nor an evaluation report are expected
    Assert.IsNull(result.ModelMetrics);
    Assert.IsNull(result.EvaluationFilesParsingReport);
}
/// <summary>
/// Builds the model statistics from a model training result
/// </summary>
/// <param name="result">The training result supplying durations, counts, parsing reports and metrics</param>
/// <param name="parameters">The training parameters supplying the catalog, usage and evaluation relative paths</param>
/// <returns>
/// The populated statistics; the evaluation result is set only when the parameters
/// carry a non-blank evaluation usage relative path
/// </returns>
private static ModelStatistics CreateModelStatistics(ModelTrainResult result, ModelTrainingParameters parameters)
{
    var durations = result.Duration;

    // the catalog report gets the directory part of the catalog file path, or null when no path was given
    string catalogFolder = null;
    if (!string.IsNullOrWhiteSpace(parameters.CatalogFileRelativePath))
    {
        catalogFolder = Path.GetDirectoryName(parameters.CatalogFileRelativePath);
    }

    var statistics = new ModelStatistics();

    // total, core training and user history storing durations
    statistics.TotalDuration = durations.TotalDuration;
    statistics.TrainingDuration = durations.TrainingDuration;
    statistics.StoringUserHistoryDuration = durations.StoringUserHistoryDuration;

    // catalog and usage files parsing reports
    statistics.CatalogParsingReport = CreateParsingReport(
        result.CatalogFilesParsingReport,
        durations.CatalogParsingDuration,
        catalogFolder);
    statistics.UsageEventsParsingReport = CreateParsingReport(
        result.UsageFilesParsingReport,
        durations.UsageFilesParsingDuration,
        parameters.UsageRelativePath);

    // catalog item, usage item and user counts
    statistics.NumberOfCatalogItems = result.CatalogItemsCount;
    statistics.NumberOfUsageItems = result.UniqueItemsCount;
    statistics.NumberOfUsers = result.UniqueUsersCount;

    // the coverage is defined only when the catalog items count is known and non-zero
    if (result.CatalogItemsCount != null && result.CatalogItemsCount != 0)
    {
        statistics.CatalogCoverage = (double)result.UniqueItemsCount / result.CatalogItemsCount;
    }
    else
    {
        statistics.CatalogCoverage = null;
    }

    // report the catalog feature weights only if any were calculated
    var featureWeights = result.CatalogFeatureWeights;
    statistics.CatalogFeatureWeights = featureWeights?.Count > 0 ? featureWeights : null;

    // without an evaluation usage path there are no evaluation statistics to add
    if (string.IsNullOrWhiteSpace(parameters.EvaluationUsageRelativePath))
    {
        return statistics;
    }

    // evaluation duration, metrics and the evaluation usage files parsing report
    statistics.EvaluationResult = new ModelEvaluationResult
    {
        Duration = durations.EvaluationDuration,
        Metrics = result.ModelMetrics,
        EvaluationUsageEventsParsingReport = CreateParsingReport(
            result.EvaluationFilesParsingReport,
            durations.EvaluationUsageFilesParsingDuration,
            parameters.EvaluationUsageRelativePath)
    };

    return statistics;
}
/// <summary>
/// Trains a new model: downloads the training blobs to a temporary local folder, trains the model
/// and, when training succeeds, serializes the trained model and uploads it
/// </summary>
/// <param name="modelId">Model ID of the model to create</param>
/// <param name="trainingParameters">Parameters of the new model to train</param>
/// <param name="progressMessageReportDelegate">A delegate for handling progress messages; may be null</param>
/// <param name="cancellationToken">A cancellation token used to abort the operation</param>
/// <returns>The model training result, whether or not the training completed successfully</returns>
public async Task<ModelTrainResult> TrainAsync(
    Guid modelId,
    ModelTrainingParameters trainingParameters,
    Action<string> progressMessageReportDelegate,
    CancellationToken cancellationToken)
{
    Trace.TraceVerbose($"Model training started for model with id '{modelId}'.");

    // fall back to a no-op handler so progress can be reported unconditionally
    if (progressMessageReportDelegate == null)
    {
        progressMessageReportDelegate = _ => { };
    }

    // each training run works in its own randomly named folder under the local root path
    string randomFolderName = Path.GetFileNameWithoutExtension(Path.GetRandomFileName());
    string trainingTempPath = Path.Combine(_trainedModelsLocalRootPath, randomFolderName);
    Directory.CreateDirectory(trainingTempPath);

    try
    {
        // fetch the training files into the temporary folder
        progressMessageReportDelegate("Downloading Training blobs");
        TrainingLocalFilePaths localFilePaths =
            await DownloadTrainingBlobsAsync(modelId, trainingTempPath, trainingParameters, cancellationToken);

        // stop here if the operation was cancelled while downloading
        cancellationToken.ThrowIfCancellationRequested();

        // a user history store is needed only when user-to-item recommendations are enabled
        IDocumentStore userHistoryStore = null;
        if (trainingParameters.EnableUserToItemRecommendations)
        {
            Trace.TraceInformation($"Creating user history document store for model '{modelId}'");
            userHistoryStore = _documentStoreProvider.GetDocumentStore(modelId);
            userHistoryStore.CreateIfNotExists();
        }

        // train the model
        var tracer = new Tracer(nameof(ModelTrainer));
        var modelTrainer = new ModelTrainer(tracer, userHistoryStore, progressMessageReportDelegate);
        ModelTrainResult result =
            TrainModel(modelTrainer, modelId, trainingParameters, localFilePaths, cancellationToken);

        if (result.IsCompletedSuccessfuly)
        {
            // serialize the trained model into memory and upload it
            using (Stream modelStream = new MemoryStream())
            {
                Trace.TraceInformation("Serializing the trained model to a stream");
                SerializeTrainedModel(result.Model, modelStream, modelId);

                // rewind the stream so the upload reads it from the start
                modelStream.Position = 0;

                await UploadTrainedModelAsync(modelStream, modelId, cancellationToken);
            }

            Trace.TraceInformation($"Model training completed for model with id '{modelId}'. Result: {result}");
        }
        else
        {
            // a failed training is reported through the result; nothing is uploaded
            Trace.TraceWarning($"Model training failed for model with id '{modelId}'.");
        }

        return result;
    }
    finally
    {
        // the temporary folder is removed on every exit path, including exceptions
        Trace.TraceInformation($"Deleting the training temporary local folder '{trainingTempPath}'.");
        Directory.Delete(trainingTempPath, true);
    }
}
/// <summary>
/// Trains a model with user-to-item recommendations enabled against a substituted document store
/// and verifies that the user history is stored in a single call, covers every generated user,
/// stays within 100 items per user, and that the catalog and usage files are parsed without errors.
/// </summary>
public void TrainSmallModelEnablingUserToItemRecommendationsTest()
{
    // lay out the working folder and generate the catalog file directly under it
    const string baseFolder = nameof(TrainSmallModelEnablingUserToItemRecommendationsTest);
    Directory.CreateDirectory(baseFolder);

    int usersCount = 50;
    int usageEventsCount = 30000;
    var generator = new ModelTrainingFilesGenerator(usersCount);

    string catalogFilePath = Path.Combine(baseFolder, "catalog.csv");
    generator.CreateCatalogFile(catalogFilePath);

    // generate the usage file under <test name>/usage
    string usageFileFolderPath = Path.Combine(baseFolder, "usage");
    Directory.CreateDirectory(usageFileFolderPath);
    generator.CreateUsageFile(Path.Combine(usageFileFolderPath, "usage.csv"), usageEventsCount);

    var parameters = ModelTrainingParameters.Default;
    parameters.EnableUserToItemRecommendations = true;

    // substitute document store: collects the stored document ids and counts the
    // comma separated entries of each document's content
    int itemsCount = 0;
    var users = new HashSet<string>();
    IDocumentStore documentStore = Substitute.For<IDocumentStore>();
    documentStore
        .AddDocumentsAsync(Arg.Any<string>(), Arg.Any<IEnumerable<Document>>(), Arg.Any<CancellationToken>())
        .Returns(info =>
        {
            var docs = info.Arg<IEnumerable<Document>>().ToList();
            foreach (Document document in docs)
            {
                users.Add(document.Id);
                itemsCount += document.Content?.Split(',').Length ?? 0;
            }

            return Task.FromResult(docs.Count);
        });

    // train with the catalog file, no evaluation folder and the substituted store
    var trainer = new ModelTrainer(documentStore: documentStore);
    ModelTrainResult result = trainer.TrainModel(
        parameters,
        usageFileFolderPath,
        catalogFilePath,
        null,
        CancellationToken.None);

    // expect only one call to document store; ReceivedWithAnyArgs verifies nothing until a
    // member is called on its return value, and the argument values passed here are ignored
    documentStore.ReceivedWithAnyArgs(1).AddDocumentsAsync(
        default(string),
        default(IEnumerable<Document>),
        default(CancellationToken));

    // make sure that all the users got their history stored
    Assert.AreEqual(usersCount, users.Count);

    // make sure the amount of stored history doesn't exceed 100 items per user
    Assert.IsTrue(itemsCount <= usersCount * 100);

    // the training itself must have succeeded
    Assert.IsNotNull(result);
    Assert.IsTrue(result.IsCompletedSuccessfuly);

    // the catalog must have been parsed cleanly
    Assert.IsNotNull(result.CatalogFilesParsingReport);
    Assert.IsTrue(result.CatalogFilesParsingReport.IsCompletedSuccessfuly);
    Assert.IsFalse(result.CatalogFilesParsingReport.HasErrors);

    // the usage files must have been parsed cleanly
    Assert.IsNotNull(result.UsageFilesParsingReport);
    Assert.IsTrue(result.UsageFilesParsingReport.IsCompletedSuccessfuly);
    Assert.IsFalse(result.UsageFilesParsingReport.HasErrors);

    // no evaluation was requested, so neither metrics nor an evaluation report are expected
    Assert.IsNull(result.ModelMetrics);
    Assert.IsNull(result.EvaluationFilesParsingReport);
}