/// <summary>
/// Trains the model through the models provider and then writes the final status,
/// completion message and statistics to the models registry
/// </summary>
/// <param name="modelId">The id of the model to train</param>
/// <param name="parameters">The training parameters handed to the models provider</param>
/// <param name="cancellationToken">The cancellation token forwarded to the provider and the registry</param>
/// <returns>The value of the training result's completed-successfully flag</returns>
private async Task<bool> TrainModelAsync(
    Guid modelId,
    ModelTrainingParameters parameters,
    CancellationToken cancellationToken)
{
    // initially signaled event shared with the progress handler so that
    // model status updates happen one at a time
    var statusUpdateGate = new AutoResetEvent(true);

    // forwards every progress message to the handler that updates the status message in the registry
    Action<string> onProgressMessage = message =>
    {
        ModelTrainingProgressMessagesEventHandler(message, modelId, statusUpdateGate, cancellationToken);
    };

    Trace.TraceInformation($"Starting model '{modelId}' training");
    ModelTrainResult result =
        await _modelsProvider.TrainAsync(modelId, parameters, onProgressMessage, cancellationToken);

    // translate the training outcome to a model status
    bool succeeded = result.IsCompletedSuccessfuly;
    ModelStatus finalStatus;
    if (succeeded)
    {
        finalStatus = ModelStatus.Completed;
    }
    else
    {
        finalStatus = ModelStatus.Failed;
    }

    Trace.TraceInformation($"Model training completed with status '{finalStatus}'");

    Trace.TraceInformation("Extracting model statistics from the model training result");
    ModelStatistics statistics = CreateModelStatistics(result, parameters);

    // block until the event is signaled, i.e. no status message update is still in flight
    Trace.TraceInformation("Wait for any ongoing model status message updates before updating the final status");
    statusUpdateGate.WaitOne();

    Trace.TraceInformation("Update the model status and statistics to the registry");
    await _modelsRegistry.UpdateModelAsync(
        modelId, cancellationToken, finalStatus, result.CompletionMessage, statistics);

    return succeeded;
}
/// <summary>
/// Trains the model through the models provider and then writes the final status,
/// completion message and statistics to the models registry
/// </summary>
/// <param name="modelId">The id of the model to train</param>
/// <param name="parameters">The training parameters handed to the models provider</param>
/// <param name="cancellationToken">The cancellation token forwarded to the provider and the registry</param>
/// <returns>The value of the training result's completed-successfully flag</returns>
private async Task<bool> TrainModelAsync(
    Guid modelId,
    ModelTrainingParameters parameters,
    CancellationToken cancellationToken)
{
    // initially signaled event shared with the progress handler so that
    // model status updates happen one at a time
    var statusUpdateGate = new AutoResetEvent(true);

    // forwards every progress message to the handler that updates the status message in the registry
    Action<string> onProgressMessage = message =>
    {
        ModelTrainingProgressMessagesEventHandler(message, modelId, statusUpdateGate, cancellationToken);
    };

    Trace.TraceInformation($"Iniciando entrenamiento del modelo '{modelId}'.");
    ModelTrainResult result =
        await _modelsProvider.TrainAsync(modelId, parameters, onProgressMessage, cancellationToken);

    // translate the training outcome to a model status
    bool succeeded = result.IsCompletedSuccessfuly;
    ModelStatus finalStatus;
    if (succeeded)
    {
        finalStatus = ModelStatus.Completed;
    }
    else
    {
        finalStatus = ModelStatus.Failed;
    }

    Trace.TraceInformation($"Entrenamiento del Modelo completado con estado '{finalStatus}'");

    Trace.TraceInformation("Extrayendo estadísticas de los resultados de entrenamiento del modelo");
    ModelStatistics statistics = CreateModelStatistics(result, parameters);

    // block until the event is signaled, i.e. no status message update is still in flight
    Trace.TraceInformation("Espere a que se actualice el mensaje de estado del modelo en curso antes de actualizar el estado final");
    statusUpdateGate.WaitOne();

    Trace.TraceInformation("Actualice el estado del modelo y estadisticas al registro");
    await _modelsRegistry.UpdateModelAsync(
        modelId, cancellationToken, finalStatus, result.CompletionMessage, statistics);

    return succeeded;
}
// Creates a new model entry in the models registry from the request parameters and
// queues a message asking for that model to be trained.
//
// cancellationToken: the cancellation token forwarded to the registry and the queue.
// modelParameters:   the model parameters bound from the request body; unset optional
//                    values fall back to ModelTrainingParameters.Default.
//
// Returns a "created at route" result pointing to the new model on success, a bad request
// result when the body is missing or the model state is invalid, and an internal server
// error result when the registry reports that the model could not be created.
public async Task<IHttpActionResult> TrainNewModel(CancellationToken cancellationToken, [FromBody] ModelParameters modelParameters)
{
    // validate input
    if (modelParameters == null)
    {
        var message = $"Invalid format. Expected a valid '{nameof(ModelParameters)}' JSON";
        Trace.TraceVerbose(message);
        return BadRequest(message);
    }

    if (!ModelState.IsValid)
    {
        return BadRequest(ModelState);
    }

    ModelsRegistry modelsRegistry = WebAppContext.ModelsRegistry;

    Trace.TraceVerbose("Converting the model parameters to trainer settings, using default values where needed");
    var @default = ModelTrainingParameters.Default;
    var settings = new ModelTrainingParameters
    {
        BlobContainerName = modelParameters.BlobContainerName,

        // blob paths use forward slashes, so normalize any backslashes in the relative locations
        CatalogFileRelativeLocation = modelParameters.CatalogFileRelativeLocation?.Replace('\\', '/'),
        UsageFolderRelativeLocation = modelParameters.UsageFolderRelativeLocation?.Replace('\\', '/'),
        EvaluationUsageFolderRelativeLocation = modelParameters.EvaluationUsageFolderRelativeLocation?.Replace('\\', '/'),

        SupportThreshold = modelParameters.SupportThreshold ?? @default.SupportThreshold,
        CooccurrenceUnit = modelParameters.CooccurrenceUnit ?? @default.CooccurrenceUnit,
        SimilarityFunction = modelParameters.SimilarityFunction ?? @default.SimilarityFunction,
        EnableColdItemPlacement = modelParameters.EnableColdItemPlacement ?? @default.EnableColdItemPlacement,
        EnableColdToColdRecommendations = modelParameters.EnableColdToColdRecommendations ?? @default.EnableColdToColdRecommendations,
        EnableUserAffinity = modelParameters.EnableUserAffinity ?? @default.EnableUserAffinity,
        EnableUserToItemRecommendations = modelParameters.EnableUserToItemRecommendations ?? @default.EnableUserToItemRecommendations,
        AllowSeedItemsInRecommendations = modelParameters.AllowSeedItemsInRecommendations ?? @default.AllowSeedItemsInRecommendations,
        EnableBackfilling = modelParameters.EnableBackfilling ?? @default.EnableBackfilling,
        DecayPeriodInDays = modelParameters.DecayPeriodInDays ?? @default.DecayPeriodInDays
    };

    Trace.TraceInformation("Creating new model in registry");
    Model model = await modelsRegistry.CreateModelAsync(settings, modelParameters.Description, cancellationToken);
    if (model == null)
    {
        // the registry returns null (rather than throwing) when the table insert fails;
        // without this check the model.Id access below would throw a NullReferenceException
        Trace.TraceError("Failed to create a new model in the registry");
        return InternalServerError();
    }

    Trace.TraceInformation($"Queueing a new train model message to the queue for model id {model.Id}");
    ModelQueueMessage modelQueueMessage = new ModelQueueMessage { ModelId = model.Id };
    await WebAppContext.TrainModelQueue.AddMessageAsync(modelQueueMessage, cancellationToken);

    // return the URL to the created model
    return CreatedAtRoute(nameof(GetModel), new { modelId = model.Id }, model);
}
/// <summary>
/// Initializes a <see cref="ModelTableEntity"/> whose row key is the model id, storing the
/// training parameters and the statistics as JSON strings when they are provided
/// </summary>
/// <param name="modelId">The model id, used (in its string form) as the row key</param>
/// <param name="modelParameters">Optional training parameters of the model</param>
/// <param name="modelStatistics">Optional model training statistics</param>
public ModelTableEntity(
    Guid modelId,
    ModelTrainingParameters modelParameters = null,
    ModelStatistics modelStatistics = null)
    : this()
{
    string rowKey = modelId.ToString();
    RowKey = rowKey;

    // properties are left untouched when the corresponding argument is missing
    bool hasParameters = modelParameters != null;
    if (hasParameters)
    {
        string parametersJson = JsonConvert.SerializeObject(modelParameters);
        ModelParameters = parametersJson;
    }

    bool hasStatistics = modelStatistics != null;
    if (hasStatistics)
    {
        string statisticsJson = JsonConvert.SerializeObject(modelStatistics);
        ModelStatistics = statisticsJson;
    }
}
/// <summary>
/// Runs the model trainer over the local usage, catalog and evaluation files
/// </summary>
/// <param name="modelTrainer">The trainer that performs the actual training</param>
/// <param name="modelId">The id of the model being trained (used for tracing and error messages)</param>
/// <param name="trainingParameters">The training parameters passed to the trainer</param>
/// <param name="localFilePaths">The local paths of the training files</param>
/// <param name="cancellationToken">The cancellation token passed to the trainer</param>
/// <returns>The result returned by the trainer</returns>
/// <exception cref="Exception">
/// Wraps any exception thrown while training; the original exception is the inner exception
/// </exception>
private ModelTrainResult TrainModel(
    ModelTrainer modelTrainer,
    Guid modelId,
    ModelTrainingParameters trainingParameters,
    TrainingLocalFilePaths localFilePaths,
    CancellationToken cancellationToken)
{
    try
    {
        Trace.TraceInformation($"Training model '{modelId}' using the local training files as input");

        ModelTrainResult trainResult = modelTrainer.TrainModel(
            trainingParameters,
            localFilePaths.UsageFolderPath,
            localFilePaths.CatalogFilePath,
            localFilePaths.EvaluationUsageFolderPath,
            cancellationToken);

        return trainResult;
    }
    catch (Exception ex)
    {
        // add the model id to the failure, trace it and propagate the wrapping exception
        string errorMessage = $"Exception while trying to train model with id: {modelId}";
        var wrappedException = new Exception(errorMessage, ex);
        Trace.TraceError(wrappedException.ToString());
        throw wrappedException;
    }
}
/// <summary>
/// Allocates a new model id and inserts a matching model entity, in <see cref="ModelStatus.Created"/>
/// status, to the models table
/// </summary>
/// <param name="modelParameters">The training parameters of the new model</param>
/// <param name="description">The description of the new model</param>
/// <param name="cancellationToken">The cancellation token assigned for the operation.</param>
/// <returns>The newly created <see cref="Model"/> or <value>null</value> if the table insert did not succeed</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="modelParameters"/> is null</exception>
/// <exception cref="Exception">Wraps a <see cref="StorageException"/> thrown by the table insert</exception>
public async Task<Model> CreateModelAsync(
    ModelTrainingParameters modelParameters,
    string description,
    CancellationToken cancellationToken)
{
    ThrowIfDisposed();

    if (modelParameters == null)
    {
        throw new ArgumentNullException(nameof(modelParameters));
    }

    // every model gets a freshly generated id
    Guid modelId = Guid.NewGuid();

    // build the table entity describing the new model
    var entity = new ModelTableEntity(modelId, modelParameters);
    entity.Description = description;
    entity.ModelStatus = ModelStatus.Created.ToString();
    entity.CreationTime = DateTime.UtcNow;

    try
    {
        Trace.TraceInformation($"Creating a new model ({modelId}) in the table");

        bool inserted = await _modelsTable.InsertEntityAsync(entity, cancellationToken);
        if (!inserted)
        {
            Trace.TraceError($"Failed to create table entry for model {modelId}");
            return null;
        }
    }
    catch (StorageException storageException)
    {
        string errorMessage = $"Exception while trying to create a new model entity ({modelId}) in the table";
        var wrappedException = new Exception(errorMessage, storageException);
        Trace.TraceError(wrappedException.ToString());
        throw wrappedException;
    }

    // hand back the model representation of the stored entity
    return ConvertEntityToModel(entity);
}
/// <summary>
/// Builds the <see cref="ModelStatistics"/> of a model from its training result
/// </summary>
/// <param name="result">The model training result to read durations, reports and counts from</param>
/// <param name="parameters">The training parameters, used for the reported file locations</param>
/// <returns>The populated model statistics</returns>
private static ModelStatistics CreateModelStatistics(ModelTrainResult result, ModelTrainingParameters parameters)
{
    var statistics = new ModelStatistics();

    // durations of the whole run, of the core training and of storing the user history
    statistics.TotalDuration = result.Duration.TotalDuration;
    statistics.TrainingDuration = result.Duration.TrainingDuration;
    statistics.StoringUserHistoryDuration = result.Duration.StoringUserHistoryDuration;

    // the catalog report refers to the catalog file's folder, when a catalog path was given
    statistics.CatalogParsingReport = CreateParsingReport(
        result.CatalogFilesParsingReport,
        result.Duration.CatalogParsingDuration,
        string.IsNullOrWhiteSpace(parameters.CatalogFileRelativePath)
            ? null
            : Path.GetDirectoryName(parameters.CatalogFileRelativePath));

    // usage files parsing report
    statistics.UsageEventsParsingReport = CreateParsingReport(
        result.UsageFilesParsingReport,
        result.Duration.UsageFilesParsingDuration,
        parameters.UsageRelativePath);

    // catalog item count, and item / user counts found in the usage files
    statistics.NumberOfCatalogItems = result.CatalogItemsCount;
    statistics.NumberOfUsageItems = result.UniqueItemsCount;
    statistics.NumberOfUsers = result.UniqueUsersCount;

    // coverage is only defined for a known, non-zero catalog size
    statistics.CatalogCoverage = result.CatalogItemsCount != null && result.CatalogItemsCount != 0
        ? (double)result.UniqueItemsCount / result.CatalogItemsCount
        : null;

    // feature weights are reported only if any were calculated
    statistics.CatalogFeatureWeights = result.CatalogFeatureWeights?.Count > 0
        ? result.CatalogFeatureWeights
        : null;

    // without an evaluation usage path there is no evaluation result to report
    if (string.IsNullOrWhiteSpace(parameters.EvaluationUsageRelativePath))
    {
        return statistics;
    }

    var evaluationResult = new ModelEvaluationResult();
    evaluationResult.Duration = result.Duration.EvaluationDuration;
    evaluationResult.Metrics = result.ModelMetrics;
    evaluationResult.EvaluationUsageEventsParsingReport = CreateParsingReport(
        result.EvaluationFilesParsingReport,
        result.Duration.EvaluationUsageFilesParsingDuration,
        parameters.EvaluationUsageRelativePath);
    statistics.EvaluationResult = evaluationResult;

    return statistics;
}
/// <summary>
/// Trains a new model: downloads the training blobs to a temporary local folder, trains the model,
/// and - if training completed successfully - serializes the trained model and uploads it.
/// The temporary folder is removed (best effort) when the operation ends.
/// </summary>
/// <param name="modelId">Model ID of the model to create</param>
/// <param name="trainingParameters">Parameters of the new model to train</param>
/// <param name="progressMessageReportDelegate">A delegate for handling progress messages; may be null</param>
/// <param name="cancellationToken">A cancellation token used to abort the operation</param>
/// <returns>The model training result</returns>
public async Task<ModelTrainResult> TrainAsync(Guid modelId, ModelTrainingParameters trainingParameters,
    Action<string> progressMessageReportDelegate, CancellationToken cancellationToken)
{
    Trace.TraceVerbose($"Model training started for model with id '{modelId}'.");

    // fall back to a no-op delegate so progress can be reported unconditionally
    progressMessageReportDelegate = progressMessageReportDelegate ?? (_ => { });

    // create a temporary local folder for the model training files
    string trainingTempPath = Path.Combine(_trainedModelsLocalRootPath,
        Path.GetFileNameWithoutExtension(Path.GetRandomFileName()));
    Directory.CreateDirectory(trainingTempPath);
    IDocumentStore modelDocumentStore = null;

    try
    {
        // report progress
        progressMessageReportDelegate("Downloading Training blobs");

        // download the training files
        TrainingLocalFilePaths localFilePaths =
            await DownloadTrainingBlobsAsync(modelId, trainingTempPath, trainingParameters, cancellationToken);

        // check if the operation was cancelled
        cancellationToken.ThrowIfCancellationRequested();

        // create user history store if user-to-item is enabled
        if (trainingParameters.EnableUserToItemRecommendations)
        {
            Trace.TraceInformation($"Creating user history document store for model '{modelId}'");
            modelDocumentStore = _documentStoreProvider.GetDocumentStore(modelId);
            modelDocumentStore.CreateIfNotExists();
        }

        // create a model trainer
        var modelTrainer = new ModelTrainer(new Tracer(nameof(ModelTrainer)), modelDocumentStore,
            progressMessageReportDelegate);

        // train the model
        ModelTrainResult result = TrainModel(modelTrainer, modelId, trainingParameters, localFilePaths,
            cancellationToken);
        if (!result.IsCompletedSuccessfuly)
        {
            // nothing to serialize or upload for a failed training
            Trace.TraceWarning($"Model training failed for model with id '{modelId}'.");
            return result;
        }

        // serialize and upload the trained model
        using (Stream modelStream = new MemoryStream())
        {
            Trace.TraceInformation("Serializing the trained model to a stream");
            SerializeTrainedModel(result.Model, modelStream, modelId);

            // rewind the stream before reading
            modelStream.Seek(0, SeekOrigin.Begin);

            // upload the serialized model to blob storage
            await UploadTrainedModelAsync(modelStream, modelId, cancellationToken);
        }

        // return the result
        Trace.TraceInformation($"Model training completed for model with id '{modelId}'. Result: {result}");
        return result;
    }
    finally
    {
        Trace.TraceInformation($"Deleting the training temporary local folder '{trainingTempPath}'.");
        try
        {
            Directory.Delete(trainingTempPath, true);
        }
        catch (Exception ex) when (ex is IOException || ex is UnauthorizedAccessException)
        {
            // cleanup is best effort: a failure to delete the temporary folder must neither hide
            // an exception thrown by the training itself nor fail an otherwise completed training
            Trace.TraceWarning(
                $"Failed deleting the training temporary local folder '{trainingTempPath}'. Exception: {ex}");
        }
    }
}
/// <summary>
/// Downloads the catalog, usage event files and evaluation usage files to local disk.
/// </summary>
/// <param name="modelId">The id of the model being trained (used in the error message only)</param>
/// <param name="localRootPath">The local folder under which the training files are stored</param>
/// <param name="trainingParameters">The training parameters holding the blob container name and blob paths</param>
/// <param name="cancellationToken">The cancellation token passed to the blob operations</param>
/// <returns>The local paths of the downloaded training files</returns>
/// <exception cref="Exception">Wraps a <see cref="StorageException"/> thrown while downloading</exception>
private async Task<TrainingLocalFilePaths> DownloadTrainingBlobsAsync(Guid modelId, string localRootPath,
    ModelTrainingParameters trainingParameters, CancellationToken cancellationToken)
{
    try
    {
        var trainingFiles = new TrainingLocalFilePaths
        {
            // set local usage directory name
            UsageFolderPath = Path.Combine(localRootPath, UsageDirectoryName)
        };

        // create the local folder for usage events files
        Directory.CreateDirectory(trainingFiles.UsageFolderPath);

        // get the root blob container of the catalog and usage files
        IBlobContainer trainingBlobsContainer =
            _blobContainerProvider.GetBlobContainer(trainingParameters.BlobContainerName);

        // download the usage events file(s)
        await DownloadBlobOrFolderAsync(trainingBlobsContainer, trainingParameters.UsageRelativePath,
            trainingFiles.UsageFolderPath, "usage events", cancellationToken);

        // download the catalog file, if provided
        if (!string.IsNullOrWhiteSpace(trainingParameters.CatalogFileRelativePath))
        {
            // set local catalog file path
            var catalogFileName = Path.GetFileName(trainingParameters.CatalogFileRelativePath);
            trainingFiles.CatalogFilePath = Path.Combine(localRootPath, catalogFileName);

            // trace the blob path (not the local destination), like the other download traces
            Trace.TraceInformation($"Downloading catalog blob '{trainingParameters.CatalogFileRelativePath}'");
            await trainingBlobsContainer.DownloadBlobAsync(trainingParameters.CatalogFileRelativePath,
                trainingFiles.CatalogFilePath, cancellationToken);
        }

        // download the evaluation files if provided
        if (!string.IsNullOrWhiteSpace(trainingParameters.EvaluationUsageRelativePath))
        {
            // set local evaluation folder
            trainingFiles.EvaluationUsageFolderPath =
                Path.Combine(localRootPath, EvaluationUsageLocalDirectoryName);

            // create the local folder for evaluation usage events files
            Directory.CreateDirectory(trainingFiles.EvaluationUsageFolderPath);

            // download the evaluation usage events file(s)
            await DownloadBlobOrFolderAsync(trainingBlobsContainer,
                trainingParameters.EvaluationUsageRelativePath, trainingFiles.EvaluationUsageFolderPath,
                "evaluation usage events", cancellationToken);
        }

        return trainingFiles;
    }
    catch (StorageException storageException)
    {
        var exception = new Exception($"Failed downloading training files from storage. Model id: {modelId}",
            storageException);
        Trace.TraceError(exception.ToString());
        throw exception;
    }
}

/// <summary>
/// Downloads the blob at <paramref name="blobPath"/> if such a blob exists, otherwise downloads
/// every blob listed under that path. Each blob is stored in the local folder under its file name.
/// </summary>
/// <param name="container">The blob container to download from</param>
/// <param name="blobPath">The path of a single blob or of a blobs folder</param>
/// <param name="localFolderPath">The existing local folder to download into</param>
/// <param name="blobsDescription">A short description of the blobs, used in trace messages</param>
/// <param name="cancellationToken">The cancellation token passed to the blob operations</param>
private async Task DownloadBlobOrFolderAsync(IBlobContainer container, string blobPath,
    string localFolderPath, string blobsDescription, CancellationToken cancellationToken)
{
    IList<string> blobNames;

    // check if the provided path represents a single file
    if (await container.ExistsAsync(blobPath, cancellationToken))
    {
        blobNames = new[] { blobPath };
    }
    else
    {
        Trace.TraceInformation($"Listing all the {blobsDescription} blobs under '{blobPath}'");
        blobNames = await container.ListBlobsAsync(blobPath, cancellationToken);
        Trace.TraceInformation($"Downloading all the {blobsDescription} blobs (Found {blobNames.Count})");
    }

    foreach (string blobName in blobNames)
    {
        // set the local file path of the blob
        string localFileName = Path.GetFileName(blobName) ?? string.Empty;
        string localFilePath = Path.Combine(localFolderPath, localFileName);

        Trace.TraceInformation($"Downloading {blobsDescription} blob '{blobName}'");
        await container.DownloadBlobAsync(blobName, localFilePath, cancellationToken);
    }
}