/// <summary>
/// Queue-triggered handler for the delete model queue: asks the models provider
/// to delete the model whose id is carried by the message
/// </summary>
/// <param name="message">The dequeued message holding the id of the model to delete</param>
/// <param name="cancellationToken">Token forwarded to the models provider delete call</param>
public static async Task ProcessDeleteModelQueueMessage(
    [QueueTrigger(AzureModelQueueFactory.DeleteModelQueueName)] ModelQueueMessage message,
    CancellationToken cancellationToken)
{
    // publish the model id on the context before doing any work
    Guid targetModelId = message.ModelId;
    ContextManager.ModelId = targetModelId;

    Trace.TraceInformation($"Start handling the deletion of model {targetModelId}");

    try
    {
        Trace.TraceVerbose("Deleting model from model provider");
        await ModelsProvider.Value.DeleteModelAsync(targetModelId, cancellationToken);
    }
    catch (Exception deletionFailure)
    {
        // log, then let the exception escape so the Web job infrastructure can retry the message
        Trace.TraceWarning($"Failed deleting model using the model provider. Exception: '{deletionFailure}'");
        throw;
    }

    Trace.TraceInformation($"Successfully completed handling of model '{targetModelId}' deletion message");
}
/// <summary>
/// Creates a new model in the models registry from the supplied parameters and
/// queues a message asking for that model to be trained
/// </summary>
/// <param name="cancellationToken">Token forwarded to the registry and queue calls</param>
/// <param name="modelParameters">The model parameters, bound from the request body</param>
/// <returns>
/// A bad request result when the parameters are missing or the model state is invalid;
/// otherwise a created-at-route result pointing at the new model
/// </returns>
public async Task<IHttpActionResult> TrainNewModel(CancellationToken cancellationToken, [FromBody] ModelParameters modelParameters)
{
    // guard: the body must have been bound to a parameters object
    if (modelParameters == null)
    {
        var invalidFormatMessage = $"Invalid format. Expected a valid '{nameof(ModelParameters)}' JSON";
        Trace.TraceVerbose(invalidFormatMessage);
        return BadRequest(invalidFormatMessage);
    }

    // guard: the bound parameters must pass model state validation
    if (!ModelState.IsValid)
    {
        return BadRequest(ModelState);
    }

    ModelsRegistry registry = WebAppContext.ModelsRegistry;

    Trace.TraceVerbose("Converting the model parameters to trainer settings, using default values where needed");
    var fallback = ModelTrainingParameters.Default;

    // relative locations are stored with forward slashes only
    var catalogLocation = modelParameters.CatalogFileRelativeLocation?.Replace('\\', '/');
    var usageLocation = modelParameters.UsageFolderRelativeLocation?.Replace('\\', '/');
    var evaluationUsageLocation = modelParameters.EvaluationUsageFolderRelativeLocation?.Replace('\\', '/');

    // every optional parameter left unset falls back to the default training parameters
    var trainingSettings = new ModelTrainingParameters
    {
        BlobContainerName = modelParameters.BlobContainerName,
        CatalogFileRelativeLocation = catalogLocation,
        UsageFolderRelativeLocation = usageLocation,
        EvaluationUsageFolderRelativeLocation = evaluationUsageLocation,
        SupportThreshold = modelParameters.SupportThreshold ?? fallback.SupportThreshold,
        CooccurrenceUnit = modelParameters.CooccurrenceUnit ?? fallback.CooccurrenceUnit,
        SimilarityFunction = modelParameters.SimilarityFunction ?? fallback.SimilarityFunction,
        EnableColdItemPlacement = modelParameters.EnableColdItemPlacement ?? fallback.EnableColdItemPlacement,
        EnableColdToColdRecommendations = modelParameters.EnableColdToColdRecommendations ?? fallback.EnableColdToColdRecommendations,
        EnableUserAffinity = modelParameters.EnableUserAffinity ?? fallback.EnableUserAffinity,
        EnableUserToItemRecommendations = modelParameters.EnableUserToItemRecommendations ?? fallback.EnableUserToItemRecommendations,
        AllowSeedItemsInRecommendations = modelParameters.AllowSeedItemsInRecommendations ?? fallback.AllowSeedItemsInRecommendations,
        EnableBackfilling = modelParameters.EnableBackfilling ?? fallback.EnableBackfilling,
        DecayPeriodInDays = modelParameters.DecayPeriodInDays ?? fallback.DecayPeriodInDays
    };

    Trace.TraceInformation("Creating new model in registry");
    Model createdModel = await registry.CreateModelAsync(trainingSettings, modelParameters.Description, cancellationToken);

    Trace.TraceInformation($"Queueing a new train model message to the queue for model id {createdModel.Id}");
    var trainRequest = new ModelQueueMessage { ModelId = createdModel.Id };
    await WebAppContext.TrainModelQueue.AddMessageAsync(trainRequest, cancellationToken);

    // respond with the location of the newly created model
    return CreatedAtRoute(nameof(GetModel), new { modelId = createdModel.Id }, createdModel);
}
/// <summary>
/// Queue-triggered handler for the train model poison queue: marks the model
/// referenced by the message as failed
/// </summary>
/// <param name="message">The poison message holding the id of the model</param>
/// <param name="cancellationToken">Token forwarded to the mark-as-failed call</param>
public static async Task ProcessTrainModelPoisonQueueMessage(
    [QueueTrigger(AzureModelQueueFactory.TrainModelQueueName + PoisonQueueSuffix)] ModelQueueMessage message,
    CancellationToken cancellationToken)
{
    // publish the model id on the context before doing any work
    Guid poisonedModelId = message.ModelId;
    ContextManager.ModelId = poisonedModelId;

    Trace.TraceInformation($"Handling model training poison message for model {poisonedModelId}");

    // flag the model as failed
    await MarkModelAsFailed(poisonedModelId, cancellationToken);

    Trace.TraceVerbose("Finished handling poison train model message");
}
/// <summary>
/// Handles messages from the train model queue: trains the model referenced by the message,
/// queues a delete message if the training was cancelled through <paramref name="cancellationToken"/>,
/// and records the error on the model if training failed
/// </summary>
/// <param name="message">The dequeued message holding the id of the model to train</param>
/// <param name="dequeueCount">
/// Dequeue count of the message; once it exceeds <c>MaxDequeueCount</c> the model is
/// marked as failed instead of being trained again
/// </param>
/// <param name="cancellationToken">Token forwarded to the training and registry calls</param>
public static async Task ProcessTrainModelQueueMessage(
    [QueueTrigger(AzureModelQueueFactory.TrainModelQueueName)] ModelQueueMessage message,
    int dequeueCount,
    CancellationToken cancellationToken)
{
    // set the model id to context
    Guid modelId = message.ModelId;
    ContextManager.ModelId = modelId;

    // Note: The following if statement technically should never resolve to true as the web job infra should
    // handle max dequeued counts. However, due to issue https://github.com/Azure/azure-webjobs-sdk/issues/899,
    // if the job takes too long to finish, infrastructure marks it as "never finished" and doesn't move it to
    // the poison queue even if the failure threshold is met. This results in an infinite loop for the message.
    // Here we manually update the status and return successfully to avoid the loop.
    if (dequeueCount > MaxDequeueCount)
    {
        Trace.TraceError($"Aborting model training after {dequeueCount - 1} attempts");
        await MarkModelAsFailed(modelId, cancellationToken);
        return;
    }

    Trace.TraceInformation("Start handling train model queue message");

    // create a logic class using the model registry and provider
    var logic = new WebJobLogic(ModelsProvider.Value, ModelsRegistry.Value);

    try
    {
        // train the model
        await logic.TrainModelAsync(modelId, cancellationToken);
    }
    catch (OperationCanceledException exception)
    {
        // Catch the base cancellation type rather than only TaskCanceledException:
        // CancellationToken.ThrowIfCancellationRequested() throws OperationCanceledException,
        // which would otherwise be treated as a training failure by the generic handler below.
        Trace.TraceInformation($"Training of model was cancelled. Exception: '{exception}'");

        // check if the cancellation was external
        if (cancellationToken.IsCancellationRequested)
        {
            // add model to delete queue; the incoming token is already cancelled,
            // so the message is queued with CancellationToken.None
            Trace.TraceInformation("Queueing a message to delete model resources");
            await DeleteModelQueue.Value.AddMessageAsync(
                new ModelQueueMessage { ModelId = modelId }, CancellationToken.None);
        }

        // throw the cancellation exception so that the Web job infrastructure could handle
        throw;
    }
    catch (Exception exception)
    {
        string errorMessage = $"Training of model failed with exception: '{exception}'";
        Trace.TraceWarning(errorMessage);

        Trace.TraceInformation($"Updating model '{modelId}' status message with the error message: '{errorMessage}'");
        await ModelsRegistry.Value.UpdateModelAsync(modelId, cancellationToken, statusMessage: errorMessage);

        // throw the exception and rely on the Web job infrastructure for retries
        throw;
    }

    Trace.TraceInformation("Successfully finished handling train model queue message");
}
/// <summary>
/// Queue-triggered handler for the delete model poison queue: only traces a warning
/// naming the model whose deletion message could not be handled
/// </summary>
/// <param name="message">The poison message; a null message is tolerated</param>
public static void ProcessDeleteModelPoisonQueueMessage(
    [QueueTrigger(AzureModelQueueFactory.DeleteModelQueueName + PoisonQueueSuffix)] ModelQueueMessage message)
{
    // null when the message itself is null, in which case the id is rendered as empty text
    var failedModelId = message?.ModelId;

    Trace.TraceWarning($"Failed handling delete model message for model '{failedModelId}'");
}