Exemplo n.º 1
0
        /// <summary>
        /// Trains the model and then writes the resulting status, completion message and
        /// statistics to the models registry
        /// </summary>
        /// <param name="modelId">The id of the model to train</param>
        /// <param name="parameters">The training parameters, also used when building the model statistics</param>
        /// <param name="cancellationToken">The cancellation token passed to the training and registry calls</param>
        /// <returns><c>true</c> if the training result is marked as completed successfully, <c>false</c> otherwise</returns>
        private async Task <bool> TrainModelAsync(Guid modelId, ModelTrainingParameters parameters,
                                                  CancellationToken cancellationToken)
        {
            // initially-signaled event, handed to the progress handler, so that model status updates happen one at a time
            var statusUpdateGate = new AutoResetEvent(true);

            // route every training progress message to the handler that updates the model status message
            Action<string> onProgressMessage = message =>
            {
                ModelTrainingProgressMessagesEventHandler(message, modelId, statusUpdateGate, cancellationToken);
            };

            Trace.TraceInformation($"Starting model '{modelId}' training");
            ModelTrainResult trainResult =
                await _modelsProvider.TrainAsync(modelId, parameters, onProgressMessage, cancellationToken);

            // translate the training outcome into the status stored in the registry
            bool succeeded = trainResult.IsCompletedSuccessfuly;
            ModelStatus finalStatus;
            if (succeeded)
            {
                finalStatus = ModelStatus.Completed;
            }
            else
            {
                finalStatus = ModelStatus.Failed;
            }

            Trace.TraceInformation($"Model training completed with status '{finalStatus}'");

            Trace.TraceInformation("Extracting model statistics from the model training result");
            ModelStatistics statistics = CreateModelStatistics(trainResult, parameters);

            // block until the event is signaled again, i.e. no status message update is still in flight
            Trace.TraceInformation("Wait for any ongoing model status message updates before updating the final status");
            statusUpdateGate.WaitOne();

            Trace.TraceInformation("Update the model status and statistics to the registry");
            await _modelsRegistry.UpdateModelAsync(
                modelId, cancellationToken, finalStatus, trainResult.CompletionMessage, statistics);

            return succeeded;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Runs the model training and stores the outcome (status, completion message and
        /// statistics) in the models registry
        /// </summary>
        /// <param name="modelId">The id of the model being trained</param>
        /// <param name="parameters">The parameters to train with; also used to build the statistics</param>
        /// <param name="cancellationToken">The cancellation token forwarded to the training and registry calls</param>
        /// <returns><c>true</c> when the training result reports success, otherwise <c>false</c></returns>
        private async Task <bool> TrainModelAsync(Guid modelId, ModelTrainingParameters parameters,
                                                  CancellationToken cancellationToken)
        {
            // event created in the signaled state; it is passed to the progress handler so that
            // model status updates happen one at a time
            var updateInProgressEvent = new AutoResetEvent(true);

            // callback given to the trainer: forwards each progress message to the status message handler
            Action<string> reportProgress = text =>
            {
                ModelTrainingProgressMessagesEventHandler(text, modelId, updateInProgressEvent, cancellationToken);
            };

            Trace.TraceInformation($"Iniciando entrenamiento del modelo '{modelId}'.");
            ModelTrainResult trainingOutcome =
                await _modelsProvider.TrainAsync(modelId, parameters, reportProgress, cancellationToken);

            // derive the registry status from the training outcome
            bool trainedSuccessfully = trainingOutcome.IsCompletedSuccessfuly;
            ModelStatus statusToStore = ModelStatus.Failed;
            if (trainedSuccessfully)
            {
                statusToStore = ModelStatus.Completed;
            }

            Trace.TraceInformation($"Entrenamiento del Modelo completado con estado '{statusToStore}'");

            Trace.TraceInformation("Extrayendo estadísticas de los resultados de entrenamiento del modelo");
            ModelStatistics collectedStatistics = CreateModelStatistics(trainingOutcome, parameters);

            // wait for the event to be signaled before writing the final status
            Trace.TraceInformation("Espere a que se actualice el mensaje de estado del modelo en curso antes de actualizar el estado final");
            updateInProgressEvent.WaitOne();

            Trace.TraceInformation("Actualice el estado del modelo y estadisticas al registro");
            await _modelsRegistry.UpdateModelAsync(
                modelId, cancellationToken, statusToStore, trainingOutcome.CompletionMessage, collectedStatistics);

            return trainedSuccessfully;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Creates a new model in the registry from the posted parameters and queues a message
        /// requesting that the model be trained
        /// </summary>
        /// <param name="cancellationToken">The cancellation token passed to the registry and queue calls</param>
        /// <param name="modelParameters">The model parameters read from the request body</param>
        /// <returns>
        /// A "bad request" result if the parameters are missing or invalid, an "internal server error"
        /// result if the registry did not create the model, otherwise a "created" result pointing
        /// at the new model
        /// </returns>
        public async Task <IHttpActionResult> TrainNewModel(CancellationToken cancellationToken, [FromBody] ModelParameters modelParameters)
        {
            // validate input
            if (modelParameters == null)
            {
                var message = $"Invalid format. Expected a valid '{nameof(ModelParameters)}' JSON";
                Trace.TraceVerbose(message);
                return BadRequest(message);
            }

            if (!ModelState.IsValid)
            {
                return BadRequest(ModelState);
            }

            ModelsRegistry modelsRegistry = WebAppContext.ModelsRegistry;

            Trace.TraceVerbose("Converting the model parameters to trainer settings, using default values where needed");
            var @default = ModelTrainingParameters.Default;
            var settings = new ModelTrainingParameters
            {
                BlobContainerName = modelParameters.BlobContainerName,

                // relative locations are normalized to use forward slashes
                CatalogFileRelativeLocation = modelParameters.CatalogFileRelativeLocation?.Replace('\\', '/'),
                UsageFolderRelativeLocation = modelParameters.UsageFolderRelativeLocation?.Replace('\\', '/'),
                EvaluationUsageFolderRelativeLocation = modelParameters.EvaluationUsageFolderRelativeLocation?.Replace('\\', '/'),

                // optional values fall back to the default training parameters
                SupportThreshold = modelParameters.SupportThreshold ?? @default.SupportThreshold,
                CooccurrenceUnit = modelParameters.CooccurrenceUnit ?? @default.CooccurrenceUnit,
                SimilarityFunction = modelParameters.SimilarityFunction ?? @default.SimilarityFunction,
                EnableColdItemPlacement = modelParameters.EnableColdItemPlacement ?? @default.EnableColdItemPlacement,
                EnableColdToColdRecommendations =
                    modelParameters.EnableColdToColdRecommendations ?? @default.EnableColdToColdRecommendations,
                EnableUserAffinity = modelParameters.EnableUserAffinity ?? @default.EnableUserAffinity,
                EnableUserToItemRecommendations =
                    modelParameters.EnableUserToItemRecommendations ?? @default.EnableUserToItemRecommendations,
                AllowSeedItemsInRecommendations =
                    modelParameters.AllowSeedItemsInRecommendations ?? @default.AllowSeedItemsInRecommendations,
                EnableBackfilling = modelParameters.EnableBackfilling ?? @default.EnableBackfilling,
                DecayPeriodInDays = modelParameters.DecayPeriodInDays ?? @default.DecayPeriodInDays
            };

            Trace.TraceInformation("Creating new model in registry");
            Model model = await modelsRegistry.CreateModelAsync(settings, modelParameters.Description, cancellationToken);
            if (model == null)
            {
                // the registry returns null when it fails to create the model; without this
                // guard the code below would fail with a NullReferenceException on model.Id
                Trace.TraceError("Failed to create a new model in the registry");
                return InternalServerError();
            }

            Trace.TraceInformation($"Queueing a new train model message to the queue for model id {model.Id}");
            ModelQueueMessage modelQueueMessage = new ModelQueueMessage {
                ModelId = model.Id
            };
            await WebAppContext.TrainModelQueue.AddMessageAsync(modelQueueMessage, cancellationToken);

            // return the URL to the created model
            return CreatedAtRoute(nameof(GetModel), new { modelId = model.Id }, model);
        }
        /// <summary>
        /// Creates a new instance of the <see cref="ModelTableEntity"/> class for the given model id
        /// </summary>
        /// <param name="modelId">The model id; its string form becomes the row key</param>
        /// <param name="modelParameters">The training parameters of the model; stored as JSON when provided</param>
        /// <param name="modelStatistics">The model training statistics; stored as JSON when provided</param>
        public ModelTableEntity(Guid modelId, ModelTrainingParameters modelParameters = null, ModelStatistics modelStatistics = null)
            : this()
        {
            RowKey = modelId.ToString();

            // the JSON properties are only assigned for values that were actually supplied
            bool hasParameters = modelParameters != null;
            if (hasParameters)
            {
                string parametersJson = JsonConvert.SerializeObject(modelParameters);
                ModelParameters = parametersJson;
            }

            bool hasStatistics = modelStatistics != null;
            if (hasStatistics)
            {
                string statisticsJson = JsonConvert.SerializeObject(modelStatistics);
                ModelStatistics = statisticsJson;
            }
        }
Exemplo n.º 5
0
 /// <summary>
 /// Trains a model using the local training files
 /// </summary>
 /// <param name="modelTrainer">The trainer used to train the model</param>
 /// <param name="modelId">The model id, used in trace and error messages</param>
 /// <param name="trainingParameters">The training parameters passed to the trainer</param>
 /// <param name="localFilePaths">The local usage, catalog and evaluation usage paths passed to the trainer</param>
 /// <param name="cancellationToken">The cancellation token passed to the trainer</param>
 /// <returns>The result returned by the trainer</returns>
 /// <exception cref="OperationCanceledException">Propagated unchanged if the trainer throws it</exception>
 /// <exception cref="Exception">Wraps any other exception thrown by the trainer</exception>
 private ModelTrainResult TrainModel(ModelTrainer modelTrainer, Guid modelId, ModelTrainingParameters trainingParameters,
                                     TrainingLocalFilePaths localFilePaths, CancellationToken cancellationToken)
 {
     try
     {
         Trace.TraceInformation($"Training model '{modelId}' using the local training files as input");
         return modelTrainer.TrainModel(trainingParameters, localFilePaths.UsageFolderPath,
                                        localFilePaths.CatalogFilePath, localFilePaths.EvaluationUsageFolderPath, cancellationToken);
     }
     catch (Exception ex) when (!(ex is OperationCanceledException))
     {
         // cancellation is deliberately excluded by the filter above: wrapping it in a plain
         // Exception would hide the fact that the operation was cancelled from the callers
         var exception = new Exception($"Exception while trying to train model with id: {modelId}", ex);
         Trace.TraceError(exception.ToString());
         throw exception;
     }
 }
Exemplo n.º 6
0
        /// <summary>
        /// Allocates a new model id and inserts a matching entity, in the "created" status, to the models table
        /// </summary>
        /// <param name="modelParameters">The new model parameters</param>
        /// <param name="description">The new model description</param>
        /// <param name="cancellationToken">The cancellation token assigned for the operation.</param>
        /// <returns>The newly created <see cref="Model"/> or <value>null</value> if the table insert was not successful</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="modelParameters"/> is null</exception>
        /// <exception cref="Exception">Wraps a storage exception thrown while inserting the entity</exception>
        public async Task <Model> CreateModelAsync(ModelTrainingParameters modelParameters, string description, CancellationToken cancellationToken)
        {
            ThrowIfDisposed();

            if (modelParameters == null)
            {
                throw new ArgumentNullException(nameof(modelParameters));
            }

            // every new model gets a freshly generated id
            Guid newModelId = Guid.NewGuid();

            // the table entity describing the new model
            var entity = new ModelTableEntity(newModelId, modelParameters)
            {
                Description  = description,
                ModelStatus  = ModelStatus.Created.ToString(),
                CreationTime = DateTime.UtcNow
            };

            bool inserted;
            try
            {
                Trace.TraceInformation($"Creating a new model ({newModelId}) in the table");
                inserted = await _modelsTable.InsertEntityAsync(entity, cancellationToken);
            }
            catch (StorageException storageException)
            {
                var wrapped = new Exception(
                    $"Exception while trying to create a new model entity ({newModelId}) in the table", storageException);
                Trace.TraceError(wrapped.ToString());
                throw wrapped;
            }

            // an unsuccessful insert is reported to the caller as null
            if (!inserted)
            {
                Trace.TraceError($"Failed to create table entry for model {newModelId}");
                return null;
            }

            // hand back the model representation of the stored entity
            return ConvertEntityToModel(entity);
        }
Exemplo n.º 7
0
        /// <summary>
        /// Builds the model statistics (durations, parsing reports, counts, catalog coverage and,
        /// when an evaluation path was provided, the evaluation result) from the training result
        /// </summary>
        /// <param name="result">The model training result to read the statistics from</param>
        /// <param name="parameters">The training parameters, used for the relative paths in the reports</param>
        /// <returns>The populated model statistics</returns>
        private static ModelStatistics CreateModelStatistics(ModelTrainResult result, ModelTrainingParameters parameters)
        {
            // the catalog report refers to the folder of the catalog file, when a catalog file was provided
            string catalogFolder = null;
            if (!string.IsNullOrWhiteSpace(parameters.CatalogFileRelativePath))
            {
                catalogFolder = Path.GetDirectoryName(parameters.CatalogFileRelativePath);
            }

            var catalogReport = CreateParsingReport(
                result.CatalogFilesParsingReport, result.Duration.CatalogParsingDuration, catalogFolder);

            var usageReport = CreateParsingReport(
                result.UsageFilesParsingReport, result.Duration.UsageFilesParsingDuration, parameters.UsageRelativePath);

            // coverage is only computed when the catalog items count is known and non-zero
            bool hasCatalogItems = result.CatalogItemsCount != null && result.CatalogItemsCount != 0;
            var catalogCoverage = hasCatalogItems
                ? (double)result.UniqueItemsCount / result.CatalogItemsCount
                : null;

            // feature weights are only reported when at least one weight is present
            var featureWeights = result.CatalogFeatureWeights?.Count > 0 ? result.CatalogFeatureWeights : null;

            var statistics = new ModelStatistics
            {
                TotalDuration              = result.Duration.TotalDuration,
                TrainingDuration           = result.Duration.TrainingDuration,
                StoringUserHistoryDuration = result.Duration.StoringUserHistoryDuration,
                CatalogParsingReport       = catalogReport,
                UsageEventsParsingReport   = usageReport,
                NumberOfCatalogItems       = result.CatalogItemsCount,
                NumberOfUsageItems         = result.UniqueItemsCount,
                NumberOfUsers              = result.UniqueUsersCount,
                CatalogCoverage            = catalogCoverage,
                CatalogFeatureWeights      = featureWeights
            };

            // no evaluation path means there is no evaluation result to attach
            if (string.IsNullOrWhiteSpace(parameters.EvaluationUsageRelativePath))
            {
                return statistics;
            }

            statistics.EvaluationResult = new ModelEvaluationResult
            {
                Duration = result.Duration.EvaluationDuration,
                Metrics  = result.ModelMetrics,
                EvaluationUsageEventsParsingReport = CreateParsingReport(
                    result.EvaluationFilesParsingReport,
                    result.Duration.EvaluationUsageFilesParsingDuration,
                    parameters.EvaluationUsageRelativePath)
            };

            return statistics;
        }
Exemplo n.º 8
0
        /// <summary>
        /// Trains new model: downloads the training blobs to a temporary local folder, trains the
        /// model and, if training succeeded, serializes and uploads the trained model
        /// </summary>
        /// <param name="modelId">Model ID of the model to create</param>
        /// <param name="trainingParameters">Parameters of the new model to train</param>
        /// <param name="progressMessageReportDelegate">A delegate for handling progress messages; may be null</param>
        /// <param name="cancellationToken">A cancellation token used to abort the operation</param>
        /// <returns>The model training result</returns>
        /// <exception cref="OperationCanceledException">Thrown if cancellation was requested after the download step</exception>
        public async Task <ModelTrainResult> TrainAsync(Guid modelId, ModelTrainingParameters trainingParameters,
                                                        Action <string> progressMessageReportDelegate, CancellationToken cancellationToken)
        {
            Trace.TraceVerbose($"Model training started for model with id '{modelId}'.");

            // fall back to a no-op so progress can be reported unconditionally
            progressMessageReportDelegate = progressMessageReportDelegate ?? (_ => { });

            // create a temporary local folder for the model training files
            string trainingTempPath = Path.Combine(_trainedModelsLocalRootPath,
                                                   Path.GetFileNameWithoutExtension(Path.GetRandomFileName()));

            Directory.CreateDirectory(trainingTempPath);
            IDocumentStore modelDocumentStore = null;

            try
            {
                // report progress
                progressMessageReportDelegate("Downloading Training blobs");

                // download the training files
                TrainingLocalFilePaths localFilePaths =
                    await DownloadTrainingBlobsAsync(modelId, trainingTempPath, trainingParameters, cancellationToken);

                // check if the operation was cancelled
                cancellationToken.ThrowIfCancellationRequested();

                // create user history store if user-to-item is enabled
                if (trainingParameters.EnableUserToItemRecommendations)
                {
                    Trace.TraceInformation($"Creating user history document store for model '{modelId}'");
                    modelDocumentStore = _documentStoreProvider.GetDocumentStore(modelId);
                    modelDocumentStore.CreateIfNotExists();
                }

                // create a model trainer (the document store stays null when user-to-item is disabled)
                var modelTrainer = new ModelTrainer(new Tracer(nameof(ModelTrainer)), modelDocumentStore,
                                                    progressMessageReportDelegate);

                // train the model
                ModelTrainResult result = TrainModel(modelTrainer, modelId, trainingParameters,
                                                     localFilePaths, cancellationToken);
                if (!result.IsCompletedSuccessfuly)
                {
                    Trace.TraceWarning($"Model training failed for model with id '{modelId}'.");
                    return result;
                }

                // serialize and upload the trained model
                using (Stream modelStream = new MemoryStream())
                {
                    Trace.TraceInformation("Serializing the trained model to a stream");
                    SerializeTrainedModel(result.Model, modelStream, modelId);

                    // rewind the stream before reading
                    modelStream.Seek(0, SeekOrigin.Begin);

                    // upload the serialized model to blob storage
                    await UploadTrainedModelAsync(modelStream, modelId, cancellationToken);
                }

                // return the result
                Trace.TraceInformation($"Model training completed for model with id '{modelId}'. Result: {result}");
                return result;
            }
            finally
            {
                Trace.TraceInformation($"Deleting the training temporary local folder '{trainingTempPath}'.");
                try
                {
                    Directory.Delete(trainingTempPath, true);
                }
                catch (Exception ex) when (ex is IOException || ex is UnauthorizedAccessException)
                {
                    // best-effort cleanup: an exception escaping this finally block would replace
                    // the exception already in flight, or fail a training run that has succeeded
                    Trace.TraceWarning(
                        $"Failed deleting the training temporary local folder '{trainingTempPath}'. Exception: {ex}");
                }
            }
        }
Exemplo n.º 9
0
        /// <summary>
        /// Downloads the catalog, usage event files and evaluation usage files to local disk.
        /// </summary>
        /// <param name="modelId">The model id, used in the error message</param>
        /// <param name="localRootPath">The local folder under which the files are stored</param>
        /// <param name="trainingParameters">The training parameters holding the blob container name and relative paths</param>
        /// <param name="cancellationToken">The cancellation token passed to the blob container calls</param>
        /// <returns>The local paths of the downloaded training files</returns>
        /// <exception cref="Exception">Wraps a storage exception thrown while downloading</exception>
        private async Task <TrainingLocalFilePaths> DownloadTrainingBlobsAsync(Guid modelId, string localRootPath,
                                                                               ModelTrainingParameters trainingParameters, CancellationToken cancellationToken)
        {
            try
            {
                var trainingFiles = new TrainingLocalFilePaths
                {
                    // set local usage directory name
                    UsageFolderPath = Path.Combine(localRootPath, UsageDirectoryName)
                };

                // create the local folder for usage events files
                Directory.CreateDirectory(trainingFiles.UsageFolderPath);

                // get the root blob container of the catalog and usage files
                IBlobContainer trainingBlobsContainer =
                    _blobContainerProvider.GetBlobContainer(trainingParameters.BlobContainerName);

                // download the usage events file(s)
                await DownloadBlobsToFolderAsync(trainingBlobsContainer, trainingParameters.UsageRelativePath,
                                                 trainingFiles.UsageFolderPath, "usage events", cancellationToken);

                // download the catalog file, if provided
                if (!string.IsNullOrWhiteSpace(trainingParameters.CatalogFileRelativePath))
                {
                    // set local catalog file path
                    var catalogFileName = Path.GetFileName(trainingParameters.CatalogFileRelativePath);
                    trainingFiles.CatalogFilePath = Path.Combine(localRootPath, catalogFileName);

                    Trace.TraceInformation($"Downloading catalog blob '{trainingFiles.CatalogFilePath}'");
                    await trainingBlobsContainer.DownloadBlobAsync(trainingParameters.CatalogFileRelativePath,
                                                                   trainingFiles.CatalogFilePath, cancellationToken);
                }

                // download the evaluation files if provided
                if (!string.IsNullOrWhiteSpace(trainingParameters.EvaluationUsageRelativePath))
                {
                    // set local evaluation folder
                    trainingFiles.EvaluationUsageFolderPath = Path.Combine(localRootPath, EvaluationUsageLocalDirectoryName);

                    // create the local folder for evaluation usage events files
                    Directory.CreateDirectory(trainingFiles.EvaluationUsageFolderPath);

                    // download the evaluation usage events file(s)
                    await DownloadBlobsToFolderAsync(trainingBlobsContainer, trainingParameters.EvaluationUsageRelativePath,
                                                     trainingFiles.EvaluationUsageFolderPath, "evaluation usage events",
                                                     cancellationToken);
                }

                return trainingFiles;
            }
            catch (StorageException storageException)
            {
                var exception = new Exception($"Failed downloading training files from storage. Model id: {modelId}",
                                              storageException);
                Trace.TraceError(exception.ToString());
                throw exception;
            }
        }

        /// <summary>
        /// Downloads the blob at the given relative path, or every blob listed under it when the
        /// path is not an existing blob, into an existing local folder.
        /// </summary>
        /// <param name="blobContainer">The blob container to download from</param>
        /// <param name="blobsRelativePath">The relative path of a single blob or of a group of blobs</param>
        /// <param name="localFolderPath">The existing local folder to store the downloaded files in</param>
        /// <param name="blobsDescription">The description of the blobs used in the trace messages</param>
        /// <param name="cancellationToken">The cancellation token passed to the blob container calls</param>
        private async Task DownloadBlobsToFolderAsync(IBlobContainer blobContainer, string blobsRelativePath,
                                                      string localFolderPath, string blobsDescription,
                                                      CancellationToken cancellationToken)
        {
            IList<string> blobNames;

            // check if the provided path represents a single file
            if (await blobContainer.ExistsAsync(blobsRelativePath, cancellationToken))
            {
                blobNames = new[] { blobsRelativePath };
            }
            else
            {
                Trace.TraceInformation($"Listing all the {blobsDescription} blobs under '{blobsRelativePath}'");
                blobNames = await blobContainer.ListBlobsAsync(blobsRelativePath, cancellationToken);

                Trace.TraceInformation($"Downloading all the {blobsDescription} blobs (Found {blobNames.Count})");
            }

            foreach (string blobName in blobNames)
            {
                // each blob is stored directly under the local folder, using only its file name
                string localFileName = Path.GetFileName(blobName) ?? string.Empty;
                string localFilePath = Path.Combine(localFolderPath, localFileName);

                Trace.TraceInformation($"Downloading {blobsDescription} blob '{blobName}'");
                await blobContainer.DownloadBlobAsync(blobName, localFilePath, cancellationToken);
            }
        }