/// <summary>
/// Trains a model using the local training files
/// </summary>
/// <param name="modelTrainer">The trainer that performs the actual training</param>
/// <param name="modelId">Model ID, used for tracing and for the error message</param>
/// <param name="trainingParameters">Parameters passed through to the trainer</param>
/// <param name="localFilePaths">Local paths of the usage folder, catalog file and evaluation usage folder</param>
/// <param name="cancellationToken">A cancellation token passed through to the trainer</param>
/// <returns>The result returned by the trainer</returns>
private ModelTrainResult TrainModel(ModelTrainer modelTrainer, Guid modelId, ModelTrainingParameters trainingParameters,
    TrainingLocalFilePaths localFilePaths, CancellationToken cancellationToken)
{
    try
    {
        Trace.TraceInformation($"Training model '{modelId}' using the local training files as input");
        return modelTrainer.TrainModel(
            trainingParameters,
            localFilePaths.UsageFolderPath,
            localFilePaths.CatalogFilePath,
            localFilePaths.EvaluationUsageFolderPath,
            cancellationToken);
    }
    catch (Exception ex) when (!(ex is OperationCanceledException))
    {
        // Cancellation is deliberately excluded by the filter above: if the trainer throws
        // OperationCanceledException it propagates unchanged, so callers can tell an aborted
        // run from a failed one. Every other failure is wrapped with the model id and logged.
        var exception = new Exception($"Exception while trying to train model with id: {modelId}", ex);
        Trace.TraceError(exception.ToString());
        throw exception;
    }
}
/// <summary>
/// Trains new model
/// </summary>
/// <param name="modelId">Model ID of the model to create</param>
/// <param name="trainingParameters">Parameters of the new model to train</param>
/// <param name="progressMessageReportDelegate">A delegate for handling progress messages; may be null, in which case progress messages are ignored</param>
/// <param name="cancellationToken">A cancellation token used to abort the operation</param>
/// <returns>The model training result</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="trainingParameters"/> is null</exception>
public async Task<ModelTrainResult> TrainAsync(Guid modelId, ModelTrainingParameters trainingParameters,
    Action<string> progressMessageReportDelegate, CancellationToken cancellationToken)
{
    // fail fast, before any temporary folder is created
    if (trainingParameters == null)
    {
        throw new ArgumentNullException(nameof(trainingParameters));
    }

    Trace.TraceVerbose($"Model training started for model with id '{modelId}'.");
    progressMessageReportDelegate = progressMessageReportDelegate ?? (_ => { });

    // create a temporary local folder for the model training files
    string trainingTempPath = Path.Combine(_trainedModelsLocalRootPath,
        Path.GetFileNameWithoutExtension(Path.GetRandomFileName()));
    Directory.CreateDirectory(trainingTempPath);
    IDocumentStore modelDocumentStore = null;

    try
    {
        // report progress
        progressMessageReportDelegate("Downloading Training blobs");

        // download the training files
        TrainingLocalFilePaths localFilePaths =
            await DownloadTrainingBlobsAsync(modelId, trainingTempPath, trainingParameters, cancellationToken);

        // check if the operation was cancelled
        cancellationToken.ThrowIfCancellationRequested();

        // create user history store if user-to-item is enabled
        if (trainingParameters.EnableUserToItemRecommendations)
        {
            Trace.TraceInformation($"Creating user history document store for model '{modelId}'");
            modelDocumentStore = _documentStoreProvider.GetDocumentStore(modelId);
            modelDocumentStore.CreateIfNotExists();
        }

        // create a model trainer (the document store stays null when user-to-item is disabled)
        var modelTrainer = new ModelTrainer(new Tracer(nameof(ModelTrainer)), modelDocumentStore,
            progressMessageReportDelegate);

        // train the model
        ModelTrainResult result = TrainModel(modelTrainer, modelId, trainingParameters, localFilePaths,
            cancellationToken);
        if (!result.IsCompletedSuccessfuly)
        {
            // an unsuccessful result is returned to the caller as-is; nothing is uploaded
            Trace.TraceWarning($"Model training failed for model with id '{modelId}'.");
            return result;
        }

        // serialize and upload the trained model
        using (Stream modelStream = new MemoryStream())
        {
            Trace.TraceInformation("Serializing the trained model to a stream");
            SerializeTrainedModel(result.Model, modelStream, modelId);

            // rewind the stream before reading
            modelStream.Seek(0, SeekOrigin.Begin);

            // upload the serialized model to blob storage
            await UploadTrainedModelAsync(modelStream, modelId, cancellationToken);
        }

        // return the result
        Trace.TraceInformation($"Model training completed for model with id '{modelId}'. Result: {result}");
        return result;
    }
    finally
    {
        Trace.TraceInformation($"Deleting the training temporary local folder '{trainingTempPath}'.");
        try
        {
            Directory.Delete(trainingTempPath, true);
        }
        catch (Exception ex) when (ex is IOException || ex is UnauthorizedAccessException)
        {
            // Cleanup is best-effort: an exception escaping this finally block would replace
            // any exception already in flight from the try block, or fail a run that succeeded.
            Trace.TraceWarning(
                $"Failed deleting the training temporary local folder '{trainingTempPath}'. Exception: {ex}");
        }
    }
}
/// <summary>
/// Downloads the catalog, usage event files and evaluation usage files to local disk.
/// </summary>
/// <param name="modelId">Model ID, used only in the error message when a storage failure occurs</param>
/// <param name="localRootPath">Local folder under which the downloaded files are placed</param>
/// <param name="trainingParameters">Training parameters naming the blob container and the relative blob paths</param>
/// <param name="cancellationToken">A cancellation token passed to every storage call</param>
/// <returns>
/// The local paths of the downloaded files. The catalog file path and the evaluation usage folder path
/// are set only when the corresponding relative path was provided in <paramref name="trainingParameters"/>.
/// </returns>
private async Task<TrainingLocalFilePaths> DownloadTrainingBlobsAsync(Guid modelId, string localRootPath,
    ModelTrainingParameters trainingParameters, CancellationToken cancellationToken)
{
    try
    {
        var trainingFiles = new TrainingLocalFilePaths
        {
            // set local usage directory name
            UsageFolderPath = Path.Combine(localRootPath, UsageDirectoryName)
        };

        // create the local folder for usage events files
        Directory.CreateDirectory(trainingFiles.UsageFolderPath);

        // get the root blob container of the catalog and usage files
        IBlobContainer trainingBlobsContainer =
            _blobContainerProvider.GetBlobContainer(trainingParameters.BlobContainerName);

        // download the usage events file(s)
        await DownloadUsageEventsBlobsAsync(trainingBlobsContainer, trainingParameters.UsageRelativePath,
            trainingFiles.UsageFolderPath, "usage events", cancellationToken);

        // download the catalog file, if provided
        if (!string.IsNullOrWhiteSpace(trainingParameters.CatalogFileRelativePath))
        {
            // set local catalog file path
            string catalogFileName = Path.GetFileName(trainingParameters.CatalogFileRelativePath);
            trainingFiles.CatalogFilePath = Path.Combine(localRootPath, catalogFileName);

            // log the blob name (not the local target path), like the other download messages
            Trace.TraceInformation($"Downloading catalog blob '{trainingParameters.CatalogFileRelativePath}'");
            await trainingBlobsContainer.DownloadBlobAsync(trainingParameters.CatalogFileRelativePath,
                trainingFiles.CatalogFilePath, cancellationToken);
        }

        // download the evaluation files if provided
        if (!string.IsNullOrWhiteSpace(trainingParameters.EvaluationUsageRelativePath))
        {
            // set local evaluation folder
            trainingFiles.EvaluationUsageFolderPath =
                Path.Combine(localRootPath, EvaluationUsageLocalDirectoryName);

            // create the local folder for evaluation usage events files
            Directory.CreateDirectory(trainingFiles.EvaluationUsageFolderPath);

            await DownloadUsageEventsBlobsAsync(trainingBlobsContainer,
                trainingParameters.EvaluationUsageRelativePath, trainingFiles.EvaluationUsageFolderPath,
                "evaluation usage events", cancellationToken);
        }

        return trainingFiles;
    }
    catch (StorageException storageException)
    {
        // only storage failures are wrapped; any other exception propagates unchanged
        var exception = new Exception($"Failed downloading training files from storage. Model id: {modelId}",
            storageException);
        Trace.TraceError(exception.ToString());
        throw exception;
    }
}

/// <summary>
/// Downloads the blob(s) found at a relative path into an existing local folder. If the path names an
/// existing blob, only that blob is downloaded; otherwise every blob listed under the path is downloaded.
/// </summary>
/// <param name="container">The blob container to download from</param>
/// <param name="relativePath">Relative path of a single blob, or a prefix to list blobs under</param>
/// <param name="localFolderPath">Local folder that receives the files; each file keeps only its file name</param>
/// <param name="blobKind">Short description of the blobs, used only in trace messages</param>
/// <param name="cancellationToken">A cancellation token passed to every storage call</param>
private async Task DownloadUsageEventsBlobsAsync(IBlobContainer container, string relativePath,
    string localFolderPath, string blobKind, CancellationToken cancellationToken)
{
    IList<string> blobNames;

    // check if the provided path represents a single file
    if (await container.ExistsAsync(relativePath, cancellationToken))
    {
        blobNames = new[] { relativePath };
    }
    else
    {
        Trace.TraceInformation($"Listing all the {blobKind} blobs under '{relativePath}'");
        blobNames = await container.ListBlobsAsync(relativePath, cancellationToken);
        Trace.TraceInformation($"Downloading all the {blobKind} blobs (Found {blobNames.Count})");
    }

    foreach (string blobName in blobNames)
    {
        // set local file path
        // NOTE(review): blobs that share a file name under different virtual folders map to the
        // same local path - confirm whether that can occur for the listed prefix.
        string localFileName = Path.GetFileName(blobName) ?? string.Empty;
        string localFilePath = Path.Combine(localFolderPath, localFileName);

        Trace.TraceInformation($"Downloading {blobKind} blob '{blobName}'");
        await container.DownloadBlobAsync(blobName, localFilePath, cancellationToken);
    }
}