/// <summary>
/// Reads <c>TrainingRequest.json</c> from the current working directory and sends one
/// <see cref="TrainingRequestMessage"/> per request item to the Service Bus queue named in that file.
/// </summary>
/// <param name="args">Command line arguments (not used).</param>
/// <exception cref="InvalidOperationException">
/// The request file deserializes to nothing, or it contains no <c>Items</c> collection.
/// </exception>
static async Task Main(string[] args)
{
    // Path.Combine keeps the lookup portable; a hard-coded '\\' separator only resolves on Windows.
    var requestPath = Path.Combine(Directory.GetCurrentDirectory(), "TrainingRequest.json");
    var request = JsonConvert.DeserializeObject<TrainingRequest>(File.ReadAllText(requestPath));

    // An empty file (or a literal "null") deserializes to null; fail with a clear message instead
    // of a NullReferenceException further down.
    if (request?.Items == null)
    {
        throw new InvalidOperationException($"{requestPath} does not contain a training request with an Items collection");
    }

    var csb = new ServiceBusConnectionStringBuilder(request.ServiceBusConnectionString)
    {
        EntityPath = request.TrainingQueueName
    };
    var queueClient = new QueueClient(csb);

    try
    {
        foreach (var item in request.Items)
        {
            var trainingRequestMessage = new TrainingRequestMessage
            {
                BlobFolderName = item.BlobFolder,
                BlobSasUrl = request.SasUrl,
                DocumentFormat = item.DocumentFormat,
                IncludeSubFolders = "false",
                UseLabelFile = "true"
            };

            Console.WriteLine($"Sending Message for document format={item.DocumentFormat}");

            string data = JsonConvert.SerializeObject(trainingRequestMessage);
            await queueClient.SendAsync(new Message(Encoding.UTF8.GetBytes(data)));
        }
    }
    finally
    {
        // Release the underlying Service Bus connection even when a send fails.
        await queueClient.CloseAsync();
    }
}
/// <summary>
/// HTTP-triggered function that reads a training request from the request body and sends one
/// <see cref="TrainingRequestMessage"/> per entry of its <c>Items</c> collection to the training queue.
/// </summary>
/// <param name="req">Incoming HTTP request; its body is read as JSON.</param>
/// <param name="log">Function logger.</param>
/// <returns>
/// 200 with a summary of the submitted document formats, or 400 when the body is not valid JSON,
/// has no <c>Items</c> collection, or the collection is empty.
/// </returns>
public static async Task<IActionResult> Run(
    [HttpTrigger(AuthorizationLevel.Function, "get", "post", Route = null)] HttpRequest req,
    ILogger log)
{
    log.LogInformation($"HttpTrainRequest Function triggered by HttpRequest at: {DateTime.Now}");

    // The reader is deliberately not disposed: disposing it would also close req.Body.
    string requestBody = await new StreamReader(req.Body).ReadToEndAsync();

    dynamic payload;
    try
    {
        payload = JsonConvert.DeserializeObject(requestBody);
    }
    catch (JsonException e)
    {
        log.LogWarning(e, "HttpTrainRequest: the request body is not valid JSON");
        return new BadRequestObjectResult("HttpTrainRequest: the request body must be valid JSON.");
    }

    // An empty body deserializes to null (a GET typically carries no body).
    dynamic items = payload?.Items;
    if ((object)items == null)
    {
        return new BadRequestObjectResult("HttpTrainRequest: the request body must contain an Items collection.");
    }

    var csb = new ServiceBusConnectionStringBuilder(serviceBusConnectionString)
    {
        EntityPath = trainingQueue
    };
    var queueClient = new QueueClient(csb);

    var formats = new StringBuilder();
    int sentCount = 0;
    try
    {
        foreach (var item in items)
        {
            // Convert once to a static string so it can be passed to the logger extension methods.
            string documentFormat = item.DocumentFormat;

            TrainingRequestMessage trainingRequestMessage = new TrainingRequestMessage
            {
                BlobFolderName = item.BlobFolder,
                BlobSasUrl = payload.SasUrl,
                DocumentFormat = documentFormat,
                IncludeSubFolders = "false",
                UseLabelFile = "true"
            };

            log.LogInformation($"Sending Message for document format={documentFormat}");

            string data = JsonConvert.SerializeObject(trainingRequestMessage);
            await queueClient.SendAsync(new Message(Encoding.UTF8.GetBytes(data)));

            if (sentCount > 0)
            {
                formats.Append(", ");
            }
            formats.Append(documentFormat);
            sentCount++;
        }
    }
    finally
    {
        // Release the Service Bus connection whether or not every send succeeded.
        await queueClient.CloseAsync();
    }

    // The previous Substring(0, Length - 2) threw ArgumentOutOfRangeException when nothing was sent.
    if (sentCount == 0)
    {
        return new BadRequestObjectResult("HttpTrainRequest: the Items collection is empty, nothing was submitted.");
    }

    return new OkObjectResult($"HttpTrainRequest: your training request for document formats {formats} submitted sucessfully.");
}
/// <summary>
/// Records a completed training run in the <c>ModelTraining</c> table. The new row gets a
/// <c>ModelVersion</c> one higher than the current maximum for the same document format
/// (or 1 when there is none). The read and the insert run in a single transaction.
/// </summary>
/// <param name="m">The training request, populated with the results of the training run.</param>
/// <param name="log">Function logger.</param>
/// <exception cref="ArgumentNullException"><paramref name="m"/> is null.</exception>
/// <remarks>
/// Any exception raised while reading or writing is logged, the transaction is rolled back,
/// and the original exception is rethrown with its stack trace intact.
/// </remarks>
private void UpdateModelTraining(TrainingRequestMessage m, ILogger log)
{
    if (m == null)
    {
        throw new ArgumentNullException(nameof(m));
    }

    int newVersion = 0;

    using (SqlConnection connection = new SqlConnection(_dbConnectionString))
    {
        connection.Open();

        using (SqlTransaction transaction = connection.BeginTransaction("TrainingRequestTransaction"))
        using (SqlCommand command = connection.CreateCommand())
        {
            command.Transaction = transaction;

            try
            {
                // All values are bound as parameters: the message content comes from a queue and must
                // never be spliced into the SQL text (injection, and breakage on embedded quotes).
                command.CommandText = "SELECT MAX(ModelVersion) AS Current_Version from ModelTraining WHERE DocumentFormat=@DocumentFormat";
                command.Parameters.AddWithValue("@DocumentFormat", m.DocumentFormat ?? string.Empty);

                // MAX() over zero rows yields a single NULL, which is treated as version 0.
                object currentVersionValue = command.ExecuteScalar();
                int currentVersion = (currentVersionValue == null || currentVersionValue == DBNull.Value)
                    ? 0
                    : Convert.ToInt32(currentVersionValue);

                newVersion = currentVersion + 1;

                // Add the row
                command.Parameters.Clear();
                command.CommandText =
                    "Insert into ModelTraining (DocumentFormat, ModelVersion, ModelId, CreatedDateTime, UpdatedDateTime, BlobSasUrl, BlobfolderName, IncludeSubFolders, UseLabelFile, AverageModelAccuracy, TrainingDocumentResults) " +
                    "VALUES (@DocumentFormat, @ModelVersion, @ModelId, @CreatedDateTime, @UpdatedDateTime, @BlobSasUrl, @BlobFolderName, @IncludeSubFolders, @UseLabelFile, @AverageModelAccuracy, @TrainingDocumentResults)";

                command.Parameters.AddWithValue("@DocumentFormat", m.DocumentFormat ?? string.Empty);
                command.Parameters.AddWithValue("@ModelVersion", newVersion);
                command.Parameters.AddWithValue("@ModelId", m.ModelId ?? string.Empty);

                // Dates and the accuracy keep the textual form the statement always used, but are now
                // formatted with the invariant culture so the server receives the same text regardless
                // of the host's regional settings (e.g. '.' as decimal separator).
                command.Parameters.AddWithValue("@CreatedDateTime", FormattableString.Invariant($"{m.CreatedDateTime:yyyy-MM-dd HH:mm:ss.fff}"));
                command.Parameters.AddWithValue("@UpdatedDateTime", FormattableString.Invariant($"{m.UpdatedDateTime:yyyy-MM-dd HH:mm:ss.fff}"));
                command.Parameters.AddWithValue("@BlobSasUrl", m.BlobSasUrl ?? string.Empty);
                command.Parameters.AddWithValue("@BlobFolderName", m.BlobFolderName ?? string.Empty);
                command.Parameters.AddWithValue("@IncludeSubFolders", m.IncludeSubFolders ?? string.Empty);
                command.Parameters.AddWithValue("@UseLabelFile", m.UseLabelFile ?? string.Empty);
                command.Parameters.AddWithValue("@AverageModelAccuracy", FormattableString.Invariant($"{m.AverageModelAccuracy}"));
                command.Parameters.AddWithValue("@TrainingDocumentResults", m.TrainingDocumentResults ?? string.Empty);

                command.ExecuteNonQuery();
                transaction.Commit();
            }
            catch (Exception e)
            {
                try
                {
                    transaction.Rollback();
                }
                catch (Exception rollbackError)
                {
                    // Rollback can itself fail (e.g. the connection is gone); report it, but do not
                    // let it replace the exception that caused the failure.
                    log.LogError(rollbackError, $"{FUNCTION_NAME} Rolling back the training request transaction failed. Message is {rollbackError.Message}");
                }

                log.LogError(e, $"Exception prevented writing training request for document format {m.DocumentFormat} model id={m.ModelId} to database (transaction was rolled back). Message is {e.Message}");

                // 'throw;' (not 'throw e;') preserves the original stack trace.
                throw;
            }
        }

        log.LogInformation($"{FUNCTION_NAME} Training request for document format {m.DocumentFormat}, version={newVersion}, model id={m.ModelId} was written to the database");
    }
}
/// <summary>
/// Queue-triggered function. Submits a model training request to the Form Recognizer service,
/// polls the returned status URL until the model is ready, copies the results onto the
/// request and records them via <c>UpdateModelTraining</c>.
/// </summary>
/// <param name="message">Queue message containing a JSON-serialized <see cref="TrainingRequestMessage"/>.</param>
/// <param name="log">Function logger.</param>
/// <remarks>
/// Any failure is tracked with the telemetry client, logged and rethrown.
/// </remarks>
public async Task Run([QueueTrigger("training", Connection = "IncomingConnection")] string message, ILogger log)
{
    // NOTE(review): this instance is never used in this method; it is kept only in case the
    // Utils constructor has side effects - confirm and remove.
    Utils utils = new Utils(log, _config["IncomingConnection"], FUNCTION_NAME);

    log.LogInformation($"{FUNCTION_NAME} function was triggered by receipt of message - Message:{message}");

    try
    {
        TrainingRequestMessage trm = JsonConvert.DeserializeObject<TrainingRequestMessage>(message);
        if (trm == null)
        {
            throw new InvalidOperationException($"{FUNCTION_NAME} The queue message did not contain a training request");
        }

        string apiKey = _config["RecognizerApiKey"];

        string statusUrl = await SubmitTrainingRequestAsync(trm, apiKey, log);

        // TODO This needs to be in a 'monitor pattern' durable function
        // A job has been submitted, now we need to track the status of it.
        JObject trainingResponse = await WaitForTrainingToCompleteAsync(statusUrl, apiKey, log);

        ApplyTrainingResult(trm, trainingResponse);

        // get here we are ready to go...
        log.LogInformation($"Model for invoice format {trm.DocumentFormat} was trained sucessfully. Model Id is {trm.ModelId}. Average model accuracy={trm.AverageModelAccuracy}");
        UpdateModelTraining(trm, log);
    }
    // unexpected failure
    catch (Exception e)
    {
        _telemetryClient.TrackException(e);
        log.LogError(e, $"{FUNCTION_NAME} Unexpected error. Exception Type: {e.GetType().ToString()} Message {e.Message}");
        throw;
    }
}

/// <summary>
/// Posts the training request and returns the URL from the response's <c>location</c> header,
/// which is subsequently polled for the training status.
/// </summary>
private async Task<string> SubmitTrainingRequestAsync(TrainingRequestMessage trm, string apiKey, ILogger log)
{
    string baseUrl = _recognizerServiceBaseUrl;
    var uri = $"{baseUrl}{ParsingConstants.FormRecognizerApiPath}";

    JObject body = new JObject(
        new JProperty("source", trm.BlobSasUrl),
        new JProperty("sourceFilter",
            new JObject(
                new JProperty("prefix", trm.BlobFolderName),
                new JProperty("includeSubFolders", trm.IncludeSubFolders))),
        new JProperty("useLabelFile", trm.UseLabelFile));
    string json = body.ToString();

    using (var request = new HttpRequestMessage(HttpMethod.Post, uri))
    {
        // The key travels on this request only. Adding it to _httpClient.DefaultRequestHeaders on
        // every invocation kept appending duplicate values to the shared client.
        request.Headers.Add(ParsingConstants.OcpApimSubscriptionKey, apiKey);
        request.Content = new StringContent(json, System.Text.Encoding.UTF8, "application/json");

        using (HttpResponseMessage response = await _httpClient.SendAsync(request))
        {
            if (!response.IsSuccessStatusCode)
            {
                var errorBody = await response.Content.ReadAsStringAsync();
                throw new HttpRequestException($"That didnt work. Trying to submit model training request {errorBody} request was {json} Response:{response.StatusCode.ToString()}");
            }

            string location = response.Headers.TryGetValues("location", out IEnumerable<string> values)
                ? values.FirstOrDefault()
                : null;

            // Without a status URL there is nothing to poll; fail here rather than issuing a GET
            // against an empty address.
            if (string.IsNullOrEmpty(location))
            {
                throw new InvalidOperationException($"{FUNCTION_NAME} Model training request was accepted by {baseUrl} but the response carried no location header");
            }

            log.LogInformation($"{FUNCTION_NAME} Model training request accepted by Forms Recognizer {baseUrl}");
            return location;
        }
    }
}

/// <summary>
/// Polls <paramref name="statusUrl"/> up to <c>_trainingContext.MaxRetries</c> times, waiting
/// 3, 6, 9, ... seconds between attempts, and returns the response body once the reported
/// status is <c>ready</c>.
/// </summary>
/// <exception cref="TimeoutException">The status never became <c>ready</c> within the allowed attempts.</exception>
private async Task<JObject> WaitForTrainingToCompleteAsync(string statusUrl, string apiKey, ILogger log)
{
    var maxRetries = _trainingContext.MaxRetries;
    string status = null;

    for (int i = 0; i < maxRetries; i++)
    {
        string responseBody;
        using (var request = new HttpRequestMessage(HttpMethod.Get, statusUrl))
        {
            // NOTE(review): uses the same header constant as the submit call; the previous code used
            // the literal "Ocp-Apim-Subscription-Key" here - presumably the same value, confirm.
            request.Headers.Add(ParsingConstants.OcpApimSubscriptionKey, apiKey);

            using (HttpResponseMessage response = await _httpClient.SendAsync(request))
            {
                if (!response.IsSuccessStatusCode)
                {
                    throw new HttpRequestException($"{FUNCTION_NAME} Training the model failed {statusUrl} Response:{response.StatusCode.ToString()}");
                }

                responseBody = await response.Content.ReadAsStringAsync();
            }
        }

        JObject jsonContent = JObject.Parse(responseBody);

        // SelectToken returns null when any part of the path is missing instead of throwing.
        JToken statusToken = jsonContent.SelectToken("modelInfo.status");
        if (statusToken == null)
        {
            throw new InvalidOperationException($"{FUNCTION_NAME} Hmmmmmnn? Training model progress check. Get request was sucessful, but status element is null?");
        }

        status = statusToken.ToString();

        if (status == "ready")
        {
            log.LogInformation($"{FUNCTION_NAME} Training completed sucessfully");
            return jsonContent;
        }

        if (status == "invalid")
        {
            throw new InvalidOperationException($"{FUNCTION_NAME} Training failed. The response body was {responseBody}");
        }

        // No point in waiting after the final attempt.
        if (i < maxRetries - 1)
        {
            int waitDuration = (i + 1) * 3;
            log.LogDebug($"{FUNCTION_NAME} Training attempt {i}. Status is {status}. will sleep for {waitDuration} seconds and retry");
            await Task.Delay(TimeSpan.FromSeconds(waitDuration));
        }
    }

    // Previously this case fell out of the loop and was reported (and stored) as a successful training.
    throw new TimeoutException($"{FUNCTION_NAME} Training did not complete in the allotted time and was abandoned. Value is {status}");
}

/// <summary>
/// Copies the model id, timestamps, average accuracy and per-document results from a
/// <c>ready</c> status response onto <paramref name="trm"/>.
/// </summary>
private static void ApplyTrainingResult(TrainingRequestMessage trm, JObject jsonContent)
{
    JToken modelId = jsonContent.SelectToken("modelInfo.modelId");
    if (modelId == null)
    {
        throw new InvalidOperationException($"{FUNCTION_NAME} Training completed but the response contained no model id");
    }
    trm.ModelId = modelId.ToString();

    // A missing timestamp falls back to the current time; an unparseable one leaves the
    // property untouched.
    if (TryReadDateOrNow(jsonContent.SelectToken("modelInfo.createdDateTime"), out DateTime created))
    {
        trm.CreatedDateTime = created;
    }

    if (TryReadDateOrNow(jsonContent.SelectToken("modelInfo.lastUpdatedDateTime"), out DateTime updated))
    {
        trm.UpdatedDateTime = updated;
    }

    // A missing or unparseable accuracy is recorded as 0.
    string accuracyText = jsonContent.SelectToken("trainResult.averageModelAccuracy")?.ToString() ?? "0";
    trm.AverageModelAccuracy = decimal.TryParse(accuracyText, out decimal accuracy) ? accuracy : 0;

    // Missing per-document results no longer abort recording an otherwise successful training.
    trm.TrainingDocumentResults = jsonContent.SelectToken("trainResult.trainingDocuments")?.ToString() ?? string.Empty;
}

/// <summary>
/// Reads a date from <paramref name="token"/>; a null token yields the current local time.
/// Returns false when the token is present but cannot be parsed as a date.
/// </summary>
private static bool TryReadDateOrNow(JToken token, out DateTime value)
{
    if (token == null)
    {
        value = DateTime.Now;
        return true;
    }

    return DateTime.TryParse(token.ToString(), out value);
}
/// <summary>
/// Command line entry point. Uploads the training assets found in a local folder to a folder of a
/// blob container (after deleting whatever that folder already holds), creates a read/list SAS
/// for the container and queues a <see cref="TrainingRequestMessage"/> on the training queue.
/// </summary>
/// <param name="args">
/// Command line arguments: <c>--documentFormat</c>, <c>--localPath</c>, <c>--blobContainer</c>
/// and <c>--blobContainerFolder</c>; all four are required.
/// </param>
/// <returns>
/// The exit code reported by the command line parser; -1 when the parser succeeded but the
/// command itself failed.
/// </returns>
static async Task<int> Main(string[] args)
{
    var builder = new ConfigurationBuilder()
        .SetBasePath(Directory.GetCurrentDirectory())
        .AddJsonFile("appsettings.local.json", optional: true, reloadOnChange: true);
    IConfigurationRoot configuration = builder.Build();

    dbConnectionString = configuration.GetConnectionString("PlaceboDatabase");
    storageConnectionString = configuration["storageConnectionString"];
    trainingQueueName = configuration["TrainingQueueName"];

    // Echo at most the first 50 characters. The settings file is optional, so either value may be
    // null or shorter than 50 characters; an unguarded Substring(0, 50) throws in both cases.
    var shortDBConnectionString = dbConnectionString?.Substring(0, Math.Min(50, dbConnectionString.Length));
    var shortStorageConnectionString = storageConnectionString?.Substring(0, Math.Min(50, storageConnectionString.Length));
    Console.WriteLine($"Database connection string = {shortDBConnectionString}");
    Console.WriteLine($"Storage connection string = {shortStorageConnectionString}");
    Console.WriteLine($"Training Queue Name = {trainingQueueName}");

    var rootCommand = new RootCommand
    {
        new Option<string>(
            "--documentFormat",
            description: "The document format for which the training assets should be uploaded"),
        new Option<string>(
            "--localPath",
            description: "The local folder containing the training assets"),
        new Option<string>(
            "--blobContainer",
            description: "The name of the blob container where the assets should be uploaded"),
        new Option<string>(
            "--blobContainerFolder",
            getDefaultValue: () => null,
            description: "The anme of a folder within the blob container where the assets should be uploaded"),
    };

    rootCommand.Description = "This command uploads a set of model training assets for a document format (e.g. phoenix) from a local directory to Azure blob storage. " +
        "This triggers a model training run in azure. A new model is created based on the assets and a record of the new model is kept in the ModelTraining table in the " +
        "database. This new model becomes the latest model for that document format and is then used by the rcognizer component while processing future documents";

    // The handler cannot return a value, so failures are reported to Main through this captured local.
    int handlerExitCode = 0;

    rootCommand.Handler = CommandHandler.Create<string, string, string, string>(async (documentFormat, localPath, blobContainer, blobContainerFolder) =>
    {
        try
        {
            Console.WriteLine($"The value for --documentFormat is: {documentFormat}");
            if (string.IsNullOrEmpty(documentFormat))
            {
                throw new ArgumentException($"--documentFormat {documentFormat} must be provided");
            }

            Console.WriteLine($"The value for --localPath is: {localPath}");
            if (string.IsNullOrEmpty(localPath))
            {
                throw new ArgumentException($"--localPath {localPath} must be provided");
            }

            Console.WriteLine($"The value for --blobContainer is: {blobContainer}");
            if (string.IsNullOrEmpty(blobContainer))
            {
                throw new ArgumentException($"--blobContainer {blobContainer} must be provided");
            }

            Console.WriteLine($"The value for --blobContainerFolder is: {blobContainerFolder}");
            if (string.IsNullOrEmpty(blobContainerFolder))
            {
                throw new ArgumentException($"--blobContainerFolder {blobContainerFolder} must be provided");
            }

            if (!Directory.Exists(localPath))
            {
                throw new ArgumentException($"--localPath {localPath} does not exist or is not a directory");
            }

            if (string.IsNullOrEmpty(storageConnectionString))
            {
                throw new InvalidOperationException("storageConnectionString is not configured");
            }

            if (string.IsNullOrEmpty(trainingQueueName))
            {
                throw new InvalidOperationException("TrainingQueueName is not configured");
            }

            // Get hold of the storage account
            CloudStorageAccount storageAccount = CloudStorageAccount.Parse(storageConnectionString);
            var targetBlobClient = storageAccount.CreateCloudBlobClient();
            var targetContainer = targetBlobClient.GetContainerReference(blobContainer);
            await targetContainer.CreateIfNotExistsAsync();
            var directory = targetContainer.GetDirectoryReference(blobContainerFolder);

            // Remove whatever the target folder already contains, one listing segment at a time.
            BlobContinuationToken continuationToken = null;
            do
            {
                BlobResultSegment resultSegment = await directory.ListBlobsSegmentedAsync(true, BlobListingDetails.All, 50, continuationToken, null, null);
                var existingBlobs = resultSegment.Results.ToList();
                if (existingBlobs.Count > 0)
                {
                    Console.WriteLine($"Container already contains {existingBlobs.Count} blobs - they will be deleted");
                }

                foreach (var blob in existingBlobs)
                {
                    try
                    {
                        // Delete through the listed item itself rather than re-deriving a reference
                        // from its absolute URI.
                        if (blob is CloudBlob blobToDelete)
                        {
                            await blobToDelete.DeleteIfExistsAsync();
                            Console.WriteLine($"Deleted blob: {blobToDelete.Name}");
                        }
                    }
                    catch (Exception e)
                    {
                        // Best effort: report and carry on with the remaining blobs.
                        Console.WriteLine($"Unable to delete blob {blob.Uri.ToString()}: {e.Message}");
                    }
                }

                // Get the continuation token. If not null, get the next segment.
                continuationToken = resultSegment.ContinuationToken;
            }
            while (continuationToken != null);

            // Upload every file found directly in the local folder.
            string[] fileEntries = Directory.GetFiles(localPath);
            Stopwatch innerTimer = new Stopwatch();
            Stopwatch outerTimer = Stopwatch.StartNew();
            int uploadedCount = 0;
            foreach (string fileName in fileEntries)
            {
                string blobName = Path.GetFileName(fileName);
                innerTimer.Restart();
                using (FileStream fs = new FileStream(fileName, FileMode.Open, FileAccess.Read))
                {
                    var blobToUpload = directory.GetBlockBlobReference(blobName);
                    await blobToUpload.UploadFromStreamAsync(fs);
                }
                uploadedCount++;
                innerTimer.Stop();
                Console.WriteLine($"Uploaded file {blobName} to container {targetContainer.Name} in {innerTimer.ElapsedMilliseconds} ms");
            }
            outerTimer.Stop();
            Console.WriteLine($"Uploaded {uploadedCount} files to container {targetContainer.Name} in {outerTimer.ElapsedMilliseconds} ms");

            // SAS valid from 15 minutes ago (tolerates clock skew) until two hours from now.
            var policy = new SharedAccessBlobPolicy
            {
                Permissions = SharedAccessBlobPermissions.List | SharedAccessBlobPermissions.Read,
                SharedAccessStartTime = DateTime.UtcNow.AddMinutes(-15),
                SharedAccessExpiryTime = DateTime.UtcNow.AddMinutes(120)
            };
            var targetContainerToken = targetContainer.GetSharedAccessSignature(policy);
            var targetContainerSAS = $"{targetContainer.Uri}{targetContainerToken}";
            Console.WriteLine($"targetContainerSAS={targetContainerSAS}");

            TrainingRequestMessage trainingRequestMessage = new TrainingRequestMessage
            {
                BlobFolderName = blobContainerFolder,
                BlobSasUrl = targetContainerSAS,
                DocumentFormat = documentFormat,
                IncludeSubFolders = "false",
                UseLabelFile = "true"
            };

            // Retrieve a reference to the training queue.
            CloudQueueClient queueClient = storageAccount.CreateCloudQueueClient();
            CloudQueue queue = queueClient.GetQueueReference(trainingQueueName);

            // Create the queue if it doesn't already exist
            await queue.CreateIfNotExistsAsync();

            CloudQueueMessage message = new CloudQueueMessage(JsonConvert.SerializeObject(trainingRequestMessage));
            await queue.AddMessageAsync(message);

            Console.WriteLine("done.");
        }
        catch (Exception e)
        {
            Console.WriteLine(e.Message);
            handlerExitCode = -1;
        }
    });

    // Await instead of blocking on .Result inside an async Main.
    int invokeResult = await rootCommand.InvokeAsync(args);
    return invokeResult != 0 ? invokeResult : handlerExitCode;
}
/// <summary>
/// Command line entry point. Queues a <see cref="TrainingRequestMessage"/> on the training queue
/// for assets that already sit in a blob container folder, identified by a container SAS URL.
/// </summary>
/// <param name="args">
/// Command line arguments: <c>--documentFormat</c>, <c>--labellingContainerSasUrl</c> and
/// <c>--blobContainerFolder</c>; all three are required.
/// </param>
/// <returns>
/// The exit code reported by the command line parser; -1 when the parser succeeded but the
/// command itself failed.
/// </returns>
static async Task<int> Main(string[] args)
{
    var builder = new ConfigurationBuilder()
        .SetBasePath(Directory.GetCurrentDirectory())
        .AddJsonFile("appsettings.local.json", optional: true, reloadOnChange: true);
    IConfigurationRoot configuration = builder.Build();

    storageConnectionString = configuration["storageConnectionString"];
    trainingQueueName = configuration["TrainingQueueName"];

    // Echo at most the first 50 characters. The settings file is optional, so the value may be
    // null or shorter than 50 characters; an unguarded Substring(0, 50) throws in both cases.
    var shortStorageConnectionString = storageConnectionString?.Substring(0, Math.Min(50, storageConnectionString.Length));
    Console.WriteLine($"Storage connection string = {shortStorageConnectionString}");
    Console.WriteLine($"Training Queue Name = {trainingQueueName}");

    var rootCommand = new RootCommand
    {
        new Option<string>(
            "--documentFormat",
            description: "The document format for which the training assets should be uploaded"),
        new Option<string>(
            "--labellingContainerSasUrl",
            description: "SAS Token for container that holds the folder with the labelling and training assets"),
        new Option<string>(
            "--blobContainerFolder",
            getDefaultValue: () => null,
            description: "The name of a folder within the blob container where the assets from the labelling tool aare stored"),
    };

    rootCommand.Description = "This command triggers training of a forms recognizer model, based on assets produced by the labelling tool and stored in a folder in blob storage.";

    // The handler cannot return a value, so failures are reported to Main through this captured local.
    int handlerExitCode = 0;

    rootCommand.Handler = CommandHandler.Create<string, string, string>(async (documentFormat, labellingContainerSasUrl, blobContainerFolder) =>
    {
        try
        {
            Console.WriteLine($"The value for --documentFormat is: {documentFormat}");
            if (string.IsNullOrEmpty(documentFormat))
            {
                throw new ArgumentException($"--documentFormat {documentFormat} must be provided");
            }

            Console.WriteLine($"The value for --labellingContainerSasUrl is: {labellingContainerSasUrl}");
            if (string.IsNullOrEmpty(labellingContainerSasUrl))
            {
                // The message previously named --blobContainer, an option this command does not have.
                throw new ArgumentException($"--labellingContainerSasUrl {labellingContainerSasUrl} must be provided");
            }

            Console.WriteLine($"The value for --blobContainerFolder is: {blobContainerFolder}");
            if (string.IsNullOrEmpty(blobContainerFolder))
            {
                throw new ArgumentException($"--blobContainerFolder {blobContainerFolder} must be provided");
            }

            if (string.IsNullOrEmpty(storageConnectionString))
            {
                throw new InvalidOperationException("storageConnectionString is not configured");
            }

            if (string.IsNullOrEmpty(trainingQueueName))
            {
                throw new InvalidOperationException("TrainingQueueName is not configured");
            }

            // Get hold of the storage account
            CloudStorageAccount storageAccount = CloudStorageAccount.Parse(storageConnectionString);

            TrainingRequestMessage trainingRequestMessage = new TrainingRequestMessage
            {
                BlobFolderName = blobContainerFolder,
                BlobSasUrl = labellingContainerSasUrl,
                DocumentFormat = documentFormat,
                IncludeSubFolders = "false",
                UseLabelFile = "true"
            };

            // Retrieve a reference to the training queue.
            CloudQueueClient queueClient = storageAccount.CreateCloudQueueClient();
            CloudQueue queue = queueClient.GetQueueReference(trainingQueueName);

            // Create the queue if it doesn't already exist
            await queue.CreateIfNotExistsAsync();

            CloudQueueMessage message = new CloudQueueMessage(JsonConvert.SerializeObject(trainingRequestMessage));
            await queue.AddMessageAsync(message);

            Console.WriteLine("done.");
        }
        catch (Exception e)
        {
            Console.WriteLine(e.Message);
            handlerExitCode = -1;
        }
    });

    // Await instead of blocking on .Result inside an async Main.
    int invokeResult = await rootCommand.InvokeAsync(args);
    return invokeResult != 0 ? invokeResult : handlerExitCode;
}