예제 #1
0
        /// <summary>
        /// Reads <c>TrainingRequest.json</c> from the current working directory and sends one
        /// <see cref="TrainingRequestMessage"/> per item in it to the Service Bus queue that the
        /// file names.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        /// <exception cref="InvalidOperationException">
        /// The request file deserialized to <c>null</c> (for example, it was empty).
        /// </exception>
        static async Task Main(string[] args)
        {
            // Path.Combine picks the platform separator; a literal '\\' only resolves on Windows.
            var requestPath = Path.Combine(Directory.GetCurrentDirectory(), "TrainingRequest.json");
            var request     = JsonConvert.DeserializeObject<TrainingRequest>(File.ReadAllText(requestPath));

            if (request == null)
            {
                throw new InvalidOperationException($"{requestPath} does not contain a training request");
            }

            if (request.Items == null)
            {
                Console.WriteLine("The training request contains no items; nothing was sent");
                return;
            }

            var csb = new ServiceBusConnectionStringBuilder(request.ServiceBusConnectionString)
            {
                EntityPath = request.TrainingQueueName
            };
            var queueClient = new QueueClient(csb);

            try
            {
                foreach (var item in request.Items)
                {
                    var trainingRequestMessage = new TrainingRequestMessage
                    {
                        BlobFolderName    = item.BlobFolder,
                        BlobSasUrl        = request.SasUrl,
                        DocumentFormat    = item.DocumentFormat,
                        IncludeSubFolders = "false",
                        UseLabelFile      = "true"
                    };

                    Console.WriteLine($"Sending Message for document format={item.DocumentFormat}");
                    string data = JsonConvert.SerializeObject(trainingRequestMessage);
                    await queueClient.SendAsync(new Message(Encoding.UTF8.GetBytes(data)));
                }
            }
            finally
            {
                // Release the underlying Service Bus connection even when a send fails.
                await queueClient.CloseAsync();
            }
        }
예제 #2
0
        /// <summary>
        /// HTTP-triggered function that reads a training request from the request body and sends one
        /// <see cref="TrainingRequestMessage"/> per entry of its <c>Items</c> array to the training queue.
        /// </summary>
        /// <param name="req">
        /// Incoming request. The JSON body is read for <c>SasUrl</c> and for <c>Items</c>, whose
        /// entries are read for <c>BlobFolder</c> and <c>DocumentFormat</c>.
        /// </param>
        /// <param name="log">Function logger.</param>
        /// <returns>
        /// 200 with a summary of the submitted document formats, or 400 when the body carries no items
        /// (the original code failed with an unhandled exception in that case).
        /// </returns>
        public static async Task <IActionResult> Run(
            [HttpTrigger(AuthorizationLevel.Function, "get", "post", Route = null)] HttpRequest req,
            ILogger log)
        {
            log.LogInformation($"HttpTrainRequest Function triggered by HttpRequest at: {DateTime.Now}");

            string  requestBody = await new StreamReader(req.Body).ReadToEndAsync();
            dynamic payload     = JsonConvert.DeserializeObject(requestBody);

            // An empty body deserializes to null; reject it instead of failing in the dynamic binder.
            if ((object)payload == null || (object)payload.Items == null)
            {
                return new BadRequestObjectResult("HttpTrainRequest: the request body must contain an Items array.");
            }

            // Convert once to a typed string so the values can be passed to ILogger extension methods.
            string sasUrl = payload.SasUrl;

            var csb = new ServiceBusConnectionStringBuilder(serviceBusConnectionString);

            csb.EntityPath = trainingQueue;
            var    queueClient = new QueueClient(csb);
            string formats     = "";
            int    sent        = 0;

            try
            {
                foreach (var item in payload.Items)
                {
                    string documentFormat = item.DocumentFormat;
                    string blobFolder     = item.BlobFolder;

                    // Separator goes in front of every entry but the first, so no trailing
                    // ", " has to be trimmed (that trimming threw when there were no items).
                    if (sent > 0)
                    {
                        formats += ", ";
                    }
                    formats += documentFormat;

                    TrainingRequestMessage trainingRequestMessage = new TrainingRequestMessage
                    {
                        BlobFolderName    = blobFolder,
                        BlobSasUrl        = sasUrl,
                        DocumentFormat    = documentFormat,
                        IncludeSubFolders = "false",
                        UseLabelFile      = "true"
                    };
                    log.LogInformation($"Sending Message for document format={documentFormat}");
                    string  data    = JsonConvert.SerializeObject(trainingRequestMessage);
                    Message message = new Message(Encoding.UTF8.GetBytes(data));
                    await queueClient.SendAsync(message);
                    sent++;
                }
            }
            finally
            {
                // The client is created per invocation, so it must be closed per invocation.
                await queueClient.CloseAsync();
            }

            if (sent == 0)
            {
                return new BadRequestObjectResult("HttpTrainRequest: the request did not contain any items to train.");
            }

            return(new OkObjectResult($"HttpTrainRequest: your training request for document formats {formats} submitted sucessfully."));
        }
예제 #3
0
파일: Trainer.cs 프로젝트: nikkh/placebo
        /// <summary>
        /// Inserts a row into the ModelTraining table for a trained model. The row's ModelVersion is
        /// one higher than the current maximum for the message's document format (1 when none exists).
        /// The lookup and the insert run in a single transaction.
        /// </summary>
        /// <param name="m">Training message whose fields are written to the new row.</param>
        /// <param name="log">Logger for the success and failure messages.</param>
        /// <exception cref="Exception">
        /// Any database failure is logged, the transaction is rolled back, and the original
        /// exception is rethrown with its stack trace intact.
        /// </exception>
        private void UpdateModelTraining(TrainingRequestMessage m, ILogger log)
        {
            int newVersion = 0;

            using (SqlConnection connection = new SqlConnection(_dbConnectionString))
            {
                connection.Open();

                using (SqlTransaction transaction = connection.BeginTransaction("TrainingRequestTransaction"))
                {
                    try
                    {
                        int currentVersion = 0;

                        // All values travel as parameters: the message fields originate outside this
                        // process, so they must never be spliced into the SQL text.
                        using (SqlCommand select = connection.CreateCommand())
                        {
                            select.Transaction = transaction;
                            select.CommandText = "SELECT MAX(ModelVersion) AS Current_Version from ModelTraining WHERE DocumentFormat=@DocumentFormat";
                            select.Parameters.AddWithValue("@DocumentFormat", m.DocumentFormat ?? string.Empty);

                            // MAX() yields exactly one row; it is NULL when no model exists yet.
                            object result = select.ExecuteScalar();
                            if (result != null && result != DBNull.Value)
                            {
                                currentVersion = Convert.ToInt32(result);
                            }
                        }

                        // NOTE(review): MAX+1 is not protected against two concurrent writers for the
                        // same document format under the default isolation level - confirm whether
                        // that can happen in this deployment.
                        newVersion = currentVersion + 1;

                        using (SqlCommand insert = connection.CreateCommand())
                        {
                            insert.Transaction = transaction;
                            insert.CommandText =
                                "Insert into ModelTraining (DocumentFormat, ModelVersion, ModelId, CreatedDateTime, UpdatedDateTime, BlobSasUrl, BlobfolderName, IncludeSubFolders, UseLabelFile, AverageModelAccuracy, TrainingDocumentResults) " +
                                "VALUES (@DocumentFormat, @ModelVersion, @ModelId, @CreatedDateTime, @UpdatedDateTime, @BlobSasUrl, @BlobFolderName, @IncludeSubFolders, @UseLabelFile, @AverageModelAccuracy, @TrainingDocumentResults)";

                            // Null strings become empty strings, which is what the interpolated
                            // statement used to produce.
                            insert.Parameters.AddWithValue("@DocumentFormat", m.DocumentFormat ?? string.Empty);
                            insert.Parameters.AddWithValue("@ModelVersion", newVersion);
                            insert.Parameters.AddWithValue("@ModelId", m.ModelId ?? string.Empty);

                            // Dates keep the textual form the original statement used, so the
                            // server-side conversion into the column type is unchanged.
                            insert.Parameters.AddWithValue("@CreatedDateTime", $"{m.CreatedDateTime:yyyy-MM-dd HH:mm:ss.fff}");
                            insert.Parameters.AddWithValue("@UpdatedDateTime", $"{m.UpdatedDateTime:yyyy-MM-dd HH:mm:ss.fff}");

                            insert.Parameters.AddWithValue("@BlobSasUrl", m.BlobSasUrl ?? string.Empty);
                            insert.Parameters.AddWithValue("@BlobFolderName", m.BlobFolderName ?? string.Empty);
                            insert.Parameters.AddWithValue("@IncludeSubFolders", m.IncludeSubFolders ?? string.Empty);
                            insert.Parameters.AddWithValue("@UseLabelFile", m.UseLabelFile ?? string.Empty);

                            // Sent as a typed value so the result does not depend on the current
                            // culture's decimal separator.
                            insert.Parameters.AddWithValue("@AverageModelAccuracy", (object)m.AverageModelAccuracy ?? DBNull.Value);
                            insert.Parameters.AddWithValue("@TrainingDocumentResults", m.TrainingDocumentResults ?? string.Empty);

                            insert.ExecuteNonQuery();
                        }

                        transaction.Commit();
                    }
                    catch (Exception e)
                    {
                        log.LogError(e, $"Exception prevented writing training request for document format {m.DocumentFormat} model id={m.ModelId} to database (transaction was rolled back).  Message is {e.Message}");

                        try
                        {
                            transaction.Rollback();
                        }
                        catch (Exception rollbackError)
                        {
                            // A failed rollback must not hide the exception that caused it.
                            log.LogError(rollbackError, $"Rollback of the training request transaction failed.  Message is {rollbackError.Message}");
                        }

                        // 'throw;' keeps the original stack trace; 'throw e;' would reset it.
                        throw;
                    }
                }

                log.LogInformation($"{FUNCTION_NAME} Training request for document format {m.DocumentFormat}, version={newVersion}, model id={m.ModelId}  was written to the database");
            }
        }
예제 #4
0
파일: Trainer.cs 프로젝트: nikkh/placebo
        /// <summary>
        /// Queue-triggered function that submits a model training request to the Form Recognizer
        /// endpoint, polls the returned location until the model is ready, and then records the
        /// trained model through <c>UpdateModelTraining</c>.
        /// </summary>
        /// <param name="message">JSON-serialized <see cref="TrainingRequestMessage"/> taken from the queue.</param>
        /// <param name="log">Function logger.</param>
        /// <exception cref="Exception">
        /// Thrown when the submit or poll request fails, the service reports status "invalid", or the
        /// model is not ready after <c>_trainingContext.MaxRetries</c> polls. Every failure is tracked
        /// in telemetry, logged, and rethrown.
        /// </exception>
        public async Task Run([QueueTrigger("training", Connection = "IncomingConnection")] string message, ILogger log)
        {
            Utils utils = new Utils(log, _config["IncomingConnection"], FUNCTION_NAME);

            log.LogInformation($"{FUNCTION_NAME} function was triggered by receipt of message  - Message:{message}");
            try
            {
                TrainingRequestMessage trm = JsonConvert.DeserializeObject <TrainingRequestMessage>(message);
                if (trm == null)
                {
                    throw new Exception($"{FUNCTION_NAME} The queue message did not contain a training request");
                }

                string apiKey  = _config["RecognizerApiKey"];
                string baseUrl = _recognizerServiceBaseUrl;

                var uri = $"{baseUrl}{ParsingConstants.FormRecognizerApiPath}";

                JObject body = new JObject(
                    new JProperty("source", trm.BlobSasUrl),
                    new JProperty("sourceFilter",
                                  new JObject(
                                      new JProperty("prefix", trm.BlobFolderName),
                                      new JProperty("includeSubFolders", trm.IncludeSubFolders)
                                      )
                                  ),
                    new JProperty("useLabelFile", trm.UseLabelFile)
                    );
                string json = body.ToString();

                // The subscription key is attached to each request rather than to
                // _httpClient.DefaultRequestHeaders: the client is shared, and adding to the default
                // headers on every invocation appended one more copy of the key each time.
                string getUrl = "";
                using (var submitRequest = new HttpRequestMessage(HttpMethod.Post, uri)
                {
                    Content = new StringContent(json, System.Text.Encoding.UTF8, "application/json")
                })
                {
                    submitRequest.Headers.Add(ParsingConstants.OcpApimSubscriptionKey, apiKey);

                    using (HttpResponseMessage response = await _httpClient.SendAsync(submitRequest))
                    {
                        if (!response.IsSuccessStatusCode)
                        {
                            var test = await response.Content.ReadAsStringAsync();

                            throw new Exception($"That didnt work.  Trying to submit model training request {test} request was {json} Response:{response.StatusCode.ToString()}");
                        }

                        if (response.Headers.TryGetValues("location", out IEnumerable <string> values))
                        {
                            getUrl = values.First();
                            log.LogInformation($"{FUNCTION_NAME} Model training request accepted by Forms Recognizer {baseUrl}");
                        }
                    }
                }

                // Without a location there is nothing to poll; fail here rather than on an empty URL.
                if (string.IsNullOrEmpty(getUrl))
                {
                    throw new Exception($"{FUNCTION_NAME} Model training request was accepted but the response carried no location header to poll");
                }

                // TODO This needs to be in a 'monitor pattern' durable function
                // A job has been submitted, now we need to track the status of it.

                bool   trainingCompleted = false;
                string lastStatus        = null;
                int    maxRetries        = _trainingContext.MaxRetries;

                for (int i = 0; i < maxRetries; i++)
                {
                    string responseBody;
                    using (var pollRequest = new HttpRequestMessage(HttpMethod.Get, getUrl))
                    {
                        pollRequest.Headers.Add("Ocp-Apim-Subscription-Key", apiKey);

                        using (HttpResponseMessage response = await _httpClient.SendAsync(pollRequest))
                        {
                            if (!response.IsSuccessStatusCode)
                            {
                                throw new Exception($"{FUNCTION_NAME} Training the model failed {uri} Response:{response.StatusCode.ToString()}");
                            }

                            // Awaited instead of .Result so the thread is not blocked.
                            responseBody = await response.Content.ReadAsStringAsync();
                        }
                    }

                    JObject jsonContent = JObject.Parse(responseBody);
                    JToken  modelInfo   = jsonContent["modelInfo"];
                    JToken  statusToken = modelInfo?["status"];

                    if (statusToken == null)
                    {
                        throw new Exception($"{FUNCTION_NAME} Hmmmmmnn?  Training model progress check.  Get request was sucessful, but status element is null?");
                    }

                    string status = statusToken.ToString();
                    lastStatus = status;

                    if (status == "ready")
                    {
                        log.LogInformation($"{FUNCTION_NAME} Training completed sucessfully");
                        trm.ModelId = modelInfo["modelId"].ToString();

                        // Missing timestamps fall back to the current time, as before, but via
                        // null-conditional access instead of swallowing exceptions.
                        string createdText = modelInfo["createdDateTime"]?.ToString() ?? DateTime.Now.ToString();
                        if (DateTime.TryParse(createdText, out DateTime createdValue))
                        {
                            trm.CreatedDateTime = createdValue;
                        }

                        string updatedText = modelInfo["lastUpdatedDateTime"]?.ToString() ?? DateTime.Now.ToString();
                        if (DateTime.TryParse(updatedText, out DateTime updatedValue))
                        {
                            trm.UpdatedDateTime = updatedValue;
                        }

                        // A missing or unparsable accuracy is recorded as 0.
                        JToken trainResult  = jsonContent["trainResult"];
                        string accuracyText = trainResult?["averageModelAccuracy"]?.ToString() ?? "0";
                        trm.AverageModelAccuracy = Decimal.TryParse(accuracyText, out decimal accuracyValue) ? accuracyValue : 0;

                        trm.TrainingDocumentResults = trainResult?["trainingDocuments"]?.ToString();

                        trainingCompleted = true;
                        break;
                    }

                    if (status == "invalid")
                    {
                        throw new Exception($"{FUNCTION_NAME} Training failed. The response body was {responseBody}");
                    }

                    // Back off only when another attempt will follow; waiting after the final
                    // poll would just delay the timeout error below.
                    if (i < maxRetries - 1)
                    {
                        int waitDuration = (i + 1) * 3;
                        log.LogDebug($"{FUNCTION_NAME} Training attempt {i}.  Status is {status}. will sleep for {waitDuration} seconds and retry");
                        await Task.Delay(TimeSpan.FromSeconds(waitDuration));
                    }
                }

                // The original in-loop check (i < MaxRetries) was always true, so an unfinished
                // training fell through and was recorded as a success. Fail explicitly instead.
                if (!trainingCompleted)
                {
                    throw new Exception($"{FUNCTION_NAME} Training did not complete in the allotted time and was abandoned.  Value is {lastStatus}");
                }

                // get here we are ready to go...
                log.LogInformation($"Model for invoice format {trm.DocumentFormat} was trained sucessfully.  Model Id is {trm.ModelId}. Average model accuracy={trm.AverageModelAccuracy}");
                UpdateModelTraining(trm, log);
            }
            // unexpected failure
            catch (Exception e)
            {
                _telemetryClient.TrackException(e);
                log.LogError($"{FUNCTION_NAME} Unexpected error.  Exception Type: {e.GetType().ToString()} Message {e.Message}");
                throw;
            }
        }
예제 #5
0
        /// <summary>
        /// Command-line entry point. Empties the target blob folder, uploads every file from a local
        /// directory into it, creates a read/list SAS for the container, and queues a
        /// <see cref="TrainingRequestMessage"/> so that a model training run is started.
        /// </summary>
        /// <param name="args">
        /// Command-line arguments: <c>--documentFormat</c>, <c>--localPath</c>, <c>--blobContainer</c>
        /// and <c>--blobContainerFolder</c>; all four must be non-empty.
        /// </param>
        /// <returns>
        /// The exit code from the command-line parser, or -1 when the handler failed (the original
        /// returned 0 even after printing an error).
        /// </returns>
        static async Task<int> Main(string[] args)
        {
            // Shows at most the first 50 characters; tolerates a missing or short setting, where
            // Substring(0, 50) threw.
            string Preview(string value) =>
                value == null ? "(not configured)" : value.Substring(0, Math.Min(50, value.Length));

            // Echoes an option value and rejects it when it is missing.
            void Require(string option, string value)
            {
                Console.WriteLine($"The value for {option} is: {value}");
                if (string.IsNullOrEmpty(value))
                {
                    throw new Exception($"{option} {value} must be provided");
                }
            }

            var builder = new ConfigurationBuilder()
                          .SetBasePath(Directory.GetCurrentDirectory())
                          .AddJsonFile("appsettings.local.json", optional: true, reloadOnChange: true);

            IConfigurationRoot configuration = builder.Build();

            dbConnectionString      = configuration.GetConnectionString("PlaceboDatabase");
            storageConnectionString = configuration["storageConnectionString"];
            trainingQueueName       = configuration["TrainingQueueName"];

            Console.WriteLine($"Database connection string = {Preview(dbConnectionString)}");
            Console.WriteLine($"Storage connection string = {Preview(storageConnectionString)}");

            Console.WriteLine($"Training Queue Name = {trainingQueueName}");

            var rootCommand = new RootCommand
            {
                new Option <string>(
                    "--documentFormat",
                    description: "The document format for which the training assets should be uploaded"),

                new Option <string>(
                    "--localPath",
                    description: "The local folder containing the training assets"),
                new Option <string>(
                    "--blobContainer",
                    description: "The name of the blob container where the assets should be uploaded"),
                new Option <string>(
                    "--blobContainerFolder",
                    getDefaultValue: () => null,
                    description: "The anme of a folder within the blob container where the assets should be uploaded"),
            };

            rootCommand.Description = "This command uploads a set of model training assets for a document format (e.g. phoenix) from a local directory to Azure blob storage.  " +
                                      "This triggers a model training run in azure.  A new model is created based on the assets and a record of the new model is kept in the ModelTraining table in the " +
                                      "database.  This new model becomes the latest model for that document format and is then used by the rcognizer component while processing future documents";

            // Set by the handler on failure so the process exit code reflects it.
            int handlerExitCode = 0;

            rootCommand.Handler = CommandHandler.Create <string, string, string, string>(async(documentFormat, localPath, blobContainer, blobContainerFolder) =>
            {
                try
                {
                    Require("--documentFormat", documentFormat);
                    Require("--localPath", localPath);
                    Require("--blobContainer", blobContainer);
                    Require("--blobContainerFolder", blobContainerFolder);

                    if (!Directory.Exists(localPath))
                    {
                        throw new Exception($"--localPath {localPath} does not exist or is not a directory");
                    }

                    // Get hold of the storage account
                    CloudStorageAccount storageAccount = CloudStorageAccount.Parse(storageConnectionString);

                    var targetBlobClient = storageAccount.CreateCloudBlobClient();
                    var targetContainer  = targetBlobClient.GetContainerReference(blobContainer);
                    await targetContainer.CreateIfNotExistsAsync();
                    var directory = targetContainer.GetDirectoryReference(blobContainerFolder);

                    BlobContinuationToken continuationToken = null;

                    do
                    {
                        BlobResultSegment resultSegment = await directory.ListBlobsSegmentedAsync(true, BlobListingDetails.All, 50, continuationToken, null, null);

                        // Materialize once; Results is a lazily evaluated sequence.
                        var existingBlobs = resultSegment.Results.ToList();
                        if (existingBlobs.Count > 0)
                        {
                            Console.WriteLine($"Container already contains {existingBlobs.Count} blobs - they will be deleted");
                        }

                        foreach (var blob in existingBlobs)
                        {
                            try
                            {
                                // Delete through the listed item itself. Passing the absolute URI to
                                // directory.GetBlockBlobReference treated it as a name relative to
                                // the folder, so the original never deleted anything.
                                if (blob is CloudBlob blobToDelete && await blobToDelete.DeleteIfExistsAsync())
                                {
                                    Console.WriteLine($"Deleted blob: {blobToDelete.Name}");
                                }
                            }
                            catch (Exception e)
                            {
                                // Best effort: report and carry on with the remaining blobs.
                                Console.WriteLine($"Unable to delete blob {blob.Uri}: {e.Message}");
                            }
                        }

                        // Get the continuation token. If not null, get the next segment.
                        continuationToken = resultSegment.ContinuationToken;
                    } while (continuationToken != null);

                    string[] fileEntries = Directory.GetFiles(localPath);
                    Stopwatch innerTimer = new Stopwatch();
                    Stopwatch outerTimer = Stopwatch.StartNew();
                    int uploaded = 0;
                    foreach (string fileName in fileEntries)
                    {
                        FileInfo f = new FileInfo(fileName);
                        innerTimer.Restart();
                        using FileStream fs = new FileStream(fileName, FileMode.Open, FileAccess.Read);
                        var blobToUpload    = directory.GetBlockBlobReference(f.Name);
                        await blobToUpload.UploadFromStreamAsync(fs);
                        uploaded++;
                        innerTimer.Stop();

                        Console.WriteLine($"Uploaded file {f.Name} to container {targetContainer.Name} in {innerTimer.ElapsedMilliseconds} ms");
                    }
                    outerTimer.Stop();
                    Console.WriteLine($"Uploaded {uploaded} files to container {targetContainer.Name} in {outerTimer.ElapsedMilliseconds} ms");

                    // Start time is back-dated by 15 minutes and the token expires after 120 minutes.
                    var policy = new SharedAccessBlobPolicy
                    {
                        Permissions            = SharedAccessBlobPermissions.List | SharedAccessBlobPermissions.Read,
                        SharedAccessStartTime  = DateTime.UtcNow.AddMinutes(-15),
                        SharedAccessExpiryTime = DateTime.UtcNow.AddMinutes(120)
                    };

                    var targetContainerToken = targetContainer.GetSharedAccessSignature(policy);
                    var targetContainerSAS   = $"{targetContainer.Uri}{targetContainerToken}";
                    Console.WriteLine($"targetContainerSAS={targetContainerSAS}");

                    TrainingRequestMessage trainingRequestMessage = new TrainingRequestMessage
                    {
                        BlobFolderName    = blobContainerFolder,
                        BlobSasUrl        = targetContainerSAS,
                        DocumentFormat    = documentFormat,
                        IncludeSubFolders = "false",
                        UseLabelFile      = "true"
                    };

                    CloudQueueClient queueClient = storageAccount.CreateCloudQueueClient();

                    // Retrieve a reference to the training queue.
                    CloudQueue queue = queueClient.GetQueueReference(trainingQueueName);

                    // Create the queue if it doesn't already exist
                    await queue.CreateIfNotExistsAsync();

                    CloudQueueMessage message = new CloudQueueMessage(JsonConvert.SerializeObject(trainingRequestMessage));
                    await queue.AddMessageAsync(message);

                    Console.WriteLine("done.");
                }
                catch (Exception e)
                {
                    Console.WriteLine(e.Message);
                    handlerExitCode = -1;
                }
            }
                                                                                         );

            // Awaited rather than blocked on with .Result inside an async Main.
            int parserExitCode = await rootCommand.InvokeAsync(args);
            return handlerExitCode != 0 ? handlerExitCode : parserExitCode;
        }
예제 #6
0
        /// <summary>
        /// Command-line entry point. Queues a <see cref="TrainingRequestMessage"/> that points at
        /// labelling assets already held in blob storage, so that a model training run is started.
        /// </summary>
        /// <param name="args">
        /// Command-line arguments: <c>--documentFormat</c>, <c>--labellingContainerSasUrl</c> and
        /// <c>--blobContainerFolder</c>; all three must be non-empty.
        /// </param>
        /// <returns>
        /// The exit code from the command-line parser, or -1 when the handler failed (the original
        /// returned 0 even after printing an error).
        /// </returns>
        static async Task<int> Main(string[] args)
        {
            // Shows at most the first 50 characters; tolerates a missing or short setting, where
            // Substring(0, 50) threw.
            string Preview(string value) =>
                value == null ? "(not configured)" : value.Substring(0, Math.Min(50, value.Length));

            // Echoes an option value and rejects it when it is missing. Using the option name for
            // both lines also corrects the error for --labellingContainerSasUrl, which used to
            // name --blobContainer.
            void Require(string option, string value)
            {
                Console.WriteLine($"The value for {option} is: {value}");
                if (string.IsNullOrEmpty(value))
                {
                    throw new Exception($"{option} {value} must be provided");
                }
            }

            var builder = new ConfigurationBuilder()
                          .SetBasePath(Directory.GetCurrentDirectory())
                          .AddJsonFile("appsettings.local.json", optional: true, reloadOnChange: true);

            IConfigurationRoot configuration = builder.Build();

            storageConnectionString = configuration["storageConnectionString"];
            trainingQueueName       = configuration["TrainingQueueName"];

            Console.WriteLine($"Storage connection string = {Preview(storageConnectionString)}");

            Console.WriteLine($"Training Queue Name = {trainingQueueName}");

            var rootCommand = new RootCommand
            {
                new Option <string>(
                    "--documentFormat",
                    description: "The document format for which the training assets should be uploaded"),
                new Option <string>(
                    "--labellingContainerSasUrl",
                    description: "SAS Token for container that holds the folder with the labelling and training assets"),
                new Option <string>(
                    "--blobContainerFolder",
                    getDefaultValue: () => null,
                    description: "The name of a folder within the blob container where the assets from the labelling tool aare stored"),
            };

            rootCommand.Description = "This command triggers training of a forms recognizer model, based on assets produced by the labelling tool and stored in a folder in blob storage.";

            // Set by the handler on failure so the process exit code reflects it.
            int handlerExitCode = 0;

            rootCommand.Handler = CommandHandler.Create <string, string, string>(async(documentFormat, labellingContainerSasUrl, blobContainerFolder) =>
            {
                try
                {
                    Require("--documentFormat", documentFormat);
                    Require("--labellingContainerSasUrl", labellingContainerSasUrl);
                    Require("--blobContainerFolder", blobContainerFolder);

                    // Get hold of the storage account
                    CloudStorageAccount storageAccount = CloudStorageAccount.Parse(storageConnectionString);

                    TrainingRequestMessage trainingRequestMessage = new TrainingRequestMessage
                    {
                        BlobFolderName    = blobContainerFolder,
                        BlobSasUrl        = labellingContainerSasUrl,
                        DocumentFormat    = documentFormat,
                        IncludeSubFolders = "false",
                        UseLabelFile      = "true"
                    };

                    CloudQueueClient queueClient = storageAccount.CreateCloudQueueClient();

                    // Retrieve a reference to the training queue.
                    CloudQueue queue = queueClient.GetQueueReference(trainingQueueName);

                    // Create the queue if it doesn't already exist
                    await queue.CreateIfNotExistsAsync();

                    CloudQueueMessage message = new CloudQueueMessage(JsonConvert.SerializeObject(trainingRequestMessage));
                    await queue.AddMessageAsync(message);

                    Console.WriteLine("done.");
                }
                catch (Exception e)
                {
                    Console.WriteLine(e.Message);
                    handlerExitCode = -1;
                }
            }
                                                                                 );

            // Awaited rather than blocked on with .Result inside an async Main.
            int parserExitCode = await rootCommand.InvokeAsync(args);
            return handlerExitCode != 0 ? handlerExitCode : parserExitCode;
        }