/// <summary>
/// Service Bus entry point for incoming documents. Parses the message body as JSON and,
/// if <c>CanProcessMessage</c> accepts it, starts a "DocumentProcessor" orchestration
/// for the blob described under the body's <c>data</c> node.
/// </summary>
/// <param name="message">The received Service Bus message; its body is decoded as UTF-8 JSON.</param>
/// <param name="starter">Durable client used to start the orchestration.</param>
/// <param name="log">Logger for progress and warnings.</param>
/// <param name="ec">Execution context; only the function name is read, for log output.</param>
public async Task TriggerProcessDocument([ServiceBusTrigger("%IncomingDocumentsQueue%", Connection = "IncomingDocumentServiceBusConnectionString")] Message message, [DurableClient] IDurableOrchestrationClient starter, ILogger log, ExecutionContext ec)
{
    log.LogInformation($"{ec.FunctionName} function was triggered by receipt of service bus message {message.MessageId}");

    // The returned activity is not used anywhere in this method; the call is retained as-is.
    message.ExtractActivity();

    var body = JObject.Parse(System.Text.Encoding.UTF8.GetString(message.Body));

    bool processable = CanProcessMessage(message.MessageId, body, log);
    if (!processable)
    {
        log.LogWarning($"Message {message.MessageId} was ignored!! Please see previous log items for reasons.");
        return;
    }

    HorusSql.CheckAndCreateDatabaseIfNecessary(log);

    // Both values are read from the "data" node of the message body.
    string stagingUrl = body["data"]["url"].ToString();
    string mimeType = body["data"]["contentType"].ToString();

    var processingJob = new DocumentProcessingJob
    {
        StagingBlobUrl = stagingUrl,
        ContentType = mimeType
    };

    string instanceId = await starter.StartNewAsync("DocumentProcessor", null, processingJob);
    log.LogInformation($"{ec.FunctionName} processed message {message.MessageId}. Orchestration {instanceId} will process document: {stagingUrl}");
}
/// <summary>
/// Ensures both databases are initialised. First delegates to
/// <c>HorusSql.CheckAndCreateDatabaseIfNecessary</c>, then checks the scores database for
/// the <c>ScoreSummary</c> table and, when it is absent, creates the
/// <c>GeneratedDocuments</c>, <c>ScoreSummary</c> and <c>ScoreDetail</c> tables inside a
/// single transaction.
/// </summary>
/// <param name="log">Logger for trace and informational output.</param>
private void CheckAndCreateDatabaseIfNecessary(ILogger log)
{
    log.LogTrace($"Checking if processing database has been initialised");
    HorusSql.CheckAndCreateDatabaseIfNecessary(log);

    using (SqlConnection connection = new SqlConnection(scoresSQLConnectionString))
    {
        connection.Open();

        // The command is disposed deterministically; CreateCommand already binds it to the connection.
        using (SqlCommand command = connection.CreateCommand())
        {
            log.LogTrace($"Checking if scores database has been initialised");
            command.CommandText = "select name from sysobjects where name = 'ScoreSummary'";
            using (SqlDataReader reader = command.ExecuteReader())
            {
                if (reader.HasRows)
                {
                    log.LogTrace("Table ScoreSummary exists no need to create database tables");
                    return;
                }
            }

            log.LogInformation($"Creating tables in {connection.Database} database ..");

            // Disposing an uncommitted transaction releases it, so a failure in any
            // CREATE TABLE statement does not leave a dangling transaction object.
            using (SqlTransaction transaction = connection.BeginTransaction("InitializeDatabase"))
            {
                command.Transaction = transaction;

                var commandStr = "If not exists (select name from sysobjects where name = 'GeneratedDocuments')"
                    + "CREATE TABLE[dbo].[GeneratedDocuments]([Id][int] IDENTITY(1, 1) NOT NULL, [Account] [nvarchar](50) NULL, [SingleName] [nvarchar](50) NULL, [AddressLine1] [nvarchar](50) NULL, [AddressLine2] [nvarchar](50) NULL, "
                    + "[PostalCode] [nvarchar](50) NULL, [City] [nvarchar](50) NULL, [Notes] [nvarchar](50) NULL, [DocumentNumber] [nvarchar](50) NOT NULL, [FileName] [nvarchar](50) NULL, [DocumentFormat] [nvarchar](50) NULL, "
                    + "[DocumentDate] [datetime2](7) NULL, [PreTaxTotalValue] [decimal](19, 5) NULL, [TaxTotalValue] [decimal](19, 5) NULL, [ShippingTotalValue] [decimal](19, 5) NULL, [GrandTotalValue] [decimal](19, 5) NULL, [LineNumber] [nvarchar](5) NOT NULL, "
                    + "[Title] [nvarchar](50) NULL, [Author] [nvarchar](50) NULL, [Isbn] [nvarchar](50) NULL, [Quantity] [decimal](19, 5) NULL, [Discount] [decimal](19, 5) NULL, [Price] [decimal](19, 5) NULL, [Taxable] [bit] NOT NULL, "
                    + "[GoodsValue] [decimal](19, 5) NULL, [DiscountValue] [decimal](19, 5) NULL, [DiscountedGoodsValue] [decimal](19, 5) NULL, [TaxableValue] [decimal](19, 5) NULL "
                    + "PRIMARY KEY CLUSTERED ([Id] ASC)WITH(STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF) ON[PRIMARY]) ON[PRIMARY]";
                command.CommandText = commandStr;
                command.ExecuteNonQuery();
                log.LogTrace($"Table GeneratedDocuments was created.");

                commandStr = "If not exists (select name from sysobjects where name = 'ScoreSummary')"
                    + "CREATE TABLE[dbo].[ScoreSummary]([Id][int] IDENTITY(1, 1) NOT NULL, "
                    + "[Team] [nvarchar](50) NOT NULL, [TotalScore][int] NOT NULL, [InspectionTime] [datetime2](7) NOT NULL "
                    + "PRIMARY KEY CLUSTERED ([Id] ASC)WITH(STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF) ON[PRIMARY]) ON[PRIMARY]";
                command.CommandText = commandStr;
                command.ExecuteNonQuery();
                log.LogTrace($"Table ScoreSummary was created.");

                commandStr = "If not exists (select name from sysobjects where name = 'ScoreDetail')"
                    + "CREATE TABLE[dbo].[ScoreDetail]([Id][int] IDENTITY(1, 1) NOT NULL, "
                    + "[Team][nvarchar](50) NOT NULL, [InspectionTime] [datetime2](7) NOT NULL, [Type] [nvarchar](50) NOT NULL, [Notes] [nvarchar] (max)NULL, [Score] [int] NOT NULL, [Status] [nvarchar](15) NOT NULL "
                    + "PRIMARY KEY CLUSTERED ([Id] ASC)WITH(STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF) ON[PRIMARY]) ON[PRIMARY]";
                command.CommandText = commandStr;
                command.ExecuteNonQuery();
                log.LogTrace($"Table ScoreDetail was created.");

                transaction.Commit();
            }
        }
    }
}
/// <summary>
/// Produces a single "Processing" score record based on the document count reported by
/// <c>HorusSql.GetDocumentCount</c>, awarding three points per document.
/// </summary>
/// <param name="log">Logger for trace output.</param>
/// <returns>A list containing exactly one score record.</returns>
private async Task<List<ScoreRecord>> CountProcessedDocuments(ILogger log)
{
    log.LogTrace($"Checking for documents in SQL database");
    int numDocs = HorusSql.GetDocumentCount(log);
    log.LogTrace($"{numDocs} documents have been analysed and saved to SQL");

    var processingScore = new ScoreRecord
    {
        Type = $"Processing",
        Notes = $"{numDocs} documents were detected in SQL database (3 points each)",
        Score = numDocs * 3
    };

    return new List<ScoreRecord> { processingScore };
}
/// <summary>
/// Activity that finalises a model training job: records the training via
/// <c>HorusSql.UpdateModelTraining</c>, copies the resulting model version onto the job,
/// and uploads the job serialised as JSON to the orchestration container.
/// </summary>
/// <param name="job">The training job; its <c>ModelVersion</c> is overwritten here.</param>
/// <param name="log">Logger for the completion message.</param>
/// <param name="ec">Execution context; only the function name is read, for log output.</param>
/// <returns>The same <paramref name="job"/> instance, with <c>ModelVersion</c> updated.</returns>
public async Task<ModelTrainingJob> TrainingCompleted([ActivityTrigger] ModelTrainingJob job, ILogger log, Microsoft.Azure.WebJobs.ExecutionContext ec)
{
    var snip = $"Orchestration { job.OrchestrationId}: { ec.FunctionName} -";

    var mtr = HorusSql.UpdateModelTraining(job, log);
    job.ModelVersion = mtr.ModelVersion.ToString();

    var orchestrationContainer = orchestrationBlobClient.GetContainerReference(job.OrchestrationContainerName);
    await orchestrationContainer.CreateIfNotExistsAsync();

    // The blob is named after the document format plus the training-job file extension.
    var jobBlobName = $"{job.DocumentFormat}{BaseConstants.TrainingJobFileExtension}";
    var jobBlob = orchestrationContainer.GetBlockBlobReference(jobBlobName);
    await jobBlob.UploadTextAsync(JsonConvert.SerializeObject(job));

    // Report the name that was actually uploaded (the local jobBlobName) rather than
    // job.JobBlobName, which this method never assigns.
    log.LogInformation($"{snip} - Completed successfully - Job blob {jobBlobName} was uploaded to container {job.OrchestrationContainerName}");
    return job;
}
/// <summary>
/// Service Bus entry point for training requests. Deserialises the message body into a
/// <c>TrainingRequestMessage</c> and starts a "ModelTrainer" orchestration for it.
/// </summary>
/// <param name="message">The received Service Bus message; its body is decoded as UTF-8 JSON.</param>
/// <param name="starter">Durable client used to start the orchestration.</param>
/// <param name="log">Logger for progress output.</param>
/// <param name="ec">Execution context; only the function name is read, for log output.</param>
public async Task TriggerTrainModel([ServiceBusTrigger("%TrainingQueue%", Connection = "IncomingDocumentServiceBusConnectionString")] Message message, [DurableClient] IDurableOrchestrationClient starter, ILogger log, ExecutionContext ec)
{
    log.LogInformation($"{ec.FunctionName} function was triggered by receipt of service bus message {message.MessageId}");

    string json = System.Text.Encoding.UTF8.GetString(message.Body);
    var request = JsonConvert.DeserializeObject<TrainingRequestMessage>(json);

    HorusSql.CheckAndCreateDatabaseIfNecessary(log);

    // Sub-folder inclusion and label-file usage are fixed values, not taken from the request.
    var trainingJob = new ModelTrainingJob
    {
        BlobFolderName = request.BlobFolderName,
        BlobSasUrl = request.BlobSasUrl,
        DocumentFormat = request.DocumentFormat,
        IncludeSubFolders = "false",
        UseLabelFile = "true"
    };

    string instanceId = await starter.StartNewAsync("ModelTrainer", null, trainingJob);
    log.LogInformation($"{ec.FunctionName} processed message {message.MessageId}. Orchestration {instanceId}");
}
/// <summary>
/// Downloads the blob at <c>job.DocumentBlobUrl</c>, deserialises its UTF-8 JSON content
/// into a <c>Document</c>, and persists it through <c>HorusSql.SaveDocument</c>.
/// </summary>
/// <param name="job">The processing job whose document blob is to be saved.</param>
/// <param name="log">Logger passed to the SQL layer and used for the debug message.</param>
/// <param name="snip">Prefix prepended to the debug message.</param>
/// <returns>The <paramref name="job"/> that was passed in.</returns>
public async override Task<DocumentProcessingJob> Save(DocumentProcessingJob job, ILogger log, string snip)
{
    var blobLocation = new Uri(job.DocumentBlobUrl);
    var blob = await orchestrationBlobClient.GetBlobReferenceFromServerAsync(blobLocation);

    byte[] rawBytes;
    using (var buffer = new MemoryStream())
    {
        await blob.DownloadToStreamAsync(buffer);
        rawBytes = buffer.ToArray();
    }

    string json = Encoding.UTF8.GetString(rawBytes);
    var document = JsonConvert.DeserializeObject<Document>(json);

    HorusSql.SaveDocument(document, log);

    log.LogDebug($"{snip} document {document.DocumentNumber} was saved to SQL");
    return job;
}
/// <summary>
/// Awards 500 points for every document type listed in the
/// <c>DocumentTypesForChallenge</c> environment variable (comma separated) for which
/// <c>HorusSql.GetModelIdByDocumentFormat</c> returns a record with a non-null
/// <c>DocumentFormat</c>.
/// </summary>
/// <remarks>
/// The logger used here is not a parameter; it is resolved from outside this method.
/// When the environment variable is missing or blank, a warning is logged and an empty
/// list is returned instead of failing with a <see cref="NullReferenceException"/>.
/// </remarks>
/// <returns>One "Training" score record per document type that has a registered model.</returns>
private async Task<List<ScoreRecord>> InspectModelRegistration()
{
    var results = new List<ScoreRecord>();
    log.LogTrace($"Checking that a Model has been registered for each document type");

    string configuredTypes = Environment.GetEnvironmentVariable("DocumentTypesForChallenge");
    if (string.IsNullOrWhiteSpace(configuredTypes))
    {
        // Without this guard the Split call below would throw on a null value.
        log.LogWarning("Environment variable DocumentTypesForChallenge is not set, so no model registrations can be checked");
        return results;
    }

    foreach (var documentType in configuredTypes.Split(','))
    {
        log.LogTrace($"Checking {documentType}");
        var mtr = HorusSql.GetModelIdByDocumentFormat(documentType);

        // A null DocumentFormat on the returned record is treated as "no model registered".
        if (mtr.DocumentFormat != null)
        {
            log.LogTrace($"Model {mtr.ModelId} has been registered for {documentType}");
            results.Add(new ScoreRecord
            {
                Type = $"Training",
                Notes = $"{mtr.ModelId} has been registered for document type {documentType}",
                Score = 500
            });
        }
    }

    return results;
}
/// <summary>
/// Activity that submits a document for recognition. Looks up the model for the job's
/// document format, downloads the orchestration blob, records an MD5 thumbprint of its
/// bytes on the job, and POSTs the bytes to the recognizer analyze endpoint. The value
/// of the <c>operation-location</c> response header, when present, is stored in
/// <c>job.RecognizerStatusUrl</c>.
/// </summary>
/// <param name="job">The processing job; <c>Model</c>, <c>Thumbprint</c> and <c>RecognizerStatusUrl</c> are set here.</param>
/// <param name="log">Logger for trace and informational output.</param>
/// <param name="ec">Execution context; only the function name is read, for log output.</param>
/// <returns>The same <paramref name="job"/> instance, updated as described above.</returns>
/// <exception cref="Exception">Thrown when the recognizer responds with a non-success status code.</exception>
public async Task<DocumentProcessingJob> StartRecognizer([ActivityTrigger] DocumentProcessingJob job, ILogger log, Microsoft.Azure.WebJobs.ExecutionContext ec)
{
    var snip = $"Orchestration { job.OrchestrationId}: { ec.FunctionName} - ";

    var model = HorusSql.GetModelIdByDocumentFormat(job.DocumentFormat);
    job.Model = model;
    log.LogTrace($"{snip} Document Name={job.DocumentName}, Format={job.DocumentFormat}, Model={model.ModelId}, Version={model.ModelVersion}");

    var queryString = HttpUtility.ParseQueryString(string.Empty);
    queryString["includeTextDetails"] = "True";
    var uri = $"{recognizerServiceBaseUrl}{BaseConstants.FormRecognizerApiPath}/{model.ModelId}/{BaseConstants.FormRecognizerAnalyzeVerb}?{queryString}";
    log.LogTrace($"{snip} Recognizer Uri={uri}");

    byte[] image;
    var orchestrationBlob = await orchestrationBlobClient.GetBlobReferenceFromServerAsync(new Uri(job.OrchestrationBlobUrl));
    using (var memoryStream = new MemoryStream())
    {
        await orchestrationBlob.DownloadToStreamAsync(memoryStream);
        image = memoryStream.ToArray();
    }

    // Hash the downloaded bytes directly; this yields the same digest as re-reading the stream.
    byte[] md5hash;
    using (var md5 = MD5.Create())
    {
        md5hash = md5.ComputeHash(image);
    }

    job.Thumbprint = BitConverter.ToString(md5hash).Replace("-", " ");
    log.LogTrace($"{snip} Orchestration Blob={job.OrchestrationBlobName} downloaded. Thumbprint={job.Thumbprint}");

    // The client is declared outside this method, so its default headers outlive this call.
    // Add the subscription key only when it is absent; adding it on every invocation
    // appends another value to the same header each time.
    const string subscriptionKeyHeader = "Ocp-Apim-Subscription-Key";
    if (!client.DefaultRequestHeaders.Contains(subscriptionKeyHeader))
    {
        client.DefaultRequestHeaders.Add(subscriptionKeyHeader, recognizerApiKey);
    }

    string getUrl = "";
    using (var postContent = new ByteArrayContent(image))
    {
        postContent.Headers.ContentType = new MediaTypeHeaderValue(job.ContentType);

        using (HttpResponseMessage response = await client.PostAsync(uri, postContent))
        {
            if (!response.IsSuccessStatusCode)
            {
                log.LogTrace($"{snip} Recognition request unsuccessful.");

                // Await the body instead of blocking on .Result inside an async method.
                string errorContent = await response.Content.ReadAsStringAsync();
                throw new Exception($"{snip} That didnt work. Trying to submit image for analysis {uri} Content:{errorContent}");
            }

            log.LogTrace($"{snip} Recognition request successful.");
            if (response.Headers.TryGetValues("operation-location", out IEnumerable<string> values))
            {
                getUrl = values.First();
                log.LogTrace($"{snip} Recognition progress can be tracked at {getUrl}");
            }
        }
    }

    // When the header is missing the status URL stays an empty string.
    job.RecognizerStatusUrl = getUrl;
    log.LogInformation($"{snip} Completed successfully");
    return job;
}
/// <summary>
/// Compares processed documents against expected results. Reads all rows of
/// <c>[dbo].[GeneratedDocuments]</c> (ordered by format, number and line), groups them
/// into one <c>DocumentCheckRequest</c> per document with its lines, then loads each
/// processed document through <c>HorusSql.LoadDocument</c> and collects the score
/// records returned by <c>CompareActualWithExpectedResults</c>.
/// </summary>
/// <remarks>
/// An exception raised while iterating rows is logged and swallowed, so the set of
/// expected documents may be incomplete. Documents that cannot be loaded, or that load
/// as null, are skipped.
/// </remarks>
/// <param name="log">Logger for progress and diagnostics.</param>
/// <returns>The combined score records for every document that could be compared.</returns>
private async Task<List<ScoreRecord>> CheckIndividualDocuments(ILogger log)
{
    var results = new List<ScoreRecord>();
    log.LogTrace($"Checking accuracy of document recognition");
    log.LogTrace($"Reading expected results from SQL Database");

    var checks = new List<DocumentCheckRequest>();
    using (SqlConnection connection = new SqlConnection(scoresSQLConnectionString))
    {
        connection.Open();
        using (SqlCommand command = connection.CreateCommand())
        {
            try
            {
                string previousDocumentFormat = "";
                string previousDocumentNumber = "";
                command.CommandText = "SELECT * FROM [dbo].[GeneratedDocuments] order by DocumentFormat, DocumentNumber, LineNumber";

                // The using block closes the reader on every path, replacing the manual finally/Close.
                using (SqlDataReader reader = command.ExecuteReader())
                {
                    try
                    {
                        DocumentCheckRequest checkRequest = null;
                        while (reader.Read())
                        {
                            string currentDocumentFormat = (string)reader["DocumentFormat"];
                            string currentDocumentNumber = (string)reader["DocumentNumber"];

                            // Rows arrive ordered, so a change in format or number marks the first line of a new document.
                            bool newDocument = currentDocumentFormat != previousDocumentFormat
                                || currentDocumentNumber != previousDocumentNumber;
                            previousDocumentFormat = currentDocumentFormat;
                            previousDocumentNumber = currentDocumentNumber;

                            if (newDocument)
                            {
                                // Flush the document that was being accumulated before starting the next one.
                                if (checkRequest != null)
                                {
                                    checks.Add(checkRequest);
                                }

                                checkRequest = new DocumentCheckRequest
                                {
                                    Account = (string)reader["Account"],
                                    DocumentNumber = currentDocumentNumber,
                                    DocumentDate = (DateTime)reader["DocumentDate"],
                                    PostalCode = (string)reader["PostalCode"],
                                    GrandTotalValue = Convert.ToDouble(reader["GrandTotalValue"]),
                                    PreTaxTotalValue = Convert.ToDouble(reader["PreTaxTotalValue"]),
                                    ShippingTotalValue = Convert.ToDouble(reader["ShippingTotalValue"]),
                                    TaxTotalValue = Convert.ToDouble(reader["TaxTotalValue"]),
                                    FileName = (string)reader["FileName"],
                                    DocumentFormat = currentDocumentFormat,
                                };
                            }

                            var line = new DocumentLineCheckRequest
                            {
                                Discount = Convert.ToDouble(reader["Discount"]),
                                LineNumber = (string)reader["LineNumber"],
                                ProductCode = (string)reader["Isbn"],
                                ProductDescription = (string)reader["Title"],
                                Price = Convert.ToDouble(reader["Price"]),
                                Quantity = Convert.ToDouble(reader["Quantity"]),
                                Taxable = (bool)reader["Taxable"],
                                DiscountedGoodsValue = Convert.ToDouble(reader["DiscountedGoodsValue"]),
                                DiscountValue = Convert.ToDouble(reader["DiscountValue"]),
                                GoodsValue = Convert.ToDouble(reader["GoodsValue"]),
                                TaxableValue = Convert.ToDouble(reader["TaxableValue"]),
                            };
                            checkRequest.Lines.Add(line);
                        }

                        // The last document has no following row to trigger the flush above.
                        if (checkRequest != null)
                        {
                            checks.Add(checkRequest);
                        }
                    }
                    catch (Exception e)
                    {
                        // Best effort: keep whatever documents were read before the failure.
                        log.LogError(e.Message);
                    }
                }
            }
            catch (Exception e)
            {
                log.LogError($"Exception prevented reading expected results from SQL database {connection.Database} Message is {e.Message}");

                // Rethrow with "throw;" so the original stack trace is preserved.
                throw;
            }
        }

        log.LogInformation($"Expected Results read from SQL database {connection.Database}");
    }

    log.LogInformation($"We will be checking the actual vs expected results for {checks.Count} documents");
    foreach (var check in checks)
    {
        string fileName = $"{check.DocumentFormat}-{check.FileName}";
        log.LogDebug($"Loading document {fileName} from processing database");

        Document document = null;
        try
        {
            log.LogTrace($"Checking {fileName}");
            document = HorusSql.LoadDocument(fileName, log);
        }
        catch (Exception)
        {
            log.LogWarning($"Unable to load document {fileName} from processing database.");
            continue;
        }

        if (document == null)
        {
            log.LogTrace($"Document {check.DocumentNumber} has not been processed sucessfully and will be skipped");
            continue;
        }

        var checkResults = CompareActualWithExpectedResults(document, check, log);
        results.AddRange(checkResults);
    }

    return results;
}