Beispiel #1
0
        /// <summary>
        /// Service Bus entry point. Parses the message body as JSON, checks it with
        /// <c>CanProcessMessage</c> and, when accepted, starts a "DocumentProcessor"
        /// orchestration for the blob described under the body's <c>data</c> element.
        /// </summary>
        /// <param name="message">Incoming Service Bus message; its body is decoded as UTF-8 JSON.</param>
        /// <param name="starter">Durable client used to start the orchestration.</param>
        /// <param name="log">Logger for progress and warning messages.</param>
        /// <param name="ec">Execution context; only the function name is read, for logging.</param>
        public async Task TriggerProcessDocument([ServiceBusTrigger("%IncomingDocumentsQueue%", Connection = "IncomingDocumentServiceBusConnectionString")] Message message, [DurableClient] IDurableOrchestrationClient starter, ILogger log, ExecutionContext ec)
        {
            log.LogInformation($"{ec.FunctionName} function was triggered by receipt of service bus message {message.MessageId}");

            // The extracted activity is not consumed below; the call itself is retained.
            var extractedActivity = message.ExtractActivity();
            var rawJson           = System.Text.Encoding.UTF8.GetString(message.Body);
            var envelope          = JObject.Parse(rawJson);

            if (!CanProcessMessage(message.MessageId, envelope, log))
            {
                log.LogWarning($"Message {message.MessageId} was ignored!!  Please see previous log items for reasons.");
                return;
            }

            HorusSql.CheckAndCreateDatabaseIfNecessary(log);

            // Both values live under the "data" element of the message body.
            var data      = envelope["data"];
            var sourceUrl = data["url"].ToString();
            var mediaType = data["contentType"].ToString();

            var job = new DocumentProcessingJob();
            job.StagingBlobUrl = sourceUrl;
            job.ContentType    = mediaType;

            string orchestrationId = await starter.StartNewAsync("DocumentProcessor", null, job);

            log.LogInformation($"{ec.FunctionName} processed message {message.MessageId}.  Orchestration {orchestrationId} will process document: {sourceUrl}");
        }
Beispiel #2
0
        /// <summary>
        /// Downloads the document JSON referenced by <c>job.DocumentBlobUrl</c>, deserializes it
        /// and creates it as an item in the configured Cosmos container, creating the database
        /// and container first when they do not exist.
        /// </summary>
        /// <param name="job">Job whose <c>DocumentBlobUrl</c> points at the serialized document.</param>
        /// <param name="log">Logger for the debug confirmation message.</param>
        /// <param name="snip">Prefix placed at the start of the log message.</param>
        /// <returns>The same <paramref name="job"/> instance that was passed in.</returns>
        public async override Task <DocumentProcessingJob> Save(DocumentProcessingJob job, ILogger log, string snip)
        {
            var sourceUri  = new Uri(job.DocumentBlobUrl);
            var sourceBlob = await orchestrationBlobClient.GetBlobReferenceFromServerAsync(sourceUri);

            // Pull the whole blob into memory and decode it as UTF-8 text.
            string json;
            using (var buffer = new MemoryStream())
            {
                await sourceBlob.DownloadToStreamAsync(buffer);
                byte[] bytes = buffer.ToArray();
                json = Encoding.UTF8.GetString(bytes);
            }

            var document = JsonConvert.DeserializeObject <Document>(json);

            Database cosmosDatabase = await cosmosClient.CreateDatabaseIfNotExistsAsync(cosmosDatabaseId);

            // The container is partitioned on the document's Account property.
            var containerSettings = new ContainerProperties(cosmosContainerId, partitionKeyPath: "/Account");
            Container cosmosContainer = await cosmosDatabase.CreateContainerIfNotExistsAsync(containerSettings, throughput: 400);

            // The written item is not needed back, so the response content is switched off.
            var writeOptions = new ItemRequestOptions();
            writeOptions.EnableContentResponseOnWrite = false;

            await cosmosContainer.CreateItemAsync(document, new PartitionKey(document.Account), writeOptions);

            log.LogDebug($"{snip} document {document.DocumentNumber} was saved to Cosmos - database={cosmosDatabaseId}, container={cosmosContainerId})");
            return job;
        }
Beispiel #3
0
        /// <summary>
        /// Activity that runs the configured processing engine over the job, serializes the
        /// resulting document to JSON and uploads it to the orchestration container.
        /// </summary>
        /// <param name="job">Job to process; <c>DocumentName</c> and <c>DocumentBlobUrl</c> are set on it.</param>
        /// <param name="log">Logger for debug and information messages.</param>
        /// <param name="ec">Execution context; only the function name is read, for logging.</param>
        /// <returns>The same <paramref name="job"/> instance, updated.</returns>
        public async Task <DocumentProcessingJob> Processor([ActivityTrigger] DocumentProcessingJob job, ILogger log, Microsoft.Azure.WebJobs.ExecutionContext ec)
        {
            var snip = $"Orchestration { job.OrchestrationId}: { ec.FunctionName} -";

            var processingEngine = (IProcessingEngine)EngineFactory.GetEngine(processingEngineAssembly, processingEngineType);
            log.LogDebug($"{snip} Processing engine: {processingEngine.GetType()}");

            var parsed = processingEngine.Process(job, log, snip);

            // Show a mask in the log when no document number was extracted.
            string documentForOutput = string.IsNullOrEmpty(parsed.DocumentNumber)
                ? "********"
                : parsed.DocumentNumber;
            log.LogDebug($"Orchestration {job.OrchestrationId}: {ec.FunctionName} - Document {documentForOutput} was parsed form recognizer output in {parsed.TimeToShred} ms");

            var container = orchestrationBlobClient.GetContainerReference(job.OrchestrationContainerName);
            var blobName  = $"{job.OrchestrationBlobName}{BaseConstants.DocumentExtension}";
            job.DocumentName = blobName;

            var targetBlob     = container.GetBlockBlobReference(blobName);
            var serializedJson = JsonConvert.SerializeObject(parsed);
            await targetBlob.UploadTextAsync(serializedJson);

            job.DocumentBlobUrl = targetBlob.Uri.ToString();
            log.LogInformation($"{snip} - Completed successfully");
            return job;
        }
Beispiel #4
0
        /// <summary>
        /// Activity that polls the recognizer status URL stored on the job and records the
        /// reported status. When the status is "succeeded" the full response body is kept on
        /// the job as <c>RecognizerResponse</c>.
        /// </summary>
        /// <param name="job">Job carrying <c>RecognizerStatusUrl</c>; updated with the latest status.</param>
        /// <param name="log">Logger for the status message.</param>
        /// <param name="ec">Execution context; only the function name is read, for logging.</param>
        /// <returns>The same <paramref name="job"/> instance, updated.</returns>
        /// <exception cref="Exception">
        /// Thrown when the HTTP call is unsuccessful or the response has no "status" element.
        /// </exception>
        public async Task <DocumentProcessingJob> CheckRecognizerStatus([ActivityTrigger] DocumentProcessingJob job, ILogger log, Microsoft.Azure.WebJobs.ExecutionContext ec)
        {
            var    snip = $"Orchestration { job.OrchestrationId}: { ec.FunctionName} -";
            string responseBody;

            // The subscription key is attached to this request only. Adding it to the shared
            // client's DefaultRequestHeaders on every invocation appends another copy of the
            // value each time and mutates state shared with concurrent calls.
            using (var request = new System.Net.Http.HttpRequestMessage(System.Net.Http.HttpMethod.Get, job.RecognizerStatusUrl))
            {
                request.Headers.Add("Ocp-Apim-Subscription-Key", recognizerApiKey);

                using (var response = await client.SendAsync(request))
                {
                    if (!response.IsSuccessStatusCode)
                    {
                        throw new Exception($"{snip} That didnt work.  Trying to submit image for analysis {job.RecognizerStatusUrl} Response:{response.StatusCode.ToString()}");
                    }

                    // Awaited rather than blocking on .Result inside an async method.
                    responseBody = await response.Content.ReadAsStringAsync();
                }
            }

            var jsonContent = JObject.Parse(responseBody);
            var statusToken = jsonContent["status"];

            if (statusToken == null)
            {
                throw new Exception($"{snip} Hmmmmmnn?  Checking analysis progress.  Get request was sucessful, but status element is null?");
            }

            string jobStatus = statusToken.ToString();

            if (jobStatus == "succeeded")
            {
                // Only a finished analysis is worth keeping for the later processing step.
                job.RecognizerResponse = responseBody;
            }
            job.LatestRecognizerStatus = jobStatus;

            log.LogInformation($"{snip} - Job Status: {jobStatus}");
            return(job);
        }
Beispiel #5
0
        /// <summary>
        /// Activity that creates a container named after the orchestration id and starts a
        /// copy of the staging blob into it. The staging container name is recorded as the
        /// job's document format and used as a prefix of the orchestration blob name.
        /// </summary>
        /// <param name="job">Job carrying <c>StagingBlobUrl</c>; container, blob and format fields are set on it.</param>
        /// <param name="log">Logger for trace and information messages.</param>
        /// <param name="ec">Execution context; only the function name is read, for logging.</param>
        /// <returns>The same <paramref name="job"/> instance, updated.</returns>
        public async Task <DocumentProcessingJob> StartPreprocessor([ActivityTrigger] DocumentProcessingJob job, ILogger log, Microsoft.Azure.WebJobs.ExecutionContext ec)
        {
            var snip = $"Orchestration { job.OrchestrationId}: { ec.FunctionName} -";

            // One container per orchestration, named after the orchestration id.
            var targetContainer = orchestrationBlobClient.GetContainerReference($"{job.OrchestrationId}");
            job.OrchestrationContainerName = targetContainer.Name;
            await targetContainer.CreateIfNotExistsAsync();
            log.LogTrace($"orchestrationContainerName={job.OrchestrationContainerName}");

            // Segments[1] is the first path segment after the root "/", i.e. the container.
            var sourceUri           = new Uri(job.StagingBlobUrl);
            var sourceContainerName = sourceUri.Segments[1].Split('/')[0];
            job.DocumentFormat = sourceContainerName;

            var sourceContainer = stagingBlobClient.GetContainerReference(sourceContainerName);
            var sourceFileName  = Uri.UnescapeDataString(sourceUri.Segments.Last());
            string targetBlobName = $"{sourceContainerName}-{sourceFileName}";
            job.OrchestrationBlobName = targetBlobName;
            log.LogTrace($"orchestrationBlobName={job.OrchestrationBlobName}");

            var targetBlob = targetContainer.GetBlockBlobReference(targetBlobName);
            job.OrchestrationBlobUrl = targetBlob.Uri.ToString();

            var sourceBlob    = await stagingBlobClient.GetBlobReferenceFromServerAsync(sourceUri);
            var copySourceUri = new Uri(GetSharedAccessUri(sourceBlob.Name, sourceContainer));
            await targetBlob.StartCopyAsync(copySourceUri);

            log.LogInformation($"{snip} - Completed successfully");
            return job;
        }
Beispiel #6
0
        /// <summary>
        /// Activity that deletes the staging blob referenced by <c>job.StagingBlobUrl</c>.
        /// </summary>
        /// <param name="job">Job carrying the staging blob URL.</param>
        /// <param name="log">Logger for debug and information messages.</param>
        /// <param name="ec">Execution context; only the function name is read, for logging.</param>
        /// <returns>The same <paramref name="job"/> instance that was passed in.</returns>
        public async Task <DocumentProcessingJob> PreprocessorCompleted([ActivityTrigger] DocumentProcessingJob job, ILogger log, Microsoft.Azure.WebJobs.ExecutionContext ec)
        {
            var snip = $"Orchestration { job.OrchestrationId}: { ec.FunctionName} -";

            var stagingUri = new Uri(job.StagingBlobUrl);
            var blobToDrop = await stagingBlobClient.GetBlobReferenceFromServerAsync(stagingUri);
            await blobToDrop.DeleteAsync();

            log.LogDebug($"{snip} staging blob with Uri={job.StagingBlobUrl} was deleted.");
            log.LogInformation($"{snip} Completed successfully");
            return job;
        }
Beispiel #7
0
        /// <summary>
        /// Activity that obtains the configured persistence engine and asks it to save the job.
        /// </summary>
        /// <param name="job">Job handed to the persistence engine.</param>
        /// <param name="log">Logger for debug and information messages.</param>
        /// <param name="ec">Execution context; only the function name is read, for logging.</param>
        /// <returns>The same <paramref name="job"/> instance that was passed in.</returns>
        public async Task <DocumentProcessingJob> Persistor([ActivityTrigger] DocumentProcessingJob job, ILogger log, Microsoft.Azure.WebJobs.ExecutionContext ec)
        {
            var snip = $"Orchestration { job.OrchestrationId}: { ec.FunctionName} - ";

            var persistenceEngine = (IPersistenceEngine)EngineFactory.GetEngine(persistenceEngineAssembly, persistenceEngineType);
            log.LogDebug($"{snip} Persistence engine: {persistenceEngine.GetType()}");

            // The engine's returned job is not used; the input instance is returned instead.
            await persistenceEngine.Save(job, log, snip);

            log.LogInformation($"{snip} - Completed successfully");
            return job;
        }
Beispiel #8
0
        /// <summary>
        /// Activity that obtains the configured integration engine, runs it over the job and
        /// traces the string form of whatever the engine returned.
        /// </summary>
        /// <param name="job">Job handed to the integration engine.</param>
        /// <param name="log">Logger for debug, trace and information messages.</param>
        /// <param name="ec">Execution context; only the function name is read, for logging.</param>
        /// <returns>The same <paramref name="job"/> instance that was passed in.</returns>
        public async Task <DocumentProcessingJob> Integrator([ActivityTrigger] DocumentProcessingJob job, ILogger log, Microsoft.Azure.WebJobs.ExecutionContext ec)
        {
            var snip = $"Orchestration { job.OrchestrationId}: { ec.FunctionName} - ";

            var integrationEngine = (IIntegrationEngine)EngineFactory.GetEngine(integrationEngineAssembly, integrationEngineType);
            log.LogDebug($"{snip} Integration engine: {integrationEngine.GetType()}");

            var integrationResult = await integrationEngine.Integrate(job, log, snip);
            log.LogTrace(integrationResult.ToString());

            log.LogInformation($"{snip} - Completed successfully");
            return job;
        }
Beispiel #9
0
        /// <summary>
        /// Activity that serializes the job to JSON and uploads it to the orchestration
        /// container, recording the blob name on the job before serializing.
        /// </summary>
        /// <param name="job">Job to serialize; <c>JobBlobName</c> is set on it.</param>
        /// <param name="log">Logger for the information message.</param>
        /// <param name="ec">Execution context; only the function name is read, for logging.</param>
        /// <returns>The same <paramref name="job"/> instance, updated.</returns>
        public async Task <DocumentProcessingJob> Finalizer([ActivityTrigger] DocumentProcessingJob job, ILogger log, Microsoft.Azure.WebJobs.ExecutionContext ec)
        {
            var snip = $"Orchestration { job.OrchestrationId}: { ec.FunctionName} - ";

            var container = orchestrationBlobClient.GetContainerReference(job.OrchestrationContainerName);
            var blobName  = $"{job.OrchestrationBlobName}{BaseConstants.ProcessingJobFileExtension}";

            // Set before serializing so the uploaded JSON includes its own blob name.
            job.JobBlobName = blobName;

            var targetBlob    = container.GetBlockBlobReference(blobName);
            var serializedJob = JsonConvert.SerializeObject(job);
            await targetBlob.UploadTextAsync(serializedJob);

            log.LogInformation($"{snip} Completed successfully");
            return job;
        }
Beispiel #10
0
        /// <summary>
        /// Activity that uploads the recognizer response held on the job to a blob in the
        /// orchestration container and records that blob's name on the job.
        /// </summary>
        /// <param name="job">Job carrying <c>RecognizerResponse</c>; <c>RecognizedBlobName</c> is set on it.</param>
        /// <param name="log">Logger for the information message.</param>
        /// <param name="ec">Execution context; only the function name is read, for logging.</param>
        /// <returns>The same <paramref name="job"/> instance, updated.</returns>
        public async Task <DocumentProcessingJob> RecognizerCompleted([ActivityTrigger] DocumentProcessingJob job, ILogger log, Microsoft.Azure.WebJobs.ExecutionContext ec)
        {
            var snip = $"Orchestration { job.OrchestrationId}: { ec.FunctionName} -";

            var container = orchestrationBlobClient.GetContainerReference(job.OrchestrationContainerName);
            var blobName  = $"{job.OrchestrationBlobName}{BaseConstants.RecognizedExtension}";
            job.RecognizedBlobName = blobName;

            var targetBlob = container.GetBlockBlobReference(blobName);
            await targetBlob.UploadTextAsync(job.RecognizerResponse);

            log.LogInformation($"{snip} - Completed successfully");
            return job;
        }
Beispiel #11
0
        /// <summary>
        /// Activity that reports whether the copy into the orchestration blob has finished.
        /// </summary>
        /// <param name="job">Job carrying <c>OrchestrationBlobUrl</c>.</param>
        /// <param name="log">Logger for the status message.</param>
        /// <param name="ec">Execution context; only the function name is read, for logging.</param>
        /// <returns>
        /// "Completed" when the blob's copy status is <c>CopyStatus.Success</c>; "Pending" for
        /// every other copy status.
        /// </returns>
        public async Task <string> CheckPreprocessorStatus([ActivityTrigger] DocumentProcessingJob job, ILogger log, Microsoft.Azure.WebJobs.ExecutionContext ec)
        {
            var snip = $"Orchestration { job.OrchestrationId}: { ec.FunctionName} -";

            var blobUri    = new Uri(job.OrchestrationBlobUrl);
            var copiedBlob = await orchestrationBlobClient.GetBlobReferenceFromServerAsync(blobUri);
            await copiedBlob.FetchAttributesAsync();

            // Anything other than Success is reported as still pending.
            string jobStatus = copiedBlob.CopyState.Status == CopyStatus.Success
                ? "Completed"
                : "Pending";

            log.LogInformation($"{snip} Job Status: {jobStatus}");
            return jobStatus;
        }
Beispiel #12
0
        /// <summary>
        /// Downloads the document JSON referenced by <c>job.DocumentBlobUrl</c>, deserializes it
        /// and passes it to <c>HorusSql.SaveDocument</c>.
        /// </summary>
        /// <param name="job">Job whose <c>DocumentBlobUrl</c> points at the serialized document.</param>
        /// <param name="log">Logger passed to the SQL helper and used for the debug message.</param>
        /// <param name="snip">Prefix placed at the start of the log message.</param>
        /// <returns>The same <paramref name="job"/> instance that was passed in.</returns>
        public async override Task <DocumentProcessingJob> Save(DocumentProcessingJob job, ILogger log, string snip)
        {
            var sourceUri  = new Uri(job.DocumentBlobUrl);
            var sourceBlob = await orchestrationBlobClient.GetBlobReferenceFromServerAsync(sourceUri);

            // Pull the whole blob into memory and decode it as UTF-8 text.
            string json;
            using (var buffer = new MemoryStream())
            {
                await sourceBlob.DownloadToStreamAsync(buffer);
                byte[] bytes = buffer.ToArray();
                json = Encoding.UTF8.GetString(bytes);
            }

            var document = JsonConvert.DeserializeObject <Document>(json);
            HorusSql.SaveDocument(document, log);

            log.LogDebug($"{snip} document {document.DocumentNumber} was saved to SQL");
            return job;
        }
Beispiel #13
0
        /// <summary>
        /// Activity that records a failed job: the serialized job and the serialized
        /// <c>job.Exception</c> are uploaded as two blobs to the orchestration container,
        /// which is created when it does not exist.
        /// </summary>
        /// <param name="job">Failed job carrying the exception to record.</param>
        /// <param name="log">Logger for the information message.</param>
        /// <param name="ec">Execution context; only the function name is read, for logging.</param>
        /// <returns>The same <paramref name="job"/> instance that was passed in.</returns>
        /// <exception cref="HorusTerminalException">
        /// Wraps any exception raised while recording the failure.
        /// </exception>
        public async Task <DocumentProcessingJob> ProcessingErrorHandler([ActivityTrigger] DocumentProcessingJob job, ILogger log, Microsoft.Azure.WebJobs.ExecutionContext ec)
        {
            try
            {
                var snip = $"Orchestration { job.OrchestrationId}: { ec.FunctionName} -";
                var orchestrationContainer = orchestrationBlobClient.GetContainerReference(job.OrchestrationContainerName);
                await orchestrationContainer.CreateIfNotExistsAsync();

                // NOTE(review): this uses the *training* job extension although the handler deals
                // with processing jobs - confirm this is intended.
                var jobBlobName = $"{job.DocumentFormat}{BaseConstants.TrainingJobFileExtension}";
                var jobBlob     = orchestrationContainer.GetBlockBlobReference(jobBlobName);
                await jobBlob.UploadTextAsync(JsonConvert.SerializeObject(job));

                var exceptionBlobName = $"{job.DocumentFormat}{BaseConstants.ExceptionExtension}";
                var exceptionBlob     = orchestrationContainer.GetBlockBlobReference(exceptionBlobName);
                await exceptionBlob.UploadTextAsync(JsonConvert.SerializeObject(job.Exception));

                // Report the blobs actually written here (job.JobBlobName is never set by this
                // method), and tolerate a missing exception so that a successful upload is not
                // turned into a terminal failure by the log statement.
                log.LogInformation($"{snip} - Exception Handled - Exception of Type {job.Exception?.GetType()} added to blob {exceptionBlobName}; job blob {jobBlobName} was uploaded to container {job.OrchestrationContainerName}");
                return(job);
            }
            catch (Exception ex)
            {
                throw new HorusTerminalException(ex);
            }
        }
Beispiel #14
0
        /// <summary>
        /// Activity that submits the orchestration blob to the form recognizer for analysis.
        /// The model is looked up by the job's document format, the blob's MD5 hash is stored
        /// as the job thumbprint, and the "operation-location" response header is stored as the
        /// URL to poll for progress.
        /// </summary>
        /// <param name="job">
        /// Job carrying <c>OrchestrationBlobUrl</c>, <c>DocumentFormat</c> and <c>ContentType</c>;
        /// <c>Model</c>, <c>Thumbprint</c> and <c>RecognizerStatusUrl</c> are set on it.
        /// </param>
        /// <param name="log">Logger for trace, warning and information messages.</param>
        /// <param name="ec">Execution context; only the function name is read, for logging.</param>
        /// <returns>The same <paramref name="job"/> instance, updated.</returns>
        /// <exception cref="Exception">Thrown when the recognizer rejects the request.</exception>
        public async Task <DocumentProcessingJob> StartRecognizer([ActivityTrigger] DocumentProcessingJob job, ILogger log, Microsoft.Azure.WebJobs.ExecutionContext ec)
        {
            var snip  = $"Orchestration { job.OrchestrationId}: { ec.FunctionName} - ";
            var model = HorusSql.GetModelIdByDocumentFormat(job.DocumentFormat);

            job.Model = model;
            log.LogTrace($"{snip} Document Name={job.DocumentName}, Format={job.DocumentFormat}, Model={model.ModelId}, Version={model.ModelVersion}");

            var queryString = HttpUtility.ParseQueryString(string.Empty);

            queryString["includeTextDetails"] = "True";
            var uri = $"{recognizerServiceBaseUrl}{BaseConstants.FormRecognizerApiPath}/{model.ModelId}/{BaseConstants.FormRecognizerAnalyzeVerb}?{queryString}";

            log.LogTrace($"{snip} Recognizer Uri={uri}");

            // Download the blob once; the same bytes are hashed and then posted.
            byte[] image;
            var    orchestrationBlob = await orchestrationBlobClient.GetBlobReferenceFromServerAsync(new Uri(job.OrchestrationBlobUrl));

            using (var memoryStream = new MemoryStream())
            {
                await orchestrationBlob.DownloadToStreamAsync(memoryStream);
                image = memoryStream.ToArray();
            }

            byte[] md5hash;
            using (var md5 = MD5.Create())
            {
                md5hash = md5.ComputeHash(image);
            }
            job.Thumbprint = BitConverter.ToString(md5hash).Replace("-", " ");
            log.LogTrace($"{snip} Orchestration Blob={job.OrchestrationBlobName} downloaded.  Thumbprint={job.Thumbprint}");

            string getUrl = "";

            // The subscription key is attached to this request only. Adding it to the shared
            // client's DefaultRequestHeaders on every invocation appends another copy of the
            // value each time and mutates state shared with concurrent calls.
            using (var request = new HttpRequestMessage(HttpMethod.Post, uri))
            {
                request.Headers.Add("Ocp-Apim-Subscription-Key", recognizerApiKey);
                request.Content = new ByteArrayContent(image);
                request.Content.Headers.ContentType = new MediaTypeHeaderValue(job.ContentType);

                using (var response = await client.SendAsync(request))
                {
                    if (!response.IsSuccessStatusCode)
                    {
                        log.LogTrace($"{snip} Recognition request unsuccessful.");

                        // Awaited rather than blocking on .Result inside an async method.
                        var failureContent = await response.Content.ReadAsStringAsync();
                        throw new Exception($"{snip} That didnt work.  Trying to submit image for analysis {uri} Content:{failureContent}");
                    }

                    log.LogTrace($"{snip} Recognition request successful.");
                    if (response.Headers.TryGetValues("operation-location", out IEnumerable <string> values))
                    {
                        getUrl = values.First();
                        log.LogTrace($"{snip} Recognition progress can be tracked at {getUrl}");
                    }
                    else
                    {
                        // Without this header there is nothing to poll; surface it instead of
                        // silently handing an empty status URL to the next step.
                        log.LogWarning($"{snip} Recognition request succeeded but no operation-location header was returned; the status URL will be empty.");
                    }
                }
            }

            job.RecognizerStatusUrl = getUrl;
            log.LogInformation($"{snip} Completed successfully");
            return(job);
        }
Beispiel #15
0
 /// <summary>
 /// Produces a <see cref="Document"/> from the supplied processing job.
 /// </summary>
 /// <param name="job">The document processing job to work from.</param>
 /// <param name="log">Logger made available to the implementation.</param>
 /// <param name="snip">String supplied by the caller; presumably a log-message prefix - confirm against callers.</param>
 /// <returns>The resulting <see cref="Document"/>.</returns>
 public abstract Document Process(DocumentProcessingJob job, ILogger log, string snip);
Beispiel #16
0
        /// <summary>
        /// Builds a <see cref="Document"/> from the recognizer response stored on the job.
        /// Header values are read from the first document result's "fields" object, then
        /// numbered line items are read until a line number has none of its identifying
        /// fields present.
        /// </summary>
        /// <param name="job">
        /// Job carrying <c>RecognizerResponse</c> (JSON), <c>OrchestrationId</c>,
        /// <c>OrchestrationBlobName</c>, <c>Thumbprint</c> and <c>Model</c>.
        /// </param>
        /// <param name="log">Logger used to trace each line item found.</param>
        /// <param name="snip">Prefix placed at the start of log messages.</param>
        /// <returns>The populated document, with <c>TimeToShred</c> set to the elapsed milliseconds.</returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the first document result has no "fields" object.
        /// </exception>
        public override Document Process(DocumentProcessingJob job, ILogger log, string snip)
        {
            Stopwatch timer = Stopwatch.StartNew();

            Document document = new Document {
                UniqueRunIdentifier = job.OrchestrationId,
                FileName            = job.OrchestrationBlobName
            };
            JObject jsonContent = JObject.Parse(job.RecognizerResponse);

            if (jsonContent["status"] != null)
            {
                document.RecognizerStatus = jsonContent["status"].ToString();
            }
            if (jsonContent["errors"] != null)
            {
                document.RecognizerErrors = jsonContent["errors"].ToString();
            }

            // Fill out the document object
            var nittyGritty = (JObject)jsonContent["analyzeResult"]["documentResults"][0]["fields"];

            if (nittyGritty == null)
            {
                // Fail here with a clear message rather than with a null dereference later on.
                throw new InvalidOperationException($"{snip} Recognizer response has no 'fields' object in the first document result.");
            }

            // The property is named as UTC, so record UTC rather than local time.
            document.ShreddingUtcDateTime = DateTime.UtcNow;
            document.OrderNumber          = GetString(HorusConstants.OrderNumber, nittyGritty, document);
            document.OrderDate            = GetDate(HorusConstants.OrderDate, nittyGritty, document);
            document.TaxDate              = GetDate(HorusConstants.TaxDate, nittyGritty, document);
            document.DocumentNumber       = GetString(HorusConstants.InvoiceNumber, nittyGritty, document);
            document.Account              = GetString(HorusConstants.Account, nittyGritty, document);
            document.NetTotal             = GetNumber(HorusConstants.NetTotal, nittyGritty, document) ?? 0;
            document.VatAmount            = GetNumber(HorusConstants.VatAmount, nittyGritty, document) ?? 0;
            document.ShippingTotal        = GetNumber(HorusConstants.ShippingTotal, nittyGritty, document) ?? 0;
            document.GrandTotal           = GetNumber(HorusConstants.GrandTotal, nittyGritty, document) ?? 0;
            document.PostCode             = GetString(HorusConstants.PostCode, nittyGritty, document);
            document.TimeToShred          = 0; // Set after processing complete
            document.Thumbprint           = job.Thumbprint;
            document.ModelId              = job.Model.ModelId;
            document.ModelVersion         = job.Model.ModelVersion.ToString();
            if (document.TaxDate.HasValue)
            {
                // Year followed by the un-padded month, e.g. 2020 + 3 gives "20203".
                document.TaxPeriod = document.TaxDate.Value.Year.ToString() + document.TaxDate.Value.Month.ToString();
            }

            // Lines
            // NOTE(review): the loop stops at MAX_DOCUMENT_LINES - 1; confirm whether the
            // upper bound is meant to be inclusive.
            for (int i = 1; i < HorusConstants.MAX_DOCUMENT_LINES; i++)
            {
                var    lineNumber        = i.ToString("D2");
                string lineItemId        = $"{HorusConstants.LineItemPrefix}{lineNumber}";
                string unitPriceId       = $"{HorusConstants.UnitPricePrefix}{lineNumber}";
                string quantityId        = $"{HorusConstants.QuantityPrefix}{lineNumber}";
                string netPriceId        = $"{HorusConstants.NetPricePrefix}{lineNumber}";
                string vatCodeId         = $"{HorusConstants.VatCodePrefix}{lineNumber}";
                string discountPercentId = $"{HorusConstants.DiscountPercentPrefix}{lineNumber}";
                string taxableId         = $"{HorusConstants.TaxablePrefix}{lineNumber}";

                // presence of any one of the following items will mean the document line is considered to exist.
                string[] elements = { unitPriceId, netPriceId, lineItemId };

                if (!AnyElementsPresentForThisLine(nittyGritty, lineNumber, elements))
                {
                    // The first line number with no identifying fields ends the scan.
                    break;
                }

                log.LogTrace($"{snip}{lineItemId}: {GetString(lineItemId, nittyGritty, document)}");
                DocumentLineItem lineItem = new DocumentLineItem();

                lineItem.ItemDescription    = GetString(lineItemId, nittyGritty, document, DocumentErrorSeverity.Terminal);
                lineItem.DocumentLineNumber = lineNumber;
                lineItem.LineQuantity       = GetNumber(quantityId, nittyGritty, document).ToString();
                lineItem.NetAmount          = GetNumber(netPriceId, nittyGritty, document, DocumentErrorSeverity.Terminal) ?? 0;
                lineItem.UnitPrice          = GetNumber(unitPriceId, nittyGritty, document, DocumentErrorSeverity.Terminal) ?? 0;
                lineItem.VATCode            = GetString(vatCodeId, nittyGritty, document, DocumentErrorSeverity.Warning);
                lineItem.DiscountPercent    = GetNumber(discountPercentId, nittyGritty, document, DocumentErrorSeverity.Warning) ?? 0;
                lineItem.Taxableindicator   = GetString(taxableId, nittyGritty, document, DocumentErrorSeverity.Warning);
                document.LineItems.Add(lineItem);
            }

            timer.Stop();
            document.TimeToShred = timer.ElapsedMilliseconds;
            return(document);
        }
Beispiel #17
0
 /// <summary>
 /// Placeholder integration step: appends a note to the job, writes a debug message and
 /// returns the job unchanged otherwise.
 /// </summary>
 /// <param name="job">Job to annotate; a note is added to <c>job.Notes</c>.</param>
 /// <param name="log">Logger for the debug message.</param>
 /// <param name="snip">Prefix placed at the start of the log message.</param>
 /// <returns>The same <paramref name="job"/> instance that was passed in.</returns>
 public async override Task <DocumentProcessingJob> Integrate(DocumentProcessingJob job, ILogger log, string snip)
 {
     const string integrationNote = "Note added by Horus Integration Engine";

     job.Notes.Add(integrationNote);
     log.LogDebug($"{snip} HorusIntegrationEngine doesn't do anything, but you can develop your own and plug it in (e.g. send a service bus message, call a logic app)?");

     return job;
 }
Beispiel #18
0
 /// <summary>
 /// Asynchronously saves the data associated with the supplied processing job.
 /// </summary>
 /// <param name="job">The document processing job to save from.</param>
 /// <param name="log">Logger made available to the implementation.</param>
 /// <param name="snip">String supplied by the caller; presumably a log-message prefix - confirm against callers.</param>
 /// <returns>A task that yields a <see cref="DocumentProcessingJob"/>.</returns>
 public abstract Task <DocumentProcessingJob> Save(DocumentProcessingJob job, ILogger log, string snip);