Beispiel #1
0
        /// <summary>
        /// Service Bus entry point for incoming documents. Parses the message body as JSON,
        /// and when <c>CanProcessMessage</c> accepts it, starts a "DocumentProcessor"
        /// orchestration for the blob described under the body's <c>data</c> node.
        /// </summary>
        /// <param name="message">Service Bus message; its body is decoded as UTF-8 JSON.</param>
        /// <param name="starter">Durable client used to start the orchestration.</param>
        /// <param name="log">Logger for progress and warnings.</param>
        /// <param name="ec">Execution context; only the function name is read, for logging.</param>
        public async Task TriggerProcessDocument([ServiceBusTrigger("%IncomingDocumentsQueue%", Connection = "IncomingDocumentServiceBusConnectionString")] Message message, [DurableClient] IDurableOrchestrationClient starter, ILogger log, ExecutionContext ec)
        {
            log.LogInformation($"{ec.FunctionName} function was triggered by receipt of service bus message {message.MessageId}");

            // The result is not used here; the call is kept because its side effects (if any) are not visible from this method.
            var activity = message.ExtractActivity();
            var envelope = JObject.Parse(System.Text.Encoding.UTF8.GetString(message.Body));

            bool accepted = CanProcessMessage(message.MessageId, envelope, log);
            if (!accepted)
            {
                log.LogWarning($"Message {message.MessageId} was ignored!!  Please see previous log items for reasons.");
                return;
            }

            HorusSql.CheckAndCreateDatabaseIfNecessary(log);

            // Both values are read from the "data" node of the message body.
            var    data        = envelope["data"];
            string stagingUrl  = data["url"].ToString();
            string mediaType   = data["contentType"].ToString();

            var processingJob = new DocumentProcessingJob();
            processingJob.StagingBlobUrl = stagingUrl;
            processingJob.ContentType    = mediaType;

            string instanceId = await starter.StartNewAsync("DocumentProcessor", null, processingJob);

            log.LogInformation($"{ec.FunctionName} processed message {message.MessageId}.  Orchestration {instanceId} will process document: {stagingUrl}");
        }
Beispiel #2
0
        /// <summary>
        /// Ensures that both the processing database and the scores database have their tables.
        /// </summary>
        /// <remarks>
        /// The processing database is delegated to <c>HorusSql.CheckAndCreateDatabaseIfNecessary</c>.
        /// For the scores database, the presence of the <c>ScoreSummary</c> table is used as the
        /// "already initialised" marker; when it is missing, the <c>GeneratedDocuments</c>,
        /// <c>ScoreSummary</c> and <c>ScoreDetail</c> tables are created inside one transaction.
        /// The command and transaction are disposed deterministically, so a failure part-way
        /// through leaves the transaction uncommitted and released.
        /// </remarks>
        /// <param name="log">Logger for trace and progress output.</param>
        private void CheckAndCreateDatabaseIfNecessary(ILogger log)
        {
            log.LogTrace($"Checking if processing database has been initialised");
            HorusSql.CheckAndCreateDatabaseIfNecessary(log);

            using (SqlConnection connection = new SqlConnection(scoresSQLConnectionString))
            {
                connection.Open();

                // CreateCommand already associates the command with this connection.
                using (SqlCommand command = connection.CreateCommand())
                {
                    log.LogTrace($"Checking if scores database has been initialised");
                    command.CommandText = "select name from sysobjects where name = 'ScoreSummary'";
                    using (SqlDataReader reader = command.ExecuteReader())
                    {
                        if (reader.HasRows)
                        {
                            log.LogTrace("Table ScoreSummary exists no need to create database tables");
                            return;
                        }
                    }

                    log.LogInformation($"Creating tables in {connection.Database} database ..");

                    // Disposing an uncommitted transaction releases it without committing,
                    // so an exception from any CREATE TABLE below does not leave partial state.
                    using (SqlTransaction transaction = connection.BeginTransaction("InitializeDatabase"))
                    {
                        command.Transaction = transaction;

                        var commandStr = "If not exists (select name from sysobjects where name = 'GeneratedDocuments')" +
                                         "CREATE TABLE[dbo].[GeneratedDocuments]([Id][int] IDENTITY(1, 1) NOT NULL, [Account] [nvarchar](50) NULL, [SingleName] [nvarchar](50) NULL, [AddressLine1] [nvarchar](50) NULL, [AddressLine2] [nvarchar](50) NULL, " +
                                         "[PostalCode] [nvarchar](50) NULL, [City] [nvarchar](50) NULL, [Notes] [nvarchar](50) NULL, [DocumentNumber] [nvarchar](50) NOT NULL, [FileName] [nvarchar](50) NULL, [DocumentFormat] [nvarchar](50) NULL, " +
                                         "[DocumentDate] [datetime2](7) NULL, [PreTaxTotalValue] [decimal](19, 5) NULL, [TaxTotalValue] [decimal](19, 5) NULL, [ShippingTotalValue] [decimal](19, 5) NULL, [GrandTotalValue]  [decimal](19, 5) NULL, [LineNumber] [nvarchar](5) NOT NULL, " +
                                         "[Title] [nvarchar](50) NULL, [Author] [nvarchar](50) NULL, [Isbn] [nvarchar](50) NULL, [Quantity] [decimal](19, 5) NULL, [Discount] [decimal](19, 5) NULL, [Price] [decimal](19, 5) NULL, [Taxable] [bit] NOT NULL, " +
                                         "[GoodsValue] [decimal](19, 5) NULL, [DiscountValue] [decimal](19, 5) NULL,	[DiscountedGoodsValue] [decimal](19, 5) NULL, [TaxableValue] [decimal](19, 5) NULL "+
                                         "PRIMARY KEY CLUSTERED ([Id] ASC)WITH(STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF) ON[PRIMARY]) ON[PRIMARY]";
                        command.CommandText = commandStr;
                        command.ExecuteNonQuery();
                        log.LogTrace($"Table GeneratedDocuments was created.");

                        commandStr = "If not exists (select name from sysobjects where name = 'ScoreSummary')" +
                                     "CREATE TABLE[dbo].[ScoreSummary]([Id][int] IDENTITY(1, 1) NOT NULL, " +
                                     "[Team] [nvarchar](50) NOT NULL, [TotalScore][int] NOT NULL, [InspectionTime] [datetime2](7) NOT NULL " +
                                     "PRIMARY KEY CLUSTERED ([Id] ASC)WITH(STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF) ON[PRIMARY]) ON[PRIMARY]";
                        command.CommandText = commandStr;
                        command.ExecuteNonQuery();
                        log.LogTrace($"Table ScoreSummary was created.");

                        commandStr = "If not exists (select name from sysobjects where name = 'ScoreDetail')" +
                                     "CREATE TABLE[dbo].[ScoreDetail]([Id][int] IDENTITY(1, 1) NOT NULL, " +
                                     "[Team][nvarchar](50) NOT NULL, [InspectionTime] [datetime2](7) NOT NULL, [Type] [nvarchar](50) NOT NULL, [Notes] [nvarchar] (max)NULL, [Score] [int] NOT NULL, [Status] [nvarchar](15)  NOT NULL " +
                                     "PRIMARY KEY CLUSTERED ([Id] ASC)WITH(STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF) ON[PRIMARY]) ON[PRIMARY]";
                        command.CommandText = commandStr;
                        command.ExecuteNonQuery();
                        log.LogTrace($"Table ScoreDetail was created.");

                        transaction.Commit();
                    }
                }
            }
        }
Beispiel #3
0
        /// <summary>
        /// Produces a single "Processing" score record worth three points for every document
        /// that <c>HorusSql.GetDocumentCount</c> reports.
        /// </summary>
        /// <param name="log">Logger for trace output; also passed to the document count query.</param>
        /// <returns>A list containing exactly one <c>ScoreRecord</c>.</returns>
        private async Task <List <ScoreRecord> > CountProcessedDocuments(ILogger log)
        {
            log.LogTrace($"Checking for documents in SQL database");
            int documentCount = HorusSql.GetDocumentCount(log);
            log.LogTrace($"{documentCount} documents have been analysed and saved to SQL");

            var processingScore = new ScoreRecord();
            processingScore.Type  = $"Processing";
            processingScore.Notes = $"{documentCount} documents were detected in SQL database (3 points each)";
            processingScore.Score = documentCount * 3;

            var scores = new List <ScoreRecord>();
            scores.Add(processingScore);
            return scores;
        }
Beispiel #4
0
        /// <summary>
        /// Final activity of a model training run: records the training result through
        /// <c>HorusSql.UpdateModelTraining</c>, copies the resulting model version onto the job,
        /// and uploads the serialized job as a blob in the orchestration container.
        /// </summary>
        /// <param name="job">The training job; its <c>ModelVersion</c> is overwritten here.</param>
        /// <param name="log">Logger for progress output.</param>
        /// <param name="ec">Execution context; only the function name is read, for logging.</param>
        /// <returns>The same <paramref name="job"/> instance, updated.</returns>
        public async Task <ModelTrainingJob> TrainingCompleted([ActivityTrigger] ModelTrainingJob job, ILogger log, Microsoft.Azure.WebJobs.ExecutionContext ec)
        {
            var snip = $"Orchestration { job.OrchestrationId}: { ec.FunctionName} -";
            var mtr  = HorusSql.UpdateModelTraining(job, log);

            job.ModelVersion = mtr.ModelVersion.ToString();
            var orchestrationContainer = orchestrationBlobClient.GetContainerReference(job.OrchestrationContainerName);
            await orchestrationContainer.CreateIfNotExistsAsync();

            // The blob is named after the document format plus the training-job file extension.
            var jobBlobName = $"{job.DocumentFormat}{BaseConstants.TrainingJobFileExtension}";
            var jobBlob     = orchestrationContainer.GetBlockBlobReference(jobBlobName);
            await jobBlob.UploadTextAsync(JsonConvert.SerializeObject(job));

            // Report the name of the blob that was actually written (jobBlobName), not job.JobBlobName,
            // which this method never assigns and which may differ from the uploaded name.
            log.LogInformation($"{snip} - Completed successfully - Job blob {jobBlobName} was uploaded to container {job.OrchestrationContainerName}");
            return(job);
        }
Beispiel #5
0
        /// <summary>
        /// Service Bus entry point for training requests. Deserializes the message body into a
        /// <c>TrainingRequestMessage</c> and starts a "ModelTrainer" orchestration for it.
        /// </summary>
        /// <param name="message">Service Bus message; its body is decoded as UTF-8 JSON.</param>
        /// <param name="starter">Durable client used to start the orchestration.</param>
        /// <param name="log">Logger for progress output.</param>
        /// <param name="ec">Execution context; only the function name is read, for logging.</param>
        public async Task TriggerTrainModel([ServiceBusTrigger("%TrainingQueue%", Connection = "IncomingDocumentServiceBusConnectionString")] Message message, [DurableClient] IDurableOrchestrationClient starter, ILogger log, ExecutionContext ec)
        {
            log.LogInformation($"{ec.FunctionName} function was triggered by receipt of service bus message {message.MessageId}");

            var request = JsonConvert.DeserializeObject <TrainingRequestMessage>(
                System.Text.Encoding.UTF8.GetString(message.Body));

            HorusSql.CheckAndCreateDatabaseIfNecessary(log);

            // Sub-folders are always excluded and a label file is always expected.
            var trainingJob = new ModelTrainingJob();
            trainingJob.BlobFolderName    = request.BlobFolderName;
            trainingJob.BlobSasUrl        = request.BlobSasUrl;
            trainingJob.DocumentFormat    = request.DocumentFormat;
            trainingJob.IncludeSubFolders = "false";
            trainingJob.UseLabelFile      = "true";

            string instanceId = await starter.StartNewAsync("ModelTrainer", null, trainingJob);

            log.LogInformation($"{ec.FunctionName} processed message {message.MessageId}.  Orchestration {instanceId}");
        }
Beispiel #6
0
        /// <summary>
        /// Downloads the document blob referenced by the job, deserializes it into a
        /// <c>Document</c>, and stores it through <c>HorusSql.SaveDocument</c>.
        /// </summary>
        /// <param name="job">Job whose <c>DocumentBlobUrl</c> identifies the blob to load.</param>
        /// <param name="log">Logger; also passed to the SQL save call.</param>
        /// <param name="snip">Prefix placed in front of the debug log message.</param>
        /// <returns>The <paramref name="job"/> passed in; this method does not modify it.</returns>
        public async override Task <DocumentProcessingJob> Save(DocumentProcessingJob job, ILogger log, string snip)
        {
            var blobUri = new Uri(job.DocumentBlobUrl);
            var blob    = await orchestrationBlobClient.GetBlobReferenceFromServerAsync(blobUri);

            // Read the whole blob into memory and decode it as UTF-8 text.
            string json;
            using (var buffer = new MemoryStream())
            {
                await blob.DownloadToStreamAsync(buffer);
                byte[] raw = buffer.ToArray();
                json = Encoding.UTF8.GetString(raw);
            }

            var parsed = JsonConvert.DeserializeObject <Document>(json);
            HorusSql.SaveDocument(parsed, log);

            log.LogDebug($"{snip} document {parsed.DocumentNumber} was saved to SQL");
            return job;
        }
Beispiel #7
0
        /// <summary>
        /// Awards a 500-point "Training" score record for each document type listed in the
        /// <c>DocumentTypesForChallenge</c> environment variable (comma separated) for which
        /// <c>HorusSql.GetModelIdByDocumentFormat</c> returns a record with a non-null
        /// <c>DocumentFormat</c>.
        /// </summary>
        /// <returns>One <c>ScoreRecord</c> per document type that has a registered model.</returns>
        private async Task <List <ScoreRecord> > InspectModelRegistration()
        {
            log.LogTrace($"Checking that a Model has been registered for each document type");

            string configuredTypes = Environment.GetEnvironmentVariable("DocumentTypesForChallenge");
            List <string> typeNames = configuredTypes.Split(',').ToList();

            var scores = new List <ScoreRecord>();
            for (int i = 0; i < typeNames.Count; i++)
            {
                string typeName = typeNames[i];
                log.LogTrace($"Checking {typeName}");

                var registration = HorusSql.GetModelIdByDocumentFormat(typeName);
                if (registration.DocumentFormat == null)
                {
                    // No model registered for this type: no points awarded.
                    continue;
                }

                log.LogTrace($"Model {registration.ModelId} has been registered for {typeName}");

                var score = new ScoreRecord();
                score.Type  = $"Training";
                score.Notes = $"{registration.ModelId} has been registered for document type {typeName}";
                score.Score = 500;
                scores.Add(score);
            }
            return scores;
        }
Beispiel #8
0
        /// <summary>
        /// Submits the job's orchestration blob to the recognizer service for analysis using the
        /// model registered for the job's document format, and records where progress can be polled.
        /// </summary>
        /// <remarks>
        /// Sets <c>job.Model</c>, <c>job.Thumbprint</c> (MD5 of the blob bytes, hex pairs separated
        /// by spaces) and <c>job.RecognizerStatusUrl</c> (the <c>operation-location</c> response
        /// header, or an empty string when the header is absent).
        /// </remarks>
        /// <param name="job">The document processing job; updated in place.</param>
        /// <param name="log">Logger for trace and progress output.</param>
        /// <param name="ec">Execution context; only the function name is read, for logging.</param>
        /// <returns>The same <paramref name="job"/> instance, updated.</returns>
        /// <exception cref="Exception">Thrown when the recognizer returns a non-success status code.</exception>
        public async Task <DocumentProcessingJob> StartRecognizer([ActivityTrigger] DocumentProcessingJob job, ILogger log, Microsoft.Azure.WebJobs.ExecutionContext ec)
        {
            var snip  = $"Orchestration { job.OrchestrationId}: { ec.FunctionName} - ";
            var model = HorusSql.GetModelIdByDocumentFormat(job.DocumentFormat);

            job.Model = model;
            log.LogTrace($"{snip} Document Name={job.DocumentName}, Format={job.DocumentFormat}, Model={model.ModelId}, Version={model.ModelVersion}");

            var queryString = HttpUtility.ParseQueryString(string.Empty);

            queryString["includeTextDetails"] = "True";
            var uri = $"{recognizerServiceBaseUrl}{BaseConstants.FormRecognizerApiPath}/{model.ModelId}/{BaseConstants.FormRecognizerAnalyzeVerb}?{queryString}";

            log.LogTrace($"{snip} Recognizer Uri={uri}");

            // Download the document once; the same bytes are hashed and then posted.
            byte[] image;
            var    orchestrationBlob = await orchestrationBlobClient.GetBlobReferenceFromServerAsync(new Uri(job.OrchestrationBlobUrl));

            using (var memoryStream = new MemoryStream())
            {
                await orchestrationBlob.DownloadToStreamAsync(memoryStream);
                image = memoryStream.ToArray();
            }

            byte[] md5hash;
            using (var md5 = MD5.Create())
            {
                md5hash = md5.ComputeHash(image);
            }
            job.Thumbprint = BitConverter.ToString(md5hash).Replace("-", " ");
            log.LogTrace($"{snip} Orchestration Blob={job.OrchestrationBlobName} downloaded.  Thumbprint={job.Thumbprint}");

            // `client` is declared outside this method, so its default headers persist between calls.
            // Adding the key unconditionally would append a duplicate value on every invocation.
            if (!client.DefaultRequestHeaders.Contains("Ocp-Apim-Subscription-Key"))
            {
                client.DefaultRequestHeaders.Add("Ocp-Apim-Subscription-Key", recognizerApiKey);
            }

            HttpResponseMessage response;
            using (var postContent = new ByteArrayContent(image))
            {
                postContent.Headers.ContentType = new MediaTypeHeaderValue(job.ContentType);
                response = await client.PostAsync(uri, postContent);
            }

            string getUrl = "";

            using (response)
            {
                if (!response.IsSuccessStatusCode)
                {
                    log.LogTrace($"{snip} Recognition request unsuccessful.");

                    // Await the body instead of blocking on .Result inside an async method.
                    string errorContent = await response.Content.ReadAsStringAsync();
                    throw new Exception($"{snip} That didnt work.  Trying to submit image for analysis {uri} Content:{errorContent}");
                }

                log.LogTrace($"{snip} Recognition request successful.");
                HttpHeaders headers = response.Headers;
                if (headers.TryGetValues("operation-location", out IEnumerable <string> values))
                {
                    getUrl = values.First();
                    log.LogTrace($"{snip} Recognition progress can be tracked at {getUrl}");
                }
            }

            job.RecognizerStatusUrl = getUrl;
            log.LogInformation($"{snip} Completed successfully");
            return(job);
        }
Beispiel #9
0
        /// <summary>
        /// Compares every expected document in the scores database with the document actually
        /// stored in the processing database and returns the resulting score records.
        /// </summary>
        /// <remarks>
        /// Expected results are read from <c>[dbo].[GeneratedDocuments]</c>, one row per document
        /// line, ordered so that rows of the same document are adjacent; consecutive rows with the
        /// same format and number are grouped into one <c>DocumentCheckRequest</c>.
        /// A failure while reading rows is logged and reading stops, but the documents collected
        /// so far are still checked (best effort). A failure to execute the query is logged and rethrown.
        /// Documents that cannot be loaded, or that load as <c>null</c>, are skipped.
        /// </remarks>
        /// <param name="log">Logger; also passed to the load and compare helpers.</param>
        /// <returns>The score records produced by <c>CompareActualWithExpectedResults</c> for each checked document.</returns>
        private async Task <List <ScoreRecord> > CheckIndividualDocuments(ILogger log)
        {
            var results = new List <ScoreRecord>();

            log.LogTrace($"Checking accuracy of document recognition");
            log.LogTrace($"Reading expected results from SQL Database");
            var checks = new List <DocumentCheckRequest>();

            using (SqlConnection connection = new SqlConnection(scoresSQLConnectionString))
            {
                connection.Open();
                using (SqlCommand command = connection.CreateCommand())
                {
                    try
                    {
                        string previousDocumentFormat = "";
                        string previousDocumentNumber = "";

                        command.CommandText = "SELECT * FROM [dbo].[GeneratedDocuments] order by DocumentFormat, DocumentNumber, LineNumber";
                        using (SqlDataReader reader = command.ExecuteReader())
                        {
                            try
                            {
                                DocumentCheckRequest checkRequest = null;
                                while (reader.Read())
                                {
                                    string currentDocumentFormat = (string)reader["DocumentFormat"];
                                    string currentDocumentNumber = (string)reader["DocumentNumber"];

                                    // The first row always opens a document, even when its format and
                                    // number are both empty strings (which equal the initial "previous" values).
                                    bool newDocument = checkRequest == null
                                                       || currentDocumentFormat != previousDocumentFormat
                                                       || currentDocumentNumber != previousDocumentNumber;
                                    previousDocumentFormat = currentDocumentFormat;
                                    previousDocumentNumber = currentDocumentNumber;

                                    if (newDocument)
                                    {
                                        // Flush the document that was being assembled before starting the next.
                                        if (checkRequest != null)
                                        {
                                            checks.Add(checkRequest);
                                        }
                                        checkRequest = new DocumentCheckRequest
                                        {
                                            Account            = (string)reader["Account"],
                                            DocumentNumber     = (string)reader["DocumentNumber"],
                                            DocumentDate       = (DateTime)reader["DocumentDate"],
                                            PostalCode         = (string)reader["PostalCode"],
                                            GrandTotalValue    = Convert.ToDouble(reader["GrandTotalValue"]),
                                            PreTaxTotalValue   = Convert.ToDouble(reader["PreTaxTotalValue"]),
                                            ShippingTotalValue = Convert.ToDouble(reader["ShippingTotalValue"]),
                                            TaxTotalValue      = Convert.ToDouble(reader["TaxTotalValue"]),
                                            FileName           = (string)reader["FileName"],
                                            DocumentFormat     = (string)reader["DocumentFormat"],
                                        };
                                    }

                                    var line = new DocumentLineCheckRequest
                                    {
                                        Discount             = Convert.ToDouble(reader["Discount"]),
                                        LineNumber           = (string)reader["LineNumber"],
                                        ProductCode          = (string)reader["Isbn"],
                                        ProductDescription   = (string)reader["Title"],
                                        Price                = Convert.ToDouble(reader["Price"]),
                                        Quantity             = Convert.ToDouble(reader["Quantity"]),
                                        Taxable              = (bool)reader["Taxable"],
                                        DiscountedGoodsValue = Convert.ToDouble(reader["DiscountedGoodsValue"]),
                                        DiscountValue        = Convert.ToDouble(reader["DiscountValue"]),
                                        GoodsValue           = Convert.ToDouble(reader["GoodsValue"]),
                                        TaxableValue         = Convert.ToDouble(reader["TaxableValue"]),
                                    };
                                    checkRequest.Lines.Add(line);
                                }
                                // Flush the last document.
                                if (checkRequest != null)
                                {
                                    checks.Add(checkRequest);
                                }
                            }
                            catch (Exception e)
                            {
                                // Best effort: keep the documents read so far, but record the full exception.
                                log.LogError(e, e.Message);
                            }
                        }
                    }
                    catch (Exception e)
                    {
                        log.LogError(e, $"Exception prevented reading expected results from SQL database {connection.Database}  Message is {e.Message}");
                        // Rethrow without resetting the stack trace.
                        throw;
                    }
                }
                log.LogInformation($"Expected Results read from SQL database {connection.Database}");
            }

            log.LogInformation($"We will be checking the actual vs expected results for {checks.Count} documents");
            foreach (var check in checks)
            {
                string fileName = $"{check.DocumentFormat}-{check.FileName}";
                log.LogDebug($"Loading document {fileName} from processing database");
                Document document = null;
                try
                {
                    log.LogTrace($"Checking {fileName}");
                    document = HorusSql.LoadDocument(fileName, log);
                }
                catch (Exception e)
                {
                    log.LogWarning(e, $"Unable to load document {fileName} from processing database.");
                    continue;
                }
                if (document == null)
                {
                    log.LogTrace($"Document {check.DocumentNumber} has not been processed sucessfully and will be skipped");
                    continue;
                }
                var checkResults = CompareActualWithExpectedResults(document, check, log);
                results.AddRange(checkResults);
            }
            return(results);
        }