/// <summary>
/// Builds a <see cref="Document"/> from the recognizer JSON response carried by a processing job.
/// </summary>
/// <param name="job">Job supplying the recognizer response, orchestration id/blob name, thumbprint and model details.</param>
/// <param name="log">Logger that receives one trace entry per detected document line.</param>
/// <param name="snip">Prefix placed in front of every trace message written here.</param>
/// <returns>The populated document, including its line items and the elapsed parse time in milliseconds.</returns>
public override Document Process(DocumentProcessingJob job, ILogger log, string snip)
{
    Stopwatch timer = Stopwatch.StartNew();

    Document document = new Document
    {
        UniqueRunIdentifier = job.OrchestrationId,
        FileName = job.OrchestrationBlobName
    };

    JObject jsonContent = JObject.Parse(job.RecognizerResponse);
    if (jsonContent["status"] != null)
    {
        document.RecognizerStatus = jsonContent["status"].ToString();
    }
    if (jsonContent["errors"] != null)
    {
        document.RecognizerErrors = jsonContent["errors"].ToString();
    }

    // Fill out the document object from the fields of the first document result.
    var nittyGritty = (JObject)jsonContent["analyzeResult"]["documentResults"][0]["fields"];

    // The property is a UTC timestamp, so it must not be fed local time.
    document.ShreddingUtcDateTime = DateTime.UtcNow;
    document.OrderNumber = GetString(HorusConstants.OrderNumber, nittyGritty, document);
    document.OrderDate = GetDate(HorusConstants.OrderDate, nittyGritty, document);
    document.TaxDate = GetDate(HorusConstants.TaxDate, nittyGritty, document);
    document.DocumentNumber = GetString(HorusConstants.InvoiceNumber, nittyGritty, document);
    document.Account = GetString(HorusConstants.Account, nittyGritty, document);
    document.NetTotal = GetNumber(HorusConstants.NetTotal, nittyGritty, document) ?? 0;
    document.VatAmount = GetNumber(HorusConstants.VatAmount, nittyGritty, document) ?? 0;
    document.ShippingTotal = GetNumber(HorusConstants.ShippingTotal, nittyGritty, document) ?? 0;
    document.GrandTotal = GetNumber(HorusConstants.GrandTotal, nittyGritty, document) ?? 0;
    document.PostCode = GetString(HorusConstants.PostCode, nittyGritty, document);
    document.TimeToShred = 0; // Set after processing complete
    document.Thumbprint = job.Thumbprint;
    document.ModelId = job.Model.ModelId;
    document.ModelVersion = job.Model.ModelVersion.ToString();

    if (document.TaxDate.HasValue)
    {
        // Year followed by the unpadded month number (format kept as-is for existing consumers).
        document.TaxPeriod = document.TaxDate.Value.Year.ToString() + document.TaxDate.Value.Month.ToString();
    }

    // Lines: numbered from 01; scanning stops at the first line number with no elements present.
    for (int i = 1; i < HorusConstants.MAX_DOCUMENT_LINES; i++)
    {
        var lineNumber = i.ToString("D2");
        string lineItemId = $"{HorusConstants.LineItemPrefix}{lineNumber}";
        string unitPriceId = $"{HorusConstants.UnitPricePrefix}{lineNumber}";
        string quantityId = $"{HorusConstants.QuantityPrefix}{lineNumber}";
        string netPriceId = $"{HorusConstants.NetPricePrefix}{lineNumber}";
        string vatCodeId = $"{HorusConstants.VatCodePrefix}{lineNumber}";
        string discountPercentId = $"{HorusConstants.DiscountPercentPrefix}{lineNumber}";
        string taxableId = $"{HorusConstants.TaxablePrefix}{lineNumber}";

        // presence of any one of the following items will mean the document line is considered to exist.
        string[] elements = { unitPriceId, netPriceId, lineItemId };
        if (!AnyElementsPresentForThisLine(nittyGritty, lineNumber, elements))
        {
            break;
        }

        log.LogTrace($"{snip}{lineItemId}: {GetString(lineItemId, nittyGritty, document)}");

        DocumentLineItem lineItem = new DocumentLineItem();
        lineItem.ItemDescription = GetString(lineItemId, nittyGritty, document, DocumentErrorSeverity.Terminal);
        lineItem.DocumentLineNumber = lineNumber;
        lineItem.LineQuantity = GetNumber(quantityId, nittyGritty, document).ToString();
        lineItem.NetAmount = GetNumber(netPriceId, nittyGritty, document, DocumentErrorSeverity.Terminal) ?? 0;
        lineItem.UnitPrice = GetNumber(unitPriceId, nittyGritty, document, DocumentErrorSeverity.Terminal) ?? 0;
        lineItem.VATCode = GetString(vatCodeId, nittyGritty, document, DocumentErrorSeverity.Warning);
        lineItem.DiscountPercent = GetNumber(discountPercentId, nittyGritty, document, DocumentErrorSeverity.Warning) ?? 0;
        lineItem.Taxableindicator = GetString(taxableId, nittyGritty, document, DocumentErrorSeverity.Warning);
        document.LineItems.Add(lineItem);
    }

    timer.Stop();
    document.TimeToShred = timer.ElapsedMilliseconds;
    return document;
}
/// <summary>
/// Blob-triggered entry point: parses recognizer JSON output into a <see cref="Document"/>,
/// moves the incoming blob to the processing-complete container, saves the parsed document
/// beside it and sends the document to the persistence queue.
/// </summary>
/// <param name="incomingBlobStream">Content of the triggering blob (recognizer JSON).</param>
/// <param name="name">Name of the triggering blob.</param>
/// <param name="log">Function logger.</param>
/// <remarks>
/// On any unexpected failure the incoming blob, the partially populated document and the
/// exception text are written to the exception container and the original exception is rethrown.
/// </remarks>
public async Task Run([BlobTrigger("process-in-json/{name}", Connection = "IncomingConnection")] Stream incomingBlobStream, string name, ILogger log)
{
    log.LogInformation($"{FUNCTION_NAME} function was triggered by receipt of blob - Name:{name} Size: {incomingBlobStream.Length} Bytes Container: {_processingContext.InboundDocumentContainer}");

    string thumbprint = "";
    string modelId = "";
    string modelVersion = "";
    string operationId = "";
    string parentId = "";
    Utils utils = new Utils(log, _config["IncomingConnection"], FUNCTION_NAME);
    IDictionary<string, string> metadata = new Dictionary<string, string>();
    Document document = new Document { FileName = name };
    document.UniqueRunIdentifier = Guid.NewGuid().ToString();

    try
    {
        // Metadata is best-effort: a failure here is logged and processing continues with defaults.
        try
        {
            metadata = await utils.GetBlobMetadataAsync(name, _processingContext.InboundDocumentContainer);
            if (metadata != null)
            {
                if (metadata.TryGetValue(ParsingConstants.UniqueRunIdentifierKey, out string runIdentifier) && !String.IsNullOrEmpty(runIdentifier))
                {
                    document.UniqueRunIdentifier = runIdentifier;
                    log.LogDebug($"{FUNCTION_NAME} unique run identifier was set to {document.UniqueRunIdentifier}");
                }
                if (metadata.TryGetValue(ParsingConstants.ThumbprintKey, out string thumbprintValue) && !String.IsNullOrEmpty(thumbprintValue))
                {
                    thumbprint = thumbprintValue;
                    log.LogDebug($"{FUNCTION_NAME} thumbprint is {thumbprint}");
                }
                if (metadata.TryGetValue(ParsingConstants.ModelIdKey, out string modelIdValue) && !String.IsNullOrEmpty(modelIdValue))
                {
                    modelId = modelIdValue;
                    log.LogDebug($"{FUNCTION_NAME} modelId is {modelId}");
                }
                if (metadata.TryGetValue(ParsingConstants.ModelVersionKey, out string modelVersionValue) && !String.IsNullOrEmpty(modelVersionValue))
                {
                    modelVersion = modelVersionValue;
                    log.LogDebug($"{FUNCTION_NAME} modelVersion is {modelVersion}");
                }
                // Both the operation id AND the parent id must be present and non-empty
                // (the previous check tested the operation id twice and never the parent id).
                if (metadata.TryGetValue(ParsingConstants.TelemetryOperationIdKey, out string operationIdValue)
                    && metadata.TryGetValue(ParsingConstants.TelemetryOperationParentIdKey, out string parentIdValue)
                    && !String.IsNullOrEmpty(operationIdValue)
                    && !String.IsNullOrEmpty(parentIdValue))
                {
                    operationId = operationIdValue;
                    parentId = parentIdValue;
                    log.LogDebug($"{FUNCTION_NAME} setting Application Insights Telemetry. OperationId = {operationId}, ParentId = {parentId}");
                }
            }
        }
        catch (Exception e)
        {
            log.LogWarning($"{FUNCTION_NAME} Error reading incoming blob metadata. Exception Type: {e.GetType()} Message: {e.Message}");
        }

        _telemetryClient.Context.Operation.Id = operationId;
        _telemetryClient.Context.Operation.ParentId = parentId;
        log.LogInformation("Check the telemetry operation and parent associated with this request - search gooseberry!");

        string content;
        using (var sr = new StreamReader(incomingBlobStream))
        {
            // Read asynchronously so the function does not block a thread on blob I/O.
            content = await sr.ReadToEndAsync();
        }

        Stopwatch timer = Stopwatch.StartNew();
        JObject jsonContent = JObject.Parse(content);
        if (jsonContent["status"] != null)
        {
            document.RecognizerStatus = jsonContent["status"].ToString();
        }
        if (jsonContent["errors"] != null)
        {
            document.RecognizerErrors = jsonContent["errors"].ToString();
        }

        // Fill out the document object from the fields of the first document result.
        var nittyGritty = (JObject)jsonContent["analyzeResult"]["documentResults"][0]["fields"];
        log.LogInformation($"{FUNCTION_NAME} Creating document based on forms recognizer output");

        // The property is a UTC timestamp, so it must not be fed local time.
        document.ShreddingUtcDateTime = DateTime.UtcNow;
        document.OrderNumber = GetString(ParsingConstants.OrderNumber, nittyGritty, document);
        document.OrderDate = GetDate(ParsingConstants.OrderDate, nittyGritty, document);
        document.TaxDate = GetDate(ParsingConstants.TaxDate, nittyGritty, document);
        document.DocumentNumber = GetString(ParsingConstants.InvoiceNumber, nittyGritty, document);
        document.Account = GetString(ParsingConstants.Account, nittyGritty, document);
        document.NetTotal = GetNumber(ParsingConstants.NetTotal, nittyGritty, document) ?? 0;
        document.VatAmount = GetNumber(ParsingConstants.VatAmount, nittyGritty, document) ?? 0;
        document.GrandTotal = GetNumber(ParsingConstants.GrandTotal, nittyGritty, document) ?? 0;
        document.PostCode = GetString(ParsingConstants.PostCode, nittyGritty, document);
        document.TimeToShred = 0; // Set after processing complete
        document.Thumbprint = thumbprint;
        document.ModelId = modelId;
        document.ModelVersion = modelVersion;

        if (document.TaxDate.HasValue)
        {
            // Year followed by the unpadded month number (format kept as-is for existing consumers).
            document.TaxPeriod = document.TaxDate.Value.Year.ToString() + document.TaxDate.Value.Month.ToString();
        }

        // Lines: numbered from 01; scanning stops at the first line number with no elements present.
        for (int i = 1; i < ParsingConstants.MAX_DOCUMENT_LINES; i++)
        {
            var lineNumber = i.ToString("D2");
            string lineItemId = $"{ParsingConstants.LineItemPrefix}{lineNumber}";
            string unitPriceId = $"{ParsingConstants.UnitPricePrefix}{lineNumber}";
            string quantityId = $"{ParsingConstants.QuantityPrefix}{lineNumber}";
            string netPriceId = $"{ParsingConstants.NetPricePrefix}{lineNumber}";
            string vatCodeId = $"{ParsingConstants.VatCodePrefix}{lineNumber}";

            // presence of any one of the following items will mean the document line is considered to exist.
            string[] elements = { unitPriceId, netPriceId, lineItemId };
            if (!AnyElementsPresentForThisLine(nittyGritty, lineNumber, elements))
            {
                break;
            }

            log.LogDebug($"{FUNCTION_NAME} {lineItemId}: {GetString(lineItemId, nittyGritty, document)}");

            DocumentLineItem lineItem = new DocumentLineItem();
            lineItem.ItemDescription = GetString(lineItemId, nittyGritty, document, DocumentErrorSeverity.Terminal);
            lineItem.DocumentLineNumber = lineNumber;
            lineItem.LineQuantity = GetNumber(quantityId, nittyGritty, document).ToString();
            lineItem.NetAmount = GetNumber(netPriceId, nittyGritty, document, DocumentErrorSeverity.Terminal) ?? 0;
            lineItem.UnitPrice = GetNumber(unitPriceId, nittyGritty, document, DocumentErrorSeverity.Terminal) ?? 0;
            lineItem.VATCode = GetString(vatCodeId, nittyGritty, document, DocumentErrorSeverity.Warning);
            document.LineItems.Add(lineItem);
        }

        timer.Stop();
        document.TimeToShred = timer.ElapsedMilliseconds;

        string documentForOutput = string.IsNullOrEmpty(document.DocumentNumber) ? "********" : document.DocumentNumber;
        log.LogInformation($"{FUNCTION_NAME} Document {documentForOutput} was parsed form recognizer output in {document.TimeToShred} ms");

        metadata = new Dictionary<string, string>();
        metadata.Add(ParsingConstants.ThumbprintKey, thumbprint);
        metadata.Add(ParsingConstants.UniqueRunIdentifierKey, document.UniqueRunIdentifier);

        // The saved document is named after the blob name up to its last '-' plus the document extension.
        string documentName = $"{name.Substring(0, name.LastIndexOf('-'))}{ParsingConstants.DocumentExtension}";
        await utils.MoveBlobAsync(_processingContext.InboundDocumentContainer, _processingContext.ProcessingCompleteContainer, name);
        await utils.SaveDocumentAsync(document, _processingContext.ProcessingCompleteContainer, documentName, metadata);
        await SendToPersistenceQueue(document, log);
        log.LogInformation($"{FUNCTION_NAME} sucessfully processed incoming blob {name} in {document.TimeToShred} ms");
    }
    // unexpected failure
    catch (Exception e)
    {
        metadata = new Dictionary<string, string>();
        metadata.Add(ParsingConstants.UniqueRunIdentifierKey, document.UniqueRunIdentifier);
        _telemetryClient.TrackException(e);
        log.LogError($"{FUNCTION_NAME} Unexpected error. Exception Type: {e.GetType().ToString()} Message {e.Message}. Please refer to application insights for diagnosis");

        // Derive the document name without throwing: a blob name with no '-' must not
        // replace the exception being handled with an ArgumentOutOfRangeException.
        int lastDash = name.LastIndexOf('-');
        string baseName = lastDash >= 0 ? name.Substring(0, lastDash) : name;
        string documentName = $"{baseName}{ParsingConstants.DocumentExtension}";

        // Mirror the success path: move the incoming blob itself (name) and save the parsed
        // document under documentName. These two names were previously swapped, so the move
        // targeted a blob that does not exist in the inbound container.
        await utils.MoveBlobAsync(_processingContext.InboundDocumentContainer, _processingContext.ExceptionContainer, name);
        await utils.SaveDocumentAsync(document, _processingContext.ExceptionContainer, documentName, metadata);
        await utils.SaveBlobAsync(_processingContext.ExceptionContainer, name + ParsingConstants.ExceptionExtension, e.ToString(), metadata);
        throw;
    }
}
/// <summary>
/// Loads the most recently shredded <see cref="Document"/> row for a file name, together with
/// its line items ordered by line number.
/// </summary>
/// <param name="fileName">Value matched against the <c>FileName</c> column of the Document table.</param>
/// <param name="log">Logger for progress and error messages.</param>
/// <returns>The document with its line items, or <c>null</c> when no row matches <paramref name="fileName"/>.</returns>
/// <remarks>Exceptions raised while querying are logged and rethrown with their original stack trace.</remarks>
public static Document LoadDocument(string fileName, ILogger log)
{
    Document document = null;
    using (SqlConnection connection = new SqlConnection(sqlConnectionString))
    {
        connection.Open();
        try
        {
            int documentId = 0;

            using (SqlCommand headerCommand = connection.CreateCommand())
            {
                // Parameterized: fileName is caller-supplied and must never be spliced into SQL text.
                headerCommand.CommandText = "select * from Document where FileName = @fileName order by ShreddingUtcDateTime desc";
                headerCommand.Parameters.AddWithValue("@fileName", (object)fileName ?? DBNull.Value);

                using (SqlDataReader reader = headerCommand.ExecuteReader())
                {
                    try
                    {
                        // Only the first row (latest ShreddingUtcDateTime) is used.
                        if (reader.Read())
                        {
                            documentId = (int)reader["Id"];
                            document = new Document
                            {
                                Account = reader.GetValue<string>("Account"),
                                DocumentNumber = reader.GetValue<string>("DocumentNumber"),
                                TaxDate = reader.GetValue<DateTime>("TaxDate"),
                                FileName = reader.GetValue<string>("FileName"),
                                GrandTotal = reader.GetValue<decimal>("GrandTotal"),
                                NetTotal = reader.GetValue<decimal>("NetTotal"),
                                PostCode = reader.GetValue<string>("PostCode"),
                                ShippingTotal = reader.GetValue<decimal>("ShippingTotal"),
                                VatAmount = reader.GetValue<decimal>("VatAmount"),
                                TaxPeriod = reader.GetValue<string>("TaxPeriod"),
                                OrderDate = reader.GetValue<DateTime>("OrderDate"),
                                OrderNumber = reader.GetValue<string>("OrderNumber"),
                                DocumentVersion = reader.GetValue<int>("DocumentVersion"),
                                LatestVersionIndicator = reader.GetValue<bool>("LatestVersionIndicator")
                            };
                        }
                    }
                    catch (Exception e)
                    {
                        // document is still null whenever this handler runs, so identify the
                        // request by file name instead of dereferencing document.
                        log.LogError($"Exception prevented reading Document {fileName} from SQL database {connection.Database}. Message is {e.Message}");
                        throw;
                    }
                }
            }

            if (document == null)
            {
                log.LogInformation($"Requested document {fileName} not found in database");
                return null;
            }

            using (SqlCommand linesCommand = connection.CreateCommand())
            {
                linesCommand.CommandText = "select * from DocumentLineItem where DocumentId = @documentId order by DocumentLineNumber";
                linesCommand.Parameters.AddWithValue("@documentId", documentId);

                using (SqlDataReader reader = linesCommand.ExecuteReader())
                {
                    while (reader.Read())
                    {
                        var lineItem = new DocumentLineItem
                        {
                            DiscountPercent = reader.GetValue<decimal>("DiscountPercent"),
                            DocumentLineNumber = reader.GetValue<string>("DocumentLineNumber"),
                            ItemDescription = reader.GetValue<string>("ItemDescription"),
                            LineQuantity = reader.GetValue<string>("LineQuantity"),
                            VATCode = reader.GetValue<string>("VATCode"),
                            NetAmount = reader.GetValue<decimal>("NetAmount"),
                            Taxableindicator = reader.GetValue<string>("Taxableindicator"),
                            UnitPrice = reader.GetValue<decimal>("UnitPrice")
                        };
                        document.LineItems.Add(lineItem);
                    }
                }
            }
        }
        catch (Exception e)
        {
            // document may be null here if the header query itself failed.
            log.LogError($"Exception prevented reading Document Lines for document {document?.DocumentNumber ?? fileName} from SQL database {connection.Database}. Message is {e.Message}");
            throw;
        }

        log.LogInformation($"Document {document.DocumentNumber} was read from SQL database {connection.Database}");
        return document;
    }
}
/// <summary>
/// Scores how closely a parsed document matches the expected values: up to 20 points for the
/// seven header fields and up to 80 points shared equally between the expected lines.
/// </summary>
/// <param name="actual">Document produced by parsing.</param>
/// <param name="expected">Expected header values and lines to compare against.</param>
/// <param name="log">Logger for per-field, per-line and overall results.</param>
/// <returns>A list holding a single "Accuracy" <see cref="ScoreRecord"/> with the total score and a summary note.</returns>
/// <remarks>
/// When no lines are expected, only the header score is awarded. An expected line scores nothing
/// unless exactly one actual line carries the same two-digit line number.
/// </remarks>
private List<ScoreRecord> CompareActualWithExpectedResults(Document actual, DocumentCheckRequest expected, ILogger log)
{
    // A fully matched header is worth 20 points; 100 points for a fully matched document
    // leaves 80 up for grabs, pro rata over the expected lines.
    const int DocumentHeaderPoints = 20;
    const int DocumentLinesPoints = 80;

    var results = new List<ScoreRecord>();

    log.LogTrace($"Checking accuracy of document {actual.FileName} header");

    // There are 7 header fields to check. We will identify those that match.
    var headerMatches = new Dictionary<string, bool>
    {
        ["Account"] = Compare<string>(actual.FileName, "Account", actual.Account, expected.Account, log),
        ["GrandTotal"] = Compare<decimal>(actual.FileName, "GrandTotal", actual.GrandTotal, (decimal)expected.GrandTotalValue, log),
        ["ShippingTotal"] = Compare<decimal>(actual.FileName, "ShippingTotal", actual.ShippingTotal, (decimal)expected.ShippingTotalValue, log),
        ["NetTotal"] = Compare<decimal>(actual.FileName, "NetTotal", actual.NetTotal, (decimal)expected.PreTaxTotalValue, log),
        ["VatAmount"] = Compare<decimal>(actual.FileName, "VatAmount", actual.VatAmount, (decimal)expected.TaxTotalValue, log),
        ["PostCode"] = Compare<string>(actual.FileName, "PostCode", actual.PostCode, expected.PostalCode, log),
        ["TaxDate"] = Compare<DateTime>(actual.FileName, "TaxDate", actual.TaxDate ?? DateTime.MinValue, expected.DocumentDate, log)
    };

    decimal numPossibleMatches = headerMatches.Count;
    decimal numMatches = headerMatches.Count(m => m.Value);
    decimal successRate = numMatches / numPossibleMatches;
    int points = (int)(DocumentHeaderPoints * successRate);
    int documentPoints = points;

    string notes = $"Document {actual.FileName} Header matched {numMatches} of {numPossibleMatches} fields for a score of {points} / {DocumentHeaderPoints})";
    log.LogInformation(notes);

    log.LogTrace($"Checking accuracy of document {actual.DocumentNumber} lines");

    // Decimal division keeps the fractional share per line (integer division truncated it),
    // and the guard avoids a DivideByZeroException when no lines are expected.
    int expectedLineCount = expected.Lines.Count();
    decimal linePoints = expectedLineCount > 0 ? (decimal)DocumentLinesPoints / expectedLineCount : 0m;

    foreach (var expLine in expected.Lines.OrderBy(o => o.LineNumber))
    {
        var expLineNumber = expLine.LineNumber.PadLeft(2, '0');
        log.LogTrace($"Checking Line {expLineNumber}");

        // Exactly one actual line must carry this number; none or several counts as "no match".
        var candidates = actual.LineItems?.Where(l => l?.DocumentLineNumber == expLineNumber).ToList();
        DocumentLineItem actLine = candidates != null && candidates.Count == 1 ? candidates[0] : null;
        if (actLine == null)
        {
            log.LogTrace($"{actual.FileName} Actual line matching {expLineNumber} does not exist - you may want to retrain your model?");
            continue;
        }

        // Any non-empty taxable indicator on the actual line counts as "taxable".
        bool actTaxIndicator = !String.IsNullOrEmpty(actLine.Taxableindicator);

        // An unparsable quantity is compared as 0 (TryParse leaves 0 in the out value on failure).
        Double.TryParse(actLine.LineQuantity, out double actLineQuantity);

        var lineMatches = new Dictionary<string, bool>
        {
            ["ItemDescription"] = Compare<string>(actual.FileName, "ItemDescription" + expLineNumber, actLine.ItemDescription, $"{expLine.ProductCode}{expLine.ProductDescription}".Trim(), log),
            ["UnitPrice"] = Compare<decimal>(actual.FileName, "UnitPrice" + expLineNumber, actLine.UnitPrice, (decimal)expLine.Price, log),
            ["Taxableindicator"] = Compare<bool>(actual.FileName, "Taxableindicator" + expLineNumber, actTaxIndicator, expLine.Taxable, log),
            // Fall back to the calculated quantity only when the recognised quantity does not match.
            ["LineQuantity"] = Compare<double>(actual.FileName, "LineQuantity" + expLineNumber, actLineQuantity, expLine.Quantity, log)
                || Compare<double>(actual.FileName, "CalculatedLineQuantity" + expLineNumber, (double)actLine.CalculatedLineQuantity, expLine.Quantity, log),
            ["NetAmount"] = Compare<decimal>(actual.FileName, "NetAmount" + expLineNumber, actLine.NetAmount, (decimal)expLine.DiscountedGoodsValue, log),
            ["DiscountPercent"] = Compare<decimal>(actual.FileName, "DiscountPercent" + expLineNumber, actLine.DiscountPercent, (decimal)expLine.Discount, log)
        };

        numPossibleMatches = lineMatches.Count;
        numMatches = lineMatches.Count(m => m.Value);
        successRate = numMatches / numPossibleMatches;
        points = (int)(linePoints * successRate);
        documentPoints += points;

        notes = $"Document {actual.FileName} line {expLineNumber} matched {numMatches} of {numPossibleMatches} fields for a score of {points} / {linePoints:0.##})";
        log.LogInformation(notes);
    }

    notes = $"Document {actual.FileName} overall scored {documentPoints} / 100 points)";
    if (documentPoints < 50)
    {
        log.LogError(notes);
    }
    else
    {
        log.LogInformation(notes);
    }

    results.Add(new ScoreRecord { Type = "Accuracy", Notes = notes, Score = documentPoints });
    return results;
}