Example #1
0
        /// <summary>
        /// Builds a <see cref="Document"/> from the recognizer response carried by <paramref name="job"/>.
        /// </summary>
        /// <remarks>
        /// Header fields are read from <c>analyzeResult.documentResults[0].fields</c> of the response JSON.
        /// Line items are then read for line numbers "01", "02", ... and reading stops at the first line
        /// number for which none of the unit price, net price or line item fields is present.
        /// </remarks>
        /// <param name="job">Supplies the recognizer response JSON, orchestration id and blob name, thumbprint and model.</param>
        /// <param name="log">Logger that receives one trace entry per line item found.</param>
        /// <param name="snip">Prefix prepended to each trace message.</param>
        /// <returns>The populated document; <c>TimeToShred</c> holds the elapsed milliseconds.</returns>
        public override Document Process(DocumentProcessingJob job, ILogger log, string snip)
        {
            Stopwatch timer = Stopwatch.StartNew();

            Document document = new Document
            {
                UniqueRunIdentifier = job.OrchestrationId,
                FileName            = job.OrchestrationBlobName
            };

            JObject jsonContent = JObject.Parse(job.RecognizerResponse);

            var status = jsonContent["status"];
            if (status != null)
            {
                document.RecognizerStatus = status.ToString();
            }

            var errors = jsonContent["errors"];
            if (errors != null)
            {
                document.RecognizerErrors = errors.ToString();
            }

            // Fill out the document object.
            // NOTE(review): this path is dereferenced without null checks, so a response lacking
            // analyzeResult/documentResults will throw here - confirm callers only pass successful responses.
            var nittyGritty = (JObject)jsonContent["analyzeResult"]["documentResults"][0]["fields"];

            // The property is named as a UTC timestamp, so record UTC rather than local time.
            document.ShreddingUtcDateTime = DateTime.UtcNow;
            document.OrderNumber          = GetString(HorusConstants.OrderNumber, nittyGritty, document);
            document.OrderDate            = GetDate(HorusConstants.OrderDate, nittyGritty, document);
            document.TaxDate              = GetDate(HorusConstants.TaxDate, nittyGritty, document);
            document.DocumentNumber       = GetString(HorusConstants.InvoiceNumber, nittyGritty, document);
            document.Account              = GetString(HorusConstants.Account, nittyGritty, document);
            document.NetTotal             = GetNumber(HorusConstants.NetTotal, nittyGritty, document) ?? 0;
            document.VatAmount            = GetNumber(HorusConstants.VatAmount, nittyGritty, document) ?? 0;
            document.ShippingTotal        = GetNumber(HorusConstants.ShippingTotal, nittyGritty, document) ?? 0;
            document.GrandTotal           = GetNumber(HorusConstants.GrandTotal, nittyGritty, document) ?? 0;
            document.PostCode             = GetString(HorusConstants.PostCode, nittyGritty, document);
            document.TimeToShred          = 0; // Set after processing complete
            document.Thumbprint           = job.Thumbprint;
            document.ModelId              = job.Model.ModelId;
            document.ModelVersion         = job.Model.ModelVersion.ToString();

            if (document.TaxDate.HasValue)
            {
                // Year followed by the unpadded month number, e.g. "20213" for March 2021.
                document.TaxPeriod = document.TaxDate.Value.Year.ToString() + document.TaxDate.Value.Month.ToString();
            }

            // Lines
            for (int i = 1; i < HorusConstants.MAX_DOCUMENT_LINES; i++)
            {
                // Field ids are the prefix followed by a two-digit line number ("01", "02", ...).
                string lineNumber        = i.ToString("D2");
                string lineItemId        = $"{HorusConstants.LineItemPrefix}{lineNumber}";
                string unitPriceId       = $"{HorusConstants.UnitPricePrefix}{lineNumber}";
                string quantityId        = $"{HorusConstants.QuantityPrefix}{lineNumber}";
                string netPriceId        = $"{HorusConstants.NetPricePrefix}{lineNumber}";
                string vatCodeId         = $"{HorusConstants.VatCodePrefix}{lineNumber}";
                string discountPercentId = $"{HorusConstants.DiscountPercentPrefix}{lineNumber}";
                string taxableId         = $"{HorusConstants.TaxablePrefix}{lineNumber}";

                // presence of any one of the following items will mean the document line is considered to exist.
                string[] elements = { unitPriceId, netPriceId, lineItemId };

                if (!AnyElementsPresentForThisLine(nittyGritty, lineNumber, elements))
                {
                    // First missing line number ends the scan; later line numbers are not examined.
                    break;
                }

                log.LogTrace($"{snip}{lineItemId}: {GetString(lineItemId, nittyGritty, document)}");

                // The getters are passed the document and a severity; presumably they record
                // extraction problems on the document - verify against their definitions.
                DocumentLineItem lineItem = new DocumentLineItem();
                lineItem.ItemDescription    = GetString(lineItemId, nittyGritty, document, DocumentErrorSeverity.Terminal);
                lineItem.DocumentLineNumber = lineNumber;
                lineItem.LineQuantity       = GetNumber(quantityId, nittyGritty, document).ToString();
                lineItem.NetAmount          = GetNumber(netPriceId, nittyGritty, document, DocumentErrorSeverity.Terminal) ?? 0;
                lineItem.UnitPrice          = GetNumber(unitPriceId, nittyGritty, document, DocumentErrorSeverity.Terminal) ?? 0;
                lineItem.VATCode            = GetString(vatCodeId, nittyGritty, document, DocumentErrorSeverity.Warning);
                lineItem.DiscountPercent    = GetNumber(discountPercentId, nittyGritty, document, DocumentErrorSeverity.Warning) ?? 0;
                lineItem.Taxableindicator   = GetString(taxableId, nittyGritty, document, DocumentErrorSeverity.Warning);
                document.LineItems.Add(lineItem);
            }

            timer.Stop();
            document.TimeToShred = timer.ElapsedMilliseconds;
            return document;
        }
Example #2
0
        /// <summary>
        /// Blob-triggered entry point: parses the recognizer JSON in <paramref name="incomingBlobStream"/> into a
        /// <see cref="Document"/>, moves the incoming blob to the processing-complete container, saves the
        /// document beside it and sends the document to the persistence queue.
        /// </summary>
        /// <remarks>
        /// Blob metadata (run identifier, thumbprint, model id/version, telemetry ids) is read on a best-effort
        /// basis; a failure there is logged as a warning and processing continues with defaults.
        /// Any other failure is tracked in telemetry, the incoming blob is moved to the exception container
        /// together with the partially built document and the exception text, and the exception is rethrown.
        /// </remarks>
        /// <param name="incomingBlobStream">Content of the triggering blob (recognizer output JSON).</param>
        /// <param name="name">Name of the triggering blob.</param>
        /// <param name="log">Function logger.</param>
        public async Task Run([BlobTrigger("process-in-json/{name}", Connection = "IncomingConnection")] Stream incomingBlobStream, string name, ILogger log)
        {
            log.LogInformation($"{FUNCTION_NAME} function was triggered by receipt of blob - Name:{name} Size: {incomingBlobStream.Length} Bytes Container: {_processingContext.InboundDocumentContainer}");
            string thumbprint   = "";
            string modelId      = "";
            string modelVersion = "";
            string operationId  = "";
            string parentId     = "";
            Utils  utils        = new Utils(log, _config["IncomingConnection"], FUNCTION_NAME);

            // Output document name: the blob name up to its last '-', plus the document extension.
            // Computed once, up front, and tolerant of names without a '-' so that the failure path
            // below cannot itself throw while deriving the name.
            int    lastDash     = name.LastIndexOf('-');
            string baseName     = lastDash >= 0 ? name.Substring(0, lastDash) : name;
            string documentName = $"{baseName}{ParsingConstants.DocumentExtension}";

            IDictionary <string, string> metadata = new Dictionary <string, string>();
            Document document = new Document {
                FileName = name
            };

            // Default run identifier; replaced below when the blob metadata carries one.
            document.UniqueRunIdentifier = Guid.NewGuid().ToString();
            try
            {
                try
                {
                    metadata = await utils.GetBlobMetadataAsync(name, _processingContext.InboundDocumentContainer);

                    if (metadata != null)
                    {
                        if (metadata.TryGetValue(ParsingConstants.UniqueRunIdentifierKey, out string runIdentifier) && !String.IsNullOrEmpty(runIdentifier))
                        {
                            document.UniqueRunIdentifier = runIdentifier;
                            log.LogDebug($"{FUNCTION_NAME} unique run identifier was set to  {document.UniqueRunIdentifier}");
                        }
                        if (metadata.TryGetValue(ParsingConstants.ThumbprintKey, out string thumbprintValue) && !String.IsNullOrEmpty(thumbprintValue))
                        {
                            thumbprint = thumbprintValue;
                            log.LogDebug($"{FUNCTION_NAME} thumbprint is {thumbprint}");
                        }
                        if (metadata.TryGetValue(ParsingConstants.ModelIdKey, out string modelIdValue) && !String.IsNullOrEmpty(modelIdValue))
                        {
                            modelId = modelIdValue;
                            log.LogDebug($"{FUNCTION_NAME} modelId is {modelId}");
                        }
                        if (metadata.TryGetValue(ParsingConstants.ModelVersionKey, out string modelVersionValue) && !String.IsNullOrEmpty(modelVersionValue))
                        {
                            modelVersion = modelVersionValue;
                            log.LogDebug($"{FUNCTION_NAME} modelVersion is {modelVersion}");
                        }

                        // Telemetry correlation is applied only when BOTH the operation id and the parent id
                        // are present and non-empty.
                        if (metadata.TryGetValue(ParsingConstants.TelemetryOperationIdKey, out string operationIdValue) &&
                            metadata.TryGetValue(ParsingConstants.TelemetryOperationParentIdKey, out string parentIdValue) &&
                            !String.IsNullOrEmpty(operationIdValue) &&
                            !String.IsNullOrEmpty(parentIdValue))
                        {
                            operationId = operationIdValue;
                            parentId    = parentIdValue;
                            log.LogDebug($"{FUNCTION_NAME} setting Application Insights Telemetry.  OperationId = {operationId}, ParentId = {parentId}");
                        }
                    }
                }
                catch (Exception e)
                {
                    // Metadata is optional: carry on with the defaults assigned above.
                    log.LogWarning($"{FUNCTION_NAME} Error reading incoming blob metadata.  Exception Type: {e.GetType()} Message: {e.Message}");
                }

                _telemetryClient.Context.Operation.Id       = operationId;
                _telemetryClient.Context.Operation.ParentId = parentId;
                log.LogInformation("Check the telemetry operation and parent associated with this request - search gooseberry!");
                string content = "";
                using (var sr = new StreamReader(incomingBlobStream))
                {
                    // Read asynchronously so the function thread is not blocked on blob I/O.
                    content = await sr.ReadToEndAsync();
                }

                Stopwatch timer = new Stopwatch();
                timer.Start();

                JObject jsonContent = JObject.Parse(content);
                if (jsonContent["status"] != null)
                {
                    document.RecognizerStatus = jsonContent["status"].ToString();
                }
                if (jsonContent["errors"] != null)
                {
                    document.RecognizerErrors = jsonContent["errors"].ToString();
                }

                // Fill out the document object
                var nittyGritty = (JObject)jsonContent["analyzeResult"]["documentResults"][0]["fields"];
                log.LogInformation($"{FUNCTION_NAME} Creating document based on forms recognizer output");

                // The property is named as a UTC timestamp, so record UTC rather than local time.
                document.ShreddingUtcDateTime = DateTime.UtcNow;
                document.OrderNumber          = GetString(ParsingConstants.OrderNumber, nittyGritty, document);
                document.OrderDate            = GetDate(ParsingConstants.OrderDate, nittyGritty, document);
                document.TaxDate              = GetDate(ParsingConstants.TaxDate, nittyGritty, document);
                document.DocumentNumber       = GetString(ParsingConstants.InvoiceNumber, nittyGritty, document);
                document.Account              = GetString(ParsingConstants.Account, nittyGritty, document);
                document.NetTotal             = GetNumber(ParsingConstants.NetTotal, nittyGritty, document) ?? 0;
                document.VatAmount            = GetNumber(ParsingConstants.VatAmount, nittyGritty, document) ?? 0;
                document.GrandTotal           = GetNumber(ParsingConstants.GrandTotal, nittyGritty, document) ?? 0;
                document.PostCode             = GetString(ParsingConstants.PostCode, nittyGritty, document);
                document.TimeToShred          = 0; // Set after processing complete
                document.Thumbprint           = thumbprint;
                document.ModelId              = modelId;
                document.ModelVersion         = modelVersion;
                if (document.TaxDate.HasValue)
                {
                    document.TaxPeriod = document.TaxDate.Value.Year.ToString() + document.TaxDate.Value.Month.ToString();
                }

                // Lines

                for (int i = 1; i < ParsingConstants.MAX_DOCUMENT_LINES; i++)
                {
                    // Field ids are the prefix followed by a two-digit line number ("01", "02", ...).
                    var    lineNumber  = i.ToString("D2");
                    string lineItemId  = $"{ParsingConstants.LineItemPrefix}{lineNumber}";
                    string unitPriceId = $"{ParsingConstants.UnitPricePrefix}{lineNumber}";
                    string quantityId  = $"{ParsingConstants.QuantityPrefix}{lineNumber}";
                    string netPriceId  = $"{ParsingConstants.NetPricePrefix}{lineNumber}";
                    string vatCodeId   = $"{ParsingConstants.VatCodePrefix}{lineNumber}";

                    // presence of any one of the following items will mean the document line is considered to exist.
                    string[] elements = { unitPriceId, netPriceId, lineItemId };

                    if (!AnyElementsPresentForThisLine(nittyGritty, lineNumber, elements))
                    {
                        // First missing line number ends the scan.
                        break;
                    }

                    log.LogDebug($"{FUNCTION_NAME} {lineItemId}: {GetString(lineItemId, nittyGritty, document)}");
                    DocumentLineItem lineItem = new DocumentLineItem();

                    lineItem.ItemDescription    = GetString(lineItemId, nittyGritty, document, DocumentErrorSeverity.Terminal);
                    lineItem.DocumentLineNumber = lineNumber;
                    lineItem.LineQuantity       = GetNumber(quantityId, nittyGritty, document).ToString();
                    lineItem.NetAmount          = GetNumber(netPriceId, nittyGritty, document, DocumentErrorSeverity.Terminal) ?? 0;
                    lineItem.UnitPrice          = GetNumber(unitPriceId, nittyGritty, document, DocumentErrorSeverity.Terminal) ?? 0;
                    lineItem.VATCode            = GetString(vatCodeId, nittyGritty, document, DocumentErrorSeverity.Warning);

                    document.LineItems.Add(lineItem);
                }

                timer.Stop();
                document.TimeToShred = timer.ElapsedMilliseconds;
                string documentForOutput = "********";
                if (!string.IsNullOrEmpty(document.DocumentNumber))
                {
                    documentForOutput = document.DocumentNumber;
                }
                log.LogInformation($"{FUNCTION_NAME} Document {documentForOutput} was parsed form recognizer output in {document.TimeToShred} ms");

                metadata = new Dictionary <string, string>();
                metadata.Add(ParsingConstants.ThumbprintKey, thumbprint);
                metadata.Add(ParsingConstants.UniqueRunIdentifierKey, document.UniqueRunIdentifier);
                await utils.MoveBlobAsync(_processingContext.InboundDocumentContainer, _processingContext.ProcessingCompleteContainer, name);

                await utils.SaveDocumentAsync(document, _processingContext.ProcessingCompleteContainer, documentName, metadata);
                await SendToPersistenceQueue(document, log);

                log.LogInformation($"{FUNCTION_NAME} sucessfully processed incoming blob {name} in {document.TimeToShred} ms");
            }

            // unexpected failure
            catch (Exception e)
            {
                metadata = new Dictionary <string, string>();
                metadata.Add(ParsingConstants.UniqueRunIdentifierKey, document.UniqueRunIdentifier);
                _telemetryClient.TrackException(e);
                log.LogError($"{FUNCTION_NAME} Unexpected error.  Exception Type: {e.GetType().ToString()} Message {e.Message}.  Please refer to application insights for diagnosis");

                // Mirror the success path: the incoming blob is moved under its own name and the
                // (partial) document is saved under the derived document name, so neither overwrites the other.
                await utils.MoveBlobAsync(_processingContext.InboundDocumentContainer, _processingContext.ExceptionContainer, name);

                await utils.SaveDocumentAsync(document, _processingContext.ExceptionContainer, documentName, metadata);

                await utils.SaveBlobAsync(_processingContext.ExceptionContainer, name + ParsingConstants.ExceptionExtension, e.ToString(), metadata);

                throw;
            }
        }
Example #3
0
        /// <summary>
        /// Loads the most recently shredded document stored for <paramref name="fileName"/>, together with its line items.
        /// </summary>
        /// <remarks>
        /// The header is the first row of <c>Document</c> matching the file name, ordered by
        /// <c>ShreddingUtcDateTime</c> descending; line items are the <c>DocumentLineItem</c> rows for that
        /// document's id, ordered by <c>DocumentLineNumber</c>. Both queries are parameterised.
        /// </remarks>
        /// <param name="fileName">File name to look up in the <c>Document</c> table.</param>
        /// <param name="log">Logger for the outcome and for any failure.</param>
        /// <returns>The document with its line items, or <c>null</c> when no matching row exists.</returns>
        /// <exception cref="Exception">Any exception raised while querying is logged and rethrown unchanged.</exception>
        public static Document LoadDocument(string fileName, ILogger log)
        {
            using (SqlConnection connection = new SqlConnection(sqlConnectionString))
            {
                connection.Open();

                Document document   = null;
                int      documentId = 0;

                // Header
                try
                {
                    using (SqlCommand command = connection.CreateCommand())
                    {
                        command.CommandText = "select * from Document where FileName = @fileName order by ShreddingUtcDateTime desc";
                        // A null file name is sent as SQL NULL, which matches no row.
                        command.Parameters.AddWithValue("@fileName", (object)fileName ?? DBNull.Value);

                        using (SqlDataReader reader = command.ExecuteReader())
                        {
                            // Only the first (latest) row is of interest.
                            if (reader.Read())
                            {
                                documentId = (int)reader["Id"];
                                document   = new Document
                                {
                                    Account                = reader.GetValue <string>("Account"),
                                    DocumentNumber         = reader.GetValue <string>("DocumentNumber"),
                                    TaxDate                = reader.GetValue <DateTime>("TaxDate"),
                                    FileName               = reader.GetValue <string>("FileName"),
                                    GrandTotal             = reader.GetValue <decimal>("GrandTotal"),
                                    NetTotal               = reader.GetValue <decimal>("NetTotal"),
                                    PostCode               = reader.GetValue <string>("PostCode"),
                                    ShippingTotal          = reader.GetValue <decimal>("ShippingTotal"),
                                    VatAmount              = reader.GetValue <decimal>("VatAmount"),
                                    TaxPeriod              = reader.GetValue <string>("TaxPeriod"),
                                    OrderDate              = reader.GetValue <DateTime>("OrderDate"),
                                    OrderNumber            = reader.GetValue <string>("OrderNumber"),
                                    DocumentVersion        = reader.GetValue <int>("DocumentVersion"),
                                    LatestVersionIndicator = reader.GetValue <bool>("LatestVersionIndicator")
                                };
                            }
                        }
                    }
                }
                catch (Exception e)
                {
                    // The document has not been built when this fires, so identify it by file name.
                    log.LogError($"Exception prevented reading Document {fileName} from SQL database {connection.Database}.  Message is {e.Message}");
                    throw;
                }

                if (document == null)
                {
                    log.LogInformation($"Requested document {fileName} not found in database");
                    return null;
                }

                // Lines
                try
                {
                    using (SqlCommand command = connection.CreateCommand())
                    {
                        command.CommandText = "select * from DocumentLineItem where DocumentId = @documentId order by DocumentLineNumber";
                        command.Parameters.AddWithValue("@documentId", documentId);

                        using (SqlDataReader reader = command.ExecuteReader())
                        {
                            while (reader.Read())
                            {
                                var lineItem = new DocumentLineItem
                                {
                                    DiscountPercent    = reader.GetValue <decimal>("DiscountPercent"),
                                    DocumentLineNumber = reader.GetValue <string>("DocumentLineNumber"),
                                    ItemDescription    = reader.GetValue <string>("ItemDescription"),
                                    LineQuantity       = reader.GetValue <string>("LineQuantity"),
                                    VATCode            = reader.GetValue <string>("VATCode"),
                                    NetAmount          = reader.GetValue <decimal>("NetAmount"),
                                    Taxableindicator   = reader.GetValue <string>("Taxableindicator"),
                                    UnitPrice          = reader.GetValue <decimal>("UnitPrice")
                                };
                                document.LineItems.Add(lineItem);
                            }
                        }
                    }
                }
                catch (Exception e)
                {
                    log.LogError($"Exception prevented reading Document Lines for document {document.DocumentNumber} from SQL database {connection.Database}.  Message is {e.Message}");
                    throw;
                }

                log.LogInformation($"Document {document.DocumentNumber} was read from SQL database {connection.Database}");
                return document;
            }
        }
Example #4
0
        /// <summary>
        /// Scores how closely the recognised document <paramref name="actual"/> matches the expected values
        /// in <paramref name="expected"/>, out of a nominal 100 points.
        /// </summary>
        /// <remarks>
        /// The header contributes up to 20 points, apportioned over 7 compared fields. The remaining 80 points
        /// are shared equally between the expected lines; each line earns its share in proportion to how many
        /// of its 6 compared fields match. Each partial score is truncated to a whole number before being
        /// added, so a fully matched document can total slightly under 100. An expected line with no unique
        /// counterpart in <paramref name="actual"/> earns nothing. When there are no expected lines only the
        /// header is scored.
        /// </remarks>
        /// <param name="actual">Document produced by recognition.</param>
        /// <param name="expected">Expected header values and lines.</param>
        /// <param name="log">Logger for per-field, per-line and overall results.</param>
        /// <returns>A list holding a single "Accuracy" score record.</returns>
        private List <ScoreRecord> CompareActualWithExpectedResults(Document actual, DocumentCheckRequest expected, ILogger log)
        {
            const int DocumentHeaderPoints = 20;
            const int DocumentLinesPoints  = 80;
            const int PassMark             = 50;

            var results = new List <ScoreRecord>();

            log.LogTrace($"Checking accuracy of document {actual.FileName} header");

            // There are 7 header fields to check.  We will identify those that match.
            bool[] headerMatches =
            {
                Compare <string>(actual.FileName, "Account", actual.Account, expected.Account, log),
                Compare <decimal>(actual.FileName, "GrandTotal", actual.GrandTotal, (decimal)expected.GrandTotalValue, log),
                Compare <decimal>(actual.FileName, "ShippingTotal", actual.ShippingTotal, (decimal)expected.ShippingTotalValue, log),
                Compare <decimal>(actual.FileName, "NetTotal", actual.NetTotal, (decimal)expected.PreTaxTotalValue, log),
                Compare <decimal>(actual.FileName, "VatAmount", actual.VatAmount, (decimal)expected.TaxTotalValue, log),
                Compare <string>(actual.FileName, "PostCode", actual.PostCode, expected.PostalCode, log),
                Compare <DateTime>(actual.FileName, "TaxDate", actual.TaxDate ?? DateTime.MinValue, expected.DocumentDate, log)
            };

            // A fully matched header is worth 20 points - so apportion that
            decimal numPossibleMatches = headerMatches.Length;
            decimal numMatches         = headerMatches.Count(m => m);
            decimal successRate        = numMatches / numPossibleMatches;
            int     points             = (int)(DocumentHeaderPoints * successRate);

            int    documentPoints = points;
            string notes          = $"Document {actual.FileName} Header matched {numMatches} of {numPossibleMatches} fields for a score of {points} / {DocumentHeaderPoints})";

            log.LogInformation(notes);

            log.LogTrace($"Checking accuracy of document {actual.DocumentNumber} lines");

            // Materialise once: the lines are both counted and iterated.
            var expectedLines = expected.Lines.OrderBy(o => o.LineNumber).ToList();

            // 100 points for a fully matched document leaves 80 up for grabs: pro rata that over the expected lines.
            // Divide as decimal (not int) so the per-line share is not truncated, and avoid dividing by zero
            // when nothing is expected.
            decimal pointsPerLine = expectedLines.Count > 0 ? (decimal)DocumentLinesPoints / expectedLines.Count : 0m;

            foreach (var expLine in expectedLines)
            {
                var expLineNumber = expLine.LineNumber.PadLeft(2, '0');
                log.LogTrace($"Checking Line {expLineNumber}");

                // Exactly one actual line must carry this line number; none or several both count as "missing".
                var candidates = actual.LineItems.Where(l => l.DocumentLineNumber == expLineNumber).ToList();
                if (candidates.Count != 1)
                {
                    log.LogTrace($"{actual.FileName} Actual line matching {expLineNumber} does not exist - you may want to retrain your model?");
                    continue;
                }
                DocumentLineItem actLine = candidates[0];

                bool descriptionMatches = Compare <string>(actual.FileName, "ItemDescription" + expLineNumber, actLine.ItemDescription, $"{expLine.ProductCode}{expLine.ProductDescription}".Trim(), log);
                bool unitPriceMatches   = Compare <decimal>(actual.FileName, "UnitPrice" + expLineNumber, actLine.UnitPrice, (decimal)expLine.Price, log);

                // Any non-empty taxable indicator on the actual line is treated as "taxable".
                bool actTaxIndicator = !String.IsNullOrEmpty(actLine.Taxableindicator);
                bool taxableMatches  = Compare <bool>(actual.FileName, "Taxableindicator" + expLineNumber, actTaxIndicator, expLine.Taxable, log);

                // An unparseable quantity is compared as 0.
                double actLineQuantity = Double.TryParse(actLine.LineQuantity, out double parsedQuantity) ? parsedQuantity : 0;

                // Fall back to the calculated quantity only when the recognised quantity does not match.
                bool quantityMatches = Compare <double>(actual.FileName, "LineQuantity" + expLineNumber, actLineQuantity, expLine.Quantity, log)
                                       || Compare <double>(actual.FileName, "CalculatedLineQuantity" + expLineNumber, (double)actLine.CalculatedLineQuantity, expLine.Quantity, log);

                bool netAmountMatches = Compare <decimal>(actual.FileName, "NetAmount" + expLineNumber, actLine.NetAmount, (decimal)expLine.DiscountedGoodsValue, log);
                bool discountMatches  = Compare <decimal>(actual.FileName, "DiscountPercent" + expLineNumber, actLine.DiscountPercent, (decimal)expLine.Discount, log);

                bool[] lineMatches = { descriptionMatches, unitPriceMatches, taxableMatches, quantityMatches, netAmountMatches, discountMatches };

                numPossibleMatches = lineMatches.Length;
                numMatches         = lineMatches.Count(m => m);
                successRate        = numMatches / numPossibleMatches;
                points             = (int)(pointsPerLine * successRate);
                documentPoints    += points;
                notes              = $"Document {actual.FileName} line {expLineNumber} matched {numMatches} of {numPossibleMatches} fields for a score of {points} / {pointsPerLine:0.##})";
                log.LogInformation(notes);
            }

            notes = $"Document {actual.FileName} overall scored {documentPoints} / 100 points)";
            if (documentPoints < PassMark)
            {
                log.LogError(notes);
            }
            else
            {
                log.LogInformation(notes);
            }

            results.Add(new ScoreRecord {
                Type = "Accuracy", Notes = notes, Score = documentPoints
            });
            return results;
        }