Пример #1
0
        /// <summary>
        /// Runs the receipt pipeline for one image: OCR call, line extraction,
        /// then extraction of the data of interest.
        /// </summary>
        /// <param name="image">Image bytes handed to the OCR call.</param>
        /// <returns>The receipt produced by the final extraction step.</returns>
        public async Task <Receipt> ProcessReceiptImage(byte[] image)
        {
            // Step 1: obtain the OCR result for the image.
            OCRVisionResponse visionResult = await CallComputerVisionOCRAsync(image);

            // Step 2: turn the OCR result into receipt lines.
            Receipt receiptWithLines = ExtractReceiptLines(visionResult);

            // Step 3: derive the data of interest and hand the result back.
            return ExtractDataOfInterest(receiptWithLines);
        }
Пример #2
0
        /// <summary>
        /// Builds a <c>Receipt</c> from an OCR response by flattening all region lines,
        /// snapping fragments that sit at roughly the same height onto a shared y value,
        /// and merging the fragments of each row into a single line.
        /// </summary>
        /// <param name="ocr">OCR response whose regions, lines and words are read.</param>
        /// <returns>
        /// A receipt carrying the OCR language, orientation and text angle, with one
        /// <c>ReceiptLine</c> per detected row (ordered top to bottom). When the OCR
        /// response contains no lines, the receipt has an empty line list.
        /// </returns>
        Receipt ExtractReceiptLines(OCRVisionResponse ocr)
        {
            // Flatten regions -> lines into one ReceiptLine per OCR line.
            Receipt receipt = new Receipt();

            receipt.lines       = new List <ReceiptLine>();
            receipt.language    = ocr.language;
            receipt.orientation = ocr.orientation;
            receipt.textAngle   = ocr.textAngle;

            foreach (Region region in ocr.regions)
            {
                foreach (Line line in region.lines)
                {
                    ReceiptLine newReceiptLine = new ReceiptLine();
                    newReceiptLine.boundingBox = line.boundingBox;

                    // The bounding box is read as comma-separated "x,y,width,height".
                    // Missing or unparsable parts become 0 instead of throwing.
                    string[] bounds = (line.boundingBox ?? "").Split(',');
                    newReceiptLine.x      = ParseBoundingBoxPart(bounds, 0);
                    newReceiptLine.y      = ParseBoundingBoxPart(bounds, 1);
                    newReceiptLine.width  = ParseBoundingBoxPart(bounds, 2);
                    newReceiptLine.height = ParseBoundingBoxPart(bounds, 3);

                    // Words separated by single spaces, no trailing whitespace.
                    newReceiptLine.text = string.Join(" ", line.words.Select(w => w.text)).TrimEnd();

                    receipt.lines.Add(newReceiptLine);
                }
            }

            // No text recognised: Average() below would throw on an empty sequence.
            if (receipt.lines.Count == 0)
            {
                return(receipt);
            }

            // Order by vertical, then horizontal position.
            Comparison <ReceiptLine> byPosition =
                (a, b) => a.y == b.y ? a.x.CompareTo(b.x) : a.y.CompareTo(b.y);

            receipt.lines.Sort(byPosition);

            double avglineheight = receipt.lines.Average(a => a.height);

            // Pass 1: snap each fragment onto the y of the row it belongs to.
            List <ReceiptLine> snappedLines = new List <ReceiptLine>();
            int currenty = 0;

            foreach (ReceiptLine line in receipt.lines)
            {
                // A fragment starts a new row when it lies further down than the current
                // row's y plus its own height, less a tolerance of a third of the average
                // line height (heuristic, may need tweaking).
                if (line.y > (currenty + line.height - (avglineheight / 3)))
                {
                    currenty = line.y;
                }

                ReceiptLine newline = new ReceiptLine();
                newline.height      = line.height;
                newline.text        = line.text;
                newline.y           = currenty;
                newline.x           = line.x;
                newline.width       = line.width;
                newline.boundingBox = String.Format("{0},{1},{2},{3}", newline.x, newline.y, newline.width, newline.height);
                snappedLines.Add(newline);
            }

            // Snapping changed y values, so order again before merging.
            snappedLines.Sort(byPosition);

            Receipt processedreceipt = new Receipt();

            processedreceipt.lines       = new List <ReceiptLine>();
            processedreceipt.language    = receipt.language;
            processedreceipt.orientation = receipt.orientation; // previously lost through a self-assignment
            processedreceipt.textAngle   = receipt.textAngle;   // previously lost through a self-assignment

            // Pass 2: merge all fragments sharing a y into one line.
            // Starting from null (rather than y == 0) ensures a first row snapped to
            // y == 0 is still added to the result.
            ReceiptLine processedLine = null;

            foreach (ReceiptLine line in snappedLines)
            {
                if (processedLine == null || processedLine.y != line.y)
                {
                    processedLine        = new ReceiptLine();
                    processedLine.y      = line.y;
                    processedLine.x      = line.x;
                    processedLine.width  = line.width;
                    processedLine.height = line.height;
                    processedreceipt.lines.Add(processedLine);
                }
                else
                {
                    // Keep the leftmost x, the sum of all fragment widths (gaps between
                    // fragments are not included) and the tallest fragment height.
                    processedLine.x      = Math.Min(processedLine.x, line.x);
                    processedLine.width += line.width;
                    processedLine.height = Math.Max(processedLine.height, line.height);
                }

                processedLine.boundingBox = String.Format("{0},{1},{2},{3}", processedLine.x, processedLine.y, processedLine.width, processedLine.height);

                // Each fragment is followed by a space, so merged text keeps a trailing space.
                processedLine.text += line.text + " ";
            }

            return(processedreceipt);
        }

        /// <summary>
        /// Reads one integer component of a split bounding-box string.
        /// </summary>
        /// <param name="parts">The bounding-box string split on ','.</param>
        /// <param name="index">Position of the component to read.</param>
        /// <returns>The parsed value, or 0 when the component is missing or not an integer.</returns>
        private static int ParseBoundingBoxPart(string[] parts, int index)
        {
            int value;

            if (index < parts.Length && int.TryParse(parts[index], out value))
            {
                return(value);
            }

            return(0);
        }