Exemplo n.º 1
0
        /// <summary>
        /// Performs OCR on a bitmap and returns the recognized characters.
        /// </summary>
        /// <remarks>
        /// The bitmap is handed to <c>ImageUtils.GetBooleanArrayOfImage</c> together with
        /// <paramref name="whiteThreshold"/>, the result is split into symbols by <c>SplitBySymbol</c>,
        /// and every symbol is looked up in the configured recognition patterns.
        /// </remarks>
        /// <param name="source">Bitmap to read. Its height also determines how wide a gap between two symbols must be to count as a space.</param>
        /// <param name="onlyNumbers">Passed on to the symbol splitting and the pattern matching. When true, no spaces are inserted between symbols.</param>
        /// <param name="whiteThreshold">Value from 0 to 255, affects which pixels are considered white or black.</param>
        /// <param name="x">Offset added to a symbol's x coordinate when it is reported to <paramref name="ocrControl"/>.</param>
        /// <param name="y">Offset added to a symbol's y coordinate when it is reported to <paramref name="ocrControl"/>.</param>
        /// <param name="ocrControl">If given the read letters are added for further editing and comparing.</param>
        /// <returns>
        /// The recognized characters after they were passed through <c>CleanUpOcr</c>.
        /// If the recognition was cancelled, only the characters read up to that point are included.
        /// </returns>
        public static string ReadImageOcr(Bitmap source, bool onlyNumbers, byte whiteThreshold, int x = 0, int y = 0, OCRControl ocrControl = null)
        {
            var recognized = string.Empty;

            // a horizontal gap between two symbols that is wider than this is read as a space
            var spaceGapThreshold = Math.Max(2, source.Height / 4);

            var pixels = ImageUtils.GetBooleanArrayOfImage(source, whiteThreshold);

            // Some images are just lines. They are not filtered out here: the lines can be
            // at any height, so checking for them is probably not worth it.

            var symbols = SplitBySymbol(pixels, onlyNumbers).ToArray();

            // start at the left edge of the first symbol so no leading space is produced
            var firstSymbol = symbols.FirstOrDefault();
            int previousRightEdge = firstSymbol?.Coords.X ?? 0;

            foreach (var symbol in symbols)
            {
                // read spaces
                var gap = symbol.Coords.X - previousRightEdge;
                if (gap > spaceGapThreshold && !onlyNumbers)
                {
                    recognized += " ";
                }

                previousRightEdge = symbol.Coords.X + symbol.Pattern.GetLength(0);

                var match = ArkOcr.Ocr.ocrConfig.RecognitionPatterns.FindMatchingChar(symbol, pixels, onlyNumbers: onlyNumbers);

                if (match == null)
                {
                    // recognition was cancelled, return what was read so far
                    return CleanUpOcr(recognized);
                }

                if (string.IsNullOrEmpty(match))
                {
                    // character was not recognized and skipped in the manual recognition
                    continue;
                }

                recognized += match;

                if (ocrControl != null)
                {
                    // report the character with its position translated by the given offsets
                    var charData = new RecognizedCharData(x + symbol.Coords.X, y + symbol.Coords.Y, symbol.YOffset)
                    {
                        Text = match,
                        Pattern = symbol.Pattern
                    };
                    ocrControl.AddLetterToRecognized(charData);
                }
            }

            return CleanUpOcr(recognized);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Performs OCR on a bitmap and returns the recognized characters.
        /// </summary>
        /// <remarks>
        /// The bitmap is first adjusted via <c>ImageUtils.GetAdjustedDirectBitmapOfImage</c>, then split
        /// into symbols by <c>SplitBySymbol</c>, and every symbol is looked up in the configured
        /// recognition patterns.
        /// </remarks>
        /// <param name="source">Bitmap to read.</param>
        /// <param name="onlyNumbers">Passed on to the symbol splitting and the pattern matching. When true, no spaces are inserted between symbols.</param>
        /// <param name="brightAdj">Adjustment value passed to <c>ImageUtils.GetAdjustedDirectBitmapOfImage</c>.</param>
        /// <param name="ocrControl">If given, each recognized character and its pattern are added to it.</param>
        /// <returns>
        /// The recognized characters after they were passed through <c>CleanUpOcr</c>.
        /// If the recognition was cancelled, only the characters read up to that point are included.
        /// </returns>
        public static string ReadImageOcr(Bitmap source, bool onlyNumbers, float brightAdj = 1f, OCRControl ocrControl = null)
        {
            // a horizontal gap between two symbols that is wider than this is read as a space
            const int maxDistanceForNonSpace = 3;

            var ret = string.Empty;

            // NOTE(review): adjPic is disposed at the end of this scope; this assumes
            // FindMatchingChar does not keep a reference to the image after it returns - confirm.
            using (var db = ImageUtils.GetAdjustedDirectBitmapOfImage(source, brightAdj)) // TODO use whiteThreshold from user
            using (var adjPic = db.ToBitmap())
            {
                // materialize once: the symbols are read by FirstOrDefault and again by the loop
                var charSymbols = SplitBySymbol(db, onlyNumbers).ToArray();

                // start at the left edge of the first symbol so no leading space is produced
                int xPos = charSymbols.FirstOrDefault()?.Coords.X ?? 0;

                foreach (var sym in charSymbols)
                {
                    // read spaces
                    if (!onlyNumbers && sym.Coords.X - xPos > maxDistanceForNonSpace)
                    {
                        ret += " ";
                    }
                    xPos = sym.Coords.X + sym.Pattern.GetLength(0);

                    var c = ArkOCR.OCR.ocrConfig.RecognitionPatterns.FindMatchingChar(sym, adjPic, onlyNumbers: onlyNumbers);

                    if (c == null)
                    {
                        // recognition was cancelled: return what was read so far,
                        // cleaned up the same way as a complete result
                        return CleanUpOcr(ret);
                    }

                    if (string.IsNullOrEmpty(c))
                    {
                        // character was not recognized and skipped in the manual recognition
                        continue;
                    }

                    ret += c;
                    ocrControl?.AddLetterToRecognized(c, sym.Pattern);
                }
            }

            return CleanUpOcr(ret);
        }