/// <summary>
/// Runs OCR over a bitmap and returns the characters that were recognized.
/// </summary>
/// <param name="source">Bitmap to read. A quarter of its height (but at least 2) is used as the largest distance between two symbols that is not yet read as a space.</param>
/// <param name="onlyNumbers">Forwarded to the symbol splitting and to the pattern matching. If true, no spaces are inserted between symbols.</param>
/// <param name="whiteThreshold">Value from 0 to 255, affects which pixels are considered white or black.</param>
/// <param name="x">Added to the x coordinate of every symbol that is reported to <paramref name="ocrControl"/> (presumably the position of <paramref name="source"/> inside a larger image; confirm against callers).</param>
/// <param name="y">Added to the y coordinate of every symbol that is reported to <paramref name="ocrControl"/> (presumably the position of <paramref name="source"/> inside a larger image; confirm against callers).</param>
/// <param name="ocrControl">If given the read letters are added for further editing and comparing.</param>
/// <returns>The recognized text after it was passed through CleanUpOcr. If the recognition is cancelled, the text read up to that point is cleaned up and returned.</returns>
public static string ReadImageOcr(Bitmap source, bool onlyNumbers, byte whiteThreshold, int x = 0, int y = 0, OCRControl ocrControl = null)
{
    var recognizedText = string.Empty;

    // symbols further apart than this are separated by a space
    var spaceGapThreshold = Math.Max(2, source.Height / 4);
    var pixels = ImageUtils.GetBooleanArrayOfImage(source, whiteThreshold);

    // Note: images that consist only of a line are not filtered out here.
    // An earlier check for that was disabled, the lines can be at any height.

    var symbols = SplitBySymbol(pixels, onlyNumbers).ToArray();

    // x coordinate right behind the previously read symbol;
    // starts at the first symbol, so the text never begins with a space
    int previousSymbolEnd = symbols.FirstOrDefault()?.Coords.X ?? 0;

    foreach (var symbol in symbols)
    {
        // read spaces
        var startsNewWord = !onlyNumbers && symbol.Coords.X - previousSymbolEnd > spaceGapThreshold;
        if (startsNewWord)
        {
            recognizedText += " ";
        }

        previousSymbolEnd = symbol.Coords.X + symbol.Pattern.GetLength(0);

        var matchedText = ArkOcr.Ocr.ocrConfig.RecognitionPatterns.FindMatchingChar(symbol, pixels, onlyNumbers: onlyNumbers);

        if (matchedText == null)
        {
            // recognition was cancelled
            return CleanUpOcr(recognizedText);
        }

        if (matchedText.Length == 0)
        {
            // character was not recognized and skipped in the manual recognition
            continue;
        }

        recognizedText += matchedText;

        if (ocrControl != null)
        {
            var charData = new RecognizedCharData(x + symbol.Coords.X, y + symbol.Coords.Y, symbol.YOffset)
            {
                Text = matchedText,
                Pattern = symbol.Pattern
            };
            ocrControl.AddLetterToRecognized(charData);
        }
    }

    return CleanUpOcr(recognizedText);
}
/// <summary>
/// Performs OCR on a bitmap and returns the recognized characters.
/// </summary>
/// <param name="source">Bitmap to read.</param>
/// <param name="onlyNumbers">Forwarded to the symbol splitting and to the pattern matching. If true, no spaces are inserted between symbols.</param>
/// <param name="brightAdj">Adjustment value handed to ImageUtils.GetAdjustedDirectBitmapOfImage before the recognition (presumably a brightness factor; confirm against that helper).</param>
/// <param name="ocrControl">If given, each recognized letter is added to it together with its pattern.</param>
/// <returns>The recognized text after it was passed through CleanUpOcr. If the recognition is cancelled, the text read up to that point is cleaned up and returned.</returns>
public static string ReadImageOcr(Bitmap source, bool onlyNumbers, float brightAdj = 1f, OCRControl ocrControl = null)
{
    // symbols further apart than this are separated by a space
    const int maxDistanceForNonSpace = 3;

    var ret = string.Empty;

    using (var db = ImageUtils.GetAdjustedDirectBitmapOfImage(source, brightAdj)) // TODO use whiteThreshold from user
    using (var adjPic = db.ToBitmap()) // the bitmap is only needed while matching, release it afterwards
    {
        // materialize once, the symbols are needed for the start position and for the loop
        var charSymbols = SplitBySymbol(db, onlyNumbers).ToArray();

        // x coordinate right behind the previously read symbol;
        // starts at the first symbol, so the text never begins with a space
        int xPos = charSymbols.FirstOrDefault()?.Coords.X ?? 0;

        foreach (var sym in charSymbols)
        {
            // read spaces
            if (!onlyNumbers && sym.Coords.X - xPos > maxDistanceForNonSpace)
            {
                ret += " ";
            }

            xPos = sym.Coords.X + sym.Pattern.GetLength(0);

            var c = ArkOCR.OCR.ocrConfig.RecognitionPatterns.FindMatchingChar(sym, adjPic, onlyNumbers: onlyNumbers);

            if (c == null)
            {
                // recognition was cancelled: stop reading, the text read so far is still cleaned up below
                break;
            }

            if (c.Length == 0)
            {
                // character was not recognized and skipped in the manual recognition
                continue;
            }

            ret += c;
            ocrControl?.AddLetterToRecognized(c, sym.Pattern);
        }
    }

    return CleanUpOcr(ret);
}