/// <summary>
/// Finds candidate number regions in the body image and runs the configured
/// digit recogniser(s) over them.
/// </summary>
/// <remarks>
/// Pipeline: colour normalisation, grayscale conversion, Canny edges, contour
/// bounding boxes, union of touching boxes, size filtering, horizontal extension
/// and grouping of neighbouring boxes, then per-region cropping and OCR.
/// If no region survives the filters and <paramref name="normAny"/> is false,
/// the method retries once with <paramref name="normAny"/> set to true.
/// The source image and the recognition method are read from the <c>image</c> and
/// <c>digitsRecognitionMethod</c> members, which are declared outside this block.
/// </remarks>
/// <param name="normAny">
/// When true (and the colour normalisation step reported success), <c>NormalizeAny</c>
/// is invoked on the grayscale normaliser before its result is used.
/// </param>
/// <returns>
/// The recognised strings: one per candidate region for Tesseract, followed by a
/// single entry for the neural recogniser when it is enabled. Empty when no region
/// is found even after the retry.
/// </returns>
public List<string> ProcessBodyImage(bool normAny = false)
{
    // NOTE(review): baseImg / grayImg are not disposed here because it is not visible
    // whether Normalization keeps or returns the instances it was given - confirm ownership.
    Image<Bgr, byte> baseImg = new Image<Bgr, byte>(image);
    Normalization<Bgr> rgbNorm = new Normalization<Bgr>(baseImg);
    var normalized = rgbNorm.Normalize();
    baseImg = rgbNorm.Result;

    Image<Gray, byte> grayImg = baseImg.Convert<Gray, byte>();
    if (normalized)
    {
        Normalization<Gray> grayNorm = new Normalization<Gray>(grayImg);
        if (normAny)
        {
            grayNorm.NormalizeAny();
        }
        grayImg = grayNorm.Result;
    }

    int edgeHeight;
    List<Rectangle> boxes = detectEdgeBoxes(grayImg, out edgeHeight);
    boxes = uniteContactingBoxes(boxes);

    // Keep only boxes that look like upright glyphs: tall enough, taller than wide,
    // at most half the image height, and at least two pixels wide.
    const float MinBoxHeight = 5;
    boxes.RemoveAll(x => x.Height < MinBoxHeight
                         || x.Height < x.Width
                         || x.Height > edgeHeight / 2
                         || x.Width < 2);

    // Widen every box by a third of its width on both sides so that neighbouring
    // glyphs of the same number overlap and can be grouped.
    boxes = boxes.OrderBy(x => x.X).ToList();
    List<Rectangle> extended = new List<Rectangle>(boxes.Count);
    foreach (Rectangle box in boxes)
    {
        int margin = box.Width / 3;
        Rectangle leftMargin = new Rectangle(box.X - margin, box.Y, margin, box.Height);
        Rectangle rightMargin = new Rectangle(box.X + box.Width, box.Y, margin, box.Height);
        extended.Add(Rectangle.Union(Rectangle.Union(box, leftMargin), rightMargin));
    }

    List<IntersectionHierarchyItem> intersections =
        extended.Select(box => findIntersectingHierarchy(extended, box)).ToList();
    List<Rectangle> result = intersections
        .Where(x => x.HasIntersection)
        .Select(x => x.Union)
        .Distinct()
        .ToList();

    // Drop groups that are not wider than tall, then groups that are close to square.
    result.RemoveAll(x => x.Width <= x.Height);
    result.RemoveAll(x =>
    {
        float aspectRatio = (float)x.Width / (float)x.Height;
        return aspectRatio > 0.75 && aspectRatio < 1.3;
    });

    if (!result.Any())
    {
        if (!normAny)
        {
            return ProcessBodyImage(true);
        }
        return new List<string>();
    }

    bool useTesseract = digitsRecognitionMethod == DigitsRecognitionMethod.Tesseract
                        || digitsRecognitionMethod == DigitsRecognitionMethod.Both;
    bool useNeural = digitsRecognitionMethod == DigitsRecognitionMethod.Neural
                     || digitsRecognitionMethod == DigitsRecognitionMethod.Both;

    List<Rectangle> bounding = new List<Rectangle>();
    List<Rectangle[]> sRects = new List<Rectangle[]>();
    List<List<string>> digitVariants = new List<List<string>>();

    foreach (Rectangle area in result)
    {
        // Source boxes inside the grouped area, without nested boxes, then merged.
        List<Rectangle> rects = findInnerRectangles(boxes, area);
        rects = removeInnerRectangles(rects);
        rects = merge(rects);

        sRects.Add(rects.ToArray());
        bounding.Add(getBoundingBox(rects));

        if (useTesseract)
        {
            // Distinct: merging can leave duplicate rectangles behind.
            List<string> digitVariant = new List<string>();
            foreach (Rectangle rect in rects.Distinct())
            {
                digitVariant.Add(saveThresholdedDigit(grayImg, rect, false));
            }
            digitVariants.Add(digitVariant);
        }
    }

    List<string> numbersFinals = new List<string>();
    if (useTesseract)
    {
        foreach (List<string> digitVariant in digitVariants)
        {
            string file = saveTesseract(digitVariant);
            numbersFinals.Add(OCRParser.ParseTesseract(file));
        }
    }

    if (useNeural)
    {
        List<string> digitPaths = new List<string>();
        if (bounding.Count > 0)
        {
            foreach (Rectangle glyph in sRects[pickBestCandidate(bounding, sRects)])
            {
                digitPaths.Add(saveThresholdedDigit(grayImg, glyph, true));
            }
        }
        numbersFinals.Add(OCRParser.ParseNeural(digitPaths.ToArray()).Value);
    }

    return numbersFinals;
}

/// <summary>
/// Runs Canny edge detection on <paramref name="grayImg"/> and returns the min/max
/// extent of every contour found; all native objects are released before returning.
/// </summary>
/// <param name="grayImg">Grayscale image to analyse.</param>
/// <param name="edgeHeight">Receives the height of the Canny edge image.</param>
/// <returns>One rectangle per non-empty contour (width = xmax - xmin, height = ymax - ymin).</returns>
private static List<Rectangle> detectEdgeBoxes(Image<Gray, byte> grayImg, out int edgeHeight)
{
    const double CannyThreshold = 175;
    const double CannyThresholdLinking = 320;

    List<Rectangle> boxes = new List<Rectangle>();
    using (Image<Gray, byte> canny = grayImg.Canny(CannyThreshold, CannyThresholdLinking))
    using (var contours = new VectorOfVectorOfPoint())
    using (var hierarchy = new Mat())
    {
        edgeHeight = canny.Height;
        CvInvoke.FindContours(canny, contours, hierarchy, Emgu.CV.CvEnum.RetrType.List, Emgu.CV.CvEnum.ChainApproxMethod.LinkRuns, new Point(0, 0));

        for (int i = 0; i < contours.Size; i++)
        {
            using (var contour = contours[i])
            {
                if (contour.Size == 0)
                {
                    // Nothing to measure; skipping avoids a failure on an empty contour.
                    continue;
                }

                var first = contour[0];
                int xmin = first.X, xmax = first.X, ymin = first.Y, ymax = first.Y;
                for (int j = 1; j < contour.Size; j++)
                {
                    var p = contour[j];
                    xmin = Math.Min(xmin, p.X);
                    xmax = Math.Max(xmax, p.X);
                    ymin = Math.Min(ymin, p.Y);
                    ymax = Math.Max(ymax, p.Y);
                }
                boxes.Add(new Rectangle(xmin, ymin, xmax - xmin, ymax - ymin));
            }
        }
    }
    return boxes;
}

/// <summary>
/// Replaces groups of touching, largely overlapping boxes by their common bounding box.
/// </summary>
/// <param name="boxes">Contour boxes; the list itself is not modified.</param>
/// <returns>United boxes plus every box that was not absorbed into a group.</returns>
private List<Rectangle> uniteContactingBoxes(List<Rectangle> boxes)
{
    // For each box, collect the later boxes it should be united with.
    var groups = new List<Tuple<Rectangle, List<Rectangle>>>(boxes.Count);
    for (int i = 0; i < boxes.Count; i++)
    {
        List<Rectangle> partners = new List<Rectangle>();
        for (int j = i + 1; j < boxes.Count; j++)
        {
            if (isContactWithEnoughOverlap(boxes[i], boxes[j]))
            {
                partners.Add(boxes[j]);
            }
        }
        groups.Add(new Tuple<Rectangle, List<Rectangle>>(boxes[i], partners));
    }

    List<Rectangle> united = new List<Rectangle>();
    foreach (var group in groups)
    {
        if (group.Item2.Any())
        {
            // The anchor is appended to the group's own list on purpose: the
            // Contains check below, run for later groups, then also sees it.
            List<Rectangle> members = group.Item2;
            members.Add(group.Item1);
            united.Add(getBoundingBox(members));
        }
        else
        {
            // A lone box is kept only if no group has already absorbed it.
            Rectangle loner = group.Item1;
            if (!groups.Any(g => g.Item2.Contains(loner)))
            {
                united.Add(loner);
            }
        }
    }
    return united;
}

/// <summary>
/// Tells whether two boxes are in contact (edges exactly one pixel apart) and their
/// horizontal extents overlap by at least 70% of their combined horizontal span.
/// </summary>
private static bool isContactWithEnoughOverlap(Rectangle b1, Rectangle b2)
{
    const int ContactGap = 1;
    const float MinOverlapPercent = 70;

    if (Math.Abs(b1.Bottom - b2.Top) == ContactGap)
    {
        // b1 sits directly above b2: require that they are not horizontally disjoint.
        Rectangle left = b1.Left < b2.Left ? b1 : b2;
        Rectangle right = b1.Right > b2.Right ? b1 : b2;
        if (left.Right < right.Left)
        {
            return false;
        }
    }
    // NOTE(review): this compares the two RIGHT edges; a left/right contact test would
    // normally compare b1.Right with b2.Left. Kept as-is because the overlap below is
    // measured horizontally, so a true side-by-side contact would never reach 70%.
    else if (Math.Abs(b1.Right - b2.Right) == ContactGap)
    {
        Rectangle top = b1.Top < b2.Top ? b1 : b2;
        Rectangle bottom = b1.Bottom > b2.Bottom ? b1 : b2;
        if (top.Bottom < bottom.Top)
        {
            return false;
        }
    }
    else
    {
        return false;
    }

    int span = Math.Max(b1.Right, b2.Right) - Math.Min(b1.Left, b2.Left);
    if (span <= 0)
    {
        return false;
    }

    int leftOffset = Math.Max(b1.Left, b2.Left) - Math.Min(b1.Left, b2.Left);
    int rightOffset = Math.Max(b1.Right, b2.Right) - Math.Min(b1.Right, b2.Right);
    int intersection = span - leftOffset - rightOffset;
    float percent = 100 * intersection / (float)span;
    return percent >= MinOverlapPercent;
}

/// <summary>
/// Chooses the candidate region handed to the neural recogniser: starting from the
/// first one, a later candidate wins only if it has at most five inner rectangles,
/// a strictly larger bounding area and strictly more good letters.
/// </summary>
/// <param name="bounding">Bounding box per candidate; must not be empty.</param>
/// <param name="sRects">Inner rectangles per candidate, parallel to <paramref name="bounding"/>.</param>
/// <returns>Index of the selected candidate.</returns>
private int pickBestCandidate(List<Rectangle> bounding, List<Rectangle[]> sRects)
{
    // More than this many inner rectangles suggests shapes other than letters/digits.
    const int MaxInnerRectangles = 5;

    int best = 0;
    int maxArea = bounding[0].Height * bounding[0].Width;
    int goodAspects = checkGoodLetters(sRects[0]);

    for (int i = 1; i < bounding.Count; i++)
    {
        if (sRects[i].Length > MaxInnerRectangles)
        {
            continue;
        }

        int area = bounding[i].Height * bounding[i].Width;
        if (area <= maxArea)
        {
            continue;
        }

        int letters = checkGoodLetters(sRects[i]);
        if (letters > goodAspects)
        {
            best = i;
            maxArea = area;
            goodAspects = letters;
        }
    }
    return best;
}

/// <summary>
/// Crops <paramref name="region"/> out of a copy of <paramref name="grayImg"/>, applies
/// an inverted Otsu threshold, optionally erodes the result, and saves it to a new
/// <c>FileManager.TempPng</c> path. The copy and every Mat header are disposed.
/// </summary>
/// <param name="grayImg">Grayscale source; it is cloned, not modified.</param>
/// <param name="region">Region of interest to crop.</param>
/// <param name="erode">
/// When true, erodes with an elliptical element whose radius is 5% of the crop height.
/// </param>
/// <returns>Path of the written image.</returns>
private string saveThresholdedDigit(Image<Gray, byte> grayImg, Rectangle region, bool erode)
{
    using (Image<Gray, byte> crop = grayImg.Clone())
    {
        crop.ROI = region;
        using (Mat source = crop.Mat)
        using (Mat binary = crop.Mat)
        {
            CvInvoke.Threshold(source, binary, 0, 255, Emgu.CV.CvEnum.ThresholdType.Otsu | Emgu.CV.CvEnum.ThresholdType.BinaryInv);

            if (erode)
            {
                int s = (int)(0.05 * binary.Rows);
                using (Mat elem = CvInvoke.GetStructuringElement(Emgu.CV.CvEnum.ElementShape.Ellipse, new Size(2 * s + 1, 2 * s + 1), new Point(s, s)))
                {
                    CvInvoke.Erode(binary, binary, elem, new Point(s, s), 1, Emgu.CV.CvEnum.BorderType.Reflect, default(MCvScalar));
                }
            }

            string path = FileManager.TempPng;
            binary.Save(path);
            return path;
        }
    }
}
/// <summary>
/// Runs digit recognition over every text chain that passes the width, character
/// height and angle filters, and returns the distinct recognised strings.
/// </summary>
/// <remarks>
/// Side effects on the arguments: for chains that pass the size filters, a direction
/// with negative x is negated in place, and for chains that pass every filter
/// <c>components</c> is replaced by its distinct list.
/// Native images created here are released deterministically, so the method no
/// longer forces garbage collections between chains.
/// </remarks>
/// <param name="input">Source image; converted to single-channel 8-bit with RgbToGray.</param>
/// <param name="_params">Supplies MaxImgWidthToTextRatio, MinCharacterHeight and MaxAngle.</param>
/// <param name="chains">Chains, indexed in parallel with <paramref name="chainBB"/>.</param>
/// <param name="compBB">Component bounding boxes as (top-left, bottom-right) pairs.</param>
/// <param name="chainBB">Chain bounding boxes as (top-left, bottom-right) pairs.</param>
/// <param name="digitsRecognition">Which recogniser(s) to run.</param>
/// <returns>Distinct recognition results across all accepted chains.</returns>
public static string[] Recognize(IplImage input, TextDetectionParams _params, Chain[] chains, List<Tuple<Point2d, Point2d>> compBB, List<Tuple<CvPoint, CvPoint>> chainBB, DigitsRecognitionMethod digitsRecognition)
{
    List<string> variants = new List<string>();

    bool useNeural = digitsRecognition == DigitsRecognitionMethod.Neural
                     || digitsRecognition == DigitsRecognitionMethod.Both;
    bool useTesseract = digitsRecognition == DigitsRecognitionMethod.Tesseract
                        || digitsRecognition == DigitsRecognitionMethod.Both;

    // Component boxes ordered by x; built on first use because the order does not
    // depend on the chain being processed.
    List<Tuple<Point2d, Point2d>> ordCompBB = null;

    //convert to grayscale
    IplImage grayImage = Cv.CreateImage(input.GetSize(), BitDepth.U8, 1);
    try
    {
        Cv.CvtColor(input, grayImage, ColorConversion.RgbToGray);

        for (int i = 0; i < chainBB.Count; i++)
        {
            Tuple<CvPoint, CvPoint> bb = chainBB[i];
            int chainWidth = bb.Item2.X - bb.Item1.X;
            int chainHeight = bb.Item2.Y - bb.Item1.Y;

            //work out if total width of chain is large enough
            if (chainWidth < input.Width / _params.MaxImgWidthToTextRatio)
            {
                continue;
            }

            //eliminate chains with components of lower height than required minimum
            int minHeight = chainHeight;
            for (int j = 0; j < chains[i].components.Count; j++)
            {
                var comp = compBB[chains[i].components[j]];
                minHeight = Math.Min(minHeight, comp.Item2.y - comp.Item1.y);
            }
            if (minHeight < _params.MinCharacterHeight)
            {
                continue;
            }

            //invert direction if angle is in 3rd/4th quadrants
            if (chains[i].direction.x < 0)
            {
                chains[i].direction.x = -chains[i].direction.x;
                chains[i].direction.y = -chains[i].direction.y;
            }

            //work out chain angle
            double theta_deg = 180 * Math.Atan2(chains[i].direction.y, chains[i].direction.x) / Math.PI;
            if (Math.Abs(theta_deg) > _params.MaxAngle)
            {
                continue;
            }
            // With exactly two chains only nearly horizontal ones are accepted.
            if ((chainBB.Count == 2) && (Math.Abs(theta_deg) > 5))
            {
                continue;
            }

            Rect chainRect = new Rect(bb.Item1.X, bb.Item1.Y, chainWidth, chainHeight);

            using (Mat grayMat = new Mat(grayImage))
            {
                chains[i].components = chains[i].components.Distinct().ToList();

                if (ordCompBB == null)
                {
                    ordCompBB = compBB.OrderBy(x => x.Item1.x).ToList();
                }

                List<CvPoint> compCoords = new List<CvPoint>();
                List<string> digits = new List<string>();

                foreach (var box in ordCompBB)
                {
                    Rect roi = new Rect(box.Item1.x, box.Item1.y, box.Item2.x - box.Item1.x, box.Item2.y - box.Item1.y);
                    if (!chainRect.Contains(roi))
                    {
                        continue;
                    }

                    // Four corners of the component, used below for the chain's bounding box.
                    compCoords.Add(new CvPoint(box.Item1.x, box.Item1.y));
                    compCoords.Add(new CvPoint(box.Item2.x, box.Item2.y));
                    compCoords.Add(new CvPoint(box.Item1.x, box.Item2.y));
                    compCoords.Add(new CvPoint(box.Item2.x, box.Item1.y));

                    // Both headers are created over the same ROI of grayMat.
                    // NOTE(review): if these headers share pixel data (the usual OpenCV
                    // sub-matrix behaviour), the threshold overwrites the gray pixels that
                    // the Tesseract crop below reads - confirm that this is intended.
                    using (Mat componentRoi = new Mat(grayMat, roi))
                    using (Mat thresholded = new Mat(grayMat, roi))
                    {
                        Cv2.Threshold(componentRoi, thresholded, 0, 255, ThresholdType.Otsu | ThresholdType.BinaryInv);

                        if (useNeural)
                        {
                            string file = FileManager.TempBitmap;
                            Cv2.ImWrite(file, thresholded);
                            digits.Add(file);
                        }
                    }
                }

                if (useNeural)
                {
                    var result = OCRParser.ParseNeural(digits.ToArray());
                    variants.Add(result.Value);
                }

                if (useTesseract)
                {
                    CvRect _roi = GetBoundingBox(compCoords, new CvSize(input.Width, input.Height));
                    //ROI area can be null if outside of clipping area
                    if ((_roi.Width == 0) || (_roi.Height == 0))
                    {
                        continue;
                    }

                    const int border = 3;
                    using (Mat _mat = new Mat(_roi.Height + 2 * border, _roi.Width + 2 * border, grayMat.Type()))
                    using (Mat tmp = new Mat(grayMat, _roi))
                    using (Mat mat = new Mat(_mat, new Rect(border, border, _roi.Width, _roi.Height)))
                    {
                        //copy bounded box into the inner area of the bordered mat
                        tmp.CopyTo(mat);

                        //resize image to improve OCR success rate
                        // NOTE(review): resizing 'mat' into itself presumably reallocates it,
                        // which would leave the border in '_mat' out of the saved image -
                        // verify whether the border was meant to reach the OCR input.
                        float upscale = 5.0f;
                        Cv2.Resize(mat, mat, new Size(0, 0), upscale, upscale);

                        // Erosion and re-thresholding of the up-scaled image are disabled.
                        string file = FileManager.TempPng;
                        Cv2.ImWrite(file, mat);
                        variants.Add(OCRParser.ParseTesseract(file));
                    }
                }
            }
        }
    }
    finally
    {
        // Released even when a chain fails part-way through.
        Cv.ReleaseImage(grayImage);
    }

    return variants.Distinct().ToArray();
}