/// <summary>
/// Intended to compute, for every recognition candidate of a text line, a pair of
/// costs for putting / not putting a space after that candidate.
/// </summary>
/// <remarks>
/// The image is closed with a growing radius until the median aspect ratio of the
/// component bounding boxes reaches <c>aspect_threshold</c> (or <c>maxrange</c> is
/// exhausted), and is then closed once more with one extra unit of radius.
/// The cost computation itself is not implemented: this stopgap always returns null.
/// </remarks>
/// <param name="candidates">Recognition candidates (not read by the current code).</param>
/// <param name="image">Image of the text line.</param>
/// <returns>Always <c>null</c> in the current implementation.</returns>
public List<List<float>> SpaceCosts(List<Candidate> candidates, Bytearray image) {
    int w = image.Dim(0);
    int h = image.Dim(1);
    Bytearray closed = new Bytearray();

    int r = 0;
    while (r < maxrange) {
        // Radius 0 means "no closing": work on a plain copy of the input.
        closed.Copy(image);
        if (r > 0) {
            Morph.binary_close_circle(closed, r);
        }

        Intarray labeled = new Intarray();
        labeled.Copy(closed);
        ImgLabels.label_components(ref labeled);

        Narray<Rect> rects = new Narray<Rect>();
        ImgLabels.bounding_boxes(ref rects, labeled);

        Floatarray aspects = new Floatarray();
        for (int idx = 0; idx < rects.Length(); idx++) {
            Rect box = rects[idx];
            aspects.Push(box.Aspect());
        }

        float medianAspect = NarrayUtil.Median(aspects);
        if (medianAspect >= this.aspect_threshold) {
            break;
        }
        r++;
    }

    // close with a little bit of extra space
    closed.Copy(image);
    Morph.binary_close_circle(closed, r + 1);

    // TODO: compute the remaining gaps; every character box that ends near a gap
    // should get a space appended.
    return null;
}
/// <summary>
/// Segments a line image by labelling the components of its binarized, inverted
/// copy, optionally after a rectangular closing controlled by the
/// <c>swidth</c> / <c>sheight</c> parameters.
/// </summary>
/// <param name="outimage">Receives the segmentation.</param>
/// <param name="inimage">Input line image; it is copied, not modified.</param>
public override void Charseg(ref Intarray outimage, Bytearray inimage) {
    int closeWidth = PGeti("swidth");
    int closeHeight = PGeti("sheight");

    Bytearray binary = new Bytearray();
    binary.Copy(inimage);
    OcrRoutine.binarize_simple(binary);
    OcrRoutine.Invert(binary);

    // The output keeps the un-closed foreground; closing only influences the labels.
    outimage.Copy(binary);

    bool needsClosing = closeWidth > 0 || closeHeight > 0;
    if (needsClosing) {
        Morph.binary_close_rect(binary, closeWidth, closeHeight);
    }

    Intarray components = new Intarray();
    components.Copy(binary);
    ImgLabels.label_components(ref components);

    int pixelCount = outimage.Length1d();
    for (int p = 0; p < pixelCount; p++) {
        if (outimage.At1d(p) <= 0) {
            continue;
        }
        outimage.Put1d(p, SegmRoutine.cseg_pixel(components.At1d(p)));
    }

    SegmRoutine.make_line_segmentation_white(outimage);
    SegmRoutine.check_line_segmentation(outimage);
}
/// <summary>
/// Binarizes a float image by copying it into a byte array and delegating to the
/// <c>Binarize(Bytearray, Bytearray)</c> overload.
/// </summary>
/// <param name="outarray">Receives the binarized image.</param>
/// <param name="inarray">Float-valued input image.</param>
public void Binarize(Bytearray outarray, Floatarray inarray) {
    Bytearray asBytes = new Bytearray();
    asBytes.Copy(inarray);
    Binarize(outarray, asBytes);
}
/// <summary>
/// Segments a text line using the cuts found by <c>segmenter</c>: every foreground
/// pixel starts at label 1 and is incremented once for each best cut lying at or
/// to the left of it on its row.
/// </summary>
/// <param name="result_segmentation">Receives the segmentation (padding removed).</param>
/// <param name="orig_image">Input line image; it is copied, not modified.</param>
public override void Charseg(ref Intarray result_segmentation, Bytearray orig_image) {
    const int Padding = 3;

    Logger.Default.Image("segmenting", orig_image);
    OcrRoutine.optional_check_background_is_lighter(orig_image);

    Bytearray padded = new Bytearray();
    Narray<byte> paddedRef = padded;
    padded.Copy(orig_image);
    OcrRoutine.binarize_simple(padded);
    OcrRoutine.Invert(padded);
    ImgOps.pad_by(ref paddedRef, Padding, Padding);

    // Hand the image to the segmenter, collect every cut, then keep the best ones.
    segmenter.SetImage(padded);
    segmenter.FindAllCuts();
    segmenter.FindBestCuts();

    int width = padded.Dim(0);
    int height = padded.Dim(1);

    Intarray regions = new Intarray();
    regions.Resize(width, height);
    for (int x = 0; x < width; x++) {
        for (int y = 0; y < height; y++) {
            if (padded[x, y] > 0) {
                regions[x, y] = 1;
            } else {
                regions[x, y] = 0;
            }
        }
    }

    for (int n = 0; n < segmenter.bestcuts.Length(); n++) {
        int cutIndex = segmenter.bestcuts[n];
        Narray<Point> path = segmenter.cuts[cutIndex];
        for (int y = 0; y < height; y++) {
            // Everything at or right of the cut on this row moves to the next label.
            for (int x = path[y].X; x < width; x++) {
                if (regions[x, y] > 0) {
                    regions[x, y] = regions[x, y] + 1;
                }
            }
        }
    }

    ImgOps.extract_subimage(result_segmentation, regions,
                            Padding, Padding,
                            regions.Dim(0) - Padding, regions.Dim(1) - Padding);

    if (small_merge_threshold > 0) {
        SegmRoutine.line_segmentation_merge_small_components(ref result_segmentation, small_merge_threshold);
        SegmRoutine.line_segmentation_sort_x(result_segmentation);
    }

    SegmRoutine.make_line_segmentation_white(result_segmentation);
    Logger.Default.Image("resulting segmentation", result_segmentation);
}
/// <summary>
/// Byte-array convenience overload: converts the input to floats, runs the
/// <c>Extract(Floatarray, Floatarray)</c> overload, and copies the result back
/// into a byte array.
/// </summary>
/// <param name="outa">Receives the extracted result.</param>
/// <param name="ina">Input array.</param>
public virtual void Extract(Bytearray outa, Bytearray ina) {
    Floatarray floatInput = new Floatarray();
    Floatarray floatOutput = new Floatarray();
    floatInput.Copy(ina);
    Extract(floatOutput, floatInput);
    outa.Copy(floatOutput);
}
/// <summary>
/// Segments a line image into labelled components and merges components whose
/// bounding boxes overlap horizontally by at least half the width of the narrower box.
/// </summary>
/// <param name="outimage">Receives the renumbered segmentation.</param>
/// <param name="inarray">Input line image; it is copied, not modified.</param>
public override void Charseg(ref Intarray outimage, Bytearray inarray) {
    Bytearray binary = new Bytearray();
    binary.Copy(inarray);
    OcrRoutine.binarize_simple(binary);
    OcrRoutine.Invert(binary);
    outimage.Copy(binary);

    Intarray labels = new Intarray();
    labels.Copy(binary);
    ImgLabels.label_components(ref labels);

    Narray<Rect> boxes = new Narray<Rect>();
    ImgLabels.bounding_boxes(ref boxes, labels);
    int boxCount = boxes.Length();

    // equiv[label] is the label this component is mapped to; starts as identity.
    Intarray equiv = new Intarray(boxCount);
    for (int n = 0; n < boxCount; n++) {
        equiv[n] = n;
    }

    // Index 0 is skipped in both loops, exactly as in the pairwise comparison below.
    for (int a = 1; a < boxCount; a++) {
        Rect first = boxes[a];
        for (int b = 1; b < boxCount; b++) {
            if (a == b) {
                continue;
            }
            Rect second = boxes[b];

            int left = Math.Max(first.x0, second.x0);
            int right = Math.Min(first.x1, second.x1);
            int overlap = right - left;
            if (overlap <= 0) {
                continue; // no overlap
            }

            int narrower = Math.Min(first.Width(), second.Width());
            float frac = overlap / (float)narrower;
            if (frac < 0.5f) {
                continue; // insufficient overlap
            }

            // Map the higher label onto the lower one.
            equiv.Put1d(Math.Max(a, b), Math.Min(a, b));
        }
    }

    for (int p = 0; p < labels.Length(); p++) {
        int current = labels.At1d(p);
        labels.Put1d(p, equiv.At1d(current));
    }

    ImgLabels.renumber_labels(labels, 1);
    outimage.Move(labels);
    SegmRoutine.make_line_segmentation_white(outimage);
    SegmRoutine.check_line_segmentation(outimage);
}
/// <summary>
/// Loads a line image into the segmenter: fills <c>dimage</c> with a copy of the
/// input, builds the weight image <c>wimage</c>, and sets <c>where</c> to the mean
/// row of the foreground pixels (or the middle row when there are none).
/// </summary>
/// <param name="image_">Input image; it is copied, not modified.</param>
public void SetImage(Bytearray image_) {
    Bytearray work = new Bytearray();
    work.Copy(image_);
    dimage.Copy(work);

    if (PGeti("fill_holes") > 0) {
        Bytearray holes = new Bytearray();
        SegmRoutine.extract_holes(ref holes, work);
        for (int p = 0; p < work.Length(); p++) {
            if (holes.At1d(p) > 0) {
                work.Put1d(p, 255);
            }
        }
    }

    int w = work.Dim(0), h = work.Dim(1);
    wimage.Resize(w, h);
    wimage.Fill(0);

    float inkCount = 0.0f, inkRowSum = 0.0f;
    // Column 0 is deliberately left at the fill value; the scan starts at column 1.
    for (int col = 1; col < w; col++) {
        for (int row = 0; row < h; row++) {
            bool isInk = work[col, row] > 0;
            if (isInk) {
                inkCount++;
                inkRowSum += row;
            }
            wimage[col, row] = isInk ? inside_weight : outside_weight;
        }
    }

    where = (inkCount == 0) ? work.Dim(1) / 2 : (int)(inkRowSum / inkCount);

    // Mark the chosen row in the debug image.
    for (int col = 0; col < dimage.Dim(0); col++) {
        dimage[col, where] = 0x008000;
    }
}
/// <summary>
/// Prepares a thresholded (and optionally Gaussian-smoothed and re-thresholded)
/// working copy of <paramref name="image"/>.
/// </summary>
/// <remarks>
/// NOTE(review): the visible body never writes to <paramref name="endpoints"/> or
/// <paramref name="junctions"/> and never reads <paramref name="skelsmooth"/>;
/// this looks like an unfinished port - confirm before relying on the outputs.
/// </remarks>
/// <param name="endpoints">Not written by the current code.</param>
/// <param name="junctions">Not written by the current code.</param>
/// <param name="image">Input image; it is copied, not modified.</param>
/// <param name="presmooth">Gaussian sigma applied when greater than zero.</param>
/// <param name="skelsmooth">Unused by the current code.</param>
public static void skeletal_features(Bytearray endpoints, Bytearray junctions, Bytearray image, float presmooth, float skelsmooth) {
    Bytearray work = new Bytearray();
    work.Copy(image);
    NarrayUtil.Greater(work, (byte)128, (byte)0, (byte)255);

    if (!(presmooth > 0f)) {
        return;
    }

    Gauss.Gauss2d(work, presmooth, presmooth);
    NarrayUtil.Greater(work, (byte)128, (byte)0, (byte)255);
}
/// <summary>
/// Dilates a binary image in place with a disc of radius <paramref name="r"/> by
/// OR-ing in the image shifted by every offset inside the disc.
/// </summary>
/// <param name="image">Image to dilate; replaced by the result.</param>
/// <param name="r">Disc radius; 0 leaves the image untouched.</param>
public static void binary_dilate_circle(Bytearray image, int r) {
    if (r == 0) {
        return;
    }

    Bytearray result = new Bytearray();
    result.Copy(image);

    int radiusSquared = r * r;
    for (int dx = -r; dx <= r; dx++) {
        for (int dy = -r; dy <= r; dy++) {
            bool insideDisc = dx * dx + dy * dy <= radiusSquared;
            if (insideDisc) {
                binary_or(result, image, dx, dy);
            }
        }
    }

    image.Move(result);
}
/// <summary>
/// Dilates a binary image in place with an <paramref name="rw"/> x
/// <paramref name="rh"/> rectangle, as a horizontal pass followed by a vertical pass.
/// </summary>
/// <param name="image">Image to dilate; modified in place.</param>
/// <param name="rw">Rectangle width.</param>
/// <param name="rh">Rectangle height.</param>
public static void binary_dilate_rect(Bytearray image, int rw, int rh) {
    if (rw == 0 && rh == 0) {
        return;
    }

    Bytearray horizontal = new Bytearray();
    horizontal.Copy(image);

    // Even sizes are centred complementary to erode_rect, so that open_rect and
    // close_rect do the right thing.
    int xOrigin = (rw - 1) / 2;
    int yOrigin = (rh - 1) / 2;

    // Horizontal pass: accumulate shifted copies of the input into the scratch image.
    for (int step = 0; step < rw; step++) {
        binary_or(horizontal, image, step - xOrigin, 0);
    }

    // Vertical pass: accumulate shifted copies of the scratch image back into the input.
    for (int step = 0; step < rh; step++) {
        binary_or(image, horizontal, 0, step - yOrigin);
    }
}
/// <summary>
/// Cut-based line segmentation: foreground pixels receive label 1 plus the number
/// of best cuts located at or to the left of them on their row.
/// </summary>
/// <param name="result_segmentation">Receives the segmentation with the padding stripped.</param>
/// <param name="orig_image">Input line image; it is copied, not modified.</param>
public override void Charseg(ref Intarray result_segmentation, Bytearray orig_image) {
    Logger.Default.Image("segmenting", orig_image);
    const int Border = 3;
    OcrRoutine.optional_check_background_is_lighter(orig_image);

    Bytearray work = new Bytearray();
    Narray<byte> workAsNarray = work;
    work.Copy(orig_image);
    OcrRoutine.binarize_simple(work);
    OcrRoutine.Invert(work);
    ImgOps.pad_by(ref workAsNarray, Border, Border);

    // pass image to segmenter, find all cuts, choose the best of them
    segmenter.SetImage(work);
    segmenter.FindAllCuts();
    segmenter.FindBestCuts();

    Intarray labelled = new Intarray();
    labelled.Resize(work.Dim(0), work.Dim(1));

    int columns = work.Dim(0);
    int rows = work.Dim(1);
    for (int col = 0; col < columns; col++) {
        for (int row = 0; row < rows; row++) {
            labelled[col, row] = work[col, row] > 0 ? 1 : 0;
        }
    }

    int bestCount = segmenter.bestcuts.Length();
    for (int b = 0; b < bestCount; b++) {
        Narray<Point> cutPath = segmenter.cuts[segmenter.bestcuts[b]];
        for (int row = 0; row < rows; row++) {
            int col = cutPath[row].X;
            while (col < columns) {
                if (labelled[col, row] > 0) {
                    labelled[col, row]++;
                }
                col++;
            }
        }
    }

    ImgOps.extract_subimage(result_segmentation, labelled, Border, Border,
                            labelled.Dim(0) - Border, labelled.Dim(1) - Border);

    if (small_merge_threshold > 0) {
        SegmRoutine.line_segmentation_merge_small_components(ref result_segmentation, small_merge_threshold);
        SegmRoutine.line_segmentation_sort_x(result_segmentation);
    }

    SegmRoutine.make_line_segmentation_white(result_segmentation);
    Logger.Default.Image("resulting segmentation", result_segmentation);
}
/// <summary>
/// Segments a text line by erasing a three-pixel-wide band along every best cut,
/// labelling the remaining components, and propagating those labels back onto
/// the full foreground.
/// </summary>
/// <param name="segmentation">Receives the segmentation.</param>
/// <param name="inraw">Input line image; it is copied, not modified.</param>
public override void Charseg(ref Intarray segmentation, Bytearray inraw) {
    Logger.Default.Image("segmenting", inraw);
    OcrRoutine.optional_check_background_is_lighter(inraw);

    Bytearray binary = new Bytearray();
    binary.Copy(inraw);
    OcrRoutine.binarize_simple(binary);
    OcrRoutine.Invert(binary);

    segmenter.SetImage(binary);
    segmenter.FindAllCuts();
    segmenter.FindBestCuts();

    Intarray seg = new Intarray();
    seg.Copy(binary);

    for (int n = 0; n < segmenter.bestcuts.Length(); n++) {
        int width = seg.Dim(0);
        int cutIndex = segmenter.bestcuts[n];
        Narray<Point> path = segmenter.cuts[cutIndex];

        for (int y = 0; y < binary.Dim(1); y++) {
            int x = path[y].X;
            // Cuts touching the left/right border are ignored entirely.
            if (x < 1 || x >= width - 1) {
                continue;
            }
            seg[x - 1, y] = 0;
            seg[x, y] = 0;
            seg[x + 1, y] = 0;
        }
    }

    ImgLabels.label_components(ref seg);

    segmentation.Copy(binary);
    ImgLabels.propagate_labels_to(ref segmentation, seg);

    SegmRoutine.line_segmentation_merge_small_components(ref segmentation, small_merge_threshold);
    SegmRoutine.line_segmentation_sort_x(segmentation);
    SegmRoutine.make_line_segmentation_white(segmentation);
    Logger.Default.Image("resulting segmentation", segmentation);
}
/// <summary>
/// Runs the base segmenter and combines its result with a connected-component
/// labelling of the binarized, inverted input.
/// </summary>
/// <param name="result_segmentation">Receives the combined segmentation.</param>
/// <param name="orig_image">Input line image; it is copied, not modified here.</param>
public override void Charseg(ref Intarray result_segmentation, Bytearray orig_image) {
    // Connected components of a private, binarized and inverted copy.
    Bytearray binary = new Bytearray();
    binary.Copy(orig_image);
    OcrRoutine.optional_check_background_is_lighter(binary);
    OcrRoutine.binarize_simple(binary);
    OcrRoutine.Invert(binary);

    Intarray components = new Intarray();
    components.Copy(binary);
    ImgLabels.label_components(ref components);

    // The base class works from the original image, then both results are merged.
    base.Charseg(ref result_segmentation, orig_image);
    SegmRoutine.combine_segmentations(ref result_segmentation, components);
}
/// <summary>
/// Builds a segmentation in which the segments numbered <paramref name="start"/>
/// through <paramref name="end"/> (inclusive) are removed, the remaining segments
/// are renumbered consecutively from 1, and the pixels of the removed segments
/// are set to 255 in the output image.
/// </summary>
/// <param name="cseg">Output segmentation; made to match <paramref name="rseg"/>.</param>
/// <param name="rseg">Input segmentation.</param>
/// <param name="outimg">Output image: a copy of <paramref name="img"/> with removed segments set to 255.</param>
/// <param name="img">Input image; must have as many pixels as <paramref name="rseg"/>.</param>
/// <param name="start">First segment number to remove.</param>
/// <param name="end">Last segment number to remove.</param>
/// <exception cref="ArgumentException">
/// <paramref name="start"/> is greater than <paramref name="end"/>, either exceeds the
/// largest segment number in <paramref name="rseg"/>, or <paramref name="rseg"/> and
/// <paramref name="img"/> differ in size.
/// </exception>
public static void rseg_to_cseg_remove(Intarray cseg, Intarray rseg, Bytearray outimg, Bytearray img, int start, int end) {
    int maxSegNum = NarrayUtil.Max(rseg);

    // ArgumentException derives from Exception, so existing catch blocks keep working.
    if (start > end) {
        throw new ArgumentException("segmentation encoded in IDs looks seriously broken!");
    }
    if (start > maxSegNum || end > maxSegNum) {
        throw new ArgumentException("segmentation encoded in IDs doesn't fit!");
    }
    if (rseg.Length1d() != img.Length1d()) {
        throw new ArgumentException("rseg and img must have the same dimension!");
    }

    // map[old] is the new segment number, or 0 for removed segments and background.
    Intarray map = new Intarray(maxSegNum + 1);
    map.Fill(0);
    int color = 1;
    for (int i = 1; i <= maxSegNum; i++) {
        bool removed = i >= start && i <= end;
        if (removed) {
            map[i] = 0;
        } else {
            map[i] = color;
            color++;
        }
    }

    cseg.MakeLike(rseg);
    outimg.Copy(img);
    for (int i = 0; i < cseg.Length1d(); i++) {
        int val = rseg.At1d(i);
        cseg.Put1d(i, map[val]);
        // A labelled pixel that maps to 0 belonged to a removed segment.
        if (val > 0 && map[val] == 0) {
            outimg.Put1d(i, 255);
        }
    }
}
/// <summary>
/// Erodes a binary image in place with an <paramref name="rw"/> x
/// <paramref name="rh"/> rectangle, as a horizontal pass followed by a vertical pass.
/// </summary>
/// <param name="image">Image to erode; modified in place.</param>
/// <param name="rw">Rectangle width.</param>
/// <param name="rh">Rectangle height.</param>
public static void binary_erode_rect(Bytearray image, int rw, int rh) {
    if (rw == 0 && rh == 0) {
        return;
    }

    Bytearray horizontal = new Bytearray();
    horizontal.Copy(image);

    int xOrigin = rw / 2;
    int yOrigin = rh / 2;

    // Horizontal pass: AND shifted copies of the input into the scratch image.
    for (int step = 0; step < rw; step++) {
        binary_and(horizontal, image, step - xOrigin, 0);
    }

    // Vertical pass: AND shifted copies of the scratch image back into the input.
    for (int step = 0; step < rh; step++) {
        binary_and(image, horizontal, 0, step - yOrigin);
    }
}
/// <summary>
/// Cut-based segmentation: blanks a three-pixel-wide strip along each best cut,
/// labels the resulting components, and spreads the labels over the whole foreground.
/// </summary>
/// <param name="segmentation">Receives the segmentation.</param>
/// <param name="inraw">Input line image; it is copied, not modified.</param>
public override void Charseg(ref Intarray segmentation, Bytearray inraw) {
    Logger.Default.Image("segmenting", inraw);
    OcrRoutine.optional_check_background_is_lighter(inraw);

    Bytearray foreground = new Bytearray();
    foreground.Copy(inraw);
    OcrRoutine.binarize_simple(foreground);
    OcrRoutine.Invert(foreground);

    segmenter.SetImage(foreground);
    segmenter.FindAllCuts();
    segmenter.FindBestCuts();

    Intarray split = new Intarray();
    split.Copy(foreground);

    int cutNumber = 0;
    while (cutNumber < segmenter.bestcuts.Length()) {
        int columns = split.Dim(0);
        Narray<Point> cut = segmenter.cuts[segmenter.bestcuts[cutNumber]];
        for (int row = 0; row < foreground.Dim(1); row++) {
            int centre = cut[row].X;
            bool touchesBorder = centre < 1 || centre >= columns - 1;
            if (!touchesBorder) {
                for (int offset = -1; offset <= 1; offset++) {
                    split[centre + offset, row] = 0;
                }
            }
        }
        cutNumber++;
    }

    ImgLabels.label_components(ref split);

    segmentation.Copy(foreground);
    ImgLabels.propagate_labels_to(ref segmentation, split);
    SegmRoutine.line_segmentation_merge_small_components(ref segmentation, small_merge_threshold);
    SegmRoutine.line_segmentation_sort_x(segmentation);
    SegmRoutine.make_line_segmentation_white(segmentation);
    Logger.Default.Image("resulting segmentation", segmentation);
}
/// <summary>
/// Skeleton-based segmentation: thins a binarized copy of the image, removes
/// singular points, labels the remaining skeleton pieces (dropping small ones),
/// and propagates those labels onto the inverted image.
/// </summary>
/// <remarks>
/// The inversion is applied to a private copy, so the caller's
/// <paramref name="image"/> is no longer modified. The resulting segmentation is
/// the same as before.
/// NOTE(review): the copy that is thinned is binarized but not inverted, while
/// the image receiving the labels is inverted but not binarized - confirm that
/// this polarity is what Skeleton.Thin expects.
/// </remarks>
/// <param name="segmentation">Receives the segmentation.</param>
/// <param name="image">Input line image; left untouched.</param>
public override void Charseg(ref Intarray segmentation, Bytearray image) {
    // Skeleton source: binarized copy of the input.
    Bytearray timage = new Bytearray();
    timage.Copy(image);
    OcrRoutine.binarize_simple(timage);

    // Inverted copy used as the label target; inverting a copy keeps the
    // caller's array intact.
    Bytearray inverted = new Bytearray();
    inverted.Copy(image);
    OcrRoutine.Invert(inverted);

    Skeleton.Thin(ref timage);
    ImgMisc.remove_singular_points(ref timage, 2);

    Intarray tsegmentation = new Intarray();
    tsegmentation.Copy(timage);
    ImgLabels.label_components(ref tsegmentation);
    SegmRoutine.remove_small_components(tsegmentation, 4, 4);

    segmentation.Copy(inverted);
    ImgLabels.propagate_labels_to(ref segmentation, tsegmentation);
}
/// <summary>
/// Component-based line segmentation. The foreground of the binarized, inverted
/// image is labelled with the components found after an optional rectangular
/// closing (<c>swidth</c> x <c>sheight</c>).
/// </summary>
/// <param name="outimage">Receives the segmentation.</param>
/// <param name="inimage">Input line image; it is copied, not modified.</param>
public override void Charseg(ref Intarray outimage, Bytearray inimage) {
    int swidth = PGeti("swidth");
    int sheight = PGeti("sheight");

    Bytearray work = new Bytearray();
    work.Copy(inimage);
    OcrRoutine.binarize_simple(work);
    OcrRoutine.Invert(work);
    outimage.Copy(work);

    if (!(swidth <= 0 && sheight <= 0)) {
        Morph.binary_close_rect(work, swidth, sheight);
    }

    Intarray labelled = new Intarray();
    labelled.Copy(work);
    ImgLabels.label_components(ref labelled);

    // Only pixels that were foreground before the closing receive a label.
    int index = 0;
    while (index < outimage.Length1d()) {
        if (outimage.At1d(index) > 0) {
            int component = labelled.At1d(index);
            outimage.Put1d(index, SegmRoutine.cseg_pixel(component));
        }
        index++;
    }

    SegmRoutine.make_line_segmentation_white(outimage);
    SegmRoutine.check_line_segmentation(outimage);
}
/// <summary>
/// Erodes a binary image in place with a disc of radius <paramref name="r"/> by
/// AND-ing in the image shifted by every offset inside the disc.
/// </summary>
/// <param name="image">Image to erode; replaced by the result.</param>
/// <param name="r">Disc radius; 0 leaves the image untouched.</param>
public static void binary_erode_circle(Bytearray image, int r) {
    if (r == 0) {
        return;
    }

    Bytearray result = new Bytearray();
    result.Copy(image);

    int radiusSquared = r * r;
    for (int dx = -r; dx <= r; dx++) {
        for (int dy = -r; dy <= r; dy++) {
            if (dx * dx + dy * dy > radiusSquared) {
                continue; // offset lies outside the disc
            }
            binary_and(result, image, dx, dy);
        }
    }

    image.Move(result);
}
/// <summary>
/// Rectangular binary dilation, performed in place as two separable passes
/// (horizontal, then vertical).
/// </summary>
/// <param name="image">Image to dilate; modified in place.</param>
/// <param name="rw">Width of the rectangle.</param>
/// <param name="rh">Height of the rectangle.</param>
public static void binary_dilate_rect(Bytearray image, int rw, int rh) {
    bool nothingToDo = rw == 0 && rh == 0;
    if (nothingToDo) {
        return;
    }

    Bytearray scratch = new Bytearray();
    scratch.Copy(image);

    // note that the even cases are handled complementary to erode_rect;
    // this makes open_rect and close_rect do the right thing
    int i = 0;
    while (i < rw) {
        int shiftX = i - (rw - 1) / 2;
        binary_or(scratch, image, shiftX, 0);
        i++;
    }

    int j = 0;
    while (j < rh) {
        int shiftY = j - (rh - 1) / 2;
        binary_or(image, scratch, 0, shiftY);
        j++;
    }
}
/// <summary>
/// Labels the components of the binarized, inverted line image and folds
/// together components whose bounding boxes share at least half of the narrower
/// box's width horizontally.
/// </summary>
/// <param name="outimage">Receives the renumbered segmentation.</param>
/// <param name="inarray">Input line image; it is copied, not modified.</param>
public override void Charseg(ref Intarray outimage, Bytearray inarray) {
    Bytearray work = new Bytearray();
    work.Copy(inarray);
    OcrRoutine.binarize_simple(work);
    OcrRoutine.Invert(work);
    outimage.Copy(work);

    Intarray labels = new Intarray();
    labels.Copy(work);
    ImgLabels.label_components(ref labels);

    Narray<Rect> boxes = new Narray<Rect>();
    ImgLabels.bounding_boxes(ref boxes, labels);

    // Start with every label mapped to itself.
    Intarray equiv = new Intarray(boxes.Length());
    int slot = 0;
    while (slot < boxes.Length()) {
        equiv[slot] = slot;
        slot++;
    }

    // Compare every ordered pair of boxes, skipping index 0.
    for (int i = 1; i < boxes.Length(); i++) {
        Rect p = boxes[i];
        for (int j = 1; j < boxes.Length(); j++) {
            if (i != j) {
                Rect q = boxes[j];
                int sharedStart = Math.Max(p.x0, q.x0);
                int sharedEnd = Math.Min(p.x1, q.x1);
                int sharedWidth = sharedEnd - sharedStart;
                if (sharedWidth > 0) {
                    int smallerWidth = Math.Min(p.Width(), q.Width());
                    float frac = sharedWidth / (float)smallerWidth;
                    if (!(frac < 0.5f)) {
                        int high = Math.Max(i, j);
                        int low = Math.Min(i, j);
                        equiv.Put1d(high, low);
                    }
                }
            }
        }
    }

    for (int i = 0; i < labels.Length(); i++) {
        labels.Put1d(i, equiv.At1d(labels.At1d(i)));
    }

    ImgLabels.renumber_labels(labels, 1);
    outimage.Move(labels);
    SegmRoutine.make_line_segmentation_white(outimage);
    SegmRoutine.check_line_segmentation(outimage);
}
/// <summary>
/// Extracts five feature maps from a character image and pushes them onto
/// <paramref name="outarrays"/> in this order: x gradient, y gradient, junctions,
/// endpoints and holes. Every map is rescaled with <c>OcrRoutine.scale_to</c>
/// using the <c>csize</c>, <c>noupscale</c> and <c>aa</c> parameters.
/// </summary>
/// <remarks>
/// The junction, endpoint and hole maps are averaged over <c>n</c> passes made
/// with an increasing smoothing/dilation amount (<c>step * i</c>). The per-pass
/// maps are summed and then scaled by <c>1/n</c>. The earlier code overwrote the
/// running map on each pass, so only the last pass contributed (scaled by 1/n).
/// </remarks>
/// <param name="outarrays">Cleared, then filled with the five feature maps.</param>
/// <param name="inarray">Input image; it is copied, not modified.</param>
public override void Extract(Narray<Floatarray> outarrays, Floatarray inarray) {
    outarrays.Clear();
    Floatarray input = new Floatarray();
    input.Copy(inarray);
    int w = input.Dim(0), h = input.Dim(1);
    Floatarray a = new Floatarray(); // working array
    int csize = PGeti("csize");

    // get rid of small components
    SegmRoutine.erase_small_components(input, PGetf("minsize"), PGetf("threshold"));

    // compute a thresholded version for morphological operations
    Bytearray thresholded = new Bytearray();
    OcrRoutine.threshold_frac(thresholded, input, PGetf("threshold"));

    // compute a smoothed version of the input for gradient computations
    float sigma = PGetf("gradsigma");
    Floatarray smoothed = new Floatarray();
    smoothed.Copy(input);
    Gauss.Gauss2d(smoothed, sigma, sigma);

    // x gradient (backward difference; the first column has no left neighbour)
    a.Resize(w, h);
    for (int j = 0; j < h; j++) {
        for (int i = 0; i < w; i++) {
            float delta;
            if (i == 0) {
                delta = 0f;
            } else {
                delta = smoothed[i, j] - smoothed[i - 1, j];
            }
            a[i, j] = delta;
        }
    }
    Floatarray xgrad = outarrays.Push(new Floatarray());
    OcrRoutine.scale_to(xgrad, a, csize, PGetf("noupscale"), PGetf("aa"));
    // Even rows keep only the positive part, odd rows only the negative part.
    for (int j = 0; j < csize; j++) {
        for (int i = 0; i < csize; i++) {
            if (j % 2 == 0) {
                xgrad[i, j] = Math.Max(xgrad[i, j], 0f);
            } else {
                xgrad[i, j] = Math.Min(xgrad[i, j], 0f);
            }
        }
    }

    // y gradient (backward difference; the first row has no upper neighbour)
    a.Resize(w, h);
    for (int i = 0; i < w; i++) {
        for (int j = 0; j < h; j++) {
            float delta;
            if (j == 0) {
                delta = 0f;
            } else {
                delta = smoothed[i, j] - smoothed[i, j - 1];
            }
            a[i, j] = delta;
        }
    }
    Floatarray ygrad = outarrays.Push(new Floatarray());
    OcrRoutine.scale_to(ygrad, a, csize, PGetf("noupscale"), PGetf("aa"));
    // Even columns keep only the positive part, odd columns only the negative part.
    for (int i = 0; i < csize; i++) {
        for (int j = 0; j < csize; j++) {
            if (i % 2 == 0) {
                ygrad[i, j] = Math.Max(ygrad[i, j], 0f);
            } else {
                ygrad[i, j] = Math.Min(ygrad[i, j], 0f);
            }
        }
    }

    // junctions, endpoints, and holes
    Floatarray junctions = new Floatarray();
    Floatarray endpoints = new Floatarray();
    Floatarray holes = new Floatarray();
    Bytearray junctions1 = new Bytearray();
    Bytearray endpoints1 = new Bytearray();
    Bytearray holes1 = new Bytearray();
    Bytearray dilated = new Bytearray();
    Bytearray binary = new Bytearray();
    junctions.MakeLike(input, 0f);
    endpoints.MakeLike(input, 0f);
    holes.MakeLike(input, 0f);

    int n = PGeti("n");
    float step = PGetf("step");
    int bs = PGeti("binsmooth");
    for (int i = 0; i < n; i++) {
        sigma = step * i;
        if (bs > 0) {
            OcrRoutine.binsmooth(binary, input, sigma);
        } else {
            binary.Copy(thresholded);
            Morph.binary_dilate_circle(binary, (int)(sigma));
        }

        OcrRoutine.skeletal_features(endpoints1, junctions1, binary, 0.0f, 0.0f);

        // Each per-pass map is reduced with NarrayUtil.Greater and added to the
        // running sum, so that the final 1/n scaling yields an average.
        NarrayUtil.Greater(junctions1, (byte)0, (byte)0, (byte)1);
        AccumulateInto(junctions, junctions1);
        NarrayUtil.Greater(endpoints1, (byte)0, (byte)0, (byte)1);
        AccumulateInto(endpoints, endpoints1);
        SegmRoutine.extract_holes(ref holes1, binary);
        NarrayUtil.Greater(holes1, (byte)0, (byte)0, (byte)1);
        AccumulateInto(holes, holes1);
    }
    junctions *= 1.0f / (float)n;
    endpoints *= 1.0f / (float)n;
    holes *= 1.0f / (float)n;

    OcrRoutine.scale_to(outarrays.Push(new Floatarray()), junctions, csize, PGetf("noupscale"), PGetf("aa"));
    OcrRoutine.scale_to(outarrays.Push(new Floatarray()), endpoints, csize, PGetf("noupscale"), PGetf("aa"));
    OcrRoutine.scale_to(outarrays.Push(new Floatarray()), holes, csize, PGetf("noupscale"), PGetf("aa"));
}

/// <summary>
/// Adds every element of <paramref name="term"/> to the matching element of
/// <paramref name="sum"/>.
/// </summary>
/// <remarks>
/// When the two arrays differ in length, <paramref name="sum"/> is replaced by a
/// copy of <paramref name="term"/> (the former overwrite behaviour) instead of
/// indexing out of range.
/// </remarks>
private static void AccumulateInto(Floatarray sum, Bytearray term) {
    int count = sum.Length1d();
    if (term.Length1d() != count) {
        sum.Copy(term);
        return;
    }
    for (int k = 0; k < count; k++) {
        sum.Put1d(k, sum.At1d(k) + term.At1d(k));
    }
}
/// <summary>
/// Locally adaptive binarization. For every pixel the mean and standard deviation
/// of a window of half-size <c>w / 2</c> are obtained from integral images, and
/// the pixel is compared against <c>mean * (1 + k * (std / 128 - 1))</c>.
/// </summary>
/// <remarks>
/// Reads the <c>w</c> and <c>k</c> parameters into the corresponding fields. An
/// input containing only the values 0 and 255 is copied through unchanged.
/// </remarks>
/// <param name="bin_image">Receives the result (0 or <c>MAXVAL - 1</c> per pixel).</param>
/// <param name="gray_image">Grayscale input image.</param>
/// <exception cref="Exception">
/// <c>k</c> is outside [0.001, 0.999], <c>w</c> is 0, or a window has a non-positive area.
/// </exception>
public override void Binarize(Bytearray bin_image, Bytearray gray_image) {
    w = PGeti("w");
    k = (float)PGetf("k");
    whalf = w >> 1;

    if (k < 0.001 || k > 0.999) {
        throw new Exception("Binarize: CHECK_ARG(k>=0.001 && k<=0.999)");
    }
    if (w == 0 || k >= 1000) {
        throw new Exception("Binarize: CHECK_ARG(w>0 && k<1000)");
    }

    if (bin_image.Length1d() != gray_image.Length1d()) {
        bin_image.MakeLike(gray_image);
    }

    // Already two-valued: nothing to threshold.
    if (NarrayUtil.contains_only(gray_image, (byte)0, (byte)255)) {
        bin_image.Copy(gray_image);
        return;
    }

    int image_width = gray_image.Dim(0);
    int image_height = gray_image.Dim(1);

    // Integral images of the pixel values and of their squares, built from
    // per-row running sums.
    Narray<long> integral_image = new Narray<long>(), rowsum_image = new Narray<long>();
    Narray<long> integral_sqimg = new Narray<long>(), rowsum_sqimg = new Narray<long>();
    integral_image.MakeLike(gray_image);
    rowsum_image.MakeLike(gray_image);
    integral_sqimg.MakeLike(gray_image);
    rowsum_sqimg.MakeLike(gray_image);

    for (int y = 0; y < image_height; y++) {
        int value = gray_image[0, y];
        rowsum_image[0, y] = value;
        rowsum_sqimg[0, y] = value * value;
    }
    for (int x = 1; x < image_width; x++) {
        for (int y = 0; y < image_height; y++) {
            int value = gray_image[x, y];
            rowsum_image[x, y] = rowsum_image[x - 1, y] + value;
            rowsum_sqimg[x, y] = rowsum_sqimg[x - 1, y] + value * value;
        }
    }

    for (int x = 0; x < image_width; x++) {
        integral_image[x, 0] = rowsum_image[x, 0];
        integral_sqimg[x, 0] = rowsum_sqimg[x, 0];
    }
    for (int x = 0; x < image_width; x++) {
        for (int y = 1; y < image_height; y++) {
            integral_image[x, y] = integral_image[x, y - 1] + rowsum_image[x, y];
            integral_sqimg[x, y] = integral_sqimg[x, y - 1] + rowsum_sqimg[x, y];
        }
    }

    // Threshold every pixel from the statistics of its (clipped) window.
    for (int x = 0; x < image_width; x++) {
        for (int y = 0; y < image_height; y++) {
            int xmin = Math.Max(0, x - whalf);
            int ymin = Math.Max(0, y - whalf);
            int xmax = Math.Min(image_width - 1, x + whalf);
            int ymax = Math.Min(image_height - 1, y + whalf);

            // With whalf >= 0 the window always contains the pixel itself, so the
            // area is positive; the check guards against a negative window size.
            double area = (xmax - xmin + 1) * (ymax - ymin + 1);
            if (area <= 0) {
                throw new Exception("Binarize: area can't be 0 here");
            }

            // Window sum = bottom-right corner, minus the strips above and to the
            // left of the window, plus the doubly-subtracted corner. Strips that
            // fall outside the image (xmin == 0 or ymin == 0) contribute nothing.
            double diff = integral_image[xmax, ymax];
            double sqdiff = integral_sqimg[xmax, ymax];
            if (ymin > 0) {
                diff -= integral_image[xmax, ymin - 1];
                sqdiff -= integral_sqimg[xmax, ymin - 1];
            }
            if (xmin > 0) {
                diff -= integral_image[xmin - 1, ymax];
                sqdiff -= integral_sqimg[xmin - 1, ymax];
            }
            if (xmin > 0 && ymin > 0) {
                diff += integral_image[xmin - 1, ymin - 1];
                sqdiff += integral_sqimg[xmin - 1, ymin - 1];
            }

            double mean = diff / area;
            double std = Math.Sqrt((sqdiff - diff * diff / area) / (area - 1));
            double threshold = mean * (1 + k * ((std / 128) - 1));

            bin_image[x, y] = gray_image[x, y] < threshold ? (byte)0 : (byte)(MAXVAL - 1);
        }
    }

    if (PGeti("debug_binarize") > 0) {
        ImgIo.write_image_gray("debug_binarize.png", bin_image);
    }
}
/// <summary>
/// Prepares the segmenter for a new line image: copies it into <c>dimage</c>,
/// optionally fills holes, derives the weight image <c>wimage</c>, and stores in
/// <c>where</c> the average row index of the foreground pixels.
/// </summary>
/// <param name="image_">Input image; it is copied, not modified.</param>
public void SetImage(Bytearray image_) {
    Bytearray image = new Bytearray();
    image.Copy(image_);
    dimage.Copy(image);

    bool fillHoles = PGeti("fill_holes") > 0;
    if (fillHoles) {
        Bytearray holes = new Bytearray();
        SegmRoutine.extract_holes(ref holes, image);
        int index = 0;
        while (index < image.Length()) {
            if (holes.At1d(index) > 0) {
                image.Put1d(index, 255);
            }
            index++;
        }
    }

    int w = image.Dim(0), h = image.Dim(1);
    wimage.Resize(w, h);
    wimage.Fill(0);

    // s1 counts foreground pixels, sy sums their row indices (columns 1..w-1 only).
    float s1 = 0.0f, sy = 0.0f;
    for (int x = 1; x < w; x++) {
        for (int y = 0; y < h; y++) {
            if (image[x, y] > 0) {
                s1++;
                sy += y;
                wimage[x, y] = inside_weight;
            } else {
                wimage[x, y] = outside_weight;
            }
        }
    }

    if (s1 == 0) {
        // No foreground at all: fall back to the middle row.
        where = image.Dim(1) / 2;
    } else {
        where = (int)(sy / s1);
    }

    int debugWidth = dimage.Dim(0);
    for (int x = 0; x < debugWidth; x++) {
        dimage[x, where] = 0x008000;
    }
}
/// <summary>
/// Segments a text line with this object's own cuts: a three-pixel-wide band is
/// cleared along each best cut in an all-255 mask, the mask is labelled and
/// restricted to the foreground, and the labels are propagated onto the image.
/// When <c>component_segmentation</c> is set, the result is combined with a
/// connected-component labelling (and diacritics are fixed if <c>fix_diacritics</c> is set).
/// </summary>
/// <param name="segmentation">Receives the segmentation.</param>
/// <param name="inraw">Input line image; it is copied, not modified.</param>
public override void Charseg(ref Intarray segmentation, Bytearray inraw) {
    setParams();
    OcrRoutine.optional_check_background_is_lighter(inraw);

    Bytearray binary = new Bytearray();
    binary.Copy(inraw);
    OcrRoutine.binarize_simple(binary);
    OcrRoutine.Invert(binary);

    SetImage(binary);
    FindAllCuts();
    FindBestCuts();

    // Start from a fully set mask and carve the cuts out of it.
    Intarray seg = new Intarray();
    seg.MakeLike(binary);
    seg.Fill(255);

    for (int n = 0; n < bestcuts.Length(); n++) {
        int width = seg.Dim(0);
        Narray<Point> path = cuts[bestcuts[n]];
        for (int y = 0; y < binary.Dim(1); y++) {
            int x = path[y].X;
            // Cuts touching the left/right border are skipped.
            if (x < 1 || x >= width - 1) {
                continue;
            }
            seg[x - 1, y] = 0;
            seg[x, y] = 0;
            seg[x + 1, y] = 0;
        }
    }
    ImgLabels.label_components(ref seg);

    segmentation.Copy(binary);
    // Keep labels only where the image has foreground.
    for (int p = 0; p < seg.Length1d(); p++) {
        if (segmentation.At1d(p) == 0) {
            seg.Put1d(p, 0);
        }
    }
    ImgLabels.propagate_labels_to(ref segmentation, seg);

    if (PGeti("component_segmentation") > 0) {
        Intarray ccseg = new Intarray();
        ccseg.Copy(binary);
        ImgLabels.label_components(ref ccseg);
        SegmRoutine.combine_segmentations(ref segmentation, ccseg);
        if (PGeti("fix_diacritics") > 0) {
            SegmRoutine.fix_diacritics(segmentation);
        }
    }

#if false
    SegmRoutine.line_segmentation_merge_small_components(ref segmentation, small_merge_threshold);
    SegmRoutine.line_segmentation_sort_x(segmentation);
#endif

    SegmRoutine.make_line_segmentation_white(segmentation);
}
/// <summary>
/// Global binarization with a single threshold chosen by maximizing the
/// inter-class variance of the gray-level histogram.
/// </summary>
/// <remarks>
/// An input containing only the values 0 and 255 is copied through unchanged.
/// Pixels strictly above the threshold become <c>MAXVAL - 1</c>, all others 0.
/// </remarks>
/// <param name="bin_image">Receives the binarized image.</param>
/// <param name="gray_image">Grayscale input image.</param>
public override void Binarize(Bytearray bin_image, Bytearray gray_image) {
    if (bin_image.Length1d() != gray_image.Length1d()) {
        bin_image.MakeLike(gray_image);
    }
    if (NarrayUtil.contains_only(gray_image, (byte)0, (byte)255)) {
        bin_image.Copy(gray_image);
        return;
    }

    int image_width = gray_image.Dim(0);
    int image_height = gray_image.Dim(1);

    // Gray-level histogram (arrays start out zeroed).
    int[] hist = new int[MAXVAL];
    for (int x = 0; x < image_width; x++) {
        for (int y = 0; y < image_height; y++) {
            hist[gray_image[x, y]]++;
        }
    }

    // Probability of each level, plus the cumulative probability and cumulative mean.
    double[] pdf = new double[MAXVAL];
    for (int level = 0; level < MAXVAL; level++) {
        pdf[level] = (double)hist[level] / (image_width * image_height);
    }

    double[] cdf = new double[MAXVAL];
    double[] myu = new double[MAXVAL];
    cdf[0] = pdf[0];
    myu[0] = 0.0; // level 0 contributes nothing to the mean
    for (int level = 1; level < MAXVAL; level++) {
        cdf[level] = cdf[level - 1] + pdf[level];
        myu[level] = myu[level - 1] + level * pdf[level];
    }

    // Pick the level with the largest inter-class variance.
    int threshold = 0;
    double max_sigma = 0.0;
    double totalMean = myu[MAXVAL - 1];
    for (int level = 0; level < MAXVAL - 1; level++) {
        double sigma = 0.0;
        if (cdf[level] != 0.0 && cdf[level] != 1.0) {
            double p1p2 = cdf[level] * (1.0 - cdf[level]);
            double mu1mu2diff = totalMean * cdf[level] - myu[level];
            sigma = mu1mu2diff * mu1mu2diff / p1p2;
        }
        if (sigma > max_sigma) {
            max_sigma = sigma;
            threshold = level;
        }
    }

    for (int x = 0; x < image_width; x++) {
        for (int y = 0; y < image_height; y++) {
            bin_image[x, y] = gray_image[x, y] > threshold ? (byte)(MAXVAL - 1) : (byte)0;
        }
    }

    if (PGeti("debug_otsu") > 0) {
        Logger.Default.Format("Otsu threshold value = {0}\n", threshold);
    }
}
/// <summary>
/// Recognizes a text line: classifies every character hypothesis of the grouper,
/// records class costs and space costs on it, and writes the resulting lattice
/// to <paramref name="result"/>. The line segmentation is also exposed through
/// <paramref name="segmentation_"/>.
/// </summary>
/// <remarks>
/// In the segmentation, pixels carry segment colors; a transition in the
/// transducer of the form * --- 1/eps --> * --- 2/a --> * means that the pixels
/// with colors 1 and 2 together form the letter "a".
/// </remarks>
/// <param name="segmentation_">Receives a copy of the current line segmentation.</param>
/// <param name="result">Receives the recognition lattice.</param>
/// <param name="image_">Line image; it is copied, not modified here.</param>
/// <returns>Always 0.0 in the current implementation.</returns>
public override double RecognizeLine(Intarray segmentation_, IGenericFst result, Bytearray image_) {
    double rate = 0.0;

    CHECK_ARG(image_.Dim(1) < PGeti("maxheight"),
              String.Format("input line too high ({0} x {1})", image_.Dim(0), image_.Dim(1)));
    CHECK_ARG(image_.Dim(1) * 1.0 / image_.Dim(0) < PGetf("maxaspect"),
              String.Format("input line has bad aspect ratio ({0} x {1})", image_.Dim(0), image_.Dim(1)));

    bool use_reject = PGetb("use_reject") && !DisableJunk;

    Bytearray lineImage = new Bytearray();
    lineImage.Copy(image_);
    SetLine(image_);
    if (PGeti("invert") > 0) {
        NarrayUtil.Sub(NarrayUtil.Max(lineImage), lineImage);
    }
    segmentation_.Copy(segmentation);

    OutputVector outputs = new OutputVector();
    int ncomponents = grouper.Object.Length();
    int minclass = PGeti("minclass");
    float minprob = PGetf("minprob");
    float space_yes = PGetf("space_yes");
    float space_no = PGetf("space_no");
    float maxcost = PGetf("maxcost");

    // Compute priors if possible; fall back on using no priors when no counts
    // are available (warning only once).
    Floatarray priors = new Floatarray();
    bool use_priors = PGeti("use_priors") > 0;
    if (use_priors) {
        bool haveCounts = counts.Length() > 0;
        if (haveCounts) {
            priors.Copy(counts);
            priors /= NarrayUtil.Sum(priors);
        } else {
            if (!counts_warned) {
                Global.Debugf("warn", "use_priors specified but priors unavailable (old model)");
            }
            use_priors = false;
            counts_warned = true;
        }
    }

    EstimateSpaceSize();

    for (int i = 0; i < ncomponents; i++) {
        // Cut the masked character hypothesis out of the line image.
        Rect b;
        Bytearray mask = new Bytearray();
        grouper.Object.GetMask(out b, ref mask, i, 0);
        Bytearray charImage = new Bytearray();
        grouper.Object.ExtractWithMask(charImage, mask, lineImage, i, 0);

        // Classify the hypothesis on a [0, 1]-scaled copy.
        Floatarray v = new Floatarray();
        v.Copy(charImage);
        v /= 255.0f;
        float ccost = classifier.Object.XOutputs(outputs, v);

        if (use_reject && classifier.Object.HigherOutputIsBetter) {
            ccost = 0;
            float total = outputs.Sum();
            // Outputs are left unnormalized; a (near-)zero total wipes them out.
            if (!(total > 1e-11f)) {
                outputs.Values.Fill(0.0f);
            }
        }

        int count = 0;
        Global.Debugf("dcost", "output {0}", outputs.Keys.Length());
        for (int index = 0; index < outputs.Keys.Length(); index++) {
            int j = outputs.Keys[index];
            if (j < minclass || j == reject_class) {
                continue;
            }
            float value = outputs.Values[index];
            if (value <= 0.0f || value < minprob) {
                continue;
            }

            float pcost;
            if (classifier.Object.HigherOutputIsBetter) {
                pcost = (float)-Math.Log(value);
            } else {
                pcost = value;
            }
            Global.Debugf("dcost", "{0} {1} {2}", j, pcost + ccost, (j > 32 ? (char)j : '_'));

            float total_cost = pcost + ccost;
            if (total_cost < maxcost) {
                if (use_priors) {
                    total_cost -= (float)-Math.Log(priors[j]);
                }
                grouper.Object.SetClass(i, j, total_cost);
                count++;
            }
        }
        Global.Debugf("dcost", "");

        // Nothing acceptable: small blobs become '~', everything else '#' with a
        // cost proportional to the box width.
        if (count == 0) {
            float xheight = 10.0f;
            bool tiny = b.Height() < xheight / 2 && b.Width() < xheight / 2;
            if (tiny) {
                grouper.Object.SetClass(i, (int)'~', high_cost / 2);
            } else {
                grouper.Object.SetClass(i, (int)'#', (b.Width() / xheight) * high_cost);
            }
        }

        if (grouper.Object.PixelSpace(i) > space_threshold) {
            Global.Debugf("spaces", "space {0}", grouper.Object.PixelSpace(i));
            grouper.Object.SetSpaceCost(i, space_yes, space_no);
        }
    }

    grouper.Object.GetLattice(result);
    return rate;
}
/// <summary>
/// Binarizes <paramref name="inarray"/> into <paramref name="outarray"/> via the
/// two-argument overload and additionally hands back the gray image, which here
/// is simply a copy of the input. Override this to provide a more efficient
/// implementation.
/// </summary>
/// <param name="outarray">Receives the binarized image.</param>
/// <param name="gray">Receives a copy of <paramref name="inarray"/>.</param>
/// <param name="inarray">Input image.</param>
public virtual void Binarize(Bytearray outarray, Bytearray gray, Bytearray inarray) {
    this.Binarize(outarray, inarray);

    // The gray output is the unmodified input.
    gray.Copy(inarray);
}
/// <summary>
/// Rectangular binary erosion, performed in place as two separable passes
/// (horizontal, then vertical).
/// </summary>
/// <param name="image">Image to erode; modified in place.</param>
/// <param name="rw">Width of the rectangle.</param>
/// <param name="rh">Height of the rectangle.</param>
public static void binary_erode_rect(Bytearray image, int rw, int rh) {
    bool nothingToDo = rw == 0 && rh == 0;
    if (nothingToDo) {
        return;
    }

    Bytearray scratch = new Bytearray();
    scratch.Copy(image);

    int i = 0;
    while (i < rw) {
        int shiftX = i - rw / 2;
        binary_and(scratch, image, shiftX, 0);
        i++;
    }

    int j = 0;
    while (j < rh) {
        int shiftY = j - rh / 2;
        binary_and(image, scratch, 0, shiftY);
        j++;
    }
}
/// <summary>
/// Train on a text line, given a segmentation.
/// </summary>
/// <remarks>
/// This is analogous to addTrainingLine(bytearray,nustring) except that
/// it takes the "ground truth" line segmentation. Every grouper hypothesis is
/// compared against the ground-truth correspondences; matched hypotheses are
/// labelled with the transcript character, all others with the reject class.
/// </remarks>
/// <param name="cseg">Ground-truth segmentation; its dimensions must equal those of the image.</param>
/// <param name="image_grayscale">Image of the text line.</param>
/// <param name="tr">Transcript of the line; all character codes must be at least 32.</param>
/// <returns>
/// <c>false</c> if the transcript is empty or the image fails one of the size or
/// aspect-ratio checks; <c>true</c> once the line has been processed.
/// </returns>
public override bool AddTrainingLine(Intarray cseg, Bytearray image_grayscale, string tr)
{
    Bytearray image = new Bytearray();
    image.Copy(image_grayscale);

    // ---- input validation ------------------------------------------------
    if (String.IsNullOrEmpty(tr))
    {
        Global.Debugf("error", "input transcript is empty");
        return false;
    }

    // NOTE(review): this compares Dim(0) with "minheight" while the checks below
    // use Dim(1) for the height - confirm which dimension is intended.
    if (image.Dim(0) < PGeti("minheight"))
    {
        Global.Debugf("error", "input line too small ({0} x {1})", image.Dim(0), image.Dim(1));
        return false;
    }

    if (image.Dim(1) > PGeti("maxheight"))
    {
        Global.Debugf("error", "input line too high ({0} x {1})", image.Dim(0), image.Dim(1));
        return false;
    }

    if (image.Dim(1) * 1.0 / image.Dim(0) > PGetf("maxaspect"))
    {
        Global.Debugf("warn", "input line has bad aspect ratio ({0} x {1})", image.Dim(0), image.Dim(1));
        return false;
    }

    CHECK_ARG(image.Dim(0) == cseg.Dim(0) && image.Dim(1) == cseg.Dim(1),
              "image.Dim(0) == cseg.Dim(0) && image.Dim(1) == cseg.Dim(1)");

    bool use_reject = PGetb("use_reject") && !DisableJunk;

    // ---- check and set the transcript ------------------------------------
    transcript = tr;
    SetLine(image_grayscale);
    if (PGeti("invert") > 0)
    {
        NarrayUtil.Sub(NarrayUtil.Max(image), image);
    }

    for (int pos = 0; pos < transcript.Length; pos++)
    {
        CHECK_ARG((int)transcript[pos] >= 32, "(int)transcript[i] >= 32");
    }

    // ---- correspondences between actual and ground-truth segmentation ----
    Narray<Intarray> segments = new Narray<Intarray>();
    GrouperRoutine.segmentation_correspondences(segments, segmentation, cseg);

    // ---- train the classifier on every hypothesis segment ----------------
    int total = 0;
    int junk = 0;
    for (int i = 0; i < grouper.Object.Length(); i++)
    {
        Intarray hypothesis = new Intarray();
        grouper.Object.GetSegments(hypothesis, i);

        // Find the first ground-truth entry equal to this hypothesis (if any).
        int found = -1;
        int candidate = 0;
        while (found < 0 && candidate < segments.Length())
        {
            if (GrouperRoutine.Equals(segments[candidate], hypothesis))
            {
                found = candidate;
            }
            candidate++;
        }

        // Segments are numbered starting at 1, so shift to a 0-based transcript index.
        int transcriptIndex = found - 1;

        if (transcriptIndex >= transcript.Length)
        {
            Global.Debugf("error", "mismatch between transcript and cseg: {0}", transcript);
            continue;
        }

        int label = reject_class;
        if (transcriptIndex >= 0)
        {
            label = (int)transcript[transcriptIndex];
            Global.Debugf("debugmismatch", "index {0} position {1} char {2} [{3}]", i, transcriptIndex, (char)label, label);
        }

        if (label == reject_class)
        {
            junk++;
        }

        // Extract the character and add it to the classifier.
        Rect bounds;
        Bytearray mask = new Bytearray();
        grouper.Object.GetMask(out bounds, ref mask, i, 0);
        Bytearray extracted = new Bytearray();
        grouper.Object.ExtractWithMask(extracted, mask, image, i, 0);
        Floatarray v = new Floatarray();
        v.Copy(extracted);
        v /= 255.0;
        Global.Debugf("cdim", "character dimensions ({0},{1})", v.Dim(0), v.Dim(1));
        total++;

        // Reject samples are only fed to the classifier when rejection is enabled.
        if (use_reject || label != reject_class)
        {
            classifier.Object.XAdd(v, label);
        }

        if (label != reject_class)
        {
            IncClass(label);
        }

        ntrained++;
    }

    Global.Debugf("detail", "AddTrainingLine trained {0} chars, {1} junk", total - junk, junk);
    return true;
}
/// <summary>
/// Train on a text line, given a segmentation.
/// </summary>
/// <remarks>
/// This is analogous to addTrainingLine(bytearray,nustring) except that
/// it takes the "ground truth" line segmentation.
/// </remarks>
/// <param name="cseg">Ground-truth segmentation with the same dimensions as the image.</param>
/// <param name="image_grayscale">Image of the text line.</param>
/// <param name="tr">Transcript; must be non-empty and contain only character codes of 32 or more.</param>
/// <returns>
/// <c>false</c> when the line is rejected up front (empty transcript, size or
/// aspect-ratio check failed); otherwise <c>true</c>.
/// </returns>
public override bool AddTrainingLine(Intarray cseg, Bytearray image_grayscale, string tr)
{
    Bytearray image = new Bytearray();
    image.Copy(image_grayscale);

    if (String.IsNullOrEmpty(tr))
    {
        Global.Debugf("error", "input transcript is empty");
        return false;
    }

    // NOTE(review): Dim(0) is tested against "minheight" whereas the height checks
    // below use Dim(1) - verify that this is the intended dimension.
    if (image.Dim(0) < PGeti("minheight"))
    {
        Global.Debugf("error", "input line too small ({0} x {1})", image.Dim(0), image.Dim(1));
        return false;
    }
    if (image.Dim(1) > PGeti("maxheight"))
    {
        Global.Debugf("error", "input line too high ({0} x {1})", image.Dim(0), image.Dim(1));
        return false;
    }
    if (image.Dim(1) * 1.0 / image.Dim(0) > PGetf("maxaspect"))
    {
        Global.Debugf("warn", "input line has bad aspect ratio ({0} x {1})", image.Dim(0), image.Dim(1));
        return false;
    }

    CHECK_ARG(image.Dim(0) == cseg.Dim(0) && image.Dim(1) == cseg.Dim(1),
              "image.Dim(0) == cseg.Dim(0) && image.Dim(1) == cseg.Dim(1)");

    bool use_reject = PGetb("use_reject") && !DisableJunk;

    // Check and set the transcript.
    transcript = tr;
    SetLine(image_grayscale);
    if (PGeti("invert") > 0)
    {
        NarrayUtil.Sub(NarrayUtil.Max(image), image);
    }
    for (int k = 0; k < transcript.Length; k++)
    {
        CHECK_ARG((int)transcript[k] >= 32, "(int)transcript[i] >= 32");
    }

    // Compute correspondences between the actual segmentation and the
    // ground-truth segmentation.
    Narray<Intarray> segments = new Narray<Intarray>();
    GrouperRoutine.segmentation_correspondences(segments, segmentation, cseg);

    // Iterate through all hypothesis segments and train the classifier with them.
    int total = 0;
    int junk = 0;
    for (int i = 0; i < grouper.Object.Length(); i++)
    {
        Intarray segs = new Intarray();
        grouper.Object.GetSegments(segs, i);

        // Index of the matching ground-truth segment, or -1 when there is none.
        int groundTruth = -1;
        for (int j = 0; j < segments.Length(); j++)
        {
            if (!GrouperRoutine.Equals(segments[j], segs))
            {
                continue;
            }
            groundTruth = j;
            break;
        }

        // Segments are numbered starting at 1.
        int position = groundTruth - 1;

        int cls = reject_class;
        if (position >= 0)
        {
            if (position >= transcript.Length)
            {
                Global.Debugf("error", "mismatch between transcript and cseg: {0}", transcript);
                continue;
            }

            cls = (int)transcript[position];
            Global.Debugf("debugmismatch", "index {0} position {1} char {2} [{3}]", i, position, (char)cls, cls);
        }

        bool isJunk = cls == reject_class;
        if (isJunk)
        {
            junk++;
        }

        // Extract the character and add it to the classifier.
        Rect box;
        Bytearray mask = new Bytearray();
        grouper.Object.GetMask(out box, ref mask, i, 0);
        Bytearray pixels = new Bytearray();
        grouper.Object.ExtractWithMask(pixels, mask, image, i, 0);
        Floatarray v = new Floatarray();
        v.Copy(pixels);
        v /= 255.0;
        Global.Debugf("cdim", "character dimensions ({0},{1})", v.Dim(0), v.Dim(1));
        total++;

        if (!isJunk)
        {
            // Real characters are always trained and counted.
            classifier.Object.XAdd(v, cls);
            IncClass(cls);
        }
        else if (use_reject)
        {
            // Junk is only trained when rejection is enabled.
            classifier.Object.XAdd(v, cls);
        }

        ntrained++;
    }

    Global.Debugf("detail", "AddTrainingLine trained {0} chars, {1} junk", total - junk, junk);
    return true;
}
/// <summary>
/// Given a list of character recognition candidates and their classifications,
/// and an image of the corresponding text line, compute a list of pairs of costs
/// for putting/not putting a space after each of the candidate characters.
/// </summary>
/// <remarks>
/// The basic idea behind this simple algorithm is to try larger and larger
/// closing operations until the median aspect ratio of the connected components
/// reaches <c>aspect_threshold</c>; that's when characters have merged into
/// words. The remaining whitespace should be spaces.
///
/// This is just a simple stopgap measure; it will be replaced with trainable
/// space modeling. The implementation is unfinished: the gap analysis is missing
/// and the method currently always returns <c>null</c>.
/// </remarks>
/// <param name="candidates">Recognition candidates (not used by the current stub).</param>
/// <param name="image">Image of the text line.</param>
/// <returns>Always <c>null</c> in the current implementation.</returns>
public List<List<float>> SpaceCosts(List<Candidate> candidates, Bytearray image)
{
    Bytearray closed = new Bytearray();

    // Grow the closing radius until the median component becomes "wide" enough.
    int r;
    for (r = 0; r < maxrange; r++)
    {
        closed.Copy(image);
        if (r > 0)
        {
            Morph.binary_close_circle(closed, r);
        }

        Intarray labeled = new Intarray();
        labeled.Copy(closed);
        ImgLabels.label_components(ref labeled);

        Narray<Rect> rects = new Narray<Rect>();
        ImgLabels.bounding_boxes(ref rects, labeled);

        Floatarray aspects = new Floatarray();
        for (int i = 0; i < rects.Length(); i++)
        {
            float aspect = rects[i].Aspect();
            aspects.Push(aspect);
        }

        float maspect = NarrayUtil.Median(aspects);
        if (maspect >= this.aspect_threshold)
        {
            break;
        }
    }

    // Close with a little bit of extra space.
    closed.Copy(image);
    Morph.binary_close_circle(closed, r + 1);

    // TODO: compute the remaining gaps (e.g. via Morph.binary_dilate_circle);
    // every character box that ends near a gap gets a space appended.
    return null;
}
/// <summary>
/// Local adaptive (Sauvola-style) binarization: every pixel is compared against
/// <c>mean * (1 + k * (std / 128 - 1))</c>, where mean and standard deviation are
/// taken over a window of half-width <c>w / 2</c> centred on the pixel and
/// computed from integral images.
/// </summary>
/// <remarks>
/// Reads the parameters "w" (window size) and "k" and stores them, together with
/// the half window size, in the corresponding fields. An input that contains only
/// the values 0 and 255 is copied through unchanged. When the "debug_binarize"
/// parameter is positive the result is also written to "debug_binarize.png".
/// </remarks>
/// <param name="bin_image">Receives the binarized image (values 0 or MAXVAL-1).</param>
/// <param name="gray_image">Grayscale input image.</param>
/// <exception cref="Exception">
/// Thrown when k is outside [0.001, 0.999] or when w is not positive.
/// </exception>
public override void Binarize(Bytearray bin_image, Bytearray gray_image)
{
    w = PGeti("w");
    k = (float)PGetf("k");
    whalf = w >> 1;
    // fprintf(stderr,"[sauvola %g %d]\n",k,w);
    if (k < 0.001 || k > 0.999)
    {
        throw new Exception("Binarize: CHECK_ARG(k>=0.001 && k<=0.999)");
    }
    // The window size must be strictly positive: a negative w would yield a
    // negative half window and out-of-range integral-image lookups below.
    if (w <= 0 || k >= 1000)
    {
        throw new Exception("Binarize: CHECK_ARG(w>0 && k<1000)");
    }

    if (bin_image.Length1d() != gray_image.Length1d())
    {
        bin_image.MakeLike(gray_image);
    }

    // Already bi-level: nothing to do.
    if (NarrayUtil.contains_only(gray_image, (byte)0, (byte)255))
    {
        bin_image.Copy(gray_image);
        return;
    }

    int image_width = gray_image.Dim(0);
    int image_height = gray_image.Dim(1);

    // Calculate the integral image, and integral of the squared image.
    Narray<long> integral_image = new Narray<long>(), rowsum_image = new Narray<long>();
    Narray<long> integral_sqimg = new Narray<long>(), rowsum_sqimg = new Narray<long>();
    integral_image.MakeLike(gray_image);
    rowsum_image.MakeLike(gray_image);
    integral_sqimg.MakeLike(gray_image);
    rowsum_sqimg.MakeLike(gray_image);

    // Running sums along x, one row (fixed j) at a time.
    for (int j = 0; j < image_height; j++)
    {
        rowsum_image[0, j] = gray_image[0, j];
        rowsum_sqimg[0, j] = gray_image[0, j] * gray_image[0, j];
    }
    for (int i = 1; i < image_width; i++)
    {
        for (int j = 0; j < image_height; j++)
        {
            rowsum_image[i, j] = rowsum_image[i - 1, j] + gray_image[i, j];
            rowsum_sqimg[i, j] = rowsum_sqimg[i - 1, j] + gray_image[i, j] * gray_image[i, j];
        }
    }

    // Accumulate the row sums along y to obtain the integral images.
    for (int i = 0; i < image_width; i++)
    {
        integral_image[i, 0] = rowsum_image[i, 0];
        integral_sqimg[i, 0] = rowsum_sqimg[i, 0];
    }
    for (int i = 0; i < image_width; i++)
    {
        for (int j = 1; j < image_height; j++)
        {
            integral_image[i, j] = integral_image[i, j - 1] + rowsum_image[i, j];
            integral_sqimg[i, j] = integral_sqimg[i, j - 1] + rowsum_sqimg[i, j];
        }
    }

    // Calculate the mean and standard deviation using the integral image.
    for (int i = 0; i < image_width; i++)
    {
        for (int j = 0; j < image_height; j++)
        {
            // Window clipped to the image borders.
            int xmin = Math.Max(0, i - whalf);
            int ymin = Math.Max(0, j - whalf);
            int xmax = Math.Min(image_width - 1, i + whalf);
            int ymax = Math.Min(image_height - 1, j + whalf);
            double area = (xmax - xmin + 1) * (ymax - ymin + 1);

            // With whalf >= 0 the clipped window always contains pixel (i, j):
            // xmax >= i >= xmin and ymax >= j >= ymin, hence area >= 1. --IM
            if (area <= 0)
            {
                throw new Exception("Binarize: area can't be 0 here");
            }

            double diff;
            double sqdiff;
            if (xmin == 0 && ymin == 0)
            {
                // Point at origin
                diff = integral_image[xmax, ymax];
                sqdiff = integral_sqimg[xmax, ymax];
            }
            else if (xmin == 0 && ymin > 0)
            {
                // first column
                diff = integral_image[xmax, ymax] - integral_image[xmax, ymin - 1];
                sqdiff = integral_sqimg[xmax, ymax] - integral_sqimg[xmax, ymin - 1];
            }
            else if (xmin > 0 && ymin == 0)
            {
                // first row
                diff = integral_image[xmax, ymax] - integral_image[xmin - 1, ymax];
                sqdiff = integral_sqimg[xmax, ymax] - integral_sqimg[xmin - 1, ymax];
            }
            else
            {
                // rest of the image
                double diagsum = integral_image[xmax, ymax] + integral_image[xmin - 1, ymin - 1];
                double idiagsum = integral_image[xmax, ymin - 1] + integral_image[xmin - 1, ymax];
                diff = diagsum - idiagsum;
                double sqdiagsum = integral_sqimg[xmax, ymax] + integral_sqimg[xmin - 1, ymin - 1];
                double sqidiagsum = integral_sqimg[xmax, ymin - 1] + integral_sqimg[xmin - 1, ymax];
                sqdiff = sqdiagsum - sqidiagsum;
            }

            double mean = diff / area;
            double std = Math.Sqrt((sqdiff - diff * diff / area) / (area - 1));
            double threshold = mean * (1 + k * ((std / 128) - 1));
            if (gray_image[i, j] < threshold)
            {
                bin_image[i, j] = 0;
            }
            else
            {
                bin_image[i, j] = (byte)(MAXVAL - 1);
            }
        }
    }

    if (PGeti("debug_binarize") > 0)
    {
        ImgIo.write_image_gray("debug_binarize.png", bin_image);
    }
}
/// <summary>
/// This is a weird, optional method that exposes character segmentation
/// for those line recognizers that have it. The segmentation contains colored pixels,
/// and a transition in the transducer of the form * --- 1/eps --&gt; * --- 2/a --&gt; *
/// means that pixels with color 1 and 2 together form the letter "a".
/// </summary>
/// <remarks>
/// Every grouper component is extracted from the line image, classified, and the
/// accepted classes are registered on the grouper with their costs. Components
/// without any accepted class get a placeholder class ('~' for small boxes, '#'
/// otherwise). Space costs are set for components whose pixel space exceeds
/// <c>space_threshold</c>, and finally the grouper lattice is written to
/// <paramref name="result"/>.
/// </remarks>
/// <param name="segmentation_">Receives a copy of the recognizer's current segmentation.</param>
/// <param name="result">Receives the recognition lattice.</param>
/// <param name="image_">Image of the text line.</param>
/// <returns>Always 0.0; no rate is computed by this implementation.</returns>
public override double RecognizeLine(Intarray segmentation_, IGenericFst result, Bytearray image_)
{
    CHECK_ARG(image_.Dim(1) < PGeti("maxheight"),
              String.Format("input line too high ({0} x {1})", image_.Dim(0), image_.Dim(1)));
    CHECK_ARG(image_.Dim(1) * 1.0 / image_.Dim(0) < PGetf("maxaspect"),
              String.Format("input line has bad aspect ratio ({0} x {1})", image_.Dim(0), image_.Dim(1)));
    bool use_reject = PGetb("use_reject") && !DisableJunk;
    //Console.WriteLine("IMG: imin:{0} imax:{1}", NarrayUtil.ArgMin(image_), NarrayUtil.ArgMax(image_));

    Bytearray image = new Bytearray();
    image.Copy(image_);
    SetLine(image_);
    if (PGeti("invert") > 0)
    {
        NarrayUtil.Sub(NarrayUtil.Max(image), image);
    }
    segmentation_.Copy(segmentation);

    OutputVector p = new OutputVector();
    int ncomponents = grouper.Object.Length();
    int minclass = PGeti("minclass");
    float minprob = PGetf("minprob");
    float space_yes = PGetf("space_yes");
    float space_no = PGetf("space_no");
    float maxcost = PGetf("maxcost");

    // Compute priors if possible; fall back on using no priors if no counts
    // are available.
    Floatarray priors = new Floatarray();
    bool use_priors = PGeti("use_priors") > 0;
    if (use_priors)
    {
        if (counts.Length() > 0)
        {
            priors.Copy(counts);
            priors /= NarrayUtil.Sum(priors);
        }
        else
        {
            // Warn only once per recognizer instance.
            if (!counts_warned)
            {
                Global.Debugf("warn", "use_priors specified but priors unavailable (old model)");
            }
            use_priors = false;
            counts_warned = true;
        }
    }

    EstimateSpaceSize();

    for (int i = 0; i < ncomponents; i++)
    {
        // Extract the masked character image and scale it to floating point.
        Rect b;
        Bytearray mask = new Bytearray();
        grouper.Object.GetMask(out b, ref mask, i, 0);
        Bytearray cv = new Bytearray();
        grouper.Object.ExtractWithMask(cv, mask, image, i, 0);
        //ImgIo.write_image_gray("extrmask_image.png", cv);
        Floatarray v = new Floatarray();
        v.Copy(cv);
        v /= 255.0f;

        float ccost = classifier.Object.XOutputs(p, v);
        if (use_reject && classifier.Object.HigherOutputIsBetter)
        {
            ccost = 0;
            float total = p.Sum();
            // Negated "greater than" on purpose so that a NaN total also clears the outputs.
            // (Normalisation "p /= total" is intentionally disabled.)
            if (!(total > 1e-11f))
            {
                p.Values.Fill(0.0f);
            }
        }

        int count = 0;
        Global.Debugf("dcost", "output {0}", p.Keys.Length());
        for (int index = 0; index < p.Keys.Length(); index++)
        {
            int j = p.Keys[index];
            if (j < minclass || j == reject_class)
            {
                continue;
            }

            float value = p.Values[index];
            if (value <= 0.0f || value < minprob)
            {
                continue;
            }

            float pcost = classifier.Object.HigherOutputIsBetter ? (float)-Math.Log(value) : value;
            Global.Debugf("dcost", "{0} {1} {2}", j, pcost + ccost, (j > 32 ? (char)j : '_'));
            float total_cost = pcost + ccost;
            if (total_cost < maxcost)
            {
                if (use_priors)
                {
                    total_cost -= (float)-Math.Log(priors[j]);
                }
                grouper.Object.SetClass(i, j, total_cost);
                count++;
            }
        }
        Global.Debugf("dcost", "");

        // Nothing acceptable: emit a placeholder so the lattice stays connected.
        if (count == 0)
        {
            float xheight = 10.0f;
            if (b.Height() < xheight / 2 && b.Width() < xheight / 2)
            {
                grouper.Object.SetClass(i, (int)'~', high_cost / 2);
            }
            else
            {
                grouper.Object.SetClass(i, (int)'#', (b.Width() / xheight) * high_cost);
            }
        }

        if (grouper.Object.PixelSpace(i) > space_threshold)
        {
            Global.Debugf("spaces", "space {0}", grouper.Object.PixelSpace(i));
            grouper.Object.SetSpaceCost(i, space_yes, space_no);
        }
    }

    grouper.Object.GetLattice(result);
    return 0.0;
}
/// <summary>
/// Global binarization with a single threshold chosen by maximising the
/// inter-class variance of the gray-level histogram (Otsu's criterion).
/// </summary>
/// <remarks>
/// An input that contains only the values 0 and 255 is copied through unchanged.
/// Pixels strictly above the threshold become MAXVAL-1, all others 0. When the
/// "debug_otsu" parameter is positive the chosen threshold is logged.
/// </remarks>
/// <param name="bin_image">Receives the binarized image.</param>
/// <param name="gray_image">Grayscale input image.</param>
public override void Binarize(Bytearray bin_image, Bytearray gray_image)
{
    if (bin_image.Length1d() != gray_image.Length1d())
    {
        bin_image.MakeLike(gray_image);
    }

    if (NarrayUtil.contains_only(gray_image, (byte)0, (byte)255))
    {
        bin_image.Copy(gray_image);
        return;
    }

    int width = gray_image.Dim(0);
    int height = gray_image.Dim(1);
    int pixelCount = width * height;

    // Histogram generation (new arrays start out zeroed).
    int[] histogram = new int[MAXVAL];
    for (int x = 0; x < width; x++)
    {
        for (int y = 0; y < height; y++)
        {
            histogram[gray_image[x, y]]++;
        }
    }

    // Probability density.
    double[] density = new double[MAXVAL];
    for (int level = 0; level < MAXVAL; level++)
    {
        density[level] = (double)histogram[level] / pixelCount;
    }

    // Cumulative probability and cumulative mean.
    double[] cumulative = new double[MAXVAL];
    double[] cumulativeMean = new double[MAXVAL];
    cumulative[0] = density[0];
    cumulativeMean[0] = 0.0; // 0.0 times density[0] equals zero
    for (int level = 1; level < MAXVAL; level++)
    {
        cumulative[level] = cumulative[level - 1] + density[level];
        cumulativeMean[level] = cumulativeMean[level - 1] + level * density[level];
    }

    // Maximise the inter-class variance; the maximising level is the threshold.
    int threshold = 0;
    double bestVariance = 0.0;
    for (int level = 0; level < MAXVAL - 1; level++)
    {
        double variance = 0.0;
        double below = cumulative[level];
        if (below != 0.0 && below != 1.0)
        {
            double classWeights = below * (1.0 - below);
            double meanGap = cumulativeMean[MAXVAL - 1] * below - cumulativeMean[level];
            variance = meanGap * meanGap / classWeights;
        }

        if (variance > bestVariance)
        {
            bestVariance = variance;
            threshold = level;
        }
    }

    // Apply the threshold.
    for (int x = 0; x < width; x++)
    {
        for (int y = 0; y < height; y++)
        {
            bin_image[x, y] = gray_image[x, y] > threshold ? (byte)(MAXVAL - 1) : (byte)0;
        }
    }

    if (PGeti("debug_otsu") > 0)
    {
        Logger.Default.Format("Otsu threshold value = {0}\n", threshold);
        //ImgIo.write_image_gray("debug_otsu.png", bin_image);
    }
}
/// <summary>
/// Segments a text line image into character candidates using the best cuts
/// found by <c>FindAllCuts</c>/<c>FindBestCuts</c>.
/// </summary>
/// <remarks>
/// The input is binarized and inverted, the selected cut paths are drawn three
/// pixels wide into a label image, the remaining regions are labelled and the
/// labels are propagated back onto the foreground pixels. When the
/// "component_segmentation" parameter is positive the result is combined with a
/// connected-component labelling (optionally followed by "fix_diacritics").
/// </remarks>
/// <param name="segmentation">Receives the resulting line segmentation.</param>
/// <param name="inraw">Raw image of the text line.</param>
public override void Charseg(ref Intarray segmentation, Bytearray inraw)
{
    setParams();
    //Logger.Default.Image("segmenting", inraw);

    OcrRoutine.optional_check_background_is_lighter(inraw);
    Bytearray image = new Bytearray();
    image.Copy(inraw);
    OcrRoutine.binarize_simple(image);
    OcrRoutine.Invert(image);

    SetImage(image);
    FindAllCuts();
    FindBestCuts();

    // Start from an all-255 image and carve the cut paths into it as zeros.
    Intarray seg = new Intarray();
    seg.MakeLike(image);
    seg.Fill(255);

    int w = seg.Dim(0);
    int lineHeight = image.Dim(1);
    for (int r = 0; r < bestcuts.Length(); r++)
    {
        Narray<Point> cut = cuts[bestcuts[r]];
        for (int y = 0; y < lineHeight; y++)
        {
            int x = cut[y].X;

            // Skip cut points too close to the border to draw a 3-pixel stroke.
            if (x < 1 || x >= w - 1)
            {
                continue;
            }

            seg[x - 1, y] = 0;
            seg[x, y] = 0;
            seg[x + 1, y] = 0;
        }
    }

    ImgLabels.label_components(ref seg);
    // dshowr(seg,"YY"); dwait();

    // Keep labels only where the binarized image is non-zero, then spread them.
    segmentation.Copy(image);
    for (int i = 0; i < seg.Length1d(); i++)
    {
        if (segmentation.At1d(i) == 0)
        {
            seg.Put1d(i, 0);
        }
    }
    ImgLabels.propagate_labels_to(ref segmentation, seg);

    if (PGeti("component_segmentation") > 0)
    {
        Intarray ccseg = new Intarray();
        ccseg.Copy(image);
        ImgLabels.label_components(ref ccseg);
        SegmRoutine.combine_segmentations(ref segmentation, ccseg);
        if (PGeti("fix_diacritics") > 0)
        {
            SegmRoutine.fix_diacritics(segmentation);
        }
    }
#if false
    SegmRoutine.line_segmentation_merge_small_components(ref segmentation, small_merge_threshold);
    SegmRoutine.line_segmentation_sort_x(segmentation);
#endif
    SegmRoutine.make_line_segmentation_white(segmentation);
    // set_line_number(segmentation, 1);
    //Logger.Default.Image("resulting segmentation", segmentation);
}
/// <summary>
/// Computes five feature maps for a character image and appends them, each
/// rescaled with <c>OcrRoutine.scale_to</c> to the "csize" parameter, to
/// <paramref name="outarrays"/>: x gradient, y gradient, junctions, endpoints
/// and holes.
/// </summary>
/// <remarks>
/// The gradients are taken on a Gaussian-smoothed copy of the input ("gradsigma")
/// and half-wave rectified in an alternating row/column pattern. Junction,
/// endpoint and hole maps are computed for "n" versions of the image (smoothing or
/// dilation radius <c>step * i</c>) and averaged over those passes.
///
/// NOTE(review): earlier code overwrote the three maps on every pass, so only the
/// last pass (scaled by 1/n) survived even though the maps are zero-initialised
/// and divided by n. They are now accumulated; models trained on the old feature
/// values may need retraining.
/// </remarks>
/// <param name="outarrays">Cleared, then filled with the five feature maps.</param>
/// <param name="inarray">Input character image; it is copied, not modified.</param>
public override void Extract(Narray<Floatarray> outarrays, Floatarray inarray)
{
    outarrays.Clear();
    Floatarray input = new Floatarray();
    input.Copy(inarray);
    int w = input.Dim(0), h = input.Dim(1);
    Floatarray a = new Floatarray(); // working array
    int csize = PGeti("csize");

    // get rid of small components
    SegmRoutine.erase_small_components(input, PGetf("minsize"), PGetf("threshold"));

    // compute a thresholded version for morphological operations
    Bytearray thresholded = new Bytearray();
    OcrRoutine.threshold_frac(thresholded, input, PGetf("threshold"));

    // compute a smoothed version of the input for gradient computations
    float sigma = PGetf("gradsigma");
    Floatarray smoothed = new Floatarray();
    smoothed.Copy(input);
    Gauss.Gauss2d(smoothed, sigma, sigma);

    // x gradient (backward difference, zero in the first column)
    a.Resize(w, h);
    for (int j = 0; j < h; j++)
    {
        for (int i = 0; i < w; i++)
        {
            a[i, j] = (i == 0) ? 0f : smoothed[i, j] - smoothed[i - 1, j];
        }
    }
    Floatarray xgrad = outarrays.Push(new Floatarray());
    OcrRoutine.scale_to(xgrad, a, csize, PGetf("noupscale"), PGetf("aa"));
    // Even rows keep the positive part, odd rows the negative part.
    for (int j = 0; j < csize; j++)
    {
        for (int i = 0; i < csize; i++)
        {
            if (j % 2 == 0)
            {
                xgrad[i, j] = Math.Max(xgrad[i, j], 0f);
            }
            else
            {
                xgrad[i, j] = Math.Min(xgrad[i, j], 0f);
            }
        }
    }

    // y gradient (backward difference, zero in the first row)
    a.Resize(w, h);
    for (int i = 0; i < w; i++)
    {
        for (int j = 0; j < h; j++)
        {
            a[i, j] = (j == 0) ? 0f : smoothed[i, j] - smoothed[i, j - 1];
        }
    }
    Floatarray ygrad = outarrays.Push(new Floatarray());
    OcrRoutine.scale_to(ygrad, a, csize, PGetf("noupscale"), PGetf("aa"));
    // Even columns keep the positive part, odd columns the negative part.
    for (int i = 0; i < csize; i++)
    {
        for (int j = 0; j < csize; j++)
        {
            if (i % 2 == 0)
            {
                ygrad[i, j] = Math.Max(ygrad[i, j], 0f);
            }
            else
            {
                ygrad[i, j] = Math.Min(ygrad[i, j], 0f);
            }
        }
    }

    // junctions, endpoints, and holes
    Floatarray junctions = new Floatarray();
    Floatarray endpoints = new Floatarray();
    Floatarray holes = new Floatarray();
    Bytearray junctions1 = new Bytearray();
    Bytearray endpoints1 = new Bytearray();
    Bytearray holes1 = new Bytearray();
    Bytearray binary = new Bytearray();
    junctions.MakeLike(input, 0f);
    endpoints.MakeLike(input, 0f);
    holes.MakeLike(input, 0f);
    int n = PGeti("n");
    float step = PGetf("step");
    int bs = PGeti("binsmooth");
    for (int i = 0; i < n; i++)
    {
        sigma = step * i;
        if (bs > 0)
        {
            OcrRoutine.binsmooth(binary, input, sigma);
        }
        else
        {
            binary.Copy(thresholded);
            Morph.binary_dilate_circle(binary, (int)(sigma));
        }

        OcrRoutine.skeletal_features(endpoints1, junctions1, binary, 0.0f, 0.0f);
        NarrayUtil.Greater(junctions1, (byte)0, (byte)0, (byte)1);
        AccumulateFeatureMask(junctions, junctions1);
        NarrayUtil.Greater(endpoints1, (byte)0, (byte)0, (byte)1);
        AccumulateFeatureMask(endpoints, endpoints1);
        SegmRoutine.extract_holes(ref holes1, binary);
        NarrayUtil.Greater(holes1, (byte)0, (byte)0, (byte)1);
        AccumulateFeatureMask(holes, holes1);
    }

    // Turn the per-pass sums into averages.
    junctions *= 1.0f / (float)n;
    endpoints *= 1.0f / (float)n;
    holes *= 1.0f / (float)n;

    OcrRoutine.scale_to(outarrays.Push(new Floatarray()), junctions, csize, PGetf("noupscale"), PGetf("aa"));
    OcrRoutine.scale_to(outarrays.Push(new Floatarray()), endpoints, csize, PGetf("noupscale"), PGetf("aa"));
    OcrRoutine.scale_to(outarrays.Push(new Floatarray()), holes, csize, PGetf("noupscale"), PGetf("aa"));
}

/// <summary>
/// Adds the values of a byte mask element-wise onto a float accumulator, over the
/// region the two arrays have in common.
/// </summary>
private static void AccumulateFeatureMask(Floatarray sum, Bytearray mask)
{
    int cols = Math.Min(sum.Dim(0), mask.Dim(0));
    int rows = Math.Min(sum.Dim(1), mask.Dim(1));
    for (int x = 0; x < cols; x++)
    {
        for (int y = 0; y < rows; y++)
        {
            sum[x, y] = sum[x, y] + mask[x, y];
        }
    }
}