/// <summary>
/// Installs a new segmentation: re-reads the grouping parameters, copies the
/// segmentation into the internal label image, clears the per-segmentation
/// arrays and recomputes the groups.
/// </summary>
/// <param name="segmentation">Segmentation image to copy into <c>labels</c>.</param>
public override void SetSegmentation(Intarray segmentation)
{
    // Parameters are re-read on every call.
    maxrange = PGeti("maxrange");
    maxdist = PGeti("maxdist");
    fullheight = PGetb("fullheight");

    // Private copy of the input, passed through the two SegmRoutine/GrouperRoutine helpers.
    labels.Copy(segmentation);
    SegmRoutine.make_line_segmentation_black(labels);
    GrouperRoutine.check_approximately_sorted(labels);

    // Release everything that was stored for the previous segmentation.
    boxes.Dealloc();
    segments.Dealloc();
    class_outputs.Dealloc();
    class_costs.Dealloc();
    spaces.Dealloc();

    computeGroups();
}
/// <summary>
/// Segments a line image by connected components, optionally after a
/// rectangular binary closing controlled by the "swidth"/"sheight" parameters.
/// </summary>
/// <param name="outimage">Receives the resulting segmentation.</param>
/// <param name="inimage">Input line image; it is copied before processing.</param>
public override void Charseg(ref Intarray outimage, Bytearray inimage)
{
    int closeWidth = PGeti("swidth");
    int closeHeight = PGeti("sheight");

    Bytearray binary = new Bytearray();
    binary.Copy(inimage);
    OcrRoutine.binarize_simple(binary);
    OcrRoutine.Invert(binary);

    // Taken before the closing, so only pixels set here are labeled below.
    outimage.Copy(binary);

    bool closingRequested = closeWidth > 0 || closeHeight > 0;
    if (closingRequested)
    {
        Morph.binary_close_rect(binary, closeWidth, closeHeight);
    }

    Intarray components = new Intarray();
    components.Copy(binary);
    ImgLabels.label_components(ref components);

    for (int pos = 0; pos < outimage.Length1d(); pos++)
    {
        if (outimage.At1d(pos) <= 0)
        {
            continue;
        }
        int componentLabel = components.At1d(pos);
        outimage.Put1d(pos, SegmRoutine.cseg_pixel(componentLabel));
    }

    SegmRoutine.make_line_segmentation_white(outimage);
    SegmRoutine.check_line_segmentation(outimage);
}
/// <summary>
/// Copies the input, target, output and cost entries stored at index
/// <paramref name="from"/> into the supplied arrays.
/// </summary>
/// <param name="out_inputs">Receives a copy of <c>m_inputs[from]</c>.</param>
/// <param name="out_targets">Receives a copy of <c>m_targets[from]</c>.</param>
/// <param name="out_outputs">Receives a copy of <c>m_outputs[from]</c>.</param>
/// <param name="out_costs">Receives a copy of <c>m_costs[from]</c>.</param>
/// <param name="from">Index into the four internal tables.</param>
public override void Arcs(Intarray out_inputs, Intarray out_targets, Intarray out_outputs, Floatarray out_costs, int from)
{
    int source = from;

    out_inputs.Copy(m_inputs[source]);
    out_targets.Copy(m_targets[source]);
    out_outputs.Copy(m_outputs[source]);
    out_costs.Copy(m_costs[source]);
}
/// <summary>
/// Intended to compute, for each recognition candidate, a pair of costs for
/// putting / not putting a space after it. The implementation is unfinished:
/// it only searches for a closing radius and then returns <c>null</c>.
/// </summary>
/// <param name="candidates">Character recognition candidates (not read by the current code).</param>
/// <param name="image">Image of the corresponding text line.</param>
/// <returns>Always <c>null</c> in the current implementation.</returns>
public List<List<float>> SpaceCosts(List<Candidate> candidates, Bytearray image)
{
    /*
     * Idea of the algorithm: apply larger and larger closing operations until
     * most components have a "wide" aspect ratio; at that point characters
     * have merged into words and the remaining whitespace should be spaces.
     *
     * This is a simple stopgap measure, to be replaced with trainable
     * space modeling.
     */
    int w = image.Dim(0); // read but not used below
    int h = image.Dim(1); // read but not used below

    Bytearray closed = new Bytearray();
    int r;
    for (r = 0; r < maxrange; r++)
    {
        // Radius 0 means "no closing": work on a plain copy of the line.
        closed.Copy(image);
        if (r > 0)
        {
            Morph.binary_close_circle(closed, r);
        }

        Intarray components = new Intarray();
        components.Copy(closed);
        ImgLabels.label_components(ref components);

        Narray<Rect> rects = new Narray<Rect>();
        ImgLabels.bounding_boxes(ref rects, components);

        Floatarray aspects = new Floatarray();
        for (int k = 0; k < rects.Length(); k++)
        {
            aspects.Push(rects[k].Aspect());
        }

        // Stop at the first radius whose median aspect reaches the threshold.
        float medianAspect = NarrayUtil.Median(aspects);
        if (medianAspect >= this.aspect_threshold)
        {
            break;
        }
    }

    // Close once more with a little bit of extra space.
    closed.Copy(image);
    Morph.binary_close_circle(closed, r + 1);

    // TODO (unimplemented): compute the remaining gaps
    //Morph.binary_dilate_circle();
    // TODO (unimplemented): every character box that ends near a gap gets a space appended
    return null;
}
/// <summary>
/// Segments a line image by connected components and relabels components
/// whose bounding boxes overlap horizontally by at least half the width of
/// the narrower box.
/// </summary>
/// <param name="outimage">Receives the resulting segmentation.</param>
/// <param name="inarray">Input line image; it is copied before processing.</param>
public override void Charseg(ref Intarray outimage, Bytearray inarray)
{
    Bytearray binary = new Bytearray();
    binary.Copy(inarray);
    OcrRoutine.binarize_simple(binary);
    OcrRoutine.Invert(binary);
    outimage.Copy(binary);

    Intarray labels = new Intarray();
    labels.Copy(binary);
    ImgLabels.label_components(ref labels);

    Narray<Rect> boxes = new Narray<Rect>();
    ImgLabels.bounding_boxes(ref boxes, labels);

    // equiv[k] is the label that component k is rewritten to; starts as identity.
    Intarray equiv = new Intarray(boxes.Length());
    for (int k = 0; k < boxes.Length(); k++)
    {
        equiv[k] = k;
    }

    // Index 0 is skipped in both loops.
    for (int a = 1; a < boxes.Length(); a++)
    {
        Rect first = boxes[a];
        for (int b = 1; b < boxes.Length(); b++)
        {
            if (a != b)
            {
                Rect second = boxes[b];
                int left = Math.Max(first.x0, second.x0);
                int right = Math.Min(first.x1, second.x1);
                int shared = right - left;
                if (shared > 0)
                {
                    int narrower = Math.Min(first.Width(), second.Width());
                    float frac = shared / (float)(narrower);
                    if (!(frac < 0.5f))
                    {
                        // printf("%d %d : %d %d : %g\n",i,j,iw,ow,frac);
                        // The larger index is mapped onto the smaller one.
                        equiv.Put1d(Math.Max(a, b), Math.Min(a, b));
                    }
                }
            }
        }
    }

    for (int pos = 0; pos < labels.Length(); pos++)
    {
        int oldLabel = labels.At1d(pos);
        labels.Put1d(pos, equiv.At1d(oldLabel));
    }

    ImgLabels.renumber_labels(labels, 1);
    outimage.Move(labels);
    SegmRoutine.make_line_segmentation_white(outimage);
    SegmRoutine.check_line_segmentation(outimage);
}
/// <summary>
/// Stores a character segmentation for a line under the "cseg" key, or
/// "cseg.&lt;variant&gt;" when a variant is given.
/// </summary>
/// <param name="image">Segmentation image; a copy is processed and stored.</param>
/// <param name="page">Page number passed to <c>PutLine</c>.</param>
/// <param name="line">Line number passed to <c>PutLine</c>.</param>
/// <param name="variant">Optional variant suffix; null or empty means none.</param>
public void PutCharSegmentation(Intarray image, int page, int line, string variant = null)
{
    string key = String.IsNullOrEmpty(variant) ? "cseg" : "cseg" + "." + variant;

    Intarray whitened = new Intarray();
    whitened.Copy(image);
    SegmRoutine.make_line_segmentation_white(whitened);

    PutLine(whitened, page, line, key);
}
/// <summary>
/// Segments a line image using the cuts selected by <c>segmenter</c>: a
/// three-pixel-wide band along every best cut is cleared, the remaining
/// regions are labeled, and the labels are propagated to the binarized image.
/// </summary>
/// <param name="segmentation">Receives the resulting segmentation.</param>
/// <param name="inraw">Raw input line image; it is copied before binarization.</param>
public override void Charseg(ref Intarray segmentation, Bytearray inraw)
{
    Logger.Default.Image("segmenting", inraw);
    OcrRoutine.optional_check_background_is_lighter(inraw);

    Bytearray binary = new Bytearray();
    binary.Copy(inraw);
    OcrRoutine.binarize_simple(binary);
    OcrRoutine.Invert(binary);

    segmenter.SetImage(binary);
    segmenter.FindAllCuts();
    segmenter.FindBestCuts();

    Intarray seg = new Intarray();
    seg.Copy(binary);

    int width = seg.Dim(0);
    for (int r = 0; r < segmenter.bestcuts.Length(); r++)
    {
        int cutIndex = segmenter.bestcuts[r];
        Narray<Point> path = segmenter.cuts[cutIndex];
        for (int y = 0; y < binary.Dim(1); y++)
        {
            int x = path[y].X;
            // Cuts touching the left/right border are not drawn.
            if (x < 1 || x >= width - 1)
            {
                continue;
            }
            for (int dx = -1; dx <= 1; dx++)
            {
                seg[x + dx, y] = 0;
            }
        }
    }

    ImgLabels.label_components(ref seg);
    // dshowr(seg,"YY"); dwait();

    segmentation.Copy(binary);
    ImgLabels.propagate_labels_to(ref segmentation, seg);
    SegmRoutine.line_segmentation_merge_small_components(ref segmentation, small_merge_threshold);
    SegmRoutine.line_segmentation_sort_x(segmentation);
    SegmRoutine.make_line_segmentation_white(segmentation);
    // set_line_number(segmentation, 1);
    Logger.Default.Image("resulting segmentation", segmentation);
}
/// <summary>
/// Runs the base class segmentation and combines its result with a
/// connected-component labeling of the binarized, inverted input.
/// </summary>
/// <param name="result_segmentation">Receives the combined segmentation.</param>
/// <param name="orig_image">Input line image; the base segmenter receives it unchanged.</param>
public override void Charseg(ref Intarray result_segmentation, Bytearray orig_image)
{
    // Preprocessing happens on a copy so the base call sees the original image.
    Bytearray binary = new Bytearray();
    binary.Copy(orig_image);
    OcrRoutine.optional_check_background_is_lighter(binary);
    OcrRoutine.binarize_simple(binary);
    OcrRoutine.Invert(binary);

    Intarray components = new Intarray();
    components.Copy(binary);
    ImgLabels.label_components(ref components);

    base.Charseg(ref result_segmentation, orig_image);
    SegmRoutine.combine_segmentations(ref result_segmentation, components);
}
/// <summary>
/// Stores a page segmentation under the "pseg" key, or "pseg.&lt;variant&gt;"
/// when a variant is given.
/// </summary>
/// <param name="image">Segmentation image; a copy is checked, processed and stored.</param>
/// <param name="page">Page number passed to <c>PutPage</c>.</param>
/// <param name="variant">Optional variant suffix; null or empty means none.</param>
public void PutPageSegmentation(Intarray image, int page, string variant = null)
{
    string key = String.IsNullOrEmpty(variant) ? "pseg" : "pseg" + "." + variant;

    Intarray whitened = new Intarray();
    whitened.Copy(image);
    SegmRoutine.check_page_segmentation(whitened);
    SegmRoutine.make_page_segmentation_white(whitened);

    PutPage(whitened, page, key);
}
/// <summary>
/// Renders a character segmentation as a recolored bitmap, with segment
/// numbers and the transcript drawn on top.
/// </summary>
/// <param name="charseg">Character segmentation; a copy is processed.</param>
/// <param name="trans">Transcript text passed to <c>DrawSegmentTranscript</c>.</param>
/// <returns>The bitmap returned by <c>DrawSegmentTranscript</c>.</returns>
public static Bitmap ConvertCharsegToBitmapRecolor(Intarray charseg, string trans = "")
{
    Intarray working = new Intarray();
    working.Copy(charseg);

    // Bounding boxes are taken before the segmentation is whitened and recolored.
    Narray<Rect> segmentBoxes = new Narray<Rect>();
    //SegmRoutine.make_line_segmentation_black(cseg);
    ImgLabels.bounding_boxes(ref segmentBoxes, working);

    SegmRoutine.make_line_segmentation_white(working);
    ImgLabels.simple_recolor(working);

    var colored = ImgRoutine.NarrayToRgbBitmap(working);
    var numbered = DrawSegmentNumbers(colored, segmentBoxes);
    return DrawSegmentTranscript(numbered, segmentBoxes, trans);
}
/// <summary>
/// Creates a dataset from the given samples and class labels, copying both
/// into the instance.
/// </summary>
/// <param name="data">Sample data; <c>Dim(1)</c> is taken as the feature count.</param>
/// <param name="classes">Class label per sample; values must lie in [-1, 10000).</param>
public Dataset8(Narray <sbyte> data, Intarray classes)
    : this()
{
    // The parameters hide the instance members of the same name. Without the
    // "this." qualifier each argument was copied onto itself and the instance
    // never received the data.
    // NOTE(review): assumes the instance members are named "data" and "classes" — confirm.
    this.data.Copy(data);
    this.classes.Copy(classes);

    if (classes.Length() > 0)
    {
        nc = NarrayUtil.Max(classes) + 1;
        nf = data.Dim(1);
        //CHECK_ARG(NarrayUtil.Min(data) > -100 && NarrayUtil.Max(data) < 100, "min(data)>-100 && max(data)<100");
        CHECK_ARG(NarrayUtil.Min(classes) >= -1 && NarrayUtil.Max(classes) < 10000, "min(classes)>=-1 && max(classes)<10000");
    }
    else
    {
        // Empty dataset: no classes, feature count marked as unknown.
        nc = 0;
        nf = -1;
    }
}
/// <summary>
/// Creates a dataset from the given samples and class labels, copying both
/// into the instance.
/// </summary>
/// <param name="data">Sample data; <c>Dim(1)</c> is taken as the feature count.</param>
/// <param name="classes">Class label per sample; values must lie in [-1, 10000).</param>
public Dataset8(Narray<sbyte> data, Intarray classes)
    : this()
{
    // The parameters hide the instance members of the same name. Without the
    // "this." qualifier each argument was copied onto itself and the instance
    // never received the data.
    // NOTE(review): assumes the instance members are named "data" and "classes" — confirm.
    this.data.Copy(data);
    this.classes.Copy(classes);

    if (classes.Length() > 0)
    {
        nc = NarrayUtil.Max(classes) + 1;
        nf = data.Dim(1);
        //CHECK_ARG(NarrayUtil.Min(data) > -100 && NarrayUtil.Max(data) < 100, "min(data)>-100 && max(data)<100");
        CHECK_ARG(NarrayUtil.Min(classes) >= -1 && NarrayUtil.Max(classes) < 10000, "min(classes)>=-1 && max(classes)<10000");
    }
    else
    {
        // Empty dataset: no classes, feature count marked as unknown.
        nc = 0;
        nf = -1;
    }
}
/// <summary>
/// Renders a character segmentation as a recolored bitmap, with segment
/// numbers and the transcript drawn on top.
/// </summary>
/// <param name="charseg">Character segmentation; a copy is processed.</param>
/// <param name="trans">Transcript text passed to <c>DrawSegmentTranscript</c>.</param>
/// <returns>The bitmap returned by <c>DrawSegmentTranscript</c>.</returns>
public static Bitmap ConvertCharsegToBitmapRecolor(Intarray charseg, string trans = "")
{
    Intarray working = new Intarray();
    working.Copy(charseg);

    // Bounding boxes are taken before the segmentation is whitened and recolored.
    Narray<Rect> segmentBoxes = new Narray<Rect>();
    //SegmRoutine.make_line_segmentation_black(cseg);
    ImgLabels.bounding_boxes(ref segmentBoxes, working);

    SegmRoutine.make_line_segmentation_white(working);
    ImgLabels.simple_recolor(working);

    var colored = ImgRoutine.NarrayToRgbBitmap(working);
    var numbered = DrawSegmentNumbers(colored, segmentBoxes);
    return DrawSegmentTranscript(numbered, segmentBoxes, trans);
}
/// <summary>
/// Segments a line image using the cuts selected by <c>segmenter</c>: a
/// three-pixel-wide band along every best cut is cleared, the remaining
/// regions are labeled, and the labels are propagated to the binarized image.
/// </summary>
/// <param name="segmentation">Receives the resulting segmentation.</param>
/// <param name="inraw">Raw input line image; it is copied before binarization.</param>
public override void Charseg(ref Intarray segmentation, Bytearray inraw)
{
    Logger.Default.Image("segmenting", inraw);
    OcrRoutine.optional_check_background_is_lighter(inraw);

    Bytearray binary = new Bytearray();
    binary.Copy(inraw);
    OcrRoutine.binarize_simple(binary);
    OcrRoutine.Invert(binary);

    segmenter.SetImage(binary);
    segmenter.FindAllCuts();
    segmenter.FindBestCuts();

    Intarray seg = new Intarray();
    seg.Copy(binary);

    int width = seg.Dim(0);
    for (int r = 0; r < segmenter.bestcuts.Length(); r++)
    {
        int cutIndex = segmenter.bestcuts[r];
        Narray<Point> path = segmenter.cuts[cutIndex];
        for (int y = 0; y < binary.Dim(1); y++)
        {
            int x = path[y].X;
            // Cuts touching the left/right border are not drawn.
            if (x < 1 || x >= width - 1)
            {
                continue;
            }
            for (int dx = -1; dx <= 1; dx++)
            {
                seg[x + dx, y] = 0;
            }
        }
    }

    ImgLabels.label_components(ref seg);
    // dshowr(seg,"YY"); dwait();

    segmentation.Copy(binary);
    ImgLabels.propagate_labels_to(ref segmentation, seg);
    SegmRoutine.line_segmentation_merge_small_components(ref segmentation, small_merge_threshold);
    SegmRoutine.line_segmentation_sort_x(segmentation);
    SegmRoutine.make_line_segmentation_white(segmentation);
    // set_line_number(segmentation, 1);
    Logger.Default.Image("resulting segmentation", segmentation);
}
/// <summary>
/// Segments a line image by thinning a binarized copy, removing singular
/// points from the result, labeling what remains and propagating those
/// labels onto a copy of <paramref name="image"/>.
/// </summary>
/// <param name="segmentation">Receives the resulting segmentation.</param>
/// <param name="image">Input line image. It is inverted in place by this method.</param>
public override void Charseg(ref Intarray segmentation, Bytearray image)
{
    Bytearray thinned = new Bytearray();
    thinned.Copy(image);
    //for (int i = 0; i < image.Length(); i++) image[i] = (byte)(image[i] > 0 ? 0 : 1);
    OcrRoutine.binarize_simple(thinned);

    // NOTE(review): the inversion is applied to the caller's image, not to the
    // working copy that is thinned below — confirm this is intended.
    OcrRoutine.Invert(image);

    Skeleton.Thin(ref thinned);
    //ImgIo.write_image_gray("_thinned.png", timage);
    ImgMisc.remove_singular_points(ref thinned, 2);
    //ImgIo.write_image_gray("_segmented.png", timage);

    Intarray pieceLabels = new Intarray();
    pieceLabels.Copy(thinned);
    ImgLabels.label_components(ref pieceLabels);
    SegmRoutine.remove_small_components(pieceLabels, 4, 4);
    //ImgIo.write_image_packed("_labeled.png", tsegmentation);

    segmentation.Copy(image);
    ImgLabels.propagate_labels_to(ref segmentation, pieceLabels);
    //ImgIo.write_image_packed("_propagated.png", segmentation);
}
/// <summary>
/// Segments a line image by connected components, optionally after a
/// rectangular binary closing controlled by the "swidth"/"sheight" parameters.
/// </summary>
/// <param name="outimage">Receives the resulting segmentation.</param>
/// <param name="inimage">Input line image; it is copied before processing.</param>
public override void Charseg(ref Intarray outimage, Bytearray inimage)
{
    int closeWidth = PGeti("swidth");
    int closeHeight = PGeti("sheight");

    Bytearray binary = new Bytearray();
    binary.Copy(inimage);
    OcrRoutine.binarize_simple(binary);
    OcrRoutine.Invert(binary);

    // Taken before the closing, so only pixels set here are labeled below.
    outimage.Copy(binary);

    bool closingRequested = closeWidth > 0 || closeHeight > 0;
    if (closingRequested)
    {
        Morph.binary_close_rect(binary, closeWidth, closeHeight);
    }

    Intarray components = new Intarray();
    components.Copy(binary);
    ImgLabels.label_components(ref components);

    for (int pos = 0; pos < outimage.Length1d(); pos++)
    {
        if (outimage.At1d(pos) <= 0)
        {
            continue;
        }
        int componentLabel = components.At1d(pos);
        outimage.Put1d(pos, SegmRoutine.cseg_pixel(componentLabel));
    }

    SegmRoutine.make_line_segmentation_white(outimage);
    SegmRoutine.check_line_segmentation(outimage);
}
/// <summary>
/// Estimates <c>space_threshold</c> from the horizontal gaps between the
/// connected components of the current segmentation.
/// </summary>
public void EstimateSpaceSize()
{
    Intarray components = new Intarray();
    components.Copy(segmentation);
    ImgLabels.label_components(ref components);

    Narray<Rect> boxes = new Narray<Rect>();
    ImgLabels.bounding_boxes(ref boxes, components);

    // nearest[i]: smallest non-negative gap from box i's x1 to any box's x0;
    // entries that never find one keep the 99999 sentinel.
    Floatarray nearest = new Floatarray();
    nearest.Resize(boxes.Length());
    nearest.Fill(99999f);

    for (int i = 1; i < boxes.Length(); i++)
    {
        Rect current = boxes[i];
        for (int j = 1; j < boxes.Length(); j++)
        {
            Rect other = boxes[j];
            int gap = other.x0 - current.x1;
            if (gap >= 0 && gap < nearest[i])
            {
                nearest[i] = gap;
            }
        }
    }

    float interchar = NarrayUtil.Fractile(nearest, PGetf("space_fractile"));
    space_threshold = interchar * PGetf("space_multiplier");

    // Clamp to [space_min, space_max] multiples of the x-height.
    float xheight = 10.0f; // FIXME
    float lowerBound = PGetf("space_min") * xheight;
    space_threshold = Math.Max(space_threshold, lowerBound);
    float upperBound = PGetf("space_max") * xheight;
    space_threshold = Math.Min(space_threshold, upperBound);
}
/// <summary>
/// Segments a line image by connected components and relabels components
/// whose bounding boxes overlap horizontally by at least half the width of
/// the narrower box.
/// </summary>
/// <param name="outimage">Receives the resulting segmentation.</param>
/// <param name="inarray">Input line image; it is copied before processing.</param>
public override void Charseg(ref Intarray outimage, Bytearray inarray)
{
    Bytearray binary = new Bytearray();
    binary.Copy(inarray);
    OcrRoutine.binarize_simple(binary);
    OcrRoutine.Invert(binary);
    outimage.Copy(binary);

    Intarray labels = new Intarray();
    labels.Copy(binary);
    ImgLabels.label_components(ref labels);

    Narray<Rect> boxes = new Narray<Rect>();
    ImgLabels.bounding_boxes(ref boxes, labels);

    // equiv[k] is the label that component k is rewritten to; starts as identity.
    Intarray equiv = new Intarray(boxes.Length());
    for (int k = 0; k < boxes.Length(); k++)
    {
        equiv[k] = k;
    }

    // Index 0 is skipped in both loops.
    for (int a = 1; a < boxes.Length(); a++)
    {
        Rect first = boxes[a];
        for (int b = 1; b < boxes.Length(); b++)
        {
            if (a != b)
            {
                Rect second = boxes[b];
                int left = Math.Max(first.x0, second.x0);
                int right = Math.Min(first.x1, second.x1);
                int shared = right - left;
                if (shared > 0)
                {
                    int narrower = Math.Min(first.Width(), second.Width());
                    float frac = shared / (float)(narrower);
                    if (!(frac < 0.5f))
                    {
                        // printf("%d %d : %d %d : %g\n",i,j,iw,ow,frac);
                        // The larger index is mapped onto the smaller one.
                        equiv.Put1d(Math.Max(a, b), Math.Min(a, b));
                    }
                }
            }
        }
    }

    for (int pos = 0; pos < labels.Length(); pos++)
    {
        int oldLabel = labels.At1d(pos);
        labels.Put1d(pos, equiv.At1d(oldLabel));
    }

    ImgLabels.renumber_labels(labels, 1);
    outimage.Move(labels);
    SegmRoutine.make_line_segmentation_white(outimage);
    SegmRoutine.check_line_segmentation(outimage);
}
/// <summary>
/// Resets to <c>default(T)</c> every pixel of <paramref name="bimage"/> that
/// belongs to a connected component whose bounding box is both narrower than
/// <paramref name="mw"/> and lower than <paramref name="mh"/>, or that is
/// labeled 0.
/// </summary>
/// <typeparam name="T">Element type of the image.</typeparam>
/// <param name="bimage">Image to filter; modified in place.</param>
/// <param name="mw">Width limit; a component must be below it to be removed.</param>
/// <param name="mh">Height limit; a component must be below it to be removed.</param>
public static void remove_small_components <T>(Narray <T> bimage, int mw, int mh)
{
    Intarray labeled = new Intarray();
    labeled.Copy(bimage);
    ImgLabels.label_components(ref labeled);

    Narray<Rect> rects = new Narray<Rect>();
    ImgLabels.bounding_boxes(ref rects, labeled);

    // keep[k] == 1 unless component k is small in both dimensions.
    Bytearray keep = new Bytearray(rects.Length());
    for (int k = 0; k < rects.Length(); k++)
    {
        bool tooSmall = rects[k].Width() < mw && rects[k].Height() < mh;
        // printf("*** %d %d %d\n",i,rects[i].width(),rects[i].height());
        keep[k] = (byte)(tooSmall ? 0 : 1);
    }

    // Clear the labels of rejected components ...
    for (int pos = 0; pos < labeled.Length1d(); pos++)
    {
        int componentLabel = labeled.At1d(pos);
        if (keep[componentLabel] == 0)
        {
            labeled.Put1d(pos, 0);
        }
    }

    // ... then blank every pixel whose label is 0 in the caller's image.
    for (int pos = 0; pos < labeled.Length1d(); pos++)
    {
        if (labeled.At1d(pos) != 0)
        {
            continue;
        }
        bimage.Put1d(pos, default(T)); // default(T) - 0
    }
}
/// <summary>
/// Marks with 255 every pixel that belongs to a labeled region of the
/// complemented image (255 - pixel) other than the background region.
/// The background is the first non-zero label found along row 0.
/// </summary>
/// <param name="holes">Receives the hole mask (0 or 255), shaped like the input.</param>
/// <param name="binarized">Binarized input image.</param>
/// <exception cref="Exception">No non-zero label was found along row 0.</exception>
public static void extract_holes(ref Bytearray holes, Bytearray binarized)
{
    Intarray regions = new Intarray();
    regions.Copy(binarized);
    NarrayUtil.Sub(255, regions);
    ImgLabels.label_components(ref regions);

    // Scan index [i, 0] for the first labeled pixel; its label is the background.
    int background = -1;
    int scan = 0;
    while (scan < regions.Dim(0))
    {
        int candidate = regions[scan, 0];
        if (candidate != 0)
        {
            background = candidate;
            break;
        }
        scan++;
    }

    holes.MakeLike(regions);
    holes.Fill((byte)0);
    if (background <= 0)
    {
        throw new Exception("extract_holes: background must be more 0");
    }

    for (int i = 0; i < regions.Dim(0); i++)
    {
        for (int j = 0; j < regions.Dim(1); j++)
        {
            int label = regions[i, j];
            bool isHole = label > 0 && label != background;
            if (isHole)
            {
                holes[i, j] = 255;
            }
        }
    }
    /*fprintf(stderr, "segholes\n");
     * dsection("segholes");
     * dshow(holes, "y");*/
}
/// <summary>
/// Sets the image to be segmented: copies it into <c>dimage</c>, fills the
/// weight image <c>wimage</c>, and stores in <c>where</c> the mean second-axis
/// coordinate of the foreground pixels, marking that line in <c>dimage</c>.
/// </summary>
/// <param name="image">Input image; pixels greater than 0 are treated as foreground.</param>
public override void SetImage(Bytearray image)
{
    dimage.Copy(image);
    int w = image.Dim(0), h = image.Dim(1);
    wimage.Resize(w, h);
    wimage.Fill(0);

    // s1 counts foreground pixels, sy sums their second-axis coordinate.
    // The scan starts at index 1 because each pixel is compared with its
    // predecessor along the first axis; index 0 keeps the fill value.
    float s1 = 0.0f, sy = 0.0f;
    for (int i = 1; i < w; i++)
    {
        for (int j = 0; j < h; j++)
        {
            if (image[i, j] > 0)
            {
                s1++;
                sy += j;
            }
            if (image[i - 1, j] == 0 && image[i, j] > 0)
            {
                // Background-to-foreground transition.
                wimage[i, j] = boundary_weight;
            }
            else if (image[i, j] > 0)
            {
                wimage[i, j] = inside_weight;
            }
            else
            {
                wimage[i, j] = outside_weight;
            }
        }
    }

    if (s1 > 0.0f)
    {
        where = (int)(sy / s1);
    }
    else
    {
        // No foreground pixel was counted: sy / s1 would be NaN, and converting
        // NaN to int gives an unspecified value (usually an out-of-range index).
        // Fall back to the middle of the image instead.
        where = h / 2;
    }

    // Mark the selected line in the debug image; skipped when the image has no
    // such line (empty second axis).
    if (where >= 0 && where < dimage.Dim(1))
    {
        for (int i = 0; i < dimage.Dim(0); i++)
        {
            dimage[i, where] = 0x008000;
        }
    }
}
/// <summary>
/// Segments a line image using this object's own cut search: a
/// three-pixel-wide band along every best cut is cleared in an otherwise
/// filled image, the regions between cuts are labeled, restricted to the
/// foreground and propagated; optionally the result is combined with a
/// connected-component labeling.
/// </summary>
/// <param name="segmentation">Receives the resulting segmentation.</param>
/// <param name="inraw">Raw input line image; it is copied before binarization.</param>
public override void Charseg(ref Intarray segmentation, Bytearray inraw)
{
    setParams();
    //Logger.Default.Image("segmenting", inraw);
    int PADDING = 3; // not referenced below

    OcrRoutine.optional_check_background_is_lighter(inraw);
    Bytearray binary = new Bytearray();
    binary.Copy(inraw);
    OcrRoutine.binarize_simple(binary);
    OcrRoutine.Invert(binary);

    SetImage(binary);
    FindAllCuts();
    FindBestCuts();

    // Start from a fully set image and carve the cuts out of it.
    Intarray seg = new Intarray();
    seg.MakeLike(binary);
    seg.Fill(255);

    int width = seg.Dim(0);
    for (int r = 0; r < bestcuts.Length(); r++)
    {
        int cutIndex = bestcuts[r];
        Narray<Point> path = cuts[cutIndex];
        for (int y = 0; y < binary.Dim(1); y++)
        {
            int x = path[y].X;
            // Cuts touching the left/right border are not drawn.
            if (x < 1 || x >= width - 1)
            {
                continue;
            }
            for (int dx = -1; dx <= 1; dx++)
            {
                seg[x + dx, y] = 0;
            }
        }
    }

    ImgLabels.label_components(ref seg);
    // dshowr(seg,"YY"); dwait();

    // Keep region labels only where the binarized image has foreground.
    segmentation.Copy(binary);
    for (int pos = 0; pos < seg.Length1d(); pos++)
    {
        bool isBackground = segmentation.At1d(pos) == 0;
        if (isBackground)
        {
            seg.Put1d(pos, 0);
        }
    }
    ImgLabels.propagate_labels_to(ref segmentation, seg);

    if (PGeti("component_segmentation") > 0)
    {
        Intarray components = new Intarray();
        components.Copy(binary);
        ImgLabels.label_components(ref components);
        SegmRoutine.combine_segmentations(ref segmentation, components);
        if (PGeti("fix_diacritics") > 0)
        {
            SegmRoutine.fix_diacritics(segmentation);
        }
    }
#if false
    SegmRoutine.line_segmentation_merge_small_components(ref segmentation, small_merge_threshold);
    SegmRoutine.line_segmentation_sort_x(segmentation);
#endif
    SegmRoutine.make_line_segmentation_white(segmentation);
    // set_line_number(segmentation, 1);
    //Logger.Default.Image("resulting segmentation", segmentation);
}
/// <summary>
/// Copies the segment entry stored at <paramref name="index"/> into
/// <paramref name="result"/>.
/// </summary>
/// <param name="result">Array that receives the copy.</param>
/// <param name="index">Index into <c>segments</c>.</param>
public override void GetSegments(Intarray result, int index)
{
    var stored = segments[index];
    result.Copy(stored);
}
/// <summary>
/// Estimates <c>space_threshold</c> from the horizontal gaps between the
/// connected components of the current segmentation.
/// </summary>
public void EstimateSpaceSize()
{
    Intarray components = new Intarray();
    components.Copy(segmentation);
    ImgLabels.label_components(ref components);

    Narray<Rect> boxes = new Narray<Rect>();
    ImgLabels.bounding_boxes(ref boxes, components);

    // nearest[i]: smallest non-negative gap from box i's x1 to any box's x0;
    // entries that never find one keep the 99999 sentinel.
    Floatarray nearest = new Floatarray();
    nearest.Resize(boxes.Length());
    nearest.Fill(99999f);

    for (int i = 1; i < boxes.Length(); i++)
    {
        Rect current = boxes[i];
        for (int j = 1; j < boxes.Length(); j++)
        {
            Rect other = boxes[j];
            int gap = other.x0 - current.x1;
            if (gap >= 0 && gap < nearest[i])
            {
                nearest[i] = gap;
            }
        }
    }

    float interchar = NarrayUtil.Fractile(nearest, PGetf("space_fractile"));
    space_threshold = interchar * PGetf("space_multiplier");

    // Clamp to [space_min, space_max] multiples of the x-height.
    float xheight = 10.0f; // FIXME
    float lowerBound = PGetf("space_min") * xheight;
    space_threshold = Math.Max(space_threshold, lowerBound);
    float upperBound = PGetf("space_max") * xheight;
    space_threshold = Math.Min(space_threshold, upperBound);
}
/// <summary>
/// Recognizes a text line and additionally exposes the character segmentation
/// used, for those line recognizers that have one.
/// The segmentation contains colored pixels, and a transition in the transducer
/// of the form * --- 1/eps --> * --- 2/a --> * means that pixels with color
/// 1 and 2 together form the letter "a".
/// </summary>
/// <param name="segmentation_">Receives a copy of the current <c>segmentation</c>.</param>
/// <param name="result">Transducer that receives the lattice built by the grouper.</param>
/// <param name="image_">Line image; checked against the "maxheight" and "maxaspect" parameters.</param>
/// <returns>The value of <c>rate</c>, which this method never changes from 0.0.</returns>
public override double RecognizeLine(Intarray segmentation_, IGenericFst result, Bytearray image_)
{
    double rate = 0.0;
    // Validate the line dimensions before doing any work.
    CHECK_ARG(image_.Dim(1) < PGeti("maxheight"), String.Format("input line too high ({0} x {1})", image_.Dim(0), image_.Dim(1)));
    CHECK_ARG(image_.Dim(1) * 1.0 / image_.Dim(0) < PGetf("maxaspect"), String.Format("input line has bad aspect ratio ({0} x {1})", image_.Dim(0), image_.Dim(1)));
    bool use_reject = PGetb("use_reject") && !DisableJunk;
    //Console.WriteLine("IMG: imin:{0} imax:{1}", NarrayUtil.ArgMin(image_), NarrayUtil.ArgMax(image_));
    Bytearray image = new Bytearray();
    image.Copy(image_);
    // SetLine receives the original image; the optional inversion below only
    // affects the local copy used for feature extraction.
    SetLine(image_);
    if (PGeti("invert") > 0)
        NarrayUtil.Sub(NarrayUtil.Max(image), image);
    segmentation_.Copy(segmentation);
    // NOTE(review): available, cp, ccosts and props are created but not used below.
    Bytearray available = new Bytearray();
    Floatarray cp = new Floatarray();
    Floatarray ccosts = new Floatarray();
    Floatarray props = new Floatarray();
    OutputVector p = new OutputVector();
    int ncomponents = grouper.Object.Length();
    int minclass = PGeti("minclass");
    float minprob = PGetf("minprob");
    float space_yes = PGetf("space_yes");
    float space_no = PGetf("space_no");
    float maxcost = PGetf("maxcost");
    // compute priors if possible; fall back on
    // using no priors if no counts are available
    Floatarray priors = new Floatarray();
    bool use_priors = PGeti("use_priors") > 0;
    if (use_priors)
    {
        if (counts.Length() > 0)
        {
            // Normalize the counts so they sum to 1.
            priors.Copy(counts);
            priors /= NarrayUtil.Sum(priors);
        }
        else
        {
            // Warn only once per instance (counts_warned latches).
            if (!counts_warned)
                Global.Debugf("warn", "use_priors specified but priors unavailable (old model)");
            use_priors = false;
            counts_warned = true;
        }
    }
    EstimateSpaceSize();
    // Classify every group produced by the grouper.
    for (int i = 0; i < ncomponents; i++)
    {
        Rect b;
        Bytearray mask = new Bytearray();
        grouper.Object.GetMask(out b, ref mask, i, 0);
        Bytearray cv = new Bytearray();
        grouper.Object.ExtractWithMask(cv, mask, image, i, 0);
        //ImgIo.write_image_gray("extrmask_image.png", cv);
        // Scale the extracted bytes by 1/255 before classification.
        Floatarray v = new Floatarray();
        v.Copy(cv);
        v /= 255.0f;
        float ccost = classifier.Object.XOutputs(p, v);
        if (use_reject && classifier.Object.HigherOutputIsBetter)
        {
            // The classifier's own cost is discarded in this mode.
            ccost = 0;
            float total = p.Sum();
            if (total > 1e-11f)
            {
                //p /= total;
            }
            else
                p.Values.Fill(0.0f);
        }
        // count = number of classes actually registered for this group.
        int count = 0;
        Global.Debugf("dcost", "output {0}", p.Keys.Length());
        for (int index = 0; index < p.Keys.Length(); index++)
        {
            int j = p.Keys[index];
            if (j < minclass) continue;
            if (j == reject_class) continue;
            float value = p.Values[index];
            if (value <= 0.0f) continue;
            if (value < minprob) continue;
            // Outputs where higher is better are converted to a cost via -log;
            // otherwise the output is used as the cost directly.
            float pcost = classifier.Object.HigherOutputIsBetter ? (float)-Math.Log(value) : value;
            Global.Debugf("dcost", "{0} {1} {2}", j, pcost + ccost, (j > 32 ? (char)j : '_'));
            float total_cost = pcost + ccost;
            if (total_cost < maxcost)
            {
                if (use_priors)
                {
                    // Subtracts -log(prior), i.e. adds log(prior) to the cost.
                    total_cost -= (float)-Math.Log(priors[j]);
                }
                grouper.Object.SetClass(i, j, total_cost);
                count++;
            }
        }
        Global.Debugf("dcost", "");
        if (count == 0)
        {
            // Nothing was accepted: register a placeholder class instead.
            // '~' for boxes smaller than half of xheight in both dimensions,
            // '#' with a width-proportional cost otherwise.
            // NOTE(review): xheight is a hard-coded constant here.
            float xheight = 10.0f;
            if (b.Height() < xheight / 2 && b.Width() < xheight / 2)
            {
                grouper.Object.SetClass(i, (int)'~', high_cost / 2);
            }
            else
            {
                grouper.Object.SetClass(i, (int)'#', (b.Width() / xheight) * high_cost);
            }
        }
        // Groups followed by a gap wider than the estimated threshold get space costs.
        if (grouper.Object.PixelSpace(i) > space_threshold)
        {
            Global.Debugf("spaces", "space {0}", grouper.Object.PixelSpace(i));
            grouper.Object.SetSpaceCost(i, space_yes, space_no);
        }
    }
    grouper.Object.GetLattice(result);
    return rate;
}
/// <summary>
/// Segments a line image using this object's own cut search: a
/// three-pixel-wide band along every best cut is cleared in an otherwise
/// filled image, the regions between cuts are labeled, restricted to the
/// foreground and propagated; optionally the result is combined with a
/// connected-component labeling.
/// </summary>
/// <param name="segmentation">Receives the resulting segmentation.</param>
/// <param name="inraw">Raw input line image; it is copied before binarization.</param>
public override void Charseg(ref Intarray segmentation, Bytearray inraw)
{
    setParams();
    //Logger.Default.Image("segmenting", inraw);
    int PADDING = 3; // not referenced below

    OcrRoutine.optional_check_background_is_lighter(inraw);
    Bytearray binary = new Bytearray();
    binary.Copy(inraw);
    OcrRoutine.binarize_simple(binary);
    OcrRoutine.Invert(binary);

    SetImage(binary);
    FindAllCuts();
    FindBestCuts();

    // Start from a fully set image and carve the cuts out of it.
    Intarray seg = new Intarray();
    seg.MakeLike(binary);
    seg.Fill(255);

    int width = seg.Dim(0);
    for (int r = 0; r < bestcuts.Length(); r++)
    {
        int cutIndex = bestcuts[r];
        Narray<Point> path = cuts[cutIndex];
        for (int y = 0; y < binary.Dim(1); y++)
        {
            int x = path[y].X;
            // Cuts touching the left/right border are not drawn.
            if (x < 1 || x >= width - 1)
            {
                continue;
            }
            for (int dx = -1; dx <= 1; dx++)
            {
                seg[x + dx, y] = 0;
            }
        }
    }

    ImgLabels.label_components(ref seg);
    // dshowr(seg,"YY"); dwait();

    // Keep region labels only where the binarized image has foreground.
    segmentation.Copy(binary);
    for (int pos = 0; pos < seg.Length1d(); pos++)
    {
        bool isBackground = segmentation.At1d(pos) == 0;
        if (isBackground)
        {
            seg.Put1d(pos, 0);
        }
    }
    ImgLabels.propagate_labels_to(ref segmentation, seg);

    if (PGeti("component_segmentation") > 0)
    {
        Intarray components = new Intarray();
        components.Copy(binary);
        ImgLabels.label_components(ref components);
        SegmRoutine.combine_segmentations(ref segmentation, components);
        if (PGeti("fix_diacritics") > 0)
        {
            SegmRoutine.fix_diacritics(segmentation);
        }
    }
#if false
    SegmRoutine.line_segmentation_merge_small_components(ref segmentation, small_merge_threshold);
    SegmRoutine.line_segmentation_sort_x(segmentation);
#endif
    SegmRoutine.make_line_segmentation_white(segmentation);
    // set_line_number(segmentation, 1);
    //Logger.Default.Image("resulting segmentation", segmentation);
}
/// <summary>
/// Recognizes a text line and additionally exposes the character segmentation
/// used, for those line recognizers that have one.
/// The segmentation contains colored pixels, and a transition in the transducer
/// of the form * --- 1/eps --> * --- 2/a --> * means that pixels with color
/// 1 and 2 together form the letter "a".
/// </summary>
/// <param name="segmentation_">Receives a copy of the current <c>segmentation</c>.</param>
/// <param name="result">Transducer that receives the lattice built by the grouper.</param>
/// <param name="image_">Line image; checked against the "maxheight" and "maxaspect" parameters.</param>
/// <returns>The value of <c>rate</c>, which this method never changes from 0.0.</returns>
public override double RecognizeLine(Intarray segmentation_, IGenericFst result, Bytearray image_)
{
    double rate = 0.0;
    // Validate the line dimensions before doing any work.
    CHECK_ARG(image_.Dim(1) < PGeti("maxheight"), String.Format("input line too high ({0} x {1})", image_.Dim(0), image_.Dim(1)));
    CHECK_ARG(image_.Dim(1) * 1.0 / image_.Dim(0) < PGetf("maxaspect"), String.Format("input line has bad aspect ratio ({0} x {1})", image_.Dim(0), image_.Dim(1)));
    bool use_reject = PGetb("use_reject") && !DisableJunk;
    //Console.WriteLine("IMG: imin:{0} imax:{1}", NarrayUtil.ArgMin(image_), NarrayUtil.ArgMax(image_));
    Bytearray image = new Bytearray();
    image.Copy(image_);
    // SetLine receives the original image; the optional inversion below only
    // affects the local copy used for feature extraction.
    SetLine(image_);
    if (PGeti("invert") > 0)
    {
        NarrayUtil.Sub(NarrayUtil.Max(image), image);
    }
    segmentation_.Copy(segmentation);
    // NOTE(review): available, cp, ccosts and props are created but not used below.
    Bytearray available = new Bytearray();
    Floatarray cp = new Floatarray();
    Floatarray ccosts = new Floatarray();
    Floatarray props = new Floatarray();
    OutputVector p = new OutputVector();
    int ncomponents = grouper.Object.Length();
    int minclass = PGeti("minclass");
    float minprob = PGetf("minprob");
    float space_yes = PGetf("space_yes");
    float space_no = PGetf("space_no");
    float maxcost = PGetf("maxcost");
    // compute priors if possible; fall back on
    // using no priors if no counts are available
    Floatarray priors = new Floatarray();
    bool use_priors = PGeti("use_priors") > 0;
    if (use_priors)
    {
        if (counts.Length() > 0)
        {
            // Normalize the counts so they sum to 1.
            priors.Copy(counts);
            priors /= NarrayUtil.Sum(priors);
        }
        else
        {
            // Warn only once per instance (counts_warned latches).
            if (!counts_warned)
            {
                Global.Debugf("warn", "use_priors specified but priors unavailable (old model)");
            }
            use_priors = false;
            counts_warned = true;
        }
    }
    EstimateSpaceSize();
    // Classify every group produced by the grouper.
    for (int i = 0; i < ncomponents; i++)
    {
        Rect b;
        Bytearray mask = new Bytearray();
        grouper.Object.GetMask(out b, ref mask, i, 0);
        Bytearray cv = new Bytearray();
        grouper.Object.ExtractWithMask(cv, mask, image, i, 0);
        //ImgIo.write_image_gray("extrmask_image.png", cv);
        // Scale the extracted bytes by 1/255 before classification.
        Floatarray v = new Floatarray();
        v.Copy(cv);
        v /= 255.0f;
        float ccost = classifier.Object.XOutputs(p, v);
        if (use_reject && classifier.Object.HigherOutputIsBetter)
        {
            // The classifier's own cost is discarded in this mode.
            ccost = 0;
            float total = p.Sum();
            if (total > 1e-11f)
            {
                //p /= total;
            }
            else
            {
                p.Values.Fill(0.0f);
            }
        }
        // count = number of classes actually registered for this group.
        int count = 0;
        Global.Debugf("dcost", "output {0}", p.Keys.Length());
        for (int index = 0; index < p.Keys.Length(); index++)
        {
            int j = p.Keys[index];
            if (j < minclass)
            {
                continue;
            }
            if (j == reject_class)
            {
                continue;
            }
            float value = p.Values[index];
            if (value <= 0.0f)
            {
                continue;
            }
            if (value < minprob)
            {
                continue;
            }
            // Outputs where higher is better are converted to a cost via -log;
            // otherwise the output is used as the cost directly.
            float pcost = classifier.Object.HigherOutputIsBetter ? (float)-Math.Log(value) : value;
            Global.Debugf("dcost", "{0} {1} {2}", j, pcost + ccost, (j > 32 ? (char)j : '_'));
            float total_cost = pcost + ccost;
            if (total_cost < maxcost)
            {
                if (use_priors)
                {
                    // Subtracts -log(prior), i.e. adds log(prior) to the cost.
                    total_cost -= (float)-Math.Log(priors[j]);
                }
                grouper.Object.SetClass(i, j, total_cost);
                count++;
            }
        }
        Global.Debugf("dcost", "");
        if (count == 0)
        {
            // Nothing was accepted: register a placeholder class instead.
            // '~' for boxes smaller than half of xheight in both dimensions,
            // '#' with a width-proportional cost otherwise.
            // NOTE(review): xheight is a hard-coded constant here.
            float xheight = 10.0f;
            if (b.Height() < xheight / 2 && b.Width() < xheight / 2)
            {
                grouper.Object.SetClass(i, (int)'~', high_cost / 2);
            }
            else
            {
                grouper.Object.SetClass(i, (int)'#', (b.Width() / xheight) * high_cost);
            }
        }
        // Groups followed by a gap wider than the estimated threshold get space costs.
        if (grouper.Object.PixelSpace(i) > space_threshold)
        {
            Global.Debugf("spaces", "space {0}", grouper.Object.PixelSpace(i));
            grouper.Object.SetSpaceCost(i, space_yes, space_no);
        }
    }
    grouper.Object.GetLattice(result);
    return(rate);
}
/// <summary>
/// Intended to compute, for each recognition candidate, a pair of costs for
/// putting / not putting a space after it. The implementation is unfinished:
/// it only searches for a closing radius and then returns <c>null</c>.
/// </summary>
/// <param name="candidates">Character recognition candidates (not read by the current code).</param>
/// <param name="image">Image of the corresponding text line.</param>
/// <returns>Always <c>null</c> in the current implementation.</returns>
public List <List <float> > SpaceCosts(List <Candidate> candidates, Bytearray image)
{
    /*
     * Idea of the algorithm: apply larger and larger closing operations until
     * most components have a "wide" aspect ratio; at that point characters
     * have merged into words and the remaining whitespace should be spaces.
     *
     * This is a simple stopgap measure, to be replaced with trainable
     * space modeling.
     */
    int w = image.Dim(0); // read but not used below
    int h = image.Dim(1); // read but not used below

    Bytearray closed = new Bytearray();
    int r;
    for (r = 0; r < maxrange; r++)
    {
        // Radius 0 means "no closing": work on a plain copy of the line.
        closed.Copy(image);
        if (r > 0)
        {
            Morph.binary_close_circle(closed, r);
        }

        Intarray components = new Intarray();
        components.Copy(closed);
        ImgLabels.label_components(ref components);

        Narray<Rect> rects = new Narray<Rect>();
        ImgLabels.bounding_boxes(ref rects, components);

        Floatarray aspects = new Floatarray();
        for (int k = 0; k < rects.Length(); k++)
        {
            aspects.Push(rects[k].Aspect());
        }

        // Stop at the first radius whose median aspect reaches the threshold.
        float medianAspect = NarrayUtil.Median(aspects);
        if (medianAspect >= this.aspect_threshold)
        {
            break;
        }
    }

    // Close once more with a little bit of extra space.
    closed.Copy(image);
    Morph.binary_close_circle(closed, r + 1);

    // TODO (unimplemented): compute the remaining gaps
    //Morph.binary_dilate_circle();
    // TODO (unimplemented): every character box that ends near a gap gets a space appended
    return(null);
}