/// <summary>
/// Click handler: runs the line segmenter whose component name is stored in the
/// sender's Tag on the current book line and displays the resulting segmentation.
/// </summary>
/// <param name="sender">Control whose Tag holds the segmenter component name.</param>
/// <param name="e">Event data (not used).</param>
private void ProcessSegmentationMethod(object sender, RoutedEventArgs e)
{
    // Without a control carrying a segmenter name in its Tag there is nothing to run;
    // bail out instead of failing with a NullReferenceException.
    Control control = sender as Control;
    if (control == null || control.Tag == null)
    {
        return;
    }

    string segmenterName = control.Tag.ToString();
    ISegmentLine segmenter = ComponentCreator.MakeComponent<ISegmentLine>(segmenterName);
    if (segmenter == null || currBookLine == null)
    {
        return;
    }

    // convert to black and white (source and destination are the same array);
    // a BinarizeByOtsu-based variant was previously tried here and is disabled
    Bytearray image = currBookLine.ImageBytearray;
    OcrRoutine.binarize_simple(image, image);

    // segmentation
    Intarray charseg = new Intarray();
    segmenter.Charseg(ref charseg, image);

    // background equals 0
    SegmRoutine.make_line_segmentation_black(charseg);

    // remove small segments
    SegmRoutine.remove_small_components(charseg, 3, 3);
    ImgLabels.renumber_labels(charseg, 1);

    currBookLine.CharsegIntarray = charseg;
    CurrSegmentsCount = NarrayUtil.Max(charseg);

    // Show segmented image; the transcript is passed along only when its length
    // equals the number of segments, otherwise an empty string is used.
    bool transcriptFits = currBookLine.HaveTranscript
        && CurrSegmentsCount == currBookLine.Transcript.Length;
    ShowCharsegImage(charseg, transcriptFits ? currBookLine.Transcript : "");

    // to enable save button
    EnableCharsegCmdButtons();
}
/// <summary>
/// Sets to 0 every labeled component whose bounding box is both narrower than
/// <paramref name="w"/> and lower than <paramref name="h"/>.
/// </summary>
/// <param name="segmentation">Label image, modified in place.</param>
/// <param name="w">Width limit; boxes with Width() below it qualify.</param>
/// <param name="h">Height limit; boxes with Height() below it qualify.</param>
/// <exception cref="Exception">The largest label exceeds 100000.</exception>
public static void remove_small_components(Intarray segmentation, int w = 5, int h = 4)
{
    if (NarrayUtil.Max(segmentation) > 100000)
    {
        throw new Exception("remove_small_components: to many segments");
    }

    Narray<Rect> boxes = new Narray<Rect>();
    ImgLabels.bounding_boxes(ref boxes, segmentation);

    // Index 0 is skipped; only labels 1.. are candidates for removal.
    for (int label = 1; label < boxes.Length(); label++)
    {
        Rect box = boxes[label];
        bool isSmall = box.Width() < w && box.Height() < h;
        if (!isSmall)
        {
            continue;
        }

        // Clear only the pixels of this label; other labels inside the box stay.
        for (int col = box.x0; col < box.x1; col++)
        {
            for (int row = box.y0; row < box.y1; row++)
            {
                if (segmentation[col, row] == label)
                {
                    segmentation[col, row] = 0;
                }
            }
        }
    }
}
/// <summary>
/// Binarizes and segments a line image, cleans the segmentation up and passes it
/// to the grouper.
/// </summary>
/// <param name="image">Line image; image.Dim(1) must be below the "maxheight" parameter.</param>
public void SetLine(Bytearray image)
{
    int lineHeight = image.Dim(1);
    int heightLimit = PGeti("maxheight");
    CHECK_ARG(lineHeight < heightLimit, "image.Dim(1) < PGeti(\"maxheight\")");

    // run the segmenter on the binarized line
    OcrRoutine.binarize_simple(binarized, image);
    segmenter.Object.Charseg(ref segmentation, binarized);

    // post-processing of the raw segmentation
    SegmRoutine.make_line_segmentation_black(segmentation);
    // (the original author's note marks this call as a local addition)
    SegmRoutine.remove_small_components(segmentation, 3, 3);
    ImgLabels.renumber_labels(segmentation, 1);

    // set up the grouper
    grouper.Object.SetSegmentation(segmentation);
}
/// <summary>
/// Sanity check on a label image: throws when any label value exceeds 100000.
/// </summary>
/// <remarks>
/// The left-to-right ordering test on the bounding boxes is compiled out
/// (<c>#if false</c>); the boxes are still computed.
/// </remarks>
/// <param name="labels">Label image to check.</param>
/// <exception cref="Exception">A label value is greater than 100000.</exception>
public static void check_approximately_sorted(Intarray labels)
{
    int pixelCount = labels.Length1d();
    for (int k = 0; k < pixelCount; k++)
    {
        int value = labels.At1d(k);
        if (value > 100000)
        {
            throw new Exception("labels out of range");
        }
    }

    Narray<Rect> rboxes = new Narray<Rect>();
    ImgLabels.bounding_boxes(ref rboxes, labels);
#if false
    // TODO/tmb disabling check until the overseg issue is fixed --tmb
    for (int i = 1; i < rboxes.Length(); i++)
    {
        if (rboxes[i].Right < rboxes[i - 1].Left)
        {
            /*errors_log("bad segmentation", labels);
             * errors_log.recolor("bad segmentation (recolored)", labels);
             * throw_fmt("boxes aren't approximately sorted: "
             *           "box %d is to the left from box %d", i, i-1);*/
        }
    }
#endif
}
/// <summary>
/// Segments a line by labeling connected components of the binarized, inverted
/// image, optionally after a rectangular morphological closing.
/// </summary>
/// <param name="outimage">Receives the segmentation.</param>
/// <param name="inimage">Input line image; it is copied, not modified.</param>
public override void Charseg(ref Intarray outimage, Bytearray inimage)
{
    int closeWidth = PGeti("swidth");
    int closeHeight = PGeti("sheight");

    Bytearray work = new Bytearray();
    work.Copy(inimage);
    OcrRoutine.binarize_simple(work);
    OcrRoutine.Invert(work);

    // The output starts as the un-closed mask, so only its non-zero pixels get labels below.
    outimage.Copy(work);

    bool applyClosing = closeWidth > 0 || closeHeight > 0;
    if (applyClosing)
    {
        Morph.binary_close_rect(work, closeWidth, closeHeight);
    }

    Intarray componentLabels = new Intarray();
    componentLabels.Copy(work);
    ImgLabels.label_components(ref componentLabels);

    int pixelCount = outimage.Length1d();
    for (int px = 0; px < pixelCount; px++)
    {
        if (outimage.At1d(px) <= 0)
        {
            continue;
        }
        outimage.Put1d(px, SegmRoutine.cseg_pixel(componentLabels.At1d(px)));
    }

    SegmRoutine.make_line_segmentation_white(outimage);
    SegmRoutine.check_line_segmentation(outimage);
}
/// <summary>
/// Recognizes "line.png" with "default.model", prints the FST best path and the
/// beam-search result (language model "default.fst"), then writes the recolored
/// character segmentation to "line.cseg.png" and the transcript to "line.txt".
/// </summary>
public void TestSimple()
{
    Global.SetEnv("debug", Global.GetEnv("debug") + "");

    // input and output file names
    string lineImagePath = "line.png";
    string csegImagePath = "line.cseg.png";
    string transcriptPath = "line.txt";

    // line recognizer
    Linerec recognizer = (Linerec)Linerec.LoadLinerec("default.model");
    recognizer.Info();

    // language model
    OcroFST languageModel = OcroFST.MakeOcroFst();
    languageModel.Load("default.fst");

    // read image
    Bytearray lineImage = new Bytearray(1, 1);
    ImgIo.read_image_gray(lineImage, lineImagePath);

    // recognize
    OcroFST lattice = OcroFST.MakeOcroFst();
    Intarray rseg = new Intarray();
    recognizer.RecognizeLine(rseg, lattice, lineImage);

    // result 1: best path through the recognition lattice alone
    string text;
    lattice.BestPath(out text);
    Console.WriteLine("Fst BestPath: {0}", text);

    // result 2: beam search of the lattice against the language model
    Intarray vertices1 = new Intarray();
    Intarray vertices2 = new Intarray();
    Intarray inputs = new Intarray();
    Intarray outputs = new Intarray();
    Floatarray costs = new Floatarray();
    BeamSearch.beam_search(vertices1, vertices2, inputs, outputs, costs, lattice, languageModel, 100);
    FstUtil.remove_epsilons(out text, outputs);
    Console.WriteLine("Fst BeamSearch: {0}", text);

    // turn the raw segmentation into a character segmentation and save it
    Intarray cseg = new Intarray();
    SegmRoutine.rseg_to_cseg(cseg, rseg, inputs);
    SegmRoutine.make_line_segmentation_white(cseg);
    ImgLabels.simple_recolor(cseg); // for human readable
    ImgIo.write_image_packed(csegImagePath, cseg);

    // transcript is stored with spaces stripped
    string compactText = text.Replace(" ", "");
    File.WriteAllText(transcriptPath, compactText);
}
/// <summary>
/// Segments a line by labeling connected components and then giving components
/// whose bounding boxes overlap horizontally by at least half of the narrower
/// box the same label.
/// </summary>
/// <param name="outimage">Receives the segmentation.</param>
/// <param name="inarray">Input line image; it is copied, not modified.</param>
public override void Charseg(ref Intarray outimage, Bytearray inarray)
{
    Bytearray work = new Bytearray();
    work.Copy(inarray);
    OcrRoutine.binarize_simple(work);
    OcrRoutine.Invert(work);
    outimage.Copy(work);

    Intarray labels = new Intarray();
    labels.Copy(work);
    ImgLabels.label_components(ref labels);

    Narray<Rect> boxes = new Narray<Rect>();
    ImgLabels.bounding_boxes(ref boxes, labels);
    int boxCount = boxes.Length();

    // equiv[k] is the label that component k is rewritten to; identity to start with.
    Intarray equiv = new Intarray(boxCount);
    for (int k = 0; k < boxCount; k++)
    {
        equiv[k] = k;
    }

    // Index 0 is skipped in both loops.
    for (int i = 1; i < boxCount; i++)
    {
        Rect p = boxes[i];
        for (int j = 1; j < boxCount; j++)
        {
            if (j == i)
            {
                continue;
            }

            Rect q = boxes[j];
            int sharedWidth = Math.Min(p.x1, q.x1) - Math.Max(p.x0, q.x0);
            if (sharedWidth <= 0)
            {
                continue; // no overlap
            }

            int narrowerWidth = Math.Min(p.Width(), q.Width());
            if (sharedWidth / (float)narrowerWidth < 0.5f)
            {
                continue; // insufficient overlap
            }

            // The larger index is redirected to the smaller one.
            equiv.Put1d(Math.Max(i, j), Math.Min(i, j));
        }
    }

    for (int k = 0; k < labels.Length(); k++)
    {
        int merged = equiv.At1d(labels.At1d(k));
        labels.Put1d(k, merged);
    }
    ImgLabels.renumber_labels(labels, 1);

    outimage.Move(labels);
    SegmRoutine.make_line_segmentation_white(outimage);
    SegmRoutine.check_line_segmentation(outimage);
}
/// <summary>
/// Combines two segmentations pixel by pixel: each distinct (dst, src) label pair
/// gets its own value, and the result is renumbered starting at 1.
/// </summary>
/// <param name="dst">First segmentation; overwritten with the combined result.</param>
/// <param name="src">Second segmentation.</param>
public static void combine_segmentations(ref Intarray dst, Intarray src)
{
    // NOTE(review): the result of SameDims is discarded here, so this call looks
    // like it does not enforce equal dimensions - confirm against Narray.SameDims.
    dst.SameDims(src);

    // Multiplying src by (max dst label + 1) keeps the pairs distinct.
    int stride = NarrayUtil.Max(dst) + 1;
    int pixelCount = dst.Length1d();
    for (int k = 0; k < pixelCount; k++)
    {
        int combined = dst.At1d(k) + src.At1d(k) * stride;
        dst.Put1d(k, combined);
    }

    ImgLabels.renumber_labels(dst, 1);
}
/// <summary>
/// Segments a line using the cuts chosen by the wrapped cut segmenter: a
/// three-pixel-wide band along each best cut is zeroed, the remaining regions are
/// labeled, and the labels are propagated back onto the binarized image.
/// </summary>
/// <param name="segmentation">Receives the segmentation.</param>
/// <param name="inraw">Input line image; it is copied before binarization.</param>
public override void Charseg(ref Intarray segmentation, Bytearray inraw)
{
    Logger.Default.Image("segmenting", inraw);
    OcrRoutine.optional_check_background_is_lighter(inraw);

    Bytearray image = new Bytearray();
    image.Copy(inraw);
    OcrRoutine.binarize_simple(image);
    OcrRoutine.Invert(image);

    segmenter.SetImage(image);
    segmenter.FindAllCuts();
    segmenter.FindBestCuts();

    Intarray seg = new Intarray();
    seg.Copy(image);
    int width = seg.Dim(0);
    int height = image.Dim(1);

    for (int r = 0; r < segmenter.bestcuts.Length(); r++)
    {
        Narray<Point> cut = segmenter.cuts[segmenter.bestcuts[r]];
        for (int y = 0; y < height; y++)
        {
            int x = cut[y].X;
            // Cut points on the left/right border columns are skipped entirely.
            if (x < 1 || x >= width - 1)
            {
                continue;
            }
            for (int dx = -1; dx <= 1; dx++)
            {
                seg[x + dx, y] = 0;
            }
        }
    }
    ImgLabels.label_components(ref seg);

    segmentation.Copy(image);
    ImgLabels.propagate_labels_to(ref segmentation, seg);

    SegmRoutine.line_segmentation_merge_small_components(ref segmentation, small_merge_threshold);
    SegmRoutine.line_segmentation_sort_x(segmentation);
    SegmRoutine.make_line_segmentation_white(segmentation);

    Logger.Default.Image("resulting segmentation", segmentation);
}
/// <summary>
/// Runs the base segmenter and combines its result with a connected-component
/// labeling of the binarized, inverted image.
/// </summary>
/// <param name="result_segmentation">Receives the combined segmentation.</param>
/// <param name="orig_image">Input line image; it is copied, not modified here.</param>
public override void Charseg(ref Intarray result_segmentation, Bytearray orig_image)
{
    // Binarized, inverted working copy of the input.
    Bytearray binary = new Bytearray();
    binary.Copy(orig_image);
    OcrRoutine.optional_check_background_is_lighter(binary);
    OcrRoutine.binarize_simple(binary);
    OcrRoutine.Invert(binary);

    // Connected-component labeling of that copy.
    Intarray componentLabels = new Intarray();
    componentLabels.Copy(binary);
    ImgLabels.label_components(ref componentLabels);

    // The base segmentation works from the untouched original image.
    base.Charseg(ref result_segmentation, orig_image);

    SegmRoutine.combine_segmentations(ref result_segmentation, componentLabels);
}
/// <summary>
/// Renumbers the non-zero labels of a line segmentation so that they follow the
/// order of the left edge (x0) of each label's bounding box.
/// </summary>
/// <param name="segmentation">Label image, modified in place; 0 pixels are left untouched.</param>
/// <exception cref="Exception">The largest label exceeds 100000.</exception>
public static void line_segmentation_sort_x(Intarray segmentation)
{
    if (NarrayUtil.Max(segmentation) > 100000)
    {
        // The message previously named line_segmentation_merge_small_components,
        // which pointed at the wrong routine when this check failed.
        throw new Exception("line_segmentation_sort_x: too many segments");
    }

    Narray<Rect> bboxes = new Narray<Rect>();
    ImgLabels.bounding_boxes(ref bboxes, segmentation);

    // Sort keys: index 0 gets a very small key, empty boxes a very large one,
    // every other label its left edge.
    Floatarray x0s = new Floatarray();
    x0s.Push(-999999f);
    for (int i = 1; i < bboxes.Length(); i++)
    {
        if (bboxes[i].Empty())
        {
            x0s.Push(999999);
        }
        else
        {
            x0s.Push(bboxes[i].x0);
        }
    }

    Narray<int> permutation = new Intarray();
    Narray<int> rpermutation = new Intarray();
    NarrayUtil.Quicksort(permutation, x0s);

    // Invert the permutation: rpermutation[old label] = position in sorted order.
    rpermutation.Resize(permutation.Length());
    for (int i = 0; i < permutation.Length(); i++)
    {
        rpermutation[permutation[i]] = i;
    }

    for (int i = 0; i < segmentation.Length1d(); i++)
    {
        int label = segmentation.At1d(i);
        if (label == 0)
        {
            continue;
        }
        segmentation.Put1d(i, rpermutation[label]);
    }
}
/// <summary>
/// Renders a character segmentation as a recolored bitmap with segment numbers
/// and, optionally, transcript characters drawn on it.
/// </summary>
/// <param name="charseg">Segmentation to render; a copy is recolored, the argument is left as is.</param>
/// <param name="trans">Transcript passed to the transcript drawing step.</param>
/// <returns>The annotated bitmap.</returns>
public static Bitmap ConvertCharsegToBitmapRecolor(Intarray charseg, string trans = "")
{
    Intarray recolored = new Intarray();
    recolored.Copy(charseg);

    // Boxes are taken before the white-background conversion and recoloring below.
    Narray<Rect> bboxes = new Narray<Rect>();
    ImgLabels.bounding_boxes(ref bboxes, recolored);

    SegmRoutine.make_line_segmentation_white(recolored);
    ImgLabels.simple_recolor(recolored);

    var canvas = ImgRoutine.NarrayToRgbBitmap(recolored);
    var numbered = DrawSegmentNumbers(canvas, bboxes);
    return DrawSegmentTranscript(numbered, bboxes, trans);
}
/// <summary>
/// Thresholds the input, labels the connected components of the thresholded image
/// and overwrites with the input's minimum value every pixel whose component has
/// no more than <paramref name="mins"/> times the pixel count of the largest one.
/// </summary>
/// <param name="input">Image, modified in place.</param>
/// <param name="mins">Fraction of the largest component's pixel count a component must exceed to be kept.</param>
/// <param name="thresh">Fraction of the input maximum used as the binarization threshold.</param>
public static void erase_small_components(Floatarray input, float mins = 0.2f, float thresh = 0.25f)
{
    // thresholded image for component labeling
    float cutoff = thresh * NarrayUtil.Max(input);
    Intarray components = new Intarray();
    components.MakeLike(input);
    components.Fill(0);
    for (int k = 0; k < components.Length(); k++)
    {
        if (input[k] > cutoff)
        {
            components[k] = 1;
        }
        else
        {
            components[k] = 0;
        }
    }

    // number of pixels in each component; index 0 is zeroed so it never counts as biggest
    int n = ImgLabels.label_components(ref components);
    Intarray totals = new Intarray(n + 1);
    totals.Fill(0);
    for (int k = 0; k < components.Length(); k++)
    {
        totals[components[k]]++;
    }
    totals[0] = 0;
    int biggest = NarrayUtil.ArgMax(totals);

    // erase everything whose component is not strictly above the size limit
    float minsize = mins * totals[biggest];
    float background = NarrayUtil.Min(input);
    for (int k = 0; k < input.Length(); k++)
    {
        bool keepPixel = totals[components[k]] > minsize;
        if (!keepPixel)
        {
            input[k] = background;
        }
    }
}
/// <summary>
/// Estimates <c>space_threshold</c> from the horizontal gaps between connected
/// components of the current segmentation: for each component the smallest
/// non-negative gap to another box is taken, a fractile of those gaps is scaled by
/// "space_multiplier", and the result is clamped by "space_min"/"space_max".
/// </summary>
public void EstimateSpaceSize()
{
    Intarray labels = new Intarray();
    labels.Copy(segmentation);
    ImgLabels.label_components(ref labels);

    Narray<Rect> boxes = new Narray<Rect>();
    ImgLabels.bounding_boxes(ref boxes, labels);
    int boxCount = boxes.Length();

    // Every entry, including index 0 which is never updated, starts at 99999.
    Floatarray distances = new Floatarray();
    distances.Resize(boxCount);
    distances.Fill(99999f);

    for (int i = 1; i < boxCount; i++)
    {
        int rightEdge = boxes[i].x1;
        for (int j = 1; j < boxCount; j++)
        {
            int gap = boxes[j].x0 - rightEdge;
            // Only boxes starting at or after this box's right edge count.
            if (gap >= 0 && gap < distances[i])
            {
                distances[i] = gap;
            }
        }
    }

    float interchar = NarrayUtil.Fractile(distances, PGetf("space_fractile"));
    space_threshold = interchar * PGetf("space_multiplier");

    // impose some reasonable upper and lower bounds
    float xheight = 10.0f; // FIXME
    space_threshold = Math.Max(space_threshold, PGetf("space_min") * xheight);
    space_threshold = Math.Min(space_threshold, PGetf("space_max") * xheight);
}
/// <summary>
/// Segments a line from its skeleton: a copy of the image is binarized, thinned
/// and labeled, and the labels are propagated onto the image.
/// </summary>
/// <param name="segmentation">Receives the segmentation.</param>
/// <param name="image">Input line image. NOTE: it is inverted in place by this method.</param>
public override void Charseg(ref Intarray segmentation, Bytearray image)
{
    Bytearray skeleton = new Bytearray();
    skeleton.Copy(image);
    OcrRoutine.binarize_simple(skeleton);

    // NOTE(review): this inverts the caller's image, not the working copy. The
    // sibling segmenters invert their copy instead - confirm this is intended.
    OcrRoutine.Invert(image);

    Skeleton.Thin(ref skeleton);
    ImgMisc.remove_singular_points(ref skeleton, 2);

    // Label the skeleton pieces and drop those smaller than 4x4.
    Intarray skeletonLabels = new Intarray();
    skeletonLabels.Copy(skeleton);
    ImgLabels.label_components(ref skeletonLabels);
    SegmRoutine.remove_small_components(skeletonLabels, 4, 4);

    // Propagate the skeleton labels onto the (inverted) input image.
    segmentation.Copy(image);
    ImgLabels.propagate_labels_to(ref segmentation, skeletonLabels);
}
/// <summary>
/// Resets to <c>default(T)</c> every pixel of <paramref name="bimage"/> that is
/// either unlabeled (label 0) or belongs to a connected component whose bounding
/// box is both narrower than <paramref name="mw"/> and lower than <paramref name="mh"/>.
/// </summary>
/// <typeparam name="T">Pixel type of the image.</typeparam>
/// <param name="bimage">Image, modified in place.</param>
/// <param name="mw">Width limit; boxes with Width() below it qualify.</param>
/// <param name="mh">Height limit; boxes with Height() below it qualify.</param>
public static void remove_small_components<T>(Narray<T> bimage, int mw, int mh)
{
    Intarray labels = new Intarray();
    labels.Copy(bimage);
    ImgLabels.label_components(ref labels);

    Narray<Rect> rects = new Narray<Rect>();
    ImgLabels.bounding_boxes(ref rects, labels);

    // good[k] == 1 means component k is kept.
    Bytearray good = new Bytearray(rects.Length());
    for (int k = 0; k < good.Length(); k++)
    {
        bool tooSmall = rects[k].Width() < mw && rects[k].Height() < mh;
        good[k] = (byte)(tooSmall ? 0 : 1);
    }

    for (int px = 0; px < labels.Length1d(); px++)
    {
        int label = labels.At1d(px);
        if (good[label] == 0 || label == 0)
        {
            bimage.Put1d(px, default(T)); // default(T) - 0
        }
    }
}
/// <summary>
/// Builds a mask of "holes": the image is subtracted from 255 and labeled, the
/// first non-zero label found in row 0 is treated as the background, and every
/// other non-zero label is marked 255 in <paramref name="holes"/>.
/// </summary>
/// <param name="holes">Receives the mask (sized like the input, 0 elsewhere).</param>
/// <param name="binarized">Binarized input image.</param>
/// <exception cref="Exception">No non-zero label is found in row 0.</exception>
public static void extract_holes(ref Bytearray holes, Bytearray binarized)
{
    Intarray regions = new Intarray();
    regions.Copy(binarized);
    NarrayUtil.Sub(255, regions);
    ImgLabels.label_components(ref regions);

    // Scan row 0 for the first labeled pixel.
    int background = -1;
    int width = regions.Dim(0);
    int x = 0;
    while (x < width)
    {
        int label = regions[x, 0];
        if (label != 0)
        {
            background = label;
            break;
        }
        x++;
    }

    // The output is sized and cleared before the failure check below.
    holes.MakeLike(regions);
    holes.Fill((byte)0);
    if (background <= 0)
    {
        throw new Exception("extract_holes: background must be more 0");
    }

    int height = regions.Dim(1);
    for (int col = 0; col < width; col++)
    {
        for (int row = 0; row < height; row++)
        {
            int label = regions[col, row];
            bool isHole = label > 0 && label != background;
            if (isHole)
            {
                holes[col, row] = 255;
            }
        }
    }
}
/// <summary>
/// Compute the groups for a segmentation (internal method): for every start
/// segment, runs of 1..maxrange consecutive segments are collected, provided no
/// horizontal gap between neighbours inside the run exceeds maxdist.
/// </summary>
private void computeGroups()
{
    rboxes.Clear();
    ImgLabels.bounding_boxes(ref rboxes, labels);
    int n = rboxes.Length();

    // NB: we start at 1 because index 0 is the background
    for (int first = 1; first < n; first++)
    {
        for (int range = 1; range <= maxrange; range++)
        {
            int end = first + range;
            if (end > n)
            {
                // Longer runs cannot fit either.
                break;
            }

            // NOTE(review): if Rect is a reference type, Include below also
            // changes rboxes[first] - confirm against the Rect declaration.
            Rect box = rboxes.At1d(first);
            Intarray seg = new Intarray();
            bool bad = false;
            for (int j = first; j < end; j++)
            {
                bool gapTooWide = j > first
                    && rboxes.At1d(j).x0 - rboxes.At1d(j - 1).x1 > maxdist;
                if (gapTooWide)
                {
                    bad = true;
                    break;
                }
                box.Include(rboxes.At1d(j));
                seg.Push(j);
            }

            if (!bad)
            {
                boxes.Push(box);
                segments.Push(seg);
            }
        }
    }
}
/// <summary>
/// Segments a line along the best cuts found by this segmenter: a three-pixel-wide
/// band along each best cut splits a filled image into regions, the regions are
/// labeled and restricted to foreground pixels, and the labels are propagated onto
/// the binarized image. Optionally combined with connected components.
/// </summary>
/// <param name="segmentation">Receives the segmentation.</param>
/// <param name="inraw">Input line image; it is copied before binarization.</param>
public override void Charseg(ref Intarray segmentation, Bytearray inraw)
{
    setParams();
    OcrRoutine.optional_check_background_is_lighter(inraw);

    Bytearray image = new Bytearray();
    image.Copy(inraw);
    OcrRoutine.binarize_simple(image);
    OcrRoutine.Invert(image);

    SetImage(image);
    FindAllCuts();
    FindBestCuts();

    // Start from an all-255 image and zero a band along every best cut.
    Intarray seg = new Intarray();
    seg.MakeLike(image);
    seg.Fill(255);
    int width = seg.Dim(0);
    int height = image.Dim(1);
    for (int r = 0; r < bestcuts.Length(); r++)
    {
        Narray<Point> cut = cuts[bestcuts[r]];
        for (int y = 0; y < height; y++)
        {
            int x = cut[y].X;
            // Cut points on the left/right border columns are skipped entirely.
            if (x < 1 || x >= width - 1)
            {
                continue;
            }
            for (int dx = -1; dx <= 1; dx++)
            {
                seg[x + dx, y] = 0;
            }
        }
    }
    ImgLabels.label_components(ref seg);

    // Keep region labels only where the binarized image is non-zero.
    segmentation.Copy(image);
    int pixelCount = seg.Length1d();
    for (int k = 0; k < pixelCount; k++)
    {
        if (segmentation.At1d(k) == 0)
        {
            seg.Put1d(k, 0);
        }
    }
    ImgLabels.propagate_labels_to(ref segmentation, seg);

    bool useComponents = PGeti("component_segmentation") > 0;
    if (useComponents)
    {
        Intarray ccseg = new Intarray();
        ccseg.Copy(image);
        ImgLabels.label_components(ref ccseg);
        SegmRoutine.combine_segmentations(ref segmentation, ccseg);
        if (PGeti("fix_diacritics") > 0)
        {
            SegmRoutine.fix_diacritics(segmentation);
        }
    }
#if false
    SegmRoutine.line_segmentation_merge_small_components(ref segmentation, small_merge_threshold);
    SegmRoutine.line_segmentation_sort_x(segmentation);
#endif
    SegmRoutine.make_line_segmentation_white(segmentation);
}
/// <summary>
/// Create char segmentation (cseg) files if missing: every line that has an image
/// but no cseg is recognized, and depending on how the result compares with the
/// stored transcript either the transcript, the cseg or an rseg image is saved.
/// </summary>
/// <param name="bookPath">path to bookstore</param>
/// <param name="model">Linerec model file</param>
/// <param name="suffix">e.g., 'gt'</param>
/// <param name="saveRseg">when true, lines whose recognized text differs from the transcript get a recolored rseg image written</param>
/// <param name="langModel">language model file</param>
public void ComputeMissingCsegForBookStore(string bookPath, string model = "default.model", string suffix = "", bool saveRseg = false, string langModel = "default.fst")
{
    // create line recognizer
    Linerec linerec = Linerec.LoadLinerec(model);

    // create IBookStore
    IBookStore bookstore = new SmartBookStore();
    bookstore.SetPrefix(bookPath);
    bookstore.Info();

    // language model
    OcroFST lmodel = OcroFST.MakeOcroFst();
    lmodel.Load(langModel);

    // iterate lines of pages
    for (int page = 0; page < bookstore.NumberOfPages(); page++)
    {
        int lineCount = bookstore.LinesOnPage(page);
        Console.WriteLine("Page {0} has {1} lines", page, lineCount);

        for (int index = 0; index < lineCount; index++)
        {
            int line = bookstore.GetLineId(page, index);

            Bytearray image = new Bytearray();
            bookstore.GetLine(image, page, line);
            Intarray cseg = new Intarray();
            bookstore.GetCharSegmentation(cseg, page, line, suffix);

            // only lines with an image but without a cseg are processed
            bool csegMissing = cseg.Length() <= 0 && image.Length() > 0;
            if (!csegMissing)
            {
                continue;
            }

            // recognize line
            OcroFST fst = OcroFST.MakeOcroFst();
            Intarray rseg = new Intarray();
            linerec.RecognizeLine(rseg, fst, image);

            // find best results
            string resText;
            Intarray inp = new Intarray();
            Floatarray costs = new Floatarray();
            double totalCost = BeamSearch.beam_search(out resText, inp, costs, fst, lmodel, 100);
            Console.WriteLine(bookstore.PathFile(page, line, suffix));
            Console.Write(" beam_search score: {0}", totalCost);

            // compare with the stored transcript (recognized text has spaces stripped)
            string trans;
            bookstore.GetLine(out trans, page, line, suffix);
            resText = resText.Replace(" ", "");

            if (String.IsNullOrEmpty(trans))
            {
                // no transcript yet: store the recognized text
                bookstore.PutLine(resText, page, line, suffix);
                Console.Write("; transcript saved");
            }
            else if (trans == resText)
            {
                // convert inputs and rseg to cseg
                SegmRoutine.rseg_to_cseg(cseg, rseg, inp);
                bookstore.PutCharSegmentation(cseg, page, line, suffix);
                Console.Write("; cseg saved");
            }
            else if (saveRseg)
            {
                // convert inputs and rseg to cseg, then save a recolored image
                // under an "rseg" (plus optional suffix) name instead of the cseg
                SegmRoutine.rseg_to_cseg(cseg, rseg, inp);
                SegmRoutine.make_line_segmentation_white(cseg);
                ImgLabels.simple_recolor(cseg);

                string variant = "rseg";
                if (!String.IsNullOrEmpty(suffix))
                {
                    variant += ".";
                    variant += suffix;
                }
                string rsegpath = bookstore.PathFile(page, line, variant, "png");
                ImgIo.write_image_packed(rsegpath, cseg);
                Console.Write("; rseg saved");
            }

            Console.WriteLine();
        }
    }
}
/// <summary>
/// Console entry point: runs the currently enabled TestLinerec experiment, then
/// binarizes and segments the image named by the first argument, writing
/// "&lt;name&gt;.bin.png" and "&lt;name&gt;.rseg.png".
/// </summary>
/// <param name="args">args[0] is the image file to process.</param>
static void Main(string[] args)
{
    Ocronet.Dynamic.Utils.Logger.Default.verbose = true;

    // Currently enabled experiment. It runs before the argument check below.
    // Other TestLinerec entry points that have been used here: TestSimple,
    // TestTrainLatinCseg, TestRecognizeCseg, TestComputeMissingCseg.
    TestLinerec testLinerec = new TestLinerec();
    testLinerec.TestTrainLenetCseg();

    if (args.Length == 0)
    {
        Console.WriteLine("Usage: DynamicConsole.exe imagefile");
        return;
    }

    string inputPath = args[0];
    string fileNameWoExt = Path.GetFileNameWithoutExtension(inputPath);

    // read the input as grayscale
    Bytearray image = new Bytearray(1, 1);
    ImgIo.read_image_gray(image, inputPath);

    Bytearray binimage = new Bytearray(1, 1);
    Intarray segmimage = new Intarray(1, 1);
    IBinarize binarizer = new BinarizeByOtsu();
    ISegmentLine segmenter = new DpSegmenter();

    // binarize and save
    binarizer.Binarize(binimage, image);
    ImgIo.write_image_gray(fileNameWoExt + ".bin.png", binimage);

    // segment, recolor and save
    segmenter.Charseg(ref segmimage, binimage);
    ImgLabels.simple_recolor(segmimage);
    ImgIo.write_image_packed(fileNameWoExt + ".rseg.png", segmimage);
}
/// <summary>
/// Crops the input to the padded bounding box of its largest connected component
/// (found on a thresholded version), optionally smooths it, and resamples it into
/// a csize x csize array, keeping the aspect ratio and centring the content.
/// </summary>
/// <param name="v">Receives the csize x csize result.</param>
/// <param name="input">Rank-2 grayscale input. It is copied before cropping and smoothing, so the caller's array is not modified.</param>
/// <exception cref="Exception">The input is not rank 2.</exception>
protected void rescale(Floatarray v, Floatarray input)
{
    if (input.Rank() != 2)
    {
        throw new Exception("CHECK_ARG: sub.Rank()==2");
    }

    // find the largest connected component
    // and crop to its bounding box
    // (use a binary version of the character
    // to compute the bounding box)
    Intarray components = new Intarray();
    float threshold = PGetf("threshold") * NarrayUtil.Max(input);
    Global.Debugf("biggestcc", "threshold {0}", threshold);
    components.MakeLike(input);
    components.Fill(0);
    for (int i = 0; i < components.Length(); i++)
    {
        components[i] = (input[i] > threshold ? 1 : 0);
    }

    int n = ImgLabels.label_components(ref components);
    Intarray totals = new Intarray(n + 1);
    totals.Fill(0);
    for (int i = 0; i < components.Length(); i++)
    {
        totals[components[i]]++;
    }
    // index 0 is zeroed so it can never be picked as the biggest component
    totals[0] = 0;

    Narray<Rect> boxes = new Narray<Rect>();
    ImgLabels.bounding_boxes(ref boxes, components);
    int biggest = NarrayUtil.ArgMax(totals);
    Rect r = boxes[biggest];
    int pad = (int)(PGetf("pad") + 0.5f);
    r.PadBy(pad, pad);
    Global.Debugf("biggestcc", "({0}) {1}[{2}] :: {3} {4} {5} {6}",
        n, biggest, totals[biggest], r.x0, r.y0, r.x1, r.y1);

    // now perform normal feature extraction
    // (use the original grayscale input)
    // Work on a copy: assigning `sub = input` would alias the caller's array, and
    // the in-place Crop/Gauss2d below would then alter the caller's input.
    Floatarray sub = new Floatarray();
    sub.Copy(input);
    ImgMisc.Crop(sub, r);

    int csize = PGeti("csize");
    float s = Math.Max(sub.Dim(0), sub.Dim(1)) / (float)csize;
    if (PGetf("noupscale") > 0 && s < 1.0f)
    {
        s = 1.0f;
    }
    float sig = s * PGetf("aa");
    // offsets that centre the cropped image inside the csize*s square
    float dx = (csize * s - sub.Dim(0)) / 2f;
    float dy = (csize * s - sub.Dim(1)) / 2f;
    if (sig > 1e-3f)
    {
        Gauss.Gauss2d(sub, sig, sig);
    }

    v.Resize(csize, csize);
    v.Fill(0f);
    for (int i = 0; i < csize; i++)
    {
        for (int j = 0; j < csize; j++)
        {
            float x = i * s - dx;
            float y = j * s - dy;
            // samples that fall outside the cropped image stay 0
            if (x < 0 || x >= sub.Dim(0))
            {
                continue;
            }
            if (y < 0 || y >= sub.Dim(1))
            {
                continue;
            }
            float value = ImgOps.bilin(sub, x, y);
            v[i, j] = value;
        }
    }
}
/// <summary>
/// Intended to compute, for each recognition candidate, a pair of costs for
/// putting / not putting a space after it. Unfinished: currently always returns null.
/// </summary>
/// <remarks>
/// The idea is to try larger and larger closing operations until most components
/// have a "wide" aspect ratio, i.e. characters have merged into words; the
/// remaining whitespace should then be spaces. This is a stopgap to be replaced
/// with trainable space modeling.
/// </remarks>
/// <param name="candidates">Character recognition candidates (not used yet).</param>
/// <param name="image">Image of the corresponding text line.</param>
/// <returns>Always null in the current implementation.</returns>
public List<List<float>> SpaceCosts(List<Candidate> candidates, Bytearray image)
{
    // Not used yet; kept from the original implementation.
    int w = image.Dim(0);
    int h = image.Dim(1);

    Bytearray closed = new Bytearray();

    // Grow the closing radius until the median component aspect reaches the threshold.
    int r = 0;
    while (r < maxrange)
    {
        closed.Copy(image);
        if (r > 0)
        {
            Morph.binary_close_circle(closed, r);
        }

        Intarray labeled = new Intarray();
        labeled.Copy(closed);
        ImgLabels.label_components(ref labeled);

        Narray<Rect> rects = new Narray<Rect>();
        ImgLabels.bounding_boxes(ref rects, labeled);

        Floatarray aspects = new Floatarray();
        for (int k = 0; k < rects.Length(); k++)
        {
            aspects.Push(rects[k].Aspect());
        }

        float medianAspect = NarrayUtil.Median(aspects);
        if (medianAspect >= this.aspect_threshold)
        {
            break;
        }
        r++;
    }

    // close with a little bit of extra space
    closed.Copy(image);
    Morph.binary_close_circle(closed, r + 1);

    // TODO: compute the remaining gaps; every character box that ends near a gap
    // gets a space appended
    return null;
}
/// <summary>
/// Reassigns each label j to the label i whose bounding box overlaps it in x,
/// satisfies j.y0 &gt;= i.y1, and has the nearest horizontal centre; labels are
/// then renumbered starting at 1.
/// </summary>
/// <param name="segmentation">Label image, modified in place.</param>
public static void fix_diacritics(Intarray segmentation)
{
    Narray<Rect> bboxes = new Narray<Rect>();
    ImgLabels.bounding_boxes(ref bboxes, segmentation);
    int count = bboxes.Length();
    if (count < 1)
    {
        return;
    }

    // assignments[k] is the label k is rewritten to; identity to start with.
    Intarray assignments = new Intarray(count);
    for (int k = 0; k < count; k++)
    {
        assignments[k] = k;
    }

    for (int j = 0; j < count; j++)
    {
        Rect boxJ = bboxes.At1d(j);
        int centerJ = (boxJ.x0 + boxJ.x1) / 2;
        float bestDist = 1e38f;
        int closest = -1;

        for (int i = 0; i < count; i++)
        {
            Rect boxI = bboxes.At1d(i);

            // j should overlap i in the x direction
            bool overlapsInX = boxJ.x1 >= boxI.x0 && boxJ.x0 <= boxI.x1;
            if (!overlapsInX)
            {
                continue;
            }

            // j should be above i
            if (boxJ.y0 < boxI.y1)
            {
                continue;
            }
#if false
            // j should be smaller than i
            if (!(bboxes.At1d(j).area() < bboxes.At1d(i).area()))
            {
                continue;
            }
#endif
            // distance between the (integer) horizontal centres
            float d = Math.Abs(centerJ - (boxI.x0 + boxI.x1) / 2);
            if (d < bestDist)
            {
                bestDist = d;
                closest = i;
            }
        }

        if (closest >= 0)
        {
            assignments[j] = closest;
        }
    }

    for (int k = 0; k < segmentation.Length(); k++)
    {
        int reassigned = assignments[segmentation.At1d(k)];
        segmentation.Put1d(k, reassigned);
    }
    ImgLabels.renumber_labels(segmentation, 1);
}
/// <summary>
/// Repeatedly merges components whose bounding box is smaller than
/// <paramref name="r"/> in both width and height into a touching component,
/// until a full pass makes no change.
/// </summary>
/// <param name="segmentation">Label image, modified in place.</param>
/// <param name="r">Size limit; components with width or height of at least r are left alone.</param>
/// <exception cref="Exception">The largest label exceeds 100000.</exception>
public static void line_segmentation_merge_small_components(ref Intarray segmentation, int r = 10)
{
    if (NarrayUtil.Max(segmentation) > 100000)
    {
        throw new Exception("line_segmentation_merge_small_components: to many segments");
    }

    make_line_segmentation_black(segmentation);
    Narray<Rect> bboxes = new Narray<Rect>();
    ImgLabels.bounding_boxes(ref bboxes, segmentation);
    bboxes[0] = Rect.CreateEmpty();

    bool changed = true;
    while (changed)
    {
        changed = false;
        for (int label = 1; label < bboxes.Length(); label++)
        {
            Rect box = bboxes[label];
            if (box.Empty() || box.Width() >= r || box.Height() >= r)
            {
                continue;
            }

            // merge small components only with touching components:
            // look in the box grown by one pixel, clipped to the image
            Rect probe = box.Grow(1);
            probe.Intersect(new Rect(0, 0, segmentation.Dim(0), segmentation.Dim(1)));

            int closest = 0;
            for (int x = probe.x0; x < probe.x1; x++)
            {
                for (int y = probe.y0; y < probe.y1; y++)
                {
                    int value = segmentation[x, y];
                    if (value != 0 && value != label)
                    {
                        closest = value;
                        // Leaves only the column scan; later columns may still
                        // overwrite the choice.
                        break;
                    }
                }
            }
            if (closest == 0)
            {
                continue;
            }

            // relabel this component's pixels and mark its box as consumed
            for (int x = box.x0; x < box.x1; x++)
            {
                for (int y = box.y0; y < box.y1; y++)
                {
                    if (segmentation[x, y] == label)
                    {
                        segmentation[x, y] = closest;
                    }
                }
            }
            bboxes[label] = Rect.CreateEmpty();
            changed = true;
        }
    }
}