/// <summary>
/// Builds a two-way lookup between the (possibly sparse) class labels occurring in
/// <paramref name="classes"/> and a dense index range starting at 0.
/// Entries of <paramref name="classes"/> equal to -1 are skipped.
/// </summary>
/// <param name="out_class_to_index">
/// Output; resized to Max(classes) + 1. Holds the dense index of every label that occurs
/// and -1 for labels that never occur.
/// </param>
/// <param name="out_index_to_class">
/// Output; resized to the number of distinct labels that occur. Holds the label of each dense index.
/// </param>
/// <param name="classes">Input class labels.</param>
public static void ClassMap(Intarray out_class_to_index, Intarray out_index_to_class, Intarray classes)
{
    int labelRange = NarrayUtil.Max(classes) + 1;

    // Histogram of label occurrences; -1 entries do not contribute.
    Intarray occurrences = new Intarray(labelRange);
    occurrences.Fill(0);
    for (int k = 0; k < classes.Length(); k++)
    {
        if (classes[k] != -1)
        {
            occurrences[classes[k]]++;
        }
    }

    // Number of labels that occur at least once.
    int distinct = 0;
    for (int label = 0; label < occurrences.Length(); label++)
    {
        if (occurrences[label] > 0)
        {
            distinct++;
        }
    }

    out_class_to_index.Resize(labelRange);
    out_class_to_index.Fill(-1);
    out_index_to_class.Resize(distinct);
    out_index_to_class.Fill(-1);

    // Hand out dense indices in ascending label order.
    int nextIndex = 0;
    for (int label = 0; label < occurrences.Length(); label++)
    {
        if (occurrences[label] <= 0)
        {
            continue;
        }
        out_class_to_index[label] = nextIndex;
        out_index_to_class[nextIndex] = label;
        nextIndex++;
    }

    CHECK_ARG(out_class_to_index.Length() == labelRange,
              "class_to_index.Length() == nclasses");
    CHECK_ARG(out_index_to_class.Length() == NarrayUtil.Max(out_class_to_index) + 1,
              "index_to_class.Length() == Max(class_to_index)+1");
    CHECK_ARG(out_index_to_class.Length() <= out_class_to_index.Length(),
              "index_to_class.Length() <= class_to_index.Length()");
}
Intarray heapback; // heap[heapback[node]] == node; -1 if not in the heap

#endregion Fields

#region Constructors

/// <summary>
/// Creates an empty heap for node indices 0 .. n - 1.
/// </summary>
/// <param name="n">Number of node indices; sizes the back-reference table.</param>
public Heap(int n)
{
    // Every node starts out with back-reference -1, i.e. "not in the heap".
    Intarray backRefs = new Intarray(n);
    backRefs.Fill(-1);

    heap = new Intarray();
    heapback = backRefs;
    costs = new Floatarray();
}
/// <summary>
/// Relabels a segmentation so that the segments numbered <paramref name="start"/> through
/// <paramref name="end"/> share one label; every other segment keeps a label of its own.
/// New labels are assigned consecutively starting at 1; label 0 maps to 0.
/// </summary>
/// <param name="cseg">Output; made like <paramref name="rseg"/> and filled with the new labels.</param>
/// <param name="rseg">Input segmentation.</param>
/// <param name="start">First segment of the merged run.</param>
/// <param name="end">Last segment of the merged run.</param>
public static void rseg_to_cseg(Intarray cseg, Intarray rseg, int start, int end)
{
    int lastSegment = NarrayUtil.Max(rseg);

    if (end < start)
    {
        throw new Exception("segmentation encoded in IDs looks seriously broken!");
    }
    bool rangeFits = start <= lastSegment && end <= lastSegment;
    if (!rangeFits)
    {
        throw new Exception("segmentation encoded in IDs doesn't fit!");
    }

    // relabel[old] = new label; entry 0 stays 0.
    Intarray relabel = new Intarray(lastSegment + 1);
    relabel.Fill(0);
    int nextColor = 1;
    for (int segment = 1; segment <= lastSegment; segment++)
    {
        relabel[segment] = nextColor;

        // Segments in [start, end) share their label with the following segment,
        // so the color only advances outside that range.
        bool sharesWithNext = segment >= start && segment < end;
        if (!sharesWithNext)
        {
            nextColor++;
        }
    }

    cseg.MakeLike(rseg);
    for (int p = 0; p < cseg.Length1d(); p++)
    {
        int oldLabel = rseg.At1d(p);
        cseg.Put1d(p, relabel[oldLabel]);
    }
}
Floatarray costs; // the cost of the node on the heap

/// <summary>
/// Creates an empty heap for node indices 0 .. n - 1.
/// </summary>
/// <param name="n">Number of node indices; sizes the back-reference table.</param>
public Heap(int n)
{
    // All n back-references start at -1.
    Intarray backRefs = new Intarray(n);
    backRefs.Fill(-1);

    heap = new Intarray();
    heapback = backRefs;
    costs = new Floatarray();
}
/// <summary>
/// Relabels a segmentation according to a list of encoded segment ranges.
/// Each non-zero entry of <paramref name="ids"/> carries a start segment in its upper
/// 16 bits and an end segment in its lower 16 bits; all segments start..end (inclusive)
/// receive the same new label. Labels are handed out consecutively from 1 in the order
/// of the non-zero ids; segments covered by no id map to 0.
/// </summary>
/// <param name="cseg">Output; made like <paramref name="rseg"/> and filled with the new labels.</param>
/// <param name="rseg">Input segmentation.</param>
/// <param name="ids">Encoded (start &lt;&lt; 16 | end) ranges; zero entries are skipped.</param>
public static void rseg_to_cseg(Intarray cseg, Intarray rseg, Intarray ids)
{
    Intarray relabel = new Intarray(NarrayUtil.Max(rseg) + 1);
    relabel.Fill(0);

    int nextColor = 0;
    for (int k = 0; k < ids.Length(); k++)
    {
        if (ids[k] != 0)
        {
            nextColor++;

            int first = ids[k] >> 16;
            int last = ids[k] & 0xFFFF;
            if (first > last)
            {
                throw new Exception("segmentation encoded in IDs looks seriously broken!");
            }
            if (first >= relabel.Length() || last >= relabel.Length())
            {
                throw new Exception("segmentation encoded in IDs doesn't fit!");
            }

            for (int segment = first; segment <= last; segment++)
            {
                relabel[segment] = nextColor;
            }
        }
    }

    cseg.MakeLike(rseg);
    for (int p = 0; p < cseg.Length1d(); p++)
    {
        int oldLabel = rseg.At1d(p);
        cseg.Put1d(p, relabel[oldLabel]);
    }
}
/// <summary>
/// Groups the labels of <paramref name="seg"/> by the label of <paramref name="cseg"/>
/// they overlap with most. Overlap is counted element by element over both arrays.
/// </summary>
/// <param name="outsegments">
/// Output; cleared and resized to Max(cseg) + 1. Entry j collects every seg label whose
/// row in the overlap table yields j from NarrayRowUtil.RowArgMax.
/// </param>
/// <param name="seg">First segmentation; its maximum label must be below 10000.</param>
/// <param name="cseg">Second segmentation; its maximum label must be below 10000.</param>
public static void segmentation_correspondences(Narray<Intarray> outsegments, Intarray seg, Intarray cseg)
{
    int segMax = NarrayUtil.Max(seg);
    if (segMax >= 10000)
    {
        throw new Exception("CHECK_ARG: (max(seg)<10000)");
    }
    int csegMax = NarrayUtil.Max(cseg);
    if (csegMax >= 10000)
    {
        throw new Exception("CHECK_ARG: (max(cseg)<10000)");
    }

    int segLabels = segMax + 1;
    int csegLabels = csegMax + 1;

    // overlapTable[a, b] = number of positions labelled a in seg and b in cseg.
    Intarray overlapTable = new Intarray(segLabels, csegLabels);
    overlapTable.Fill(0);
    if (seg.Length() != cseg.Length())
    {
        throw new Exception("CHECK_ARG: (seg.Length()==cseg.Length())");
    }
    for (int p = 0; p < seg.Length(); p++)
    {
        overlapTable[seg.At1d(p), cseg.At1d(p)]++;
    }

    outsegments.Clear();
    outsegments.Resize(csegLabels);
    for (int label = 0; label < segLabels; label++)
    {
        int best = NarrayRowUtil.RowArgMax(overlapTable, label);
        if (best < 0 || best >= csegLabels)
        {
            throw new Exception("ASSERT: (j>=0 && j<ncseg)");
        }

        // Lists are created lazily the first time a cseg label is hit.
        if (outsegments[best] == null)
        {
            outsegments[best] = new Intarray();
        }
        outsegments[best].Push(label);
    }
}
/// <summary>
/// Empties the container: both backing arrays are zero-filled and then cleared,
/// and the element count is reset to 0.
/// </summary>
public void Clear()
{
    // Overwrite the old contents before dropping them.
    _values.Fill(0f);
    _keys.Fill(0);

    _values.Clear();
    _keys.Clear();

    _len = 0;
}
/// <summary>
/// Relabels a segmentation while removing the segments numbered <paramref name="start"/>
/// through <paramref name="end"/> (inclusive). Removed segments map to label 0; the
/// remaining segments are renumbered consecutively from 1. Pixels that belonged to a
/// removed segment are set to 255 in the output image.
/// </summary>
/// <param name="cseg">Output; made like <paramref name="rseg"/> and filled with the new labels.</param>
/// <param name="rseg">Input segmentation.</param>
/// <param name="outimg">Output image; a copy of <paramref name="img"/> with removed pixels set to 255.</param>
/// <param name="img">Input image; must have the same 1d length as <paramref name="rseg"/>.</param>
/// <param name="start">First segment to remove.</param>
/// <param name="end">Last segment to remove.</param>
public static void rseg_to_cseg_remove(Intarray cseg, Intarray rseg, Bytearray outimg, Bytearray img, int start, int end)
{
    int lastSegment = NarrayUtil.Max(rseg);

    if (end < start)
    {
        throw new Exception("segmentation encoded in IDs looks seriously broken!");
    }
    bool rangeFits = start <= lastSegment && end <= lastSegment;
    if (!rangeFits)
    {
        throw new Exception("segmentation encoded in IDs doesn't fit!");
    }
    if (rseg.Length1d() != img.Length1d())
    {
        throw new Exception("rseg and img must have same a dimension!");
    }

    // relabel[old] = new label, or 0 for removed segments (and for label 0).
    Intarray relabel = new Intarray(lastSegment + 1);
    relabel.Fill(0);
    int nextColor = 1;
    for (int segment = 1; segment <= lastSegment; segment++)
    {
        bool removed = segment >= start && segment <= end;
        if (removed)
        {
            relabel[segment] = 0;
        }
        else
        {
            relabel[segment] = nextColor;
            nextColor++;
        }
    }

    cseg.MakeLike(rseg);
    outimg.Copy(img);
    for (int p = 0; p < cseg.Length1d(); p++)
    {
        int oldLabel = rseg.At1d(p);
        cseg.Put1d(p, relabel[oldLabel]);

        // A labelled pixel that lost its label belonged to a removed segment.
        if (oldLabel > 0 && relabel[oldLabel] == 0)
        {
            outimg.Put1d(p, 255);
        }
    }
}
/// <summary>
/// Computes a dense renumbering of the class labels found in <paramref name="classes"/>
/// together with its inverse. Labels equal to -1 are ignored.
/// </summary>
/// <param name="out_class_to_index">
/// Output of length Max(classes) + 1: label -> dense index, -1 where the label does not occur.
/// </param>
/// <param name="out_index_to_class">
/// Output with one entry per occurring label: dense index -> label.
/// </param>
/// <param name="classes">Input class labels.</param>
public static void ClassMap(Intarray out_class_to_index, Intarray out_index_to_class, Intarray classes)
{
    int tableSize = NarrayUtil.Max(classes) + 1;

    // Count how often each label appears.
    Intarray seen = new Intarray(tableSize);
    seen.Fill(0);
    int pos = 0;
    while (pos < classes.Length())
    {
        if (classes[pos] != -1)
        {
            seen[classes[pos]]++;
        }
        pos++;
    }

    // Count the labels that appear at all.
    int used = 0;
    for (int c = 0; c < seen.Length(); c++)
    {
        used += seen[c] > 0 ? 1 : 0;
    }

    out_class_to_index.Resize(tableSize);
    out_class_to_index.Fill(-1);
    out_index_to_class.Resize(used);
    out_index_to_class.Fill(-1);

    // Assign dense indices in ascending label order.
    int dense = 0;
    for (int c = 0; c < seen.Length(); c++)
    {
        if (seen[c] > 0)
        {
            out_class_to_index[c] = dense;
            out_index_to_class[dense] = c;
            dense++;
        }
    }

    CHECK_ARG(out_class_to_index.Length() == tableSize,
              "class_to_index.Length() == nclasses");
    CHECK_ARG(out_index_to_class.Length() == NarrayUtil.Max(out_class_to_index) + 1,
              "index_to_class.Length() == Max(class_to_index)+1");
    CHECK_ARG(out_index_to_class.Length() <= out_class_to_index.Length(),
              "index_to_class.Length() <= class_to_index.Length()");
}
/// <summary>
/// Erases small connected components from <paramref name="input"/> in place.
/// The image is thresholded at <paramref name="thresh"/> times its maximum and the result
/// is labelled with ImgLabels.label_components. Every pixel whose label has no more than
/// <paramref name="mins"/> times the pixel count of the largest label is overwritten with
/// the minimum value of the input. Label 0's count is zeroed before the comparison.
/// </summary>
/// <param name="input">Image to clean; modified in place.</param>
/// <param name="mins">Minimum size of a kept component, as a fraction of the largest one.</param>
/// <param name="thresh">Threshold, as a fraction of the maximum input value.</param>
public static void erase_small_components(Floatarray input, float mins = 0.2f, float thresh = 0.25f)
{
    // Binary mask used for component labelling.
    float cutoff = thresh * NarrayUtil.Max(input);
    Intarray labels = new Intarray();
    labels.MakeLike(input);
    labels.Fill(0);
    for (int p = 0; p < labels.Length(); p++)
    {
        if (input[p] > cutoff)
        {
            labels[p] = 1;
        }
        else
        {
            labels[p] = 0;
        }
    }

    // Pixel count per label.
    int labelCount = ImgLabels.label_components(ref labels);
    Intarray sizes = new Intarray(labelCount + 1);
    sizes.Fill(0);
    for (int p = 0; p < labels.Length(); p++)
    {
        sizes[labels[p]]++;
    }
    sizes[0] = 0;
    int largest = NarrayUtil.ArgMax(sizes);

    // Decide per label whether it survives.
    float sizeLimit = mins * sizes[largest];
    Bytearray survives = new Bytearray(labelCount + 1);
    float fillValue = NarrayUtil.Min(input);
    for (int label = 0; label < survives.Length(); label++)
    {
        survives[label] = (byte)(sizes[label] > sizeLimit ? 1 : 0);
    }

    // Overwrite the pixels of every label that did not survive.
    for (int p = 0; p < input.Length(); p++)
    {
        bool erase = survives[labels[p]] == 0;
        if (erase)
        {
            input[p] = fillValue;
        }
    }
}
/// <summary>
/// For every label of <paramref name="seg"/>, determines the label of
/// <paramref name="cseg"/> selected by NarrayRowUtil.RowArgMax on the overlap counts,
/// and records the seg label in the list belonging to that cseg label.
/// </summary>
/// <param name="outsegments">Output; cleared, resized to Max(cseg) + 1, one list of seg labels per cseg label.</param>
/// <param name="seg">First segmentation (maximum label below 10000).</param>
/// <param name="cseg">Second segmentation (maximum label below 10000), same length as <paramref name="seg"/>.</param>
public static void segmentation_correspondences(Narray<Intarray> outsegments, Intarray seg, Intarray cseg)
{
    int maxSeg = NarrayUtil.Max(seg);
    if (!(maxSeg < 10000))
    {
        throw new Exception("CHECK_ARG: (max(seg)<10000)");
    }
    int maxCseg = NarrayUtil.Max(cseg);
    if (!(maxCseg < 10000))
    {
        throw new Exception("CHECK_ARG: (max(cseg)<10000)");
    }

    int rows = maxSeg + 1;
    int cols = maxCseg + 1;
    Intarray counts = new Intarray(rows, cols);
    counts.Fill(0);

    if (seg.Length() != cseg.Length())
    {
        throw new Exception("CHECK_ARG: (seg.Length()==cseg.Length())");
    }

    // Accumulate co-occurrence counts position by position.
    int position = 0;
    while (position < seg.Length())
    {
        counts[seg.At1d(position), cseg.At1d(position)]++;
        position++;
    }

    outsegments.Clear();
    outsegments.Resize(cols);
    for (int row = 0; row < rows; row++)
    {
        int winner = NarrayRowUtil.RowArgMax(counts, row);
        bool valid = winner >= 0 && winner < cols;
        if (!valid)
        {
            throw new Exception("ASSERT: (j>=0 && j<ncseg)");
        }
        if (outsegments[winner] == null)
        {
            outsegments[winner] = new Intarray();
        }
        outsegments[winner].Push(row);
    }
}
/// <summary>
/// Installs the image to be segmented. Copies it into the debug image, builds the weight
/// image, and computes <c>where</c>, the mean row index of the foreground (non-zero)
/// pixels in columns 1 .. w - 1. That row is marked in the debug image with 0x008000.
/// </summary>
/// <param name="image">Input image; non-zero pixels are treated as foreground.</param>
public override void SetImage(Bytearray image)
{
    dimage.Copy(image);
    int w = image.Dim(0), h = image.Dim(1);
    wimage.Resize(w, h);
    wimage.Fill(0);

    // s1 counts foreground pixels, sy sums their row indices.
    float s1 = 0.0f, sy = 0.0f;

    // Column 0 has no left neighbour, so the scan starts at column 1.
    for (int i = 1; i < w; i++)
    {
        for (int j = 0; j < h; j++)
        {
            if (image[i, j] > 0)
            {
                s1++;
                sy += j;
            }

            if (image[i - 1, j] == 0 && image[i, j] > 0)
            {
                // background -> foreground transition
                wimage[i, j] = boundary_weight;
            }
            else if (image[i, j] > 0)
            {
                wimage[i, j] = inside_weight;
            }
            else
            {
                wimage[i, j] = outside_weight;
            }
        }
    }

    // With no foreground pixels sy / s1 is 0/0 = NaN, and converting NaN to int yields an
    // unspecified value. Fall back to the middle row so "where" is always a valid row.
    if (s1 > 0.0f)
    {
        where = (int)(sy / s1);
    }
    else
    {
        where = h / 2;
    }

    // Mark the reference row in the debug image (skipped for zero-height images).
    if (where >= 0 && where < dimage.Dim(1))
    {
        for (int i = 0; i < dimage.Dim(0); i++)
        {
            dimage[i, where] = 0x008000;
        }
    }
}
/// <summary>
/// Computes, for every column x, a cut through the image that passes through
/// (x, where), together with its cost. Two passes are made with Step: one starting from
/// row 0 (direction 1) and one starting from row h - 1 (direction -1), both limited at row
/// <c>where</c>. The paths are read back through <c>sources</c> and concatenated
/// into <c>cuts[x]</c>; the costs of both passes plus the weight at (x, where) are
/// summed into <c>cutcosts[x]</c>.
/// </summary>
public override void FindAllCuts()
{
    int width = wimage.Dim(0);
    int height = wimage.Dim(1);

    // initialize dimensions of cuts, costs etc
    cuts.Resize(width);
    cutcosts.Resize(width);
    costs.Resize(width, height);
    sources.Resize(width, height);

    // ---- first pass: from row 0 towards row "where" ----
    costs.Fill(1000000000);
    for (int col = 0; col < width; col++)
    {
        costs[col, 0] = 0;
    }
    sources.Fill(-1);
    limit = where;
    direction = 1;
    Step(0, width, 0);

    for (int x = 0; x < width; x++)
    {
        cutcosts[x] = costs[x, where];
        cuts[x] = new Narray<Point>();
        cuts[x].Clear();

        // bottom should probably be initialized with 2*where instead of
        // h, because where cannot be assumed to be h/2. In the most extreme
        // case, the cut could go through 2 pixels in each row
        Narray<Point> bottom = new Narray<Point>();

        // Follow the back-pointers from (x, where) to row 0 ...
        int col = x;
        for (int row = where; row >= 0; row--)
        {
            bottom.Push(new Point(col, row));
            col = sources[col, row];
        }

        // ... and store the path in ascending row order.
        //cuts(x).resize(h);
        for (int k = bottom.Length() - 1; k >= 0; k--)
        {
            cuts[x].Push(bottom[k]);
        }
    }

    // ---- second pass: from row h - 1 towards row "where" ----
    costs.Fill(1000000000);
    for (int col = 0; col < width; col++)
    {
        costs[col, height - 1] = 0;
    }
    sources.Fill(-1);
    limit = where;
    direction = -1;
    Step(0, width, height - 1);

    for (int x = 0; x < width; x++)
    {
        cutcosts[x] += costs[x, where];

        // top should probably be initialized with 2*(h-where) instead of
        // h, because where cannot be assumed to be h/2. In the most extreme
        // case, the cut could go through 2 pixels in each row
        Narray<Point> top = new Narray<Point>();

        // Follow the back-pointers from (x, where) to row h - 1. The point on row
        // "where" itself is already part of the first half and is not added again.
        int col = x;
        for (int row = where; row < height; row++)
        {
            if (row > where)
            {
                top.Push(new Point(col, row));
            }
            col = sources[col, row];
        }

        for (int k = 0; k < top.Length(); k++)
        {
            cuts[x].Push(top[k]);
        }
    }

    // add costs for line "where"
    for (int x = 0; x < width; x++)
    {
        cutcosts[x] += wimage[x, where];
    }
}
/// <summary>
/// Segments a text line image into characters. The input is binarized and inverted,
/// cuts are computed (SetImage / FindAllCuts / FindBestCuts), the best cuts are drawn
/// three pixels wide into a label image, and the regions between them are labelled
/// and propagated onto the foreground pixels. Optionally the result is combined with a
/// connected-component segmentation (parameters "component_segmentation" and
/// "fix_diacritics").
/// </summary>
/// <param name="segmentation">Output segmentation.</param>
/// <param name="inraw">Input line image; left unmodified (a copy is processed).</param>
public override void Charseg(ref Intarray segmentation, Bytearray inraw)
{
    setParams();
    //Logger.Default.Image("segmenting", inraw);

    OcrRoutine.optional_check_background_is_lighter(inraw);
    Bytearray image = new Bytearray();
    image.Copy(inraw);
    OcrRoutine.binarize_simple(image);
    OcrRoutine.Invert(image);

    SetImage(image);
    FindAllCuts();
    FindBestCuts();

    // Start from an all-255 image and draw each best cut into it as zeros.
    Intarray seg = new Intarray();
    seg.MakeLike(image);
    seg.Fill(255);

    int w = seg.Dim(0);
    for (int r = 0; r < bestcuts.Length(); r++)
    {
        int c = bestcuts[r];
        Narray<Point> cut = cuts[c];
        for (int y = 0; y < image.Dim(1); y++)
        {
            // The cut position depends only on the row, so look it up once per row.
            int x = cut[y].X;
            if (x < 1 || x >= w - 1)
            {
                // too close to the border to draw x - 1 .. x + 1
                continue;
            }
            for (int i = -1; i <= 1; i++)
            {
                seg[x + i, y] = 0;
            }
        }
    }
    ImgLabels.label_components(ref seg);
    // dshowr(seg,"YY"); dwait();

    // Keep region labels only on foreground pixels, then propagate them.
    segmentation.Copy(image);
    for (int i = 0; i < seg.Length1d(); i++)
    {
        if (segmentation.At1d(i) == 0)
        {
            seg.Put1d(i, 0);
        }
    }
    ImgLabels.propagate_labels_to(ref segmentation, seg);

    if (PGeti("component_segmentation") > 0)
    {
        Intarray ccseg = new Intarray();
        ccseg.Copy(image);
        ImgLabels.label_components(ref ccseg);
        SegmRoutine.combine_segmentations(ref segmentation, ccseg);
        if (PGeti("fix_diacritics") > 0)
        {
            SegmRoutine.fix_diacritics(segmentation);
        }
    }
#if false
    SegmRoutine.line_segmentation_merge_small_components(ref segmentation, small_merge_threshold);
    SegmRoutine.line_segmentation_sort_x(segmentation);
#endif

    SegmRoutine.make_line_segmentation_white(segmentation);
    // set_line_number(segmentation, 1);
    //Logger.Default.Image("resulting segmentation", segmentation);
}
/// <summary>
/// Segments a text line image into characters. The input is binarized and inverted,
/// cuts are computed (SetImage / FindAllCuts / FindBestCuts), the best cuts are drawn
/// three pixels wide into a label image, and the regions between them are labelled
/// and propagated onto the foreground pixels. Optionally the result is combined with a
/// connected-component segmentation (parameters "component_segmentation" and
/// "fix_diacritics").
/// </summary>
/// <param name="segmentation">Output segmentation.</param>
/// <param name="inraw">Input line image; left unmodified (a copy is processed).</param>
public override void Charseg(ref Intarray segmentation, Bytearray inraw)
{
    setParams();
    //Logger.Default.Image("segmenting", inraw);

    OcrRoutine.optional_check_background_is_lighter(inraw);
    Bytearray image = new Bytearray();
    image.Copy(inraw);
    OcrRoutine.binarize_simple(image);
    OcrRoutine.Invert(image);

    SetImage(image);
    FindAllCuts();
    FindBestCuts();

    // Start from an all-255 image and draw each best cut into it as zeros.
    Intarray seg = new Intarray();
    seg.MakeLike(image);
    seg.Fill(255);

    int w = seg.Dim(0);
    for (int r = 0; r < bestcuts.Length(); r++)
    {
        int c = bestcuts[r];
        Narray<Point> cut = cuts[c];
        for (int y = 0; y < image.Dim(1); y++)
        {
            // The cut position depends only on the row, so look it up once per row.
            int x = cut[y].X;
            if (x < 1 || x >= w - 1)
            {
                // too close to the border to draw x - 1 .. x + 1
                continue;
            }
            for (int i = -1; i <= 1; i++)
            {
                seg[x + i, y] = 0;
            }
        }
    }
    ImgLabels.label_components(ref seg);
    // dshowr(seg,"YY"); dwait();

    // Keep region labels only on foreground pixels, then propagate them.
    segmentation.Copy(image);
    for (int i = 0; i < seg.Length1d(); i++)
    {
        if (segmentation.At1d(i) == 0)
        {
            seg.Put1d(i, 0);
        }
    }
    ImgLabels.propagate_labels_to(ref segmentation, seg);

    if (PGeti("component_segmentation") > 0)
    {
        Intarray ccseg = new Intarray();
        ccseg.Copy(image);
        ImgLabels.label_components(ref ccseg);
        SegmRoutine.combine_segmentations(ref segmentation, ccseg);
        if (PGeti("fix_diacritics") > 0)
        {
            SegmRoutine.fix_diacritics(segmentation);
        }
    }
#if false
    SegmRoutine.line_segmentation_merge_small_components(ref segmentation, small_merge_threshold);
    SegmRoutine.line_segmentation_sort_x(segmentation);
#endif

    SegmRoutine.make_line_segmentation_white(segmentation);
    // set_line_number(segmentation, 1);
    //Logger.Default.Image("resulting segmentation", segmentation);
}
/// <summary>
/// Extracts the largest connected component of a character image and resamples it
/// into a csize x csize array. The bounding box is found on a thresholded copy
/// (parameter "threshold" times the maximum), padded by parameter "pad", and the
/// grayscale input is cropped to it, optionally smoothed (parameter "aa"), and
/// resampled bilinearly. The caller's <paramref name="input"/> is not modified.
/// </summary>
/// <param name="v">Output; resized to csize x csize (parameter "csize").</param>
/// <param name="input">Rank-2 grayscale character image.</param>
protected void rescale(Floatarray v, Floatarray input)
{
    if (input.Rank() != 2)
    {
        throw new Exception("CHECK_ARG: sub.Rank()==2");
    }

    Floatarray sub = new Floatarray();

    // find the largest connected component
    // and crop to its bounding box
    // (use a binary version of the character
    // to compute the bounding box)
    Intarray components = new Intarray();
    float threshold = PGetf("threshold") * NarrayUtil.Max(input);
    Global.Debugf("biggestcc", "threshold {0}", threshold);
    components.MakeLike(input);
    components.Fill(0);
    for (int i = 0; i < components.Length(); i++)
    {
        components[i] = (input[i] > threshold ? 1 : 0);
    }
    int n = ImgLabels.label_components(ref components);

    // pixel count per label; label 0 is excluded from the ArgMax below
    Intarray totals = new Intarray(n + 1);
    totals.Fill(0);
    for (int i = 0; i < components.Length(); i++)
    {
        totals[components[i]]++;
    }
    totals[0] = 0;

    Narray<Rect> boxes = new Narray<Rect>();
    ImgLabels.bounding_boxes(ref boxes, components);
    int biggest = NarrayUtil.ArgMax(totals);
    Rect r = boxes[biggest];
    int pad = (int)(PGetf("pad") + 0.5f);
    r.PadBy(pad, pad);
    Global.Debugf("biggestcc", "({0}) {1}[{2}] :: {3} {4} {5} {6}",
                  n, biggest, totals[biggest], r.x0, r.y0, r.x1, r.y1);

    // now perform normal feature extraction
    // (use the original grayscale input)
    // Work on a copy: "sub = input" would only alias the caller's array, so the
    // in-place crop and blur below would destroy the caller's image.
    sub.Copy(input);
    ImgMisc.Crop(sub, r);

    int csize = PGeti("csize");
    float s = Math.Max(sub.Dim(0), sub.Dim(1)) / (float)csize;
    if (PGetf("noupscale") > 0 && s < 1.0f)
    {
        s = 1.0f;
    }
    float sig = s * PGetf("aa");

    // offsets that center the cropped image in the output
    float dx = (csize * s - sub.Dim(0)) / 2f;
    float dy = (csize * s - sub.Dim(1)) / 2f;
    if (sig > 1e-3f)
    {
        Gauss.Gauss2d(sub, sig, sig);
    }

    v.Resize(csize, csize);
    v.Fill(0f);
    for (int i = 0; i < csize; i++)
    {
        for (int j = 0; j < csize; j++)
        {
            float x = i * s - dx;
            float y = j * s - dy;

            // output cells that fall outside the cropped image stay 0
            if (x < 0 || x >= sub.Dim(0))
            {
                continue;
            }
            if (y < 0 || y >= sub.Dim(1))
            {
                continue;
            }
            float value = ImgOps.bilin(sub, x, y);
            v[i, j] = value;
        }
    }
    /*Global.Debugf("biggestcc", "{0} {1} ({2}) -> {3} {4} ({5})",
     *            sub.Dim(0), sub.Dim(1), NarrayUtil.Max(sub),
     *            v.Dim(0), v.Dim(1), NarrayUtil.Max(v));*/
}
/// <summary>
/// Output the segmentation into a segmentation graph.
/// Construct a state for each of the segments, then
/// add transitions between states (segments)
/// from min(segments[i]) to max(segments[i])+1.
/// Multi-character outputs are spelled out through intermediate states; the cost and
/// the segment id are attached to the transition carrying the last character.
/// Space costs of 1000 or more disable the corresponding (space / no-space) path.
/// </summary>
/// <param name="fst">Transducer to fill; it is cleared first.</param>
public override void GetLattice(IGenericFst fst)
{
    fst.Clear();

    // states[k] is the FST state in front of segment k; states[final] is the accept state.
    int final = NarrayUtil.Max(labels) + 1;
    Intarray states = new Intarray(final + 1);
    states.Fill(-1);
    for (int i = 1; i < states.Length(); i++)
    {
        states[i] = fst.NewState();
    }
    fst.SetStart(states[1]);
    fst.SetAccept(states[final]);

    for (int i = 0; i < boxes.Length(); i++)
    {
        int start = NarrayUtil.Min(segments.At1d(i));
        int end = NarrayUtil.Max(segments.At1d(i));

        // the id encodes the covered segment range: start in the upper 16 bits, end below
        int id = (start << 16) + end;
        if (segments.At1d(i).Length() == 0)
        {
            id = 0;
        }

        float yes = spaces[i, 0];
        float no = spaces[i, 1];
        // if no space is set, assume no space is present
        if (yes == float.PositiveInfinity && no == float.PositiveInfinity)
        {
            no = 0.0f;
        }

        for (int j = 0; j < class_costs[i].Length(); j++)
        {
            float cost = class_costs[i][j];
            string str = class_outputs[i][j];
            int n = str.Length;

            // index into "states" of the state reached after the characters emitted so far
            int last = start;
            for (int k = 0; k < n; k++)
            {
                int c = (int)str[k];
                if (k < n - 1)
                {
                    // add intermediate states/transitions for all but the last character
                    states.Push(fst.NewState());
                    fst.AddTransition(states[last], states.Last(), c, 0.0f, 0);
                    last = states.Length() - 1;
                }
                else
                {
                    // for the last character, handle the spaces as well
                    if (no < 1000.0f)
                    {
                        // add the last character as a direct transition with no space
                        fst.AddTransition(states[last], states[end + 1], c, cost + no, id);
                    }
                    if (yes < 1000.0f)
                    {
                        // insert another state to handle spaces
                        states.Push(fst.NewState());
                        int space_state = states.Last();

                        // Continue from the state reached after the preceding characters
                        // (states[last]), exactly like the no-space branch. Starting from
                        // states[start] would skip the intermediate states and drop every
                        // character but the last of a multi-character output.
                        fst.AddTransition(states[last], space_state, c, cost, id);
                        fst.AddTransition(space_state, states[end + 1], (int)' ', yes, 0);
                    }
                }
            } // for k
        } // for j
    } // for i
}
/// <summary>
/// Extracts the largest connected component of a character image and resamples it
/// into a csize x csize array. The bounding box is found on a thresholded copy
/// (parameter "threshold" times the maximum), padded by parameter "pad", and the
/// grayscale input is cropped to it, optionally smoothed (parameter "aa"), and
/// resampled bilinearly. The caller's <paramref name="input"/> is not modified.
/// </summary>
/// <param name="v">Output; resized to csize x csize (parameter "csize").</param>
/// <param name="input">Rank-2 grayscale character image.</param>
protected void rescale(Floatarray v, Floatarray input)
{
    if (input.Rank() != 2)
    {
        throw new Exception("CHECK_ARG: sub.Rank()==2");
    }

    Floatarray sub = new Floatarray();

    // find the largest connected component
    // and crop to its bounding box
    // (use a binary version of the character
    // to compute the bounding box)
    Intarray components = new Intarray();
    float threshold = PGetf("threshold") * NarrayUtil.Max(input);
    Global.Debugf("biggestcc", "threshold {0}", threshold);
    components.MakeLike(input);
    components.Fill(0);
    for (int i = 0; i < components.Length(); i++)
    {
        components[i] = (input[i] > threshold ? 1 : 0);
    }
    int n = ImgLabels.label_components(ref components);

    // pixel count per label; label 0 is excluded from the ArgMax below
    Intarray totals = new Intarray(n + 1);
    totals.Fill(0);
    for (int i = 0; i < components.Length(); i++)
    {
        totals[components[i]]++;
    }
    totals[0] = 0;

    Narray<Rect> boxes = new Narray<Rect>();
    ImgLabels.bounding_boxes(ref boxes, components);
    int biggest = NarrayUtil.ArgMax(totals);
    Rect r = boxes[biggest];
    int pad = (int)(PGetf("pad") + 0.5f);
    r.PadBy(pad, pad);
    Global.Debugf("biggestcc", "({0}) {1}[{2}] :: {3} {4} {5} {6}",
                  n, biggest, totals[biggest], r.x0, r.y0, r.x1, r.y1);

    // now perform normal feature extraction
    // (use the original grayscale input)
    // Work on a copy: "sub = input" would only alias the caller's array, so the
    // in-place crop and blur below would destroy the caller's image.
    sub.Copy(input);
    ImgMisc.Crop(sub, r);

    int csize = PGeti("csize");
    float s = Math.Max(sub.Dim(0), sub.Dim(1)) / (float)csize;
    if (PGetf("noupscale") > 0 && s < 1.0f)
    {
        s = 1.0f;
    }
    float sig = s * PGetf("aa");

    // offsets that center the cropped image in the output
    float dx = (csize * s - sub.Dim(0)) / 2f;
    float dy = (csize * s - sub.Dim(1)) / 2f;
    if (sig > 1e-3f)
    {
        Gauss.Gauss2d(sub, sig, sig);
    }

    v.Resize(csize, csize);
    v.Fill(0f);
    for (int i = 0; i < csize; i++)
    {
        for (int j = 0; j < csize; j++)
        {
            float x = i * s - dx;
            float y = j * s - dy;

            // output cells that fall outside the cropped image stay 0
            if (x < 0 || x >= sub.Dim(0))
            {
                continue;
            }
            if (y < 0 || y >= sub.Dim(1))
            {
                continue;
            }
            float value = ImgOps.bilin(sub, x, y);
            v[i, j] = value;
        }
    }
    /*Global.Debugf("biggestcc", "{0} {1} ({2}) -> {3} {4} ({5})",
               sub.Dim(0), sub.Dim(1), NarrayUtil.Max(sub),
               v.Dim(0), v.Dim(1), NarrayUtil.Max(v));*/
}