/// <summary>
/// Copies the entries <c>image[i, index]</c>, for every <c>i</c> along dimension 0,
/// into <paramref name="slice"/>.
/// </summary>
/// <param name="image">Array that is read with two-index access.</param>
/// <param name="slice">Destination; resized to <c>image.Dim(0)</c> elements.</param>
/// <param name="index">Fixed position along dimension 1. It is not range-checked here
/// (unchecked accessors are used).</param>
public static void getd1(Floatarray image, Floatarray slice, int index)
{
    slice.Resize(image.Dim(0));

    int pos = 0;
    while (pos < image.Dim(0))
    {
        float sample = image.UnsafeAt(pos, index);
        slice.UnsafePut(pos, sample);
        pos++;
    }
}
/// <summary>
/// Writes <paramref name="slice"/> into <paramref name="image"/> at the fixed
/// dimension-1 position <paramref name="index"/>, i.e. <c>image[i, index] = slice[i]</c>
/// for every <c>i</c> along dimension 0.
/// </summary>
/// <typeparam name="T">Unused; retained so existing call sites keep compiling.</typeparam>
/// <typeparam name="S">Unused; retained so existing call sites keep compiling.</typeparam>
/// <param name="image">Array that receives the values.</param>
/// <param name="slice">Rank-1 array with exactly <c>image.Dim(0)</c> elements.</param>
/// <param name="index">Position along dimension 1 (not range-checked; unchecked accessors are used).</param>
/// <exception cref="Exception">Thrown when <paramref name="slice"/> is not rank 1 or its
/// length differs from <c>image.Dim(0)</c>.</exception>
public static void putd1 <T, S>(Floatarray image, Floatarray slice, int index)
{
    if (!(slice.Rank() == 1 && slice.Dim(0) == image.Dim(0)))
    {
        // The message now states the condition that is actually tested (Dim(0), not Dim(1)).
        throw new Exception("ASSERT: slice.Rank()==1 && slice.Dim(0)==image.Dim(0)");
    }

    for (int i = 0; i < image.Dim(0); i++)
    {
        image.UnsafePut(i, index, slice.UnsafeAt(i));
    }
}
/// <summary>
/// Queue-driven propagation of nearest-source information over a 2D grid.
/// Every cell whose incoming <paramref name="distance"/> value is positive becomes a seed
/// (distance 0, source = itself); all other cells start at a very large distance with
/// source (-1, -1). Seeds then spread to their 4-connected neighbours whenever the metric
/// from the neighbour to the seed is within <paramref name="maxdist"/> and improves on the
/// neighbour's current distance.
/// </summary>
/// <remarks>
/// Historical note carried over from the original comment: this was not written as a
/// template function because of an SGI compiler bug with an unused trailing template argument.
/// </remarks>
/// <param name="m">Metric used to measure the distance between a cell and a source cell.</param>
/// <param name="distance">In: seed mask (values &gt; 0 mark seeds). Out: distance per cell.</param>
/// <param name="source">Out: coordinates of the source assigned to each cell.</param>
/// <param name="maxdist">Propagation stops for distances larger than this.</param>
public static void Go(Metric m, ref Floatarray distance, ref Narray<Point> source, float maxdist)
{
    const float BIG = 1e38f;
    int w = distance.Dim(0);
    int h = distance.Dim(1);
    distance.Resize(w, h);
    source.Resize(w, h);
    Queue<Point> queue = new Queue<Point>(w * h);

    // Initialisation: seeds go into the queue, everything else is "infinitely" far away.
    for (int x = 0; x < w; x++)
    {
        for (int y = 0; y < h; y++)
        {
            bool isSeed = distance.At(x, y) > 0;
            if (isSeed)
            {
                queue.Enqueue(new Point(x, y));
                distance[x, y] = 0;
                source[x, y] = new Point(x, y);
            }
            else
            {
                distance[x, y] = BIG;
                source[x, y] = new Point(-1, -1);
            }
        }
    }

    // Neighbour visiting order: left, up, right, down.
    int[] stepX = { -1, 0, 1, 0 };
    int[] stepY = { 0, -1, 0, 1 };

    while (queue.Count != 0)
    {
        Point q = queue.Dequeue();
        for (int k = 0; k < 4; k++)
        {
            int nx = q.X + stepX[k];
            int ny = q.Y + stepY[k];

            // The metric is evaluated before the bounds test; the neighbour's stored
            // distance is only read once the neighbour is known to be inside the grid.
            float d = m.metric(new Point(nx, ny), source.At(q.X, q.Y));
            bool inside = nx >= 0 && nx < w && ny >= 0 && ny < h;
            if (d <= maxdist && inside && d < distance.At(nx, ny))
            {
                queue.Enqueue(new Point(nx, ny));
                source[nx, ny] = source.At(q.X, q.Y);
                distance[nx, ny] = d;
            }
        }
    }
}
/// <summary>
/// Copies row <paramref name="i"/> of <c>data</c> into <paramref name="v"/>.
/// </summary>
/// <param name="v">Destination; resized to <c>data.Dim(1)</c> elements.</param>
/// <param name="i">First index into <c>data</c>.</param>
public override void Input(Floatarray v, int i)
{
    v.Resize(data.Dim(1));

    int count = v.Dim(0);
    for (int col = 0; col < count; col++)
    {
        v.UnsafePut1d(col, data[i, col]);
    }
}
/// <summary>
/// Convolves a 1D signal with a normalized Gaussian mask.
/// </summary>
/// <remarks>
/// The mask covers <c>1 + (int)(3 * sigma)</c> samples on each side of the centre and is
/// normalized to sum to one. Samples requested outside the signal are taken from the nearest
/// end (index clamping). A non-positive or NaN <paramref name="sigma"/> would otherwise give
/// a 0/0 in the mask computation and fill the output with NaN; in that case the input is
/// passed through unchanged.
/// </remarks>
/// <param name="outa">Destination; resized to <c>ina.Dim(0)</c>. Must not be the same
/// object as <paramref name="ina"/>, because it is resized and written while the input is
/// still being read.</param>
/// <param name="ina">Input signal.</param>
/// <param name="sigma">Standard deviation of the Gaussian, in samples.</param>
public static void Gauss1d(Floatarray outa, Floatarray ina, float sigma)
{
    outa.Resize(ina.Dim(0));
    int n = ina.Length();

    // Degenerate width: no smoothing, copy the signal as-is.
    if (!(sigma > 0f))
    {
        for (int i = 0; i < n; i++)
        {
            outa[i] = ina[i];
        }
        return;
    }

    // make a normalized mask
    int range = 1 + (int)(3.0 * sigma);
    Floatarray mask = new Floatarray(2 * range + 1);
    for (int i = 0; i <= range; i++)
    {
        float y = (float)Math.Exp(-i * i / 2.0 / sigma / sigma);
        mask[range + i] = mask[range - i] = y;
    }
    float total = 0.0f;
    for (int i = 0; i < mask.Dim(0); i++)
    {
        total += mask[i];
    }
    for (int i = 0; i < mask.Dim(0); i++)
    {
        mask[i] /= total;
    }

    // apply it
    for (int i = 0; i < n; i++)
    {
        total = 0.0f;
        for (int j = 0; j < mask.Dim(0); j++)
        {
            // clamp to the signal so the border sample is repeated
            int index = i + j - range;
            if (index < 0)
            {
                index = 0;
            }
            if (index >= n)
            {
                index = n - 1;
            }
            total += ina[index] * mask[j]; // the mask is symmetric, so no flip is needed
        }
        outa[i] = total;
    }
}
/// <summary>
/// Appends one stored row and one class label per index along dimension 0 of
/// <paramref name="ds"/>, then calls <c>Recompute()</c>.
/// </summary>
/// <param name="ds">Samples; row <c>i</c> is extracted with <c>RowGet</c>.</param>
/// <param name="cs">Class labels; <c>cs[i]</c> belongs to row <c>i</c>.</param>
public override void Add(Floatarray ds, Intarray cs)
{
    int sample = 0;
    while (sample < ds.Dim(0))
    {
        // Push a fresh row container and let RowGet fill it from ds.
        Narray<byte> row = data.Push(new Narray<byte>());
        RowGet(row, ds, sample);
        classes.Push(cs[sample]);
        sample++;
    }

    Recompute();
}
/// <summary>
/// Writes a description of the network to the default logger: the "MLP" tag, the output of
/// <c>PPrint()</c>, the layer sizes taken from the weight matrix dimensions and, when both
/// weight matrices are non-empty, the min/max of the weights and biases.
/// </summary>
/// <remarks>
/// Logging is forced to verbose for the duration of the call. The previous verbosity is
/// restored in a <c>finally</c> block so that an exception while printing cannot leave the
/// shared logger permanently verbose.
/// </remarks>
public override void Info()
{
    bool bak = Logger.Default.verbose;
    Logger.Default.verbose = true;
    try
    {
        Logger.Default.WriteLine("MLP");
        PPrint();
        Logger.Default.WriteLine(String.Format("nInput {0} nHidden {1} nOutput {2}",
            w1.Dim(1), w1.Dim(0), w2.Dim(0)));
        if (w1.Length() > 0 && w2.Length() > 0)
        {
            Logger.Default.WriteLine(String.Format("w1 [{0},{1}] b1 [{2},{3}]",
                NarrayUtil.Min(w1), NarrayUtil.Max(w1), NarrayUtil.Min(b1), NarrayUtil.Max(b1)));
            Logger.Default.WriteLine(String.Format("w2 [{0},{1}] b2 [{2},{3}]",
                NarrayUtil.Min(w2), NarrayUtil.Max(w2), NarrayUtil.Min(b2), NarrayUtil.Max(b2)));
        }
    }
    finally
    {
        Logger.Default.verbose = bak;
    }
}
/// <summary>
/// Separable in-place Gaussian smoothing of a 2D array: each line fetched with
/// <c>ImgOps.getd0</c> is smoothed with <paramref name="sy"/>, then each line fetched with
/// <c>ImgOps.getd1</c> is smoothed with <paramref name="sx"/>.
/// </summary>
/// <param name="a">Array to smooth; overwritten with the result.</param>
/// <param name="sx">Sigma used in the second pass (lines fetched with getd1).</param>
/// <param name="sy">Sigma used in the first pass (lines fetched with getd0).</param>
public static void Gauss2d(Floatarray a, float sx, float sy)
{
    Floatarray line = new Floatarray();
    Floatarray blurred = new Floatarray();

    // first pass
    int first = 0;
    while (first < a.Dim(0))
    {
        ImgOps.getd0(a, line, first);
        Gauss1d(blurred, line, sy);
        ImgOps.putd0(a, blurred, first);
        first++;
    }

    // second pass
    int second = 0;
    while (second < a.Dim(1))
    {
        ImgOps.getd1(a, line, second);
        Gauss1d(blurred, line, sx);
        ImgOps.putd1(a, blurred, second);
        second++;
    }
}
/// <summary>
/// Computes the vector-matrix product <c>result[j] = sum_i v[i] * a[i, j]</c>.
/// Rows whose coefficient <c>v[i]</c> is exactly zero are skipped.
/// </summary>
/// <param name="result">Destination; resized to <c>a.Dim(1)</c> and zero-filled first.</param>
/// <param name="v">Vector with <c>a.Dim(0)</c> elements (enforced via CHECK_ARG).</param>
/// <param name="a">Matrix operand.</param>
protected static void vmmul0(Floatarray result, Floatarray v, Floatarray a)
{
    int n = a.Dim(0);
    int m = a.Dim(1);
    CHECK_ARG(n == v.Length(), "n == v.Length()");

    result.Resize(m);
    result.Fill(0f);

    for (int row = 0; row < n; row++)
    {
        float coeff = v.UnsafeAt(row);
        if (coeff != 0f)
        {
            for (int col = 0; col < m; col++)
            {
                float acc = result.UnsafeAt(col);
                result.UnsafePut(col, acc + (a.UnsafeAt(row, col) * coeff));
            }
        }
    }
}
/// <summary>
/// Resamples <paramref name="sub"/> into a centred <c>csize</c> x <c>csize</c> array using
/// bilinear interpolation; cells that map outside the input stay zero.
/// </summary>
/// <remarks>
/// Reads the parameters "csize", "indent", "noupscale" and "aa". The scale factor is the
/// larger input dimension divided by <c>csize - 2 * indent</c>. When the smoothing width
/// exceeds 1e-3 the input is blurred in place with <c>Gauss.Gauss2d</c>, so
/// <paramref name="sub"/> is modified by this call.
/// </remarks>
/// <param name="outv">Destination; resized to <c>csize</c> x <c>csize</c>.</param>
/// <param name="sub">Rank-2 input array.</param>
/// <exception cref="Exception">Thrown when <paramref name="sub"/> is not rank 2.</exception>
protected void rescale(Floatarray outv, Floatarray sub)
{
    if (sub.Rank() != 2)
    {
        throw new Exception("CHECK_ARG: sub.Rank()==2");
    }

    int csize = PGeti("csize");
    int indent = PGeti("indent");
    int usable = csize - indent - indent;
    float s = Math.Max(sub.Dim(0), sub.Dim(1)) / (float)usable;

    bool upscaleForbidden = PGeti("noupscale") > 0;
    if (upscaleForbidden && s < 1.0f)
    {
        s = 1.0f;
    }

    float sig = s * PGetf("aa");
    // offsets that keep the input centred in the output
    float dx = (csize * s - sub.Dim(0)) / 2;
    float dy = (csize * s - sub.Dim(1)) / 2;
    if (sig > 1e-3f)
    {
        Gauss.Gauss2d(sub, sig, sig);
    }

    outv.Resize(csize, csize);
    outv.Fill(0f);
    for (int col = 0; col < csize; col++)
    {
        float x = col * s - dx;
        if (x < 0 || x >= sub.Dim(0))
        {
            continue; // the whole output column maps outside the input
        }
        for (int row = 0; row < csize; row++)
        {
            float y = row * s - dy;
            if (y < 0 || y >= sub.Dim(1))
            {
                continue;
            }
            outv[col, row] = ImgOps.bilin(sub, x, y);
        }
    }
}
/// <summary>
/// Rank-one update <c>a[i, j] += eps * u[i] * v[j]</c>.
/// </summary>
/// <remarks>
/// The loop nest is chosen by comparing the number of zeros in the two vectors: the vector
/// with at least as many zeros drives the outer loop, so its zero entries skip a whole
/// inner loop.
/// </remarks>
/// <param name="a">Matrix updated in place.</param>
/// <param name="u">Vector with <c>a.Dim(0)</c> elements (enforced via CHECK_ARG).</param>
/// <param name="v">Vector with <c>a.Dim(1)</c> elements (enforced via CHECK_ARG).</param>
/// <param name="eps">Scale factor applied to the outer product.</param>
protected static void outer_add(Floatarray a, Floatarray u, Floatarray v, float eps)
{
    int n = a.Dim(0);
    int m = a.Dim(1);
    CHECK_ARG(n == u.Length(), "n == u.Length()");
    CHECK_ARG(m == v.Length(), "m == v.Length()");

    int zerosInU = count_zeros(u);
    int zerosInV = count_zeros(v);

    if (zerosInU < zerosInV)
    {
        // v is sparser: iterate columns outermost
        for (int col = 0; col < m; col++)
        {
            if (v.UnsafeAt(col) != 0)
            {
                for (int row = 0; row < n; row++)
                {
                    float updated = a.UnsafeAt(row, col) + (eps * u.UnsafeAt(row) * v.UnsafeAt(col));
                    a.UnsafePut(row, col, updated);
                }
            }
        }
        return;
    }

    // u is at least as sparse: iterate rows outermost
    for (int row = 0; row < n; row++)
    {
        if (u.UnsafeAt(row) != 0)
        {
            for (int col = 0; col < m; col++)
            {
                float updated = a.UnsafeAt(row, col) + (eps * u.UnsafeAt(row) * v.UnsafeAt(col));
                a.UnsafePut(row, col, updated);
            }
        }
    }
}
/// <summary>
/// Produces a <c>csize</c> x <c>csize</c> bilinear resampling of <paramref name="sub"/>,
/// centred in the output; output cells that fall outside the input remain zero.
/// </summary>
/// <remarks>
/// Parameters read: "csize", "indent", "noupscale", "aa". The input is smoothed in place
/// with <c>Gauss.Gauss2d</c> when the smoothing width is above 1e-3, which means
/// <paramref name="sub"/> is modified.
/// </remarks>
/// <param name="outv">Destination array, resized and zero-filled before sampling.</param>
/// <param name="sub">Rank-2 input array.</param>
/// <exception cref="Exception">Thrown when <paramref name="sub"/> is not rank 2.</exception>
protected void rescale(Floatarray outv, Floatarray sub)
{
    bool rankOk = sub.Rank() == 2;
    if (!rankOk)
    {
        throw new Exception("CHECK_ARG: sub.Rank()==2");
    }

    int csize = PGeti("csize");
    int indent = PGeti("indent");
    float largest = Math.Max(sub.Dim(0), sub.Dim(1));
    float s = largest / (float)(csize - indent - indent);
    if (PGeti("noupscale") > 0 && s < 1.0f)
    {
        s = 1.0f;
    }

    float sig = s * PGetf("aa");
    float dx = (csize * s - sub.Dim(0)) / 2;
    float dy = (csize * s - sub.Dim(1)) / 2;
    if (sig > 1e-3f)
    {
        Gauss.Gauss2d(sub, sig, sig);
    }

    outv.Resize(csize, csize);
    outv.Fill(0f);

    int total = csize * csize;
    for (int cell = 0; cell < total; cell++)
    {
        // walk the output in the same (i outer, j inner) order as a nested loop
        int i = cell / csize;
        int j = cell % csize;
        float x = i * s - dx;
        float y = j * s - dy;
        bool xInside = !(x < 0 || x >= sub.Dim(0));
        bool yInside = !(y < 0 || y >= sub.Dim(1));
        if (xInside && yInside)
        {
            float value = ImgOps.bilin(sub, x, y);
            outv[i, j] = value;
        }
    }
}
/// <summary>
/// Resamples <paramref name="sub"/> into a centred <paramref name="csize"/> x
/// <paramref name="csize"/> array with bilinear interpolation.
/// </summary>
/// <remarks>
/// The scale factor is the larger input dimension divided by <paramref name="csize"/>; it
/// is reset to 1 when it is smaller than <paramref name="noupscale"/>. If
/// <c>scale * aa</c> exceeds 1e-3, <paramref name="sub"/> is blurred in place with
/// <c>Gauss.Gauss2d</c> before sampling, so the input array is modified.
/// </remarks>
/// <param name="v">Destination; resized to csize x csize and zero-filled.</param>
/// <param name="sub">Input array (two dimensions are read).</param>
/// <param name="csize">Edge length of the output.</param>
/// <param name="noupscale">Threshold below which the scale factor is forced to 1.</param>
/// <param name="aa">Anti-aliasing strength; multiplies the scale to give the blur sigma.</param>
public static void scale_to(Floatarray v, Floatarray sub, int csize, float noupscale = 1.0f, float aa = 1.0f)
{
    float s = Math.Max(sub.Dim(0), sub.Dim(1)) / (float)csize;
    if (s < noupscale)
    {
        s = 1.0f;
    }

    // centring offsets
    float dx = (csize * s - sub.Dim(0)) / 2;
    float dy = (csize * s - sub.Dim(1)) / 2;

    // anti-aliasing blur
    float sig = s * aa;
    if (sig > 1e-3f)
    {
        Gauss.Gauss2d(sub, sig, sig);
    }

    v.Resize(csize, csize);
    v.Fill(0f);
    for (int col = 0; col < csize; col++)
    {
        float x = col * s - dx;
        if (x < 0 || x >= sub.Dim(0))
        {
            continue; // nothing in this output column maps into the input
        }
        for (int row = 0; row < csize; row++)
        {
            float y = row * s - dy;
            if (y < 0 || y >= sub.Dim(1))
            {
                continue;
            }
            v[col, row] = ImgOps.bilin(sub, x, y);
        }
    }
}
/// <summary>
/// Fills <paramref name="v"/> with the values <c>data[i, j]</c> for all <c>j</c>.
/// </summary>
/// <param name="v">Destination; resized to <c>data.Dim(1)</c> elements.</param>
/// <param name="i">First index into <c>data</c>.</param>
public override void Input(Floatarray v, int i)
{
    v.Resize(data.Dim(1));

    int j = 0;
    while (j < v.Dim(0))
    {
        v.UnsafePut1d(j, data[i, j]);
        j++;
    }
}
/// <summary>
/// Accumulates <c>result[j] = sum_i v[i] * a[i, j]</c>, skipping rows whose
/// coefficient is exactly zero.
/// </summary>
/// <param name="result">Destination; resized to <c>a.Dim(1)</c> and cleared.</param>
/// <param name="v">Vector with <c>a.Dim(0)</c> elements (enforced via CHECK_ARG).</param>
/// <param name="a">Matrix operand.</param>
protected static void vmmul0(Floatarray result, Floatarray v, Floatarray a)
{
    int n = a.Dim(0);
    int m = a.Dim(1);
    CHECK_ARG(n == v.Length(), "n == v.Length()");
    result.Resize(m);
    result.Fill(0f);

    int i = 0;
    while (i < n)
    {
        float weight = v.UnsafeAt(i);
        bool contributes = !(weight == 0f);
        if (contributes)
        {
            int j = 0;
            while (j < m)
            {
                result.UnsafePut(j, result.UnsafeAt(j) + (a.UnsafeAt(i, j) * weight));
                j++;
            }
        }
        i++;
    }
}
/// <summary>
/// Extracts five feature maps from a character image and appends them to
/// <paramref name="outarrays"/> in this order: x gradient, y gradient, junctions,
/// endpoints, holes. Each map is scaled to <c>csize</c> x <c>csize</c> with
/// <c>OcrRoutine.scale_to</c>.
/// </summary>
/// <remarks>
/// Parameters read: "csize", "minsize", "threshold", "gradsigma", "noupscale", "aa", "n",
/// "step", "binsmooth". The input is copied first, so <paramref name="inarray"/> is not
/// modified by this method.
/// </remarks>
/// <param name="outarrays">Cleared, then filled with the five feature maps.</param>
/// <param name="inarray">Input image; dimensions 0 and 1 are used.</param>
public override void Extract(Narray<Floatarray> outarrays, Floatarray inarray)
{
    outarrays.Clear();
    Floatarray input = new Floatarray();
    input.Copy(inarray);
    int w = input.Dim(0), h = input.Dim(1);
    Floatarray a = new Floatarray();    // working array
    int csize = PGeti("csize");

    // Looked up once instead of before every use.
    float threshold = PGetf("threshold");
    float noupscale = PGetf("noupscale");
    float aa = PGetf("aa");

    // get rid of small components
    SegmRoutine.erase_small_components(input, PGetf("minsize"), threshold);

    // compute a thresholded version for morphological operations
    Bytearray thresholded = new Bytearray();
    OcrRoutine.threshold_frac(thresholded, input, threshold);

    // compute a smoothed version of the input for gradient computations
    float sigma = PGetf("gradsigma");
    Floatarray smoothed = new Floatarray();
    smoothed.Copy(input);
    Gauss.Gauss2d(smoothed, sigma, sigma);

    // x gradient: backward difference along dimension 0, zero at the first position
    a.Resize(w, h);
    for (int j = 0; j < h; j++)
    {
        for (int i = 0; i < w; i++)
        {
            float delta;
            if (i == 0)
            {
                delta = 0f;
            }
            else
            {
                delta = smoothed[i, j] - smoothed[i - 1, j];
            }
            a[i, j] = delta;
        }
    }
    Floatarray xgrad = outarrays.Push(new Floatarray());
    OcrRoutine.scale_to(xgrad, a, csize, noupscale, aa);
    // even j keeps only the positive part, odd j only the negative part
    for (int j = 0; j < csize; j++)
    {
        for (int i = 0; i < csize; i++)
        {
            if (j % 2 == 0)
            {
                xgrad[i, j] = Math.Max(xgrad[i, j], 0f);
            }
            else
            {
                xgrad[i, j] = Math.Min(xgrad[i, j], 0f);
            }
        }
    }

    // y gradient: backward difference along dimension 1, zero at the first position
    a.Resize(w, h);
    for (int i = 0; i < w; i++)
    {
        for (int j = 0; j < h; j++)
        {
            float delta;
            if (j == 0)
            {
                delta = 0f;
            }
            else
            {
                delta = smoothed[i, j] - smoothed[i, j - 1];
            }
            a[i, j] = delta;
        }
    }
    Floatarray ygrad = outarrays.Push(new Floatarray());
    OcrRoutine.scale_to(ygrad, a, csize, noupscale, aa);
    // even i keeps only the positive part, odd i only the negative part
    for (int i = 0; i < csize; i++)
    {
        for (int j = 0; j < csize; j++)
        {
            if (i % 2 == 0)
            {
                ygrad[i, j] = Math.Max(ygrad[i, j], 0f);
            }
            else
            {
                ygrad[i, j] = Math.Min(ygrad[i, j], 0f);
            }
        }
    }

    // junctions, endpoints, and holes
    Floatarray junctions = new Floatarray();
    Floatarray endpoints = new Floatarray();
    Floatarray holes = new Floatarray();
    Bytearray junctions1 = new Bytearray();
    Bytearray endpoints1 = new Bytearray();
    Bytearray holes1 = new Bytearray();
    Bytearray binary = new Bytearray();
    junctions.MakeLike(input, 0f);
    endpoints.MakeLike(input, 0f);
    holes.MakeLike(input, 0f);
    int n = PGeti("n");
    float step = PGetf("step");
    int bs = PGeti("binsmooth");
    for (int i = 0; i < n; i++)
    {
        sigma = step * i;
        if (bs > 0)
        {
            OcrRoutine.binsmooth(binary, input, sigma);
        }
        else
        {
            binary.Copy(thresholded);
            Morph.binary_dilate_circle(binary, (int)(sigma));
        }
        // NOTE(review): each pass overwrites junctions/endpoints/holes via Copy, yet the
        // results are divided by n afterwards as if they had been accumulated over the
        // passes. Looks like "+=" may have been intended - confirm before changing, since
        // trained models depend on the current feature values.
        OcrRoutine.skeletal_features(endpoints1, junctions1, binary, 0.0f, 0.0f);
        NarrayUtil.Greater(junctions1, (byte)0, (byte)0, (byte)1);
        junctions.Copy(junctions1);
        NarrayUtil.Greater(endpoints1, (byte)0, (byte)0, (byte)1);
        endpoints.Copy(endpoints1);
        SegmRoutine.extract_holes(ref holes1, binary);
        NarrayUtil.Greater(holes1, (byte)0, (byte)0, (byte)1);
        holes.Copy(holes1);
    }
    junctions *= 1.0f / (float)n;
    endpoints *= 1.0f / (float)n;
    holes *= 1.0f / (float)n;

    OcrRoutine.scale_to(outarrays.Push(new Floatarray()), junctions, csize, noupscale, aa);
    OcrRoutine.scale_to(outarrays.Push(new Floatarray()), endpoints, csize, noupscale, aa);
    OcrRoutine.scale_to(outarrays.Push(new Floatarray()), holes, csize, noupscale, aa);
}
/// <summary>
/// Crops the input to the padded bounding box of its largest connected component and
/// resamples the crop into a centred <c>csize</c> x <c>csize</c> array with bilinear
/// interpolation.
/// </summary>
/// <remarks>
/// Parameters read: "threshold", "pad", "csize", "noupscale", "aa". Components are found on
/// a binarised version of the input (pixel &gt; threshold * max), while sampling uses the
/// grey values. Cropping and blurring are done on a private copy, so
/// <paramref name="input"/> is left untouched; previously the working array was an alias of
/// <paramref name="input"/> and the caller's array was cropped and blurred as a side effect.
/// </remarks>
/// <param name="v">Destination; resized to csize x csize and zero-filled.</param>
/// <param name="input">Rank-2 grey-level image.</param>
/// <exception cref="Exception">Thrown when <paramref name="input"/> is not rank 2.</exception>
protected void rescale(Floatarray v, Floatarray input)
{
    if (input.Rank() != 2)
    {
        throw new Exception("CHECK_ARG: sub.Rank()==2");
    }

    // find the largest connected component and crop to its bounding box
    // (use a binary version of the character to compute the bounding box)
    Intarray components = new Intarray();
    float threshold = PGetf("threshold") * NarrayUtil.Max(input);
    Global.Debugf("biggestcc", "threshold {0}", threshold);
    components.MakeLike(input);
    components.Fill(0);
    for (int i = 0; i < components.Length(); i++)
    {
        components[i] = (input[i] > threshold ? 1 : 0);
    }
    int n = ImgLabels.label_components(ref components);

    // pixel count per label; label 0 is zeroed so it can never win the ArgMax
    Intarray totals = new Intarray(n + 1);
    totals.Fill(0);
    for (int i = 0; i < components.Length(); i++)
    {
        totals[components[i]]++;
    }
    totals[0] = 0;

    Narray<Rect> boxes = new Narray<Rect>();
    ImgLabels.bounding_boxes(ref boxes, components);
    int biggest = NarrayUtil.ArgMax(totals);
    Rect r = boxes[biggest];
    int pad = (int)(PGetf("pad") + 0.5f);
    r.PadBy(pad, pad);
    Global.Debugf("biggestcc", "({0}) {1}[{2}] :: {3} {4} {5} {6}",
        n, biggest, totals[biggest], r.x0, r.y0, r.x1, r.y1);

    // now perform normal feature extraction (use the original grayscale input);
    // work on a copy so the caller's array is not cropped or blurred
    Floatarray sub = new Floatarray();
    sub.Copy(input);
    ImgMisc.Crop(sub, r);

    int csize = PGeti("csize");
    float s = Math.Max(sub.Dim(0), sub.Dim(1)) / (float)csize;
    if (PGetf("noupscale") > 0 && s < 1.0f)
    {
        s = 1.0f;
    }
    float sig = s * PGetf("aa");
    float dx = (csize * s - sub.Dim(0)) / 2f;
    float dy = (csize * s - sub.Dim(1)) / 2f;
    if (sig > 1e-3f)
    {
        Gauss.Gauss2d(sub, sig, sig);
    }

    v.Resize(csize, csize);
    v.Fill(0f);
    for (int i = 0; i < csize; i++)
    {
        for (int j = 0; j < csize; j++)
        {
            float x = i * s - dx;
            float y = j * s - dy;
            if (x < 0 || x >= sub.Dim(0))
            {
                continue;
            }
            if (y < 0 || y >= sub.Dim(1))
            {
                continue;
            }
            float value = ImgOps.bilin(sub, x, y);
            v[i, j] = value;
        }
    }
}
/// <summary>
/// Stores every row of <paramref name="ds"/> together with its class label and then
/// calls <c>Recompute()</c>.
/// </summary>
/// <param name="ds">Samples; one stored row per index along dimension 0.</param>
/// <param name="cs">Class labels, indexed like the rows of <paramref name="ds"/>.</param>
public override void Add(Floatarray ds, Intarray cs)
{
    for (int rowIndex = 0; rowIndex < ds.Dim(0); rowIndex++)
    {
        Narray<byte> stored = data.Push(new Narray<byte>());
        RowGet(stored, ds, rowIndex);

        int label = cs[rowIndex];
        classes.Push(label);
    }
    Recompute();
}
/// <summary>
/// Finds the largest connected component of the binarised input, crops the grey-level
/// image to that component's padded bounding box and resamples the crop into a centred
/// <c>csize</c> x <c>csize</c> array using bilinear interpolation.
/// </summary>
/// <remarks>
/// Parameters read: "threshold", "pad", "csize", "noupscale", "aa". The crop and the
/// anti-aliasing blur operate on a private copy of <paramref name="input"/>. The earlier
/// code assigned <c>sub = input</c>, which only aliased the caller's array and therefore
/// cropped and blurred it in place.
/// </remarks>
/// <param name="v">Destination; resized to csize x csize and zero-filled.</param>
/// <param name="input">Rank-2 grey-level image; not modified.</param>
/// <exception cref="Exception">Thrown when <paramref name="input"/> is not rank 2.</exception>
protected void rescale(Floatarray v, Floatarray input)
{
    if (input.Rank() != 2)
    {
        throw new Exception("CHECK_ARG: sub.Rank()==2");
    }

    // Label the connected components of the thresholded image.
    Intarray components = new Intarray();
    float threshold = PGetf("threshold") * NarrayUtil.Max(input);
    Global.Debugf("biggestcc", "threshold {0}", threshold);
    components.MakeLike(input);
    components.Fill(0);
    for (int i = 0; i < components.Length(); i++)
    {
        components[i] = (input[i] > threshold ? 1 : 0);
    }
    int n = ImgLabels.label_components(ref components);

    // Count pixels per label and ignore label 0 when picking the biggest one.
    Intarray totals = new Intarray(n + 1);
    totals.Fill(0);
    for (int i = 0; i < components.Length(); i++)
    {
        totals[components[i]]++;
    }
    totals[0] = 0;

    Narray <Rect> boxes = new Narray <Rect>();
    ImgLabels.bounding_boxes(ref boxes, components);
    int biggest = NarrayUtil.ArgMax(totals);
    Rect r = boxes[biggest];
    int pad = (int)(PGetf("pad") + 0.5f);
    r.PadBy(pad, pad);
    Global.Debugf("biggestcc", "({0}) {1}[{2}] :: {3} {4} {5} {6}",
                  n, biggest, totals[biggest], r.x0, r.y0, r.x1, r.y1);

    // Normal feature extraction on the original grey values, using a real copy so the
    // caller's image survives the crop and the blur.
    Floatarray sub = new Floatarray();
    sub.Copy(input);
    ImgMisc.Crop(sub, r);

    int csize = PGeti("csize");
    float s = Math.Max(sub.Dim(0), sub.Dim(1)) / (float)csize;
    if (PGetf("noupscale") > 0 && s < 1.0f)
    {
        s = 1.0f;
    }
    float sig = s * PGetf("aa");
    float dx = (csize * s - sub.Dim(0)) / 2f;
    float dy = (csize * s - sub.Dim(1)) / 2f;
    if (sig > 1e-3f)
    {
        Gauss.Gauss2d(sub, sig, sig);
    }

    v.Resize(csize, csize);
    v.Fill(0f);
    for (int i = 0; i < csize; i++)
    {
        for (int j = 0; j < csize; j++)
        {
            float x = i * s - dx;
            float y = j * s - dy;
            if (x < 0 || x >= sub.Dim(0))
            {
                continue;
            }
            if (y < 0 || y >= sub.Dim(1))
            {
                continue;
            }
            float value = ImgOps.bilin(sub, x, y);
            v[i, j] = value;
        }
    }
}
/// <summary>
/// Scales <paramref name="sub"/> to a <paramref name="csize"/> x <paramref name="csize"/>
/// array, keeping it centred; output cells that map outside the input stay zero.
/// </summary>
/// <remarks>
/// A scale factor smaller than <paramref name="noupscale"/> is replaced by 1. When
/// <c>scale * aa</c> is above 1e-3 the input is blurred in place with
/// <c>Gauss.Gauss2d</c>, so <paramref name="sub"/> is modified by this call.
/// </remarks>
/// <param name="v">Destination array.</param>
/// <param name="sub">Input array (two dimensions are read).</param>
/// <param name="csize">Edge length of the output.</param>
/// <param name="noupscale">Threshold below which the scale factor is forced to 1.</param>
/// <param name="aa">Multiplier turning the scale factor into the blur sigma.</param>
public static void scale_to(Floatarray v, Floatarray sub, int csize, float noupscale=1.0f, float aa=1.0f)
{
    // scale factor, possibly pinned to 1
    float largest = Math.Max(sub.Dim(0), sub.Dim(1));
    float s = largest / (float)csize;
    if (s < noupscale)
    {
        s = 1.0f;
    }

    // offsets keeping the input centred in the output
    float dx = (csize * s - sub.Dim(0)) / 2;
    float dy = (csize * s - sub.Dim(1)) / 2;

    // anti-aliasing via Gaussian convolution
    float sig = s * aa;
    bool blur = sig > 1e-3f;
    if (blur)
    {
        Gauss.Gauss2d(sub, sig, sig);
    }

    // bilinear sampling
    v.Resize(csize, csize);
    v.Fill(0f);
    int total = csize * csize;
    for (int cell = 0; cell < total; cell++)
    {
        int i = cell / csize;
        int j = cell % csize;
        float x = i * s - dx;
        float y = j * s - dy;
        bool xInside = !(x < 0 || x >= sub.Dim(0));
        bool yInside = !(y < 0 || y >= sub.Dim(1));
        if (xInside && yInside)
        {
            float value = ImgOps.bilin(sub, x, y);
            v[i, j] = value;
        }
    }
}
/// <summary>
/// Train on a text line, given a segmentation.
/// </summary>
/// <remarks>
/// This is analogous to addTrainingLine(bytearray,nustring) except that it takes the
/// "ground truth" line segmentation. The line is rejected (returns false) when the
/// transcript is empty, when the line height (<c>Dim(1)</c>) is below "minheight" or above
/// "maxheight", or when <c>Dim(1) / Dim(0)</c> exceeds "maxaspect". The minimum-height test
/// previously compared <c>Dim(0)</c> against "minheight", which was inconsistent with the
/// other two checks.
/// </remarks>
/// <param name="cseg">Ground-truth character segmentation; same dimensions as the image.</param>
/// <param name="image_grayscale">Grey-level line image; copied before any inversion.</param>
/// <param name="tr">Transcript; every character code must be at least 32.</param>
/// <returns>True when the line was used for training, false when it was rejected.</returns>
public override bool AddTrainingLine(Intarray cseg, Bytearray image_grayscale, string tr)
{
    Bytearray image = new Bytearray();
    image.Copy(image_grayscale);
    if (String.IsNullOrEmpty(tr))
    {
        Global.Debugf("error", "input transcript is empty");
        return(false);
    }
    // Height is Dim(1), as in the "maxheight" and aspect-ratio checks below.
    if (image.Dim(1) < PGeti("minheight"))
    {
        Global.Debugf("error", "input line too small ({0} x {1})", image.Dim(0), image.Dim(1));
        return(false);
    }
    if (image.Dim(1) > PGeti("maxheight"))
    {
        Global.Debugf("error", "input line too high ({0} x {1})", image.Dim(0), image.Dim(1));
        return(false);
    }
    if (image.Dim(1) * 1.0 / image.Dim(0) > PGetf("maxaspect"))
    {
        Global.Debugf("warn", "input line has bad aspect ratio ({0} x {1})", image.Dim(0), image.Dim(1));
        return(false);
    }
    CHECK_ARG(image.Dim(0) == cseg.Dim(0) && image.Dim(1) == cseg.Dim(1),
              "image.Dim(0) == cseg.Dim(0) && image.Dim(1) == cseg.Dim(1)");

    bool use_reject = PGetb("use_reject") && !DisableJunk;

    // check and set the transcript
    transcript = tr;
    SetLine(image_grayscale);
    if (PGeti("invert") > 0)
    {
        NarrayUtil.Sub(NarrayUtil.Max(image), image);
    }
    for (int i = 0; i < transcript.Length; i++)
    {
        CHECK_ARG((int)transcript[i] >= 32, "(int)transcript[i] >= 32");
    }

    // compute correspondences between actual segmentation and
    // ground truth segmentation
    Narray <Intarray> segments = new Narray <Intarray>();
    GrouperRoutine.segmentation_correspondences(segments, segmentation, cseg);

    // now iterate through all the hypothesis segments and
    // train the classifier with them
    int total = 0;
    int junk = 0;
    for (int i = 0; i < grouper.Object.Length(); i++)
    {
        Intarray segs = new Intarray();
        grouper.Object.GetSegments(segs, i);

        // see whether this is a ground truth segment
        int match = -1;
        for (int j = 0; j < segments.Length(); j++)
        {
            if (GrouperRoutine.Equals(segments[j], segs))
            {
                match = j;
                break;
            }
        }
        match -= 1;         // segments are numbered starting at 1
        int c = reject_class;
        if (match >= 0)
        {
            if (match >= transcript.Length)
            {
                Global.Debugf("error", "mismatch between transcript and cseg: {0}", transcript);
                continue;
            }
            else
            {
                c = (int)transcript[match];
                Global.Debugf("debugmismatch", "index {0} position {1} char {2} [{3}]", i, match, (char)c, c);
            }
        }
        if (c == reject_class)
        {
            junk++;
        }

        // extract the character and add it to the classifier
        Rect b;
        Bytearray mask = new Bytearray();
        grouper.Object.GetMask(out b, ref mask, i, 0);
        Bytearray cv = new Bytearray();
        grouper.Object.ExtractWithMask(cv, mask, image, i, 0);
        Floatarray v = new Floatarray();
        v.Copy(cv);
        v /= 255.0;
        Global.Debugf("cdim", "character dimensions ({0},{1})", v.Dim(0), v.Dim(1));
        total++;
        // junk samples are only added when reject training is enabled
        if (use_reject)
        {
            classifier.Object.XAdd(v, c);
        }
        else
        {
            if (c != reject_class)
            {
                classifier.Object.XAdd(v, c);
            }
        }
        if (c != reject_class)
        {
            IncClass(c);
        }
        ntrained++;
    }
    Global.Debugf("detail", "AddTrainingLine trained {0} chars, {1} junk", total - junk, junk);
    return(true);
}
/// <summary>
/// Adds the scaled outer product of two vectors to a matrix:
/// <c>a[i, j] += eps * u[i] * v[j]</c>.
/// </summary>
/// <remarks>
/// Whichever vector has at least as many zero entries is iterated in the outer loop so
/// that its zero entries skip the whole inner loop.
/// </remarks>
/// <param name="a">Matrix updated in place.</param>
/// <param name="u">Vector with <c>a.Dim(0)</c> elements (enforced via CHECK_ARG).</param>
/// <param name="v">Vector with <c>a.Dim(1)</c> elements (enforced via CHECK_ARG).</param>
/// <param name="eps">Scale factor applied to the outer product.</param>
protected static void outer_add(Floatarray a, Floatarray u, Floatarray v, float eps)
{
    int n = a.Dim(0);
    int m = a.Dim(1);
    CHECK_ARG(n == u.Length(), "n == u.Length()");
    CHECK_ARG(m == v.Length(), "m == v.Length()");

    bool rowsOutermost = count_zeros(u) >= count_zeros(v);
    if (rowsOutermost)
    {
        int i = 0;
        while (i < n)
        {
            if (!(u.UnsafeAt(i) == 0))
            {
                for (int j = 0; j < m; j++)
                {
                    a.UnsafePut(i, j, a.UnsafeAt(i, j) + (eps * u.UnsafeAt(i) * v.UnsafeAt(j)));
                }
            }
            i++;
        }
    }
    else
    {
        int j = 0;
        while (j < m)
        {
            if (!(v.UnsafeAt(j) == 0))
            {
                for (int i = 0; i < n; i++)
                {
                    a.UnsafePut(i, j, a.UnsafeAt(i, j) + (eps * u.UnsafeAt(i) * v.UnsafeAt(j)));
                }
            }
            j++;
        }
    }
}
/// <summary>
/// Train on a text line, given a segmentation.
/// </summary>
/// <remarks>
/// This is analogous to addTrainingLine(bytearray,nustring) except that it takes the
/// "ground truth" line segmentation. Lines are rejected (false is returned) for an empty
/// transcript, a height (<c>Dim(1)</c>) outside ["minheight", "maxheight"], or a
/// <c>Dim(1) / Dim(0)</c> ratio above "maxaspect". The "minheight" check used to test
/// <c>Dim(0)</c>, unlike the other height-based checks; it now tests <c>Dim(1)</c>.
/// </remarks>
/// <param name="cseg">Ground-truth character segmentation; same dimensions as the image.</param>
/// <param name="image_grayscale">Grey-level line image; copied before any inversion.</param>
/// <param name="tr">Transcript; every character code must be at least 32.</param>
/// <returns>True when the line was used for training, false when it was rejected.</returns>
public override bool AddTrainingLine(Intarray cseg, Bytearray image_grayscale, string tr)
{
    Bytearray image = new Bytearray();
    image.Copy(image_grayscale);

    if (String.IsNullOrEmpty(tr))
    {
        Global.Debugf("error", "input transcript is empty");
        return false;
    }
    // Dim(1) is the line height here, matching the two checks that follow.
    if (image.Dim(1) < PGeti("minheight"))
    {
        Global.Debugf("error", "input line too small ({0} x {1})", image.Dim(0), image.Dim(1));
        return false;
    }
    if (image.Dim(1) > PGeti("maxheight"))
    {
        Global.Debugf("error", "input line too high ({0} x {1})", image.Dim(0), image.Dim(1));
        return false;
    }
    if (image.Dim(1) * 1.0 / image.Dim(0) > PGetf("maxaspect"))
    {
        Global.Debugf("warn", "input line has bad aspect ratio ({0} x {1})", image.Dim(0), image.Dim(1));
        return false;
    }
    CHECK_ARG(image.Dim(0) == cseg.Dim(0) && image.Dim(1) == cseg.Dim(1),
        "image.Dim(0) == cseg.Dim(0) && image.Dim(1) == cseg.Dim(1)");

    bool use_reject = PGetb("use_reject") && !DisableJunk;

    // check and set the transcript
    transcript = tr;
    SetLine(image_grayscale);
    if (PGeti("invert") > 0)
    {
        NarrayUtil.Sub(NarrayUtil.Max(image), image);
    }
    for (int i = 0; i < transcript.Length; i++)
    {
        CHECK_ARG((int)transcript[i] >= 32, "(int)transcript[i] >= 32");
    }

    // compute correspondences between actual segmentation and
    // ground truth segmentation
    Narray<Intarray> segments = new Narray<Intarray>();
    GrouperRoutine.segmentation_correspondences(segments, segmentation, cseg);

    // now iterate through all the hypothesis segments and
    // train the classifier with them
    int total = 0;
    int junk = 0;
    for (int i = 0; i < grouper.Object.Length(); i++)
    {
        Intarray segs = new Intarray();
        grouper.Object.GetSegments(segs, i);

        // see whether this is a ground truth segment
        int match = -1;
        for (int j = 0; j < segments.Length(); j++)
        {
            if (GrouperRoutine.Equals(segments[j], segs))
            {
                match = j;
                break;
            }
        }
        match -= 1;         // segments are numbered starting at 1

        int c = reject_class;
        if (match >= 0)
        {
            if (match >= transcript.Length)
            {
                Global.Debugf("error", "mismatch between transcript and cseg: {0}", transcript);
                continue;
            }
            else
            {
                c = (int)transcript[match];
                Global.Debugf("debugmismatch", "index {0} position {1} char {2} [{3}]", i, match, (char)c, c);
            }
        }
        if (c == reject_class)
        {
            junk++;
        }

        // extract the character and add it to the classifier
        Rect b;
        Bytearray mask = new Bytearray();
        grouper.Object.GetMask(out b, ref mask, i, 0);
        Bytearray cv = new Bytearray();
        grouper.Object.ExtractWithMask(cv, mask, image, i, 0);
        Floatarray v = new Floatarray();
        v.Copy(cv);
        v /= 255.0;
        Global.Debugf("cdim", "character dimensions ({0},{1})", v.Dim(0), v.Dim(1));
        total++;

        // junk samples reach the classifier only when reject training is enabled
        if (use_reject)
        {
            classifier.Object.XAdd(v, c);
        }
        else
        {
            if (c != reject_class)
            {
                classifier.Object.XAdd(v, c);
            }
        }
        if (c != reject_class)
        {
            IncClass(c);
        }
        ntrained++;
    }
    Global.Debugf("detail", "AddTrainingLine trained {0} chars, {1} junk", total - junk, junk);
    return true;
}
/// <summary>
/// Spreads nearest-source labels over a 2D grid with a FIFO queue. Cells with a positive
/// incoming <paramref name="distance"/> are seeds; a seed's label is pushed to a
/// 4-connected neighbour whenever the metric from that neighbour to the seed is at most
/// <paramref name="maxdist"/> and smaller than the neighbour's current distance.
/// </summary>
/// <remarks>
/// Historical note carried over from the original comment: this was not written as a
/// template function because of an SGI compiler bug with an unused trailing template argument.
/// </remarks>
/// <param name="m">Metric between a cell and a source cell.</param>
/// <param name="distance">In: seed mask (values &gt; 0). Out: distance of each cell to its source.</param>
/// <param name="source">Out: source coordinates per cell, (-1, -1) where none was assigned.</param>
/// <param name="maxdist">Largest distance that is still propagated.</param>
public static void Go(Metric m, ref Floatarray distance, ref Narray <Point> source, float maxdist)
{
    const float BIG = 1e38f;
    int w = distance.Dim(0);
    int h = distance.Dim(1);
    distance.Resize(w, h);
    source.Resize(w, h);
    Queue <Point> queue = new Queue <Point>(w * h);

    // Seed pass.
    for (int col = 0; col < w; col++)
    {
        for (int row = 0; row < h; row++)
        {
            if (distance.At(col, row) > 0)
            {
                queue.Enqueue(new Point(col, row));
                distance[col, row] = 0;
                source[col, row] = new Point(col, row);
                continue;
            }
            distance[col, row] = BIG;
            source[col, row] = new Point(-1, -1);
        }
    }

    // Offsets in visiting order: left, up, right, down.
    int[,] offsets = { { -1, 0 }, { 0, -1 }, { 1, 0 }, { 0, 1 } };

    // Propagation pass.
    while (queue.Count != 0)
    {
        Point q = queue.Dequeue();
        for (int dir = 0; dir < 4; dir++)
        {
            int nx = q.X + offsets[dir, 0];
            int ny = q.Y + offsets[dir, 1];

            // Metric first, then the bounds test, then the comparison against the
            // neighbour's stored distance (only evaluated for in-grid neighbours).
            float d = m.metric(new Point(nx, ny), source.At(q.X, q.Y));
            bool withinGrid = nx >= 0 && ny >= 0 && nx <= w - 1 && ny <= h - 1;
            if (d <= maxdist && withinGrid && d < distance.At(nx, ny))
            {
                queue.Enqueue(new Point(nx, ny));
                source[nx, ny] = source.At(q.X, q.Y);
                distance[nx, ny] = d;
            }
        }
    }
}
/// <summary>
/// Relaxes path costs starting from the cells <c>(x0..x1-1, y)</c>, moving one row at a
/// time in the direction given by <c>direction</c> until the row <c>limit</c> is reached.
/// </summary>
/// <remarks>
/// From each dequeued cell three moves into the next row are tried: straight
/// (cost + pixel + <c>down_cost</c>), to column - 1 (only when the column is greater than 1)
/// and to column + 1 (only when the column is less than <c>wimage.Dim(0) - 1</c>). Diagonal
/// moves use <c>inside_diagonal_cost</c> when the pixel is non-zero; for a zero pixel the
/// left move uses <c>outside_diagonal_cost</c> and the right move
/// <c>outside_diagonal_cost_r</c>. Whenever a move lowers the stored cost, the cost and the
/// originating column are recorded in <c>costs</c> / <c>sources</c>, and the target cell is
/// queued unless it lies in the limit row.
/// </remarks>
/// <param name="x0">First start column (inclusive).</param>
/// <param name="x1">Last start column (exclusive).</param>
/// <param name="y">Start row.</param>
protected void Step(int x0, int x1, int y)
{
    int w = wimage.Dim(0), h = wimage.Dim(1);
    Queue<Point> queue = new Queue<Point>(w * h);
    for (int start = x0; start < x1; start++)
    {
        queue.Enqueue(new Point(start, y));
    }

    int low = 1;
    int high = wimage.Dim(0) - 1;

    while (queue.Count > 0)
    {
        Point p = queue.Dequeue();
        int col = p.X, row = p.Y;
        int nextRow = row + direction;
        bool outside = wimage[col, row] == 0;
        int cost = costs[col, row];

        // straight move
        int ncost = (int)(cost + wimage[col, row] + down_cost);
        if (costs[col, nextRow] > ncost)
        {
            costs[col, nextRow] = ncost;
            sources[col, nextRow] = col;
            if (nextRow != limit)
            {
                queue.Enqueue(new Point(col, nextRow));
            }
        }

        // diagonal move towards the lower column index
        if (col > low)
        {
            ncost = outside
                ? (int)(cost + wimage[col, row] + outside_diagonal_cost)
                : (int)(cost + wimage[col, row] + inside_diagonal_cost);
            if (costs[col - 1, nextRow] > ncost)
            {
                costs[col - 1, nextRow] = ncost;
                sources[col - 1, nextRow] = col;
                if (nextRow != limit)
                {
                    queue.Enqueue(new Point(col - 1, nextRow));
                }
            }
        }

        // diagonal move towards the higher column index
        if (col < high)
        {
            ncost = outside
                ? (int)(cost + wimage[col, row] + outside_diagonal_cost_r)
                : (int)(cost + wimage[col, row] + inside_diagonal_cost);
            if (costs[col + 1, nextRow] > ncost)
            {
                costs[col + 1, nextRow] = ncost;
                sources[col + 1, nextRow] = col;
                if (nextRow != limit)
                {
                    queue.Enqueue(new Point(col + 1, nextRow));
                }
            }
        }
    }
}
/// <summary>
/// Computes five feature maps for a character image and pushes them onto
/// <paramref name="outarrays"/>: x gradient, y gradient, junctions, endpoints and holes,
/// each scaled to <c>csize</c> x <c>csize</c> by <c>OcrRoutine.scale_to</c>.
/// </summary>
/// <remarks>
/// Parameters read: "csize", "minsize", "threshold", "gradsigma", "noupscale", "aa", "n",
/// "step", "binsmooth". All work is done on a copy of <paramref name="inarray"/>.
/// </remarks>
/// <param name="outarrays">Cleared, then filled with the five feature maps.</param>
/// <param name="inarray">Input image; dimensions 0 and 1 are used.</param>
public override void Extract(Narray <Floatarray> outarrays, Floatarray inarray)
{
    outarrays.Clear();
    Floatarray input = new Floatarray();
    input.Copy(inarray);
    int w = input.Dim(0), h = input.Dim(1);
    Floatarray a = new Floatarray();    // working array
    int csize = PGeti("csize");

    // Parameters that are needed several times are fetched once.
    float threshold = PGetf("threshold");
    float noupscale = PGetf("noupscale");
    float aa = PGetf("aa");

    // get rid of small components
    SegmRoutine.erase_small_components(input, PGetf("minsize"), threshold);

    // compute a thresholded version for morphological operations
    Bytearray thresholded = new Bytearray();
    OcrRoutine.threshold_frac(thresholded, input, threshold);

    // compute a smoothed version of the input for gradient computations
    float sigma = PGetf("gradsigma");
    Floatarray smoothed = new Floatarray();
    smoothed.Copy(input);
    Gauss.Gauss2d(smoothed, sigma, sigma);

    // x gradient (backward difference along dimension 0; zero at i == 0)
    a.Resize(w, h);
    for (int j = 0; j < h; j++)
    {
        for (int i = 0; i < w; i++)
        {
            float delta;
            if (i == 0)
            {
                delta = 0f;
            }
            else
            {
                delta = smoothed[i, j] - smoothed[i - 1, j];
            }
            a[i, j] = delta;
        }
    }
    Floatarray xgrad = outarrays.Push(new Floatarray());
    OcrRoutine.scale_to(xgrad, a, csize, noupscale, aa);
    // positive part on even j, negative part on odd j
    for (int j = 0; j < csize; j++)
    {
        for (int i = 0; i < csize; i++)
        {
            if (j % 2 == 0)
            {
                xgrad[i, j] = Math.Max(xgrad[i, j], 0f);
            }
            else
            {
                xgrad[i, j] = Math.Min(xgrad[i, j], 0f);
            }
        }
    }

    // y gradient (backward difference along dimension 1; zero at j == 0)
    a.Resize(w, h);
    for (int i = 0; i < w; i++)
    {
        for (int j = 0; j < h; j++)
        {
            float delta;
            if (j == 0)
            {
                delta = 0f;
            }
            else
            {
                delta = smoothed[i, j] - smoothed[i, j - 1];
            }
            a[i, j] = delta;
        }
    }
    Floatarray ygrad = outarrays.Push(new Floatarray());
    OcrRoutine.scale_to(ygrad, a, csize, noupscale, aa);
    // positive part on even i, negative part on odd i
    for (int i = 0; i < csize; i++)
    {
        for (int j = 0; j < csize; j++)
        {
            if (i % 2 == 0)
            {
                ygrad[i, j] = Math.Max(ygrad[i, j], 0f);
            }
            else
            {
                ygrad[i, j] = Math.Min(ygrad[i, j], 0f);
            }
        }
    }

    // junctions, endpoints, and holes
    Floatarray junctions = new Floatarray();
    Floatarray endpoints = new Floatarray();
    Floatarray holes = new Floatarray();
    Bytearray junctions1 = new Bytearray();
    Bytearray endpoints1 = new Bytearray();
    Bytearray holes1 = new Bytearray();
    Bytearray binary = new Bytearray();
    junctions.MakeLike(input, 0f);
    endpoints.MakeLike(input, 0f);
    holes.MakeLike(input, 0f);
    int n = PGeti("n");
    float step = PGetf("step");
    int bs = PGeti("binsmooth");
    for (int i = 0; i < n; i++)
    {
        sigma = step * i;
        if (bs > 0)
        {
            OcrRoutine.binsmooth(binary, input, sigma);
        }
        else
        {
            binary.Copy(thresholded);
            Morph.binary_dilate_circle(binary, (int)(sigma));
        }
        // NOTE(review): the three maps are overwritten (Copy) on every pass and then
        // divided by n below, which reads like an average over passes that never got
        // accumulated. Possibly "+=" was intended - confirm before changing, because
        // existing trained models depend on the current values.
        OcrRoutine.skeletal_features(endpoints1, junctions1, binary, 0.0f, 0.0f);
        NarrayUtil.Greater(junctions1, (byte)0, (byte)0, (byte)1);
        junctions.Copy(junctions1);
        NarrayUtil.Greater(endpoints1, (byte)0, (byte)0, (byte)1);
        endpoints.Copy(endpoints1);
        SegmRoutine.extract_holes(ref holes1, binary);
        NarrayUtil.Greater(holes1, (byte)0, (byte)0, (byte)1);
        holes.Copy(holes1);
    }
    junctions *= 1.0f / (float)n;
    endpoints *= 1.0f / (float)n;
    holes *= 1.0f / (float)n;

    OcrRoutine.scale_to(outarrays.Push(new Floatarray()), junctions, csize, noupscale, aa);
    OcrRoutine.scale_to(outarrays.Push(new Floatarray()), endpoints, csize, noupscale, aa);
    OcrRoutine.scale_to(outarrays.Push(new Floatarray()), holes, csize, noupscale, aa);
}