/// <summary>
/// Pass-through extractor: empties <paramref name="outarrays"/> and leaves it
/// holding exactly one array, a copy of <paramref name="inarray"/>.
/// </summary>
/// <param name="outarrays">Receives the single output array; previous contents are discarded.</param>
/// <param name="inarray">Array that is copied into the output.</param>
public override void Extract(Narray<Floatarray> outarrays, Floatarray inarray)
{
    outarrays.Clear();
    // Push hands back the stored element, so the copy lands in the array that outarrays holds.
    outarrays.Push(new Floatarray()).Copy(inarray);
}
/// <summary>
/// Copies the stored arc data of state <paramref name="from"/> into the caller's arrays.
/// </summary>
/// <param name="out_inputs">Receives a copy of <c>m_inputs[from]</c>.</param>
/// <param name="out_targets">Receives a copy of <c>m_targets[from]</c>.</param>
/// <param name="out_outputs">Receives a copy of <c>m_outputs[from]</c>.</param>
/// <param name="out_costs">Receives a copy of <c>m_costs[from]</c>.</param>
/// <param name="from">Index used to select the entry in each of the four tables.</param>
public override void Arcs(Intarray out_inputs,
                          Intarray out_targets,
                          Intarray out_outputs,
                          Floatarray out_costs,
                          int from)
{
    // The four tables are indexed in parallel by the same state index.
    out_inputs.Copy(m_inputs[from]);
    out_targets.Copy(m_targets[from]);
    out_outputs.Copy(m_outputs[from]);
    out_costs.Copy(m_costs[from]);
}
/// <summary>
/// Creates an output vector seeded from a dense array: the contents of
/// <paramref name="v"/> are copied into the internal result buffer and
/// <paramref name="v"/> is then cleared.
/// This allows calls like classifier.Outputs(v,x); with floatarray v.
/// </summary>
/// <remarks>
/// NOTE(review): earlier documentation said the result is copied back into
/// the array when the vector is destroyed; no such copy-back is visible in
/// this constructor - confirm where (or whether) it happens.
/// </remarks>
/// <param name="v">Source array; it is emptied by this constructor.</param>
public OutputVector(Floatarray v)
    : this()
{
    Floatarray snapshot = new Floatarray();
    snapshot.Copy(v);
    _result = snapshot;
    v.Clear();
}
/// <summary>
/// Binarizes a byte image by converting it to floats and delegating to
/// <c>binarize_by_range</c> with the fraction read from parameter "f".
/// </summary>
/// <param name="outa">Receives the binarized image.</param>
/// <param name="ina_">Input image; it is copied, not modified here.</param>
public override void Binarize(Bytearray outa, Bytearray ina_)
{
    // Refresh the cached fraction from the parameter store on every call.
    fraction = (float)PGetf("f");

    Floatarray asFloat = new Floatarray();
    asFloat.Copy(ina_);

    binarize_by_range(outa, asFloat, fraction);
}
/// <summary>
/// Runs the two-layer network forward on <paramref name="x_raw"/> and writes the
/// output-layer activations to <paramref name="result"/>.
/// </summary>
/// <param name="result">Receives a copy of the output-layer activations.</param>
/// <param name="x_raw">Input vector; its length must equal <c>w1.Dim(1)</c>.</param>
/// <returns>The absolute difference between the sum of the output activations and 1.</returns>
public override float OutputsDense(Floatarray result, Floatarray x_raw)
{
    CHECK_ARG(x_raw.Length() == w1.Dim(1), "x_raw.Length() == w1.Dim(1)");

    int sparse = PGeti("sparse");

    Floatarray input = new Floatarray();
    input.Copy(x_raw);

    // Hidden layer: sigmoid(w1 * x + b1), optionally sparsified.
    Floatarray hidden = new Floatarray();
    mvmul0(hidden, w1, input);
    hidden += b1;
    for (int k = 0; k < hidden.Length(); k++)
    {
        hidden[k] = sigmoid(hidden[k]);
    }
    if (sparse > 0)
    {
        ClassifierUtil.Sparsify(hidden, sparse);
    }

    // Output layer: sigmoid(w2 * hidden + b2).
    Floatarray output = new Floatarray();
    mvmul0(output, w2, hidden);
    output += b2;
    for (int k = 0; k < output.Length(); k++)
    {
        output[k] = sigmoid(output[k]);
    }

    result.Copy(output);

    double deviation = Math.Abs(NarrayUtil.Sum(output) - 1.0);
    return Convert.ToSingle(deviation);
}
/// <summary>
/// Byte-array convenience overload: converts the input to floats, runs the
/// float-array <c>Extract</c> overload, and copies the result into <paramref name="outa"/>.
/// </summary>
/// <param name="outa">Receives the extraction result converted back to bytes.</param>
/// <param name="ina">Input image; it is copied, not modified here.</param>
public virtual void Extract(Bytearray outa, Bytearray ina)
{
    Floatarray floatInput = new Floatarray();
    floatInput.Copy(ina);

    Floatarray floatOutput = new Floatarray();
    Extract(floatOutput, floatInput);

    outa.Copy(floatOutput);
}
/// <summary>
/// Dense-array wrapper around <c>XOutputs</c>: evaluates <paramref name="x"/> and
/// stores the outputs, converted with <c>AsArray()</c>, in <paramref name="p"/>.
/// </summary>
/// <param name="p">Receives the outputs; previous contents are discarded.</param>
/// <param name="x">Input vector passed to <c>XOutputs</c>.</param>
/// <returns>The value returned by <c>XOutputs</c>.</returns>
public float Outputs(Floatarray p, Floatarray x)
{
    OutputVector outputs = new OutputVector();
    float result = XOutputs(outputs, x);

    p.Clear();
    p.Copy(outputs.AsArray());

    return result;
}
/// <summary>
/// Runs an A* search over the reversed copy of <paramref name="fst"/> and stores the
/// search's <c>g</c> array in <paramref name="costs_for_all_nodes"/>.
/// </summary>
/// <param name="costs_for_all_nodes">Receives the per-node costs, without the entry for the extra vertex.</param>
/// <param name="fst">Transducer to search; it is copied in reverse, not modified here.</param>
public static void a_star_backwards(Floatarray costs_for_all_nodes, IGenericFst fst)
{
    // Reversing the transducer creates an extra vertex.
    IGenericFst reversed = FstFactory.MakeOcroFST();
    FstUtil.fst_copy_reverse(reversed, fst, true);

    AStarSearch search = new AStarSearch(reversed);
    search.Loop();

    costs_for_all_nodes.Copy(search.g);
    // Drop the trailing entry that belongs to the extra vertex.
    costs_for_all_nodes.Pop();
}
/// <summary>
/// Binarizes <paramref name="input"/> after normalizing it and, when
/// <paramref name="sigma"/> is positive, blurring it with a Gaussian.
/// </summary>
/// <remarks>
/// The copy is shifted by its minimum and divided by its maximum before the
/// optional blur, then thresholded at 0.5.
/// NOTE(review): a constant input makes the maximum zero after the shift, so the
/// division is by zero - confirm callers never pass such an image.
/// </remarks>
/// <param name="binary">Receives the thresholded image.</param>
/// <param name="input">Source image; it is copied, not modified here.</param>
/// <param name="sigma">Gaussian sigma for both axes; values of 0 or less skip the blur.</param>
public static void binsmooth(Bytearray binary, Floatarray input, float sigma)
{
    Floatarray work = new Floatarray();
    work.Copy(input);

    // Normalize: shift by the minimum, then divide by the (shifted) maximum.
    work -= NarrayUtil.Min(work);
    work /= NarrayUtil.Max(work);

    bool blur = sigma > 0;
    if (blur)
    {
        Gauss.Gauss2d(work, sigma, sigma);
    }

    binarize_with_threshold(binary, work, 0.5f);
}
/// <summary>
/// Loads the test PNG, inverts and rescales it, classifies it with
/// <paramref name="classifier"/>, and prints the best class and its score.
/// </summary>
/// <param name="classifier">Classifier whose <c>XOutputs</c> is exercised.</param>
private void DoTestRecognize(LenetClassifier classifier)
{
    // Read the grayscale image and invert it (255 - pixel).
    Bytearray pixels = new Bytearray(1, 1);
    ImgIo.read_image_gray(pixels, testPngFileName);
    NarrayUtil.Sub(255, pixels);

    // Convert to floats and divide by 255.
    Floatarray features = new Floatarray();
    features.Copy(pixels);
    features /= 255.0;

    OutputVector scores = new OutputVector();
    classifier.XOutputs(scores, features);

    var best = scores.BestIndex;
    Console.WriteLine("Featured output class '{0}', score '{1}'",
                      (char)scores.Key(best),
                      scores.Value(best));
}
/// <summary>
/// Computes the perplexity <c>exp(-sum(p * ln p))</c> of <paramref name="weights"/>
/// after normalizing them to sum to 1.
/// </summary>
/// <remarks>
/// Entries that are exactly zero contribute nothing to the sum (the limit of
/// <c>p * ln p</c> as <c>p</c> approaches 0 is 0). Evaluating them literally would give
/// <c>0 * -Infinity = NaN</c> and turn the whole result into NaN.
/// </remarks>
/// <param name="weights">Unnormalized weights; the array is copied, not modified here.</param>
/// <returns>The perplexity of the normalized weights.</returns>
public static double Perplexity(Floatarray weights)
{
    Floatarray w = new Floatarray();
    w.Copy(weights);
    w /= NarrayUtil.Sum(w);

    double total = 0.0;
    for (int i = 0; i < w.Length(); i++)
    {
        float value = w[i];
        // Exact comparison is intended: only a true zero makes Math.Log return -Infinity.
        if (value == 0f)
        {
            continue;
        }
        total += value * Math.Log(value);
    }
    return Math.Exp(-total);
}
/// <summary>
/// Copies the network parameters of <paramref name="other"/> into this classifier.
/// </summary>
/// <remarks>
/// Weights and biases are always replaced. The <c>c2i</c> and <c>i2c</c> tables are
/// copied only when this instance's table has length less than 1.
/// </remarks>
/// <param name="other">Classifier to copy from.</param>
public void Copy(MlpClassifier other)
{
    // Layer weights and biases.
    w1.Copy(other.w1);
    b1.Copy(other.b1);
    w2.Copy(other.w2);
    b2.Copy(other.b2);

    // Class-index tables: keep existing ones, adopt the other's only when empty.
    bool missingC2i = c2i.Length() < 1;
    if (missingC2i)
    {
        c2i.Copy(other.c2i);
    }

    bool missingI2c = i2c.Length() < 1;
    if (missingI2c)
    {
        i2c.Copy(other.i2c);
    }
}
/// <summary>
/// Propagate labels across the entire image from a set of non-zero seeds.
/// </summary>
/// <remarks>
/// Every zero pixel is overwritten with the label found at the source point that
/// <c>BrushFire.brushfire_2</c> reports for it; non-zero pixels are left unchanged.
/// </remarks>
/// <param name="image">Label image, updated in place.</param>
public static void propagate_labels(ref Intarray image)
{
    Narray<Point> nearest = new Narray<Point>();
    Floatarray distance = new Floatarray();
    distance.Copy(image);
    BrushFire.brushfire_2(ref distance, ref nearest, 1000000);

    int count = distance.Length1d();
    for (int idx = 0; idx < count; idx++)
    {
        Point origin = nearest.At1d(idx);
        if (image.At1d(idx) != 0)
        {
            continue;   // already labelled
        }
        image.Put1d(idx, image[origin.X, origin.Y]);
    }
}
/// <summary>
/// Relabels the positive pixels of <paramref name="target"/> with labels taken
/// from <paramref name="seed"/>.
/// </summary>
/// <remarks>
/// For each pixel whose target value is greater than zero, the new value is the seed
/// label at the source point that <c>BrushFire.brushfire_2</c> reports for that pixel.
/// Other target pixels are left unchanged.
/// </remarks>
/// <param name="target">Image updated in place.</param>
/// <param name="seed">Seed labels; copied for the distance computation, not modified here.</param>
public static void propagate_labels_to(ref Intarray target, Intarray seed)
{
    Narray<Point> nearest = new Narray<Point>();
    Floatarray distance = new Floatarray();
    distance.Copy(seed);
    BrushFire.brushfire_2(ref distance, ref nearest, 1000000);

    int count = distance.Length1d();
    for (int idx = 0; idx < count; idx++)
    {
        Point origin = nearest.At1d(idx);
        if (!(target.At1d(idx) > 0))
        {
            continue;   // only positive target pixels are relabelled
        }
        target.Put1d(idx, seed[origin.X, origin.Y]);
    }
}
/// <summary>
/// Draws <paramref name="n"/> indices by looking up random values in the
/// normalized cumulative sum of <paramref name="weights"/>.
/// </summary>
/// <param name="samples">Receives the drawn indices; previous contents are discarded.</param>
/// <param name="weights">Sampling weights; the array is copied, not modified here.</param>
/// <param name="n">Number of indices to draw.</param>
public static void weighted_sample(Intarray samples, Floatarray weights, int n)
{
    // Build the running sum, then divide by its maximum.
    Floatarray cumulative = new Floatarray();
    cumulative.Copy(weights);
    for (int k = 1; k < cumulative.Length(); k++)
    {
        cumulative[k] = cumulative[k] + cumulative[k - 1];
    }
    cumulative /= NarrayUtil.Max(cumulative);

    samples.Clear();
    for (int drawn = 0; drawn < n; drawn++)
    {
        float draw = (float)DRandomizer.Default.drand();
        samples.Push(Binsearch(cumulative, draw));
    }
}
/// <summary>
/// Replaces the contents of this vector with a dense copy of <paramref name="v"/>:
/// the keys become <c>0 .. v.Length()-1</c> and the values become a copy of <paramref name="v"/>.
/// </summary>
/// <remarks>
/// An earlier version first collected only the entries with <c>|value| &gt;= eps</c>, but
/// that result was always overwritten by the dense fill below, so the pass was
/// dead work and has been removed. The stored state is unchanged.
/// NOTE(review): <paramref name="eps"/> therefore has no effect; it is kept so existing
/// callers still compile - confirm whether sparse storage was intended.
/// </remarks>
/// <param name="v">Source values.</param>
/// <param name="eps">Currently unused; retained for interface compatibility.</param>
public void Copy(Floatarray v, float eps = 1e-11f)
{
    Clear();

    _len = v.Length();

    // Identity key list: key i refers to element i of the dense values.
    _keys.Resize(_len);
    for (int i = 0; i < _len; i++)
    {
        _keys.Put1d(i, i);
    }

    _values.Copy(v);
}
/// <summary>
/// Computes five feature maps from <paramref name="inarray"/> and pushes them onto
/// <paramref name="outarrays"/> in this order: x gradient, y gradient, junctions,
/// endpoints, holes. Every map is rescaled with <c>OcrRoutine.scale_to</c> using
/// the "csize", "noupscale" and "aa" parameters.
/// </summary>
/// <remarks>
/// When parameter "n" is 0 or less, the skeletal maps are left as all zeros. Scaling
/// by <c>1/n</c> with n == 0 would multiply zero by infinity and produce NaN.
/// </remarks>
/// <param name="outarrays">Receives the feature maps; previous contents are discarded.</param>
/// <param name="inarray">Input image; it is copied, not modified here.</param>
public override void Extract(Narray<Floatarray> outarrays, Floatarray inarray)
{
    outarrays.Clear();

    // All processing happens on a private copy of the input.
    Floatarray input = new Floatarray();
    input.Copy(inarray);
    int w = input.Dim(0), h = input.Dim(1);
    Floatarray a = new Floatarray();    // working array
    int csize = PGeti("csize");

    // get rid of small components
    SegmRoutine.erase_small_components(input, PGetf("minsize"), PGetf("threshold"));

    // compute a thresholded version for morphological operations
    Bytearray thresholded = new Bytearray();
    OcrRoutine.threshold_frac(thresholded, input, PGetf("threshold"));

    // compute a smoothed version of the input for gradient computations
    float sigma = PGetf("gradsigma");
    Floatarray smoothed = new Floatarray();
    smoothed.Copy(input);
    Gauss.Gauss2d(smoothed, sigma, sigma);

    // x gradient: backward difference along the first index, 0 in the first column
    a.Resize(w, h);
    for (int j = 0; j < h; j++)
    {
        for (int i = 0; i < w; i++)
        {
            a[i, j] = (i == 0) ? 0f : smoothed[i, j] - smoothed[i - 1, j];
        }
    }
    Floatarray xgrad = outarrays.Push(new Floatarray());
    OcrRoutine.scale_to(xgrad, a, csize, PGetf("noupscale"), PGetf("aa"));
    // even j keeps only non-negative values, odd j only non-positive values
    for (int j = 0; j < csize; j++)
    {
        for (int i = 0; i < csize; i++)
        {
            if (j % 2 == 0)
            {
                xgrad[i, j] = Math.Max(xgrad[i, j], 0f);
            }
            else
            {
                xgrad[i, j] = Math.Min(xgrad[i, j], 0f);
            }
        }
    }

    // y gradient: backward difference along the second index, 0 in the first row
    a.Resize(w, h);
    for (int i = 0; i < w; i++)
    {
        for (int j = 0; j < h; j++)
        {
            a[i, j] = (j == 0) ? 0f : smoothed[i, j] - smoothed[i, j - 1];
        }
    }
    Floatarray ygrad = outarrays.Push(new Floatarray());
    OcrRoutine.scale_to(ygrad, a, csize, PGetf("noupscale"), PGetf("aa"));
    // even i keeps only non-negative values, odd i only non-positive values
    for (int i = 0; i < csize; i++)
    {
        for (int j = 0; j < csize; j++)
        {
            if (i % 2 == 0)
            {
                ygrad[i, j] = Math.Max(ygrad[i, j], 0f);
            }
            else
            {
                ygrad[i, j] = Math.Min(ygrad[i, j], 0f);
            }
        }
    }

    // junctions, endpoints, and holes
    Floatarray junctions = new Floatarray();
    Floatarray endpoints = new Floatarray();
    Floatarray holes = new Floatarray();
    Bytearray junctions1 = new Bytearray();
    Bytearray endpoints1 = new Bytearray();
    Bytearray holes1 = new Bytearray();
    Bytearray binary = new Bytearray();
    junctions.MakeLike(input, 0f);
    endpoints.MakeLike(input, 0f);
    holes.MakeLike(input, 0f);

    int n = PGeti("n");
    float step = PGetf("step");
    int bs = PGeti("binsmooth");
    for (int i = 0; i < n; i++)
    {
        sigma = step * i;
        if (bs > 0)
        {
            OcrRoutine.binsmooth(binary, input, sigma);
        }
        else
        {
            binary.Copy(thresholded);
            Morph.binary_dilate_circle(binary, (int)(sigma));
        }

        // NOTE(review): each pass overwrites the three maps via Copy, so only the
        // final pass survives to be scaled by 1/n below - confirm whether the maps
        // were meant to be accumulated across passes instead.
        OcrRoutine.skeletal_features(endpoints1, junctions1, binary, 0.0f, 0.0f);
        NarrayUtil.Greater(junctions1, (byte)0, (byte)0, (byte)1);
        junctions.Copy(junctions1);
        NarrayUtil.Greater(endpoints1, (byte)0, (byte)0, (byte)1);
        endpoints.Copy(endpoints1);
        SegmRoutine.extract_holes(ref holes1, binary);
        NarrayUtil.Greater(holes1, (byte)0, (byte)0, (byte)1);
        holes.Copy(holes1);
    }

    // Skip the scaling when no pass ran: 1/0 is infinity and 0 * infinity is NaN.
    if (n > 0)
    {
        float scale = 1.0f / (float)n;
        junctions *= scale;
        endpoints *= scale;
        holes *= scale;
    }

    OcrRoutine.scale_to(outarrays.Push(new Floatarray()), junctions, csize, PGetf("noupscale"), PGetf("aa"));
    OcrRoutine.scale_to(outarrays.Push(new Floatarray()), endpoints, csize, PGetf("noupscale"), PGetf("aa"));
    OcrRoutine.scale_to(outarrays.Push(new Floatarray()), holes, csize, PGetf("noupscale"), PGetf("aa"));
}
/// <summary>
/// Produces a binary image from <paramref name="input"/>: the copy is shifted by its
/// minimum, divided by its maximum, optionally Gaussian-blurred, and thresholded at 0.5.
/// </summary>
/// <remarks>
/// NOTE(review): if every input value is equal, the maximum after the shift is zero
/// and the division is by zero - confirm callers never pass such an image.
/// </remarks>
/// <param name="binary">Receives the thresholded image.</param>
/// <param name="input">Source image; it is copied, not modified here.</param>
/// <param name="sigma">Gaussian sigma for both axes; the blur runs only when this is greater than 0.</param>
public static void binsmooth(Bytearray binary, Floatarray input, float sigma)
{
    Floatarray normalized = new Floatarray();
    normalized.Copy(input);

    // Shift by the minimum, then divide by the (shifted) maximum.
    normalized -= NarrayUtil.Min(normalized);
    normalized /= NarrayUtil.Max(normalized);

    bool blurRequested = sigma > 0;
    if (blurRequested)
    {
        Gauss.Gauss2d(normalized, sigma, sigma);
    }

    binarize_with_threshold(binary, normalized, 0.5f);
}
/// <summary>
/// Train on a text line, given a segmentation.
/// </summary>
/// <remarks>
/// This is analogous to addTrainingLine(bytearray,nustring) except that
/// it takes the "ground truth" line segmentation.
/// NOTE(review): the "minheight" limit is compared against <c>Dim(0)</c> while
/// "maxheight" is compared against <c>Dim(1)</c> - confirm the intended axis.
/// </remarks>
/// <param name="cseg">Ground-truth segmentation; must have the same dimensions as the image.</param>
/// <param name="image_grayscale">Line image; it is copied, and the original is passed to <c>SetLine</c>.</param>
/// <param name="tr">Transcript of the line; every character code must be at least 32.</param>
/// <returns><c>false</c> when the transcript is empty or the image fails a size/aspect check; otherwise <c>true</c>.</returns>
public override bool AddTrainingLine(Intarray cseg, Bytearray image_grayscale, string tr)
{
    Bytearray image = new Bytearray();
    image.Copy(image_grayscale);

    // --- input validation ---
    if (String.IsNullOrEmpty(tr))
    {
        Global.Debugf("error", "input transcript is empty");
        return false;
    }
    if (image.Dim(0) < PGeti("minheight"))
    {
        Global.Debugf("error", "input line too small ({0} x {1})", image.Dim(0), image.Dim(1));
        return false;
    }
    if (image.Dim(1) > PGeti("maxheight"))
    {
        Global.Debugf("error", "input line too high ({0} x {1})", image.Dim(0), image.Dim(1));
        return false;
    }
    if (image.Dim(1) * 1.0 / image.Dim(0) > PGetf("maxaspect"))
    {
        Global.Debugf("warn", "input line has bad aspect ratio ({0} x {1})", image.Dim(0), image.Dim(1));
        return false;
    }
    CHECK_ARG(image.Dim(0) == cseg.Dim(0) && image.Dim(1) == cseg.Dim(1), "image.Dim(0) == cseg.Dim(0) && image.Dim(1) == cseg.Dim(1)");

    bool use_reject = PGetb("use_reject") && !DisableJunk;

    // check and set the transcript
    transcript = tr;
    SetLine(image_grayscale);
    if (PGeti("invert") > 0)
    {
        NarrayUtil.Sub(NarrayUtil.Max(image), image);
    }
    for (int k = 0; k < transcript.Length; k++)
    {
        CHECK_ARG((int)transcript[k] >= 32, "(int)transcript[i] >= 32");
    }

    // compute correspondences between actual segmentation and
    // ground truth segmentation
    Narray<Intarray> segments = new Narray<Intarray>();
    GrouperRoutine.segmentation_correspondences(segments, segmentation, cseg);

    // now iterate through all the hypothesis segments and
    // train the classifier with them
    int total = 0;
    int junk = 0;
    for (int i = 0; i < grouper.Object.Length(); i++)
    {
        Intarray segs = new Intarray();
        grouper.Object.GetSegments(segs, i);

        // see whether this is a ground truth segment; segments are numbered
        // starting at 1, so the transcript position is (index - 1), and
        // "not found" ends up as -2
        int position = -2;
        for (int j = 0; j < segments.Length(); j++)
        {
            if (GrouperRoutine.Equals(segments[j], segs))
            {
                position = j - 1;
                break;
            }
        }

        int c = reject_class;
        if (position >= 0 && position >= transcript.Length)
        {
            Global.Debugf("error", "mismatch between transcript and cseg: {0}", transcript);
            continue;
        }
        if (position >= 0)
        {
            c = (int)transcript[position];
            Global.Debugf("debugmismatch", "index {0} position {1} char {2} [{3}]", i, position, (char)c, c);
        }

        bool isJunk = (c == reject_class);
        if (isJunk)
        {
            junk++;
        }

        // extract the character and add it to the classifier
        Rect b;
        Bytearray mask = new Bytearray();
        grouper.Object.GetMask(out b, ref mask, i, 0);
        Bytearray cv = new Bytearray();
        grouper.Object.ExtractWithMask(cv, mask, image, i, 0);
        Floatarray v = new Floatarray();
        v.Copy(cv);
        v /= 255.0;
        Global.Debugf("cdim", "character dimensions ({0},{1})", v.Dim(0), v.Dim(1));
        total++;

        // junk samples are only added when reject training is enabled
        if (use_reject || c != reject_class)
        {
            classifier.Object.XAdd(v, c);
        }
        if (c != reject_class)
        {
            IncClass(c);
        }
        ntrained++;
    }

    Global.Debugf("detail", "AddTrainingLine trained {0} chars, {1} junk", total - junk, junk);
    return true;
}
/// <summary>
/// This is a weird, optional method that exposes character segmentation
/// for those line recognizers that have it. The segmentation contains colored pixels,
/// and a transition in the transducer of the form * --- 1/eps --> * --- 2/a --> *
/// means that pixels with color 1 and 2 together form the letter "a".
/// </summary>
/// <remarks>
/// Each grouped component is classified and every accepted class is registered on the
/// grouper with its cost. A component with no accepted class gets a '~' or '#'
/// placeholder. The lattice is finally written to <paramref name="result"/>.
/// </remarks>
/// <param name="segmentation_">Receives a copy of the current segmentation.</param>
/// <param name="result">Receives the recognition lattice.</param>
/// <param name="image_">Line image; must pass the "maxheight" and "maxaspect" checks.</param>
/// <returns>Always 0.0; the rate is never updated in this method.</returns>
public override double RecognizeLine(Intarray segmentation_, IGenericFst result, Bytearray image_)
{
    double rate = 0.0;
    CHECK_ARG(image_.Dim(1) < PGeti("maxheight"),
        String.Format("input line too high ({0} x {1})", image_.Dim(0), image_.Dim(1)));
    CHECK_ARG(image_.Dim(1) * 1.0 / image_.Dim(0) < PGetf("maxaspect"),
        String.Format("input line has bad aspect ratio ({0} x {1})", image_.Dim(0), image_.Dim(1)));
    bool use_reject = PGetb("use_reject") && !DisableJunk;

    // Work on a copy so the optional inversion does not touch the caller's image.
    Bytearray image = new Bytearray();
    image.Copy(image_);
    SetLine(image_);
    if (PGeti("invert") > 0)
    {
        NarrayUtil.Sub(NarrayUtil.Max(image), image);
    }
    segmentation_.Copy(segmentation);

    OutputVector p = new OutputVector();
    int ncomponents = grouper.Object.Length();
    int minclass = PGeti("minclass");
    float minprob = PGetf("minprob");
    float space_yes = PGetf("space_yes");
    float space_no = PGetf("space_no");
    float maxcost = PGetf("maxcost");

    // compute priors if possible; fall back on
    // using no priors if no counts are available
    Floatarray priors = new Floatarray();
    bool use_priors = PGeti("use_priors") > 0;
    if (use_priors)
    {
        if (counts.Length() > 0)
        {
            priors.Copy(counts);
            priors /= NarrayUtil.Sum(priors);
        }
        else
        {
            // warn only once per recognizer instance
            if (!counts_warned)
            {
                Global.Debugf("warn", "use_priors specified but priors unavailable (old model)");
            }
            use_priors = false;
            counts_warned = true;
        }
    }

    EstimateSpaceSize();

    for (int i = 0; i < ncomponents; i++)
    {
        // extract the masked character image and scale it by 1/255
        Rect b;
        Bytearray mask = new Bytearray();
        grouper.Object.GetMask(out b, ref mask, i, 0);
        Bytearray cv = new Bytearray();
        grouper.Object.ExtractWithMask(cv, mask, image, i, 0);
        Floatarray v = new Floatarray();
        v.Copy(cv);
        v /= 255.0f;

        float ccost = classifier.Object.XOutputs(p, v);
        if (use_reject && classifier.Object.HigherOutputIsBetter)
        {
            ccost = 0;
            float total = p.Sum();
            // Negated form keeps the original behavior for NaN sums (they also get zeroed).
            if (!(total > 1e-11f))
            {
                p.Values.Fill(0.0f);
            }
        }

        int count = 0;
        Global.Debugf("dcost", "output {0}", p.Keys.Length());
        for (int index = 0; index < p.Keys.Length(); index++)
        {
            int j = p.Keys[index];
            if (j < minclass)
            {
                continue;
            }
            if (j == reject_class)
            {
                continue;
            }
            float value = p.Values[index];
            if (value <= 0.0f)
            {
                continue;
            }
            if (value < minprob)
            {
                continue;
            }

            // outputs are turned into costs with -log when higher means better
            float pcost = classifier.Object.HigherOutputIsBetter ? (float)-Math.Log(value) : value;
            Global.Debugf("dcost", "{0} {1} {2}", j, pcost + ccost, (j > 32 ? (char)j : '_'));
            float total_cost = pcost + ccost;
            if (total_cost < maxcost)
            {
                if (use_priors)
                {
                    total_cost -= (float)-Math.Log(priors[j]);
                }
                grouper.Object.SetClass(i, j, total_cost);
                count++;
            }
        }
        Global.Debugf("dcost", "");

        // nothing accepted: register a placeholder so the lattice stays connected
        if (count == 0)
        {
            float xheight = 10.0f;
            if (b.Height() < xheight / 2 && b.Width() < xheight / 2)
            {
                grouper.Object.SetClass(i, (int)'~', high_cost / 2);
            }
            else
            {
                grouper.Object.SetClass(i, (int)'#', (b.Width() / xheight) * high_cost);
            }
        }

        if (grouper.Object.PixelSpace(i) > space_threshold)
        {
            Global.Debugf("spaces", "space {0}", grouper.Object.PixelSpace(i));
            grouper.Object.SetSpaceCost(i, space_yes, space_no);
        }
    }

    grouper.Object.GetLattice(result);
    return rate;
}
/// <summary>
/// Evaluates <paramref name="x"/> through <c>XOutputs</c> and exposes the outputs as a
/// plain float array.
/// </summary>
/// <param name="p">Cleared, then filled with <c>AsArray()</c> of the output vector.</param>
/// <param name="x">Input vector passed to <c>XOutputs</c>.</param>
/// <returns>The value returned by <c>XOutputs</c>.</returns>
public float Outputs(Floatarray p, Floatarray x)
{
    OutputVector vector = new OutputVector();
    float xcost = XOutputs(vector, x);

    p.Clear();
    Floatarray dense = vector.AsArray();
    p.Copy(dense);

    return xcost;
}
/// <summary>
/// Train on a text line, given a segmentation.
/// </summary>
/// <remarks>
/// This is analogous to addTrainingLine(bytearray,nustring) except that
/// it takes the "ground truth" line segmentation.
/// NOTE(review): the "minheight" limit is compared against <c>Dim(0)</c> while
/// "maxheight" is compared against <c>Dim(1)</c> - confirm the intended axis.
/// </remarks>
/// <param name="cseg">Ground-truth segmentation; must have the same dimensions as the image.</param>
/// <param name="image_grayscale">Line image; it is copied, and the original is passed to <c>SetLine</c>.</param>
/// <param name="tr">Transcript of the line; every character code must be at least 32.</param>
/// <returns><c>false</c> when the transcript is empty or the image fails a size/aspect check; otherwise <c>true</c>.</returns>
public override bool AddTrainingLine(Intarray cseg, Bytearray image_grayscale, string tr)
{
    Bytearray image = new Bytearray();
    image.Copy(image_grayscale);

    // --- input validation ---
    if (String.IsNullOrEmpty(tr))
    {
        Global.Debugf("error", "input transcript is empty");
        return false;
    }
    if (image.Dim(0) < PGeti("minheight"))
    {
        Global.Debugf("error", "input line too small ({0} x {1})", image.Dim(0), image.Dim(1));
        return false;
    }
    if (image.Dim(1) > PGeti("maxheight"))
    {
        Global.Debugf("error", "input line too high ({0} x {1})", image.Dim(0), image.Dim(1));
        return false;
    }
    if (image.Dim(1) * 1.0 / image.Dim(0) > PGetf("maxaspect"))
    {
        Global.Debugf("warn", "input line has bad aspect ratio ({0} x {1})", image.Dim(0), image.Dim(1));
        return false;
    }
    CHECK_ARG(image.Dim(0) == cseg.Dim(0) && image.Dim(1) == cseg.Dim(1), "image.Dim(0) == cseg.Dim(0) && image.Dim(1) == cseg.Dim(1)");

    bool use_reject = PGetb("use_reject") && !DisableJunk;

    // check and set the transcript
    transcript = tr;
    SetLine(image_grayscale);
    if (PGeti("invert") > 0)
    {
        NarrayUtil.Sub(NarrayUtil.Max(image), image);
    }
    for (int k = 0; k < transcript.Length; k++)
    {
        CHECK_ARG((int)transcript[k] >= 32, "(int)transcript[i] >= 32");
    }

    // compute correspondences between actual segmentation and
    // ground truth segmentation
    Narray<Intarray> segments = new Narray<Intarray>();
    GrouperRoutine.segmentation_correspondences(segments, segmentation, cseg);

    // now iterate through all the hypothesis segments and
    // train the classifier with them
    int total = 0;
    int junk = 0;
    for (int i = 0; i < grouper.Object.Length(); i++)
    {
        Intarray segs = new Intarray();
        grouper.Object.GetSegments(segs, i);

        // see whether this is a ground truth segment; segments are numbered
        // starting at 1, so the transcript position is (index - 1), and
        // "not found" ends up as -2
        int position = -2;
        for (int j = 0; j < segments.Length(); j++)
        {
            if (GrouperRoutine.Equals(segments[j], segs))
            {
                position = j - 1;
                break;
            }
        }

        int c = reject_class;
        if (position >= 0 && position >= transcript.Length)
        {
            Global.Debugf("error", "mismatch between transcript and cseg: {0}", transcript);
            continue;
        }
        if (position >= 0)
        {
            c = (int)transcript[position];
            Global.Debugf("debugmismatch", "index {0} position {1} char {2} [{3}]", i, position, (char)c, c);
        }

        bool isJunk = (c == reject_class);
        if (isJunk)
        {
            junk++;
        }

        // extract the character and add it to the classifier
        Rect b;
        Bytearray mask = new Bytearray();
        grouper.Object.GetMask(out b, ref mask, i, 0);
        Bytearray cv = new Bytearray();
        grouper.Object.ExtractWithMask(cv, mask, image, i, 0);
        Floatarray v = new Floatarray();
        v.Copy(cv);
        v /= 255.0;
        Global.Debugf("cdim", "character dimensions ({0},{1})", v.Dim(0), v.Dim(1));
        total++;

        // junk samples are only added when reject training is enabled
        if (use_reject || c != reject_class)
        {
            classifier.Object.XAdd(v, c);
        }
        if (c != reject_class)
        {
            IncClass(c);
        }
        ntrained++;
    }

    Global.Debugf("detail", "AddTrainingLine trained {0} chars, {1} junk", total - junk, junk);
    return true;
}
/// <summary>
/// Forward pass of the two-layer network: computes the hidden and output activations
/// for <paramref name="x_raw"/> and copies the output activations to <paramref name="result"/>.
/// </summary>
/// <param name="result">Receives a copy of the output-layer activations.</param>
/// <param name="x_raw">Input vector; its length must equal <c>w1.Dim(1)</c>.</param>
/// <returns>The absolute difference between the sum of the output activations and 1.</returns>
public override float OutputsDense(Floatarray result, Floatarray x_raw)
{
    CHECK_ARG(x_raw.Length() == w1.Dim(1), "x_raw.Length() == w1.Dim(1)");

    int sparse = PGeti("sparse");

    Floatarray input = new Floatarray();
    input.Copy(x_raw);

    // Hidden layer: sigmoid(w1 * x + b1), optionally sparsified.
    Floatarray hidden = new Floatarray();
    mvmul0(hidden, w1, input);
    hidden += b1;
    for (int k = 0; k < hidden.Length(); k++)
    {
        hidden[k] = sigmoid(hidden[k]);
    }
    if (sparse > 0)
    {
        ClassifierUtil.Sparsify(hidden, sparse);
    }

    // Output layer: sigmoid(w2 * hidden + b2).
    Floatarray output = new Floatarray();
    mvmul0(output, w2, hidden);
    output += b2;
    for (int k = 0; k < output.Length(); k++)
    {
        output[k] = sigmoid(output[k]);
    }

    result.Copy(output);

    double deviation = Math.Abs(NarrayUtil.Sum(output) - 1.0);
    return Convert.ToSingle(deviation);
}
/// <summary>
/// Propagate labels across the entire image from a set of non-zero seeds.
/// </summary>
/// <remarks>
/// Zero pixels receive the label stored at the source point that
/// <c>BrushFire.brushfire_2</c> reports for them; non-zero pixels are left unchanged.
/// </remarks>
/// <param name="image">Label image, updated in place.</param>
public static void propagate_labels(ref Intarray image)
{
    Narray<Point> nearest = new Narray<Point>();
    Floatarray distance = new Floatarray();
    distance.Copy(image);
    BrushFire.brushfire_2(ref distance, ref nearest, 1000000);

    int count = distance.Length1d();
    for (int idx = 0; idx < count; idx++)
    {
        Point origin = nearest.At1d(idx);
        if (image.At1d(idx) != 0)
        {
            continue;   // already labelled
        }
        image.Put1d(idx, image[origin.X, origin.Y]);
    }
}
/// <summary>
/// Overwrites each positive pixel of <paramref name="target"/> with the
/// <paramref name="seed"/> label found at the source point that
/// <c>BrushFire.brushfire_2</c> reports for that pixel.
/// </summary>
/// <param name="target">Image updated in place; non-positive pixels are left unchanged.</param>
/// <param name="seed">Seed labels; copied for the distance computation, not modified here.</param>
public static void propagate_labels_to(ref Intarray target, Intarray seed)
{
    Narray<Point> nearest = new Narray<Point>();
    Floatarray distance = new Floatarray();
    distance.Copy(seed);
    BrushFire.brushfire_2(ref distance, ref nearest, 1000000);

    int count = distance.Length1d();
    for (int idx = 0; idx < count; idx++)
    {
        Point origin = nearest.At1d(idx);
        if (!(target.At1d(idx) > 0))
        {
            continue;   // only positive target pixels are relabelled
        }
        target.Put1d(idx, seed[origin.X, origin.Y]);
    }
}
/// <summary>
/// This is a weird, optional method that exposes character segmentation
/// for those line recognizers that have it. The segmentation contains colored pixels,
/// and a transition in the transducer of the form * --- 1/eps --> * --- 2/a --> *
/// means that pixels with color 1 and 2 together form the letter "a".
/// </summary>
/// <remarks>
/// Each grouped component is classified and every accepted class is registered on the
/// grouper with its cost. A component with no accepted class gets a '~' or '#'
/// placeholder. The lattice is finally written to <paramref name="result"/>.
/// </remarks>
/// <param name="segmentation_">Receives a copy of the current segmentation.</param>
/// <param name="result">Receives the recognition lattice.</param>
/// <param name="image_">Line image; must pass the "maxheight" and "maxaspect" checks.</param>
/// <returns>Always 0.0; the rate is never updated in this method.</returns>
public override double RecognizeLine(Intarray segmentation_, IGenericFst result, Bytearray image_)
{
    double rate = 0.0;
    CHECK_ARG(image_.Dim(1) < PGeti("maxheight"),
        String.Format("input line too high ({0} x {1})", image_.Dim(0), image_.Dim(1)));
    CHECK_ARG(image_.Dim(1) * 1.0 / image_.Dim(0) < PGetf("maxaspect"),
        String.Format("input line has bad aspect ratio ({0} x {1})", image_.Dim(0), image_.Dim(1)));
    bool use_reject = PGetb("use_reject") && !DisableJunk;

    // Work on a copy so the optional inversion does not touch the caller's image.
    Bytearray image = new Bytearray();
    image.Copy(image_);
    SetLine(image_);
    if (PGeti("invert") > 0)
    {
        NarrayUtil.Sub(NarrayUtil.Max(image), image);
    }
    segmentation_.Copy(segmentation);

    OutputVector p = new OutputVector();
    int ncomponents = grouper.Object.Length();
    int minclass = PGeti("minclass");
    float minprob = PGetf("minprob");
    float space_yes = PGetf("space_yes");
    float space_no = PGetf("space_no");
    float maxcost = PGetf("maxcost");

    // compute priors if possible; fall back on
    // using no priors if no counts are available
    Floatarray priors = new Floatarray();
    bool use_priors = PGeti("use_priors") > 0;
    if (use_priors)
    {
        if (counts.Length() > 0)
        {
            priors.Copy(counts);
            priors /= NarrayUtil.Sum(priors);
        }
        else
        {
            // warn only once per recognizer instance
            if (!counts_warned)
            {
                Global.Debugf("warn", "use_priors specified but priors unavailable (old model)");
            }
            use_priors = false;
            counts_warned = true;
        }
    }

    EstimateSpaceSize();

    for (int i = 0; i < ncomponents; i++)
    {
        // extract the masked character image and scale it by 1/255
        Rect b;
        Bytearray mask = new Bytearray();
        grouper.Object.GetMask(out b, ref mask, i, 0);
        Bytearray cv = new Bytearray();
        grouper.Object.ExtractWithMask(cv, mask, image, i, 0);
        Floatarray v = new Floatarray();
        v.Copy(cv);
        v /= 255.0f;

        float ccost = classifier.Object.XOutputs(p, v);
        if (use_reject && classifier.Object.HigherOutputIsBetter)
        {
            ccost = 0;
            float total = p.Sum();
            // Negated form keeps the original behavior for NaN sums (they also get zeroed).
            if (!(total > 1e-11f))
            {
                p.Values.Fill(0.0f);
            }
        }

        int count = 0;
        Global.Debugf("dcost", "output {0}", p.Keys.Length());
        for (int index = 0; index < p.Keys.Length(); index++)
        {
            int j = p.Keys[index];
            if (j < minclass)
            {
                continue;
            }
            if (j == reject_class)
            {
                continue;
            }
            float value = p.Values[index];
            if (value <= 0.0f)
            {
                continue;
            }
            if (value < minprob)
            {
                continue;
            }

            // outputs are turned into costs with -log when higher means better
            float pcost = classifier.Object.HigherOutputIsBetter ? (float)-Math.Log(value) : value;
            Global.Debugf("dcost", "{0} {1} {2}", j, pcost + ccost, (j > 32 ? (char)j : '_'));
            float total_cost = pcost + ccost;
            if (total_cost < maxcost)
            {
                if (use_priors)
                {
                    total_cost -= (float)-Math.Log(priors[j]);
                }
                grouper.Object.SetClass(i, j, total_cost);
                count++;
            }
        }
        Global.Debugf("dcost", "");

        // nothing accepted: register a placeholder so the lattice stays connected
        if (count == 0)
        {
            float xheight = 10.0f;
            if (b.Height() < xheight / 2 && b.Width() < xheight / 2)
            {
                grouper.Object.SetClass(i, (int)'~', high_cost / 2);
            }
            else
            {
                grouper.Object.SetClass(i, (int)'#', (b.Width() / xheight) * high_cost);
            }
        }

        if (grouper.Object.PixelSpace(i) > space_threshold)
        {
            Global.Debugf("spaces", "space {0}", grouper.Object.PixelSpace(i));
            grouper.Object.SetSpaceCost(i, space_yes, space_no);
        }
    }

    grouper.Object.GetLattice(result);
    return rate;
}
/// <summary>
/// Computes five feature maps from <paramref name="inarray"/> and pushes them onto
/// <paramref name="outarrays"/> in this order: x gradient, y gradient, junctions,
/// endpoints, holes. Every map is rescaled with <c>OcrRoutine.scale_to</c> using
/// the "csize", "noupscale" and "aa" parameters.
/// </summary>
/// <remarks>
/// When parameter "n" is 0 or less, the skeletal maps are left as all zeros. Scaling
/// by <c>1/n</c> with n == 0 would multiply zero by infinity and produce NaN.
/// </remarks>
/// <param name="outarrays">Receives the feature maps; previous contents are discarded.</param>
/// <param name="inarray">Input image; it is copied, not modified here.</param>
public override void Extract(Narray<Floatarray> outarrays, Floatarray inarray)
{
    outarrays.Clear();

    // All processing happens on a private copy of the input.
    Floatarray input = new Floatarray();
    input.Copy(inarray);
    int w = input.Dim(0), h = input.Dim(1);
    Floatarray a = new Floatarray();    // working array
    int csize = PGeti("csize");

    // get rid of small components
    SegmRoutine.erase_small_components(input, PGetf("minsize"), PGetf("threshold"));

    // compute a thresholded version for morphological operations
    Bytearray thresholded = new Bytearray();
    OcrRoutine.threshold_frac(thresholded, input, PGetf("threshold"));

    // compute a smoothed version of the input for gradient computations
    float sigma = PGetf("gradsigma");
    Floatarray smoothed = new Floatarray();
    smoothed.Copy(input);
    Gauss.Gauss2d(smoothed, sigma, sigma);

    // x gradient: backward difference along the first index, 0 in the first column
    a.Resize(w, h);
    for (int j = 0; j < h; j++)
    {
        for (int i = 0; i < w; i++)
        {
            a[i, j] = (i == 0) ? 0f : smoothed[i, j] - smoothed[i - 1, j];
        }
    }
    Floatarray xgrad = outarrays.Push(new Floatarray());
    OcrRoutine.scale_to(xgrad, a, csize, PGetf("noupscale"), PGetf("aa"));
    // even j keeps only non-negative values, odd j only non-positive values
    for (int j = 0; j < csize; j++)
    {
        for (int i = 0; i < csize; i++)
        {
            if (j % 2 == 0)
            {
                xgrad[i, j] = Math.Max(xgrad[i, j], 0f);
            }
            else
            {
                xgrad[i, j] = Math.Min(xgrad[i, j], 0f);
            }
        }
    }

    // y gradient: backward difference along the second index, 0 in the first row
    a.Resize(w, h);
    for (int i = 0; i < w; i++)
    {
        for (int j = 0; j < h; j++)
        {
            a[i, j] = (j == 0) ? 0f : smoothed[i, j] - smoothed[i, j - 1];
        }
    }
    Floatarray ygrad = outarrays.Push(new Floatarray());
    OcrRoutine.scale_to(ygrad, a, csize, PGetf("noupscale"), PGetf("aa"));
    // even i keeps only non-negative values, odd i only non-positive values
    for (int i = 0; i < csize; i++)
    {
        for (int j = 0; j < csize; j++)
        {
            if (i % 2 == 0)
            {
                ygrad[i, j] = Math.Max(ygrad[i, j], 0f);
            }
            else
            {
                ygrad[i, j] = Math.Min(ygrad[i, j], 0f);
            }
        }
    }

    // junctions, endpoints, and holes
    Floatarray junctions = new Floatarray();
    Floatarray endpoints = new Floatarray();
    Floatarray holes = new Floatarray();
    Bytearray junctions1 = new Bytearray();
    Bytearray endpoints1 = new Bytearray();
    Bytearray holes1 = new Bytearray();
    Bytearray binary = new Bytearray();
    junctions.MakeLike(input, 0f);
    endpoints.MakeLike(input, 0f);
    holes.MakeLike(input, 0f);

    int n = PGeti("n");
    float step = PGetf("step");
    int bs = PGeti("binsmooth");
    for (int i = 0; i < n; i++)
    {
        sigma = step * i;
        if (bs > 0)
        {
            OcrRoutine.binsmooth(binary, input, sigma);
        }
        else
        {
            binary.Copy(thresholded);
            Morph.binary_dilate_circle(binary, (int)(sigma));
        }

        // NOTE(review): each pass overwrites the three maps via Copy, so only the
        // final pass survives to be scaled by 1/n below - confirm whether the maps
        // were meant to be accumulated across passes instead.
        OcrRoutine.skeletal_features(endpoints1, junctions1, binary, 0.0f, 0.0f);
        NarrayUtil.Greater(junctions1, (byte)0, (byte)0, (byte)1);
        junctions.Copy(junctions1);
        NarrayUtil.Greater(endpoints1, (byte)0, (byte)0, (byte)1);
        endpoints.Copy(endpoints1);
        SegmRoutine.extract_holes(ref holes1, binary);
        NarrayUtil.Greater(holes1, (byte)0, (byte)0, (byte)1);
        holes.Copy(holes1);
    }

    // Skip the scaling when no pass ran: 1/0 is infinity and 0 * infinity is NaN.
    if (n > 0)
    {
        float scale = 1.0f / (float)n;
        junctions *= scale;
        endpoints *= scale;
        holes *= scale;
    }

    OcrRoutine.scale_to(outarrays.Push(new Floatarray()), junctions, csize, PGetf("noupscale"), PGetf("aa"));
    OcrRoutine.scale_to(outarrays.Push(new Floatarray()), endpoints, csize, PGetf("noupscale"), PGetf("aa"));
    OcrRoutine.scale_to(outarrays.Push(new Floatarray()), holes, csize, PGetf("noupscale"), PGetf("aa"));
}