예제 #1
0
        /// <summary>
        /// Produces a single output array holding a copy of <paramref name="inarray"/>.
        /// </summary>
        /// <param name="outarrays">Emptied first, then given one new array.</param>
        /// <param name="inarray">Array whose contents are copied.</param>
        public override void Extract(Narray<Floatarray> outarrays, Floatarray inarray)
        {
            outarrays.Clear();

            // Push returns the array to fill, so the copy is chained onto it.
            outarrays.Push(new Floatarray()).Copy(inarray);
        }
예제 #2
0
 /// <summary>
 /// Copies the arc data stored for state <paramref name="from"/> into the four output arrays.
 /// </summary>
 /// <param name="out_inputs">Receives a copy of <c>m_inputs[from]</c>.</param>
 /// <param name="out_targets">Receives a copy of <c>m_targets[from]</c>.</param>
 /// <param name="out_outputs">Receives a copy of <c>m_outputs[from]</c>.</param>
 /// <param name="out_costs">Receives a copy of <c>m_costs[from]</c>.</param>
 /// <param name="from">Index used to look up each of the four member tables.</param>
 public override void Arcs(Intarray out_inputs, Intarray out_targets, Intarray out_outputs, Floatarray out_costs, int from)
 {
     int state = from;

     // One lookup-and-copy per table, in the order inputs, targets, outputs, costs.
     out_inputs.Copy(m_inputs[state]);
     out_targets.Copy(m_targets[state]);
     out_outputs.Copy(m_outputs[state]);
     out_costs.Copy(m_costs[state]);
 }
예제 #3
0
 /// <summary>
 /// Creates an output vector seeded from <paramref name="v"/>: the contents of
 /// <paramref name="v"/> are copied into a newly allocated internal result array,
 /// and <paramref name="v"/> itself is then cleared.
 /// </summary>
 /// <remarks>
 /// NOTE(review): this constructor does not keep a reference to <paramref name="v"/>,
 /// so any copy-back of results into the caller's array (as in calls like
 /// classifier.Outputs(v,x)) would have to happen elsewhere - confirm.
 /// </remarks>
 /// <param name="v">Source array; it is empty after the call.</param>
 public OutputVector(Floatarray v)
     : this()
 {
     Floatarray snapshot = new Floatarray();
     snapshot.Copy(v);
     _result = snapshot;

     v.Clear();
 }
예제 #4
0
 /// <summary>
 /// Binarizes a byte image by converting it to floats and delegating to
 /// <c>binarize_by_range</c> with the fraction read from parameter "f".
 /// </summary>
 /// <param name="outa">Output image, passed through to <c>binarize_by_range</c>.</param>
 /// <param name="ina_">Input image; only read.</param>
 public override void Binarize(Bytearray outa, Bytearray ina_)
 {
     // The member is refreshed from the parameter store on every call.
     fraction = (float)PGetf("f");

     Floatarray asFloat = new Floatarray();
     asFloat.Copy(ina_);

     binarize_by_range(outa, asFloat, fraction);
 }
예제 #5
0
 /// <summary>
 /// Fills the caller's arrays with copies of the arc tables kept for state
 /// <paramref name="from"/>.
 /// </summary>
 /// <param name="out_inputs">Overwritten with <c>m_inputs[from]</c>.</param>
 /// <param name="out_targets">Overwritten with <c>m_targets[from]</c>.</param>
 /// <param name="out_outputs">Overwritten with <c>m_outputs[from]</c>.</param>
 /// <param name="out_costs">Overwritten with <c>m_costs[from]</c>.</param>
 /// <param name="from">Index into each member table.</param>
 public override void Arcs(Intarray out_inputs, Intarray out_targets, Intarray out_outputs, Floatarray out_costs, int from)
 {
     int source = from;

     out_inputs.Copy(m_inputs[source]);
     out_targets.Copy(m_targets[source]);
     out_outputs.Copy(m_outputs[source]);
     out_costs.Copy(m_costs[source]);
 }
예제 #6
0
        /// <summary>
        /// Runs the two-layer network on <paramref name="x_raw"/> and stores the
        /// output-layer activations in <paramref name="result"/>.
        /// </summary>
        /// <param name="result">Receives a copy of the output-layer activations.</param>
        /// <param name="x_raw">Input vector; its length must equal <c>w1.Dim(1)</c>.</param>
        /// <returns>The absolute difference between the sum of the output activations and 1.</returns>
        public override float OutputsDense(Floatarray result, Floatarray x_raw)
        {
            CHECK_ARG(x_raw.Length() == w1.Dim(1), "x_raw.Length() == w1.Dim(1)");

            Floatarray output = new Floatarray();
            int sparsity = PGeti("sparse");
            Floatarray hidden = new Floatarray();
            Floatarray input = new Floatarray();

            // Work on a private copy of the input.
            input.Copy(x_raw);

            // First layer: mvmul0 with w1, add bias b1, squash each element.
            mvmul0(hidden, w1, input);
            hidden += b1;
            for (int k = 0; k < hidden.Length(); k++)
            {
                hidden[k] = sigmoid(hidden[k]);
            }

            // Optional sparsification of the hidden activations (parameter "sparse").
            if (sparsity > 0)
            {
                ClassifierUtil.Sparsify(hidden, sparsity);
            }

            // Second layer: same pattern with w2 / b2.
            mvmul0(output, w2, hidden);
            output += b2;
            for (int k = 0; k < output.Length(); k++)
            {
                output[k] = sigmoid(output[k]);
            }

            result.Copy(output);

            var deviation = Math.Abs(NarrayUtil.Sum(output) - 1.0);
            return Convert.ToSingle(deviation);
        }
예제 #7
0
        /// <summary>
        /// Converts the byte image to a float array and binarizes it through
        /// <c>binarize_by_range</c>, using the value of parameter "f" as the fraction.
        /// </summary>
        /// <param name="outa">Output image handed to <c>binarize_by_range</c>.</param>
        /// <param name="ina_">Input image; only read.</param>
        public override void Binarize(Bytearray outa, Bytearray ina_)
        {
            // Re-read the parameter each call and cache it in the member.
            fraction = (float)PGetf("f");

            Floatarray floatImage = new Floatarray();
            floatImage.Copy(ina_);
            binarize_by_range(outa, floatImage, fraction);
        }
예제 #8
0
 /// <summary>
 /// Byte-array convenience overload: converts the input to floats, calls the
 /// float-array <c>Extract</c> overload, and copies its result back into a byte array.
 /// </summary>
 /// <param name="outa">Receives a copy of the float result.</param>
 /// <param name="ina">Input data; only read.</param>
 public virtual void Extract(Bytearray outa, Bytearray ina)
 {
     Floatarray floatInput = new Floatarray();
     Floatarray floatOutput = new Floatarray();

     floatInput.Copy(ina);
     Extract(floatOutput, floatInput);

     outa.Copy(floatOutput);
 }
예제 #9
0
        /// <summary>
        /// Evaluates <c>XOutputs</c> on <paramref name="x"/> and returns the scores as a plain array.
        /// </summary>
        /// <param name="p">Cleared, then overwritten with the output vector's array form.</param>
        /// <param name="x">Input passed through to <c>XOutputs</c>.</param>
        /// <returns>The value returned by <c>XOutputs</c>.</returns>
        public float Outputs(Floatarray p, Floatarray x)
        {
            OutputVector scores = new OutputVector();
            float cost = XOutputs(scores, x);

            p.Clear();
            p.Copy(scores.AsArray());

            return cost;
        }
예제 #10
0
파일: AStarUtil.cs 프로젝트: nickun/OCRonet
 /// <summary>
 /// Runs an A* search over a reversed copy of <paramref name="fst"/> and returns
 /// the search's per-node <c>g</c> values.
 /// </summary>
 /// <param name="costs_for_all_nodes">Receives the <c>g</c> array, minus its last entry.</param>
 /// <param name="fst">Transducer to reverse; passed to <c>fst_copy_reverse</c>.</param>
 public static void a_star_backwards(Floatarray costs_for_all_nodes, IGenericFst fst)
 {
     // Reversing the transducer introduces one extra vertex.
     IGenericFst reversed = FstFactory.MakeOcroFST();
     FstUtil.fst_copy_reverse(reversed, fst, true);

     AStarSearch search = new AStarSearch(reversed);
     search.Loop();

     costs_for_all_nodes.Copy(search.g);
     // Drop the entry belonging to the extra vertex.
     costs_for_all_nodes.Pop();
 }
예제 #11
0
        /// <summary>
        /// Adapter for byte images: widens the input to floats, runs the float-array
        /// <c>Extract</c> overload, then copies the float result into <paramref name="outa"/>.
        /// </summary>
        /// <param name="outa">Receives a copy of the float result.</param>
        /// <param name="ina">Input data; only read.</param>
        public virtual void Extract(Bytearray outa, Bytearray ina)
        {
            Floatarray source = new Floatarray();
            Floatarray extracted = new Floatarray();

            source.Copy(ina);
            Extract(extracted, source);
            outa.Copy(extracted);
        }
예제 #12
0
        /// <summary>
        /// Computes per-node costs by running <c>AStarSearch</c> on a reversed copy of
        /// <paramref name="fst"/> and copying out its <c>g</c> array.
        /// </summary>
        /// <param name="costs_for_all_nodes">Receives the <c>g</c> array with its last entry removed.</param>
        /// <param name="fst">Transducer to reverse; passed to <c>fst_copy_reverse</c>.</param>
        public static void a_star_backwards(Floatarray costs_for_all_nodes, IGenericFst fst)
        {
            IGenericFst backward = FstFactory.MakeOcroFST();
            FstUtil.fst_copy_reverse(backward, fst, true); // the reversed copy has one extra vertex

            AStarSearch astar = new AStarSearch(backward);
            astar.Loop();

            costs_for_all_nodes.Copy(astar.g);
            costs_for_all_nodes.Pop(); // discard the extra vertex's entry
        }
예제 #13
0
        /// <summary>
        /// Rescales <paramref name="input"/> to start at 0 and peak at 1, optionally applies
        /// a Gaussian filter, and thresholds the result at 0.5 into <paramref name="binary"/>.
        /// </summary>
        /// <param name="binary">Output of <c>binarize_with_threshold</c>.</param>
        /// <param name="input">Source image; copied, not modified.</param>
        /// <param name="sigma">Gaussian sigma for both axes; filtering is skipped unless it is positive.</param>
        /// <remarks>
        /// NOTE(review): if every element of <paramref name="input"/> is equal, the maximum
        /// after the shift is 0 and the division below divides by zero - confirm that callers
        /// never pass flat images.
        /// </remarks>
        public static void binsmooth(Bytearray binary, Floatarray input, float sigma)
        {
            Floatarray work = new Floatarray();
            work.Copy(input);

            // Shift the minimum to 0, then divide by the shifted maximum.
            work -= NarrayUtil.Min(work);
            work /= NarrayUtil.Max(work);

            bool blur = sigma > 0;
            if (blur)
            {
                Gauss.Gauss2d(work, sigma, sigma);
            }

            binarize_with_threshold(binary, work, 0.5f);
        }
예제 #14
0
        /// <summary>
        /// Loads the grayscale test image, converts it to a float vector scaled by 1/255,
        /// classifies it, and prints the best class and its score to the console.
        /// </summary>
        /// <param name="classifier">Classifier whose <c>XOutputs</c> is exercised.</param>
        private void DoTestRecognize(LenetClassifier classifier)
        {
            OutputVector outputs = new OutputVector();
            Floatarray features = new Floatarray();
            Bytearray pixels = new Bytearray(1, 1);

            ImgIo.read_image_gray(pixels, testPngFileName);
            NarrayUtil.Sub(255, pixels);

            features.Copy(pixels);
            features /= 255.0;

            classifier.XOutputs(outputs, features);

            var bestClass = (char)outputs.Key(outputs.BestIndex);
            var bestScore = outputs.Value(outputs.BestIndex);
            Console.WriteLine("Featured output class '{0}', score '{1}'", bestClass, bestScore);
        }
예제 #15
0
        /// <summary>
        /// Computes the perplexity of a weight vector: the weights are normalized to sum
        /// to 1, and the result is <c>exp(-sum(w * ln w))</c>.
        /// </summary>
        /// <param name="weights">Unnormalized weights; the array is copied, not modified.</param>
        /// <returns>
        /// The perplexity of the normalized weights. Entries equal to zero contribute
        /// nothing to the sum.
        /// </returns>
        public static double Perplexity(Floatarray weights)
        {
            Floatarray w = new Floatarray();

            w.Copy(weights);
            w /= NarrayUtil.Sum(w);

            double total = 0.0;
            for (int i = 0; i < w.Length(); i++)
            {
                float value = w[i];

                // Math.Log(0) is -Infinity and 0 * -Infinity is NaN, which would turn the
                // whole result into NaN. The limit of p*ln(p) as p -> 0 is 0, so a zero
                // weight is skipped.
                if (value == 0f)
                {
                    continue;
                }
                total += value * Math.Log(value);
            }
            return Math.Exp(-total);
        }
예제 #16
0
 /// <summary>
 /// Copies the weights and biases of <paramref name="other"/> into this classifier.
 /// The class/index maps are copied only when this instance's map is still empty.
 /// </summary>
 /// <param name="other">Classifier to copy from.</param>
 public void Copy(MlpClassifier other)
 {
     // Layer parameters are always overwritten.
     w1.Copy(other.w1);
     b1.Copy(other.b1);
     w2.Copy(other.w2);
     b2.Copy(other.b2);

     // Each lookup table is taken from the source only if ours has no entries yet.
     bool needClassToIndex = c2i.Length() < 1;
     if (needClassToIndex)
     {
         c2i.Copy(other.c2i);
     }

     bool needIndexToClass = i2c.Length() < 1;
     if (needIndexToClass)
     {
         i2c.Copy(other.i2c);
     }
 }
예제 #17
0
        /// <summary>
        /// Propagate labels across the entire image from a set of non-zero seeds.
        /// Every zero pixel is overwritten with the label found at the source point
        /// that <c>BrushFire.brushfire_2</c> reports for it.
        /// </summary>
        /// <param name="image">Label image; modified in place.</param>
        public static void propagate_labels(ref Intarray image)
        {
            Floatarray distance = new Floatarray();
            Narray<Point> origin = new Narray<Point>();

            distance.Copy(image);
            BrushFire.brushfire_2(ref distance, ref origin, 1000000);

            int cells = distance.Length1d();
            for (int k = 0; k < cells; k++)
            {
                Point from = origin.At1d(k);
                if (image.At1d(k) != 0)
                {
                    // Already labelled: leave untouched.
                    continue;
                }
                image.Put1d(k, image[from.X, from.Y]);
            }
        }
예제 #18
0
        /// <summary>
        /// For every positive pixel of <paramref name="target"/>, writes the
        /// <paramref name="seed"/> label found at the source point that
        /// <c>BrushFire.brushfire_2</c> reports for that pixel.
        /// </summary>
        /// <param name="target">Image updated in place; only its positive pixels are rewritten.</param>
        /// <param name="seed">Seed labels; read only.</param>
        public static void propagate_labels_to(ref Intarray target, Intarray seed)
        {
            Floatarray distance = new Floatarray();
            Narray<Point> origin = new Narray<Point>();

            distance.Copy(seed);
            BrushFire.brushfire_2(ref distance, ref origin, 1000000);

            int cells = distance.Length1d();
            for (int k = 0; k < cells; k++)
            {
                Point from = origin.At1d(k);
                if (target.At1d(k) <= 0)
                {
                    continue;
                }
                target.Put1d(k, seed[from.X, from.Y]);
            }
        }
예제 #19
0
        /// <summary>
        /// Draws <paramref name="n"/> indices using the cumulative sum of
        /// <paramref name="weights"/> and stores them in <paramref name="samples"/>.
        /// </summary>
        /// <param name="samples">Cleared, then filled with <paramref name="n"/> results of <c>Binsearch</c>.</param>
        /// <param name="weights">Weights; copied, not modified.</param>
        /// <param name="n">Number of samples to draw.</param>
        public static void weighted_sample(Intarray samples, Floatarray weights, int n)
        {
            // Running sum of the weights, scaled so that its maximum is 1.
            Floatarray cumulative = new Floatarray();
            cumulative.Copy(weights);
            for (int k = 1; k < cumulative.Length(); k++)
            {
                cumulative[k] += cumulative[k - 1];
            }
            cumulative /= NarrayUtil.Max(cumulative);

            samples.Clear();
            int drawn = 0;
            while (drawn < n)
            {
                float draw = (float)DRandomizer.Default.drand();
                samples.Push(Binsearch(cumulative, draw));
                drawn++;
            }
        }
예제 #20
0
        /// <summary>
        /// Replaces the contents of this vector with a dense copy of <paramref name="v"/>:
        /// the keys become 0..Length-1 and the values become a copy of <paramref name="v"/>.
        /// </summary>
        /// <param name="v">Source values.</param>
        /// <param name="eps">
        /// Has no effect on the result. It is kept so existing callers that pass it
        /// continue to compile.
        /// </param>
        /// <remarks>
        /// An earlier pass that pushed only entries with |value| >= eps has been removed,
        /// because the key array was resized and fully rewritten and the value array was
        /// replaced by <c>Copy(v)</c> immediately afterwards, so that pass never influenced
        /// the outcome.
        /// </remarks>
        public void Copy(Floatarray v, float eps = 1e-11f)
        {
            Clear();

            _len = v.Length();

            // Identity key mapping: key i refers to element i of the source.
            _keys.Resize(_len);
            for (int i = 0; i < _len; i++)
            {
                _keys.Put1d(i, i);
            }

            _values.Copy(v);
        }
예제 #21
0
        /// <summary>
        /// Builds five feature maps from <paramref name="inarray"/> and appends them to
        /// <paramref name="outarrays"/> in this order: x-gradient, y-gradient, junctions,
        /// endpoints, holes. Every map is passed through <c>OcrRoutine.scale_to</c> with
        /// parameter "csize".
        /// </summary>
        /// <param name="outarrays">Cleared first; receives the five maps.</param>
        /// <param name="inarray">Input image; copied, not modified.</param>
        /// <remarks>
        /// The junction, endpoint and hole maps are computed once per level
        /// <c>i = 0..n-1</c> (parameter "n", level strength <c>step * i</c>), summed, and
        /// then scaled by 1/n so the emitted maps are averages over all levels.
        /// NOTE(review): feature values differ from a build that kept only the last level;
        /// models trained against such a build may need retraining.
        /// </remarks>
        public override void Extract(Narray<Floatarray> outarrays, Floatarray inarray)
        {
            outarrays.Clear();
            Floatarray input = new Floatarray();
            input.Copy(inarray);
            int w = input.Dim(0), h = input.Dim(1);
            Floatarray a = new Floatarray();            // working array
            int csize = PGeti("csize");

            // get rid of small components
            SegmRoutine.erase_small_components(input, PGetf("minsize"), PGetf("threshold"));

            // compute a thresholded version for morphological operations
            Bytearray thresholded = new Bytearray();
            OcrRoutine.threshold_frac(thresholded, input, PGetf("threshold"));

            // compute a smoothed version of the input for gradient computations
            float sigma = PGetf("gradsigma");
            Floatarray smoothed = new Floatarray();
            smoothed.Copy(input);
            Gauss.Gauss2d(smoothed, sigma, sigma);

            // x gradient: backward difference along the first index, 0 in the first column
            a.Resize(w, h);
            for (int j = 0; j < h; j++)
            {
                for (int i = 0; i < w; i++)
                {
                    float delta;
                    if (i == 0)
                    {
                        delta = 0f;
                    }
                    else
                    {
                        delta = smoothed[i, j] - smoothed[i - 1, j];
                    }
                    a[i, j] = delta;
                }
            }
            Floatarray xgrad = outarrays.Push(new Floatarray());
            OcrRoutine.scale_to(xgrad, a, csize, PGetf("noupscale"), PGetf("aa"));
            // even rows keep only non-negative values, odd rows only non-positive ones
            for (int j = 0; j < csize; j++)
            {
                for (int i = 0; i < csize; i++)
                {
                    if (j % 2 == 0)
                    {
                        xgrad[i, j] = Math.Max(xgrad[i, j], 0f);
                    }
                    else
                    {
                        xgrad[i, j] = Math.Min(xgrad[i, j], 0f);
                    }
                }
            }

            // y gradient: backward difference along the second index, 0 in the first row
            a.Resize(w, h);
            for (int i = 0; i < w; i++)
            {
                for (int j = 0; j < h; j++)
                {
                    float delta;
                    if (j == 0)
                    {
                        delta = 0f;
                    }
                    else
                    {
                        delta = smoothed[i, j] - smoothed[i, j - 1];
                    }
                    a[i, j] = delta;
                }
            }
            Floatarray ygrad = outarrays.Push(new Floatarray());
            OcrRoutine.scale_to(ygrad, a, csize, PGetf("noupscale"), PGetf("aa"));
            // even columns keep only non-negative values, odd columns only non-positive ones
            for (int i = 0; i < csize; i++)
            {
                for (int j = 0; j < csize; j++)
                {
                    if (i % 2 == 0)
                    {
                        ygrad[i, j] = Math.Max(ygrad[i, j], 0f);
                    }
                    else
                    {
                        ygrad[i, j] = Math.Min(ygrad[i, j], 0f);
                    }
                }
            }

            // junctions, endpoints, and holes
            Floatarray junctions = new Floatarray();
            Floatarray endpoints = new Floatarray();
            Floatarray holes = new Floatarray();
            Bytearray junctions1 = new Bytearray();
            Bytearray endpoints1 = new Bytearray();
            Bytearray holes1 = new Bytearray();
            Bytearray binary = new Bytearray();
            Floatarray layer = new Floatarray();        // float copy of the current byte mask
            junctions.MakeLike(input, 0f);
            endpoints.MakeLike(input, 0f);
            holes.MakeLike(input, 0f);
            int n = PGeti("n");
            float step = PGetf("step");
            int bs = PGeti("binsmooth");
            for (int i = 0; i < n; i++)
            {
                sigma = step * i;
                if (bs > 0)
                {
                    OcrRoutine.binsmooth(binary, input, sigma);
                }
                else
                {
                    binary.Copy(thresholded);
                    Morph.binary_dilate_circle(binary, (int)(sigma));
                }
                OcrRoutine.skeletal_features(endpoints1, junctions1, binary, 0.0f, 0.0f);

                // Add this level's masks to the zero-initialised accumulators. Using Copy
                // here would discard every level except the last one, which contradicts
                // the 1/n averaging applied after the loop.
                NarrayUtil.Greater(junctions1, (byte)0, (byte)0, (byte)1);
                layer.Copy(junctions1);
                junctions += layer;

                NarrayUtil.Greater(endpoints1, (byte)0, (byte)0, (byte)1);
                layer.Copy(endpoints1);
                endpoints += layer;

                SegmRoutine.extract_holes(ref holes1, binary);
                NarrayUtil.Greater(holes1, (byte)0, (byte)0, (byte)1);
                layer.Copy(holes1);
                holes += layer;
            }

            // Average over the levels. With n <= 0 the loop never ran, the maps are still
            // all zero, and scaling by 1/n would divide by zero, so it is skipped.
            if (n > 0)
            {
                float inverseLevels = 1.0f / (float)n;
                junctions *= inverseLevels;
                endpoints *= inverseLevels;
                holes *= inverseLevels;
            }

            OcrRoutine.scale_to(outarrays.Push(new Floatarray()), junctions, csize, PGetf("noupscale"), PGetf("aa"));
            OcrRoutine.scale_to(outarrays.Push(new Floatarray()), endpoints, csize, PGetf("noupscale"), PGetf("aa"));
            OcrRoutine.scale_to(outarrays.Push(new Floatarray()), holes, csize, PGetf("noupscale"), PGetf("aa"));
        }
예제 #22
0
 /// <summary>
 /// Initializes the vector from <paramref name="v"/>: a fresh internal result array
 /// receives a copy of <paramref name="v"/>, and <paramref name="v"/> is then cleared.
 /// </summary>
 /// <remarks>
 /// NOTE(review): no reference to <paramref name="v"/> is stored by this constructor,
 /// so writing results back into the caller's array (for calls like
 /// classifier.Outputs(v,x)) is not visible here - confirm where that happens.
 /// </remarks>
 /// <param name="v">Source array; emptied by this call.</param>
 public OutputVector(Floatarray v) : this()
 {
     Floatarray initial = new Floatarray();
     initial.Copy(v);
     _result = initial;

     v.Clear();
 }
예제 #23
0
 /// <summary>
 /// Normalizes a copy of <paramref name="input"/> (subtract minimum, divide by the
 /// resulting maximum), optionally Gaussian-filters it, and thresholds it at 0.5.
 /// </summary>
 /// <param name="binary">Output of <c>binarize_with_threshold</c>.</param>
 /// <param name="input">Source image; not modified.</param>
 /// <param name="sigma">Sigma used for both axes; no filtering unless it is positive.</param>
 /// <remarks>
 /// NOTE(review): a constant input makes the divisor 0 - confirm callers avoid that.
 /// </remarks>
 public static void binsmooth(Bytearray binary, Floatarray input, float sigma)
 {
     Floatarray normalized = new Floatarray();
     normalized.Copy(input);

     normalized -= NarrayUtil.Min(normalized);
     normalized /= NarrayUtil.Max(normalized);

     if (sigma > 0)
     {
         Gauss.Gauss2d(normalized, sigma, sigma);
     }

     binarize_with_threshold(binary, normalized, 0.5f);
 }
예제 #24
0
 /// <summary>
 /// Reads the grayscale test image, feeds it (scaled by 1/255) to the classifier,
 /// and writes the best class and score to the console.
 /// </summary>
 /// <param name="classifier">Classifier whose <c>XOutputs</c> is called.</param>
 private void DoTestRecognize(LenetClassifier classifier)
 {
     OutputVector result = new OutputVector();
     Floatarray input = new Floatarray();
     Bytearray image = new Bytearray(1, 1);

     ImgIo.read_image_gray(image, testPngFileName);
     NarrayUtil.Sub(255, image);

     input.Copy(image);
     input /= 255.0;

     classifier.XOutputs(result, input);

     var winner = (char)result.Key(result.BestIndex);
     var score = result.Value(result.BestIndex);
     Console.WriteLine("Featured output class '{0}', score '{1}'", winner, score);
 }
예제 #25
0
        /// <summary>
        /// Train on a text line, given a segmentation.
        /// <remarks>Counterpart of the (image, transcript) training call that additionally
        /// receives the "ground truth" line segmentation.</remarks>
        /// </summary>
        /// <param name="cseg">Ground-truth segmentation; must have the same dimensions as the image.</param>
        /// <param name="image_grayscale">Line image; copied locally and also handed to <c>SetLine</c>.</param>
        /// <param name="tr">Transcript; every character code must be at least 32.</param>
        /// <returns>
        /// <c>false</c> when the transcript is empty or the image fails the size/aspect
        /// checks; <c>true</c> once all groups have been processed.
        /// </returns>
        public override bool AddTrainingLine(Intarray cseg, Bytearray image_grayscale, string tr)
        {
            Bytearray line = new Bytearray();
            line.Copy(image_grayscale);

            // Input screening: each failed check logs and aborts without training.
            if (String.IsNullOrEmpty(tr))
            {
                Global.Debugf("error", "input transcript is empty");
                return false;
            }
            if (line.Dim(0) < PGeti("minheight"))
            {
                Global.Debugf("error", "input line too small ({0} x {1})", line.Dim(0), line.Dim(1));
                return false;
            }
            if (line.Dim(1) > PGeti("maxheight"))
            {
                Global.Debugf("error", "input line too high ({0} x {1})", line.Dim(0), line.Dim(1));
                return false;
            }
            if (line.Dim(1) * 1.0 / line.Dim(0) > PGetf("maxaspect"))
            {
                Global.Debugf("warn", "input line has bad aspect ratio ({0} x {1})", line.Dim(0), line.Dim(1));
                return false;
            }
            CHECK_ARG(line.Dim(0) == cseg.Dim(0) && line.Dim(1) == cseg.Dim(1),
                      "image.Dim(0) == cseg.Dim(0) && image.Dim(1) == cseg.Dim(1)");

            bool trainJunk = PGetb("use_reject") && !DisableJunk;

            // Install the transcript and the line, then optionally invert the local copy.
            transcript = tr;
            SetLine(image_grayscale);
            if (PGeti("invert") > 0)
            {
                NarrayUtil.Sub(NarrayUtil.Max(line), line);
            }
            for (int pos = 0; pos < transcript.Length; pos++)
            {
                CHECK_ARG((int)transcript[pos] >= 32, "(int)transcript[i] >= 32");
            }

            // Correspondences between the actual segmentation and the ground truth.
            Narray<Intarray> truthSegments = new Narray<Intarray>();
            GrouperRoutine.segmentation_correspondences(truthSegments, segmentation, cseg);

            // Walk over every hypothesis group and feed it to the classifier.
            int total = 0;
            int junk = 0;
            for (int group = 0; group < grouper.Object.Length(); group++)
            {
                Intarray members = new Intarray();
                grouper.Object.GetSegments(members, group);

                // Find the ground-truth entry (if any) that equals this group.
                int found = -1;
                for (int t = 0; t < truthSegments.Length(); t++)
                {
                    if (GrouperRoutine.Equals(truthSegments[t], members))
                    {
                        found = t;
                        break;
                    }
                }
                int position = found - 1;   // segments are numbered starting at 1

                int cls = reject_class;
                if (position >= 0)
                {
                    if (position >= transcript.Length)
                    {
                        Global.Debugf("error", "mismatch between transcript and cseg: {0}", transcript);
                        continue;
                    }
                    cls = (int)transcript[position];
                    Global.Debugf("debugmismatch", "index {0} position {1} char {2} [{3}]", group, position, (char)cls, cls);
                }

                if (cls == reject_class)
                {
                    junk++;
                }

                // Cut the character out of the line and scale it by 1/255.
                Rect bounds;
                Bytearray mask = new Bytearray();
                grouper.Object.GetMask(out bounds, ref mask, group, 0);
                Bytearray cutout = new Bytearray();
                grouper.Object.ExtractWithMask(cutout, mask, line, group, 0);
                Floatarray sample = new Floatarray();
                sample.Copy(cutout);
                sample /= 255.0;
                Global.Debugf("cdim", "character dimensions ({0},{1})", sample.Dim(0), sample.Dim(1));
                total++;

                // Junk samples are only added when reject training is enabled.
                if (trainJunk || cls != reject_class)
                {
                    classifier.Object.XAdd(sample, cls);
                }
                if (cls != reject_class)
                {
                    IncClass(cls);
                }
                ntrained++;
            }
            Global.Debugf("detail", "AddTrainingLine trained {0} chars, {1} junk", total - junk, junk);
            return true;
        }
예제 #26
0
        /// <summary>
        /// This is a weird, optional method that exposes character segmentation
        /// for those line recognizers that have it. The segmentation contains colored pixels,
        /// and a transition in the transducer of the form * --- 1/eps --> * --- 2/a --> *
        /// means that pixels with color 1 and 2 together form the letter "a".
        /// </summary>
        /// <param name="segmentation_">Receives a copy of the segmentation held by this recognizer after <c>SetLine</c>.</param>
        /// <param name="result">Receives the lattice produced by the grouper.</param>
        /// <param name="image_">Line image; copied locally and also handed to <c>SetLine</c>.</param>
        /// <returns>Always 0.0: the rate variable is never updated in this method.</returns>
        public override double RecognizeLine(Intarray segmentation_, IGenericFst result, Bytearray image_)
        {
            double rate = 0.0;

            CHECK_ARG(image_.Dim(1) < PGeti("maxheight"),
                      String.Format("input line too high ({0} x {1})", image_.Dim(0), image_.Dim(1)));
            CHECK_ARG(image_.Dim(1) * 1.0 / image_.Dim(0) < PGetf("maxaspect"),
                      String.Format("input line has bad aspect ratio ({0} x {1})", image_.Dim(0), image_.Dim(1)));
            bool use_reject = PGetb("use_reject") && !DisableJunk;

            Bytearray image = new Bytearray();
            image.Copy(image_);

            SetLine(image_);

            if (PGeti("invert") > 0)
            {
                NarrayUtil.Sub(NarrayUtil.Max(image), image);
            }
            segmentation_.Copy(segmentation);

            // Four scratch arrays that were allocated here but never read or written
            // have been removed.
            OutputVector p           = new OutputVector();
            int          ncomponents = grouper.Object.Length();
            int          minclass    = PGeti("minclass");
            float        minprob     = PGetf("minprob");
            float        space_yes   = PGetf("space_yes");
            float        space_no    = PGetf("space_no");
            float        maxcost     = PGetf("maxcost");

            // compute priors if possible; fall back on
            // using no priors if no counts are available
            Floatarray priors     = new Floatarray();
            bool       use_priors = PGeti("use_priors") > 0;

            if (use_priors)
            {
                if (counts.Length() > 0)
                {
                    priors.Copy(counts);
                    priors /= NarrayUtil.Sum(priors);
                }
                else
                {
                    // warn only once per instance
                    if (!counts_warned)
                    {
                        Global.Debugf("warn", "use_priors specified but priors unavailable (old model)");
                    }
                    use_priors    = false;
                    counts_warned = true;
                }
            }

            EstimateSpaceSize();

            for (int i = 0; i < ncomponents; i++)
            {
                // cut the candidate character out of the line and scale it by 1/255
                Rect      b;
                Bytearray mask = new Bytearray();
                grouper.Object.GetMask(out b, ref mask, i, 0);
                Bytearray cv = new Bytearray();
                grouper.Object.ExtractWithMask(cv, mask, image, i, 0);
                Floatarray v = new Floatarray();
                v.Copy(cv);
                v /= 255.0f;

                float ccost = classifier.Object.XOutputs(p, v);
                if (use_reject && classifier.Object.HigherOutputIsBetter)
                {
                    ccost = 0;
                    float total = p.Sum();

                    // Only the "no usable mass" case is handled; normalising by total is
                    // not performed. The negated comparison keeps a NaN total on the
                    // Fill path, exactly as the former else-branch did.
                    if (!(total > 1e-11f))
                    {
                        p.Values.Fill(0.0f);
                    }
                }
                int count = 0;

                Global.Debugf("dcost", "output {0}", p.Keys.Length());
                for (int index = 0; index < p.Keys.Length(); index++)
                {
                    int j = p.Keys[index];
                    if (j < minclass)
                    {
                        continue;
                    }
                    if (j == reject_class)
                    {
                        continue;
                    }
                    float value = p.Values[index];
                    if (value <= 0.0f)
                    {
                        continue;
                    }
                    if (value < minprob)
                    {
                        continue;
                    }
                    float pcost = classifier.Object.HigherOutputIsBetter ? (float)-Math.Log(value) : value;
                    Global.Debugf("dcost", "{0} {1} {2}", j, pcost + ccost, (j > 32 ? (char)j : '_'));
                    float total_cost = pcost + ccost;
                    if (total_cost < maxcost)
                    {
                        if (use_priors)
                        {
                            // NOTE(review): a prior of 0 makes Math.Log return -Infinity
                            // here - confirm counts never contain zero for emitted classes.
                            total_cost -= (float)-Math.Log(priors[j]);
                        }
                        grouper.Object.SetClass(i, j, total_cost);
                        count++;
                    }
                }
                Global.Debugf("dcost", "");

                // nothing accepted: emit a placeholder class based on the box size
                if (count == 0)
                {
                    float xheight = 10.0f;
                    if (b.Height() < xheight / 2 && b.Width() < xheight / 2)
                    {
                        grouper.Object.SetClass(i, (int)'~', high_cost / 2);
                    }
                    else
                    {
                        grouper.Object.SetClass(i, (int)'#', (b.Width() / xheight) * high_cost);
                    }
                }
                if (grouper.Object.PixelSpace(i) > space_threshold)
                {
                    Global.Debugf("spaces", "space {0}", grouper.Object.PixelSpace(i));
                    grouper.Object.SetSpaceCost(i, space_yes, space_no);
                }
            }

            grouper.Object.GetLattice(result);
            return rate;
        }
예제 #27
0
파일: IModel.cs 프로젝트: nickun/OCRonet
 /// <summary>
 /// Calls <c>XOutputs</c> for <paramref name="x"/> and exposes the resulting output
 /// vector as a plain float array.
 /// </summary>
 /// <param name="p">Cleared, then filled with a copy of the output vector's array form.</param>
 /// <param name="x">Input passed to <c>XOutputs</c>.</param>
 /// <returns>The value returned by <c>XOutputs</c>.</returns>
 public float Outputs(Floatarray p, Floatarray x)
 {
     OutputVector outputs = new OutputVector();
     float cost = XOutputs(outputs, x);

     p.Clear();
     p.Copy(outputs.AsArray());

     return cost;
 }
예제 #28
0
파일: Linerec.cs 프로젝트: nickun/OCRonet
        /// <summary>
        /// Train on a text line, given a segmentation.
        /// <remarks>Same role as the (image, transcript) training call, except that the
        /// "ground truth" line segmentation is supplied by the caller.</remarks>
        /// </summary>
        /// <param name="cseg">Ground-truth segmentation with the same dimensions as the image.</param>
        /// <param name="image_grayscale">Line image; a local copy is used for extraction.</param>
        /// <param name="tr">Transcript; all character codes must be at least 32.</param>
        /// <returns>
        /// <c>false</c> if the transcript is empty or a size/aspect check fails,
        /// otherwise <c>true</c>.
        /// </returns>
        public override bool AddTrainingLine(Intarray cseg, Bytearray image_grayscale, string tr)
        {
            Bytearray image = new Bytearray();
            image.Copy(image_grayscale);

            // --- validation -------------------------------------------------
            if (String.IsNullOrEmpty(tr))
            {
                Global.Debugf("error", "input transcript is empty");
                return false;
            }
            if (image.Dim(0) < PGeti("minheight"))
            {
                Global.Debugf("error", "input line too small ({0} x {1})", image.Dim(0), image.Dim(1));
                return false;
            }
            if (image.Dim(1) > PGeti("maxheight"))
            {
                Global.Debugf("error", "input line too high ({0} x {1})", image.Dim(0), image.Dim(1));
                return false;
            }
            if (image.Dim(1) * 1.0 / image.Dim(0) > PGetf("maxaspect"))
            {
                Global.Debugf("warn", "input line has bad aspect ratio ({0} x {1})", image.Dim(0), image.Dim(1));
                return false;
            }
            CHECK_ARG(image.Dim(0) == cseg.Dim(0) && image.Dim(1) == cseg.Dim(1),
                "image.Dim(0) == cseg.Dim(0) && image.Dim(1) == cseg.Dim(1)");

            bool use_reject = PGetb("use_reject") && !DisableJunk;

            // --- set up transcript and line -----------------------------------
            transcript = tr;
            SetLine(image_grayscale);
            if (PGeti("invert") > 0)
            {
                NarrayUtil.Sub(NarrayUtil.Max(image), image);
            }
            for (int i = 0; i < transcript.Length; i++)
            {
                CHECK_ARG((int)transcript[i] >= 32, "(int)transcript[i] >= 32");
            }

            // --- map hypothesis groups onto ground-truth segments --------------
            Narray<Intarray> segments = new Narray<Intarray>();
            GrouperRoutine.segmentation_correspondences(segments, segmentation, cseg);

            int total = 0;
            int junk = 0;
            for (int i = 0; i < grouper.Object.Length(); i++)
            {
                Intarray segs = new Intarray();
                grouper.Object.GetSegments(segs, i);

                // index of the first ground-truth entry equal to this group, or -1
                int match = -1;
                int j = 0;
                while (match < 0 && j < segments.Length())
                {
                    if (GrouperRoutine.Equals(segments[j], segs))
                    {
                        match = j;
                    }
                    j++;
                }
                match -= 1;         // segments are numbered starting at 1

                int c = reject_class;
                if (match >= transcript.Length)
                {
                    // implies match >= 0 as well, since Length is never negative
                    Global.Debugf("error", "mismatch between transcript and cseg: {0}", transcript);
                    continue;
                }
                if (match >= 0)
                {
                    c = (int)transcript[match];
                    Global.Debugf("debugmismatch", "index {0} position {1} char {2} [{3}]", i, match, (char)c, c);
                }

                if (c == reject_class)
                {
                    junk++;
                }

                // extract the character and add it to the classifier
                Rect b;
                Bytearray mask = new Bytearray();
                grouper.Object.GetMask(out b, ref mask, i, 0);
                Bytearray cv = new Bytearray();
                grouper.Object.ExtractWithMask(cv, mask, image, i, 0);
                Floatarray v = new Floatarray();
                v.Copy(cv);
                v /= 255.0;
                Global.Debugf("cdim", "character dimensions ({0},{1})", v.Dim(0), v.Dim(1));
                total++;

                bool addSample = use_reject || c != reject_class;
                if (addSample)
                {
                    classifier.Object.XAdd(v, c);
                }
                if (c != reject_class)
                {
                    IncClass(c);
                }
                ntrained++;
            }
            Global.Debugf("detail", "AddTrainingLine trained {0} chars, {1} junk", total - junk, junk);
            return true;
        }
예제 #29
0
 /// <summary>
 /// Evaluates the two-layer network for <paramref name="x_raw"/> and copies the
 /// output-layer activations into <paramref name="result"/>.
 /// </summary>
 /// <param name="result">Receives the output activations.</param>
 /// <param name="x_raw">Input vector; its length must equal <c>w1.Dim(1)</c>.</param>
 /// <returns>Absolute difference between the sum of the output activations and 1.</returns>
 public override float OutputsDense(Floatarray result, Floatarray x_raw)
 {
     CHECK_ARG(x_raw.Length() == w1.Dim(1), "x_raw.Length() == w1.Dim(1)");

     Floatarray top = new Floatarray();
     int keep = PGeti("sparse");
     Floatarray mid = new Floatarray();
     Floatarray bottom = new Floatarray();
     bottom.Copy(x_raw);

     // layer 1: mvmul0 with w1, plus bias b1, then element-wise sigmoid
     mvmul0(mid, w1, bottom);
     mid += b1;
     for (int k = 0; k < mid.Length(); k++)
     {
         mid[k] = sigmoid(mid[k]);
     }

     // optional sparsification controlled by parameter "sparse"
     if (keep > 0)
     {
         ClassifierUtil.Sparsify(mid, keep);
     }

     // layer 2: mvmul0 with w2, plus bias b2, then element-wise sigmoid
     mvmul0(top, w2, mid);
     top += b2;
     for (int k = 0; k < top.Length(); k++)
     {
         top[k] = sigmoid(top[k]);
     }

     result.Copy(top);

     var offBy = Math.Abs(NarrayUtil.Sum(top) - 1.0);
     return Convert.ToSingle(offBy);
 }
예제 #30
0
파일: ImgLabels.cs 프로젝트: nickun/OCRonet
 /// <summary>
 /// Propagate labels across the entire image from a set of non-zero seeds.
 /// Each pixel whose value is 0 takes the label stored at the source point that
 /// <c>BrushFire.brushfire_2</c> returns for it; non-zero pixels are left as they are.
 /// </summary>
 /// <param name="image">Label image; updated in place.</param>
 public static void propagate_labels(ref Intarray image)
 {
     Floatarray field = new Floatarray();
     Narray<Point> nearest = new Narray<Point>();

     field.Copy(image);
     BrushFire.brushfire_2(ref field, ref nearest, 1000000);

     int count = field.Length1d();
     for (int k = 0; k < count; k++)
     {
         Point seedAt = nearest.At1d(k);
         bool unlabeled = image.At1d(k) == 0;
         if (unlabeled)
         {
             image.Put1d(k, image[seedAt.X, seedAt.Y]);
         }
     }
 }
예제 #31
0
파일: ImgLabels.cs 프로젝트: nickun/OCRonet
 /// <summary>
 /// Rewrites every positive pixel of <paramref name="target"/> with the
 /// <paramref name="seed"/> label at the source point that
 /// <c>BrushFire.brushfire_2</c> returns for that pixel.
 /// </summary>
 /// <param name="target">Image updated in place; pixels that are not positive stay unchanged.</param>
 /// <param name="seed">Seed labels; read only.</param>
 public static void propagate_labels_to(ref Intarray target, Intarray seed)
 {
     Floatarray field = new Floatarray();
     Narray<Point> nearest = new Narray<Point>();

     field.Copy(seed);
     BrushFire.brushfire_2(ref field, ref nearest, 1000000);

     int count = field.Length1d();
     for (int k = 0; k < count; k++)
     {
         Point seedAt = nearest.At1d(k);
         bool selected = target.At1d(k) > 0;
         if (selected)
         {
             target.Put1d(k, seed[seedAt.X, seedAt.Y]);
         }
     }
 }
예제 #32
0
파일: Linerec.cs 프로젝트: nickun/OCRonet
        /// <summary>
        /// Recognizes one text line: classifies every candidate component reported by
        /// <c>grouper</c>, records the resulting per-class costs on the grouper, and
        /// writes the grouper's lattice into <paramref name="result"/>.
        /// </summary>
        /// <remarks>
        /// Note carried over from the original comment: the segmentation contains colored
        /// pixels, and a transition in the transducer of the form
        /// * --- 1/eps --> * --- 2/a --> * means that pixels with color 1 and 2 together
        /// form the letter "a".
        /// </remarks>
        /// <param name="segmentation_">Overwritten with a copy of the <c>segmentation</c> member.</param>
        /// <param name="result">Transducer that receives the lattice via <c>grouper.Object.GetLattice</c>.</param>
        /// <param name="image_">
        /// Line image. <c>Dim(1)</c> must be below the "maxheight" parameter and
        /// <c>Dim(1) / Dim(0)</c> below the "maxaspect" parameter.
        /// </param>
        /// <returns>
        /// The local <c>rate</c>, which is initialized to 0.0 and never modified in this method.
        /// </returns>
        public override double RecognizeLine(Intarray segmentation_, IGenericFst result, Bytearray image_)
        {
            double rate = 0.0;
            // Reject lines that are too tall or whose height/width ratio is too large.
            CHECK_ARG(image_.Dim(1) < PGeti("maxheight"),
                String.Format("input line too high ({0} x {1})", image_.Dim(0), image_.Dim(1)));
            CHECK_ARG(image_.Dim(1) * 1.0 / image_.Dim(0) < PGetf("maxaspect"),
                String.Format("input line has bad aspect ratio ({0} x {1})", image_.Dim(0), image_.Dim(1)));
            // Reject handling is active only when enabled by parameter and junk is not disabled.
            bool use_reject = PGetb("use_reject") && !DisableJunk;
            //Console.WriteLine("IMG: imin:{0} imax:{1}", NarrayUtil.ArgMin(image_), NarrayUtil.ArgMax(image_));
            // Work on a copy so the caller's image is not touched by the optional inversion below.
            Bytearray image = new Bytearray();
            image.Copy(image_);

            // NOTE(review): SetLine receives the caller's image, not the local copy, and runs
            // before the optional inversion; presumably it prepares grouper/segmentation - confirm.
            SetLine(image_);

            // NOTE(review): presumably replaces each pixel with (max - pixel) in place - confirm
            // against NarrayUtil.Sub; any return value is discarded.
            if (PGeti("invert") > 0)
                NarrayUtil.Sub(NarrayUtil.Max(image), image);
            segmentation_.Copy(segmentation);
            // NOTE(review): available, cp, ccosts and props are never read in this method.
            Bytearray available = new Bytearray();
            Floatarray cp = new Floatarray();
            Floatarray ccosts = new Floatarray();
            Floatarray props = new Floatarray();
            OutputVector p = new OutputVector();
            int ncomponents = grouper.Object.Length();
            int minclass = PGeti("minclass");
            float minprob = PGetf("minprob");
            float space_yes = PGetf("space_yes");
            float space_no = PGetf("space_no");
            float maxcost = PGetf("maxcost");

            // compute priors if possible; fall back on
            // using no priors if no counts are available
            Floatarray priors = new Floatarray();
            bool use_priors = PGeti("use_priors") > 0;
            if (use_priors)
            {
                if (counts.Length() > 0)
                {
                    // Normalize the class counts so they sum to one.
                    priors.Copy(counts);
                    priors /= NarrayUtil.Sum(priors);
                }
                else
                {
                    // Warn only once per instance (tracked by counts_warned).
                    if (!counts_warned)
                        Global.Debugf("warn", "use_priors specified but priors unavailable (old model)");
                    use_priors = false;
                    counts_warned = true;
                }
            }

            EstimateSpaceSize();

            // Classify each candidate component and register its class costs with the grouper.
            for (int i = 0; i < ncomponents; i++)
            {
                Rect b;
                Bytearray mask = new Bytearray();
                grouper.Object.GetMask(out b, ref mask, i, 0);
                Bytearray cv = new Bytearray();
                grouper.Object.ExtractWithMask(cv, mask, image, i, 0);
                //ImgIo.write_image_gray("extrmask_image.png", cv);
                // Convert the extracted bytes to floats and divide by 255 before classification.
                Floatarray v = new Floatarray();
                v.Copy(cv);
                v /= 255.0f;
                float ccost = classifier.Object.XOutputs(p, v);
                if (use_reject && classifier.Object.HigherOutputIsBetter)
                {
                    // With reject handling the classifier's own cost is ignored.
                    ccost = 0;
                    float total = p.Sum();
                    if (total > 1e-11f)
                    {
                        // Normalization is intentionally disabled here (left commented out).
                        //p /= total;
                    }
                    else
                        p.Values.Fill(0.0f);
                }
                // Number of classes accepted for this component.
                int count = 0;

                Global.Debugf("dcost", "output {0}", p.Keys.Length());
                for (int index = 0; index < p.Keys.Length(); index++)
                {
                    int j = p.Keys[index];
                    // Skip classes below the minimum class id, the reject class, and
                    // outputs that are non-positive or below the minimum probability.
                    if (j < minclass) continue;
                    if (j == reject_class) continue;
                    float value = p.Values[index];
                    if (value <= 0.0f) continue;
                    if (value < minprob) continue;
                    // Outputs where higher is better are turned into costs via -log;
                    // otherwise the output value is used as the cost directly.
                    float pcost = classifier.Object.HigherOutputIsBetter ? (float)-Math.Log(value) : value;
                    Global.Debugf("dcost", "{0} {1} {2}", j, pcost + ccost, (j > 32 ? (char)j : '_'));
                    float total_cost = pcost + ccost;
                    if (total_cost < maxcost)
                    {
                        if (use_priors)
                        {
                            // Subtracts -log(prior), i.e. adds log(prior), to the cost.
                            // NOTE(review): a prior of 0 would make this adjustment infinite - confirm
                            // whether classes with zero counts can reach this point.
                            total_cost -= (float)-Math.Log(priors[j]);
                        }
                        grouper.Object.SetClass(i, j, total_cost);
                        count++;
                    }
                }
                Global.Debugf("dcost", "");

                // No class was accepted: register a placeholder class instead.
                if (count == 0)
                {
                    // Fixed reference height used only for the placeholder decision below.
                    float xheight = 10.0f;
                    if (b.Height() < xheight / 2 && b.Width() < xheight / 2)
                    {
                        // Small component: '~' at half of high_cost.
                        grouper.Object.SetClass(i, (int)'~', high_cost / 2);
                    }
                    else
                    {
                        // Larger component: '#' with a cost proportional to its width.
                        grouper.Object.SetClass(i, (int)'#', (b.Width() / xheight) * high_cost);
                    }
                }
                // Attach space costs when the pixel gap for this component exceeds the threshold.
                if (grouper.Object.PixelSpace(i) > space_threshold)
                {
                    Global.Debugf("spaces", "space {0}", grouper.Object.PixelSpace(i));
                    grouper.Object.SetSpaceCost(i, space_yes, space_no);
                }
            }

            grouper.Object.GetLattice(result);
            return rate;
        }
예제 #33
0
        /// <summary>
        /// Builds five feature maps from <paramref name="inarray"/> and pushes them onto
        /// <paramref name="outarrays"/> in this order: x gradient, y gradient, junctions,
        /// endpoints, holes. Every map is rescaled with <c>OcrRoutine.scale_to</c> using the
        /// "csize", "noupscale" and "aa" parameters.
        /// </summary>
        /// <remarks>
        /// The junction/endpoint/hole maps are averaged over "n" smoothing levels. Earlier
        /// revisions overwrote the running maps on every level (so only the last level
        /// survived, scaled by 1/n); the maps are now accumulated before being divided by n.
        /// When "n" is not positive the maps stay all-zero instead of becoming NaN.
        /// </remarks>
        /// <param name="outarrays">Cleared first, then receives the five feature maps.</param>
        /// <param name="inarray">Input image; it is copied and not modified.</param>
        public override void Extract(Narray <Floatarray> outarrays, Floatarray inarray)
        {
            outarrays.Clear();

            // Work on a private copy: erase_small_components receives (and may modify) it.
            Floatarray input = new Floatarray();
            input.Copy(inarray);
            int w = input.Dim(0), h = input.Dim(1);
            Floatarray a = new Floatarray();        // working array
            int csize = PGeti("csize");

            // get rid of small components
            SegmRoutine.erase_small_components(input, PGetf("minsize"), PGetf("threshold"));

            // compute a thresholded version for morphological operations
            Bytearray thresholded = new Bytearray();
            OcrRoutine.threshold_frac(thresholded, input, PGetf("threshold"));

            // compute a smoothed version of the input for gradient computations
            float sigma = PGetf("gradsigma");
            Floatarray smoothed = new Floatarray();
            smoothed.Copy(input);
            Gauss.Gauss2d(smoothed, sigma, sigma);

            // x gradient: backward difference along the first index, zero at i == 0
            a.Resize(w, h);
            for (int j = 0; j < h; j++)
            {
                for (int i = 0; i < w; i++)
                {
                    a[i, j] = (i == 0) ? 0f : smoothed[i, j] - smoothed[i - 1, j];
                }
            }
            Floatarray xgrad = outarrays.Push(new Floatarray());
            OcrRoutine.scale_to(xgrad, a, csize, PGetf("noupscale"), PGetf("aa"));
            for (int j = 0; j < csize; j++)
            {
                for (int i = 0; i < csize; i++)
                {
                    // even j keeps only non-negative values, odd j only non-positive ones
                    if (j % 2 == 0)
                    {
                        xgrad[i, j] = Math.Max(xgrad[i, j], 0f);
                    }
                    else
                    {
                        xgrad[i, j] = Math.Min(xgrad[i, j], 0f);
                    }
                }
            }

            // y gradient: backward difference along the second index, zero at j == 0
            a.Resize(w, h);
            for (int i = 0; i < w; i++)
            {
                for (int j = 0; j < h; j++)
                {
                    a[i, j] = (j == 0) ? 0f : smoothed[i, j] - smoothed[i, j - 1];
                }
            }
            Floatarray ygrad = outarrays.Push(new Floatarray());
            OcrRoutine.scale_to(ygrad, a, csize, PGetf("noupscale"), PGetf("aa"));
            for (int i = 0; i < csize; i++)
            {
                for (int j = 0; j < csize; j++)
                {
                    // even i keeps only non-negative values, odd i only non-positive ones
                    if (i % 2 == 0)
                    {
                        ygrad[i, j] = Math.Max(ygrad[i, j], 0f);
                    }
                    else
                    {
                        ygrad[i, j] = Math.Min(ygrad[i, j], 0f);
                    }
                }
            }

            // junctions, endpoints, and holes
            Floatarray junctions = new Floatarray();
            Floatarray endpoints = new Floatarray();
            Floatarray holes = new Floatarray();
            Bytearray junctions1 = new Bytearray();
            Bytearray endpoints1 = new Bytearray();
            Bytearray holes1 = new Bytearray();
            Bytearray binary = new Bytearray();

            // running sums, starting at zero and shaped like the input
            junctions.MakeLike(input, 0f);
            endpoints.MakeLike(input, 0f);
            holes.MakeLike(input, 0f);
            int n = PGeti("n");
            float step = PGetf("step");
            int bs = PGeti("binsmooth");

            for (int i = 0; i < n; i++)
            {
                sigma = step * i;
                if (bs > 0)
                {
                    OcrRoutine.binsmooth(binary, input, sigma);
                }
                else
                {
                    binary.Copy(thresholded);
                    Morph.binary_dilate_circle(binary, (int)(sigma));
                }

                // NOTE(review): Greater(x, 0, 0, 1) presumably maps values > 0 to 1 and the
                // rest to 0 - confirm against NarrayUtil.
                OcrRoutine.skeletal_features(endpoints1, junctions1, binary, 0.0f, 0.0f);
                NarrayUtil.Greater(junctions1, (byte)0, (byte)0, (byte)1);
                AccumulateMask(junctions, junctions1);
                NarrayUtil.Greater(endpoints1, (byte)0, (byte)0, (byte)1);
                AccumulateMask(endpoints, endpoints1);
                SegmRoutine.extract_holes(ref holes1, binary);
                NarrayUtil.Greater(holes1, (byte)0, (byte)0, (byte)1);
                AccumulateMask(holes, holes1);
            }

            // Turn the sums into averages; skipped for n <= 0, where 1/n would be
            // infinite (or negative) and the zero-filled maps would become NaN.
            if (n > 0)
            {
                float scale = 1.0f / (float)n;
                junctions *= scale;
                endpoints *= scale;
                holes *= scale;
            }

            OcrRoutine.scale_to(outarrays.Push(new Floatarray()), junctions, csize, PGetf("noupscale"), PGetf("aa"));
            OcrRoutine.scale_to(outarrays.Push(new Floatarray()), endpoints, csize, PGetf("noupscale"), PGetf("aa"));
            OcrRoutine.scale_to(outarrays.Push(new Floatarray()), holes, csize, PGetf("noupscale"), PGetf("aa"));
        }

        /// <summary>
        /// Adds every element of <paramref name="mask"/> to the element of
        /// <paramref name="sum"/> at the same flat index.
        /// </summary>
        /// <exception cref="ArgumentException">The two arrays differ in element count.</exception>
        private static void AccumulateMask(Floatarray sum, Bytearray mask)
        {
            int count = sum.Length1d();
            if (mask.Length1d() != count)
            {
                throw new ArgumentException("mask and accumulator must have the same number of elements");
            }
            for (int k = 0; k < count; k++)
            {
                sum.Put1d(k, sum.At1d(k) + mask.At1d(k));
            }
        }