コード例 #1
0
ファイル: BeamSearch.cs プロジェクト: liaoheping/OCRonet
        /// <summary>
        /// Examines the transition from state pair (f1,f2) to (t1,t2) with the
        /// given cost. If the transition changes the n-best list it is recorded,
        /// either as a candidate for the next beam or as a control node of the
        /// current beam.
        /// </summary>
        /// <param name="f1">First component of the source state pair (not read by this method).</param>
        /// <param name="f2">Second component of the source state pair (not read by this method).</param>
        /// <param name="t1">First component of the target state pair.</param>
        /// <param name="t2">Second component of the target state pair.</param>
        /// <param name="cost">Cost of this transition.</param>
        /// <param name="arc_id1">Not read by this method.</param>
        /// <param name="arc_id2">Not read by this method.</param>
        /// <param name="input">Input label; a positive value marks a next-beam candidate.</param>
        /// <param name="intermediate">Not read by this method.</param>
        /// <param name="output">Output label.</param>
        /// <param name="base_cost">Cost of the path so far.</param>
        /// <param name="trail_index">Index into the current beam of the entry being expanded.</param>
        public void Relax(int f1, int f2,
                          int t1, int t2,
                          float cost,
                          int arc_id1,
                          int arc_id2,
                          int input,
                          int intermediate,
                          int output,
                          float base_cost,
                          int trail_index)
        {
            // The entry is keyed by the combined target state and tagged with the
            // slot it will occupy in the all_* arrays once it is pushed below.
            var targetKey    = t1 * fst2.nStates() + t2;
            var candidateTag = all_costs.Length();
            var negatedTotal = -base_cost - cost;

            bool nbestChanged = nbest.AddReplacingId(targetKey, candidateTag, negatedTotal);
            if (!nbestChanged)
            {
                return;
            }

            if (input <= 0)
            {
                // Beam control hack: a node that changed nbest but has a
                // non-positive input label joins the CURRENT beam right away.
                int controlNode = stree.Add(beam[trail_index], t1, t2, input, output, cost);
                beam.Push(controlNode);
                beamcost.Push(base_cost + cost);

                // Stub entry (parent trail -1) so that this slot is not
                // promoted to the next generation beam.
                all_inputs.Push(0);
                all_targets1.Push(-1);
                all_targets2.Push(-1);
                all_outputs.Push(0);
                all_costs.Push(0);
                parent_trails.Push(-1);
                return;
            }

            // Regular candidate for the next beam, kept in the all_* arrays.
            all_inputs.Push(input);
            all_targets1.Push(t1);
            all_targets2.Push(t2);
            all_outputs.Push(output);
            all_costs.Push(cost);
            parent_trails.Push(trail_index);
        }
コード例 #2
0
ファイル: RowDataset8.cs プロジェクト: liaoheping/OCRonet
 /// <summary>
 /// Builds the dataset from a sample array and a parallel array of class labels.
 /// One row is stored for every index along the first dimension of ds, paired
 /// with the entry of cs at the same index.
 /// </summary>
 /// <param name="ds">Source samples; ds.Dim(0) gives the number of rows read.</param>
 /// <param name="cs">Class label for each row of ds.</param>
 public RowDataset8(Narray<byte> ds, Intarray cs)
     : this()
 {
     int row = 0;
     while (row < ds.Dim(0))
     {
         // Push a fresh row container first, then let RowGet fill it from ds
         // (presumably by copying row `row` - verify against RowGet).
         var stored = data.Push(new Narray<byte>());
         RowGet(stored, ds, row);
         classes.Push(cs[row]);
         row++;
     }
     Recompute();
 }
コード例 #3
0
        /// <summary>
        /// Randomly walks an FST from its start state. At every state the
        /// outgoing arcs and the "accept here" option are offered together to
        /// sample_by_costs; the walk ends when accept is chosen or after
        /// <paramref name="max"/> arcs.
        /// </summary>
        /// <param name="result">
        /// Receives the output symbol of every arc taken, in order. Symbols are
        /// appended; this method neither clears the array nor filters any symbol.
        /// </param>
        /// <param name="fst">The FST to sample from.</param>
        /// <param name="max">The maximum number of arcs to follow.</param>
        /// <returns>
        /// The summed cost of the arcs taken plus the accept cost of the state
        /// in which the walk stopped.
        /// </returns>
        public static double fst_sample(Intarray result, IGenericFst fst, int max = 1000)
        {
            double accumulated = 0;
            int    state       = fst.GetStart();
            int    steps       = 0;

            while (steps < max)
            {
                Intarray   arcInputs  = new Intarray();
                Intarray   arcOutputs = new Intarray();
                Intarray   arcTargets = new Intarray();
                Floatarray arcCosts   = new Floatarray();

                fst.Arcs(arcInputs, arcTargets, arcOutputs, arcCosts, state);

                // The accept cost goes in as one extra trailing option so that
                // stopping competes with the arcs in a single draw.
                arcCosts.Push(fst.GetAcceptCost(state));
                int picked = sample_by_costs(arcCosts);
                bool acceptChosen = (picked == arcCosts.Length() - 1);
                if (acceptChosen)
                {
                    break;
                }

                result.Push(arcOutputs[picked]);
                accumulated += arcCosts[picked];
                state        = arcTargets[picked];
                steps++;
            }

            return accumulated + fst.GetAcceptCost(state);
        }
コード例 #4
0
ファイル: SearchTree.cs プロジェクト: nickun/OCRonet
        /// <summary>
        /// Reconstructs the chain of tree nodes that ends at node <paramref name="id"/>.
        /// The parent links are followed until -1 is reached; the entries are
        /// collected in that order and then handed to NarrayUtil.Reverse, which
        /// writes them into the r_* arrays (presumably earliest ancestor first).
        /// </summary>
        /// <param name="r_vertices1">Receives the v1 entries of the chain.</param>
        /// <param name="r_vertices2">Receives the v2 entries of the chain.</param>
        /// <param name="r_inputs">Receives the input labels of the chain.</param>
        /// <param name="r_outputs">Receives the output labels of the chain.</param>
        /// <param name="r_costs">Receives the costs of the chain.</param>
        /// <param name="id">Index of the node at which the chain ends.</param>
        public void Get(Intarray r_vertices1,
                 Intarray r_vertices2,
                 Intarray r_inputs,
                 Intarray r_outputs,
                 Floatarray r_costs,
                 int id)
        {
            // Scratch arrays filled while walking from `id` towards the root.
            Intarray walkV1 = new Intarray();
            Intarray walkV2 = new Intarray();
            Intarray walkInputs = new Intarray();
            Intarray walkOutputs = new Intarray();
            Floatarray walkCosts = new Floatarray();

            for (int node = id; node != -1; node = parents[node])
            {
                walkV1.Push(v1[node]);
                walkV2.Push(v2[node]);
                walkInputs.Push(inputs[node]);
                walkOutputs.Push(outputs[node]);
                walkCosts.Push(costs[node]);
            }

            NarrayUtil.Reverse(r_vertices1, walkV1);
            NarrayUtil.Reverse(r_vertices2, walkV2);
            NarrayUtil.Reverse(r_inputs, walkInputs);
            NarrayUtil.Reverse(r_outputs, walkOutputs);
            NarrayUtil.Reverse(r_costs, walkCosts);
        }
コード例 #5
0
ファイル: OldBookStore.cs プロジェクト: liaoheping/OCRonet
        /// <summary>
        /// Fills <paramref name="lines"/> with the line numbers found for a page,
        /// in ascending order.
        /// </summary>
        /// <param name="lines">Output array; cleared before it is filled.</param>
        /// <param name="ipage">Page number, formatted with four digits to build the directory name.</param>
        protected virtual void GetLinesOfPage(Intarray lines, int ipage)
        {
            lines.Clear();

            // Page directory: prefix + separator + zero-padded page number.
            // Line entries are matched by a four-digit name ending in ".png".
            string     pageDir = String.Format("{0}{1}{2:0000}", prefix, Path.DirectorySeparatorChar, ipage);
            DirPattern matches = new DirPattern(pageDir, @"([0-9][0-9][0-9][0-9])\.png");
            int        found   = matches.Length;

            if (found > 0)
            {
                lines.ReserveTo(found);
            }

            // Parse every match first, then sort numerically before pushing.
            List<int> numbers = new List<int>(found);
            for (int m = 0; m < found; m++)
            {
                numbers.Add(int.Parse(matches[m]));
            }
            numbers.Sort();

            foreach (int number in numbers)
            {
                lines.Push(number);
            }
        }
コード例 #6
0
ファイル: IBatchDense.cs プロジェクト: liaoheping/OCRonet
        /// <summary>
        /// Validates the dataset, builds the class mapping if none exists yet,
        /// and trains on the dataset translated through that mapping.
        /// </summary>
        /// <param name="ds">Training data; must contain at least one sample and one feature.</param>
        /// <exception cref="Exception">Thrown when the dataset has no samples or no features.</exception>
        protected override void Train(IDataset ds)
        {
            if (ds.nSamples() <= 0)
            {
                throw new Exception("nSamples of IDataset must be > 0");
            }
            if (ds.nFeatures() <= 0)
            {
                throw new Exception("nFeatures of IDataset must be > 0");
            }

            // The class maps are only derived while c2i is still empty.
            bool needsClassMap = c2i.Length() < 1;
            if (needsClassMap)
            {
                int      total  = ds.nSamples();
                Intarray labels = new Intarray();
                labels.ReserveTo(total);
                for (int s = 0; s < total; s++)
                {
                    labels.Push(ds.Cls(s));
                }
                ClassMap(c2i, i2c, labels);
            }

            TrainDense(new TranslatedDataset(ds, c2i));
        }
コード例 #7
0
ファイル: SearchTree.cs プロジェクト: liaoheping/OCRonet
        /// <summary>
        /// Reconstructs the chain of tree nodes that ends at node <paramref name="id"/>.
        /// The parent links are followed until -1 is reached; the entries are
        /// collected in that order and then handed to NarrayUtil.Reverse, which
        /// writes them into the r_* arrays (presumably earliest ancestor first).
        /// </summary>
        /// <param name="r_vertices1">Receives the v1 entries of the chain.</param>
        /// <param name="r_vertices2">Receives the v2 entries of the chain.</param>
        /// <param name="r_inputs">Receives the input labels of the chain.</param>
        /// <param name="r_outputs">Receives the output labels of the chain.</param>
        /// <param name="r_costs">Receives the costs of the chain.</param>
        /// <param name="id">Index of the node at which the chain ends.</param>
        public void Get(Intarray r_vertices1,
                        Intarray r_vertices2,
                        Intarray r_inputs,
                        Intarray r_outputs,
                        Floatarray r_costs,
                        int id)
        {
            // Scratch arrays filled while walking from `id` towards the root.
            Intarray   walkV1      = new Intarray();
            Intarray   walkV2      = new Intarray();
            Intarray   walkInputs  = new Intarray();
            Intarray   walkOutputs = new Intarray();
            Floatarray walkCosts   = new Floatarray();

            for (int node = id; node != -1; node = parents[node])
            {
                walkV1.Push(v1[node]);
                walkV2.Push(v2[node]);
                walkInputs.Push(inputs[node]);
                walkOutputs.Push(outputs[node]);
                walkCosts.Push(costs[node]);
            }

            NarrayUtil.Reverse(r_vertices1, walkV1);
            NarrayUtil.Reverse(r_vertices2, walkV2);
            NarrayUtil.Reverse(r_inputs, walkInputs);
            NarrayUtil.Reverse(r_outputs, walkOutputs);
            NarrayUtil.Reverse(r_costs, walkCosts);
        }
コード例 #8
0
 /// <summary>
 /// Increments the counter of class <paramref name="c"/>. The counter array
 /// is first extended with zeros until index c exists.
 /// Original name: inc_class
 /// </summary>
 /// <param name="c">Class index whose counter is incremented.</param>
 public void IncClass(int c)
 {
     // Grow on demand so that counts[c] is addressable.
     while (c >= counts.Length())
     {
         counts.Push(0);
     }
     counts[c] += 1;
 }
コード例 #9
0
        /// <summary>
        /// Copy one FST to another, preserving only lowest-cost arcs: for every
        /// (source state, target state) pair only the cheapest arc is copied.
        /// This is useful for visualization.
        /// </summary>
        /// <param name="dst">The destination. Will be cleared before copying.</param>
        /// <param name="src">The FST to copy.</param>
        /// <exception cref="Exception">
        /// Thrown when src.Arcs returns input, target, output and cost arrays
        /// of differing lengths.
        /// </exception>
        public static void fst_copy_best_arcs_only(IGenericFst dst, IGenericFst src)
        {
            dst.Clear();
            int n = src.nStates();

            // Create as many states in dst as src has; the code below addresses
            // dst states by the same index as the src states (assumes NewState
            // numbers states sequentially from 0).
            for (int i = 0; i < n; i++)
            {
                dst.NewState();
            }
            dst.SetStart(src.GetStart());

            for (int i = 0; i < n; i++)
            {
                dst.SetAccept(i, src.GetAcceptCost(i));

                Intarray   targets = new Intarray(), outputs = new Intarray(), inputs = new Intarray();
                Floatarray costs   = new Floatarray();
                src.Arcs(inputs, targets, outputs, costs, i);

                int inlen = inputs.Length();
                if (inlen != targets.Length())
                {
                    throw new Exception("ASSERT: inputs.length() == targets.length()");
                }
                if (inlen != outputs.Length())
                {
                    throw new Exception("ASSERT: inputs.length() == outputs.length()");
                }
                if (inlen != costs.Length())
                {
                    throw new Exception("ASSERT: inputs.length() == costs.length()");
                }

                // Maps a target state to the index of the cheapest arc leading to it.
                Dictionary<int, int> bestArcByTarget = new Dictionary<int, int>();

                // Iterate over the arcs of state i (inlen entries). The bound must
                // be the arc count, not the number of states: using the state count
                // skips arcs or reads past the end of the arc arrays.
                for (int j = 0; j < inlen; j++)
                {
                    int t = targets[j];
                    int bestSoFar;
                    if (!bestArcByTarget.TryGetValue(t, out bestSoFar) || costs[j] < costs[bestSoFar])
                    {
                        bestArcByTarget[t] = j;
                    }
                }

                foreach (int j in bestArcByTarget.Values)
                {
                    dst.AddTransition(i, targets[j], outputs[j], costs[j], inputs[j]);
                }
            }
        }
コード例 #10
0
        /// <summary>
        /// Trains on a dense dataset. With cross-validation enabled the samples
        /// are split by id into a training and a testing subset; otherwise the
        /// whole dataset is used for both.
        /// </summary>
        /// <param name="ds">The dataset to train on.</param>
        public override void TrainDense(IDataset ds)
        {
            float split      = PGetf("cv_split");
            int   maxTestIds = PGeti("cv_max");

            if (!crossvalidate)
            {
                TrainBatch(ds, ds);
                return;
            }

            // The split is made on sample ids rather than sample indices so the
            // same sample cannot land in both sets, even if the dataset is the
            // result of resampling.
            Intarray uniqueIds = new Intarray();
            for (int s = 0; s < ds.nSamples(); s++)
            {
                uniqueIds.Push(ds.Id(s));
            }
            NarrayUtil.Uniq(uniqueIds);
            Global.Debugf("cvdetail", "reduced {0} ids to {1} ids", ds.nSamples(), uniqueIds.Length());
            NarrayUtil.Shuffle(uniqueIds);

            // A (1 - split) share of the shuffled ids is held out for testing,
            // capped at cv_max ids.
            int heldOutCount = Math.Min((int)((1.0 - split) * uniqueIds.Length()), maxTestIds);
            Intarray heldOutIds = new Intarray();
            for (int k = 0; k < heldOutCount; k++)
            {
                heldOutIds.Push(uniqueIds[k]);
            }
            // Sorted so that Bincontains can be used for the lookup below.
            NarrayUtil.Quicksort(heldOutIds);

            Intarray training = new Intarray();
            Intarray testing  = new Intarray();
            for (int s = 0; s < ds.nSamples(); s++)
            {
                bool heldOut = ClassifierUtil.Bincontains(heldOutIds, ds.Id(s));
                (heldOut ? testing : training).Push(s);
            }

            Global.Debugf("cvdetail", "#training {0} #testing {1}",
                          training.Length(), testing.Length());
            PSet("%ntraining", training.Length());
            PSet("%ntesting", testing.Length());

            TrainBatch(new Datasubset(ds, training), new Datasubset(ds, testing));
        }
コード例 #11
0
ファイル: AStarSearch.cs プロジェクト: liaoheping/OCRonet
        /// <summary>
        /// Rebuilds the vertex sequence that ends at the accepted vertex by
        /// following the came_from links until a vertex is reached that is its
        /// own predecessor.
        /// </summary>
        /// <param name="result_vertices">
        /// Receives the collected vertices after they are passed through
        /// NarrayUtil.Reverse; left untouched when nothing was accepted.
        /// </param>
        /// <returns>false if accepted_from is -1, true otherwise.</returns>
        public bool reconstruct_vertices(Intarray result_vertices)
        {
            if (accepted_from == -1)
            {
                return false;
            }

            // Collected from the accepted vertex backwards.
            Intarray backward = new Intarray();
            int      current  = accepted_from;
            backward.Push(current);

            while (true)
            {
                int previous = came_from[current];
                if (previous == current)
                {
                    break; // a self-link ends the chain
                }
                backward.Push(previous);
                current = previous;
            }

            NarrayUtil.Reverse(result_vertices, backward);
            return true;
        }
コード例 #12
0
ファイル: BeamSearch.cs プロジェクト: liaoheping/OCRonet
        /// <summary>
        /// One iteration of the main loop: tries to accept from the current
        /// beam, traverses every beam entry, and replaces the beam with the
        /// candidates that are listed in nbest.
        /// </summary>
        public void Radiate()
        {
            Clear();

            // Entries at or beyond this index are the "control nodes" that
            // traversal appends to the current beam.
            int firstControl = beam.Length();

            for (int b = 0; b < firstControl; b++)
            {
                TryAccept(b);
            }

            // beam.Length() is re-read on every pass on purpose: traversal may
            // add control nodes, and those are traversed as well.
            for (int b = 0; b < beam.Length(); b++)
            {
                var node = beam[b];
                Traverse(stree.v1[node], stree.v2[node], beamcost[b], b);
            }

            // Control nodes do not go to the next beam, so this is their only
            // chance to be accepted.
            for (int b = firstControl; b < beam.Length(); b++)
            {
                TryAccept(b);
            }

            Intarray   nextBeam     = new Intarray();
            Floatarray nextBeamCost = new Floatarray();

            for (int rank = 0; rank < nbest.Length(); rank++)
            {
                int candidate = nbest.Tag(rank);
                var parent    = parent_trails[candidate];
                if (parent < 0)
                {
                    continue; // stub entry of a control node
                }

                int treeNode = stree.Add(beam[parent],
                                         all_targets1[candidate], all_targets2[candidate],
                                         all_inputs[candidate], all_outputs[candidate],
                                         all_costs[candidate]);
                nextBeam.Push(treeNode);
                nextBeamCost.Push(beamcost[parent] + all_costs[candidate]);
            }

            beam.Move(nextBeam);
            beamcost.Move(nextBeamCost);
        }
コード例 #13
0
        /// <summary>
        /// Replaces the contents of this vector with the values of <paramref name="v"/>.
        /// </summary>
        /// <param name="v">Source values.</param>
        /// <param name="eps">Magnitude threshold used by the first pass below.</param>
        public void Copy(Floatarray v, float eps = 1e-11f)
        {
            Clear();
            int n = v.Length();

            // First pass: record an (index, value) pair for every entry whose
            // absolute value is at least eps.
            for (int i = 0; i < n; i++)
            {
                float value = v.At1d(i);
                if (Math.Abs(value) >= eps)
                {
                    _keys.Push(i);
                    _values.Push(value);
                }
            }
            // Second pass: _keys is resized to the full length and set to
            // 0.._len-1, and _values becomes a copy of v.
            // NOTE(review): this appears to overwrite whatever the first pass
            // stored, which would make the eps filtering ineffective - confirm
            // against the semantics of Resize/Copy and the intended layout.
            _len = v.Length();
            _keys.Resize(_len);
            for (int i = 0; i < _len; i++)
            {
                _keys.Put1d(i, i);
            }
            _values.Copy(v);
        }
コード例 #14
0
ファイル: BookStore.cs プロジェクト: nickun/OCRonet
 /// <summary>
 /// Fills <paramref name="lines"/> with the line numbers found for a page, in
 /// the order in which DirPattern reports them.
 /// </summary>
 /// <param name="lines">Output array; cleared before it is filled.</param>
 /// <param name="ipage">Page number, formatted with four digits to build the directory name.</param>
 protected override void GetLinesOfPage(Intarray lines, int ipage)
 {
     lines.Clear();

     // Page directory: prefix + separator + zero-padded page number.
     string pageDir = String.Format("{0}{1}{2:0000}", prefix, Path.DirectorySeparatorChar, ipage);

     // Line entries are matched by six decimal digits. A hexadecimal variant
     // was used at some point and is kept here for reference:
     // @"([0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F])\.png"
     DirPattern matches = new DirPattern(pageDir, @"([0-9][0-9][0-9][0-9][0-9][0-9])\.png");

     int found = matches.Length;
     if (found > 0)
     {
         lines.ReserveTo(found);
     }

     int m = 0;
     while (m < found)
     {
         lines.Push(int.Parse(matches[m]));
         m++;
     }
 }
コード例 #15
0
ファイル: SearchTree.cs プロジェクト: liaoheping/OCRonet
        /// <summary>
        /// Appends a node to the search tree.
        /// </summary>
        /// <param name="parent">Index of the parent node.</param>
        /// <param name="vertex1">Value stored in v1 for the new node.</param>
        /// <param name="vertex2">Value stored in v2 for the new node.</param>
        /// <param name="input">Input label stored for the new node.</param>
        /// <param name="output">Output label stored for the new node.</param>
        /// <param name="cost">Cost stored for the new node.</param>
        /// <returns>
        /// Index of the new node, i.e. the length of the parents array before
        /// the node was appended.
        /// </returns>
        public int Add(int parent, int vertex1, int vertex2,
                       int input, int output, float cost)
        {
            // All per-node arrays grow in lockstep, so the current length of
            // any of them is the index the new node receives.
            int nodeId = parents.Length();

            parents.Push(parent);
            costs.Push(cost);
            inputs.Push(input);
            outputs.Push(output);
            v1.Push(vertex1);
            v2.Push(vertex2);

            return nodeId;
        }
コード例 #16
0
ファイル: Dataset8.cs プロジェクト: liaoheping/OCRonet
 /// <summary>
 /// Appends one sample with its class label to the dataset.
 /// </summary>
 /// <param name="v">Feature vector; all values must lie strictly between -1.2 and 1.2.</param>
 /// <param name="c">Class label; must be at least -1.</param>
 public override void Add(Floatarray v, int c)
 {
     bool valuesInRange = NarrayUtil.Min(v) > -1.2f && NarrayUtil.Max(v) < 1.2f;
     CHECK_ARG(valuesInRange, "float8: value out of range (-1.2..1.2)");
     CHECK_ARG(c >= -1, "c>=-1");

     // The feature count is fixed by the first sample that arrives while nf
     // is still negative.
     if (nf < 0)
     {
         nf = v.Length();
     }
     // Keep the class count one above the largest label seen.
     if (nc <= c)
     {
         nc = c + 1;
     }

     RowPush(data, v);
     classes.Push(c);
 }
コード例 #17
0
ファイル: SegmRoutine.cs プロジェクト: liaoheping/OCRonet
        /// <summary>
        /// Collects the interior positions of <paramref name="data"/> that are
        /// local minima not exceeding <paramref name="threshold"/>.
        /// </summary>
        /// <param name="result">Output array of positions; cleared first.</param>
        /// <param name="data">Values to scan; the first and last position are never reported.</param>
        /// <param name="r">Passed on to local_min (presumably the window radius - verify).</param>
        /// <param name="threshold">Upper bound a value must not exceed to qualify.</param>
        public static void local_minima(ref Intarray result, Floatarray data, int r, float threshold)
        {
            int length = data.Length();

            result.Clear();
            Floatarray windowMin = new Floatarray();
            local_min(ref windowMin, data, r);

            for (int pos = 1; pos < length - 1; pos++)
            {
                float here = data[pos];

                // The negated "<=" / "<" forms are deliberate: they keep the
                // outcome for NaN values identical to a plain && chain.
                if (!(here <= threshold))
                {
                    continue;
                }
                if (!(here <= windowMin[pos]))
                {
                    continue;
                }
                if (!(here <= data[pos - 1]))
                {
                    continue;
                }
                // Strictly below the right neighbour.
                if (!(here < data[pos + 1]))
                {
                    continue;
                }
                result.Push(pos);
            }
        }
コード例 #18
0
        /// <summary>
        /// Simple interface for line recognizers: builds a linear chain with
        /// one transition per character of <paramref name="text"/>.
        /// </summary>
        /// <param name="text">The string; each character is passed as the third argument of AddTransition.</param>
        /// <param name="costs">Cost of each transition, indexed like text.</param>
        /// <param name="ids">Value passed as the fifth argument of AddTransition, indexed like text.</param>
        public virtual void SetString(string text, Floatarray costs, Intarray ids)
        {
            int      length = text.Length;
            Intarray chain  = new Intarray();

            chain.Clear();

            // A string of `length` characters needs `length + 1` states; all
            // states are created before any transition is added.
            for (int s = 0; s <= length; s++)
            {
                chain.Push(NewState());
            }

            for (int c = 0; c < length; c++)
            {
                var source = chain[c];
                var target = chain[c + 1];
                AddTransition(source, target, text[c], costs[c], ids[c]);
            }

            SetStart(chain[0]);
            SetAccept(chain[length]);
        }
コード例 #19
0
ファイル: BookStore.cs プロジェクト: liaoheping/OCRonet
        /// <summary>
        /// Fills <paramref name="lines"/> with the line numbers found for a
        /// page, in the order in which DirPattern reports them.
        /// </summary>
        /// <param name="lines">Output array; cleared before it is filled.</param>
        /// <param name="ipage">Page number, formatted with four digits to build the directory name.</param>
        protected override void GetLinesOfPage(Intarray lines, int ipage)
        {
            lines.Clear();

            // Page directory: prefix + separator + zero-padded page number.
            string pageDir = String.Format("{0}{1}{2:0000}", prefix, Path.DirectorySeparatorChar, ipage);

            // Line entries are matched by six decimal digits. A hexadecimal
            // variant was used at some point and is kept here for reference:
            // @"([0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F])\.png"
            DirPattern matches = new DirPattern(pageDir, @"([0-9][0-9][0-9][0-9][0-9][0-9])\.png");

            int found = matches.Length;
            if (found > 0)
            {
                lines.ReserveTo(found);
            }

            int m = 0;
            while (m < found)
            {
                lines.Push(int.Parse(matches[m]));
                m++;
            }
        }
コード例 #20
0
ファイル: ClassifierUtil.cs プロジェクト: liaoheping/OCRonet
        /// <summary>
        /// Draws <paramref name="n"/> indices at random, using the running sums
        /// of <paramref name="weights"/> (scaled by their maximum) as the
        /// lookup table for Binsearch.
        /// </summary>
        /// <param name="samples">Output array of drawn indices; cleared first.</param>
        /// <param name="weights">Weight of each index; not modified.</param>
        /// <param name="n">Number of draws.</param>
        public static void weighted_sample(Intarray samples, Floatarray weights, int n)
        {
            // Work on a copy so the caller's weights stay untouched.
            Floatarray cumulative = new Floatarray();
            cumulative.Copy(weights);

            int bins = cumulative.Length();
            for (int b = 1; b < bins; b++)
            {
                cumulative[b] += cumulative[b - 1];
            }
            cumulative /= NarrayUtil.Max(cumulative);

            samples.Clear();
            int remaining = n;
            while (remaining-- > 0)
            {
                float draw = (float)DRandomizer.Default.drand();
                samples.Push(Binsearch(cumulative, draw));
            }
        }
コード例 #21
0
ファイル: Heap.cs プロジェクト: liaoheping/OCRonet
        /// <summary>
        /// Inserts a node into the heap, or lowers its cost if it is already
        /// present and the new cost is smaller.
        /// </summary>
        /// <param name="node">The node to insert or promote.</param>
        /// <param name="cost">The cost offered for the node.</param>
        /// <returns>
        /// True if the heap was changed; false if the node was already in the
        /// heap and the offered cost is not lower than its stored cost.
        /// </returns>
        public bool Push(int node, float cost)
        {
            // heapback[node] holds the node's heap position, or -1 if absent.
            int slot = heapback[node];

            if (slot == -1)
            {
                heap.Push(node);
                costs.Push(cost);
                int tail = heap.Length() - 1;
                heapback[node] = tail;
                heapify_up(tail);
                return true;
            }

            if (!(cost < costs[slot]))
            {
                return false;
            }

            costs[slot] = cost;
            heapify_up(slot);
            return true;
        }
コード例 #22
0
ファイル: SimpleGrouper.cs プロジェクト: liaoheping/OCRonet
        /// <summary>
        /// Computes the candidate groups for a segmentation (internal method).
        /// For every start segment, runs of 1..maxrange consecutive segments
        /// are tried; a run is stored unless two neighbouring segments in it
        /// are more than maxdist apart horizontally.
        /// </summary>
        private void computeGroups()
        {
            rboxes.Clear();
            ImgLabels.bounding_boxes(ref rboxes, labels);
            int count = rboxes.Length();

            // Index 0 is the background, so the first real segment is 1.
            for (int first = 1; first < count; first++)
            {
                // range only grows, so once first + range exceeds count no
                // larger range can fit and the loop may stop.
                for (int range = 1; range <= maxrange && first + range <= count; range++)
                {
                    Rect     hull       = rboxes.At1d(first);
                    Intarray members    = new Intarray();
                    bool     gapTooWide = false;
                    int      end        = first + range;

                    for (int j = first; j < end && !gapTooWide; j++)
                    {
                        // Gap between the left edge of j and the right edge of
                        // its predecessor.
                        if (j > first && rboxes.At1d(j).x0 - rboxes.At1d(j - 1).x1 > maxdist)
                        {
                            gapTooWide = true;
                        }
                        else
                        {
                            hull.Include(rboxes.At1d(j));
                            members.Push(j);
                        }
                    }

                    if (!gapTooWide)
                    {
                        boxes.Push(hull);
                        segments.Push(members);
                    }
                }
            }
        }
コード例 #23
0
ファイル: IBatchDense.cs プロジェクト: nickun/OCRonet
 /// <summary>
 /// Validates the dataset, builds the class mapping if none exists yet, and
 /// trains on the dataset translated through that mapping.
 /// </summary>
 /// <param name="ds">Training data; must contain at least one sample and one feature.</param>
 /// <exception cref="Exception">Thrown when the dataset has no samples or no features.</exception>
 protected override void Train(IDataset ds)
 {
     if (ds.nSamples() <= 0)
     {
         throw new Exception("nSamples of IDataset must be > 0");
     }
     if (ds.nFeatures() <= 0)
     {
         throw new Exception("nFeatures of IDataset must be > 0");
     }

     // The class maps are only derived while c2i is still empty.
     bool needsClassMap = c2i.Length() < 1;
     if (needsClassMap)
     {
         int total = ds.nSamples();
         Intarray labels = new Intarray();
         labels.ReserveTo(total);
         for (int s = 0; s < total; s++)
         {
             labels.Push(ds.Cls(s));
         }
         ClassMap(c2i, i2c, labels);
     }

     TrainDense(new TranslatedDataset(ds, c2i));
 }
コード例 #24
0
ファイル: LenetClassifier.cs プロジェクト: liaoheping/OCRonet
        /// <summary>
        /// Trains the content classifier and, when junk handling is enabled,
        /// the junk classifier as well. The "%nsamples" parameter is updated to
        /// its previous value (if any) plus the size of this dataset.
        /// </summary>
        /// <param name="ds">The dataset to train on.</param>
        protected override void Train(IDataset ds)
        {
            bool withJunk = PGetb("junk") && !DisableJunk;

            // Running total of samples seen across calls.
            int totalSamples = ds.nSamples();
            if (PExists("%nsamples"))
            {
                totalSamples += PGeti("%nsamples");
            }

            Global.Debugf("info", "Training content classifier");

            if (CharClass.IsEmpty)
            {
                Initialize(CreateClassesFromDataset(ds));
            }

            if (!withJunk)
            {
                CharClass.TrainDense(ds, PGeti("epochs"));
            }
            else
            {
                // Content classifier: trained on the samples whose class is
                // not the junk class.
                Intarray keep = new Intarray();
                for (int s = 0; s < ds.nSamples(); s++)
                {
                    if (ds.Cls(s) != jc())
                    {
                        keep.Push(s);
                    }
                }
                CharClass.TrainDense(new Datasubset(ds, keep), PGeti("epochs"));

                // Junk classifier: every sample is relabelled with
                // JunkClass.Classes[1] if it is junk, JunkClass.Classes[0] otherwise.
                Global.Debugf("info", "Training junk classifier");
                Intarray junkLabels = new Intarray();
                int      junkCount  = 0;
                for (int s = 0; s < ds.nSamples(); s++)
                {
                    bool isJunkSample = (ds.Cls(s) == jc());
                    junkLabels.Push(JunkClass.Classes[isJunkSample ? 1 : 0]);
                    if (isJunkSample)
                    {
                        junkCount++;
                    }
                }

                if (junkCount == 0)
                {
                    Global.Debugf("warn", "you are training a junk class but there are no samples to train on");
                    JunkClass.DeleteLenet();
                }
                else
                {
                    JunkClass.TrainDense(new MappedDataset(ds, junkLabels), PGeti("epochs"));
                }
            }

            PSet("%nsamples", totalSamples);
        }
コード例 #25
0
ファイル: LenetClassifier.cs プロジェクト: nickun/OCRonet
        /// <summary>
        /// Trains the content classifier and, when junk handling is enabled,
        /// the junk classifier as well. The "%nsamples" parameter is updated to
        /// its previous value (if any) plus the size of this dataset.
        /// </summary>
        /// <param name="ds">The dataset to train on.</param>
        protected override void Train(IDataset ds)
        {
            bool withJunk = PGetb("junk") && !DisableJunk;

            // Running total of samples seen across calls.
            int totalSamples = ds.nSamples();
            if (PExists("%nsamples"))
            {
                totalSamples += PGeti("%nsamples");
            }

            Global.Debugf("info", "Training content classifier");

            if (CharClass.IsEmpty)
            {
                Initialize(CreateClassesFromDataset(ds));
            }

            if (!withJunk)
            {
                CharClass.TrainDense(ds, PGeti("epochs"));
            }
            else
            {
                // Content classifier: trained on the samples whose class is
                // not the junk class.
                Intarray keep = new Intarray();
                for (int s = 0; s < ds.nSamples(); s++)
                {
                    if (ds.Cls(s) != jc())
                    {
                        keep.Push(s);
                    }
                }
                CharClass.TrainDense(new Datasubset(ds, keep), PGeti("epochs"));

                // Junk classifier: every sample is relabelled with
                // JunkClass.Classes[1] if it is junk, JunkClass.Classes[0] otherwise.
                Global.Debugf("info", "Training junk classifier");
                Intarray junkLabels = new Intarray();
                int junkCount = 0;
                for (int s = 0; s < ds.nSamples(); s++)
                {
                    bool isJunkSample = (ds.Cls(s) == jc());
                    junkLabels.Push(JunkClass.Classes[isJunkSample ? 1 : 0]);
                    if (isJunkSample)
                    {
                        junkCount++;
                    }
                }

                if (junkCount == 0)
                {
                    Global.Debugf("warn", "you are training a junk class but there are no samples to train on");
                    JunkClass.DeleteLenet();
                }
                else
                {
                    JunkClass.TrainDense(new MappedDataset(ds, junkLabels), PGeti("epochs"));
                }
            }

            PSet("%nsamples", totalSamples);
        }
コード例 #26
0
ファイル: AStarSearch.cs プロジェクト: nickun/OCRonet
 /// <summary>
 /// Rebuilds the vertex sequence that ends at the accepted vertex by
 /// following the came_from links until a vertex is reached that is its own
 /// predecessor.
 /// </summary>
 /// <param name="result_vertices">
 /// Receives the collected vertices after they are passed through
 /// NarrayUtil.Reverse; left untouched when nothing was accepted.
 /// </param>
 /// <returns>false if accepted_from is -1, true otherwise.</returns>
 public bool reconstruct_vertices(Intarray result_vertices)
 {
     if (accepted_from == -1)
     {
         return false;
     }

     // Collected from the accepted vertex backwards.
     Intarray backward = new Intarray();
     int current = accepted_from;
     backward.Push(current);

     while (true)
     {
         int previous = came_from[current];
         if (previous == current)
         {
             break; // a self-link ends the chain
         }
         backward.Push(previous);
         current = previous;
     }

     NarrayUtil.Reverse(result_vertices, backward);
     return true;
 }
コード例 #27
0
        /// <summary>
        /// Lists the outgoing arcs of a state of the composition of l1 and l2.
        /// The combined state number is split into an l1 state (node / l2.nStates())
        /// and an l2 state (node % l2.nStates()). Arcs are appended to the
        /// output arrays; this method does not clear them.
        /// </summary>
        /// <param name="ids">Receives the input label of each composed arc.</param>
        /// <param name="targets">Receives the combined target state of each composed arc.</param>
        /// <param name="outputs">Receives the output label of each composed arc.</param>
        /// <param name="costs">Receives the cost of each composed arc.</param>
        /// <param name="node">Combined state whose arcs are requested.</param>
        public override void Arcs(Intarray ids, Intarray targets, Intarray outputs, Floatarray costs, int node)
        {
            // Split the combined state into its two components.
            int        n1   = node / l2.nStates();
            int        n2   = node % l2.nStates();
            Intarray   ids1 = new Intarray();
            Intarray   ids2 = new Intarray();
            Intarray   t1   = new Intarray();
            Intarray   t2   = new Intarray();
            Intarray   o1   = new Intarray();
            Intarray   o2   = new Intarray();
            Floatarray c1   = new Floatarray();
            Floatarray c2   = new Floatarray();

            l1.Arcs(ids1, t1, o1, c1, n1);
            l2.Arcs(ids2, t2, o2, c2, n2);

            // sort & permute
            // l1's arcs are ordered by their OUTPUT label and l2's arcs by their
            // INPUT label, since those are the labels matched against each other.
            // (Quicksort(p, keys) presumably fills p with the sorting permutation
            // - verify against NarrayUtil.)
            Intarray p1 = new Intarray();
            Intarray p2 = new Intarray();

            NarrayUtil.Quicksort(p1, o1);
            NarrayUtil.Permute(ids1, p1);
            NarrayUtil.Permute(t1, p1);
            NarrayUtil.Permute(o1, p1);
            NarrayUtil.Permute(c1, p1);

            NarrayUtil.Quicksort(p2, ids2);
            NarrayUtil.Permute(ids2, p2);
            NarrayUtil.Permute(t2, p2);
            NarrayUtil.Permute(o2, p2);
            NarrayUtil.Permute(c2, p2);

            // Cursors into the sorted arc lists of l1 and l2.
            int k1, k2;

            // l1 epsilon moves
            // (l1 arcs with output 0: only l1 advances, the composed output is 0)
            for (k1 = 0; k1 < o1.Length() && o1.At1d(k1) == 0; k1++)
            {
                ids.Push(ids1.At1d(k1));
                targets.Push(Combine(t1.At1d(k1), n2));
                outputs.Push(0);
                costs.Push(c1.At1d(k1));
            }
            // l2 epsilon moves
            // (l2 arcs with input 0: only l2 advances, the composed input is 0)
            for (k2 = 0; k2 < o2.Length() && ids2.At1d(k2) == 0; k2++)
            {
                ids.Push(0);
                targets.Push(Combine(n1, t2.At1d(k2)));
                outputs.Push(o2.At1d(k2));
                costs.Push(c2.At1d(k2));
            }
            // non-epsilon moves
            // Merge-style scan over the two sorted lists, pairing every l1 arc
            // with every l2 arc whose input equals the l1 arc's output.
            while (k1 < o1.Length() && k2 < ids2.Length())
            {
                // Skip l1 arcs whose output is smaller than the current l2 input.
                while (k1 < o1.Length() && o1.At1d(k1) < ids2.At1d(k2))
                {
                    k1++;
                }
                if (k1 >= o1.Length())
                {
                    break;
                }
                // Skip l2 arcs whose input is smaller than the current l1 output.
                while (k2 < ids2.Length() && o1.At1d(k1) > ids2.At1d(k2))
                {
                    k2++;
                }
                // For each l1 arc matching the label at k2, emit one composed arc
                // per l2 arc carrying that same label; k2 stays at the start of
                // the run so the next l1 arc with the same output sees all of it.
                while (k1 < o1.Length() && k2 < ids2.Length() && o1.At1d(k1) == ids2.At1d(k2))
                {
                    for (int j = k2; j < ids2.Length() && o1.At1d(k1) == ids2.At1d(j); j++)
                    {
                        ids.Push(ids1.At1d(k1));
                        targets.Push(Combine(t1.At1d(k1), t2.At1d(j)));
                        outputs.Push(o2.At1d(j));
                        costs.Push(c1.At1d(k1) + c2.At1d(j));
                    }
                    k1++;
                }
            }
        }
コード例 #28
0
ファイル: BeamSearch.cs プロジェクト: nickun/OCRonet
        /// <summary>
        /// Performs one iteration of the beam search: tries to accept from the
        /// current beam, expands every beam entry, tries to accept from the
        /// entries that were appended during expansion, and finally replaces
        /// the beam with the candidates selected in nbest.
        /// </summary>
        public void Radiate()
        {
            Clear();

            // Entries at index >= firstControl are the ones appended during traversal.
            int firstControl = beam.Length();
            int idx = 0;
            while (idx < firstControl)
            {
                TryAccept(idx);
                idx++;
            }

            // The bound is re-read on every pass on purpose: traversal may
            // append "control nodes" to the beam, and those are expanded too.
            idx = 0;
            while (idx < beam.Length())
            {
                int node = beam[idx];
                Traverse(stree.v1[node], stree.v2[node], beamcost[idx], idx);
                idx++;
            }

            // Control nodes only get an accept attempt; they are not carried
            // over to the next beam.
            idx = firstControl;
            while (idx < beam.Length())
            {
                TryAccept(idx);
                idx++;
            }

            // Build the next beam from the nbest candidates.
            Intarray nextBeam = new Intarray();
            Floatarray nextCost = new Floatarray();
            int candidates = 0;
            while (candidates < nbest.Length())
            {
                int slot = nbest.Tag(candidates);
                candidates++;

                int parent = parent_trails[slot];
                if (parent >= 0) // a negative parent marks a control beam node
                {
                    nextBeam.Push(stree.Add(beam[parent],
                                            all_targets1[slot], all_targets2[slot],
                                            all_inputs[slot], all_outputs[slot],
                                            all_costs[slot]));
                    nextCost.Push(beamcost[parent] + all_costs[slot]);
                }
            }

            beam.Move(nextBeam);
            beamcost.Move(nextCost);
        }
コード例 #29
0
ファイル: LatinClassifier.cs プロジェクト: liaoheping/OCRonet
        /// <summary>
        /// Trains the content classifier and, when junk handling is enabled,
        /// a separate junk classifier on the same dataset.
        /// </summary>
        /// <param name="ds">The labelled samples to train on.</param>
        protected override void Train(IDataset ds)
        {
            bool junkEnabled = PGetb("junk") && !DisableJunk;

            // Create every sub-classifier that has not been set yet, using the
            // component names stored in the parameters.
            if (charclass.IsEmpty)
            {
                charclass.SetComponent(ComponentCreator.MakeComponent(PGet("charclass")));
                TryAttachCharClassifierEvent(charclass.Object);
            }
            if (junkclass.IsEmpty)
            {
                junkclass.SetComponent(ComponentCreator.MakeComponent(PGet("junkclass")));
                TryAttachJunkClassifierEvent(junkclass.Object);
            }
            if (ulclass.IsEmpty)
            {
                ulclass.SetComponent(ComponentCreator.MakeComponent(PGet("ulclass")));
            }

            Global.Debugf("info", "Training content classifier");
            if (!junkEnabled || junkclass.IsEmpty)
            {
                // No junk handling: the content classifier sees every sample.
                charclass.Object.XTrain(ds);
            }
            else
            {
                // Train the content classifier only on samples whose class differs from jc().
                Intarray contentSamples = new Intarray();
                int sample = 0;
                while (sample < ds.nSamples())
                {
                    if (ds.Cls(sample) != jc())
                    {
                        contentSamples.Push(sample);
                    }
                    sample++;
                }
                Datasubset contentOnly = new Datasubset(ds, contentSamples);
                charclass.Object.XTrain(contentOnly);
            }

            if (!junkEnabled || junkclass.IsEmpty)
            {
                return;
            }

            Global.Debugf("info", "Training junk classifier");

            // One 0/1 flag per sample: 1 when the sample's class equals jc().
            Intarray junkFlags = new Intarray();
            int junkCount = 0;
            for (int idx = 0; idx < ds.nSamples(); idx++)
            {
                bool junk = ds.Cls(idx) == jc();
                junkFlags.Push(junk ? 1 : 0);
                if (junk)
                {
                    junkCount++;
                }
            }

            if (junkCount <= 0)
            {
                // Nothing to learn from: drop the junk classifier.
                Global.Debugf("warn", "you are training a junk class but there are no samples to train on");
                junkclass.SetComponent(null);
            }
            else
            {
                MappedDataset junkView = new MappedDataset(ds, junkFlags);
                junkclass.Object.XTrain(junkView);
            }

            if (PGeti("ul") > 0 && !ulclass.IsEmpty)
            {
                throw new Exception("ulclass not implemented");
            }
        }
コード例 #30
0
ファイル: CompositionFstImpl.cs プロジェクト: nickun/OCRonet
        /// <summary>
        /// Appends the outgoing arcs of the composite state <paramref name="node"/>
        /// to the given arrays. The arcs are derived from the arcs of the two
        /// underlying transducers l1 and l2.
        /// </summary>
        /// <param name="ids">Receives one id per emitted arc (taken from l1's ids, or 0 for l2-only moves).</param>
        /// <param name="targets">Receives the target of each emitted arc, built with Combine.</param>
        /// <param name="outputs">Receives the output of each emitted arc (taken from l2's outputs, or 0 for l1-only moves).</param>
        /// <param name="costs">Receives the cost of each emitted arc.</param>
        /// <param name="node">Composite state; split into an l1 state and an l2 state below.</param>
        public override void Arcs(Intarray ids, Intarray targets, Intarray outputs, Floatarray costs, int node)
        {
            // Decode the composite state: quotient is the l1 state, remainder the l2 state.
            int n1 = node / l2.nStates();
            int n2 = node % l2.nStates();
            Intarray ids1 = new Intarray();
            Intarray ids2 = new Intarray();
            Intarray t1 = new Intarray();
            Intarray t2 = new Intarray();
            Intarray o1 = new Intarray();
            Intarray o2 = new Intarray();
            Floatarray c1 = new Floatarray();
            Floatarray c2 = new Floatarray();
            // Fetch the arcs leaving each component state.
            l1.Arcs(ids1, t1, o1, c1, n1);
            l2.Arcs(ids2, t2, o2, c2, n2);

            // sort & permute
            // NOTE(review): Quicksort(p, a) presumably fills p with the permutation
            // that sorts a, and Permute applies it - confirm against NarrayUtil.
            Intarray p1 = new Intarray();
            Intarray p2 = new Intarray();

            // l1's arcs are ordered by their output (o1) ...
            NarrayUtil.Quicksort(p1, o1);
            NarrayUtil.Permute(ids1, p1);
            NarrayUtil.Permute(t1, p1);
            NarrayUtil.Permute(o1, p1);
            NarrayUtil.Permute(c1, p1);

            // ... and l2's arcs by their id (ids2), so that the two lists can be
            // matched on o1 == ids2 in a single forward pass below.
            NarrayUtil.Quicksort(p2, ids2);
            NarrayUtil.Permute(ids2, p2);
            NarrayUtil.Permute(t2, p2);
            NarrayUtil.Permute(o2, p2);
            NarrayUtil.Permute(c2, p2);

            // k1/k2 are cursors into the sorted l1/l2 arc lists; they are shared
            // by all three phases below.
            int k1, k2;
            // l1 epsilon moves
            // (leading arcs with o1 == 0: only l1 advances, the l2 state n2 is kept)
            for (k1 = 0; k1 < o1.Length() && o1.At1d(k1) == 0; k1++)
            {
                ids.Push(ids1.At1d(k1));
                targets.Push(Combine(t1.At1d(k1), n2));
                outputs.Push(0);
                costs.Push(c1.At1d(k1));
            }
            // l2 epsilon moves
            // (leading arcs with ids2 == 0: only l2 advances, the l1 state n1 is kept)
            // NOTE(review): the bound uses o2.Length() while indexing ids2; this
            // relies on l2.Arcs filling both arrays to the same length - confirm.
            for (k2 = 0; k2 < o2.Length() && ids2.At1d(k2) == 0; k2++)
            {
                ids.Push(0);
                targets.Push(Combine(n1, t2.At1d(k2)));
                outputs.Push(o2.At1d(k2));
                costs.Push(c2.At1d(k2));
            }
            // non-epsilon moves
            // (merge-join of the two sorted lists on o1 == ids2)
            while (k1 < o1.Length() && k2 < ids2.Length())
            {
                // skip l1 arcs whose output is smaller than the current l2 id
                while (k1 < o1.Length() && o1.At1d(k1) < ids2.At1d(k2)) k1++;
                if (k1 >= o1.Length()) break;
                // skip l2 arcs whose id is smaller than the current l1 output
                while (k2 < ids2.Length() && o1.At1d(k1) > ids2.At1d(k2)) k2++;
                // for every l1 arc with the matching label ...
                while (k1 < o1.Length() && k2 < ids2.Length() && o1.At1d(k1) == ids2.At1d(k2))
                {
                    // ... pair it with every l2 arc carrying that label; k2 itself
                    // is not advanced so the next l1 arc can reuse the same run.
                    for (int j = k2; j < ids2.Length() && o1.At1d(k1) == ids2.At1d(j); j++)
                    {
                        ids.Push(ids1.At1d(k1));
                        targets.Push(Combine(t1.At1d(k1), t2.At1d(j)));
                        outputs.Push(o2.At1d(j));
                        costs.Push(c1.At1d(k1) + c2.At1d(j));
                    }
                    k1++;
                }
            }
        }
コード例 #31
0
ファイル: OldBookStore.cs プロジェクト: nickun/OCRonet
 /// <summary>
 /// Fills <paramref name="lines"/> with the line numbers found in the
 /// directory of page <paramref name="ipage"/>, in ascending order.
 /// </summary>
 /// <param name="lines">Output array; cleared before it is filled.</param>
 /// <param name="ipage">Page number; formatted as four digits to form the directory name.</param>
 protected virtual void GetLinesOfPage(Intarray lines, int ipage)
 {
     lines.Clear();

     string pageDir = String.Format("{0}{1}{2:0000}", prefix, Path.DirectorySeparatorChar, ipage);
     DirPattern matches = new DirPattern(pageDir, @"([0-9][0-9][0-9][0-9])\.png");

     int found = matches.Length;
     if (found > 0)
     {
         lines.ReserveTo(found);
     }

     // Parse every match into a number first, then emit them sorted.
     List<int> numbers = new List<int>(found);
     for (int m = 0; m < found; m++)
     {
         numbers.Add(int.Parse(matches[m]));
     }
     numbers.Sort();

     foreach (int number in numbers)
     {
         lines.Push(number);
     }
 }
コード例 #32
0
ファイル: IGenericFst.cs プロジェクト: nickun/OCRonet
 /// <summary>
 /// Simple interface for line recognizers: builds a linear chain with one
 /// transition per character of <paramref name="text"/>.
 /// </summary>
 /// <param name="text">The string to encode; character i labels transition i.</param>
 /// <param name="costs">Cost of transition i, read at index i.</param>
 /// <param name="ids">Id of transition i, read at index i.</param>
 public virtual void SetString(string text, Floatarray costs, Intarray ids)
 {
     int length = text.Length;

     // A chain over `length` characters needs length + 1 states.
     Intarray chain = new Intarray();
     chain.Clear();
     int created = 0;
     while (created <= length)
     {
         chain.Push(NewState());
         created++;
     }

     // Link consecutive states, one character per link.
     for (int pos = 0; pos < length; pos++)
     {
         int from = chain[pos];
         int to = chain[pos + 1];
         AddTransition(from, to, text[pos], costs[pos], ids[pos]);
     }

     SetStart(chain[0]);
     SetAccept(chain[length]);
 }
コード例 #33
0
ファイル: SimpleGrouper.cs プロジェクト: liaoheping/OCRonet
        /// <summary>
        /// Output the segmentation into a segmentation graph.
        /// Construct a state for each of the segments, then
        /// add transitions between states (segments)
        /// from min(segments[i]) to max(segments[i])+1.
        /// Multi-character class outputs are spelled out through freshly
        /// created intermediate states; only the last character carries the
        /// classification cost and the segment id.
        /// </summary>
        /// <param name="fst">The transducer to fill. It is cleared first.</param>
        public override void GetLattice(IGenericFst fst)
        {
            // Space / no-space costs at or above this value produce no arc.
            const float CostCutoff = 1000.0f;

            fst.Clear();

            int      final  = NarrayUtil.Max(labels) + 1;
            Intarray states = new Intarray(final + 1);

            // Index 0 stays at -1; indices 1..final map to fresh FST states.
            states.Fill(-1);
            for (int i = 1; i < states.Length(); i++)
            {
                states[i] = fst.NewState();
            }
            fst.SetStart(states[1]);
            fst.SetAccept(states[final]);

            for (int i = 0; i < boxes.Length(); i++)
            {
                int start = NarrayUtil.Min(segments.At1d(i));
                int end   = NarrayUtil.Max(segments.At1d(i));
                // The arc id packs the segment range: start in the high bits, end in the low bits.
                int id    = (start << 16) + end;
                if (segments.At1d(i).Length() == 0)
                {
                    id = 0;
                }

                float yes = spaces[i, 0];
                float no  = spaces[i, 1];
                // if no space is set, assume no space is present
                if (yes == float.PositiveInfinity && no == float.PositiveInfinity)
                {
                    no = 0.0f;
                }

                for (int j = 0; j < class_costs[i].Length(); j++)
                {
                    float  cost = class_costs[i][j];
                    string str  = class_outputs[i][j];
                    int    n    = str.Length;
                    // Index (into states) of the state reached after the characters emitted so far.
                    int    last = start;
                    for (int k = 0; k < n; k++)
                    {
                        int c = (int)str[k];
                        if (k < n - 1)
                        {
                            // add intermediate states/transitions for all but the last character
                            states.Push(fst.NewState());
                            fst.AddTransition(states[last], states.Last(), c, 0.0f, 0);
                            last = states.Length() - 1;
                        }
                        else
                        {
                            // for the last character, handle the spaces as well
                            if (no < CostCutoff)
                            {
                                // add the last character as a direct transition with no space
                                fst.AddTransition(states[last], states[end + 1], c, cost + no, id);
                            }
                            if (yes < CostCutoff)
                            {
                                // insert another state to handle spaces
                                states.Push(fst.NewState());
                                int space_state = states.Last();
                                // Leave from states[last], not states[start]: for multi-character
                                // outputs the space variant must continue after the preceding
                                // characters, otherwise that path would contain only the last
                                // character. For single-character outputs last == start.
                                fst.AddTransition(states[last], space_state, c, cost, id);
                                fst.AddTransition(space_state, states[end + 1], (int)' ', yes, 0);
                            }
                        }
                    } // for k
                }     // for j
            }         // for i
        }
コード例 #34
0
ファイル: FstUtil.cs プロジェクト: nickun/OCRonet
        /// <summary>
        /// Randomly walks an FST from its start state, assuming any input.
        /// At every step the outgoing arcs compete with "stop here" (weighted
        /// by the accept cost of the current state); the walk ends when
        /// "stop" is drawn or after <paramref name="max"/> steps.
        /// </summary>
        /// <param name="result">Receives the output label of every arc taken, in order.
        /// No filtering of labels is done here.</param>
        /// <param name="fst">The FST.</param>
        /// <param name="max">The maximum number of arcs to follow.</param>
        /// <returns>Sum of the costs of the arcs taken plus the accept cost of the final state.</returns>
        public static double fst_sample(Intarray result, IGenericFst fst, int max=1000)
        {
            double accumulated = 0;
            int state = fst.GetStart();
            int step = 0;

            while (step < max)
            {
                Intarray arcInputs  = new Intarray();
                Intarray arcOutputs = new Intarray();
                Intarray arcTargets = new Intarray();
                Floatarray arcCosts = new Floatarray();

                fst.Arcs(arcInputs, arcTargets, arcOutputs, arcCosts, state);

                // Append the accept cost as one extra alternative so that
                // stopping is sampled together with the real arcs.
                arcCosts.Push(fst.GetAcceptCost(state));
                int picked = sample_by_costs(arcCosts);
                int stopIndex = arcCosts.Length() - 1;
                if (picked == stopIndex)
                {
                    break;
                }

                result.Push(arcOutputs[picked]);
                accumulated += arcCosts[picked];
                state = arcTargets[picked];
                step++;
            }

            return accumulated + fst.GetAcceptCost(state);
        }
コード例 #35
0
ファイル: AutoMlpClassifier.cs プロジェクト: nickun/OCRonet
 /// <summary>
 /// Trains on <paramref name="ds"/>. With cross-validation enabled the
 /// samples are split by id into a training and a testing subset;
 /// otherwise the full dataset is used for both roles.
 /// </summary>
 /// <param name="ds">The dataset to train on.</param>
 public override void TrainDense(IDataset ds)
 {
     float split = PGetf("cv_split");
     int maxHeldOut = PGeti("cv_max");

     if (!crossvalidate)
     {
         TrainBatch(ds, ds);
         return;
     }

     // Split by sample id rather than by index, so that the same sample
     // never lands in both subsets (even if the dataset was produced by
     // resampling).
     Intarray heldOutIds = new Intarray();
     Intarray distinctIds = new Intarray();
     for (int s = 0; s < ds.nSamples(); s++)
     {
         distinctIds.Push(ds.Id(s));
     }
     NarrayUtil.Uniq(distinctIds);
     Global.Debugf("cvdetail", "reduced {0} ids to {1} ids", ds.nSamples(), distinctIds.Length());

     // Hold out the first (1 - split) share of the shuffled ids, capped at cv_max.
     NarrayUtil.Shuffle(distinctIds);
     int heldOutCount = (int)((1.0 - split) * distinctIds.Length());
     heldOutCount = Math.Min(heldOutCount, maxHeldOut);
     for (int h = 0; h < heldOutCount; h++)
     {
         heldOutIds.Push(distinctIds[h]);
     }
     // Sorted so that Bincontains can be used for the membership test below.
     NarrayUtil.Quicksort(heldOutIds);

     // Route every sample index to one of the two subsets according to its id.
     Intarray trainIdx = new Intarray();
     Intarray testIdx = new Intarray();
     for (int s = 0; s < ds.nSamples(); s++)
     {
         int sampleId = ds.Id(s);
         bool heldOut = ClassifierUtil.Bincontains(heldOutIds, sampleId);
         (heldOut ? testIdx : trainIdx).Push(s);
     }

     Global.Debugf("cvdetail", "#training {0} #testing {1}",
            trainIdx.Length(), testIdx.Length());
     PSet("%ntraining", trainIdx.Length());
     PSet("%ntesting", testIdx.Length());

     Datasubset trainSet = new Datasubset(ds, trainIdx);
     Datasubset testSet = new Datasubset(ds, testIdx);
     TrainBatch(trainSet, testSet);
 }
コード例 #36
0
ファイル: LatinClassifier.cs プロジェクト: nickun/OCRonet
        /// <summary>
        /// Trains the content classifier on <paramref name="ds"/> and, if the
        /// "junk" parameter is set and junk handling is not disabled, trains
        /// the junk classifier on a junk / non-junk relabelling of the data.
        /// </summary>
        /// <param name="ds">The labelled samples to train on.</param>
        protected override void Train(IDataset ds)
        {
            bool wantJunk = PGetb("junk") && !DisableJunk;

            // Fill in the sub-classifiers that are still missing.
            if (charclass.IsEmpty)
            {
                charclass.SetComponent(ComponentCreator.MakeComponent(PGet("charclass")));
                TryAttachCharClassifierEvent(charclass.Object);
            }
            if (junkclass.IsEmpty)
            {
                junkclass.SetComponent(ComponentCreator.MakeComponent(PGet("junkclass")));
                TryAttachJunkClassifierEvent(junkclass.Object);
            }
            if (ulclass.IsEmpty)
            {
                ulclass.SetComponent(ComponentCreator.MakeComponent(PGet("ulclass")));
            }

            // Stage 1: content classifier.
            Global.Debugf("info", "Training content classifier");
            if (wantJunk && !junkclass.IsEmpty)
            {
                // Exclude the samples labelled jc() from content training.
                Intarray keep = new Intarray();
                for (int s = 0; s < ds.nSamples(); s++)
                {
                    if (ds.Cls(s) == jc())
                    {
                        continue;
                    }
                    keep.Push(s);
                }
                Datasubset withoutJunk = new Datasubset(ds, keep);
                charclass.Object.XTrain(withoutJunk);
            }
            else
            {
                charclass.Object.XTrain(ds);
            }

            // Stage 2: junk classifier (only when requested and available).
            if (!wantJunk || junkclass.IsEmpty)
            {
                return;
            }

            Global.Debugf("info", "Training junk classifier");
            Intarray labels01 = new Intarray();
            int positives = 0;
            for (int s = 0; s < ds.nSamples(); s++)
            {
                bool flagged = (ds.Cls(s) == jc());
                labels01.Push(Convert.ToInt32(flagged));
                if (flagged)
                {
                    positives++;
                }
            }

            if (positives > 0)
            {
                MappedDataset relabelled = new MappedDataset(ds, labels01);
                junkclass.Object.XTrain(relabelled);
            }
            else
            {
                // No junk sample at all: remove the junk classifier.
                Global.Debugf("warn", "you are training a junk class but there are no samples to train on");
                junkclass.SetComponent(null);
            }

            if (PGeti("ul") > 0 && !ulclass.IsEmpty)
            {
                throw new Exception("ulclass not implemented");
            }
        }
コード例 #37
0
ファイル: FstUtil.cs プロジェクト: nickun/OCRonet
        /// <summary>
        /// Copy one FST to another, preserving only lowest-cost arcs:
        /// for every (source state, target state) pair only the cheapest arc
        /// is copied; among equally cheap arcs the one listed first is kept.
        /// This is useful for visualization.
        /// </summary>
        /// <param name="dst">The destination. Will be cleared before copying.</param>
        /// <param name="src">The FST to copy.</param>
        /// <exception cref="Exception">
        /// Thrown when <c>src.Arcs</c> fills its output arrays with different lengths.
        /// </exception>
        public static void fst_copy_best_arcs_only(IGenericFst dst, IGenericFst src)
        {
            dst.Clear();
            int n = src.nStates();
            // NOTE(review): the states created here are addressed below by the
            // loop index, i.e. NewState() is assumed to hand out 0..n-1 - confirm.
            for (int i = 0; i < n; i++)
                dst.NewState();
            dst.SetStart(src.GetStart());
            for (int i = 0; i < n; i++)
            {
                dst.SetAccept(i, src.GetAcceptCost(i));
                Intarray targets = new Intarray(), outputs = new Intarray(), inputs = new Intarray();
                Floatarray costs = new Floatarray();
                src.Arcs(inputs, targets, outputs, costs, i);
                int inlen = inputs.Length();
                if (inlen != targets.Length())
                    throw new Exception("ASSERT: inputs.length() == targets.length()");
                if (inlen != outputs.Length())
                    throw new Exception("ASSERT: inputs.length() == outputs.length()");
                if (inlen != costs.Length())
                    throw new Exception("ASSERT: inputs.length() == costs.length()");

                // target state -> index of the cheapest arc leading to it
                Dictionary<int, int> bestArc = new Dictionary<int, int>();
                // Iterate over the arcs of state i (inlen), not over the number
                // of states (n): the two counts are unrelated.
                for (int j = 0; j < inlen; j++)
                {
                    int t = targets[j];
                    int best_so_far;
                    if (!bestArc.TryGetValue(t, out best_so_far) || costs[j] < costs[best_so_far])
                        bestArc[t] = j;
                }

                // Values enumerate in the same order as Keys, so the arcs are
                // emitted in the same order as a walk over the keys would give.
                foreach (int j in bestArc.Values)
                {
                    dst.AddTransition(i, targets[j], outputs[j], costs[j], inputs[j]);
                }
            }
        }