Exemple #1
0
        /// <summary>
        /// Builds a line recognizer: declares its parameters via <c>PDef</c>, creates the
        /// segmenter, grouper and classifier components named by those parameters, registers
        /// them (and the <c>counts</c> array) via <c>Persist</c>, and configures the classifier
        /// when one was created.
        /// </summary>
        /// <param name="classifier1">Value passed to <c>PDef</c> for the "classifier" parameter; later read back to build the classifier model.</param>
        /// <param name="extractor1">Value passed to <c>PDef</c> for the "extractor" parameter; later handed to the classifier's <c>SetExtractor</c>.</param>
        /// <param name="segmenter1">Value passed to <c>PDef</c> for the "segmenter" parameter; later read back to build the line segmenter.</param>
        /// <param name="use_reject">Value passed to <c>PDef</c> for the "use_reject" parameter; later forwarded to the classifier as its "junk" setting.</param>
        /// <remarks>
        /// NOTE(review): <c>PDef</c>, <c>PGet</c>, <c>PGeti</c> and <c>Persist</c> are defined outside this
        /// block; they presumably register a named parameter with a value and description, read a
        /// parameter back as string/int, and register a member for save/load — confirm against the base class.
        /// </remarks>
        public Linerec(string classifier1 = "latin" /*none*/, string extractor1 = "scaledfe",
                       string segmenter1  = "DpSegmenter", int use_reject       = 1)
        {
            // Start with empty working state.
            transcript = "";
            //line = new Bytearray();
            segmentation = new Intarray();
            binarized    = new Bytearray();

            // component choices
            PDef("classifier", classifier1, "character classifier");
            PDef("extractor", extractor1, "feature extractor");
            PDef("segmenter", segmenter1, "line segmenter");
            PDef("grouper", "SimpleGrouper", "line grouper");
            // retraining
            PDef("cpreload", "none", "classifier to be loaded prior to training");
            // debugging
            PDef("verbose", 0, "verbose output from glinerec");
            // outputs
            PDef("use_priors", 0, "correct the classifier output by priors");
            PDef("use_reject", use_reject, "use a reject class (use posteriors only and train on junk chars)");
            PDef("maxcost", 20.0, "maximum cost of a character to be added to the output");
            PDef("minclass", 32, "minimum output class to be added (default=unicode space)");
            PDef("minprob", 1e-9, "minimum probability for a character to appear in the output at all");
            PDef("invert", 1, "invert the input line prior to char extraction");
            // segmentation
            PDef("maxrange", 5, "maximum number of components that are grouped together");
            // sanity limits on input
            PDef("minheight", 9, "minimum height of input line");
            PDef("maxheight", 300, "maximum height of input line");
            PDef("maxaspect", 2.0, "maximum height/width ratio of input line");
            // space estimation (FIXME factor this out eventually)
            PDef("space_fractile", 0.5, "fractile for space estimation");
            PDef("space_multiplier", 2.0, "multipler for space estimation");
            PDef("space_min", 0.2, "minimum space threshold (in xheight)");
            PDef("space_max", 1.1, "maximum space threshold (in xheight)");
            PDef("space_yes", 1.0, "cost of inserting a space");
            PDef("space_no", 5.0, "cost of not inserting a space");
            // backward compatibility
            PDef("minsize_factor", 0.0, "");

            // Build the components named by the parameters defined above; every PGet here
            // reads a parameter that has already been passed to PDef.
            counts     = new Intarray();
            segmenter  = new ComponentContainerISegmentLine(ComponentCreator.MakeComponent <ISegmentLine>(PGet("segmenter")));
            grouper    = new ComponentContainerIGrouper(ComponentCreator.MakeComponent <IGrouper>(PGet("grouper")));
            classifier = new ComponentContainerIModel(IModel.MakeModel(PGet("classifier")));
            // NOTE(review): presumably subscribes to events raised by the classifier; the
            // helper is defined outside this block — confirm it tolerates a null Object.
            TryAttachClassifierEvent(classifier.Object);

            // Register the components and the counts array under their persistence names.
            Persist(classifier, "classifier");
            Persist(counts, "counts");
            Persist(segmenter, "segmenter");
            Persist(grouper, "grouper");

            // Only configure the classifier when the container actually holds one.
            if (!classifier.IsEmpty)
            {
                // The line recognizer's "use_reject" value becomes the classifier's "junk" setting.
                classifier.Object.Set("junk", PGeti("use_reject"));
                classifier.Object.SetExtractor(PGet("extractor"));
            }
            // Reset training bookkeeping.
            ntrained      = 0;
            counts_warned = false;
        }
 /// <summary>
 /// Builds a Latin-script classifier: seeds the shared randomizer, declares the
 /// component parameters via <c>PDef</c>, creates the character and junk models named by
 /// those parameters, registers the three model containers via <c>Persist</c>, and hooks
 /// up the character/junk classifier events.
 /// </summary>
 /// <remarks>
 /// The parameters are declared before any <c>PGet</c> reads them back, matching the
 /// define-then-read order used by the other component constructors in this code base.
 /// Previously "charclass" and "junkclass" were read before they had been defined.
 /// NOTE(review): <c>PDef</c>/<c>PGet</c>/<c>Persist</c> live outside this block — confirm that reading
 /// an undefined parameter is indeed an error (or yields no model) in the base class.
 /// </remarks>
 public LatinClassifier()
 {
     // Seed first so that any randomness used while creating the models below is seeded.
     // NOTE(review): DateTime.Now.Millisecond only yields 0..999, so there are at most
     // 1000 distinct seeds — consider a wider seed source if that matters.
     DRandomizer.Default.init_drand(DateTime.Now.Millisecond);

     // Declare every parameter (with its default) before anything reads it back.
     PDef("junkchar", (int)'~', "junk character");
     PDef("junkclass", "mlp", "junk classifier");
     PDef("charclass", "mappedmlp", "character classifier");
     PDef("junk", 1, "train a separate junk classifier");
     PDef("ul", 0, "do upper/lower reclassification");
     PDef("ulclass", "mlp", "upper/lower classifier");
     junkchar = -1;

     // Create the models named by the parameters defined above. The upper/lower
     // container is created empty here; no model is built for it in this constructor.
     charclass = new ComponentContainerIModel(IModel.MakeModel(PGet("charclass")));
     junkclass = new ComponentContainerIModel(IModel.MakeModel(PGet("junkclass")));
     ulclass   = new ComponentContainerIModel();

     // Register the containers under their persistence names.
     Persist(charclass, "charclass");
     Persist(junkclass, "junkclass");
     Persist(ulclass, "ulclass");

     // NOTE(review): presumably subscribes to events raised by the two models; the
     // helpers are defined outside this block.
     TryAttachCharClassifierEvent(charclass.Object);
     TryAttachJunkClassifierEvent(junkclass.Object);
 }