/// <summary>
/// Builds a line recognizer: resets the per-line state, registers every
/// tunable parameter together with its default value, creates the
/// segmenter, grouper and classifier components named by those parameters,
/// and registers the components and the counts array through Persist.
/// </summary>
/// <param name="classifier1">Default value for the "classifier" parameter.</param>
/// <param name="extractor1">Default value for the "extractor" parameter.</param>
/// <param name="segmenter1">Default value for the "segmenter" parameter.</param>
/// <param name="use_reject">Default value for the "use_reject" parameter; the
/// current value of that parameter is forwarded to a non-empty classifier as
/// its "junk" setting.</param>
public Linerec(string classifier1 = "latin" /*none*/, string extractor1 = "scaledfe", string segmenter1 = "DpSegmenter", int use_reject = 1)
{
    // --- per-line working state ---
    transcript = "";
    // The allocation of 'line' is disabled in this constructor:
    //line = new Bytearray();
    segmentation = new Intarray();
    binarized = new Bytearray();

    // --- which components to build ---
    PDef("classifier", classifier1, "character classifier");
    PDef("extractor", extractor1, "feature extractor");
    PDef("segmenter", segmenter1, "line segmenter");
    PDef("grouper", "SimpleGrouper", "line grouper");

    // --- retraining ---
    PDef("cpreload", "none", "classifier to be loaded prior to training");

    // --- debugging ---
    PDef("verbose", 0, "verbose output from glinerec");

    // --- output filtering ---
    PDef("use_priors", 0, "correct the classifier output by priors");
    PDef("use_reject", use_reject, "use a reject class (use posteriors only and train on junk chars)");
    PDef("maxcost", 20.0, "maximum cost of a character to be added to the output");
    PDef("minclass", 32, "minimum output class to be added (default=unicode space)");
    PDef("minprob", 1e-9, "minimum probability for a character to appear in the output at all");
    PDef("invert", 1, "invert the input line prior to char extraction");

    // --- segmentation ---
    PDef("maxrange", 5, "maximum number of components that are grouped together");

    // --- sanity limits on the input line ---
    PDef("minheight", 9, "minimum height of input line");
    PDef("maxheight", 300, "maximum height of input line");
    PDef("maxaspect", 2.0, "maximum height/width ratio of input line");

    // --- space estimation (FIXME: factor this out eventually) ---
    PDef("space_fractile", 0.5, "fractile for space estimation");
    PDef("space_multiplier", 2.0, "multipler for space estimation");
    PDef("space_min", 0.2, "minimum space threshold (in xheight)");
    PDef("space_max", 1.1, "maximum space threshold (in xheight)");
    PDef("space_yes", 1.0, "cost of inserting a space");
    PDef("space_no", 5.0, "cost of not inserting a space");

    // --- kept for backward compatibility ---
    PDef("minsize_factor", 0.0, "");

    counts = new Intarray();

    // Build each component from the name stored in its parameter.
    var segmenterName = PGet("segmenter");
    var segmenterComponent = ComponentCreator.MakeComponent<ISegmentLine>(segmenterName);
    segmenter = new ComponentContainerISegmentLine(segmenterComponent);

    var grouperName = PGet("grouper");
    var grouperComponent = ComponentCreator.MakeComponent<IGrouper>(grouperName);
    grouper = new ComponentContainerIGrouper(grouperComponent);

    var classifierName = PGet("classifier");
    var classifierModel = IModel.MakeModel(classifierName);
    classifier = new ComponentContainerIModel(classifierModel);
    TryAttachClassifierEvent(classifier.Object);

    // Register the sub-objects under their names.
    Persist(classifier, "classifier");
    Persist(counts, "counts");
    Persist(segmenter, "segmenter");
    Persist(grouper, "grouper");

    // Configure the classifier only when the container actually holds one.
    if (!classifier.IsEmpty)
    {
        var model = classifier.Object;
        var rejectSetting = PGeti("use_reject");
        model.Set("junk", rejectSetting);

        var extractorName = PGet("extractor");
        model.SetExtractor(extractorName);
    }

    // Training bookkeeping starts from a clean slate.
    ntrained = 0;
    counts_warned = false;
}
/// <summary>
/// Builds a Latin-script classifier: seeds the default randomizer, registers
/// the classifier's parameters with their defaults, creates the character
/// and junk sub-classifiers named by the "charclass" and "junkclass"
/// parameters, creates an empty container for the upper/lower classifier,
/// and registers all three containers through Persist.
/// </summary>
public LatinClassifier()
{
    // Seed from the millisecond component of the current local time.
    DRandomizer.Default.init_drand(DateTime.Now.Millisecond);

    // Define every parameter BEFORE any PGet call reads it, so that the
    // defaults for "charclass" and "junkclass" exist when the sub-classifiers
    // are created below. (How PGet reacts to an undefined name is not visible
    // here; defining first avoids depending on that.)
    PDef("junkchar", (int)'~', "junk character");
    PDef("junkclass", "mlp", "junk classifier");
    PDef("charclass", "mappedmlp", "character classifier");
    PDef("junk", 1, "train a separate junk classifier");
    PDef("ul", 0, "do upper/lower reclassification");
    PDef("ulclass", "mlp", "upper/lower classifier");

    // Create the sub-classifiers from the parameter values defined above.
    charclass = new ComponentContainerIModel(IModel.MakeModel(PGet("charclass")));
    junkclass = new ComponentContainerIModel(IModel.MakeModel(PGet("junkclass")));
    // The upper/lower classifier container starts out empty; no model is
    // created for it in this constructor.
    ulclass = new ComponentContainerIModel();

    // The junkchar field starts at -1, independent of the "junkchar"
    // parameter default ('~') registered above.
    junkchar = -1;

    Persist(charclass, "charclass");
    Persist(junkclass, "junkclass");
    Persist(ulclass, "ulclass");

    TryAttachCharClassifierEvent(charclass.Object);
    TryAttachJunkClassifierEvent(junkclass.Object);
}