/// <summary>
/// Considers the transition into the state pair (t1, t2) at the given cost.
/// If the n-best list accepts it, the transition is recorded either as a
/// candidate for the next beam (positive input label) or as a control
/// point appended to the current beam (non-positive input label).
/// </summary>
/// <remarks>
/// f1, f2, arc_id1, arc_id2 and intermediate are not read by this method.
/// </remarks>
public void Relax(int f1, int f2,       // input state pair
                  int t1, int t2,       // output state pair
                  float cost,           // transition cost
                  int arc_id1,          // (unused)
                  int arc_id2,          // (unused)
                  int input,            // input label
                  int intermediate,     // (unused)
                  int output,           // output label
                  float base_cost,      // cost of the path so far
                  int trail_index)
{
    // The target pair is keyed by a single integer and tagged with the
    // index this entry is about to occupy in the all_* arrays.
    int pairKey = t1 * fst2.nStates() + t2;
    int entryIndex = all_costs.Length();
    bool nbestChanged = nbest.AddReplacingId(pairKey, entryIndex, -base_cost - cost);
    if (!nbestChanged)
    {
        return;
    }

    if (input <= 0)
    {
        // Beam control hack: a node that changed nbest but has a
        // non-positive input label is added to the CURRENT beam.
        int controlNode = stree.Add(beam[trail_index], t1, t2, input, output, (float)cost);
        beam.Push(controlNode);
        beamcost.Push(base_cost + cost);

        // Stub entry indicating that the node should not be added to the
        // next generation beam (parent trail of -1).
        all_inputs.Push(0);
        all_targets1.Push(-1);
        all_targets2.Push(-1);
        all_outputs.Push(0);
        all_costs.Push(0);
        parent_trails.Push(-1);
        return;
    }

    // The candidate for the next beam is stored in the all_* arrays.
    all_inputs.Push(input);
    all_targets1.Push(t1);
    all_targets2.Push(t2);
    all_outputs.Push(output);
    all_costs.Push(cost);
    parent_trails.Push(trail_index);
}
/// <summary>
/// Builds the dataset from the rows of <paramref name="ds"/> (along its
/// first dimension) and the class label stored at the same index in
/// <paramref name="cs"/>, then calls Recompute().
/// </summary>
/// <param name="ds">Source array; one dataset row is added per index of dimension 0.</param>
/// <param name="cs">Class labels, indexed like the rows of <paramref name="ds"/>.</param>
public RowDataset8(Narray<byte> ds, Intarray cs)
    : this()
{
    int row = 0;
    while (row < ds.Dim(0))
    {
        // A fresh row is appended to data and handed to RowGet together
        // with the source and the row index.
        RowGet(data.Push(new Narray<byte>()), ds, row);
        classes.Push(cs[row]);
        row++;
    }

    Recompute();
}
/// <summary>
/// Randomly sample an FST, assuming any input.
/// </summary>
/// <param name="result">Receives the output symbol of every arc taken (appended).</param>
/// <param name="fst">The FST.</param>
/// <param name="max">The maximum number of arcs to follow.</param>
/// <returns>
/// The summed cost of the arcs taken plus the accept cost of the state
/// where sampling stopped.
/// </returns>
public static double fst_sample(Intarray result, IGenericFst fst, int max = 1000)
{
    double pathCost = 0;
    int state = fst.GetStart();

    int step = 0;
    while (step < max)
    {
        Intarray arcInputs = new Intarray();
        Intarray arcOutputs = new Intarray();
        Intarray arcTargets = new Intarray();
        Floatarray arcCosts = new Floatarray();
        fst.Arcs(arcInputs, arcTargets, arcOutputs, arcCosts, state);

        // Accepting at this state is treated as one more option, appended
        // after the outgoing arcs so that all costs are sampled uniformly.
        arcCosts.Push(fst.GetAcceptCost(state));
        int picked = sample_by_costs(arcCosts);
        bool pickedAccept = picked == arcCosts.Length() - 1;
        if (pickedAccept)
        {
            break;
        }

        result.Push(arcOutputs[picked]);
        pathCost += arcCosts[picked];
        state = arcTargets[picked];
        step++;
    }

    return pathCost + fst.GetAcceptCost(state);
}
/// <summary>
/// Extracts the path that ends at node <paramref name="id"/> by following
/// parent links until a parent of -1 is reached, and writes it to the
/// result arrays in root-to-node order.
/// </summary>
/// <param name="r_vertices1">Receives the first vertex of each node on the path.</param>
/// <param name="r_vertices2">Receives the second vertex of each node on the path.</param>
/// <param name="r_inputs">Receives the input labels along the path.</param>
/// <param name="r_outputs">Receives the output labels along the path.</param>
/// <param name="r_costs">Receives the costs along the path.</param>
/// <param name="id">Index of the last node of the path.</param>
public void Get(Intarray r_vertices1, Intarray r_vertices2, Intarray r_inputs,
                Intarray r_outputs, Floatarray r_costs, int id)
{
    // The walk goes from the node towards the root, so these are collected
    // in reverse order.
    Intarray backV1 = new Intarray();
    Intarray backV2 = new Intarray();
    Intarray backInputs = new Intarray();
    Intarray backOutputs = new Intarray();
    Floatarray backCosts = new Floatarray();

    for (int node = id; node != -1; node = parents[node])
    {
        backV1.Push(v1[node]);
        backV2.Push(v2[node]);
        backInputs.Push(inputs[node]);
        backOutputs.Push(outputs[node]);
        backCosts.Push(costs[node]);
    }

    NarrayUtil.Reverse(r_vertices1, backV1);
    NarrayUtil.Reverse(r_vertices2, backV2);
    NarrayUtil.Reverse(r_inputs, backInputs);
    NarrayUtil.Reverse(r_outputs, backOutputs);
    NarrayUtil.Reverse(r_costs, backCosts);
}
/// <summary>
/// Fills <paramref name="lines"/> with the line numbers found in the
/// directory of page <paramref name="ipage"/>, in ascending order.
/// </summary>
/// <param name="lines">Cleared, then filled with the sorted line numbers.</param>
/// <param name="ipage">Page number; formatted as four digits to build the directory name.</param>
protected virtual void GetLinesOfPage(Intarray lines, int ipage)
{
    lines.Clear();

    string dirName = String.Format("{0}{1}{2:0000}", prefix, Path.DirectorySeparatorChar, ipage);
    DirPattern dpattern = new DirPattern(dirName, @"([0-9][0-9][0-9][0-9])\.png");
    int found = dpattern.Length;
    if (found > 0)
    {
        lines.ReserveTo(found);
    }

    // Parse every match first, then sort, so the output does not depend
    // on the order in which the matches are listed.
    List<int> numbers = new List<int>(found);
    for (int m = 0; m < found; m++)
    {
        numbers.Add(int.Parse(dpattern[m]));
    }
    numbers.Sort();

    foreach (int lineNumber in numbers)
    {
        lines.Push(lineNumber);
    }
}
/// <summary>
/// Validates the dataset, builds the class mapping if none exists yet, and
/// trains on the dataset with its classes translated through that mapping.
/// </summary>
/// <param name="ds">Training data; must have at least one sample and one feature.</param>
/// <exception cref="Exception">The dataset has no samples or no features.</exception>
protected override void Train(IDataset ds)
{
    if (ds.nSamples() <= 0)
    {
        throw new Exception("nSamples of IDataset must be > 0");
    }
    if (ds.nFeatures() <= 0)
    {
        throw new Exception("nFeatures of IDataset must be > 0");
    }

    bool mappingMissing = c2i.Length() < 1;
    if (mappingMissing)
    {
        // Gather the raw class of every sample and derive c2i / i2c from them.
        int sampleCount = ds.nSamples();
        Intarray raw_classes = new Intarray();
        raw_classes.ReserveTo(sampleCount);
        for (int s = 0; s < sampleCount; s++)
        {
            raw_classes.Push(ds.Cls(s));
        }
        ClassMap(c2i, i2c, raw_classes);
    }

    TrainDense(new TranslatedDataset(ds, c2i));
}
/// <summary>
/// Extracts the path that ends at node <paramref name="id"/> by following
/// parent links until a parent of -1 is reached, and writes it to the
/// result arrays in root-to-node order.
/// </summary>
/// <param name="r_vertices1">Receives the first vertex of each node on the path.</param>
/// <param name="r_vertices2">Receives the second vertex of each node on the path.</param>
/// <param name="r_inputs">Receives the input labels along the path.</param>
/// <param name="r_outputs">Receives the output labels along the path.</param>
/// <param name="r_costs">Receives the costs along the path.</param>
/// <param name="id">Index of the last node of the path.</param>
public void Get(Intarray r_vertices1, Intarray r_vertices2, Intarray r_inputs,
                Intarray r_outputs, Floatarray r_costs, int id)
{
    // The walk goes from the node towards the root, so these are collected
    // in reverse order.
    Intarray backV1 = new Intarray();
    Intarray backV2 = new Intarray();
    Intarray backInputs = new Intarray();
    Intarray backOutputs = new Intarray();
    Floatarray backCosts = new Floatarray();

    for (int node = id; node != -1; node = parents[node])
    {
        backV1.Push(v1[node]);
        backV2.Push(v2[node]);
        backInputs.Push(inputs[node]);
        backOutputs.Push(outputs[node]);
        backCosts.Push(costs[node]);
    }

    NarrayUtil.Reverse(r_vertices1, backV1);
    NarrayUtil.Reverse(r_vertices2, backV2);
    NarrayUtil.Reverse(r_inputs, backInputs);
    NarrayUtil.Reverse(r_outputs, backOutputs);
    NarrayUtil.Reverse(r_costs, backCosts);
}
/// <summary>
/// Increments the counter for class <paramref name="c"/>, first growing the
/// counts array with zeros until index <paramref name="c"/> exists.
/// Original name: inc_class
/// </summary>
/// <param name="c">Class index whose counter is incremented.</param>
public void IncClass(int c)
{
    for (int size = counts.Length(); size <= c; size++)
    {
        counts.Push(0);
    }

    counts[c]++;
}
/// <summary>
/// Copy one FST to another, preserving only lowest-cost arcs: for every
/// (source state, target state) pair only the cheapest arc is kept.
/// This is useful for visualization.
/// </summary>
/// <param name="dst">The destination. Will be cleared before copying.</param>
/// <param name="src">The FST to copy.</param>
/// <exception cref="Exception">
/// The arrays returned by <c>src.Arcs</c> for a state differ in length.
/// </exception>
public static void fst_copy_best_arcs_only(IGenericFst dst, IGenericFst src)
{
    dst.Clear();

    // Mirror the state set; state i of dst corresponds to state i of src.
    int n = src.nStates();
    for (int i = 0; i < n; i++)
    {
        dst.NewState();
    }
    dst.SetStart(src.GetStart());

    for (int i = 0; i < n; i++)
    {
        dst.SetAccept(i, src.GetAcceptCost(i));

        Intarray targets = new Intarray(), outputs = new Intarray(), inputs = new Intarray();
        Floatarray costs = new Floatarray();
        src.Arcs(inputs, targets, outputs, costs, i);

        int inlen = inputs.Length();
        if (inlen != targets.Length())
        {
            throw new Exception("ASSERT: inputs.length() == targets.length()");
        }
        if (inlen != outputs.Length())
        {
            throw new Exception("ASSERT: inputs.length() == outputs.length()");
        }
        if (inlen != costs.Length())
        {
            throw new Exception("ASSERT: inputs.length() == costs.length()");
        }

        // Map each target state to the index of the cheapest arc leading to it.
        // The loop must run over the arcs of state i (inlen), not over the
        // number of states (n): the two counts are unrelated.
        Dictionary<int, int> hash = new Dictionary<int, int>();
        for (int j = 0; j < inlen; j++)
        {
            int t = targets[j];
            int best_so_far;
            if (!hash.TryGetValue(t, out best_so_far) || costs[j] < costs[best_so_far])
            {
                hash[t] = j;
            }
        }

        // Emit exactly one arc per distinct target.
        foreach (int j in hash.Values)
        {
            dst.AddTransition(i, targets[j], outputs[j], costs[j], inputs[j]);
        }
    }
}
/// <summary>
/// Trains on a dense dataset. When cross-validation is enabled, the samples
/// are split by sample id into a training and a testing subset first;
/// otherwise the full dataset is used for both.
/// </summary>
/// <param name="ds">The dataset to train on.</param>
public override void TrainDense(IDataset ds)
{
    float split = PGetf("cv_split");
    int mlp_cv_max = PGeti("cv_max");

    if (!crossvalidate)
    {
        TrainBatch(ds, ds);
        return;
    }

    // Perform the split on distinct ids, making sure that the same sample
    // never ends up in both the test and the training set (even if the
    // data set is the result of resampling).
    Intarray ids = new Intarray();
    for (int s = 0; s < ds.nSamples(); s++)
    {
        ids.Push(ds.Id(s));
    }
    NarrayUtil.Uniq(ids);
    Global.Debugf("cvdetail", "reduced {0} ids to {1} ids", ds.nSamples(), ids.Length());
    NarrayUtil.Shuffle(ids);

    // The first nids shuffled ids become the test ids, capped by cv_max.
    int nids = (int)((1.0 - split) * ids.Length());
    nids = Math.Min(nids, mlp_cv_max);
    Intarray test_ids = new Intarray();
    for (int t = 0; t < nids; t++)
    {
        test_ids.Push(ids[t]);
    }
    // Sorted before the membership lookups below.
    NarrayUtil.Quicksort(test_ids);

    Intarray training = new Intarray();
    Intarray testing = new Intarray();
    for (int s = 0; s < ds.nSamples(); s++)
    {
        bool heldOut = ClassifierUtil.Bincontains(test_ids, ds.Id(s));
        (heldOut ? testing : training).Push(s);
    }

    Global.Debugf("cvdetail", "#training {0} #testing {1}", training.Length(), testing.Length());
    PSet("%ntraining", training.Length());
    PSet("%ntesting", testing.Length());

    Datasubset trs = new Datasubset(ds, training);
    Datasubset tss = new Datasubset(ds, testing);
    TrainBatch(trs, tss);
}
/// <summary>
/// Rebuilds the vertex sequence that leads to the accepted vertex by
/// following came_from links until a vertex that points to itself.
/// </summary>
/// <param name="result_vertices">
/// Receives the vertices in forward order; left untouched when nothing was accepted.
/// </param>
/// <returns>False when accepted_from is -1, true otherwise.</returns>
public bool reconstruct_vertices(Intarray result_vertices)
{
    if (accepted_from == -1)
    {
        return false;
    }

    // Collected from the accepted vertex backwards.
    Intarray backwards = new Intarray();
    int node = accepted_from;
    backwards.Push(node);
    for (;;)
    {
        int predecessor = came_from[node];
        if (predecessor == node)
        {
            break;
        }
        backwards.Push(predecessor);
        node = predecessor;
    }

    NarrayUtil.Reverse(result_vertices, backwards);
    return true;
}
/// <summary>
/// The main loop iteration: tries to accept from the current beam,
/// traverses from every beam node, and replaces the beam with the
/// candidates that survived in the n-best list.
/// </summary>
public void Radiate()
{
    Clear();

    // Nodes at or beyond this index are control nodes added during traversal.
    int control_beam_start = beam.Length();

    int idx = 0;
    while (idx < control_beam_start)
    {
        TryAccept(idx);
        idx++;
    }

    // Traversal may add "control nodes" to the beam, so its length is
    // re-read on every iteration.
    idx = 0;
    while (idx < beam.Length())
    {
        int node = beam[idx];
        Traverse(stree.v1[node], stree.v2[node], beamcost[idx], idx);
        idx++;
    }

    // Try accepts from control beam nodes (they're not going to the next beam).
    idx = control_beam_start;
    while (idx < beam.Length())
    {
        TryAccept(idx);
        idx++;
    }

    Intarray new_beam = new Intarray();
    Floatarray new_beamcost = new Floatarray();
    for (int rank = 0; rank < nbest.Length(); rank++)
    {
        int k = nbest.Tag(rank);
        int parent = parent_trails[k];
        if (parent < 0)
        {
            // Control beam nodes carry a negative parent trail; skip them.
            continue;
        }

        int added = stree.Add(beam[parent], all_targets1[k], all_targets2[k],
                              all_inputs[k], all_outputs[k], all_costs[k]);
        new_beam.Push(added);
        new_beamcost.Push(beamcost[parent] + all_costs[k]);
    }

    beam.Move(new_beam);
    beamcost.Move(new_beamcost);
}
/// <summary>
/// Loads the contents of <paramref name="v"/> into this object's key and
/// value arrays and sets the stored length to the length of <paramref name="v"/>.
/// </summary>
/// <param name="v">Source vector.</param>
/// <param name="eps">Magnitude threshold applied in the first pass.</param>
public void Copy(Floatarray v, float eps = 1e-11f)
{
    Clear();

    // First pass: record index and value of every entry whose magnitude
    // is at least eps.
    int count = v.Length();
    for (int idx = 0; idx < count; idx++)
    {
        float entry = v.At1d(idx);
        if (Math.Abs(entry) >= eps)
        {
            _keys.Push(idx);
            _values.Push(entry);
        }
    }

    // Second pass: keys are resized to the full length and set to
    // 0.._len-1, and the values are copied from v as a whole.
    // NOTE(review): this looks like it overwrites what the thresholded
    // first pass collected, making eps ineffective - confirm whether the
    // dense result is intended.
    _len = v.Length();
    _keys.Resize(_len);
    for (int idx = 0; idx < _len; idx++)
    {
        _keys.Put1d(idx, idx);
    }
    _values.Copy(v);
}
/// <summary>
/// Fills <paramref name="lines"/> with the six-digit line numbers found in
/// the directory of page <paramref name="ipage"/>, in the order the
/// directory pattern lists them.
/// </summary>
/// <param name="lines">Cleared, then filled with the parsed line numbers.</param>
/// <param name="ipage">Page number; formatted as four digits to build the directory name.</param>
protected override void GetLinesOfPage(Intarray lines, int ipage)
{
    lines.Clear();

    string dirName = String.Format("{0}{1}{2:0000}", prefix, Path.DirectorySeparatorChar, ipage);
    DirPattern dpattern = new DirPattern(dirName, @"([0-9][0-9][0-9][0-9][0-9][0-9])\.png");
    int found = dpattern.Length;
    if (found > 0)
    {
        lines.ReserveTo(found);
    }

    int idx = 0;
    while (idx < found)
    {
        lines.Push(int.Parse(dpattern[idx]));
        idx++;
    }
}
/// <summary>
/// Appends a node to the tree.
/// </summary>
/// <param name="parent">Index of the parent node.</param>
/// <param name="vertex1">First vertex stored with the node.</param>
/// <param name="vertex2">Second vertex stored with the node.</param>
/// <param name="input">Input label stored with the node.</param>
/// <param name="output">Output label stored with the node.</param>
/// <param name="cost">Cost stored with the node.</param>
/// <returns>The index of the new node (the node count before the call).</returns>
public int Add(int parent, int vertex1, int vertex2, int input, int output, float cost)
{
    // All per-node arrays grow in lockstep, so the current length of any
    // of them is the index the new node will get.
    int newIndex = parents.Length();

    parents.Push(parent);
    v1.Push(vertex1);
    v2.Push(vertex2);
    inputs.Push(input);
    outputs.Push(output);
    costs.Push(cost);

    return newIndex;
}
/// <summary>
/// Adds one sample with class <paramref name="c"/> to the dataset.
/// </summary>
/// <param name="v">Feature vector; all values must lie strictly between -1.2 and 1.2.</param>
/// <param name="c">Class of the sample; must be at least -1.</param>
public override void Add(Floatarray v, int c)
{
    bool valuesInRange = NarrayUtil.Min(v) > -1.2f && NarrayUtil.Max(v) < 1.2f;
    CHECK_ARG(valuesInRange, "float8: value out of range (-1.2..1.2)");
    CHECK_ARG(c >= -1, "c>=-1");

    // Keep the class count one above the largest class seen so far.
    if (nc <= c)
    {
        nc = c + 1;
    }

    // The first sample added fixes the feature count.
    if (nf < 0)
    {
        nf = v.Length();
    }

    RowPush(data, v);
    classes.Push(c);
}
/// <summary>
/// Collects the interior indices of <paramref name="data"/> that qualify as
/// local minima: the value is at most <paramref name="threshold"/>, at most
/// the value local_min computed for that index, not above its left
/// neighbour and strictly below its right neighbour.
/// </summary>
/// <param name="result">Cleared, then filled with the qualifying indices in ascending order.</param>
/// <param name="data">The values to scan; the first and last element are never reported.</param>
/// <param name="r">Passed through to local_min (presumably the window radius - confirm there).</param>
/// <param name="threshold">Upper bound a value must satisfy to qualify.</param>
public static void local_minima(ref Intarray result, Floatarray data, int r, float threshold)
{
    int n = data.Length();
    result.Clear();

    Floatarray lmin = new Floatarray();
    local_min(ref lmin, data, r);

    int last = n - 1;
    for (int pos = 1; pos < last; pos++)
    {
        // The checks are kept in this order and in negated form so that
        // evaluation stops at the same comparison as a chained &&.
        if (!(data[pos] <= threshold))
        {
            continue;
        }
        if (!(data[pos] <= lmin[pos]))
        {
            continue;
        }
        if (!(data[pos] <= data[pos - 1]))
        {
            continue;
        }
        if (!(data[pos] < data[pos + 1]))
        {
            continue;
        }

        result.Push(pos);
    }
}
/// <summary>
/// Simple interface for line recognizers: builds a linear chain of states
/// with one transition per character of <paramref name="text"/>.
/// </summary>
/// <param name="text">The characters to emit, one per transition.</param>
/// <param name="costs">Cost of each transition, indexed like <paramref name="text"/>.</param>
/// <param name="ids">Id of each transition, indexed like <paramref name="text"/>.</param>
public virtual void SetString(string text, Floatarray costs, Intarray ids)
{
    int n = text.Length;

    // n characters need n + 1 states.
    Intarray states = new Intarray();
    states.Clear();
    for (int made = 0; made <= n; made++)
    {
        states.Push(NewState());
    }

    for (int pos = 0; pos < n; pos++)
    {
        int src = states[pos];
        int dst = states[pos + 1];
        AddTransition(src, dst, text[pos], costs[pos], ids[pos]);
    }

    SetStart(states[0]);
    SetAccept(states[n]);
}
/// <summary>
/// Fills <paramref name="lines"/> with the six-digit line numbers found in
/// the directory of page <paramref name="ipage"/>, in the order the
/// directory pattern lists them.
/// </summary>
/// <param name="lines">Cleared, then filled with the parsed line numbers.</param>
/// <param name="ipage">Page number; formatted as four digits to build the directory name.</param>
protected override void GetLinesOfPage(Intarray lines, int ipage)
{
    lines.Clear();

    string dirName = String.Format("{0}{1}{2:0000}", prefix, Path.DirectorySeparatorChar, ipage);
    DirPattern dpattern = new DirPattern(dirName, @"([0-9][0-9][0-9][0-9][0-9][0-9])\.png");
    int found = dpattern.Length;
    if (found > 0)
    {
        lines.ReserveTo(found);
    }

    int idx = 0;
    while (idx < found)
    {
        lines.Push(int.Parse(dpattern[idx]));
        idx++;
    }
}
/// <summary>
/// Draws <paramref name="n"/> indices using the running sums of
/// <paramref name="weights"/>, normalized by their maximum, as lookup table.
/// </summary>
/// <param name="samples">Cleared, then filled with the drawn indices.</param>
/// <param name="weights">Per-index weights; not modified.</param>
/// <param name="n">Number of indices to draw.</param>
public static void weighted_sample(Intarray samples, Floatarray weights, int n)
{
    // Work on a copy so that the caller's weights stay untouched.
    Floatarray cs = new Floatarray();
    cs.Copy(weights);

    // Turn the weights into running sums ...
    int pos = 1;
    while (pos < cs.Length())
    {
        cs[pos] += cs[pos - 1];
        pos++;
    }
    // ... and scale them by their maximum.
    cs /= NarrayUtil.Max(cs);

    samples.Clear();
    for (int drawn = 0; drawn < n; drawn++)
    {
        float value = (float)DRandomizer.Default.drand();
        samples.Push(Binsearch(cs, value));
    }
}
/// <summary>
/// Push the node in the heap if it's not already there, otherwise promote
/// it when the new cost is lower.
/// </summary>
/// <param name="node">The node to insert or promote.</param>
/// <param name="cost">The cost offered for the node.</param>
/// <returns>
/// True if the heap was changed, false if the item was already
/// in the heap and with a better (or equal) cost.
/// </returns>
public bool Push(int node, float cost)
{
    int slot = heapback[node];

    if (slot == -1)
    {
        // Not in the heap yet: append and sift up from the new last slot.
        heap.Push(node);
        costs.Push(cost);
        int lastSlot = heap.Length() - 1;
        heapback[node] = lastSlot;
        heapify_up(lastSlot);
        return true;
    }

    bool improves = cost < costs[slot];
    if (!improves)
    {
        return false;
    }

    costs[slot] = cost;
    heapify_up(slot);
    return true;
}
/// <summary>
/// Compute the groups for a segmentation (internal method). Every run of
/// up to maxrange consecutive segments whose horizontal gaps do not exceed
/// maxdist is appended to boxes / segments.
/// </summary>
private void computeGroups()
{
    rboxes.Clear();
    ImgLabels.bounding_boxes(ref rboxes, labels);
    int n = rboxes.Length();

    // NB: the first segment is 1 because 0 is the background.
    for (int first = 1; first < n; first++)
    {
        for (int span = 1; span <= maxrange; span++)
        {
            int stop = first + span;
            if (stop > n)
            {
                continue;
            }

            Rect box = rboxes.At1d(first);
            Intarray seg = new Intarray();

            int member = first;
            while (member < stop)
            {
                // A gap wider than maxdist between neighbouring segments
                // invalidates the whole group.
                if (member > first && rboxes.At1d(member).x0 - rboxes.At1d(member - 1).x1 > maxdist)
                {
                    break;
                }
                box.Include(rboxes.At1d(member));
                seg.Push(member);
                member++;
            }

            // The group is only kept when the scan reached its end.
            if (member < stop)
            {
                continue;
            }

            boxes.Push(box);
            segments.Push(seg);
        }
    }
}
/// <summary>
/// Validates the dataset, builds the class mapping if none exists yet, and
/// trains on the dataset with its classes translated through that mapping.
/// </summary>
/// <param name="ds">Training data; must have at least one sample and one feature.</param>
/// <exception cref="Exception">The dataset has no samples or no features.</exception>
protected override void Train(IDataset ds)
{
    if (ds.nSamples() <= 0)
    {
        throw new Exception("nSamples of IDataset must be > 0");
    }
    if (ds.nFeatures() <= 0)
    {
        throw new Exception("nFeatures of IDataset must be > 0");
    }

    bool mappingMissing = c2i.Length() < 1;
    if (mappingMissing)
    {
        // Gather the raw class of every sample and derive c2i / i2c from them.
        int sampleCount = ds.nSamples();
        Intarray raw_classes = new Intarray();
        raw_classes.ReserveTo(sampleCount);
        for (int s = 0; s < sampleCount; s++)
        {
            raw_classes.Push(ds.Cls(s));
        }
        ClassMap(c2i, i2c, raw_classes);
    }

    TrainDense(new TranslatedDataset(ds, c2i));
}
/// <summary>
/// Trains the content classifier and, when junk handling is enabled, the
/// junk classifier; afterwards stores the accumulated sample count in the
/// "%nsamples" parameter.
/// </summary>
/// <param name="ds">The training data.</param>
protected override void Train(IDataset ds)
{
    bool use_junk = PGetb("junk") && !DisableJunk;

    // Running total of samples seen across calls.
    int nsamples = ds.nSamples();
    if (PExists("%nsamples"))
    {
        nsamples += PGeti("%nsamples");
    }

    Global.Debugf("info", "Training content classifier");
    if (CharClass.IsEmpty)
    {
        Initialize(CreateClassesFromDataset(ds));
    }

    if (!use_junk)
    {
        CharClass.TrainDense(ds, PGeti("epochs"));
    }
    else
    {
        // The content classifier only sees the samples that are not junk.
        Intarray nonjunk = new Intarray();
        for (int s = 0; s < ds.nSamples(); s++)
        {
            if (ds.Cls(s) != jc())
            {
                nonjunk.Push(s);
            }
        }
        Datasubset nonjunkds = new Datasubset(ds, nonjunk);
        CharClass.TrainDense(nonjunkds, PGeti("epochs"));

        Global.Debugf("info", "Training junk classifier");
        // Relabel every sample with one of the two junk-classifier classes.
        Intarray isjunk = new Intarray();
        int njunk = 0;
        for (int s = 0; s < ds.nSamples(); s++)
        {
            bool junkSample = ds.Cls(s) == jc();
            isjunk.Push(JunkClass.Classes[junkSample ? 1 : 0]);
            if (junkSample)
            {
                njunk++;
            }
        }

        if (njunk <= 0)
        {
            Global.Debugf("warn", "you are training a junk class but there are no samples to train on");
            JunkClass.DeleteLenet();
        }
        else
        {
            MappedDataset junkds = new MappedDataset(ds, isjunk);
            JunkClass.TrainDense(junkds, PGeti("epochs"));
        }
    }

    PSet("%nsamples", nsamples);
}
/// <summary>
/// Trains the content classifier and, when junk handling is enabled, the
/// junk classifier; afterwards stores the accumulated sample count in the
/// "%nsamples" parameter.
/// </summary>
/// <param name="ds">The training data.</param>
protected override void Train(IDataset ds)
{
    bool use_junk = PGetb("junk") && !DisableJunk;

    // Running total of samples seen across calls.
    int nsamples = ds.nSamples();
    if (PExists("%nsamples"))
    {
        nsamples += PGeti("%nsamples");
    }

    Global.Debugf("info", "Training content classifier");
    if (CharClass.IsEmpty)
    {
        Initialize(CreateClassesFromDataset(ds));
    }

    if (!use_junk)
    {
        CharClass.TrainDense(ds, PGeti("epochs"));
    }
    else
    {
        // The content classifier only sees the samples that are not junk.
        Intarray nonjunk = new Intarray();
        for (int s = 0; s < ds.nSamples(); s++)
        {
            if (ds.Cls(s) != jc())
            {
                nonjunk.Push(s);
            }
        }
        Datasubset nonjunkds = new Datasubset(ds, nonjunk);
        CharClass.TrainDense(nonjunkds, PGeti("epochs"));

        Global.Debugf("info", "Training junk classifier");
        // Relabel every sample with one of the two junk-classifier classes.
        Intarray isjunk = new Intarray();
        int njunk = 0;
        for (int s = 0; s < ds.nSamples(); s++)
        {
            bool junkSample = ds.Cls(s) == jc();
            isjunk.Push(JunkClass.Classes[junkSample ? 1 : 0]);
            if (junkSample)
            {
                njunk++;
            }
        }

        if (njunk <= 0)
        {
            Global.Debugf("warn", "you are training a junk class but there are no samples to train on");
            JunkClass.DeleteLenet();
        }
        else
        {
            MappedDataset junkds = new MappedDataset(ds, isjunk);
            JunkClass.TrainDense(junkds, PGeti("epochs"));
        }
    }

    PSet("%nsamples", nsamples);
}
/// <summary>
/// Rebuilds the vertex sequence that leads to the accepted vertex by
/// following came_from links until a vertex that points to itself.
/// </summary>
/// <param name="result_vertices">
/// Receives the vertices in forward order; left untouched when nothing was accepted.
/// </param>
/// <returns>False when accepted_from is -1, true otherwise.</returns>
public bool reconstruct_vertices(Intarray result_vertices)
{
    if (accepted_from == -1)
    {
        return false;
    }

    // Collected from the accepted vertex backwards.
    Intarray backwards = new Intarray();
    int node = accepted_from;
    backwards.Push(node);
    for (;;)
    {
        int predecessor = came_from[node];
        if (predecessor == node)
        {
            break;
        }
        backwards.Push(predecessor);
        node = predecessor;
    }

    NarrayUtil.Reverse(result_vertices, backwards);
    return true;
}
/// <summary>
/// Appends the outgoing arcs of composed state <paramref name="node"/> to
/// the result arrays. The state decodes to a pair (n1, n2) of states of l1
/// and l2 via division / remainder by l2.nStates(). Arcs of l1 are matched
/// against arcs of l2 where the output of l1 equals the input of l2; label
/// 0 on either side is emitted as a move of that side alone.
/// </summary>
/// <param name="ids">Receives the input label of each composed arc (appended).</param>
/// <param name="targets">Receives the composed target state of each arc (appended).</param>
/// <param name="outputs">Receives the output label of each composed arc (appended).</param>
/// <param name="costs">Receives the cost of each composed arc (appended).</param>
/// <param name="node">The composed state whose arcs are listed.</param>
public override void Arcs(Intarray ids, Intarray targets, Intarray outputs, Floatarray costs, int node)
{
    int n1 = node / l2.nStates();
    int n2 = node % l2.nStates();

    Intarray ids1 = new Intarray();
    Intarray ids2 = new Intarray();
    Intarray t1 = new Intarray();
    Intarray t2 = new Intarray();
    Intarray o1 = new Intarray();
    Intarray o2 = new Intarray();
    Floatarray c1 = new Floatarray();
    Floatarray c2 = new Floatarray();
    l1.Arcs(ids1, t1, o1, c1, n1);
    l2.Arcs(ids2, t2, o2, c2, n2);

    // Order the arcs of l1 by output label and those of l2 by input label,
    // applying the same permutation to every parallel array.
    Intarray p1 = new Intarray();
    Intarray p2 = new Intarray();

    NarrayUtil.Quicksort(p1, o1);
    NarrayUtil.Permute(ids1, p1);
    NarrayUtil.Permute(t1, p1);
    NarrayUtil.Permute(o1, p1);
    NarrayUtil.Permute(c1, p1);

    NarrayUtil.Quicksort(p2, ids2);
    NarrayUtil.Permute(ids2, p2);
    NarrayUtil.Permute(t2, p2);
    NarrayUtil.Permute(o2, p2);
    NarrayUtil.Permute(c2, p2);

    int len1 = o1.Length();
    int len2 = ids2.Length();

    // l1 epsilon moves: l1 advances alone, l2 stays at n2.
    int i1 = 0;
    while (i1 < len1 && o1.At1d(i1) == 0)
    {
        ids.Push(ids1.At1d(i1));
        targets.Push(Combine(t1.At1d(i1), n2));
        outputs.Push(0);
        costs.Push(c1.At1d(i1));
        i1++;
    }

    // l2 epsilon moves: l2 advances alone, l1 stays at n1.
    int i2 = 0;
    while (i2 < o2.Length() && ids2.At1d(i2) == 0)
    {
        ids.Push(0);
        targets.Push(Combine(n1, t2.At1d(i2)));
        outputs.Push(o2.At1d(i2));
        costs.Push(c2.At1d(i2));
        i2++;
    }

    // Non-epsilon moves: merge-walk over the two sorted label sequences.
    while (i1 < len1 && i2 < len2)
    {
        // Skip l1 arcs whose output is below the current l2 input.
        while (i1 < len1 && o1.At1d(i1) < ids2.At1d(i2))
        {
            i1++;
        }
        if (i1 >= len1)
        {
            break;
        }

        // Skip l2 arcs whose input is below the current l1 output.
        while (i2 < len2 && o1.At1d(i1) > ids2.At1d(i2))
        {
            i2++;
        }

        // Pair every l1 arc carrying this label with every l2 arc carrying it.
        while (i1 < len1 && i2 < len2 && o1.At1d(i1) == ids2.At1d(i2))
        {
            for (int j = i2; j < len2 && o1.At1d(i1) == ids2.At1d(j); j++)
            {
                ids.Push(ids1.At1d(i1));
                targets.Push(Combine(t1.At1d(i1), t2.At1d(j)));
                outputs.Push(o2.At1d(j));
                costs.Push(c1.At1d(i1) + c2.At1d(j));
            }
            i1++;
        }
    }
}
/// <summary>
/// The main loop iteration: tries to accept from the current beam,
/// traverses from every beam node, and replaces the beam with the
/// candidates that survived in the n-best list.
/// </summary>
public void Radiate()
{
    Clear();

    // Nodes at or beyond this index are control nodes added during traversal.
    int control_beam_start = beam.Length();

    int idx = 0;
    while (idx < control_beam_start)
    {
        TryAccept(idx);
        idx++;
    }

    // Traversal may add "control nodes" to the beam, so its length is
    // re-read on every iteration.
    idx = 0;
    while (idx < beam.Length())
    {
        int node = beam[idx];
        Traverse(stree.v1[node], stree.v2[node], beamcost[idx], idx);
        idx++;
    }

    // Try accepts from control beam nodes (they're not going to the next beam).
    idx = control_beam_start;
    while (idx < beam.Length())
    {
        TryAccept(idx);
        idx++;
    }

    Intarray new_beam = new Intarray();
    Floatarray new_beamcost = new Floatarray();
    for (int rank = 0; rank < nbest.Length(); rank++)
    {
        int k = nbest.Tag(rank);
        int parent = parent_trails[k];
        if (parent < 0)
        {
            // Control beam nodes carry a negative parent trail; skip them.
            continue;
        }

        int added = stree.Add(beam[parent], all_targets1[k], all_targets2[k],
                              all_inputs[k], all_outputs[k], all_costs[k]);
        new_beam.Push(added);
        new_beamcost.Push(beamcost[parent] + all_costs[k]);
    }

    beam.Move(new_beam);
    beamcost.Move(new_beamcost);
}
/// <summary>
/// Creates any missing sub-classifiers from their parameter names, trains
/// the content classifier and, when junk handling is enabled and a junk
/// classifier is present, the junk classifier.
/// </summary>
/// <param name="ds">The training data.</param>
/// <exception cref="Exception">
/// Junk training ran, the "ul" parameter is positive and ulclass is set.
/// </exception>
protected override void Train(IDataset ds)
{
    bool use_junk = PGetb("junk") && !DisableJunk;

    // Instantiate the components that have not been set yet.
    if (charclass.IsEmpty)
    {
        charclass.SetComponent(ComponentCreator.MakeComponent(PGet("charclass")));
        TryAttachCharClassifierEvent(charclass.Object);
    }
    if (junkclass.IsEmpty)
    {
        junkclass.SetComponent(ComponentCreator.MakeComponent(PGet("junkclass")));
        TryAttachJunkClassifierEvent(junkclass.Object);
    }
    if (ulclass.IsEmpty)
    {
        ulclass.SetComponent(ComponentCreator.MakeComponent(PGet("ulclass")));
    }

    Global.Debugf("info", "Training content classifier");
    if (use_junk && !junkclass.IsEmpty)
    {
        // The content classifier only sees the samples that are not junk.
        Intarray nonjunk = new Intarray();
        for (int s = 0; s < ds.nSamples(); s++)
        {
            if (ds.Cls(s) != jc())
            {
                nonjunk.Push(s);
            }
        }
        Datasubset nonjunkds = new Datasubset(ds, nonjunk);
        charclass.Object.XTrain(nonjunkds);
    }
    else
    {
        charclass.Object.XTrain(ds);
    }

    // Everything below concerns the junk classifier only.
    if (!(use_junk && !junkclass.IsEmpty))
    {
        return;
    }

    Global.Debugf("info", "Training junk classifier");
    // Relabel every sample as 1 (junk) or 0 (not junk).
    Intarray isjunk = new Intarray();
    int njunk = 0;
    for (int s = 0; s < ds.nSamples(); s++)
    {
        bool junkSample = ds.Cls(s) == jc();
        isjunk.Push(junkSample ? 1 : 0);
        if (junkSample)
        {
            njunk++;
        }
    }

    if (njunk <= 0)
    {
        Global.Debugf("warn", "you are training a junk class but there are no samples to train on");
        junkclass.SetComponent(null);
    }
    else
    {
        MappedDataset junkds = new MappedDataset(ds, isjunk);
        junkclass.Object.XTrain(junkds);
    }

    if (PGeti("ul") > 0 && !ulclass.IsEmpty)
    {
        throw new Exception("ulclass not implemented");
    }
}
/// <summary>
/// Appends the outgoing arcs of composed state <paramref name="node"/> to
/// the result arrays. The state decodes to a pair (n1, n2) of states of l1
/// and l2 via division / remainder by l2.nStates(). Arcs of l1 are matched
/// against arcs of l2 where the output of l1 equals the input of l2; label
/// 0 on either side is emitted as a move of that side alone.
/// </summary>
/// <param name="ids">Receives the input label of each composed arc (appended).</param>
/// <param name="targets">Receives the composed target state of each arc (appended).</param>
/// <param name="outputs">Receives the output label of each composed arc (appended).</param>
/// <param name="costs">Receives the cost of each composed arc (appended).</param>
/// <param name="node">The composed state whose arcs are listed.</param>
public override void Arcs(Intarray ids, Intarray targets, Intarray outputs, Floatarray costs, int node)
{
    int n1 = node / l2.nStates();
    int n2 = node % l2.nStates();

    Intarray ids1 = new Intarray();
    Intarray ids2 = new Intarray();
    Intarray t1 = new Intarray();
    Intarray t2 = new Intarray();
    Intarray o1 = new Intarray();
    Intarray o2 = new Intarray();
    Floatarray c1 = new Floatarray();
    Floatarray c2 = new Floatarray();
    l1.Arcs(ids1, t1, o1, c1, n1);
    l2.Arcs(ids2, t2, o2, c2, n2);

    // Order the arcs of l1 by output label and those of l2 by input label,
    // applying the same permutation to every parallel array.
    Intarray p1 = new Intarray();
    Intarray p2 = new Intarray();

    NarrayUtil.Quicksort(p1, o1);
    NarrayUtil.Permute(ids1, p1);
    NarrayUtil.Permute(t1, p1);
    NarrayUtil.Permute(o1, p1);
    NarrayUtil.Permute(c1, p1);

    NarrayUtil.Quicksort(p2, ids2);
    NarrayUtil.Permute(ids2, p2);
    NarrayUtil.Permute(t2, p2);
    NarrayUtil.Permute(o2, p2);
    NarrayUtil.Permute(c2, p2);

    int len1 = o1.Length();
    int len2 = ids2.Length();

    // l1 epsilon moves: l1 advances alone, l2 stays at n2.
    int i1 = 0;
    while (i1 < len1 && o1.At1d(i1) == 0)
    {
        ids.Push(ids1.At1d(i1));
        targets.Push(Combine(t1.At1d(i1), n2));
        outputs.Push(0);
        costs.Push(c1.At1d(i1));
        i1++;
    }

    // l2 epsilon moves: l2 advances alone, l1 stays at n1.
    int i2 = 0;
    while (i2 < o2.Length() && ids2.At1d(i2) == 0)
    {
        ids.Push(0);
        targets.Push(Combine(n1, t2.At1d(i2)));
        outputs.Push(o2.At1d(i2));
        costs.Push(c2.At1d(i2));
        i2++;
    }

    // Non-epsilon moves: merge-walk over the two sorted label sequences.
    while (i1 < len1 && i2 < len2)
    {
        // Skip l1 arcs whose output is below the current l2 input.
        while (i1 < len1 && o1.At1d(i1) < ids2.At1d(i2))
        {
            i1++;
        }
        if (i1 >= len1)
        {
            break;
        }

        // Skip l2 arcs whose input is below the current l1 output.
        while (i2 < len2 && o1.At1d(i1) > ids2.At1d(i2))
        {
            i2++;
        }

        // Pair every l1 arc carrying this label with every l2 arc carrying it.
        while (i1 < len1 && i2 < len2 && o1.At1d(i1) == ids2.At1d(i2))
        {
            for (int j = i2; j < len2 && o1.At1d(i1) == ids2.At1d(j); j++)
            {
                ids.Push(ids1.At1d(i1));
                targets.Push(Combine(t1.At1d(i1), t2.At1d(j)));
                outputs.Push(o2.At1d(j));
                costs.Push(c1.At1d(i1) + c2.At1d(j));
            }
            i1++;
        }
    }
}
/// <summary>
/// Fills <paramref name="lines"/> with the line numbers found in the
/// directory of page <paramref name="ipage"/>, in ascending order.
/// </summary>
/// <param name="lines">Cleared, then filled with the sorted line numbers.</param>
/// <param name="ipage">Page number; formatted as four digits to build the directory name.</param>
protected virtual void GetLinesOfPage(Intarray lines, int ipage)
{
    lines.Clear();

    string dirName = String.Format("{0}{1}{2:0000}", prefix, Path.DirectorySeparatorChar, ipage);
    DirPattern dpattern = new DirPattern(dirName, @"([0-9][0-9][0-9][0-9])\.png");
    int found = dpattern.Length;
    if (found > 0)
    {
        lines.ReserveTo(found);
    }

    // Parse every match first, then sort, so the output does not depend
    // on the order in which the matches are listed.
    List<int> numbers = new List<int>(found);
    for (int m = 0; m < found; m++)
    {
        numbers.Add(int.Parse(dpattern[m]));
    }
    numbers.Sort();

    foreach (int lineNumber in numbers)
    {
        lines.Push(lineNumber);
    }
}
/// <summary>
/// Simple interface for line recognizers: builds a linear chain of states
/// with one transition per character of <paramref name="text"/>.
/// </summary>
/// <param name="text">The characters to emit, one per transition.</param>
/// <param name="costs">Cost of each transition, indexed like <paramref name="text"/>.</param>
/// <param name="ids">Id of each transition, indexed like <paramref name="text"/>.</param>
public virtual void SetString(string text, Floatarray costs, Intarray ids)
{
    int n = text.Length;

    // n characters need n + 1 states.
    Intarray states = new Intarray();
    states.Clear();
    for (int made = 0; made <= n; made++)
    {
        states.Push(NewState());
    }

    for (int pos = 0; pos < n; pos++)
    {
        int src = states[pos];
        int dst = states[pos + 1];
        AddTransition(src, dst, text[pos], costs[pos], ids[pos]);
    }

    SetStart(states[0]);
    SetAccept(states[n]);
}
/// <summary>
/// Output the segmentation into a segmentation graph.
/// Construct a state for each of the segments, then
/// add transitions between states (segments)
/// from min(segments[i]) to max(segments[i])+1.
/// </summary>
/// <remarks>
/// Multi-character outputs are spelled out through freshly created
/// intermediate states; only the last character carries the class cost,
/// the group id and the optional trailing space.
/// </remarks>
/// <param name="fst">The FST that receives the lattice. It is cleared first.</param>
public override void GetLattice(IGenericFst fst)
{
    fst.Clear();

    // One state per segment label; index 0 (background) stays at -1.
    int final = NarrayUtil.Max(labels) + 1;
    Intarray states = new Intarray(final + 1);
    states.Fill(-1);
    for (int i = 1; i < states.Length(); i++)
    {
        states[i] = fst.NewState();
    }
    fst.SetStart(states[1]);
    fst.SetAccept(states[final]);

    for (int i = 0; i < boxes.Length(); i++)
    {
        int start = NarrayUtil.Min(segments.At1d(i));
        int end = NarrayUtil.Max(segments.At1d(i));

        // The id packs the first and last segment of the group.
        int id = (start << 16) + end;
        if (segments.At1d(i).Length() == 0)
        {
            id = 0;
        }

        float yes = spaces[i, 0];
        float no = spaces[i, 1];
        // if no space is set, assume no space is present
        if (yes == float.PositiveInfinity && no == float.PositiveInfinity)
        {
            no = 0.0f;
        }

        for (int j = 0; j < class_costs[i].Length(); j++)
        {
            float cost = class_costs[i][j];
            string str = class_outputs[i][j];
            int n = str.Length;

            // Index (into states) of the state the next character leaves from.
            int last = start;
            for (int k = 0; k < n; k++)
            {
                int c = (int)str[k];
                if (k < n - 1)
                {
                    // add intermediate states/transitions for all but the last character
                    states.Push(fst.NewState());
                    fst.AddTransition(states[last], states.Last(), c, 0.0f, 0);
                    last = states.Length() - 1;
                }
                else
                {
                    // for the last character, handle the spaces as well
                    if (no < 1000.0f)
                    {
                        // add the last character as a direct transition with no space
                        fst.AddTransition(states[last], states[end + 1], c, cost + no, id);
                    }
                    if (yes < 1000.0f)
                    {
                        // insert another state to handle spaces
                        states.Push(fst.NewState());
                        int space_state = states.Last();
                        // Leave from `last`, not `start`: for multi-character
                        // outputs the space variant must also pass through the
                        // prefix characters emitted above. For single-character
                        // outputs last == start, so nothing changes there.
                        fst.AddTransition(states[last], space_state, c, cost, id);
                        fst.AddTransition(space_state, states[end + 1], (int)' ', yes, 0);
                    }
                }
            } // for k
        } // for j
    } // for i
}
/// <summary>
/// Randomly sample an FST, assuming any input.
/// </summary>
/// <param name="result">Receives the output symbol of every arc taken (appended).</param>
/// <param name="fst">The FST.</param>
/// <param name="max">The maximum number of arcs to follow.</param>
/// <returns>
/// The summed cost of the arcs taken plus the accept cost of the state
/// where sampling stopped.
/// </returns>
public static double fst_sample(Intarray result, IGenericFst fst, int max = 1000)
{
    double pathCost = 0;
    int state = fst.GetStart();

    int step = 0;
    while (step < max)
    {
        Intarray arcInputs = new Intarray();
        Intarray arcOutputs = new Intarray();
        Intarray arcTargets = new Intarray();
        Floatarray arcCosts = new Floatarray();
        fst.Arcs(arcInputs, arcTargets, arcOutputs, arcCosts, state);

        // Accepting at this state is treated as one more option, appended
        // after the outgoing arcs so that all costs are sampled uniformly.
        arcCosts.Push(fst.GetAcceptCost(state));
        int picked = sample_by_costs(arcCosts);
        bool pickedAccept = picked == arcCosts.Length() - 1;
        if (pickedAccept)
        {
            break;
        }

        result.Push(arcOutputs[picked]);
        pathCost += arcCosts[picked];
        state = arcTargets[picked];
        step++;
    }

    return pathCost + fst.GetAcceptCost(state);
}
/// <summary>
/// Trains on a dense dataset. When cross-validation is enabled, the samples
/// are split by sample id into a training and a testing subset first;
/// otherwise the full dataset is used for both.
/// </summary>
/// <param name="ds">The dataset to train on.</param>
public override void TrainDense(IDataset ds)
{
    float split = PGetf("cv_split");
    int mlp_cv_max = PGeti("cv_max");

    if (!crossvalidate)
    {
        TrainBatch(ds, ds);
        return;
    }

    // Perform the split on distinct ids, making sure that the same sample
    // never ends up in both the test and the training set (even if the
    // data set is the result of resampling).
    Intarray ids = new Intarray();
    for (int s = 0; s < ds.nSamples(); s++)
    {
        ids.Push(ds.Id(s));
    }
    NarrayUtil.Uniq(ids);
    Global.Debugf("cvdetail", "reduced {0} ids to {1} ids", ds.nSamples(), ids.Length());
    NarrayUtil.Shuffle(ids);

    // The first nids shuffled ids become the test ids, capped by cv_max.
    int nids = (int)((1.0 - split) * ids.Length());
    nids = Math.Min(nids, mlp_cv_max);
    Intarray test_ids = new Intarray();
    for (int t = 0; t < nids; t++)
    {
        test_ids.Push(ids[t]);
    }
    // Sorted before the membership lookups below.
    NarrayUtil.Quicksort(test_ids);

    Intarray training = new Intarray();
    Intarray testing = new Intarray();
    for (int s = 0; s < ds.nSamples(); s++)
    {
        bool heldOut = ClassifierUtil.Bincontains(test_ids, ds.Id(s));
        (heldOut ? testing : training).Push(s);
    }

    Global.Debugf("cvdetail", "#training {0} #testing {1}", training.Length(), testing.Length());
    PSet("%ntraining", training.Length());
    PSet("%ntesting", testing.Length());

    Datasubset trs = new Datasubset(ds, training);
    Datasubset tss = new Datasubset(ds, testing);
    TrainBatch(trs, tss);
}
/// <summary>
/// Creates any missing sub-classifiers from their parameter names, trains
/// the content classifier and, when junk handling is enabled and a junk
/// classifier is present, the junk classifier.
/// </summary>
/// <param name="ds">The training data.</param>
/// <exception cref="Exception">
/// Junk training ran, the "ul" parameter is positive and ulclass is set.
/// </exception>
protected override void Train(IDataset ds)
{
    bool use_junk = PGetb("junk") && !DisableJunk;

    // Instantiate the components that have not been set yet.
    if (charclass.IsEmpty)
    {
        charclass.SetComponent(ComponentCreator.MakeComponent(PGet("charclass")));
        TryAttachCharClassifierEvent(charclass.Object);
    }
    if (junkclass.IsEmpty)
    {
        junkclass.SetComponent(ComponentCreator.MakeComponent(PGet("junkclass")));
        TryAttachJunkClassifierEvent(junkclass.Object);
    }
    if (ulclass.IsEmpty)
    {
        ulclass.SetComponent(ComponentCreator.MakeComponent(PGet("ulclass")));
    }

    Global.Debugf("info", "Training content classifier");
    if (use_junk && !junkclass.IsEmpty)
    {
        // The content classifier only sees the samples that are not junk.
        Intarray nonjunk = new Intarray();
        for (int s = 0; s < ds.nSamples(); s++)
        {
            if (ds.Cls(s) != jc())
            {
                nonjunk.Push(s);
            }
        }
        Datasubset nonjunkds = new Datasubset(ds, nonjunk);
        charclass.Object.XTrain(nonjunkds);
    }
    else
    {
        charclass.Object.XTrain(ds);
    }

    // Everything below concerns the junk classifier only.
    if (!(use_junk && !junkclass.IsEmpty))
    {
        return;
    }

    Global.Debugf("info", "Training junk classifier");
    // Relabel every sample as 1 (junk) or 0 (not junk).
    Intarray isjunk = new Intarray();
    int njunk = 0;
    for (int s = 0; s < ds.nSamples(); s++)
    {
        bool junkSample = ds.Cls(s) == jc();
        isjunk.Push(junkSample ? 1 : 0);
        if (junkSample)
        {
            njunk++;
        }
    }

    if (njunk <= 0)
    {
        Global.Debugf("warn", "you are training a junk class but there are no samples to train on");
        junkclass.SetComponent(null);
    }
    else
    {
        MappedDataset junkds = new MappedDataset(ds, isjunk);
        junkclass.Object.XTrain(junkds);
    }

    if (PGeti("ul") > 0 && !ulclass.IsEmpty)
    {
        throw new Exception("ulclass not implemented");
    }
}
/// <summary>
/// Copy one FST to another, preserving only lowest-cost arcs: for every
/// (source state, target state) pair only the cheapest arc is kept.
/// This is useful for visualization.
/// </summary>
/// <param name="dst">The destination. Will be cleared before copying.</param>
/// <param name="src">The FST to copy.</param>
/// <exception cref="Exception">
/// The arrays returned by <c>src.Arcs</c> for a state differ in length.
/// </exception>
public static void fst_copy_best_arcs_only(IGenericFst dst, IGenericFst src)
{
    dst.Clear();

    // Mirror the state set; state i of dst corresponds to state i of src.
    int n = src.nStates();
    for (int i = 0; i < n; i++)
    {
        dst.NewState();
    }
    dst.SetStart(src.GetStart());

    for (int i = 0; i < n; i++)
    {
        dst.SetAccept(i, src.GetAcceptCost(i));

        Intarray targets = new Intarray(), outputs = new Intarray(), inputs = new Intarray();
        Floatarray costs = new Floatarray();
        src.Arcs(inputs, targets, outputs, costs, i);

        int inlen = inputs.Length();
        if (inlen != targets.Length())
        {
            throw new Exception("ASSERT: inputs.length() == targets.length()");
        }
        if (inlen != outputs.Length())
        {
            throw new Exception("ASSERT: inputs.length() == outputs.length()");
        }
        if (inlen != costs.Length())
        {
            throw new Exception("ASSERT: inputs.length() == costs.length()");
        }

        // Map each target state to the index of the cheapest arc leading to it.
        // The loop must run over the arcs of state i (inlen), not over the
        // number of states (n): the two counts are unrelated.
        Dictionary<int, int> hash = new Dictionary<int, int>();
        for (int j = 0; j < inlen; j++)
        {
            int t = targets[j];
            int best_so_far;
            if (!hash.TryGetValue(t, out best_so_far) || costs[j] < costs[best_so_far])
            {
                hash[t] = j;
            }
        }

        // Emit exactly one arc per distinct target.
        foreach (int j in hash.Values)
        {
            dst.AddTransition(i, targets[j], outputs[j], costs[j], inputs[j]);
        }
    }
}