/// <summary>
/// Reads the file header from <paramref name="reader"/>, recreates the states of
/// <paramref name="fst"/> and skips over any symbol tables announced by the header flags.
/// </summary>
/// <param name="fst">The FST that is cleared and populated with empty states.</param>
/// <param name="reader">The binary source, positioned at the beginning of the header.</param>
/// <exception cref="Exception">
/// The magic number does not match, or the stored version is lower than MIN_VERSION.
/// </exception>
protected static void read_header_and_symbols(IGenericFst fst, BinaryReader reader)
{
    int magic = read_int32_LE(reader);
    if (magic != OPENFST_MAGIC)
    {
        throw new Exception("invalid magic number");
    }

    read_magic_string(reader, "vector");
    read_magic_string(reader, "standard");

    int fileVersion = read_int32_LE(reader);
    if (fileVersion < MIN_VERSION)
    {
        throw new Exception("file has too old version");
    }

    int headerFlags = read_int32_LE(reader);
    read_int64_LE(reader); // properties (read and discarded)
    Int64 startState = read_int64_LE(reader);
    Int64 stateCount = read_int64_LE(reader);
    if (stateCount < 0)
    {
        // to prevent creating 2^31 nodes in case of sudden EOF
        return;
    }

    fst.Clear();
    for (int created = 0; created < stateCount; created++)
    {
        fst.NewState();
    }
    fst.SetStart((int)startState);

    read_int64_LE(reader); // narcs (read and discarded)

    // Input symbols come first, then output symbols; neither table is kept.
    bool hasInputSymbols = (headerFlags & FLAG_HAS_ISYMBOLS) > 0;
    bool hasOutputSymbols = (headerFlags & FLAG_HAS_OSYMBOLS) > 0;
    if (hasInputSymbols)
    {
        skip_symbol_table(reader);
    }
    if (hasOutputSymbols)
    {
        skip_symbol_table(reader);
    }
}
/// <summary>
/// Runs an <c>AStarSearch</c> over a reversed copy of <paramref name="fst"/> and copies
/// the search's <c>g</c> array into <paramref name="costs_for_all_nodes"/>.
/// </summary>
/// <param name="costs_for_all_nodes">Receives one value per node of <paramref name="fst"/>.</param>
/// <param name="fst">The FST to search backwards.</param>
public static void a_star_backwards(Floatarray costs_for_all_nodes, IGenericFst fst)
{
    IGenericFst reversed = FstFactory.MakeOcroFST();

    // The reversed copy contains one extra vertex.
    FstUtil.fst_copy_reverse(reversed, fst, true);

    AStarSearch search = new AStarSearch(reversed);
    search.Loop();

    costs_for_all_nodes.Copy(search.g);

    // Drop the trailing entry that belongs to the extra vertex.
    costs_for_all_nodes.Pop();
}
int n; // the number of nodes; also the virtual accept index

#endregion Fields

#region Constructors

/// <summary>
/// Prepares a search over <paramref name="fst"/>: allocates the heap and the
/// per-node arrays, then queues the start node with a cost of zero.
/// </summary>
/// <param name="fst">The FST to be searched.</param>
public AStarSearch(IGenericFst fst)
{
    this.fst = fst;
    accepted_from = -1;

    // The heap gets one slot more than there are states.
    heap = new Heap(fst.nStates() + 1);
    n = fst.nStates();

    came_from = new Intarray(n);
    came_from.Fill(-1);
    g = new Floatarray(n);

    // insert the start node; it is recorded as its own predecessor
    int startNode = fst.GetStart();
    g[startNode] = 0;
    came_from[startNode] = startNode;
    float startPriority = Convert.ToSingle(Heuristic(startNode));
    heap.Push(startNode, startPriority);
}
/// <summary>
/// Creates a composition of two FSTs.
/// </summary>
/// <param name="l1">The first FST; must have at least one state.</param>
/// <param name="l2">The second FST; must have at least one state.</param>
/// <param name="o_s">Value stored in <c>override_start</c>.</param>
/// <param name="o_f">Value stored in <c>override_finish</c>.</param>
/// <exception cref="Exception">Either FST reports zero states.</exception>
public CompositionFstImpl(IGenericFst l1, IGenericFst l2, int o_s, int o_f)
{
    override_start = o_s;
    override_finish = o_f;

    bool firstIsEmpty = l1.nStates() == 0;
    if (firstIsEmpty)
    {
        throw new Exception("CHECK_ARG: l1->nStates() > 0");
    }

    bool secondIsEmpty = l2.nStates() == 0;
    if (secondIsEmpty)
    {
        throw new Exception("CHECK_ARG: l2->nStates() > 0");
    }

    // The operands are stored only after validation succeeded, so a failed
    // check never leaves the object holding references to them.
    this.l1 = l1;
    this.l2 = l2;
}
/// <summary>
/// Copy one FST to another, preserving only lowest-cost arcs.
/// For every (source state, target state) pair only the cheapest arc is kept.
/// This is useful for visualization.
/// </summary>
/// <param name="dst">The destination. Will be cleared before copying.</param>
/// <param name="src">The FST to copy.</param>
/// <exception cref="Exception">
/// The arrays returned by <c>src.Arcs</c> for a state differ in length.
/// </exception>
public static void fst_copy_best_arcs_only(IGenericFst dst, IGenericFst src)
{
    dst.Clear();
    int n = src.nStates();
    for (int i = 0; i < n; i++)
        dst.NewState();
    dst.SetStart(src.GetStart());

    for (int i = 0; i < n; i++)
    {
        dst.SetAccept(i, src.GetAcceptCost(i));

        Intarray targets = new Intarray(), outputs = new Intarray(), inputs = new Intarray();
        Floatarray costs = new Floatarray();
        src.Arcs(inputs, targets, outputs, costs, i);

        int inlen = inputs.Length();
        if (inlen != targets.Length())
            throw new Exception("ASSERT: inputs.length() == targets.length()");
        if (inlen != outputs.Length())
            throw new Exception("ASSERT: inputs.length() == outputs.length()");
        if (inlen != costs.Length())
            throw new Exception("ASSERT: inputs.length() == costs.length()");

        // Maps each target state to the index of the cheapest arc leading to it.
        // The loop must run over the arcs of state i (inlen), not over the
        // number of states (n): the two counts are unrelated.
        Dictionary<int, int> bestArcByTarget = new Dictionary<int, int>();
        for (int j = 0; j < inlen; j++)
        {
            int t = targets[j];
            int bestSoFar;
            // On equal cost the arc seen first wins.
            if (!bestArcByTarget.TryGetValue(t, out bestSoFar) || costs[j] < costs[bestSoFar])
                bestArcByTarget[t] = j;
        }

        foreach (int j in bestArcByTarget.Values)
            dst.AddTransition(i, targets[j], outputs[j], costs[j], inputs[j]);
    }
}
/// <summary>
/// Duplicates an FST: states, start state, accept costs and every arc.
/// </summary>
/// <param name="dst">The destination. Will be cleared before copying.</param>
/// <param name="src">The FST to copy.</param>
/// <exception cref="Exception">
/// The arrays returned by <c>src.Arcs</c> for a state differ in length.
/// </exception>
public static void fst_copy(IGenericFst dst, IGenericFst src)
{
    dst.Clear();

    int stateCount = src.nStates();
    for (int created = 0; created < stateCount; created++)
    {
        dst.NewState();
    }
    dst.SetStart(src.GetStart());

    for (int state = 0; state < stateCount; state++)
    {
        dst.SetAccept(state, src.GetAcceptCost(state));

        Intarray arcTargets = new Intarray();
        Intarray arcOutputs = new Intarray();
        Intarray arcInputs = new Intarray();
        Floatarray arcCosts = new Floatarray();
        src.Arcs(arcInputs, arcTargets, arcOutputs, arcCosts, state);

        // All four arrays describe the same arcs and therefore must agree in length.
        int arcCount = arcInputs.Length();
        if (arcCount != arcTargets.Length())
        {
            throw new Exception("ASSERT: inputs.length() == targets.length()");
        }
        if (arcCount != arcOutputs.Length())
        {
            throw new Exception("ASSERT: inputs.length() == outputs.length()");
        }
        if (arcCount != arcCosts.Length())
        {
            throw new Exception("ASSERT: inputs.length() == costs.length()");
        }

        for (int arc = 0; arc < arcCount; arc++)
        {
            dst.AddTransition(
                state,
                arcTargets.At1d(arc),
                arcOutputs.At1d(arc),
                arcCosts.At1d(arc),
                arcInputs.At1d(arc));
        }
    }
}
/// <summary>
/// Recognizes a line together with a segmentation argument.
/// This default implementation simply forwards to
/// <c>RecognizeLine(segmentation, result, image)</c>; note that the first two
/// arguments are passed in swapped order compared to this method's signature.
/// </summary>
/// <remarks>
/// Older documentation of this method described a "useit" switch selecting whether
/// the given segmentation is used or merely displayed; no such parameter exists here.
/// </remarks>
/// <param name="result">The FST handed on as the lattice argument.</param>
/// <param name="segmentation">The segmentation array handed on to the three-argument overload.</param>
/// <param name="image">The line image.</param>
/// <returns>Whatever the three-argument <c>RecognizeLine</c> overload returns.</returns>
public virtual double RecognizeLineSeg(IGenericFst result, Intarray segmentation, Bytearray image)
{
    double forwarded = RecognizeLine(segmentation, result, image);
    return forwarded;
}
/// <summary>
/// Recognize a text line and return a lattice representing
/// the recognition alternatives.
/// </summary>
/// <param name="result">The FST that receives the recognition lattice.</param>
/// <param name="image">The image of the text line to recognize.</param>
/// <returns>
/// NOTE(review): the meaning of the returned value is not visible from this
/// declaration; confirm against the concrete implementations.
/// </returns>
public abstract double RecognizeLine(IGenericFst result, Bytearray image);
/// <summary>
/// Hands back the second operand and clears this object's reference to it.
/// </summary>
/// <returns>The FST previously held in <c>l2</c> (null if it was already taken).</returns>
public override IGenericFst Move2()
{
    IGenericFst released = l2;
    l2 = null;
    return released;
}
/// <summary>
/// Reads one state record from <paramref name="reader"/>: its accept cost followed
/// by its arcs, and stores them in <paramref name="fst"/>.
/// </summary>
/// <param name="reader">The binary source positioned at the state record.</param>
/// <param name="fst">The FST that receives the accept cost and transitions.</param>
/// <param name="index">The index of the state being read.</param>
protected static void read_node(BinaryReader reader, IGenericFst fst, int index)
{
    float acceptCost = read_float(reader);
    fst.SetAccept(index, acceptCost);

    Int64 arcCount = read_int64_LE(reader);
    for (int arc = 0; arc < arcCount; arc++)
    {
        // Stream order per arc: input label, output label, cost, target state.
        int inputLabel = read_int32_LE(reader);
        int outputLabel = read_int32_LE(reader);
        float arcCost = read_float(reader);
        int targetState = read_int32_LE(reader);

        fst.AddTransition(index, targetState, outputLabel, arcCost, inputLabel);
    }
}
/// <summary>
/// Serializes <paramref name="fst"/> to <paramref name="writer"/>: the header first,
/// then one record per state in index order.
/// </summary>
/// <param name="writer">The binary destination.</param>
/// <param name="fst">The FST to write.</param>
public static void fst_write(BinaryWriter writer, IGenericFst fst)
{
    write_header_and_symbols(writer, fst);

    int state = 0;
    while (state < fst.nStates())
    {
        write_node(writer, fst, state);
        state++;
    }
}
/// <summary>
/// Writes one state record: the accept cost, the arc count and then every arc of
/// state <paramref name="index"/>.
/// </summary>
/// <param name="writer">The binary destination.</param>
/// <param name="fst">The FST that owns the state.</param>
/// <param name="index">The index of the state to write.</param>
protected static void write_node(BinaryWriter writer, IGenericFst fst, int index)
{
    Intarray arcInputs = new Intarray();
    Intarray arcTargets = new Intarray();
    Intarray arcOutputs = new Intarray();
    Floatarray arcCosts = new Floatarray();
    fst.Arcs(arcInputs, arcTargets, arcOutputs, arcCosts, index);

    // The number of arcs is taken from the targets array.
    int arcCount = arcTargets.Length();

    write_float(writer, fst.GetAcceptCost(index));
    write_int64_LE(writer, arcCount);

    int arc = 0;
    while (arc < arcCount)
    {
        // Stream order per arc: input label, output label, cost, target state.
        write_int32_LE(writer, arcInputs[arc]);
        write_int32_LE(writer, arcOutputs[arc]);
        write_float(writer, arcCosts[arc]);
        write_int32_LE(writer, arcTargets[arc]);
        arc++;
    }
}
/// <summary>
/// Builds in <paramref name="dst"/> a copy of <paramref name="src"/> with every arc
/// reversed. The copy has one state more than the source: the extra state (index
/// <c>src.nStates()</c>) becomes the start state and gets a transition with labels 0
/// to every source state, carrying that state's accept cost.
/// </summary>
/// <param name="dst">The destination. Will be cleared before copying.</param>
/// <param name="src">The FST to reverse.</param>
/// <param name="no_accept">
/// When false, the source's start state is marked accepting in the destination;
/// when true, no accept state is set.
/// </param>
/// <exception cref="Exception">
/// The arrays returned by <c>src.Arcs</c> for a state differ in length.
/// </exception>
public static void fst_copy_reverse(IGenericFst dst, IGenericFst src, bool no_accept = false)
{
    dst.Clear();

    int sourceStates = src.nStates();
    // One state per source state plus the additional start vertex.
    for (int created = 0; created <= sourceStates; created++)
    {
        dst.NewState();
    }

    if (!no_accept)
    {
        dst.SetAccept(src.GetStart());
    }

    int extraStart = sourceStates;
    dst.SetStart(extraStart);

    for (int state = 0; state < sourceStates; state++)
    {
        dst.AddTransition(extraStart, state, 0, src.GetAcceptCost(state), 0);

        Intarray arcTargets = new Intarray();
        Intarray arcOutputs = new Intarray();
        Intarray arcInputs = new Intarray();
        Floatarray arcCosts = new Floatarray();
        src.Arcs(arcInputs, arcTargets, arcOutputs, arcCosts, state);

        int arcCount = arcInputs.Length();
        if (arcCount != arcTargets.Length())
        {
            throw new Exception("ASSERT: inputs.length() == targets.length()");
        }
        if (arcCount != arcOutputs.Length())
        {
            throw new Exception("ASSERT: inputs.length() == outputs.length()");
        }
        if (arcCount != arcCosts.Length())
        {
            throw new Exception("ASSERT: inputs.length() == costs.length()");
        }

        // Each arc state -> target is emitted as target -> state.
        for (int arc = 0; arc < arcCount; arc++)
        {
            dst.AddTransition(
                arcTargets.At1d(arc),
                state,
                arcOutputs.At1d(arc),
                arcCosts.At1d(arc),
                arcInputs.At1d(arc));
        }
    }
}
/// <summary>
/// Aligns a lattice with a transcription.
/// This base implementation is a placeholder and always throws.
/// </summary>
/// <param name="chars">
/// Characters along the best path. Currently every character must have a
/// corresponding region in <paramref name="seg"/> and the characters must be in
/// reading order. Eventually it may contain characters (e.g., spaces) that do not
/// correspond to any region. Note that it may not correspond to any string
/// allowed/suggested by the transcription.
/// </param>
/// <param name="seg">Aligned segmentation; colors correspond to chars (starting at 1).</param>
/// <param name="costs">Costs corresponding to chars.</param>
/// <param name="image">Input grayscale image.</param>
/// <param name="transcription">The "ground truth" lattice to align.</param>
/// <exception cref="NotImplementedException">Always, unless overridden.</exception>
public virtual void Align(string chars, Intarray seg, Floatarray costs, Bytearray image, IGenericFst transcription)
{
    const string message = "IRecognizeLine:Align: unimplemented";
    throw new NotImplementedException(message);
}
/// <summary>
/// Recognizes a line together with a segmentation argument.
/// This default implementation simply forwards to
/// <c>RecognizeLine(segmentation, result, image)</c>; note that the first two
/// arguments are passed in swapped order compared to this method's signature.
/// </summary>
/// <remarks>
/// Older documentation of this method described a "useit" switch selecting whether
/// the given segmentation is used or merely displayed; no such parameter exists here.
/// </remarks>
/// <param name="result">The FST handed on as the lattice argument.</param>
/// <param name="segmentation">The segmentation array handed on to the three-argument overload.</param>
/// <param name="image">The line image.</param>
/// <returns>Whatever the three-argument <c>RecognizeLine</c> overload returns.</returns>
public virtual double RecognizeLineSeg(IGenericFst result, Intarray segmentation, Bytearray image)
{
    double forwarded = RecognizeLine(segmentation, result, image);
    return forwarded;
}
/// <summary>
/// Optional overload that additionally exposes the character segmentation, for
/// those line recognizers that have one. The segmentation contains colored pixels,
/// and a transition in the transducer of the form * --- 1/eps --> * --- 2/a --> *
/// means that pixels with color 1 and 2 together form the letter "a".
/// This base implementation is a placeholder and always throws.
/// </summary>
/// <param name="segmentation">Receives the character segmentation.</param>
/// <param name="result">Receives the recognition lattice.</param>
/// <param name="image">The line image.</param>
/// <exception cref="NotImplementedException">Always, unless overridden.</exception>
public virtual double RecognizeLine(Intarray segmentation, IGenericFst result, Bytearray image)
{
    const string message = "IRecognizeLine:RecognizeLine: unimplemented";
    throw new NotImplementedException(message);
}
/// <summary>
/// Loads into <paramref name="fst"/> the lattice stored at the path that
/// <c>PathFile</c> produces for the given page, line and variant with the "fst" argument.
/// </summary>
/// <param name="fst">The FST that loads the file.</param>
/// <param name="page">Page number passed to <c>PathFile</c>.</param>
/// <param name="line">Line number passed to <c>PathFile</c>.</param>
/// <param name="variant">Optional variant passed to <c>PathFile</c>; null by default.</param>
public void GetLattice(IGenericFst fst, int page, int line, string variant = null)
{
    fst.Load(PathFile(page, line, variant, "fst"));
}
/// <summary>
/// Compose two FSTs.
/// This function copies the composition of two given FSTs.
/// That causes expansion (storing all arcs explicitly).
/// </summary>
/// <param name="outf">The destination; receives the expanded composition.</param>
/// <param name="f1">The first FST of the composition.</param>
/// <param name="f2">The second FST of the composition.</param>
public static void fst_expand_composition(IGenericFst outf, OcroFST f1, OcroFST f2)
{
    CompositionFst composition = FstFactory.MakeCompositionFst(f1, f2);
    try
    {
        fst_copy(outf, composition);
    }
    finally
    {
        // Take both operands back out of the composition whether or not the copy
        // succeeded. Using finally (instead of catch + "throw ex") lets a failure
        // propagate with its original stack trace.
        composition.Move1();
        composition.Move2();
    }
}
/// <summary>
/// Writes the file header for <paramref name="fst"/>. No symbol tables are written:
/// the flags field is always 0.
/// </summary>
/// <param name="writer">The binary destination.</param>
/// <param name="fst">The FST whose start state and state count go into the header.</param>
protected static void write_header_and_symbols(BinaryWriter writer, IGenericFst fst)
{
    const int noFlags = 0;
    const long unusedArcCount = 0L;

    write_int32_LE(writer, OPENFST_MAGIC);
    write_string(writer, "vector");
    write_string(writer, "standard");
    write_int32_LE(writer, MIN_VERSION);
    write_int32_LE(writer, noFlags);
    write_int64_LE(writer, PROPERTIES);

    int startState = fst.GetStart();
    write_int64_LE(writer, startState);

    int stateCount = fst.nStates();
    write_int64_LE(writer, stateCount);

    // narcs (seems to be unused)
    write_int64_LE(writer, unusedArcCount);
}
/// <summary>
/// Sets <paramref name="fst"/> to the string <paramref name="s"/>, using each
/// character's code as its input symbol and a cost of zero for every character.
/// </summary>
/// <param name="fst">The FST that receives the string.</param>
/// <param name="s">The text to store.</param>
public static void fst_line(IGenericFst fst, string s)
{
    int length = s.Length;

    Floatarray zeroCosts = new Floatarray(length);
    zeroCosts.Fill(0f);

    Intarray charCodes = new Intarray(length);
    int position = 0;
    while (position < length)
    {
        charCodes[position] = (int)s[position];
        position++;
    }

    fst.SetString(s, zeroCosts, charCodes);
}
protected static readonly Int64 PROPERTIES = 3; // expanded, mutable

#endregion Fields

#region Methods

/// <summary>
/// Deserializes an FST from <paramref name="reader"/>: the header first, then one
/// record for every state the FST reports after the header has been processed.
/// </summary>
/// <param name="fst">The FST to fill.</param>
/// <param name="reader">The binary source.</param>
public static void fst_read(IGenericFst fst, BinaryReader reader)
{
    read_header_and_symbols(fst, reader);

    int state = 0;
    while (state < fst.nStates())
    {
        read_node(reader, fst, state);
        state++;
    }
}
/// <summary>
/// Runs an <c>AStarCompositionSearch</c> on <paramref name="composition"/> and, on
/// success, reconstructs the path found: its edges go to <paramref name="inputs"/>,
/// <paramref name="outputs"/> and <paramref name="costs"/>, and its vertices are
/// split into <paramref name="vertices1"/> and <paramref name="vertices2"/>.
/// </summary>
/// <param name="inputs">Receives the reconstructed edge inputs.</param>
/// <param name="vertices1">Receives the first component of the split vertex indices.</param>
/// <param name="vertices2">Receives the second component of the split vertex indices.</param>
/// <param name="outputs">Receives the reconstructed edge outputs.</param>
/// <param name="costs">Receives the reconstructed edge costs.</param>
/// <param name="fst1">Not used by this method.</param>
/// <param name="fst2">Not used by this method.</param>
/// <param name="g1">First array handed to the search constructor.</param>
/// <param name="g2">Second array handed to the search constructor.</param>
/// <param name="composition">The composition to search.</param>
/// <returns>
/// False when the search loop or the vertex reconstruction fails; true otherwise.
/// </returns>
internal static bool a_star2_internal(Intarray inputs, Intarray vertices1, Intarray vertices2, Intarray outputs, Floatarray costs, IGenericFst fst1, IGenericFst fst2, Floatarray g1, Floatarray g2, CompositionFst composition)
{
    Intarray pathVertices = new Intarray();
    AStarCompositionSearch search = new AStarCompositionSearch(g1, g2, composition);

    // Short-circuit: the vertex reconstruction is attempted only after a successful loop.
    if (!search.Loop() || !search.reconstruct_vertices(pathVertices))
    {
        return false;
    }

    search.reconstruct_edges(inputs, outputs, costs, pathVertices);
    composition.SplitIndices(vertices1, vertices2, pathVertices);
    return true;
}
/// <summary>
/// Extract the lattice corresponding to the classifications
/// stored in the Grouper.
/// </summary>
/// <param name="fst">The FST that receives the extracted lattice.</param>
public abstract void GetLattice(IGenericFst fst);
/// <summary>
/// Hands back the first operand and clears this object's reference to it.
/// </summary>
/// <returns>The FST previously held in <c>l1</c> (null if it was already taken).</returns>
public override IGenericFst Move1()
{
    IGenericFst released = l1;
    l1 = null;
    return released;
}
/// <summary>
/// Recognizes a text line: classifies every component reported by the grouper,
/// stores the accepted class hypotheses and space costs in the grouper, and
/// finally extracts the recognition lattice into <paramref name="result"/>.
/// The character segmentation is exposed as well: the segmentation contains colored
/// pixels, and a transition in the transducer of the form
/// * --- 1/eps --> * --- 2/a --> * means that pixels with color 1 and 2 together
/// form the letter "a".
/// </summary>
/// <param name="segmentation_">Receives a copy of the <c>segmentation</c> member.</param>
/// <param name="result">Receives the lattice produced by the grouper.</param>
/// <param name="image_">The line image; checked against the "maxheight" and "maxaspect" parameters.</param>
/// <returns>The local <c>rate</c>, which is initialized to 0.0 and never changed.</returns>
public override double RecognizeLine(Intarray segmentation_, IGenericFst result, Bytearray image_)
{
    double rate = 0.0;
    // Reject lines that are too high or whose Dim(1)/Dim(0) ratio is too large.
    CHECK_ARG(image_.Dim(1) < PGeti("maxheight"), String.Format("input line too high ({0} x {1})", image_.Dim(0), image_.Dim(1)));
    CHECK_ARG(image_.Dim(1) * 1.0 / image_.Dim(0) < PGetf("maxaspect"), String.Format("input line has bad aspect ratio ({0} x {1})", image_.Dim(0), image_.Dim(1)));
    bool use_reject = PGetb("use_reject") && !DisableJunk;
    //Console.WriteLine("IMG: imin:{0} imax:{1}", NarrayUtil.ArgMin(image_), NarrayUtil.ArgMax(image_));
    // Work on a private copy; note that SetLine receives the original image_,
    // not the copy.
    Bytearray image = new Bytearray();
    image.Copy(image_);
    SetLine(image_);
    // NOTE(review): presumably inverts the copy in place (max - pixel) — confirm NarrayUtil.Sub semantics.
    if (PGeti("invert") > 0) NarrayUtil.Sub(NarrayUtil.Max(image), image);
    segmentation_.Copy(segmentation);
    // NOTE(review): available, cp, ccosts and props are never used below.
    Bytearray available = new Bytearray();
    Floatarray cp = new Floatarray();
    Floatarray ccosts = new Floatarray();
    Floatarray props = new Floatarray();
    OutputVector p = new OutputVector();
    int ncomponents = grouper.Object.Length();
    int minclass = PGeti("minclass");
    float minprob = PGetf("minprob");
    float space_yes = PGetf("space_yes");
    float space_no = PGetf("space_no");
    float maxcost = PGetf("maxcost");
    // compute priors if possible; fall back on
    // using no priors if no counts are available
    Floatarray priors = new Floatarray();
    bool use_priors = PGeti("use_priors") > 0;
    if (use_priors)
    {
        if (counts.Length() > 0)
        {
            // Normalize the counts so that they sum to one.
            priors.Copy(counts);
            priors /= NarrayUtil.Sum(priors);
        }
        else
        {
            // Warn only once, then continue without priors.
            if (!counts_warned) Global.Debugf("warn", "use_priors specified but priors unavailable (old model)");
            use_priors = false;
            counts_warned = true;
        }
    }
    EstimateSpaceSize();
    for (int i = 0; i < ncomponents; i++)
    {
        // Extract the masked image of component i and scale it by 1/255 for the classifier.
        Rect b;
        Bytearray mask = new
        Bytearray();
        grouper.Object.GetMask(out b, ref mask, i, 0);
        Bytearray cv = new Bytearray();
        grouper.Object.ExtractWithMask(cv, mask, image, i, 0);
        //ImgIo.write_image_gray("extrmask_image.png", cv);
        Floatarray v = new Floatarray();
        v.Copy(cv);
        v /= 255.0f;
        float ccost = classifier.Object.XOutputs(p, v);
        if (use_reject && classifier.Object.HigherOutputIsBetter)
        {
            ccost = 0;
            float total = p.Sum();
            if (total > 1e-11f)
            {
                // Normalization is intentionally disabled:
                //p /= total;
            }
            else p.Values.Fill(0.0f);
        }
        // Number of class hypotheses accepted for this component.
        int count = 0;
        Global.Debugf("dcost", "output {0}", p.Keys.Length());
        for (int index = 0; index < p.Keys.Length(); index++)
        {
            int j = p.Keys[index];
            // Skip classes below minclass, the reject class, and outputs that are
            // non-positive or below minprob.
            if (j < minclass) continue;
            if (j == reject_class) continue;
            float value = p.Values[index];
            if (value <= 0.0f) continue;
            if (value < minprob) continue;
            // Outputs where higher is better are converted to a cost via -log;
            // otherwise the output is used as the cost directly.
            float pcost = classifier.Object.HigherOutputIsBetter ? (float)-Math.Log(value) : value;
            Global.Debugf("dcost", "{0} {1} {2}", j, pcost + ccost, (j > 32 ? (char)j : '_'));
            float total_cost = pcost + ccost;
            if (total_cost < maxcost)
            {
                if (use_priors)
                {
                    // Subtracts -log(prior), i.e. adds log(priors[j]) to the cost.
                    total_cost -= (float)-Math.Log(priors[j]);
                }
                grouper.Object.SetClass(i, j, total_cost);
                count++;
            }
        }
        Global.Debugf("dcost", "");
        if (count == 0)
        {
            // No hypothesis was accepted: fall back to '~' for components smaller
            // than half of the fixed xheight in both dimensions, '#' otherwise.
            float xheight = 10.0f;
            if (b.Height() < xheight / 2 && b.Width() < xheight / 2)
            {
                grouper.Object.SetClass(i, (int)'~', high_cost / 2);
            }
            else
            {
                grouper.Object.SetClass(i, (int)'#', (b.Width() / xheight) * high_cost);
            }
        }
        // Attach space costs when the pixel space of the component exceeds the threshold.
        if (grouper.Object.PixelSpace(i) > space_threshold)
        {
            Global.Debugf("spaces", "space {0}", grouper.Object.PixelSpace(i));
            grouper.Object.SetSpaceCost(i, space_yes, space_no);
        }
    }
    grouper.Object.GetLattice(result);
    return rate;
}
/// <summary>
/// Draws a random path through an FST, assuming any input.
/// At every state the outgoing arcs and the option to stop (weighted by the state's
/// accept cost) compete in a single draw made by <c>sample_by_costs</c>.
/// </summary>
/// <param name="result">Receives the output symbol of every arc taken.</param>
/// <param name="fst">The FST.</param>
/// <param name="max">The maximum length of the result.</param>
/// <returns>
/// The summed cost of the arcs taken plus the accept cost of the state where
/// sampling ended.
/// </returns>
public static double fst_sample(Intarray result, IGenericFst fst, int max=1000)
{
    double accumulated = 0;
    int state = fst.GetStart();

    int step = 0;
    while (step < max)
    {
        Intarray arcInputs = new Intarray();
        Intarray arcOutputs = new Intarray();
        Intarray arcTargets = new Intarray();
        Floatarray arcCosts = new Floatarray();
        fst.Arcs(arcInputs, arcTargets, arcOutputs, arcCosts, state);

        // Append the accept cost as the last entry so that stopping is sampled
        // uniformly together with the arcs.
        arcCosts.Push(fst.GetAcceptCost(state));
        int picked = sample_by_costs(arcCosts);
        int stopIndex = arcCosts.Length() - 1;
        if (picked == stopIndex)
        {
            break;
        }

        result.Push(arcOutputs[picked]);
        accumulated += arcCosts[picked];
        state = arcTargets[picked];
        step++;
    }

    return accumulated + fst.GetAcceptCost(state);
}
/// <summary>
/// Draws a random path through an FST, assuming any input, and returns the sampled
/// symbols as a string after passing them through <c>remove_epsilons</c>.
/// </summary>
/// <param name="result">Receives the sampled string.</param>
/// <param name="fst">The FST.</param>
/// <param name="max">The maximum number of sampled symbols.</param>
/// <returns>The cost reported by the array-based <c>fst_sample</c> overload.</returns>
public static double fst_sample(out string result, IGenericFst fst, int max)
{
    Intarray sampledSymbols = new Intarray();
    double totalCost = fst_sample(sampledSymbols, fst, max);
    remove_epsilons(out result, sampledSymbols);
    return totalCost;
}
/// <summary>
/// Turns the FST into its Kleene closure, in place: the start state is marked
/// accepting, and every state whose accept cost is below 1e37 gets a transition
/// with labels 0 back to the start state carrying that accept cost.
/// </summary>
/// <param name="fst">The FST to modify.</param>
public static void fst_star(IGenericFst fst)
{
    int start = fst.GetStart();
    fst.SetAccept(start);

    int state = 0;
    while (state < fst.nStates())
    {
        double acceptCost = fst.GetAcceptCost(state);
        bool isAccepting = acceptCost < 1e37;
        if (isAccepting)
        {
            fst.AddTransition(state, start, 0, (float)acceptCost, 0);
        }
        state++;
    }
}
/// <summary>
/// Recognizes a text line and fills <paramref name="result"/> with a lattice
/// representing the recognition alternatives. Delegates to the three-argument
/// overload and discards the segmentation it produces.
/// </summary>
/// <param name="result">Receives the recognition lattice.</param>
/// <param name="image">The line image.</param>
/// <returns>The value returned by the three-argument overload.</returns>
public override double RecognizeLine(IGenericFst result, Bytearray image)
{
    // Throwaway array: the caller of this overload has no use for the segmentation.
    Intarray discardedSegmentation = new Intarray();
    double outcome = RecognizeLine(discardedSegmentation, result, image);
    return outcome;
}
/// <summary>
/// Stores the Kleene closure of <paramref name="fst"/> in <paramref name="result"/>,
/// leaving <paramref name="fst"/> itself untouched by the closure step.
/// </summary>
/// <param name="result">The destination; overwritten with a copy of <paramref name="fst"/> first.</param>
/// <param name="fst">The FST to close.</param>
public static void fst_star(IGenericFst result, IGenericFst fst)
{
    // Step 1: duplicate the source into the destination.
    fst_copy(result, fst);

    // Step 2: apply the in-place closure to the duplicate.
    fst_star(result);
}
/// <summary>
/// Saves <paramref name="fst"/> to the path that <c>PathFile</c> produces for the
/// given page, line and variant with the "fst" argument.
/// </summary>
/// <param name="fst">The FST to save.</param>
/// <param name="page">Page number passed to <c>PathFile</c>.</param>
/// <param name="line">Line number passed to <c>PathFile</c>.</param>
/// <param name="variant">Optional variant passed to <c>PathFile</c>; null by default.</param>
public void PutLattice(IGenericFst fst, int page, int line, string variant = null)
{
    fst.Save(PathFile(page, line, variant, "fst"));
}