// Commented-out constructor, kept unchanged in content:
//
// /**
//  * Create a decoder by loading the serialized model from a specified
//  * filename
//  *
//  * @param g2pmodel_file
//  *            the filename of the serialized model
//  */
// public G2PConverter(String g2pmodel_file)
// {
//     g2pmodel = ImmutableFst.loadModel(g2pmodel_file);
//     init();
// }

/// <summary>
/// Initializes the decoder: registers the symbols that are skipped when
/// paths are read back, augments and arc-sorts the model, loads the
/// clusters from the model's input symbols and builds the arc-sorted
/// epsilon filter used for composition.
/// </summary>
private void Init()
{
    // Symbols that must never show up in a decoded path.
    foreach (var symbol in new String[] { Eps, Sb, Se, Skip, "-" })
    {
        _skipSeqs.Add(symbol);
    }

    // keep an augmented copy (for compose)
    Compose.Augment(0, _g2Pmodel, _g2Pmodel.Semiring);
    ArcSort.Apply(_g2Pmodel, new ILabelCompare());

    LoadClusters(_g2Pmodel.Isyms);

    // get epsilon filter for composition
    _epsilonFilter = Compose.GetFilter(_g2Pmodel.Isyms, _g2Pmodel.Semiring);
    ArcSort.Apply(_epsilonFilter, new ILabelCompare());
}
/// <summary>
/// Finds the n-best paths in an Fst returned by the NShortestPaths operation.
/// </summary>
/// <remarks>
/// States are visited through a work queue starting at <c>fst.Start</c>.
/// For every state the path that leads to it is extended along each
/// outgoing arc: the arc's output symbol is split on <paramref name="tie"/>,
/// every piece not contained in <paramref name="skipSeqs"/> is appended, and
/// the cost is combined with the arc weight through the semiring's
/// <c>Times</c>. Paths that end in a state whose final weight differs from
/// the semiring's <c>Zero</c> are collected, keyed by their symbol sequence.
/// </remarks>
/// <param name="fst">the input fst</param>
/// <param name="nbest">the number of paths to return</param>
/// <param name="skipSeqs">the sequences to ignore</param>
/// <param name="tie">the separator symbol</param>
/// <returns>
/// the collected paths, sorted with <c>PathComparator</c> and truncated to
/// at most <paramref name="nbest"/> entries
/// </returns>
private List<Path> FindAllPaths(Fsts.Fst fst, int nbest, HashSet<String> skipSeqs, string tie)
{
    var semiring = fst.Semiring;

    var finalPaths = new Dictionary<String, Path>();
    var paths = new Dictionary<State, Path>();
    var queue = new Queue<State>();

    var p = new Path(fst.Semiring);
    p.Cost = semiring.One;
    paths[fst.Start] = p;
    queue.Enqueue(fst.Start);

    var osyms = fst.Osyms;

    // Split on the literal tie symbol. The previous separator was
    // "\\" + tie, a regular-expression escape that only makes sense for a
    // regex-based split; as a literal separator it never matched symbols
    // joined by the tie alone.
    var separators = new String[] { tie };

    while (queue.Count != 0)
    {
        var s = queue.Dequeue();
        var currentPath = paths[s];

        if (s.FinalWeight != semiring.Zero)
        {
            // Key the path by its content. Calling ToString() on a standard
            // .NET list yields the type name, which would make every path
            // share one key.
            // NOTE(review): assumes GetPath() returns a sequence of strings - confirm.
            var pathString = "[" + string.Join(", ", currentPath.GetPath()) + "]";

            Path old;
            if (!finalPaths.TryGetValue(pathString, out old) || old.Cost > currentPath.Cost)
            {
                // The indexer overwrites an existing entry; Add() would throw
                // on the duplicate key in the "path already exists" case.
                finalPaths[pathString] = currentPath;
            }
        }

        var numArcs = s.GetNumArcs();
        for (var j = 0; j < numArcs; j++)
        {
            var a = s.GetArc(j);

            // Re-read the entry for s on every arc: an arc that leads back
            // to s replaces that entry further down.
            var cur = paths[s];
            p = new Path(fst.Semiring);
            p.Cost = cur.Cost;
            p.SetPath(cur.GetPath().ToList());

            var sym = osyms[a.Olabel];
            foreach (var phone in sym.Split(separators, StringSplitOptions.None))
            {
                if (!skipSeqs.Contains(phone))
                {
                    p.GetPath().Add(phone);
                }
            }

            p.Cost = semiring.Times(p.Cost, a.Weight);

            var nextState = a.NextState;
            // A state may be reached more than once; Add() would throw on
            // the second visit, so overwrite with the latest path instead.
            paths[nextState] = p;
            if (!queue.Contains(nextState))
            {
                queue.Enqueue(nextState);
            }
        }
    }

    var res = new List<Path>(finalPaths.Values);
    res.Sort(new PathComparator());

    // Keep only the first nbest paths; a negative nbest keeps none.
    var keep = nbest < 0 ? 0 : nbest;
    if (keep < res.Count)
    {
        res.RemoveRange(keep, res.Count - keep);
    }

    return res;
}
/// <summary>
/// Transforms an input spelling/pronunciation into an equivalent FSA, adding
/// extra arcs as needed to accommodate clusters.
/// </summary>
/// <remarks>
/// The automaton is a chain: the start state, one arc labelled <c>Sb</c>,
/// one arc per element of <paramref name="entry"/>, and one arc labelled
/// <c>Se</c> into the only state given a final weight. Every arc carries
/// the same label on input and output and a weight of <c>0.0f</c>.
/// </remarks>
/// <param name="entry">the input vector</param>
/// <returns>the created fst</returns>
private Fsts.Fst EntryToFsa(List<String> entry)
{
    var semiring = new TropicalSemiring();
    var fsa = new Fsts.Fst(semiring);

    var startState = new State(semiring.Zero);
    fsa.AddState(startState);
    fsa.SetStart(startState);

    // Build the basic FSA.
    // Begin marker: start state --Sb--> state 1.
    var current = new State(semiring.Zero);
    fsa.AddState(current);
    var beginIndex = Utils.GetIndex(_g2Pmodel.Isyms, Sb);
    fsa.Start.AddArc(new Arc(beginIndex, beginIndex, 0.0f, current));

    // One arc per entry element: state position --entry[position - 1]--> new state.
    for (var position = 1; position <= entry.Count; position++)
    {
        current = new State(semiring.Zero);
        fsa.AddState(current);

        var letterIndex = Utils.GetIndex(_g2Pmodel.Isyms, entry[position - 1]);
        fsa.GetState(position).AddArc(new Arc(letterIndex, letterIndex, 0.0f, current));
    }

    // End marker: last chain state --Se--> final state.
    var finalState = new State(semiring.Zero);
    fsa.AddState(finalState);
    var endIndex = Utils.GetIndex(_g2Pmodel.Isyms, Se);
    current.AddArc(new Arc(endIndex, endIndex, 0.0f, finalState));
    finalState.FinalWeight = 0.0f;

    // Add any cluster arcs. The position of a cluster in _clusters is used
    // directly as the label of its arc.
    for (var clusterId = 0; clusterId < _clusters.Count; clusterId++)
    {
        var cluster = _clusters[clusterId];
        if (cluster == null)
        {
            continue;
        }

        var offset = 0;
        while (true)
        {
            // The result is added to offset below, i.e. it is treated as
            // relative to the search start; -1 ends the scan.
            var hit = Utils.Search(entry, cluster, offset);
            if (hit == -1)
            {
                break;
            }

            // The + 1 skips the state in front of the Sb arc.
            var source = fsa.GetState(offset + hit + 1);
            var target = fsa.GetState(offset + hit + cluster.Count + 1);
            source.AddArc(new Arc(clusterId, clusterId, 0.0f, target));

            offset = offset + hit + cluster.Count;
        }
    }

    fsa.Isyms = _g2Pmodel.Isyms;
    fsa.Osyms = _g2Pmodel.Isyms;

    return fsa;
}