/// <summary>
/// Phoneticize a word.
/// </summary>
/// <param name="entry">
/// The word to phoneticize, given as a list of strings
/// (each element holds a single character).
/// </param>
/// <param name="nbest">The number of distinct pronunciations to return.</param>
/// <returns>The pronunciation(s) of the input word.</returns>
public List<Path> Phoneticize(List<String> entry, int nbest)
{
    // Turn the word into an FSA, augment it and sort its arcs by output label
    // so it can be composed with the epsilon filter.
    var wordFsa = EntryToFsa(entry);
    var semiring = wordFsa.Semiring;
    Compose.Augment(1, wordFsa, semiring);
    ArcSort.Apply(wordFsa, new OLabelCompare());

    // Compose with the epsilon filter first, then with the G2P model,
    // and project the outcome onto its output labels.
    var lattice = Compose.compose(wordFsa, _epsilonFilter, semiring, true);
    ArcSort.Apply(lattice, new OLabelCompare());
    lattice = Compose.compose(lattice, _g2Pmodel, semiring, true);
    Project.Apply(lattice, ProjectType.Output);

    // When more than one pronunciation is wanted, request 10 times more best
    // paths than asked for: several paths may resolve to the same
    // pronunciation due to epsilon transitions. The factor 10 is an
    // arbitrary constant.
    var candidateCount = nbest == 1 ? 1 : nbest * 10;
    lattice = NShortestPaths.Get(lattice, candidateCount, false);
    lattice = RmEpsilon.Get(lattice);

    return FindAllPaths(lattice, nbest, _skipSeqs, Tie);
}
// Disabled constructor kept for reference: create a decoder by loading the
// serialized model from a specified filename.
//
// public G2PConverter(String g2pmodel_file)
// {
//     g2pmodel = ImmutableFst.loadModel(g2pmodel_file);
//     init();
// }

/// <summary>
/// Initialize the decoder: register the skip sequences, prepare the G2P
/// model for composition, load the clusters from its input symbols and
/// build the epsilon filter.
/// </summary>
private void Init()
{
    // Register the skip sequences, in the same order as before.
    foreach (var skipSymbol in new[] { Eps, Sb, Se, Skip, "-" })
    {
        _skipSeqs.Add(skipSymbol);
    }

    // keep an augmented copy (for compose)
    Compose.Augment(0, _g2Pmodel, _g2Pmodel.Semiring);
    ArcSort.Apply(_g2Pmodel, new ILabelCompare());

    LoadClusters(_g2Pmodel.Isyms);

    // get epsilon filter for composition
    _epsilonFilter = Compose.GetFilter(_g2Pmodel.Isyms, _g2Pmodel.Semiring);
    ArcSort.Apply(_epsilonFilter, new ILabelCompare());
}