/// <summary>
/// Applies the mappings from an orthography conversion settings file to each text in a sequence.
/// </summary>
/// <param name="fragments">The sequence of texts to convert.</param>
/// <param name="orthographyConversionFile">
/// The filename containing the conversion settings. It should be tab-delimited with 2 columns:
/// the 1st column is a sequence of 1 or more characters in the source language, and
/// the 2nd column is the sequence of characters to map to in the target language.
/// </param>
/// <returns>
/// The converted texts, one per input fragment, in input order.
/// This is an iterator: the converter is not constructed and no fragment is converted
/// until the caller starts enumerating the result.
/// </returns>
public static IEnumerable<string> ApplyOrthographyConversion(IEnumerable<string> fragments, string orthographyConversionFile)
{
    // One converter is shared by every fragment of this enumeration.
    var orthographyConverter = new OrthographyConverter(orthographyConversionFile);

    foreach (var sourceText in fragments)
    {
        yield return orthographyConverter.ApplyMappings(sourceText);
    }
}
/// <summary>
/// Applies the mappings from an orthography conversion settings file to a single piece of text.
/// </summary>
/// <param name="text">The text to convert.</param>
/// <param name="orthographyConversionFile">
/// The filename containing the conversion settings. It should be tab-delimited with 2 columns:
/// the 1st column is a sequence of 1 or more characters in the source language, and
/// the 2nd column is the sequence of characters to map to in the target language.
/// </param>
/// <returns>The result of running <paramref name="text"/> through the converter's mappings.</returns>
public static string ApplyOrthographyConversion(string text, string orthographyConversionFile)
    => new OrthographyConverter(orthographyConversionFile).ApplyMappings(text);
/// <summary>
/// Determines which language code to use for eSpeak.
/// The requested code is tried first; if eSpeak rejects it, fallback codes are tried in order:
/// the target language of the first matching "convert_{requestedLangCode}_to_*.txt" file in the
/// current collection folder (if any), then "eo" (Esperanto), then "en".
/// </summary>
/// <param name="requestedLangCode">The language code the caller would like to use.</param>
/// <param name="stdOut">Standard output of the last eSpeak probe command that was run, or "" if none was run.</param>
/// <param name="stdErr">Standard error of the last eSpeak probe command that was run, or "" if none was run.</param>
/// <returns>
/// The first language code whose probe command did not report an error;
/// "eo" (without probing) if <paramref name="requestedLangCode"/> is null or does not look like a language code;
/// or null if neither the requested code nor any fallback worked.
/// </returns>
private string GetBestSupportedLanguage(string requestedLangCode, out string stdOut, out string stdErr)
{
    stdOut = "";
    stdErr = "";

    // Normally requestedLangCode should be under our control.
    // But do a quick and easy check to make sure it looks reasonable, because it is pasted into a command line below.
    // (There are some highly contrived scenarios where command injection would be possible with some social engineering.)
    if (!LooksLikeSafeLangCode(requestedLangCode))
    {
        // This doesn't look like a lang code and has non-zero potential for injection. Just return a default value instead.
        Debug.Assert(false);
        return "eo";
    }

    // First try the requested language directly.
    // We need to test eSpeak directly instead of Aeneas because when using TTS overrides,
    // there's no Aeneas error message that tells us if the language is unsupported.
    // Therefore, we explicitly test if the language is supported by the dependency (eSpeak) before getting started.
    if (!DoesCommandCauseError($"espeak -v {requestedLangCode} -q \"hello world\"", kWorkingDirectory, out stdOut, out stdErr))
    {
        return requestedLangCode;
    }

    // Nope, looks like the requested language is not supported by the eSpeak installation.
    // Let's check the fallback languages.
    var potentialFallbackLangs = new List<string>();

    // Check the orthography conversion files. If present, they specify the (first) fallback language to be used.
    string collectionPath = _bookSelection.CurrentSelection.CollectionSettings.FolderPath;
    var matchingFiles = Directory.EnumerateFiles(collectionPath, $"convert_{requestedLangCode}_to_*.txt");
    foreach (var matchingFile in matchingFiles)
    {
        Tuple<string, string> sourceAndTargetTuple = OrthographyConverter.ParseSourceAndTargetFromFilename(matchingFile);
        if (sourceAndTargetTuple == null)
        {
            continue;
        }

        // The target language comes from a filename and ends up on the same command line as the
        // requested code, so it gets the same sanity check. Skip it and keep looking if it fails.
        string targetLang = sourceAndTargetTuple.Item2;
        if (!LooksLikeSafeLangCode(targetLang))
        {
            continue;
        }

        potentialFallbackLangs.Add(targetLang);
        break;
    }

    // Add more default fallback languages to the end ("eo" is Esperanto).
    // Skip any that a conversion file already supplied so the same probe isn't run twice.
    foreach (var defaultLang in new[] { "eo", "en" })
    {
        if (!potentialFallbackLangs.Contains(defaultLang))
        {
            potentialFallbackLangs.Add(defaultLang);
        }
    }

    // Now go and try the fallback languages until we (possibly) find one that works.
    foreach (var langCodeToTry in potentialFallbackLangs)
    {
        if (!DoesCommandCauseError($"espeak -v {langCodeToTry} -q \"hello world\"", kWorkingDirectory, out stdOut, out stdErr))
        {
            return langCodeToTry;
        }
    }

    // Nothing worked.
    return null;
}

/// <summary>
/// Quick sanity check applied to a language code before it is interpolated into an eSpeak command line.
/// Rejects null and anything containing a double quote or a backslash.
/// </summary>
private static bool LooksLikeSafeLangCode(string langCode)
{
    return langCode != null
        && langCode.IndexOf('"') < 0
        && langCode.IndexOf('\\') < 0;
}