예제 #1
0
        /// <summary>
        /// Lazily applies the mappings described by an orthography conversion settings file to each string in a sequence.
        /// </summary>
        /// <remarks>
        /// This is an iterator method: the converter is not constructed and no fragment is mapped until the caller
        /// starts enumerating the returned sequence.
        /// </remarks>
        /// <param name="fragments">The texts (as an IEnumerable&lt;string&gt;) to apply the mapping to</param>
        /// <param name="orthographyConversionFile">The filename containing the conversion settings. It should be tab-delimited with 2 columns. The 1st column is a sequence of 1 or more characters in the source language. The 2nd column contains a sequence of characters to map to in the target language.</param>
        /// <returns>The mapped form of each fragment, in the same order as the input fragments</returns>
        public static IEnumerable<string> ApplyOrthographyConversion(IEnumerable<string> fragments, string orthographyConversionFile)
        {
            // One converter is shared by every fragment of a given enumeration.
            var mapper = new OrthographyConverter(orthographyConversionFile);

            foreach (string piece in fragments)
            {
                yield return mapper.ApplyMappings(piece);
            }
        }
예제 #2
0
        /// <summary>
        /// Applies the mappings described by an orthography conversion settings file to a single piece of text.
        /// </summary>
        /// <param name="text">The text (as a scalar string) to apply the mapping to</param>
        /// <param name="orthographyConversionFile">The filename containing the conversion settings. It should be tab-delimited with 2 columns. The 1st column is a sequence of 1 or more characters in the source language. The 2nd column contains a sequence of characters to map to in the target language.</param>
        /// <returns>The result of applying the converter's mappings to <paramref name="text"/></returns>
        public static string ApplyOrthographyConversion(string text, string orthographyConversionFile)
            => new OrthographyConverter(orthographyConversionFile).ApplyMappings(text);
예제 #3
0
        /// <summary>
        /// Determines which language code to use for eSpeak.
        /// The requested language is probed first. If eSpeak rejects it, the target language of the first parseable
        /// "convert_{requestedLangCode}_to_*.txt" file in the collection folder is tried, then "eo" (Esperanto), then "en".
        /// </summary>
        /// <param name="requestedLangCode">The language code we would ideally like eSpeak to use</param>
        /// <param name="stdOut">Whatever DoesCommandCauseError reported for the last probe that was run, or "" if no probe was run</param>
        /// <param name="stdErr">Whatever DoesCommandCauseError reported for the last probe that was run, or "" if no probe was run</param>
        /// <returns>
        /// The first candidate language code whose probe did not cause an error;
        /// "eo" (without probing) if <paramref name="requestedLangCode"/> does not look like a language code;
        /// or null if every candidate caused an error.
        /// </returns>
        private string GetBestSupportedLanguage(string requestedLangCode, out string stdOut, out string stdErr)
        {
            stdOut = "";
            stdErr = "";

            // Normally requestedLangCode should be under our control.
            // But it is interpolated (unquoted) into a command line and into a file search pattern below, so make sure
            // it really looks like a language code. Rejecting only quotes and backslashes is not enough to prevent
            // command/argument injection: whitespace, '&', '|', ';', '*', '?' etc. would also change the meaning of
            // the command or of the search pattern. Null/empty is rejected here too instead of crashing.
            if (!LooksLikeSafeLangCode(requestedLangCode))
            {
                // This doesn't look like a lang code and has non-zero potential for injection. Just return a default value instead.
                Debug.Assert(false, "requestedLangCode does not look like a language code");

                return "eo";
            }

            // First try the requested language directly.
            // (We need to test eSpeak directly instead of Aeneas because when using TTS overrides, there's no Aeneas error
            // message that tells us if the language is unsupported. Therefore, we explicitly test if the language is
            // supported by the dependency (eSpeak) before getting started.)
            if (!DoesCommandCauseError($"espeak -v {requestedLangCode} -q \"hello world\"", kWorkingDirectory, out stdOut, out stdErr))
            {
                return requestedLangCode;
            }

            // Nope, looks like the requested language is not supported by the eSpeak installation.
            // Let's check the fallback languages.
            var potentialFallbackLangs = new List<string>();

            // Check the orthography conversion files. If present, they specify the (first) fallback language to be used.
            string collectionPath = _bookSelection.CurrentSelection.CollectionSettings.FolderPath;
            var matchingFiles = Directory.EnumerateFiles(collectionPath, $"convert_{requestedLangCode}_to_*.txt");

            foreach (var matchingFile in matchingFiles)
            {
                Tuple<string, string> sourceAndTargetTuple = OrthographyConverter.ParseSourceAndTargetFromFilename(matchingFile);
                if (sourceAndTargetTuple == null)
                {
                    continue;
                }

                // The target language comes from a filename on disk and also ends up on the eSpeak command line,
                // so hold it to the same standard as the requested code. Unusable names are skipped.
                string targetLang = sourceAndTargetTuple.Item2;
                if (!LooksLikeSafeLangCode(targetLang))
                {
                    continue;
                }

                // Only the first usable conversion file contributes a fallback language.
                potentialFallbackLangs.Add(targetLang);
                break;
            }

            // Add more default fallback languages to the end
            potentialFallbackLangs.Add("eo");                   // "eo" is Esperanto
            potentialFallbackLangs.Add("en");

            // Now go and try the fallback languages until we (possibly) find one that works.
            // If none of them works, null is returned and stdOut/stdErr hold the results of the last probe.
            foreach (var langCodeToTry in potentialFallbackLangs)
            {
                if (!DoesCommandCauseError($"espeak -v {langCodeToTry} -q \"hello world\"", kWorkingDirectory, out stdOut, out stdErr))
                {
                    return langCodeToTry;
                }
            }

            return null;
        }

        // Returns true if langCode is non-empty and consists solely of ASCII letters, ASCII digits, '-', '_' or '+',
        // i.e. it contains nothing that could alter a command line or act as a wildcard/path separator in a file pattern.
        private static bool LooksLikeSafeLangCode(string langCode)
        {
            if (string.IsNullOrEmpty(langCode))
            {
                return false;
            }

            foreach (char c in langCode)
            {
                bool isAsciiLetterOrDigit = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9');
                if (!isAsciiLetterOrDigit && c != '-' && c != '_' && c != '+')
                {
                    return false;
                }
            }

            return true;
        }