Ejemplo n.º 1
0
        /// <summary>
        /// Process the logic for custom dictionary post processor used to handle user custom vocab translation.
        /// Every source token that has an entry in the user's custom dictionary for the given language
        /// has its aligned translated token overwritten with the user provided translation.
        /// </summary>
        /// <param name="translatedDocument">Translated document; its translated tokens are modified in place.</param>
        /// <param name="languageId">Current source language id, used to select the custom dictionary.</param>
        /// <returns>
        /// A <see cref="PostProcessedDocument"/> that stores the original translated document state and the newly post processed message.
        /// The message is an empty string when the custom dictionary for <paramref name="languageId"/> has no entries.
        /// </returns>
        public PostProcessedDocument Process(TranslatedDocument translatedDocument, string languageId)
        {
            // Resolve the language dictionary once and reuse it for the emptiness check and the lookups.
            var languageDictionary = _userCustomDictionaries.GetLanguageDictionary(languageId);

            // Nothing to apply when the user provided no entries for this language.
            if (languageDictionary.Count <= 0)
            {
                return new PostProcessedDocument(translatedDocument, string.Empty);
            }

            // Walk all the original message tokens and forcibly overwrite the translation of any token
            // that exists in the user custom dictionary.
            var sourceTokens = translatedDocument.SourceTokens;
            for (var i = 0; i < sourceTokens.Length; i++)
            {
                // Single lookup: fetch the user translation only if the source token is a dictionary key.
                if (languageDictionary.TryGetValue(sourceTokens[i], out var userTranslation))
                {
                    // The equivalent translated token is located through the alignment map of the translated document.
                    translatedDocument.TranslatedTokens[translatedDocument.IndexedAlignment[i]] = userTranslation;
                }
            }

            // Join all the (possibly overwritten) translated tokens into the post processed message.
            var processedResult = PostProcessingUtilities.Join(" ", translatedDocument.TranslatedTokens);
            return new PostProcessedDocument(translatedDocument, processedResult);
        }
        /// <summary>
        /// Process the logic for patterns post processor used to handle numbers and no translate list.
        /// </summary>
        /// <param name="translatedDocument">Translated document; passed to the substitution helpers, which may modify it.</param>
        /// <param name="languageId">Current source language id, used to select the configured no translate patterns.</param>
        /// <returns>
        /// A <see cref="PostProcessedDocument"/> that stores the original translated document state and the newly post processed message.
        /// When there is nothing to post process (no patterns and no digits in the source message) or the document has no
        /// alignment information, the message is the document's target message unchanged.
        /// </returns>
        public PostProcessedDocument Process(TranslatedDocument translatedDocument, string languageId)
        {
            // Validate function arguments for null and incorrect format.
            ValidateParameters(translatedDocument);

            // Flag to indicate if the source message contains any digit.
            var containsNum = Regex.IsMatch(translatedDocument.SourceMessage, @"\d");

            // The temporary pattern set contains two sets of patterns:
            //  - the post processed patterns that were configured by the user, i.e. _processedPatterns, and
            //  - the literal no translate phrases carried by the document, i.e. translatedDocument.LiteranlNoTranslatePhrases.
            //    ex: translatedDocument.SourceMessage = I like my friend <literal>happy</literal>; the literal tag
            //    specifies that the word "happy" shouldn't be translated.
            // A copy is taken so the per-document literal phrases are never added to the shared configured set;
            // mutating the configured set would leak one message's phrases into every later call.
            // NOTE(review): the copy uses the default string comparer - confirm the configured sets do not rely on a custom one.
            var temporaryPatterns = new System.Collections.Generic.HashSet<string>(_processedPatterns[languageId]);

            if (translatedDocument.LiteranlNoTranslatePhrases != null && translatedDocument.LiteranlNoTranslatePhrases.Count > 0)
            {
                temporaryPatterns.UnionWith(translatedDocument.LiteranlNoTranslatePhrases);
            }

            // Short-circuit with the untouched target message when there is nothing to substitute,
            // or when there is no alignment information to work with.
            var nothingToProcess = temporaryPatterns.Count == 0 && !containsNum;
            if (nothingToProcess || string.IsNullOrWhiteSpace(translatedDocument.RawAlignment))
            {
                return new PostProcessedDocument(translatedDocument, translatedDocument.TargetMessage);
            }

            // Loop over all the patterns and substitute each no translate pattern match with the original source words.
            // ex: assuming the pattern = "mon nom est (.+)"
            // and the phrase = "mon nom est l'etat"
            // the original translator output for this phrase would be "My name is the state",
            // after applying the patterns post processor, the output would be: "My name is l'etat"
            foreach (var pattern in temporaryPatterns)
            {
                if (Regex.IsMatch(translatedDocument.SourceMessage, pattern, RegexOptions.Singleline | RegexOptions.IgnoreCase))
                {
                    SubstituteNoTranslatePattern(translatedDocument, pattern);
                }
            }

            SubstituteNumericPattern(translatedDocument);

            // Join all the translated tokens into the post processed message.
            var processedResult = PostProcessingUtilities.Join(" ", translatedDocument.TranslatedTokens);
            return new PostProcessedDocument(translatedDocument, processedResult);
        }