/// <summary>
/// Process the logic for custom dictionary post processor used to handle user custom vocab translation.
/// Every source token that has an entry in the user's custom dictionary for the given language
/// gets its aligned translated token overwritten with the user provided translation.
/// </summary>
/// <param name="translatedDocument">Translated document. Its <c>TranslatedTokens</c> are modified in place.</param>
/// <param name="languageId">Current source language id.</param>
/// <returns>
/// A <see cref="PostProcessedDocument"/> that stores the original translated document state and the newly post processed message.
/// The post processed message is <see cref="string.Empty"/> when the custom dictionary for <paramref name="languageId"/> has no entries.
/// </returns>
public PostProcessedDocument Process(TranslatedDocument translatedDocument, string languageId)
{
    // Fetch the language dictionary once and reuse it for both the emptiness check and the lookups.
    var languageDictionary = _userCustomDictionaries.GetLanguageDictionary(languageId);

    // Nothing to apply when the user supplied no entries for this language.
    if (languageDictionary.Count == 0)
    {
        return new PostProcessedDocument(translatedDocument, string.Empty);
    }

    // Loop over all the original message tokens and check whether any of them exists in the user custom dictionary,
    // to forcibly overwrite that token's translation with the user provided translation.
    for (var i = 0; i < translatedDocument.SourceTokens.Length; i++)
    {
        // Single lookup: TryGetValue both tests for the key and retrieves the user translation.
        if (languageDictionary.TryGetValue(translatedDocument.SourceTokens[i], out var userTranslation))
        {
            // The equivalent translated token is located through the alignment map of the translated document.
            // NOTE(review): this assumes IndexedAlignment has an entry for every source token index - confirm.
            translatedDocument.TranslatedTokens[translatedDocument.IndexedAlignment[i]] = userTranslation;
        }
    }

    // Finally return a PostProcessedDocument that holds the original TranslatedDocument and a string that joins all the translated tokens together.
    var processedResult = PostProcessingUtilities.Join(" ", translatedDocument.TranslatedTokens);
    return new PostProcessedDocument(translatedDocument, processedResult);
}
/// <summary>
/// Process the logic for patterns post processor used to handle numbers and no translate list.
/// </summary>
/// <param name="translatedDocument">Translated document. Its translated tokens may be modified by the substitution helpers.</param>
/// <param name="languageId">Current source language id, used to select the configured no translate patterns.</param>
/// <returns>
/// A <see cref="PostProcessedDocument"/> that stores the original translated document state and the newly post processed message.
/// When there is nothing to substitute (no patterns and no digits in the source message), or the document has no
/// alignment information, the post processed message is the document's unmodified target message.
/// </returns>
public PostProcessedDocument Process(TranslatedDocument translatedDocument, string languageId)
{
    // Validate the document before doing any work.
    ValidateParameters(translatedDocument);

    // Flag to indicate whether the source message contains any digit; numeric substitution is only relevant then.
    var containsNum = Regex.IsMatch(translatedDocument.SourceMessage, @"\d");

    // The temporary pattern set combines two sources:
    // - the post processed patterns that were configured by the user for this language, i.e. _processedPatterns, and
    // - the literal no translate phrases carried by this document, i.e. translatedDocument.LiteranlNoTranslatePhrases
    //   ex : translatedDocument.SourceMessage = I like my friend <literal>happy</literal> , the literal tag here specifies that the word "happy" shouldn't be translated.
    // A private copy is used so that merging this document's phrases does not permanently add them to the
    // shared configured set, which would leak them into every later call for the same language.
    // NOTE(review): the copy uses the default string comparer - confirm the configured set does not rely on a custom one.
    var temporaryPatterns = new System.Collections.Generic.HashSet<string>(_processedPatterns[languageId]);
    if (translatedDocument.LiteranlNoTranslatePhrases != null && translatedDocument.LiteranlNoTranslatePhrases.Count > 0)
    {
        temporaryPatterns.UnionWith(translatedDocument.LiteranlNoTranslatePhrases);
    }

    // Short-circuit: with no patterns and no numbers there is nothing to substitute, and without alignment
    // information the substitutions cannot be mapped, so the translator's target message is returned as is.
    var nothingToSubstitute = temporaryPatterns.Count == 0 && !containsNum;
    if (nothingToSubstitute || string.IsNullOrWhiteSpace(translatedDocument.RawAlignment))
    {
        return new PostProcessedDocument(translatedDocument, translatedDocument.TargetMessage);
    }

    // Loop over all the patterns and substitute each no translate pattern match with the original source words.
    // ex : assuming the pattern = "mon nom est (.+)"
    // and the phrase = "mon nom est l'etat"
    // the original translator output for this phrase would be "My name is the state",
    // after applying the patterns post processor, the output would be : "My name is l'etat"
    foreach (var pattern in temporaryPatterns)
    {
        if (Regex.IsMatch(translatedDocument.SourceMessage, pattern, RegexOptions.Singleline | RegexOptions.IgnoreCase))
        {
            SubstituteNoTranslatePattern(translatedDocument, pattern);
        }
    }

    SubstituteNumericPattern(translatedDocument);

    // Join the (possibly substituted) translated tokens into the final post processed message.
    var processedResult = PostProcessingUtilities.Join(" ", translatedDocument.TranslatedTokens);
    return new PostProcessedDocument(translatedDocument, processedResult);
}