/// <summary>
/// Runs the patterns post processor over a translated document: words matched by a no-translate
/// pattern, and numbers, are substituted back into the translated tokens.
/// </summary>
/// <param name="translatedDocument">Translated document.</param>
/// <param name="languageId">Current source language id; used as the key into the configured patterns.</param>
/// <returns>A <see cref="PostProcessedDocument"/> that stores the original translated document state and the newly post processed message.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="translatedDocument"/>, its source message or its translated message is null.
/// </exception>
public PostProcessedDocument Process(ITranslatedDocument translatedDocument, string languageId)
{
    // Validate function arguments for null values.
    ValidateParameters(translatedDocument);

    var sourceMessage = translatedDocument.GetSourceMessage();

    // Flag indicating whether the source message contains a digit; when it does, the numeric
    // substitution below is needed even if no no-translate pattern applies.
    var containsNum = Regex.IsMatch(sourceMessage, @"\d");

    // The temporary pattern set combines two sets of patterns:
    //  - the post processed patterns configured by the user for this language (_processedPatterns), and
    //  - the literal no-translate phrases carried by the document itself
    //    (per the original note these come from "<literal>(.*)</literal>" tags in the source message,
    //    ex: "I like my friend <literal>happy</literal>" marks "happy" as a word that must not be translated).
    // It is built as a private copy so that per-document literal phrases are never added to the
    // shared configured set and cannot leak into later calls for the same language.
    // NOTE(review): the copy uses the default string comparer - confirm the configured sets do not
    // rely on a custom comparer.
    var temporaryPatterns = new System.Collections.Generic.HashSet<string>(_processedPatterns[languageId]);

    var literalPhrases = translatedDocument.GetLiteranlNoTranslatePhrases();
    if (literalPhrases != null && literalPhrases.Count > 0)
    {
        temporaryPatterns.UnionWith(literalPhrases);
    }

    // Nothing to post process: either there is neither a pattern nor a number to restore, or there
    // is no alignment information available. Return the translated message untouched.
    var nothingToSubstitute = temporaryPatterns.Count == 0 && !containsNum;
    if (nothingToSubstitute || string.IsNullOrWhiteSpace(translatedDocument.GetRawAlignment()))
    {
        return new PostProcessedDocument(translatedDocument, translatedDocument.GetTranslatedMessage());
    }

    // Loop over all the patterns and substitute each no-translate pattern match with the original source words.
    // ex: assuming the pattern = "mon nom est (.+)"
    //     and the phrase = "mon nom est l'etat"
    //     the original translator output for this phrase would be "My name is the state";
    //     after applying the patterns post processor, the output would be "My name is l'etat".
    foreach (var pattern in temporaryPatterns)
    {
        if (Regex.IsMatch(sourceMessage, pattern, RegexOptions.Singleline | RegexOptions.IgnoreCase))
        {
            SubstituteNoTranslatePattern(translatedDocument, pattern);
        }
    }

    SubstituteNumericPattern(translatedDocument);

    // The processed message is the translated tokens joined with single spaces.
    var processedResult = PostProcessingUtilities.Join(" ", translatedDocument.GetTranslatedTokens());
    return new PostProcessedDocument(translatedDocument, processedResult);
}
/// <summary>
/// Ensures that the <see cref="ITranslatedDocument"/> itself, its source message and its
/// translated message are all non-null.
/// </summary>
/// <param name="translatedDocument">The document to validate.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when the document, its source message or its translated message is null.
/// </exception>
private void ValidateParameters(ITranslatedDocument translatedDocument)
{
    if (translatedDocument == null)
    {
        throw new ArgumentNullException(nameof(translatedDocument));
    }

    // Find the first missing member, checking the source message before the translated one;
    // the translated message is only queried when the source message is present.
    string missingMember = null;
    if (translatedDocument.GetSourceMessage() == null)
    {
        missingMember = nameof(translatedDocument.GetSourceMessage);
    }
    else if (translatedDocument.GetTranslatedMessage() == null)
    {
        missingMember = nameof(translatedDocument.GetTranslatedMessage);
    }

    if (missingMember != null)
    {
        throw new ArgumentNullException(missingMember);
    }
}