コード例 #1
0
 public void TranslatorUtilities_WordAlignmentParse_InvalidArguments_InvalidAlignmentsFormat()
 {
     // Every alignment entry is the literal word "test"; both token arrays are empty.
     var malformedAlignments = new[] { "test", "test" };
     var emptySource         = new string[0];
     var emptyTranslation    = new string[0];

     // The parser is expected to reject these entries with a FormatException.
     Assert.ThrowsException<FormatException>(
         () => PostProcessingUtilities.WordAlignmentParse(malformedAlignments, emptySource, emptyTranslation));
 }
コード例 #2
0
 public void TranslatorUtilities_WordAlignmentParse_InvalidArguments_NullTranslatedTokens()
 {
     // Alignments and source tokens are empty but non-null; only the translated tokens are null.
     var      emptyAlignments = new string[0];
     var      emptySource     = new string[0];
     string[] missingTokens   = null;

     // A null translated-token array is expected to surface as ArgumentNullException.
     Assert.ThrowsException<ArgumentNullException>(
         () => PostProcessingUtilities.WordAlignmentParse(emptyAlignments, emptySource, missingTokens));
 }
コード例 #3
0
        public void TranslatorUtilities_SplitSentence_TargetSentence_SimpleScenario()
        {
            // No alignment information is supplied, so the expected tokens are the
            // whitespace-separated words of the translated sentence.
            var expectedTokens = new[] { "My", "dog's", "name", "is", "Enzo" };

            var actualTokens = PostProcessingUtilities.SplitSentence("My dog's name is Enzo", isSourceSentence: false);

            AreEqualArrays(expectedTokens, actualTokens);
        }
コード例 #4
0
        public void TranslatorUtilities_SplitSentence_SourceSentence_SimpleScenario()
        {
            // No alignment information is supplied, so the expected tokens are the
            // whitespace-separated words of the source sentence.
            var expectedTokens = new[] { "mon", "nom", "est", "l'etat" };

            var actualTokens = PostProcessingUtilities.SplitSentence("mon nom est l'etat");

            AreEqualArrays(expectedTokens, actualTokens);
        }
コード例 #5
0
        /// <summary>
        /// Translates an array of strings from a source language to a target language.
        /// </summary>
        /// <remarks>
        /// Each entry of <paramref name="translateArraySourceTexts"/> is overwritten in place with its
        /// pre-processed text before the request is built, so the caller's array is modified.
        /// </remarks>
        /// <param name="translateArraySourceTexts">The strings to translate.</param>
        /// <param name="sourceLanguage">The language code of the translation text. For example, "en" for English.</param>
        /// <param name="targetLanguage">The language code to translate the text into.</param>
        /// <returns>A task that represents the translation operation.
        /// The task result contains a list of the translated documents.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="translateArraySourceTexts"/> is null.</exception>
        public async Task<List<ITranslatedDocument>> TranslateArrayAsync(string[] translateArraySourceTexts, string sourceLanguage, string targetLanguage)
        {
            // Fail with a descriptive exception instead of a NullReferenceException on .Length below.
            if (translateArraySourceTexts == null)
            {
                throw new ArgumentNullException(nameof(translateArraySourceTexts));
            }

            var translatedDocuments = new List<ITranslatedDocument>();

            for (var srcTxtIndx = 0; srcTxtIndx < translateArraySourceTexts.Length; srcTxtIndx++)
            {
                var currentTranslatedDocument = new TranslatedDocument(translateArraySourceTexts[srcTxtIndx]);
                translatedDocuments.Add(currentTranslatedDocument);

                // Check for literal tag in input user message
                _preProcessor.PreprocessMessage(currentTranslatedDocument.GetSourceMessage(), out var processedText, out var literalNoTranslateList);
                currentTranslatedDocument.SetSourceMessage(processedText);

                // The pre-processed text is what gets sent to the service, so it replaces the original entry.
                translateArraySourceTexts[srcTxtIndx] = processedText;
                currentTranslatedDocument.SetLiteranlNoTranslatePhrases(literalNoTranslateList);
            }

            // list of translation request for the service
            var payload = translateArraySourceTexts.Select(s => new TranslatorRequestModel {
                Text = s
            });

            using (var request = _requestBuilder.BuildTranslateRequest(sourceLanguage, targetLanguage, payload))
            {
                var translatedResults = await _responseGenerator.GenerateTranslateResponseAsync(request).ConfigureAwait(false);

                // Results are matched to documents by position: the n-th result fills the n-th document.
                var sentIndex = 0;
                foreach (var translatedValue in translatedResults)
                {
                    var translation = translatedValue.Translations.First();
                    var currentTranslatedDocument = translatedDocuments[sentIndex];

                    // Alignment may be absent; the null-conditional already yields null in that case.
                    currentTranslatedDocument.SetRawAlignment(translation.Alignment?.Projection);
                    currentTranslatedDocument.SetTranslatedMessage(translation.Text);

                    if (!string.IsNullOrEmpty(currentTranslatedDocument.GetRawAlignment()))
                    {
                        // Alignment available: tokenize both sentences with it, index the alignment,
                        // and rebuild the translated message from the resulting tokens.
                        var alignments = currentTranslatedDocument.GetRawAlignment().Trim().Split(' ');
                        currentTranslatedDocument.SetSourceTokens(PostProcessingUtilities.SplitSentence(currentTranslatedDocument.GetSourceMessage(), alignments));
                        currentTranslatedDocument.SetTranslatedTokens(PostProcessingUtilities.SplitSentence(translation.Text, alignments, false));
                        currentTranslatedDocument.SetIndexedAlignment(PostProcessingUtilities.WordAlignmentParse(alignments, currentTranslatedDocument.GetSourceTokens(), currentTranslatedDocument.GetTranslatedTokens()));
                        currentTranslatedDocument.SetTranslatedMessage(PostProcessingUtilities.Join(" ", currentTranslatedDocument.GetTranslatedTokens()));
                    }
                    else
                    {
                        // No alignment: each whole message is stored as a single token and the alignment map is empty.
                        var translatedText = translation.Text;
                        currentTranslatedDocument.SetTranslatedMessage(translatedText);
                        currentTranslatedDocument.SetSourceTokens(new string[] { currentTranslatedDocument.GetSourceMessage() });
                        currentTranslatedDocument.SetTranslatedTokens(new string[] { currentTranslatedDocument.GetTranslatedMessage() });
                        currentTranslatedDocument.SetIndexedAlignment(new Dictionary<int, int>());
                    }

                    sentIndex++;
                }

                // post process all translated documents
                PostProcesseDocuments(translatedDocuments, sourceLanguage);
                return translatedDocuments;
            }
        }
コード例 #6
0
        public void TranslatorUtilities_KeepSourceWordInTranslation_InvalidArguments_NullAlignments()
        {
            // Only the alignment map is null; the token arrays are empty and the word index is 0.
            Dictionary<int, int> missingAlignments = null;
            var emptySource      = new string[0];
            var emptyTranslation = new string[0];
            const int wordIndex  = 0;

            // A null alignment map is expected to surface as ArgumentNullException.
            Assert.ThrowsException<ArgumentNullException>(
                () => PostProcessingUtilities.KeepSourceWordInTranslation(missingAlignments, emptySource, emptyTranslation, wordIndex));
        }
コード例 #7
0
        public void TranslatorUtilities_SplitSentence_TargetSentence_ComplexScenario()
        {
            // Splitting is driven by the alignment entries here, so "dog's" is expected
            // to come back as the two tokens "dog" and "'s".
            var alignmentEntries = "0:1-0:1 3:7-3:5 9:10-6:7 9:10-14:15 12:16-9:12 18:21-17:20".Split(' ');
            var expectedTokens   = new[] { "My", "dog", "'s", "name", "is", "Enzo" };

            var actualTokens = PostProcessingUtilities.SplitSentence("My dog's name is Enzo", alignmentEntries, false);

            AreEqualArrays(expectedTokens, actualTokens);
        }
コード例 #8
0
        public void TranslatorUtilities_SplitSentence_SourceSentence_ComplexScenario()
        {
            // Splitting is driven by the alignment entries here, so "l'etat" is expected
            // to come back as the two tokens "l'" and "etat".
            var alignmentEntries = "0:2-0:1 4:6-3:6 8:10-8:9 12:13-11:13 14:17-15:19".Split(' ');
            var expectedTokens   = new[] { "mon", "nom", "est", "l'", "etat" };

            var actualTokens = PostProcessingUtilities.SplitSentence("mon nom est l'etat", alignmentEntries);

            AreEqualArrays(expectedTokens, actualTokens);
        }
コード例 #9
0
        public void TranslatorUtilities_JoinWordsIntoSentences_ComplexScenario()
        {
            // Special joining case: the token "l'" ends with a punctuation mark and is expected
            // to be attached to the following token without the delimiter in between.
            var words = new[] { "Mon", "nom", "est", "l'", "etat" };

            var sentence = PostProcessingUtilities.Join(" ", words);

            Assert.IsNotNull(sentence);
            Assert.AreEqual("Mon nom est l'etat", sentence);
        }
コード例 #10
0
        public void TranslatorUtilities_KeepSourceWordInTranslation_InvalidArguments_AllInvalidParameters()
        {
            // The alignment map has two entries, but both token arrays are empty
            // and the requested source word index is negative.
            var alignmentMap = new Dictionary<int, int>
            {
                [0] = 0,
                [1] = 1,
            };
            var emptySource      = new string[0];
            var emptyTranslation = new string[0];
            const int wordIndex  = -1;

            // This combination is expected to be rejected with ArgumentException.
            Assert.ThrowsException<ArgumentException>(
                () => PostProcessingUtilities.KeepSourceWordInTranslation(alignmentMap, emptySource, emptyTranslation, wordIndex));
        }
コード例 #11
0
        public void TranslatorUtilities_JoinWordsIntoSentences_InvalidArguments()
        {
            string   nullDelimiter = null;
            string[] nullTokens    = null;
            var      space         = " ";
            var      emptyTokens   = new string[0];

            // Null delimiter with a valid (empty) token array.
            Assert.ThrowsException<ArgumentNullException>(() => PostProcessingUtilities.Join(nullDelimiter, emptyTokens));

            // Valid delimiter with a null token array.
            Assert.ThrowsException<ArgumentNullException>(() => PostProcessingUtilities.Join(space, nullTokens));

            // Both arguments null.
            Assert.ThrowsException<ArgumentNullException>(() => PostProcessingUtilities.Join(nullDelimiter, nullTokens));
        }
コード例 #12
0
        public void TranslatorUtilities_JoinWordsIntoSentences_SimpleScenario()
        {
            const string space = " ";

            // Joining no tokens is expected to give an empty, non-null string.
            var emptyResult = PostProcessingUtilities.Join(space, new string[0]);
            Assert.IsNotNull(emptyResult);
            Assert.AreEqual("", emptyResult);

            // Direct joining case: plain words separated by the delimiter.
            var words    = new[] { "My", "name", "is", "Eldad" };
            var sentence = PostProcessingUtilities.Join(space, words);
            Assert.IsNotNull(sentence);
            Assert.AreEqual("My name is Eldad", sentence);
        }
コード例 #13
0
        public void TranslatorUtilities_WordAlignmentParse()
        {
            string rawAlignment = "0:2-0:1 4:6-3:6 8:10-8:9 12:13-11:13 14:17-15:19";

            string[] alignments       = rawAlignment.Split(" ");
            string[] sourceTokens     = new string[] { "mon", "nom", "est", "l'", "etat" };
            string[] translatedTokens = new string[] { "my", "name", "is", "the", "state" };

            Dictionary<int, int> alignmentMap = PostProcessingUtilities.WordAlignmentParse(alignments, sourceTokens, translatedTokens);

            Assert.IsNotNull(alignmentMap);

            // KeyValuePair<int, int> and its int members are value types, so null checks on them
            // can never fail; only the range check on the mapped value is asserted.
            foreach (KeyValuePair<int, int> alignmentElement in alignmentMap)
            {
                Assert.IsTrue(alignmentElement.Value >= 0);
            }
        }
コード例 #14
0
        public void TranslatorUtilities_KeepSourceWordInTranslation()
        {
            // Source word 2 ("contento") is mapped to translated word 2 ("happy") by the alignment map.
            var alignmentMap = new Dictionary<int, int>
            {
                [0] = 1,
                [1] = 0,
                [2] = 2,
            };
            var sourceWords     = new[] { "ti", "amo", "contento" };
            var translatedWords = new[] { "love", "you", "happy" };
            const int keptWordIndex = 2;

            var result = PostProcessingUtilities.KeepSourceWordInTranslation(alignmentMap, sourceWords, translatedWords, keptWordIndex);

            // The token count is unchanged and the aligned word is expected to be
            // replaced by its source form.
            Assert.IsNotNull(result);
            Assert.IsTrue(result.Length == 3);
            AreEqualArrays(new[] { "love", "you", "contento" }, result);
        }
コード例 #15
0
        /// <summary>
        /// Translates an array of strings from a source language to a target language.
        /// </summary>
        /// <remarks>
        /// Each entry of <paramref name="translateArraySourceTexts"/> is overwritten in place with its
        /// pre-processed text before the request body is built, so the caller's array is modified.
        /// Documents whose response carries no alignment keep their token and alignment properties unset.
        /// </remarks>
        /// <param name="translateArraySourceTexts">The strings to translate.</param>
        /// <param name="from">The language code of the translation text. For example, "en" for English.</param>
        /// <param name="to">The language code to translate the text into.</param>
        /// <returns>A task whose result is the list of translated documents, one per input string.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="translateArraySourceTexts"/> is null.</exception>
        /// <exception cref="Exception">The service answered with a status other than 200 OK;
        /// the message is the response's reason phrase.</exception>
        public async Task<List<TranslatedDocument>> TranslateArrayAsync(string[] translateArraySourceTexts, string from, string to)
        {
            // Fail with a descriptive exception instead of a NullReferenceException on .Length below.
            if (translateArraySourceTexts == null)
            {
                throw new ArgumentNullException(nameof(translateArraySourceTexts));
            }

            var translatedDocuments = new List<TranslatedDocument>();
            var uri = "https://api.microsofttranslator.com/v2/Http.svc/TranslateArray2";

            for (var srcTxtIndx = 0; srcTxtIndx < translateArraySourceTexts.Length; srcTxtIndx++)
            {
                // Check for literal tag in input user message
                var currentTranslatedDocument = new TranslatedDocument(translateArraySourceTexts[srcTxtIndx]);
                translatedDocuments.Add(currentTranslatedDocument);
                PreprocessMessage(currentTranslatedDocument.SourceMessage, out var processedText, out var literalNoTranslateList);
                currentTranslatedDocument.SourceMessage = processedText;

                // The pre-processed text is what gets sent to the service, so it replaces the original entry.
                translateArraySourceTexts[srcTxtIndx] = processedText;
                currentTranslatedDocument.LiteranlNoTranslatePhrases = literalNoTranslateList;
            }

            // body of http request; the language codes are XML-escaped just like the texts so that
            // a stray '<' or '&' in either cannot break or alter the request document.
            var body = "<TranslateArrayRequest>" +
                       "<AppId />" +
                       $"<From>{SecurityElement.Escape(from)}</From>" +
                       "<Options>" +
                       " <Category xmlns=\"http://schemas.datacontract.org/2004/07/Microsoft.MT.Web.Service.V2\" >generalnn</Category>" +
                       "<ContentType xmlns=\"http://schemas.datacontract.org/2004/07/Microsoft.MT.Web.Service.V2\">text/plain</ContentType>" +
                       "<ReservedFlags xmlns=\"http://schemas.datacontract.org/2004/07/Microsoft.MT.Web.Service.V2\" />" +
                       "<State xmlns=\"http://schemas.datacontract.org/2004/07/Microsoft.MT.Web.Service.V2\" />" +
                       "<Uri xmlns=\"http://schemas.datacontract.org/2004/07/Microsoft.MT.Web.Service.V2\" />" +
                       "<User xmlns=\"http://schemas.datacontract.org/2004/07/Microsoft.MT.Web.Service.V2\" />" +
                       "</Options>" +
                       "<Texts>" +
                       string.Join(string.Empty, translateArraySourceTexts.Select(s => $"<string xmlns=\"http://schemas.microsoft.com/2003/10/Serialization/Arrays\">{SecurityElement.Escape(s)}</string>\n"))
                       + "</Texts>" +
                       $"<To>{SecurityElement.Escape(to)}</To>" +
                       "</TranslateArrayRequest>";

            var accessToken = await _authToken.GetAccessTokenAsync().ConfigureAwait(false);

            using (var request = new HttpRequestMessage())
            {
                request.Method     = HttpMethod.Post;
                request.RequestUri = new Uri(uri);
                request.Content    = new StringContent(body, Encoding.UTF8, "text/xml");
                request.Headers.Add("Authorization", accessToken);

                // ConfigureAwait(false) matches the token call above: nothing here needs the captured context.
                using (var response = await _httpClient.SendAsync(request).ConfigureAwait(false))
                {
                    if (response.StatusCode != HttpStatusCode.OK)
                    {
                        throw new Exception(response.ReasonPhrase);
                    }

                    var responseBody = await response.Content.ReadAsStringAsync().ConfigureAwait(false);

                    Console.WriteLine("Request status is OK. Result of translate array method is:");
                    var doc = XDocument.Parse(responseBody);
                    var ns  = XNamespace.Get("http://schemas.datacontract.org/2004/07/Microsoft.MT.Web.Service.V2");

                    // Response elements are matched to documents by position.
                    var sentIndex = 0;
                    foreach (var xe in doc.Descendants(ns + "TranslateArray2Response"))
                    {
                        var currentTranslatedDocument = translatedDocuments[sentIndex];

                        // A missing Alignment element is treated the same as an empty one.
                        currentTranslatedDocument.RawAlignment = xe.Element(ns + "Alignment")?.Value;
                        if (!string.IsNullOrEmpty(currentTranslatedDocument.RawAlignment))
                        {
                            var alignments = currentTranslatedDocument.RawAlignment.Trim().Split(' ');
                            currentTranslatedDocument.SourceTokens     = PostProcessingUtilities.SplitSentence(currentTranslatedDocument.SourceMessage, alignments);
                            currentTranslatedDocument.TranslatedTokens = PostProcessingUtilities.SplitSentence(xe.Element(ns + "TranslatedText").Value, alignments, false);
                            currentTranslatedDocument.IndexedAlignment = PostProcessingUtilities.WordAlignmentParse(alignments, currentTranslatedDocument.SourceTokens, currentTranslatedDocument.TranslatedTokens);
                        }

                        sentIndex += 1;
                    }

                    return translatedDocuments;
                }
            }
        }
コード例 #16
0
        public void TranslatorUtilities_SplitSentence_InvalidArguments()
        {
            // A null sentence is expected to surface as ArgumentNullException.
            string missingSentence = null;

            Assert.ThrowsException<ArgumentNullException>(
                () => PostProcessingUtilities.SplitSentence(missingSentence));
        }
コード例 #17
0
        /// <summary>
        /// Translates an array of strings from a source language to a target language.
        /// </summary>
        /// <remarks>
        /// Each entry of <paramref name="translateArraySourceTexts"/> is overwritten in place with its
        /// pre-processed text before the request is built, so the caller's array is modified.
        /// </remarks>
        /// <param name="translateArraySourceTexts">The strings to translate.</param>
        /// <param name="from">The language code of the translation text. For example, "en" for English.</param>
        /// <param name="to">The language code to translate the text into.</param>
        /// <returns>A task whose result is the list of translated documents, one per input string.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="translateArraySourceTexts"/> is null.</exception>
        /// <exception cref="ArgumentException">The service answered with a non-success status code.</exception>
        public async Task<List<TranslatedDocument>> TranslateArrayAsync(string[] translateArraySourceTexts, string from, string to)
        {
            // Fail with a descriptive exception instead of a NullReferenceException on .Length below.
            if (translateArraySourceTexts == null)
            {
                throw new ArgumentNullException(nameof(translateArraySourceTexts));
            }

            var translatedDocuments = new List<TranslatedDocument>();

            for (var srcTxtIndx = 0; srcTxtIndx < translateArraySourceTexts.Length; srcTxtIndx++)
            {
                // Check for literal tag in input user message
                var currentTranslatedDocument = new TranslatedDocument(translateArraySourceTexts[srcTxtIndx]);
                translatedDocuments.Add(currentTranslatedDocument);
                PreprocessMessage(currentTranslatedDocument.SourceMessage, out var processedText, out var literalNoTranslateList);
                currentTranslatedDocument.SourceMessage = processedText;

                // The pre-processed text is what gets sent to the service, so it replaces the original entry.
                translateArraySourceTexts[srcTxtIndx] = processedText;
                currentTranslatedDocument.LiteranlNoTranslatePhrases = literalNoTranslateList;
            }

            // list of translation request for the service
            var payload = translateArraySourceTexts.Select(s => new TranslatorRequestModel {
                Text = s
            });

            using (var request = GetTranslateRequestMessage(from, to, payload))
            using (var response = await _httpClient.SendAsync(request).ConfigureAwait(false))
            {
                // The body is needed on both the success and the error path, so it is read once.
                var responseBody = await response.Content.ReadAsStringAsync().ConfigureAwait(false);

                if (!response.IsSuccessStatusCode)
                {
                    // An empty or partial error payload deserializes to null members; fall back to the
                    // HTTP reason phrase so the failure is not hidden behind a NullReferenceException.
                    var errorResult = JsonConvert.DeserializeObject<ErrorModel>(responseBody);
                    throw new ArgumentException(errorResult?.Error?.Message ?? response.ReasonPhrase);
                }

                var translatedResults = JsonConvert.DeserializeObject<IEnumerable<TranslatedResult>>(responseBody);

                // Results are matched to documents by position: the n-th result fills the n-th document.
                var sentIndex = 0;
                foreach (var translatedValue in translatedResults)
                {
                    var translation = translatedValue.Translations.First();
                    var currentTranslatedDocument = translatedDocuments[sentIndex];

                    // Alignment may be absent; the null-conditional already yields null in that case.
                    currentTranslatedDocument.RawAlignment  = translation.Alignment?.Projection;
                    currentTranslatedDocument.TargetMessage = translation.Text;

                    if (!string.IsNullOrEmpty(currentTranslatedDocument.RawAlignment))
                    {
                        // Alignment available: tokenize both sentences with it, index the alignment,
                        // and rebuild the target message from the resulting tokens.
                        var alignments = currentTranslatedDocument.RawAlignment.Trim().Split(' ');
                        currentTranslatedDocument.SourceTokens     = PostProcessingUtilities.SplitSentence(currentTranslatedDocument.SourceMessage, alignments);
                        currentTranslatedDocument.TranslatedTokens = PostProcessingUtilities.SplitSentence(translation.Text, alignments, false);
                        currentTranslatedDocument.IndexedAlignment = PostProcessingUtilities.WordAlignmentParse(alignments, currentTranslatedDocument.SourceTokens, currentTranslatedDocument.TranslatedTokens);
                        currentTranslatedDocument.TargetMessage    = PostProcessingUtilities.Join(" ", currentTranslatedDocument.TranslatedTokens);
                    }
                    else
                    {
                        // No alignment: each whole message is stored as a single token and the alignment map is empty.
                        var translatedText = translation.Text;
                        currentTranslatedDocument.TargetMessage    = translatedText;
                        currentTranslatedDocument.SourceTokens     = new string[] { currentTranslatedDocument.SourceMessage };
                        currentTranslatedDocument.TranslatedTokens = new string[] { currentTranslatedDocument.TargetMessage };
                        currentTranslatedDocument.IndexedAlignment = new Dictionary<int, int>();
                    }

                    sentIndex++;
                }

                return translatedDocuments;
            }
        }