Ejemplo n.º 1
0
        /// <summary>
        /// Sends the de-duplicated new segments of every project target language to the
        /// MT service as a pre-order batch, one request per target language.
        /// </summary>
        /// <exception cref="Exception">
        /// Thrown when the service replies that a batch translation or customization is already in process.
        /// </exception>
        private void PreOrderMt()
        {
            var projectInfo = this.Project.GetProjectInfo();
            var sourceCode  = projectInfo.SourceLanguage.CultureInfo.TwoLetterISOLanguageName;

            foreach (var targetLang in projectInfo.TargetLanguages)
            {
                var targetCode        = targetLang.CultureInfo.TwoLetterISOLanguageName;
                var uniqueNewSegments = this.ProjectNewSegments[targetLang].Distinct().ToList();

                //Send the new segments to MT service
                var result = OpusCatMTServiceHelper.PreOrderBatch(opusCatOptions.mtServiceAddress, opusCatOptions.mtServicePort, uniqueNewSegments, sourceCode, targetCode, opusCatOptions.modelTag);

                //Only this exact reply is treated as an error; any other reply is ignored
                if (result == "batch translation or customization already in process")
                {
                    throw new Exception("MT engine is currently batch translating or fine-tuning, wait for previous job to finish (or cancel it by restarting MT engine).");
                }
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// DEPRECATED: replaced with the segment change handler event.
        /// Requests a translation for every segment of the document whose confirmation level is
        /// Unspecified, so that the translator won't have to wait for the translation to finish
        /// when opening a segment.
        /// </summary>
        /// <remarks>
        /// Studio contains a feature called LookAhead which attempts to do a similar thing, but
        /// LookAhead appears to be buggy with TMs etc., so it's better to rely on a custom caching system.
        /// </remarks>
        /// <param name="doc">Document whose segment pairs are scanned.</param>
        /// <param name="langPair">Language direction that supplies the source and target language codes.</param>
        /// <param name="options">Options handed to the MT service helper; its modelTag is passed along as the model tag.</param>
        private static void TranslateDocumentSegments(Document doc, LanguageDirection langPair, OpusCatOptions options)
        {
            var visitor = new OpusCatMarkupDataVisitor();

            //The language codes are identical for every segment, so resolve them once up front
            var sourceCode = langPair.SourceLanguage.CultureInfo.TwoLetterISOLanguageName;
            var targetCode = langPair.TargetLanguage.CultureInfo.TwoLetterISOLanguageName;

            foreach (var segmentPair in doc.SegmentPairs)
            {
                if (segmentPair.Properties.ConfirmationLevel != Sdl.Core.Globalization.ConfirmationLevel.Unspecified)
                {
                    continue;
                }

                //The visitor is reused, so clear what it collected from the previous segment
                visitor.Reset();
                segmentPair.Source.AcceptVisitor(visitor);

                //This will generate the translation and cache it for later use
                OpusCatMTServiceHelper.Translate(options, visitor.PlainText, sourceCode, targetCode, options.modelTag);
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Segment change handler: collects the source texts of the unconfirmed (Unspecified or Draft)
        /// segments starting from the active segment and pre-orders their translations for every
        /// active OPUS-CAT options instance that has pregeneration enabled.
        /// </summary>
        /// <param name="langDir">Language direction used to look up the project options and the language codes.</param>
        /// <param name="sender">The event source; cast to <c>Document</c>.</param>
        /// <param name="e">Event arguments (not used).</param>
        private static void segmentChanged(LanguageDirection langDir, object sender, EventArgs e)
        {
            var doc = (Document)sender;

            //There are some "segments" in the Trados editor view which are not proper segments, like
            //the start of document tag
            var activeSegmentPair = doc.ActiveSegmentPair;
            if (activeSegmentPair == null)
            {
                return;
            }

            var activeOpusCatOptions = OpusCatProvider.GetProjectOpusCatOptions(doc.Project, langDir);

            //Materialize the filtered options once: the result is checked for emptiness, scanned for
            //the largest segment count and iterated below, which would otherwise re-run the query each time
            var optionsWithPregenerate = activeOpusCatOptions?.Where(x => x.pregenerateMt).ToList();

            //If there is no active OPUS CAT provider, unsubscribe this handler (there's probably no event in Trados
            //API for removing a translation provider from a project, so this is the only way to unsubscribe
            //after translation provider has been removed).
            if (optionsWithPregenerate == null || optionsWithPregenerate.Count == 0)
            {
                OpusCatProvider.ClearSegmentHandlers();
                return;
            }

            //Identify the active segment once instead of re-reading it for every element in the predicate
            var activeSegmentId       = activeSegmentPair.Properties.Id;
            var activeParagraphUnitId = activeSegmentPair.GetParagraphUnitProperties().ParagraphUnitId;

            //Skip everything before the active segment; the active segment itself is included
            var nextSegmentPairs = doc.SegmentPairs.SkipWhile(x =>
                                                              !(x.Properties.Id == activeSegmentId &&
                                                                x.GetParagraphUnitProperties().ParagraphUnitId == activeParagraphUnitId));

            //Collect enough texts for the most demanding options instance; the others take a prefix
            var segmentsNeeded = optionsWithPregenerate.Max(x => x.pregenerateSegmentCount);

            var visitor            = new OpusCatMarkupDataVisitor();
            var sourceSegmentTexts = new List<string>();

            foreach (var segmentPair in nextSegmentPairs)
            {
                //">=" also stops immediately for a non-positive count instead of scanning the whole document
                if (sourceSegmentTexts.Count >= segmentsNeeded)
                {
                    break;
                }

                //Also preorder translations for Draft segments, since quite often there will be draft content
                //provided in segments where having MT is still desirable. This could also be an option.
                var confirmationLevel = segmentPair.Properties.ConfirmationLevel;
                if (confirmationLevel == Sdl.Core.Globalization.ConfirmationLevel.Unspecified ||
                    confirmationLevel == Sdl.Core.Globalization.ConfirmationLevel.Draft)
                {
                    visitor.Reset();
                    segmentPair.Source.AcceptVisitor(visitor);
                    sourceSegmentTexts.Add(visitor.PlainText);
                }
            }

            var sourceCode = langDir.SourceLanguage.CultureInfo.TwoLetterISOLanguageName;
            var targetCode = langDir.TargetLanguage.CultureInfo.TwoLetterISOLanguageName;

            foreach (var options in optionsWithPregenerate)
            {
                //The preorder method doesn't wait for the translation, so the requests return quicker
                var sourceSegmentTextsNeeded = sourceSegmentTexts.Take(options.pregenerateSegmentCount).ToList();
                OpusCatMTServiceHelper.PreOrderBatch(options, sourceSegmentTextsNeeded, sourceCode, targetCode, options.modelTag);
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Builds a fine-tuning set for the target language with the most collected segments
        /// (project translations, optionally extended with TM extracts) and sends it to the
        /// MT service for customization.
        /// </summary>
        /// <exception cref="Exception">
        /// Thrown when the MT service language pair data is not available, or when fewer sentence
        /// pairs than <c>OpusCatTpSettings.Default.FinetuningMinSentencePairs</c> were collected.
        /// </exception>
        /// <exception cref="InvalidOperationException">
        /// Thrown when no target language has any collected segments.
        /// </exception>
        private void Finetune()
        {
            var projectInfo        = this.Project.GetProjectInfo();
            var sourceCode         = projectInfo.SourceLanguage.CultureInfo.TwoLetterISOLanguageName;
            var collectedLanguages = this.ProjectNewSegments.Keys.Union(this.ProjectTranslations.Keys).ToList();

            if (ConnectionControl.MtServiceLanguagePairs == null)
            {
                throw new Exception("Language pair data not available, check if connection with OPUS-CAT MT Engine is working.");
            }

            if (collectedLanguages.Count == 0)
            {
                //Same exception type that First() would raise on the empty sequence, with a usable message
                throw new InvalidOperationException("No segments have been collected for any target language.");
            }

            //Number of collected segments (translated + new) for one language. A language may be present
            //in only one of the two dictionaries, so a missing key counts as zero instead of throwing.
            Func<Language, int> segmentCount = x =>
                (this.ProjectTranslations.ContainsKey(x) ? this.ProjectTranslations[x].Count : 0) +
                (this.ProjectNewSegments.ContainsKey(x) ? this.ProjectNewSegments[x].Count() : 0);

            //NOTE(review): the source side of the pair is the two-letter ISO code while the target side is
            //IsoAbbreviation - confirm that both match the pair format reported by the MT service.
            var languagePairsWithMt = collectedLanguages
                                      .Where(x => ConnectionControl.MtServiceLanguagePairs.Contains($"{sourceCode}-{x.IsoAbbreviation}"))
                                      .ToList();

            //Select the target language with most segments as the one to finetune.
            //If no collected language matches a service language pair, back off to all collected
            //languages (e.g. with rare languages where the iso code may not match the service codes).
            //On a tie the language that comes first in enumeration order wins (the ordering is stable).
            var candidateLanguages = languagePairsWithMt.Any() ? languagePairsWithMt : collectedLanguages;
            Language primaryTargetLanguage = candidateLanguages.OrderByDescending(segmentCount).First();

            var targetCode = primaryTargetLanguage.CultureInfo.TwoLetterISOLanguageName;

            //Remove duplicates; a language without an entry contributes an empty list
            List<Tuple<string, string>> uniqueProjectTranslations =
                this.ProjectTranslations.ContainsKey(primaryTargetLanguage)
                    ? this.ProjectTranslations[primaryTargetLanguage].Distinct().ToList()
                    : new List<Tuple<string, string>>();
            List<string> uniqueNewSegments =
                this.ProjectNewSegments.ContainsKey(primaryTargetLanguage)
                    ? this.ProjectNewSegments[primaryTargetLanguage].Distinct().ToList()
                    : new List<string>();

            List<Tuple<string, string>> finetuneSet;

            if (this.tms[primaryTargetLanguage].Any())
            {
                //Extend the project translations with material extracted from the TMs of this language
                var tmExtracts = this.ExtractFromTm(this.tms[primaryTargetLanguage], uniqueNewSegments, uniqueProjectTranslations);
                finetuneSet = uniqueProjectTranslations.Union(tmExtracts).ToList();
            }
            else
            {
                finetuneSet = uniqueProjectTranslations;
            }

            finetuneSet = finetuneSet.Take(this.settings.MaxFinetuningSentences).ToList();

            if (finetuneSet.Count < OpusCatTpSettings.Default.FinetuningMinSentencePairs)
            {
                throw new Exception(
                          $"Not enough sentence pairs for fine-tuning. Found {finetuneSet.Count}, minimum is {OpusCatTpSettings.Default.FinetuningMinSentencePairs}");
            }

            //Send the tuning set to MT service (the reply is not inspected here)
            OpusCatMTServiceHelper.Customize(
                this.opusCatOptions.mtServiceAddress,
                this.opusCatOptions.mtServicePort,
                finetuneSet,
                uniqueNewSegments,
                sourceCode,
                targetCode,
                this.opusCatOptions.modelTag,
                this.settings.IncludePlaceholderTags,
                this.settings.IncludeTagPairs);
        }
        /// <summary>
        /// Queries the MT service for its supported language pairs and model tags, then publishes a
        /// human-readable connection status and the model tag list through the dispatcher.
        /// </summary>
        /// <param name="host">Address of the MT service.</param>
        /// <param name="port">Port of the MT service.</param>
        /// <param name="modeltag">Model tag passed to the per-pair status check and to <c>UpdateModelTags</c>.</param>
        private void FetchServiceData(string host, string port, string modeltag)
        {
            StringBuilder connectionResult = new StringBuilder();

            try
            {
                ConnectionControl.MtServiceLanguagePairs = OpusCatMTServiceHelper.ListSupportedLanguages(host, port);
                IEnumerable<string> modelTagLanguagePairs;
                if (this.LanguagePairs != null)
                {
                    //Materialize the intersection once; it is counted, joined and iterated below,
                    //which would otherwise recompute it on every use
                    var projectLanguagePairsWithMt = ConnectionControl.MtServiceLanguagePairs.Intersect(this.LanguagePairs).ToList();
                    modelTagLanguagePairs = projectLanguagePairsWithMt;
                    if (projectLanguagePairsWithMt.Count == 0)
                    {
                        connectionResult.Append("No MT models available for the language pairs of the project");
                    }
                    else if (this.LanguagePairs.Count == projectLanguagePairsWithMt.Count)
                    {
                        connectionResult.Append("MT models available for all the language pairs of the project");
                    }
                    else
                    {
                        connectionResult.Append($"MT models available for some of the language pairs of the project: {String.Join(", ", projectLanguagePairsWithMt)}");
                    }

                    //Get the detailed status for each project language pair
                    foreach (var pair in this.LanguagePairs)
                    {
                        connectionResult.AppendLine();
                        //Pairs are split on '-' into source and target code
                        var sourceTarget = pair.Split('-');
                        connectionResult.Append(OpusCatMTServiceHelper.CheckModelStatus(host, port, sourceTarget[0], sourceTarget[1], modeltag));
                    }
                }
                else
                {
                    //This option is used with the batch task, where there's no easy way of getting
                    //the project language pairs, so all pairs are assumed.
                    modelTagLanguagePairs = ConnectionControl.MtServiceLanguagePairs;
                    connectionResult.Append($"MT models available for following language pairs: {String.Join(", ", ConnectionControl.MtServiceLanguagePairs)}");
                }

                //Get a list of model tags that are supported for these language pairs
                List<string> modelTags = new List<string>();
                foreach (var languagePair in modelTagLanguagePairs)
                {
                    var pairSplit = languagePair.Split('-');
                    modelTags.AddRange(OpusCatMTServiceHelper.GetLanguagePairModelTags(host, port, pairSplit[0], pairSplit[1]));
                }

                this.NoConnection = false;

                Dispatcher.Invoke(() => UpdateModelTags(modelTags, modeltag));
            }
            catch (OpusCatEngineConnectionException)
            {
                //Only connection failures are turned into a status message; any other exception propagates
                connectionResult.AppendLine($"No connection to OPUS-CAT MT Engine at {host}:{port}.");
                connectionResult.Append("Make sure OPUS-CAT MT Engine application has been installed on your computer(check help link below) and is running and that it uses the same connection settings as the plugin (default settings should work).");
                this.NoConnection = true;
            }

            //Publish whatever status text was assembled, on success and on connection failure alike
            Dispatcher.Invoke(() => this.ConnectionStatus = connectionResult.ToString());
        }
        /// <summary>
        /// Fetches a machine translation for <paramref name="sourceText"/> from the configured source
        /// and, for full and normal searches, wraps it into a single search result. The marker words
        /// PLACEHOLDER, TAGPAIRSTART and TAGPAIREND found in the MT output are replaced with the
        /// placeholders and tags held by the visitor.
        /// </summary>
        /// <param name="sourceText">Text sent to the translation source.</param>
        /// <param name="mode">Search mode; only FullSearch and NormalSearch produce a result.</param>
        /// <param name="segment">Segment passed to <c>CreateSearchResult</c> together with its HasTags flag.</param>
        /// <param name="sourceCode">Source language code.</param>
        /// <param name="targetCode">Target language code.</param>
        /// <returns>A list with one result, or an empty list when there is no translation or the mode does not apply.</returns>
        private List <SearchResult> GenerateSystemResult(string sourceText, SearchMode mode, Segment segment, string sourceCode, string targetCode)
        {
            var results = new List<SearchResult>();

            //Ask whichever translation source is configured; an unknown source leaves the output null
            string mtOutput = null;
            if (this._options.opusCatSource == OpusCatOptions.OpusCatSource.OpusCatMtEngine)
            {
                mtOutput = OpusCatMTServiceHelper.Translate(this._options, sourceText, sourceCode, targetCode, this._options.modelTag);
            }
            else if (this._options.opusCatSource == OpusCatOptions.OpusCatSource.Elg)
            {
                mtOutput = OpusCatProvider.ElgConnection.Translate(sourceText, sourceCode, targetCode);
            }

            //Nothing to report without a translation, or for search modes other than full/normal
            bool isSegmentLookup = mode == SearchMode.FullSearch || mode == SearchMode.NormalSearch;
            if (String.IsNullOrEmpty(mtOutput) || !isSegmentLookup)
            {
                return results;
            }

            var targetSegment = new Segment(_languageDirection.TargetCulture);

            bool visitorHasMarkup = _visitor.Placeholders.Any() || _visitor.TagStarts.Any() || _visitor.TagEnds.Any();
            if (!visitorHasMarkup)
            {
                targetSegment.Add(mtOutput);
            }
            else
            {
                //Tag starts and ends must match, so a stack keeps track of the tags opened so far
                var openTags = new Stack<Tag>();

                //The capturing group keeps the marker words in the split output
                foreach (var piece in Regex.Split(mtOutput, @"\b(PLACEHOLDER|TAGPAIRSTART ?| ?TAGPAIREND)\b"))
                {
                    //Drop the optional space after TAGPAIRSTART and before TAGPAIREND before comparing
                    var token = piece.Replace("TAGPAIRSTART ", "TAGPAIRSTART")
                                     .Replace(" TAGPAIREND", "TAGPAIREND");

                    if (token == "PLACEHOLDER")
                    {
                        //Markers beyond the available placeholders are dropped
                        if (_visitor.Placeholders.Count > 0)
                        {
                            targetSegment.Add(_visitor.Placeholders.Dequeue());
                        }
                    }
                    else if (token == "TAGPAIRSTART")
                    {
                        if (_visitor.TagStarts.Count > 0)
                        {
                            var openingTag = _visitor.TagStarts.Dequeue();
                            openTags.Push(openingTag);
                            targetSegment.Add(openingTag);
                        }
                    }
                    else if (token == "TAGPAIREND")
                    {
                        //An end marker closes the most recently opened tag; stray end markers are dropped
                        if (openTags.Count > 0)
                        {
                            var matchingStart = openTags.Pop();
                            targetSegment.Add(_visitor.TagEnds[matchingStart.TagID]);
                        }
                    }
                    else
                    {
                        //Ordinary text is added as it came out of the split
                        targetSegment.Add(piece);
                    }
                }

                //Close every tag that is still open by inserting its end tag right after the start tag
                foreach (var unclosedTag in openTags)
                {
                    var startIndex = targetSegment.Elements.IndexOf(unclosedTag);
                    targetSegment.Elements.Insert(startIndex + 1, _visitor.TagEnds[unclosedTag.TagID]);
                }
            }

            results.Add(CreateSearchResult(segment, targetSegment, segment.HasTags, "opus-cat"));
            return results;
        }
Ejemplo n.º 7
0
 /// <summary>
 /// Reads the MT service address and port from <paramref name="options"/> and forwards the
 /// request to <c>OpusCatMTServiceHelper.GetLanguagePairModelTags</c>.
 /// </summary>
 /// <param name="options">Options carrying mtServiceAddress and mtServicePort.</param>
 /// <param name="srcLangCode">Source language code of the pair.</param>
 /// <param name="trgLangCode">Target language code of the pair.</param>
 /// <returns>Whatever list the address/port based call returns.</returns>
 public static List <string> GetLanguagePairModelTags(OpusCatOptions options, string srcLangCode, string trgLangCode)
     => OpusCatMTServiceHelper.GetLanguagePairModelTags(
            options.mtServiceAddress,
            options.mtServicePort,
            srcLangCode,
            trgLangCode);
Ejemplo n.º 8
0
 /// <summary>
 /// Reads the MT service address and port from <paramref name="options"/> and forwards the
 /// request to <c>OpusCatMTServiceHelper.GetTokenCode</c>.
 /// </summary>
 /// <param name="options">Options carrying mtServiceAddress and mtServicePort.</param>
 /// <returns>Whatever string the address/port based call returns.</returns>
 public static string GetTokenCode(OpusCatOptions options)
     => OpusCatMTServiceHelper.GetTokenCode(
            options.mtServiceAddress,
            options.mtServicePort);