/// <summary>
/// Starts an interactive session on the toy corpus model and checks that the
/// first result for the source sentence is the expected translation.
/// </summary>
public void TranslateInteractively_TranslationCorrect()
 {
     using (ThotSmtModel smtModel = new ThotSmtModel(TestHelpers.ToyCorpusConfigFileName))
     {
         using (IInteractiveSmtEngine engine = smtModel.CreateInteractiveEngine())
         {
             string[] sourceWords   = "me marcho hoy por la tarde .".Split();
             string[] expectedWords = "i leave today in the afternoon .".Split();

             using (IInteractiveTranslationSession session = engine.TranslateInteractively(1, sourceWords))
             {
                 // Only one result was requested, so index 0 is the result under test.
                 TranslationResult best = session.CurrentResults[0];
                 Assert.That(best.TargetSegment, Is.EqualTo(expectedWords));
             }
         }
     }
 }
 /// <summary>
 /// Checks the first result before and after a prefix is set on the session:
 /// the source word "caminé" initially appears untranslated in the target, and
 /// after the prefix "i walked" is set the result starts with that prefix.
 /// </summary>
 public void SetPrefix_MissingWord_TranslationCorrect()
 {
     using (ThotSmtModel smtModel = new ThotSmtModel(TestHelpers.ToyCorpusConfigFileName))
     {
         using (IInteractiveSmtEngine engine = smtModel.CreateInteractiveEngine())
         {
             string[] sourceWords = "caminé a mi habitación .".Split();

             using (IInteractiveTranslationSession session = engine.TranslateInteractively(1, sourceWords))
             {
                 // Before any prefix is supplied.
                 TranslationResult initial = session.CurrentResults[0];
                 Assert.That(initial.TargetSegment, Is.EqualTo("caminé to my room .".Split()));

                 // After the prefix is supplied.
                 TranslationResult afterPrefix = session.SetPrefix("i walked".Split(), true)[0];
                 Assert.That(afterPrefix.TargetSegment, Is.EqualTo("i walked to my room .".Split()));
             }
         }
     }
 }
示例#3
0
        /// <summary>
        /// Creates the SMT model, the interactive SMT engine, the rule engine and the
        /// hybrid engine that combines them, then marks this instance as loaded.
        /// Does nothing when already loaded.
        /// </summary>
        /// <remarks>
        /// If any creation step throws, the objects created so far are disposed and
        /// their fields reset to <c>null</c> before the exception is rethrown.
        /// Otherwise a later call would overwrite the fields and leak the earlier
        /// instances, because <c>_isLoaded</c> would still be <c>false</c>.
        /// </remarks>
        private void Load()
        {
            if (_isLoaded)
            {
                return;
            }

            try
            {
                _smtModel  = _smtModelFactory.Create(this);
                _smtEngine = _smtModel.CreateInteractiveEngine();

                _ruleEngine = _ruleEngineFactory.Create(this);

                _hybridEngine = new HybridTranslationEngine(_smtEngine, _ruleEngine);
                _isLoaded     = true;
            }
            catch
            {
                // Roll back in reverse creation order. The hybrid engine is created
                // last, so it has never been assigned when this point is reached.
                if (_ruleEngine != null)
                {
                    _ruleEngine.Dispose();
                    _ruleEngine = null;
                }

                if (_smtEngine != null)
                {
                    _smtEngine.Dispose();
                    _smtEngine = null;
                }

                if (_smtModel != null)
                {
                    _smtModel.Dispose();
                    _smtModel = null;
                }

                throw;
            }
        }
        /// <summary>
        /// Translates two segments with the same engine. In the first session the
        /// source word "hablé" initially appears untranslated; the full translation
        /// is supplied as a prefix and approved. The second session then expects
        /// "hablé" to be rendered as "talked".
        /// </summary>
        public void Approve_TwoSegmentsUnknownWord_LearnsUnknownWord()
        {
            using (ThotSmtModel smtModel = new ThotSmtModel(TestHelpers.ToyCorpusConfigFileName))
            {
                using (IInteractiveSmtEngine engine = smtModel.CreateInteractiveEngine())
                {
                    // First segment: correct the translation and approve it.
                    string[] firstSource = "hablé con recepción .".Split();
                    using (IInteractiveTranslationSession firstSession = engine.TranslateInteractively(1, firstSource))
                    {
                        TranslationResult initial = firstSession.CurrentResults[0];
                        Assert.That(initial.TargetSegment, Is.EqualTo("hablé with reception .".Split()));

                        TranslationResult corrected = firstSession.SetPrefix("i talked".Split(), true)[0];
                        Assert.That(corrected.TargetSegment, Is.EqualTo("i talked with reception .".Split()));

                        firstSession.SetPrefix("i talked with reception .".Split(), true);
                        firstSession.Approve();
                    }

                    // Second segment: same source word, translated after the approval.
                    string[] secondSource = "hablé hasta cinco en punto .".Split();
                    using (IInteractiveTranslationSession secondSession = engine.TranslateInteractively(1, secondSource))
                    {
                        TranslationResult learned = secondSession.CurrentResults[0];
                        Assert.That(learned.TargetSegment, Is.EqualTo("talked until five o ' clock .".Split()));
                    }
                }
            }
        }
示例#5
0
        /// <summary>
        /// Saves, then disposes the hybrid engine, the rule engine (if any), the SMT
        /// engine and the SMT model, and marks this instance as not loaded.
        /// Does nothing when not loaded.
        /// </summary>
        private void Unload()
        {
            if (_isLoaded)
            {
                // Persist before tearing anything down.
                Save();

                _hybridEngine.Dispose();
                _hybridEngine = null;

                // The rule engine is the only component that may be absent.
                if (_ruleEngine != null)
                {
                    _ruleEngine.Dispose();
                    _ruleEngine = null;
                }

                _smtEngine.Dispose();
                _smtEngine = null;

                _smtModel.Dispose();
                _smtModel = null;

                _isLoaded = false;
            }
        }
示例#6
0
 /// <summary>
 /// Creates a hybrid engine over the given SMT engine and an optional rule engine,
 /// starting with an empty set of sessions.
 /// </summary>
 /// <param name="smtEngine">The interactive SMT engine stored in <c>SmtEngine</c>.</param>
 /// <param name="ruleEngine">The rule engine stored in <c>RuleEngine</c>; defaults to <c>null</c>.</param>
 public HybridTranslationEngine(IInteractiveSmtEngine smtEngine, ITranslationEngine ruleEngine = null)
 {
     _sessions = new HashSet<HybridInteractiveTranslationSession>();

     RuleEngine = ruleEngine;
     SmtEngine = smtEngine;
 }
示例#7
0
        /// <summary>
        /// Loads a project from the XML file <paramref name="fileName"/>: creates the SMT
        /// model and hybrid translation engine, builds a <c>TextViewModel</c> for each
        /// <c>Text</c> element, and creates the source, target and alignment corpora.
        /// </summary>
        /// <param name="fileName">Path of the project XML file. Paths inside the file are
        /// combined with this file's directory.</param>
        /// <returns>
        /// <c>false</c> if the file cannot be loaded, the <c>TranslationEngine</c> or
        /// <c>SmtConfig</c> element is missing, a text lacks its metadata, source or target
        /// file element, or no texts were defined; otherwise <c>true</c>.
        /// </returns>
        /// <remarks>
        /// NOTE(review): the <c>return(false)</c> paths inside and after the text loop run
        /// after <c>_smtModel</c> and <c>_hybridEngine</c> have been assigned and possibly after
        /// texts were added to <c>_texts</c>; nothing here disposes or rolls these back.
        /// Confirm that callers clean up after a failed load.
        /// </remarks>
        private bool LoadProject(string fileName)
        {
            XElement projectElem;

            try
            {
                projectElem = XElement.Load(fileName);
            }
            catch (Exception)
            {
                // Any failure to read or parse the file is reported as "not loaded".
                return(false);
            }

            XElement engineElem = projectElem.Element("TranslationEngine");

            if (engineElem == null)
            {
                return(false);
            }

            // The SMT configuration is mandatory.
            var smtConfig = (string)engineElem.Element("SmtConfig");

            if (smtConfig == null)
            {
                return(false);
            }

            // Both of these are optional; the transfer engine is only built when both are present.
            var hcSrcConfig = (string)engineElem.Element("SourceAnalyzerConfig");
            var hcTrgConfig = (string)engineElem.Element("TargetGeneratorConfig");

            string configDir = Path.GetDirectoryName(fileName);

            Debug.Assert(configDir != null);

            ITranslationEngine transferEngine = null;

            if (hcSrcConfig != null && hcTrgConfig != null)
            {
                Language srcLang    = XmlLanguageLoader.Load(Path.Combine(configDir, hcSrcConfig));
                var      srcMorpher = new Morpher(_hcTraceManager, srcLang);

                Language trgLang    = XmlLanguageLoader.Load(Path.Combine(configDir, hcTrgConfig));
                var      trgMorpher = new Morpher(_hcTraceManager, trgLang);

                transferEngine = new TransferEngine(srcMorpher,
                                                    new SimpleTransferer(new GlossMorphemeMapper(trgMorpher)), trgMorpher);
            }

            // NOTE(review): any previous value of _smtModel is overwritten without being disposed here.
            _smtModel = new ThotSmtModel(Path.Combine(configDir, smtConfig));
            IInteractiveSmtEngine smtEngine = _smtModel.CreateInteractiveEngine();

            // transferEngine may be null, in which case the hybrid engine gets no rule engine.
            _hybridEngine = new HybridTranslationEngine(smtEngine, transferEngine);

            var sourceTexts          = new List <IText>();
            var targetTexts          = new List <IText>();
            var alignmentCollections = new List <ITextAlignmentCollection>();

            using (_texts.BulkUpdate())
            {
                foreach (XElement textElem in projectElem.Elements("Texts").Elements("Text"))
                {
                    var name = (string)textElem.Attribute("name");

                    // Metadata, source and target files are required for every text.
                    var metadataFileName = (string)textElem.Element("MetadataFile");
                    if (metadataFileName == null)
                    {
                        return(false);
                    }
                    metadataFileName = Path.Combine(configDir, metadataFileName);

                    var srcTextFileName = (string)textElem.Element("SourceFile");
                    if (srcTextFileName == null)
                    {
                        return(false);
                    }
                    srcTextFileName = Path.Combine(configDir, srcTextFileName);

                    var trgTextFileName = (string)textElem.Element("TargetFile");
                    if (trgTextFileName == null)
                    {
                        return(false);
                    }
                    trgTextFileName = Path.Combine(configDir, trgTextFileName);

                    // The alignments file is optional and stays null when absent.
                    var alignmentsFileName = (string)textElem.Element("AlignmentsFile");
                    if (alignmentsFileName != null)
                    {
                        alignmentsFileName = Path.Combine(configDir, alignmentsFileName);
                    }

                    var text = new TextViewModel(_tokenizer, name, metadataFileName, srcTextFileName, trgTextFileName,
                                                 alignmentsFileName)
                    {
                        Engine = _hybridEngine
                    };
                    text.PropertyChanged += TextPropertyChanged;
                    _texts.Add(text);

                    // The corpus texts only expose segments that this text's view model reports as approved.
                    Func <TextSegment, bool> segmentFilter = s => text.IsApproved((TextSegmentRef)s.SegmentRef);
                    sourceTexts.Add(new FilteredText(new TextFileText(_tokenizer, name, srcTextFileName),
                                                     segmentFilter));
                    targetTexts.Add(new FilteredText(new TextFileText(_tokenizer, name, trgTextFileName),
                                                     segmentFilter));
                    if (alignmentsFileName != null)
                    {
                        alignmentCollections.Add(new TextFileTextAlignmentCollection(name, alignmentsFileName));
                    }
                }
            }
            // A project without any texts is treated as a failed load.
            if (_texts.Count == 0)
            {
                return(false);
            }

            _sourceCorpus    = new DictionaryTextCorpus(sourceTexts);
            _targetCorpus    = new DictionaryTextCorpus(targetTexts);
            _alignmentCorpus = new DictionaryTextAlignmentCorpus(alignmentCollections);

            // Select the first text and finish the load.
            CurrentText = _texts[0];
            AcceptChanges();
            RebuildTask.UpdateCanExecute();
            return(true);
        }
示例#8
0
        /// <summary>
        /// Runs every non-empty segment of the parallel corpus through
        /// <c>TestSegment</c> using an interactive SMT engine and prints the
        /// collected suggestion statistics.
        /// </summary>
        /// <returns>
        /// The base command's code when it is non-zero; <c>1</c> when the engine
        /// configuration file is missing, the confidence value cannot be parsed, or the
        /// number of suggestions is not a positive integer; otherwise <c>0</c>.
        /// </returns>
        protected override int ExecuteCommand()
        {
            int code = base.ExecuteCommand();

            if (code != 0)
            {
                return code;
            }

            if (!File.Exists(EngineConfigFileName))
            {
                Out.WriteLine("The specified engine directory is invalid.");
                return 1;
            }

            double confidenceThreshold = 0.2;

            if (_confidenceOption.HasValue()
                && !double.TryParse(_confidenceOption.Value(), out confidenceThreshold))
            {
                Out.WriteLine("The specified confidence is invalid.");
                return 1;
            }

            int n = 1;

            // n sizes the per-rank counter array and is the number of results requested
            // from the engine. A negative value would make the array allocation throw and
            // zero would test nothing, so both are rejected together with unparsable input.
            if (_nOption.HasValue() && (!int.TryParse(_nOption.Value(), out n) || n < 1))
            {
                Out.WriteLine("The specified number of suggestions is invalid.");
                return 1;
            }

            if (_traceOption.HasValue())
            {
                if (!Directory.Exists(_traceOption.Value()))
                {
                    Directory.CreateDirectory(_traceOption.Value());
                }
            }

            var suggester = new PhraseTranslationSuggester()
            {
                ConfidenceThreshold = confidenceThreshold
            };

            int parallelCorpusCount = GetParallelCorpusCount();

            var watch = Stopwatch.StartNew();

            if (!_quietOption.HasValue())
            {
                Out.Write("Testing... ");
            }
            int segmentCount = 0;

            _acceptedSuggestionCounts = new int[n];
            // No progress bar in quiet mode; a using statement accepts a null resource.
            using (ConsoleProgressBar progress = _quietOption.HasValue() ? null : new ConsoleProgressBar(Out))
            using (IInteractiveSmtModel smtModel = new ThotSmtModel(EngineConfigFileName))
            using (IInteractiveSmtEngine engine = smtModel.CreateInteractiveEngine())
            {
                progress?.Report(new ProgressStatus(segmentCount, parallelCorpusCount));
                foreach (ParallelText text in ParallelCorpus.Texts)
                {
                    using (StreamWriter traceWriter = CreateTraceWriter(text))
                    {
                        foreach (ParallelTextSegment segment in text.Segments.Where(s => !s.IsEmpty))
                        {
                            TestSegment(engine, suggester, n, segment, traceWriter);
                            segmentCount++;
                            progress?.Report(new ProgressStatus(segmentCount, parallelCorpusCount));
                            if (segmentCount == MaxParallelCorpusCount)
                            {
                                break;
                            }
                        }
                    }

                    // Leave the outer loop as well once the segment limit is reached.
                    if (segmentCount == MaxParallelCorpusCount)
                    {
                        break;
                    }
                }
            }
            if (!_quietOption.HasValue())
            {
                Out.WriteLine("done.");
            }
            watch.Stop();

            Out.WriteLine($"Execution time: {watch.Elapsed:c}");
            Out.WriteLine($"# of Segments: {segmentCount}");
            Out.WriteLine($"# of Suggestions: {_totalSuggestionCount}");
            Out.WriteLine($"# of Correct Suggestions: {_totalAcceptedSuggestionCount}");

            // The ratios below are floating-point divisions, so a zero denominator
            // produces NaN (or infinity) in the output rather than an exception.
            Out.WriteLine("Correct Suggestion Types");
            double fullPcnt = (double)_fullSuggestionCount / _totalAcceptedSuggestionCount;

            Out.WriteLine($"-Full: {fullPcnt:0.0000}");
            double initPcnt = (double)_initSuggestionCount / _totalAcceptedSuggestionCount;

            Out.WriteLine($"-Initial: {initPcnt:0.0000}");
            double finalPcnt = (double)_finalSuggestionCount / _totalAcceptedSuggestionCount;

            Out.WriteLine($"-Final: {finalPcnt:0.0000}");
            double middlePcnt = (double)_middleSuggestionCount / _totalAcceptedSuggestionCount;

            Out.WriteLine($"-Middle: {middlePcnt:0.0000}");
            Out.WriteLine("Correct Suggestion N");
            for (int i = 0; i < _acceptedSuggestionCounts.Length; i++)
            {
                double pcnt = (double)_acceptedSuggestionCounts[i] / _totalAcceptedSuggestionCount;
                Out.WriteLine($"-{i + 1}: {pcnt:0.0000}");
            }
            double ksmr = (double)_actionCount / _charCount;

            Out.WriteLine($"KSMR: {ksmr:0.0000}");
            double precision = (double)_totalAcceptedSuggestionCount / _totalSuggestionCount;

            Out.WriteLine($"Precision: {precision:0.0000}");
            return 0;
        }
示例#9
0
        /// <summary>
        /// Runs one parallel segment through an interactive translation session. On each
        /// step it either accepts the suggestion words that match the reference target at
        /// the current position, or extends the prefix by one character of the reference
        /// word. It updates the action, character and suggestion counters, writes to the
        /// optional trace, and approves the session when the prefix covers the whole target.
        /// </summary>
        /// <param name="engine">Engine used to open the interactive session.</param>
        /// <param name="suggester">Produces the suggestions for the current session state.</param>
        /// <param name="n">Number of results requested from the engine.</param>
        /// <param name="segment">Parallel segment providing the source and the reference target.</param>
        /// <param name="traceWriter">Trace output; may be <c>null</c>, in which case the
        /// direct writes in this method are skipped.</param>
        private void TestSegment(IInteractiveSmtEngine engine, ITranslationSuggester suggester, int n,
                                 ParallelTextSegment segment, StreamWriter traceWriter)
        {
            traceWriter?.WriteLine($"Segment:      {segment.SegmentRef}");
            IReadOnlyList <string> sourceSegment = segment.SourceSegment.Preprocess(Preprocessors.Lowercase);

            traceWriter?.WriteLine($"Source:       {string.Join(" ", sourceSegment)}");
            IReadOnlyList <string> targetSegment = segment.TargetSegment.Preprocess(Preprocessors.Lowercase);

            traceWriter?.WriteLine($"Target:       {string.Join(" ", targetSegment)}");
            traceWriter?.WriteLine(new string('=', 120));
            // State carried across loop iterations.
            string[][] prevSuggestionWords  = null;
            bool       isLastWordSuggestion = false;
            string     suggestionResult     = null;

            using (IInteractiveTranslationSession session = engine.TranslateInteractively(n, sourceSegment))
            {
                // Continue until the prefix has as many words as the target and its last word is complete.
                while (session.Prefix.Count < targetSegment.Count || !session.IsLastWordComplete)
                {
                    // Index of the target word being worked on; an incomplete last prefix
                    // word is still in progress, so step back to it.
                    int targetIndex = session.Prefix.Count;
                    if (!session.IsLastWordComplete)
                    {
                        targetIndex--;
                    }

                    bool match = false;
                    TranslationSuggestion[] suggestions = suggester.GetSuggestions(session).ToArray();
                    // Resolve each suggestion's target word indices to the actual words of the
                    // result at the same position (suggestion k pairs with CurrentResults[k]).
                    string[][] suggestionWords          = suggestions.Select((s, k) =>
                                                                             s.TargetWordIndices.Select(j =>
                                                                                                        session.CurrentResults[k].TargetSegment[j]).ToArray()).ToArray();
                    // Trace and count only when the suggestions differ from the previous iteration's.
                    if (prevSuggestionWords == null || !SuggestionsAreEqual(prevSuggestionWords, suggestionWords))
                    {
                        WritePrefix(traceWriter, suggestionResult, session.Prefix);
                        WriteSuggestions(traceWriter, session, suggestions);
                        suggestionResult = null;
                        if (suggestions.Any(s => s.TargetWordIndices.Count > 0))
                        {
                            _totalSuggestionCount++;
                        }
                    }
                    // Try the suggestions in order and take the first one with any matching words.
                    for (int k = 0; k < suggestions.Length; k++)
                    {
                        TranslationSuggestion suggestion = suggestions[k];
                        var accepted = new List <int>();
                        // Walk the suggestion's words: non-matching words are skipped until the
                        // first match, then matches are collected until the first mismatch.
                        for (int i = 0, j = targetIndex; i < suggestionWords[k].Length && j < targetSegment.Count; i++)
                        {
                            if (suggestionWords[k][i] == targetSegment[j])
                            {
                                accepted.Add(suggestion.TargetWordIndices[i]);
                                j++;
                            }
                            else if (accepted.Count == 0)
                            {
                                // Nothing accepted yet, so j has not moved from targetIndex.
                                j = targetIndex;
                            }
                            else
                            {
                                break;
                            }
                        }

                        if (accepted.Count > 0)
                        {
                            session.AppendSuggestionToPrefix(k, accepted);
                            isLastWordSuggestion = true;
                            _actionCount++;
                            _totalAcceptedSuggestionCount++;
                            // Classify by which part of the suggestion was accepted:
                            // all of it, its start, its end, or only its middle.
                            if (accepted.Count == suggestion.TargetWordIndices.Count)
                            {
                                suggestionResult = "ACCEPT_FULL";
                                _fullSuggestionCount++;
                            }
                            else if (accepted[0] == suggestion.TargetWordIndices[0])
                            {
                                suggestionResult = "ACCEPT_INIT";
                                _initSuggestionCount++;
                            }
                            else if (accepted[accepted.Count - 1]
                                     == suggestion.TargetWordIndices[suggestion.TargetWordIndices.Count - 1])
                            {
                                suggestionResult = "ACCEPT_FIN";
                                _finalSuggestionCount++;
                            }
                            else
                            {
                                suggestionResult = "ACCEPT_MID";
                                _middleSuggestionCount++;
                            }
                            // Per-rank tally, indexed by the accepted suggestion's position.
                            _acceptedSuggestionCounts[k]++;
                            match = true;
                            break;
                        }
                    }

                    if (!match)
                    {
                        // The previous step accepted a suggestion: one extra action is counted
                        // before typing resumes.
                        // NOTE(review): presumably the keystroke separating the accepted words
                        // from the typed text; confirm.
                        if (isLastWordSuggestion)
                        {
                            _actionCount++;
                            isLastWordSuggestion = false;
                            WritePrefix(traceWriter, suggestionResult, session.Prefix);
                            suggestionResult = null;
                        }

                        // Number of characters of the current target word already in the prefix.
                        int    len        = session.IsLastWordComplete ? 0 : session.Prefix[session.Prefix.Count - 1].Length;
                        string targetWord = targetSegment[targetIndex];
                        if (len == targetWord.Length)
                        {
                            // Every character is already in the prefix: append nothing and pass true.
                            // NOTE(review): the second argument presumably marks the last word complete; confirm.
                            session.AppendToPrefix("", true);
                        }
                        else
                        {
                            // Append the next single character of the target word.
                            string c = targetWord.Substring(len, 1);
                            session.AppendToPrefix(c, false);
                        }

                        suggestionResult = suggestions.Any(s => s.TargetWordIndices.Count > 0) ? "REJECT" : "NONE";
                        _actionCount++;
                    }

                    prevSuggestionWords = suggestionWords;
                }

                WritePrefix(traceWriter, suggestionResult, session.Prefix);

                session.Approve(_approveAlignedOption.HasValue());
            }

            // Each target word contributes its length plus one to the character total.
            _charCount += targetSegment.Sum(w => w.Length + 1);
            traceWriter?.WriteLine();
        }