/// <summary>
/// Starts an interactive session (n = 1) on the toy corpus model and checks that the
/// first current result matches the expected target tokens.
/// </summary>
public void TranslateInteractively_TranslationCorrect()
{
    string[] sourceTokens = "me marcho hoy por la tarde .".Split();
    string[] expectedTokens = "i leave today in the afternoon .".Split();

    using (var smtModel = new ThotSmtModel(TestHelpers.ToyCorpusConfigFileName))
    using (IInteractiveSmtEngine engine = smtModel.CreateInteractiveEngine())
    using (IInteractiveTranslationSession session = engine.TranslateInteractively(1, sourceTokens))
    {
        TranslationResult best = session.CurrentResults[0];

        Assert.That(best.TargetSegment, Is.EqualTo(expectedTokens));
    }
}
/// <summary>
/// Checks the first result before and after a prefix is set on a session whose source
/// contains a word that is initially passed through untranslated ("caminé").
/// </summary>
public void SetPrefix_MissingWord_TranslationCorrect()
{
    string[] sourceTokens = "caminé a mi habitación .".Split();

    using (var smtModel = new ThotSmtModel(TestHelpers.ToyCorpusConfigFileName))
    using (IInteractiveSmtEngine engine = smtModel.CreateInteractiveEngine())
    using (IInteractiveTranslationSession session = engine.TranslateInteractively(1, sourceTokens))
    {
        // Before any prefix is supplied the first source word appears unchanged in the target.
        TranslationResult initial = session.CurrentResults[0];
        Assert.That(initial.TargetSegment, Is.EqualTo("caminé to my room .".Split()));

        // After the prefix is set, the result starts with the prefix words.
        TranslationResult withPrefix = session.SetPrefix("i walked".Split(), true)[0];
        Assert.That(withPrefix.TargetSegment, Is.EqualTo("i walked to my room .".Split()));
    }
}
/// <summary>
/// Creates the SMT model, the interactive SMT engine, the rule engine and the hybrid engine,
/// then marks this instance as loaded. Does nothing when already loaded.
/// </summary>
private void Load()
{
    if (!_isLoaded)
    {
        // The interactive engine is created from the model, and the hybrid engine wraps
        // both the SMT and rule engines, so the creation order matters.
        _smtModel = _smtModelFactory.Create(this);
        _smtEngine = _smtModel.CreateInteractiveEngine();
        _ruleEngine = _ruleEngineFactory.Create(this);
        _hybridEngine = new HybridTranslationEngine(_smtEngine, _ruleEngine);

        _isLoaded = true;
    }
}
/// <summary>
/// Approves a corrected translation in one session, then checks that a second session on
/// the same engine translates the previously untranslated word ("hablé").
/// </summary>
public void Approve_TwoSegmentsUnknownWord_LearnsUnknownWord()
{
    string[] firstSource = "hablé con recepción .".Split();
    string[] secondSource = "hablé hasta cinco en punto .".Split();

    using (var smtModel = new ThotSmtModel(TestHelpers.ToyCorpusConfigFileName))
    using (IInteractiveSmtEngine engine = smtModel.CreateInteractiveEngine())
    {
        // First segment: the unknown word is passed through until a prefix is supplied.
        using (IInteractiveTranslationSession firstSession = engine.TranslateInteractively(1, firstSource))
        {
            TranslationResult initial = firstSession.CurrentResults[0];
            Assert.That(initial.TargetSegment, Is.EqualTo("hablé with reception .".Split()));

            TranslationResult corrected = firstSession.SetPrefix("i talked".Split(), true)[0];
            Assert.That(corrected.TargetSegment, Is.EqualTo("i talked with reception .".Split()));

            // Set the full target as the prefix and approve it.
            firstSession.SetPrefix("i talked with reception .".Split(), true);
            firstSession.Approve();
        }

        // Second segment: the same source word is now expected to be translated.
        using (IInteractiveTranslationSession secondSession = engine.TranslateInteractively(1, secondSource))
        {
            TranslationResult learned = secondSession.CurrentResults[0];
            Assert.That(learned.TargetSegment, Is.EqualTo("talked until five o ' clock .".Split()));
        }
    }
}
/// <summary>
/// Saves, then disposes and clears the hybrid engine, the rule engine (if any), the SMT
/// engine and the SMT model, and marks this instance as not loaded. Does nothing when
/// not loaded.
/// </summary>
private void Unload()
{
    if (_isLoaded)
    {
        Save();

        // Dispose in the same order as before: hybrid engine first, model last.
        _hybridEngine.Dispose();
        _hybridEngine = null;

        // The rule engine may be absent.
        _ruleEngine?.Dispose();
        _ruleEngine = null;

        _smtEngine.Dispose();
        _smtEngine = null;

        _smtModel.Dispose();
        _smtModel = null;

        _isLoaded = false;
    }
}
/// <summary>
/// Initializes a hybrid engine from an interactive SMT engine and an optional rule engine,
/// and creates an empty set of sessions.
/// </summary>
/// <param name="smtEngine">The interactive SMT engine; stored in <c>SmtEngine</c>.</param>
/// <param name="ruleEngine">The rule engine; stored in <c>RuleEngine</c>. Defaults to <c>null</c>.</param>
public HybridTranslationEngine(IInteractiveSmtEngine smtEngine, ITranslationEngine ruleEngine = null) { SmtEngine = smtEngine; RuleEngine = ruleEngine; _sessions = new HashSet <HybridInteractiveTranslationSession>(); }
/// <summary>
/// Loads a project XML file: builds the SMT model and hybrid engine, creates a view model
/// for every <c>Texts/Text</c> element, and builds the source, target and alignment corpora.
/// </summary>
/// <param name="fileName">Path of the project file; relative paths inside it are resolved against its directory.</param>
/// <returns>
/// <c>true</c> when the project was loaded; <c>false</c> when the file cannot be loaded as XML,
/// a required element is missing, or the project contains no texts.
/// </returns>
/// <remarks>
/// NOTE(review): the early <c>false</c> returns inside the text loop happen after
/// <c>_smtModel</c> and <c>_hybridEngine</c> have been assigned and after earlier texts
/// have been added to <c>_texts</c>; nothing here undoes that.
/// </remarks>
private bool LoadProject(string fileName)
{
    XElement root;
    try
    {
        root = XElement.Load(fileName);
    }
    catch (Exception)
    {
        // Any failure to read or parse the file is reported as "not loaded".
        return false;
    }

    XElement engineNode = root.Element("TranslationEngine");
    if (engineNode == null)
    {
        return false;
    }

    string smtConfigRelative = (string)engineNode.Element("SmtConfig");
    if (smtConfigRelative == null)
    {
        return false;
    }

    string sourceAnalyzerRelative = (string)engineNode.Element("SourceAnalyzerConfig");
    string targetGeneratorRelative = (string)engineNode.Element("TargetGeneratorConfig");

    string baseDir = Path.GetDirectoryName(fileName);
    Debug.Assert(baseDir != null);

    // The transfer engine is only built when both configs are present.
    ITranslationEngine ruleBasedEngine = null;
    bool hasBothMorphConfigs = sourceAnalyzerRelative != null && targetGeneratorRelative != null;
    if (hasBothMorphConfigs)
    {
        Language sourceLanguage = XmlLanguageLoader.Load(Path.Combine(baseDir, sourceAnalyzerRelative));
        Morpher sourceMorpher = new Morpher(_hcTraceManager, sourceLanguage);
        Language targetLanguage = XmlLanguageLoader.Load(Path.Combine(baseDir, targetGeneratorRelative));
        Morpher targetMorpher = new Morpher(_hcTraceManager, targetLanguage);
        ruleBasedEngine = new TransferEngine(
            sourceMorpher,
            new SimpleTransferer(new GlossMorphemeMapper(targetMorpher)),
            targetMorpher);
    }

    _smtModel = new ThotSmtModel(Path.Combine(baseDir, smtConfigRelative));
    IInteractiveSmtEngine interactiveEngine = _smtModel.CreateInteractiveEngine();
    _hybridEngine = new HybridTranslationEngine(interactiveEngine, ruleBasedEngine);

    List<IText> sourceTextList = new List<IText>();
    List<IText> targetTextList = new List<IText>();
    List<ITextAlignmentCollection> alignmentList = new List<ITextAlignmentCollection>();

    using (_texts.BulkUpdate())
    {
        foreach (XElement textNode in root.Elements("Texts").Elements("Text"))
        {
            string textName = (string)textNode.Attribute("name");

            // Metadata, source and target files are required; each path is resolved
            // immediately after its own null check.
            string metadataPath = (string)textNode.Element("MetadataFile");
            if (metadataPath == null)
            {
                return false;
            }
            metadataPath = Path.Combine(baseDir, metadataPath);

            string sourcePath = (string)textNode.Element("SourceFile");
            if (sourcePath == null)
            {
                return false;
            }
            sourcePath = Path.Combine(baseDir, sourcePath);

            string targetPath = (string)textNode.Element("TargetFile");
            if (targetPath == null)
            {
                return false;
            }
            targetPath = Path.Combine(baseDir, targetPath);

            // The alignments file is optional.
            string alignmentsRelative = (string)textNode.Element("AlignmentsFile");
            string alignmentsPath = alignmentsRelative == null
                ? null
                : Path.Combine(baseDir, alignmentsRelative);

            TextViewModel textViewModel = new TextViewModel(
                _tokenizer, textName, metadataPath, sourcePath, targetPath, alignmentsPath)
            {
                Engine = _hybridEngine
            };
            textViewModel.PropertyChanged += TextPropertyChanged;
            _texts.Add(textViewModel);

            // Only segments the view model reports as approved pass the filter.
            Func<TextSegment, bool> approvedOnly =
                seg => textViewModel.IsApproved((TextSegmentRef)seg.SegmentRef);
            sourceTextList.Add(new FilteredText(new TextFileText(_tokenizer, textName, sourcePath), approvedOnly));
            targetTextList.Add(new FilteredText(new TextFileText(_tokenizer, textName, targetPath), approvedOnly));

            if (alignmentsPath != null)
            {
                alignmentList.Add(new TextFileTextAlignmentCollection(textName, alignmentsPath));
            }
        }
    }

    if (_texts.Count == 0)
    {
        return false;
    }

    _sourceCorpus = new DictionaryTextCorpus(sourceTextList);
    _targetCorpus = new DictionaryTextCorpus(targetTextList);
    _alignmentCorpus = new DictionaryTextAlignmentCorpus(alignmentList);

    CurrentText = _texts[0];
    AcceptChanges();
    RebuildTask.UpdateCanExecute();
    return true;
}
/// <summary>
/// Runs the interactive translation test over the parallel corpus and writes summary
/// statistics (segment and suggestion counts, accepted-suggestion breakdowns, KSMR and
/// precision) to <c>Out</c>.
/// </summary>
/// <returns>
/// The base command's code when it is non-zero; <c>1</c> when the engine config file does
/// not exist or an option value is invalid; otherwise <c>0</c>.
/// </returns>
protected override int ExecuteCommand()
{
    int code = base.ExecuteCommand();
    if (code != 0)
    {
        return code;
    }

    if (!File.Exists(EngineConfigFileName))
    {
        Out.WriteLine("The specified engine directory is invalid.");
        return 1;
    }

    double confidenceThreshold = 0.2;
    if (_confidenceOption.HasValue()
        && !double.TryParse(_confidenceOption.Value(), out confidenceThreshold))
    {
        Out.WriteLine("The specified confidence is invalid.");
        return 1;
    }

    // n sizes the per-rank counter array below, so it must be a positive integer.
    // A negative value used to parse successfully and then crash on the array allocation.
    int n = 1;
    if (_nOption.HasValue()
        && (!int.TryParse(_nOption.Value(), out n) || n < 1))
    {
        Out.WriteLine("The specified number of suggestions is invalid.");
        return 1;
    }

    if (_traceOption.HasValue())
    {
        if (!Directory.Exists(_traceOption.Value()))
        {
            Directory.CreateDirectory(_traceOption.Value());
        }
    }

    var suggester = new PhraseTranslationSuggester() { ConfidenceThreshold = confidenceThreshold };

    int parallelCorpusCount = GetParallelCorpusCount();

    var watch = Stopwatch.StartNew();
    if (!_quietOption.HasValue())
    {
        Out.Write("Testing... ");
    }

    int segmentCount = 0;
    _acceptedSuggestionCounts = new int[n];

    // No progress bar is created in quiet mode; a null resource is allowed in a using statement.
    using (ConsoleProgressBar progress = _quietOption.HasValue() ? null : new ConsoleProgressBar(Out))
    using (IInteractiveSmtModel smtModel = new ThotSmtModel(EngineConfigFileName))
    using (IInteractiveSmtEngine engine = smtModel.CreateInteractiveEngine())
    {
        progress?.Report(new ProgressStatus(segmentCount, parallelCorpusCount));
        foreach (ParallelText text in ParallelCorpus.Texts)
        {
            using (StreamWriter traceWriter = CreateTraceWriter(text))
            {
                foreach (ParallelTextSegment segment in text.Segments.Where(s => !s.IsEmpty))
                {
                    TestSegment(engine, suggester, n, segment, traceWriter);
                    segmentCount++;
                    progress?.Report(new ProgressStatus(segmentCount, parallelCorpusCount));
                    if (segmentCount == MaxParallelCorpusCount)
                    {
                        break;
                    }
                }
            }

            // Propagate the inner break once the segment limit has been reached.
            if (segmentCount == MaxParallelCorpusCount)
            {
                break;
            }
        }
    }

    if (!_quietOption.HasValue())
    {
        Out.WriteLine("done.");
    }
    watch.Stop();

    Out.WriteLine($"Execution time: {watch.Elapsed:c}");
    Out.WriteLine($"# of Segments: {segmentCount}");
    Out.WriteLine($"# of Suggestions: {_totalSuggestionCount}");
    Out.WriteLine($"# of Correct Suggestions: {_totalAcceptedSuggestionCount}");

    // Each suggestion type is reported as a fraction of all accepted suggestions.
    Out.WriteLine("Correct Suggestion Types");
    double fullPcnt = (double)_fullSuggestionCount / _totalAcceptedSuggestionCount;
    Out.WriteLine($"-Full: {fullPcnt:0.0000}");
    double initPcnt = (double)_initSuggestionCount / _totalAcceptedSuggestionCount;
    Out.WriteLine($"-Initial: {initPcnt:0.0000}");
    double finalPcnt = (double)_finalSuggestionCount / _totalAcceptedSuggestionCount;
    Out.WriteLine($"-Final: {finalPcnt:0.0000}");
    double middlePcnt = (double)_middleSuggestionCount / _totalAcceptedSuggestionCount;
    Out.WriteLine($"-Middle: {middlePcnt:0.0000}");

    // Accepted suggestions broken down by their rank in the n-best list (1-based in the output).
    Out.WriteLine("Correct Suggestion N");
    for (int i = 0; i < _acceptedSuggestionCounts.Length; i++)
    {
        double pcnt = (double)_acceptedSuggestionCounts[i] / _totalAcceptedSuggestionCount;
        Out.WriteLine($"-{i + 1}: {pcnt:0.0000}");
    }

    double ksmr = (double)_actionCount / _charCount;
    Out.WriteLine($"KSMR: {ksmr:0.0000}");
    double precision = (double)_totalAcceptedSuggestionCount / _totalSuggestionCount;
    Out.WriteLine($"Precision: {precision:0.0000}");
    return 0;
}
/// <summary>
/// Drives one interactive session for a segment until the session prefix covers the
/// lowercased target segment. Each iteration either accepts the first suggestion whose
/// words match the target at the current position, or appends a single character of the
/// target word (or completes the word), updating the action and suggestion counters.
/// </summary>
/// <param name="engine">Engine used to open the interactive session.</param>
/// <param name="suggester">Produces the suggestions for the current session state.</param>
/// <param name="n">Passed to <c>TranslateInteractively</c> as its first argument.</param>
/// <param name="segment">The parallel segment whose target side is reproduced.</param>
/// <param name="traceWriter">Optional trace output; may be <c>null</c>.</param>
private void TestSegment(IInteractiveSmtEngine engine, ITranslationSuggester suggester, int n, ParallelTextSegment segment, StreamWriter traceWriter)
{
    traceWriter?.WriteLine($"Segment: {segment.SegmentRef}");
    IReadOnlyList<string> sourceWords = segment.SourceSegment.Preprocess(Preprocessors.Lowercase);
    traceWriter?.WriteLine($"Source: {string.Join(" ", sourceWords)}");
    IReadOnlyList<string> targetWords = segment.TargetSegment.Preprocess(Preprocessors.Lowercase);
    traceWriter?.WriteLine($"Target: {string.Join(" ", targetWords)}");
    traceWriter?.WriteLine(new string('=', 120));

    string[][] previousWords = null;
    bool lastWordCameFromSuggestion = false;
    string outcome = null;

    using (IInteractiveTranslationSession session = engine.TranslateInteractively(n, sourceWords))
    {
        while (session.Prefix.Count < targetWords.Count || !session.IsLastWordComplete)
        {
            // Index of the target word being worked on; step back when the last prefix
            // word is still incomplete.
            int position = session.Prefix.Count;
            if (!session.IsLastWordComplete)
            {
                position--;
            }

            bool acceptedAny = false;
            TranslationSuggestion[] suggestions = suggester.GetSuggestions(session).ToArray();

            // Resolve each suggestion's word indices into the words of the matching result.
            string[][] currentWords = suggestions
                .Select((sug, rank) => sug.TargetWordIndices
                    .Select(idx => session.CurrentResults[rank].TargetSegment[idx])
                    .ToArray())
                .ToArray();

            // Only trace and count when the suggestions differ from the previous iteration.
            bool suggestionsChanged = previousWords == null || !SuggestionsAreEqual(previousWords, currentWords);
            if (suggestionsChanged)
            {
                WritePrefix(traceWriter, outcome, session.Prefix);
                WriteSuggestions(traceWriter, session, suggestions);
                outcome = null;
                if (suggestions.Any(sug => sug.TargetWordIndices.Count > 0))
                {
                    _totalSuggestionCount++;
                }
            }

            for (int k = 0; k < suggestions.Length; k++)
            {
                TranslationSuggestion candidate = suggestions[k];
                string[] candidateWords = currentWords[k];
                var acceptedIndices = new List<int>();

                // Find the first run of suggested words that matches the target starting
                // at the current position; stop at the first mismatch after a match.
                int j = position;
                for (int i = 0; i < candidateWords.Length && j < targetWords.Count; i++)
                {
                    if (candidateWords[i] == targetWords[j])
                    {
                        acceptedIndices.Add(candidate.TargetWordIndices[i]);
                        j++;
                    }
                    else if (acceptedIndices.Count == 0)
                    {
                        j = position;
                    }
                    else
                    {
                        break;
                    }
                }

                if (acceptedIndices.Count == 0)
                {
                    continue;
                }

                session.AppendSuggestionToPrefix(k, acceptedIndices);
                lastWordCameFromSuggestion = true;
                _actionCount++;
                _totalAcceptedSuggestionCount++;

                // Classify which part of the suggestion was accepted.
                var suggested = candidate.TargetWordIndices;
                int firstAccepted = acceptedIndices[0];
                int lastAccepted = acceptedIndices[acceptedIndices.Count - 1];
                if (acceptedIndices.Count == suggested.Count)
                {
                    outcome = "ACCEPT_FULL";
                    _fullSuggestionCount++;
                }
                else if (firstAccepted == suggested[0])
                {
                    outcome = "ACCEPT_INIT";
                    _initSuggestionCount++;
                }
                else if (lastAccepted == suggested[suggested.Count - 1])
                {
                    outcome = "ACCEPT_FIN";
                    _finalSuggestionCount++;
                }
                else
                {
                    outcome = "ACCEPT_MID";
                    _middleSuggestionCount++;
                }

                _acceptedSuggestionCounts[k]++;
                acceptedAny = true;
                break;
            }

            if (!acceptedAny)
            {
                if (lastWordCameFromSuggestion)
                {
                    // One extra action is counted when moving on after an accepted suggestion.
                    _actionCount++;
                    lastWordCameFromSuggestion = false;
                    WritePrefix(traceWriter, outcome, session.Prefix);
                    outcome = null;
                }

                // Number of characters of the current target word already in the prefix.
                int typedLength;
                if (session.IsLastWordComplete)
                {
                    typedLength = 0;
                }
                else
                {
                    typedLength = session.Prefix[session.Prefix.Count - 1].Length;
                }

                string currentTarget = targetWords[position];
                if (typedLength == currentTarget.Length)
                {
                    // The word is fully present: mark it complete.
                    session.AppendToPrefix("", true);
                }
                else
                {
                    // Append the next single character of the target word.
                    string nextChar = currentTarget.Substring(typedLength, 1);
                    session.AppendToPrefix(nextChar, false);
                }

                if (suggestions.Any(sug => sug.TargetWordIndices.Count > 0))
                {
                    outcome = "REJECT";
                }
                else
                {
                    outcome = "NONE";
                }
                _actionCount++;
            }

            previousWords = currentWords;
        }

        WritePrefix(traceWriter, outcome, session.Prefix);
        session.Approve(_approveAlignedOption.HasValue());
    }

    // Each target word contributes its length plus one to the character count.
    _charCount += targetWords.Sum(w => w.Length + 1);
    traceWriter?.WriteLine();
}