/// <summary>
/// Creates a batch trainer tied to the given SMT model. The parameters, both preprocessors
/// and the corpus are handed unchanged to the base constructor.
/// </summary>
/// <param name="smtModel">The SMT model kept in <c>_smtModel</c>.</param>
/// <param name="parameters">Parameters forwarded to the base constructor.</param>
/// <param name="sourcePreprocessor">Source-side preprocessor forwarded to the base constructor.</param>
/// <param name="targetPreprocessor">Target-side preprocessor forwarded to the base constructor.</param>
/// <param name="corpus">Parallel text corpus forwarded to the base constructor.</param>
public BatchTrainer(
    ThotSmtModel smtModel,
    ThotSmtParameters parameters,
    Func<string, string> sourcePreprocessor,
    Func<string, string> targetPreprocessor,
    ParallelTextCorpus corpus)
    : base(parameters, sourcePreprocessor, targetPreprocessor, corpus)
{
    _smtModel = smtModel;
}
/// <summary>
/// Checks that requesting the word graph of an empty segment yields a graph whose
/// <c>IsEmpty</c> flag is set.
/// </summary>
public void GetWordGraph_EmptySegment_ReturnsEmptyGraph()
{
    using (ThotSmtModel model = new ThotSmtModel(TestHelpers.ToyCorpusConfigFileName))
    using (ISmtEngine smtEngine = model.CreateEngine())
    {
        string[] emptySegment = new string[0];

        WordGraph graph = smtEngine.GetWordGraph(emptySegment);

        Assert.That(graph.IsEmpty, Is.True);
    }
}
/// <summary>
/// Checks that the best phrase alignment for a source/target pair reports the supplied
/// target sentence as its target segment.
/// </summary>
public void GetBestPhraseAlignment_TranslationCorrect()
{
    using (ThotSmtModel model = new ThotSmtModel(TestHelpers.ToyCorpusConfigFileName))
    using (ISmtEngine smtEngine = model.CreateEngine())
    {
        string[] sourceTokens = "esto es una prueba .".Split();
        string[] targetTokens = "this is a test .".Split();

        TranslationResult alignment = smtEngine.GetBestPhraseAlignment(sourceTokens, targetTokens);

        // The expectation is split separately so it never shares an array with the input.
        string[] expectedTokens = "this is a test .".Split();
        Assert.That(alignment.TargetSegment, Is.EqualTo(expectedTokens));
    }
}
/// <summary>
/// Checks that asking for the two best translations of a sentence returns the two
/// expected target segments, in order.
/// </summary>
public void Translate_NBest_TranslationsCorrect()
{
    using (ThotSmtModel model = new ThotSmtModel(TestHelpers.ToyCorpusConfigFileName))
    using (ISmtEngine smtEngine = model.CreateEngine())
    {
        string[] sourceTokens = "hablé hasta cinco en punto .".Split();

        IEnumerable<TranslationResult> hypotheses = smtEngine.Translate(2, sourceTokens);

        string[][] expectedSegments =
        {
            "hablé until five o ' clock .".Split(),
            "hablé until five o ' clock for".Split()
        };
        Assert.That(hypotheses.Select(hypothesis => hypothesis.TargetSegment), Is.EqualTo(expectedSegments));
    }
}
/// <summary>
/// Checks that asking for three translations returns only the single expected target
/// segment for this sentence.
/// </summary>
public void Translate_NBestLessThanN_TranslationsCorrect()
{
    using (ThotSmtModel model = new ThotSmtModel(TestHelpers.ToyCorpusConfigFileName))
    using (ISmtEngine smtEngine = model.CreateEngine())
    {
        string[] sourceTokens = "voy a marcharme hoy por la tarde .".Split();

        IEnumerable<TranslationResult> hypotheses = smtEngine.Translate(3, sourceTokens);

        string[][] expectedSegments =
        {
            "i am leaving today in the afternoon .".Split()
        };
        Assert.That(hypotheses.Select(hypothesis => hypothesis.TargetSegment), Is.EqualTo(expectedSegments));
    }
}
/// <summary>
/// Checks that translating a single sentence produces the expected target segment.
/// </summary>
public void Translate_TranslationCorrect()
{
    using (ThotSmtModel model = new ThotSmtModel(TestHelpers.ToyCorpusConfigFileName))
    using (ISmtEngine smtEngine = model.CreateEngine())
    {
        string[] sourceTokens = "voy a marcharme hoy por la tarde .".Split();

        TranslationResult translation = smtEngine.Translate(sourceTokens);

        string[] expectedTokens = "i am leaving today in the afternoon .".Split();
        Assert.That(translation.TargetSegment, Is.EqualTo(expectedTokens));
    }
}
/// <summary>
/// Creates an engine backed by the given SMT model: stores the model, creates the empty
/// session set, calls <c>LoadHandle</c>, and sets up the segment aligner, the word
/// confidence estimator and the error correction model.
/// </summary>
/// <param name="smtModel">The SMT model stored in <c>_smtModel</c>.</param>
public ThotSmtEngine(ThotSmtModel smtModel)
{
    _smtModel = smtModel;
    _sessions = new HashSet<ThotInteractiveTranslationSession>();
    LoadHandle();

    // Both helpers are given the engine's GetTranslationProbability method as a delegate.
    _segmentAligner = new FuzzyEditDistanceSegmentAligner(GetTranslationProbability);
    _confidenceEstimator = new Ibm1WordConfidenceEstimator(GetTranslationProbability);
    // Disabled alternative estimator. It must stay on its own line: a trailing "//" comment
    // would otherwise swallow the statement and closing brace that follow it.
    //_confidenceEstimator = new WppWordConfidenceEstimator(this);

    ErrorCorrectionModel = new ErrorCorrectionModel();
}
/// <summary>
/// Checks that the first current result of a new interactive translation session has the
/// expected target segment.
/// </summary>
public void TranslateInteractively_TranslationCorrect()
{
    using (ThotSmtModel model = new ThotSmtModel(TestHelpers.ToyCorpusConfigFileName))
    using (IInteractiveSmtEngine interactiveEngine = model.CreateInteractiveEngine())
    using (IInteractiveTranslationSession translationSession =
        interactiveEngine.TranslateInteractively(1, "me marcho hoy por la tarde .".Split()))
    {
        TranslationResult firstResult = translationSession.CurrentResults[0];

        string[] expectedTokens = "i leave today in the afternoon .".Split();
        Assert.That(firstResult.TargetSegment, Is.EqualTo(expectedTokens));
    }
}
/// <summary>
/// Checks the interactive translation before and after a prefix is set: the initial result
/// keeps the source word "caminé", and after the prefix "i walked" the result is the
/// full expected English sentence.
/// </summary>
public void SetPrefix_MissingWord_TranslationCorrect()
{
    using (ThotSmtModel model = new ThotSmtModel(TestHelpers.ToyCorpusConfigFileName))
    using (IInteractiveSmtEngine interactiveEngine = model.CreateInteractiveEngine())
    using (IInteractiveTranslationSession translationSession =
        interactiveEngine.TranslateInteractively(1, "caminé a mi habitación .".Split()))
    {
        // Before any prefix is supplied.
        TranslationResult current = translationSession.CurrentResults[0];
        string[] expectedInitial = "caminé to my room .".Split();
        Assert.That(current.TargetSegment, Is.EqualTo(expectedInitial));

        // After the prefix is supplied.
        string[] prefixTokens = "i walked".Split();
        current = translationSession.SetPrefix(prefixTokens, true)[0];
        string[] expectedWithPrefix = "i walked to my room .".Split();
        Assert.That(current.TargetSegment, Is.EqualTo(expectedWithPrefix));
    }
}
/// <summary>
/// Checks that training on one segment pair with an explicit word alignment matrix changes
/// later translations: the sentence is first translated with several source words left
/// untranslated, and after training a related sentence translates to the expected English.
/// </summary>
public void TrainSegment_AlignmentSpecified_TranslationCorrect()
{
    using (ThotSmtModel model = new ThotSmtModel(TestHelpers.ToyCorpusConfigFileName))
    using (ISmtEngine smtEngine = model.CreateEngine())
    {
        // Translation before training.
        TranslationResult translation = smtEngine.Translate("maria no dio una bofetada a la bruja verde .".Split());
        string[] expectedBeforeTraining = "maria no dio a bofetada to bruja verde .".Split();
        Assert.That(translation.TargetSegment, Is.EqualTo(expectedBeforeTraining));

        // Alignment matrix sized 10 x 7, matching the token counts of the training pair below.
        WordAlignmentMatrix alignment = new WordAlignmentMatrix(10, 7, AlignmentType.Unknown);
        SetAligned(alignment, 1, 1);
        SetAligned(alignment, 2, 2);
        SetAligned(alignment, 3, 2);
        SetAligned(alignment, 4, 2);
        SetSourceNotAligned(alignment, 5);
        SetAligned(alignment, 8, 4);

        string[] trainingSource = "maria no dio una bofetada a la bruja verde .".Split();
        string[] trainingTarget = "mary didn't slap the green witch .".Split();
        smtEngine.TrainSegment(trainingSource, trainingTarget, alignment);

        // Translation after training.
        translation = smtEngine.Translate("maria es una bruja .".Split());
        string[] expectedAfterTraining = "mary is a witch .".Split();
        Assert.That(translation.TargetSegment, Is.EqualTo(expectedAfterTraining));
    }
}
/// <summary>
/// Reports the "Finalizing" step and then, unless the tune corpus is empty, trains an
/// engine on every tune segment pair. The engine is created from a model that uses a clone
/// of <c>Parameters</c> pointed at the given translation and language model prefixes.
/// </summary>
/// <param name="trainTMPrefix">Translation model file name prefix set on the cloned parameters.</param>
/// <param name="trainLMPrefix">Language model file name prefix set on the cloned parameters.</param>
/// <param name="tuneSourceCorpus">Source segments of the tune corpus.</param>
/// <param name="tuneTargetCorpus">Target segments of the tune corpus, indexed in parallel with the source segments.</param>
/// <param name="reporter">Reporter that receives the "Finalizing" step.</param>
private void TrainTuneCorpus(
    string trainTMPrefix,
    string trainLMPrefix,
    IReadOnlyList<IReadOnlyList<string>> tuneSourceCorpus,
    IReadOnlyList<IReadOnlyList<string>> tuneTargetCorpus,
    ThotTrainProgressReporter reporter)
{
    // The step is reported even when there is nothing to train on.
    reporter.Step("Finalizing", TrainingStepCount - 1);

    if (tuneSourceCorpus.Count == 0)
    {
        return;
    }

    ThotSmtParameters tuneParameters = Parameters.Clone();
    tuneParameters.TranslationModelFileNamePrefix = trainTMPrefix;
    tuneParameters.LanguageModelFileNamePrefix = trainLMPrefix;

    using (ThotSmtModel model = new ThotSmtModel(tuneParameters))
    using (ISmtEngine smtEngine = model.CreateEngine())
    {
        for (int segmentIndex = 0; segmentIndex < tuneSourceCorpus.Count; segmentIndex++)
        {
            IReadOnlyList<string> sourceSegment = tuneSourceCorpus[segmentIndex];
            IReadOnlyList<string> targetSegment = tuneTargetCorpus[segmentIndex];
            smtEngine.TrainSegment(sourceSegment, targetSegment);
        }
    }
}
/// <summary>
/// Trains an engine on every tune segment pair, reporting progress before each segment and
/// a final status of <c>1.0</c> after the last one. Returns immediately, without reporting
/// anything, when the tune corpus is empty. The engine is created from a model that uses a
/// clone of <c>Parameters</c> pointed at the given translation and language model prefixes.
/// </summary>
/// <param name="trainTMPrefix">Translation model file name prefix set on the cloned parameters.</param>
/// <param name="trainLMPrefix">Language model file name prefix set on the cloned parameters.</param>
/// <param name="tuneSourceCorpus">Source segments of the tune corpus.</param>
/// <param name="tuneTargetCorpus">Target segments of the tune corpus, indexed in parallel with the source segments.</param>
/// <param name="progress">Receiver of the progress statuses.</param>
private void TrainTuneCorpus(
    string trainTMPrefix,
    string trainLMPrefix,
    IReadOnlyList<IReadOnlyList<string>> tuneSourceCorpus,
    IReadOnlyList<IReadOnlyList<string>> tuneTargetCorpus,
    IProgress<ProgressStatus> progress)
{
    if (tuneSourceCorpus.Count == 0)
    {
        return;
    }

    ThotSmtParameters tuneParameters = Parameters.Clone();
    tuneParameters.TranslationModelFileNamePrefix = trainTMPrefix;
    tuneParameters.LanguageModelFileNamePrefix = trainLMPrefix;

    using (ThotSmtModel model = new ThotSmtModel(tuneParameters))
    using (ISmtEngine smtEngine = model.CreateEngine())
    {
        for (int segmentIndex = 0; segmentIndex < tuneSourceCorpus.Count; segmentIndex++)
        {
            // Progress is reported before the segment is trained.
            ProgressStatus status = new ProgressStatus(segmentIndex, tuneSourceCorpus.Count);
            progress.Report(status);

            smtEngine.TrainSegment(tuneSourceCorpus[segmentIndex], tuneTargetCorpus[segmentIndex]);
        }

        progress.Report(new ProgressStatus(1.0));
    }
}
/// <summary>
/// Checks that approving a corrected translation in one interactive session affects a later
/// session on the same engine: "hablé" is left untranslated in the first session, and after
/// the approved correction the second session renders it as "talked".
/// </summary>
public void Approve_TwoSegmentsUnknownWord_LearnsUnknownWord()
{
    using (ThotSmtModel model = new ThotSmtModel(TestHelpers.ToyCorpusConfigFileName))
    using (IInteractiveSmtEngine interactiveEngine = model.CreateInteractiveEngine())
    {
        // First session: correct the translation through prefixes, then approve it.
        using (IInteractiveTranslationSession firstSession =
            interactiveEngine.TranslateInteractively(1, "hablé con recepción .".Split()))
        {
            TranslationResult current = firstSession.CurrentResults[0];
            string[] expectedInitial = "hablé with reception .".Split();
            Assert.That(current.TargetSegment, Is.EqualTo(expectedInitial));

            current = firstSession.SetPrefix("i talked".Split(), true)[0];
            string[] expectedWithPrefix = "i talked with reception .".Split();
            Assert.That(current.TargetSegment, Is.EqualTo(expectedWithPrefix));

            firstSession.SetPrefix("i talked with reception .".Split(), true);
            firstSession.Approve();
        }

        // Second session: a different sentence containing the same previously unknown word.
        using (IInteractiveTranslationSession secondSession =
            interactiveEngine.TranslateInteractively(1, "hablé hasta cinco en punto .".Split()))
        {
            TranslationResult learned = secondSession.CurrentResults[0];
            string[] expectedLearned = "talked until five o ' clock .".Split();
            Assert.That(learned.TargetSegment, Is.EqualTo(expectedLearned));
        }
    }
}
/// <summary>
/// Creates a batch trainer tied to the given SMT model. The configuration file name, both
/// preprocessors and the corpus are handed unchanged to the base constructor.
/// </summary>
/// <param name="smtModel">The SMT model kept in <c>_smtModel</c>.</param>
/// <param name="cfgFileName">Configuration file name forwarded to the base constructor.</param>
/// <param name="sourcePreprocessor">Source-side preprocessor forwarded to the base constructor.</param>
/// <param name="targetPreprocessor">Target-side preprocessor forwarded to the base constructor.</param>
/// <param name="corpus">Parallel text corpus forwarded to the base constructor.</param>
public BatchTrainer(
    ThotSmtModel smtModel,
    string cfgFileName,
    Func<string, string> sourcePreprocessor,
    Func<string, string> targetPreprocessor,
    ParallelTextCorpus corpus)
    : base(cfgFileName, sourcePreprocessor, targetPreprocessor, corpus)
{
    _smtModel = smtModel;
}