Example #1
0
 /// <summary>
 /// Creates a batch trainer bound to the given SMT model.
 /// </summary>
 /// <param name="smtModel">Model instance kept by this trainer in <c>_smtModel</c>.</param>
 /// <param name="parameters">Thot parameters, forwarded unchanged to the base constructor.</param>
 /// <param name="sourcePreprocessor">Source-side string preprocessor, forwarded to the base constructor.</param>
 /// <param name="targetPreprocessor">Target-side string preprocessor, forwarded to the base constructor.</param>
 /// <param name="corpus">Parallel corpus, forwarded to the base constructor.</param>
 public BatchTrainer(
     ThotSmtModel smtModel,
     ThotSmtParameters parameters,
     Func<string, string> sourcePreprocessor,
     Func<string, string> targetPreprocessor,
     ParallelTextCorpus corpus)
     : base(parameters, sourcePreprocessor, targetPreprocessor, corpus)
 {
     // The base class receives everything except the model; the model is the only state added here.
     _smtModel = smtModel;
 }
Example #2
0
 // Requests a word graph for a zero-length segment and checks that the
 // returned graph reports itself as empty.
 public void GetWordGraph_EmptySegment_ReturnsEmptyGraph()
 {
     using (var model = new ThotSmtModel(TestHelpers.ToyCorpusConfigFileName))
     {
         using (ISmtEngine smtEngine = model.CreateEngine())
         {
             var emptySegment = new string[0];

             WordGraph graph = smtEngine.GetWordGraph(emptySegment);

             Assert.That(graph.IsEmpty, Is.True);
         }
     }
 }
Example #3
0
 // Aligns a source sentence against a given target sentence and checks that
 // the result's target segment equals the target tokens that were supplied.
 public void GetBestPhraseAlignment_TranslationCorrect()
 {
     const string sourceText = "esto es una prueba .";
     const string targetText = "this is a test .";

     using (var model = new ThotSmtModel(TestHelpers.ToyCorpusConfigFileName))
     {
         using (ISmtEngine smtEngine = model.CreateEngine())
         {
             TranslationResult alignment = smtEngine.GetBestPhraseAlignment(sourceText.Split(), targetText.Split());

             Assert.That(alignment.TargetSegment, Is.EqualTo(targetText.Split()));
         }
     }
 }
Example #4
0
 // Asks for the 2 best translations of one sentence and checks both target
 // segments, in order. Note that the expected hypotheses keep the source
 // word "hablé" untranslated.
 public void Translate_NBest_TranslationsCorrect()
 {
     const string sourceText = "hablé hasta cinco en punto .";

     using (var model = new ThotSmtModel(TestHelpers.ToyCorpusConfigFileName))
     {
         using (ISmtEngine smtEngine = model.CreateEngine())
         {
             IEnumerable<TranslationResult> hypotheses = smtEngine.Translate(2, sourceText.Split());

             Assert.That(
                 hypotheses.Select(hypothesis => hypothesis.TargetSegment),
                 Is.EqualTo(new[]
                 {
                     "hablé until five o ' clock .".Split(),
                     "hablé until five o ' clock for".Split()
                 }));
         }
     }
 }
Example #5
0
 // Asks for 3 translations but expects only a single hypothesis back, i.e.
 // the engine may return fewer results than the requested n.
 public void Translate_NBestLessThanN_TranslationsCorrect()
 {
     const string sourceText = "voy a marcharme hoy por la tarde .";

     using (var model = new ThotSmtModel(TestHelpers.ToyCorpusConfigFileName))
     {
         using (ISmtEngine smtEngine = model.CreateEngine())
         {
             IEnumerable<TranslationResult> hypotheses = smtEngine.Translate(3, sourceText.Split());

             Assert.That(
                 hypotheses.Select(hypothesis => hypothesis.TargetSegment),
                 Is.EqualTo(new[] { "i am leaving today in the afternoon .".Split() }));
         }
     }
 }
Example #6
0
 // Translates a single sentence and checks the tokens of the best result.
 public void Translate_TranslationCorrect()
 {
     const string sourceText = "voy a marcharme hoy por la tarde .";
     const string expectedText = "i am leaving today in the afternoon .";

     using (var model = new ThotSmtModel(TestHelpers.ToyCorpusConfigFileName))
     {
         using (ISmtEngine smtEngine = model.CreateEngine())
         {
             TranslationResult translation = smtEngine.Translate(sourceText.Split());

             Assert.That(translation.TargetSegment, Is.EqualTo(expectedText.Split()));
         }
     }
 }
Example #7
0
 /// <summary>
 /// Creates an engine on top of the given SMT model: stores the model, starts with an empty
 /// set of interactive sessions, calls <c>LoadHandle</c>, and wires up the segment aligner,
 /// the word confidence estimator and a fresh error correction model.
 /// </summary>
 /// <param name="smtModel">The model this engine keeps a reference to in <c>_smtModel</c>.</param>
 public ThotSmtEngine(ThotSmtModel smtModel)
 {
     _smtModel = smtModel;
     _sessions = new HashSet <ThotInteractiveTranslationSession>();
     // NOTE(review): LoadHandle is defined elsewhere; presumably it reads _smtModel, so it is
     // kept after the field assignments above — confirm before reordering.
     LoadHandle();
     // Both helpers are given this engine's GetTranslationProbability method as their callback.
     _segmentAligner      = new FuzzyEditDistanceSegmentAligner(GetTranslationProbability);
     _confidenceEstimator = new Ibm1WordConfidenceEstimator(GetTranslationProbability);
     // Alternative estimator, currently disabled:
     //_confidenceEstimator = new WppWordConfidenceEstimator(this);
     ErrorCorrectionModel = new ErrorCorrectionModel();
 }
 // Opens an interactive session requesting 1 result and checks the first
 // current result before any prefix has been supplied.
 public void TranslateInteractively_TranslationCorrect()
 {
     const string sourceText = "me marcho hoy por la tarde .";
     const string expectedText = "i leave today in the afternoon .";

     using (var model = new ThotSmtModel(TestHelpers.ToyCorpusConfigFileName))
     {
         using (IInteractiveSmtEngine interactiveEngine = model.CreateInteractiveEngine())
         {
             using (IInteractiveTranslationSession translationSession =
                        interactiveEngine.TranslateInteractively(1, sourceText.Split()))
             {
                 TranslationResult firstResult = translationSession.CurrentResults[0];

                 Assert.That(firstResult.TargetSegment, Is.EqualTo(expectedText.Split()));
             }
         }
     }
 }
 // Checks the initial suggestion (which keeps the source word "caminé"),
 // then sets the prefix "i walked" and checks that the updated suggestion
 // starts with that prefix and completes the rest of the sentence.
 public void SetPrefix_MissingWord_TranslationCorrect()
 {
     const string sourceText = "caminé a mi habitación .";

     using (var model = new ThotSmtModel(TestHelpers.ToyCorpusConfigFileName))
     {
         using (IInteractiveSmtEngine interactiveEngine = model.CreateInteractiveEngine())
         {
             using (IInteractiveTranslationSession translationSession =
                        interactiveEngine.TranslateInteractively(1, sourceText.Split()))
             {
                 TranslationResult initialResult = translationSession.CurrentResults[0];
                 Assert.That(initialResult.TargetSegment, Is.EqualTo("caminé to my room .".Split()));

                 TranslationResult prefixedResult = translationSession.SetPrefix("i walked".Split(), true)[0];
                 Assert.That(prefixedResult.TargetSegment, Is.EqualTo("i walked to my room .".Split()));
             }
         }
     }
 }
Example #10
0
        // Translates a sentence before training, trains the engine on that
        // sentence with an explicit word alignment matrix, and then checks that
        // a different sentence is translated using the newly learned words.
        public void TrainSegment_AlignmentSpecified_TranslationCorrect()
        {
            const string trainingSource = "maria no dio una bofetada a la bruja verde .";
            const string trainingTarget = "mary didn't slap the green witch .";

            using (var model = new ThotSmtModel(TestHelpers.ToyCorpusConfigFileName))
            {
                using (ISmtEngine smtEngine = model.CreateEngine())
                {
                    TranslationResult beforeTraining = smtEngine.Translate(trainingSource.Split());
                    Assert.That(beforeTraining.TargetSegment, Is.EqualTo("maria no dio a bofetada to bruja verde .".Split()));

                    // 10 x 7 matches the token counts of the training source and target sentences.
                    var alignment = new WordAlignmentMatrix(10, 7, AlignmentType.Unknown);
                    SetAligned(alignment, 1, 1);
                    SetAligned(alignment, 2, 2);
                    SetAligned(alignment, 3, 2);
                    SetAligned(alignment, 4, 2);
                    SetSourceNotAligned(alignment, 5);
                    SetAligned(alignment, 8, 4);

                    smtEngine.TrainSegment(trainingSource.Split(), trainingTarget.Split(), alignment);

                    TranslationResult afterTraining = smtEngine.Translate("maria es una bruja .".Split());
                    Assert.That(afterTraining.TargetSegment, Is.EqualTo("mary is a witch .".Split()));
                }
            }
        }
Example #11
0
        /// <summary>
        /// Reports the "Finalizing" step and then trains a freshly loaded engine on every
        /// segment pair of the tuning corpus. Returns right after the step report when the
        /// source tuning corpus is empty.
        /// </summary>
        /// <param name="trainTMPrefix">Written to <c>TranslationModelFileNamePrefix</c> on a clone of <c>Parameters</c>.</param>
        /// <param name="trainLMPrefix">Written to <c>LanguageModelFileNamePrefix</c> on a clone of <c>Parameters</c>.</param>
        /// <param name="tuneSourceCorpus">Source-side tuning segments; its count drives the loop.</param>
        /// <param name="tuneTargetCorpus">
        /// Target-side tuning segments, indexed in parallel with the source side.
        /// NOTE(review): assumed to hold at least as many segments as the source side — confirm at the caller.
        /// </param>
        /// <param name="reporter">Receives the "Finalizing" step notification.</param>
        private void TrainTuneCorpus(
            string trainTMPrefix,
            string trainLMPrefix,
            IReadOnlyList<IReadOnlyList<string>> tuneSourceCorpus,
            IReadOnlyList<IReadOnlyList<string>> tuneTargetCorpus,
            ThotTrainProgressReporter reporter)
        {
            reporter.Step("Finalizing", TrainingStepCount - 1);

            if (tuneSourceCorpus.Count == 0)
            {
                return;
            }

            // Work on a copy so the trainer's own parameters keep their original prefixes.
            ThotSmtParameters tuneParameters = Parameters.Clone();
            tuneParameters.TranslationModelFileNamePrefix = trainTMPrefix;
            tuneParameters.LanguageModelFileNamePrefix = trainLMPrefix;

            using (var tuneModel = new ThotSmtModel(tuneParameters))
            {
                using (ISmtEngine tuneEngine = tuneModel.CreateEngine())
                {
                    for (int segmentIndex = 0; segmentIndex < tuneSourceCorpus.Count; segmentIndex++)
                    {
                        IReadOnlyList<string> sourceSegment = tuneSourceCorpus[segmentIndex];
                        IReadOnlyList<string> targetSegment = tuneTargetCorpus[segmentIndex];
                        tuneEngine.TrainSegment(sourceSegment, targetSegment);
                    }
                }
            }
        }
Example #12
0
        /// <summary>
        /// Trains a freshly loaded engine on every segment pair of the tuning corpus, reporting
        /// progress before each segment and a final status of 1.0 once all segments are done.
        /// Returns without reporting anything when the source tuning corpus is empty.
        /// </summary>
        /// <param name="trainTMPrefix">Written to <c>TranslationModelFileNamePrefix</c> on a clone of <c>Parameters</c>.</param>
        /// <param name="trainLMPrefix">Written to <c>LanguageModelFileNamePrefix</c> on a clone of <c>Parameters</c>.</param>
        /// <param name="tuneSourceCorpus">Source-side tuning segments; its count drives the loop.</param>
        /// <param name="tuneTargetCorpus">
        /// Target-side tuning segments, indexed in parallel with the source side.
        /// NOTE(review): assumed to hold at least as many segments as the source side — confirm at the caller.
        /// </param>
        /// <param name="progress">Sink for per-segment and completion progress reports.</param>
        private void TrainTuneCorpus(
            string trainTMPrefix,
            string trainLMPrefix,
            IReadOnlyList<IReadOnlyList<string>> tuneSourceCorpus,
            IReadOnlyList<IReadOnlyList<string>> tuneTargetCorpus,
            IProgress<ProgressStatus> progress)
        {
            if (tuneSourceCorpus.Count == 0)
            {
                return;
            }

            // Work on a copy so the trainer's own parameters keep their original prefixes.
            ThotSmtParameters tuneParameters = Parameters.Clone();
            tuneParameters.TranslationModelFileNamePrefix = trainTMPrefix;
            tuneParameters.LanguageModelFileNamePrefix = trainLMPrefix;

            using (var tuneModel = new ThotSmtModel(tuneParameters))
            {
                using (ISmtEngine tuneEngine = tuneModel.CreateEngine())
                {
                    for (int segmentIndex = 0; segmentIndex < tuneSourceCorpus.Count; segmentIndex++)
                    {
                        // Progress is reported before the segment is trained, so it counts completed segments.
                        progress.Report(new ProgressStatus(segmentIndex, tuneSourceCorpus.Count));

                        IReadOnlyList<string> sourceSegment = tuneSourceCorpus[segmentIndex];
                        IReadOnlyList<string> targetSegment = tuneTargetCorpus[segmentIndex];
                        tuneEngine.TrainSegment(sourceSegment, targetSegment);
                    }

                    progress.Report(new ProgressStatus(1.0));
                }
            }
        }
        // Runs two interactive sessions on the same engine. In the first, the
        // source word "hablé" is initially left untranslated; the user prefix
        // supplies "i talked" and the completed sentence is approved. The second
        // session then expects "hablé" to come out as "talked" in a new sentence.
        public void Approve_TwoSegmentsUnknownWord_LearnsUnknownWord()
        {
            const string firstSource = "hablé con recepción .";
            const string secondSource = "hablé hasta cinco en punto .";

            using (var model = new ThotSmtModel(TestHelpers.ToyCorpusConfigFileName))
            {
                using (IInteractiveSmtEngine interactiveEngine = model.CreateInteractiveEngine())
                {
                    using (IInteractiveTranslationSession firstSession =
                               interactiveEngine.TranslateInteractively(1, firstSource.Split()))
                    {
                        TranslationResult initialResult = firstSession.CurrentResults[0];
                        Assert.That(initialResult.TargetSegment, Is.EqualTo("hablé with reception .".Split()));

                        TranslationResult prefixedResult = firstSession.SetPrefix("i talked".Split(), true)[0];
                        Assert.That(prefixedResult.TargetSegment, Is.EqualTo("i talked with reception .".Split()));

                        // Set the full sentence as the prefix before approving it.
                        firstSession.SetPrefix("i talked with reception .".Split(), true);
                        firstSession.Approve();
                    }

                    using (IInteractiveTranslationSession secondSession =
                               interactiveEngine.TranslateInteractively(1, secondSource.Split()))
                    {
                        TranslationResult learnedResult = secondSession.CurrentResults[0];
                        Assert.That(learnedResult.TargetSegment, Is.EqualTo("talked until five o ' clock .".Split()));
                    }
                }
            }
        }
Example #14
0
 /// <summary>
 /// Creates a batch trainer bound to the given SMT model, configured from a file name.
 /// </summary>
 /// <param name="smtModel">Model instance kept by this trainer in <c>_smtModel</c>.</param>
 /// <param name="cfgFileName">Configuration file name, forwarded unchanged to the base constructor.</param>
 /// <param name="sourcePreprocessor">Source-side string preprocessor, forwarded to the base constructor.</param>
 /// <param name="targetPreprocessor">Target-side string preprocessor, forwarded to the base constructor.</param>
 /// <param name="corpus">Parallel corpus, forwarded to the base constructor.</param>
 public BatchTrainer(
     ThotSmtModel smtModel,
     string cfgFileName,
     Func<string, string> sourcePreprocessor,
     Func<string, string> targetPreprocessor,
     ParallelTextCorpus corpus)
     : base(cfgFileName, sourcePreprocessor, targetPreprocessor, corpus)
 {
     // The base class receives everything except the model; the model is the only state added here.
     _smtModel = smtModel;
 }