Exemplo n.º 1
0
        /// <summary>
        /// Produces the translation model files that share the <paramref name="tmPrefix"/> path prefix.
        /// </summary>
        /// <remarks>
        /// The stages run strictly in this order:
        /// two single word alignment models are generated (prefix suffixes <c>_invswm</c> and <c>_swm</c>),
        /// their <c>.bestal</c> files are merged into <c>.A3.final</c>, a phrase table is generated from
        /// that file into <c>.ttable</c>, the phrase table is filtered, and four small parameter files are
        /// written next to it.
        /// </remarks>
        /// <param name="tmPrefix">Path prefix from which every file name used here is derived.</param>
        /// <param name="reporter">
        /// Progress reporter; it is handed to both alignment model generations and is notified with a step
        /// message before the merge, generation and filtering stages.
        /// </param>
        private void TrainTranslationModel(string tmPrefix, ThotTrainProgressReporter reporter)
        {
            // Every path below is derived from the same prefix.
            string invSwmStem = tmPrefix + "_invswm";
            string swmStem = tmPrefix + "_swm";
            string alignmentsPath = tmPrefix + ".A3.final";
            string phraseTablePath = tmPrefix + ".ttable";

            // Source-to-target corpus order; labelled "direct" for the callee.
            GenerateSingleWordAlignmentModel(
                invSwmStem,
                _sourcePreprocessor,
                _targetPreprocessor,
                _parallelCorpus,
                "direct",
                reporter);

            // Same call with the preprocessors swapped and the corpus inverted; labelled "inverse".
            GenerateSingleWordAlignmentModel(
                swmStem,
                _targetPreprocessor,
                _sourcePreprocessor,
                _parallelCorpus.Invert(),
                "inverse",
                reporter);

            reporter.Step("Merging alignments");
            string swmBestAlignments = swmStem + ".bestal";
            string invSwmBestAlignments = invSwmStem + ".bestal";
            // NOTE(review): the meaning of the trailing 'true' flag is not visible here - confirm against Thot.
            Thot.giza_symmetr1(swmBestAlignments, invSwmBestAlignments, alignmentsPath, true);

            reporter.Step("Generating phrase table");
            // NOTE(review): 10 is presumably a phrase length limit - confirm against Thot.
            Thot.phraseModel_generate(alignmentsPath, 10, phraseTablePath);

            reporter.Step("Filtering phrase table");
            // NOTE(review): 20 is presumably the number of best entries kept - confirm in FilterPhraseTableNBest.
            FilterPhraseTableNBest(phraseTablePath, 20);

            // Fixed parameter files written alongside the phrase table.
            File.WriteAllText(tmPrefix + ".lambda", "0.7 0.7");
            File.WriteAllText(tmPrefix + ".srcsegmlentable", "Uniform");
            File.WriteAllText(tmPrefix + ".trgcutstable", "0.999");
            File.WriteAllText(tmPrefix + ".trgsegmlentable", "Geometric");
        }
Exemplo n.º 2
0
        /// <summary>
        /// Produces the translation model files that share the <paramref name="tmPrefix"/> path prefix.
        /// </summary>
        /// <remarks>
        /// Two word alignment models are generated first (prefix suffixes <c>_invswm</c> and <c>_swm</c>).
        /// Then three reporter phases run in sequence, each disposed as soon as its single call returns:
        /// merging the two <c>.bestal</c> files into <c>.A3.final</c>, generating <c>.ttable</c> from that
        /// file, and filtering <c>.ttable</c>. Finally four small parameter files are written.
        /// </remarks>
        /// <param name="tmPrefix">Path prefix from which every file name used here is derived.</param>
        /// <param name="reporter">
        /// Progress reporter; it is handed to both alignment model generations and supplies one phase
        /// object per subsequent stage via <c>StartNextPhase</c>.
        /// </param>
        private void TrainTranslationModel(string tmPrefix, ThotTrainProgressReporter reporter)
        {
            // Every path below is derived from the same prefix.
            string invSwmStem = tmPrefix + "_invswm";
            string swmStem = tmPrefix + "_swm";
            string alignmentsPath = tmPrefix + ".A3.final";
            string phraseTablePath = tmPrefix + ".ttable";

            // Source-to-target corpus order.
            // NOTE(review): the final bool presumably marks the inverse direction - confirm in the callee.
            GenerateWordAlignmentModel(
                invSwmStem,
                _sourcePreprocessor,
                _targetPreprocessor,
                _parallelCorpus,
                reporter,
                false);

            // Same call with the preprocessors swapped and the corpus inverted.
            GenerateWordAlignmentModel(
                swmStem,
                _targetPreprocessor,
                _sourcePreprocessor,
                _parallelCorpus.Invert(),
                reporter,
                true);

            // The phase objects are never read; they exist only so each phase is disposed when its stage ends.
            using (PhaseProgress symmetrizePhase = reporter.StartNextPhase())
            {
                string swmBestAlignments = swmStem + ".bestal";
                string invSwmBestAlignments = invSwmStem + ".bestal";
                // NOTE(review): the meaning of the trailing 'true' flag is not visible here - confirm against Thot.
                Thot.giza_symmetr1(swmBestAlignments, invSwmBestAlignments, alignmentsPath, true);
            }

            using (PhaseProgress generatePhase = reporter.StartNextPhase())
            {
                // NOTE(review): 10 is presumably a phrase length limit - confirm against Thot.
                Thot.phraseModel_generate(alignmentsPath, 10, phraseTablePath);
            }

            using (PhaseProgress filterPhase = reporter.StartNextPhase())
            {
                // NOTE(review): 20 is presumably the number of best entries kept - confirm in FilterPhraseTableNBest.
                FilterPhraseTableNBest(phraseTablePath, 20);
            }

            // Fixed parameter files written alongside the phrase table.
            File.WriteAllText(tmPrefix + ".lambda", "0.7 0.7");
            File.WriteAllText(tmPrefix + ".srcsegmlentable", "Uniform");
            File.WriteAllText(tmPrefix + ".trgcutstable", "0.999");
            File.WriteAllText(tmPrefix + ".trgsegmlentable", "Geometric");
        }