コード例 #1
0
 // Loads the word alignment model at ModelPath and verifies the size of its source vocabulary.
 public void SourceWords_Count()
 {
     const int expectedSourceWordCount = 513;

     using (var alignmentModel = new ThotWordAlignmentModel(ModelPath))
     {
         var actualSourceWordCount = alignmentModel.SourceWords.Count;

         Assert.That(actualSourceWordCount, Is.EqualTo(expectedSourceWordCount));
     }
 }
コード例 #2
0
 // Loads the word alignment model at ModelPath and verifies the size of its target vocabulary.
 public void TargetWords_Count()
 {
     const int expectedTargetWordCount = 363;

     using (var alignmentModel = new ThotWordAlignmentModel(ModelPath))
     {
         var actualTargetWordCount = alignmentModel.TargetWords.Count;

         Assert.That(actualTargetWordCount, Is.EqualTo(expectedTargetWordCount));
     }
 }
コード例 #3
0
 // Verifies indexed access into the target vocabulary: index 0 holds the "NULL" entry
 // and index 362 holds "pay".
 public void TargetWords_IndexAccessor()
 {
     const int firstIndex = 0;
     const int lastIndex = 362;

     using (var alignmentModel = new ThotWordAlignmentModel(ModelPath))
     {
         Assert.That(alignmentModel.TargetWords[firstIndex], Is.EqualTo("NULL"));
         Assert.That(alignmentModel.TargetWords[lastIndex], Is.EqualTo("pay"));
     }
 }
コード例 #4
0
 // Verifies indexed access into the source vocabulary: index 0 holds the "NULL" entry
 // and index 512 holds "pagar".
 public void SourceWords_IndexAccessor()
 {
     const int firstIndex = 0;
     const int lastIndex = 512;

     using (var alignmentModel = new ThotWordAlignmentModel(ModelPath))
     {
         Assert.That(alignmentModel.SourceWords[firstIndex], Is.EqualTo("NULL"));
         Assert.That(alignmentModel.SourceWords[lastIndex], Is.EqualTo("pagar"));
     }
 }
コード例 #5
0
 // Checks the translation probabilities reported by the model at ModelPath for four
 // word pairs, each compared against its expected value with the same absolute tolerance.
 public void GetTranslationProbability_ReturnsCorrectProbability()
 {
     const double tolerance = 0.01;

     using (var alignmentModel = new ThotWordAlignmentModel(ModelPath))
     {
         Assert.That(alignmentModel.GetTranslationProbability("esto", "this"),
             Is.EqualTo(0.0).Within(tolerance));
         Assert.That(alignmentModel.GetTranslationProbability("es", "is"),
             Is.EqualTo(0.65).Within(tolerance));
         Assert.That(alignmentModel.GetTranslationProbability("una", "a"),
             Is.EqualTo(0.70).Within(tolerance));
         Assert.That(alignmentModel.GetTranslationProbability("prueba", "test"),
             Is.EqualTo(0.0).Within(tolerance));
     }
 }
コード例 #6
0
 // Aligns one space-tokenized segment pair with the model at ModelPath and compares
 // the GIZA-format rendering of the resulting matrix against the expected text.
 public void GetBestAlignment_ReturnsCorrectAlignment()
 {
     using (var alignmentModel = new ThotWordAlignmentModel(ModelPath))
     {
         var sourceTokens = "por favor , ¿ podríamos ver otra habitación ?".Split(' ');
         var targetTokens = "could we see another room , please ?".Split(' ');

         var alignment = alignmentModel.GetBestAlignment(sourceTokens, targetTokens);
         var actualGiza = alignment.ToGizaFormat(sourceTokens, targetTokens);

         // First line: the target segment. Second line: each source token followed by
         // the target positions listed for it in "({ ... })".
         var expectedGiza = "could we see another room , please ?\n"
             + "NULL ({ }) por ({ 6 }) favor ({ 7 }) , ({ }) ¿ ({ 8 }) podríamos ({ 1 2 }) ver ({ 3 }) otra ({ 4 }) habitación ({ 5 }) ? ({ })\n";

         Assert.That(actualGiza, Is.EqualTo(expectedGiza));
     }
 }
コード例 #7
0
        /// <summary>
        /// Creates an SMT model from the supplied parameters: freezes the parameters, loads the
        /// underlying Thot model, and wraps its direct and inverse single-word alignment models.
        /// </summary>
        /// <param name="parameters">
        /// The model parameters. The instance is stored in <c>Parameters</c> and frozen by this constructor.
        /// </param>
        /// <exception cref="System.ArgumentNullException">
        /// Thrown when <paramref name="parameters"/> is <c>null</c>.
        /// </exception>
        public ThotSmtModel(ThotSmtParameters parameters)
        {
            // Fail fast with a descriptive exception instead of a NullReferenceException
            // from Freeze(), and before any native model is loaded.
            if (parameters == null)
            {
                throw new System.ArgumentNullException(nameof(parameters));
            }

            Parameters = parameters;
            Parameters.Freeze();

            Handle = Thot.LoadSmtModel(Parameters);

            // Both alignment model wrappers are built from handles obtained from the loaded SMT model.
            _directWordAlignmentModel = new ThotWordAlignmentModel(
                Thot.smtModel_getSingleWordAlignmentModel(Handle));
            _inverseWordAlignmentModel = new ThotWordAlignmentModel(
                Thot.smtModel_getInverseSingleWordAlignmentModel(Handle));
        }
コード例 #8
0
 /// <summary>
 /// Builds a word alignment model for the given prefix: adds every training segment pair
 /// from the corpus, trains the model while reporting to <paramref name="progress"/>, and saves it.
 /// </summary>
 /// <param name="swmPrefix">Prefix passed to the <c>ThotWordAlignmentModel</c> constructor.</param>
 /// <param name="sourcePreprocessor">Preprocessing function passed along with each segment for the source side.</param>
 /// <param name="targetPreprocessor">Preprocessing function passed along with each segment for the target side.</param>
 /// <param name="corpus">Parallel corpus from which the training segments are taken.</param>
 /// <param name="progress">Progress sink handed to the model's <c>Train</c> call.</param>
 private void TrainWordAlignmentModel(
     string swmPrefix,
     Func<string, string> sourcePreprocessor,
     Func<string, string> targetPreprocessor,
     ParallelTextCorpus corpus,
     IProgress<ProgressStatus> progress)
 {
     // NOTE(review): the second constructor argument presumably requests a new model - confirm.
     using (var alignmentModel = new ThotWordAlignmentModel(swmPrefix, true))
     {
         foreach (ParallelTextSegment pair in GetTrainingSegments(corpus))
         {
             alignmentModel.AddSegmentPair(pair, sourcePreprocessor, targetPreprocessor);
         }

         alignmentModel.Train(progress);
         alignmentModel.Save();
     }
 }
コード例 #9
0
        /// <summary>
        /// Writes, for every training segment of the corpus, a "# id ref" header line followed by the
        /// model's GIZA-format string for that segment to <paramref name="fileName"/>.
        /// </summary>
        /// <param name="swmPrefix">Prefix passed to the <c>ThotWordAlignmentModel</c> constructor.</param>
        /// <param name="fileName">Path of the output file.</param>
        /// <param name="sourcePreprocessor">Preprocessing function passed along for the source side.</param>
        /// <param name="targetPreprocessor">Preprocessing function passed along for the target side.</param>
        /// <param name="corpus">Parallel corpus from which the training segments are taken.</param>
        /// <param name="name">Label inserted into the step message given to the reporter.</param>
        /// <param name="reporter">Receives the step message and is polled for cancellation after each segment.</param>
        private void GenerateBestAlignments(
            string swmPrefix,
            string fileName,
            Func<string, string> sourcePreprocessor,
            Func<string, string> targetPreprocessor,
            ParallelTextCorpus corpus,
            string name,
            ThotTrainProgressReporter reporter)
        {
            reporter.Step($"Generating best {name} alignments");

            using (var alignmentModel = new ThotWordAlignmentModel(swmPrefix))
            {
                using (var output = new StreamWriter(fileName))
                {
                    foreach (ParallelTextSegment pair in GetTrainingSegments(corpus))
                    {
                        // The header is written before the alignment string is computed.
                        output.Write($"# {pair.Text.Id} {pair.SegmentRef}\n");

                        string gizaText = alignmentModel.GetGizaFormatString(pair, sourcePreprocessor,
                            targetPreprocessor);
                        output.Write(gizaText);

                        reporter.CheckCanceled();
                    }
                }
            }
        }
コード例 #10
0
        /// <summary>
        /// Writes, for every training segment of the corpus, a "# id ref" header line followed by the
        /// model's GIZA-format string for that segment to <paramref name="fileName"/>, reporting
        /// progress before each segment and once more after the last one.
        /// </summary>
        /// <param name="swmPrefix">Prefix passed to the <c>ThotWordAlignmentModel</c> constructor.</param>
        /// <param name="fileName">Path of the output file.</param>
        /// <param name="sourcePreprocessor">Preprocessing function passed along for the source side.</param>
        /// <param name="targetPreprocessor">Preprocessing function passed along for the target side.</param>
        /// <param name="corpus">Parallel corpus from which the training segments are taken.</param>
        /// <param name="progress">Progress sink that receives one report per segment plus a final report.</param>
        private void GenerateBestAlignments(
            string swmPrefix,
            string fileName,
            Func<string, string> sourcePreprocessor,
            Func<string, string> targetPreprocessor,
            ParallelTextCorpus corpus,
            IProgress<ProgressStatus> progress)
        {
            using (var alignmentModel = new ThotWordAlignmentModel(swmPrefix))
            {
                using (var output = new StreamWriter(fileName))
                {
                    int processedCount = 0;
                    foreach (ParallelTextSegment pair in GetTrainingSegments(corpus))
                    {
                        // Report the number of segments already finished, before handling this one.
                        progress.Report(new ProgressStatus(processedCount, Stats.TrainedSegmentCount));

                        output.Write($"# {pair.Text.Id} {pair.SegmentRef}\n");

                        string gizaText = alignmentModel.GetGizaFormatString(pair, sourcePreprocessor,
                            targetPreprocessor);
                        output.Write(gizaText);

                        processedCount += 1;
                    }

                    // Final report, issued while the model and writer are still open.
                    progress.Report(new ProgressStatus(1.0));
                }
            }
        }
コード例 #11
0
        /// <summary>
        /// Builds a word alignment model for the given prefix: adds every training segment pair
        /// from the corpus, runs a fixed number of training iterations, and saves the model.
        /// </summary>
        /// <param name="swmPrefix">Prefix passed to the <c>ThotWordAlignmentModel</c> constructor.</param>
        /// <param name="sourcePreprocessor">Preprocessing function passed along with each segment for the source side.</param>
        /// <param name="targetPreprocessor">Preprocessing function passed along with each segment for the target side.</param>
        /// <param name="corpus">Parallel corpus from which the training segments are taken.</param>
        /// <param name="name">Label inserted into the step message given to the reporter.</param>
        /// <param name="reporter">Receives one step message before each training iteration.</param>
        private void TrainWordAlignmentModel(
            string swmPrefix,
            Func<string, string> sourcePreprocessor,
            Func<string, string> targetPreprocessor,
            ParallelTextCorpus corpus,
            string name,
            ThotTrainProgressReporter reporter)
        {
            const int trainingIterationCount = 5;

            // NOTE(review): the second constructor argument presumably requests a new model - confirm.
            using (var alignmentModel = new ThotWordAlignmentModel(swmPrefix, true))
            {
                foreach (ParallelTextSegment pair in GetTrainingSegments(corpus))
                {
                    alignmentModel.AddSegmentPair(pair, sourcePreprocessor, targetPreprocessor);
                }

                for (int iteration = 0; iteration < trainingIterationCount; iteration++)
                {
                    // One step is reported per iteration, before the iteration runs.
                    reporter.Step($"Training {name} alignment model");

                    alignmentModel.TrainingIteration();
                }

                alignmentModel.Save();
            }
        }