/// <summary>
/// Loads the word alignment model at <c>ModelPath</c> and checks that its
/// <c>SourceWords</c> collection reports 513 entries.
/// </summary>
public void SourceWords_Count()
{
    using (var model = new ThotWordAlignmentModel(ModelPath))
    {
        var actualCount = model.SourceWords.Count;

        Assert.That(actualCount, Is.EqualTo(513));
    }
}
/// <summary>
/// Loads the word alignment model at <c>ModelPath</c> and checks that its
/// <c>TargetWords</c> collection reports 363 entries.
/// </summary>
public void TargetWords_Count()
{
    using (var model = new ThotWordAlignmentModel(ModelPath))
    {
        var actualCount = model.TargetWords.Count;

        Assert.That(actualCount, Is.EqualTo(363));
    }
}
/// <summary>
/// Checks the indexer of <c>TargetWords</c>: index 0 is expected to be "NULL"
/// and index 362 is expected to be "pay".
/// </summary>
public void TargetWords_IndexAccessor()
{
    using (var model = new ThotWordAlignmentModel(ModelPath))
    {
        // Each lookup is asserted before the next one is performed.
        var firstWord = model.TargetWords[0];
        Assert.That(firstWord, Is.EqualTo("NULL"));

        var lastCheckedWord = model.TargetWords[362];
        Assert.That(lastCheckedWord, Is.EqualTo("pay"));
    }
}
/// <summary>
/// Checks the indexer of <c>SourceWords</c>: index 0 is expected to be "NULL"
/// and index 512 is expected to be "pagar".
/// </summary>
public void SourceWords_IndexAccessor()
{
    using (var model = new ThotWordAlignmentModel(ModelPath))
    {
        // Each lookup is asserted before the next one is performed.
        var firstWord = model.SourceWords[0];
        Assert.That(firstWord, Is.EqualTo("NULL"));

        var lastCheckedWord = model.SourceWords[512];
        Assert.That(lastCheckedWord, Is.EqualTo("pagar"));
    }
}
/// <summary>
/// Checks the values returned by <c>GetTranslationProbability</c> for four
/// word pairs, each compared against its expected value with a tolerance of 0.01.
/// </summary>
public void GetTranslationProbability_ReturnsCorrectProbability()
{
    // Tolerance applied to every probability comparison below.
    const double tolerance = 0.01;

    using (var model = new ThotWordAlignmentModel(ModelPath))
    {
        var estoThis = model.GetTranslationProbability("esto", "this");
        Assert.That(estoThis, Is.EqualTo(0.0).Within(tolerance));

        var esIs = model.GetTranslationProbability("es", "is");
        Assert.That(esIs, Is.EqualTo(0.65).Within(tolerance));

        var unaA = model.GetTranslationProbability("una", "a");
        Assert.That(unaA, Is.EqualTo(0.70).Within(tolerance));

        var pruebaTest = model.GetTranslationProbability("prueba", "test");
        Assert.That(pruebaTest, Is.EqualTo(0.0).Within(tolerance));
    }
}
/// <summary>
/// Aligns a space-tokenized source/target sentence pair with <c>GetBestAlignment</c>
/// and compares the resulting matrix, rendered by <c>ToGizaFormat</c>, with the
/// expected string.
/// </summary>
public void GetBestAlignment_ReturnsCorrectAlignment()
{
    // Expected output: the target sentence line followed by the alignment line.
    string expectedGiza =
        "could we see another room , please ?\n"
        + "NULL ({ }) por ({ 6 }) favor ({ 7 }) , ({ }) ¿ ({ 8 }) podríamos ({ 1 2 }) ver ({ 3 }) otra ({ 4 }) habitación ({ 5 }) ? ({ })\n";

    using (var model = new ThotWordAlignmentModel(ModelPath))
    {
        string[] sourceTokens = "por favor , ¿ podríamos ver otra habitación ?".Split(' ');
        string[] targetTokens = "could we see another room , please ?".Split(' ');

        WordAlignmentMatrix alignment = model.GetBestAlignment(sourceTokens, targetTokens);
        var actualGiza = alignment.ToGizaFormat(sourceTokens, targetTokens);

        Assert.That(actualGiza, Is.EqualTo(expectedGiza));
    }
}
/// <summary>
/// Creates an SMT model from the given parameters: stores and freezes the
/// parameters, loads the model through <c>Thot.LoadSmtModel</c>, and wraps the
/// direct and inverse single word alignment models obtained from the loaded handle.
/// </summary>
/// <param name="parameters">The parameters stored in <c>Parameters</c> and used to load the model.</param>
public ThotSmtModel(ThotSmtParameters parameters)
{
    Parameters = parameters;
    // Freeze is called before the parameters are handed to the loader.
    Parameters.Freeze();
    Handle = Thot.LoadSmtModel(Parameters);

    var directModelHandle = Thot.smtModel_getSingleWordAlignmentModel(Handle);
    _directWordAlignmentModel = new ThotWordAlignmentModel(directModelHandle);

    var inverseModelHandle = Thot.smtModel_getInverseSingleWordAlignmentModel(Handle);
    _inverseWordAlignmentModel = new ThotWordAlignmentModel(inverseModelHandle);
}
/// <summary>
/// Builds a word alignment model for <paramref name="swmPrefix"/>, feeds it every
/// training segment of the corpus, trains it, and saves it.
/// </summary>
/// <param name="swmPrefix">Passed to the <c>ThotWordAlignmentModel</c> constructor.</param>
/// <param name="sourcePreprocessor">Passed to <c>AddSegmentPair</c> for the source side.</param>
/// <param name="targetPreprocessor">Passed to <c>AddSegmentPair</c> for the target side.</param>
/// <param name="corpus">Corpus from which <c>GetTrainingSegments</c> draws the segments.</param>
/// <param name="progress">Progress sink handed to <c>Train</c>.</param>
private void TrainWordAlignmentModel(
    string swmPrefix,
    Func<string, string> sourcePreprocessor,
    Func<string, string> targetPreprocessor,
    ParallelTextCorpus corpus,
    IProgress<ProgressStatus> progress)
{
    using (var alignmentModel = new ThotWordAlignmentModel(swmPrefix, true))
    {
        // Collect all training data before training starts.
        foreach (ParallelTextSegment trainingSegment in GetTrainingSegments(corpus))
        {
            alignmentModel.AddSegmentPair(trainingSegment, sourcePreprocessor, targetPreprocessor);
        }

        alignmentModel.Train(progress);
        alignmentModel.Save();
    }
}
/// <summary>
/// Writes, for every training segment of the corpus, a "# id segmentRef" header
/// line followed by the model's Giza-format string to <paramref name="fileName"/>.
/// Reports one step to the reporter up front and checks for cancellation after
/// each segment.
/// </summary>
/// <param name="swmPrefix">Passed to the <c>ThotWordAlignmentModel</c> constructor.</param>
/// <param name="fileName">Path of the file the alignments are written to.</param>
/// <param name="sourcePreprocessor">Passed to <c>GetGizaFormatString</c> for the source side.</param>
/// <param name="targetPreprocessor">Passed to <c>GetGizaFormatString</c> for the target side.</param>
/// <param name="corpus">Corpus from which <c>GetTrainingSegments</c> draws the segments.</param>
/// <param name="name">Label inserted into the step message.</param>
/// <param name="reporter">Receives the step message and the cancellation checks.</param>
private void GenerateBestAlignments(
    string swmPrefix,
    string fileName,
    Func<string, string> sourcePreprocessor,
    Func<string, string> targetPreprocessor,
    ParallelTextCorpus corpus,
    string name,
    ThotTrainProgressReporter reporter)
{
    reporter.Step($"Generating best {name} alignments");

    using (var alignmentModel = new ThotWordAlignmentModel(swmPrefix))
    {
        using (var output = new StreamWriter(fileName))
        {
            foreach (ParallelTextSegment trainingSegment in GetTrainingSegments(corpus))
            {
                // The header is written before the alignment string is requested.
                string header = $"# {trainingSegment.Text.Id} {trainingSegment.SegmentRef}\n";
                output.Write(header);
                output.Write(alignmentModel.GetGizaFormatString(trainingSegment, sourcePreprocessor, targetPreprocessor));

                reporter.CheckCanceled();
            }
        }
    }
}
/// <summary>
/// Writes, for every training segment of the corpus, a "# id segmentRef" header
/// line followed by the model's Giza-format string to <paramref name="fileName"/>.
/// Progress is reported before each segment is written and once more, as 1.0,
/// after the last segment.
/// </summary>
/// <param name="swmPrefix">Passed to the <c>ThotWordAlignmentModel</c> constructor.</param>
/// <param name="fileName">Path of the file the alignments are written to.</param>
/// <param name="sourcePreprocessor">Passed to <c>GetGizaFormatString</c> for the source side.</param>
/// <param name="targetPreprocessor">Passed to <c>GetGizaFormatString</c> for the target side.</param>
/// <param name="corpus">Corpus from which <c>GetTrainingSegments</c> draws the segments.</param>
/// <param name="progress">Receives the progress reports.</param>
private void GenerateBestAlignments(
    string swmPrefix,
    string fileName,
    Func<string, string> sourcePreprocessor,
    Func<string, string> targetPreprocessor,
    ParallelTextCorpus corpus,
    IProgress<ProgressStatus> progress)
{
    using (var alignmentModel = new ThotWordAlignmentModel(swmPrefix))
    {
        using (var output = new StreamWriter(fileName))
        {
            // Number of segments already written; reported against Stats.TrainedSegmentCount.
            int writtenSegments = 0;
            foreach (ParallelTextSegment trainingSegment in GetTrainingSegments(corpus))
            {
                progress.Report(new ProgressStatus(writtenSegments, Stats.TrainedSegmentCount));

                string header = $"# {trainingSegment.Text.Id} {trainingSegment.SegmentRef}\n";
                output.Write(header);
                output.Write(alignmentModel.GetGizaFormatString(trainingSegment, sourcePreprocessor, targetPreprocessor));

                writtenSegments++;
            }

            progress.Report(new ProgressStatus(1.0));
        }
    }
}
/// <summary>
/// Builds a word alignment model for <paramref name="swmPrefix"/>, feeds it every
/// training segment of the corpus, runs five training iterations (reporting a
/// step before each one), and saves the model.
/// </summary>
/// <param name="swmPrefix">Passed to the <c>ThotWordAlignmentModel</c> constructor.</param>
/// <param name="sourcePreprocessor">Passed to <c>AddSegmentPair</c> for the source side.</param>
/// <param name="targetPreprocessor">Passed to <c>AddSegmentPair</c> for the target side.</param>
/// <param name="corpus">Corpus from which <c>GetTrainingSegments</c> draws the segments.</param>
/// <param name="name">Label inserted into the step message.</param>
/// <param name="reporter">Receives one step message per training iteration.</param>
private void TrainWordAlignmentModel(
    string swmPrefix,
    Func<string, string> sourcePreprocessor,
    Func<string, string> targetPreprocessor,
    ParallelTextCorpus corpus,
    string name,
    ThotTrainProgressReporter reporter)
{
    // Fixed number of training iterations run on the model.
    const int iterationCount = 5;

    using (var alignmentModel = new ThotWordAlignmentModel(swmPrefix, true))
    {
        foreach (ParallelTextSegment trainingSegment in GetTrainingSegments(corpus))
        {
            alignmentModel.AddSegmentPair(trainingSegment, sourcePreprocessor, targetPreprocessor);
        }

        for (int iteration = 0; iteration < iterationCount; iteration++)
        {
            reporter.Step($"Training {name} alignment model");
            alignmentModel.TrainingIteration();
        }

        alignmentModel.Save();
    }
}