/// <summary>
/// Writes one entry per training segment of <paramref name="corpus"/> to
/// <paramref name="fileName"/>: a <c>#</c>-prefixed header line carrying the text id and
/// segment reference, followed by the string produced by the alignment model's
/// <c>GetGizaFormatString</c> for that segment.
/// </summary>
/// <param name="swmPrefix">Prefix used to construct the <c>ThotWordAlignmentModel</c>.</param>
/// <param name="fileName">Path of the output file opened for writing.</param>
/// <param name="sourcePreprocessor">Preprocessor passed through to the model for the source side.</param>
/// <param name="targetPreprocessor">Preprocessor passed through to the model for the target side.</param>
/// <param name="corpus">Corpus whose training segments are enumerated.</param>
/// <param name="name">Label inserted into the progress step message.</param>
/// <param name="reporter">Receives the step notification and is polled for cancellation after each segment.</param>
private void GenerateBestAlignments(string swmPrefix, string fileName, Func<string, string> sourcePreprocessor,
    Func<string, string> targetPreprocessor, ParallelTextCorpus corpus, string name,
    ThotTrainProgressReporter reporter)
{
    // Announce the step before any resource is opened.
    reporter.Step($"Generating best {name} alignments");

    using (var alignmentModel = new ThotWordAlignmentModel(swmPrefix))
    {
        using (var output = new StreamWriter(fileName))
        {
            foreach (ParallelTextSegment trainingSegment in GetTrainingSegments(corpus))
            {
                // The header is written before the alignment string is computed.
                // An explicit "\n" is used rather than the platform newline.
                string header = $"# {trainingSegment.Text.Id} {trainingSegment.SegmentRef}\n";
                output.Write(header);

                var alignment = alignmentModel.GetGizaFormatString(trainingSegment, sourcePreprocessor,
                    targetPreprocessor);
                output.Write(alignment);

                // Cancellation is checked once per segment, after its entry is written.
                reporter.CheckCanceled();
            }
        }
    }
}
/// <summary>
/// Writes one entry per training segment of <paramref name="corpus"/> to
/// <paramref name="fileName"/>: a <c>#</c>-prefixed header line carrying the text id and
/// segment reference, followed by the string produced by the alignment model's
/// <c>GetGizaFormatString</c> for that segment. Progress is reported before each segment
/// is written and once more after the last one.
/// </summary>
/// <param name="swmPrefix">Prefix used to construct the <c>ThotWordAlignmentModel</c>.</param>
/// <param name="fileName">Path of the output file opened for writing.</param>
/// <param name="sourcePreprocessor">Preprocessor passed through to the model for the source side.</param>
/// <param name="targetPreprocessor">Preprocessor passed through to the model for the target side.</param>
/// <param name="corpus">Corpus whose training segments are enumerated.</param>
/// <param name="progress">Receives a status per segment and a final status of <c>1.0</c>.</param>
private void GenerateBestAlignments(string swmPrefix, string fileName, Func<string, string> sourcePreprocessor,
    Func<string, string> targetPreprocessor, ParallelTextCorpus corpus, IProgress<ProgressStatus> progress)
{
    using (var alignmentModel = new ThotWordAlignmentModel(swmPrefix))
    {
        using (var output = new StreamWriter(fileName))
        {
            // Number of segments already written; reported before each segment is handled.
            int segmentsDone = 0;

            foreach (ParallelTextSegment trainingSegment in GetTrainingSegments(corpus))
            {
                var status = new ProgressStatus(segmentsDone, Stats.TrainedSegmentCount);
                progress.Report(status);

                // The header is written before the alignment string is computed.
                // An explicit "\n" is used rather than the platform newline.
                string header = $"# {trainingSegment.Text.Id} {trainingSegment.SegmentRef}\n";
                output.Write(header);

                var alignment = alignmentModel.GetGizaFormatString(trainingSegment, sourcePreprocessor,
                    targetPreprocessor);
                output.Write(alignment);

                segmentsDone++;
            }

            // Final report is issued while the writer and model are still open.
            progress.Report(new ProgressStatus(1.0));
        }
    }
}