Esempio n. 1
0
        /// <summary>
        /// Writes one alignment entry per training segment of <paramref name="corpus"/> to
        /// <paramref name="fileName"/>. Each entry is a "# {text id} {segment ref}" header line
        /// followed by the string returned by the model's <c>GetGizaFormatString</c>.
        /// </summary>
        /// <param name="swmPrefix">Prefix passed to the <c>ThotWordAlignmentModel</c> constructor.</param>
        /// <param name="fileName">Path of the output file opened with a <c>StreamWriter</c>.</param>
        /// <param name="sourcePreprocessor">Forwarded to <c>GetGizaFormatString</c> as the source preprocessor.</param>
        /// <param name="targetPreprocessor">Forwarded to <c>GetGizaFormatString</c> as the target preprocessor.</param>
        /// <param name="corpus">Corpus handed to <c>GetTrainingSegments</c> to enumerate the segments.</param>
        /// <param name="name">Label inserted into the progress step message.</param>
        /// <param name="reporter">Receives the step message and is polled for cancellation after every segment.</param>
        private void GenerateBestAlignments(string swmPrefix, string fileName, Func<string, string> sourcePreprocessor,
                                            Func<string, string> targetPreprocessor, ParallelTextCorpus corpus, string name,
                                            ThotTrainProgressReporter reporter)
        {
            reporter.Step($"Generating best {name} alignments");

            // The writer is declared last, so it is disposed before the model.
            using (var alignmentModel = new ThotWordAlignmentModel(swmPrefix))
            using (var output = new StreamWriter(fileName))
            {
                foreach (ParallelTextSegment trainingSegment in GetTrainingSegments(corpus))
                {
                    // Header line identifying the segment; it is written before the alignment is computed.
                    string header = $"# {trainingSegment.Text.Id} {trainingSegment.SegmentRef}\n";
                    output.Write(header);

                    string alignment = alignmentModel.GetGizaFormatString(trainingSegment, sourcePreprocessor,
                                                                          targetPreprocessor);
                    output.Write(alignment);

                    // Poll for cancellation once the current segment has been written.
                    reporter.CheckCanceled();
                }
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Writes one alignment entry per training segment of <paramref name="corpus"/> to
        /// <paramref name="fileName"/>, reporting progress before each segment and once more
        /// after the last one. Each entry is a "# {text id} {segment ref}" header line followed
        /// by the string returned by the model's <c>GetGizaFormatString</c>.
        /// </summary>
        /// <param name="swmPrefix">Prefix passed to the <c>ThotWordAlignmentModel</c> constructor.</param>
        /// <param name="fileName">Path of the output file opened with a <c>StreamWriter</c>.</param>
        /// <param name="sourcePreprocessor">Forwarded to <c>GetGizaFormatString</c> as the source preprocessor.</param>
        /// <param name="targetPreprocessor">Forwarded to <c>GetGizaFormatString</c> as the target preprocessor.</param>
        /// <param name="corpus">Corpus handed to <c>GetTrainingSegments</c> to enumerate the segments.</param>
        /// <param name="progress">
        /// Receives a <c>ProgressStatus</c> built from the zero-based segment index and
        /// <c>Stats.TrainedSegmentCount</c> before each segment, then <c>ProgressStatus(1.0)</c> at the end.
        /// </param>
        private void GenerateBestAlignments(string swmPrefix, string fileName, Func<string, string> sourcePreprocessor,
                                            Func<string, string> targetPreprocessor, ParallelTextCorpus corpus, IProgress<ProgressStatus> progress)
        {
            // The writer is declared last, so it is disposed before the model.
            using (var alignmentModel = new ThotWordAlignmentModel(swmPrefix))
            using (var output = new StreamWriter(fileName))
            {
                int segmentIndex = 0;
                foreach (ParallelTextSegment trainingSegment in GetTrainingSegments(corpus))
                {
                    // Report the index of the segment about to be processed, then advance the counter.
                    var status = new ProgressStatus(segmentIndex, Stats.TrainedSegmentCount);
                    progress.Report(status);
                    segmentIndex++;

                    // Header line identifying the segment; it is written before the alignment is computed.
                    string header = $"# {trainingSegment.Text.Id} {trainingSegment.SegmentRef}\n";
                    output.Write(header);

                    string alignment = alignmentModel.GetGizaFormatString(trainingSegment, sourcePreprocessor,
                                                                          targetPreprocessor);
                    output.Write(alignment);
                }

                // Final report, issued while the writer and model are still open.
                progress.Report(new ProgressStatus(1.0));
            }
        }