/// <summary>
/// Trains a symmetrized word alignment model on the parallel corpus and saves it using the
/// "src_trg" file prefix inside the engine's "tm" directory.
/// </summary>
/// <remarks>
/// The "Training... " / "done." messages and the progress bar are skipped when the quiet option
/// is set. The final segment-count line is always written.
/// </remarks>
private void TrainAlignmentModels()
{
    // Make sure the model directory exists before the model files are written into it.
    string modelDir = Path.Combine(EngineDirectory, "tm");
    if (!Directory.Exists(modelDir))
    {
        Directory.CreateDirectory(modelDir);
    }

    string modelPrefix = Path.Combine(modelDir, "src_trg");
    string invswmPath = modelPrefix + "_invswm";
    string swmPath = modelPrefix + "_swm";

    int segmentTotal = GetParallelCorpusCount();

    bool showProgress = !_quietOption.HasValue();
    if (showProgress)
    {
        Out.Write("Training... ");
    }

    // A null progress bar is a valid using resource; nothing is disposed in that case.
    using (ConsoleProgressBar progressBar = showProgress ? new ConsoleProgressBar(Out) : null)
    // NOTE(review): the third argument presumably requests creation of new model files — confirm
    // against the ThotSymmetrizedWordAlignmentModel constructor.
    using (var model = new ThotSymmetrizedWordAlignmentModel(invswmPath, swmPath, true))
    {
        model.AddSegmentPairs(
            ParallelCorpus,
            Preprocessors.Lowercase,
            Preprocessors.Lowercase,
            MaxParallelCorpusCount);
        model.Train(progressBar);
        model.Save();
    }

    if (showProgress)
    {
        Out.WriteLine("done.");
    }

    Out.WriteLine($"# of Segments Trained: {segmentTotal}");
}
/// <summary>
/// Aligns the segments of the parallel corpus with the symmetrized word alignment model found
/// under the engine's "tm" directory and writes one "&lt;text id&gt;.txt" file per text into the
/// output directory, one line per segment.
/// </summary>
/// <remarks>
/// Empty segments produce an empty line and are not counted. Processing stops once the number
/// of aligned segments equals <c>MaxParallelCorpusCount</c>. When the quiet option is set, the
/// status messages and the progress bar are suppressed.
/// </remarks>
/// <returns>
/// The code returned by the base command when it is non-zero, 1 when no output directory was
/// specified, otherwise 0.
/// </returns>
protected override int ExecuteCommand()
{
    int code = base.ExecuteCommand();
    if (code != 0)
    {
        return code;
    }

    if (!_outputOption.HasValue())
    {
        Out.WriteLine("The output alignment directory was not specified");
        return 1;
    }

    // Directory.CreateDirectory does nothing when the directory already exists, so no
    // separate existence check is needed.
    string outputDir = _outputOption.Value();
    Directory.CreateDirectory(outputDir);

    int parallelCorpusCount = GetParallelCorpusCount();
    string tmPrefix = Path.Combine(EngineDirectory, "tm", "src_trg");

    // The options cannot change while the command runs, so read them once up front.
    bool quiet = _quietOption.HasValue();
    bool includeProbabilities = _probOption.HasValue();

    // Status messages honor the quiet option, just like the progress bar below.
    if (!quiet)
    {
        Out.Write("Aligning... ");
    }

    using (ConsoleProgressBar progress = quiet ? null : new ConsoleProgressBar(Out))
    using (var alignmentModel = new ThotSymmetrizedWordAlignmentModel(tmPrefix + "_invswm", tmPrefix + "_swm"))
    {
        int segmentCount = 0;
        progress?.Report(new ProgressData(segmentCount, parallelCorpusCount));

        foreach (ParallelText text in ParallelCorpus.Texts)
        {
            string fileName = Path.Combine(outputDir, text.Id + ".txt");
            using (var writer = new StreamWriter(fileName))
            {
                foreach (ParallelTextSegment segment in text.Segments)
                {
                    if (segment.IsEmpty)
                    {
                        // Keep output lines in step with the segments of the text.
                        writer.WriteLine();
                        continue;
                    }

                    writer.WriteLine(alignmentModel.GetAlignmentString(
                        segment,
                        includeProbabilities,
                        Preprocessors.Lowercase,
                        Preprocessors.Lowercase));
                    segmentCount++;
                    progress?.Report(new ProgressData(segmentCount, parallelCorpusCount));

                    if (segmentCount == MaxParallelCorpusCount)
                    {
                        break;
                    }
                }
            }

            // The limit was reached inside the current text; skip the remaining texts too.
            if (segmentCount == MaxParallelCorpusCount)
            {
                break;
            }
        }
    }

    if (!quiet)
    {
        Out.WriteLine("done.");
    }

    return 0;
}