Ejemplo n.º 1
0
        /// <summary>
        /// Trains a <see cref="ThotSymmetrizedWordAlignmentModel"/> on <c>ParallelCorpus</c> and saves it,
        /// using the "src_trg" file prefix inside the "tm" subdirectory of <c>EngineDirectory</c>.
        /// </summary>
        /// <remarks>
        /// When the quiet option is set, the "Training... " / "done." messages are not written and no
        /// progress bar is created. The final segment-count line (the value returned by
        /// <c>GetParallelCorpusCount()</c>) is written in either case.
        /// </remarks>
        private void TrainAlignmentModels()
        {
            string modelDir = Path.Combine(EngineDirectory, "tm");
            if (!Directory.Exists(modelDir))
            {
                Directory.CreateDirectory(modelDir);
            }

            string modelPrefix  = Path.Combine(modelDir, "src_trg");
            int    segmentTotal = GetParallelCorpusCount();
            bool   quiet        = _quietOption.HasValue();

            if (!quiet)
            {
                Out.Write("Training... ");
            }

            // A null progress bar is valid here: a using statement skips disposal of a null resource.
            using (ConsoleProgressBar progressBar = quiet ? null : new ConsoleProgressBar(Out))
            // NOTE(review): the trailing 'true' presumably asks for a freshly created model -- confirm
            // against the ThotSymmetrizedWordAlignmentModel constructor.
            using (var model = new ThotSymmetrizedWordAlignmentModel(modelPrefix + "_invswm",
                                                                     modelPrefix + "_swm",
                                                                     true))
            {
                // Source and target sides both go through the lowercase preprocessor.
                model.AddSegmentPairs(ParallelCorpus,
                                      Preprocessors.Lowercase,
                                      Preprocessors.Lowercase,
                                      MaxParallelCorpusCount);
                model.Train(progressBar);
                model.Save();
            }

            // Written only after the progress bar has been disposed by the using statement above.
            if (!quiet)
            {
                Out.WriteLine("done.");
            }

            Out.WriteLine($"# of Segments Trained: {segmentTotal}");
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Runs the base command, then writes word alignments for <c>ParallelCorpus</c> into the
        /// output directory, one "&lt;text id&gt;.txt" file per parallel text.
        /// </summary>
        /// <remarks>
        /// Each segment produces one line in its text's file: an empty line for an empty segment,
        /// otherwise the string returned by the alignment model. Only non-empty segments are counted;
        /// processing stops once that count equals <c>MaxParallelCorpusCount</c>.
        /// </remarks>
        /// <returns>
        /// The base command's code when it is non-zero, 1 when the output option has no value,
        /// otherwise 0.
        /// </returns>
        protected override int ExecuteCommand()
        {
            int baseCode = base.ExecuteCommand();
            if (baseCode != 0)
            {
                return baseCode;
            }

            if (!_outputOption.HasValue())
            {
                Out.WriteLine("The output alignment directory was not specified");
                return 1;
            }

            string outputDir = _outputOption.Value();
            if (!Directory.Exists(outputDir))
            {
                Directory.CreateDirectory(outputDir);
            }

            int    totalSegments = GetParallelCorpusCount();
            string modelPrefix   = Path.Combine(EngineDirectory, "tm", "src_trg");

            Out.Write("Aligning... ");
            // The progress bar is null in quiet mode; every report below goes through '?.'.
            using (var progressBar = _quietOption.HasValue() ? null : new ConsoleProgressBar(Out))
            using (var model = new ThotSymmetrizedWordAlignmentModel(modelPrefix + "_invswm",
                                                                     modelPrefix + "_swm"))
            {
                bool probOptionSet = _probOption.HasValue();
                int  alignedCount  = 0;
                progressBar?.Report(new ProgressData(alignedCount, totalSegments));

                foreach (ParallelText text in ParallelCorpus.Texts)
                {
                    string outputPath = Path.Combine(outputDir, text.Id + ".txt");
                    using (var writer = new StreamWriter(outputPath))
                    {
                        foreach (ParallelTextSegment segment in text.Segments)
                        {
                            // Empty segments still get a line so the output stays line-aligned with
                            // the input segments, but they do not count toward the limit.
                            if (segment.IsEmpty)
                            {
                                writer.WriteLine();
                                continue;
                            }

                            string alignment = model.GetAlignmentString(segment,
                                                                        probOptionSet,
                                                                        Preprocessors.Lowercase,
                                                                        Preprocessors.Lowercase);
                            writer.WriteLine(alignment);

                            alignedCount++;
                            progressBar?.Report(new ProgressData(alignedCount, totalSegments));

                            if (alignedCount == MaxParallelCorpusCount)
                            {
                                break;
                            }
                        }
                    }

                    // Same check again after the writer is closed, to leave the outer loop too.
                    if (alignedCount == MaxParallelCorpusCount)
                    {
                        break;
                    }
                }
            }
            Out.WriteLine("done.");

            return 0;
        }