示例#1
0
        /// <summary>
        /// Reads the project XML file at <paramref name="fileName"/>, builds the
        /// translation engines it names, and fills <c>_texts</c> and the source,
        /// target and alignment corpora from its <c>Texts/Text</c> entries.
        /// </summary>
        /// <param name="fileName">
        /// Path of the project file. Every file referenced inside the project is
        /// resolved relative to this file's directory.
        /// </param>
        /// <returns>
        /// <c>false</c> when the file cannot be loaded as XML, when the
        /// <c>TranslationEngine</c> element or its <c>SmtConfig</c> child is missing,
        /// when a <c>Text</c> entry lacks its metadata, source or target file, or when
        /// <c>_texts</c> is empty after processing; otherwise <c>true</c>.
        /// </returns>
        private bool LoadProject(string fileName)
        {
            XElement root;
            try
            {
                root = XElement.Load(fileName);
            }
            catch (Exception)
            {
                // Any failure while reading or parsing the file is reported as "not loaded".
                return false;
            }

            XElement engineNode = root.Element("TranslationEngine");
            if (engineNode == null)
            {
                return false;
            }

            // The SMT configuration is mandatory; the two analyzer/generator configs are optional.
            string smtConfigFile = (string)engineNode.Element("SmtConfig");
            if (smtConfigFile == null)
            {
                return false;
            }

            string srcAnalyzerFile = (string)engineNode.Element("SourceAnalyzerConfig");
            string trgGeneratorFile = (string)engineNode.Element("TargetGeneratorConfig");

            string baseDir = Path.GetDirectoryName(fileName);
            Debug.Assert(baseDir != null);

            // A transfer engine is only built when both optional configs are present;
            // otherwise the hybrid engine below receives null for it.
            ITranslationEngine ruleEngine = null;
            if (srcAnalyzerFile != null && trgGeneratorFile != null)
            {
                Language sourceLanguage = XmlLanguageLoader.Load(Path.Combine(baseDir, srcAnalyzerFile));
                var sourceMorpher = new Morpher(_hcTraceManager, sourceLanguage);

                Language targetLanguage = XmlLanguageLoader.Load(Path.Combine(baseDir, trgGeneratorFile));
                var targetMorpher = new Morpher(_hcTraceManager, targetLanguage);

                var morphemeMapper = new GlossMorphemeMapper(targetMorpher);
                var transferer = new SimpleTransferer(morphemeMapper);
                ruleEngine = new TransferEngine(sourceMorpher, transferer, targetMorpher);
            }

            _smtModel = new ThotSmtModel(Path.Combine(baseDir, smtConfigFile));
            IInteractiveSmtEngine interactiveEngine = _smtModel.CreateInteractiveEngine();
            _hybridEngine = new HybridTranslationEngine(interactiveEngine, ruleEngine);

            var srcTextList = new List<IText>();
            var trgTextList = new List<IText>();
            var alignmentList = new List<ITextAlignmentCollection>();

            using (_texts.BulkUpdate())
            {
                foreach (XElement textNode in root.Elements("Texts").Elements("Text"))
                {
                    string textName = (string)textNode.Attribute("name");

                    // NOTE(review): the early returns below happen after _smtModel and
                    // _hybridEngine were assigned and possibly after earlier texts were
                    // added to _texts; nothing in this method rolls that state back.
                    string metadataPath = (string)textNode.Element("MetadataFile");
                    if (metadataPath == null)
                    {
                        return false;
                    }
                    metadataPath = Path.Combine(baseDir, metadataPath);

                    string sourcePath = (string)textNode.Element("SourceFile");
                    if (sourcePath == null)
                    {
                        return false;
                    }
                    sourcePath = Path.Combine(baseDir, sourcePath);

                    string targetPath = (string)textNode.Element("TargetFile");
                    if (targetPath == null)
                    {
                        return false;
                    }
                    targetPath = Path.Combine(baseDir, targetPath);

                    // The alignments file is optional and stays null when absent.
                    string alignmentsRelative = (string)textNode.Element("AlignmentsFile");
                    string alignmentsPath = alignmentsRelative != null
                        ? Path.Combine(baseDir, alignmentsRelative)
                        : null;

                    var viewModel = new TextViewModel(_tokenizer, textName, metadataPath, sourcePath, targetPath,
                                                      alignmentsPath);
                    viewModel.Engine = _hybridEngine;
                    viewModel.PropertyChanged += TextPropertyChanged;
                    _texts.Add(viewModel);

                    // Both corpora only expose segments that this text's view model reports as approved.
                    Func<TextSegment, bool> approvedOnly =
                        segment => viewModel.IsApproved((TextSegmentRef)segment.SegmentRef);

                    var sourceFileText = new TextFileText(_tokenizer, textName, sourcePath);
                    srcTextList.Add(new FilteredText(sourceFileText, approvedOnly));

                    var targetFileText = new TextFileText(_tokenizer, textName, targetPath);
                    trgTextList.Add(new FilteredText(targetFileText, approvedOnly));

                    if (alignmentsPath != null)
                    {
                        alignmentList.Add(new TextFileTextAlignmentCollection(textName, alignmentsPath));
                    }
                }
            }

            if (_texts.Count == 0)
            {
                return false;
            }

            _sourceCorpus = new DictionaryTextCorpus(srcTextList);
            _targetCorpus = new DictionaryTextCorpus(trgTextList);
            _alignmentCorpus = new DictionaryTextAlignmentCorpus(alignmentList);

            CurrentText = _texts[0];
            AcceptChanges();
            RebuildTask.UpdateCanExecute();
            return true;
        }
示例#2
0
        /// <summary>
        /// Runs the base command, validates the command-line options, then calls
        /// <c>TestSegment</c> on every non-empty segment of the parallel corpus and
        /// writes summary statistics to <c>Out</c>.
        /// </summary>
        /// <returns>
        /// The base command's code when it is non-zero; <c>1</c> when the engine
        /// configuration file does not exist or an option value is invalid;
        /// otherwise <c>0</c>.
        /// </returns>
        protected override int ExecuteCommand()
        {
            int code = base.ExecuteCommand();
            if (code != 0)
            {
                return code;
            }

            if (!File.Exists(EngineConfigFileName))
            {
                Out.WriteLine("The specified engine directory is invalid.");
                return 1;
            }

            double confidenceThreshold = 0.2;
            if (_confidenceOption.HasValue()
                && !double.TryParse(_confidenceOption.Value(), out confidenceThreshold))
            {
                Out.WriteLine("The specified confidence is invalid.");
                return 1;
            }

            int n = 1;
            if (_nOption.HasValue())
            {
                // A negative count parses successfully but would make the array
                // allocation below throw, so it is rejected like an unparsable value.
                // Zero is still accepted, as it was before.
                if (!int.TryParse(_nOption.Value(), out n) || n < 0)
                {
                    Out.WriteLine("The specified number of suggestions is invalid.");
                    return 1;
                }
            }

            if (_traceOption.HasValue())
            {
                // CreateDirectory does nothing when the directory already exists,
                // so no separate existence check is needed.
                Directory.CreateDirectory(_traceOption.Value());
            }

            var suggester = new PhraseTranslationSuggester()
            {
                ConfidenceThreshold = confidenceThreshold
            };

            int parallelCorpusCount = GetParallelCorpusCount();
            bool quiet = _quietOption.HasValue();

            var watch = Stopwatch.StartNew();

            if (!quiet)
            {
                Out.Write("Testing... ");
            }
            int segmentCount = 0;

            _acceptedSuggestionCounts = new int[n];
            // No progress bar is created in quiet mode; a using statement accepts a null resource.
            using (ConsoleProgressBar progress = quiet ? null : new ConsoleProgressBar(Out))
            using (IInteractiveSmtModel smtModel = new ThotSmtModel(EngineConfigFileName))
            using (IInteractiveSmtEngine engine = smtModel.CreateInteractiveEngine())
            {
                progress?.Report(new ProgressStatus(segmentCount, parallelCorpusCount));
                foreach (ParallelText text in ParallelCorpus.Texts)
                {
                    using (StreamWriter traceWriter = CreateTraceWriter(text))
                    {
                        foreach (ParallelTextSegment segment in text.Segments.Where(s => !s.IsEmpty))
                        {
                            TestSegment(engine, suggester, n, segment, traceWriter);
                            segmentCount++;
                            progress?.Report(new ProgressStatus(segmentCount, parallelCorpusCount));
                            if (segmentCount == MaxParallelCorpusCount)
                            {
                                break;
                            }
                        }
                    }

                    // The inner break only leaves the segment loop; stop the text loop too.
                    if (segmentCount == MaxParallelCorpusCount)
                    {
                        break;
                    }
                }
            }

            if (!quiet)
            {
                Out.WriteLine("done.");
            }
            watch.Stop();

            Out.WriteLine($"Execution time: {watch.Elapsed:c}");
            Out.WriteLine($"# of Segments: {segmentCount}");
            Out.WriteLine($"# of Suggestions: {_totalSuggestionCount}");
            Out.WriteLine($"# of Correct Suggestions: {_totalAcceptedSuggestionCount}");

            // All ratios below are floating-point divisions: a zero denominator
            // produces NaN or infinity in the report rather than an exception.
            Out.WriteLine("Correct Suggestion Types");
            double fullPcnt = (double)_fullSuggestionCount / _totalAcceptedSuggestionCount;
            Out.WriteLine($"-Full: {fullPcnt:0.0000}");
            double initPcnt = (double)_initSuggestionCount / _totalAcceptedSuggestionCount;
            Out.WriteLine($"-Initial: {initPcnt:0.0000}");
            double finalPcnt = (double)_finalSuggestionCount / _totalAcceptedSuggestionCount;
            Out.WriteLine($"-Final: {finalPcnt:0.0000}");
            double middlePcnt = (double)_middleSuggestionCount / _totalAcceptedSuggestionCount;
            Out.WriteLine($"-Middle: {middlePcnt:0.0000}");

            Out.WriteLine("Correct Suggestion N");
            for (int i = 0; i < _acceptedSuggestionCounts.Length; i++)
            {
                double pcnt = (double)_acceptedSuggestionCounts[i] / _totalAcceptedSuggestionCount;
                Out.WriteLine($"-{i + 1}: {pcnt:0.0000}");
            }

            double ksmr = (double)_actionCount / _charCount;
            Out.WriteLine($"KSMR: {ksmr:0.0000}");

            double precision = (double)_totalAcceptedSuggestionCount / _totalSuggestionCount;
            Out.WriteLine($"Precision: {precision:0.0000}");
            return 0;
        }