Ejemplo n.º 1
0
        /// <summary>
        /// Appends every word of <paramref name="arc"/> to <paramref name="builder"/> and then marks a phrase
        /// on the builder using the arc's source segment range and alignment.
        /// </summary>
        /// <param name="builder">The builder that receives the words and the phrase.</param>
        /// <param name="arc">The arc supplying words, word confidences, alignment and source range.</param>
        /// <param name="isPrefix">When <c>true</c>, the third argument passed to <c>AppendWord</c> is always
        /// <c>false</c>; otherwise it is <c>arc.IsUnknown</c>.</param>
        /// <param name="alignmentColsToAddCount">Number of extra columns to put in front of the arc's
        /// alignment columns. Values of zero or less leave the alignment as it is.</param>
        private void UpdateCorrectionFromArc(TranslationResultBuilder builder, WordGraphArc arc, bool isPrefix,
                                             int alignmentColsToAddCount)
        {
            for (int wordIndex = 0; wordIndex < arc.Words.Count; wordIndex++)
            {
                builder.AppendWord(arc.Words[wordIndex], arc.WordConfidences[wordIndex],
                                   !isPrefix && arc.IsUnknown);
            }

            WordAlignmentMatrix phraseAlignment = arc.Alignment;

            if (alignmentColsToAddCount > 0)
            {
                // Build a wider matrix and copy each existing cell into the column shifted right by the
                // number of added columns; the new leading columns keep the matrix's initial values.
                var widened = new WordAlignmentMatrix(phraseAlignment.RowCount,
                                                      phraseAlignment.ColumnCount + alignmentColsToAddCount);
                for (int row = 0; row < phraseAlignment.RowCount; row++)
                {
                    for (int col = 0; col < phraseAlignment.ColumnCount; col++)
                    {
                        widened[row, col + alignmentColsToAddCount] = phraseAlignment[row, col];
                    }
                }
                phraseAlignment = widened;
            }

            builder.MarkPhrase(arc.SourceSegmentRange, phraseAlignment);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Walks back from <paramref name="state"/> to state 0 along the best previous arcs recorded for the
        /// current processed-prefix position, then adds the visited arcs to <paramref name="builder"/>,
        /// beginning with the arc found last (the one whose previous state is 0).
        /// </summary>
        /// <param name="builder">The builder that receives the arcs' words and phrases.</param>
        /// <param name="procPrefixPos">Processed-prefix position used to look up the best previous arc of
        /// the starting state.</param>
        /// <param name="state">The state to start walking back from; 0 adds nothing.</param>
        private void AddBestUncorrectedPrefixState(TranslationResultBuilder builder, int procPrefixPos, int state)
        {
            // Arcs are discovered end-to-start, so they are collected here and replayed in reverse below.
            var arcsFromEnd = new List<WordGraphArc>();

            int prefixPos   = procPrefixPos;
            int stateCursor = state;

            while (stateCursor != 0)
            {
                int          bestArcIndex = _stateBestPrevArcs[stateCursor][prefixPos];
                WordGraphArc bestArc      = _wordGraph.Arcs[bestArcIndex];

                // Step the prefix position back through each word of the arc, last word first.
                for (int w = bestArc.Words.Count - 1; w >= 0; w--)
                {
                    prefixPos = _arcEcmScoreInfos[bestArcIndex][w].GetLastInsPrefixWordFromEsi()[prefixPos];
                }

                arcsFromEnd.Add(bestArc);
                stateCursor = bestArc.PrevState;
            }

            for (int k = arcsFromEnd.Count - 1; k >= 0; k--)
            {
                UpdateCorrectionFromArc(builder, arcsFromEnd[k], true, 0);
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Fills <paramref name="builder"/> from a hypothesis: first the uncorrected prefix leading up to the
        /// hypothesis start, then the prefix correction, and finally every arc of the hypothesis.
        /// </summary>
        /// <param name="builder">The builder to populate.</param>
        /// <param name="prefix">The prefix words handed to the prefix correction.</param>
        /// <param name="isLastWordComplete">Forwarded unchanged to the prefix correction.</param>
        /// <param name="hypothesis">The hypothesis; a <c>StartArcIndex</c> of -1 means it starts at
        /// <c>StartState</c> rather than part-way through an arc.</param>
        private void BuildCorrectionFromHypothesis(TranslationResultBuilder builder, string[] prefix,
                                                   bool isLastWordComplete, Hypothesis hypothesis)
        {
            int uncorrectedPrefixLen;

            if (hypothesis.StartArcIndex != -1)
            {
                AddBestUncorrectedPrefixSubState(builder, prefix.Length, hypothesis.StartArcIndex,
                                                 hypothesis.StartArcWordIndex);
                WordGraphArc startArc = _wordGraph.Arcs[hypothesis.StartArcIndex];

                // The whole start arc was appended above; exclude its words that follow StartArcWordIndex.
                int wordsFromStartIndex = startArc.Words.Count - hypothesis.StartArcWordIndex;
                uncorrectedPrefixLen = builder.Words.Count - wordsFromStartIndex + 1;
            }
            else
            {
                AddBestUncorrectedPrefixState(builder, prefix.Length, hypothesis.StartState);
                uncorrectedPrefixLen = builder.Words.Count;
            }

            // Only the first arc after the corrected prefix receives the extra alignment columns.
            int pendingCols = _ecm.CorrectPrefix(builder, uncorrectedPrefixLen, prefix, isLastWordComplete);

            foreach (WordGraphArc arc in hypothesis.Arcs)
            {
                UpdateCorrectionFromArc(builder, arc, false, pendingCols);
                pendingCols = 0;
            }
        }
        /// <summary>
        /// With an empty builder (uncorrected prefix length 0), CorrectPrefix is expected to append the four
        /// prefix words, return 4, and leave the builder without phrases.
        /// </summary>
        public void CorrectPrefix_EmptyUncorrectedPrefix_AppendsPrefix()
        {
            TranslationResultBuilder resultBuilder = CreateResultBuilder(string.Empty);
            string[] prefixWords = "this is a test".Split();

            int returned = _ecm.CorrectPrefix(resultBuilder, resultBuilder.Words.Count, prefixWords, true);

            Assert.That(returned, Is.EqualTo(4));
            Assert.That(resultBuilder.Confidences.Count, Is.EqualTo(prefixWords.Length));
            Assert.That(resultBuilder.Words, Is.EqualTo(prefixWords));
            Assert.That(resultBuilder.Phrases.Count, Is.EqualTo(0));
        }
        /// <summary>
        /// Sets the confidence of every word in <paramref name="builder"/> from the confidences computed
        /// over the word graph that the SMT engine returns for <paramref name="sourceSegment"/>.
        /// </summary>
        /// <param name="sourceSegment">The source segment whose word graph is requested.</param>
        /// <param name="builder">The builder whose word confidences are overwritten.</param>
        public void Estimate(IReadOnlyList<string> sourceSegment, TranslationResultBuilder builder)
        {
            WordGraphConfidences confidences = ComputeWordGraphConfidences(_smtEngine.GetWordGraph(sourceSegment));

            int wordIndex = 0;
            while (wordIndex < builder.Words.Count)
            {
                var word = builder.Words[wordIndex];
                builder.SetConfidence(wordIndex, confidences.GetConfidence(word));
                wordIndex++;
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Adds the best uncorrected prefix for a position inside an arc: steps the processed-prefix position
        /// back through the arc's words from <paramref name="arcWordIndex"/> down to 0, adds the best path
        /// leading to the arc's previous state, and finally adds the arc itself.
        /// </summary>
        /// <param name="builder">The builder that receives the words and phrases.</param>
        /// <param name="procPrefixPos">Processed-prefix position at the given word of the arc.</param>
        /// <param name="arcIndex">Index of the arc in the word graph.</param>
        /// <param name="arcWordIndex">Index of the word within the arc at which the walk back starts.</param>
        private void AddBestUncorrectedPrefixSubState(TranslationResultBuilder builder, int procPrefixPos,
                                                      int arcIndex, int arcWordIndex)
        {
            WordGraphArc targetArc = _wordGraph.Arcs[arcIndex];

            int prefixPos = procPrefixPos;
            int w         = arcWordIndex;

            while (w >= 0)
            {
                prefixPos = _arcEcmScoreInfos[arcIndex][w].GetLastInsPrefixWordFromEsi()[prefixPos];
                w--;
            }

            // Everything before the arc comes first; the arc is appended in full afterwards.
            AddBestUncorrectedPrefixState(builder, prefixPos, targetArc.PrevState);
            UpdateCorrectionFromArc(builder, targetArc, true, 0);
        }
        /// <summary>
        /// Correcting the three-word uncorrected prefix "this is a" to "this is a test" is expected to insert
        /// "test" after the third word, return 0, and shift the target cuts of the later phrases.
        /// </summary>
        public void CorrectPrefix_SubstringUncorrectedPrefixNewEndWord_InsertsWordAtEnd()
        {
            TranslationResultBuilder ti = CreateResultBuilder("this is a and only a test", 2, 3, 5, 7);
            string[] prefix = "this is a test".Split();

            int returned = _ecm.CorrectPrefix(ti, 3, prefix, true);

            Assert.That(returned, Is.EqualTo(0));
            Assert.That(ti.Confidences.Count, Is.EqualTo(8));
            Assert.That(ti.Words, Is.EqualTo("this is a test and only a test".Split()));
            Assert.That(ti.Phrases.Count, Is.EqualTo(4));

            // Expected target cut and alignment column count, phrase by phrase.
            int[] expectedCuts    = { 2, 3, 6, 8 };
            int[] expectedColumns = { 2, 1, 3, 2 };
            for (int p = 0; p < expectedCuts.Length; p++)
            {
                Assert.That(ti.Phrases[p].TargetCut, Is.EqualTo(expectedCuts[p]));
                Assert.That(ti.Phrases[p].Alignment.ColumnCount, Is.EqualTo(expectedColumns[p]));
            }
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Corrects the first <paramref name="uncorrectedPrefixLen"/> words of <paramref name="builder"/> so
        /// that they match <paramref name="prefix"/>.
        /// </summary>
        /// <param name="builder">The builder to correct in place.</param>
        /// <param name="uncorrectedPrefixLen">Number of leading builder words to compare with the prefix.
        /// When 0, the prefix words are simply appended to the builder.</param>
        /// <param name="prefix">The prefix words.</param>
        /// <param name="isLastWordComplete">Forwarded to the edit-distance computation and to the builder's
        /// own correction.</param>
        /// <returns><c>prefix.Length</c> when <paramref name="uncorrectedPrefixLen"/> is 0; otherwise the
        /// value returned by <c>builder.CorrectPrefix</c>.</returns>
        public int CorrectPrefix(TranslationResultBuilder builder, int uncorrectedPrefixLen, string[] prefix,
                                 bool isLastWordComplete)
        {
            if (uncorrectedPrefixLen != 0)
            {
                var uncorrectedPrefix = builder.Words.Take(uncorrectedPrefixLen).ToArray();

                IEnumerable<EditOperation> wordOps;
                IEnumerable<EditOperation> charOps;
                _segmentEditDistance.ComputePrefix(uncorrectedPrefix, prefix, isLastWordComplete, false,
                                                   out wordOps, out charOps);

                return builder.CorrectPrefix(wordOps, charOps, prefix, isLastWordComplete);
            }

            // Nothing to compare against: the prefix itself becomes the start of the translation.
            for (int i = 0; i < prefix.Length; i++)
            {
                builder.AppendWord(prefix[i]);
            }

            return prefix.Length;
        }
        /// <summary>
        /// Sets the confidence of each word covered by a phrase in <paramref name="builder"/>, visiting the
        /// phrases in order and each word exactly once.
        /// </summary>
        /// <param name="sourceSegment">The source segment passed to <c>GetConfidence</c>.</param>
        /// <param name="builder">The builder whose word confidences are overwritten. Words at or beyond the
        /// last phrase's <c>TargetCut</c> are left untouched.</param>
        /// <remarks>
        /// When <c>PhraseOnly</c> is set, each word is scored against the source range of its own phrase;
        /// otherwise every word is scored against the range covering the whole source segment.
        /// </remarks>
        public void Estimate(IReadOnlyList<string> sourceSegment, TranslationResultBuilder builder)
        {
            var range = Range<int>.Create(0, sourceSegment.Count);

            int startIndex = 0;

            foreach (PhraseInfo phrase in builder.Phrases)
            {
                if (PhraseOnly)
                {
                    range = phrase.SourceSegmentRange;
                }

                for (int j = startIndex; j < phrase.TargetCut; j++)
                {
                    double confidence = GetConfidence(sourceSegment, range, builder.Words[j]);
                    builder.SetConfidence(j, confidence);
                }

                // Advance past this phrase. Without this, every later phrase would rescan from word 0 and,
                // in PhraseOnly mode, overwrite earlier words using the wrong phrase's source range.
                startIndex = phrase.TargetCut;
            }
        }
        /// <summary>
        /// Test helper that builds a <c>TranslationResultBuilder</c> from a whitespace-separated target
        /// string, appending each word with a confidence of 1 and marking a phrase each time the number of
        /// appended words reaches the next entry of <paramref name="cuts"/>.
        /// </summary>
        /// <param name="target">The target sentence; null or empty yields an empty builder.</param>
        /// <param name="cuts">Word counts at which phrases end, consumed in the order given.</param>
        /// <returns>The populated builder.</returns>
        private static TranslationResultBuilder CreateResultBuilder(string target, params int[] cuts)
        {
            var builder = new TranslationResultBuilder();

            if (string.IsNullOrEmpty(target))
            {
                return builder;
            }

            string[] words       = target.Split();
            int      phraseStart = 0;
            int      nextCut     = 0;

            for (int wordIndex = 0; wordIndex < words.Length; wordIndex++)
            {
                builder.AppendWord(words[wordIndex], 1);

                int wordCount = wordIndex + 1;
                if (nextCut >= cuts.Length || cuts[nextCut] != wordCount)
                {
                    continue;
                }

                // The phrase spans [phraseStart, wordCount) and gets a square alignment matrix of that size.
                int phraseLen = wordCount - phraseStart;
                builder.MarkPhrase(Range<int>.Create(phraseStart, wordCount),
                                   new WordAlignmentMatrix(phraseLen, phraseLen));
                nextCut++;
                phraseStart = wordCount;
            }

            return builder;
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Updates the cached word-graph scores for a new prefix and yields one translation result for each
        /// hypothesis returned by the n-best search.
        /// </summary>
        /// <param name="prefix">The current prefix words.</param>
        /// <param name="isLastWordComplete">Whether the last prefix word is complete; passed on to the
        /// word-graph processing and to the result building.</param>
        /// <param name="n">The count passed to <c>NBestSearch</c>.</param>
        /// <returns>A lazily evaluated sequence of results. This is an iterator method, so none of the
        /// state updates below run until the sequence is enumerated.</returns>
        public IEnumerable<TranslationResult> Correct(string[] prefix, bool isLastWordComplete, int n)
        {
            // Length of the leading run of words shared with the previously processed prefix. The scan
            // stops at the first mismatch: the cached score info is only trimmed from the end, so a match
            // after a mismatch must not count as already processed.
            // NOTE(review): a previously incomplete last word that is now followed by more words is still
            // treated as valid here - confirm that is intended.
            int validProcPrefixCount = 0;

            for (int i = 0; i < _prevPrefix.Length && i < prefix.Length; i++)
            {
                if (_prevPrefix[i] != prefix[i])
                {
                    break;
                }

                // When this is the last word of both prefixes, it is only reusable if it was processed
                // with the same completeness flag.
                bool isLastWordOfBoth = i == _prevPrefix.Length - 1 && i == prefix.Length - 1;
                if (isLastWordOfBoth && _prevIsLastWordComplete != isLastWordComplete)
                {
                    break;
                }

                validProcPrefixCount++;
            }

            int diffSize = _prevPrefix.Length - validProcPrefixCount;

            if (diffSize > 0)
            {
                // adjust size of info for arcs
                foreach (List<EcmScoreInfo> esis in _arcEcmScoreInfos)
                {
                    foreach (EcmScoreInfo esi in esis)
                    {
                        for (int i = 0; i < diffSize; i++)
                        {
                            esi.RemoveLast();
                        }
                    }
                }

                // adjust size of info for states
                foreach (int state in _statesInvolvedInArcs)
                {
                    for (int i = 0; i < diffSize; i++)
                    {
                        _stateEcmScoreInfos[state].RemoveLast();
                        _stateBestScores[state].RemoveAt(_stateBestScores[state].Count - 1);
                        _stateBestPrevArcs[state].RemoveAt(_stateBestPrevArcs[state].Count - 1);
                    }
                }
            }

            // get difference between prefix and valid portion of processed prefix
            string[] prefixDiff = prefix.Skip(validProcPrefixCount).ToArray();

            // process word-graph given prefix difference
            ProcessWordGraphForPrefixDiff(prefixDiff, isLastWordComplete);

            // Remember a copy of the prefix so the next call can be processed incrementally.
            _prevPrefix             = prefix.ToArray();
            _prevIsLastWordComplete = isLastWordComplete;

            var queue = new PriorityQueue<Hypothesis>(1000);

            GetStateHypotheses(queue);
            GetSubStateHypotheses(queue);

            foreach (Hypothesis hypothesis in NBestSearch(n, queue))
            {
                var builder = new TranslationResultBuilder();
                BuildCorrectionFromHypothesis(builder, prefix, isLastWordComplete, hypothesis);
                yield return builder.ToResult(_sourceSegment, prefix.Length);
            }
        }