/// <summary>
/// Appends the words of a word-graph arc to the result and marks the arc's phrase,
/// shifting the arc's alignment right when target columns were inserted before it.
/// </summary>
private void UpdateCorrectionFromArc(TranslationResultBuilder builder, WordGraphArc arc, bool isPrefix, int alignmentColsToAddCount)
{
    // Unknown-word flagging only applies outside of the prefix.
    for (int wordIndex = 0; wordIndex < arc.Words.Count; wordIndex++)
        builder.AppendWord(arc.Words[wordIndex], arc.WordConfidences[wordIndex], !isPrefix && arc.IsUnknown);

    WordAlignmentMatrix alignment = arc.Alignment;
    if (alignmentColsToAddCount > 0)
    {
        // Widen the alignment and copy the original cells shifted right by the
        // number of inserted columns; the new leading columns stay empty.
        var shifted = new WordAlignmentMatrix(alignment.RowCount, alignment.ColumnCount + alignmentColsToAddCount);
        for (int row = 0; row < alignment.RowCount; row++)
        {
            for (int col = 0; col < alignment.ColumnCount; col++)
                shifted[row, alignmentColsToAddCount + col] = alignment[row, col];
        }
        alignment = shifted;
    }

    builder.MarkPhrase(arc.SourceSegmentRange, alignment);
}
/// <summary>
/// Emits the best uncorrected path leading to the given state by walking the
/// best-previous-arc chain backward to the initial state (0), then replaying the
/// collected arcs in forward order.
/// </summary>
private void AddBestUncorrectedPrefixState(TranslationResultBuilder builder, int procPrefixPos, int state)
{
    var path = new Stack<WordGraphArc>();
    int curState = state;
    int pos = procPrefixPos;
    while (curState != 0)
    {
        int bestArcIndex = _stateBestPrevArcs[curState][pos];
        WordGraphArc bestArc = _wordGraph.Arcs[bestArcIndex];

        // Step the processed-prefix position back through the arc's words.
        for (int w = bestArc.Words.Count - 1; w >= 0; w--)
        {
            IReadOnlyList<int> predPrefixWords = _arcEcmScoreInfos[bestArcIndex][w].GetLastInsPrefixWordFromEsi();
            pos = predPrefixWords[pos];
        }

        path.Push(bestArc);
        curState = bestArc.PrevState;
    }

    // Replay in forward order (the stack reverses the backward walk).
    foreach (WordGraphArc arc in path)
        UpdateCorrectionFromArc(builder, arc, true, 0);
}
/// <summary>
/// Builds a corrected translation from a hypothesis: first emits the uncorrected
/// best path up to the hypothesis start, then applies the prefix correction, and
/// finally appends the hypothesis's own arcs.
/// </summary>
private void BuildCorrectionFromHypothesis(TranslationResultBuilder builder, string[] prefix, bool isLastWordComplete, Hypothesis hypothesis)
{
    int uncorrectedPrefixLen;
    if (hypothesis.StartArcIndex == -1)
    {
        // The hypothesis starts at a state, so everything emitted so far belongs
        // to the uncorrected prefix.
        AddBestUncorrectedPrefixState(builder, prefix.Length, hypothesis.StartState);
        uncorrectedPrefixLen = builder.Words.Count;
    }
    else
    {
        // The hypothesis starts inside an arc: exclude the arc words that come
        // after the start word from the uncorrected prefix length.
        AddBestUncorrectedPrefixSubState(builder, prefix.Length, hypothesis.StartArcIndex, hypothesis.StartArcWordIndex);
        WordGraphArc startArc = _wordGraph.Arcs[hypothesis.StartArcIndex];
        uncorrectedPrefixLen = builder.Words.Count - (startArc.Words.Count - hypothesis.StartArcWordIndex) + 1;
    }

    int alignmentColsToAddCount = _ecm.CorrectPrefix(builder, uncorrectedPrefixLen, prefix, isLastWordComplete);

    foreach (WordGraphArc arc in hypothesis.Arcs)
    {
        UpdateCorrectionFromArc(builder, arc, false, alignmentColsToAddCount);
        // Only the first arc after the correction absorbs the inserted columns.
        alignmentColsToAddCount = 0;
    }
}
public void CorrectPrefix_EmptyUncorrectedPrefix_AppendsPrefix()
{
    // Arrange: an empty result, so the uncorrected prefix length is zero.
    TranslationResultBuilder builder = CreateResultBuilder(string.Empty);
    string[] prefix = { "this", "is", "a", "test" };

    // Act & Assert: every prefix word is appended and reported as an insertion.
    Assert.That(_ecm.CorrectPrefix(builder, builder.Words.Count, prefix, true), Is.EqualTo(4));
    Assert.That(builder.Confidences.Count, Is.EqualTo(prefix.Length));
    Assert.That(builder.Words, Is.EqualTo(prefix));
    Assert.That(builder.Phrases.Count, Is.EqualTo(0));
}
/// <summary>
/// Re-scores every word in the translation using per-word confidences derived
/// from the SMT engine's word graph for the source segment.
/// </summary>
public void Estimate(IReadOnlyList<string> sourceSegment, TranslationResultBuilder builder)
{
    WordGraph wordGraph = _smtEngine.GetWordGraph(sourceSegment);
    WordGraphConfidences confidences = ComputeWordGraphConfidences(wordGraph);
    for (int index = 0; index < builder.Words.Count; index++)
    {
        double confidence = confidences.GetConfidence(builder.Words[index]);
        builder.SetConfidence(index, confidence);
    }
}
/// <summary>
/// Emits the best uncorrected path for a hypothesis that starts partway through
/// an arc: steps the processed-prefix position back through the arc words up to
/// and including the start word, emits the best path to the arc's source state,
/// and then emits the arc itself.
/// </summary>
private void AddBestUncorrectedPrefixSubState(TranslationResultBuilder builder, int procPrefixPos, int arcIndex, int arcWordIndex)
{
    WordGraphArc arc = _wordGraph.Arcs[arcIndex];

    int pos = procPrefixPos;
    for (int w = arcWordIndex; w >= 0; w--)
    {
        IReadOnlyList<int> predPrefixWords = _arcEcmScoreInfos[arcIndex][w].GetLastInsPrefixWordFromEsi();
        pos = predPrefixWords[pos];
    }

    AddBestUncorrectedPrefixState(builder, pos, arc.PrevState);
    UpdateCorrectionFromArc(builder, arc, true, 0);
}
public void CorrectPrefix_SubstringUncorrectedPrefixNewEndWord_InsertsWordAtEnd()
{
    // Arrange: four phrases cut at 2, 3, 5, 7; the first three words match the prefix.
    TranslationResultBuilder builder = CreateResultBuilder("this is a and only a test", 2, 3, 5, 7);
    string[] prefix = "this is a test".Split();

    // Act & Assert: "test" is inserted after the matched substring and no extra
    // alignment columns are reported.
    Assert.That(_ecm.CorrectPrefix(builder, 3, prefix, true), Is.EqualTo(0));
    Assert.That(builder.Confidences.Count, Is.EqualTo(8));
    Assert.That(builder.Words, Is.EqualTo("this is a test and only a test".Split()));

    // Expected { targetCut, alignmentColumnCount } per phrase after the insertion.
    int[][] expectedPhrases =
    {
        new[] { 2, 2 },
        new[] { 3, 1 },
        new[] { 6, 3 },
        new[] { 8, 2 }
    };
    Assert.That(builder.Phrases.Count, Is.EqualTo(expectedPhrases.Length));
    for (int i = 0; i < expectedPhrases.Length; i++)
    {
        Assert.That(builder.Phrases[i].TargetCut, Is.EqualTo(expectedPhrases[i][0]));
        Assert.That(builder.Phrases[i].Alignment.ColumnCount, Is.EqualTo(expectedPhrases[i][1]));
    }
}
/// <summary>
/// Corrects the first <paramref name="uncorrectedPrefixLen"/> words of the result
/// so that they match the user-typed prefix, and returns the number of words that
/// were inserted.
/// </summary>
public int CorrectPrefix(TranslationResultBuilder builder, int uncorrectedPrefixLen, string[] prefix, bool isLastWordComplete)
{
    // Nothing to correct: the whole prefix is appended as-is.
    if (uncorrectedPrefixLen == 0)
    {
        foreach (string word in prefix)
            builder.AppendWord(word);
        return prefix.Length;
    }

    // Compute the word- and character-level edit operations that transform the
    // uncorrected words into the prefix, then apply them to the result.
    string[] uncorrected = builder.Words.Take(uncorrectedPrefixLen).ToArray();
    IEnumerable<EditOperation> wordOps;
    IEnumerable<EditOperation> charOps;
    _segmentEditDistance.ComputePrefix(uncorrected, prefix, isLastWordComplete, false, out wordOps, out charOps);
    return builder.CorrectPrefix(wordOps, charOps, prefix, isLastWordComplete);
}
/// <summary>
/// Estimates a confidence for each target word, phrase by phrase. When
/// <c>PhraseOnly</c> is set, a word is scored only against its own phrase's
/// source range; otherwise it is scored against the whole source segment.
/// </summary>
public void Estimate(IReadOnlyList<string> sourceSegment, TranslationResultBuilder builder)
{
    var range = Range<int>.Create(0, sourceSegment.Count);
    int startIndex = 0;
    foreach (PhraseInfo phrase in builder.Phrases)
    {
        if (PhraseOnly)
            range = phrase.SourceSegmentRange;
        for (int j = startIndex; j < phrase.TargetCut; j++)
        {
            double confidence = GetConfidence(sourceSegment, range, builder.Words[j]);
            builder.SetConfidence(j, confidence);
        }
        // FIX: advance past this phrase's words. Previously startIndex stayed at
        // 0, so every phrase re-scored all earlier words from the beginning —
        // and when PhraseOnly is true, their confidences were overwritten using
        // a later phrase's source range.
        startIndex = phrase.TargetCut;
    }
}
/// <summary>
/// Builds a result with one word (confidence 1) per whitespace-delimited token of
/// <paramref name="target"/>, marking a phrase with a square alignment matrix at
/// each 1-based word position listed in <paramref name="cuts"/>.
/// </summary>
private static TranslationResultBuilder CreateResultBuilder(string target, params int[] cuts)
{
    var builder = new TranslationResultBuilder();
    if (string.IsNullOrEmpty(target))
        return builder;

    string[] words = target.Split();
    int phraseStart = 0;
    int cutIndex = 0;
    for (int wordIndex = 0; wordIndex < words.Length; wordIndex++)
    {
        builder.AppendWord(words[wordIndex], 1);
        int position = wordIndex + 1;
        if (cutIndex < cuts.Length && cuts[cutIndex] == position)
        {
            int phraseLen = position - phraseStart;
            builder.MarkPhrase(Range<int>.Create(phraseStart, position), new WordAlignmentMatrix(phraseLen, phraseLen));
            cutIndex++;
            phraseStart = position;
        }
    }
    return builder;
}
/// <summary>
/// Incrementally corrects the word graph's best translations against the user's
/// current prefix and yields the n-best corrected results.
/// </summary>
// NOTE(review): this is an iterator method, so none of the state updates below run
// until the caller starts enumerating the returned sequence.
public IEnumerable<TranslationResult> Correct(string[] prefix, bool isLastWordComplete, int n)
{
    // get valid portion of the processed prefix vector
    int validProcPrefixCount = 0;
    for (int i = 0; i < _prevPrefix.Length; i++)
    {
        if (i >= prefix.Length)
        {
            break;
        }

        if (i == _prevPrefix.Length - 1 && i == prefix.Length - 1)
        {
            // both prefixes end at this word, so it also has to agree on whether
            // the last word is complete to still count as valid
            if (_prevPrefix[i] == prefix[i] && _prevIsLastWordComplete == isLastWordComplete)
            {
                validProcPrefixCount++;
            }
        }
        else if (_prevPrefix[i] == prefix[i])
        {
            validProcPrefixCount++;
        }
    }

    // number of previously-processed prefix positions that are no longer valid
    int diffSize = _prevPrefix.Length - validProcPrefixCount;
    if (diffSize > 0)
    {
        // the prefix changed before its end: roll back the cached ECM score info
        // to the last position that is still valid

        // adjust size of info for arcs
        foreach (List<EcmScoreInfo> esis in _arcEcmScoreInfos)
        {
            foreach (EcmScoreInfo esi in esis)
            {
                for (int i = 0; i < diffSize; i++)
                {
                    esi.RemoveLast();
                }
            }
        }

        // adjust size of info for states
        foreach (int state in _statesInvolvedInArcs)
        {
            for (int i = 0; i < diffSize; i++)
            {
                _stateEcmScoreInfos[state].RemoveLast();
                _stateBestScores[state].RemoveAt(_stateBestScores[state].Count - 1);
                _stateBestPrevArcs[state].RemoveAt(_stateBestPrevArcs[state].Count - 1);
            }
        }
    }

    // get difference between prefix and valid portion of processed prefix
    var prefixDiff = new string[prefix.Length - validProcPrefixCount];
    for (int i = 0; i < prefixDiff.Length; i++)
    {
        prefixDiff[i] = prefix[validProcPrefixCount + i];
    }

    // process word-graph given prefix difference
    ProcessWordGraphForPrefixDiff(prefixDiff, isLastWordComplete);

    // remember the prefix for the next incremental call (defensive copy)
    _prevPrefix = prefix.ToArray();
    _prevIsLastWordComplete = isLastWordComplete;

    var queue = new PriorityQueue<Hypothesis>(1000);
    GetStateHypotheses(queue);
    GetSubStateHypotheses(queue);

    // build a corrected translation result for each of the n-best hypotheses
    foreach (Hypothesis hypothesis in NBestSearch(n, queue))
    {
        var builder = new TranslationResultBuilder();
        BuildCorrectionFromHypothesis(builder, prefix, isLastWordComplete, hypothesis);
        yield return(builder.ToResult(_sourceSegment, prefix.Length));
    }
}