/// <summary>
/// Builds a matrix of alignment hints for a source/target segment pair.
/// </summary>
/// <remarks>
/// The SMT engine's best phrase alignment is computed first and, when a rule result is
/// supplied, merged with it. Only target words flagged with
/// <see cref="TranslationSources.Transfer"/> in the resulting hybrid result contribute hints:
/// their aligned cells are marked <see cref="AlignmentType.Aligned"/>, and every other cell
/// that shares a row or a column with such a cell is marked
/// <see cref="AlignmentType.NotAligned"/>. All remaining cells stay
/// <see cref="AlignmentType.Unknown"/>.
/// </remarks>
/// <param name="sourceSegment">The source words.</param>
/// <param name="targetSegment">The target words.</param>
/// <param name="ruleResult">The rule engine result, or <c>null</c> to use the SMT result alone.</param>
/// <returns>The hint matrix sized source count by target count.</returns>
internal WordAlignmentMatrix GetHintMatrix(IReadOnlyList<string> sourceSegment, IReadOnlyList<string> targetSegment, TranslationResult ruleResult)
{
    TranslationResult smtResult = SmtEngine.GetBestPhraseAlignment(sourceSegment, targetSegment);

    TranslationResult hybridResult;
    if (ruleResult == null)
    {
        hybridResult = smtResult;
    }
    else
    {
        hybridResult = smtResult.Merge(targetSegment.Count, RuleEngineThreshold, ruleResult);
    }

    var hints = new WordAlignmentMatrix(sourceSegment.Count, targetSegment.Count, AlignmentType.Unknown);
    var hintedRows = new HashSet<int>();

    for (int col = 0; col < targetSegment.Count; col++)
    {
        // only target words that came from the transfer engine produce hints
        bool isTransferWord = col < hybridResult.WordSources.Count
            && (hybridResult.WordSources[col] & TranslationSources.Transfer) != 0;
        if (!isTransferWord)
        {
            continue;
        }

        bool columnHinted = false;
        foreach (int row in hybridResult.Alignment.GetColumnAlignedIndices(col))
        {
            hints[row, col] = AlignmentType.Aligned;
            hintedRows.Add(row);
            columnHinted = true;
        }

        if (!columnHinted)
        {
            continue;
        }

        // everything else in a hinted column is known to be unaligned
        for (int row = 0; row < sourceSegment.Count; row++)
        {
            if (hints[row, col] == AlignmentType.Unknown)
            {
                hints[row, col] = AlignmentType.NotAligned;
            }
        }
    }

    // everything else in a hinted row is known to be unaligned
    foreach (int row in hintedRows)
    {
        for (int col = 0; col < targetSegment.Count; col++)
        {
            if (hints[row, col] == AlignmentType.Unknown)
            {
                hints[row, col] = AlignmentType.NotAligned;
            }
        }
    }

    return hints;
}
/// <summary>
/// Translates a segment with the SMT engine and, when a rule engine is configured,
/// merges the rule engine's translation into the SMT result.
/// </summary>
/// <param name="segment">The source words to translate.</param>
/// <returns>
/// The SMT result when <c>RuleEngine</c> is <c>null</c>; otherwise the SMT result merged
/// with the rule result using a prefix count of 0 and <c>RuleEngineThreshold</c>.
/// </returns>
public TranslationResult Translate(IReadOnlyList<string> segment)
{
    CheckDisposed();

    TranslationResult smtTranslation = SmtEngine.Translate(segment);
    if (RuleEngine != null)
    {
        TranslationResult ruleTranslation = RuleEngine.Translate(segment);
        return smtTranslation.Merge(0, RuleEngineThreshold, ruleTranslation);
    }

    return smtTranslation;
}
/// <summary>
/// Lazily produces the SMT engine's n-best translations of a segment, each merged with
/// the rule engine's translation when a rule engine is configured.
/// </summary>
/// <remarks>
/// This is an iterator method, so nothing in its body (including the disposed check)
/// runs until the returned sequence is enumerated. The rule engine is only consulted once
/// an SMT result has been produced, and its result is reused for later items as long as
/// it is non-null.
/// </remarks>
/// <param name="n">The number of translations requested from the SMT engine.</param>
/// <param name="segment">The source words to translate.</param>
/// <returns>A lazily evaluated sequence of translation results.</returns>
public IEnumerable<TranslationResult> Translate(int n, IReadOnlyList<string> segment)
{
    CheckDisposed();

    TranslationResult cachedRuleResult = null;
    foreach (TranslationResult smtTranslation in SmtEngine.Translate(n, segment))
    {
        if (RuleEngine == null)
        {
            yield return smtTranslation;
            continue;
        }

        // translate with the rule engine only when it is first needed
        cachedRuleResult = cachedRuleResult ?? RuleEngine.Translate(segment);
        yield return smtTranslation.Merge(0, RuleEngineThreshold, cachedRuleResult);
    }
}
/// <summary>
/// Re-tokenizes the given target prefix, recomputes the current translation result for it
/// and refreshes the suggestion.
/// </summary>
/// <remarks>
/// The last word is treated as complete when the prefix has no tokens or when the last
/// token does not end exactly at the end of the prefix string. When a rule result is
/// available it is merged into the SMT result; an incomplete last word is excluded from
/// the prefix count passed to the merge.
/// </remarks>
/// <param name="prefix">The target text entered so far.</param>
/// <returns>The value of <c>Suggestion</c> after <c>UpdateSuggestion</c> has run.</returns>
/// <exception cref="InvalidOperationException">The session has not been initialized.</exception>
public string[] UpdatePrefix(string prefix)
{
    if (!IsInitialized)
    {
        throw new InvalidOperationException("The session has not been initialized.");
    }

    Range<int>[] ranges = _engine.TargetWordTokenizer.Tokenize(prefix).ToArray();

    var prefixWords = new string[ranges.Length];
    for (int w = 0; w < ranges.Length; w++)
    {
        prefixWords[w] = prefix.Substring(ranges[w].Start, ranges[w].Length);
    }
    Prefix = prefixWords;

    // the last word is still being typed only if a token runs right up to the end of the text
    bool lastTokenTouchesEnd = ranges.Length != 0 && ranges[ranges.Length - 1].End == prefix.Length;
    IsLastWordComplete = !lastTokenTouchesEnd;

    // fall back to an empty result when the word graph yields no correction
    TranslationResult smtResult = _wordGraphProcessor.Correct(Prefix, IsLastWordComplete, 1).FirstOrDefault()
        ?? new TranslationResultBuilder().ToResult(SourceSegment, Prefix.Length);

    if (RuleResult != null)
    {
        int completedWordCount = Prefix.Length;
        if (!IsLastWordComplete)
        {
            completedWordCount -= 1;
        }
        _curResult = smtResult.Merge(completedWordCount, RuleEngineThreshold, RuleResult);
    }
    else
    {
        _curResult = smtResult;
    }

    UpdateSuggestion();
    return Suggestion;
}
/// <summary>
/// Computes how many words the suggestion search may look ahead past the prefix.
/// </summary>
/// <remarks>
/// The count starts at 1. For each target word at or after <paramref name="prefixCount"/>,
/// it grows by one if the word has no aligned source word, or by the number of aligned
/// source words minus one otherwise. It then grows by one for every source word, from the
/// smallest source index seen (or index 0 if none was seen) onward, whose row is reported
/// as <see cref="AlignmentType.NotAligned"/>.
/// </remarks>
/// <param name="prefixCount">The index of the first target word after the prefix.</param>
/// <param name="result">The translation result whose alignment is inspected.</param>
/// <returns>The lookahead count.</returns>
private int ComputeLookahead(int prefixCount, TranslationResult result)
{
    int lookahead = 1;
    int firstSourceIndex = -1;

    for (int trg = prefixCount; trg < result.TargetSegment.Count; trg++)
    {
        int[] alignedSources = result.Alignment.GetColumnAlignedIndices(trg).ToArray();
        if (alignedSources.Length == 0)
        {
            lookahead++;
            continue;
        }

        lookahead += alignedSources.Length - 1;

        // track the smallest source index aligned to any remaining target word
        foreach (int src in alignedSources)
        {
            if (firstSourceIndex == -1 || src < firstSourceIndex)
            {
                firstSourceIndex = src;
            }
        }
    }

    int startRow = firstSourceIndex == -1 ? 0 : firstSourceIndex;
    for (int row = startRow; row < result.SourceSegment.Count; row++)
    {
        if (result.Alignment.IsRowAligned(row) == AlignmentType.NotAligned)
        {
            lookahead++;
        }
    }

    return lookahead;
}
/// <summary>
/// Verifies that <c>TranslateInteractively</c> hands the callback a session whose SMT word
/// graph and rule result match the DTO served by the mock HTTP client.
/// </summary>
/// <param name="assert">The assertion object supplied by the test runner.</param>
private static void TranslateInteractively_Success(Assert assert)
{
    // --- word graph fixture: four arcs chained from state 0 to state 4 ---
    WordGraphArcDto thisIsArc = new WordGraphArcDto
    {
        PrevState = 0,
        NextState = 1,
        Score = -11.11f,
        Words = new[] { "This", "is" },
        Confidences = new[] { 0.4f, 0.5f },
        SourceSegmentRange = new RangeDto { Start = 0, End = 2 },
        IsUnknown = false,
        Alignment = new[]
        {
            new AlignedWordPairDto { SourceIndex = 0, TargetIndex = 0 },
            new AlignedWordPairDto { SourceIndex = 1, TargetIndex = 1 }
        }
    };
    WordGraphArcDto aArc = new WordGraphArcDto
    {
        PrevState = 1,
        NextState = 2,
        Score = -22.22f,
        Words = new[] { "a" },
        Confidences = new[] { 0.6f },
        SourceSegmentRange = new RangeDto { Start = 2, End = 3 },
        IsUnknown = false,
        Alignment = new[] { new AlignedWordPairDto { SourceIndex = 0, TargetIndex = 0 } }
    };
    WordGraphArcDto unknownArc = new WordGraphArcDto
    {
        PrevState = 2,
        NextState = 3,
        Score = 33.33f,
        Words = new[] { "prueba" },
        Confidences = new[] { 0.0f },
        SourceSegmentRange = new RangeDto { Start = 3, End = 4 },
        IsUnknown = true,
        Alignment = new[] { new AlignedWordPairDto { SourceIndex = 0, TargetIndex = 0 } }
    };
    WordGraphArcDto periodArc = new WordGraphArcDto
    {
        PrevState = 3,
        NextState = 4,
        Score = -44.44f,
        Words = new[] { "." },
        Confidences = new[] { 0.7f },
        SourceSegmentRange = new RangeDto { Start = 4, End = 5 },
        IsUnknown = false,
        Alignment = new[] { new AlignedWordPairDto { SourceIndex = 0, TargetIndex = 0 } }
    };
    WordGraphDto wordGraphDto = new WordGraphDto
    {
        InitialStateScore = -111.111f,
        FinalStates = new[] { 4 },
        Arcs = new[] { thisIsArc, aArc, unknownArc, periodArc }
    };

    // --- rule result fixture: only the fourth word comes from the transfer engine ---
    TranslationResultDto ruleResultDto = new TranslationResultDto
    {
        Target = new[] { "Esto", "es", "una", "test", "." },
        Confidences = new[] { 0.0f, 0.0f, 0.0f, 1.0f, 0.0f },
        Sources = new[]
        {
            TranslationSources.None,
            TranslationSources.None,
            TranslationSources.None,
            TranslationSources.Transfer,
            TranslationSources.None
        },
        Alignment = new[]
        {
            new AlignedWordPairDto { SourceIndex = 0, TargetIndex = 0 },
            new AlignedWordPairDto { SourceIndex = 1, TargetIndex = 1 },
            new AlignedWordPairDto { SourceIndex = 2, TargetIndex = 2 },
            new AlignedWordPairDto { SourceIndex = 3, TargetIndex = 3 },
            new AlignedWordPairDto { SourceIndex = 4, TargetIndex = 4 }
        }
    };

    var resultDto = new InteractiveTranslationResultDto
    {
        WordGraph = wordGraphDto,
        RuleResult = ruleResultDto
    };

    // --- serve the serialized DTO from the mock client ---
    var httpClient = new MockHttpClient();
    httpClient.Requests.Add(new MockRequest
    {
        Method = HttpRequestMethod.Post,
        ResponseText = JsonConvert.SerializeObject(resultDto, RestClientBase.SerializerSettings)
    });

    var engine = new TranslationEngine("http://localhost/", "project1", httpClient);
    Action done = assert.Async();

    engine.TranslateInteractively("Esto es una prueba.", 0.2, session =>
    {
        assert.NotEqual(session, null);

        // word graph
        WordGraph wordGraph = session.SmtWordGraph;
        assert.Equal(wordGraph.InitialStateScore, -111.111);
        assert.DeepEqual(wordGraph.FinalStates.ToArray(), new[] { 4 });
        assert.Equal(wordGraph.Arcs.Count, 4);

        // first arc
        WordGraphArc firstArc = wordGraph.Arcs[0];
        assert.Equal(firstArc.PrevState, 0);
        assert.Equal(firstArc.NextState, 1);
        assert.Equal(firstArc.Score, -11.11);
        assert.DeepEqual(firstArc.Words.ToArray(), new[] { "This", "is" });
        assert.DeepEqual(firstArc.WordConfidences.ToArray(), new[] { 0.4, 0.5 });
        assert.Equal(firstArc.SourceSegmentRange.Start, 0);
        assert.Equal(firstArc.SourceSegmentRange.End, 2);
        assert.Equal(firstArc.IsUnknown, false);
        assert.Equal(firstArc.Alignment[0, 0], AlignmentType.Aligned);
        assert.Equal(firstArc.Alignment[1, 1], AlignmentType.Aligned);

        // third arc is the unknown word
        WordGraphArc thirdArc = wordGraph.Arcs[2];
        assert.Equal(thirdArc.IsUnknown, true);

        // rule result
        TranslationResult ruleResult = session.RuleResult;
        assert.DeepEqual(ruleResult.TargetSegment.ToArray(), new[] { "Esto", "es", "una", "test", "." });
        assert.DeepEqual(ruleResult.WordConfidences.ToArray(), new[] { 0.0, 0.0, 0.0, 1.0, 0.0 });
        assert.DeepEqual(ruleResult.WordSources.ToArray(), new[]
        {
            TranslationSources.None,
            TranslationSources.None,
            TranslationSources.None,
            TranslationSources.Transfer,
            TranslationSources.None
        });
        for (int k = 0; k < 5; k++)
        {
            assert.Equal(ruleResult.Alignment[k, k], AlignmentType.Aligned);
        }

        done();
    });
}
/// <summary>
/// Recases a single target word of a translation result by delegating to the
/// <c>RecaseTargetWord</c> overload on the result's alignment.
/// </summary>
/// <param name="result">The translation result that owns the target word.</param>
/// <param name="sourceSegment">The source words.</param>
/// <param name="targetIndex">The index of the target word to recase.</param>
/// <returns>The string returned by the alignment-level recasing call.</returns>
public static string RecaseTargetWord(this TranslationResult result, IReadOnlyList<string> sourceSegment, int targetIndex)
{
    var alignment = result.Alignment;
    var targetWords = result.TargetSegment;

    // the literal 0 is forwarded as the second argument of the alignment-level overload
    return alignment.RecaseTargetWord(sourceSegment, 0, targetWords, targetIndex);
}
/// <summary>
/// Creates a hybrid session from its owning engine, the underlying SMT session and the
/// rule engine result, then computes the initial results.
/// </summary>
/// <param name="engine">The hybrid engine that owns this session.</param>
/// <param name="smtSession">The underlying SMT interactive session.</param>
/// <param name="ruleResult">The rule engine result stored for this session.</param>
internal HybridInteractiveTranslationSession(HybridTranslationEngine engine, IInteractiveTranslationSession smtSession, TranslationResult ruleResult)
{
    this._engine = engine;
    this._smtSession = smtSession;
    this._ruleResult = ruleResult;

    // runs last, once every field has been assigned
    this.UpdateCurrentResults();
}
/// <summary>
/// Chooses the target words to suggest after the current prefix, word by word.
/// </summary>
/// <remarks>
/// A word is suggested when its confidence reaches <c>ConfidenceThreshold</c> or it comes
/// from a transfer engine. Suggesting stops at the first all-punctuation word, when the
/// lookahead budget from <c>ComputeLookahead</c> is exhausted outside of a phrase, or when
/// an aligned but non-qualifying word follows an already started phrase.
/// </remarks>
/// <param name="prefixCount">The number of target words in the prefix.</param>
/// <param name="isLastWordComplete">Whether the last prefix word is complete.</param>
/// <param name="result">The translation result to draw suggestions from.</param>
/// <returns>
/// The suggested target indices with the lowest confidence among them (0 if none), or an
/// empty suggestion when the partial last word cannot be suggested.
/// </returns>
public TranslationSuggestion GetSuggestion(int prefixCount, bool isLastWordComplete, TranslationResult result)
{
    int startingJ = prefixCount;
    if (!isLastWordComplete)
    {
        // there is no partial word to look at when the prefix is empty or extends past
        // the result; indexing WordSources would be out of range, so suggest nothing
        if (startingJ <= 0 || startingJ > result.WordSources.Count)
        {
            return new TranslationSuggestion();
        }

        // if the prefix ends with a partial word and it has been completed,
        // then make sure it is included as a suggestion,
        // otherwise, don't return any suggestions
        if ((result.WordSources[startingJ - 1] & TranslationSources.Smt) != 0)
        {
            startingJ--;
        }
        else
        {
            return new TranslationSuggestion();
        }
    }

    int lookaheadCount = ComputeLookahead(prefixCount, result);
    int j = startingJ;
    bool inPhrase = false;
    var indices = new List<int>();
    double minConfidence = -1;
    while (j < result.TargetSegment.Count && (lookaheadCount > 0 || inPhrase))
    {
        string word = result.TargetSegment[j];
        // stop suggesting at punctuation
        if (word.All(char.IsPunctuation))
        {
            break;
        }

        // criteria for suggesting a word
        // the word must either:
        // - meet the confidence threshold
        // - come from a transfer engine
        double confidence = result.WordConfidences[j];
        TranslationSources sources = result.WordSources[j];
        if (confidence >= ConfidenceThreshold || (sources & TranslationSources.Transfer) != 0)
        {
            indices.Add(j);
            if (minConfidence < 0 || confidence < minConfidence)
            {
                minConfidence = confidence;
            }
            inPhrase = true;
            lookaheadCount--;
        }
        else
        {
            // skip over inserted words
            if (result.Alignment.IsColumnAligned(j) == AlignmentType.Aligned)
            {
                lookaheadCount--;
                // only suggest the first word/phrase we find
                if (inPhrase)
                {
                    break;
                }
            }
        }
        j++;
    }

    // a negative minimum means no word was suggested; report 0 in that case
    return new TranslationSuggestion(indices, minConfidence < 0 ? 0 : minConfidence);
}
/// <summary>
/// Merges this result with another result for the same source segment.
/// </summary>
/// <remarks>
/// Target words of this result are visited in order:
/// <list type="bullet">
/// <item>a word aligned to no source word is copied as is, without alignment;</item>
/// <item>a word inside the prefix, or whose confidence reaches <paramref name="threshold"/>,
/// is kept, and its sources are combined with those of any identical, translated word that
/// the other result aligns to the same source words;</item>
/// <item>any other word is replaced by the translated words that the other result aligns
/// to the same source words, or kept if the other result has none.</item>
/// </list>
/// </remarks>
/// <param name="prefixCount">The number of leading target words that are always kept.</param>
/// <param name="threshold">The minimum confidence for keeping a word of this result.</param>
/// <param name="otherResult">The result to merge in.</param>
/// <returns>A new result with this result's source segment and phrases and the merged target.</returns>
public TranslationResult Merge(int prefixCount, double threshold, TranslationResult otherResult)
{
    var outWords = new List<string>();
    var outConfidences = new List<double>();
    var outSources = new List<TranslationSources>();
    var outPairs = new HashSet<Tuple<int, int>>();

    for (int trg = 0; trg < TargetSegment.Count; trg++)
    {
        int[] alignedSrc = Alignment.GetColumnAlignedIndices(trg).ToArray();

        // case 1: target word doesn't align with anything
        if (alignedSrc.Length == 0)
        {
            outWords.Add(TargetSegment[trg]);
            outConfidences.Add(WordConfidences[trg]);
            outSources.Add(WordSources[trg]);
            continue;
        }

        // case 2: prefix word or confident word, so use the target word of this result
        if (trg < prefixCount || WordConfidences[trg] >= threshold)
        {
            outWords.Add(TargetSegment[trg]);
            outConfidences.Add(WordConfidences[trg]);
            int keptIndex = outWords.Count - 1;

            TranslationSources combined = WordSources[trg];
            foreach (int src in alignedSrc)
            {
                // combine sources for any words that both this result
                // and the other result translated the same
                foreach (int otherTrg in otherResult.Alignment.GetRowAlignedIndices(src))
                {
                    TranslationSources otherSources = otherResult.WordSources[otherTrg];
                    if (otherSources != TranslationSources.None
                        && otherResult.TargetSegment[otherTrg] == TargetSegment[trg])
                    {
                        combined |= otherSources;
                    }
                }
                outPairs.Add(Tuple.Create(src, keptIndex));
            }
            outSources.Add(combined);
            continue;
        }

        // case 3: low-confidence word, so use the target words of the other result
        bool replaced = false;
        foreach (int src in alignedSrc)
        {
            foreach (int otherTrg in otherResult.Alignment.GetRowAlignedIndices(src))
            {
                // look for any translated words from other result
                TranslationSources otherSources = otherResult.WordSources[otherTrg];
                if (otherSources == TranslationSources.None)
                {
                    continue;
                }

                outWords.Add(otherResult.TargetSegment[otherTrg]);
                outConfidences.Add(otherResult.WordConfidences[otherTrg]);
                outSources.Add(otherSources);
                outPairs.Add(Tuple.Create(src, outWords.Count - 1));
                replaced = true;
            }
        }

        if (replaced)
        {
            continue;
        }

        // the other result had no translated words, so just use this result's target word
        outWords.Add(TargetSegment[trg]);
        outConfidences.Add(WordConfidences[trg]);
        outSources.Add(WordSources[trg]);
        int fallbackIndex = outWords.Count - 1;
        foreach (int src in alignedSrc)
        {
            outPairs.Add(Tuple.Create(src, fallbackIndex));
        }
    }

    // the matrix can only be sized once the merged target length is known
    var mergedMatrix = new WordAlignmentMatrix(SourceSegment.Count, outWords.Count);
    foreach (Tuple<int, int> pair in outPairs)
    {
        mergedMatrix[pair.Item1, pair.Item2] = true;
    }

    return new TranslationResult(SourceSegment, outWords, outConfidences, outSources, mergedMatrix, Phrases);
}
/// <summary>
/// Chooses the target words to suggest after the current prefix, phrase by phrase.
/// </summary>
/// <remarks>
/// Phrases whose target cut is at or before the starting word are skipped. After that,
/// consecutive phrases are suggested for as long as their confidence reaches
/// <c>ConfidenceThreshold</c>. Within a phrase, suggesting stops at the first word that
/// has no translation source or consists only of punctuation.
/// </remarks>
/// <param name="prefixCount">The number of target words in the prefix.</param>
/// <param name="isLastWordComplete">Whether the last prefix word is complete.</param>
/// <param name="result">The translation result to draw suggestions from.</param>
/// <returns>
/// The suggested target indices with the lowest confidence among the visited phrases
/// (0 if none), or an empty suggestion when the partial last word cannot be suggested.
/// </returns>
public TranslationSuggestion GetSuggestion(int prefixCount, bool isLastWordComplete, TranslationResult result)
{
    int startingJ = prefixCount;
    if (!isLastWordComplete)
    {
        // there is no partial word to look at when the prefix is empty or extends past
        // the result; indexing WordSources would be out of range, so suggest nothing
        if (startingJ <= 0 || startingJ > result.WordSources.Count)
        {
            return new TranslationSuggestion();
        }

        // if the prefix ends with a partial word and it has been completed,
        // then make sure it is included as a suggestion,
        // otherwise, don't return any suggestions
        if ((result.WordSources[startingJ - 1] & TranslationSources.Smt) != 0)
        {
            startingJ--;
        }
        else
        {
            return new TranslationSuggestion();
        }
    }

    // skip the phrases that end at or before the starting word
    int k = 0;
    while (k < result.Phrases.Count && result.Phrases[k].TargetSegmentCut <= startingJ)
    {
        k++;
    }

    double minConfidence = -1;
    var indices = new List<int>();
    for (; k < result.Phrases.Count; k++)
    {
        Phrase phrase = result.Phrases[k];
        if (phrase.Confidence < ConfidenceThreshold)
        {
            // stop at the first phrase that is not confident enough
            break;
        }

        bool hitBreakingWord = false;
        for (int j = startingJ; j < phrase.TargetSegmentCut; j++)
        {
            string word = result.TargetSegment[j];
            TranslationSources sources = result.WordSources[j];
            if (sources == TranslationSources.None || word.All(char.IsPunctuation))
            {
                hitBreakingWord = true;
                break;
            }
            indices.Add(j);
        }

        // the phrase's confidence counts even when a breaking word cut it short
        if (minConfidence < 0 || phrase.Confidence < minConfidence)
        {
            minConfidence = phrase.Confidence;
        }
        startingJ = phrase.TargetSegmentCut;

        if (hitBreakingWord)
        {
            break;
        }
    }

    // a negative minimum means no phrase was visited; report 0 in that case
    return new TranslationSuggestion(indices, minConfidence < 0 ? 0 : minConfidence);
}