Ejemplo n.º 1
0
        /// <summary>
        /// Builds a matrix of alignment hints for a source/target segment pair. Cells start as
        /// <c>Unknown</c>; target words whose sources include <c>TranslationSources.Transfer</c> have
        /// their aligned cells marked <c>Aligned</c>, and the remaining cells in the affected columns
        /// and rows are marked <c>NotAligned</c>.
        /// </summary>
        /// <param name="sourceSegment">The source words (one matrix row per word).</param>
        /// <param name="targetSegment">The target words (one matrix column per word).</param>
        /// <param name="ruleResult">A result to merge into the SMT phrase alignment, or <c>null</c> to
        /// use the SMT phrase alignment on its own.</param>
        /// <returns>The hint matrix.</returns>
        internal WordAlignmentMatrix GetHintMatrix(IReadOnlyList <string> sourceSegment,
                                                   IReadOnlyList <string> targetSegment, TranslationResult ruleResult)
        {
            TranslationResult hybridResult = SmtEngine.GetBestPhraseAlignment(sourceSegment, targetSegment);
            if (ruleResult != null)
            {
                // every target word is treated as prefix during the merge
                hybridResult = hybridResult.Merge(targetSegment.Count, RuleEngineThreshold, ruleResult);
            }

            var hints = new WordAlignmentMatrix(sourceSegment.Count, targetSegment.Count, AlignmentType.Unknown);
            var alignedRows = new HashSet<int>();

            for (int col = 0; col < targetSegment.Count; col++)
            {
                // only target words that carry the Transfer flag contribute hints
                if (col >= hybridResult.WordSources.Count ||
                    (hybridResult.WordSources[col] & TranslationSources.Transfer) == 0)
                {
                    continue;
                }

                bool columnHasAlignment = false;
                foreach (int row in hybridResult.Alignment.GetColumnAlignedIndices(col))
                {
                    hints[row, col] = AlignmentType.Aligned;
                    alignedRows.Add(row);
                    columnHasAlignment = true;
                }

                if (!columnHasAlignment)
                {
                    continue;
                }

                // the column is settled: whatever is still unknown in it is not aligned
                for (int row = 0; row < sourceSegment.Count; row++)
                {
                    if (hints[row, col] == AlignmentType.Unknown)
                    {
                        hints[row, col] = AlignmentType.NotAligned;
                    }
                }
            }

            // likewise settle every row that received at least one alignment
            foreach (int row in alignedRows)
            {
                for (int col = 0; col < targetSegment.Count; col++)
                {
                    if (hints[row, col] == AlignmentType.Unknown)
                    {
                        hints[row, col] = AlignmentType.NotAligned;
                    }
                }
            }

            return hints;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Translates a segment with the SMT engine and, when a rule engine is present, merges the
        /// rule engine's translation into the SMT result using <c>RuleEngineThreshold</c>.
        /// </summary>
        /// <param name="segment">The source words to translate.</param>
        /// <returns>The SMT result, or the merged result when <c>RuleEngine</c> is not <c>null</c>.</returns>
        public TranslationResult Translate(IReadOnlyList <string> segment)
        {
            CheckDisposed();

            TranslationResult result = SmtEngine.Translate(segment);
            if (RuleEngine != null)
            {
                TranslationResult ruleResult = RuleEngine.Translate(segment);
                // a prefix count of 0 is passed to the merge
                result = result.Merge(0, RuleEngineThreshold, ruleResult);
            }

            return result;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Produces up to <paramref name="n"/> translations of a segment. Each SMT translation is
        /// returned as-is when there is no rule engine; otherwise it is merged with the rule engine's
        /// translation of the same segment.
        /// </summary>
        /// <remarks>
        /// The disposed check runs immediately when this method is called, rather than being deferred
        /// until the returned sequence is first enumerated. The translations themselves are still
        /// produced lazily.
        /// </remarks>
        /// <param name="n">The number of translations requested from the SMT engine.</param>
        /// <param name="segment">The source words to translate.</param>
        /// <returns>A lazily evaluated sequence of translation results.</returns>
        public IEnumerable <TranslationResult> Translate(int n, IReadOnlyList <string> segment)
        {
            // An iterator body does not start running until the first MoveNext(), so the check is
            // performed here, in a non-iterator wrapper, to fail fast.
            CheckDisposed();

            return(TranslateNBestIterator(n, segment));
        }

        // Lazily yields the (optionally merged) n-best translations for Translate(int, IReadOnlyList<string>).
        private IEnumerable <TranslationResult> TranslateNBestIterator(int n, IReadOnlyList <string> segment)
        {
            // Checked again at enumeration time, which is when the original method performed its check.
            CheckDisposed();

            TranslationResult ruleResult = null;

            foreach (TranslationResult smtResult in SmtEngine.Translate(n, segment))
            {
                if (RuleEngine == null)
                {
                    yield return(smtResult);
                }
                else
                {
                    // the rule-based translation is computed once, on first need, and then reused
                    if (ruleResult == null)
                    {
                        ruleResult = RuleEngine.Translate(segment);
                    }
                    yield return(smtResult.Merge(0, RuleEngineThreshold, ruleResult));
                }
            }
        }
        /// <summary>
        /// Tokenizes the given prefix with the engine's target word tokenizer, recomputes the current
        /// result for that prefix (merging in <c>RuleResult</c> when it is set) and refreshes the
        /// suggestion.
        /// </summary>
        /// <param name="prefix">The prefix text to tokenize.</param>
        /// <returns>The value of <c>Suggestion</c> after the update.</returns>
        /// <exception cref="InvalidOperationException">The session has not been initialized.</exception>
        public string[] UpdatePrefix(string prefix)
        {
            if (!IsInitialized)
            {
                throw new InvalidOperationException("The session has not been initialized.");
            }

            Range <int>[] wordRanges = _engine.TargetWordTokenizer.Tokenize(prefix).ToArray();
            Prefix = wordRanges.Select(range => prefix.Substring(range.Start, range.Length)).ToArray();

            // the last word counts as incomplete only when a token runs right up to the end of the text
            bool lastTokenTouchesEnd = wordRanges.Length > 0 &&
                                       wordRanges[wordRanges.Length - 1].End == prefix.Length;
            IsLastWordComplete = !lastTokenTouchesEnd;

            // fall back to a result built from an empty builder when the word graph yields no correction
            TranslationResult smtResult = _wordGraphProcessor.Correct(Prefix, IsLastWordComplete, 1).FirstOrDefault()
                                          ?? new TranslationResultBuilder().ToResult(SourceSegment, Prefix.Length);

            if (RuleResult != null)
            {
                // an incomplete last word is left out of the prefix count passed to the merge
                int mergePrefixCount = IsLastWordComplete ? Prefix.Length : Prefix.Length - 1;
                _curResult = smtResult.Merge(mergePrefixCount, RuleEngineThreshold, RuleResult);
            }
            else
            {
                _curResult = smtResult;
            }

            UpdateSuggestion();

            return Suggestion;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Computes a lookahead count for the target words at or after <paramref name="prefixCount"/>.
        /// The count starts at 1, grows by one for every such target word with no aligned source word,
        /// by (aligned count - 1) for every such target word that has aligned source words, and by one
        /// for every source word, from the lowest aligned source index onwards (or from 0 when nothing
        /// is aligned), whose row reports <c>AlignmentType.NotAligned</c>.
        /// </summary>
        /// <param name="prefixCount">Index of the first target word to consider.</param>
        /// <param name="result">The translation result whose alignment is inspected.</param>
        /// <returns>The lookahead count.</returns>
        private int ComputeLookahead(int prefixCount, TranslationResult result)
        {
            const int NoSourceIndex = -1;

            int lookahead = 1;
            int lowestSourceIndex = NoSourceIndex;

            for (int targetIndex = prefixCount; targetIndex < result.TargetSegment.Count; targetIndex++)
            {
                int[] alignedSources = result.Alignment.GetColumnAlignedIndices(targetIndex).ToArray();
                if (alignedSources.Length > 0)
                {
                    lookahead += alignedSources.Length - 1;
                    foreach (int sourceIndex in alignedSources)
                    {
                        // remember the lowest aligned source index seen so far
                        if (lowestSourceIndex == NoSourceIndex || sourceIndex < lowestSourceIndex)
                        {
                            lowestSourceIndex = sourceIndex;
                        }
                    }
                }
                else
                {
                    lookahead++;
                }
            }

            int firstRow = lowestSourceIndex == NoSourceIndex ? 0 : lowestSourceIndex;
            for (int row = firstRow; row < result.SourceSegment.Count; row++)
            {
                if (result.Alignment.IsRowAligned(row) == AlignmentType.NotAligned)
                {
                    lookahead++;
                }
            }

            return lookahead;
        }
Ejemplo n.º 6
0
        // Checks that TranslationEngine.TranslateInteractively hands the callback a session whose
        // SmtWordGraph and RuleResult match the DTO that was serialized into the mocked HTTP response.
        private static void TranslateInteractively_Success(Assert assert)
        {
            var httpClient = new MockHttpClient();
            // Canned response body: a word graph of four arcs chained through states 0 -> 1 -> 2 -> 3 -> 4
            // (4 being the only final state) plus a five-word rule result.
            var resultDto  = new InteractiveTranslationResultDto
            {
                WordGraph = new WordGraphDto
                {
                    InitialStateScore = -111.111f,
                    FinalStates       = new[] { 4 },
                    Arcs = new[]
                    {
                        // arc 0: two words covering source range [0, 2)
                        new WordGraphArcDto
                        {
                            PrevState          = 0,
                            NextState          = 1,
                            Score              = -11.11f,
                            Words              = new[] { "This", "is" },
                            Confidences        = new[] { 0.4f, 0.5f },
                            SourceSegmentRange = new RangeDto {
                                Start = 0, End = 2
                            },
                            IsUnknown = false,
                            Alignment = new[]
                            {
                                new AlignedWordPairDto {
                                    SourceIndex = 0, TargetIndex = 0
                                },
                                new AlignedWordPairDto {
                                    SourceIndex = 1, TargetIndex = 1
                                }
                            }
                        },
                        // arc 1: single word covering source range [2, 3)
                        // NOTE(review): this and the following single-word arcs all use alignment (0, 0);
                        // presumably alignment indices are relative to the arc - confirm.
                        new WordGraphArcDto
                        {
                            PrevState          = 1,
                            NextState          = 2,
                            Score              = -22.22f,
                            Words              = new[] { "a" },
                            Confidences        = new[] { 0.6f },
                            SourceSegmentRange = new RangeDto {
                                Start = 2, End = 3
                            },
                            IsUnknown = false,
                            Alignment = new[]
                            {
                                new AlignedWordPairDto {
                                    SourceIndex = 0, TargetIndex = 0
                                }
                            }
                        },
                        // arc 2: the only arc flagged IsUnknown; its word is the source word "prueba"
                        new WordGraphArcDto
                        {
                            PrevState          = 2,
                            NextState          = 3,
                            Score              = 33.33f,
                            Words              = new[] { "prueba" },
                            Confidences        = new[] { 0.0f },
                            SourceSegmentRange = new RangeDto {
                                Start = 3, End = 4
                            },
                            IsUnknown = true,
                            Alignment = new[]
                            {
                                new AlignedWordPairDto {
                                    SourceIndex = 0, TargetIndex = 0
                                }
                            }
                        },
                        // arc 3: final punctuation, leading into final state 4
                        new WordGraphArcDto
                        {
                            PrevState          = 3,
                            NextState          = 4,
                            Score              = -44.44f,
                            Words              = new[] { "." },
                            Confidences        = new[] { 0.7f },
                            SourceSegmentRange = new RangeDto {
                                Start = 4, End = 5
                            },
                            IsUnknown = false,
                            Alignment = new[]
                            {
                                new AlignedWordPairDto {
                                    SourceIndex = 0, TargetIndex = 0
                                }
                            }
                        }
                    }
                },
                // Rule result: only the fourth word ("test") has a source other than None (Transfer),
                // and the alignment is one-to-one along the diagonal.
                RuleResult = new TranslationResultDto
                {
                    Target      = new[] { "Esto", "es", "una", "test", "." },
                    Confidences = new[] { 0.0f, 0.0f, 0.0f, 1.0f, 0.0f },
                    Sources     = new[]
                    {
                        TranslationSources.None,
                        TranslationSources.None,
                        TranslationSources.None,
                        TranslationSources.Transfer,
                        TranslationSources.None
                    },
                    Alignment = new[]
                    {
                        new AlignedWordPairDto {
                            SourceIndex = 0, TargetIndex = 0
                        },
                        new AlignedWordPairDto {
                            SourceIndex = 1, TargetIndex = 1
                        },
                        new AlignedWordPairDto {
                            SourceIndex = 2, TargetIndex = 2
                        },
                        new AlignedWordPairDto {
                            SourceIndex = 3, TargetIndex = 3
                        },
                        new AlignedWordPairDto {
                            SourceIndex = 4, TargetIndex = 4
                        }
                    }
                }
            };

            // Register the serialized DTO as the response text of a POST request on the mock client.
            // NOTE(review): presumably the mock answers the engine's next POST with this text - confirm
            // against MockHttpClient.
            httpClient.Requests.Add(new MockRequest
            {
                Method       = HttpRequestMethod.Post,
                ResponseText = JsonConvert.SerializeObject(resultDto, RestClientBase.SerializerSettings)
            });

            var    engine = new TranslationEngine("http://localhost/", "project1", httpClient);
            // done() is invoked at the end of the callback below.
            // NOTE(review): presumably this signals completion of the asynchronous test - confirm.
            Action done   = assert.Async();

            engine.TranslateInteractively("Esto es una prueba.", 0.2, session =>
            {
                assert.NotEqual(session, null);

                // NOTE(review): the DTO values above are float literals while the expected values here
                // are double literals (-111.111, -11.11, 0.4, ...); these comparisons hold only if the
                // runtime does not distinguish the two representations - confirm.
                WordGraph wordGraph = session.SmtWordGraph;
                assert.Equal(wordGraph.InitialStateScore, -111.111);
                assert.DeepEqual(wordGraph.FinalStates.ToArray(), new[] { 4 });
                assert.Equal(wordGraph.Arcs.Count, 4);
                // the first arc is checked field by field
                WordGraphArc arc = wordGraph.Arcs[0];
                assert.Equal(arc.PrevState, 0);
                assert.Equal(arc.NextState, 1);
                assert.Equal(arc.Score, -11.11);
                assert.DeepEqual(arc.Words.ToArray(), new[] { "This", "is" });
                assert.DeepEqual(arc.WordConfidences.ToArray(), new[] { 0.4, 0.5 });
                assert.Equal(arc.SourceSegmentRange.Start, 0);
                assert.Equal(arc.SourceSegmentRange.End, 2);
                assert.Equal(arc.IsUnknown, false);
                assert.Equal(arc.Alignment[0, 0], AlignmentType.Aligned);
                assert.Equal(arc.Alignment[1, 1], AlignmentType.Aligned);
                // for the third arc only the IsUnknown flag is checked
                arc = wordGraph.Arcs[2];
                assert.Equal(arc.IsUnknown, true);

                // the rule result must match the DTO: words, confidences, sources and diagonal alignment
                TranslationResult ruleResult = session.RuleResult;
                assert.DeepEqual(ruleResult.TargetSegment.ToArray(), new[] { "Esto", "es", "una", "test", "." });
                assert.DeepEqual(ruleResult.WordConfidences.ToArray(), new[] { 0.0, 0.0, 0.0, 1.0, 0.0 });
                assert.DeepEqual(ruleResult.WordSources.ToArray(),
                                 new[]
                {
                    TranslationSources.None,
                    TranslationSources.None,
                    TranslationSources.None,
                    TranslationSources.Transfer,
                    TranslationSources.None
                });
                assert.Equal(ruleResult.Alignment[0, 0], AlignmentType.Aligned);
                assert.Equal(ruleResult.Alignment[1, 1], AlignmentType.Aligned);
                assert.Equal(ruleResult.Alignment[2, 2], AlignmentType.Aligned);
                assert.Equal(ruleResult.Alignment[3, 3], AlignmentType.Aligned);
                assert.Equal(ruleResult.Alignment[4, 4], AlignmentType.Aligned);
                done();
            });
        }
Ejemplo n.º 7
0
 /// <summary>
 /// Recases one target word of a translation result by delegating to the <c>RecaseTargetWord</c>
 /// overload available on the result's alignment, passing 0 as its second argument and the result's
 /// own target segment.
 /// </summary>
 /// <param name="result">The translation result that owns the target word.</param>
 /// <param name="sourceSegment">The source words handed to the alignment-level overload.</param>
 /// <param name="targetIndex">Index of the target word to recase.</param>
 /// <returns>The string returned by the alignment-level overload.</returns>
 public static string RecaseTargetWord(this TranslationResult result, IReadOnlyList <string> sourceSegment,
                                       int targetIndex)
 {
     var alignment     = result.Alignment;
     var targetSegment = result.TargetSegment;

     return alignment.RecaseTargetWord(sourceSegment, 0, targetSegment, targetIndex);
 }
Ejemplo n.º 8
0
 /// <summary>
 /// Creates a hybrid session from its owning engine, the underlying SMT session and a rule result,
 /// then computes the initial current results.
 /// </summary>
 /// <param name="engine">The hybrid engine stored by this session.</param>
 /// <param name="smtSession">The SMT session stored by this session.</param>
 /// <param name="ruleResult">The rule result stored by this session.</param>
 internal HybridInteractiveTranslationSession(HybridTranslationEngine engine,
                                              IInteractiveTranslationSession smtSession, TranslationResult ruleResult)
 {
     this._ruleResult = ruleResult;
     this._smtSession = smtSession;
     this._engine     = engine;

     // runs last, once all three fields are set (presumably it reads them)
     this.UpdateCurrentResults();
 }
Ejemplo n.º 9
0
        /// <summary>
        /// Selects the target word indices to suggest after the current prefix. Words are taken while
        /// lookahead remains (or a phrase is in progress); a word is suggested when its confidence
        /// meets <c>ConfidenceThreshold</c> or its sources include <c>TranslationSources.Transfer</c>.
        /// Scanning stops at the first all-punctuation word, or at the first aligned word that does not
        /// qualify once at least one word has been suggested.
        /// </summary>
        /// <param name="prefixCount">The number of target words in the prefix.</param>
        /// <param name="isLastWordComplete"><c>false</c> when the prefix ends with a partial word.</param>
        /// <param name="result">The translation result to draw suggestions from.</param>
        /// <returns>
        /// A suggestion holding the chosen indices and the lowest confidence among them (0 when none
        /// were chosen). An empty suggestion is returned when the prefix ends with a partial word that
        /// either was not completed by the SMT source or does not correspond to a word of
        /// <paramref name="result"/> (for example when <paramref name="prefixCount"/> is 0).
        /// </returns>
        public TranslationSuggestion GetSuggestion(int prefixCount, bool isLastWordComplete,
                                                   TranslationResult result)
        {
            int startingJ = prefixCount;

            if (!isLastWordComplete)
            {
                // if the prefix ends with a partial word and it has been completed,
                // then make sure it is included as a suggestion,
                // otherwise, don't return any suggestions
                int partialWordIndex = startingJ - 1;
                // bounds are checked first so that an out-of-range prefix count yields no suggestion
                // instead of an indexing exception
                bool partialWordCompleted = partialWordIndex >= 0 &&
                                            partialWordIndex < result.WordSources.Count &&
                                            (result.WordSources[partialWordIndex] & TranslationSources.Smt) != 0;
                if (!partialWordCompleted)
                {
                    return(new TranslationSuggestion());
                }

                startingJ = partialWordIndex;
            }

            int    lookaheadCount = ComputeLookahead(prefixCount, result);
            int    j             = startingJ;
            bool   inPhrase      = false;
            var    indices       = new List <int>();
            double minConfidence = -1;   // -1 means no word has been suggested yet

            while (j < result.TargetSegment.Count && (lookaheadCount > 0 || inPhrase))
            {
                string word = result.TargetSegment[j];
                // stop suggesting at punctuation
                if (word.All(char.IsPunctuation))
                {
                    break;
                }

                // criteria for suggesting a word
                // the word must either:
                // - meet the confidence threshold
                // - come from a transfer engine
                double             confidence = result.WordConfidences[j];
                TranslationSources sources    = result.WordSources[j];
                if (confidence >= ConfidenceThreshold || (sources & TranslationSources.Transfer) != 0)
                {
                    indices.Add(j);
                    if (minConfidence < 0 || confidence < minConfidence)
                    {
                        minConfidence = confidence;
                    }
                    inPhrase = true;
                    lookaheadCount--;
                }
                else
                {
                    // skip over inserted words
                    if (result.Alignment.IsColumnAligned(j) == AlignmentType.Aligned)
                    {
                        lookaheadCount--;
                        // only suggest the first word/phrase we find
                        if (inPhrase)
                        {
                            break;
                        }
                    }
                }
                j++;
            }

            return(new TranslationSuggestion(indices, minConfidence < 0 ? 0 : minConfidence));
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Merges this result with another result, target word by target word. A target word of this
        /// result is kept when it has no aligned source words, lies inside the prefix, or has a
        /// confidence of at least <paramref name="threshold"/>; otherwise it is replaced by the other
        /// result's translated words for the same source words, if there are any.
        /// </summary>
        /// <param name="prefixCount">Target words with an index below this value are always kept.</param>
        /// <param name="threshold">Minimum confidence for keeping an aligned target word of this result.</param>
        /// <param name="otherResult">The result to merge in.</param>
        /// <returns>A new result built from the merged words, confidences, sources and alignment, with
        /// this result's source segment and phrases.</returns>
        public TranslationResult Merge(int prefixCount, double threshold, TranslationResult otherResult)
        {
            var words       = new List<string>();
            var confidences = new List<double>();
            var sourcesList = new List<TranslationSources>();
            var links       = new HashSet<Tuple<int, int>>();

            for (int j = 0; j < TargetSegment.Count; j++)
            {
                int[] alignedSources = Alignment.GetColumnAlignedIndices(j).ToArray();

                // An unaligned word is kept unconditionally; with no aligned source words the
                // alignment loop below simply does nothing for it.
                if (alignedSources.Length == 0 || j < prefixCount || WordConfidences[j] >= threshold)
                {
                    words.Add(TargetSegment[j]);
                    confidences.Add(WordConfidences[j]);
                    int keptIndex = words.Count - 1;

                    TranslationSources combined = WordSources[j];
                    foreach (int i in alignedSources)
                    {
                        // fold in the other result's sources wherever it translated the
                        // source word to the very same target word
                        foreach (int jOther in otherResult.Alignment.GetRowAlignedIndices(i))
                        {
                            TranslationSources otherSources = otherResult.WordSources[jOther];
                            if (otherSources == TranslationSources.None)
                            {
                                continue;
                            }
                            if (otherResult.TargetSegment[jOther] == TargetSegment[j])
                            {
                                combined |= otherSources;
                            }
                        }

                        links.Add(Tuple.Create(i, keptIndex));
                    }
                    sourcesList.Add(combined);
                    continue;
                }

                // low-confidence aligned word: prefer whatever the other result translated
                int countBefore = words.Count;
                foreach (int i in alignedSources)
                {
                    foreach (int jOther in otherResult.Alignment.GetRowAlignedIndices(i))
                    {
                        TranslationSources otherSources = otherResult.WordSources[jOther];
                        if (otherSources == TranslationSources.None)
                        {
                            continue;
                        }

                        words.Add(otherResult.TargetSegment[jOther]);
                        confidences.Add(otherResult.WordConfidences[jOther]);
                        sourcesList.Add(otherSources);
                        links.Add(Tuple.Create(i, words.Count - 1));
                    }
                }

                if (words.Count == countBefore)
                {
                    // the other result contributed nothing, so fall back to this result's word
                    words.Add(TargetSegment[j]);
                    confidences.Add(WordConfidences[j]);
                    sourcesList.Add(WordSources[j]);
                    int fallbackIndex = words.Count - 1;
                    foreach (int i in alignedSources)
                    {
                        links.Add(Tuple.Create(i, fallbackIndex));
                    }
                }
            }

            var alignment = new WordAlignmentMatrix(SourceSegment.Count, words.Count);
            foreach (Tuple<int, int> link in links)
            {
                alignment[link.Item1, link.Item2] = true;
            }

            return new TranslationResult(SourceSegment, words, confidences, sourcesList, alignment, Phrases);
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Selects the target word indices to suggest after the current prefix, phrase by phrase.
        /// Phrases whose <c>TargetSegmentCut</c> is at or before the starting word are skipped; after
        /// that, consecutive phrases are taken while their confidence meets <c>ConfidenceThreshold</c>.
        /// Within a phrase, scanning stops at the first word whose sources are
        /// <c>TranslationSources.None</c> or that consists only of punctuation.
        /// </summary>
        /// <param name="prefixCount">The number of target words in the prefix.</param>
        /// <param name="isLastWordComplete"><c>false</c> when the prefix ends with a partial word.</param>
        /// <param name="result">The translation result to draw suggestions from.</param>
        /// <returns>
        /// A suggestion holding the chosen indices and the lowest confidence among the phrases that
        /// were visited (0 when none were). An empty suggestion is returned when the prefix ends with a
        /// partial word that either was not completed by the SMT source or does not correspond to a
        /// word of <paramref name="result"/> (for example when <paramref name="prefixCount"/> is 0).
        /// </returns>
        public TranslationSuggestion GetSuggestion(int prefixCount, bool isLastWordComplete,
                                                   TranslationResult result)
        {
            int startingJ = prefixCount;

            if (!isLastWordComplete)
            {
                // if the prefix ends with a partial word and it has been completed,
                // then make sure it is included as a suggestion,
                // otherwise, don't return any suggestions
                int partialWordIndex = startingJ - 1;
                // bounds are checked first so that an out-of-range prefix count yields no suggestion
                // instead of an indexing exception
                bool partialWordCompleted = partialWordIndex >= 0 &&
                                            partialWordIndex < result.WordSources.Count &&
                                            (result.WordSources[partialWordIndex] & TranslationSources.Smt) != 0;
                if (!partialWordCompleted)
                {
                    return(new TranslationSuggestion());
                }

                startingJ = partialWordIndex;
            }

            // skip the phrases that end at or before the first word to suggest
            int k = 0;

            while (k < result.Phrases.Count && result.Phrases[k].TargetSegmentCut <= startingJ)
            {
                k++;
            }

            double minConfidence = -1;   // -1 means no phrase has been accepted yet
            var    indices       = new List <int>();

            for (; k < result.Phrases.Count; k++)
            {
                Phrase phrase = result.Phrases[k];
                if (phrase.Confidence >= ConfidenceThreshold)
                {
                    bool hitBreakingWord = false;
                    for (int j = startingJ; j < phrase.TargetSegmentCut; j++)
                    {
                        string             word    = result.TargetSegment[j];
                        TranslationSources sources = result.WordSources[j];
                        if (sources == TranslationSources.None || word.All(char.IsPunctuation))
                        {
                            hitBreakingWord = true;
                            break;
                        }
                        indices.Add(j);
                    }
                    if (minConfidence < 0 || phrase.Confidence < minConfidence)
                    {
                        minConfidence = phrase.Confidence;
                    }
                    // the next phrase starts where this one was cut
                    startingJ = phrase.TargetSegmentCut;
                    if (hitBreakingWord)
                    {
                        break;
                    }
                }
                else
                {
                    // stop at the first phrase that falls below the confidence threshold
                    break;
                }
            }

            return(new TranslationSuggestion(indices, minConfidence < 0 ? 0 : minConfidence));
        }