예제 #1
0
        /// <summary>
        /// Appends every word of <paramref name="arc"/> to <paramref name="builder"/> and then marks
        /// those words as a single phrase.
        /// </summary>
        /// <param name="builder">Builder that receives the words and the phrase mark.</param>
        /// <param name="arc">Arc supplying the words, word confidences, alignment and source range.</param>
        /// <param name="isPrefix">When true, the appended words are never flagged as unknown.</param>
        /// <param name="alignmentColsToAddCount">
        /// Number of empty columns to insert in front of the arc's alignment columns before marking the phrase.
        /// </param>
        private void UpdateCorrectionFromArc(TranslationResultBuilder builder, WordGraphArc arc, bool isPrefix,
                                             int alignmentColsToAddCount)
        {
            int wordIndex = 0;
            while (wordIndex < arc.Words.Count)
            {
                builder.AppendWord(arc.Words[wordIndex], arc.WordConfidences[wordIndex], !isPrefix && arc.IsUnknown);
                wordIndex++;
            }

            WordAlignmentMatrix phraseAlignment = arc.Alignment;
            if (alignmentColsToAddCount > 0)
            {
                // Copy the arc's alignment into a wider matrix, shifted right by the requested column count.
                int rowCount = phraseAlignment.RowCount;
                int colCount = phraseAlignment.ColumnCount;
                var widened = new WordAlignmentMatrix(rowCount, colCount + alignmentColsToAddCount);
                for (int row = 0; row < rowCount; row++)
                {
                    for (int col = 0; col < colCount; col++)
                    {
                        widened[row, alignmentColsToAddCount + col] = phraseAlignment[row, col];
                    }
                }
                phraseAlignment = widened;
            }

            builder.MarkPhrase(arc.SourceSegmentRange, phraseAlignment);
        }
        /// <summary>
        /// Runs a global pairwise alignment over the two segments and converts the first alignment
        /// returned into a word alignment matrix.
        /// </summary>
        /// <param name="sourceSegment">Source words; one matrix row per word.</param>
        /// <param name="targetSegment">Target words; one matrix column per word.</param>
        /// <param name="hintMatrix">Not read by this implementation.</param>
        /// <returns>A matrix in which every source/target index pair sharing an alignment column is marked aligned.</returns>
        public WordAlignmentMatrix GetBestAlignment(IReadOnlyList <string> sourceSegment, IReadOnlyList <string> targetSegment,
                                                    WordAlignmentMatrix hintMatrix = null)
        {
            var aligner = new PairwiseAlignmentAlgorithm<IReadOnlyList<string>, int>(_scorer, sourceSegment,
                                                                                     targetSegment, GetWordIndices);
            aligner.Mode = AlignmentMode.Global;
            aligner.ExpansionCompressionEnabled = true;
            aligner.TranspositionEnabled = true;
            aligner.Compute();

            Alignment<IReadOnlyList<string>, int> best = aligner.GetAlignments().First();
            var result = new WordAlignmentMatrix(sourceSegment.Count, targetSegment.Count);

            for (int col = 0; col < best.ColumnCount; col++)
            {
                // Cross product of the source indices and target indices that share this column.
                foreach (int srcIndex in best[0, col])
                {
                    foreach (int trgIndex in best[1, col])
                    {
                        result[srcIndex, trgIndex] = AlignmentType.Aligned;
                    }
                }
            }

            return result;
        }
예제 #3
0
        /// <summary>
        /// Symmetrizes this matrix with <paramref name="other"/> in place. The matrix is first reduced to
        /// the intersection of the two, then cells that are aligned in either original matrix are added back
        /// repeatedly until a full pass leaves the matrix unchanged.
        /// </summary>
        /// <param name="other">Matrix of the same dimensions to symmetrize with.</param>
        /// <exception cref="ArgumentException">The two matrices differ in row or column count.</exception>
        public void SymmetrizeWith(WordAlignmentMatrix other)
        {
            bool sameSize = RowCount == other.RowCount && ColumnCount == other.ColumnCount;
            if (!sameSize)
            {
                throw new ArgumentException("The matrices are not the same size.", nameof(other));
            }

            // Snapshot of this matrix before the intersection overwrites it.
            WordAlignmentMatrix original = Clone();
            IntersectWith(other);

            WordAlignmentMatrix previous = null;
            while (!ValueEquals(previous))
            {
                previous = Clone();
                for (int row = 0; row < RowCount; row++)
                {
                    for (int col = 0; col < ColumnCount; col++)
                    {
                        if (_matrix[row, col] != AlignmentType.NotAligned)
                        {
                            continue;
                        }

                        bool inEitherInput = other._matrix[row, col] == AlignmentType.Aligned
                                             || original._matrix[row, col] == AlignmentType.Aligned;
                        if (!inEitherInput)
                        {
                            continue;
                        }

                        // Accept the cell when both its row and column are still free, or when
                        // IsNeighborAligned reports an aligned neighbor.
                        bool rowAndColumnFree = IsColumnAligned(col) == AlignmentType.NotAligned
                                                && IsRowAligned(row) == AlignmentType.NotAligned;
                        if (rowAndColumnFree || IsNeighborAligned(row, col))
                        {
                            _matrix[row, col] = AlignmentType.Aligned;
                        }
                    }
                }
            }
        }
예제 #4
0
        /// <summary>
        /// Creates a translation result, materializing each input sequence and checking that the
        /// per-word data and the alignment agree with the segment lengths.
        /// </summary>
        /// <param name="sourceSegment">Source words.</param>
        /// <param name="targetSegment">Target words.</param>
        /// <param name="confidences">One confidence per target word.</param>
        /// <param name="sources">One source flag value per target word.</param>
        /// <param name="alignment">Alignment with one row per source word and one column per target word.</param>
        /// <param name="phrases">Phrases of the result.</param>
        /// <exception cref="ArgumentException">
        /// The confidences, sources or alignment dimensions do not match the segment lengths.
        /// </exception>
        public TranslationResult(IEnumerable <string> sourceSegment, IEnumerable <string> targetSegment,
                                 IEnumerable <double> confidences, IEnumerable <TranslationSources> sources, WordAlignmentMatrix alignment,
                                 IEnumerable <Phrase> phrases)
        {
            SourceSegment = sourceSegment.ToArray();
            TargetSegment = targetSegment.ToArray();
            int targetLength = TargetSegment.Count;

            WordConfidences = confidences.ToArray();
            if (WordConfidences.Count != targetLength)
            {
                throw new ArgumentException("The confidences must be the same length as the target segment.",
                                            nameof(confidences));
            }

            WordSources = sources.ToArray();
            if (WordSources.Count != targetLength)
            {
                throw new ArgumentException("The sources must be the same length as the target segment.",
                                            nameof(sources));
            }

            Alignment = alignment;
            bool rowsMatch = Alignment.RowCount == SourceSegment.Count;
            if (!rowsMatch)
            {
                throw new ArgumentException(
                          "The alignment source length must be the same length as the source segment.", nameof(alignment));
            }
            bool columnsMatch = Alignment.ColumnCount == targetLength;
            if (!columnsMatch)
            {
                throw new ArgumentException(
                          "The alignment target length must be the same length as the target segment.", nameof(alignment));
            }

            Phrases = phrases.ToArray();
        }
예제 #5
0
        /// <summary>
        /// Builds a word alignment matrix from the aligned word pairs of a parallel segment.
        /// </summary>
        /// <param name="segment">Segment whose aligned word pairs are converted.</param>
        /// <param name="isUnknown">
        /// When true, cells start as <c>Unknown</c> and, for each aligned pair, the remaining unknown cells
        /// in that pair's row and column become <c>NotAligned</c>. When false, cells start as <c>NotAligned</c>.
        /// </param>
        /// <returns>The matrix, or null when the segment has no aligned word pairs collection.</returns>
        public static WordAlignmentMatrix CreateAlignmentMatrix(this ParallelTextSegment segment, bool isUnknown = true)
        {
            if (segment.AlignedWordPairs == null)
            {
                return null;
            }

            int sourceCount = segment.SourceSegment.Count;
            int targetCount = segment.TargetSegment.Count;
            AlignmentType initialValue = isUnknown ? AlignmentType.Unknown : AlignmentType.NotAligned;
            var result = new WordAlignmentMatrix(sourceCount, targetCount, initialValue);

            foreach (AlignedWordPair pair in segment.AlignedWordPairs)
            {
                int src = pair.SourceIndex;
                int trg = pair.TargetIndex;
                result[src, trg] = AlignmentType.Aligned;
                if (!isUnknown)
                {
                    continue;
                }

                // Resolve the still-unknown cells in the aligned target column...
                for (int row = 0; row < sourceCount; row++)
                {
                    if (result[row, trg] == AlignmentType.Unknown)
                    {
                        result[row, trg] = AlignmentType.NotAligned;
                    }
                }

                // ...and in the aligned source row.
                for (int col = 0; col < targetCount; col++)
                {
                    if (result[src, col] == AlignmentType.Unknown)
                    {
                        result[src, col] = AlignmentType.NotAligned;
                    }
                }
            }

            return result;
        }
예제 #6
0
        /// <summary>
        /// Trains the SMT engine on the current source segment and prefix, using the hint matrix
        /// obtained from the engine for that pair.
        /// </summary>
        /// <returns>The hint matrix that was passed to training.</returns>
        public WordAlignmentMatrix Approve()
        {
            CheckDisposed();

            WordAlignmentMatrix hintMatrix = _engine.GetHintMatrix(SourceSegment, Prefix, _ruleResult);
            _engine.SmtEngine.TrainSegment(SourceSegment, Prefix, hintMatrix);

            return hintMatrix;
        }
예제 #7
0
        /// <summary>
        /// Translates a segment by analyzing each source word, transferring the analyses and generating
        /// target words, yielding at most <paramref name="n"/> results.
        /// </summary>
        /// <remarks>
        /// A target analysis that yields no word falls back to the first source word aligned to it
        /// (confidence 0, source <c>None</c>). If it has no aligned source word either, it is dropped.
        /// The alignment matrix is built with one column per emitted target word, so dropped analyses
        /// neither leave extra columns nor shift the columns of the words that follow.
        /// </remarks>
        /// <param name="n">Maximum number of transfer results to translate.</param>
        /// <param name="segment">Source words.</param>
        /// <returns>A lazily evaluated sequence of results, each with one phrase covering the whole source segment.</returns>
        public IEnumerable <TranslationResult> Translate(int n, IReadOnlyList <string> segment)
        {
            IEnumerable <IEnumerable <WordAnalysis> > sourceAnalyses = segment
                                                                       .Select(word => _sourceAnalyzer.AnalyzeWord(word));

            foreach (TransferResult transferResult in _transferer.Transfer(sourceAnalyses).Take(n))
            {
                IReadOnlyList <WordAnalysis> targetAnalyses = transferResult.TargetAnalyses;
                WordAlignmentMatrix          waMatrix       = transferResult.WordAlignmentMatrix;

                var translation = new List <string>();
                var confidences = new List <double>();
                var sources     = new List <TranslationSources>();
                // Source indices aligned to each emitted target word, parallel to "translation".
                var    alignedSourceIndices = new List <int[]>();
                double confidence           = double.MaxValue;
                for (int j = 0; j < targetAnalyses.Count; j++)
                {
                    int[] sourceIndices = Enumerable.Range(0, waMatrix.RowCount)
                                          .Where(i => waMatrix[i, j]).ToArray();
                    string targetWord = targetAnalyses[j].IsEmpty
                                                ? null
                                                : _targetGenerator.GenerateWords(targetAnalyses[j]).FirstOrDefault();
                    double             wordConfidence = 1.0;
                    TranslationSources source         = TranslationSources.Transfer;
                    int[]              wordAlignment  = sourceIndices;
                    if (targetWord == null && sourceIndices.Length > 0)
                    {
                        // No target word could be generated: pass the first aligned source word through.
                        int firstSourceIndex = sourceIndices[0];
                        targetWord     = segment[firstSourceIndex];
                        wordConfidence = 0;
                        source         = TranslationSources.None;
                        wordAlignment  = new[] { firstSourceIndex };
                    }

                    if (targetWord == null)
                    {
                        // Nothing to emit for this analysis; it must not occupy an alignment column.
                        continue;
                    }

                    translation.Add(targetWord);
                    confidences.Add(wordConfidence);
                    sources.Add(source);
                    alignedSourceIndices.Add(wordAlignment);
                    confidence = Math.Min(confidence, wordConfidence);
                }

                // Size the alignment by the words actually emitted so its columns match the translation.
                var alignment = new WordAlignmentMatrix(segment.Count, translation.Count);
                for (int trgIndex = 0; trgIndex < alignedSourceIndices.Count; trgIndex++)
                {
                    foreach (int srcIndex in alignedSourceIndices[trgIndex])
                    {
                        alignment[srcIndex, trgIndex] = true;
                    }
                }

                yield return(new TranslationResult(segment, translation, confidences, sources, alignment,
                                                   new[] { new Phrase(Range <int> .Create(0, segment.Count), translation.Count, confidence) }));
            }
        }
예제 #8
0
        /// <summary>
        /// Adds a segment pair to the direct model and, with source and target swapped, to the inverse model.
        /// </summary>
        /// <param name="sourceSegment">Source words.</param>
        /// <param name="targetSegment">Target words.</param>
        /// <param name="hintMatrix">
        /// Optional hint passed to the direct model as is; a transposed clone is passed to the inverse model.
        /// </param>
        public void AddSegmentPair(IReadOnlyList <string> sourceSegment, IReadOnlyList <string> targetSegment,
                                   WordAlignmentMatrix hintMatrix = null)
        {
            _directWordAlignmentModel.AddSegmentPair(sourceSegment, targetSegment, hintMatrix);

            // Transpose a copy so the caller's hint matrix is left untouched.
            WordAlignmentMatrix transposedHint = null;
            if (hintMatrix != null)
            {
                transposedHint = hintMatrix.Clone();
                transposedHint.Transpose();
            }

            _inverseWordAlignmentModel.AddSegmentPair(targetSegment, sourceSegment, transposedHint);
        }
        /// <summary>
        /// Aligns the segments in both directions and symmetrizes the source-to-target alignment with the
        /// transposed target-to-source alignment.
        /// </summary>
        /// <param name="sourceSegment">Source words.</param>
        /// <param name="targetSegment">Target words.</param>
        /// <returns>The symmetrized source-to-target alignment.</returns>
        public WordAlignmentMatrix GetBestAlignment(IReadOnlyList <string> sourceSegment,
                                                    IReadOnlyList <string> targetSegment)
        {
            WordAlignmentMatrix forward  = _srcTrgAligner.GetBestAlignment(sourceSegment, targetSegment);
            WordAlignmentMatrix backward = _trgSrcAligner.GetBestAlignment(targetSegment, sourceSegment);

            // Bring the inverse alignment into source-by-target orientation before combining.
            backward.Transpose();
            forward.SymmetrizeWith(backward);

            return forward;
        }
예제 #10
0
        /// <summary>
        /// Aligns a parallel segment after preprocessing and renders the alignment in GIZA format.
        /// </summary>
        /// <param name="aligner">Aligner used to compute the alignment.</param>
        /// <param name="segment">Segment providing the words and any known aligned word pairs.</param>
        /// <param name="sourcePreprocessor">Optional preprocessor passed to the source words' <c>Preprocess</c> call.</param>
        /// <param name="targetPreprocessor">Optional preprocessor passed to the target words' <c>Preprocess</c> call.</param>
        /// <returns>The result of <c>ToGizaFormat</c> on the computed alignment.</returns>
        public static string GetGizaFormatString(this ISegmentAligner aligner, ParallelTextSegment segment,
                                                 Func <string, string> sourcePreprocessor = null, Func <string, string> targetPreprocessor = null)
        {
            IReadOnlyList<string> source = segment.SourceSegment.Preprocess(sourcePreprocessor);
            IReadOnlyList<string> target = segment.TargetSegment.Preprocess(targetPreprocessor);

            WordAlignmentMatrix knownAlignment = segment.CreateAlignmentMatrix();
            WordAlignmentMatrix bestAlignment  = aligner.GetBestAlignment(source, target, knownAlignment);

            return bestAlignment.ToGizaFormat(source, target);
        }
예제 #11
0
 /// <summary>
 /// Copy constructor: allocates a new backing array with the dimensions of <paramref name="other"/>
 /// and copies every cell into it.
 /// </summary>
 /// <param name="other">Matrix to copy.</param>
 private WordAlignmentMatrix(WordAlignmentMatrix other)
 {
     int rowCount = other.RowCount;
     int colCount = other.ColumnCount;

     _matrix = new AlignmentType[rowCount, colCount];
     for (int row = 0; row < rowCount; row++)
     {
         for (int col = 0; col < colCount; col++)
         {
             _matrix[row, col] = other._matrix[row, col];
         }
     }
 }
예제 #12
0
        /// <summary>
        /// Trains the SMT engine on a segment pair, using a hint matrix computed from the pair and the
        /// rule engine's translation of the source (when a rule engine is set).
        /// </summary>
        /// <param name="sourceSegment">Source words.</param>
        /// <param name="targetSegment">Target words.</param>
        /// <returns>The hint matrix that was passed to training.</returns>
        public WordAlignmentMatrix TrainSegment(IReadOnlyList <string> sourceSegment,
                                                IReadOnlyList <string> targetSegment)
        {
            CheckDisposed();

            // ruleResult stays null when there is no rule engine.
            TranslationResult   ruleResult = RuleEngine?.Translate(sourceSegment);
            WordAlignmentMatrix hintMatrix = GetHintMatrix(sourceSegment, targetSegment, ruleResult);

            SmtEngine.TrainSegment(sourceSegment, targetSegment, hintMatrix);

            return hintMatrix;
        }
예제 #13
0
        /// <summary>
        /// Returns the target word at <paramref name="targetIndex"/>, converted to title case when any
        /// source word aligned to it is title case.
        /// </summary>
        /// <param name="alignment">Alignment used to find the source words aligned to the target word.</param>
        /// <param name="sourceSegment">Source words.</param>
        /// <param name="sourceStartIndex">Offset added to each aligned row index to index into <paramref name="sourceSegment"/>.</param>
        /// <param name="targetSegment">Target words.</param>
        /// <param name="targetIndex">Index of the target word; also the alignment column that is inspected.</param>
        /// <returns>The target word, title-cased or unchanged.</returns>
        public static string RecaseTargetWord(this WordAlignmentMatrix alignment, IReadOnlyList <string> sourceSegment,
                                              int sourceStartIndex, IReadOnlyList <string> targetSegment, int targetIndex)
        {
            string word = targetSegment[targetIndex];

            foreach (int alignedIndex in alignment.GetColumnAlignedIndices(targetIndex))
            {
                // The first title-cased aligned source word decides the outcome.
                if (sourceSegment[sourceStartIndex + alignedIndex].IsTitleCase())
                {
                    return word.ToTitleCase();
                }
            }

            return word;
        }
예제 #14
0
        /// <summary>
        /// Computes the aligner's best alignment and, when a known alignment is supplied, merges the two
        /// by priority-symmetrizing a clone of the known alignment with the estimate.
        /// </summary>
        /// <param name="aligner">Aligner that produces the estimated alignment.</param>
        /// <param name="sourceSegment">Source words.</param>
        /// <param name="targetSegment">Target words.</param>
        /// <param name="knownAlignment">Known alignment, or null to return the estimate unchanged.</param>
        /// <returns>The estimated alignment, or the merged clone of <paramref name="knownAlignment"/>.</returns>
        public static WordAlignmentMatrix GetBestAlignment(this ISegmentAligner aligner,
                                                           IReadOnlyList <string> sourceSegment, IReadOnlyList <string> targetSegment,
                                                           WordAlignmentMatrix knownAlignment)
        {
            WordAlignmentMatrix estimated = aligner.GetBestAlignment(sourceSegment, targetSegment);
            if (knownAlignment == null)
            {
                return estimated;
            }

            // Work on a clone so the caller's known alignment is not modified.
            WordAlignmentMatrix merged = knownAlignment.Clone();
            merged.PrioritySymmetrizeWith(estimated);
            return merged;
        }
        /// <summary>
        /// Builds a <see cref="TranslationResult"/> from the words, confidences and phrases collected so far.
        /// </summary>
        /// <param name="sourceSegment">Source words; determines the row count of the result alignment.</param>
        /// <param name="prefixCount">Number of leading target words that are marked with the <c>Prefix</c> source.</param>
        /// <returns>The assembled translation result.</returns>
        public TranslationResult ToResult(IReadOnlyList <string> sourceSegment, int prefixCount = 0)
        {
            double[] confidences = _confidences.ToArray();
            var      sources     = new TranslationSources[Words.Count];
            var      alignment   = new WordAlignmentMatrix(sourceSegment.Count, Words.Count);
            var      phrases     = new List<Phrase>();

            // Target index at which the current phrase begins (the previous phrase's target cut).
            int phraseStart = 0;
            foreach (PhraseInfo info in _phrases)
            {
                int    srcStart         = info.SourceSegmentRange.Start;
                int    srcEnd           = info.SourceSegmentRange.End;
                double phraseConfidence = double.MaxValue;

                for (int trg = phraseStart; trg < info.TargetCut; trg++)
                {
                    // Copy the phrase-local alignment cells into segment-wide coordinates.
                    for (int src = srcStart; src < srcEnd; src++)
                    {
                        if (info.Alignment[src - srcStart, trg - phraseStart] == AlignmentType.Aligned)
                        {
                            alignment[src, trg] = AlignmentType.Aligned;
                        }
                    }

                    TranslationSources wordSource;
                    if (trg < prefixCount)
                    {
                        wordSource = TranslationSources.Prefix;
                        if (_uncorrectedPrefixWords.Contains(trg))
                        {
                            wordSource |= TranslationSources.Smt;
                        }
                    }
                    else if (_unknownWords.Contains(trg))
                    {
                        wordSource = TranslationSources.None;
                    }
                    else
                    {
                        wordSource = TranslationSources.Smt;
                    }
                    sources[trg] = wordSource;

                    // The phrase confidence is the minimum confidence of its target words.
                    phraseConfidence = Math.Min(phraseConfidence, Confidences[trg]);
                }

                phrases.Add(new Phrase(info.SourceSegmentRange, info.TargetCut, phraseConfidence));
                phraseStart = info.TargetCut;
            }

            return new TranslationResult(sourceSegment, Words, confidences, sources, alignment, phrases);
        }
예제 #16
0
        /// <summary>
        /// Builds a hint matrix for a segment pair. Cells start as <c>Unknown</c>; target words whose source
        /// flags include <c>Transfer</c> have their aligned cells marked <c>Aligned</c>, and the remaining
        /// unknown cells in every row and column touched by such an alignment become <c>NotAligned</c>.
        /// </summary>
        /// <param name="sourceSegment">Source words.</param>
        /// <param name="targetSegment">Target words.</param>
        /// <param name="ruleResult">Rule-based result to merge into the SMT phrase alignment, or null to use the SMT result alone.</param>
        /// <returns>The hint matrix with one row per source word and one column per target word.</returns>
        internal WordAlignmentMatrix GetHintMatrix(IReadOnlyList <string> sourceSegment,
                                                   IReadOnlyList <string> targetSegment, TranslationResult ruleResult)
        {
            TranslationResult smtResult    = SmtEngine.GetBestPhraseAlignment(sourceSegment, targetSegment);
            TranslationResult hybridResult = smtResult;
            if (ruleResult != null)
            {
                hybridResult = smtResult.Merge(targetSegment.Count, RuleEngineThreshold, ruleResult);
            }

            var hints       = new WordAlignmentMatrix(sourceSegment.Count, targetSegment.Count, AlignmentType.Unknown);
            var alignedRows = new HashSet<int>();

            for (int col = 0; col < targetSegment.Count; col++)
            {
                bool fromTransfer = col < hybridResult.WordSources.Count
                                    && (hybridResult.WordSources[col] & TranslationSources.Transfer) != 0;
                if (!fromTransfer)
                {
                    continue;
                }

                bool columnAligned = false;
                foreach (int row in hybridResult.Alignment.GetColumnAlignedIndices(col))
                {
                    hints[row, col] = AlignmentType.Aligned;
                    alignedRows.Add(row);
                    columnAligned = true;
                }
                if (!columnAligned)
                {
                    continue;
                }

                // The column has a known alignment, so its other cells are known not to align.
                for (int row = 0; row < sourceSegment.Count; row++)
                {
                    if (hints[row, col] == AlignmentType.Unknown)
                    {
                        hints[row, col] = AlignmentType.NotAligned;
                    }
                }
            }

            // Same resolution for every source row that received an alignment.
            foreach (int row in alignedRows)
            {
                for (int col = 0; col < targetSegment.Count; col++)
                {
                    if (hints[row, col] == AlignmentType.Unknown)
                    {
                        hints[row, col] = AlignmentType.NotAligned;
                    }
                }
            }

            return hints;
        }
예제 #17
0
        /// <summary>
        /// Aligns a parallel segment after preprocessing and returns the alignment as a string.
        /// </summary>
        /// <param name="model">Model used to compute the alignment and, optionally, to format it.</param>
        /// <param name="segment">Segment providing the words and any known aligned word pairs.</param>
        /// <param name="includeProbs">
        /// When true, the <c>ToString</c> overload taking the model and both segments is used;
        /// otherwise the parameterless <c>ToString</c>.
        /// </param>
        /// <param name="sourcePreprocessor">Optional preprocessor passed to the source words' <c>Preprocess</c> call.</param>
        /// <param name="targetPreprocessor">Optional preprocessor passed to the target words' <c>Preprocess</c> call.</param>
        /// <returns>The string form of the computed alignment.</returns>
        public static string GetAlignmentString(this IWordAlignmentModel model, ParallelTextSegment segment,
                                                bool includeProbs, Func <string, string> sourcePreprocessor = null,
                                                Func <string, string> targetPreprocessor = null)
        {
            IReadOnlyList<string> source = segment.SourceSegment.Preprocess(sourcePreprocessor);
            IReadOnlyList<string> target = segment.TargetSegment.Preprocess(targetPreprocessor);

            WordAlignmentMatrix knownAlignment = segment.CreateAlignmentMatrix();
            WordAlignmentMatrix bestAlignment  = model.GetBestAlignment(source, target, knownAlignment);

            return includeProbs
                ? bestAlignment.ToString(model, source, target)
                : bestAlignment.ToString();
        }
예제 #18
0
        /// <summary>
        /// Builds a <see cref="TranslationResult"/> from a whitespace-separated target string and a list
        /// of confidences, with source words named by their index.
        /// </summary>
        /// <remarks>
        /// A non-negative confidence aligns the current source index to the current target index and
        /// advances both. A negative confidence advances only the target index when the target is longer
        /// than the source, and only the source index when it is shorter.
        /// </remarks>
        /// <param name="sourceLen">Number of source words.</param>
        /// <param name="prefixLen">Target indices below this value are marked with the <c>Prefix</c> source.</param>
        /// <param name="target">Target words separated by whitespace.</param>
        /// <param name="confidences">Confidence values that drive the alignment walk.</param>
        /// <returns>A result with a single phrase covering the whole source range.</returns>
        /// <exception cref="ArgumentException">A negative confidence was given while source and target have equal length.</exception>
        private static TranslationResult CreateResult(int sourceLen, int prefixLen, string target,
                                                      params double[] confidences)
        {
            string[] words           = target.Split();
            int      targetLen       = words.Length;
            var      wordConfidences = new double[targetLen];
            var      wordSources     = new TranslationSources[targetLen];
            var      alignment       = new WordAlignmentMatrix(sourceLen, targetLen);
            int      srcIndex        = 0;
            int      trgIndex        = 0;
            double   minConfidence   = double.MaxValue;

            for (int k = 0; k < confidences.Length; k++)
            {
                double confidence = confidences[k];

                if (trgIndex < prefixLen)
                {
                    wordSources[trgIndex] = TranslationSources.Prefix;
                }

                if (confidence >= 0)
                {
                    alignment[srcIndex, trgIndex] = true;
                    wordConfidences[trgIndex]     = confidence;
                    if (confidence > 0)
                    {
                        wordSources[trgIndex] |= TranslationSources.Smt;
                    }
                    srcIndex++;
                    trgIndex++;
                }
                else if (targetLen > sourceLen)
                {
                    // Extra target word with no source counterpart.
                    wordConfidences[trgIndex] = confidence;
                    trgIndex++;
                }
                else if (targetLen < sourceLen)
                {
                    // Source word with no target counterpart.
                    srcIndex++;
                }
                else
                {
                    throw new ArgumentException("A confidence was incorrectly set below 0.", nameof(confidences));
                }

                minConfidence = Math.Min(minConfidence, confidence);
            }

            IEnumerable<string> sourceWords = Enumerable.Range(0, sourceLen).Select(index => index.ToString());
            var wholePhrase = new Phrase(Range<int>.Create(0, sourceLen), targetLen, minConfidence);

            return new TranslationResult(sourceWords, words, wordConfidences, wordSources, alignment,
                                         new[] { wholePhrase });
        }
예제 #19
0
        /// <summary>
        /// Builds a word alignment matrix in which each aligned word pair of the segment is set to true.
        /// </summary>
        /// <param name="segment">Segment whose aligned word pairs are converted.</param>
        /// <returns>The matrix, or null when the segment has no aligned word pairs collection.</returns>
        public static WordAlignmentMatrix CreateAlignmentMatrix(this ParallelTextSegment segment)
        {
            if (segment.AlignedWordPairs == null)
            {
                return null;
            }

            int sourceCount = segment.SourceSegment.Count;
            int targetCount = segment.TargetSegment.Count;
            var result      = new WordAlignmentMatrix(sourceCount, targetCount);

            foreach (AlignedWordPair pair in segment.AlignedWordPairs)
            {
                result[pair.SourceIndex, pair.TargetIndex] = true;
            }

            return result;
        }
예제 #20
0
        /// <summary>
        /// Transfers source word analyses to target analyses one word at a time. For each word, the first
        /// analysis option whose morphemes all map to target morphemes is used; a word with no such option
        /// gets a default-constructed analysis.
        /// </summary>
        /// <param name="sourceAnalyses">For each source word, its candidate analyses.</param>
        /// <returns>
        /// A single transfer result holding the target analyses and a square alignment matrix with
        /// its diagonal set to true.
        /// </returns>
        public IEnumerable <TransferResult> Transfer(IEnumerable <IEnumerable <WordAnalysis> > sourceAnalyses)
        {
            var transferred = new List<WordAnalysis>();

            foreach (IEnumerable<WordAnalysis> options in sourceAnalyses)
            {
                bool mapped = false;
                foreach (WordAnalysis option in options)
                {
                    var mappedMorphemes = new List<IMorpheme>();
                    foreach (IMorpheme morpheme in option.Morphemes)
                    {
                        IMorpheme counterpart;
                        if (_morphemeMapper.TryGetTargetMorpheme(morpheme, out counterpart))
                        {
                            mappedMorphemes.Add(counterpart);
                        }
                        else
                        {
                            // One unmapped morpheme disqualifies this option.
                            break;
                        }
                    }

                    if (mappedMorphemes.Count != option.Morphemes.Count)
                    {
                        continue;
                    }

                    transferred.Add(new WordAnalysis(mappedMorphemes, option.RootMorphemeIndex, option.Category));
                    mapped = true;
                    break;
                }

                if (!mapped)
                {
                    transferred.Add(new WordAnalysis());
                }
            }

            // One analysis is produced per source word, so word k aligns to word k.
            int wordCount = transferred.Count;
            var identity  = new WordAlignmentMatrix(wordCount, wordCount);
            for (int k = 0; k < wordCount; k++)
            {
                identity[k, k] = true;
            }

            return new TransferResult(transferred, identity).ToEnumerable();
        }
예제 #21
0
        /// <summary>
        /// Intersects this matrix with <paramref name="other"/> in place: a cell stays <c>Aligned</c> only
        /// when it is <c>Aligned</c> in both matrices; every other cell becomes <c>NotAligned</c>.
        /// </summary>
        /// <param name="other">Matrix of the same dimensions to intersect with.</param>
        /// <exception cref="ArgumentException">The two matrices differ in row or column count.</exception>
        public void IntersectWith(WordAlignmentMatrix other)
        {
            bool sameSize = RowCount == other.RowCount && ColumnCount == other.ColumnCount;
            if (!sameSize)
            {
                throw new ArgumentException("The matrices are not the same size.", nameof(other));
            }

            for (int row = 0; row < RowCount; row++)
            {
                for (int col = 0; col < ColumnCount; col++)
                {
                    bool missingOnEitherSide = _matrix[row, col] != AlignmentType.Aligned
                                               || other._matrix[row, col] != AlignmentType.Aligned;
                    if (missingOnEitherSide)
                    {
                        _matrix[row, col] = AlignmentType.NotAligned;
                    }
                }
            }
        }
        /// <summary>
        /// Aligns the segments in both directions, passing the hint to each aligner in the matching
        /// orientation, and symmetrizes the source-to-target alignment with the transposed inverse alignment.
        /// </summary>
        /// <param name="sourceSegment">Source words.</param>
        /// <param name="targetSegment">Target words.</param>
        /// <param name="hintMatrix">Optional hint; a transposed clone is given to the inverse aligner.</param>
        /// <returns>The symmetrized source-to-target alignment.</returns>
        public WordAlignmentMatrix GetBestAlignment(IReadOnlyList <string> sourceSegment, IReadOnlyList <string> targetSegment,
                                                    WordAlignmentMatrix hintMatrix = null)
        {
            WordAlignmentMatrix forward = _srcTrgAligner.GetBestAlignment(sourceSegment, targetSegment, hintMatrix);

            // Transpose a copy of the hint so the caller's matrix is left untouched.
            WordAlignmentMatrix transposedHint = hintMatrix?.Clone();
            transposedHint?.Transpose();

            WordAlignmentMatrix backward = _trgSrcAligner.GetBestAlignment(targetSegment, sourceSegment, transposedHint);
            backward.Transpose();

            forward.SymmetrizeWith(backward);
            return forward;
        }
예제 #23
0
 /// <summary>
 /// Creates a word graph arc between two states.
 /// </summary>
 /// <param name="prevState">State the arc leaves.</param>
 /// <param name="nextState">State the arc enters.</param>
 /// <param name="score">Score of the arc.</param>
 /// <param name="words">Words on the arc; copied into an array.</param>
 /// <param name="alignment">Word alignment for the arc.</param>
 /// <param name="sourceSegmentRange">Source range associated with the arc.</param>
 /// <param name="isUnknown">Value stored in <c>IsUnknown</c>.</param>
 /// <param name="wordConfidences">
 /// Per-word confidences; when null, every word gets a confidence of -1.0.
 /// </param>
 public WordGraphArc(int prevState, int nextState, double score, IEnumerable <string> words,
                     WordAlignmentMatrix alignment, Range <int> sourceSegmentRange, bool isUnknown,
                     IEnumerable <double> wordConfidences = null)
 {
     PrevState          = prevState;
     NextState          = nextState;
     Score              = score;
     Words              = words.ToArray();
     Alignment          = alignment;
     SourceSegmentRange = sourceSegmentRange;
     IsUnknown          = isUnknown;

     // Words must already be assigned here: the default list is sized from it.
     WordConfidences = wordConfidences == null
         ? Enumerable.Repeat(-1.0, Words.Count).ToList()
         : wordConfidences.ToList();
 }
        /// <summary>
        /// Replaces the alignment of a phrase with a matrix whose columns are selected from the current one.
        /// Does nothing when the requested column count equals the current column count.
        /// </summary>
        /// <param name="phraseIndex">Index of the phrase in the phrase list.</param>
        /// <param name="colsToCopy">
        /// For each new column, the index of the current column to copy, or -1 to leave the new column
        /// at its default values.
        /// </param>
        private void ResizeAlignment(int phraseIndex, List <int> colsToCopy)
        {
            WordAlignmentMatrix current = _phrases[phraseIndex].Alignment;
            if (colsToCopy.Count == current.ColumnCount)
            {
                return;
            }

            var resized = new WordAlignmentMatrix(current.RowCount, colsToCopy.Count);
            for (int col = 0; col < resized.ColumnCount; col++)
            {
                int sourceCol = colsToCopy[col];
                if (sourceCol == -1)
                {
                    continue;
                }

                for (int row = 0; row < resized.RowCount; row++)
                {
                    resized[row, col] = current[row, sourceCol];
                }
            }

            _phrases[phraseIndex].Alignment = resized;
        }
예제 #25
0
        /// <summary>
        /// Compares this matrix with another cell by cell.
        /// </summary>
        /// <param name="other">Matrix to compare with; may be null.</param>
        /// <returns>
        /// True when <paramref name="other"/> is non-null, has the same dimensions and holds the same
        /// value in every cell; otherwise false.
        /// </returns>
        public bool ValueEquals(WordAlignmentMatrix other)
        {
            if (other == null || RowCount != other.RowCount || ColumnCount != other.ColumnCount)
            {
                return false;
            }

            for (int row = 0; row < RowCount; row++)
            {
                for (int col = 0; col < ColumnCount; col++)
                {
                    if (_matrix[row, col] != other._matrix[row, col])
                    {
                        return false;
                    }
                }
            }

            return true;
        }
        /// <summary>
        /// Runs a global pairwise alignment (expansion/compression and transposition enabled) over the two
        /// segments, takes the first alignment returned, and then, for every target index in every
        /// alignment column, chooses which source index or indices to mark as aligned by comparing a
        /// baseline score with the scores of nearby source words.
        /// </summary>
        /// <param name="sourceSegment">Source words; one matrix row per word.</param>
        /// <param name="targetSegment">Target words; one matrix column per word.</param>
        /// <param name="hintMatrix">Not read by this method body.</param>
        /// <returns>A matrix with the selected source/target cells set to <c>Aligned</c>.</returns>
        public WordAlignmentMatrix GetBestAlignment(IReadOnlyList <string> sourceSegment,
                                                    IReadOnlyList <string> targetSegment, WordAlignmentMatrix hintMatrix = null)
        {
            var paa = new PairwiseAlignmentAlgorithm <IReadOnlyList <string>, int>(_scorer, sourceSegment, targetSegment,
                                                                                   GetWordIndices)
            {
                Mode = AlignmentMode.Global,
                ExpansionCompressionEnabled = true,
                TranspositionEnabled        = true
            };

            paa.Compute();
            Alignment <IReadOnlyList <string>, int> alignment = paa.GetAlignments().First();
            var waMatrix = new WordAlignmentMatrix(sourceSegment.Count, targetSegment.Count);

            for (int c = 0; c < alignment.ColumnCount; c++)
            {
                // Row 1 of the pairwise alignment holds target indices, row 0 holds source indices.
                foreach (int j in alignment[1, c])
                {
                    double bestScore;
                    // minIndex/maxIndex are the first source positions examined by the backward and
                    // forward scans below.
                    int    minIndex, maxIndex;
                    if (alignment[0, c].IsNull)
                    {
                        // No source word in this column: the baseline is the score of pairing the
                        // target word with a null source.
                        double prob = _getTranslationProb(null, targetSegment[j]);
                        bestScore = ComputeAlignmentScore(prob, 0);
                        // Walk back to the nearest earlier column that has a source word.
                        int tc = c - 1;
                        while (tc >= 0 && alignment[0, tc].IsNull)
                        {
                            tc--;
                        }
                        // Anchor the scans at that column's last source index, or at 0 when there is none.
                        int i = tc == -1 ? 0 : alignment[0, tc].Last;
                        minIndex = i;
                        maxIndex = i + 1;
                    }
                    else
                    {
                        // Baseline is the average translation probability over the column's source words.
                        double prob = alignment[0, c]
                                      .Average(i => _getTranslationProb(sourceSegment[i], targetSegment[j]));
                        bestScore = ComputeAlignmentScore(prob, 0);
                        // Scans start just outside the column's own source range.
                        minIndex  = alignment[0, c].First - 1;
                        maxIndex  = alignment[0, c].Last + 1;
                    }

                    // -1 means no scanned source word beat the baseline.
                    int bestIndex = -1;
                    // Backward scan: from minIndex down to at most _maxDistance positions earlier.
                    for (int i = minIndex; i >= Math.Max(0, minIndex - _maxDistance); i--)
                    {
                        double prob          = _getTranslationProb(sourceSegment[i], targetSegment[j]);
                        double distanceScore = ComputeDistanceScore(i, minIndex + 1, sourceSegment.Count);
                        double score         = ComputeAlignmentScore(prob, distanceScore);
                        if (score > bestScore)
                        {
                            bestScore = score;
                            bestIndex = i;
                        }
                    }

                    // Forward scan: from maxIndex up to (but excluding) maxIndex + _maxDistance.
                    for (int i = maxIndex; i < Math.Min(sourceSegment.Count, maxIndex + _maxDistance); i++)
                    {
                        double prob          = _getTranslationProb(sourceSegment[i], targetSegment[j]);
                        double distanceScore = ComputeDistanceScore(i, maxIndex - 1, sourceSegment.Count);
                        double score         = ComputeAlignmentScore(prob, distanceScore);
                        if (score > bestScore)
                        {
                            bestScore = score;
                            bestIndex = i;
                        }
                    }

                    if (bestIndex == -1)
                    {
                        // Baseline wins. With a source word in the column, minIndex + 1 and maxIndex - 1
                        // are the column's First and Last source indices; a null column leaves the
                        // target word unaligned.
                        if (!alignment[0, c].IsNull)
                        {
                            waMatrix[minIndex + 1, j] = AlignmentType.Aligned;
                            waMatrix[maxIndex - 1, j] = AlignmentType.Aligned;
                        }
                    }
                    else
                    {
                        waMatrix[bestIndex, j] = AlignmentType.Aligned;
                    }
                }
            }

            return(waMatrix);
        }
 /// <summary>
 /// Creates a phrase record from its source range, target cut and alignment.
 /// </summary>
 /// <param name="sourceSegmentRange">Source range covered by the phrase.</param>
 /// <param name="targetCut">Value stored in <c>TargetCut</c>.</param>
 /// <param name="alignment">Word alignment of the phrase.</param>
 public PhraseInfo(Range <int> sourceSegmentRange, int targetCut, WordAlignmentMatrix alignment)
 {
     this.SourceSegmentRange = sourceSegmentRange;
     this.TargetCut = targetCut;
     this.Alignment = alignment;
 }
예제 #28
0
        /// <summary>
        /// Checks that this instance has not been disposed and then delegates to the wrapped aligner.
        /// </summary>
        /// <param name="sourceSegment">Source words.</param>
        /// <param name="targetSegment">Target words.</param>
        /// <param name="hintMatrix">Optional hint forwarded to the wrapped aligner.</param>
        /// <returns>The alignment returned by the wrapped aligner.</returns>
        public WordAlignmentMatrix GetBestAlignment(IReadOnlyList <string> sourceSegment,
                                                    IReadOnlyList <string> targetSegment, WordAlignmentMatrix hintMatrix = null)
        {
            CheckDisposed();

            WordAlignmentMatrix bestAlignment = _aligner.GetBestAlignment(sourceSegment, targetSegment, hintMatrix);
            return bestAlignment;
        }
 /// <summary>
 /// Records a phrase that ends at the current word count.
 /// </summary>
 /// <param name="sourceSegmentRange">Source range covered by the phrase.</param>
 /// <param name="alignment">Word alignment of the phrase.</param>
 public void MarkPhrase(Range <int> sourceSegmentRange, WordAlignmentMatrix alignment)
 {
     // The number of words appended so far becomes the phrase's target cut.
     var phrase = new PhraseInfo(sourceSegmentRange, _words.Count, alignment);
     _phrases.Add(phrase);
 }
예제 #30
0
        /// <summary>
        /// Builds a new result by walking this result's target words and, for each one, either
        /// keeping it or substituting the words that <paramref name="otherResult"/> aligned to
        /// the same source words.
        /// </summary>
        /// <param name="prefixCount">
        /// Aligned target words whose index is below this value are always taken from this result.
        /// </param>
        /// <param name="threshold">
        /// Aligned target words whose confidence is at least this value are taken from this result.
        /// </param>
        /// <param name="otherResult">The result whose words and sources are merged in.</param>
        /// <returns>
        /// A new <c>TranslationResult</c> over the same source segment and phrases, with the merged
        /// target words, confidences, sources and a freshly built alignment matrix.
        /// </returns>
        public TranslationResult Merge(int prefixCount, double threshold, TranslationResult otherResult)
        {
            var words = new List<string>();
            var confidences = new List<double>();
            var origins = new List<TranslationSources>();
            // (source index, merged target index) links; the set drops repeated links.
            var links = new HashSet<Tuple<int, int>>();

            for (int col = 0; col < TargetSegment.Count; col++)
            {
                int[] rows = Alignment.GetColumnAlignedIndices(col).ToArray();

                // Case 1: the word is aligned to no source word, so it is copied as-is
                // and contributes no alignment links.
                if (rows.Length == 0)
                {
                    words.Add(TargetSegment[col]);
                    confidences.Add(WordConfidences[col]);
                    origins.Add(WordSources[col]);
                    continue;
                }

                // Case 2: the word is inside the prefix or confident enough, so this result's
                // word is kept.
                bool keepOwnWord = col < prefixCount || WordConfidences[col] >= threshold;
                if (keepOwnWord)
                {
                    words.Add(TargetSegment[col]);
                    confidences.Add(WordConfidences[col]);
                    int mergedIndex = words.Count - 1;

                    TranslationSources combined = WordSources[col];
                    foreach (int row in rows)
                    {
                        // Fold in the other result's sources wherever it produced the very same
                        // word for one of the aligned source words.
                        foreach (int otherCol in otherResult.Alignment.GetRowAlignedIndices(row))
                        {
                            TranslationSources otherOrigin = otherResult.WordSources[otherCol];
                            if (otherOrigin == TranslationSources.None)
                            {
                                continue;
                            }

                            if (otherResult.TargetSegment[otherCol] == TargetSegment[col])
                            {
                                combined |= otherOrigin;
                            }
                        }

                        links.Add(Tuple.Create(row, mergedIndex));
                    }

                    origins.Add(combined);
                    continue;
                }

                // Case 3: low confidence outside the prefix, so the other result's translated
                // words for the same source words are preferred.
                // NOTE(review): a word of the other result that is aligned to several of these
                // source words is appended once per source word; confirm this is intended.
                int countBefore = words.Count;
                foreach (int row in rows)
                {
                    foreach (int otherCol in otherResult.Alignment.GetRowAlignedIndices(row))
                    {
                        TranslationSources otherOrigin = otherResult.WordSources[otherCol];
                        if (otherOrigin == TranslationSources.None)
                        {
                            continue;
                        }

                        words.Add(otherResult.TargetSegment[otherCol]);
                        confidences.Add(otherResult.WordConfidences[otherCol]);
                        origins.Add(otherOrigin);
                        links.Add(Tuple.Create(row, words.Count - 1));
                    }
                }

                bool nothingBorrowed = words.Count == countBefore;
                if (nothingBorrowed)
                {
                    // The other result offered no translated word here; fall back to this
                    // result's word and keep all of its alignment links.
                    words.Add(TargetSegment[col]);
                    confidences.Add(WordConfidences[col]);
                    origins.Add(WordSources[col]);

                    int fallbackIndex = words.Count - 1;
                    foreach (int row in rows)
                    {
                        links.Add(Tuple.Create(row, fallbackIndex));
                    }
                }
            }

            // Materialize the collected links into a matrix sized for the merged target segment.
            var mergedMatrix = new WordAlignmentMatrix(SourceSegment.Count, words.Count);
            foreach (Tuple<int, int> link in links)
            {
                mergedMatrix[link.Item1, link.Item2] = true;
            }

            return new TranslationResult(SourceSegment, words, confidences, origins, mergedMatrix, Phrases);
        }