Esempio n. 1
0
 /// <summary>
 /// Creates a symmetrized model from a direct and an inverse word alignment model.
 /// </summary>
 /// <param name="directWordAlignmentModel">Model stored as the direct model.</param>
 /// <param name="inverseWordAlignmentModel">Model stored as the inverse model.</param>
 public SymmetrizedWordAlignmentModel(
     IWordAlignmentModel directWordAlignmentModel,
     IWordAlignmentModel inverseWordAlignmentModel)
 {
     _directWordAlignmentModel = directWordAlignmentModel;
     _inverseWordAlignmentModel = inverseWordAlignmentModel;

     // The aligner is built from the DirectWordAlignmentModel / InverseWordAlignmentModel
     // members (presumably accessors over the fields assigned above - confirm), so it is
     // created only after both fields have been set.
     _aligner = new SymmetrizedSegmentAligner(
         DirectWordAlignmentModel,
         InverseWordAlignmentModel);
 }
Esempio n. 2
0
 /// <summary>
 /// Adds the non-empty segments of a parallel corpus to the model, stopping after
 /// <paramref name="maxCount"/> segments have been taken.
 /// </summary>
 /// <param name="model">The model that receives the segment pairs.</param>
 /// <param name="corpus">The corpus whose <c>Segments</c> are enumerated.</param>
 /// <param name="sourcePreprocessor">Optional preprocessor forwarded for the source side.</param>
 /// <param name="targetPreprocessor">Optional preprocessor forwarded for the target side.</param>
 /// <param name="maxCount">Maximum number of non-empty segments to add.</param>
 public static void AddSegmentPairs(this IWordAlignmentModel model, ParallelTextCorpus corpus,
     Func<string, string> sourcePreprocessor = null, Func<string, string> targetPreprocessor = null,
     int maxCount = int.MaxValue)
 {
     // Filter before limiting so that empty segments do not count towards maxCount.
     var usableSegments = corpus.Segments
         .Where(candidate => !candidate.IsEmpty)
         .Take(maxCount);

     foreach (ParallelTextSegment pair in usableSegments)
     {
         model.AddSegmentPair(pair, sourcePreprocessor, targetPreprocessor);
     }
 }
Esempio n. 3
0
        /// <summary>
        /// Preprocesses both sides of a parallel segment and adds the resulting pair to the model.
        /// Empty segments are ignored.
        /// </summary>
        /// <param name="model">The model that receives the segment pair.</param>
        /// <param name="segment">The parallel segment to add.</param>
        /// <param name="sourcePreprocessor">Optional preprocessor passed to <c>Preprocess</c> for the source side.</param>
        /// <param name="targetPreprocessor">Optional preprocessor passed to <c>Preprocess</c> for the target side.</param>
        public static void AddSegmentPair(this IWordAlignmentModel model, ParallelTextSegment segment,
            Func<string, string> sourcePreprocessor = null, Func<string, string> targetPreprocessor = null)
        {
            if (!segment.IsEmpty)
            {
                IReadOnlyList<string> sourceTokens = segment.SourceSegment.Preprocess(sourcePreprocessor);
                IReadOnlyList<string> targetTokens = segment.TargetSegment.Preprocess(targetPreprocessor);

                model.AddSegmentPair(sourceTokens, targetTokens);
            }
        }
        /// <summary>
        /// Preprocesses both sides of a parallel segment and adds the resulting pair to the model
        /// together with the alignment matrix created from the segment. Empty segments are ignored.
        /// </summary>
        /// <param name="model">The model that receives the segment pair.</param>
        /// <param name="segment">The parallel segment to add.</param>
        /// <param name="sourcePreprocessor">Optional preprocessor passed to <c>Preprocess</c> for the source side.</param>
        /// <param name="targetPreprocessor">Optional preprocessor passed to <c>Preprocess</c> for the target side.</param>
        /// <param name="isUnknown">Forwarded unchanged to <c>segment.CreateAlignmentMatrix</c>.</param>
        public static void AddSegmentPair(this IWordAlignmentModel model, ParallelTextSegment segment,
            Func<string, string> sourcePreprocessor = null, Func<string, string> targetPreprocessor = null,
            bool isUnknown = true)
        {
            if (!segment.IsEmpty)
            {
                IReadOnlyList<string> sourceWords = segment.SourceSegment.Preprocess(sourcePreprocessor);
                IReadOnlyList<string> targetWords = segment.TargetSegment.Preprocess(targetPreprocessor);

                // Built after preprocessing, matching the original argument evaluation order.
                var segmentAlignment = segment.CreateAlignmentMatrix(isUnknown);

                model.AddSegmentPair(sourceWords, targetWords, segmentAlignment);
            }
        }
Esempio n. 5
0
        /// <summary>
        /// Computes the best alignment for a parallel segment and renders it as a string.
        /// </summary>
        /// <param name="model">The model used to compute the alignment.</param>
        /// <param name="segment">The parallel segment to align.</param>
        /// <param name="includeProbs">
        /// When <c>true</c>, the alignment is rendered with the <c>ToString</c> overload that takes the
        /// model and both token lists; otherwise the parameterless <c>ToString</c> is used.
        /// </param>
        /// <param name="sourcePreprocessor">Optional preprocessor passed to <c>Preprocess</c> for the source side.</param>
        /// <param name="targetPreprocessor">Optional preprocessor passed to <c>Preprocess</c> for the target side.</param>
        /// <returns>The string form of the best alignment.</returns>
        public static string GetAlignmentString(this IWordAlignmentModel model, ParallelTextSegment segment,
            bool includeProbs, Func<string, string> sourcePreprocessor = null,
            Func<string, string> targetPreprocessor = null)
        {
            IReadOnlyList<string> sourceTokens = segment.SourceSegment.Preprocess(sourcePreprocessor);
            IReadOnlyList<string> targetTokens = segment.TargetSegment.Preprocess(targetPreprocessor);

            // The matrix created from the segment is handed to GetBestAlignment as its third argument.
            var segmentAlignment = segment.CreateAlignmentMatrix();
            WordAlignmentMatrix bestAlignment = model.GetBestAlignment(sourceTokens, targetTokens,
                segmentAlignment);

            return includeProbs
                ? bestAlignment.ToString(model, sourceTokens, targetTokens)
                : bestAlignment.ToString();
        }
Esempio n. 6
0
        /// <summary>
        /// Lazily enumerates the aligned word pairs, filling in each pair's translation and
        /// alignment probabilities from the given model before yielding it.
        /// </summary>
        /// <param name="model">The model queried for translation and alignment probabilities.</param>
        /// <param name="sourceSegment">Source words, indexed by each pair's <c>SourceIndex</c>.</param>
        /// <param name="targetSegment">Target words, indexed by each pair's <c>TargetIndex</c>.</param>
        /// <returns>The word pairs with their probability properties assigned.</returns>
        public IEnumerable<AlignedWordPair> GetAlignedWordPairs(IWordAlignmentModel model,
            IReadOnlyList<string> sourceSegment, IReadOnlyList<string> targetSegment)
        {
            IReadOnlyList<int> sourceIndices;
            IReadOnlyList<int> targetIndices;

            foreach (AlignedWordPair pair in GetAlignedWordPairs(out sourceIndices, out targetIndices))
            {
                string sourceWord = sourceSegment[pair.SourceIndex];
                string targetWord = targetSegment[pair.TargetIndex];
                pair.TranslationProbability = model.GetTranslationProbability(sourceWord, targetWord);

                // sourceIndices is looked up by the preceding target position; -1 is used when
                // there is no preceding target position.
                int prevSourceIndex = -1;
                if (pair.TargetIndex != 0)
                {
                    prevSourceIndex = sourceIndices[pair.TargetIndex - 1];
                }

                // targetIndices is looked up by the preceding source position; -1 is used when
                // there is no preceding source position.
                int prevTargetIndex = -1;
                if (pair.SourceIndex != 0)
                {
                    prevTargetIndex = targetIndices[pair.SourceIndex - 1];
                }

                pair.AlignmentProbability = model.GetAlignmentProbability(
                    sourceSegment.Count, prevSourceIndex, pair.SourceIndex,
                    targetSegment.Count, prevTargetIndex, pair.TargetIndex);

                yield return pair;
            }
        }
Esempio n. 7
0
        /// <summary>
        /// Dispatches an alignment-probability query to the concrete model type.
        /// </summary>
        /// <param name="model">The model to query.</param>
        /// <param name="sourceLen">Length of the source segment.</param>
        /// <param name="prevSourceIndex">Previous source index (used by HMM and symmetrized models).</param>
        /// <param name="sourceIndex">Current source index.</param>
        /// <param name="targetLen">Length of the target segment (used by IBM-2 and symmetrized models).</param>
        /// <param name="prevTargetIndex">Previous target index (used by symmetrized models only).</param>
        /// <param name="targetIndex">Current target index (used by IBM-2 and symmetrized models).</param>
        /// <returns>
        /// The probability reported by the matching model type, or -1 when the model matches none
        /// of the handled types.
        /// </returns>
        public static double GetAlignmentProbability(this IWordAlignmentModel model, int sourceLen, int prevSourceIndex,
            int sourceIndex, int targetLen, int prevTargetIndex, int targetIndex)
        {
            // The checks run in a fixed order: HMM first, then IBM-2, then symmetrized.
            if (model is IHmmWordAlignmentModel hmm)
            {
                return hmm.GetAlignmentProbability(sourceLen, prevSourceIndex, sourceIndex);
            }

            if (model is IIbm2WordAlignmentModel ibm2)
            {
                return ibm2.GetAlignmentProbability(sourceLen, sourceIndex, targetLen, targetIndex);
            }

            if (model is SymmetrizedWordAlignmentModel symmetrized)
            {
                return symmetrized.GetAlignmentProbability(sourceLen, prevSourceIndex, sourceIndex,
                    targetLen, prevTargetIndex, targetIndex);
            }

            // Sentinel for model types that expose no alignment probability.
            return -1;
        }
        /// <summary>
        /// Builds a nested dictionary of translation probabilities keyed by source word and then
        /// by target word.
        /// </summary>
        /// <param name="model">The model whose vocabularies and probabilities are read.</param>
        /// <param name="threshold">Only probabilities strictly greater than this value are kept.</param>
        /// <returns>
        /// One entry per source word; a source word with no probability above the threshold maps
        /// to an empty dictionary.
        /// </returns>
        public static IDictionary<string, IDictionary<string, double>> GetTranslationTable(
            this IWordAlignmentModel model, double threshold = 0)
        {
            Dictionary<string, IDictionary<string, double>> table =
                new Dictionary<string, IDictionary<string, double>>();

            for (int sourceIndex = 0; sourceIndex < model.SourceWords.Count; sourceIndex++)
            {
                Dictionary<string, double> targetProbs = new Dictionary<string, double>();

                for (int targetIndex = 0; targetIndex < model.TargetWords.Count; targetIndex++)
                {
                    double probability = model.GetTranslationProbability(sourceIndex, targetIndex);
                    if (probability > threshold)
                    {
                        targetProbs[model.TargetWords[targetIndex]] = probability;
                    }
                }

                table[model.SourceWords[sourceIndex]] = targetProbs;
            }

            return table;
        }
Esempio n. 9
0
 /// <summary>
 /// Renders the aligned word pairs, with probabilities taken from the given model, as a single
 /// space-separated string.
 /// </summary>
 /// <param name="model">The model passed on to <c>GetAlignedWordPairs</c>.</param>
 /// <param name="sourceSegment">The source words.</param>
 /// <param name="targetSegment">The target words.</param>
 /// <returns>The string forms of the word pairs joined by single spaces.</returns>
 public string ToString(IWordAlignmentModel model, IReadOnlyList<string> sourceSegment,
     IReadOnlyList<string> targetSegment)
 {
     IEnumerable<string> renderedPairs = GetAlignedWordPairs(model, sourceSegment, targetSegment)
         .Select(pair => pair.ToString());

     return string.Join(" ", renderedPairs);
 }