/// <summary>
/// Creates a symmetrized model from a direct model and an inverse model.
/// </summary>
/// <param name="directWordAlignmentModel">The model stored as the direct word alignment model.</param>
/// <param name="inverseWordAlignmentModel">The model stored as the inverse word alignment model.</param>
public SymmetrizedWordAlignmentModel(
    IWordAlignmentModel directWordAlignmentModel,
    IWordAlignmentModel inverseWordAlignmentModel)
{
    _directWordAlignmentModel = directWordAlignmentModel;
    _inverseWordAlignmentModel = inverseWordAlignmentModel;

    // The aligner is built from the properties, not the parameters, so both
    // backing fields must already be assigned at this point.
    // NOTE(review): presumably the properties expose the fields set above - confirm.
    _aligner = new SymmetrizedSegmentAligner(
        DirectWordAlignmentModel,
        InverseWordAlignmentModel);
}
/// <summary>
/// Adds the non-empty segments of a parallel corpus to the model, one segment pair at a time.
/// </summary>
/// <param name="model">The model that receives the segment pairs.</param>
/// <param name="corpus">The corpus whose <c>Segments</c> are enumerated.</param>
/// <param name="sourcePreprocessor">Optional preprocessor forwarded for the source side.</param>
/// <param name="targetPreprocessor">Optional preprocessor forwarded for the target side.</param>
/// <param name="maxCount">Maximum number of non-empty segments to add.</param>
public static void AddSegmentPairs(
    this IWordAlignmentModel model,
    ParallelTextCorpus corpus,
    Func<string, string> sourcePreprocessor = null,
    Func<string, string> targetPreprocessor = null,
    int maxCount = int.MaxValue)
{
    // Empty segments are filtered out before the limit is applied, so they do
    // not count towards maxCount.
    var segmentsToAdd = corpus.Segments
        .Where(s => !s.IsEmpty)
        .Take(maxCount);

    foreach (ParallelTextSegment current in segmentsToAdd)
    {
        model.AddSegmentPair(current, sourcePreprocessor, targetPreprocessor);
    }
}
/// <summary>
/// Preprocesses both sides of a parallel segment and adds the resulting pair to the model.
/// Does nothing when the segment is empty.
/// </summary>
/// <param name="model">The model that receives the segment pair.</param>
/// <param name="segment">The parallel segment to add.</param>
/// <param name="sourcePreprocessor">Optional preprocessor passed to <c>Preprocess</c> for the source side.</param>
/// <param name="targetPreprocessor">Optional preprocessor passed to <c>Preprocess</c> for the target side.</param>
public static void AddSegmentPair(
    this IWordAlignmentModel model,
    ParallelTextSegment segment,
    Func<string, string> sourcePreprocessor = null,
    Func<string, string> targetPreprocessor = null)
{
    if (!segment.IsEmpty)
    {
        IReadOnlyList<string> source = segment.SourceSegment.Preprocess(sourcePreprocessor);
        IReadOnlyList<string> target = segment.TargetSegment.Preprocess(targetPreprocessor);
        model.AddSegmentPair(source, target);
    }
}
/// <summary>
/// Preprocesses both sides of a parallel segment and adds the resulting pair to the model
/// together with the alignment matrix created from the segment.
/// Does nothing when the segment is empty.
/// </summary>
/// <param name="model">The model that receives the segment pair.</param>
/// <param name="segment">The parallel segment to add.</param>
/// <param name="sourcePreprocessor">Optional preprocessor passed to <c>Preprocess</c> for the source side.</param>
/// <param name="targetPreprocessor">Optional preprocessor passed to <c>Preprocess</c> for the target side.</param>
/// <param name="isUnknown">Forwarded unchanged to <c>segment.CreateAlignmentMatrix</c>.</param>
public static void AddSegmentPair(
    this IWordAlignmentModel model,
    ParallelTextSegment segment,
    Func<string, string> sourcePreprocessor = null,
    Func<string, string> targetPreprocessor = null,
    bool isUnknown = true)
{
    if (!segment.IsEmpty)
    {
        IReadOnlyList<string> source = segment.SourceSegment.Preprocess(sourcePreprocessor);
        IReadOnlyList<string> target = segment.TargetSegment.Preprocess(targetPreprocessor);

        // Created after both sides are preprocessed, matching the original evaluation order.
        var matrix = segment.CreateAlignmentMatrix(isUnknown);
        model.AddSegmentPair(source, target, matrix);
    }
}
/// <summary>
/// Computes the model's best alignment for a parallel segment and renders it as a string.
/// </summary>
/// <param name="model">The model used to compute the best alignment.</param>
/// <param name="segment">The parallel segment to align.</param>
/// <param name="includeProbs">
/// When <c>true</c>, the alignment is rendered with the overload that takes the model and both
/// token lists; otherwise the parameterless <c>ToString</c> is used.
/// </param>
/// <param name="sourcePreprocessor">Optional preprocessor passed to <c>Preprocess</c> for the source side.</param>
/// <param name="targetPreprocessor">Optional preprocessor passed to <c>Preprocess</c> for the target side.</param>
/// <returns>The string form of the best alignment.</returns>
public static string GetAlignmentString(
    this IWordAlignmentModel model,
    ParallelTextSegment segment,
    bool includeProbs,
    Func<string, string> sourcePreprocessor = null,
    Func<string, string> targetPreprocessor = null)
{
    IReadOnlyList<string> source = segment.SourceSegment.Preprocess(sourcePreprocessor);
    IReadOnlyList<string> target = segment.TargetSegment.Preprocess(targetPreprocessor);

    // The matrix created from the segment is handed to GetBestAlignment as its third argument.
    var segmentMatrix = segment.CreateAlignmentMatrix();
    WordAlignmentMatrix best = model.GetBestAlignment(source, target, segmentMatrix);

    return includeProbs
        ? best.ToString(model, source, target)
        : best.ToString();
}
/// <summary>
/// Lazily enumerates the aligned word pairs and fills in each pair's translation and
/// alignment probabilities using the given model.
/// </summary>
/// <param name="model">The model queried for translation and alignment probabilities.</param>
/// <param name="sourceSegment">Source tokens, indexed by each pair's <c>SourceIndex</c>.</param>
/// <param name="targetSegment">Target tokens, indexed by each pair's <c>TargetIndex</c>.</param>
/// <returns>The word pairs, each with its probabilities assigned before it is yielded.</returns>
public IEnumerable<AlignedWordPair> GetAlignedWordPairs(
    IWordAlignmentModel model,
    IReadOnlyList<string> sourceSegment,
    IReadOnlyList<string> targetSegment)
{
    foreach (AlignedWordPair pair in GetAlignedWordPairs(
        out IReadOnlyList<int> sourceIndices,
        out IReadOnlyList<int> targetIndices))
    {
        string srcWord = sourceSegment[pair.SourceIndex];
        string trgWord = targetSegment[pair.TargetIndex];
        pair.TranslationProbability = model.GetTranslationProbability(srcWord, trgWord);

        // -1 is used when there is no preceding position on the other side.
        // NOTE(review): sourceIndices is indexed by target position and targetIndices by
        // source position - presumably each holds the aligned index for that position; confirm
        // against the out-parameter overload.
        int prevSourceIndex = -1;
        if (pair.TargetIndex != 0)
        {
            prevSourceIndex = sourceIndices[pair.TargetIndex - 1];
        }

        int prevTargetIndex = -1;
        if (pair.SourceIndex != 0)
        {
            prevTargetIndex = targetIndices[pair.SourceIndex - 1];
        }

        pair.AlignmentProbability = model.GetAlignmentProbability(
            sourceSegment.Count,
            prevSourceIndex,
            pair.SourceIndex,
            targetSegment.Count,
            prevTargetIndex,
            pair.TargetIndex);

        yield return pair;
    }
}
/// <summary>
/// Dispatches an alignment-probability query to the overload supported by the concrete model type.
/// </summary>
/// <param name="model">The model to query.</param>
/// <param name="sourceLen">Source length passed to the model.</param>
/// <param name="prevSourceIndex">Previous source index; used by the HMM and symmetrized branches.</param>
/// <param name="sourceIndex">Current source index.</param>
/// <param name="targetLen">Target length; used by the IBM-2 and symmetrized branches.</param>
/// <param name="prevTargetIndex">Previous target index; used only by the symmetrized branch.</param>
/// <param name="targetIndex">Current target index; used by the IBM-2 and symmetrized branches.</param>
/// <returns>
/// The probability reported by the matching model, or <c>-1</c> when the model matches none of
/// the handled types.
/// </returns>
public static double GetAlignmentProbability(
    this IWordAlignmentModel model,
    int sourceLen,
    int prevSourceIndex,
    int sourceIndex,
    int targetLen,
    int prevTargetIndex,
    int targetIndex)
{
    // The checks run in a fixed order: a model implementing several of these types is
    // handled by the first one that matches.
    if (model is IHmmWordAlignmentModel hmm)
    {
        return hmm.GetAlignmentProbability(sourceLen, prevSourceIndex, sourceIndex);
    }

    if (model is IIbm2WordAlignmentModel ibm2)
    {
        return ibm2.GetAlignmentProbability(sourceLen, sourceIndex, targetLen, targetIndex);
    }

    if (model is SymmetrizedWordAlignmentModel symmetrized)
    {
        return symmetrized.GetAlignmentProbability(
            sourceLen, prevSourceIndex, sourceIndex, targetLen, prevTargetIndex, targetIndex);
    }

    return -1;
}
/// <summary>
/// Builds a nested dictionary of translation probabilities keyed by source word and then target word.
/// </summary>
/// <param name="model">The model whose <c>SourceWords</c> and <c>TargetWords</c> are enumerated.</param>
/// <param name="threshold">Only probabilities strictly greater than this value are included.</param>
/// <returns>
/// A dictionary with one entry per source word; each value maps target words to their probability.
/// A source word with no probability above the threshold maps to an empty dictionary.
/// </returns>
public static IDictionary<string, IDictionary<string, double>> GetTranslationTable(
    this IWordAlignmentModel model,
    double threshold = 0)
{
    IDictionary<string, IDictionary<string, double>> table =
        new Dictionary<string, IDictionary<string, double>>();

    for (int srcIdx = 0; srcIdx < model.SourceWords.Count; srcIdx++)
    {
        Dictionary<string, double> targetProbs = new Dictionary<string, double>();

        for (int trgIdx = 0; trgIdx < model.TargetWords.Count; trgIdx++)
        {
            double probability = model.GetTranslationProbability(srcIdx, trgIdx);
            if (probability > threshold)
            {
                targetProbs[model.TargetWords[trgIdx]] = probability;
            }
        }

        // The row is stored even when empty, so every source word appears in the table.
        table[model.SourceWords[srcIdx]] = targetProbs;
    }

    return table;
}
/// <summary>
/// Renders the aligned word pairs, with probabilities supplied by the model, as a single
/// space-separated string.
/// </summary>
/// <param name="model">The model passed to <c>GetAlignedWordPairs</c>.</param>
/// <param name="sourceSegment">Source tokens passed to <c>GetAlignedWordPairs</c>.</param>
/// <param name="targetSegment">Target tokens passed to <c>GetAlignedWordPairs</c>.</param>
/// <returns>Each pair's <c>ToString()</c> output joined with a single space.</returns>
public string ToString(
    IWordAlignmentModel model,
    IReadOnlyList<string> sourceSegment,
    IReadOnlyList<string> targetSegment)
{
    IEnumerable<string> pairTexts = GetAlignedWordPairs(model, sourceSegment, targetSegment)
        .Select(wp => wp.ToString());

    return string.Join(" ", pairTexts);
}