/// <summary>
/// Looks up the first sound class that matches the cell at (<paramref name="seq"/>, <paramref name="col"/>)
/// of <paramref name="alignment"/>, taking the cell's left and right neighbor nodes into account.
/// </summary>
/// <param name="soundClasses">Candidate sound classes, tested in enumeration order.</param>
/// <param name="segmentPool">Pool passed to <c>ToNgram</c> when converting the alignment cell.</param>
/// <param name="alignment">Alignment that supplies the cell and its neighbor nodes.</param>
/// <param name="seq">Sequence index within the alignment.</param>
/// <param name="col">Column index within the alignment.</param>
/// <param name="soundClass">The first matching sound class, or <c>null</c> when none matches.</param>
/// <returns><c>true</c> when a matching sound class was found; otherwise <c>false</c>.</returns>
public static bool TryGetMatchingSoundClass(this IEnumerable<SoundClass> soundClasses, SegmentPool segmentPool, Alignment<Word, ShapeNode> alignment, int seq, int col, out SoundClass soundClass)
{
    // Gather the three pieces of context every candidate is tested against.
    ShapeNode precedingNode = alignment.GetLeftNode(seq, col);
    Ngram<Segment> cellNgram = alignment[seq, col].ToNgram(segmentPool);
    ShapeNode followingNode = alignment.GetRightNode(seq, col);

    SoundClass firstMatch = soundClasses.FirstOrDefault(candidate =>
    {
        return candidate.Matches(precedingNode, cellNgram, followingNode);
    });

    soundClass = firstMatch;
    return firstMatch != null;
}
/// <summary>
/// Builds a <c>SoundContext</c> for the cell at (<paramref name="seq"/>, <paramref name="col"/>) of
/// <paramref name="alignment"/>: the sound class matching the left neighbor node (if any), the cell's
/// n-gram, and the sound class matching the right neighbor node (if any).
/// </summary>
/// <param name="alignment">Alignment that supplies the cell and its neighbor nodes.</param>
/// <param name="segmentPool">Pool used for n-gram conversion and sound-class matching.</param>
/// <param name="seq">Sequence index within the alignment.</param>
/// <param name="col">Column index within the alignment.</param>
/// <param name="soundClasses">Sound classes searched for the left and right environments.</param>
/// <returns>
/// A new <c>SoundContext</c>; an environment is <c>null</c> when the neighbor node is <c>null</c>
/// or no sound class matches it.
/// </returns>
public static SoundContext ToSoundContext(this Alignment<Word, ShapeNode> alignment, SegmentPool segmentPool, int seq, int col, IEnumerable<SoundClass> soundClasses)
{
    // Left environment: stays null unless a neighbor exists and a class matches it.
    SoundClass leftEnvironment = null;
    ShapeNode precedingNode = alignment.GetLeftNode(seq, col);
    if (precedingNode != null)
    {
        SoundClass matchedLeft;
        if (soundClasses.TryGetMatchingSoundClass(segmentPool, precedingNode, out matchedLeft))
        {
            leftEnvironment = matchedLeft;
        }
    }

    Ngram<Segment> cellNgram = alignment[seq, col].ToNgram(segmentPool);

    // Right environment: same rule as the left side.
    SoundClass rightEnvironment = null;
    ShapeNode followingNode = alignment.GetRightNode(seq, col);
    if (followingNode != null)
    {
        SoundClass matchedRight;
        if (soundClasses.TryGetMatchingSoundClass(segmentPool, followingNode, out matchedRight))
        {
            rightEnvironment = matchedRight;
        }
    }

    return new SoundContext(leftEnvironment, cellNgram, rightEnvironment);
}
/// <summary>
/// Tries to find a sound class whose <c>Matches</c> test succeeds for the alignment cell at
/// (<paramref name="seq"/>, <paramref name="col"/>) together with its left and right neighbor nodes.
/// </summary>
/// <param name="soundClasses">Sound classes to search, in enumeration order.</param>
/// <param name="segmentPool">Pool passed to <c>ToNgram</c> for the alignment cell.</param>
/// <param name="alignment">Alignment providing the cell and its neighbors.</param>
/// <param name="seq">Sequence index within the alignment.</param>
/// <param name="col">Column index within the alignment.</param>
/// <param name="soundClass">Receives the first match, or <c>null</c> if there is none.</param>
/// <returns><c>true</c> if <paramref name="soundClass"/> was set to a match; otherwise <c>false</c>.</returns>
public static bool TryGetMatchingSoundClass(this IEnumerable<SoundClass> soundClasses, SegmentPool segmentPool, Alignment<Word, ShapeNode> alignment, int seq, int col, out SoundClass soundClass)
{
    ShapeNode nodeBefore = alignment.GetLeftNode(seq, col);
    Ngram<Segment> ngramAtCell = alignment[seq, col].ToNgram(segmentPool);
    ShapeNode nodeAfter = alignment.GetRightNode(seq, col);

    // Lazily filter the candidates; FirstOrDefault stops at the first one that matches.
    IEnumerable<SoundClass> matches =
        from candidate in soundClasses
        where candidate.Matches(nodeBefore, ngramAtCell, nodeAfter)
        select candidate;

    soundClass = matches.FirstOrDefault();
    return soundClass != null;
}
/// <summary>
/// Classifies every column of the first alignment in <paramref name="alignerResult"/> into a
/// category (0 = ignored, 1, 2 or 3), records the per-column category in
/// <c>wordPair.AlignmentNotes</c>, and derives <c>wordPair.PredictedCognacy</c> and
/// <c>wordPair.PredictedCognacyScore</c> from the category counts.
/// </summary>
/// <param name="wordPair">Word pair whose alignment notes, prediction and score are overwritten.</param>
/// <param name="alignerResult">Aligner result; only its first alignment is examined.</param>
/// <remarks>
/// Ignored columns (category 0) are noted as "-" and excluded from the totals. When no column is
/// counted (no columns at all, or all of them ignored) the scores are 0 and the pair is predicted
/// non-cognate, instead of the NaN that dividing zero by zero would produce.
/// </remarks>
public void UpdatePredictedCognacy(WordPair wordPair, IWordAlignerResult alignerResult)
{
    wordPair.AlignmentNotes.Clear();
    int cat1Count = 0;
    int cat1And2Count = 0;
    int totalCount = 0;
    Alignment<Word, ShapeNode> alignment = alignerResult.GetAlignments().First();
    for (int column = 0; column < alignment.ColumnCount; column++)
    {
        // u comes from sequence 0 and v from sequence 1, each with its left/right neighbor nodes.
        ShapeNode uLeftNode = alignment.GetLeftNode(0, column);
        Ngram<Segment> u = alignment[0, column].ToNgram(_segmentPool);
        ShapeNode uRightNode = alignment.GetRightNode(0, column);
        ShapeNode vLeftNode = alignment.GetLeftNode(1, column);
        Ngram<Segment> v = alignment[1, column].ToNgram(_segmentPool);
        ShapeNode vRightNode = alignment.GetRightNode(1, column);

        // Category 3 is the default when none of the rules below applies.
        int cat = 3;
        if (u.Equals(v))
        {
            cat = 1;
        }
        else if (IgnoredMappings.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode))
        {
            cat = 0;
        }
        else if (u.Length == 0 || v.Length == 0)
        {
            // One side is empty: an insertion or deletion.
            if (SimilarSegments.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode))
            {
                cat = 1;
            }
            else if (IgnoreRegularInsertionDeletion && IsRegular(wordPair, alignerResult, alignment, column, v))
            {
                cat = 0;
            }
        }
        else if (u[0].Type == CogFeatureSystem.VowelType && v[0].Type == CogFeatureSystem.VowelType)
        {
            cat = SimilarSegments.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode) ? 1 : 2;
        }
        else if (u[0].Type == CogFeatureSystem.ConsonantType && v[0].Type == CogFeatureSystem.ConsonantType)
        {
            if (RegularConsonantEqual)
            {
                if (IsRegular(wordPair, alignerResult, alignment, column, v))
                {
                    cat = 1;
                }
                else if (SimilarSegments.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode))
                {
                    cat = 2;
                }
            }
            else
            {
                if (SimilarSegments.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode))
                {
                    cat = IsRegular(wordPair, alignerResult, alignment, column, v) ? 1 : 2;
                }
            }
        }

        if (cat > 0 && cat < 3)
        {
            cat1And2Count++;
            if (cat == 1)
            {
                cat1Count++;
            }
        }

        wordPair.AlignmentNotes.Add(cat == 0 ? "-" : cat.ToString(CultureInfo.InvariantCulture));
        if (cat > 0)
        {
            totalCount++;
        }
    }

    // Guard the division: with totalCount == 0 the double division would yield NaN.
    double type1Score = 0.0;
    double type1And2Score = 0.0;
    if (totalCount > 0)
    {
        type1Score = (double)cat1Count / totalCount;
        type1And2Score = (double)cat1And2Count / totalCount;
    }

    wordPair.PredictedCognacy = type1Score >= 0.5 && type1And2Score >= 0.75;
    wordPair.PredictedCognacyScore = (type1Score * 0.75) + (type1And2Score * 0.25);
}
/// <summary>
/// Classifies every column of the first alignment in <paramref name="alignerResult"/> into a
/// category (0 = ignored, 1, 2 or 3), records the per-column category in
/// <c>wordPair.AlignmentNotes</c>, and derives <c>wordPair.AreCognatePredicted</c> and
/// <c>wordPair.CognicityScore</c> from the category counts.
/// </summary>
/// <param name="wordPair">Word pair whose alignment notes, prediction and score are overwritten.</param>
/// <param name="alignerResult">Aligner result; only its first alignment is examined.</param>
/// <remarks>
/// Ignored columns (category 0) are noted as "-" and excluded from the totals. When no column is
/// counted (no columns at all, or all of them ignored) the scores are 0 and the pair is predicted
/// non-cognate, instead of the NaN that dividing zero by zero would produce.
/// </remarks>
public void UpdateCognicity(WordPair wordPair, IWordAlignerResult alignerResult)
{
    // Minimum value looked up in SoundChangeFrequencyDistribution for a correspondence to count as regular.
    const int regularThreshold = 3;

    wordPair.AlignmentNotes.Clear();
    int cat1Count = 0;
    int cat1And2Count = 0;
    int totalCount = 0;
    Alignment<Word, ShapeNode> alignment = alignerResult.GetAlignments().First();
    for (int column = 0; column < alignment.ColumnCount; column++)
    {
        // u comes from sequence 0 and v from sequence 1, each with its left/right neighbor nodes.
        ShapeNode uLeftNode = alignment.GetLeftNode(0, column);
        Ngram<Segment> u = alignment[0, column].ToNgram(_segmentPool);
        ShapeNode uRightNode = alignment.GetRightNode(0, column);
        ShapeNode vLeftNode = alignment.GetLeftNode(1, column);
        Ngram<Segment> v = alignment[1, column].ToNgram(_segmentPool);
        ShapeNode vRightNode = alignment.GetRightNode(1, column);

        // Evaluated for every column, before classification, exactly as the lookup was originally
        // placed; kept eager in case the distribution indexers have side effects.
        bool regular = wordPair.VarietyPair.SoundChangeFrequencyDistribution[alignment.ToSoundContext(_segmentPool, 0, column, alignerResult.WordAligner.ContextualSoundClasses)][v] >= regularThreshold;

        // Category 3 is the default when none of the rules below applies.
        int cat = 3;
        if (u.Equals(v))
        {
            cat = 1;
        }
        else if (_ignoredMappings.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode))
        {
            cat = 0;
        }
        else if (u.Length == 0 || v.Length == 0)
        {
            // One side is empty: an insertion or deletion.
            // NOTE(review): with _ignoreRegularInsertionDeletion set, a similar-segment match that is
            // not regular is also ignored here - confirm this is intended.
            if (_similarSegments.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode) || regular)
            {
                cat = _ignoreRegularInsertionDeletion ? 0 : 1;
            }
        }
        else if (u[0].Type == CogFeatureSystem.VowelType && v[0].Type == CogFeatureSystem.VowelType)
        {
            cat = _similarSegments.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode) ? 1 : 2;
        }
        else if (u[0].Type == CogFeatureSystem.ConsonantType && v[0].Type == CogFeatureSystem.ConsonantType)
        {
            if (_regularConsEqual)
            {
                if (regular)
                {
                    cat = 1;
                }
                else if (_similarSegments.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode))
                {
                    cat = 2;
                }
            }
            else
            {
                if (_similarSegments.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode))
                {
                    cat = regular ? 1 : 2;
                }
            }
        }

        if (cat > 0 && cat < 3)
        {
            cat1And2Count++;
            if (cat == 1)
            {
                cat1Count++;
            }
        }

        wordPair.AlignmentNotes.Add(cat == 0 ? "-" : cat.ToString(CultureInfo.InvariantCulture));
        if (cat > 0)
        {
            totalCount++;
        }
    }

    // Guard the division: with totalCount == 0 the double division would yield NaN.
    double type1Score = 0.0;
    double type1And2Score = 0.0;
    if (totalCount > 0)
    {
        type1Score = (double)cat1Count / totalCount;
        type1And2Score = (double)cat1And2Count / totalCount;
    }

    wordPair.AreCognatePredicted = type1Score >= 0.5 && type1And2Score >= 0.75;
    wordPair.CognicityScore = (type1Score * 0.75) + (type1And2Score * 0.25);
}