예제 #1
0
        /// <summary>
        /// Looks for the first sound class in <paramref name="soundClasses"/> whose <c>Matches</c> call
        /// accepts the alignment cell at (<paramref name="seq"/>, <paramref name="col"/>) together with
        /// the nodes returned by <c>GetLeftNode</c> and <c>GetRightNode</c> for that cell.
        /// </summary>
        /// <param name="soundClasses">Candidate sound classes, tested in enumeration order.</param>
        /// <param name="segmentPool">Pool passed to <c>ToNgram</c> when converting the cell.</param>
        /// <param name="alignment">Alignment that supplies the cell and its neighbouring nodes.</param>
        /// <param name="seq">Sequence index of the cell within the alignment.</param>
        /// <param name="col">Column index of the cell within the alignment.</param>
        /// <param name="soundClass">The first matching sound class, or <c>null</c> when none matches.</param>
        /// <returns><c>true</c> when a matching sound class was found; otherwise <c>false</c>.</returns>
        public static bool TryGetMatchingSoundClass(this IEnumerable <SoundClass> soundClasses, SegmentPool segmentPool, Alignment <Word, ShapeNode> alignment, int seq, int col, out SoundClass soundClass)
        {
            // Gather the three pieces of context once, in the same order as before, so the
            // predicate below only performs the match itself.
            ShapeNode before = alignment.GetLeftNode(seq, col);
            Ngram<Segment> cell = alignment[seq, col].ToNgram(segmentPool);
            ShapeNode after = alignment.GetRightNode(seq, col);

            soundClass = soundClasses
                .Where(candidate => candidate.Matches(before, cell, after))
                .FirstOrDefault();

            if (soundClass == null)
            {
                return false;
            }
            return true;
        }
예제 #2
0
        /// <summary>
        /// Builds a <see cref="SoundContext"/> for the alignment cell at (<paramref name="seq"/>,
        /// <paramref name="col"/>): the cell converted to an n-gram, plus the sound classes matched for
        /// its left and right neighbouring nodes.
        /// </summary>
        /// <param name="alignment">Alignment that supplies the cell and its neighbouring nodes.</param>
        /// <param name="segmentPool">Pool passed to <c>ToNgram</c> and to the sound class lookups.</param>
        /// <param name="seq">Sequence index of the cell within the alignment.</param>
        /// <param name="col">Column index of the cell within the alignment.</param>
        /// <param name="soundClasses">Sound classes searched for each neighbouring node.</param>
        /// <returns>
        /// A new <see cref="SoundContext"/>. An environment is <c>null</c> when the corresponding
        /// neighbouring node is <c>null</c> or when no sound class lookup succeeds for it.
        /// </returns>
        public static SoundContext ToSoundContext(this Alignment <Word, ShapeNode> alignment, SegmentPool segmentPool, int seq, int col, IEnumerable <SoundClass> soundClasses)
        {
            // Left environment: stays null unless there is a node and the lookup succeeds.
            SoundClass precedingClass = null;
            ShapeNode precedingNode = alignment.GetLeftNode(seq, col);
            if (precedingNode != null)
            {
                SoundClass matched;
                if (soundClasses.TryGetMatchingSoundClass(segmentPool, precedingNode, out matched))
                {
                    precedingClass = matched;
                }
            }

            Ngram<Segment> cell = alignment[seq, col].ToNgram(segmentPool);

            // Right environment: same rule as the left one.
            SoundClass followingClass = null;
            ShapeNode followingNode = alignment.GetRightNode(seq, col);
            if (followingNode != null)
            {
                SoundClass matched;
                if (soundClasses.TryGetMatchingSoundClass(segmentPool, followingNode, out matched))
                {
                    followingClass = matched;
                }
            }

            return new SoundContext(precedingClass, cell, followingClass);
        }
예제 #3
0
 /// <summary>
 /// Finds the first sound class that matches the alignment cell at (<paramref name="seq"/>,
 /// <paramref name="col"/>) in the context of its left and right neighbouring nodes.
 /// </summary>
 /// <param name="soundClasses">Candidate sound classes, tested in enumeration order.</param>
 /// <param name="segmentPool">Pool passed to <c>ToNgram</c> when converting the cell.</param>
 /// <param name="alignment">Alignment that supplies the cell and its neighbouring nodes.</param>
 /// <param name="seq">Sequence index of the cell within the alignment.</param>
 /// <param name="col">Column index of the cell within the alignment.</param>
 /// <param name="soundClass">The first matching sound class, or <c>null</c> when none matches.</param>
 /// <returns><c>true</c> when a matching sound class was found; otherwise <c>false</c>.</returns>
 public static bool TryGetMatchingSoundClass(this IEnumerable<SoundClass> soundClasses, SegmentPool segmentPool, Alignment<Word, ShapeNode> alignment, int seq, int col, out SoundClass soundClass)
 {
     // Context is resolved up front so each candidate is tested against the same values.
     ShapeNode nodeBefore = alignment.GetLeftNode(seq, col);
     Ngram<Segment> cellNgram = alignment[seq, col].ToNgram(segmentPool);
     ShapeNode nodeAfter = alignment.GetRightNode(seq, col);

     IEnumerable<SoundClass> matching =
         from candidate in soundClasses
         where candidate.Matches(nodeBefore, cellNgram, nodeAfter)
         select candidate;

     soundClass = matching.FirstOrDefault();
     bool found = soundClass != null;
     return found;
 }
예제 #4
0
        /// <summary>
        /// Categorizes every column of the first alignment in <paramref name="alignerResult"/> and
        /// stores the resulting notes, predicted cognacy flag and score on <paramref name="wordPair"/>.
        /// </summary>
        /// <remarks>
        /// Each column gets a category from 0 to 3 and a matching entry in
        /// <c>wordPair.AlignmentNotes</c>: "-" for category 0, otherwise the category number.
        /// Category 0 columns are excluded from the score denominator. If no column is counted
        /// (no columns, or all of them category 0), both ratios are taken as 0 so that the stored
        /// score is 0 instead of the NaN that dividing zero by zero would produce.
        /// </remarks>
        /// <param name="wordPair">Word pair whose notes and prediction are overwritten.</param>
        /// <param name="alignerResult">Aligner result; only its first alignment is used.</param>
        /// <exception cref="System.InvalidOperationException">
        /// Thrown by <c>First()</c> when <paramref name="alignerResult"/> yields no alignments.
        /// </exception>
        public void UpdatePredictedCognacy(WordPair wordPair, IWordAlignerResult alignerResult)
        {
            wordPair.AlignmentNotes.Clear();
            int cat1Count     = 0;
            int cat1And2Count = 0;
            int totalCount    = 0;
            Alignment <Word, ShapeNode> alignment = alignerResult.GetAlignments().First();

            for (int column = 0; column < alignment.ColumnCount; column++)
            {
                // "u" is the cell of sequence 0, "v" the cell of sequence 1, each with its neighbours.
                ShapeNode       uLeftNode  = alignment.GetLeftNode(0, column);
                Ngram <Segment> u          = alignment[0, column].ToNgram(_segmentPool);
                ShapeNode       uRightNode = alignment.GetRightNode(0, column);
                ShapeNode       vLeftNode  = alignment.GetLeftNode(1, column);
                Ngram <Segment> v          = alignment[1, column].ToNgram(_segmentPool);
                ShapeNode       vRightNode = alignment.GetRightNode(1, column);

                // Category 3 is the fallback when none of the rules below applies.
                int cat = 3;
                if (u.Equals(v))
                {
                    cat = 1;
                }
                else if (IgnoredMappings.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode))
                {
                    cat = 0;
                }
                else if (u.Length == 0 || v.Length == 0)
                {
                    // One side is empty, i.e. the column is an insertion or deletion.
                    if (SimilarSegments.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode))
                    {
                        cat = 1;
                    }
                    else if (IgnoreRegularInsertionDeletion && IsRegular(wordPair, alignerResult, alignment, column, v))
                    {
                        cat = 0;
                    }
                }
                else if (u[0].Type == CogFeatureSystem.VowelType && v[0].Type == CogFeatureSystem.VowelType)
                {
                    cat = SimilarSegments.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode) ? 1 : 2;
                }
                else if (u[0].Type == CogFeatureSystem.ConsonantType && v[0].Type == CogFeatureSystem.ConsonantType)
                {
                    if (RegularConsonantEqual)
                    {
                        // Regularity alone is enough for category 1; similarity only earns category 2.
                        if (IsRegular(wordPair, alignerResult, alignment, column, v))
                        {
                            cat = 1;
                        }
                        else if (SimilarSegments.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode))
                        {
                            cat = 2;
                        }
                    }
                    else
                    {
                        // Similarity is required first; regularity then separates category 1 from 2.
                        if (SimilarSegments.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode))
                        {
                            cat = IsRegular(wordPair, alignerResult, alignment, column, v) ? 1 : 2;
                        }
                    }
                }

                if (cat > 0 && cat < 3)
                {
                    cat1And2Count++;
                    if (cat == 1)
                    {
                        cat1Count++;
                    }
                }
                wordPair.AlignmentNotes.Add(cat == 0 ? "-" : cat.ToString(CultureInfo.InvariantCulture));
                if (cat > 0)
                {
                    totalCount++;
                }
            }

            // Guard the denominators: 0.0 / 0 evaluates to NaN, which would end up in the stored score.
            double type1Score     = totalCount == 0 ? 0.0 : (double)cat1Count / totalCount;
            double type1And2Score = totalCount == 0 ? 0.0 : (double)cat1And2Count / totalCount;

            wordPair.PredictedCognacy      = type1Score >= 0.5 && type1And2Score >= 0.75;
            wordPair.PredictedCognacyScore = (type1Score * 0.75) + (type1And2Score * 0.25);
        }
예제 #5
0
        /// <summary>
        /// Categorizes every column of the first alignment in <paramref name="alignerResult"/> and
        /// stores the resulting notes, cognate prediction and cognicity score on
        /// <paramref name="wordPair"/>.
        /// </summary>
        /// <remarks>
        /// Each column gets a category from 0 to 3 and a matching entry in
        /// <c>wordPair.AlignmentNotes</c>: "-" for category 0, otherwise the category number.
        /// Category 0 columns are excluded from the score denominator. If no column is counted
        /// (no columns, or all of them category 0), both ratios are taken as 0 so that the stored
        /// score is 0 instead of the NaN that dividing zero by zero would produce.
        /// </remarks>
        /// <param name="wordPair">Word pair whose notes and prediction are overwritten.</param>
        /// <param name="alignerResult">Aligner result; only its first alignment is used.</param>
        /// <exception cref="System.InvalidOperationException">
        /// Thrown by <c>First()</c> when <paramref name="alignerResult"/> yields no alignments.
        /// </exception>
        public void UpdateCognicity(WordPair wordPair, IWordAlignerResult alignerResult)
        {
            wordPair.AlignmentNotes.Clear();
            int cat1Count     = 0;
            int cat1And2Count = 0;
            int totalCount    = 0;
            Alignment <Word, ShapeNode> alignment = alignerResult.GetAlignments().First();

            for (int column = 0; column < alignment.ColumnCount; column++)
            {
                // "u" is the cell of sequence 0, "v" the cell of sequence 1, each with its neighbours.
                ShapeNode       uLeftNode  = alignment.GetLeftNode(0, column);
                Ngram <Segment> u          = alignment[0, column].ToNgram(_segmentPool);
                ShapeNode       uRightNode = alignment.GetRightNode(0, column);
                ShapeNode       vLeftNode  = alignment.GetLeftNode(1, column);
                Ngram <Segment> v          = alignment[1, column].ToNgram(_segmentPool);
                ShapeNode       vRightNode = alignment.GetRightNode(1, column);

                // The column is "regular" when the value stored for v under the column's sound
                // context is at least 3 (presumably an occurrence count - confirm against the
                // distribution type). Evaluated for every column, exactly as before.
                bool regular = wordPair.VarietyPair.SoundChangeFrequencyDistribution[alignment.ToSoundContext(_segmentPool, 0, column, alignerResult.WordAligner.ContextualSoundClasses)][v] >= 3;

                // Category 3 is the fallback when none of the rules below applies.
                int cat = 3;
                if (u.Equals(v))
                {
                    cat = 1;
                }
                else if (_ignoredMappings.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode))
                {
                    cat = 0;
                }
                else if (u.Length == 0 || v.Length == 0)
                {
                    // One side is empty, i.e. the column is an insertion or deletion.
                    if (_similarSegments.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode) || regular)
                    {
                        cat = _ignoreRegularInsertionDeletion ? 0 : 1;
                    }
                }
                else if (u[0].Type == CogFeatureSystem.VowelType && v[0].Type == CogFeatureSystem.VowelType)
                {
                    cat = _similarSegments.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode) ? 1 : 2;
                }
                else if (u[0].Type == CogFeatureSystem.ConsonantType && v[0].Type == CogFeatureSystem.ConsonantType)
                {
                    if (_regularConsEqual)
                    {
                        // Regularity alone is enough for category 1; similarity only earns category 2.
                        if (regular)
                        {
                            cat = 1;
                        }
                        else if (_similarSegments.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode))
                        {
                            cat = 2;
                        }
                    }
                    else
                    {
                        // Similarity is required first; regularity then separates category 1 from 2.
                        if (_similarSegments.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode))
                        {
                            cat = regular ? 1 : 2;
                        }
                    }
                }

                if (cat > 0 && cat < 3)
                {
                    cat1And2Count++;
                    if (cat == 1)
                    {
                        cat1Count++;
                    }
                }
                wordPair.AlignmentNotes.Add(cat == 0 ? "-" : cat.ToString(CultureInfo.InvariantCulture));
                if (cat > 0)
                {
                    totalCount++;
                }
            }

            // Guard the denominators: 0.0 / 0 evaluates to NaN, which would end up in the stored score.
            double type1Score     = totalCount == 0 ? 0.0 : (double)cat1Count / totalCount;
            double type1And2Score = totalCount == 0 ? 0.0 : (double)cat1And2Count / totalCount;

            wordPair.AreCognatePredicted = type1Score >= 0.5 && type1And2Score >= 0.75;
            wordPair.CognicityScore      = (type1Score * 0.75) + (type1And2Score * 0.25);
        }