Пример #1
0
        /// <summary>
        /// Creates the view model for <paramref name="wordPair"/>: wraps its meaning and both
        /// varieties, aligns the pair with the project's primary word aligner and exposes the
        /// first resulting alignment as prefix, column and suffix nodes.
        /// </summary>
        /// <param name="projectService">Service whose project supplies the primary word aligner.</param>
        /// <param name="analysisService">Analysis service stored for later use by this view model.</param>
        /// <param name="wordPair">The word pair to present.</param>
        /// <param name="areVarietiesInOrder">Flag stored as-is on the view model.</param>
        public WordPairViewModel(IProjectService projectService, IAnalysisService analysisService, WordPair wordPair, bool areVarietiesInOrder)
        {
            _projectService = projectService;
            _analysisService = analysisService;
            _wordPair = wordPair;
            _areVarietiesInOrder = areVarietiesInOrder;

            _meaning = new MeaningViewModel(_wordPair.Word1.Meaning);
            _variety1 = new VarietyViewModel(_wordPair.VarietyPair.Variety1);
            _variety2 = new VarietyViewModel(_wordPair.VarietyPair.Variety2);

            // Only the first alignment produced by the primary aligner is displayed.
            _alignment = _projectService.Project.WordAligners[ComponentIdentifiers.PrimaryWordAligner]
                .Compute(_wordPair)
                .GetAlignments()
                .First();

            _prefixNode = new AlignedNodeViewModel(_alignment.Prefixes[0], _alignment.Prefixes[1]);

            var columnNodes = new List<AlignedNodeViewModel>();
            for (int column = 0; column < _alignment.ColumnCount; column++)
            {
                // A column gets a note only if the word pair already has one at that index.
                string note = column < _wordPair.AlignmentNotes.Count ? _wordPair.AlignmentNotes[column] : null;
                columnNodes.Add(new AlignedNodeViewModel(column, _alignment[0, column], _alignment[1, column], note));
            }

            _suffixNode = new AlignedNodeViewModel(_alignment.Suffixes[0], _alignment.Suffixes[1]);
            _alignedNodes = new ReadOnlyCollection<AlignedNodeViewModel>(columnNodes);

            _showInMultipleWordAlignmentCommand = new RelayCommand(ShowInMultipleWordAlignment);
            _pinUnpinCommand = new RelayCommand(PinUnpin);
        }
Пример #2
0
        /// <summary>
        /// Creates the view model for <paramref name="wordPair"/> using the supplied aligner and
        /// exposes the first alignment it produces as prefix, column and suffix nodes.
        /// </summary>
        /// <param name="aligner">Aligner used to compute the alignment of the pair.</param>
        /// <param name="wordPair">The word pair to present.</param>
        /// <param name="areVarietiesInOrder">Flag stored as-is on the view model.</param>
        public WordPairViewModel(IWordAligner aligner, WordPair wordPair, bool areVarietiesInOrder)
        {
            _wordPair = wordPair;
            _areVarietiesInOrder = areVarietiesInOrder;
            _meaning = new MeaningViewModel(_wordPair.Word1.Meaning);
            _variety1 = new VarietyViewModel(_wordPair.VarietyPair.Variety1);
            _variety2 = new VarietyViewModel(_wordPair.VarietyPair.Variety2);

            IWordAlignerResult alignerResult = aligner.Compute(_wordPair);
            _alignment = alignerResult.GetAlignments().First();

            _prefixNode = new AlignedNodeViewModel(_alignment.Prefixes[0], _alignment.Prefixes[1]);

            var columnNodes = new List<AlignedNodeViewModel>();
            for (int column = 0; column < _alignment.ColumnCount; column++)
            {
                // Columns beyond the stored alignment notes are created without a note.
                bool hasNote = column < _wordPair.AlignmentNotes.Count;
                columnNodes.Add(new AlignedNodeViewModel(
                    column,
                    _alignment[0, column],
                    _alignment[1, column],
                    hasNote ? _wordPair.AlignmentNotes[column] : null));
            }

            _suffixNode = new AlignedNodeViewModel(_alignment.Suffixes[0], _alignment.Suffixes[1]);
            _alignedNodes = new ReadOnlyCollection<AlignedNodeViewModel>(columnNodes);
        }
Пример #3
0
        /// <summary>
        /// Decides whether the correspondence ending in <paramref name="v"/> at the given alignment
        /// column is regular: its frequency among the variety pair's cognate sound correspondences
        /// for the column's sound context must reach the applicable threshold.
        /// </summary>
        /// <param name="wordPair">Word pair whose variety pair supplies the frequency distribution.</param>
        /// <param name="alignerResult">Aligner result; its aligner supplies the contextual sound classes.</param>
        /// <param name="alignment">Alignment that contains the column.</param>
        /// <param name="column">Index of the column being examined.</param>
        /// <param name="v">The n-gram whose frequency is tested.</param>
        /// <returns><c>true</c> if the frequency of <paramref name="v"/> is at least the threshold.</returns>
        private bool IsRegular(WordPair wordPair, IWordAlignerResult alignerResult, Alignment<Word, ShapeNode> alignment, int column,
                               Ngram<Segment> v)
        {
            VarietyPair varietyPair = wordPair.VarietyPair;
            SoundContext context = alignment.ToSoundContext(_segmentPool, 0, column, alignerResult.WordAligner.ContextualSoundClasses);
            FrequencyDistribution<Ngram<Segment>> outcomes = varietyPair.CognateSoundCorrespondenceFrequencyDistribution[context];

            int threshold;
            if (!AutomaticRegularCorrespondenceThreshold)
            {
                threshold = DefaultRegularCorrespondenceThreshold;
            }
            else
            {
                // Total count of v over every condition that shares this context's left and right environments.
                var sameEnvironment = varietyPair.CognateSoundCorrespondenceFrequencyDistribution.Conditions
                    .Where(sc => sc.LeftEnvironment == context.LeftEnvironment && sc.RightEnvironment == context.RightEnvironment);
                int seg2Count = sameEnvironment.Sum(sc => varietyPair.CognateSoundCorrespondenceFrequencyDistribution[sc][v]);

                bool found = _regularCorrespondenceThresholdTable.TryGetThreshold(
                    varietyPair.CognateCount, outcomes.SampleOutcomeCount, seg2Count, out threshold);
                if (!found)
                {
                    // The table has no entry for these counts: fall back to the default threshold.
                    threshold = DefaultRegularCorrespondenceThreshold;
                }
            }

            return outcomes[v] >= threshold;
        }
Пример #4
0
        /// <summary>
        /// Recomputes the alignment notes, predicted cognate flag and cognicity score of
        /// <paramref name="wordPair"/> from the first alignment in <paramref name="alignerResult"/>.
        /// Each column is noted "X" (a vowel is involved), "1" (cells equivalent) or "0" (mismatch).
        /// </summary>
        /// <param name="wordPair">The word pair to update.</param>
        /// <param name="alignerResult">Aligner result that supplies the alignment.</param>
        public void UpdateCognicity(WordPair wordPair, IWordAlignerResult alignerResult)
        {
            wordPair.AlignmentNotes.Clear();
            Alignment<Word, ShapeNode> alignment = alignerResult.GetAlignments().First();

            int leadingMatches = 0;   // equivalent columns seen before the first mismatch
            bool sawMismatch = false;

            for (int col = 0; col < alignment.ColumnCount; col++)
            {
                AlignmentCell<ShapeNode> first = alignment[0, col];
                AlignmentCell<ShapeNode> second = alignment[1, col];

                bool firstIsVowel = first.Count > 0 && first[0].Type() == CogFeatureSystem.VowelType;
                if (firstIsVowel || (second.Count > 0 && second[0].Type() == CogFeatureSystem.VowelType))
                {
                    // Columns involving a vowel are marked and excluded from the match/mismatch tally.
                    wordPair.AlignmentNotes.Add("X");
                    continue;
                }

                bool equivalent = first.StrRep() == second.StrRep();
                if (!equivalent)
                {
                    // Different strings still count as equivalent when both sides map to the same sound class.
                    SoundClass class1;
                    SoundClass class2;
                    bool found1 = _soundClasses.TryGetMatchingSoundClass(_segmentPool, alignment, 0, col, out class1);
                    bool found2 = _soundClasses.TryGetMatchingSoundClass(_segmentPool, alignment, 1, col, out class2);
                    equivalent = found1 && found2 && class1 != null && class2 != null && class1 == class2;
                }

                if (!equivalent)
                {
                    wordPair.AlignmentNotes.Add("0");
                    sawMismatch = true;
                    continue;
                }

                wordPair.AlignmentNotes.Add("1");
                if (!sawMismatch)
                {
                    leadingMatches++;
                }
            }

            wordPair.AreCognatePredicted = !sawMismatch || leadingMatches >= _initialEquivalenceThreshold;
            // NOTE(review): with zero columns this division is 0/0 and yields NaN.
            wordPair.CognicityScore = (double)leadingMatches / alignment.ColumnCount;
        }
Пример #5
0
        /// <summary>
        /// Sets the cognicity score of <paramref name="wordPair"/> to the mean normalized score of
        /// all alignments in <paramref name="alignerResult"/>, and predicts the pair as cognate when
        /// that score reaches the configured threshold.
        /// </summary>
        /// <param name="wordPair">The word pair to update.</param>
        /// <param name="alignerResult">Aligner result whose alignments are averaged.</param>
        public void UpdateCognicity(WordPair wordPair, IWordAlignerResult alignerResult)
        {
            double scoreSum = 0.0;
            int seen = 0;

            foreach (var candidate in alignerResult.GetAlignments())
            {
                seen++;
                scoreSum += candidate.NormalizedScore;
            }

            // NOTE(review): when there are no alignments this is 0.0 / 0, i.e. NaN.
            wordPair.CognicityScore = scoreSum / seen;
            wordPair.AreCognatePredicted = wordPair.CognicityScore >= _threshold;
        }
Пример #6
0
        /// <summary>
        /// Reads lines of two space-separated words, aligns each pair with the project's
        /// "primary" word aligner and writes both words followed by the raw or normalized score
        /// of the first alignment. Lines that do not split or parse into two words are recorded
        /// in <c>Errors</c> and skipped.
        /// </summary>
        /// <param name="inputReader">Source of the input lines.</param>
        /// <param name="outputWriter">Receives one result line per word pair (plus the alignment when verbose).</param>
        /// <param name="errorWriter">Not used by this method.</param>
        /// <returns>Always <c>ReturnCode.Okay</c>.</returns>
        protected override ReturnCode DoWork(TextReader inputReader, TextWriter outputWriter, TextWriter errorWriter)
        {
            // Exactly one score kind must be active; if none or both were chosen, warn and use normalized.
            bool rawChosen = RawScores;
            bool normalizedChosen = NormalizedScores;
            if (rawChosen == normalizedChosen)
            {
                Warnings.Add(rawChosen
                    ? "Please specify either raw or normalized scores, but not both. Defaulting to normalized."
                    : "Neither raw scores nor normalized scores were selected. Defaulting to normalized.");
                RawScores = false;
                NormalizedScores = true;
            }

            SetupProject();
            Meaning meaning = MeaningFactory.Create();
            IWordAligner wordAligner = Project.WordAligners["primary"];

            foreach (string line in ReadLines(inputReader))
            {
                string[] tokens = line.Split(' ');
                if (tokens.Length != 2)
                {
                    Errors.Add(line, "Each line should have two space-separated words in it.");
                    continue;
                }

                Word[] words = tokens.Select(token => ParseWordOnce(token, meaning, Project)).ToArray();
                if (words.Length != 2 || words.Any(w => w == null))
                {
                    Errors.Add(line, "One or more of this line's words failed to parse. Successfully parsed words: {0}",
                               string.Join(", ", words.Where(w => w != null).Select(w => w.StrRep)));
                    continue;
                }

                Word left = words[0];
                Word right = words[1];
                Alignment<Word, ShapeNode> alignment = wordAligner.Compute(left, right).GetAlignments().First();

                outputWriter.WriteLine("{0} {1} {2}", left.StrRep, right.StrRep,
                                       RawScores ? alignment.RawScore : alignment.NormalizedScore);
                if (Verbose)
                {
                    outputWriter.Write(alignment.ToString(Enumerable.Empty<string>()));
                    outputWriter.WriteLine();
                }
            }

            return ReturnCode.Okay;
        }
Пример #7
0
        /// <summary>
        /// Recomputes the alignment notes, predicted cognacy and predicted cognacy score of
        /// <paramref name="wordPair"/> from the first alignment in <paramref name="alignerResult"/>.
        /// Each column is noted "X" (a vowel is involved), "1" (cells equivalent) or "0" (mismatch).
        /// </summary>
        /// <param name="wordPair">The word pair to update.</param>
        /// <param name="alignerResult">Aligner result that supplies the alignment.</param>
        public void UpdatePredictedCognacy(WordPair wordPair, IWordAlignerResult alignerResult)
        {
            wordPair.AlignmentNotes.Clear();
            Alignment<Word, ShapeNode> alignment = alignerResult.GetAlignments().First();

            int matchesBeforeMismatch = 0;
            bool mismatchSeen = false;

            for (int col = 0; col < alignment.ColumnCount; col++)
            {
                AlignmentCell<ShapeNode> left = alignment[0, col];
                AlignmentCell<ShapeNode> right = alignment[1, col];

                // Work out the note for this column first, then update the counters from it.
                string note;
                if ((left.Count > 0 && left[0].Type() == CogFeatureSystem.VowelType)
                    || (right.Count > 0 && right[0].Type() == CogFeatureSystem.VowelType))
                {
                    note = "X";
                }
                else if (left.StrRep() == right.StrRep())
                {
                    note = "1";
                }
                else
                {
                    // Both lookups always run (non-short-circuit &), first for row 0 and then for row 1.
                    SoundClass leftClass;
                    SoundClass rightClass;
                    bool bothFound = _soundClasses.TryGetMatchingSoundClass(_segmentPool, alignment, 0, col, out leftClass)
                                     & _soundClasses.TryGetMatchingSoundClass(_segmentPool, alignment, 1, col, out rightClass);
                    bool sameClass = bothFound && leftClass != null && rightClass != null && leftClass == rightClass;
                    note = sameClass ? "1" : "0";
                }

                wordPair.AlignmentNotes.Add(note);
                if (note == "1")
                {
                    if (!mismatchSeen)
                    {
                        matchesBeforeMismatch++;
                    }
                }
                else if (note == "0")
                {
                    mismatchSeen = true;
                }
            }

            wordPair.PredictedCognacy = !mismatchSeen || matchesBeforeMismatch >= _initialEquivalenceThreshold;
            wordPair.PredictedCognacyScore = (double)matchesBeforeMismatch / alignment.ColumnCount;
        }
Пример #8
0
        /// <summary>
        /// Re-aligns every word pair of <paramref name="pair"/>, refreshes each pair's predicted
        /// cognacy and phonetic similarity score, and stores on the variety pair the cognate count,
        /// the cognate sound correspondence distribution and both similarity scores.
        /// </summary>
        /// <param name="pair">The variety pair to update.</param>
        private void E(VarietyPair pair)
        {
            ICognateIdentifier cognateIdentifier = _project.CognateIdentifiers[CognateIdentifierId];
            var correspondenceCounts = new ConditionalFrequencyDistribution<SoundContext, Ngram<Segment>>();
            IWordAligner aligner = _project.WordAligners[AlignerId];

            int cognates = 0;
            double scoreSum = 0;

            foreach (WordPair wordPair in pair.WordPairs)
            {
                IWordAlignerResult alignerResult = aligner.Compute(wordPair);
                cognateIdentifier.UpdatePredictedCognacy(wordPair, alignerResult);
                Alignment<Word, ShapeNode> alignment = alignerResult.GetAlignments().First();

                if (wordPair.Cognacy)
                {
                    cognates++;
                    // Count, per column, the row-1 n-gram observed in the sound context of row 0.
                    for (int col = 0; col < alignment.ColumnCount; col++)
                    {
                        SoundContext context = alignment.ToSoundContext(_segmentPool, 0, col, aligner.ContextualSoundClasses);
                        Ngram<Segment> correspondence = alignment[1, col].ToNgram(_segmentPool);
                        correspondenceCounts[context].Increment(correspondence);
                    }
                }

                wordPair.PhoneticSimilarityScore = alignment.NormalizedScore;
                scoreSum += wordPair.PhoneticSimilarityScore;
            }

            pair.CognateCount = cognates;
            pair.CognateSoundCorrespondenceFrequencyDistribution = correspondenceCounts;

            // Both similarity scores are 0 for a variety pair without word pairs.
            bool hasWordPairs = pair.WordPairs.Count != 0;
            pair.LexicalSimilarityScore = hasWordPairs ? (double)cognates / pair.WordPairs.Count : 0;
            pair.PhoneticSimilarityScore = hasWordPairs ? scoreSum / pair.WordPairs.Count : 0;
        }
Пример #9
0
        /// <summary>
        /// Writes the given word pairs, highest phonetic similarity first. Each entry consists of
        /// the gloss (with the category in parentheses when present), the first alignment rendered
        /// with the pair's alignment notes, and the similarity as a percentage. Entries are
        /// separated by one blank line.
        /// </summary>
        /// <param name="writer">Destination of the output.</param>
        /// <param name="aligner">Aligner used to compute each pair's alignment.</param>
        /// <param name="wordPairs">The word pairs to write.</param>
        private static void WriteWordPairs(StreamWriter writer, IWordAligner aligner, IEnumerable<WordPair> wordPairs)
        {
            bool needsSeparator = false;

            foreach (WordPair pair in wordPairs.OrderByDescending(wp => wp.PhoneticSimilarityScore))
            {
                if (needsSeparator)
                {
                    writer.WriteLine();
                }
                needsSeparator = true;

                Alignment<Word, ShapeNode> alignment = aligner.Compute(pair).GetAlignments().First();

                var meaning = pair.Word1.Meaning;
                writer.Write(meaning.Gloss);
                var category = meaning.Category;
                if (!string.IsNullOrEmpty(category))
                {
                    writer.Write(" ({0})", category);
                }
                writer.WriteLine();

                writer.Write(alignment.ToString(pair.AlignmentNotes));
                writer.WriteLine("Similarity: {0:p}", pair.PhoneticSimilarityScore);
            }
        }
Пример #10
0
        /// <summary>
        /// Rebuilds the <c>_words</c> collection for the currently selected meaning: gathers the
        /// words of that meaning from every variety pair's word pair, aligns them (or builds a
        /// one-sequence alignment when there is a single word) and adds one
        /// <see cref="MultipleWordAlignmentWordViewModel"/> per aligned sequence.
        /// Does nothing when no meaning is selected; clears <c>_words</c> when no words are found.
        /// </summary>
        private void AlignWords()
        {
            if (_selectedMeaning == null)
            {
                return;
            }

            _busyService.ShowBusyIndicatorUntilFinishDrawing();

            // Collect the distinct words that take part in a word pair for the selected meaning.
            var words = new HashSet<Word>();
            foreach (VarietyPair vp in _projectService.Project.VarietyPairs)
            {
                WordPair wp;
                if (vp.WordPairs.TryGetValue(_selectedMeaning.DomainMeaning, out wp))
                {
                    words.Add(wp.Word1);
                    words.Add(wp.Word2);
                }
            }
            if (words.Count == 0)
            {
                _words.Clear();
                return;
            }

            IWordAligner aligner = _projectService.Project.WordAligners[ComponentIdentifiers.PrimaryWordAligner];
            Alignment<Word, ShapeNode> alignment;

            if (words.Count == 1)
            {
                // A single word is not passed to the aligner: build an alignment directly from its
                // prefix, stem and suffix nodes, one column per stem node that passes NodeFilter.
                Word word = words.First();
                Annotation<ShapeNode> prefixAnn = word.Prefix;
                var prefix = new AlignmentCell<ShapeNode>(prefixAnn != null ? word.Shape.GetNodes(prefixAnn.Span).Where(NodeFilter) : Enumerable.Empty<ShapeNode>());
                IEnumerable<AlignmentCell<ShapeNode>> columns = word.Shape.GetNodes(word.Stem.Span).Where(NodeFilter).Select(n => new AlignmentCell<ShapeNode>(n));
                Annotation<ShapeNode> suffixAnn = word.Suffix;
                var suffix = new AlignmentCell<ShapeNode>(suffixAnn != null ? word.Shape.GetNodes(suffixAnn.Span).Where(NodeFilter) : Enumerable.Empty<ShapeNode>());
                alignment = new Alignment<Word, ShapeNode>(0, 0, Tuple.Create(word, prefix, columns, suffix));
            }
            else
            {
                IWordAlignerResult result = aligner.Compute(words);
                alignment = result.GetAlignments().First();
            }

            // Cognate sets ordered by Noise (ascending), then by descending member count.
            List<Cluster<Word>> cognateSets = _projectService.Project.GenerateCognateSets(_selectedMeaning.DomainMeaning).OrderBy(c => c.Noise).ThenByDescending(c => c.DataObjects.Count).ToList();

            ColumnCount = alignment.ColumnCount;
            using (_words.BulkUpdate())
            {
                _words.Clear();
                for (int i = 0; i < alignment.SequenceCount; i++)
                {
                    // The column sequence below is lazy. A lambda that captured the for-loop
                    // variable "i" itself would share it across all iterations and read the wrong
                    // row (or go out of range) if enumerated later, so capture a per-iteration copy.
                    int sequenceIndex = i;

                    AlignmentCell<ShapeNode> prefix = alignment.Prefixes[sequenceIndex];
                    Word word = alignment.Sequences[sequenceIndex];
                    IEnumerable<AlignmentCell<ShapeNode>> columns = Enumerable.Range(0, alignment.ColumnCount).Select(col => alignment[sequenceIndex, col]);
                    AlignmentCell<ShapeNode> suffix = alignment.Suffixes[sequenceIndex];

                    // A word found in the last set of the ordering is passed int.MaxValue; otherwise
                    // the value passed is the set's index plus one (FindIndex yields -1 when no set contains the word).
                    int cognateSetIndex = cognateSets.FindIndex(set => set.DataObjects.Contains(word));
                    _words.Add(new MultipleWordAlignmentWordViewModel(word, prefix, columns, suffix, cognateSetIndex == cognateSets.Count - 1 ? int.MaxValue : cognateSetIndex + 1));
                }
            }
        }
Пример #11
0
        /// <summary>
        /// Rebuilds the word pairs of <paramref name="varietyPair"/>, one per meaning for which both
        /// varieties have at least one word with a non-empty shape, and stores the resulting cognate
        /// count and cognate sound correspondence distribution on the variety pair.
        /// Meanings with exactly one word on each side are paired directly. Meanings with several
        /// candidate words are first paired by best raw alignment score and then revisited in a
        /// second pass that picks, per meaning, the candidate pair ranked highest by <c>Compare</c>.
        /// </summary>
        /// <param name="varietyPair">The variety pair whose word pairs are regenerated.</param>
        public void Process(VarietyPair varietyPair)
        {
            IWordAligner aligner           = _project.WordAligners[_alignerID];
            // For each ambiguous meaning: the meaning, the aligner result with the maximum
            // BestRawScore, and the aligner results of every word1 x word2 combination.
            var          ambiguousMeanings = new List <Tuple <Meaning, IWordAlignerResult, IWordAlignerResult[]> >();

            varietyPair.WordPairs.Clear();
            // Running totals built from the word pairs whose Cognacy flag is set.
            var cognateCorrCounts = new ConditionalFrequencyDistribution <SoundContext, Ngram <Segment> >();
            int cognateCount      = 0;

            // First pass: iterate the meanings of Variety1 and create an initial word pair for each usable one.
            foreach (Meaning meaning in varietyPair.Variety1.Words.Meanings)
            {
                // Words with an empty shape are ignored.
                Word[] words1 = varietyPair.Variety1.Words[meaning].Where(w => w.Shape.Count > 0).ToArray();
                Word[] words2 = varietyPair.Variety2.Words[meaning].Where(w => w.Shape.Count > 0).ToArray();

                if (words1.Length == 1 && words2.Length == 1)
                {
                    // Unambiguous meaning: exactly one word on each side.
                    Word     word1 = words1.Single();
                    Word     word2 = words2.Single();
                    WordPair wp    = varietyPair.WordPairs.Add(word1, word2);
                    _project.CognacyDecisions.UpdateActualCognacy(wp);
                    IWordAlignerResult alignerResult = aligner.Compute(wp);
                    _thresholdCognateIdentifier.UpdatePredictedCognacy(wp, alignerResult);
                    Alignment <Word, ShapeNode> alignment = alignerResult.GetAlignments().First();
                    if (wp.Cognacy)
                    {
                        UpdateCognateCorrespondenceCounts(aligner, cognateCorrCounts, alignment);
                        cognateCount++;
                    }
                    wp.PhoneticSimilarityScore = alignment.NormalizedScore;
                }
                else if (words1.Length > 0 && words2.Length > 0)
                {
                    // Ambiguous meaning: align every combination, provisionally keep the one with the
                    // highest BestRawScore, and remember all candidates for the second pass.
                    IWordAlignerResult[] alignerResults   = words1.SelectMany(w1 => words2.Select(w2 => aligner.Compute(w1, w2))).ToArray();
                    IWordAlignerResult   maxAlignerResult = alignerResults.MaxBy(a => a.BestRawScore);
                    ambiguousMeanings.Add(Tuple.Create(meaning, maxAlignerResult, alignerResults));
                    WordPair wp = varietyPair.WordPairs.Add(maxAlignerResult.Words[0], maxAlignerResult.Words[1]);
                    _thresholdCognateIdentifier.UpdatePredictedCognacy(wp, maxAlignerResult);
                }
            }

            ICognateIdentifier cognateIdentifier = _project.CognateIdentifiers[_cognateIdentifierID];

            // Second pass: settle the ambiguous meanings one at a time, in the order they were found.
            for (int i = 0; i < ambiguousMeanings.Count; i++)
            {
                // Start from the running totals and add the contribution of the ambiguous meanings
                // that come after this one, using their current word pair and max-score alignment.
                ConditionalFrequencyDistribution <SoundContext, Ngram <Segment> > newCognateCorrCounts = cognateCorrCounts.Clone();
                int newCognateCount = cognateCount;
                for (int j = i + 1; j < ambiguousMeanings.Count; j++)
                {
                    if (varietyPair.WordPairs[ambiguousMeanings[j].Item1].Cognacy)
                    {
                        UpdateCognateCorrespondenceCounts(aligner, newCognateCorrCounts, ambiguousMeanings[j].Item2.GetAlignments().First());
                        newCognateCount++;
                    }
                }

                IWordAlignerResult bestAlignerResult = null;
                WordPair           bestWordPair      = null;
                // Try every candidate combination for this meaning.
                foreach (IWordAlignerResult alignerResult in ambiguousMeanings[i].Item3)
                {
                    ConditionalFrequencyDistribution <SoundContext, Ngram <Segment> > alignmentCognateCorrCounts = newCognateCorrCounts.Clone();
                    int alignmentCognateCount             = newCognateCount;
                    Alignment <Word, ShapeNode> alignment = alignerResult.GetAlignments().First();
                    // Replace the word pair currently stored for this meaning with the candidate.
                    varietyPair.WordPairs.Remove(ambiguousMeanings[i].Item1);
                    WordPair wordPair = varietyPair.WordPairs.Add(alignerResult.Words[0], alignerResult.Words[1]);
                    _thresholdCognateIdentifier.UpdatePredictedCognacy(wordPair, alignerResult);
                    if (wordPair.Cognacy)
                    {
                        UpdateCognateCorrespondenceCounts(aligner, alignmentCognateCorrCounts, alignment);
                        alignmentCognateCount++;
                    }
                    // Temporarily publish the candidate's totals on the variety pair before running the
                    // configured cognate identifier (presumably because it reads them - verify against the identifier).
                    varietyPair.CognateCount = alignmentCognateCount;
                    varietyPair.CognateSoundCorrespondenceFrequencyDistribution = alignmentCognateCorrCounts;
                    cognateIdentifier.UpdatePredictedCognacy(wordPair, alignerResult);
                    wordPair.PhoneticSimilarityScore = alignment.NormalizedScore;
                    // Keep the candidate that Compare ranks highest; the first candidate wins ties.
                    if (bestWordPair == null || Compare(wordPair, bestWordPair) > 0)
                    {
                        bestWordPair      = wordPair;
                        bestAlignerResult = alignerResult;
                    }
                }

                Debug.Assert(bestWordPair != null);
                // Install the winning pair for this meaning and fold it into the running totals.
                varietyPair.WordPairs.Remove(ambiguousMeanings[i].Item1);
                varietyPair.WordPairs.Add(bestWordPair);
                _project.CognacyDecisions.UpdateActualCognacy(bestWordPair);
                if (bestWordPair.Cognacy)
                {
                    UpdateCognateCorrespondenceCounts(aligner, cognateCorrCounts, bestAlignerResult.GetAlignments().First());
                    cognateCount++;
                }
            }

            // Overwrite the temporary per-candidate values with the final totals.
            varietyPair.CognateCount = cognateCount;
            varietyPair.CognateSoundCorrespondenceFrequencyDistribution = cognateCorrCounts;
        }
Пример #12
0
        /// <summary>
        /// Classifies every column of the first alignment into a category (0 = ignored, 1, 2 or 3),
        /// records the category as the column's alignment note ("-" for 0), and derives the word
        /// pair's predicted cognacy and predicted cognacy score from the category counts.
        /// </summary>
        /// <param name="wordPair">The word pair to update.</param>
        /// <param name="alignerResult">Aligner result that supplies the alignment.</param>
        public void UpdatePredictedCognacy(WordPair wordPair, IWordAlignerResult alignerResult)
        {
            wordPair.AlignmentNotes.Clear();

            int categoryOneColumns = 0;
            int categoryOneOrTwoColumns = 0;
            int countedColumns = 0;   // columns whose category is not 0

            Alignment<Word, ShapeNode> alignment = alignerResult.GetAlignments().First();
            for (int col = 0; col < alignment.ColumnCount; col++)
            {
                ShapeNode uLeft = alignment.GetLeftNode(0, col);
                Ngram<Segment> u = alignment[0, col].ToNgram(_segmentPool);
                ShapeNode uRight = alignment.GetRightNode(0, col);
                ShapeNode vLeft = alignment.GetLeftNode(1, col);
                Ngram<Segment> v = alignment[1, col].ToNgram(_segmentPool);
                ShapeNode vRight = alignment.GetRightNode(1, col);

                // Category 3 is the default when none of the rules below assigns another one.
                int category = 3;
                if (u.Equals(v))
                {
                    category = 1;
                }
                else if (_ignoredMappings.IsMapped(uLeft, u, uRight, vLeft, v, vRight))
                {
                    category = 0;
                }
                else if (u.Length == 0 || v.Length == 0)
                {
                    // One side of the column is empty.
                    if (_similarSegments.IsMapped(uLeft, u, uRight, vLeft, v, vRight))
                    {
                        category = 1;
                    }
                    else if (_ignoreRegularInsertionDeletion && IsRegular(wordPair, alignerResult, alignment, col, v))
                    {
                        category = 0;
                    }
                }
                else if (u[0].Type == CogFeatureSystem.VowelType && v[0].Type == CogFeatureSystem.VowelType)
                {
                    category = _similarSegments.IsMapped(uLeft, u, uRight, vLeft, v, vRight) ? 1 : 2;
                }
                else if (u[0].Type == CogFeatureSystem.ConsonantType && v[0].Type == CogFeatureSystem.ConsonantType)
                {
                    if (_regularConsEqual)
                    {
                        // Regularity is tested first; similarity is only consulted when it fails.
                        if (IsRegular(wordPair, alignerResult, alignment, col, v))
                        {
                            category = 1;
                        }
                        else if (_similarSegments.IsMapped(uLeft, u, uRight, vLeft, v, vRight))
                        {
                            category = 2;
                        }
                    }
                    else if (_similarSegments.IsMapped(uLeft, u, uRight, vLeft, v, vRight))
                    {
                        category = IsRegular(wordPair, alignerResult, alignment, col, v) ? 1 : 2;
                    }
                }

                if (category == 1)
                {
                    categoryOneColumns++;
                }
                if (category == 1 || category == 2)
                {
                    categoryOneOrTwoColumns++;
                }
                if (category != 0)
                {
                    countedColumns++;
                }
                wordPair.AlignmentNotes.Add(category == 0 ? "-" : category.ToString(CultureInfo.InvariantCulture));
            }

            // NOTE(review): if every column is category 0 (or there are no columns) these are 0/0 = NaN.
            double type1Score = (double)categoryOneColumns / countedColumns;
            double type1And2Score = (double)categoryOneOrTwoColumns / countedColumns;
            wordPair.PredictedCognacy = type1Score >= 0.5 && type1And2Score >= 0.75;
            wordPair.PredictedCognacyScore = (type1Score * 0.75) + (type1And2Score * 0.25);
        }
Пример #13
0
        /// <summary>
        /// Rebuilds the word pairs of <paramref name="varietyPair"/>, one per meaning for which both
        /// varieties have at least one word with a non-empty shape. When a meaning has several
        /// candidate words, the combination whose aligner result has the highest BestRawScore is
        /// chosen. Also stores the cognate count and the cognate sound correspondence distribution
        /// on the variety pair.
        /// </summary>
        /// <param name="varietyPair">The variety pair whose word pairs are regenerated.</param>
        public void Process(VarietyPair varietyPair)
        {
            IWordAligner aligner = _project.WordAligners[_alignerID];

            varietyPair.WordPairs.Clear();
            var correspondenceCounts = new ConditionalFrequencyDistribution<SoundContext, Ngram<Segment>>();
            int cognates = 0;

            foreach (Meaning meaning in varietyPair.Variety1.Words.Meanings)
            {
                Word[] candidates1 = varietyPair.Variety1.Words[meaning].Where(w => w.Shape.Count > 0).ToArray();
                Word[] candidates2 = varietyPair.Variety2.Words[meaning].Where(w => w.Shape.Count > 0).ToArray();
                if (candidates1.Length == 0 || candidates2.Length == 0)
                {
                    // No usable word on at least one side: this meaning gets no word pair.
                    continue;
                }

                WordPair chosenPair = null;
                IWordAlignerResult chosenResult = null;
                if (candidates1.Length == 1 && candidates2.Length == 1)
                {
                    chosenPair = varietyPair.WordPairs.Add(candidates1[0], candidates2[0]);
                    _project.CognacyDecisions.UpdateActualCognacy(chosenPair);
                    chosenResult = aligner.Compute(chosenPair);
                }
                else
                {
                    // Several candidates: keep the first combination with the strictly highest raw score.
                    foreach (Word w1 in candidates1)
                    {
                        foreach (Word w2 in candidates2)
                        {
                            IWordAlignerResult candidateResult = aligner.Compute(w1, w2);
                            if (chosenResult == null || candidateResult.BestRawScore > chosenResult.BestRawScore)
                            {
                                chosenPair = new WordPair(w1, w2);
                                chosenResult = candidateResult;
                            }
                        }
                    }

                    Debug.Assert(chosenPair != null);
                    varietyPair.WordPairs.Add(chosenPair);
                    _project.CognacyDecisions.UpdateActualCognacy(chosenPair);
                }

                // From here on both cases are handled identically.
                _thresholdCognateIdentifier.UpdatePredictedCognacy(chosenPair, chosenResult);
                Alignment<Word, ShapeNode> alignment = chosenResult.GetAlignments().First();
                if (chosenPair.Cognacy)
                {
                    UpdateCognateCorrespondenceCounts(aligner, correspondenceCounts, alignment);
                    cognates++;
                }
                chosenPair.PhoneticSimilarityScore = alignment.NormalizedScore;
            }

            varietyPair.CognateCount = cognates;
            varietyPair.CognateSoundCorrespondenceFrequencyDistribution = correspondenceCounts;
        }
Пример #14
0
 /// <summary>
 /// Decides whether the correspondence ending in <paramref name="v"/> at the given alignment
 /// column is regular: its frequency among the variety pair's cognate sound correspondences
 /// for the column's sound context must reach the applicable threshold.
 /// </summary>
 /// <param name="wordPair">Word pair whose variety pair supplies the frequency distribution.</param>
 /// <param name="alignerResult">Aligner result; its aligner supplies the contextual sound classes.</param>
 /// <param name="alignment">Alignment that contains the column.</param>
 /// <param name="column">Index of the column being examined.</param>
 /// <param name="v">The n-gram whose frequency is tested.</param>
 /// <returns><c>true</c> if the frequency of <paramref name="v"/> is at least the threshold.</returns>
 private bool IsRegular(WordPair wordPair, IWordAlignerResult alignerResult, Alignment<Word, ShapeNode> alignment, int column, Ngram<Segment> v)
 {
     VarietyPair varietyPair = wordPair.VarietyPair;
     SoundContext context = alignment.ToSoundContext(_segmentPool, 0, column, alignerResult.WordAligner.ContextualSoundClasses);
     FrequencyDistribution<Ngram<Segment>> outcomes = varietyPair.CognateSoundCorrespondenceFrequencyDistribution[context];

     int threshold = 0;
     bool tableHit = false;
     if (_automaticRegularCorrespondenceThreshold)
     {
         // Total count of v over every condition that shares this context's left and right environments.
         int seg2Count = varietyPair.CognateSoundCorrespondenceFrequencyDistribution.Conditions
             .Where(sc => sc.LeftEnvironment == context.LeftEnvironment && sc.RightEnvironment == context.RightEnvironment)
             .Sum(sc => varietyPair.CognateSoundCorrespondenceFrequencyDistribution[sc][v]);
         tableHit = _regularCorrespondenceThresholdTable.TryGetThreshold(varietyPair.CognateCount, outcomes.SampleOutcomeCount, seg2Count, out threshold);
     }

     // Use the default when automatic thresholds are off or the table has no entry.
     if (!tableHit)
     {
         threshold = _defaultRegularCorrepondenceThreshold;
     }

     return outcomes[v] >= threshold;
 }
Пример #15
0
 /// <summary>
 /// Sets the cognacy score of <paramref name="wordPair"/> to the normalized score of the first
 /// alignment in <paramref name="alignerResult"/> and predicts the pair as cognate when that
 /// score reaches the configured threshold.
 /// </summary>
 /// <param name="wordPair">The word pair to update.</param>
 /// <param name="alignerResult">Aligner result that supplies the alignment.</param>
 public void UpdateCognacy(WordPair wordPair, IWordAlignerResult alignerResult)
 {
     var firstAlignment = alignerResult.GetAlignments().First();
     wordPair.CognacyScore = firstAlignment.NormalizedScore;

     // The comparison reads the score back from the word pair, as stored.
     bool reachesThreshold = wordPair.CognacyScore >= _threshold;
     wordPair.AreCognatePredicted = reachesThreshold;
 }
Пример #16
0
        /// <summary>
        /// Classifies every column of the first alignment, rewrites <c>wordPair.AlignmentNotes</c>
        /// with one note per column, and derives the predicted-cognate flag and cognicity score
        /// from the category counts.
        /// </summary>
        /// <remarks>
        /// Each column gets a category: 1, 2 or 3, or 0 when the column is ignored. Ignored
        /// columns are noted as "-" and excluded from the totals. When no column is counted
        /// (all ignored, or the alignment has no columns) both ratios are taken as 0 instead of
        /// dividing by zero, so the score is 0 rather than NaN and the pair is not predicted cognate.
        /// </remarks>
        /// <param name="wordPair">Word pair whose notes, prediction and score are updated.</param>
        /// <param name="alignerResult">Aligner result providing the alignment and the word aligner.</param>
        public void UpdateCognicity(WordPair wordPair, IWordAlignerResult alignerResult)
        {
            wordPair.AlignmentNotes.Clear();
            int cat1Count     = 0;
            int cat1And2Count = 0;
            int totalCount    = 0;
            Alignment<Word, ShapeNode> alignment = alignerResult.GetAlignments().First();

            for (int column = 0; column < alignment.ColumnCount; column++)
            {
                ShapeNode      uLeftNode  = alignment.GetLeftNode(0, column);
                Ngram<Segment> u          = alignment[0, column].ToNgram(_segmentPool);
                ShapeNode      uRightNode = alignment.GetRightNode(0, column);
                ShapeNode      vLeftNode  = alignment.GetLeftNode(1, column);
                Ngram<Segment> v          = alignment[1, column].ToNgram(_segmentPool);
                ShapeNode      vRightNode = alignment.GetRightNode(1, column);

                // A correspondence is treated as regular once v has been seen at least 3 times
                // in this column's sound context.
                bool regular = wordPair.VarietyPair.SoundChangeFrequencyDistribution[alignment.ToSoundContext(_segmentPool, 0, column, alignerResult.WordAligner.ContextualSoundClasses)][v] >= 3;

                // Default category when none of the rules below applies.
                int cat = 3;
                if (u.Equals(v))
                {
                    cat = 1;
                }
                else if (_ignoredMappings.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode))
                {
                    cat = 0;
                }
                else if (u.Length == 0 || v.Length == 0)
                {
                    // One side is empty: an insertion or deletion.
                    if (_similarSegments.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode) || regular)
                    {
                        cat = _ignoreRegularInsertionDeletion ? 0 : 1;
                    }
                }
                else if (u[0].Type == CogFeatureSystem.VowelType && v[0].Type == CogFeatureSystem.VowelType)
                {
                    cat = _similarSegments.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode) ? 1 : 2;
                }
                else if (u[0].Type == CogFeatureSystem.ConsonantType && v[0].Type == CogFeatureSystem.ConsonantType)
                {
                    if (_regularConsEqual)
                    {
                        // Regularity alone is enough for category 1; similarity alone gives 2.
                        if (regular)
                        {
                            cat = 1;
                        }
                        else if (_similarSegments.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode))
                        {
                            cat = 2;
                        }
                    }
                    else
                    {
                        // Similarity is required; regularity then decides between 1 and 2.
                        if (_similarSegments.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode))
                        {
                            cat = regular ? 1 : 2;
                        }
                    }
                }

                if (cat > 0 && cat < 3)
                {
                    cat1And2Count++;
                    if (cat == 1)
                    {
                        cat1Count++;
                    }
                }
                wordPair.AlignmentNotes.Add(cat == 0 ? "-" : cat.ToString(CultureInfo.InvariantCulture));
                if (cat > 0)
                {
                    totalCount++;
                }
            }

            // Guard the denominator: with no counted columns the ratios would be 0.0/0 = NaN.
            double type1Score     = totalCount == 0 ? 0.0 : (double)cat1Count / totalCount;
            double type1And2Score = totalCount == 0 ? 0.0 : (double)cat1And2Count / totalCount;

            wordPair.AreCognatePredicted = type1Score >= 0.5 && type1And2Score >= 0.75;
            wordPair.CognicityScore      = (type1Score * 0.75) + (type1And2Score * 0.25);
        }
Пример #17
0
 /// <summary>
 /// Stores the normalized score of the first alignment as the word pair's predicted cognacy
 /// score and sets the predicted cognacy flag when that score reaches the configured threshold.
 /// </summary>
 /// <param name="wordPair">Word pair whose predicted score and flag are updated.</param>
 /// <param name="alignerResult">Aligner result providing the alignments.</param>
 public void UpdatePredictedCognacy(WordPair wordPair, IWordAlignerResult alignerResult)
 {
     Alignment<Word, ShapeNode> firstAlignment = alignerResult.GetAlignments().First();
     wordPair.PredictedCognacyScore = firstAlignment.NormalizedScore;

     bool meetsThreshold = wordPair.PredictedCognacyScore >= _threshold;
     wordPair.PredictedCognacy = meetsThreshold;
 }
Пример #18
0
        /// <summary>
        /// Classifies every column of the first alignment, rewrites <c>wordPair.AlignmentNotes</c>
        /// with one note per column, and derives the predicted cognacy flag and score from the
        /// category counts.
        /// </summary>
        /// <remarks>
        /// Each column gets a category: 1, 2 or 3, or 0 when the column is ignored. Ignored
        /// columns are noted as "-" and excluded from the totals. When no column is counted
        /// (all ignored, or the alignment has no columns) both ratios are taken as 0 instead of
        /// dividing by zero, so the score is 0 rather than NaN and cognacy is not predicted.
        /// </remarks>
        /// <param name="wordPair">Word pair whose notes, predicted flag and score are updated.</param>
        /// <param name="alignerResult">Aligner result providing the alignment used for classification.</param>
        public void UpdatePredictedCognacy(WordPair wordPair, IWordAlignerResult alignerResult)
        {
            wordPair.AlignmentNotes.Clear();
            int cat1Count     = 0;
            int cat1And2Count = 0;
            int totalCount    = 0;
            Alignment<Word, ShapeNode> alignment = alignerResult.GetAlignments().First();

            for (int column = 0; column < alignment.ColumnCount; column++)
            {
                ShapeNode      uLeftNode  = alignment.GetLeftNode(0, column);
                Ngram<Segment> u          = alignment[0, column].ToNgram(_segmentPool);
                ShapeNode      uRightNode = alignment.GetRightNode(0, column);
                ShapeNode      vLeftNode  = alignment.GetLeftNode(1, column);
                Ngram<Segment> v          = alignment[1, column].ToNgram(_segmentPool);
                ShapeNode      vRightNode = alignment.GetRightNode(1, column);

                // Default category when none of the rules below applies.
                int cat = 3;
                if (u.Equals(v))
                {
                    cat = 1;
                }
                else if (IgnoredMappings.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode))
                {
                    cat = 0;
                }
                else if (u.Length == 0 || v.Length == 0)
                {
                    // One side is empty: an insertion or deletion.
                    if (SimilarSegments.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode))
                    {
                        cat = 1;
                    }
                    else if (IgnoreRegularInsertionDeletion && IsRegular(wordPair, alignerResult, alignment, column, v))
                    {
                        cat = 0;
                    }
                }
                else if (u[0].Type == CogFeatureSystem.VowelType && v[0].Type == CogFeatureSystem.VowelType)
                {
                    cat = SimilarSegments.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode) ? 1 : 2;
                }
                else if (u[0].Type == CogFeatureSystem.ConsonantType && v[0].Type == CogFeatureSystem.ConsonantType)
                {
                    if (RegularConsonantEqual)
                    {
                        // Regularity alone is enough for category 1; similarity alone gives 2.
                        if (IsRegular(wordPair, alignerResult, alignment, column, v))
                        {
                            cat = 1;
                        }
                        else if (SimilarSegments.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode))
                        {
                            cat = 2;
                        }
                    }
                    else
                    {
                        // Similarity is required; regularity then decides between 1 and 2.
                        if (SimilarSegments.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode))
                        {
                            cat = IsRegular(wordPair, alignerResult, alignment, column, v) ? 1 : 2;
                        }
                    }
                }

                if (cat > 0 && cat < 3)
                {
                    cat1And2Count++;
                    if (cat == 1)
                    {
                        cat1Count++;
                    }
                }
                wordPair.AlignmentNotes.Add(cat == 0 ? "-" : cat.ToString(CultureInfo.InvariantCulture));
                if (cat > 0)
                {
                    totalCount++;
                }
            }

            // Guard the denominator: with no counted columns the ratios would be 0.0/0 = NaN.
            double type1Score     = totalCount == 0 ? 0.0 : (double)cat1Count / totalCount;
            double type1And2Score = totalCount == 0 ? 0.0 : (double)cat1And2Count / totalCount;

            wordPair.PredictedCognacy      = type1Score >= 0.5 && type1And2Score >= 0.75;
            wordPair.PredictedCognacyScore = (type1Score * 0.75) + (type1And2Score * 0.25);
        }